/data/runtests/work/sources/libsynthesis/src/sysync_SDK/Sources/sysync

Bug Summary

File:	libsynthesis/src/sysync_SDK/Sources/sysync_utils.cpp
Warning:	line 883, column 15 Value stored to 'c' is never read

Annotated Source Code

1	/*
2	* File: sysync_utils.cpp
3	*
4	* Author: Lukas Zeller (luz@plan44.ch)
5	*
6	* Provides some helper functions interfacing between SyncML Toolkit
7	* and C++
8	*
9	* Copyright (c) 2001-2011 by Synthesis AG + plan44.ch
10	*
11	* 2001-05-16 : luz : created
12	*
13	*/
14
15	#include "prefix_file.h"
16	#include "sync_include.h"
17	#include "sysync_utils.h"
18
19	#include "libmem.h"
20
21
22	#ifdef SYSYNC_TOOL
23	#include "syncappbase.h" // for CONSOLEPRINTF
24	#include "customimplagent.h" // for DBCharSetNames
25	#endif
26
27	namespace sysync {
28
29	// Support for SySync Diagnostic Tool
30	#ifdef SYSYNC_TOOL
31
32	// parse RFC 2822 addr spec
33	int parse2822AddrSpec(int argc, const char *argv[])
34	{
35	if (argc<0) {
36	// help requested
37	CONSOLEPRINTF((" addrparse <RFC2822 addr-spec string to parse>"))SySync_ConsolePrintf(stderr, "SYSYNC " " addrparse <RFC2822 addr-spec string to parse>" "\n");
38	CONSOLEPRINTF((" Parse name and email address out of a RFC2822-type addr-spec"))SySync_ConsolePrintf(stderr, "SYSYNC " " Parse name and email address out of a RFC2822-type addr-spec" "\n");
39	return EXIT_SUCCESS0;
40	}
41	// check for argument
42	if (argc<1) {
43	CONSOLEPRINTF(("1 argument required"))SySync_ConsolePrintf(stderr, "SYSYNC " "1 argument required" "\n" );
44	return EXIT_FAILURE1;
45	}
46	// parse
47	string addrname,addremail;
48	const char* p=argv[0];
49	p=parseRFC2822AddrSpec(p,addrname,addremail);
50	// show
51	CONSOLEPRINTF(("Input : %s",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %s" "\n" ,argv[0]);
52	CONSOLEPRINTF(("Name : %s",addrname.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Name : %s" "\n" ,addrname.c_str());
53	CONSOLEPRINTF(("email : %s",addremail.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "email : %s" "\n" ,addremail.c_str());
54	CONSOLEPRINTF(("unparsed rest : %s",p))SySync_ConsolePrintf(stderr, "SYSYNC " "unparsed rest : %s" "\n" ,p);
55	return EXIT_SUCCESS0;
56	} // parse2822AddrSpec
57
58
59	// convert between character sets
60	int charConv(int argc, const char *argv[])
61	{
62	if (argc<0) {
63	// help requested
64	CONSOLEPRINTF((" charconv [<input charset>] <output charset> <C-string to convert>"))SySync_ConsolePrintf(stderr, "SYSYNC " " charconv [<input charset>] <output charset> <C-string to convert>" "\n");
65	CONSOLEPRINTF((" Convert from one charset to another. Default input is UTF-8"))SySync_ConsolePrintf(stderr, "SYSYNC " " Convert from one charset to another. Default input is UTF-8" "\n");
66	return EXIT_SUCCESS0;
67	}
68
69	#ifdef __TEST_EQUALITY_OF_CP936_WITH_GB2312__
70	// quick test
71	uInt32 ch_in;
72	for (ch_in=0x8100; ch_in<=0xFFFF; ch_in++) {
73	// convert into internal UTF-8
74	string s_internal,s_in;
75	s_in.erase();
76	if (ch_in>=0x8100) s_in+=(ch_in >> 8) & 0xFF;
77	s_in+=(ch_in & 0xFF);
78	s_internal.erase();
79	appendStringAsUTF8(
80	s_in.c_str(),
81	s_internal,
82	chs_gb2312
83	);
84	// convert into output format
85	string s_out;
86	s_out.erase();
87	appendUTF8ToString(
88	s_internal.c_str(),
89	s_out,
90	chs_cp936
91	);
92	// show differences
93	if (s_in!=s_out && s_out.size()>0 && s_out[0]!=INCONVERTIBLE_PLACEHOLDER'_') {
94	string s1,s2;
95	s1.erase(); StrToCStrAppend(s_in.c_str(), s1);
96	s2.erase(); StrToCStrAppend(s_out.c_str(), s2);
97	CONSOLEPRINTF(("\"%s\" != \"%s\"",s1.c_str(),s2.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "\"%s\" != \"%s\"" "\n" ,s1.c_str(),s2.c_str());
98	}
99	}
100	return EXIT_SUCCESS0;
101	#endif
102
103	// check for argument
104	if (argc<2) {
105	CONSOLEPRINTF(("2 or 3 arguments required"))SySync_ConsolePrintf(stderr, "SYSYNC " "2 or 3 arguments required" "\n");
106	return EXIT_FAILURE1;
107	}
108	int ochsarg=1;
109	sInt16 enu;
110	// get input charset
111	TCharSets charset_in=chs_utf8;
112	if (argc==3) {
113	// first arg is input charset
114	if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[0])) {
115	CONSOLEPRINTF(("'%s' is not a valid input charset name",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid input charset name" "\n",argv[0]);
116	return EXIT_FAILURE1;
117	}
118	charset_in = (TCharSets)enu;
119	}
120	else {
121	ochsarg=0; // first arg ist input charset
122	}
123	// get output charset
124	TCharSets charset_out;
125	if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[ochsarg])) {
126	CONSOLEPRINTF(("'%s' is not a valid output charset name",argv[ochsarg]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid output charset name" "\n",argv[ochsarg]);
127	return EXIT_FAILURE1;
128	}
129	charset_out = (TCharSets)enu;
130	// get string to convert
131	string s_in;
132	s_in.erase();
133	CStrToStrAppend(argv[ochsarg+1], s_in);
134	// convert into internal UTF-8
135	string s_internal;
136	s_internal.erase();
137	appendStringAsUTF8(
138	s_in.c_str(),
139	s_internal,
140	charset_in
141	);
142	// convert into output format
143	string s_out;
144	s_out.erase();
145	appendUTF8ToString(
146	s_internal.c_str(),
147	s_out,
148	charset_out
149	);
150	// show all three
151	string show;
152	// - input
153	show.erase(); StrToCStrAppend(s_in.c_str(), show);
154	CONSOLEPRINTF(("Input : %-20s = \"%s\"",DBCharSetNames[charset_in], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %-20s = \"%s\"" "\n",DBCharSetNames[charset_in], show.c_str());
155	// - internal UTF8
156	show.erase(); StrToCStrAppend(s_internal.c_str(), show);
157	CONSOLEPRINTF(("Internal : %-20s = \"%s\"",DBCharSetNames[chs_utf8], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Internal : %-20s = \"%s\"" "\n",DBCharSetNames[chs_utf8], show.c_str());
158	// - output
159	show.erase(); StrToCStrAppend(s_out.c_str(), show);
160	CONSOLEPRINTF(("Output : %-20s = \"%s\"",DBCharSetNames[charset_out], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Output : %-20s = \"%s\"" "\n",DBCharSetNames[charset_out], show.c_str());
161	return EXIT_SUCCESS0;
162	} // charConv
163
164	#endif // SYSYNC_TOOL
165
166
167	// conversion table from ANSI 0x80..0x9F to UCS4
168	const uInt32 Ansi_80_to_9F_to_UCS4[0x20] = {
169	0x20AC, 0 ,0x201A,0x0192, 0x201E,0x2026,0x2020,0x2021, // 0x80..0x87
170	0x02C6,0x2030,0x0160,0x2039, 0x0152, 0 ,0x017D, 0 , // 0x88..0x8F
171	0 ,0x2018,0x2019,0x201C, 0x201D,0x2022,0x2013,0x2014, // 0x90..0x97
172	0x02DC,0x2122,0x0161,0x203A, 0x0153, 0 ,0x017E,0x0178 // 0x98..0x9F
173	};
174
175	// line end mode names
176	const char * const lineEndModeNames[numLineEndModes] = {
177	"none", // none specified
178	"unix", // 0x0A
179	"mac", // 0x0D
180	"dos", // 0x0D 0x0A
181	"cstr", // as in C strings, '\n' which is 0x0A normally (but might be 0x0D on some platforms)
182	"filemaker" // 0x0B (filemaker tab-separated text format, CR is shown as 0x0B within fields
183	};
184
185
186
187	// literal quoting mode names
188	const char * const quotingModeNames[numQuotingModes] = {
189	"none", // none specified
190	"singlequote", // single quote must be duplicated
191	"doublequote", // double quote must be duplicated
192	"backslash" // C-string-style escapes of CR,LF,TAB,BS,\," and ' (but no full c-string escape with \xXX etc.)
193	};
194
195
196	// Encoding format names for SyncML
197	const char * const encodingFmtSyncMLNames[numFmtTypes] = {
198	"chr", // plain chars
199	"bin", // binary
200	"b64" // base 64 encoding
201	};
202	// Encoding format names for user
203	const char * const encodingFmtNames[numFmtTypes] = {
204	"plain-text", // no encoding (plain text)
205	"binary", // plain binary (in WBXML only)
206	"base64" // base 64 encoding
207	};
208
209
210	// field (property) data type names
211	const char * const propDataTypeNames[numPropDataTypes] = {
212	"chr", // Character
213	"int", // Integer
214	"bool", // Boolean
215	"bin", // Binary
216	"datetime", // Date and time of day
217	"phonenum", // Phone number
218	"text", // plain text
219	"???" // unknown
220	};
221
222
223	// Auth type names
224	const char * const authTypeSyncMLNames[numAuthTypes] = {
225	NULL__null, // no authorisation
226	"syncml:auth-basic", // basic (B64 encoded user pw string)
227	"syncml:auth-md5" // Md5 encoded user:pw:nonce
228	};
229
230
231	// MIME encoding types
232	const char * const MIMEEncodingNames[numMIMEencodings] = {
233	"",
234	"7BIT",
235	"8BIT",
236	"BINARY",
237	"QUOTED-PRINTABLE",
238	"BASE64",
239	"B"
240	};
241
242	// Charset names for MIME based strings
243	const char * const MIMECharSetNames[numCharSets] = {
244	"unknown",
245	"US-ASCII",
246	"ANSI",
247	"ISO-8859-1",
248	"UTF-8",
249	"UTF-16",
250	#ifdef CHINESE_SUPPORT
251	"GB2312",
252	"CP936",
253	#endif
254	};
255
256
257	#ifdef SYSYNC_ENGINE1
258	// generate RFC2822-style address specificiation
259	// - Common Name will be quoted
260	// - recipient will be put in angle brackets
261	void makeRFC2822AddrSpec(
262	cAppCharP aCommonName,
263	cAppCharP aRecipient,
264	string &aRFCAddr
265	)
266	{
267	if (aCommonName && *aCommonName) {
268	aRFCAddr='"';
269	while (*aCommonName) {
270	if (*aCommonName=='"') aRFCAddr += "\\\"";
271	else aRFCAddr += *aCommonName;
272	aCommonName++;
273	}
274	aRFCAddr+="\" <";
275	aRFCAddr+=aRecipient;
276	aRFCAddr+=">";
277	}
278	else {
279	// plain email address
280	aRFCAddr=aRecipient;
281	}
282	} // makeRFC2822AddrSpec
283
284
285
286
287	// sysytool -f syncserv_odbc.xml addrparse "(Lukas Peter) luz@synthesis.ch (Zeller), gaga"
288
289	// Parse RFC2822-style address specificiation
290	// - aName will receive name and all (possible) comments
291	// - aRecipient will receive the (first, in case of a group) email address
292	cAppCharP parseRFC2822AddrSpec(
293	cAppCharP aText,
294	string &aName,
295	string &aRecipient
296	)
297	{
298	const char *p;
299	char c;
300
301	enum {
302	pstate_sepspace,
303	pstate_trailing,
304	pstate_text,
305	pstate_comment,
306	pstate_quoted,
307	pstate_email
308	} pstate = pstate_trailing;
309	string text,groupname;
310	bool textcouldbeemail=true;
311	bool atfound=false;
312	aName.erase();
313	aRecipient.erase();
314	p=aText;
315	do {
316	c=*p;
317	// check end of input
318	if (c==0) break; // done with the string
319	// advance to next char
320	p++;
321	// check according to state
322	switch (pstate) {
323	case pstate_sepspace:
324	if (c==' ') {
325	aName+=c;
326	}
327	pstate=pstate_trailing;
328	// otherwise treat like trailing
329	case pstate_trailing:
330	textcouldbeemail=aRecipient.empty();
331	atfound=false;
332	// skip trailing WSP first
333	if (c==' ' \|\| c=='\t' \|\| c=='\n' \|\| c=='\r') break; // simply ignore WSP in trailing mode
334	else pstate=pstate_text;
335	// fall trough to do text analysis
336	case pstate_text:
337	// now check specials
338	if (c==',') { c=0; break; } // end of address, cause exit from loop, next will start after comma
339	else if (c==';') { c=0; break; } // end of group address list, treat it like single address
340	else if (c=='@' && textcouldbeemail) atfound=true; // flag presence of @
341	// check if text could still be a email address by itself
342	if (textcouldbeemail && !isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {
343	textcouldbeemail=false;
344	if (atfound) {
345	aRecipient=text;
346	text.erase();
347	}
348	atfound=false;
349	}
350	// now check other specials
351	if (c=='"') { pstate=pstate_quoted; } // start of quoted string
352	else if (c=='(') { pstate=pstate_comment; } // start of comment
353	else if (c=='<') { aRecipient.erase(); pstate=pstate_email; } // start of angle-addr, overrides other recipient texts
354	else if (c==':') {
355	groupname=aRecipient; // what we've probably parsed as recipient
356	groupname+=aName; // plus name so far
357	groupname+=text; // plus additional text
358	text.erase();
359	aName.erase();
360	aRecipient.erase();
361	pstate=pstate_trailing;
362	} // flag presence of a group name (which can be used as name if addr itself does not have one)
363	else {
364	// add other text chars to the text
365	text += c;
366	}
367	break;
368	case pstate_quoted:
369	if (c=='\\') {
370	if (p) c=p++; else break; // get next char (if any) and add to result untested
371	}
372	else if (c=='"') {
373	// end of quoted string
374	pstate=pstate_sepspace;
375	aName+=text;
376	text.erase();
377	break;
378	}
379	// add to text
380	text += c;
381	break;
382	case pstate_comment:
383	if (c==')') {
384	// end of comment
385	aName+=text;
386	text.erase();
387	pstate=pstate_sepspace;
388	break;
389	}
390	// add to text
391	text += c;
392	break;
393	case pstate_email:
394	if (!isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {
395	// any non-email char terminates email, not only '>', but only '>' is swallowed
396	if (c!='>') p--; // re-evaluate char in next state
397	pstate=pstate_sepspace;
398	break;
399	}
400	// add to email
401	aRecipient += c;
402	break;
403	} // switch
404	} while (c!=0);
405	// handle case of pure email address without name and without < > brackets or :
406	if (aRecipient.empty() && textcouldbeemail && atfound)
407	aRecipient = text;
408	else
409	aName += text;
410	// if name is (now) empty, but we have a group name, use the group name
411	if (aName.empty()) aName=groupname;
412	// remove trailing spaces in aName
413	string::size_type n=aName.find_last_not_of(' ');
414	if (n!=string::npos) aName.resize(n+1);
415	// return where to continue parsing for next addr-spec (if not end of string)
416	return p;
417	} // parseRFC2822AddrSpec
418
419
420
421	// append internal UTF8 string as RFC2047 style encoding
422	const char *appendUTF8AsRFC2047(
423	const char *aText,
424	string &aString
425	)
426	{
427	const char p,q,*r;
428	char c;
429
430	p=aText;
431	do {
432	q=p; // remember start
433	// find chars until next char that must be stored as encoded word
434	do {
435	c=*p;
436	if (c==0 \|\| (c & 0x80) \|\| (c=='=' && *(p+1)=='?')) break;
437	p++;
438	} while(true);
439	// copy chars outside encoded word directly
440	if (p-q>0) aString.append(q,p-q);
441	// check if end of string
442	if (c==0) break;
443	// pack some chars into encoded word
444	// - start word
445	aString.append("=?utf-8?B?"); // 10 chars start (+ 2 chars will be added at end)
446	// - encoded data must be 75-12=63 chars or less
447	// Using B (=b64) encoding, output of 63 chars = 63/4*3 = max 47 chars.
448	// We use 45 max, as this is evenly divisible by 3 and output is 60 chars
449	q=p;
450	while (true) {
451	// find next space
452	while (q && !isspace(q) && q-p<45) q++;
453	if (q-p>=45) break; // abort if exhausted already
454	// find next non-space
455	r=q;
456	while (isspace(*r)) r++;
457	// check if next non-space will start a new word
458	if (*r & 0x80) {
459	// we should include the next word as well, if possible without exceeding size
460	if (r-p<45) {
461	q=r;
462	continue;
463	}
464	}
465	break;
466	}
467	// encode binary stream and append to string
468	appendEncoded((const uInt8 *)p,q-p,aString,enc_b);
469	p=q;
470	// - end word
471	aString.append("?=");
472	} while (true);
473	return p;
474	} // appendUTF8AsRFC2047
475
476
477	// parse character string from RFC2047 style encoding to UTF8 internal string
478	const char *appendRFC2047AsUTF8(
479	const char *aRFC2047,
480	stringSize aSize,
481	string &aString,
482	TLineEndModes aLEM
483	)
484	{
485	const char p,q,r,w;
486	char c = 0;
487	const char *eot = aRFC2047+aSize;
488
489	p=aRFC2047;
490	w=NULL__null; // start of last detected word (to avoid re-scanning)
491	while (p<eot) {
492	q=p; // remember start
493	// find chars until next encoded word
494	while (p<eot) {
495	c=*p;
496	if (c==0 \|\| (p!=w && c=='=' && *(p+1)=='?')) break;
497	p++;
498	}
499	// copy chars outside encoded word directly
500	aString.append(q,p-q);
501	// check if end of string
502	if (p>=eot \|\| c==0) break;
503	// try to parse encoded word
504	q=p+2;
505	scanword:
506	// q is now where we start to parse word contents
507	// p is where we would re-start reading normally if current word turns out not to be a word at all
508	// - remember start of word scan (to avoid re-scanning it)
509	w=p;
510	// - get charset
511	r=q;
512	while (q<eot && q!='?' && isgraph(q)) q++;
513	if (q>=eot \|\| *q!='?') continue; // is not an encoded word, parse normally
514	sInt16 en;
515	TCharSets charset=chs_unknown;
516	if (StrToEnum(MIMECharSetNames, numCharSets, en, r, q-r)) charset=(TCharSets)en;
517	// - get encoding
518	r=++q; // continue after ? separator
519	while (q<eot && q!='?' && isgraph(q)) q++;
520	if (q>=eot \|\| *q!='?') continue; // is not an encoded word, parse normally
521	TEncodingTypes encoding=enc_8bit;
522	if (StrToEnum(MIMEEncodingNames, numMIMEencodings, en, r, q-r)) encoding=(TEncodingTypes)en;
523	// - get data part
524	r=++q;
525	while (q+1<eot && q && q!=' ' && !(q=='?' && (q+1)=='=')) q++;
526	if (q>=eot \|\| *q!='?') continue; // is not an encoded word, parse normally
527	// - decode
528	string decoded;
529	appendDecoded(r,q-r,decoded,encoding);
530	// - convert to UTF-8
531	appendStringAsUTF8(
532	decoded.c_str(),
533	aString,
534	charset,
535	aLEM
536	);
537	// - skip word terminator
538	p=q+2;
539	// - check for special case of adjacent words
540	q=p;
541	while (q<eot && isspace(*q)) q++;
542	if (q+1<eot && q>p && q=='=' && (q+1)=='?') {
543	// adjacent encoded words, only separated by space -> ignore space
544	// p is after previous word
545	q+=2;
546	// q is after lead-in of next word
547	goto scanword;
548	}
549	// p is where we continue reading
550	}
551	return p;
552	} // appendRFC2047AsUTF8
553
554
555	// decode encoded data and append to string
556	const char *appendDecoded(
557	const char *aText,
558	size_t aSize,
559	string &aBinString,
560	TEncodingTypes aEncoding
561	)
562	{
563	char c;
564	const char *p=aText;
565	uInt32 binsz;
566	uInt8 *binP;
567
568	switch (aEncoding) {
569	case enc_quoted_printable :
570	// decode quoted-printable content
571	while ((c=*p++)) {
572	// char found
573	if (c=='=') {
574	uInt16 code;
575	char hex[2];
576	// check for soft break first
577	if (p=='\x0D' \|\| p=='\x0A') {
578	// soft break, swallow
579	if (*p=='\x0D') p++;
580	if (*p=='\x0A') p++;
581	continue;
582	}
583	// decode
584	hex[0]=*p;
585	if (*p) {
586	p++;
587	hex[1]=*p;
588	if (*p) {
589	p++;
590	if (HexStrToUShort(hex,code,2)==2) {
591	c=code; // decoded char
592	}
593	else continue; // simply ignore
594	}
595	else break;
596	}
597	else break;
598	}
599	// append char
600	aBinString+=c;
601	}
602	aText=p;
603	break;
604	case enc_base64:
605	case enc_b:
606	// decode base 64
607	binsz=0;
608	binP = b64::decode(aText, aSize, &binsz);
609	aBinString.append((const char *)binP,binsz);
610	b64::free(binP);
611	aText+=aSize;
612	break;
613	case enc_7bit:
614	case enc_8bit:
615	// copy no more than size
616	if (aSize>0) aBinString.reserve(aBinString.size()+aSize);
617	while (*p && aSize>0) {
618	aBinString+=*p++;
619	aSize--;
620	}
621	aText=p;
622	break;
623	case enc_none:
624	case enc_binary:
625	// copy bytes
626	aBinString.append(aText,aSize);
627	aText+=aSize;
628	break;
629	case numMIMEencodings:
630	// invalid
631	break;
632	} // quoted printable
633	return aText;
634	} // appendDecoded
635
636
637
638	// encode binary stream and append to string
639	void appendEncoded(
640	const uInt8 *aBinary,
641	size_t aSize,
642	string &aString,
643	TEncodingTypes aEncoding,
644	sInt16 aMaxLineSize,
645	sInt32 aCurrLineSize,
646	bool aSoftBreaksAsCR,
647	bool aEncodeBinary
648	)
649	{
650	char c;
651	string::size_type linestart;
652	const uInt8 *p;
653	bool softbreak;
654	uInt32 b64len;
655	char *b64;
656	bool processed;
657
658	switch (aEncoding) {
659	case enc_binary :
660	case enc_none :
661	case enc_8bit :
662	case enc_7bit : // assume we have no 8bit chars
663	// just copy 1:1
664	aString.append((const char *)aBinary,aSize);
665	break;
666	case enc_quoted_printable:
667	// quote-printable encoding
668	// - determine start of last line in aString
669	// Note: this is because property text will be folded when lines aMaxLineSize
670	linestart=aString.size()-aCurrLineSize;
671	for (p=aBinary;p<aBinary+aSize;p++) { // '\0' will not terminate the 'for' loop
672	c=*p;
673	if (!aEncodeBinary && !c) break; // still exit at NUL when not encoding real binary data
674	processed=false; // input data in c is not yet processed
675	// make sure we do not go over the limit (if one is set)
676	// - if less than 8 chars (=0D=0A + =\r) are free, soft break the line
677	softbreak= aMaxLineSize && (aString.size()-linestart>=string::size_type(aMaxLineSize)-8);
678	if (!aEncodeBinary) {
679	if (c=='\r') continue; // ignore them
680	if (c=='\b') continue; // ignore them (optional break indicators, not relevant for QP output)
681	if (c=='\n') { // - encode line ends
682	aString.append("=0D=0A"); // special string for Line Ends (CR LF)
683	processed = true; // c is processed now
684	softbreak = true;
685	} // if
686	} // if
687	// - handle soft line break (but only if really doing line breaking)
688	// Also: avoid adding a soft break at the very end of the string
689	if (softbreak && aMaxLineSize && p+1<aBinary+aSize) {
690	if (aSoftBreaksAsCR)
691	aString.append("=\r"); // '\r' signals softbreak for finalizeproperty()
692	else
693	aString.append("=\x0D\x0A"); // break line here
694	// new line starts after softbreak
695	linestart=aString.size();
696	// make sure soft line break is not followed by unencoded space
697	// (which would look like MIME folding)
698	if (c==' ' \|\| (processed && p[1]==' ')) {
699	aString.append("=20");
700	if (processed) p++; // if current char was already processed, we need to explicitly skip the space
701	processed=true; // char is now processed in any case
702	} // if
703	} // if
704	// now encode the char in c if not already processed by now
705	if (!processed) {
706	bool encodeIt=
707	(c=='=') // escape equal sign itself
708	\|\| (c=='<' && aEncodeBinary) // avoid XML mismatch problems
709	\|\| (uInt8)c>0x7F
710	\|\| (uInt8)c<0x20; // '\0' will be encoded as well
711	if (encodeIt) { // encode all non ASCII chars > 0x7F (and control chars as well)
712	aString+="=";
713	aString+=NibbleToHexDigit(c>>4);
714	aString+=NibbleToHexDigit(c);
715	}
716	else
717	aString+=c; // just copy
718	} // if
719	}
720	break;
721	case enc_base64:
722	case enc_b:
723	// use base64 encoding
724	if (aSize>0) {
725	// don't call b64 with size=0!
726	b64 = b64::encode(
727	aBinary,aSize, // what to encode
728	&b64len, // output size
729	aMaxLineSize, // max line size
730	aSoftBreaksAsCR
731	);
732	// append to output, if any
733	if (b64) {
734	aString.append(b64,b64len);
735	// release buffer
736	b64::free(b64);
737	}
738	if (aEncoding!=enc_b) {
739	// make sure it ends with a newline for "base64" (but NOT for "b" as used in RFC2047)
740	// Note: when used in vCard2.1, that newline is part of the property and show as an
741	// empty line in the vCard.
742	aString += aSoftBreaksAsCR ? "\r" : "\x0D\x0A";
743	}
744	}
745	break;
746	default:
747	// do nothing
748	break;
749	} // switch
750	} // appendEncoded
751
752
753	#ifdef CHINESE_SUPPORT
754	// the flatBinTree tables for converting to and from GB2312
755	#include "gb2312_tables_inc.cpp"
756	// the flatBinTree tables for converting to and from CP936
757	#include "cp936_tables_inc.cpp"
758	#endif
759
760
761	// add char (possibly multi-byte) as UTF8 to value and apply charset translation if needed
762	// - returns > 0 if aNumChars was not correct number of bytes needed to convert an entire character;
763	// return value is number of bytes needed to generate one output character. If return value
764	// is<>0, no char has been appended to aVal.
765	uInt16 appendCharsAsUTF8(const char *aChars, string &aVal, TCharSets aCharSet, uInt16 aNumChars)
766	{
767	uInt32 ucs4;
768	// first char
769	uInt8 c=*aChars;
770	// this is a 8-bit char
771	switch(aCharSet) {
772	case chs_utf8 :
773	// UTF8 is native charset of the application, simply add
774	aVal+=c;
775	break;
776	case chs_ansi :
777	case chs_iso_8859_1 :
778	// do poor man's conversion to UCS4
779	// - most ANSI chars are 1:1 mapped
780	ucs4 = ((uInt8)c & 0xFF);
781	// - except 0x80..0x9F, use table for these
782	if (ucs4>=0x80 && ucs4<=0x9F)
783	ucs4=Ansi_80_to_9F_to_UCS4[ucs4-0x80];
784	// - convert to UTF8
785	UCS4toUTF8(ucs4,aVal);
786	break;
787	#ifdef CHINESE_SUPPORT
788	case chs_gb2312 : // simplified Chinese GB-2312 charset
789	// all below 0x80 are passed as-is
790	if (c<0x80)
791	aVal+=c; // simply append
792	else {
793	// 16-bit GB2312 char
794	if (aNumChars!=2)
795	return 2; // we need 2 chars for a successful GB-2312
796	// we have 2 bytes, convert them
797	ucs4 = searchFlatBintree(gb2312_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');
798	// - convert to UTF8
799	UCS4toUTF8(ucs4,aVal);
800	}
801	break;
802	case chs_cp936: // simplified chinese Windows codepage CP936
803	if (c<0x80)
804	aVal+=c; // simply append
805	else {
806	// 0x0080 (euro sign) or 2-byte CP936
807	if (c==0x80)
808	ucs4=searchFlatBintree(cp936_to_ucs2, 0x0080, INCONVERTIBLE_PLACEHOLDER'_');
809	else {
810	// 16-bit GB2312 char
811	if (aNumChars!=2)
812	return 2; // we need 2 chars for a successful CP936
813	// we have 2 bytes, convert them
814	ucs4 = searchFlatBintree(cp936_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');
815	}
816	// - convert to UTF8
817	UCS4toUTF8(ucs4,aVal);
818	}
819	break;
820	#endif
821	case chs_ascii : // plain 7-bit ASCII
822	default : // unknown
823	// only 7-bit allowed
824	if (c & 0x80)
825	aVal+=INCONVERTIBLE_PLACEHOLDER'_';
826	else
827	aVal+=c;
828	break;
829	} // switch
830	return 0; // ok, converted aNumChars
831	} // appendCharsAsUTF8
832
833
834
835
836	// add string as UTF8 to value and apply charset translation if needed
837	// - if lineEndMode is not lem_none, all sorts of line ends will be converted
838	// to the specified mode.
839	void appendStringAsUTF8(const char *s, string &aVal, TCharSets aCharSet, TLineEndModes aLEM, bool aAllowFilemakerCR)
840	{
841	char c;
842	const char *start=s;
843	if (s) {
844	while ((c=*s++)!=0) {
845	if (aLEM!=lem_none) {
846	// line end handling enabled
847	if (c==0x0D) {
848	// could be mac (0x0D) or DOS (0x0D/0x0A)
849	if (*s==0x0A) {
850	// this is DOS-type line end
851	// - consume the 0x0A as well
852	s++;
853	// - check for 0x0D 0x0D 0x0A special case (caused by
854	// DOS-text-file conversion of non-DOS strings)
855	if (s>=start+3) {
856	if (*(s-3)==0x0D) {
857	// char before the DOS-CRLF was a 0x0D as well (and
858	// has already produced a newline in the output
859	// --> completely ignore this CRLF
860	continue;
861	}
862	}
863	}
864	// is a line end, convert it to platform-lineend
865	c='\n'; // platform
866	}
867	else if (c==0x0A) {
868	// 0x0A without preceeding 0x0D = unix
869	c='\n'; // platform
870	}
871	else if (c==0x0B && aAllowFilemakerCR) {
872	// 0x0B is used as lineend in filemaker export and achilformat
873	c='\n';
874	}
875	// line end converted to platform
876	if (c=='\n' && aLEM!=lem_cstr) {
877	// produce specified line end
878	switch (aLEM) {
879	case lem_mac : c=0x0D; break;
880	case lem_unix : c=0x0A; break;
881	case lem_filemaker : c=0x0B; break;
882	case lem_dos :
883	c=0x0A; // LF will be added later
	Value stored to 'c' is never read
884	aVal+=0x0D; // add CR
885	break;
886	default: break;
887	}
888	}
889	} // line end handling enabled
890	// normal add
891	uInt16 i,seqlen=1; // assume logical char consists of single byte
892	do {
893	seqlen=appendCharsAsUTF8(s-seqlen,aVal,aCharSet,seqlen); // add char (possibly with UTF8 expansion) to aVal
894	if (seqlen<=1) break; // done
895	for (i=1;i<seqlen;i++) { if (*s==0) break; else s++; }
896	if (i<seqlen) break; // not enough bytes
897	} while(true);
898	}
899	}
900	} // appendStringAsUTF8
901
902
903
904	// same as appendUTF8ToString, but output string is cleared first
905	bool storeUTF8ToString(
906	cAppCharP aUTF8, string &aVal,
907	TCharSets aCharSet,
908	TLineEndModes aLEM,
909	TQuotingModes aQuotingMode,
910	size_t aMaxBytes
911	)
912	{
913	aVal.erase();
914	return appendUTF8ToString(aUTF8,aVal,aCharSet,aLEM,aQuotingMode,aMaxBytes);
915	} // storeUTF8ToString
916
917
918
919	// helper for adding chars
920	static void appendCharToString(
921	char c,
922	string &aVal,
923	TQuotingModes aQuotingMode
924	) {
925	if (aQuotingMode==qm_none) {
926	aVal+=c;
927	}
928	else if (aQuotingMode==qm_backslash) {
929	// treat CR, LF, BS, TAB, single/doublequote and backslash specially
930	if (c==0x0D)
931	aVal+="\\r";
932	else if (c==0x0A)
933	aVal+="\\n";
934	else if (c==0x08)
935	aVal+="\\b";
936	else if (c==0x09)
937	aVal+="\\t";
938	else if (c=='"')
939	aVal+="\\\"";
940	else if (c=='\'')
941	aVal+="\\'";
942	else if (c=='\\')
943	aVal+="\\\\";
944	else
945	aVal+=c;
946	}
947	else if (aQuotingMode==qm_duplsingle) {
948	if (c=='\'') aVal+=c; // duplicate
949	aVal+=c; // normal append
950	}
951	else if (aQuotingMode==qm_dupldouble) {
952	if (c=='"') aVal+=c; // duplicate
953	aVal+=c; // normal append
954	}
955	} // appendCharToString
956
957
958	// add UTF8 string to value in custom charset
959	// - if aLEM is not lem_none, occurrence of any type of Linefeeds
960	// (LF,CR,CRLF and even CRCRLF) in input string will be
961	// replaced by the specified line end type
962	// - aQuotingMode specifies what quoting (for ODBC literals for example) should be used
963	// - output is clipped after aMaxBytes bytes (if not 0)
964	// - returns true if all input could be converted, false if output is clipped
965	bool appendUTF8ToString(
966	cAppCharP aUTF8,
967	string &aVal,
968	TCharSets aCharSet,
969	TLineEndModes aLEM,
970	TQuotingModes aQuotingMode,
971	size_t aMaxBytes
972	)
973	{
974	uInt32 ucs4;
975	uInt8 c;
976	size_t n=0;
977	cAppCharP p=aUTF8;
978	cAppCharP start=aUTF8;
979
980	if (!aUTF8) return true; // nothing to copy, copied everything of that!
981	if (aCharSet==chs_utf8 && aLEM==lem_none && aQuotingMode==qm_none) {
982	// shortcut: simply append entire string
983	if (aMaxBytes==0)
984	aVal+=aUTF8;
985	else
986	aVal.append(aUTF8,aMaxBytes);
987	// advance "processed" pointer behind consumed part of string
988	p=aUTF8+aVal.size();
989	}
990	else {
991	// process char by char
992	while((c=*aUTF8)!=0 && (aMaxBytes==0 \|\| n<aMaxBytes)) {
993	p=aUTF8;
994	// check for linefeed conversion
995	if (aLEM!=lem_none && (c==0x0D \|\| c==0x0A)) {
996	aUTF8++;
997	// line end, handling enabled
998	if (c==0x0D) {
999	// could be mac (0x0D) or DOS (0x0D/0x0A)
1000	if (*aUTF8==0x0A) {
1001	// this is DOS-type line end
1002	// - consume the 0x0A as well
1003	aUTF8++;
1004	// - check for 0x0D 0x0D 0x0A special case (caused by
1005	// DOS-text-file conversion of non-DOS strings)
1006	if (aUTF8>=start+3) {
1007	if (*(aUTF8-3)==0x0D) {
1008	// char before the DOS-CRLF was a 0x0D as well (and
1009	// has already produced a newline in the output
1010	// --> completely ignore this CRLF
1011	continue;
1012	}
1013	}
1014	}
1015	// is a line end, convert it to platform-lineend
1016	c='\n'; // platform
1017	}
1018	else { // must be 0x0A
1019	// 0x0A without preceeding 0x0D = unix
1020	c='\n'; // platform
1021	}
1022	// line end converted to platform
1023	if (aLEM!=lem_cstr) {
1024	// produce specified line end
1025	switch (aLEM) {
1026	case lem_mac : c=0x0D; break;
1027	case lem_filemaker : c=0x0B; break;
1028	case lem_unix : c=0x0A; break;
1029	case lem_dos :
1030	c=0x0A; // LF will be added later
1031	n++; // count it extra
1032	if (aMaxBytes && n>=aMaxBytes)
1033	goto stringfull; // no room to complete it, ignore it
1034	appendCharToString(0x0D,aVal,aQuotingMode);
1035	break;
1036	default: break;
1037	}
1038	}
1039	appendCharToString(c,aVal,aQuotingMode);
1040	n++; // count it
1041	} // line end, handling enabled
1042	else {
1043	// non lineend (or lineend not handled specially)
1044	if (aCharSet==chs_utf8) {
1045	aUTF8++;
1046	// - simply add char
1047	appendCharToString(c,aVal,aQuotingMode);
1048	n++;
1049	}
1050	else {
1051	// - make UCS4
1052	p=aUTF8; // save previous position to detect if we have processed all
1053	aUTF8=UTF8toUCS4(aUTF8,ucs4);
1054	// now we have UCS4
1055	if (ucs4==0) {
1056	// UTF8 resulting in UCS4 null char is not allowed
1057	ucs4=INCONVERTIBLE_PLACEHOLDER'_';
1058	}
1059	else {
1060	// convert to specified charset
1061	switch (aCharSet) {
1062	case chs_ansi:
1063	case chs_iso_8859_1:
1064	if ((ucs4<=0xFF && ucs4>=0xA0) \|\| ucs4<0x80)
1065	// 00..7F and A0..FF directly map to ANSI
1066	appendCharToString(ucs4,aVal,aQuotingMode);
1067	else {
1068	// search for matching ANSI in table
1069	uInt8 k;
1070	for (k=0; k<0x20; k++) {
1071	if (ucs4==Ansi_80_to_9F_to_UCS4[k]) {
1072	// found in table
1073	break;
1074	}
1075	}
1076	if (k<0x20)
1077	// conversion found
1078	aVal+=k+0x80;
1079	else
1080	// no conversion found in table
1081	aVal+=INCONVERTIBLE_PLACEHOLDER'_';
1082	} // not in 1:1 range 0..7F, A0..FF
1083	n++;
1084	break;
1085	#ifdef CHINESE_SUPPORT
1086	case chs_gb2312 : // simplified Chinese GB-2312 charset
1087	// all below 0x80 are passed as-is
1088	if (ucs4<0x80) {
1089	appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1090	n++;
1091	}
1092	else {
1093	// convert to 16-bit GB2312 char
1094	uInt16 gb = searchFlatBintree(ucs2_to_gb2312, ucs4, INCONVERTIBLE_PLACEHOLDER'_');
1095	// check if we have space
1096	if (aMaxBytes!=0 && n+2>aMaxBytes)
1097	goto stringfull;
1098	// append as two bytes to output string
1099	aVal+=gb >> 8;
1100	aVal+=gb & 0xFF;
1101	n+=2;
1102	}
1103	break;
1104	case chs_cp936 : // simplified Chinese CP936 windows codepage
1105	// all below 0x80 are passed as-is
1106	if (ucs4<0x80) {
1107	appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1108	n++;
1109	}
1110	else {
1111	// convert to CP936 16-bit representation
1112	uInt16 twobytes = searchFlatBintree(ucs2_to_cp936, ucs4, INCONVERTIBLE_PLACEHOLDER'_');
1113	// append as two bytes to output string, but only this is a CP936 two-byte at all
1114	if (twobytes>0x0080) {
1115	// check if we have space
1116	if (aMaxBytes!=0 && n+2>aMaxBytes)
1117	goto stringfull;
1118	aVal+=twobytes >> 8; // sub-page lead in
1119	n++;
1120	}
1121	aVal+=twobytes & 0xFF; // sub-page code
1122	n++;
1123	}
1124	break;
1125	#endif
1126	case chs_ascii:
1127	// explicit ASCII: convert some special chars to plain ASCII
1128	if ((ucs4 & 0xFFFFFF80) !=0) {
1129	// ASCIIfy table to convert umlauts etc. to nearest plain ASCII
1130	typedef struct {
1131	uInt32 ucs4;
1132	uInt8 ascii;
1133	} TASCIIfyEntry;
1134
1135	static const TASCIIfyEntry ASCIIfyTable[] = {
1136	{ 0x000000C4, 'A' }, // Adieresis
1137	{ 0x000000C5, 'A' }, // Aring
1138	{ 0x000000C7, 'C' }, // Ccedilla
1139	{ 0x000000C9, 'E' }, // Eacute
1140	{ 0x000000D1, 'N' }, // Ntilde
1141	{ 0x000000D6, 'O' }, // Odieresis
1142	{ 0x000000DC, 'U' }, // Udieresis
1143	{ 0x000000E1, 'a' }, // aacute
1144	{ 0x000000E0, 'a' }, // agrave
1145	{ 0x000000E2, 'a' }, // acircumflex
1146	{ 0x000000E4, 'a' }, // adieresis
1147	{ 0x000000E3, 'a' }, // atilde
1148	{ 0x000000E5, 'a' }, // aring
1149	{ 0x000000E7, 'c' }, // ccedilla
1150	{ 0x000000E9, 'e' }, // eacute
1151	{ 0x000000E8, 'e' }, // egrave
1152	{ 0x000000EA, 'e' }, // ecircumflex
1153	{ 0x000000EB, 'e' }, // edieresis
1154	{ 0x000000ED, 'i' }, // iacute
1155	{ 0x000000EC, 'i' }, // igrave
1156	{ 0x000000EE, 'i' }, // icircumflex
1157	{ 0x000000EF, 'i' }, // idieresis
1158	{ 0x000000F1, 'n' }, // ntilde
1159	{ 0x000000F3, 'o' }, // oacute
1160	{ 0x000000F2, 'o' }, // ograve
1161	{ 0x000000F4, 'o' }, // ocircumflex
1162	{ 0x000000F6, 'o' }, // odieresis
1163	{ 0x000000F5, 'o' }, // otilde
1164	{ 0x000000FA, 'u' }, // uacute
1165	{ 0x000000F9, 'u' }, // ugrave
1166	{ 0x000000FB, 'u' }, // ucircumflex
1167	{ 0x000000FC, 'u' }, // udieresis
1168	{ 0x000000DF, 's' }, // germandoubles
1169	{ 0x000000D8, 'O' }, // Oslash
1170	{ 0x000000F8, 'o' }, // oslash
1171	{ 0x000000C0, 'A' }, // Agrave
1172	{ 0x000000C3, 'A' }, // Atilde
1173	{ 0x000000D5, 'O' }, // Otilde
1174	{ 0x00000152, 'O' }, // OE
1175	{ 0x00000153, 'o' }, // oe
1176	{ 0x000000C6, 'A' }, // AE
1177	{ 0x000000E6, 'a' }, // ae
1178	{ 0x000000C2, 'A' }, // Acircumflex
1179	{ 0x000000CA, 'E' }, // Ecircumflex
1180	{ 0x000000C1, 'A' }, // Aacute
1181	{ 0x000000CB, 'E' }, // Edieresis
1182	{ 0x000000C8, 'E' }, // Egrave
1183	{ 0x000000CD, 'I' }, // Iacute
1184	{ 0x000000CC, 'I' }, // Igrave
1185	{ 0x000000CE, 'i' }, // Icircumflex
1186	{ 0x000000CF, 'i' }, // Odieresis
1187	{ 0x000000D3, 'O' }, // Oacute
1188	{ 0x000000D2, 'O' }, // Ograve
1189	{ 0x000000D4, 'O' }, // Ocircumflex
1190	// terminator
1191	{ 0,0 }
1192	};
1193
1194	// search in ASCIIfy table
1195	uInt16 k=0;
1196	while (ASCIIfyTable[k].ucs4!=0) {
1197	if (ucs4==ASCIIfyTable[k].ucs4) {
1198	// found, fetch ASCII-equivalent
1199	ucs4=ASCIIfyTable[k].ascii;
1200	break; // use it
1201	}
1202	k++;
1203	}
1204	}
1205	// fall through to default, which does not know ANY non-ASCII
1206	default:
1207	// only 7 bit ASCII is allowed
1208	if ((ucs4 & 0xFFFFFF80) !=0)
1209	aVal+=INCONVERTIBLE_PLACEHOLDER'_';
1210	else
1211	appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1212	n++;
1213	break;
1214	} // switch
1215	} // valid UCS4
1216	} // not already UTF8
1217	} // if not lineend
1218	// processed until here
1219	p=aUTF8;
1220	} // while not end of input string
1221	} // not already UTF8
1222	// return true if input string completely consumed
1223	stringfull:
1224	return (*p==0);
1225	} // appendUTF8ToString
1226
1227
1228	// convert UTF8 to UCS4
1229	// - returns pointer to next char
1230	// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)
1231	const char UTF8toUCS4(const char aUTF8, uInt32 &aUCS4)
1232	{
1233	uInt8 c;
1234	sInt16 morechars;
1235
1236	if ((c=*aUTF8)!=0) {
1237	aUTF8++;
1238	// there is a char
1239	morechars=0;
1240	// decode UTF8 lead-in
1241	if ((c & 0x80) == 0) {
1242	// single byte
1243	aUCS4=c;
1244	morechars=0;
1245	}
1246	else if ((c & 0xE0) == 0xC0) {
1247	// two bytes
1248	aUCS4=c & 0x1F;
1249	morechars=1;
1250	}
1251	else if ((c & 0xF0) == 0xE0) {
1252	aUCS4=c & 0x0F;
1253	morechars=2;
1254	}
1255	else if ((c & 0xF8) == 0xF0) {
1256	aUCS4=c & 0x07;
1257	morechars=3;
1258	}
1259	else if ((c & 0xFC) == 0xF8) {
1260	aUCS4=c & 0x03;
1261	morechars=4;
1262	}
1263	else if ((c & 0xFE) == 0xFC) {
1264	aUCS4=c & 0x01;
1265	morechars=5;
1266	}
1267	else {
1268	// bad char
1269	aUCS4=0;
1270	}
1271	// process additional chars
1272	while(morechars--) {
1273	if ((c=*aUTF8)==0) {
1274	// unfinished sequence
1275	aUCS4=0;
1276	break;
1277	}
1278	aUTF8++;
1279	if ((c & 0xC0) != 0x80) {
1280	// bad additional char
1281	aUCS4=0;
1282	break;
1283	}
1284	// each additional char adds 6 new bits
1285	aUCS4 = aUCS4 << 6; // shift existing bits
1286	aUCS4 \|= (c & 0x3F); // add new bits
1287	}
1288	}
1289	else {
1290	// no char
1291	aUCS4=0;
1292	}
1293	// return pointer to next char
1294	return aUTF8;
1295	} // UTF8toUCS4
1296
1297
1298	// convert UCS4 to UTF8 (0 char is not allowed and will be ignored!)
1299	void UCS4toUTF8(uInt32 aUCS4, string &aUTF8)
1300	{
1301	uInt8 c;
1302
1303	// ignore null char
1304	if (aUCS4==0) return;
1305	// create UTF8 lead-in
1306	sInt16 morechars=0;
1307	if (aUCS4<0x00000080) {
1308	// one byte
1309	c=aUCS4;
1310	}
1311	else if (aUCS4<0x00000800) {
1312	// two bytes
1313	c=0xC0 \| ((aUCS4 >> 6) & 0x1F);
1314	morechars=1;
1315	}
1316	else if (aUCS4<0x00010000) {
1317	// three bytes
1318	c=0xE0 \| ((aUCS4 >> 12) & 0x0F);
1319	morechars=2;
1320	}
1321	else if (aUCS4<0x00200000) {
1322	// four bytes
1323	c=0xF0 \| ((aUCS4 >> 18) & 0x07);
1324	morechars=3;
1325	}
1326	else if (aUCS4<0x04000000) {
1327	// five bytes
1328	c=0xF8 \| ((aUCS4 >> 24) & 0x03);
1329	morechars=4;
1330	}
1331	else {
1332	// six bytes
1333	c=0xFC \| ((aUCS4 >> 30) & 0x01);
1334	morechars=5;
1335	}
1336	// add lead-in
1337	aUTF8+=c;
1338	// add rest of sequence
1339	while (morechars--) {
1340	c= 0x80 \| ((aUCS4 >> (morechars * 6)) & 0x3F);
1341	aUTF8+=c;
1342	}
1343	} // UCS4toUTF8
1344
1345
1346	/* Encoding UTF-16 (excerpt from RFC 2781, paragraph 2.1)
1347
1348	Encoding of a single character from an ISO 10646 character value to
1349	UTF-16 proceeds as follows. Let U be the character number, no greater
1350	than 0x10FFFF.
1351
1352	1) If U < 0x10000, encode U as a 16-bit unsigned integer and
1353	terminate.
1354
1355	2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF,
1356	U' must be less than or equal to 0xFFFFF. That is, U' can be
1357	represented in 20 bits.
1358
1359	3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and
1360	0xDC00, respectively. These integers each have 10 bits free to
1361	encode the character value, for a total of 20 bits.
1362
1363	4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order
1364	bits of W1 and the 10 low-order bits of U' to the 10 low-order
1365	bits of W2. Terminate.
1366
1367	Graphically, steps 2 through 4 look like:
1368	U' = yyyyyyyyyyxxxxxxxxxx
1369	W1 = 110110yyyyyyyyyy
1370	W2 = 110111xxxxxxxxxx
1371	*/
1372
1373	// convert UCS4 to UTF-16
1374	// - returns 0 for UNICODE range UCS4 and first word of UTF-16 for non UNICODE
1375	uInt16 UCS4toUTF16(uInt32 aUCS4, uInt16 &aUTF16)
1376	{
1377	if (aUCS4<0x10000) {
1378	// in unicode range: single UNICODE char
1379	aUTF16=aUCS4;
1380	return 0; // no second char
1381	}
1382	else {
1383	// out of UNICODE range
1384	aUCS4-=0x10000;
1385	if (aUCS4>0xFFFF) {
1386	// inconvertible
1387	aUTF16=INCONVERTIBLE_PLACEHOLDER'_';
1388	return 0;
1389	}
1390	else {
1391	// convert to two-word UNICODE / UCS-2
1392	aUTF16=0xD800+(aUCS4>>10);
1393	return 0xDC00+(aUCS4 & 0x03FF);
1394	}
1395	}
1396	} // UCS4toUTF16
1397
1398
1399
1400	/* Decoding UTF-16
1401
1402	Decoding of a single character from UTF-16 to an ISO 10646 character
1403	value proceeds as follows. Let W1 be the next 16-bit integer in the
1404	sequence of integers representing the text. Let W2 be the (eventual)
1405	next integer following W1.
1406
1407	1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value
1408	of W1. Terminate.
1409
1410	2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence
1411	is in error and no valid character can be obtained using W1.
1412	Terminate.
1413
1414	3) If there is no W2 (that is, the sequence ends with W1), or if W2
1415	is not between 0xDC00 and 0xDFFF, the sequence is in error.
1416	Terminate.
1417
1418	4) Construct a 20-bit unsigned integer U', taking the 10 low-order
1419	bits of W1 as its 10 high-order bits and the 10 low-order bits of
1420	W2 as its 10 low-order bits.
1421
1422	5) Add 0x10000 to U' to obtain the character value U. Terminate.
1423
1424	Note that steps 2 and 3 indicate errors. Error recovery is not
1425	specified by this document. When terminating with an error in steps 2
1426	and 3, it may be wise to set U to the value of W1 to help the caller
1427	diagnose the error and not lose information. Also note that a string
1428	decoding algorithm, as opposed to the single-character decoding
1429	described above, need not terminate upon detection of an error, if
1430	proper error reporting and/or recovery is provided.
1431
1432	*/
1433
1434	// convert UTF-16 to UCS4
1435	// - returns pointer to next char
1436	// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)
1437	const uInt16 UTF16toUCS4(const uInt16 aUTF16P, uInt32 &aUCS4)
1438	{
1439	uInt16 utf16=*aUTF16P++;
1440
1441	if (utf16<0xD800 \|\| utf16>0xDFFF) {
1442	// single char unicode
1443	aUCS4=utf16;
1444	}
1445	else {
1446	// could be two-char
1447	if (utf16<=0xDBFF) {
1448	// valid first char: check second char
1449	uInt16 utf16_2 = *aUTF16P; // next
1450	if (utf16_2 && utf16_2>=0xDC00 && utf16_2<=0xDFFF) {
1451	// second char exists and is valid
1452	aUTF16P++; // advance now
1453	aUCS4 =
1454	((utf16 & 0x3FF) << 10) +
1455	(utf16_2 & 0x3FF);
1456	}
1457	else
1458	aUCS4=0; // no char
1459	}
1460	else {
1461	aUCS4=0; // no char
1462	}
1463	}
1464	// return advanced pointer
1465	return aUTF16P;
1466	} // UCS4toUTF16
1467
1468
1469
1470
1471
1472
1473	// add UTF8 string as UTF-16 byte stream to 8-bit string
1474	// - if aLEM is not lem_none, occurrence of any type of Linefeeds
1475	// (LF,CR,CRLF and even CRCRLF) in input string will be
1476	// replaced by the specified line end type
1477	// - output is clipped after ByteString reaches aMaxBytes size (if not 0), = approx half as many Unicode chars
1478	// - returns true if all input could be converted, false if output is clipped
1479	bool appendUTF8ToUTF16ByteString(
1480	cAppCharP aUTF8,
1481	string &aUTF16ByteString,
1482	bool aBigEndian,
1483	TLineEndModes aLEM,
1484	uInt32 aMaxBytes
1485	)
1486	{
1487	uInt32 ucs4;
1488	uInt16 utf16=0,utf16_1;
1489	cAppCharP p;
1490
1491	while (aUTF8 && *aUTF8) {
1492	// convert next UTF8 char to UCS4
1493	p=UTF8toUCS4(aUTF8, ucs4);
1494	if (ucs4==0) break; // error in UTF8 encoding, exit
1495	// convert line ends
1496	if (ucs4 == '\n' && aLEM!=lem_none && aLEM!=lem_cstr) {
1497	// produce specified line end
1498	utf16_1=0;
1499	switch (aLEM) {
1500	case lem_mac : utf16=0x0D; break;
1501	case lem_filemaker : utf16=0x0B; break;
1502	case lem_unix : utf16=0x0A; break;
1503	case lem_dos :
1504	utf16_1=0x0D; // CR..
1505	utf16=0x0A; // ..then LF
1506	break;
1507	default: break;
1508	}
1509	}
1510	else {
1511	// ordinary char, use UTF16 encoding
1512	utf16_1 = UCS4toUTF16(ucs4,utf16);
1513	}
1514	// check if appending UTF16 would exceed max size specified
1515	if (aMaxBytes!=0 && aUTF16ByteString.size() + (utf16_1 ? 4 : 2) > aMaxBytes)
1516	break;
1517	// we can append, advance input pointer
1518	aUTF8 = p;
1519	// now append
1520	if (aBigEndian) {
1521	// Big end first, Motorola order
1522	if (utf16_1) {
1523	aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);
1524	aUTF16ByteString += (char)(utf16_1 & 0xFF);
1525	}
1526	aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);
1527	aUTF16ByteString += (char)(utf16 & 0xFF);
1528	}
1529	else {
1530	// Little end first, Intel order
1531	if (utf16_1) {
1532	aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);
1533	aUTF16ByteString += (char)(utf16_1 & 0xFF);
1534	}
1535	aUTF16ByteString += (char)(utf16 & 0xFF);
1536	aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);
1537	}
1538	} // while
1539	// true if all input consumed
1540	return (aUTF8==NULL__null) \|\| (*aUTF8==0);
1541	} // appendUTF8ToUTF16ByteString
1542
1543
1544	// add UTF16 byte string as UTF8 to value
1545	void appendUTF16AsUTF8(
1546	const uInt16 *aUTF16,
1547	uInt32 aNumUTF16Chars,
1548	bool aBigEndian,
1549	string &aVal,
1550	bool aConvertLineEnds,
1551	bool aAllowFilemakerCR
1552	)
1553	{
1554	uInt32 ucs4;
1555	uInt16 utf16pair[2];
1556	cAppCharP inP = (cAppCharP)aUTF16;
1557	bool lastWasCR=false;
1558
1559	while (inP && !(inP==0 && (inP+1)==0) && aNumUTF16Chars>0) {
1560	// get two words (in case of surrogate pair)
1561	if (aBigEndian) {
1562	// Motorola order
1563	utf16pair[0]=(((inP) & 0xFF)<<8) + ((inP+1) & 0xFF);
1564	if (aNumUTF16Chars>1) utf16pair[1]=(((inP+2) & 0xFF)<<8) + ((inP+3) & 0xFF);
1565	}
1566	else {
1567	// Intel order
1568	utf16pair[0]=(((inP+1) & 0xFF)<<8) + ((inP) & 0xFF);
1569	if (aNumUTF16Chars>1) utf16pair[1]=(((inP+3) & 0xFF)<<8) + ((inP+2) & 0xFF);
1570	}
1571	cAppCharP hP = (cAppCharP)UTF16toUCS4(utf16pair, ucs4);
1572	/*
1573	PDEBUGPRINTFX(DBG_PARSE+DBG_EXOTIC,(
1574	"Parsed %ld bytes: (inP)=0x%02hX, (inP+1)=0x%02hX, (inP+2)=0x%02hX, (inP+3)=0x%02hX, utf16pair[0]=0x%04hX, utf16pair[1]=0x%04hX, ucs4=0x%04lX",
1575	(uInt32)(hP-(cAppCharP)utf16pair),
1576	(uInt16)(inP), (uInt16)(inP+1), (uInt16)(inP+2), (uInt16)(inP+3),
1577	(uInt16)utf16pair[0], (uInt16)utf16pair[1],
1578	(uInt32)ucs4
1579	));
1580	*/
1581	uInt32 bytes=hP-(cAppCharP)utf16pair;
1582	inP+=bytes; // next UTF16 to check
1583	aNumUTF16Chars-=bytes/2; // count down UTF16 chars
1584	// convert line ends if selected
1585	if (aConvertLineEnds) {
1586	if (ucs4 == 0x0D) {
1587	lastWasCR=true;
1588	continue;
1589	}
1590	else {
1591	if (ucs4 == 0x0A \|\| (aAllowFilemakerCR && ucs4 == 0x0B))
1592	ucs4 = '\n'; // convert to LineEnd
1593	else if (lastWasCR)
1594	aVal += '\n'; // insert a LineEnd
1595	lastWasCR=false;
1596	}
1597	}
1598	// append to UTF-8 string
1599	UCS4toUTF8(ucs4, aVal);
1600	}
1601	if (lastWasCR)
1602	aVal += '\n'; // input string ended on CR, must be shown in output
1603	} // appendUTF16AsUTF8
1604
1605
1606
1607
1608
1609
1610	#ifdef BINTREE_GENERATOR
1611
1612	// add a key/value pair to the binary tree
1613	void addToBinTree(TBinTreeNode *&aBinTree, treeval_t aMinKey, treeval_t aMaxKey, treeval_t aKey, treeval_t aValue)
1614	{
1615	// start at root
1616	TBinTreeNode **nextPP = &aBinTree;
1617	treeval_t cmpval;
1618	do {
1619	// create the new decision value from max and min
1620	cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);
1621	// create the node if not already there
1622	if (*nextPP==NULL__null) {
1623	*nextPP = new TBinTreeNode;
1624	(*nextPP)->key = cmpval;
1625	(*nextPP)->nextHigher=NULL__null;
1626	(*nextPP)->nextLowerOrEqual=NULL__null;
1627	(*nextPP)->value=0;
1628	}
1629	// check if the node CREATED is a leaf node
1630	// this is the case if max==min
1631	if (aMaxKey==aMinKey) {
1632	// save leaf value (possibly overwriting existing leaf value for same code)
1633	(*nextPP)->value=aValue;
1634	break;
1635	}
1636	// decide which way to go
1637	if (aKey>cmpval) {
1638	// go to the "higher" side
1639	nextPP = &((*nextPP)->nextHigher);
1640	// determine new minimum
1641	aMinKey = cmpval+1; // minimum must be higher than cmpval
1642	}
1643	else {
1644	// go to the "lower or equal" side
1645	nextPP = &((*nextPP)->nextLowerOrEqual);
1646	// determine new maximum
1647	aMaxKey = cmpval; // maximum must be lower or equal than cmpval
1648	}
1649	} while(true);
1650	} // addToBinTree
1651
1652
1653	// dispose a bintree
1654	void disposeBinTree(TBinTreeNode *&aBinTree)
1655	{
1656	if (!aBinTree) return;
1657	if (aBinTree->nextHigher)
1658	disposeBinTree(aBinTree->nextHigher);
1659	if (aBinTree->nextLowerOrEqual)
1660	disposeBinTree(aBinTree->nextLowerOrEqual);
1661	delete aBinTree;
1662	aBinTree=NULL__null;
1663	} // disposeBinTree
1664
1665
1666	// convert key to value using a flat bintree
1667	treeval_t searchBintree(TBinTreeNode *aBinTree, treeval_t aKey, treeval_t aUndefValue, treeval_t aMinKey, treeval_t aMaxKey)
1668	{
1669	treeval_t cmpval;
1670	while(aBinTree) {
1671	// create the new decision value from max and min
1672	cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);
1673	// must match stored cmpval
1674	if (cmpval!=aBinTree->key)
1675	return aUndefValue;
1676	// check if next node must be leaf if the tree contains our key,
1677	// this is the case if max==min
1678	if (aMaxKey==aMinKey) {
1679	if (aBinTree->nextHigher!=NULL__null \|\| aBinTree->nextLowerOrEqual!=NULL__null) {
1680	// no leaf value here, should not be the case ever (we should have
1681	// encountered a node with no left or right link before this!)
1682	return aUndefValue;
1683	}
1684	else {
1685	// found a leaf value here
1686	return aBinTree->value;
1687	}
1688	}
1689	// decide which way to go
1690	if (aKey>cmpval) {
1691	// go to the "higher" side = just next element in array, except if we have the special marker here
1692	if (aBinTree->nextHigher == NULL__null)
1693	return aUndefValue; // we should go higher-side, but can't -> unknown key
1694	aBinTree=aBinTree->nextHigher;
1695	// determine new minimum
1696	aMinKey = cmpval+1; // minimum must be higher than cmpval
1697	}
1698	else {
1699	// go to the "lower" side = element at index indicated by current element, except if we have the special marker here
1700	if (aBinTree->nextLowerOrEqual == NULL__null)
1701	return aUndefValue; // we should go lower-or-equal-side, but can't -> unknown key
1702	aBinTree=aBinTree->nextLowerOrEqual;
1703	// determine new maximum
1704	aMaxKey = cmpval; // maximum must be lower or equal than cmpval
1705	}
1706	}
1707	// if we reach the end of the array, key is not in the tree
1708	return aUndefValue;
1709	} // searchBintree
1710
1711
1712
1713
1714	// make a flat form representation of the bintree in a one-dimensional array
1715	// - higher-side links are implicit (nodes following each other),
1716	// lower-or-equal-side links are explicit
1717	static bool flatBinTreeRecursion(
1718	TBinTreeNode aBinTree, size_t &aIndex, treeval_t aFlatArray, size_t aArrSize, treeval_t aLinksStart, treeval_t aLinksEnd
1719	)
1720	{
1721	// check if array is full
1722	if (aIndex>=aArrSize)
1723	return false;
1724	// examine node to flatten
1725	if (aBinTree->nextHigher==NULL__null && aBinTree->nextLowerOrEqual==NULL__null) {
1726	// this is a leaf node, containing only the value
1727	if (aBinTree->value>=aLinksStart && aBinTree->value<=aLinksEnd)
1728	return false; // link space and value space overlap
1729	aFlatArray[aIndex]=aBinTree->value;
1730	aIndex++;
1731	}
1732	else if (aBinTree->nextHigher==NULL__null) {
1733	// lower-side-only node: set special mark to specify that lower-or-equal side
1734	// implicitly follows (instead of higher-side)
1735	aFlatArray[aIndex]=aLinksStart + 1; // no node points to the immediately following node explicitly, so 1 can be used as special marker
1736	aIndex++;
1737	// - recurse to generate it
1738	if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1739	return false;
1740	}
1741	else {
1742	// this is a branch
1743	// - lower-or-equal side is represented as an index in the array
1744	aFlatArray[aIndex]=aLinksStart + 0; // default to not-existing (no node points to itself, so 0 can be used as NIL index value)
1745	// - higher side branch follows immediately
1746	size_t linkindex = aIndex++;
1747	// - recurse to generate it
1748	if (!flatBinTreeRecursion(aBinTree->nextHigher,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1749	return false;
1750	// - now we have the index where we must insert the lower-or-equal side
1751	if (aBinTree->nextLowerOrEqual!=NULL__null) {
1752	// there is a lower-or-equal side
1753	// - place relative link from original node
1754	uInt32 rellink=aIndex-linkindex;
1755	if ((uInt32)aLinksStart+rellink>(uInt32)aLinksEnd-1L) {
1756	// we need a long link
1757	// - move generated higher side branch one up
1758	for (size_t k=aIndex-1; k>linkindex; k--) aFlatArray[k+1]=aFlatArray[k];
1759	aIndex++; // we've eaten up one extra entry now
1760	// - now set long link
1761	aFlatArray[linkindex]=aLinksEnd-1; // long link marker
1762	if (rellink>0xFFFF)
1763	return false; // cannot jump more than 64k
1764	aFlatArray[linkindex+1]=rellink; // long link
1765	}
1766	else {
1767	// short link is ok
1768	aFlatArray[linkindex]=aLinksStart+rellink;
1769	}
1770	// - now create the lower-or-equal side
1771	if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1772	return false;
1773	}
1774	}
1775	return true;
1776	} // flatBinTreeRecursion
1777
1778
1779	// make a flat form representation of the bintree in a one-dimensional array
1780	// - higher-side links are implicit (nodes following each other),
1781	// lower-or-equal-side links are explicit
1782	bool flatBinTree(
1783	TBinTreeNode *aBinTree, TConvFlatTree &aFlatTree, size_t aArrSize,
1784	treeval_t aMinKey, treeval_t aMaxKey, treeval_t aLinksStart, treeval_t aLinksEnd
1785	)
1786	{
1787	// save tree params
1788	aFlatTree.numelems=0;
1789	aFlatTree.minkey=aMinKey;
1790	aFlatTree.maxkey=aMaxKey;
1791	aFlatTree.linksstart=aLinksStart;
1792	aFlatTree.linksend=aLinksEnd;
1793	// now create actual tree
1794	size_t index=0;
1795	if (!flatBinTreeRecursion(aBinTree,index,aFlatTree.elements,aArrSize,aLinksStart,aLinksEnd))
1796	return false;
1797	aFlatTree.numelems=index; // actual length of array
1798	return true;
1799	} // flatBinTree
1800
1801
1802
1803
1804	#endif
1805
1806
1807	// convert key to value using a flat bintree
1808	treeval_t searchFlatBintree(const TConvFlatTree &aFlatTree, treeval_t aKey, treeval_t aUndefValue)
1809	{
1810	treeval_t cmpval,thisnode;
1811	size_t index=0;
1812	// get start min and max
1813	treeval_t minKey = aFlatTree.minkey;
1814	treeval_t maxKey = aFlatTree.maxkey;
1815	// reject out-of-bounds keys immediately
1816	if (aKey<minKey \|\| aKey>maxKey)
1817	return aUndefValue;
1818	do {
1819	// create the new decision value from max and min
1820	cmpval = minKey+((maxKey-minKey) >> 1);
1821	thisnode = aFlatTree.elements[index];
1822	// check if next node must be leaf if the tree contains our key,
1823	// this is the case if max==min
1824	if (maxKey==minKey) {
1825	#ifdef BINTREE_GENERATOR
1826	if (thisnode>=aFlatTree.linksstart && thisnode<=aFlatTree.linksend) {
1827	// no leaf value here, should not be the case ever (we should have
1828	// encountered a node with no left or right link before this!)
1829	return aUndefValue;
1830	}
1831	else
1832	#endif
1833	{
1834	// found a leaf value here
1835	return (treeval_t) thisnode;
1836	}
1837	}
1838	// decide which way to go
1839	if (aKey>cmpval) {
1840	// go to the "higher" side = just next element in array, except if we have the special marker here
1841	if (thisnode == aFlatTree.linksstart+1)
1842	return aUndefValue; // we should go higher-side, but can't -> unknown key
1843	// next node is next index (or one more in case this is a long link)
1844	if (thisnode == aFlatTree.linksend-1)
1845	index++;
1846	index++;
1847	// determine new minimum
1848	minKey = cmpval+1; // minimum must be higher than cmpval
1849	}
1850	else {
1851	// go to the "lower" side = element at index indicated by current element, except if we have the special marker here
1852	if (thisnode == aFlatTree.linksstart+1)
1853	index++; // special case, "lower" side is immediately following because there is no "higher" side
1854	else {
1855	#ifdef BINTREE_GENERATOR
1856	// if node contains a leaf value instead of a link, something is wrong
1857	if (thisnode<aFlatTree.linksstart \|\| thisnode>aFlatTree.linksend)
1858	return aUndefValue; // no leaf expected here
1859	#endif
1860	if (thisnode==aFlatTree.linksend-1) {
1861	// long link
1862	index++; // skip long link marker
1863	thisnode = aFlatTree.elements[index]; // get link value
1864	index = index+thisnode; // jump by link value
1865	}
1866	else {
1867	// short link
1868	index = index+(thisnode-aFlatTree.linksstart); // get index of next node (relative branch)
1869	}
1870	if (index==0)
1871	return aUndefValue; // there is no link
1872	}
1873	// determine new maximum
1874	maxKey = cmpval; // maximum must be lower or equal than cmpval
1875	}
1876	} while(index<aFlatTree.numelems);
1877	// if we reach the end of the array, key is not in the tree
1878	return aUndefValue;
1879	} // searchFlatBintree
1880
1881	// MD5 and B64 given string
1882	void MD5B64(const char *aString, sInt32 aLen, string &aMD5B64)
1883	{
1884	// determine input length
1885	if (aLen<=0) aLen=strlen(aString);
1886	// calc MD5
1887	md5::SYSYNC_MD5_CTX context;
1888	uInt8 digest[16];
1889	md5::Init (&context);
1890	md5::Update (&context, (const uInt8 *)aString,aLen);
1891	md5::Final (digest, &context);
1892	// b64 encode the MD5 digest
1893	uInt32 b64md5len;
1894	char *b64md5=b64::encode(digest,16,&b64md5len);
1895	// assign result
1896	aMD5B64.assign(b64md5,b64md5len);
1897	// done
1898	b64::free(b64md5); // return buffer allocated by b64::encode
1899	} // MD5B64
1900
1901
1902	// format as Timestamp for use in debug logs
1903	void StringObjTimestamp(string &aStringObj, lineartime_t aTimer)
1904	{
1905	// format the time
1906	if (aTimer==noLinearTime) {
1907	aStringObj = "<no time>";
1908	return;
1909	}
1910	sInt16 y,mo,d,h,mi,s,ms;
1911	lineartime2date(aTimer,&y,&mo,&d);
1912	lineartime2time(aTimer,&h,&mi,&s,&ms);
1913	StringObjPrintf(
1914	aStringObj,
1915	"%04d-%02d-%02d %02d:%02d:%02d.%03d",
1916	y,mo,d,h,mi,s,ms
1917	);
1918	} // StringObjTimestamp
1919
1920
1921	// format as hex string
1922	void StringObjHexString(string &aStringObj, const uInt8 *aBinary, uInt32 aBinSz)
1923	{
1924	aStringObj.erase();
1925	if (!aBinary) return;
1926	while (aBinSz>0) {
1927	AppendHexByte(aStringObj,*aBinary++);
1928	aBinSz--;
1929	}
1930	} // StringObjHexString
1931
1932
1933	// add (already encoded!) CGI to existing URL string
1934	bool addCGItoString(string &aStringObj, cAppCharP aCGI, bool noduplicate)
1935	{
1936	if (!noduplicate \|\| aStringObj.find(aCGI)==string::npos) {
1937	// - Add CGI separator if and only if none exists already
1938	if (aStringObj.find("?")==string::npos)
1939	aStringObj += '?';
1940	aStringObj += aCGI;
1941	return true; // added
1942	}
1943	return false; // nothing added
1944	}
1945
1946
1947	// encode string for being used as a CGI key/value element
1948	string encodeForCGI(cAppCharP aCGI)
1949	{
1950	string cgi;
1951	cAppCharP p = aCGI;
1952	while (p && *p) {
1953	if (p>0x7E \|\| p<=0x20 \|\| p=='%' \|\| p=='?' \|\| p=='&' \|\| p=='#') {
1954	// CGI encode these
1955	cgi += '%';
1956	AppendHexByte(cgi, *p);
1957	}
1958	else {
1959	// use as-is
1960	cgi += *p;
1961	}
1962	p++;
1963	}
1964	return cgi;
1965	} // encodeForCGI
1966
1967
1968	// Count bits
1969	int countbits(uInt32 aMask)
1970	{
1971	int bits=0;
1972	uInt32 mask=0x0000001;
1973	while (mask) {
1974	if (aMask & mask) bits++;
1975	mask=mask << 1;
1976	}
1977	return bits;
1978	} // countbits
1979
1980
// make uppercase
// - converts all chars of aString in place using toupper() of the current C locale
// - chars are passed to toupper() as unsigned char values, because negative
//   values (chars >=0x80 on platforms with signed char) are not allowed there
void StringUpper(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    aString[k]=static_cast<char>(toupper(static_cast<unsigned char>(aString[k])));
  }
} // StringUpper
1986
1987
// make lowercase
// - converts all chars of aString in place using tolower() of the current C locale
// - chars are passed to tolower() as unsigned char values, because negative
//   values (chars >=0x80 on platforms with signed char) are not allowed there
void StringLower(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    aString[k]=static_cast<char>(tolower(static_cast<unsigned char>(aString[k])));
  }
} // StringLower
1993
1994
1995	// Substitute occurences of pattern with replacement in string
1996	void StringSubst(
1997	string &aString, const char *aPattern, const string &aReplacement,
1998	sInt32 aPatternLen,
1999	TCharSets aCharSet, TLineEndModes aLEM,
2000	TQuotingModes aQuotingMode
2001	)
2002	{
2003	StringSubst(
2004	aString, aPattern,
2005	aReplacement.c_str(),
2006	aPatternLen,
2007	aReplacement.size(),
2008	aCharSet, aLEM, aQuotingMode
2009	);
2010	} // StringSubst
2011
2012
2013	// Substitute occurences of pattern with replacement in string
2014	void StringSubst(
2015	string &aString, const char aPattern, const char aReplacement,
2016	sInt32 aPatternLen, sInt32 aReplacementLen,
2017	TCharSets aCharSet, TLineEndModes aLEM,
2018	TQuotingModes aQuotingMode
2019	)
2020	{
2021	string::size_type i;
2022	string s;
2023	i=0;
2024	if (aPatternLen<0) aPatternLen=strlen(aPattern);
2025	// convert if needed
2026	if (!aReplacement) {
2027	aReplacement=""; // empty string if not specified
2028	aReplacementLen=0;
2029	}
2030	if (aCharSet!=chs_unknown) {
2031	appendUTF8ToString(aReplacement,s,aCharSet,aLEM,aQuotingMode);
2032	aReplacement=s.c_str();
2033	aReplacementLen=s.size();
2034	}
2035	else {
2036	if (aReplacementLen<0) aReplacementLen=strlen(aReplacement);
2037	}
2038	// now replace
2039	while((i=aString.find(aPattern,i))!=string::npos) {
2040	aString.replace(i,aPatternLen,aReplacement);
2041	i+=aReplacementLen;
2042	}
2043	} // StringSubst
2044
2045
2046	// Substitute occurences of pattern with replacement in string
2047	void StringSubst(string &aString, const char *aPattern, const string &aReplacement, sInt32 aPatternLen)
2048	{
2049	StringSubst(aString,aPattern,aReplacement.c_str(),aPatternLen,aReplacement.size());
2050	} // StringSubst
2051
2052
2053	// Substitute occurences of pattern with integer number in string
2054	void StringSubst(string &aString, const char *aPattern, sInt32 aNumber, sInt32 aPatternLen)
2055	{
2056	string s;
2057	StringObjPrintf(s,"%ld",(long)aNumber);
2058	StringSubst(aString,aPattern,s,aPatternLen);
2059	} // StringSubst
2060
2061
2062
2063	// copy PCdata contents into std::string object
2064	void smlPCDataToStringObj(const SmlPcdataPtr_t aPcdataP, string &aStringObj)
2065	{
2066	if (!aPcdataP \|\| !aPcdataP->content) {
2067	// no content at all
2068	aStringObj.erase();
2069	}
2070	else if (
2071	// NOTE: Opaque works only with modified syncML toolkit which
2072	// makes sure opaque content is ALSO TERMINATED LIKE A C-STRING
2073	aPcdataP->contentType == SML_PCDATA_STRING \|\|
2074	aPcdataP->contentType == SML_PCDATA_OPAQUE
2075	) {
2076	// string or opaque type
2077	aStringObj.assign((char *)aPcdataP->content, aPcdataP->length);
2078	}
2079	else if (aPcdataP->contentType == SML_PCDATA_EXTENSION) {
2080	// extension type
2081	StringObjPrintf(aStringObj,"[PCDATA_EXTENSION Type=%hd]",(sInt16)aPcdataP->extension);
2082	}
2083	else {
2084	// other type
2085	StringObjPrintf(aStringObj,"[PCDATA Type=%hd]",(sInt16)aPcdataP->contentType);
2086	}
2087	} // smlPCDataToStringObj
2088
2089
2090	// returns item string or empty string (NEVER NULL)
2091	const char *smlItemDataToCharP(const SmlItemPtr_t aItemP)
2092	{
2093	if (!aItemP) return "";
2094	return smlPCDataToCharP(aItemP->data);
2095	} // smlItemDataToCharP
2096
2097
2098	// returns first item string or empty string (NEVER NULL)
2099	const char *smlFirstItemDataToCharP(const SmlItemListPtr_t aItemListP)
2100	{
2101	if (!aItemListP) return "";
2102	return smlItemDataToCharP(aItemListP->item);
2103	} // smlFirstItemDataToCharP
2104	#endif //SYSYNC_ENGINE
2105
2106	// returns pointer to PCdata contents or null string. If aSizeP!=NULL, length will be stored in *aSize
2107	const char smlPCDataToCharP(const SmlPcdataPtr_t aPcdataP, stringSize aSizeP)
2108	{
2109	const char *str = smlPCDataOptToCharP(aPcdataP, aSizeP);
2110	if (str) return str;
2111	return "";
2112	} // smlPCDataToCharP
2113
2114
2115	// returns pointer to PCdata contents if existing, NULL otherwise.
2116	// If aSizeP!=NULL, length will be stored in *aSize
2117	const char smlPCDataOptToCharP(const SmlPcdataPtr_t aPcdataP, stringSize aSizeP)
2118	{
2119	if (!aPcdataP \|\| !aPcdataP->content) {
2120	return NULL__null; // we have no value, it could be empty howevert
2121	if (aSizeP) *aSizeP=0;
2122	}
2123	if (aPcdataP->length==0) {
2124	// empty content
2125	if (aSizeP) *aSizeP=0;
2126	return ""; // return empty string
2127	}
2128	else if (
2129	// NOTE: Opaque works only with modified syncML toolkit which
2130	// makes sure opaque content is ALSO TERMINATED LIKE A C-STRING
2131	aPcdataP->contentType == SML_PCDATA_STRING \|\|
2132	aPcdataP->contentType == SML_PCDATA_CDATA \|\| // XML only
2133	aPcdataP->contentType == SML_PCDATA_OPAQUE // WBXML only
2134	) {
2135	// return pointer to content
2136	if (aSizeP) *aSizeP=aPcdataP->length;
2137	return (char *) aPcdataP->content;
2138	}
2139	else {
2140	// no string
2141	if (aSizeP) *aSizeP=11;
2142	return "[no string]";
2143	}
2144	} // smlPCDataOptToCharP
2145
2146
2147	// returns pointer to source or target LocURI
2148	const char *smlSrcTargLocURIToCharP(const SmlTargetPtr_t aSrcTargP)
2149	{
2150	if (!aSrcTargP \|\| !aSrcTargP->locURI) {
2151	return ""; // empty string
2152	}
2153	else {
2154	// return PCdata string contents
2155	return smlPCDataToCharP(aSrcTargP->locURI);
2156	}
2157	} // smlSrcTargLocURIToCharP
2158
2159
2160	// returns pointer to source or target LocName
2161	const char *smlSrcTargLocNameToCharP(const SmlTargetPtr_t aSrcTargP)
2162	{
2163	if (!aSrcTargP \|\| !aSrcTargP->locName) {
2164	return ""; // empty string
2165	}
2166	else {
2167	// return PCdata string contents
2168	return smlPCDataToCharP(aSrcTargP->locName);
2169	}
2170	} // smlSrcTargLocNameToCharP
2171
2172
2173	#ifdef SYSYNC_ENGINE1
2174	// returns error code made ready for SyncML sending (that is, remove offset
2175	// of 10000 if present, and make generic error 500 for non-SyncML errors,
2176	// and return LOCERR_OK as 200)
2177	localstatus syncmlError(localstatus aErr)
2178	{
2179	if (aErr==LOCERR_OK) return 200; // SyncML ok code
2180	if (aErr<999) return aErr; // return as is
2181	if (aErr>=LOCAL_STATUS_CODE+100 && aErr<=999)
2182	return aErr-LOCAL_STATUS_CODE; // return with offset removed
2183	// no suitable conversion
2184	return 500; // return generic "bad"
2185	} // localError
2186
2187
2188	// returns error code made local (that is, offset by 10000 in case aErr is a
2189	// SyncML status code <10000, and convert 200 into LOCERR_OK)
2190	localstatus localError(localstatus aErr)
2191	{
2192	if (aErr==200 \|\| aErr==0) return LOCERR_OK;
2193	if (aErr<LOCAL_STATUS_CODE) return aErr+LOCAL_STATUS_CODE;
2194	return aErr;
2195	} // localError
2196
2197
2198	// returns pure relative URI, if specified relative or absolute to
2199	// given server URI
2200	const char relativeURI(const char aURI,const char *aServerURI)
2201	{
2202	// check for "./" type relative URI
2203	if (strnncmp(aURI,URI_RELPREFIX"./",2)==0) {
2204	// relative URI prefixed with "./", just zap the relative part
2205	return aURI+2;
2206	}
2207	else if (aServerURI) {
2208	// test if absolute URI specifying the right server
2209	uInt32 n=strlen(aServerURI);
2210	if (strnncmp(aURI,aServerURI,n)==0) {
2211	// beginning of URI matches server's URI
2212	const char *p=aURI+n;
2213	// skip delimiter, if any
2214	if (*p=='/') p++;
2215	// return relative part of URI
2216	return p;
2217	}
2218	}
2219	// just return unmodified
2220	return aURI;
2221	} // relativeURI
2222
2223
// Split hostname of the form "address[:port]" into address and port parts.
// - aHost : text to split at the first colon. NULL is treated like an
//           empty string
// - aAddr : if not NULL, receives everything before the first colon
//           (or the entire text if there is no colon)
// - aPort : if not NULL, receives everything after the first colon
//           (or is made empty if there is no colon)
void splitHostname(const char *aHost,std::string *aAddr,std::string *aPort)
{
  // NULL must not reach strchr()
  const char *p = aHost ? aHost : "";
  const char *q = strchr(p,':');
  if (q) {
    // port spec found
    if (aAddr) aAddr->assign(p,q-p);
    if (aPort) aPort->assign(q+1);
  }
  else {
    // no port spec
    if (aAddr) aAddr->assign(p);
    if (aPort) aPort->erase();
  }
} // splitHostname
2241
// Value of a hex digit (upper or lower case), -1 if aChar is no hex digit
static int urlHexDigitValue(char aChar)
{
  // Note: <cctype> functions must not be fed with negative values,
  //       so go through unsigned char
  const int c = tolower((unsigned char)aChar);
  if (c>='0' && c<='9') return c-'0';
  if (c>='a' && c<='f') return c-'a'+10;
  return -1;
}


// Translate %XX escapes into the corresponding characters, in place.
// - str : string to decode, NULL is allowed (nothing happens)
// Handling of malformed input:
// - a string ending within an escape loses the incomplete escape
// - an invalid first digit counts as 0
// - an escape with an invalid second digit is dropped entirely
// Only the text up to the first NUL char in the string is processed.
void urlDecode(std::string *str)
{
  // nothing todo?
  if (!str ||
      str->find('%') == std::string::npos) return;

  std::string decoded;
  decoded.reserve(str->size());
  const char *in = str->c_str();
  char c;
  while ((c = *in++) != 0) {
    if (c != '%') {
      // ordinary char, just copy
      decoded += c;
      continue;
    }
    // - high nibble
    const char hiChar = *in++;
    if (!hiChar) break; // string ends within escape
    int hi = urlHexDigitValue(hiChar);
    if (hi<0) hi = 0; // silently skip invalid character
    // - low nibble
    const char loChar = *in++;
    if (!loChar) break; // string ends within escape
    const int lo = urlHexDigitValue(loChar);
    if (lo<0) continue; // silently skip invalid character
    decoded += (char)(unsigned char)(hi*16 + lo);
  }
  *str = decoded;
}
2285
// Translate all chars which are not alphanumeric (according to isalnum())
// into %XX escapes with upper case hex digits, in place.
// - str : string to encode, NULL is allowed (nothing happens)
// A string not containing any char to escape is left completely untouched.
// Only the text up to the first NUL char in the string is processed.
void urlEncode(std::string *str)
{
  if (!str) {
    return;
  }

  static const char hexDigits[] = "0123456789ABCDEF";
  std::string encoded;
  bool escapedAny = false;
  for (const char *in = str->c_str(); *in != 0; in++) {
    // Note: work with the unsigned value, because a negative char is not
    //       allowed for isalnum() and would not fit into two hex digits
    const unsigned char uc = (unsigned char)*in;
    if (isalnum(uc)) {
      encoded += *in;
    }
    else {
      encoded += '%';
      encoded += hexDigits[uc >> 4];
      encoded += hexDigits[uc & 0x0F];
      escapedAny = true;
    }
  }

  if (escapedAny) {
    // we found unsafe characters, so the string needs to be replaced
    *str = encoded;
  }
}
2315
// Split URL into protocol, hostname, document name and auth-info (user, password).
// NOTHING is url-decoded here, all parts are returned exactly as they appear
// in aURI. Every output parameter may be NULL if the caller does not need
// the part; parts not present in the URL are returned as empty strings.
// - aURI      : URL to split, NULL is treated like an empty string
// - aProtocol : text before the first colon (empty if there is no colon)
// - aHost     : host name without port
// - aDoc      : path following the host, without leading slash
// - aUser     : user name, only recognized in the form "user:password@"
//               and only when a protocol is present
// - aPasswd   : password, see aUser
// - aPort     : text between the colon following the host name and the
//               end of the host part
// - aQuery    : text after the first question mark, without the question mark
void splitURL(const char *aURI, std::string *aProtocol, std::string *aHost,
  std::string *aDoc, std::string *aUser, std::string *aPasswd,
  std::string *aPort, std::string *aQuery)
{
  // NULL must not reach strchr()
  const char *p = aURI ? aURI : "";
  const char *q;

  // extract protocol
  q=strchr(p,':');
  if (q) {
    // protocol found
    if (aProtocol) aProtocol->assign(p,q-p);
    p=q+1; // past colon
    // past slashes (two expected, ignore if less are given)
    for (int slashes=0; *p=='/' && slashes<2; slashes++) p++;
    // now identify end of host part (first slash or end of string)
    const char *hostEnd=strchr(p,'/');
    if (!hostEnd) hostEnd=p+strlen(p);
    const std::string hostPart(p,hostEnd-p);
    // if protocol specified, check for auth info, which must consist
    // of a colon followed (later) by an at sign
    const std::string::size_type at=hostPart.find('@');
    const std::string::size_type colon=hostPart.find(':');
    if (at!=std::string::npos && colon!=std::string::npos && at>colon) {
      // auth exists
      if (aUser) aUser->assign(hostPart,0,colon);
      if (aPasswd) aPasswd->assign(hostPart,colon+1,at-colon-1);
      // skip auth in full string
      p+=at+1;
    }
    else {
      // no auth found
      if (aUser) aUser->erase();
      if (aPasswd) aPasswd->erase();
    }
    // p now points to host part, as expected below
  }
  else {
    // no protocol found
    if (aProtocol) aProtocol->erase();
    // no protocol, no auth
    if (aUser) aUser->erase();
    if (aPasswd) aPasswd->erase();
  }
  // separate hostname and document
  std::string host;
  // - check for path
  q=strchr(p,'/');
  if (!q) {
    // no path: doc part left empty in this case
    if (aDoc) aDoc->erase();
    // check if there is a CGI param directly after the host name
    q=strchr(p,'?');
    if (q) {
      // query directly follows host
      host.assign(p,q-p);
      if (aQuery) aQuery->assign(q+1);
    }
    else {
      // entire string is considered the host
      host.assign(p);
      if (aQuery) aQuery->erase();
    }
  }
  else {
    // host part stops at slash
    host.assign(p,q-p);
    // in case of '/', do not put slash into docname
    // even if it would be empty (caller expected to add
    // slash as needed)
    p=q+1; // exclude slash
    // now check for query
    q=strchr(p,'?');
    if (q) {
      // split at question mark
      if (aDoc) aDoc->assign(p,q-p);
      if (aQuery) aQuery->assign(q+1);
    }
    else {
      // whole string is document name
      if (aDoc) aDoc->assign(p);
      if (aQuery) aQuery->erase();
    }
  }

  // remove optional port from host part here, that is before caller gets
  // a chance to url-decode the host, because decoding might introduce
  // new : characters into the host name
  const std::string::size_type portColon=host.find(':');
  if (portColon!=std::string::npos) {
    if (aHost) aHost->assign(host,0,portColon);
    if (aPort) aPort->assign(host,portColon+1,std::string::npos);
  }
  else {
    if (aHost) aHost->assign(host);
    if (aPort) aPort->erase();
  }
} // splitURL
2420
2421	#ifdef SPLIT_URL_MAIN
2422
2423	#include <stdio.h>
2424	#include <assert.h>
2425
2426	static void test(const std::string &in, const std::string &expected)
2427	{
2428	string protocol, host, doc, user, password, port, query;
2429	char buffer[1024];
2430
2431	splitURL(in.c_str(), &protocol, &host, &doc, &user, &password, &port, &query);
2432
2433	// URL-decode each part
2434	urlDecode(&protocol);
2435	urlDecode(&host);
2436	urlDecode(&doc);
2437	urlDecode(&user);
2438	urlDecode(&password);
2439
2440	sprintf(buffer,
2441	"prot '%s' user '%s' passwd '%s' host '%s' port '%s' doc '%s' query '%s'",
2442	protocol.c_str(),
2443	user.c_str(),
2444	password.c_str(),
2445	host.c_str(),
2446	port.c_str(),
2447	doc.c_str(),
2448	query.c_str());
2449	printf("%s -> %s\n", in.c_str(), buffer);
2450	assert(expected == buffer);
2451	}
2452
2453	int main(int argc, char **argv)
2454	{
2455	test("http://user:passwd@host/patha/pathb?query",
2456	"prot 'http' user 'user' passwd 'passwd' host 'host' port '' doc 'patha/pathb' query 'query'");
2457	test("http://user:passwd@host:port/patha/pathb?query",
2458	"prot 'http' user 'user' passwd 'passwd' host 'host' port 'port' doc 'patha/pathb' query 'query'");
2459	test("file:///foo/bar",
2460	"prot 'file' user '' passwd '' host '' port '' doc 'foo/bar' query ''");
2461	test("http://host%3a:port?param=value",
2462	"prot 'http' user '' passwd '' host 'host:' port 'port' doc '' query 'param=value'");
2463	test("http://host%3a?param=value",
2464	"prot 'http' user '' passwd '' host 'host:' port '' doc '' query 'param=value'");
2465	test("foo%24",
2466	"prot '' user '' passwd '' host 'foo$' port '' doc '' query ''");
2467	test("foo%2f",
2468	"prot '' user '' passwd '' host 'foo/' port '' doc '' query ''");
2469	test("foo%2A",
2470	"prot '' user '' passwd '' host 'foo*' port '' doc '' query ''");
2471	test("foo%24bar",
2472	"prot '' user '' passwd '' host 'foo$bar' port '' doc '' query ''");
2473	test("%24bar",
2474	"prot '' user '' passwd '' host '$bar' port '' doc '' query ''");
2475	test("foo%2",
2476	"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2477	test("foo%",
2478	"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2479	test("foo%g",
2480	"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2481	test("foo%gh",
2482	"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2483	test("%ghbar",
2484	"prot '' user '' passwd '' host 'bar' port '' doc '' query ''");
2485	return 0;
2486	}
2487	#endif // SPLIT_URL_MAIN
2488
2489	#endif //SYSYNC_ENGINE
2490
2491
2492	// returns type from meta
2493	const char *smlMetaTypeToCharP(SmlMetInfMetInfPtr_t aMetaP)
2494	{
2495	if (!aMetaP) return NULL__null; // no meta at all
2496	return smlPCDataToCharP(aMetaP->type);
2497	} // smlMetaTypeToCharP
2498
2499
2500
2501	// returns Next Anchor from meta
2502	const char *smlMetaNextAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)
2503	{
2504	if (!aMetaP) return NULL__null; // no meta at all
2505	if (!aMetaP->anchor) return NULL__null; // no anchor at all
2506	return smlPCDataToCharP(aMetaP->anchor->next);
2507	} // smlMetaAnchorToCharP
2508
2509
2510	// returns Last Anchor from meta
2511	const char *smlMetaLastAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)
2512	{
2513	if (!aMetaP) return NULL__null; // no meta at all
2514	if (!aMetaP->anchor) return NULL__null; // no anchor at all
2515	return smlPCDataToCharP(aMetaP->anchor->last);
2516	} // smlMetaLastAnchorToCharP
2517
2518
2519	// returns DevInf pointer if any in specified PCData, NULL otherwise
2520	SmlDevInfDevInfPtr_t smlPCDataToDevInfP(const SmlPcdataPtr_t aPCDataP)
2521	{
2522	if (!aPCDataP) return NULL__null;
2523	if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;
2524	if (aPCDataP->extension!=SML_EXT_DEVINF) return NULL__null;
2525	return (SmlDevInfDevInfPtr_t)(aPCDataP->content);
2526	} // smlPCDataToDevInfP
2527
2528
2529	// returns MetInf pointer if any in specified PCData, NULL otherwise
2530	SmlMetInfMetInfPtr_t smlPCDataToMetInfP(const SmlPcdataPtr_t aPCDataP)
2531	{
2532	if (!aPCDataP) return NULL__null;
2533	if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;
2534	if (aPCDataP->extension!=SML_EXT_METINF) return NULL__null;
2535	return (SmlMetInfMetInfPtr_t)(aPCDataP->content);
2536	} // smlPCDataToMetInfP
2537
2538
2539	// allocate memory via SyncML toolkit allocation function, but throw
2540	// exception if it fails. Used by SML
2541	void *_smlMalloc(MemSize_t size)
2542	{
2543	void *p;
2544
2545	p=smlLibMalloc(size);
2546	if (!p) SYSYNC_THROW(TMemException("smlLibMalloc() failed"))throw TMemException("smlLibMalloc() failed");
2547	return p;
2548	} // _smlMalloc
2549
2550
2551	// returns true on successful conversion of PCData string to sInt32
2552	bool smlPCDataToULong(const SmlPcdataPtr_t aPCDataP, uInt32 &aLong)
2553	{
2554	return StrToULong(smlPCDataToCharP(aPCDataP),aLong);
2555	} // smlPCDataToLong
2556
2557	// returns true on successful conversion of PCData string to sInt32
2558	bool smlPCDataToLong(const SmlPcdataPtr_t aPCDataP, sInt32 &aLong)
2559	{
2560	return StrToLong(smlPCDataToCharP(aPCDataP),aLong);
2561	} // smlPCDataToLong
2562
2563	#ifdef SYSYNC_ENGINE1
2564	// returns true on successful conversion of PCData string to format
2565	bool smlPCDataToFormat(const SmlPcdataPtr_t aPCDataP, TFmtTypes &aFmt)
2566	{
2567	const char *fmt = smlPCDataToCharP(aPCDataP);
2568	sInt16 sh;
2569	if (*fmt) {
2570	if (!StrToEnum(encodingFmtSyncMLNames,numFmtTypes,sh,fmt))
2571	return false; // unknown format
2572	aFmt=(TFmtTypes)sh;
2573	}
2574	else {
2575	aFmt=fmt_chr; // no spec = chr
2576	}
2577	return true;
2578	} // smlPCDataToFormat
2579	#endif //SYSYNC_ENGINE
2580
2581	// build Meta anchor
2582	SmlPcdataPtr_t newMetaAnchor(const char aNextAnchor, const char aLastAnchor)
2583	{
2584	SmlPcdataPtr_t metaP;
2585	SmlMetInfAnchorPtr_t anchorP;
2586
2587	// - create empty meta
2588	metaP=newMeta();
2589	// - create new anchor
2590	anchorP=SML_NEW(SmlMetInfAnchor_t)((SmlMetInfAnchor_t*) _smlMalloc(sizeof(SmlMetInfAnchor_t)));
2591	// - set anchor contents
2592	//%%% anchorP->last=newPCDataOptEmptyString(aLastAnchor); // optional, but omitted only if string is NULL (not if only empty)
2593	anchorP->last=newPCDataOptString(aLastAnchor); // optional
2594	anchorP->next=newPCDataString(aNextAnchor); // mandatory
2595	// - set anchor
2596	((SmlMetInfMetInfPtr_t)(metaP->content))->anchor=anchorP;
2597	// return
2598	return metaP;
2599	} // newMetaAnchor
2600
2601
2602	// build Meta type
2603	SmlPcdataPtr_t newMetaType(const char *aMetaType)
2604	{
2605	SmlPcdataPtr_t metaP;
2606
2607	// - if not type, we don't create a meta at all
2608	if (aMetaType==NULL__null \|\| *aMetaType==0) return NULL__null;
2609	// - create empty meta
2610	metaP=newMeta();
2611	// - set type
2612	((SmlMetInfMetInfPtr_t)(metaP->content))->type=newPCDataString(aMetaType);
2613	// return
2614	return metaP;
2615	} // newMetaType
2616
2617
2618	// build empty Meta
2619	SmlPcdataPtr_t newMeta(void)
2620	{
2621	SmlPcdataPtr_t metaP;
2622	SmlMetInfMetInfPtr_t metinfP;
2623
2624	// - create empty PCData
2625	metaP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));
2626	metaP->contentType=SML_PCDATA_EXTENSION;
2627	metaP->extension=SML_EXT_METINF;
2628	// - %%% assume length is not relevant for structured content (looks like in mgrutil.c)
2629	metaP->length=0;
2630	// - create empty meta
2631	metinfP = SML_NEW(SmlMetInfMetInf_t)((SmlMetInfMetInf_t*) _smlMalloc(sizeof(SmlMetInfMetInf_t)));
2632	metaP->content=metinfP; // link to PCdata
2633	// - init meta options
2634	metinfP->version=NULL__null;
2635	metinfP->format=NULL__null;
2636	metinfP->type=NULL__null;
2637	metinfP->mark=NULL__null;
2638	metinfP->size=NULL__null;
2639	metinfP->nextnonce=NULL__null;
2640	metinfP->maxmsgsize=NULL__null;
2641	metinfP->mem=NULL__null;
2642	metinfP->emi=NULL__null; // PCData list
2643	metinfP->anchor=NULL__null;
2644	// - SyncML 1.1
2645	metinfP->maxobjsize=NULL__null;
2646	// - SyncML 1.2
2647	metinfP->flags=0;
2648	// return
2649	return metaP;
2650	} // newMeta
2651
2652
2653	// copy meta from existing meta (for data items only
2654	// anchor, mem, emi, nonce are not copied!)
2655	// Note however that we copy maxobjsize, as we (mis-)use it for ZIPPED_BINDATA_SUPPORT
2656	SmlPcdataPtr_t copyMeta(SmlPcdataPtr_t aOldMetaP)
2657	{
2658	if (!aOldMetaP) return NULL__null;
2659	SmlPcdataPtr_t newmetaP=newMeta();
2660	if (!newmetaP) return NULL__null;
2661	SmlMetInfMetInfPtr_t oldmetinfP = smlPCDataToMetInfP(aOldMetaP);
2662	if (!oldmetinfP) return NULL__null;
2663	SmlMetInfMetInfPtr_t newmetInfP = smlPCDataToMetInfP(newmetaP);
2664	// - copy meta
2665	newmetInfP->version = smlPcdataDup(oldmetinfP->version);
2666	newmetInfP->format = smlPcdataDup(oldmetinfP->format);
2667	newmetInfP->type = smlPcdataDup(oldmetinfP->type);
2668	newmetInfP->mark = smlPcdataDup(oldmetinfP->mark);
2669	newmetInfP->size = smlPcdataDup(oldmetinfP->size);
2670	newmetInfP->maxobjsize = smlPcdataDup(oldmetinfP->maxobjsize);
2671	// return
2672	return newmetaP;
2673	} // copyMeta
2674
2675
2676
2677
2678	// add an item to an item list
2679	SmlItemListPtr_t *addItemToList(
2680	SmlItemPtr_t aItemP, // existing item data structure, ownership is passed to list
2681	SmlItemListPtr_t *aItemListPP // adress of pointer to existing item list or NULL
2682	)
2683	{
2684	if (aItemListPP && aItemP) {
2685	// find last itemlist pointer
2686	while (*aItemListPP) {
2687	aItemListPP=&((*aItemListPP)->next);
2688	}
2689	// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2690	aItemListPP = SML_NEW(SmlItemList_t)((SmlItemList_t) _smlMalloc(sizeof(SmlItemList_t)));
2691	(*aItemListPP)->next=NULL__null;
2692	(*aItemListPP)->item=aItemP; // insert new item
2693	// return pointer to pointer to next element (which is now NULL).
2694	// Can be passed in to addPCDataToList() again to append more elements without searching
2695	// for end-of-list
2696	return &((*aItemListPP)->next);
2697	}
2698	// nop, return pointer unmodified
2699	return aItemListPP;
2700	} // addItemToList
2701
2702
2703	// add a CTData item to a CTDataList
2704	SmlDevInfCTDataListPtr_t *addCTDataToList(
2705	SmlDevInfCTDataPtr_t aCTDataP, // existing CTData item data structure, ownership is passed to list
2706	SmlDevInfCTDataListPtr_t *aCTDataListPP // adress of pointer to existing item list or NULL
2707	)
2708	{
2709	if (aCTDataListPP && aCTDataP) {
2710	// find last itemlist pointer
2711	while (*aCTDataListPP) {
2712	aCTDataListPP=&((*aCTDataListPP)->next);
2713	}
2714	// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2715	aCTDataListPP = SML_NEW(SmlDevInfCTDataList_t)((SmlDevInfCTDataList_t) _smlMalloc(sizeof(SmlDevInfCTDataList_t )));
2716	(*aCTDataListPP)->next=NULL__null;
2717	(*aCTDataListPP)->data=aCTDataP; // insert new data
2718	// return pointer to pointer to next element (which is now NULL).
2719	// Can be passed in to addPCDataToList() again to append more elements without searching
2720	// for end-of-list
2721	return &((*aCTDataListPP)->next);
2722	}
2723	// nop, return pointer unmodified
2724	return aCTDataListPP;
2725	} // addCTDataToList
2726
2727
2728	// add a CTDataProp item to a CTDataPropList
2729	SmlDevInfCTDataPropListPtr_t *addCTDataPropToList(
2730	SmlDevInfCTDataPropPtr_t aCTDataPropP, // existing CTDataProp item data structure, ownership is passed to list
2731	SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL
2732	)
2733	{
2734	if (aCTDataPropListPP && aCTDataPropP) {
2735	// find last itemlist pointer
2736	while (*aCTDataPropListPP) {
2737	aCTDataPropListPP=&((*aCTDataPropListPP)->next);
2738	}
2739	// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2740	aCTDataPropListPP = SML_NEW(SmlDevInfCTDataPropList_t)((SmlDevInfCTDataPropList_t) _smlMalloc(sizeof(SmlDevInfCTDataPropList_t )));
2741	(*aCTDataPropListPP)->next=NULL__null;
2742	(*aCTDataPropListPP)->data=aCTDataPropP; // insert new data
2743	// return pointer to pointer to next element (which is now NULL).
2744	// Can be passed in to addPCDataToList() again to append more elements without searching
2745	// for end-of-list
2746	return &((*aCTDataPropListPP)->next);
2747	}
2748	// nop, return pointer unmodified
2749	return aCTDataPropListPP;
2750	} // addCTDataPropToList
2751
2752
2753	// add a CTData describing a property (as returned by newDevInfCTData())
2754	// as a new property without parameters to a CTDataPropList
2755	SmlDevInfCTDataPropListPtr_t *addNewPropToList(
2756	SmlDevInfCTDataPtr_t aPropCTData, // CTData describing property
2757	SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL
2758	)
2759	{
2760	SmlDevInfCTDataPropPtr_t propdataP = SML_NEW(SmlDevInfCTDataProp_t)((SmlDevInfCTDataProp_t*) _smlMalloc(sizeof(SmlDevInfCTDataProp_t )));
2761	propdataP->param = NULL__null; // no params
2762	propdataP->prop = aPropCTData;
2763	return addCTDataPropToList(propdataP, aCTDataPropListPP);
2764	} // addNewPropToList
2765
2766
2767
2768	// add PCData element to a PCData list
2769	SmlPcdataListPtr_t *addPCDataToList(
2770	SmlPcdataPtr_t aPCDataP, // Existing PCData element to be added, ownership is passed to list
2771	SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL
2772	)
2773	{
2774	if (aPCDataListPP) {
2775	// find last PCDataList pointer
2776	while (*aPCDataListPP) {
2777	aPCDataListPP=&((*aPCDataListPP)->next);
2778	}
2779	// aItemListPP now points to a NULL pointer which must be replaced by addr of new PCDataList entry
2780	aPCDataListPP = SML_NEW(SmlPcdataList_t)((SmlPcdataList_t) _smlMalloc(sizeof(SmlPcdataList_t)));
2781	(*aPCDataListPP)->next=NULL__null;
2782	(*aPCDataListPP)->data=aPCDataP; // insert new item
2783	// return pointer to pointer to next element (which is now NULL).
2784	// Can be passed in to addPCDataToList() again to append more elements without searching
2785	// for end-of-list
2786	return &((*aPCDataListPP)->next);
2787	}
2788	return NULL__null;
2789	} // addPCDataToList
2790
2791
2792	// add PCData string to a PCData list
2793	SmlPcdataListPtr_t *addPCDataStringToList(
2794	const char *aString, // String to be added
2795	SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL
2796	)
2797	{
2798	return addPCDataToList(newPCDataString(aString),aPCDataListPP);
2799	} // addPCDataStringToList
2800
2801
2802	// create new optional location (source or target)
2803	// Returns NULL if URI specified is NULL or empty
2804	SmlSourcePtr_t newOptLocation(
2805	const char *aLocURI,
2806	const char *aLocName
2807	)
2808	{
2809	if (!aLocURI \|\| *aLocURI==0) return NULL__null;
2810	else return newLocation(aLocURI,aLocName);
2811	} // newOptLocation
2812
2813
2814	// create new location (source or target)
2815	// always returns location, even if URI and/or name are empty
2816	// If name is NULL or empty, only URI is generated
2817	SmlSourcePtr_t newLocation(
2818	const char *aLocURI,
2819	const char *aLocName
2820	)
2821	{
2822	SmlSourcePtr_t locP;
2823
2824	locP = SML_NEW(SmlSource_t)((SmlSource_t*) _smlMalloc(sizeof(SmlSource_t)));
2825	// URI is always present (might be empty, though)
2826	locP->locURI=newPCDataString(aLocURI);
2827	// name only if not empty
2828	if (aLocName && *aLocName!=0)
2829	locP->locName=newPCDataString(aLocName);
2830	else
2831	locP->locName=NULL__null;
2832	// filter defaults to NULL
2833	locP->filter=NULL__null;
2834	return locP;
2835	} // newLocation
2836
2837
2838	// create new empty Item
2839	SmlItemPtr_t newItem(void)
2840	{
2841	SmlItemPtr_t itemP;
2842
2843	itemP = SML_NEW(SmlItem_t)((SmlItem_t*) _smlMalloc(sizeof(SmlItem_t)));
2844	itemP->target=NULL__null;
2845	itemP->source=NULL__null;
2846	itemP->meta=NULL__null;
2847	itemP->data=NULL__null;
2848	// SyncML 1.1, no MoreData set
2849	itemP->flags=0;
2850	// SyncML 1.2
2851	itemP->targetParent=NULL__null;
2852	itemP->sourceParent=NULL__null;
2853	// custom data of client
2854	itemP->aux=NULL__null;
2855	return itemP;
2856	} // newItem
2857
2858
2859	// create new Item with string-type data
2860	SmlItemPtr_t newStringDataItem(
2861	const char *aString
2862	)
2863	{
2864	SmlItemPtr_t itemP=newItem();
2865	itemP->data=newPCDataString(aString);
2866	return itemP;
2867	} // newStringDataItem
2868
2869
2870	// create meta-format PCData
2871	SmlPcdataPtr_t newPCDataFormat(
2872	TFmtTypes aFmtType,
2873	bool aShowDefault
2874	)
2875	{
2876	if (aFmtType==fmt_chr && !aShowDefault)
2877	return NULL__null; // default
2878	else
2879	return newPCDataString(encodingFmtSyncMLNames[aFmtType]); // show format type
2880	} // newPCDataFormat
2881
2882
2883	// create new string-type PCData, if NULL or empty string is passed for aData,
2884	// NULL is returned (optional info not there)
2885	SmlPcdataPtr_t newPCDataFormatted(
2886	const uInt8 *aData, // data
2887	sInt32 aLength, // length of data, if<=0 then string length is calculated
2888	TFmtTypes aFmtType, // encoding Format
2889	bool aNeedsOpaque // set opaque needed (string that could confuse XML parsing or even binary)
2890	)
2891	{
2892	if (!aData) return NULL__null; // no data
2893	if (aLength==0) aLength=strlen((const char *)aData);
2894	if (aLength==0) return NULL__null; // no data
2895	// encode input string if needed
2896	SmlPcdataPtr_t pcdataP;
2897	char *b64data;
2898	uInt32 b64len;
2899	switch (aFmtType) {
2900	case fmt_b64:
2901	// convert to b64
2902	b64len=0;
2903	b64data=b64::encode(aData, aLength, &b64len);
2904	pcdataP = newPCDataString(b64data,b64len);
2905	b64::free(b64data);
2906	return pcdataP;
2907	default:
2908	// just copy into string or opaque/C_DATA string
2909	return newPCDataStringX(aData, aNeedsOpaque, aLength);
2910	}
2911	} // newPCDataEncoded
2912
2913
2914	// create new string-type PCData, if NULL or empty string is passed for aString,
2915	// NULL is returned (optional info not there)
2916	SmlPcdataPtr_t newPCDataOptString(
2917	const char *aString,
2918	sInt32 aLength // length of string, if<0 then length is calculated
2919	)
2920	{
2921	if (aString && (*aString!=0))
2922	return newPCDataString(aString,aLength);
2923	else
2924	return NULL__null;
2925	} // newPCDataOptString
2926
2927
2928	// create new string-type PCData, if NULL is passed for aString,
2929	// NULL is returned (optional info not there)
2930	// if empty string is passed, PCData with empty contents will be created
2931	SmlPcdataPtr_t newPCDataOptEmptyString(
2932	const char *aString,
2933	sInt32 aLength // length of string, if<0 then length is calculated
2934	)
2935	{
2936	if (aString)
2937	return newPCDataString(aString,aLength);
2938	else
2939	return NULL__null;
2940	} // newPCDataOptEmptyString
2941
2942
2943	// create new string-type PCData, if NULL is passed for aString,
2944	// an empty string is created (that is, a PCData with string terminator as
2945	// content only, length=0)
2946	SmlPcdataPtr_t newPCDataString(
2947	const char *aString,
2948	sInt32 aLength // length of string, if<0 then length is calculated
2949	)
2950	{
2951	return newPCDataStringX((const uInt8 *)aString,false,aLength);
2952	} // newPCDataString
2953
2954
2955	// create new PCData, aOpaque can be used to generate non-string data
2956	// Note: empty strings are always coded as non-opaque, even if aOpaque is set
2957	SmlPcdataPtr_t newPCDataStringX(
2958	const uInt8 *aString,
2959	bool aOpaque, // if set, an opaque method (OPAQUE or CDATA) is used
2960	sInt32 aLength // length of string, if<0 then length is calculated
2961	)
2962	{
2963	SmlPcdataPtr_t pcdataP;
2964
2965	pcdataP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));
2966
2967	// determine length
2968	if (aLength>=0 && aString)
2969	pcdataP->length = aLength; // as specified, and string argument not NULL
2970	else
2971	pcdataP->length = aString ? strlen((const char *)aString) : 0; // from argument, if NULL -> length=0
2972	// determine type
2973	if (aOpaque && aLength!=0) {
2974	// Note: due to modification in RTK, this generates
2975	// OPAQUE in WBXML and CDATA in XML
2976	pcdataP->contentType=SML_PCDATA_OPAQUE;
2977	}
2978	else {
2979	// non-critical string
2980	#ifdef SML_STRINGS_AS_OPAQUE
2981	pcdataP->contentType=SML_PCDATA_OPAQUE;
2982	#else
2983	pcdataP->contentType=SML_PCDATA_STRING;
2984	#endif
2985	}
2986	pcdataP->extension=SML_EXT_UNDEFINED;
2987	// - allocate data space (ALWAYS with room for a terminator, even if Opaque or empty string)
2988	pcdataP->content=smlLibMalloc(pcdataP->length+1); // +1 for terminator, see below
2989	// copy data (if any)
2990	if (pcdataP->length>0) {
2991	// - copy string
2992	smlLibMemcpy(pcdataP->content,aString,pcdataP->length);
2993	}
2994	// set terminator
2995	((char *)(pcdataP->content))[pcdataP->length]=0; // terminate C string
2996	// return
2997	return pcdataP;
2998	} // newPCDataStringX
2999
3000
3001	// create new string-type PCData from C++ string
3002	SmlPcdataPtr_t newPCDataString(
3003	const string &aString
3004	)
3005	{
3006	return newPCDataString(aString.c_str(),aString.length());
3007	} // newPCDataString(string&)
3008
3009
3010	// create new decimal string representation of sInt32 as PCData
3011	SmlPcdataPtr_t newPCDataLong(
3012	sInt32 aLong
3013	)
3014	{
3015	const int ssiz=20;
3016	char s[ssiz];
3017
3018	snprintf(s,ssiz,"%ld",(long)aLong);
3019	return newPCDataString(s);
3020	} // newPCDataLong
3021
3022
3023	// Nonce generator allowing last-session nonce to be correctly re-generated in next session
3024	void generateNonce(string &aNonce, const char *aDevStaticString, sInt32 aSessionStaticID)
3025	{
3026	md5::SYSYNC_MD5_CTX context;
3027	uInt8 digest[16];
3028	md5::Init (&context);
3029	// - add in static device string
3030	md5::Update (&context, (const uInt8 *)aDevStaticString, strlen(aDevStaticString));
3031	// - add in session static ID in binary format
3032	md5::Update (&context, (const uInt8 *)&aSessionStaticID, sizeof(sInt32));
3033	// - done
3034	md5::Final (digest, &context);
3035	// - make string of first 48 bit of MD5: 48 bits, use 6 bits per char = 8 chars
3036	uInt64 dig48 = ((uInt32)digest[0] << 0) \|
3037	((uInt32)digest[1] << 8) \|
3038	((uInt32)digest[2] << 16) \|
3039	((uInt32)digest[3] << 24);
3040	aNonce.erase();
3041	for (sInt16 k=0; k<8; k++) {
3042	aNonce+=((dig48 & 0x03F) + 0x21);
3043	dig48 = dig48 >> 6;
3044	}
3045	} // generateNonce
3046
3047
3048	// create challenge of requested type
3049	SmlChalPtr_t newChallenge(TAuthTypes aAuthType, const string &aNextNonce, bool aBinaryAllowed)
3050	{
3051	SmlChalPtr_t chalP=NULL__null;
3052	SmlMetInfMetInfPtr_t metaP;
3053
3054	if (aAuthType!=auth_none) {
3055	// new challenge record
3056	chalP = SML_NEW(SmlChal_t)((SmlChal_t*) _smlMalloc(sizeof(SmlChal_t)));
3057	// add empty meta
3058	chalP->meta=newMeta();
3059	metaP=(SmlMetInfMetInfPtr_t)(chalP->meta->content);
3060	// add type and format
3061	// - type
3062	metaP->type=newPCDataString(authTypeSyncMLNames[aAuthType]);
3063	// - format
3064	const char *fmt = NULL__null;
3065	switch (aAuthType) {
3066	case auth_basic:
3067	// always request b64
3068	fmt=encodingFmtSyncMLNames[fmt_b64];
3069	break;
3070	case auth_md5:
3071	// request b64 only for non-binary capable encoding (that is, XML)
3072	/* %%% dont do that, Nokia9210 miserably fails when we do that,
3073	* it sends its data B64 encoded, but obviously with bad
3074	* data in it. Ericsson T39m seems to do it correctly however.
3075	if (!aBinaryAllowed)
3076	fmt=encodingFmtSyncMLNames[fmt_b64];
3077	*/
3078	// always request b64 for now, seems to be safer with not fully compatible clients
3079	fmt=encodingFmtSyncMLNames[fmt_b64];
3080	break;
3081	default: break;
3082	}
3083	metaP->format=newPCDataOptString(fmt); // set format, but not empty
3084	// - add nonce if needed
3085	if (aAuthType==auth_md5) {
3086	// MD5 also might need nonce
3087	if (!aNextNonce.empty()) {
3088	// add base64 encoded nonce string
3089	uInt32 b64len;
3090	char b64=b64::encode((const uInt8 )aNextNonce.c_str(),aNextNonce.size(),&b64len);
3091	metaP->nextnonce=newPCDataString(b64,b64len);
3092	b64::free(b64); // return buffer allocated by b64_encode
3093	}
3094	}
3095	}
3096	return chalP;
3097	} // newChallenge
3098
3099
3100	// create new property or param descriptor for CTCap
3101	SmlDevInfCTDataPtr_t newDevInfCTData(cAppCharP aName,uInt32 aSize, bool aNoTruncate, uInt32 aMaxOccur, cAppCharP aDataType)
3102	{
3103	SmlDevInfCTDataPtr_t result = SML_NEW(SmlDevInfCTData_t)((SmlDevInfCTData_t*) _smlMalloc(sizeof(SmlDevInfCTData_t)));
3104	// fill descriptor
3105	// - name if property or param
3106	result->name=newPCDataString(aName);
3107	// - no display name so far
3108	result->dname=NULL__null; // no display name
3109	// - datatype (optional)
3110	result->datatype=newPCDataOptString(aDataType);
3111	// - max size
3112	if (aSize==0)
3113	result->maxsize=NULL__null; // no size
3114	else
3115	result->maxsize=newPCDataLong(aSize); // set size
3116	// - no valenum here, will be added later if any
3117	result->valenum=NULL__null; // no valenum
3118	// SyncML 1.2
3119	if (aMaxOccur==0)
3120	result->maxoccur=NULL__null; // no maxoccur
3121	else
3122	result->maxoccur=newPCDataLong(aMaxOccur); // set maxoccur
3123	result->flags = aNoTruncate ? SmlDevInfNoTruncate_f0x0020 : 0; // notruncate flag or none
3124	return result;
3125	} // newDevInfCTData
3126
3127
3128	// frees prototype element and sets calling pointer to NULL
3129	void FreeProtoElement(void * &aVoidP)
3130	{
3131	if (aVoidP) smlFreeProtoElement(aVoidP);
3132	aVoidP=NULL__null;
3133	} // FreeProtoElement
3134
3135	} // namespace sysync
3136
3137	// eof