Bug Summary

File: libsynthesis/src/sysync_SDK/Sources/sysync_utils.cpp
Warning: line 879, column 28
Value stored to 'c' is never read

Annotated Source Code

1/*
2 * File: sysync_utils.cpp
3 *
4 * Author: Lukas Zeller (luz@plan44.ch)
5 *
6 * Provides some helper functions interfacing between SyncML Toolkit
7 * and C++
8 *
9 * Copyright (c) 2001-2011 by Synthesis AG + plan44.ch
10 *
11 * 2001-05-16 : luz : created
12 *
13 */
14
15#include "prefix_file.h"
16#include "sync_include.h"
17#include "sysync_utils.h"
18
19#include "libmem.h"
20
21
22#ifdef SYSYNC_TOOL
23 #include "syncappbase.h" // for CONSOLEPRINTF
24 #include "customimplagent.h" // for DBCharSetNames
25#endif
26
27namespace sysync {
28
29// Support for SySync Diagnostic Tool
30#ifdef SYSYNC_TOOL
31
32// parse RFC 2822 addr spec
33int parse2822AddrSpec(int argc, const char *argv[])
34{
35 if (argc<0) {
36 // help requested
37 CONSOLEPRINTF((" addrparse <RFC2822 addr-spec string to parse>"))SySync_ConsolePrintf(stderr, "SYSYNC " " addrparse <RFC2822 addr-spec string to parse>"
"\n")
;
38 CONSOLEPRINTF((" Parse name and email address out of a RFC2822-type addr-spec"))SySync_ConsolePrintf(stderr, "SYSYNC " " Parse name and email address out of a RFC2822-type addr-spec"
"\n")
;
39 return EXIT_SUCCESS0;
40 }
41 // check for argument
42 if (argc<1) {
43 CONSOLEPRINTF(("1 argument required"))SySync_ConsolePrintf(stderr, "SYSYNC " "1 argument required" "\n"
)
;
44 return EXIT_FAILURE1;
45 }
46 // parse
47 string addrname,addremail;
48 const char* p=argv[0];
49 p=parseRFC2822AddrSpec(p,addrname,addremail);
50 // show
51 CONSOLEPRINTF(("Input : %s",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %s" "\n"
,argv[0])
;
52 CONSOLEPRINTF(("Name : %s",addrname.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Name : %s" "\n"
,addrname.c_str())
;
53 CONSOLEPRINTF(("email : %s",addremail.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "email : %s" "\n"
,addremail.c_str())
;
54 CONSOLEPRINTF(("unparsed rest : %s",p))SySync_ConsolePrintf(stderr, "SYSYNC " "unparsed rest : %s" "\n"
,p)
;
55 return EXIT_SUCCESS0;
56} // parse2822AddrSpec
57
58
59// convert between character sets
60int charConv(int argc, const char *argv[])
61{
62 if (argc<0) {
63 // help requested
64 CONSOLEPRINTF((" charconv [<input charset>] <output charset> <C-string to convert>"))SySync_ConsolePrintf(stderr, "SYSYNC " " charconv [<input charset>] <output charset> <C-string to convert>"
"\n")
;
65 CONSOLEPRINTF((" Convert from one charset to another. Default input is UTF-8"))SySync_ConsolePrintf(stderr, "SYSYNC " " Convert from one charset to another. Default input is UTF-8"
"\n")
;
66 return EXIT_SUCCESS0;
67 }
68
69 #ifdef __TEST_EQUALITY_OF_CP936_WITH_GB2312__
70 // quick test
71 uInt32 ch_in;
72 for (ch_in=0x8100; ch_in<=0xFFFF; ch_in++) {
73 // convert into internal UTF-8
74 string s_internal,s_in;
75 s_in.erase();
76 if (ch_in>=0x8100) s_in+=(ch_in >> 8) & 0xFF;
77 s_in+=(ch_in & 0xFF);
78 s_internal.erase();
79 appendStringAsUTF8(
80 s_in.c_str(),
81 s_internal,
82 chs_gb2312
83 );
84 // convert into output format
85 string s_out;
86 s_out.erase();
87 appendUTF8ToString(
88 s_internal.c_str(),
89 s_out,
90 chs_cp936
91 );
92 // show differences
93 if (s_in!=s_out && s_out.size()>0 && s_out[0]!=INCONVERTIBLE_PLACEHOLDER'_') {
94 string s1,s2;
95 s1.erase(); StrToCStrAppend(s_in.c_str(), s1);
96 s2.erase(); StrToCStrAppend(s_out.c_str(), s2);
97 CONSOLEPRINTF(("\"%s\" != \"%s\"",s1.c_str(),s2.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "\"%s\" != \"%s\"" "\n"
,s1.c_str(),s2.c_str())
;
98 }
99 }
100 return EXIT_SUCCESS0;
101 #endif
102
103 // check for argument
104 if (argc<2) {
105 CONSOLEPRINTF(("2 or 3 arguments required"))SySync_ConsolePrintf(stderr, "SYSYNC " "2 or 3 arguments required"
"\n")
;
106 return EXIT_FAILURE1;
107 }
108 int ochsarg=1;
109 sInt16 enu;
110 // get input charset
111 TCharSets charset_in=chs_utf8;
112 if (argc==3) {
113 // first arg is input charset
114 if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[0])) {
115 CONSOLEPRINTF(("'%s' is not a valid input charset name",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid input charset name"
"\n",argv[0])
;
116 return EXIT_FAILURE1;
117 }
118 charset_in = (TCharSets)enu;
119 }
120 else {
121 ochsarg=0; // first arg ist input charset
122 }
123 // get output charset
124 TCharSets charset_out;
125 if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[ochsarg])) {
126 CONSOLEPRINTF(("'%s' is not a valid output charset name",argv[ochsarg]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid output charset name"
"\n",argv[ochsarg])
;
127 return EXIT_FAILURE1;
128 }
129 charset_out = (TCharSets)enu;
130 // get string to convert
131 string s_in;
132 s_in.erase();
133 CStrToStrAppend(argv[ochsarg+1], s_in);
134 // convert into internal UTF-8
135 string s_internal;
136 s_internal.erase();
137 appendStringAsUTF8(
138 s_in.c_str(),
139 s_internal,
140 charset_in
141 );
142 // convert into output format
143 string s_out;
144 s_out.erase();
145 appendUTF8ToString(
146 s_internal.c_str(),
147 s_out,
148 charset_out
149 );
150 // show all three
151 string show;
152 // - input
153 show.erase(); StrToCStrAppend(s_in.c_str(), show);
154 CONSOLEPRINTF(("Input : %-20s = \"%s\"",DBCharSetNames[charset_in], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %-20s = \"%s\""
"\n",DBCharSetNames[charset_in], show.c_str())
;
155 // - internal UTF8
156 show.erase(); StrToCStrAppend(s_internal.c_str(), show);
157 CONSOLEPRINTF(("Internal : %-20s = \"%s\"",DBCharSetNames[chs_utf8], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Internal : %-20s = \"%s\""
"\n",DBCharSetNames[chs_utf8], show.c_str())
;
158 // - output
159 show.erase(); StrToCStrAppend(s_out.c_str(), show);
160 CONSOLEPRINTF(("Output : %-20s = \"%s\"",DBCharSetNames[charset_out], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Output : %-20s = \"%s\""
"\n",DBCharSetNames[charset_out], show.c_str())
;
161 return EXIT_SUCCESS0;
162} // charConv
163
164#endif // SYSYNC_TOOL
165
166
167// conversion table from ANSI 0x80..0x9F to UCS4
168const uInt32 Ansi_80_to_9F_to_UCS4[0x20] = {
169 0x20AC, 0 ,0x201A,0x0192, 0x201E,0x2026,0x2020,0x2021, // 0x80..0x87
170 0x02C6,0x2030,0x0160,0x2039, 0x0152, 0 ,0x017D, 0 , // 0x88..0x8F
171 0 ,0x2018,0x2019,0x201C, 0x201D,0x2022,0x2013,0x2014, // 0x90..0x97
172 0x02DC,0x2122,0x0161,0x203A, 0x0153, 0 ,0x017E,0x0178 // 0x98..0x9F
173};
174
175// line end mode names
176const char * const lineEndModeNames[numLineEndModes] = {
177 "none", // none specified
178 "unix", // 0x0A
179 "mac", // 0x0D
180 "dos", // 0x0D 0x0A
181 "cstr", // as in C strings, '\n' which is 0x0A normally (but might be 0x0D on some platforms)
182 "filemaker" // 0x0B (filemaker tab-separated text format, CR is shown as 0x0B within fields
183};
184
185
186
187// literal quoting mode names
188const char * const quotingModeNames[numQuotingModes] = {
189 "none", // none specified
190 "singlequote", // single quote must be duplicated
191 "doublequote", // double quote must be duplicated
192 "backslash" // C-string-style escapes of CR,LF,TAB,BS,\," and ' (but no full c-string escape with \xXX etc.)
193};
194
195
196// Encoding format names for SyncML
197const char * const encodingFmtSyncMLNames[numFmtTypes] = {
198 "chr", // plain chars
199 "bin", // binary
200 "b64" // base 64 encoding
201};
202// Encoding format names for user
203const char * const encodingFmtNames[numFmtTypes] = {
204 "plain-text", // no encoding (plain text)
205 "binary", // plain binary (in WBXML only)
206 "base64" // base 64 encoding
207};
208
209
210// field (property) data type names
211const char * const propDataTypeNames[numPropDataTypes] = {
212 "chr", // Character
213 "int", // Integer
214 "bool", // Boolean
215 "bin", // Binary
216 "datetime", // Date and time of day
217 "phonenum", // Phone number
218 "text", // plain text
219 "???" // unknown
220};
221
222
223// Auth type names
224const char * const authTypeSyncMLNames[numAuthTypes] = {
225 NULL__null, // no authorisation
226 "syncml:auth-basic", // basic (B64 encoded user pw string)
227 "syncml:auth-md5" // Md5 encoded user:pw:nonce
228};
229
230
231// MIME encoding types
232const char * const MIMEEncodingNames[numMIMEencodings] = {
233 "",
234 "7BIT",
235 "8BIT",
236 "BINARY",
237 "QUOTED-PRINTABLE",
238 "BASE64",
239 "B"
240};
241
242// Charset names for MIME based strings
243const char * const MIMECharSetNames[numCharSets] = {
244 "unknown",
245 "US-ASCII",
246 "ANSI",
247 "ISO-8859-1",
248 "UTF-8",
249 "UTF-16",
250 #ifdef CHINESE_SUPPORT
251 "GB2312",
252 "CP936",
253 #endif
254};
255
256
257#ifdef SYSYNC_ENGINE1
258// generate RFC2822-style address specificiation
259// - Common Name will be quoted
260// - recipient will be put in angle brackets
261void makeRFC2822AddrSpec(
262 cAppCharP aCommonName,
263 cAppCharP aRecipient,
264 string &aRFCAddr
265)
266{
267 if (aCommonName && *aCommonName) {
268 aRFCAddr='"';
269 while (*aCommonName) {
270 if (*aCommonName=='"') aRFCAddr += "\\\"";
271 else aRFCAddr += *aCommonName;
272 aCommonName++;
273 }
274 aRFCAddr+="\" <";
275 aRFCAddr+=aRecipient;
276 aRFCAddr+=">";
277 }
278 else {
279 // plain email address
280 aRFCAddr=aRecipient;
281 }
282} // makeRFC2822AddrSpec
283
284
285
286
287// sysytool -f syncserv_odbc.xml addrparse "(Lukas Peter) luz@synthesis.ch (Zeller), gaga"
288
289// Parse RFC2822-style address specificiation
290// - aName will receive name and all (possible) comments
291// - aRecipient will receive the (first, in case of a group) email address
292cAppCharP parseRFC2822AddrSpec(
293 cAppCharP aText,
294 string &aName,
295 string &aRecipient
296)
297{
298 const char *p;
299 char c;
300
301 enum {
302 pstate_sepspace,
303 pstate_trailing,
304 pstate_text,
305 pstate_comment,
306 pstate_quoted,
307 pstate_email
308 } pstate = pstate_trailing;
309 string text,groupname;
310 bool textcouldbeemail=true;
311 bool atfound=false;
312 aName.erase();
313 aRecipient.erase();
314 p=aText;
315 do {
316 c=*p;
317 // check end of input
318 if (c==0) break; // done with the string
319 // advance to next char
320 p++;
321 // check according to state
322 switch (pstate) {
323 case pstate_sepspace:
324 if (c==' ') {
325 aName+=c;
326 }
327 pstate=pstate_trailing;
328 // otherwise treat like trailing
329 case pstate_trailing:
330 textcouldbeemail=aRecipient.empty();
331 atfound=false;
332 // skip trailing WSP first
333 if (c==' ' || c=='\t' || c=='\n' || c=='\r') break; // simply ignore WSP in trailing mode
334 else pstate=pstate_text;
335 // fall trough to do text analysis
336 case pstate_text:
337 // now check specials
338 if (c==',') { c=0; break; } // end of address, cause exit from loop, next will start after comma
339 else if (c==';') { c=0; break; } // end of group address list, treat it like single address
340 else if (c=='@' && textcouldbeemail) atfound=true; // flag presence of @
341 // check if text could still be a email address by itself
342 if (textcouldbeemail && !isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {
343 textcouldbeemail=false;
344 if (atfound) {
345 aRecipient=text;
346 text.erase();
347 }
348 atfound=false;
349 }
350 // now check other specials
351 if (c=='"') { pstate=pstate_quoted; } // start of quoted string
352 else if (c=='(') { pstate=pstate_comment; } // start of comment
353 else if (c=='<') { aRecipient.erase(); pstate=pstate_email; } // start of angle-addr, overrides other recipient texts
354 else if (c==':') {
355 groupname=aRecipient; // what we've probably parsed as recipient
356 groupname+=aName; // plus name so far
357 groupname+=text; // plus additional text
358 text.erase();
359 aName.erase();
360 aRecipient.erase();
361 pstate=pstate_trailing;
362 } // flag presence of a group name (which can be used as name if addr itself does not have one)
363 else {
364 // add other text chars to the text
365 text += c;
366 }
367 break;
368 case pstate_quoted:
369 if (c=='\\') {
370 if (*p) c=*p++; else break; // get next char (if any) and add to result untested
371 }
372 else if (c=='"') {
373 // end of quoted string
374 pstate=pstate_sepspace;
375 aName+=text;
376 text.erase();
377 break;
378 }
379 // add to text
380 text += c;
381 break;
382 case pstate_comment:
383 if (c==')') {
384 // end of comment
385 aName+=text;
386 text.erase();
387 pstate=pstate_sepspace;
388 break;
389 }
390 // add to text
391 text += c;
392 break;
393 case pstate_email:
394 if (!isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {
395 // any non-email char terminates email, not only '>', but only '>' is swallowed
396 if (c!='>') p--; // re-evaluate char in next state
397 pstate=pstate_sepspace;
398 break;
399 }
400 // add to email
401 aRecipient += c;
402 break;
403 } // switch
404 } while (c!=0);
405 // handle case of pure email address without name and without < > brackets or :
406 if (aRecipient.empty() && textcouldbeemail && atfound)
407 aRecipient = text;
408 else
409 aName += text;
410 // if name is (now) empty, but we have a group name, use the group name
411 if (aName.empty()) aName=groupname;
412 // remove trailing spaces in aName
413 string::size_type n=aName.find_last_not_of(' ');
414 if (n!=string::npos) aName.resize(n+1);
415 // return where to continue parsing for next addr-spec (if not end of string)
416 return p;
417} // parseRFC2822AddrSpec
418
419
420
421// append internal UTF8 string as RFC2047 style encoding
422const char *appendUTF8AsRFC2047(
423 const char *aText,
424 string &aString
425)
426{
427 const char *p,*q,*r;
428 char c;
429
430 p=aText;
431 do {
432 q=p; // remember start
433 // find chars until next char that must be stored as encoded word
434 do {
435 c=*p;
436 if (c==0 || (c & 0x80) || (c=='=' && *(p+1)=='?')) break;
437 p++;
438 } while(true);
439 // copy chars outside encoded word directly
440 if (p-q>0) aString.append(q,p-q);
441 // check if end of string
442 if (c==0) break;
443 // pack some chars into encoded word
444 // - start word
445 aString.append("=?utf-8?B?"); // 10 chars start (+ 2 chars will be added at end)
446 // - encoded data must be 75-12=63 chars or less
447 // Using B (=b64) encoding, output of 63 chars = 63/4*3 = max 47 chars.
448 // We use 45 max, as this is evenly divisible by 3 and output is 60 chars
449 q=p;
450 while (true) {
451 // find next space
452 while (*q && !isspace(*q) && q-p<45) q++;
453 if (q-p>=45) break; // abort if exhausted already
454 // find next non-space
455 r=q;
456 while (isspace(*r)) r++;
457 // check if next non-space will start a new word
458 if (*r & 0x80) {
459 // we should include the next word as well, if possible without exceeding size
460 if (r-p<45) {
461 q=r;
462 continue;
463 }
464 }
465 break;
466 }
467 // encode binary stream and append to string
468 appendEncoded((const uInt8 *)p,q-p,aString,enc_b);
469 p=q;
470 // - end word
471 aString.append("?=");
472 } while (true);
473 return p;
474} // appendUTF8AsRFC2047
475
476
477// parse character string from RFC2047 style encoding to UTF8 internal string
478const char *appendRFC2047AsUTF8(
479 const char *aRFC2047,
480 stringSize aSize,
481 string &aString,
482 TLineEndModes aLEM
483)
484{
485 const char *p,*q,*r,*w;
486 char c = 0;
487 const char *eot = aRFC2047+aSize;
488
489 p=aRFC2047;
490 w=NULL__null; // start of last detected word (to avoid re-scanning)
491 while (p<eot) {
492 q=p; // remember start
493 // find chars until next encoded word
494 while (p<eot) {
495 c=*p;
496 if (c==0 || (p!=w && c=='=' && *(p+1)=='?')) break;
497 p++;
498 }
499 // copy chars outside encoded word directly
500 aString.append(q,p-q);
501 // check if end of string
502 if (p>=eot || c==0) break;
503 // try to parse encoded word
504 q=p+2;
505 scanword:
506 // q is now where we start to parse word contents
507 // p is where we would re-start reading normally if current word turns out not to be a word at all
508 // - remember start of word scan (to avoid re-scanning it)
509 w=p;
510 // - get charset
511 r=q;
512 while (q<eot && *q!='?' && isgraph(*q)) q++;
513 if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally
514 sInt16 en;
515 TCharSets charset=chs_unknown;
516 if (StrToEnum(MIMECharSetNames, numCharSets, en, r, q-r)) charset=(TCharSets)en;
517 // - get encoding
518 r=++q; // continue after ? separator
519 while (q<eot && *q!='?' && isgraph(*q)) q++;
520 if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally
521 TEncodingTypes encoding=enc_8bit;
522 if (StrToEnum(MIMEEncodingNames, numMIMEencodings, en, r, q-r)) encoding=(TEncodingTypes)en;
523 // - get data part
524 r=++q;
525 while (q+1<eot && *q && *q!=' ' && !(*q=='?' && *(q+1)=='=')) q++;
526 if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally
527 // - decode
528 string decoded;
529 appendDecoded(r,q-r,decoded,encoding);
530 // - convert to UTF-8
531 appendStringAsUTF8(
532 decoded.c_str(),
533 aString,
534 charset,
535 aLEM
536 );
537 // - skip word terminator
538 p=q+2;
539 // - check for special case of adjacent words
540 q=p;
541 while (q<eot && isspace(*q)) q++;
542 if (q+1<eot && q>p && *q=='=' && *(q+1)=='?') {
543 // adjacent encoded words, only separated by space -> ignore space
544 // p is after previous word
545 q+=2;
546 // q is after lead-in of next word
547 goto scanword;
548 }
549 // p is where we continue reading
550 }
551 return p;
552} // appendRFC2047AsUTF8
553
554
555// decode encoded data and append to string
556const char *appendDecoded(
557 const char *aText,
558 size_t aSize,
559 string &aBinString,
560 TEncodingTypes aEncoding
561)
562{
563 char c;
564 const char *p=aText;
565 uInt32 binsz;
566 uInt8 *binP;
567
568 switch (aEncoding) {
569 case enc_quoted_printable :
570 // decode quoted-printable content
571 while ((c=*p++)) {
572 // char found
573 if (c=='=') {
574 uInt16 code;
575 char hex[2];
576 // check for soft break first
577 if (*p=='\x0D' || *p=='\x0A') {
578 // soft break, swallow
579 if (*p=='\x0D') p++;
580 if (*p=='\x0A') p++;
581 continue;
582 }
583 // decode
584 hex[0]=*p;
585 if (*p) {
586 p++;
587 hex[1]=*p;
588 if (*p) {
589 p++;
590 if (HexStrToUShort(hex,code,2)==2) {
591 c=code; // decoded char
592 }
593 else continue; // simply ignore
594 }
595 else break;
596 }
597 else break;
598 }
599 // append char
600 aBinString+=c;
601 }
602 aText=p;
603 break;
604 case enc_base64:
605 case enc_b:
606 // decode base 64
607 binsz=0;
608 binP = b64::decode(aText, aSize, &binsz);
609 aBinString.append((const char *)binP,binsz);
610 b64::free(binP);
611 aText+=aSize;
612 break;
613 case enc_7bit:
614 case enc_8bit:
615 // copy no more than size
616 if (aSize>0) aBinString.reserve(aBinString.size()+aSize);
617 while (*p && aSize>0) {
618 aBinString+=*p++;
619 aSize--;
620 }
621 aText=p;
622 break;
623 case enc_none:
624 case enc_binary:
625 // copy bytes
626 aBinString.append(aText,aSize);
627 aText+=aSize;
628 break;
629 case numMIMEencodings:
630 // invalid
631 break;
632 } // quoted printable
633 return aText;
634} // appendDecoded
635
636
637
638// encode binary stream and append to string
639void appendEncoded(
640 const uInt8 *aBinary,
641 size_t aSize,
642 string &aString,
643 TEncodingTypes aEncoding,
644 sInt16 aMaxLineSize,
645 sInt32 aCurrLineSize,
646 bool aSoftBreaksAsCR,
647 bool aEncodeBinary
648)
649{
650 char c;
651 string::size_type linestart;
652 const uInt8 *p;
653 bool softbreak;
654 uInt32 b64len;
655 char *b64;
656 bool processed;
657
658 switch (aEncoding) {
659 case enc_binary :
660 case enc_none :
661 case enc_8bit :
662 case enc_7bit : // assume we have no 8bit chars
663 // just copy 1:1
664 aString.append((const char *)aBinary,aSize);
665 break;
666 case enc_quoted_printable:
667 // quote-printable encoding
668 // - determine start of last line in aString
669 // Note: this is because property text will be folded when lines aMaxLineSize
670 linestart=aString.size()-aCurrLineSize;
671 for (p=aBinary;p<aBinary+aSize;p++) { // '\0' will not terminate the 'for' loop
672 c=*p;
673 if (!aEncodeBinary && !c) break; // still exit at NUL when not encoding real binary data
674 processed=false; // input data in c is not yet processed
675 // make sure we do not go over the limit (if one is set)
676 // - if less than 8 chars (=0D=0A + =\r) are free, soft break the line
677 softbreak= aMaxLineSize && (aString.size()-linestart>=string::size_type(aMaxLineSize)-8);
678 if (!aEncodeBinary) {
679 if (c=='\r') continue; // ignore them
680 if (c=='\b') continue; // ignore them (optional break indicators, not relevant for QP output)
681 if (c=='\n') { // - encode line ends
682 aString.append("=0D=0A"); // special string for Line Ends (CR LF)
683 processed = true; // c is processed now
684 softbreak = true;
685 } // if
686 } // if
687 // - handle soft line break (but only if really doing line breaking)
688 // Also: avoid adding a soft break at the very end of the string
689 if (softbreak && aMaxLineSize && p+1<aBinary+aSize) {
690 if (aSoftBreaksAsCR)
691 aString.append("=\r"); // '\r' signals softbreak for finalizeproperty()
692 else
693 aString.append("=\x0D\x0A"); // break line here
694 // new line starts after softbreak
695 linestart=aString.size();
696 // make sure soft line break is not followed by unencoded space
697 // (which would look like MIME folding)
698 if (c==' ' || (processed && p[1]==' ')) {
699 aString.append("=20");
700 if (processed) p++; // if current char was already processed, we need to explicitly skip the space
701 processed=true; // char is now processed in any case
702 } // if
703 } // if
704 // now encode the char in c if not already processed by now
705 if (!processed) {
706 bool encodeIt=
707 (c=='=') // escape equal sign itself
708 || (c=='<' && aEncodeBinary) // avoid XML mismatch problems
709 || (uInt8)c>0x7F
710 || (uInt8)c<0x20; // '\0' will be encoded as well
711 if (encodeIt) { // encode all non ASCII chars > 0x7F (and control chars as well)
712 aString+="=";
713 aString+=NibbleToHexDigit(c>>4);
714 aString+=NibbleToHexDigit(c);
715 }
716 else
717 aString+=c; // just copy
718 } // if
719 }
720 break;
721 case enc_base64:
722 case enc_b:
723 // use base64 encoding
724 if (aSize>0) {
725 // don't call b64 with size=0!
726 b64 = b64::encode(
727 aBinary,aSize, // what to encode
728 &b64len, // output size
729 aMaxLineSize, // max line size
730 aSoftBreaksAsCR
731 );
732 // append to output, if any
733 if (b64) {
734 aString.append(b64,b64len);
735 // release buffer
736 b64::free(b64);
737 }
738 if (aEncoding!=enc_b) {
739 // make sure it ends with a newline for "base64" (but NOT for "b" as used in RFC2047)
740 // Note: when used in vCard2.1, that newline is part of the property and show as an
741 // empty line in the vCard.
742 aString += aSoftBreaksAsCR ? "\r" : "\x0D\x0A";
743 }
744 }
745 break;
746 default:
747 // do nothing
748 break;
749 } // switch
750} // appendEncoded
751
752
753#ifdef CHINESE_SUPPORT
754// the flatBinTree tables for converting to and from GB2312
755#include "gb2312_tables_inc.cpp"
756// the flatBinTree tables for converting to and from CP936
757#include "cp936_tables_inc.cpp"
758#endif
759
760
761// add char (possibly multi-byte) as UTF8 to value and apply charset translation if needed
762// - returns > 0 if aNumChars was not correct number of bytes needed to convert an entire character;
763// return value is number of bytes needed to generate one output character. If return value
764// is<>0, no char has been appended to aVal.
765uInt16 appendCharsAsUTF8(const char *aChars, string &aVal, TCharSets aCharSet, uInt16 aNumChars)
766{
767 uInt32 ucs4;
768 // first char
769 uInt8 c=*aChars;
770 // this is a 8-bit char
771 switch(aCharSet) {
772 case chs_utf8 :
773 // UTF8 is native charset of the application, simply add
774 aVal+=c;
775 break;
776 case chs_ansi :
777 case chs_iso_8859_1 :
778 // do poor man's conversion to UCS4
779 // - most ANSI chars are 1:1 mapped
780 ucs4 = ((uInt8)c & 0xFF);
781 // - except 0x80..0x9F, use table for these
782 if (ucs4>=0x80 && ucs4<=0x9F)
783 ucs4=Ansi_80_to_9F_to_UCS4[ucs4-0x80];
784 // - convert to UTF8
785 UCS4toUTF8(ucs4,aVal);
786 break;
787 #ifdef CHINESE_SUPPORT
788 case chs_gb2312 : // simplified Chinese GB-2312 charset
789 // all below 0x80 are passed as-is
790 if (c<0x80)
791 aVal+=c; // simply append
792 else {
793 // 16-bit GB2312 char
794 if (aNumChars!=2)
795 return 2; // we need 2 chars for a successful GB-2312
796 // we have 2 bytes, convert them
797 ucs4 = searchFlatBintree(gb2312_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');
798 // - convert to UTF8
799 UCS4toUTF8(ucs4,aVal);
800 }
801 break;
802 case chs_cp936: // simplified chinese Windows codepage CP936
803 if (c<0x80)
804 aVal+=c; // simply append
805 else {
806 // 0x0080 (euro sign) or 2-byte CP936
807 if (c==0x80)
808 ucs4=searchFlatBintree(cp936_to_ucs2, 0x0080, INCONVERTIBLE_PLACEHOLDER'_');
809 else {
810 // 16-bit GB2312 char
811 if (aNumChars!=2)
812 return 2; // we need 2 chars for a successful CP936
813 // we have 2 bytes, convert them
814 ucs4 = searchFlatBintree(cp936_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');
815 }
816 // - convert to UTF8
817 UCS4toUTF8(ucs4,aVal);
818 }
819 break;
820 #endif
821 case chs_ascii : // plain 7-bit ASCII
822 default : // unknown
823 // only 7-bit allowed
824 if (c & 0x80)
825 aVal+=INCONVERTIBLE_PLACEHOLDER'_';
826 else
827 aVal+=c;
828 break;
829 } // switch
830 return 0; // ok, converted aNumChars
831} // appendCharsAsUTF8
832
833
834
835
836// add string as UTF8 to value and apply charset translation if needed
837// - if lineEndMode is not lem_none, all sorts of line ends will be converted
838// to the specified mode.
839void appendStringAsUTF8(const char *s, string &aVal, TCharSets aCharSet, TLineEndModes aLEM, bool aAllowFilemakerCR)
840{
841 char c;
842 const char *start=s;
843 if (s) {
844 while ((c=*s++)!=0) {
845 if (aLEM!=lem_none) {
846 // line end handling enabled
847 if (c==0x0D) {
848 // could be mac (0x0D) or DOS (0x0D/0x0A)
849 if (*s==0x0A) {
850 // this is DOS-type line end
851 // - consume the 0x0A as well
852 s++;
853 // - check for 0x0D 0x0D 0x0A special case (caused by
854 // DOS-text-file conversion of non-DOS strings)
855 if (s>=start+3) {
856 if (*(s-3)==0x0D) {
857 // char before the DOS-CRLF was a 0x0D as well (and
858 // has already produced a newline in the output
859 // --> completely ignore this CRLF
860 continue;
861 }
862 }
863 }
864 // is a line end, convert it to platform-lineend
865 c='\n'; // platform
866 }
867 else if (c==0x0A) {
868 // 0x0A without preceeding 0x0D = unix
869 c='\n'; // platform
870 }
871 else if (c==0x0B && aAllowFilemakerCR) {
872 // 0x0B is used as lineend in filemaker export and achilformat
873 c='\n';
874 }
875 // line end converted to platform
876 if (c=='\n' && aLEM!=lem_cstr) {
877 // produce specified line end
878 switch (aLEM) {
879 case lem_mac : c=0x0D; break;
Value stored to 'c' is never read
880 case lem_unix : c=0x0A; break;
881 case lem_filemaker : c=0x0B; break;
882 case lem_dos :
883 c=0x0A; // LF will be added later
884 aVal+=0x0D; // add CR
885 break;
886 default: break;
887 }
888 }
889 } // line end handling enabled
890 // normal add
891 uInt16 i,seqlen=1; // assume logical char consists of single byte
892 do {
893 seqlen=appendCharsAsUTF8(s-seqlen,aVal,aCharSet,seqlen); // add char (possibly with UTF8 expansion) to aVal
894 if (seqlen<=1) break; // done
895 for (i=1;i<seqlen;i++) { if (*s==0) break; else s++; }
896 if (i<seqlen) break; // not enough bytes
897 } while(true);
898 }
899 }
900} // appendStringAsUTF8
901
902
903
904// same as appendUTF8ToString, but output string is cleared first
905bool storeUTF8ToString(
906 cAppCharP aUTF8, string &aVal,
907 TCharSets aCharSet,
908 TLineEndModes aLEM,
909 TQuotingModes aQuotingMode,
910 size_t aMaxBytes
911)
912{
913 aVal.erase();
914 return appendUTF8ToString(aUTF8,aVal,aCharSet,aLEM,aQuotingMode,aMaxBytes);
915} // storeUTF8ToString
916
917
918
919// helper for adding chars
920static void appendCharToString(
921 char c,
922 string &aVal,
923 TQuotingModes aQuotingMode
924) {
925 if (aQuotingMode==qm_none) {
926 aVal+=c;
927 }
928 else if (aQuotingMode==qm_backslash) {
929 // treat CR, LF, BS, TAB, single/doublequote and backslash specially
930 if (c==0x0D)
931 aVal+="\\r";
932 else if (c==0x0A)
933 aVal+="\\n";
934 else if (c==0x08)
935 aVal+="\\b";
936 else if (c==0x09)
937 aVal+="\\t";
938 else if (c=='"')
939 aVal+="\\\"";
940 else if (c=='\'')
941 aVal+="\\'";
942 else if (c=='\\')
943 aVal+="\\\\";
944 else
945 aVal+=c;
946 }
947 else if (aQuotingMode==qm_duplsingle) {
948 if (c=='\'') aVal+=c; // duplicate
949 aVal+=c; // normal append
950 }
951 else if (aQuotingMode==qm_dupldouble) {
952 if (c=='"') aVal+=c; // duplicate
953 aVal+=c; // normal append
954 }
955} // appendCharToString
956
957
958// add UTF8 string to value in custom charset
959// - if aLEM is not lem_none, occurrence of any type of Linefeeds
960// (LF,CR,CRLF and even CRCRLF) in input string will be
961// replaced by the specified line end type
962// - aQuotingMode specifies what quoting (for ODBC literals for example) should be used
963// - output is clipped after aMaxBytes bytes (if not 0)
964// - returns true if all input could be converted, false if output is clipped
965bool appendUTF8ToString(
966 cAppCharP aUTF8,
967 string &aVal,
968 TCharSets aCharSet,
969 TLineEndModes aLEM,
970 TQuotingModes aQuotingMode,
971 size_t aMaxBytes
972)
973{
974 uInt32 ucs4;
975 uInt8 c;
976 size_t n=0;
977 cAppCharP p=aUTF8;
978 cAppCharP start=aUTF8;
979
980 if (!aUTF8) return true; // nothing to copy, copied everything of that!
981 if (aCharSet==chs_utf8 && aLEM==lem_none && aQuotingMode==qm_none) {
982 // shortcut: simply append entire string
983 if (aMaxBytes==0)
984 aVal+=aUTF8;
985 else
986 aVal.append(aUTF8,aMaxBytes);
987 // advance "processed" pointer behind consumed part of string
988 p=aUTF8+aVal.size();
989 }
990 else {
991 // process char by char
992 while((c=*aUTF8)!=0 && (aMaxBytes==0 || n<aMaxBytes)) {
993 p=aUTF8;
994 // check for linefeed conversion
995 if (aLEM!=lem_none && (c==0x0D || c==0x0A)) {
996 aUTF8++;
997 // line end, handling enabled
998 if (c==0x0D) {
999 // could be mac (0x0D) or DOS (0x0D/0x0A)
1000 if (*aUTF8==0x0A) {
1001 // this is DOS-type line end
1002 // - consume the 0x0A as well
1003 aUTF8++;
1004 // - check for 0x0D 0x0D 0x0A special case (caused by
1005 // DOS-text-file conversion of non-DOS strings)
1006 if (aUTF8>=start+3) {
1007 if (*(aUTF8-3)==0x0D) {
1008 // char before the DOS-CRLF was a 0x0D as well (and
1009 // has already produced a newline in the output
1010 // --> completely ignore this CRLF
1011 continue;
1012 }
1013 }
1014 }
1015 // is a line end, convert it to platform-lineend
1016 c='\n'; // platform
1017 }
1018 else { // must be 0x0A
1019 // 0x0A without preceeding 0x0D = unix
1020 c='\n'; // platform
1021 }
1022 // line end converted to platform
1023 if (aLEM!=lem_cstr) {
1024 // produce specified line end
1025 switch (aLEM) {
1026 case lem_mac : c=0x0D; break;
1027 case lem_filemaker : c=0x0B; break;
1028 case lem_unix : c=0x0A; break;
1029 case lem_dos :
1030 c=0x0A; // LF will be added later
1031 n++; // count it extra
1032 if (aMaxBytes && n>=aMaxBytes)
1033 goto stringfull; // no room to complete it, ignore it
1034 appendCharToString(0x0D,aVal,aQuotingMode);
1035 break;
1036 default: break;
1037 }
1038 }
1039 appendCharToString(c,aVal,aQuotingMode);
1040 n++; // count it
1041 } // line end, handling enabled
1042 else {
1043 // non lineend (or lineend not handled specially)
1044 if (aCharSet==chs_utf8) {
1045 aUTF8++;
1046 // - simply add char
1047 appendCharToString(c,aVal,aQuotingMode);
1048 n++;
1049 }
1050 else {
1051 // - make UCS4
1052 p=aUTF8; // save previous position to detect if we have processed all
1053 aUTF8=UTF8toUCS4(aUTF8,ucs4);
1054 // now we have UCS4
1055 if (ucs4==0) {
1056 // UTF8 resulting in UCS4 null char is not allowed
1057 ucs4=INCONVERTIBLE_PLACEHOLDER'_';
1058 }
1059 else {
1060 // convert to specified charset
1061 switch (aCharSet) {
1062 case chs_ansi:
1063 case chs_iso_8859_1:
1064 if ((ucs4<=0xFF && ucs4>=0xA0) || ucs4<0x80)
1065 // 00..7F and A0..FF directly map to ANSI
1066 appendCharToString(ucs4,aVal,aQuotingMode);
1067 else {
1068 // search for matching ANSI in table
1069 uInt8 k;
1070 for (k=0; k<0x20; k++) {
1071 if (ucs4==Ansi_80_to_9F_to_UCS4[k]) {
1072 // found in table
1073 break;
1074 }
1075 }
1076 if (k<0x20)
1077 // conversion found
1078 aVal+=k+0x80;
1079 else
1080 // no conversion found in table
1081 aVal+=INCONVERTIBLE_PLACEHOLDER'_';
1082 } // not in 1:1 range 0..7F, A0..FF
1083 n++;
1084 break;
1085 #ifdef CHINESE_SUPPORT
1086 case chs_gb2312 : // simplified Chinese GB-2312 charset
1087 // all below 0x80 are passed as-is
1088 if (ucs4<0x80) {
1089 appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1090 n++;
1091 }
1092 else {
1093 // convert to 16-bit GB2312 char
1094 uInt16 gb = searchFlatBintree(ucs2_to_gb2312, ucs4, INCONVERTIBLE_PLACEHOLDER'_');
1095 // check if we have space
1096 if (aMaxBytes!=0 && n+2>aMaxBytes)
1097 goto stringfull;
1098 // append as two bytes to output string
1099 aVal+=gb >> 8;
1100 aVal+=gb & 0xFF;
1101 n+=2;
1102 }
1103 break;
1104 case chs_cp936 : // simplified Chinese CP936 windows codepage
1105 // all below 0x80 are passed as-is
1106 if (ucs4<0x80) {
1107 appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1108 n++;
1109 }
1110 else {
1111 // convert to CP936 16-bit representation
1112 uInt16 twobytes = searchFlatBintree(ucs2_to_cp936, ucs4, INCONVERTIBLE_PLACEHOLDER'_');
1113 // append as two bytes to output string, but only this is a CP936 two-byte at all
1114 if (twobytes>0x0080) {
1115 // check if we have space
1116 if (aMaxBytes!=0 && n+2>aMaxBytes)
1117 goto stringfull;
1118 aVal+=twobytes >> 8; // sub-page lead in
1119 n++;
1120 }
1121 aVal+=twobytes & 0xFF; // sub-page code
1122 n++;
1123 }
1124 break;
1125 #endif
1126 case chs_ascii:
1127 // explicit ASCII: convert some special chars to plain ASCII
1128 if ((ucs4 & 0xFFFFFF80) !=0) {
1129 // ASCIIfy table to convert umlauts etc. to nearest plain ASCII
1130 typedef struct {
1131 uInt32 ucs4;
1132 uInt8 ascii;
1133 } TASCIIfyEntry;
1134
1135 static const TASCIIfyEntry ASCIIfyTable[] = {
1136 { 0x000000C4, 'A' }, // Adieresis
1137 { 0x000000C5, 'A' }, // Aring
1138 { 0x000000C7, 'C' }, // Ccedilla
1139 { 0x000000C9, 'E' }, // Eacute
1140 { 0x000000D1, 'N' }, // Ntilde
1141 { 0x000000D6, 'O' }, // Odieresis
1142 { 0x000000DC, 'U' }, // Udieresis
1143 { 0x000000E1, 'a' }, // aacute
1144 { 0x000000E0, 'a' }, // agrave
1145 { 0x000000E2, 'a' }, // acircumflex
1146 { 0x000000E4, 'a' }, // adieresis
1147 { 0x000000E3, 'a' }, // atilde
1148 { 0x000000E5, 'a' }, // aring
1149 { 0x000000E7, 'c' }, // ccedilla
1150 { 0x000000E9, 'e' }, // eacute
1151 { 0x000000E8, 'e' }, // egrave
1152 { 0x000000EA, 'e' }, // ecircumflex
1153 { 0x000000EB, 'e' }, // edieresis
1154 { 0x000000ED, 'i' }, // iacute
1155 { 0x000000EC, 'i' }, // igrave
1156 { 0x000000EE, 'i' }, // icircumflex
1157 { 0x000000EF, 'i' }, // idieresis
1158 { 0x000000F1, 'n' }, // ntilde
1159 { 0x000000F3, 'o' }, // oacute
1160 { 0x000000F2, 'o' }, // ograve
1161 { 0x000000F4, 'o' }, // ocircumflex
1162 { 0x000000F6, 'o' }, // odieresis
1163 { 0x000000F5, 'o' }, // otilde
1164 { 0x000000FA, 'u' }, // uacute
1165 { 0x000000F9, 'u' }, // ugrave
1166 { 0x000000FB, 'u' }, // ucircumflex
1167 { 0x000000FC, 'u' }, // udieresis
1168 { 0x000000DF, 's' }, // germandoubles
1169 { 0x000000D8, 'O' }, // Oslash
1170 { 0x000000F8, 'o' }, // oslash
1171 { 0x000000C0, 'A' }, // Agrave
1172 { 0x000000C3, 'A' }, // Atilde
1173 { 0x000000D5, 'O' }, // Otilde
1174 { 0x00000152, 'O' }, // OE
1175 { 0x00000153, 'o' }, // oe
1176 { 0x000000C6, 'A' }, // AE
1177 { 0x000000E6, 'a' }, // ae
1178 { 0x000000C2, 'A' }, // Acircumflex
1179 { 0x000000CA, 'E' }, // Ecircumflex
1180 { 0x000000C1, 'A' }, // Aacute
1181 { 0x000000CB, 'E' }, // Edieresis
1182 { 0x000000C8, 'E' }, // Egrave
1183 { 0x000000CD, 'I' }, // Iacute
1184 { 0x000000CC, 'I' }, // Igrave
1185 { 0x000000CE, 'i' }, // Icircumflex
1186 { 0x000000CF, 'i' }, // Odieresis
1187 { 0x000000D3, 'O' }, // Oacute
1188 { 0x000000D2, 'O' }, // Ograve
1189 { 0x000000D4, 'O' }, // Ocircumflex
1190 // terminator
1191 { 0,0 }
1192 };
1193
1194 // search in ASCIIfy table
1195 uInt16 k=0;
1196 while (ASCIIfyTable[k].ucs4!=0) {
1197 if (ucs4==ASCIIfyTable[k].ucs4) {
1198 // found, fetch ASCII-equivalent
1199 ucs4=ASCIIfyTable[k].ascii;
1200 break; // use it
1201 }
1202 k++;
1203 }
1204 }
1205 // fall through to default, which does not know ANY non-ASCII
1206 default:
1207 // only 7 bit ASCII is allowed
1208 if ((ucs4 & 0xFFFFFF80) !=0)
1209 aVal+=INCONVERTIBLE_PLACEHOLDER'_';
1210 else
1211 appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes
1212 n++;
1213 break;
1214 } // switch
1215 } // valid UCS4
1216 } // not already UTF8
1217 } // if not lineend
1218 // processed until here
1219 p=aUTF8;
1220 } // while not end of input string
1221 } // not already UTF8
1222 // return true if input string completely consumed
1223stringfull:
1224 return (*p==0);
1225} // appendUTF8ToString
1226
1227
1228// convert UTF8 to UCS4
1229// - returns pointer to next char
1230// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)
1231const char *UTF8toUCS4(const char *aUTF8, uInt32 &aUCS4)
1232{
1233 uInt8 c;
1234 sInt16 morechars;
1235
1236 if ((c=*aUTF8)!=0) {
1237 aUTF8++;
1238 // there is a char
1239 morechars=0;
1240 // decode UTF8 lead-in
1241 if ((c & 0x80) == 0) {
1242 // single byte
1243 aUCS4=c;
1244 morechars=0;
1245 }
1246 else if ((c & 0xE0) == 0xC0) {
1247 // two bytes
1248 aUCS4=c & 0x1F;
1249 morechars=1;
1250 }
1251 else if ((c & 0xF0) == 0xE0) {
1252 aUCS4=c & 0x0F;
1253 morechars=2;
1254 }
1255 else if ((c & 0xF8) == 0xF0) {
1256 aUCS4=c & 0x07;
1257 morechars=3;
1258 }
1259 else if ((c & 0xFC) == 0xF8) {
1260 aUCS4=c & 0x03;
1261 morechars=4;
1262 }
1263 else if ((c & 0xFE) == 0xFC) {
1264 aUCS4=c & 0x01;
1265 morechars=5;
1266 }
1267 else {
1268 // bad char
1269 aUCS4=0;
1270 }
1271 // process additional chars
1272 while(morechars--) {
1273 if ((c=*aUTF8)==0) {
1274 // unfinished sequence
1275 aUCS4=0;
1276 break;
1277 }
1278 aUTF8++;
1279 if ((c & 0xC0) != 0x80) {
1280 // bad additional char
1281 aUCS4=0;
1282 break;
1283 }
1284 // each additional char adds 6 new bits
1285 aUCS4 = aUCS4 << 6; // shift existing bits
1286 aUCS4 |= (c & 0x3F); // add new bits
1287 }
1288 }
1289 else {
1290 // no char
1291 aUCS4=0;
1292 }
1293 // return pointer to next char
1294 return aUTF8;
1295} // UTF8toUCS4
1296
1297
1298// convert UCS4 to UTF8 (0 char is not allowed and will be ignored!)
1299void UCS4toUTF8(uInt32 aUCS4, string &aUTF8)
1300{
1301 uInt8 c;
1302
1303 // ignore null char
1304 if (aUCS4==0) return;
1305 // create UTF8 lead-in
1306 sInt16 morechars=0;
1307 if (aUCS4<0x00000080) {
1308 // one byte
1309 c=aUCS4;
1310 }
1311 else if (aUCS4<0x00000800) {
1312 // two bytes
1313 c=0xC0 | ((aUCS4 >> 6) & 0x1F);
1314 morechars=1;
1315 }
1316 else if (aUCS4<0x00010000) {
1317 // three bytes
1318 c=0xE0 | ((aUCS4 >> 12) & 0x0F);
1319 morechars=2;
1320 }
1321 else if (aUCS4<0x00200000) {
1322 // four bytes
1323 c=0xF0 | ((aUCS4 >> 18) & 0x07);
1324 morechars=3;
1325 }
1326 else if (aUCS4<0x04000000) {
1327 // five bytes
1328 c=0xF8 | ((aUCS4 >> 24) & 0x03);
1329 morechars=4;
1330 }
1331 else {
1332 // six bytes
1333 c=0xFC | ((aUCS4 >> 30) & 0x01);
1334 morechars=5;
1335 }
1336 // add lead-in
1337 aUTF8+=c;
1338 // add rest of sequence
1339 while (morechars--) {
1340 c= 0x80 | ((aUCS4 >> (morechars * 6)) & 0x3F);
1341 aUTF8+=c;
1342 }
1343} // UCS4toUTF8
1344
1345
1346/* Encoding UTF-16 (excerpt from RFC 2781, paragraph 2.1)
1347
1348 Encoding of a single character from an ISO 10646 character value to
1349 UTF-16 proceeds as follows. Let U be the character number, no greater
1350 than 0x10FFFF.
1351
1352 1) If U < 0x10000, encode U as a 16-bit unsigned integer and
1353 terminate.
1354
1355 2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF,
1356 U' must be less than or equal to 0xFFFFF. That is, U' can be
1357 represented in 20 bits.
1358
1359 3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and
1360 0xDC00, respectively. These integers each have 10 bits free to
1361 encode the character value, for a total of 20 bits.
1362
1363 4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order
1364 bits of W1 and the 10 low-order bits of U' to the 10 low-order
1365 bits of W2. Terminate.
1366
1367 Graphically, steps 2 through 4 look like:
1368 U' = yyyyyyyyyyxxxxxxxxxx
1369 W1 = 110110yyyyyyyyyy
1370 W2 = 110111xxxxxxxxxx
1371*/
1372
1373// convert UCS4 to UTF-16
1374// - returns 0 for UNICODE range UCS4 and first word of UTF-16 for non UNICODE
1375uInt16 UCS4toUTF16(uInt32 aUCS4, uInt16 &aUTF16)
1376{
1377 if (aUCS4<0x10000) {
1378 // in unicode range: single UNICODE char
1379 aUTF16=aUCS4;
1380 return 0; // no second char
1381 }
1382 else {
1383 // out of UNICODE range
1384 aUCS4-=0x10000;
1385 if (aUCS4>0xFFFF) {
1386 // inconvertible
1387 aUTF16=INCONVERTIBLE_PLACEHOLDER'_';
1388 return 0;
1389 }
1390 else {
1391 // convert to two-word UNICODE / UCS-2
1392 aUTF16=0xD800+(aUCS4>>10);
1393 return 0xDC00+(aUCS4 & 0x03FF);
1394 }
1395 }
1396} // UCS4toUTF16
1397
1398
1399
1400/* Decoding UTF-16
1401
1402 Decoding of a single character from UTF-16 to an ISO 10646 character
1403 value proceeds as follows. Let W1 be the next 16-bit integer in the
1404 sequence of integers representing the text. Let W2 be the (eventual)
1405 next integer following W1.
1406
1407 1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value
1408 of W1. Terminate.
1409
1410 2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence
1411 is in error and no valid character can be obtained using W1.
1412 Terminate.
1413
1414 3) If there is no W2 (that is, the sequence ends with W1), or if W2
1415 is not between 0xDC00 and 0xDFFF, the sequence is in error.
1416 Terminate.
1417
1418 4) Construct a 20-bit unsigned integer U', taking the 10 low-order
1419 bits of W1 as its 10 high-order bits and the 10 low-order bits of
1420 W2 as its 10 low-order bits.
1421
1422 5) Add 0x10000 to U' to obtain the character value U. Terminate.
1423
1424 Note that steps 2 and 3 indicate errors. Error recovery is not
1425 specified by this document. When terminating with an error in steps 2
1426 and 3, it may be wise to set U to the value of W1 to help the caller
1427 diagnose the error and not lose information. Also note that a string
1428 decoding algorithm, as opposed to the single-character decoding
1429 described above, need not terminate upon detection of an error, if
1430 proper error reporting and/or recovery is provided.
1431
1432*/
1433
1434// convert UTF-16 to UCS4
1435// - returns pointer to next char
1436// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)
1437const uInt16 *UTF16toUCS4(const uInt16 *aUTF16P, uInt32 &aUCS4)
1438{
1439 uInt16 utf16=*aUTF16P++;
1440
1441 if (utf16<0xD800 || utf16>0xDFFF) {
1442 // single char unicode
1443 aUCS4=utf16;
1444 }
1445 else {
1446 // could be two-char
1447 if (utf16<=0xDBFF) {
1448 // valid first char: check second char
1449 uInt16 utf16_2 = *aUTF16P; // next
1450 if (utf16_2 && utf16_2>=0xDC00 && utf16_2<=0xDFFF) {
1451 // second char exists and is valid
1452 aUTF16P++; // advance now
1453 aUCS4 =
1454 ((utf16 & 0x3FF) << 10) +
1455 (utf16_2 & 0x3FF);
1456 }
1457 else
1458 aUCS4=0; // no char
1459 }
1460 else {
1461 aUCS4=0; // no char
1462 }
1463 }
1464 // return advanced pointer
1465 return aUTF16P;
1466} // UCS4toUTF16
1467
1468
1469
1470
1471
1472
1473// add UTF8 string as UTF-16 byte stream to 8-bit string
1474// - if aLEM is not lem_none, occurrence of any type of Linefeeds
1475// (LF,CR,CRLF and even CRCRLF) in input string will be
1476// replaced by the specified line end type
1477// - output is clipped after ByteString reaches aMaxBytes size (if not 0), = approx half as many Unicode chars
1478// - returns true if all input could be converted, false if output is clipped
1479bool appendUTF8ToUTF16ByteString(
1480 cAppCharP aUTF8,
1481 string &aUTF16ByteString,
1482 bool aBigEndian,
1483 TLineEndModes aLEM,
1484 uInt32 aMaxBytes
1485)
1486{
1487 uInt32 ucs4;
1488 uInt16 utf16=0,utf16_1;
1489 cAppCharP p;
1490
1491 while (aUTF8 && *aUTF8) {
1492 // convert next UTF8 char to UCS4
1493 p=UTF8toUCS4(aUTF8, ucs4);
1494 if (ucs4==0) break; // error in UTF8 encoding, exit
1495 // convert line ends
1496 if (ucs4 == '\n' && aLEM!=lem_none && aLEM!=lem_cstr) {
1497 // produce specified line end
1498 utf16_1=0;
1499 switch (aLEM) {
1500 case lem_mac : utf16=0x0D; break;
1501 case lem_filemaker : utf16=0x0B; break;
1502 case lem_unix : utf16=0x0A; break;
1503 case lem_dos :
1504 utf16_1=0x0D; // CR..
1505 utf16=0x0A; // ..then LF
1506 break;
1507 default: break;
1508 }
1509 }
1510 else {
1511 // ordinary char, use UTF16 encoding
1512 utf16_1 = UCS4toUTF16(ucs4,utf16);
1513 }
1514 // check if appending UTF16 would exceed max size specified
1515 if (aMaxBytes!=0 && aUTF16ByteString.size() + (utf16_1 ? 4 : 2) > aMaxBytes)
1516 break;
1517 // we can append, advance input pointer
1518 aUTF8 = p;
1519 // now append
1520 if (aBigEndian) {
1521 // Big end first, Motorola order
1522 if (utf16_1) {
1523 aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);
1524 aUTF16ByteString += (char)(utf16_1 & 0xFF);
1525 }
1526 aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);
1527 aUTF16ByteString += (char)(utf16 & 0xFF);
1528 }
1529 else {
1530 // Little end first, Intel order
1531 if (utf16_1) {
1532 aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);
1533 aUTF16ByteString += (char)(utf16_1 & 0xFF);
1534 }
1535 aUTF16ByteString += (char)(utf16 & 0xFF);
1536 aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);
1537 }
1538 } // while
1539 // true if all input consumed
1540 return (aUTF8==NULL__null) || (*aUTF8==0);
1541} // appendUTF8ToUTF16ByteString
1542
1543
1544// add UTF16 byte string as UTF8 to value
1545void appendUTF16AsUTF8(
1546 const uInt16 *aUTF16,
1547 uInt32 aNumUTF16Chars,
1548 bool aBigEndian,
1549 string &aVal,
1550 bool aConvertLineEnds,
1551 bool aAllowFilemakerCR
1552)
1553{
1554 uInt32 ucs4;
1555 uInt16 utf16pair[2];
1556 cAppCharP inP = (cAppCharP)aUTF16;
1557 bool lastWasCR=false;
1558
1559 while (inP && !(*inP==0 && *(inP+1)==0) && aNumUTF16Chars>0) {
1560 // get two words (in case of surrogate pair)
1561 if (aBigEndian) {
1562 // Motorola order
1563 utf16pair[0]=((*(inP) & 0xFF)<<8) + (*(inP+1) & 0xFF);
1564 if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+2) & 0xFF)<<8) + (*(inP+3) & 0xFF);
1565 }
1566 else {
1567 // Intel order
1568 utf16pair[0]=((*(inP+1) & 0xFF)<<8) + (*(inP) & 0xFF);
1569 if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+3) & 0xFF)<<8) + (*(inP+2) & 0xFF);
1570 }
1571 cAppCharP hP = (cAppCharP)UTF16toUCS4(utf16pair, ucs4);
1572 /*
1573 PDEBUGPRINTFX(DBG_PARSE+DBG_EXOTIC,(
1574 "Parsed %ld bytes: *(inP)=0x%02hX, *(inP+1)=0x%02hX, *(inP+2)=0x%02hX, *(inP+3)=0x%02hX, utf16pair[0]=0x%04hX, utf16pair[1]=0x%04hX, ucs4=0x%04lX",
1575 (uInt32)(hP-(cAppCharP)utf16pair),
1576 (uInt16)*(inP), (uInt16)*(inP+1), (uInt16)*(inP+2), (uInt16)*(inP+3),
1577 (uInt16)utf16pair[0], (uInt16)utf16pair[1],
1578 (uInt32)ucs4
1579 ));
1580 */
1581 uInt32 bytes=hP-(cAppCharP)utf16pair;
1582 inP+=bytes; // next UTF16 to check
1583 aNumUTF16Chars-=bytes/2; // count down UTF16 chars
1584 // convert line ends if selected
1585 if (aConvertLineEnds) {
1586 if (ucs4 == 0x0D) {
1587 lastWasCR=true;
1588 continue;
1589 }
1590 else {
1591 if (ucs4 == 0x0A || (aAllowFilemakerCR && ucs4 == 0x0B))
1592 ucs4 = '\n'; // convert to LineEnd
1593 else if (lastWasCR)
1594 aVal += '\n'; // insert a LineEnd
1595 lastWasCR=false;
1596 }
1597 }
1598 // append to UTF-8 string
1599 UCS4toUTF8(ucs4, aVal);
1600 }
1601 if (lastWasCR)
1602 aVal += '\n'; // input string ended on CR, must be shown in output
1603} // appendUTF16AsUTF8
1604
1605
1606
1607
1608
1609
1610#ifdef BINTREE_GENERATOR
1611
1612// add a key/value pair to the binary tree
1613void addToBinTree(TBinTreeNode *&aBinTree, treeval_t aMinKey, treeval_t aMaxKey, treeval_t aKey, treeval_t aValue)
1614{
1615 // start at root
1616 TBinTreeNode **nextPP = &aBinTree;
1617 treeval_t cmpval;
1618 do {
1619 // create the new decision value from max and min
1620 cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);
1621 // create the node if not already there
1622 if (*nextPP==NULL__null) {
1623 *nextPP = new TBinTreeNode;
1624 (*nextPP)->key = cmpval;
1625 (*nextPP)->nextHigher=NULL__null;
1626 (*nextPP)->nextLowerOrEqual=NULL__null;
1627 (*nextPP)->value=0;
1628 }
1629 // check if the node CREATED is a leaf node
1630 // this is the case if max==min
1631 if (aMaxKey==aMinKey) {
1632 // save leaf value (possibly overwriting existing leaf value for same code)
1633 (*nextPP)->value=aValue;
1634 break;
1635 }
1636 // decide which way to go
1637 if (aKey>cmpval) {
1638 // go to the "higher" side
1639 nextPP = &((*nextPP)->nextHigher);
1640 // determine new minimum
1641 aMinKey = cmpval+1; // minimum must be higher than cmpval
1642 }
1643 else {
1644 // go to the "lower or equal" side
1645 nextPP = &((*nextPP)->nextLowerOrEqual);
1646 // determine new maximum
1647 aMaxKey = cmpval; // maximum must be lower or equal than cmpval
1648 }
1649 } while(true);
1650} // addToBinTree
1651
1652
1653// dispose a bintree
1654void disposeBinTree(TBinTreeNode *&aBinTree)
1655{
1656 if (!aBinTree) return;
1657 if (aBinTree->nextHigher)
1658 disposeBinTree(aBinTree->nextHigher);
1659 if (aBinTree->nextLowerOrEqual)
1660 disposeBinTree(aBinTree->nextLowerOrEqual);
1661 delete aBinTree;
1662 aBinTree=NULL__null;
1663} // disposeBinTree
1664
1665
1666// convert key to value using a flat bintree
1667treeval_t searchBintree(TBinTreeNode *aBinTree, treeval_t aKey, treeval_t aUndefValue, treeval_t aMinKey, treeval_t aMaxKey)
1668{
1669 treeval_t cmpval;
1670 while(aBinTree) {
1671 // create the new decision value from max and min
1672 cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);
1673 // must match stored cmpval
1674 if (cmpval!=aBinTree->key)
1675 return aUndefValue;
1676 // check if next node must be leaf if the tree contains our key,
1677 // this is the case if max==min
1678 if (aMaxKey==aMinKey) {
1679 if (aBinTree->nextHigher!=NULL__null || aBinTree->nextLowerOrEqual!=NULL__null) {
1680 // no leaf value here, should not be the case ever (we should have
1681 // encountered a node with no left or right link before this!)
1682 return aUndefValue;
1683 }
1684 else {
1685 // found a leaf value here
1686 return aBinTree->value;
1687 }
1688 }
1689 // decide which way to go
1690 if (aKey>cmpval) {
1691 // go to the "higher" side = just next element in array, except if we have the special marker here
1692 if (aBinTree->nextHigher == NULL__null)
1693 return aUndefValue; // we should go higher-side, but can't -> unknown key
1694 aBinTree=aBinTree->nextHigher;
1695 // determine new minimum
1696 aMinKey = cmpval+1; // minimum must be higher than cmpval
1697 }
1698 else {
1699 // go to the "lower" side = element at index indicated by current element, except if we have the special marker here
1700 if (aBinTree->nextLowerOrEqual == NULL__null)
1701 return aUndefValue; // we should go lower-or-equal-side, but can't -> unknown key
1702 aBinTree=aBinTree->nextLowerOrEqual;
1703 // determine new maximum
1704 aMaxKey = cmpval; // maximum must be lower or equal than cmpval
1705 }
1706 }
1707 // if we reach the end of the array, key is not in the tree
1708 return aUndefValue;
1709} // searchBintree
1710
1711
1712
1713
1714// make a flat form representation of the bintree in a one-dimensional array
1715// - higher-side links are implicit (nodes following each other),
1716// lower-or-equal-side links are explicit
1717static bool flatBinTreeRecursion(
1718 TBinTreeNode *aBinTree, size_t &aIndex, treeval_t *aFlatArray, size_t aArrSize, treeval_t aLinksStart, treeval_t aLinksEnd
1719)
1720{
1721 // check if array is full
1722 if (aIndex>=aArrSize)
1723 return false;
1724 // examine node to flatten
1725 if (aBinTree->nextHigher==NULL__null && aBinTree->nextLowerOrEqual==NULL__null) {
1726 // this is a leaf node, containing only the value
1727 if (aBinTree->value>=aLinksStart && aBinTree->value<=aLinksEnd)
1728 return false; // link space and value space overlap
1729 aFlatArray[aIndex]=aBinTree->value;
1730 aIndex++;
1731 }
1732 else if (aBinTree->nextHigher==NULL__null) {
1733 // lower-side-only node: set special mark to specify that lower-or-equal side
1734 // implicitly follows (instead of higher-side)
1735 aFlatArray[aIndex]=aLinksStart + 1; // no node points to the immediately following node explicitly, so 1 can be used as special marker
1736 aIndex++;
1737 // - recurse to generate it
1738 if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1739 return false;
1740 }
1741 else {
1742 // this is a branch
1743 // - lower-or-equal side is represented as an index in the array
1744 aFlatArray[aIndex]=aLinksStart + 0; // default to not-existing (no node points to itself, so 0 can be used as NIL index value)
1745 // - higher side branch follows immediately
1746 size_t linkindex = aIndex++;
1747 // - recurse to generate it
1748 if (!flatBinTreeRecursion(aBinTree->nextHigher,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1749 return false;
1750 // - now we have the index where we must insert the lower-or-equal side
1751 if (aBinTree->nextLowerOrEqual!=NULL__null) {
1752 // there is a lower-or-equal side
1753 // - place relative link from original node
1754 uInt32 rellink=aIndex-linkindex;
1755 if ((uInt32)aLinksStart+rellink>(uInt32)aLinksEnd-1L) {
1756 // we need a long link
1757 // - move generated higher side branch one up
1758 for (size_t k=aIndex-1; k>linkindex; k--) aFlatArray[k+1]=aFlatArray[k];
1759 aIndex++; // we've eaten up one extra entry now
1760 // - now set long link
1761 aFlatArray[linkindex]=aLinksEnd-1; // long link marker
1762 if (rellink>0xFFFF)
1763 return false; // cannot jump more than 64k
1764 aFlatArray[linkindex+1]=rellink; // long link
1765 }
1766 else {
1767 // short link is ok
1768 aFlatArray[linkindex]=aLinksStart+rellink;
1769 }
1770 // - now create the lower-or-equal side
1771 if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))
1772 return false;
1773 }
1774 }
1775 return true;
1776} // flatBinTreeRecursion
1777
1778
1779// make a flat form representation of the bintree in a one-dimensional array
1780// - higher-side links are implicit (nodes following each other),
1781// lower-or-equal-side links are explicit
1782bool flatBinTree(
1783 TBinTreeNode *aBinTree, TConvFlatTree &aFlatTree, size_t aArrSize,
1784 treeval_t aMinKey, treeval_t aMaxKey, treeval_t aLinksStart, treeval_t aLinksEnd
1785)
1786{
1787 // save tree params
1788 aFlatTree.numelems=0;
1789 aFlatTree.minkey=aMinKey;
1790 aFlatTree.maxkey=aMaxKey;
1791 aFlatTree.linksstart=aLinksStart;
1792 aFlatTree.linksend=aLinksEnd;
1793 // now create actual tree
1794 size_t index=0;
1795 if (!flatBinTreeRecursion(aBinTree,index,aFlatTree.elements,aArrSize,aLinksStart,aLinksEnd))
1796 return false;
1797 aFlatTree.numelems=index; // actual length of array
1798 return true;
1799} // flatBinTree
1800
1801
1802
1803
1804#endif
1805
1806
1807// convert key to value using a flat bintree
1808treeval_t searchFlatBintree(const TConvFlatTree &aFlatTree, treeval_t aKey, treeval_t aUndefValue)
1809{
1810 treeval_t cmpval,thisnode;
1811 size_t index=0;
1812 // get start min and max
1813 treeval_t minKey = aFlatTree.minkey;
1814 treeval_t maxKey = aFlatTree.maxkey;
1815 // reject out-of-bounds keys immediately
1816 if (aKey<minKey || aKey>maxKey)
1817 return aUndefValue;
1818 do {
1819 // create the new decision value from max and min
1820 cmpval = minKey+((maxKey-minKey) >> 1);
1821 thisnode = aFlatTree.elements[index];
1822 // check if next node must be leaf if the tree contains our key,
1823 // this is the case if max==min
1824 if (maxKey==minKey) {
1825 #ifdef BINTREE_GENERATOR
1826 if (thisnode>=aFlatTree.linksstart && thisnode<=aFlatTree.linksend) {
1827 // no leaf value here, should not be the case ever (we should have
1828 // encountered a node with no left or right link before this!)
1829 return aUndefValue;
1830 }
1831 else
1832 #endif
1833 {
1834 // found a leaf value here
1835 return (treeval_t) thisnode;
1836 }
1837 }
1838 // decide which way to go
1839 if (aKey>cmpval) {
1840 // go to the "higher" side = just next element in array, except if we have the special marker here
1841 if (thisnode == aFlatTree.linksstart+1)
1842 return aUndefValue; // we should go higher-side, but can't -> unknown key
1843 // next node is next index (or one more in case this is a long link)
1844 if (thisnode == aFlatTree.linksend-1)
1845 index++;
1846 index++;
1847 // determine new minimum
1848 minKey = cmpval+1; // minimum must be higher than cmpval
1849 }
1850 else {
1851 // go to the "lower" side = element at index indicated by current element, except if we have the special marker here
1852 if (thisnode == aFlatTree.linksstart+1)
1853 index++; // special case, "lower" side is immediately following because there is no "higher" side
1854 else {
1855 #ifdef BINTREE_GENERATOR
1856 // if node contains a leaf value instead of a link, something is wrong
1857 if (thisnode<aFlatTree.linksstart || thisnode>aFlatTree.linksend)
1858 return aUndefValue; // no leaf expected here
1859 #endif
1860 if (thisnode==aFlatTree.linksend-1) {
1861 // long link
1862 index++; // skip long link marker
1863 thisnode = aFlatTree.elements[index]; // get link value
1864 index = index+thisnode; // jump by link value
1865 }
1866 else {
1867 // short link
1868 index = index+(thisnode-aFlatTree.linksstart); // get index of next node (relative branch)
1869 }
1870 if (index==0)
1871 return aUndefValue; // there is no link
1872 }
1873 // determine new maximum
1874 maxKey = cmpval; // maximum must be lower or equal than cmpval
1875 }
1876 } while(index<aFlatTree.numelems);
1877 // if we reach the end of the array, key is not in the tree
1878 return aUndefValue;
1879} // searchFlatBintree
1880
1881// MD5 and B64 given string
1882void MD5B64(const char *aString, sInt32 aLen, string &aMD5B64)
1883{
1884 // determine input length
1885 if (aLen<=0) aLen=strlen(aString);
1886 // calc MD5
1887 md5::SYSYNC_MD5_CTX context;
1888 uInt8 digest[16];
1889 md5::Init (&context);
1890 md5::Update (&context, (const uInt8 *)aString,aLen);
1891 md5::Final (digest, &context);
1892 // b64 encode the MD5 digest
1893 uInt32 b64md5len;
1894 char *b64md5=b64::encode(digest,16,&b64md5len);
1895 // assign result
1896 aMD5B64.assign(b64md5,b64md5len);
1897 // done
1898 b64::free(b64md5); // return buffer allocated by b64::encode
1899} // MD5B64
1900
1901
1902// format as Timestamp for use in debug logs
1903void StringObjTimestamp(string &aStringObj, lineartime_t aTimer)
1904{
1905 // format the time
1906 if (aTimer==noLinearTime) {
1907 aStringObj = "<no time>";
1908 return;
1909 }
1910 sInt16 y,mo,d,h,mi,s,ms;
1911 lineartime2date(aTimer,&y,&mo,&d);
1912 lineartime2time(aTimer,&h,&mi,&s,&ms);
1913 StringObjPrintf(
1914 aStringObj,
1915 "%04d-%02d-%02d %02d:%02d:%02d.%03d",
1916 y,mo,d,h,mi,s,ms
1917 );
1918} // StringObjTimestamp
1919
1920
1921// format as hex string
1922void StringObjHexString(string &aStringObj, const uInt8 *aBinary, uInt32 aBinSz)
1923{
1924 aStringObj.erase();
1925 if (!aBinary) return;
1926 while (aBinSz>0) {
1927 AppendHexByte(aStringObj,*aBinary++);
1928 aBinSz--;
1929 }
1930} // StringObjHexString
1931
1932
1933// add (already encoded!) CGI to existing URL string
1934bool addCGItoString(string &aStringObj, cAppCharP aCGI, bool noduplicate)
1935{
1936 if (!noduplicate || aStringObj.find(aCGI)==string::npos) {
1937 // - Add CGI separator if and only if none exists already
1938 if (aStringObj.find("?")==string::npos)
1939 aStringObj += '?';
1940 aStringObj += aCGI;
1941 return true; // added
1942 }
1943 return false; // nothing added
1944}
1945
1946
1947// encode string for being used as a CGI key/value element
1948string encodeForCGI(cAppCharP aCGI)
1949{
1950 string cgi;
1951 cAppCharP p = aCGI;
1952 while (p && *p) {
1953 if (*p>0x7E || *p<=0x20 || *p=='%' || *p=='?' || *p=='&' || *p=='#') {
1954 // CGI encode these
1955 cgi += '%';
1956 AppendHexByte(cgi, *p);
1957 }
1958 else {
1959 // use as-is
1960 cgi += *p;
1961 }
1962 p++;
1963 }
1964 return cgi;
1965} // encodeForCGI
1966
1967
1968// Count bits
1969int countbits(uInt32 aMask)
1970{
1971 int bits=0;
1972 uInt32 mask=0x0000001;
1973 while (mask) {
1974 if (aMask & mask) bits++;
1975 mask=mask << 1;
1976 }
1977 return bits;
1978} // countbits
1979
1980
// make uppercase
// - converts every char of aString in place using toupper()
// - chars are passed to toupper() as unsigned char, because passing a
//   negative char value (non-ASCII with signed char) is undefined
void StringUpper(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++)
    aString[k]=(char)toupper((unsigned char)aString[k]);
} // StringUpper
1986
1987
// make lowercase
// - converts every char of aString in place using tolower()
// - chars are passed to tolower() as unsigned char, because passing a
//   negative char value (non-ASCII with signed char) is undefined
void StringLower(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++)
    aString[k]=(char)tolower((unsigned char)aString[k]);
} // StringLower
1993
1994
1995// Substitute occurences of pattern with replacement in string
1996void StringSubst(
1997 string &aString, const char *aPattern, const string &aReplacement,
1998 sInt32 aPatternLen,
1999 TCharSets aCharSet, TLineEndModes aLEM,
2000 TQuotingModes aQuotingMode
2001)
2002{
2003 StringSubst(
2004 aString, aPattern,
2005 aReplacement.c_str(),
2006 aPatternLen,
2007 aReplacement.size(),
2008 aCharSet, aLEM, aQuotingMode
2009 );
2010} // StringSubst
2011
2012
2013// Substitute occurences of pattern with replacement in string
2014void StringSubst(
2015 string &aString, const char *aPattern, const char *aReplacement,
2016 sInt32 aPatternLen, sInt32 aReplacementLen,
2017 TCharSets aCharSet, TLineEndModes aLEM,
2018 TQuotingModes aQuotingMode
2019)
2020{
2021 string::size_type i;
2022 string s;
2023 i=0;
2024 if (aPatternLen<0) aPatternLen=strlen(aPattern);
2025 // convert if needed
2026 if (!aReplacement) {
2027 aReplacement=""; // empty string if not specified
2028 aReplacementLen=0;
2029 }
2030 if (aCharSet!=chs_unknown) {
2031 appendUTF8ToString(aReplacement,s,aCharSet,aLEM,aQuotingMode);
2032 aReplacement=s.c_str();
2033 aReplacementLen=s.size();
2034 }
2035 else {
2036 if (aReplacementLen<0) aReplacementLen=strlen(aReplacement);
2037 }
2038 // now replace
2039 while((i=aString.find(aPattern,i))!=string::npos) {
2040 aString.replace(i,aPatternLen,aReplacement);
2041 i+=aReplacementLen;
2042 }
2043} // StringSubst
2044
2045
2046// Substitute occurences of pattern with replacement in string
2047void StringSubst(string &aString, const char *aPattern, const string &aReplacement, sInt32 aPatternLen)
2048{
2049 StringSubst(aString,aPattern,aReplacement.c_str(),aPatternLen,aReplacement.size());
2050} // StringSubst
2051
2052
2053// Substitute occurences of pattern with integer number in string
2054void StringSubst(string &aString, const char *aPattern, sInt32 aNumber, sInt32 aPatternLen)
2055{
2056 string s;
2057 StringObjPrintf(s,"%ld",(long)aNumber);
2058 StringSubst(aString,aPattern,s,aPatternLen);
2059} // StringSubst
2060
2061
2062
2063// copy PCdata contents into std::string object
2064void smlPCDataToStringObj(const SmlPcdataPtr_t aPcdataP, string &aStringObj)
2065{
2066 if (!aPcdataP || !aPcdataP->content) {
2067 // no content at all
2068 aStringObj.erase();
2069 }
2070 else if (
2071 // NOTE: Opaque works only with modified syncML toolkit which
2072 // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING
2073 aPcdataP->contentType == SML_PCDATA_STRING ||
2074 aPcdataP->contentType == SML_PCDATA_OPAQUE
2075 ) {
2076 // string or opaque type
2077 aStringObj.assign((char *)aPcdataP->content, aPcdataP->length);
2078 }
2079 else if (aPcdataP->contentType == SML_PCDATA_EXTENSION) {
2080 // extension type
2081 StringObjPrintf(aStringObj,"[PCDATA_EXTENSION Type=%hd]",(sInt16)aPcdataP->extension);
2082 }
2083 else {
2084 // other type
2085 StringObjPrintf(aStringObj,"[PCDATA Type=%hd]",(sInt16)aPcdataP->contentType);
2086 }
2087} // smlPCDataToStringObj
2088
2089
2090// returns item string or empty string (NEVER NULL)
2091const char *smlItemDataToCharP(const SmlItemPtr_t aItemP)
2092{
2093 if (!aItemP) return "";
2094 return smlPCDataToCharP(aItemP->data);
2095} // smlItemDataToCharP
2096
2097
2098// returns first item string or empty string (NEVER NULL)
2099const char *smlFirstItemDataToCharP(const SmlItemListPtr_t aItemListP)
2100{
2101 if (!aItemListP) return "";
2102 return smlItemDataToCharP(aItemListP->item);
2103} // smlFirstItemDataToCharP
2104#endif //SYSYNC_ENGINE
2105
2106// returns pointer to PCdata contents or null string. If aSizeP!=NULL, length will be stored in *aSize
2107const char *smlPCDataToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP)
2108{
2109 const char *str = smlPCDataOptToCharP(aPcdataP, aSizeP);
2110 if (str) return str;
2111 return "";
2112} // smlPCDataToCharP
2113
2114
2115// returns pointer to PCdata contents if existing, NULL otherwise.
2116// If aSizeP!=NULL, length will be stored in *aSize
2117const char *smlPCDataOptToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP)
2118{
2119 if (!aPcdataP || !aPcdataP->content) {
2120 return NULL__null; // we have no value, it could be empty howevert
2121 if (aSizeP) *aSizeP=0;
2122 }
2123 if (aPcdataP->length==0) {
2124 // empty content
2125 if (aSizeP) *aSizeP=0;
2126 return ""; // return empty string
2127 }
2128 else if (
2129 // NOTE: Opaque works only with modified syncML toolkit which
2130 // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING
2131 aPcdataP->contentType == SML_PCDATA_STRING ||
2132 aPcdataP->contentType == SML_PCDATA_CDATA || // XML only
2133 aPcdataP->contentType == SML_PCDATA_OPAQUE // WBXML only
2134 ) {
2135 // return pointer to content
2136 if (aSizeP) *aSizeP=aPcdataP->length;
2137 return (char *) aPcdataP->content;
2138 }
2139 else {
2140 // no string
2141 if (aSizeP) *aSizeP=11;
2142 return "[no string]";
2143 }
2144} // smlPCDataOptToCharP
2145
2146
2147// returns pointer to source or target LocURI
2148const char *smlSrcTargLocURIToCharP(const SmlTargetPtr_t aSrcTargP)
2149{
2150 if (!aSrcTargP || !aSrcTargP->locURI) {
2151 return ""; // empty string
2152 }
2153 else {
2154 // return PCdata string contents
2155 return smlPCDataToCharP(aSrcTargP->locURI);
2156 }
2157} // smlSrcTargLocURIToCharP
2158
2159
2160// returns pointer to source or target LocName
2161const char *smlSrcTargLocNameToCharP(const SmlTargetPtr_t aSrcTargP)
2162{
2163 if (!aSrcTargP || !aSrcTargP->locName) {
2164 return ""; // empty string
2165 }
2166 else {
2167 // return PCdata string contents
2168 return smlPCDataToCharP(aSrcTargP->locName);
2169 }
2170} // smlSrcTargLocNameToCharP
2171
2172
2173#ifdef SYSYNC_ENGINE1
2174// returns error code made ready for SyncML sending (that is, remove offset
2175// of 10000 if present, and make generic error 500 for non-SyncML errors,
2176// and return LOCERR_OK as 200)
2177localstatus syncmlError(localstatus aErr)
2178{
2179 if (aErr==LOCERR_OK) return 200; // SyncML ok code
2180 if (aErr<999) return aErr; // return as is
2181 if (aErr>=LOCAL_STATUS_CODE+100 && aErr<=999)
2182 return aErr-LOCAL_STATUS_CODE; // return with offset removed
2183 // no suitable conversion
2184 return 500; // return generic "bad"
2185} // localError
2186
2187
2188// returns error code made local (that is, offset by 10000 in case aErr is a
2189// SyncML status code <10000, and convert 200 into LOCERR_OK)
2190localstatus localError(localstatus aErr)
2191{
2192 if (aErr==200 || aErr==0) return LOCERR_OK;
2193 if (aErr<LOCAL_STATUS_CODE) return aErr+LOCAL_STATUS_CODE;
2194 return aErr;
2195} // localError
2196
2197
2198// returns pure relative URI, if specified relative or absolute to
2199// given server URI
2200const char *relativeURI(const char *aURI,const char *aServerURI)
2201{
2202 // check for "./" type relative URI
2203 if (strnncmp(aURI,URI_RELPREFIX"./",2)==0) {
2204 // relative URI prefixed with "./", just zap the relative part
2205 return aURI+2;
2206 }
2207 else if (aServerURI) {
2208 // test if absolute URI specifying the right server
2209 uInt32 n=strlen(aServerURI);
2210 if (strnncmp(aURI,aServerURI,n)==0) {
2211 // beginning of URI matches server's URI
2212 const char *p=aURI+n;
2213 // skip delimiter, if any
2214 if (*p=='/') p++;
2215 // return relative part of URI
2216 return p;
2217 }
2218 }
2219 // just return unmodified
2220 return aURI;
2221} // relativeURI
2222
2223
// Split host name at the first colon into address and port parts.
// aAddr and aPort may be NULL if the respective part is not needed.
// Without colon, the entire input is the address and the port is emptied.
void splitHostname(const char *aHost,string *aAddr,string *aPort)
{
  const char *colonP = strchr(aHost,':');
  if (!colonP) {
    // no port spec
    if (aAddr) aAddr->assign(aHost);
    if (aPort) aPort->erase();
    return;
  }
  // port spec found: address is everything before, port everything after the colon
  if (aAddr) aAddr->assign(aHost,colonP-aHost);
  if (aPort) aPort->assign(colonP+1);
} // splitHostname
2241
// Translate %XX escapes into the corresponding character, in-place.
// - hex digits are accepted in upper and lower case
// - an incomplete escape at the end of the string ("%" or "%X") is dropped
// - an escape with an invalid second digit is dropped entirely
// - an invalid first digit counts as 0 (so "%g1" decodes to character 0x01)
// A NULL pointer or a string without '%' is left untouched.
void urlDecode(string *str)
{
  // nothing todo?
  if (!str ||
      str->find('%') == string::npos) return;

  string decoded;
  decoded.reserve(str->size());
  const char *in = str->c_str();
  char c;
  while ((c = *in++) != 0) {
    if (c != '%') {
      // ordinary character, copy as is
      decoded.append(1, c);
      continue;
    }
    // first hex digit
    // Note: tolower() must not be fed with negative values, so bytes >0x7F
    //       need to go through unsigned char first
    c = (char)tolower((unsigned char)*in++);
    if (!c) break; // string ends within escape
    unsigned char value = 0;
    if (c >= '0' && c <= '9') {
      value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      value = c - 'a' + 10;
    }
    // else: silently skip invalid character (counts as 0)
    value *= 16;
    // second hex digit
    c = (char)tolower((unsigned char)*in++);
    if (!c) break; // string ends within escape
    if (c >= '0' && c <= '9') {
      value += c - '0';
    } else if (c >= 'a' && c <= 'f') {
      value += c - 'a' + 10;
    } else {
      // silently skip invalid character, nothing gets appended
      continue;
    }
    decoded.append(1, (char)value);
  }
  *str = decoded;
} // urlDecode
2285
// Translate every character that is not alphanumeric into its %XX escape
// (two upper case hex digits of the byte value), in-place.
// A NULL pointer is ignored. The string is only modified if at least one
// character needed escaping.
void urlEncode(string *str)
{
  if (!str) {
    return;
  }

  string encoded;
  bool escaped = false;
  for (const char *p = str->c_str(); *p != 0; ++p) {
    // Note: work on the unsigned byte value. A negative char must not be
    //       passed to isalnum(), and would be sign-extended by printf into
    //       more than two hex digits, overflowing the escape buffer.
    const unsigned char byte = (unsigned char)*p;
    if (isalnum(byte)) {
      encoded.append(1, *p);
    }
    else {
      char esc[4]; // '%' + 2 hex digits + terminator
      snprintf(esc, sizeof(esc), "%%%02X", (unsigned int)byte);
      encoded.append(esc, 3);
      escaped = true;
    }
  }

  if (escaped) {
    // unsafe characters were found and replaced, pass back result
    *str = encoded;
  }
} // urlEncode
2315
// Split URL into protocol, hostname, port, document name, query and
// auth-info (user, password). Nothing is url-decoded here.
// - every output pointer may be NULL if the part is not needed; all non-NULL
//   outputs are always set (emptied if the part does not exist)
// - protocol is everything up to the first colon, if there is one
// - auth info is only recognized in the form "user:password@" and only
//   when a protocol was found
// - the slash separating host and document is not part of the document
void splitURL(const char *aURI,string *aProtocol,string *aHost,
              string *aDoc, string *aUser, string *aPasswd,
              string *aPort, string *aQuery)
{
  const char *cursor = aURI; // start of the not yet processed part
  bool authFound = false;

  // extract protocol
  const char *protocolEnd = strchr(cursor,':');
  if (!protocolEnd) {
    // no protocol found (and thus no auth either)
    if (aProtocol) aProtocol->erase();
  }
  else {
    // protocol found
    if (aProtocol) aProtocol->assign(cursor,protocolEnd-cursor);
    cursor = protocolEnd+1; // past colon
    // past trailing slashes (two expected, ignore if less are given)
    for (int slashes=0; slashes<2 && *cursor=='/'; ++slashes)
      ++cursor;
    // host part (including auth and port) ends at next slash or end of string
    const char *hostPartEnd = strchr(cursor,'/');
    if (!hostPartEnd) hostPartEnd = cursor+strlen(cursor);
    const string hostPart(cursor,hostPartEnd-cursor);
    // check for auth info: first '@' must come after first ':'
    const string::size_type atPos = hostPart.find('@');
    const string::size_type colonPos = hostPart.find(':');
    if (atPos!=string::npos && colonPos!=string::npos && atPos>colonPos) {
      // auth exists
      authFound = true;
      if (aUser) aUser->assign(hostPart,0,colonPos);
      if (aPasswd) aPasswd->assign(hostPart,colonPos+1,atPos-colonPos-1);
      // skip auth in full string
      cursor += atPos+1;
    }
  }
  if (!authFound) {
    if (aUser) aUser->erase();
    if (aPasswd) aPasswd->erase();
  }

  // cursor now points to host part: separate hostname and document
  string hostAndPort;
  const char *pathSlash = strchr(cursor,'/');
  if (pathSlash) {
    // host part stops at slash
    hostAndPort.assign(cursor,pathSlash-cursor);
    // do not put slash into docname, even if docname would be empty
    // (caller expected to add slash as needed)
    const char *docStart = pathSlash+1;
    // now check for query
    const char *queryMark = strchr(docStart,'?');
    if (queryMark) {
      // split at question mark
      if (aDoc) aDoc->assign(docStart,queryMark-docStart);
      if (aQuery) aQuery->assign(queryMark+1);
    }
    else {
      // whole rest is document name
      if (aDoc) aDoc->assign(docStart);
      if (aQuery) aQuery->erase();
    }
  }
  else {
    // no path, doc part left empty in this case
    if (aDoc) aDoc->erase();
    // check if there is a query directly after the host name
    const char *queryMark = strchr(cursor,'?');
    if (queryMark) {
      // query directly follows host
      hostAndPort.assign(cursor,queryMark-cursor);
      if (aQuery) aQuery->assign(queryMark+1);
    }
    else {
      // entire rest is considered the host
      hostAndPort.assign(cursor);
      if (aQuery) aQuery->erase();
    }
  }

  // remove optional port from host part here (before caller does any
  // url-decoding, because that might introduce new : characters into the host name)
  const string::size_type portColon = hostAndPort.find(':');
  if (portColon==string::npos) {
    if (aHost) aHost->assign(hostAndPort);
    if (aPort) aPort->erase();
  }
  else {
    if (aHost) aHost->assign(hostAndPort,0,portColon);
    if (aPort) aPort->assign(hostAndPort,portColon+1,string::npos);
  }
} // splitURL
2420
2421#ifdef SPLIT_URL_MAIN
2422
2423#include <stdio.h>
2424#include <assert.h>
2425
2426static void test(const std::string &in, const std::string &expected)
2427{
2428 string protocol, host, doc, user, password, port, query;
2429 char buffer[1024];
2430
2431 splitURL(in.c_str(), &protocol, &host, &doc, &user, &password, &port, &query);
2432
2433 // URL-decode each part
2434 urlDecode(&protocol);
2435 urlDecode(&host);
2436 urlDecode(&doc);
2437 urlDecode(&user);
2438 urlDecode(&password);
2439
2440 sprintf(buffer,
2441 "prot '%s' user '%s' passwd '%s' host '%s' port '%s' doc '%s' query '%s'",
2442 protocol.c_str(),
2443 user.c_str(),
2444 password.c_str(),
2445 host.c_str(),
2446 port.c_str(),
2447 doc.c_str(),
2448 query.c_str());
2449 printf("%s -> %s\n", in.c_str(), buffer);
2450 assert(expected == buffer);
2451}
2452
2453int main(int argc, char **argv)
2454{
2455 test("http://user:passwd@host/patha/pathb?query",
2456 "prot 'http' user 'user' passwd 'passwd' host 'host' port '' doc 'patha/pathb' query 'query'");
2457 test("http://user:passwd@host:port/patha/pathb?query",
2458 "prot 'http' user 'user' passwd 'passwd' host 'host' port 'port' doc 'patha/pathb' query 'query'");
2459 test("file:///foo/bar",
2460 "prot 'file' user '' passwd '' host '' port '' doc 'foo/bar' query ''");
2461 test("http://host%3a:port?param=value",
2462 "prot 'http' user '' passwd '' host 'host:' port 'port' doc '' query 'param=value'");
2463 test("http://host%3a?param=value",
2464 "prot 'http' user '' passwd '' host 'host:' port '' doc '' query 'param=value'");
2465 test("foo%24",
2466 "prot '' user '' passwd '' host 'foo$' port '' doc '' query ''");
2467 test("foo%2f",
2468 "prot '' user '' passwd '' host 'foo/' port '' doc '' query ''");
2469 test("foo%2A",
2470 "prot '' user '' passwd '' host 'foo*' port '' doc '' query ''");
2471 test("foo%24bar",
2472 "prot '' user '' passwd '' host 'foo$bar' port '' doc '' query ''");
2473 test("%24bar",
2474 "prot '' user '' passwd '' host '$bar' port '' doc '' query ''");
2475 test("foo%2",
2476 "prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2477 test("foo%",
2478 "prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2479 test("foo%g",
2480 "prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2481 test("foo%gh",
2482 "prot '' user '' passwd '' host 'foo' port '' doc '' query ''");
2483 test("%ghbar",
2484 "prot '' user '' passwd '' host 'bar' port '' doc '' query ''");
2485 return 0;
2486}
2487#endif // SPLIT_URL_MAIN
2488
2489#endif //SYSYNC_ENGINE
2490
2491
2492// returns type from meta
2493const char *smlMetaTypeToCharP(SmlMetInfMetInfPtr_t aMetaP)
2494{
2495 if (!aMetaP) return NULL__null; // no meta at all
2496 return smlPCDataToCharP(aMetaP->type);
2497} // smlMetaTypeToCharP
2498
2499
2500
2501// returns Next Anchor from meta
2502const char *smlMetaNextAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)
2503{
2504 if (!aMetaP) return NULL__null; // no meta at all
2505 if (!aMetaP->anchor) return NULL__null; // no anchor at all
2506 return smlPCDataToCharP(aMetaP->anchor->next);
2507} // smlMetaAnchorToCharP
2508
2509
2510// returns Last Anchor from meta
2511const char *smlMetaLastAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)
2512{
2513 if (!aMetaP) return NULL__null; // no meta at all
2514 if (!aMetaP->anchor) return NULL__null; // no anchor at all
2515 return smlPCDataToCharP(aMetaP->anchor->last);
2516} // smlMetaLastAnchorToCharP
2517
2518
2519// returns DevInf pointer if any in specified PCData, NULL otherwise
2520SmlDevInfDevInfPtr_t smlPCDataToDevInfP(const SmlPcdataPtr_t aPCDataP)
2521{
2522 if (!aPCDataP) return NULL__null;
2523 if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;
2524 if (aPCDataP->extension!=SML_EXT_DEVINF) return NULL__null;
2525 return (SmlDevInfDevInfPtr_t)(aPCDataP->content);
2526} // smlPCDataToDevInfP
2527
2528
2529// returns MetInf pointer if any in specified PCData, NULL otherwise
2530SmlMetInfMetInfPtr_t smlPCDataToMetInfP(const SmlPcdataPtr_t aPCDataP)
2531{
2532 if (!aPCDataP) return NULL__null;
2533 if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;
2534 if (aPCDataP->extension!=SML_EXT_METINF) return NULL__null;
2535 return (SmlMetInfMetInfPtr_t)(aPCDataP->content);
2536} // smlPCDataToMetInfP
2537
2538
2539// allocate memory via SyncML toolkit allocation function, but throw
2540// exception if it fails. Used by SML
2541void *_smlMalloc(MemSize_t size)
2542{
2543 void *p;
2544
2545 p=smlLibMalloc(size);
2546 if (!p) SYSYNC_THROW(TMemException("smlLibMalloc() failed"))throw TMemException("smlLibMalloc() failed");
2547 return p;
2548} // _smlMalloc
2549
2550
2551// returns true on successful conversion of PCData string to sInt32
2552bool smlPCDataToULong(const SmlPcdataPtr_t aPCDataP, uInt32 &aLong)
2553{
2554 return StrToULong(smlPCDataToCharP(aPCDataP),aLong);
2555} // smlPCDataToLong
2556
2557// returns true on successful conversion of PCData string to sInt32
2558bool smlPCDataToLong(const SmlPcdataPtr_t aPCDataP, sInt32 &aLong)
2559{
2560 return StrToLong(smlPCDataToCharP(aPCDataP),aLong);
2561} // smlPCDataToLong
2562
2563#ifdef SYSYNC_ENGINE1
2564// returns true on successful conversion of PCData string to format
2565bool smlPCDataToFormat(const SmlPcdataPtr_t aPCDataP, TFmtTypes &aFmt)
2566{
2567 const char *fmt = smlPCDataToCharP(aPCDataP);
2568 sInt16 sh;
2569 if (*fmt) {
2570 if (!StrToEnum(encodingFmtSyncMLNames,numFmtTypes,sh,fmt))
2571 return false; // unknown format
2572 aFmt=(TFmtTypes)sh;
2573 }
2574 else {
2575 aFmt=fmt_chr; // no spec = chr
2576 }
2577 return true;
2578} // smlPCDataToFormat
2579#endif //SYSYNC_ENGINE
2580
2581// build Meta anchor
2582SmlPcdataPtr_t newMetaAnchor(const char *aNextAnchor, const char *aLastAnchor)
2583{
2584 SmlPcdataPtr_t metaP;
2585 SmlMetInfAnchorPtr_t anchorP;
2586
2587 // - create empty meta
2588 metaP=newMeta();
2589 // - create new anchor
2590 anchorP=SML_NEW(SmlMetInfAnchor_t)((SmlMetInfAnchor_t*) _smlMalloc(sizeof(SmlMetInfAnchor_t)));
2591 // - set anchor contents
2592//%%% anchorP->last=newPCDataOptEmptyString(aLastAnchor); // optional, but omitted only if string is NULL (not if only empty)
2593 anchorP->last=newPCDataOptString(aLastAnchor); // optional
2594 anchorP->next=newPCDataString(aNextAnchor); // mandatory
2595 // - set anchor
2596 ((SmlMetInfMetInfPtr_t)(metaP->content))->anchor=anchorP;
2597 // return
2598 return metaP;
2599} // newMetaAnchor
2600
2601
2602// build Meta type
2603SmlPcdataPtr_t newMetaType(const char *aMetaType)
2604{
2605 SmlPcdataPtr_t metaP;
2606
2607 // - if not type, we don't create a meta at all
2608 if (aMetaType==NULL__null || *aMetaType==0) return NULL__null;
2609 // - create empty meta
2610 metaP=newMeta();
2611 // - set type
2612 ((SmlMetInfMetInfPtr_t)(metaP->content))->type=newPCDataString(aMetaType);
2613 // return
2614 return metaP;
2615} // newMetaType
2616
2617
2618// build empty Meta
2619SmlPcdataPtr_t newMeta(void)
2620{
2621 SmlPcdataPtr_t metaP;
2622 SmlMetInfMetInfPtr_t metinfP;
2623
2624 // - create empty PCData
2625 metaP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));
2626 metaP->contentType=SML_PCDATA_EXTENSION;
2627 metaP->extension=SML_EXT_METINF;
2628 // - %%% assume length is not relevant for structured content (looks like in mgrutil.c)
2629 metaP->length=0;
2630 // - create empty meta
2631 metinfP = SML_NEW(SmlMetInfMetInf_t)((SmlMetInfMetInf_t*) _smlMalloc(sizeof(SmlMetInfMetInf_t)));
2632 metaP->content=metinfP; // link to PCdata
2633 // - init meta options
2634 metinfP->version=NULL__null;
2635 metinfP->format=NULL__null;
2636 metinfP->type=NULL__null;
2637 metinfP->mark=NULL__null;
2638 metinfP->size=NULL__null;
2639 metinfP->nextnonce=NULL__null;
2640 metinfP->maxmsgsize=NULL__null;
2641 metinfP->mem=NULL__null;
2642 metinfP->emi=NULL__null; // PCData list
2643 metinfP->anchor=NULL__null;
2644 // - SyncML 1.1
2645 metinfP->maxobjsize=NULL__null;
2646 // - SyncML 1.2
2647 metinfP->flags=0;
2648 // return
2649 return metaP;
2650} // newMeta
2651
2652
2653// copy meta from existing meta (for data items only
2654// anchor, mem, emi, nonce are not copied!)
2655// Note however that we copy maxobjsize, as we (mis-)use it for ZIPPED_BINDATA_SUPPORT
2656SmlPcdataPtr_t copyMeta(SmlPcdataPtr_t aOldMetaP)
2657{
2658 if (!aOldMetaP) return NULL__null;
2659 SmlPcdataPtr_t newmetaP=newMeta();
2660 if (!newmetaP) return NULL__null;
2661 SmlMetInfMetInfPtr_t oldmetinfP = smlPCDataToMetInfP(aOldMetaP);
2662 if (!oldmetinfP) return NULL__null;
2663 SmlMetInfMetInfPtr_t newmetInfP = smlPCDataToMetInfP(newmetaP);
2664 // - copy meta
2665 newmetInfP->version = smlPcdataDup(oldmetinfP->version);
2666 newmetInfP->format = smlPcdataDup(oldmetinfP->format);
2667 newmetInfP->type = smlPcdataDup(oldmetinfP->type);
2668 newmetInfP->mark = smlPcdataDup(oldmetinfP->mark);
2669 newmetInfP->size = smlPcdataDup(oldmetinfP->size);
2670 newmetInfP->maxobjsize = smlPcdataDup(oldmetinfP->maxobjsize);
2671 // return
2672 return newmetaP;
2673} // copyMeta
2674
2675
2676
2677
2678// add an item to an item list
2679SmlItemListPtr_t *addItemToList(
2680 SmlItemPtr_t aItemP, // existing item data structure, ownership is passed to list
2681 SmlItemListPtr_t *aItemListPP // adress of pointer to existing item list or NULL
2682)
2683{
2684 if (aItemListPP && aItemP) {
2685 // find last itemlist pointer
2686 while (*aItemListPP) {
2687 aItemListPP=&((*aItemListPP)->next);
2688 }
2689 // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2690 *aItemListPP = SML_NEW(SmlItemList_t)((SmlItemList_t*) _smlMalloc(sizeof(SmlItemList_t)));
2691 (*aItemListPP)->next=NULL__null;
2692 (*aItemListPP)->item=aItemP; // insert new item
2693 // return pointer to pointer to next element (which is now NULL).
2694 // Can be passed in to addPCDataToList() again to append more elements without searching
2695 // for end-of-list
2696 return &((*aItemListPP)->next);
2697 }
2698 // nop, return pointer unmodified
2699 return aItemListPP;
2700} // addItemToList
2701
2702
2703// add a CTData item to a CTDataList
2704SmlDevInfCTDataListPtr_t *addCTDataToList(
2705 SmlDevInfCTDataPtr_t aCTDataP, // existing CTData item data structure, ownership is passed to list
2706 SmlDevInfCTDataListPtr_t *aCTDataListPP // adress of pointer to existing item list or NULL
2707)
2708{
2709 if (aCTDataListPP && aCTDataP) {
2710 // find last itemlist pointer
2711 while (*aCTDataListPP) {
2712 aCTDataListPP=&((*aCTDataListPP)->next);
2713 }
2714 // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2715 *aCTDataListPP = SML_NEW(SmlDevInfCTDataList_t)((SmlDevInfCTDataList_t*) _smlMalloc(sizeof(SmlDevInfCTDataList_t
)))
;
2716 (*aCTDataListPP)->next=NULL__null;
2717 (*aCTDataListPP)->data=aCTDataP; // insert new data
2718 // return pointer to pointer to next element (which is now NULL).
2719 // Can be passed in to addPCDataToList() again to append more elements without searching
2720 // for end-of-list
2721 return &((*aCTDataListPP)->next);
2722 }
2723 // nop, return pointer unmodified
2724 return aCTDataListPP;
2725} // addCTDataToList
2726
2727
2728// add a CTDataProp item to a CTDataPropList
2729SmlDevInfCTDataPropListPtr_t *addCTDataPropToList(
2730 SmlDevInfCTDataPropPtr_t aCTDataPropP, // existing CTDataProp item data structure, ownership is passed to list
2731 SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL
2732)
2733{
2734 if (aCTDataPropListPP && aCTDataPropP) {
2735 // find last itemlist pointer
2736 while (*aCTDataPropListPP) {
2737 aCTDataPropListPP=&((*aCTDataPropListPP)->next);
2738 }
2739 // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry
2740 *aCTDataPropListPP = SML_NEW(SmlDevInfCTDataPropList_t)((SmlDevInfCTDataPropList_t*) _smlMalloc(sizeof(SmlDevInfCTDataPropList_t
)))
;
2741 (*aCTDataPropListPP)->next=NULL__null;
2742 (*aCTDataPropListPP)->data=aCTDataPropP; // insert new data
2743 // return pointer to pointer to next element (which is now NULL).
2744 // Can be passed in to addPCDataToList() again to append more elements without searching
2745 // for end-of-list
2746 return &((*aCTDataPropListPP)->next);
2747 }
2748 // nop, return pointer unmodified
2749 return aCTDataPropListPP;
2750} // addCTDataPropToList
2751
2752
2753// add a CTData describing a property (as returned by newDevInfCTData())
2754// as a new property without parameters to a CTDataPropList
2755SmlDevInfCTDataPropListPtr_t *addNewPropToList(
2756 SmlDevInfCTDataPtr_t aPropCTData, // CTData describing property
2757 SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL
2758)
2759{
2760 SmlDevInfCTDataPropPtr_t propdataP = SML_NEW(SmlDevInfCTDataProp_t)((SmlDevInfCTDataProp_t*) _smlMalloc(sizeof(SmlDevInfCTDataProp_t
)))
;
2761 propdataP->param = NULL__null; // no params
2762 propdataP->prop = aPropCTData;
2763 return addCTDataPropToList(propdataP, aCTDataPropListPP);
2764} // addNewPropToList
2765
2766
2767
2768// add PCData element to a PCData list
2769SmlPcdataListPtr_t *addPCDataToList(
2770 SmlPcdataPtr_t aPCDataP, // Existing PCData element to be added, ownership is passed to list
2771 SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL
2772)
2773{
2774 if (aPCDataListPP) {
2775 // find last PCDataList pointer
2776 while (*aPCDataListPP) {
2777 aPCDataListPP=&((*aPCDataListPP)->next);
2778 }
2779 // aItemListPP now points to a NULL pointer which must be replaced by addr of new PCDataList entry
2780 *aPCDataListPP = SML_NEW(SmlPcdataList_t)((SmlPcdataList_t*) _smlMalloc(sizeof(SmlPcdataList_t)));
2781 (*aPCDataListPP)->next=NULL__null;
2782 (*aPCDataListPP)->data=aPCDataP; // insert new item
2783 // return pointer to pointer to next element (which is now NULL).
2784 // Can be passed in to addPCDataToList() again to append more elements without searching
2785 // for end-of-list
2786 return &((*aPCDataListPP)->next);
2787 }
2788 return NULL__null;
2789} // addPCDataToList
2790
2791
2792// add PCData string to a PCData list
2793SmlPcdataListPtr_t *addPCDataStringToList(
2794 const char *aString, // String to be added
2795 SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL
2796)
2797{
2798 return addPCDataToList(newPCDataString(aString),aPCDataListPP);
2799} // addPCDataStringToList
2800
2801
2802// create new optional location (source or target)
2803// Returns NULL if URI specified is NULL or empty
2804SmlSourcePtr_t newOptLocation(
2805 const char *aLocURI,
2806 const char *aLocName
2807)
2808{
2809 if (!aLocURI || *aLocURI==0) return NULL__null;
2810 else return newLocation(aLocURI,aLocName);
2811} // newOptLocation
2812
2813
2814// create new location (source or target)
2815// always returns location, even if URI and/or name are empty
2816// If name is NULL or empty, only URI is generated
2817SmlSourcePtr_t newLocation(
2818 const char *aLocURI,
2819 const char *aLocName
2820)
2821{
2822 SmlSourcePtr_t locP;
2823
2824 locP = SML_NEW(SmlSource_t)((SmlSource_t*) _smlMalloc(sizeof(SmlSource_t)));
2825 // URI is always present (might be empty, though)
2826 locP->locURI=newPCDataString(aLocURI);
2827 // name only if not empty
2828 if (aLocName && *aLocName!=0)
2829 locP->locName=newPCDataString(aLocName);
2830 else
2831 locP->locName=NULL__null;
2832 // filter defaults to NULL
2833 locP->filter=NULL__null;
2834 return locP;
2835} // newLocation
2836
2837
2838// create new empty Item
2839SmlItemPtr_t newItem(void)
2840{
2841 SmlItemPtr_t itemP;
2842
2843 itemP = SML_NEW(SmlItem_t)((SmlItem_t*) _smlMalloc(sizeof(SmlItem_t)));
2844 itemP->target=NULL__null;
2845 itemP->source=NULL__null;
2846 itemP->meta=NULL__null;
2847 itemP->data=NULL__null;
2848 // SyncML 1.1, no MoreData set
2849 itemP->flags=0;
2850 // SyncML 1.2
2851 itemP->targetParent=NULL__null;
2852 itemP->sourceParent=NULL__null;
2853 // custom data of client
2854 itemP->aux=NULL__null;
2855 return itemP;
2856} // newItem
2857
2858
2859// create new Item with string-type data
2860SmlItemPtr_t newStringDataItem(
2861 const char *aString
2862)
2863{
2864 SmlItemPtr_t itemP=newItem();
2865 itemP->data=newPCDataString(aString);
2866 return itemP;
2867} // newStringDataItem
2868
2869
2870// create meta-format PCData
2871SmlPcdataPtr_t newPCDataFormat(
2872 TFmtTypes aFmtType,
2873 bool aShowDefault
2874)
2875{
2876 if (aFmtType==fmt_chr && !aShowDefault)
2877 return NULL__null; // default
2878 else
2879 return newPCDataString(encodingFmtSyncMLNames[aFmtType]); // show format type
2880} // newPCDataFormat
2881
2882
2883// create new string-type PCData, if NULL or empty string is passed for aData,
2884// NULL is returned (optional info not there)
2885SmlPcdataPtr_t newPCDataFormatted(
2886 const uInt8 *aData, // data
2887 sInt32 aLength, // length of data, if<=0 then string length is calculated
2888 TFmtTypes aFmtType, // encoding Format
2889 bool aNeedsOpaque // set opaque needed (string that could confuse XML parsing or even binary)
2890)
2891{
2892 if (!aData) return NULL__null; // no data
2893 if (aLength==0) aLength=strlen((const char *)aData);
2894 if (aLength==0) return NULL__null; // no data
2895 // encode input string if needed
2896 SmlPcdataPtr_t pcdataP;
2897 char *b64data;
2898 uInt32 b64len;
2899 switch (aFmtType) {
2900 case fmt_b64:
2901 // convert to b64
2902 b64len=0;
2903 b64data=b64::encode(aData, aLength, &b64len);
2904 pcdataP = newPCDataString(b64data,b64len);
2905 b64::free(b64data);
2906 return pcdataP;
2907 default:
2908 // just copy into string or opaque/C_DATA string
2909 return newPCDataStringX(aData, aNeedsOpaque, aLength);
2910 }
2911} // newPCDataEncoded
2912
2913
2914// create new string-type PCData, if NULL or empty string is passed for aString,
2915// NULL is returned (optional info not there)
2916SmlPcdataPtr_t newPCDataOptString(
2917 const char *aString,
2918 sInt32 aLength // length of string, if<0 then length is calculated
2919)
2920{
2921 if (aString && (*aString!=0))
2922 return newPCDataString(aString,aLength);
2923 else
2924 return NULL__null;
2925} // newPCDataOptString
2926
2927
2928// create new string-type PCData, if NULL is passed for aString,
2929// NULL is returned (optional info not there)
2930// if empty string is passed, PCData with empty contents will be created
2931SmlPcdataPtr_t newPCDataOptEmptyString(
2932 const char *aString,
2933 sInt32 aLength // length of string, if<0 then length is calculated
2934)
2935{
2936 if (aString)
2937 return newPCDataString(aString,aLength);
2938 else
2939 return NULL__null;
2940} // newPCDataOptEmptyString
2941
2942
2943// create new string-type PCData, if NULL is passed for aString,
2944// an empty string is created (that is, a PCData with string terminator as
2945// content only, length=0)
2946SmlPcdataPtr_t newPCDataString(
2947 const char *aString,
2948 sInt32 aLength // length of string, if<0 then length is calculated
2949)
2950{
2951 return newPCDataStringX((const uInt8 *)aString,false,aLength);
2952} // newPCDataString
2953
2954
2955// create new PCData, aOpaque can be used to generate non-string data
2956// Note: empty strings are always coded as non-opaque, even if aOpaque is set
2957SmlPcdataPtr_t newPCDataStringX(
2958 const uInt8 *aString,
2959 bool aOpaque, // if set, an opaque method (OPAQUE or CDATA) is used
2960 sInt32 aLength // length of string, if<0 then length is calculated
2961)
2962{
2963 SmlPcdataPtr_t pcdataP;
2964
2965 pcdataP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));
2966
2967 // determine length
2968 if (aLength>=0 && aString)
2969 pcdataP->length = aLength; // as specified, and string argument not NULL
2970 else
2971 pcdataP->length = aString ? strlen((const char *)aString) : 0; // from argument, if NULL -> length=0
2972 // determine type
2973 if (aOpaque && aLength!=0) {
2974 // Note: due to modification in RTK, this generates
2975 // OPAQUE in WBXML and CDATA in XML
2976 pcdataP->contentType=SML_PCDATA_OPAQUE;
2977 }
2978 else {
2979 // non-critical string
2980 #ifdef SML_STRINGS_AS_OPAQUE
2981 pcdataP->contentType=SML_PCDATA_OPAQUE;
2982 #else
2983 pcdataP->contentType=SML_PCDATA_STRING;
2984 #endif
2985 }
2986 pcdataP->extension=SML_EXT_UNDEFINED;
2987 // - allocate data space (ALWAYS with room for a terminator, even if Opaque or empty string)
2988 pcdataP->content=smlLibMalloc(pcdataP->length+1); // +1 for terminator, see below
2989 // copy data (if any)
2990 if (pcdataP->length>0) {
2991 // - copy string
2992 smlLibMemcpy(pcdataP->content,aString,pcdataP->length);
2993 }
2994 // set terminator
2995 ((char *)(pcdataP->content))[pcdataP->length]=0; // terminate C string
2996 // return
2997 return pcdataP;
2998} // newPCDataStringX
2999
3000
3001// create new string-type PCData from C++ string
3002SmlPcdataPtr_t newPCDataString(
3003 const string &aString
3004)
3005{
3006 return newPCDataString(aString.c_str(),aString.length());
3007} // newPCDataString(string&)
3008
3009
3010// create new decimal string representation of sInt32 as PCData
3011SmlPcdataPtr_t newPCDataLong(
3012 sInt32 aLong
3013)
3014{
3015 const int ssiz=20;
3016 char s[ssiz];
3017
3018 snprintf(s,ssiz,"%ld",(long)aLong);
3019 return newPCDataString(s);
3020} // newPCDataLong
3021
3022
3023// Nonce generator allowing last-session nonce to be correctly re-generated in next session
3024void generateNonce(string &aNonce, const char *aDevStaticString, sInt32 aSessionStaticID)
3025{
3026 md5::SYSYNC_MD5_CTX context;
3027 uInt8 digest[16];
3028 md5::Init (&context);
3029 // - add in static device string
3030 md5::Update (&context, (const uInt8 *)aDevStaticString, strlen(aDevStaticString));
3031 // - add in session static ID in binary format
3032 md5::Update (&context, (const uInt8 *)&aSessionStaticID, sizeof(sInt32));
3033 // - done
3034 md5::Final (digest, &context);
3035 // - make string of first 48 bit of MD5: 48 bits, use 6 bits per char = 8 chars
3036 uInt64 dig48 = ((uInt32)digest[0] << 0) |
3037 ((uInt32)digest[1] << 8) |
3038 ((uInt32)digest[2] << 16) |
3039 ((uInt32)digest[3] << 24);
3040 aNonce.erase();
3041 for (sInt16 k=0; k<8; k++) {
3042 aNonce+=((dig48 & 0x03F) + 0x21);
3043 dig48 = dig48 >> 6;
3044 }
3045} // generateNonce
3046
3047
3048// create challenge of requested type
3049SmlChalPtr_t newChallenge(TAuthTypes aAuthType, const string &aNextNonce, bool aBinaryAllowed)
3050{
3051 SmlChalPtr_t chalP=NULL__null;
3052 SmlMetInfMetInfPtr_t metaP;
3053
3054 if (aAuthType!=auth_none) {
3055 // new challenge record
3056 chalP = SML_NEW(SmlChal_t)((SmlChal_t*) _smlMalloc(sizeof(SmlChal_t)));
3057 // add empty meta
3058 chalP->meta=newMeta();
3059 metaP=(SmlMetInfMetInfPtr_t)(chalP->meta->content);
3060 // add type and format
3061 // - type
3062 metaP->type=newPCDataString(authTypeSyncMLNames[aAuthType]);
3063 // - format
3064 const char *fmt = NULL__null;
3065 switch (aAuthType) {
3066 case auth_basic:
3067 // always request b64
3068 fmt=encodingFmtSyncMLNames[fmt_b64];
3069 break;
3070 case auth_md5:
3071 // request b64 only for non-binary capable encoding (that is, XML)
3072 /* %%% dont do that, Nokia9210 miserably fails when we do that,
3073 * it sends its data B64 encoded, but obviously with bad
3074 * data in it. Ericsson T39m seems to do it correctly however.
3075 if (!aBinaryAllowed)
3076 fmt=encodingFmtSyncMLNames[fmt_b64];
3077 */
3078 // always request b64 for now, seems to be safer with not fully compatible clients
3079 fmt=encodingFmtSyncMLNames[fmt_b64];
3080 break;
3081 default: break;
3082 }
3083 metaP->format=newPCDataOptString(fmt); // set format, but not empty
3084 // - add nonce if needed
3085 if (aAuthType==auth_md5) {
3086 // MD5 also might need nonce
3087 if (!aNextNonce.empty()) {
3088 // add base64 encoded nonce string
3089 uInt32 b64len;
3090 char *b64=b64::encode((const uInt8 *)aNextNonce.c_str(),aNextNonce.size(),&b64len);
3091 metaP->nextnonce=newPCDataString(b64,b64len);
3092 b64::free(b64); // return buffer allocated by b64_encode
3093 }
3094 }
3095 }
3096 return chalP;
3097} // newChallenge
3098
3099
3100// create new property or param descriptor for CTCap
3101SmlDevInfCTDataPtr_t newDevInfCTData(cAppCharP aName,uInt32 aSize, bool aNoTruncate, uInt32 aMaxOccur, cAppCharP aDataType)
3102{
3103 SmlDevInfCTDataPtr_t result = SML_NEW(SmlDevInfCTData_t)((SmlDevInfCTData_t*) _smlMalloc(sizeof(SmlDevInfCTData_t)));
3104 // fill descriptor
3105 // - name if property or param
3106 result->name=newPCDataString(aName);
3107 // - no display name so far
3108 result->dname=NULL__null; // no display name
3109 // - datatype (optional)
3110 result->datatype=newPCDataOptString(aDataType);
3111 // - max size
3112 if (aSize==0)
3113 result->maxsize=NULL__null; // no size
3114 else
3115 result->maxsize=newPCDataLong(aSize); // set size
3116 // - no valenum here, will be added later if any
3117 result->valenum=NULL__null; // no valenum
3118 // SyncML 1.2
3119 if (aMaxOccur==0)
3120 result->maxoccur=NULL__null; // no maxoccur
3121 else
3122 result->maxoccur=newPCDataLong(aMaxOccur); // set maxoccur
3123 result->flags = aNoTruncate ? SmlDevInfNoTruncate_f0x0020 : 0; // notruncate flag or none
3124 return result;
3125} // newDevInfCTData
3126
3127
3128// frees prototype element and sets calling pointer to NULL
3129void FreeProtoElement(void * &aVoidP)
3130{
3131 if (aVoidP) smlFreeProtoElement(aVoidP);
3132 aVoidP=NULL__null;
3133} // FreeProtoElement
3134
3135} // namespace sysync
3136
3137// eof