File: | libsynthesis/src/sysync_SDK/Sources/sysync_utils.cpp |
Warning: | line 883, column 15 Value stored to 'c' is never read |
1 | /* |
2 | * File: sysync_utils.cpp |
3 | * |
4 | * Author: Lukas Zeller (luz@plan44.ch) |
5 | * |
6 | * Provides some helper functions interfacing between SyncML Toolkit |
7 | * and C++ |
8 | * |
9 | * Copyright (c) 2001-2011 by Synthesis AG + plan44.ch |
10 | * |
11 | * 2001-05-16 : luz : created |
12 | * |
13 | */ |
14 | |
15 | #include "prefix_file.h" |
16 | #include "sync_include.h" |
17 | #include "sysync_utils.h" |
18 | |
19 | #include "libmem.h" |
20 | |
21 | |
22 | #ifdef SYSYNC_TOOL |
23 | #include "syncappbase.h" // for CONSOLEPRINTF |
24 | #include "customimplagent.h" // for DBCharSetNames |
25 | #endif |
26 | |
27 | namespace sysync { |
28 | |
29 | // Support for SySync Diagnostic Tool |
30 | #ifdef SYSYNC_TOOL |
31 | |
32 | // parse RFC 2822 addr spec |
33 | int parse2822AddrSpec(int argc, const char *argv[]) |
34 | { |
35 | if (argc<0) { |
36 | // help requested |
37 | CONSOLEPRINTF((" addrparse <RFC2822 addr-spec string to parse>"))SySync_ConsolePrintf(stderr, "SYSYNC " " addrparse <RFC2822 addr-spec string to parse>" "\n"); |
38 | CONSOLEPRINTF((" Parse name and email address out of a RFC2822-type addr-spec"))SySync_ConsolePrintf(stderr, "SYSYNC " " Parse name and email address out of a RFC2822-type addr-spec" "\n"); |
39 | return EXIT_SUCCESS0; |
40 | } |
41 | // check for argument |
42 | if (argc<1) { |
43 | CONSOLEPRINTF(("1 argument required"))SySync_ConsolePrintf(stderr, "SYSYNC " "1 argument required" "\n" ); |
44 | return EXIT_FAILURE1; |
45 | } |
46 | // parse |
47 | string addrname,addremail; |
48 | const char* p=argv[0]; |
49 | p=parseRFC2822AddrSpec(p,addrname,addremail); |
50 | // show |
51 | CONSOLEPRINTF(("Input : %s",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %s" "\n" ,argv[0]); |
52 | CONSOLEPRINTF(("Name : %s",addrname.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Name : %s" "\n" ,addrname.c_str()); |
53 | CONSOLEPRINTF(("email : %s",addremail.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "email : %s" "\n" ,addremail.c_str()); |
54 | CONSOLEPRINTF(("unparsed rest : %s",p))SySync_ConsolePrintf(stderr, "SYSYNC " "unparsed rest : %s" "\n" ,p); |
55 | return EXIT_SUCCESS0; |
56 | } // parse2822AddrSpec |
57 | |
58 | |
59 | // convert between character sets |
60 | int charConv(int argc, const char *argv[]) |
61 | { |
62 | if (argc<0) { |
63 | // help requested |
64 | CONSOLEPRINTF((" charconv [<input charset>] <output charset> <C-string to convert>"))SySync_ConsolePrintf(stderr, "SYSYNC " " charconv [<input charset>] <output charset> <C-string to convert>" "\n"); |
65 | CONSOLEPRINTF((" Convert from one charset to another. Default input is UTF-8"))SySync_ConsolePrintf(stderr, "SYSYNC " " Convert from one charset to another. Default input is UTF-8" "\n"); |
66 | return EXIT_SUCCESS0; |
67 | } |
68 | |
69 | #ifdef __TEST_EQUALITY_OF_CP936_WITH_GB2312__ |
70 | // quick test |
71 | uInt32 ch_in; |
72 | for (ch_in=0x8100; ch_in<=0xFFFF; ch_in++) { |
73 | // convert into internal UTF-8 |
74 | string s_internal,s_in; |
75 | s_in.erase(); |
76 | if (ch_in>=0x8100) s_in+=(ch_in >> 8) & 0xFF; |
77 | s_in+=(ch_in & 0xFF); |
78 | s_internal.erase(); |
79 | appendStringAsUTF8( |
80 | s_in.c_str(), |
81 | s_internal, |
82 | chs_gb2312 |
83 | ); |
84 | // convert into output format |
85 | string s_out; |
86 | s_out.erase(); |
87 | appendUTF8ToString( |
88 | s_internal.c_str(), |
89 | s_out, |
90 | chs_cp936 |
91 | ); |
92 | // show differences |
93 | if (s_in!=s_out && s_out.size()>0 && s_out[0]!=INCONVERTIBLE_PLACEHOLDER'_') { |
94 | string s1,s2; |
95 | s1.erase(); StrToCStrAppend(s_in.c_str(), s1); |
96 | s2.erase(); StrToCStrAppend(s_out.c_str(), s2); |
97 | CONSOLEPRINTF(("\"%s\" != \"%s\"",s1.c_str(),s2.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "\"%s\" != \"%s\"" "\n" ,s1.c_str(),s2.c_str()); |
98 | } |
99 | } |
100 | return EXIT_SUCCESS0; |
101 | #endif |
102 | |
103 | // check for argument |
104 | if (argc<2) { |
105 | CONSOLEPRINTF(("2 or 3 arguments required"))SySync_ConsolePrintf(stderr, "SYSYNC " "2 or 3 arguments required" "\n"); |
106 | return EXIT_FAILURE1; |
107 | } |
108 | int ochsarg=1; |
109 | sInt16 enu; |
110 | // get input charset |
111 | TCharSets charset_in=chs_utf8; |
112 | if (argc==3) { |
113 | // first arg is input charset |
114 | if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[0])) { |
115 | CONSOLEPRINTF(("'%s' is not a valid input charset name",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid input charset name" "\n",argv[0]); |
116 | return EXIT_FAILURE1; |
117 | } |
118 | charset_in = (TCharSets)enu; |
119 | } |
120 | else { |
121 | ochsarg=0; // first arg ist input charset |
122 | } |
123 | // get output charset |
124 | TCharSets charset_out; |
125 | if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[ochsarg])) { |
126 | CONSOLEPRINTF(("'%s' is not a valid output charset name",argv[ochsarg]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid output charset name" "\n",argv[ochsarg]); |
127 | return EXIT_FAILURE1; |
128 | } |
129 | charset_out = (TCharSets)enu; |
130 | // get string to convert |
131 | string s_in; |
132 | s_in.erase(); |
133 | CStrToStrAppend(argv[ochsarg+1], s_in); |
134 | // convert into internal UTF-8 |
135 | string s_internal; |
136 | s_internal.erase(); |
137 | appendStringAsUTF8( |
138 | s_in.c_str(), |
139 | s_internal, |
140 | charset_in |
141 | ); |
142 | // convert into output format |
143 | string s_out; |
144 | s_out.erase(); |
145 | appendUTF8ToString( |
146 | s_internal.c_str(), |
147 | s_out, |
148 | charset_out |
149 | ); |
150 | // show all three |
151 | string show; |
152 | // - input |
153 | show.erase(); StrToCStrAppend(s_in.c_str(), show); |
154 | CONSOLEPRINTF(("Input : %-20s = \"%s\"",DBCharSetNames[charset_in], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %-20s = \"%s\"" "\n",DBCharSetNames[charset_in], show.c_str()); |
155 | // - internal UTF8 |
156 | show.erase(); StrToCStrAppend(s_internal.c_str(), show); |
157 | CONSOLEPRINTF(("Internal : %-20s = \"%s\"",DBCharSetNames[chs_utf8], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Internal : %-20s = \"%s\"" "\n",DBCharSetNames[chs_utf8], show.c_str()); |
158 | // - output |
159 | show.erase(); StrToCStrAppend(s_out.c_str(), show); |
160 | CONSOLEPRINTF(("Output : %-20s = \"%s\"",DBCharSetNames[charset_out], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Output : %-20s = \"%s\"" "\n",DBCharSetNames[charset_out], show.c_str()); |
161 | return EXIT_SUCCESS0; |
162 | } // charConv |
163 | |
164 | #endif // SYSYNC_TOOL |
165 | |
166 | |
167 | // conversion table from ANSI 0x80..0x9F to UCS4 |
168 | const uInt32 Ansi_80_to_9F_to_UCS4[0x20] = { |
169 | 0x20AC, 0 ,0x201A,0x0192, 0x201E,0x2026,0x2020,0x2021, // 0x80..0x87 |
170 | 0x02C6,0x2030,0x0160,0x2039, 0x0152, 0 ,0x017D, 0 , // 0x88..0x8F |
171 | 0 ,0x2018,0x2019,0x201C, 0x201D,0x2022,0x2013,0x2014, // 0x90..0x97 |
172 | 0x02DC,0x2122,0x0161,0x203A, 0x0153, 0 ,0x017E,0x0178 // 0x98..0x9F |
173 | }; |
174 | |
175 | // line end mode names |
176 | const char * const lineEndModeNames[numLineEndModes] = { |
177 | "none", // none specified |
178 | "unix", // 0x0A |
179 | "mac", // 0x0D |
180 | "dos", // 0x0D 0x0A |
181 | "cstr", // as in C strings, '\n' which is 0x0A normally (but might be 0x0D on some platforms) |
182 | "filemaker" // 0x0B (filemaker tab-separated text format, CR is shown as 0x0B within fields |
183 | }; |
184 | |
185 | |
186 | |
187 | // literal quoting mode names |
188 | const char * const quotingModeNames[numQuotingModes] = { |
189 | "none", // none specified |
190 | "singlequote", // single quote must be duplicated |
191 | "doublequote", // double quote must be duplicated |
192 | "backslash" // C-string-style escapes of CR,LF,TAB,BS,\," and ' (but no full c-string escape with \xXX etc.) |
193 | }; |
194 | |
195 | |
196 | // Encoding format names for SyncML |
197 | const char * const encodingFmtSyncMLNames[numFmtTypes] = { |
198 | "chr", // plain chars |
199 | "bin", // binary |
200 | "b64" // base 64 encoding |
201 | }; |
202 | // Encoding format names for user |
203 | const char * const encodingFmtNames[numFmtTypes] = { |
204 | "plain-text", // no encoding (plain text) |
205 | "binary", // plain binary (in WBXML only) |
206 | "base64" // base 64 encoding |
207 | }; |
208 | |
209 | |
210 | // field (property) data type names |
211 | const char * const propDataTypeNames[numPropDataTypes] = { |
212 | "chr", // Character |
213 | "int", // Integer |
214 | "bool", // Boolean |
215 | "bin", // Binary |
216 | "datetime", // Date and time of day |
217 | "phonenum", // Phone number |
218 | "text", // plain text |
219 | "???" // unknown |
220 | }; |
221 | |
222 | |
223 | // Auth type names |
224 | const char * const authTypeSyncMLNames[numAuthTypes] = { |
225 | NULL__null, // no authorisation |
226 | "syncml:auth-basic", // basic (B64 encoded user pw string) |
227 | "syncml:auth-md5" // Md5 encoded user:pw:nonce |
228 | }; |
229 | |
230 | |
231 | // MIME encoding types |
232 | const char * const MIMEEncodingNames[numMIMEencodings] = { |
233 | "", |
234 | "7BIT", |
235 | "8BIT", |
236 | "BINARY", |
237 | "QUOTED-PRINTABLE", |
238 | "BASE64", |
239 | "B" |
240 | }; |
241 | |
242 | // Charset names for MIME based strings |
243 | const char * const MIMECharSetNames[numCharSets] = { |
244 | "unknown", |
245 | "US-ASCII", |
246 | "ANSI", |
247 | "ISO-8859-1", |
248 | "UTF-8", |
249 | "UTF-16", |
250 | #ifdef CHINESE_SUPPORT |
251 | "GB2312", |
252 | "CP936", |
253 | #endif |
254 | }; |
255 | |
256 | |
257 | #ifdef SYSYNC_ENGINE1 |
258 | // generate RFC2822-style address specificiation |
259 | // - Common Name will be quoted |
260 | // - recipient will be put in angle brackets |
261 | void makeRFC2822AddrSpec( |
262 | cAppCharP aCommonName, |
263 | cAppCharP aRecipient, |
264 | string &aRFCAddr |
265 | ) |
266 | { |
267 | if (aCommonName && *aCommonName) { |
268 | aRFCAddr='"'; |
269 | while (*aCommonName) { |
270 | if (*aCommonName=='"') aRFCAddr += "\\\""; |
271 | else aRFCAddr += *aCommonName; |
272 | aCommonName++; |
273 | } |
274 | aRFCAddr+="\" <"; |
275 | aRFCAddr+=aRecipient; |
276 | aRFCAddr+=">"; |
277 | } |
278 | else { |
279 | // plain email address |
280 | aRFCAddr=aRecipient; |
281 | } |
282 | } // makeRFC2822AddrSpec |
283 | |
284 | |
285 | |
286 | |
287 | // sysytool -f syncserv_odbc.xml addrparse "(Lukas Peter) luz@synthesis.ch (Zeller), gaga" |
288 | |
289 | // Parse RFC2822-style address specificiation |
290 | // - aName will receive name and all (possible) comments |
291 | // - aRecipient will receive the (first, in case of a group) email address |
292 | cAppCharP parseRFC2822AddrSpec( |
293 | cAppCharP aText, |
294 | string &aName, |
295 | string &aRecipient |
296 | ) |
297 | { |
298 | const char *p; |
299 | char c; |
300 | |
301 | enum { |
302 | pstate_sepspace, |
303 | pstate_trailing, |
304 | pstate_text, |
305 | pstate_comment, |
306 | pstate_quoted, |
307 | pstate_email |
308 | } pstate = pstate_trailing; |
309 | string text,groupname; |
310 | bool textcouldbeemail=true; |
311 | bool atfound=false; |
312 | aName.erase(); |
313 | aRecipient.erase(); |
314 | p=aText; |
315 | do { |
316 | c=*p; |
317 | // check end of input |
318 | if (c==0) break; // done with the string |
319 | // advance to next char |
320 | p++; |
321 | // check according to state |
322 | switch (pstate) { |
323 | case pstate_sepspace: |
324 | if (c==' ') { |
325 | aName+=c; |
326 | } |
327 | pstate=pstate_trailing; |
328 | // otherwise treat like trailing |
329 | case pstate_trailing: |
330 | textcouldbeemail=aRecipient.empty(); |
331 | atfound=false; |
332 | // skip trailing WSP first |
333 | if (c==' ' || c=='\t' || c=='\n' || c=='\r') break; // simply ignore WSP in trailing mode |
334 | else pstate=pstate_text; |
335 | // fall trough to do text analysis |
336 | case pstate_text: |
337 | // now check specials |
338 | if (c==',') { c=0; break; } // end of address, cause exit from loop, next will start after comma |
339 | else if (c==';') { c=0; break; } // end of group address list, treat it like single address |
340 | else if (c=='@' && textcouldbeemail) atfound=true; // flag presence of @ |
341 | // check if text could still be a email address by itself |
342 | if (textcouldbeemail && !isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') { |
343 | textcouldbeemail=false; |
344 | if (atfound) { |
345 | aRecipient=text; |
346 | text.erase(); |
347 | } |
348 | atfound=false; |
349 | } |
350 | // now check other specials |
351 | if (c=='"') { pstate=pstate_quoted; } // start of quoted string |
352 | else if (c=='(') { pstate=pstate_comment; } // start of comment |
353 | else if (c=='<') { aRecipient.erase(); pstate=pstate_email; } // start of angle-addr, overrides other recipient texts |
354 | else if (c==':') { |
355 | groupname=aRecipient; // what we've probably parsed as recipient |
356 | groupname+=aName; // plus name so far |
357 | groupname+=text; // plus additional text |
358 | text.erase(); |
359 | aName.erase(); |
360 | aRecipient.erase(); |
361 | pstate=pstate_trailing; |
362 | } // flag presence of a group name (which can be used as name if addr itself does not have one) |
363 | else { |
364 | // add other text chars to the text |
365 | text += c; |
366 | } |
367 | break; |
368 | case pstate_quoted: |
369 | if (c=='\\') { |
370 | if (*p) c=*p++; else break; // get next char (if any) and add to result untested |
371 | } |
372 | else if (c=='"') { |
373 | // end of quoted string |
374 | pstate=pstate_sepspace; |
375 | aName+=text; |
376 | text.erase(); |
377 | break; |
378 | } |
379 | // add to text |
380 | text += c; |
381 | break; |
382 | case pstate_comment: |
383 | if (c==')') { |
384 | // end of comment |
385 | aName+=text; |
386 | text.erase(); |
387 | pstate=pstate_sepspace; |
388 | break; |
389 | } |
390 | // add to text |
391 | text += c; |
392 | break; |
393 | case pstate_email: |
394 | if (!isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') { |
395 | // any non-email char terminates email, not only '>', but only '>' is swallowed |
396 | if (c!='>') p--; // re-evaluate char in next state |
397 | pstate=pstate_sepspace; |
398 | break; |
399 | } |
400 | // add to email |
401 | aRecipient += c; |
402 | break; |
403 | } // switch |
404 | } while (c!=0); |
405 | // handle case of pure email address without name and without < > brackets or : |
406 | if (aRecipient.empty() && textcouldbeemail && atfound) |
407 | aRecipient = text; |
408 | else |
409 | aName += text; |
410 | // if name is (now) empty, but we have a group name, use the group name |
411 | if (aName.empty()) aName=groupname; |
412 | // remove trailing spaces in aName |
413 | string::size_type n=aName.find_last_not_of(' '); |
414 | if (n!=string::npos) aName.resize(n+1); |
415 | // return where to continue parsing for next addr-spec (if not end of string) |
416 | return p; |
417 | } // parseRFC2822AddrSpec |
418 | |
419 | |
420 | |
421 | // append internal UTF8 string as RFC2047 style encoding |
422 | const char *appendUTF8AsRFC2047( |
423 | const char *aText, |
424 | string &aString |
425 | ) |
426 | { |
427 | const char *p,*q,*r; |
428 | char c; |
429 | |
430 | p=aText; |
431 | do { |
432 | q=p; // remember start |
433 | // find chars until next char that must be stored as encoded word |
434 | do { |
435 | c=*p; |
436 | if (c==0 || (c & 0x80) || (c=='=' && *(p+1)=='?')) break; |
437 | p++; |
438 | } while(true); |
439 | // copy chars outside encoded word directly |
440 | if (p-q>0) aString.append(q,p-q); |
441 | // check if end of string |
442 | if (c==0) break; |
443 | // pack some chars into encoded word |
444 | // - start word |
445 | aString.append("=?utf-8?B?"); // 10 chars start (+ 2 chars will be added at end) |
446 | // - encoded data must be 75-12=63 chars or less |
447 | // Using B (=b64) encoding, output of 63 chars = 63/4*3 = max 47 chars. |
448 | // We use 45 max, as this is evenly divisible by 3 and output is 60 chars |
449 | q=p; |
450 | while (true) { |
451 | // find next space |
452 | while (*q && !isspace(*q) && q-p<45) q++; |
453 | if (q-p>=45) break; // abort if exhausted already |
454 | // find next non-space |
455 | r=q; |
456 | while (isspace(*r)) r++; |
457 | // check if next non-space will start a new word |
458 | if (*r & 0x80) { |
459 | // we should include the next word as well, if possible without exceeding size |
460 | if (r-p<45) { |
461 | q=r; |
462 | continue; |
463 | } |
464 | } |
465 | break; |
466 | } |
467 | // encode binary stream and append to string |
468 | appendEncoded((const uInt8 *)p,q-p,aString,enc_b); |
469 | p=q; |
470 | // - end word |
471 | aString.append("?="); |
472 | } while (true); |
473 | return p; |
474 | } // appendUTF8AsRFC2047 |
475 | |
476 | |
477 | // parse character string from RFC2047 style encoding to UTF8 internal string |
478 | const char *appendRFC2047AsUTF8( |
479 | const char *aRFC2047, |
480 | stringSize aSize, |
481 | string &aString, |
482 | TLineEndModes aLEM |
483 | ) |
484 | { |
485 | const char *p,*q,*r,*w; |
486 | char c = 0; |
487 | const char *eot = aRFC2047+aSize; |
488 | |
489 | p=aRFC2047; |
490 | w=NULL__null; // start of last detected word (to avoid re-scanning) |
491 | while (p<eot) { |
492 | q=p; // remember start |
493 | // find chars until next encoded word |
494 | while (p<eot) { |
495 | c=*p; |
496 | if (c==0 || (p!=w && c=='=' && *(p+1)=='?')) break; |
497 | p++; |
498 | } |
499 | // copy chars outside encoded word directly |
500 | aString.append(q,p-q); |
501 | // check if end of string |
502 | if (p>=eot || c==0) break; |
503 | // try to parse encoded word |
504 | q=p+2; |
505 | scanword: |
506 | // q is now where we start to parse word contents |
507 | // p is where we would re-start reading normally if current word turns out not to be a word at all |
508 | // - remember start of word scan (to avoid re-scanning it) |
509 | w=p; |
510 | // - get charset |
511 | r=q; |
512 | while (q<eot && *q!='?' && isgraph(*q)) q++; |
513 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally |
514 | sInt16 en; |
515 | TCharSets charset=chs_unknown; |
516 | if (StrToEnum(MIMECharSetNames, numCharSets, en, r, q-r)) charset=(TCharSets)en; |
517 | // - get encoding |
518 | r=++q; // continue after ? separator |
519 | while (q<eot && *q!='?' && isgraph(*q)) q++; |
520 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally |
521 | TEncodingTypes encoding=enc_8bit; |
522 | if (StrToEnum(MIMEEncodingNames, numMIMEencodings, en, r, q-r)) encoding=(TEncodingTypes)en; |
523 | // - get data part |
524 | r=++q; |
525 | while (q+1<eot && *q && *q!=' ' && !(*q=='?' && *(q+1)=='=')) q++; |
526 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally |
527 | // - decode |
528 | string decoded; |
529 | appendDecoded(r,q-r,decoded,encoding); |
530 | // - convert to UTF-8 |
531 | appendStringAsUTF8( |
532 | decoded.c_str(), |
533 | aString, |
534 | charset, |
535 | aLEM |
536 | ); |
537 | // - skip word terminator |
538 | p=q+2; |
539 | // - check for special case of adjacent words |
540 | q=p; |
541 | while (q<eot && isspace(*q)) q++; |
542 | if (q+1<eot && q>p && *q=='=' && *(q+1)=='?') { |
543 | // adjacent encoded words, only separated by space -> ignore space |
544 | // p is after previous word |
545 | q+=2; |
546 | // q is after lead-in of next word |
547 | goto scanword; |
548 | } |
549 | // p is where we continue reading |
550 | } |
551 | return p; |
552 | } // appendRFC2047AsUTF8 |
553 | |
554 | |
555 | // decode encoded data and append to string |
556 | const char *appendDecoded( |
557 | const char *aText, |
558 | size_t aSize, |
559 | string &aBinString, |
560 | TEncodingTypes aEncoding |
561 | ) |
562 | { |
563 | char c; |
564 | const char *p=aText; |
565 | uInt32 binsz; |
566 | uInt8 *binP; |
567 | |
568 | switch (aEncoding) { |
569 | case enc_quoted_printable : |
570 | // decode quoted-printable content |
571 | while ((c=*p++)) { |
572 | // char found |
573 | if (c=='=') { |
574 | uInt16 code; |
575 | char hex[2]; |
576 | // check for soft break first |
577 | if (*p=='\x0D' || *p=='\x0A') { |
578 | // soft break, swallow |
579 | if (*p=='\x0D') p++; |
580 | if (*p=='\x0A') p++; |
581 | continue; |
582 | } |
583 | // decode |
584 | hex[0]=*p; |
585 | if (*p) { |
586 | p++; |
587 | hex[1]=*p; |
588 | if (*p) { |
589 | p++; |
590 | if (HexStrToUShort(hex,code,2)==2) { |
591 | c=code; // decoded char |
592 | } |
593 | else continue; // simply ignore |
594 | } |
595 | else break; |
596 | } |
597 | else break; |
598 | } |
599 | // append char |
600 | aBinString+=c; |
601 | } |
602 | aText=p; |
603 | break; |
604 | case enc_base64: |
605 | case enc_b: |
606 | // decode base 64 |
607 | binsz=0; |
608 | binP = b64::decode(aText, aSize, &binsz); |
609 | aBinString.append((const char *)binP,binsz); |
610 | b64::free(binP); |
611 | aText+=aSize; |
612 | break; |
613 | case enc_7bit: |
614 | case enc_8bit: |
615 | // copy no more than size |
616 | if (aSize>0) aBinString.reserve(aBinString.size()+aSize); |
617 | while (*p && aSize>0) { |
618 | aBinString+=*p++; |
619 | aSize--; |
620 | } |
621 | aText=p; |
622 | break; |
623 | case enc_none: |
624 | case enc_binary: |
625 | // copy bytes |
626 | aBinString.append(aText,aSize); |
627 | aText+=aSize; |
628 | break; |
629 | case numMIMEencodings: |
630 | // invalid |
631 | break; |
632 | } // quoted printable |
633 | return aText; |
634 | } // appendDecoded |
635 | |
636 | |
637 | |
638 | // encode binary stream and append to string |
639 | void appendEncoded( |
640 | const uInt8 *aBinary, |
641 | size_t aSize, |
642 | string &aString, |
643 | TEncodingTypes aEncoding, |
644 | sInt16 aMaxLineSize, |
645 | sInt32 aCurrLineSize, |
646 | bool aSoftBreaksAsCR, |
647 | bool aEncodeBinary |
648 | ) |
649 | { |
650 | char c; |
651 | string::size_type linestart; |
652 | const uInt8 *p; |
653 | bool softbreak; |
654 | uInt32 b64len; |
655 | char *b64; |
656 | bool processed; |
657 | |
658 | switch (aEncoding) { |
659 | case enc_binary : |
660 | case enc_none : |
661 | case enc_8bit : |
662 | case enc_7bit : // assume we have no 8bit chars |
663 | // just copy 1:1 |
664 | aString.append((const char *)aBinary,aSize); |
665 | break; |
666 | case enc_quoted_printable: |
667 | // quote-printable encoding |
668 | // - determine start of last line in aString |
669 | // Note: this is because property text will be folded when lines aMaxLineSize |
670 | linestart=aString.size()-aCurrLineSize; |
671 | for (p=aBinary;p<aBinary+aSize;p++) { // '\0' will not terminate the 'for' loop |
672 | c=*p; |
673 | if (!aEncodeBinary && !c) break; // still exit at NUL when not encoding real binary data |
674 | processed=false; // input data in c is not yet processed |
675 | // make sure we do not go over the limit (if one is set) |
676 | // - if less than 8 chars (=0D=0A + =\r) are free, soft break the line |
677 | softbreak= aMaxLineSize && (aString.size()-linestart>=string::size_type(aMaxLineSize)-8); |
678 | if (!aEncodeBinary) { |
679 | if (c=='\r') continue; // ignore them |
680 | if (c=='\b') continue; // ignore them (optional break indicators, not relevant for QP output) |
681 | if (c=='\n') { // - encode line ends |
682 | aString.append("=0D=0A"); // special string for Line Ends (CR LF) |
683 | processed = true; // c is processed now |
684 | softbreak = true; |
685 | } // if |
686 | } // if |
687 | // - handle soft line break (but only if really doing line breaking) |
688 | // Also: avoid adding a soft break at the very end of the string |
689 | if (softbreak && aMaxLineSize && p+1<aBinary+aSize) { |
690 | if (aSoftBreaksAsCR) |
691 | aString.append("=\r"); // '\r' signals softbreak for finalizeproperty() |
692 | else |
693 | aString.append("=\x0D\x0A"); // break line here |
694 | // new line starts after softbreak |
695 | linestart=aString.size(); |
696 | // make sure soft line break is not followed by unencoded space |
697 | // (which would look like MIME folding) |
698 | if (c==' ' || (processed && p[1]==' ')) { |
699 | aString.append("=20"); |
700 | if (processed) p++; // if current char was already processed, we need to explicitly skip the space |
701 | processed=true; // char is now processed in any case |
702 | } // if |
703 | } // if |
704 | // now encode the char in c if not already processed by now |
705 | if (!processed) { |
706 | bool encodeIt= |
707 | (c=='=') // escape equal sign itself |
708 | || (c=='<' && aEncodeBinary) // avoid XML mismatch problems |
709 | || (uInt8)c>0x7F |
710 | || (uInt8)c<0x20; // '\0' will be encoded as well |
711 | if (encodeIt) { // encode all non ASCII chars > 0x7F (and control chars as well) |
712 | aString+="="; |
713 | aString+=NibbleToHexDigit(c>>4); |
714 | aString+=NibbleToHexDigit(c); |
715 | } |
716 | else |
717 | aString+=c; // just copy |
718 | } // if |
719 | } |
720 | break; |
721 | case enc_base64: |
722 | case enc_b: |
723 | // use base64 encoding |
724 | if (aSize>0) { |
725 | // don't call b64 with size=0! |
726 | b64 = b64::encode( |
727 | aBinary,aSize, // what to encode |
728 | &b64len, // output size |
729 | aMaxLineSize, // max line size |
730 | aSoftBreaksAsCR |
731 | ); |
732 | // append to output, if any |
733 | if (b64) { |
734 | aString.append(b64,b64len); |
735 | // release buffer |
736 | b64::free(b64); |
737 | } |
738 | if (aEncoding!=enc_b) { |
739 | // make sure it ends with a newline for "base64" (but NOT for "b" as used in RFC2047) |
740 | // Note: when used in vCard2.1, that newline is part of the property and show as an |
741 | // empty line in the vCard. |
742 | aString += aSoftBreaksAsCR ? "\r" : "\x0D\x0A"; |
743 | } |
744 | } |
745 | break; |
746 | default: |
747 | // do nothing |
748 | break; |
749 | } // switch |
750 | } // appendEncoded |
751 | |
752 | |
753 | #ifdef CHINESE_SUPPORT |
754 | // the flatBinTree tables for converting to and from GB2312 |
755 | #include "gb2312_tables_inc.cpp" |
756 | // the flatBinTree tables for converting to and from CP936 |
757 | #include "cp936_tables_inc.cpp" |
758 | #endif |
759 | |
760 | |
761 | // add char (possibly multi-byte) as UTF8 to value and apply charset translation if needed |
762 | // - returns > 0 if aNumChars was not correct number of bytes needed to convert an entire character; |
763 | // return value is number of bytes needed to generate one output character. If return value |
764 | // is<>0, no char has been appended to aVal. |
765 | uInt16 appendCharsAsUTF8(const char *aChars, string &aVal, TCharSets aCharSet, uInt16 aNumChars) |
766 | { |
767 | uInt32 ucs4; |
768 | // first char |
769 | uInt8 c=*aChars; |
770 | // this is a 8-bit char |
771 | switch(aCharSet) { |
772 | case chs_utf8 : |
773 | // UTF8 is native charset of the application, simply add |
774 | aVal+=c; |
775 | break; |
776 | case chs_ansi : |
777 | case chs_iso_8859_1 : |
778 | // do poor man's conversion to UCS4 |
779 | // - most ANSI chars are 1:1 mapped |
780 | ucs4 = ((uInt8)c & 0xFF); |
781 | // - except 0x80..0x9F, use table for these |
782 | if (ucs4>=0x80 && ucs4<=0x9F) |
783 | ucs4=Ansi_80_to_9F_to_UCS4[ucs4-0x80]; |
784 | // - convert to UTF8 |
785 | UCS4toUTF8(ucs4,aVal); |
786 | break; |
787 | #ifdef CHINESE_SUPPORT |
788 | case chs_gb2312 : // simplified Chinese GB-2312 charset |
789 | // all below 0x80 are passed as-is |
790 | if (c<0x80) |
791 | aVal+=c; // simply append |
792 | else { |
793 | // 16-bit GB2312 char |
794 | if (aNumChars!=2) |
795 | return 2; // we need 2 chars for a successful GB-2312 |
796 | // we have 2 bytes, convert them |
797 | ucs4 = searchFlatBintree(gb2312_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_'); |
798 | // - convert to UTF8 |
799 | UCS4toUTF8(ucs4,aVal); |
800 | } |
801 | break; |
802 | case chs_cp936: // simplified chinese Windows codepage CP936 |
803 | if (c<0x80) |
804 | aVal+=c; // simply append |
805 | else { |
806 | // 0x0080 (euro sign) or 2-byte CP936 |
807 | if (c==0x80) |
808 | ucs4=searchFlatBintree(cp936_to_ucs2, 0x0080, INCONVERTIBLE_PLACEHOLDER'_'); |
809 | else { |
810 | // 16-bit GB2312 char |
811 | if (aNumChars!=2) |
812 | return 2; // we need 2 chars for a successful CP936 |
813 | // we have 2 bytes, convert them |
814 | ucs4 = searchFlatBintree(cp936_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_'); |
815 | } |
816 | // - convert to UTF8 |
817 | UCS4toUTF8(ucs4,aVal); |
818 | } |
819 | break; |
820 | #endif |
821 | case chs_ascii : // plain 7-bit ASCII |
822 | default : // unknown |
823 | // only 7-bit allowed |
824 | if (c & 0x80) |
825 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; |
826 | else |
827 | aVal+=c; |
828 | break; |
829 | } // switch |
830 | return 0; // ok, converted aNumChars |
831 | } // appendCharsAsUTF8 |
832 | |
833 | |
834 | |
835 | |
836 | // add string as UTF8 to value and apply charset translation if needed |
837 | // - if lineEndMode is not lem_none, all sorts of line ends will be converted |
838 | // to the specified mode. |
839 | void appendStringAsUTF8(const char *s, string &aVal, TCharSets aCharSet, TLineEndModes aLEM, bool aAllowFilemakerCR) |
840 | { |
841 | char c; |
842 | const char *start=s; |
843 | if (s) { |
844 | while ((c=*s++)!=0) { |
845 | if (aLEM!=lem_none) { |
846 | // line end handling enabled |
847 | if (c==0x0D) { |
848 | // could be mac (0x0D) or DOS (0x0D/0x0A) |
849 | if (*s==0x0A) { |
850 | // this is DOS-type line end |
851 | // - consume the 0x0A as well |
852 | s++; |
853 | // - check for 0x0D 0x0D 0x0A special case (caused by |
854 | // DOS-text-file conversion of non-DOS strings) |
855 | if (s>=start+3) { |
856 | if (*(s-3)==0x0D) { |
857 | // char before the DOS-CRLF was a 0x0D as well (and |
858 | // has already produced a newline in the output |
859 | // --> completely ignore this CRLF |
860 | continue; |
861 | } |
862 | } |
863 | } |
864 | // is a line end, convert it to platform-lineend |
865 | c='\n'; // platform |
866 | } |
867 | else if (c==0x0A) { |
868 | // 0x0A without preceeding 0x0D = unix |
869 | c='\n'; // platform |
870 | } |
871 | else if (c==0x0B && aAllowFilemakerCR) { |
872 | // 0x0B is used as lineend in filemaker export and achilformat |
873 | c='\n'; |
874 | } |
875 | // line end converted to platform |
876 | if (c=='\n' && aLEM!=lem_cstr) { |
877 | // produce specified line end |
878 | switch (aLEM) { |
879 | case lem_mac : c=0x0D; break; |
880 | case lem_unix : c=0x0A; break; |
881 | case lem_filemaker : c=0x0B; break; |
882 | case lem_dos : |
883 | c=0x0A; // LF will be added later |
Value stored to 'c' is never read | |
884 | aVal+=0x0D; // add CR |
885 | break; |
886 | default: break; |
887 | } |
888 | } |
889 | } // line end handling enabled |
890 | // normal add |
891 | uInt16 i,seqlen=1; // assume logical char consists of single byte |
892 | do { |
893 | seqlen=appendCharsAsUTF8(s-seqlen,aVal,aCharSet,seqlen); // add char (possibly with UTF8 expansion) to aVal |
894 | if (seqlen<=1) break; // done |
895 | for (i=1;i<seqlen;i++) { if (*s==0) break; else s++; } |
896 | if (i<seqlen) break; // not enough bytes |
897 | } while(true); |
898 | } |
899 | } |
900 | } // appendStringAsUTF8 |
901 | |
902 | |
903 | |
904 | // same as appendUTF8ToString, but output string is cleared first |
905 | bool storeUTF8ToString( |
906 | cAppCharP aUTF8, string &aVal, |
907 | TCharSets aCharSet, |
908 | TLineEndModes aLEM, |
909 | TQuotingModes aQuotingMode, |
910 | size_t aMaxBytes |
911 | ) |
912 | { |
913 | aVal.erase(); |
914 | return appendUTF8ToString(aUTF8,aVal,aCharSet,aLEM,aQuotingMode,aMaxBytes); |
915 | } // storeUTF8ToString |
916 | |
917 | |
918 | |
919 | // helper for adding chars |
920 | static void appendCharToString( |
921 | char c, |
922 | string &aVal, |
923 | TQuotingModes aQuotingMode |
924 | ) { |
925 | if (aQuotingMode==qm_none) { |
926 | aVal+=c; |
927 | } |
928 | else if (aQuotingMode==qm_backslash) { |
929 | // treat CR, LF, BS, TAB, single/doublequote and backslash specially |
930 | if (c==0x0D) |
931 | aVal+="\\r"; |
932 | else if (c==0x0A) |
933 | aVal+="\\n"; |
934 | else if (c==0x08) |
935 | aVal+="\\b"; |
936 | else if (c==0x09) |
937 | aVal+="\\t"; |
938 | else if (c=='"') |
939 | aVal+="\\\""; |
940 | else if (c=='\'') |
941 | aVal+="\\'"; |
942 | else if (c=='\\') |
943 | aVal+="\\\\"; |
944 | else |
945 | aVal+=c; |
946 | } |
947 | else if (aQuotingMode==qm_duplsingle) { |
948 | if (c=='\'') aVal+=c; // duplicate |
949 | aVal+=c; // normal append |
950 | } |
951 | else if (aQuotingMode==qm_dupldouble) { |
952 | if (c=='"') aVal+=c; // duplicate |
953 | aVal+=c; // normal append |
954 | } |
955 | } // appendCharToString |
956 | |
957 | |
958 | // add UTF8 string to value in custom charset |
959 | // - if aLEM is not lem_none, occurrence of any type of Linefeeds |
960 | // (LF,CR,CRLF and even CRCRLF) in input string will be |
961 | // replaced by the specified line end type |
962 | // - aQuotingMode specifies what quoting (for ODBC literals for example) should be used |
963 | // - output is clipped after aMaxBytes bytes (if not 0) |
964 | // - returns true if all input could be converted, false if output is clipped |
965 | bool appendUTF8ToString( |
966 | cAppCharP aUTF8, |
967 | string &aVal, |
968 | TCharSets aCharSet, |
969 | TLineEndModes aLEM, |
970 | TQuotingModes aQuotingMode, |
971 | size_t aMaxBytes |
972 | ) |
973 | { |
974 | uInt32 ucs4; |
975 | uInt8 c; |
976 | size_t n=0; |
977 | cAppCharP p=aUTF8; |
978 | cAppCharP start=aUTF8; |
979 | |
980 | if (!aUTF8) return true; // nothing to copy, copied everything of that! |
981 | if (aCharSet==chs_utf8 && aLEM==lem_none && aQuotingMode==qm_none) { |
982 | // shortcut: simply append entire string |
983 | if (aMaxBytes==0) |
984 | aVal+=aUTF8; |
985 | else |
986 | aVal.append(aUTF8,aMaxBytes); |
987 | // advance "processed" pointer behind consumed part of string |
988 | p=aUTF8+aVal.size(); |
989 | } |
990 | else { |
991 | // process char by char |
992 | while((c=*aUTF8)!=0 && (aMaxBytes==0 || n<aMaxBytes)) { |
993 | p=aUTF8; |
994 | // check for linefeed conversion |
995 | if (aLEM!=lem_none && (c==0x0D || c==0x0A)) { |
996 | aUTF8++; |
997 | // line end, handling enabled |
998 | if (c==0x0D) { |
999 | // could be mac (0x0D) or DOS (0x0D/0x0A) |
1000 | if (*aUTF8==0x0A) { |
1001 | // this is DOS-type line end |
1002 | // - consume the 0x0A as well |
1003 | aUTF8++; |
1004 | // - check for 0x0D 0x0D 0x0A special case (caused by |
1005 | // DOS-text-file conversion of non-DOS strings) |
1006 | if (aUTF8>=start+3) { |
1007 | if (*(aUTF8-3)==0x0D) { |
1008 | // char before the DOS-CRLF was a 0x0D as well (and |
1009 | // has already produced a newline in the output |
1010 | // --> completely ignore this CRLF |
1011 | continue; |
1012 | } |
1013 | } |
1014 | } |
1015 | // is a line end, convert it to platform-lineend |
1016 | c='\n'; // platform |
1017 | } |
1018 | else { // must be 0x0A |
1019 | // 0x0A without preceeding 0x0D = unix |
1020 | c='\n'; // platform |
1021 | } |
1022 | // line end converted to platform |
1023 | if (aLEM!=lem_cstr) { |
1024 | // produce specified line end |
1025 | switch (aLEM) { |
1026 | case lem_mac : c=0x0D; break; |
1027 | case lem_filemaker : c=0x0B; break; |
1028 | case lem_unix : c=0x0A; break; |
1029 | case lem_dos : |
1030 | c=0x0A; // LF will be added later |
1031 | n++; // count it extra |
1032 | if (aMaxBytes && n>=aMaxBytes) |
1033 | goto stringfull; // no room to complete it, ignore it |
1034 | appendCharToString(0x0D,aVal,aQuotingMode); |
1035 | break; |
1036 | default: break; |
1037 | } |
1038 | } |
1039 | appendCharToString(c,aVal,aQuotingMode); |
1040 | n++; // count it |
1041 | } // line end, handling enabled |
1042 | else { |
1043 | // non lineend (or lineend not handled specially) |
1044 | if (aCharSet==chs_utf8) { |
1045 | aUTF8++; |
1046 | // - simply add char |
1047 | appendCharToString(c,aVal,aQuotingMode); |
1048 | n++; |
1049 | } |
1050 | else { |
1051 | // - make UCS4 |
1052 | p=aUTF8; // save previous position to detect if we have processed all |
1053 | aUTF8=UTF8toUCS4(aUTF8,ucs4); |
1054 | // now we have UCS4 |
1055 | if (ucs4==0) { |
1056 | // UTF8 resulting in UCS4 null char is not allowed |
1057 | ucs4=INCONVERTIBLE_PLACEHOLDER'_'; |
1058 | } |
1059 | else { |
1060 | // convert to specified charset |
1061 | switch (aCharSet) { |
1062 | case chs_ansi: |
1063 | case chs_iso_8859_1: |
1064 | if ((ucs4<=0xFF && ucs4>=0xA0) || ucs4<0x80) |
1065 | // 00..7F and A0..FF directly map to ANSI |
1066 | appendCharToString(ucs4,aVal,aQuotingMode); |
1067 | else { |
1068 | // search for matching ANSI in table |
1069 | uInt8 k; |
1070 | for (k=0; k<0x20; k++) { |
1071 | if (ucs4==Ansi_80_to_9F_to_UCS4[k]) { |
1072 | // found in table |
1073 | break; |
1074 | } |
1075 | } |
1076 | if (k<0x20) |
1077 | // conversion found |
1078 | aVal+=k+0x80; |
1079 | else |
1080 | // no conversion found in table |
1081 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; |
1082 | } // not in 1:1 range 0..7F, A0..FF |
1083 | n++; |
1084 | break; |
1085 | #ifdef CHINESE_SUPPORT |
1086 | case chs_gb2312 : // simplified Chinese GB-2312 charset |
1087 | // all below 0x80 are passed as-is |
1088 | if (ucs4<0x80) { |
1089 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes |
1090 | n++; |
1091 | } |
1092 | else { |
1093 | // convert to 16-bit GB2312 char |
1094 | uInt16 gb = searchFlatBintree(ucs2_to_gb2312, ucs4, INCONVERTIBLE_PLACEHOLDER'_'); |
1095 | // check if we have space |
1096 | if (aMaxBytes!=0 && n+2>aMaxBytes) |
1097 | goto stringfull; |
1098 | // append as two bytes to output string |
1099 | aVal+=gb >> 8; |
1100 | aVal+=gb & 0xFF; |
1101 | n+=2; |
1102 | } |
1103 | break; |
1104 | case chs_cp936 : // simplified Chinese CP936 windows codepage |
1105 | // all below 0x80 are passed as-is |
1106 | if (ucs4<0x80) { |
1107 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes |
1108 | n++; |
1109 | } |
1110 | else { |
1111 | // convert to CP936 16-bit representation |
1112 | uInt16 twobytes = searchFlatBintree(ucs2_to_cp936, ucs4, INCONVERTIBLE_PLACEHOLDER'_'); |
1113 | // append as two bytes to output string, but only this is a CP936 two-byte at all |
1114 | if (twobytes>0x0080) { |
1115 | // check if we have space |
1116 | if (aMaxBytes!=0 && n+2>aMaxBytes) |
1117 | goto stringfull; |
1118 | aVal+=twobytes >> 8; // sub-page lead in |
1119 | n++; |
1120 | } |
1121 | aVal+=twobytes & 0xFF; // sub-page code |
1122 | n++; |
1123 | } |
1124 | break; |
1125 | #endif |
1126 | case chs_ascii: |
1127 | // explicit ASCII: convert some special chars to plain ASCII |
1128 | if ((ucs4 & 0xFFFFFF80) !=0) { |
1129 | // ASCIIfy table to convert umlauts etc. to nearest plain ASCII |
1130 | typedef struct { |
1131 | uInt32 ucs4; |
1132 | uInt8 ascii; |
1133 | } TASCIIfyEntry; |
1134 | |
1135 | static const TASCIIfyEntry ASCIIfyTable[] = { |
1136 | { 0x000000C4, 'A' }, // Adieresis |
1137 | { 0x000000C5, 'A' }, // Aring |
1138 | { 0x000000C7, 'C' }, // Ccedilla |
1139 | { 0x000000C9, 'E' }, // Eacute |
1140 | { 0x000000D1, 'N' }, // Ntilde |
1141 | { 0x000000D6, 'O' }, // Odieresis |
1142 | { 0x000000DC, 'U' }, // Udieresis |
1143 | { 0x000000E1, 'a' }, // aacute |
1144 | { 0x000000E0, 'a' }, // agrave |
1145 | { 0x000000E2, 'a' }, // acircumflex |
1146 | { 0x000000E4, 'a' }, // adieresis |
1147 | { 0x000000E3, 'a' }, // atilde |
1148 | { 0x000000E5, 'a' }, // aring |
1149 | { 0x000000E7, 'c' }, // ccedilla |
1150 | { 0x000000E9, 'e' }, // eacute |
1151 | { 0x000000E8, 'e' }, // egrave |
1152 | { 0x000000EA, 'e' }, // ecircumflex |
1153 | { 0x000000EB, 'e' }, // edieresis |
1154 | { 0x000000ED, 'i' }, // iacute |
1155 | { 0x000000EC, 'i' }, // igrave |
1156 | { 0x000000EE, 'i' }, // icircumflex |
1157 | { 0x000000EF, 'i' }, // idieresis |
1158 | { 0x000000F1, 'n' }, // ntilde |
1159 | { 0x000000F3, 'o' }, // oacute |
1160 | { 0x000000F2, 'o' }, // ograve |
1161 | { 0x000000F4, 'o' }, // ocircumflex |
1162 | { 0x000000F6, 'o' }, // odieresis |
1163 | { 0x000000F5, 'o' }, // otilde |
1164 | { 0x000000FA, 'u' }, // uacute |
1165 | { 0x000000F9, 'u' }, // ugrave |
1166 | { 0x000000FB, 'u' }, // ucircumflex |
1167 | { 0x000000FC, 'u' }, // udieresis |
1168 | { 0x000000DF, 's' }, // germandoubles |
1169 | { 0x000000D8, 'O' }, // Oslash |
1170 | { 0x000000F8, 'o' }, // oslash |
1171 | { 0x000000C0, 'A' }, // Agrave |
1172 | { 0x000000C3, 'A' }, // Atilde |
1173 | { 0x000000D5, 'O' }, // Otilde |
1174 | { 0x00000152, 'O' }, // OE |
1175 | { 0x00000153, 'o' }, // oe |
1176 | { 0x000000C6, 'A' }, // AE |
1177 | { 0x000000E6, 'a' }, // ae |
1178 | { 0x000000C2, 'A' }, // Acircumflex |
1179 | { 0x000000CA, 'E' }, // Ecircumflex |
1180 | { 0x000000C1, 'A' }, // Aacute |
1181 | { 0x000000CB, 'E' }, // Edieresis |
1182 | { 0x000000C8, 'E' }, // Egrave |
1183 | { 0x000000CD, 'I' }, // Iacute |
1184 | { 0x000000CC, 'I' }, // Igrave |
1185 | { 0x000000CE, 'i' }, // Icircumflex |
1186 | { 0x000000CF, 'i' }, // Odieresis |
1187 | { 0x000000D3, 'O' }, // Oacute |
1188 | { 0x000000D2, 'O' }, // Ograve |
1189 | { 0x000000D4, 'O' }, // Ocircumflex |
1190 | // terminator |
1191 | { 0,0 } |
1192 | }; |
1193 | |
1194 | // search in ASCIIfy table |
1195 | uInt16 k=0; |
1196 | while (ASCIIfyTable[k].ucs4!=0) { |
1197 | if (ucs4==ASCIIfyTable[k].ucs4) { |
1198 | // found, fetch ASCII-equivalent |
1199 | ucs4=ASCIIfyTable[k].ascii; |
1200 | break; // use it |
1201 | } |
1202 | k++; |
1203 | } |
1204 | } |
1205 | // fall through to default, which does not know ANY non-ASCII |
1206 | default: |
1207 | // only 7 bit ASCII is allowed |
1208 | if ((ucs4 & 0xFFFFFF80) !=0) |
1209 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; |
1210 | else |
1211 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes |
1212 | n++; |
1213 | break; |
1214 | } // switch |
1215 | } // valid UCS4 |
1216 | } // not already UTF8 |
1217 | } // if not lineend |
1218 | // processed until here |
1219 | p=aUTF8; |
1220 | } // while not end of input string |
1221 | } // not already UTF8 |
1222 | // return true if input string completely consumed |
1223 | stringfull: |
1224 | return (*p==0); |
1225 | } // appendUTF8ToString |
1226 | |
1227 | |
1228 | // convert UTF8 to UCS4 |
1229 | // - returns pointer to next char |
1230 | // - returns UCS4=0 on error (no char, bad sequence, sequence not complete) |
1231 | const char *UTF8toUCS4(const char *aUTF8, uInt32 &aUCS4) |
1232 | { |
1233 | uInt8 c; |
1234 | sInt16 morechars; |
1235 | |
1236 | if ((c=*aUTF8)!=0) { |
1237 | aUTF8++; |
1238 | // there is a char |
1239 | morechars=0; |
1240 | // decode UTF8 lead-in |
1241 | if ((c & 0x80) == 0) { |
1242 | // single byte |
1243 | aUCS4=c; |
1244 | morechars=0; |
1245 | } |
1246 | else if ((c & 0xE0) == 0xC0) { |
1247 | // two bytes |
1248 | aUCS4=c & 0x1F; |
1249 | morechars=1; |
1250 | } |
1251 | else if ((c & 0xF0) == 0xE0) { |
1252 | aUCS4=c & 0x0F; |
1253 | morechars=2; |
1254 | } |
1255 | else if ((c & 0xF8) == 0xF0) { |
1256 | aUCS4=c & 0x07; |
1257 | morechars=3; |
1258 | } |
1259 | else if ((c & 0xFC) == 0xF8) { |
1260 | aUCS4=c & 0x03; |
1261 | morechars=4; |
1262 | } |
1263 | else if ((c & 0xFE) == 0xFC) { |
1264 | aUCS4=c & 0x01; |
1265 | morechars=5; |
1266 | } |
1267 | else { |
1268 | // bad char |
1269 | aUCS4=0; |
1270 | } |
1271 | // process additional chars |
1272 | while(morechars--) { |
1273 | if ((c=*aUTF8)==0) { |
1274 | // unfinished sequence |
1275 | aUCS4=0; |
1276 | break; |
1277 | } |
1278 | aUTF8++; |
1279 | if ((c & 0xC0) != 0x80) { |
1280 | // bad additional char |
1281 | aUCS4=0; |
1282 | break; |
1283 | } |
1284 | // each additional char adds 6 new bits |
1285 | aUCS4 = aUCS4 << 6; // shift existing bits |
1286 | aUCS4 |= (c & 0x3F); // add new bits |
1287 | } |
1288 | } |
1289 | else { |
1290 | // no char |
1291 | aUCS4=0; |
1292 | } |
1293 | // return pointer to next char |
1294 | return aUTF8; |
1295 | } // UTF8toUCS4 |
1296 | |
1297 | |
1298 | // convert UCS4 to UTF8 (0 char is not allowed and will be ignored!) |
1299 | void UCS4toUTF8(uInt32 aUCS4, string &aUTF8) |
1300 | { |
1301 | uInt8 c; |
1302 | |
1303 | // ignore null char |
1304 | if (aUCS4==0) return; |
1305 | // create UTF8 lead-in |
1306 | sInt16 morechars=0; |
1307 | if (aUCS4<0x00000080) { |
1308 | // one byte |
1309 | c=aUCS4; |
1310 | } |
1311 | else if (aUCS4<0x00000800) { |
1312 | // two bytes |
1313 | c=0xC0 | ((aUCS4 >> 6) & 0x1F); |
1314 | morechars=1; |
1315 | } |
1316 | else if (aUCS4<0x00010000) { |
1317 | // three bytes |
1318 | c=0xE0 | ((aUCS4 >> 12) & 0x0F); |
1319 | morechars=2; |
1320 | } |
1321 | else if (aUCS4<0x00200000) { |
1322 | // four bytes |
1323 | c=0xF0 | ((aUCS4 >> 18) & 0x07); |
1324 | morechars=3; |
1325 | } |
1326 | else if (aUCS4<0x04000000) { |
1327 | // five bytes |
1328 | c=0xF8 | ((aUCS4 >> 24) & 0x03); |
1329 | morechars=4; |
1330 | } |
1331 | else { |
1332 | // six bytes |
1333 | c=0xFC | ((aUCS4 >> 30) & 0x01); |
1334 | morechars=5; |
1335 | } |
1336 | // add lead-in |
1337 | aUTF8+=c; |
1338 | // add rest of sequence |
1339 | while (morechars--) { |
1340 | c= 0x80 | ((aUCS4 >> (morechars * 6)) & 0x3F); |
1341 | aUTF8+=c; |
1342 | } |
1343 | } // UCS4toUTF8 |
1344 | |
1345 | |
1346 | /* Encoding UTF-16 (excerpt from RFC 2781, paragraph 2.1) |
1347 | |
1348 | Encoding of a single character from an ISO 10646 character value to |
1349 | UTF-16 proceeds as follows. Let U be the character number, no greater |
1350 | than 0x10FFFF. |
1351 | |
1352 | 1) If U < 0x10000, encode U as a 16-bit unsigned integer and |
1353 | terminate. |
1354 | |
1355 | 2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF, |
1356 | U' must be less than or equal to 0xFFFFF. That is, U' can be |
1357 | represented in 20 bits. |
1358 | |
1359 | 3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and |
1360 | 0xDC00, respectively. These integers each have 10 bits free to |
1361 | encode the character value, for a total of 20 bits. |
1362 | |
1363 | 4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order |
1364 | bits of W1 and the 10 low-order bits of U' to the 10 low-order |
1365 | bits of W2. Terminate. |
1366 | |
1367 | Graphically, steps 2 through 4 look like: |
1368 | U' = yyyyyyyyyyxxxxxxxxxx |
1369 | W1 = 110110yyyyyyyyyy |
1370 | W2 = 110111xxxxxxxxxx |
1371 | */ |
1372 | |
1373 | // convert UCS4 to UTF-16 |
1374 | // - returns 0 for UNICODE range UCS4 and first word of UTF-16 for non UNICODE |
1375 | uInt16 UCS4toUTF16(uInt32 aUCS4, uInt16 &aUTF16) |
1376 | { |
1377 | if (aUCS4<0x10000) { |
1378 | // in unicode range: single UNICODE char |
1379 | aUTF16=aUCS4; |
1380 | return 0; // no second char |
1381 | } |
1382 | else { |
1383 | // out of UNICODE range |
1384 | aUCS4-=0x10000; |
1385 | if (aUCS4>0xFFFF) { |
1386 | // inconvertible |
1387 | aUTF16=INCONVERTIBLE_PLACEHOLDER'_'; |
1388 | return 0; |
1389 | } |
1390 | else { |
1391 | // convert to two-word UNICODE / UCS-2 |
1392 | aUTF16=0xD800+(aUCS4>>10); |
1393 | return 0xDC00+(aUCS4 & 0x03FF); |
1394 | } |
1395 | } |
1396 | } // UCS4toUTF16 |
1397 | |
1398 | |
1399 | |
1400 | /* Decoding UTF-16 |
1401 | |
1402 | Decoding of a single character from UTF-16 to an ISO 10646 character |
1403 | value proceeds as follows. Let W1 be the next 16-bit integer in the |
1404 | sequence of integers representing the text. Let W2 be the (eventual) |
1405 | next integer following W1. |
1406 | |
1407 | 1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value |
1408 | of W1. Terminate. |
1409 | |
1410 | 2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence |
1411 | is in error and no valid character can be obtained using W1. |
1412 | Terminate. |
1413 | |
1414 | 3) If there is no W2 (that is, the sequence ends with W1), or if W2 |
1415 | is not between 0xDC00 and 0xDFFF, the sequence is in error. |
1416 | Terminate. |
1417 | |
1418 | 4) Construct a 20-bit unsigned integer U', taking the 10 low-order |
1419 | bits of W1 as its 10 high-order bits and the 10 low-order bits of |
1420 | W2 as its 10 low-order bits. |
1421 | |
1422 | 5) Add 0x10000 to U' to obtain the character value U. Terminate. |
1423 | |
1424 | Note that steps 2 and 3 indicate errors. Error recovery is not |
1425 | specified by this document. When terminating with an error in steps 2 |
1426 | and 3, it may be wise to set U to the value of W1 to help the caller |
1427 | diagnose the error and not lose information. Also note that a string |
1428 | decoding algorithm, as opposed to the single-character decoding |
1429 | described above, need not terminate upon detection of an error, if |
1430 | proper error reporting and/or recovery is provided. |
1431 | |
1432 | */ |
1433 | |
1434 | // convert UTF-16 to UCS4 |
1435 | // - returns pointer to next char |
1436 | // - returns UCS4=0 on error (no char, bad sequence, sequence not complete) |
1437 | const uInt16 *UTF16toUCS4(const uInt16 *aUTF16P, uInt32 &aUCS4) |
1438 | { |
1439 | uInt16 utf16=*aUTF16P++; |
1440 | |
1441 | if (utf16<0xD800 || utf16>0xDFFF) { |
1442 | // single char unicode |
1443 | aUCS4=utf16; |
1444 | } |
1445 | else { |
1446 | // could be two-char |
1447 | if (utf16<=0xDBFF) { |
1448 | // valid first char: check second char |
1449 | uInt16 utf16_2 = *aUTF16P; // next |
1450 | if (utf16_2 && utf16_2>=0xDC00 && utf16_2<=0xDFFF) { |
1451 | // second char exists and is valid |
1452 | aUTF16P++; // advance now |
1453 | aUCS4 = |
1454 | ((utf16 & 0x3FF) << 10) + |
1455 | (utf16_2 & 0x3FF); |
1456 | } |
1457 | else |
1458 | aUCS4=0; // no char |
1459 | } |
1460 | else { |
1461 | aUCS4=0; // no char |
1462 | } |
1463 | } |
1464 | // return advanced pointer |
1465 | return aUTF16P; |
1466 | } // UCS4toUTF16 |
1467 | |
1468 | |
1469 | |
1470 | |
1471 | |
1472 | |
1473 | // add UTF8 string as UTF-16 byte stream to 8-bit string |
1474 | // - if aLEM is not lem_none, occurrence of any type of Linefeeds |
1475 | // (LF,CR,CRLF and even CRCRLF) in input string will be |
1476 | // replaced by the specified line end type |
1477 | // - output is clipped after ByteString reaches aMaxBytes size (if not 0), = approx half as many Unicode chars |
1478 | // - returns true if all input could be converted, false if output is clipped |
1479 | bool appendUTF8ToUTF16ByteString( |
1480 | cAppCharP aUTF8, |
1481 | string &aUTF16ByteString, |
1482 | bool aBigEndian, |
1483 | TLineEndModes aLEM, |
1484 | uInt32 aMaxBytes |
1485 | ) |
1486 | { |
1487 | uInt32 ucs4; |
1488 | uInt16 utf16=0,utf16_1; |
1489 | cAppCharP p; |
1490 | |
1491 | while (aUTF8 && *aUTF8) { |
1492 | // convert next UTF8 char to UCS4 |
1493 | p=UTF8toUCS4(aUTF8, ucs4); |
1494 | if (ucs4==0) break; // error in UTF8 encoding, exit |
1495 | // convert line ends |
1496 | if (ucs4 == '\n' && aLEM!=lem_none && aLEM!=lem_cstr) { |
1497 | // produce specified line end |
1498 | utf16_1=0; |
1499 | switch (aLEM) { |
1500 | case lem_mac : utf16=0x0D; break; |
1501 | case lem_filemaker : utf16=0x0B; break; |
1502 | case lem_unix : utf16=0x0A; break; |
1503 | case lem_dos : |
1504 | utf16_1=0x0D; // CR.. |
1505 | utf16=0x0A; // ..then LF |
1506 | break; |
1507 | default: break; |
1508 | } |
1509 | } |
1510 | else { |
1511 | // ordinary char, use UTF16 encoding |
1512 | utf16_1 = UCS4toUTF16(ucs4,utf16); |
1513 | } |
1514 | // check if appending UTF16 would exceed max size specified |
1515 | if (aMaxBytes!=0 && aUTF16ByteString.size() + (utf16_1 ? 4 : 2) > aMaxBytes) |
1516 | break; |
1517 | // we can append, advance input pointer |
1518 | aUTF8 = p; |
1519 | // now append |
1520 | if (aBigEndian) { |
1521 | // Big end first, Motorola order |
1522 | if (utf16_1) { |
1523 | aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF); |
1524 | aUTF16ByteString += (char)(utf16_1 & 0xFF); |
1525 | } |
1526 | aUTF16ByteString += (char)((utf16 >> 8) & 0xFF); |
1527 | aUTF16ByteString += (char)(utf16 & 0xFF); |
1528 | } |
1529 | else { |
1530 | // Little end first, Intel order |
1531 | if (utf16_1) { |
1532 | aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF); |
1533 | aUTF16ByteString += (char)(utf16_1 & 0xFF); |
1534 | } |
1535 | aUTF16ByteString += (char)(utf16 & 0xFF); |
1536 | aUTF16ByteString += (char)((utf16 >> 8) & 0xFF); |
1537 | } |
1538 | } // while |
1539 | // true if all input consumed |
1540 | return (aUTF8==NULL__null) || (*aUTF8==0); |
1541 | } // appendUTF8ToUTF16ByteString |
1542 | |
1543 | |
1544 | // add UTF16 byte string as UTF8 to value |
1545 | void appendUTF16AsUTF8( |
1546 | const uInt16 *aUTF16, |
1547 | uInt32 aNumUTF16Chars, |
1548 | bool aBigEndian, |
1549 | string &aVal, |
1550 | bool aConvertLineEnds, |
1551 | bool aAllowFilemakerCR |
1552 | ) |
1553 | { |
1554 | uInt32 ucs4; |
1555 | uInt16 utf16pair[2]; |
1556 | cAppCharP inP = (cAppCharP)aUTF16; |
1557 | bool lastWasCR=false; |
1558 | |
1559 | while (inP && !(*inP==0 && *(inP+1)==0) && aNumUTF16Chars>0) { |
1560 | // get two words (in case of surrogate pair) |
1561 | if (aBigEndian) { |
1562 | // Motorola order |
1563 | utf16pair[0]=((*(inP) & 0xFF)<<8) + (*(inP+1) & 0xFF); |
1564 | if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+2) & 0xFF)<<8) + (*(inP+3) & 0xFF); |
1565 | } |
1566 | else { |
1567 | // Intel order |
1568 | utf16pair[0]=((*(inP+1) & 0xFF)<<8) + (*(inP) & 0xFF); |
1569 | if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+3) & 0xFF)<<8) + (*(inP+2) & 0xFF); |
1570 | } |
1571 | cAppCharP hP = (cAppCharP)UTF16toUCS4(utf16pair, ucs4); |
1572 | /* |
1573 | PDEBUGPRINTFX(DBG_PARSE+DBG_EXOTIC,( |
1574 | "Parsed %ld bytes: *(inP)=0x%02hX, *(inP+1)=0x%02hX, *(inP+2)=0x%02hX, *(inP+3)=0x%02hX, utf16pair[0]=0x%04hX, utf16pair[1]=0x%04hX, ucs4=0x%04lX", |
1575 | (uInt32)(hP-(cAppCharP)utf16pair), |
1576 | (uInt16)*(inP), (uInt16)*(inP+1), (uInt16)*(inP+2), (uInt16)*(inP+3), |
1577 | (uInt16)utf16pair[0], (uInt16)utf16pair[1], |
1578 | (uInt32)ucs4 |
1579 | )); |
1580 | */ |
1581 | uInt32 bytes=hP-(cAppCharP)utf16pair; |
1582 | inP+=bytes; // next UTF16 to check |
1583 | aNumUTF16Chars-=bytes/2; // count down UTF16 chars |
1584 | // convert line ends if selected |
1585 | if (aConvertLineEnds) { |
1586 | if (ucs4 == 0x0D) { |
1587 | lastWasCR=true; |
1588 | continue; |
1589 | } |
1590 | else { |
1591 | if (ucs4 == 0x0A || (aAllowFilemakerCR && ucs4 == 0x0B)) |
1592 | ucs4 = '\n'; // convert to LineEnd |
1593 | else if (lastWasCR) |
1594 | aVal += '\n'; // insert a LineEnd |
1595 | lastWasCR=false; |
1596 | } |
1597 | } |
1598 | // append to UTF-8 string |
1599 | UCS4toUTF8(ucs4, aVal); |
1600 | } |
1601 | if (lastWasCR) |
1602 | aVal += '\n'; // input string ended on CR, must be shown in output |
1603 | } // appendUTF16AsUTF8 |
1604 | |
1605 | |
1606 | |
1607 | |
1608 | |
1609 | |
1610 | #ifdef BINTREE_GENERATOR |
1611 | |
1612 | // add a key/value pair to the binary tree |
1613 | void addToBinTree(TBinTreeNode *&aBinTree, treeval_t aMinKey, treeval_t aMaxKey, treeval_t aKey, treeval_t aValue) |
1614 | { |
1615 | // start at root |
1616 | TBinTreeNode **nextPP = &aBinTree; |
1617 | treeval_t cmpval; |
1618 | do { |
1619 | // create the new decision value from max and min |
1620 | cmpval = aMinKey+((aMaxKey-aMinKey) >> 1); |
1621 | // create the node if not already there |
1622 | if (*nextPP==NULL__null) { |
1623 | *nextPP = new TBinTreeNode; |
1624 | (*nextPP)->key = cmpval; |
1625 | (*nextPP)->nextHigher=NULL__null; |
1626 | (*nextPP)->nextLowerOrEqual=NULL__null; |
1627 | (*nextPP)->value=0; |
1628 | } |
1629 | // check if the node CREATED is a leaf node |
1630 | // this is the case if max==min |
1631 | if (aMaxKey==aMinKey) { |
1632 | // save leaf value (possibly overwriting existing leaf value for same code) |
1633 | (*nextPP)->value=aValue; |
1634 | break; |
1635 | } |
1636 | // decide which way to go |
1637 | if (aKey>cmpval) { |
1638 | // go to the "higher" side |
1639 | nextPP = &((*nextPP)->nextHigher); |
1640 | // determine new minimum |
1641 | aMinKey = cmpval+1; // minimum must be higher than cmpval |
1642 | } |
1643 | else { |
1644 | // go to the "lower or equal" side |
1645 | nextPP = &((*nextPP)->nextLowerOrEqual); |
1646 | // determine new maximum |
1647 | aMaxKey = cmpval; // maximum must be lower or equal than cmpval |
1648 | } |
1649 | } while(true); |
1650 | } // addToBinTree |
1651 | |
1652 | |
1653 | // dispose a bintree |
1654 | void disposeBinTree(TBinTreeNode *&aBinTree) |
1655 | { |
1656 | if (!aBinTree) return; |
1657 | if (aBinTree->nextHigher) |
1658 | disposeBinTree(aBinTree->nextHigher); |
1659 | if (aBinTree->nextLowerOrEqual) |
1660 | disposeBinTree(aBinTree->nextLowerOrEqual); |
1661 | delete aBinTree; |
1662 | aBinTree=NULL__null; |
1663 | } // disposeBinTree |
1664 | |
1665 | |
1666 | // convert key to value using a flat bintree |
1667 | treeval_t searchBintree(TBinTreeNode *aBinTree, treeval_t aKey, treeval_t aUndefValue, treeval_t aMinKey, treeval_t aMaxKey) |
1668 | { |
1669 | treeval_t cmpval; |
1670 | while(aBinTree) { |
1671 | // create the new decision value from max and min |
1672 | cmpval = aMinKey+((aMaxKey-aMinKey) >> 1); |
1673 | // must match stored cmpval |
1674 | if (cmpval!=aBinTree->key) |
1675 | return aUndefValue; |
1676 | // check if next node must be leaf if the tree contains our key, |
1677 | // this is the case if max==min |
1678 | if (aMaxKey==aMinKey) { |
1679 | if (aBinTree->nextHigher!=NULL__null || aBinTree->nextLowerOrEqual!=NULL__null) { |
1680 | // no leaf value here, should not be the case ever (we should have |
1681 | // encountered a node with no left or right link before this!) |
1682 | return aUndefValue; |
1683 | } |
1684 | else { |
1685 | // found a leaf value here |
1686 | return aBinTree->value; |
1687 | } |
1688 | } |
1689 | // decide which way to go |
1690 | if (aKey>cmpval) { |
1691 | // go to the "higher" side = just next element in array, except if we have the special marker here |
1692 | if (aBinTree->nextHigher == NULL__null) |
1693 | return aUndefValue; // we should go higher-side, but can't -> unknown key |
1694 | aBinTree=aBinTree->nextHigher; |
1695 | // determine new minimum |
1696 | aMinKey = cmpval+1; // minimum must be higher than cmpval |
1697 | } |
1698 | else { |
1699 | // go to the "lower" side = element at index indicated by current element, except if we have the special marker here |
1700 | if (aBinTree->nextLowerOrEqual == NULL__null) |
1701 | return aUndefValue; // we should go lower-or-equal-side, but can't -> unknown key |
1702 | aBinTree=aBinTree->nextLowerOrEqual; |
1703 | // determine new maximum |
1704 | aMaxKey = cmpval; // maximum must be lower or equal than cmpval |
1705 | } |
1706 | } |
1707 | // if we reach the end of the array, key is not in the tree |
1708 | return aUndefValue; |
1709 | } // searchBintree |
1710 | |
1711 | |
1712 | |
1713 | |
1714 | // make a flat form representation of the bintree in a one-dimensional array |
1715 | // - higher-side links are implicit (nodes following each other), |
1716 | // lower-or-equal-side links are explicit |
1717 | static bool flatBinTreeRecursion( |
1718 | TBinTreeNode *aBinTree, size_t &aIndex, treeval_t *aFlatArray, size_t aArrSize, treeval_t aLinksStart, treeval_t aLinksEnd |
1719 | ) |
1720 | { |
1721 | // check if array is full |
1722 | if (aIndex>=aArrSize) |
1723 | return false; |
1724 | // examine node to flatten |
1725 | if (aBinTree->nextHigher==NULL__null && aBinTree->nextLowerOrEqual==NULL__null) { |
1726 | // this is a leaf node, containing only the value |
1727 | if (aBinTree->value>=aLinksStart && aBinTree->value<=aLinksEnd) |
1728 | return false; // link space and value space overlap |
1729 | aFlatArray[aIndex]=aBinTree->value; |
1730 | aIndex++; |
1731 | } |
1732 | else if (aBinTree->nextHigher==NULL__null) { |
1733 | // lower-side-only node: set special mark to specify that lower-or-equal side |
1734 | // implicitly follows (instead of higher-side) |
1735 | aFlatArray[aIndex]=aLinksStart + 1; // no node points to the immediately following node explicitly, so 1 can be used as special marker |
1736 | aIndex++; |
1737 | // - recurse to generate it |
1738 | if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) |
1739 | return false; |
1740 | } |
1741 | else { |
1742 | // this is a branch |
1743 | // - lower-or-equal side is represented as an index in the array |
1744 | aFlatArray[aIndex]=aLinksStart + 0; // default to not-existing (no node points to itself, so 0 can be used as NIL index value) |
1745 | // - higher side branch follows immediately |
1746 | size_t linkindex = aIndex++; |
1747 | // - recurse to generate it |
1748 | if (!flatBinTreeRecursion(aBinTree->nextHigher,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) |
1749 | return false; |
1750 | // - now we have the index where we must insert the lower-or-equal side |
1751 | if (aBinTree->nextLowerOrEqual!=NULL__null) { |
1752 | // there is a lower-or-equal side |
1753 | // - place relative link from original node |
1754 | uInt32 rellink=aIndex-linkindex; |
1755 | if ((uInt32)aLinksStart+rellink>(uInt32)aLinksEnd-1L) { |
1756 | // we need a long link |
1757 | // - move generated higher side branch one up |
1758 | for (size_t k=aIndex-1; k>linkindex; k--) aFlatArray[k+1]=aFlatArray[k]; |
1759 | aIndex++; // we've eaten up one extra entry now |
1760 | // - now set long link |
1761 | aFlatArray[linkindex]=aLinksEnd-1; // long link marker |
1762 | if (rellink>0xFFFF) |
1763 | return false; // cannot jump more than 64k |
1764 | aFlatArray[linkindex+1]=rellink; // long link |
1765 | } |
1766 | else { |
1767 | // short link is ok |
1768 | aFlatArray[linkindex]=aLinksStart+rellink; |
1769 | } |
1770 | // - now create the lower-or-equal side |
1771 | if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) |
1772 | return false; |
1773 | } |
1774 | } |
1775 | return true; |
1776 | } // flatBinTreeRecursion |
1777 | |
1778 | |
1779 | // make a flat form representation of the bintree in a one-dimensional array |
1780 | // - higher-side links are implicit (nodes following each other), |
1781 | // lower-or-equal-side links are explicit |
1782 | bool flatBinTree( |
1783 | TBinTreeNode *aBinTree, TConvFlatTree &aFlatTree, size_t aArrSize, |
1784 | treeval_t aMinKey, treeval_t aMaxKey, treeval_t aLinksStart, treeval_t aLinksEnd |
1785 | ) |
1786 | { |
1787 | // save tree params |
1788 | aFlatTree.numelems=0; |
1789 | aFlatTree.minkey=aMinKey; |
1790 | aFlatTree.maxkey=aMaxKey; |
1791 | aFlatTree.linksstart=aLinksStart; |
1792 | aFlatTree.linksend=aLinksEnd; |
1793 | // now create actual tree |
1794 | size_t index=0; |
1795 | if (!flatBinTreeRecursion(aBinTree,index,aFlatTree.elements,aArrSize,aLinksStart,aLinksEnd)) |
1796 | return false; |
1797 | aFlatTree.numelems=index; // actual length of array |
1798 | return true; |
1799 | } // flatBinTree |
1800 | |
1801 | |
1802 | |
1803 | |
1804 | #endif |
1805 | |
1806 | |
// Convert a key to its value using a flat bintree.
// - aFlatTree   : flat tree (elements[], numelems, minkey/maxkey, linksstart/linksend)
// - aKey        : key to look up
// - aUndefValue : returned when the key is outside minkey..maxkey or not in the tree
//
// The key interval [minKey,maxKey] is halved at every step. Keys above the
// midpoint continue with the element following in the array ("higher" side),
// the others follow the link stored in the current element ("lower" side).
// Once the interval has shrunk to a single key, the current element is
// returned as the value.
// Two element values are treated as markers:
// - linksstart+1 : node has no "higher" side, the "lower" side follows directly
// - linksend-1   : long link, the following element holds the jump distance
// NOTE(review): the first element is read before numelems is checked (do-while),
//   so this presumably expects a non-empty tree - confirm with the generator.
treeval_t searchFlatBintree(const TConvFlatTree &aFlatTree, treeval_t aKey, treeval_t aUndefValue)
{
  treeval_t cmpval,thisnode;
  size_t index=0;
  // start with the full key range covered by the tree
  treeval_t minKey = aFlatTree.minkey;
  treeval_t maxKey = aFlatTree.maxkey;
  // reject out-of-bounds keys immediately
  if (aKey<minKey || aKey>maxKey)
    return aUndefValue;
  do {
    // create the new decision value (midpoint) from max and min
    cmpval = minKey+((maxKey-minKey) >> 1);
    thisnode = aFlatTree.elements[index];
    // check if this node must be a leaf if the tree contains our key,
    // this is the case if max==min
    if (maxKey==minKey) {
#ifdef BINTREE_GENERATOR
      if (thisnode>=aFlatTree.linksstart && thisnode<=aFlatTree.linksend) {
        // no leaf value here, should not be the case ever (we should have
        // encountered a node with no left or right link before this!)
        return aUndefValue;
      }
      else
#endif
      {
        // found a leaf value here
        return (treeval_t) thisnode;
      }
    }
    // decide which way to go
    if (aKey>cmpval) {
      // go to the "higher" side = just next element in array, except if we have the special marker here
      if (thisnode == aFlatTree.linksstart+1)
        return aUndefValue; // we should go higher-side, but can't -> unknown key
      // next node is next index (or one more in case this is a long link,
      // which occupies two elements: marker and jump distance)
      if (thisnode == aFlatTree.linksend-1)
        index++;
      index++;
      // determine new minimum
      minKey = cmpval+1; // minimum must be higher than cmpval
    }
    else {
      // go to the "lower" side = element at index indicated by current element, except if we have the special marker here
      if (thisnode == aFlatTree.linksstart+1)
        index++; // special case, "lower" side is immediately following because there is no "higher" side
      else {
#ifdef BINTREE_GENERATOR
        // if node contains a leaf value instead of a link, something is wrong
        if (thisnode<aFlatTree.linksstart || thisnode>aFlatTree.linksend)
          return aUndefValue; // no leaf expected here
#endif
        if (thisnode==aFlatTree.linksend-1) {
          // long link
          index++; // skip long link marker
          thisnode = aFlatTree.elements[index]; // get link value
          index = index+thisnode; // jump by link value (relative to the link value's own position)
        }
        else {
          // short link: distance is encoded as offset from linksstart
          index = index+(thisnode-aFlatTree.linksstart); // get index of next node (relative branch)
        }
        if (index==0)
          return aUndefValue; // there is no link
      }
      // determine new maximum
      maxKey = cmpval; // maximum must be lower than or equal to cmpval
    }
  } while(index<aFlatTree.numelems);
  // if we reach the end of the array, key is not in the tree
  return aUndefValue;
} // searchFlatBintree
1880 | |
1881 | // MD5 and B64 given string |
1882 | void MD5B64(const char *aString, sInt32 aLen, string &aMD5B64) |
1883 | { |
1884 | // determine input length |
1885 | if (aLen<=0) aLen=strlen(aString); |
1886 | // calc MD5 |
1887 | md5::SYSYNC_MD5_CTX context; |
1888 | uInt8 digest[16]; |
1889 | md5::Init (&context); |
1890 | md5::Update (&context, (const uInt8 *)aString,aLen); |
1891 | md5::Final (digest, &context); |
1892 | // b64 encode the MD5 digest |
1893 | uInt32 b64md5len; |
1894 | char *b64md5=b64::encode(digest,16,&b64md5len); |
1895 | // assign result |
1896 | aMD5B64.assign(b64md5,b64md5len); |
1897 | // done |
1898 | b64::free(b64md5); // return buffer allocated by b64::encode |
1899 | } // MD5B64 |
1900 | |
1901 | |
1902 | // format as Timestamp for use in debug logs |
1903 | void StringObjTimestamp(string &aStringObj, lineartime_t aTimer) |
1904 | { |
1905 | // format the time |
1906 | if (aTimer==noLinearTime) { |
1907 | aStringObj = "<no time>"; |
1908 | return; |
1909 | } |
1910 | sInt16 y,mo,d,h,mi,s,ms; |
1911 | lineartime2date(aTimer,&y,&mo,&d); |
1912 | lineartime2time(aTimer,&h,&mi,&s,&ms); |
1913 | StringObjPrintf( |
1914 | aStringObj, |
1915 | "%04d-%02d-%02d %02d:%02d:%02d.%03d", |
1916 | y,mo,d,h,mi,s,ms |
1917 | ); |
1918 | } // StringObjTimestamp |
1919 | |
1920 | |
1921 | // format as hex string |
1922 | void StringObjHexString(string &aStringObj, const uInt8 *aBinary, uInt32 aBinSz) |
1923 | { |
1924 | aStringObj.erase(); |
1925 | if (!aBinary) return; |
1926 | while (aBinSz>0) { |
1927 | AppendHexByte(aStringObj,*aBinary++); |
1928 | aBinSz--; |
1929 | } |
1930 | } // StringObjHexString |
1931 | |
1932 | |
1933 | // add (already encoded!) CGI to existing URL string |
1934 | bool addCGItoString(string &aStringObj, cAppCharP aCGI, bool noduplicate) |
1935 | { |
1936 | if (!noduplicate || aStringObj.find(aCGI)==string::npos) { |
1937 | // - Add CGI separator if and only if none exists already |
1938 | if (aStringObj.find("?")==string::npos) |
1939 | aStringObj += '?'; |
1940 | aStringObj += aCGI; |
1941 | return true; // added |
1942 | } |
1943 | return false; // nothing added |
1944 | } |
1945 | |
1946 | |
1947 | // encode string for being used as a CGI key/value element |
1948 | string encodeForCGI(cAppCharP aCGI) |
1949 | { |
1950 | string cgi; |
1951 | cAppCharP p = aCGI; |
1952 | while (p && *p) { |
1953 | if (*p>0x7E || *p<=0x20 || *p=='%' || *p=='?' || *p=='&' || *p=='#') { |
1954 | // CGI encode these |
1955 | cgi += '%'; |
1956 | AppendHexByte(cgi, *p); |
1957 | } |
1958 | else { |
1959 | // use as-is |
1960 | cgi += *p; |
1961 | } |
1962 | p++; |
1963 | } |
1964 | return cgi; |
1965 | } // encodeForCGI |
1966 | |
1967 | |
1968 | // Count bits |
1969 | int countbits(uInt32 aMask) |
1970 | { |
1971 | int bits=0; |
1972 | uInt32 mask=0x0000001; |
1973 | while (mask) { |
1974 | if (aMask & mask) bits++; |
1975 | mask=mask << 1; |
1976 | } |
1977 | return bits; |
1978 | } // countbits |
1979 | |
1980 | |
// Convert aString to uppercase in place, using toupper() of the current C locale.
// Bytes that toupper() does not map (e.g. UTF-8 multibyte parts in the "C" locale)
// are left unchanged.
void StringUpper(string &aString)
{
  for (string::iterator pos=aString.begin(); pos!=aString.end(); ++pos) {
    // toupper() is only defined for values representable as unsigned char (or EOF);
    // a plain char may be negative for bytes >= 0x80, so convert first
    *pos = static_cast<char>(toupper(static_cast<unsigned char>(*pos)));
  }
} // StringUpper
1986 | |
1987 | |
// Convert aString to lowercase in place, using tolower() of the current C locale.
// Bytes that tolower() does not map (e.g. UTF-8 multibyte parts in the "C" locale)
// are left unchanged.
void StringLower(string &aString)
{
  for (string::iterator pos=aString.begin(); pos!=aString.end(); ++pos) {
    // tolower() is only defined for values representable as unsigned char (or EOF);
    // a plain char may be negative for bytes >= 0x80, so convert first
    *pos = static_cast<char>(tolower(static_cast<unsigned char>(*pos)));
  }
} // StringLower
1993 | |
1994 | |
1995 | // Substitute occurences of pattern with replacement in string |
1996 | void StringSubst( |
1997 | string &aString, const char *aPattern, const string &aReplacement, |
1998 | sInt32 aPatternLen, |
1999 | TCharSets aCharSet, TLineEndModes aLEM, |
2000 | TQuotingModes aQuotingMode |
2001 | ) |
2002 | { |
2003 | StringSubst( |
2004 | aString, aPattern, |
2005 | aReplacement.c_str(), |
2006 | aPatternLen, |
2007 | aReplacement.size(), |
2008 | aCharSet, aLEM, aQuotingMode |
2009 | ); |
2010 | } // StringSubst |
2011 | |
2012 | |
2013 | // Substitute occurences of pattern with replacement in string |
2014 | void StringSubst( |
2015 | string &aString, const char *aPattern, const char *aReplacement, |
2016 | sInt32 aPatternLen, sInt32 aReplacementLen, |
2017 | TCharSets aCharSet, TLineEndModes aLEM, |
2018 | TQuotingModes aQuotingMode |
2019 | ) |
2020 | { |
2021 | string::size_type i; |
2022 | string s; |
2023 | i=0; |
2024 | if (aPatternLen<0) aPatternLen=strlen(aPattern); |
2025 | // convert if needed |
2026 | if (!aReplacement) { |
2027 | aReplacement=""; // empty string if not specified |
2028 | aReplacementLen=0; |
2029 | } |
2030 | if (aCharSet!=chs_unknown) { |
2031 | appendUTF8ToString(aReplacement,s,aCharSet,aLEM,aQuotingMode); |
2032 | aReplacement=s.c_str(); |
2033 | aReplacementLen=s.size(); |
2034 | } |
2035 | else { |
2036 | if (aReplacementLen<0) aReplacementLen=strlen(aReplacement); |
2037 | } |
2038 | // now replace |
2039 | while((i=aString.find(aPattern,i))!=string::npos) { |
2040 | aString.replace(i,aPatternLen,aReplacement); |
2041 | i+=aReplacementLen; |
2042 | } |
2043 | } // StringSubst |
2044 | |
2045 | |
2046 | // Substitute occurences of pattern with replacement in string |
2047 | void StringSubst(string &aString, const char *aPattern, const string &aReplacement, sInt32 aPatternLen) |
2048 | { |
2049 | StringSubst(aString,aPattern,aReplacement.c_str(),aPatternLen,aReplacement.size()); |
2050 | } // StringSubst |
2051 | |
2052 | |
2053 | // Substitute occurences of pattern with integer number in string |
2054 | void StringSubst(string &aString, const char *aPattern, sInt32 aNumber, sInt32 aPatternLen) |
2055 | { |
2056 | string s; |
2057 | StringObjPrintf(s,"%ld",(long)aNumber); |
2058 | StringSubst(aString,aPattern,s,aPatternLen); |
2059 | } // StringSubst |
2060 | |
2061 | |
2062 | |
2063 | // copy PCdata contents into std::string object |
2064 | void smlPCDataToStringObj(const SmlPcdataPtr_t aPcdataP, string &aStringObj) |
2065 | { |
2066 | if (!aPcdataP || !aPcdataP->content) { |
2067 | // no content at all |
2068 | aStringObj.erase(); |
2069 | } |
2070 | else if ( |
2071 | // NOTE: Opaque works only with modified syncML toolkit which |
2072 | // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING |
2073 | aPcdataP->contentType == SML_PCDATA_STRING || |
2074 | aPcdataP->contentType == SML_PCDATA_OPAQUE |
2075 | ) { |
2076 | // string or opaque type |
2077 | aStringObj.assign((char *)aPcdataP->content, aPcdataP->length); |
2078 | } |
2079 | else if (aPcdataP->contentType == SML_PCDATA_EXTENSION) { |
2080 | // extension type |
2081 | StringObjPrintf(aStringObj,"[PCDATA_EXTENSION Type=%hd]",(sInt16)aPcdataP->extension); |
2082 | } |
2083 | else { |
2084 | // other type |
2085 | StringObjPrintf(aStringObj,"[PCDATA Type=%hd]",(sInt16)aPcdataP->contentType); |
2086 | } |
2087 | } // smlPCDataToStringObj |
2088 | |
2089 | |
2090 | // returns item string or empty string (NEVER NULL) |
2091 | const char *smlItemDataToCharP(const SmlItemPtr_t aItemP) |
2092 | { |
2093 | if (!aItemP) return ""; |
2094 | return smlPCDataToCharP(aItemP->data); |
2095 | } // smlItemDataToCharP |
2096 | |
2097 | |
2098 | // returns first item string or empty string (NEVER NULL) |
2099 | const char *smlFirstItemDataToCharP(const SmlItemListPtr_t aItemListP) |
2100 | { |
2101 | if (!aItemListP) return ""; |
2102 | return smlItemDataToCharP(aItemListP->item); |
2103 | } // smlFirstItemDataToCharP |
2104 | #endif //SYSYNC_ENGINE |
2105 | |
2106 | // returns pointer to PCdata contents or null string. If aSizeP!=NULL, length will be stored in *aSize |
2107 | const char *smlPCDataToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP) |
2108 | { |
2109 | const char *str = smlPCDataOptToCharP(aPcdataP, aSizeP); |
2110 | if (str) return str; |
2111 | return ""; |
2112 | } // smlPCDataToCharP |
2113 | |
2114 | |
2115 | // returns pointer to PCdata contents if existing, NULL otherwise. |
2116 | // If aSizeP!=NULL, length will be stored in *aSize |
2117 | const char *smlPCDataOptToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP) |
2118 | { |
2119 | if (!aPcdataP || !aPcdataP->content) { |
2120 | return NULL__null; // we have no value, it could be empty howevert |
2121 | if (aSizeP) *aSizeP=0; |
2122 | } |
2123 | if (aPcdataP->length==0) { |
2124 | // empty content |
2125 | if (aSizeP) *aSizeP=0; |
2126 | return ""; // return empty string |
2127 | } |
2128 | else if ( |
2129 | // NOTE: Opaque works only with modified syncML toolkit which |
2130 | // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING |
2131 | aPcdataP->contentType == SML_PCDATA_STRING || |
2132 | aPcdataP->contentType == SML_PCDATA_CDATA || // XML only |
2133 | aPcdataP->contentType == SML_PCDATA_OPAQUE // WBXML only |
2134 | ) { |
2135 | // return pointer to content |
2136 | if (aSizeP) *aSizeP=aPcdataP->length; |
2137 | return (char *) aPcdataP->content; |
2138 | } |
2139 | else { |
2140 | // no string |
2141 | if (aSizeP) *aSizeP=11; |
2142 | return "[no string]"; |
2143 | } |
2144 | } // smlPCDataOptToCharP |
2145 | |
2146 | |
2147 | // returns pointer to source or target LocURI |
2148 | const char *smlSrcTargLocURIToCharP(const SmlTargetPtr_t aSrcTargP) |
2149 | { |
2150 | if (!aSrcTargP || !aSrcTargP->locURI) { |
2151 | return ""; // empty string |
2152 | } |
2153 | else { |
2154 | // return PCdata string contents |
2155 | return smlPCDataToCharP(aSrcTargP->locURI); |
2156 | } |
2157 | } // smlSrcTargLocURIToCharP |
2158 | |
2159 | |
2160 | // returns pointer to source or target LocName |
2161 | const char *smlSrcTargLocNameToCharP(const SmlTargetPtr_t aSrcTargP) |
2162 | { |
2163 | if (!aSrcTargP || !aSrcTargP->locName) { |
2164 | return ""; // empty string |
2165 | } |
2166 | else { |
2167 | // return PCdata string contents |
2168 | return smlPCDataToCharP(aSrcTargP->locName); |
2169 | } |
2170 | } // smlSrcTargLocNameToCharP |
2171 | |
2172 | |
2173 | #ifdef SYSYNC_ENGINE1 |
2174 | // returns error code made ready for SyncML sending (that is, remove offset |
2175 | // of 10000 if present, and make generic error 500 for non-SyncML errors, |
2176 | // and return LOCERR_OK as 200) |
2177 | localstatus syncmlError(localstatus aErr) |
2178 | { |
2179 | if (aErr==LOCERR_OK) return 200; // SyncML ok code |
2180 | if (aErr<999) return aErr; // return as is |
2181 | if (aErr>=LOCAL_STATUS_CODE+100 && aErr<=999) |
2182 | return aErr-LOCAL_STATUS_CODE; // return with offset removed |
2183 | // no suitable conversion |
2184 | return 500; // return generic "bad" |
2185 | } // localError |
2186 | |
2187 | |
2188 | // returns error code made local (that is, offset by 10000 in case aErr is a |
2189 | // SyncML status code <10000, and convert 200 into LOCERR_OK) |
2190 | localstatus localError(localstatus aErr) |
2191 | { |
2192 | if (aErr==200 || aErr==0) return LOCERR_OK; |
2193 | if (aErr<LOCAL_STATUS_CODE) return aErr+LOCAL_STATUS_CODE; |
2194 | return aErr; |
2195 | } // localError |
2196 | |
2197 | |
2198 | // returns pure relative URI, if specified relative or absolute to |
2199 | // given server URI |
2200 | const char *relativeURI(const char *aURI,const char *aServerURI) |
2201 | { |
2202 | // check for "./" type relative URI |
2203 | if (strnncmp(aURI,URI_RELPREFIX"./",2)==0) { |
2204 | // relative URI prefixed with "./", just zap the relative part |
2205 | return aURI+2; |
2206 | } |
2207 | else if (aServerURI) { |
2208 | // test if absolute URI specifying the right server |
2209 | uInt32 n=strlen(aServerURI); |
2210 | if (strnncmp(aURI,aServerURI,n)==0) { |
2211 | // beginning of URI matches server's URI |
2212 | const char *p=aURI+n; |
2213 | // skip delimiter, if any |
2214 | if (*p=='/') p++; |
2215 | // return relative part of URI |
2216 | return p; |
2217 | } |
2218 | } |
2219 | // just return unmodified |
2220 | return aURI; |
2221 | } // relativeURI |
2222 | |
2223 | |
// Split hostname into address and port parts at the first colon.
// - aHost : "address" or "address:port"; NULL is treated like an empty string
// - aAddr : if not NULL, receives the part before the colon (or everything)
// - aPort : if not NULL, receives the part after the colon (empty if none)
void splitHostname(const char *aHost,string *aAddr,string *aPort)
{
  // a missing host must not reach strchr(), handle it like an empty one
  if (!aHost) aHost="";
  const char *colonP=strchr(aHost,':');
  if (colonP) {
    // port spec found
    if (aAddr) aAddr->assign(aHost,colonP-aHost);
    if (aPort) aPort->assign(colonP+1);
  }
  else {
    // no port spec
    if (aAddr) aAddr->assign(aHost);
    if (aPort) aPort->erase();
  }
} // splitHostname
2241 | |
// value of a single hex digit (either case), -1 if c is not a hex digit
static int urlHexNibble(char c)
{
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
} // urlHexNibble


// Translate %XX sequences into the corresponding character, in-place.
// - a NULL pointer or a string without '%' is left untouched
// - a '%' followed by two characters that are not both hex digits is dropped
//   together with those two characters
// - a '%' with less than two characters following ends the string at that point
void urlDecode(string *str)
{
  // nothing todo?
  if (!str ||
      str->find('%') == string::npos) return;

  string decoded;
  decoded.reserve(str->size());
  const char *in = str->c_str();
  while (*in) {
    const char c = *in++;
    if (c != '%') {
      // ordinary character
      decoded += c;
      continue;
    }
    // escape sequence, two more characters are needed
    if (!in[0] || !in[1]) break; // truncated sequence, drop the rest
    // digits are evaluated without tolower(): that would be locale dependent
    // and is undefined for negative char values
    const int hi = urlHexNibble(in[0]);
    const int lo = urlHexNibble(in[1]);
    in += 2;
    // only a fully valid sequence produces a character; an invalid first digit
    // used to count as 0, so "%g1" was decoded into the control character 0x01
    if (hi >= 0 && lo >= 0)
      decoded += static_cast<char>((hi << 4) | lo);
  }
  *str = decoded;
} // urlDecode
2285 | |
// Translate every character that is not alphanumeric (isalnum()) into %XX
// with uppercase hex digits, in-place.
// A NULL pointer is ignored; a string without such characters is left untouched.
void urlEncode(string *str)
{
  if (!str) {
    return;
  }

  static const char hexDigits[] = "0123456789ABCDEF";
  string encoded;
  encoded.reserve(str->size());
  bool changed = false;
  for (const char *in = str->c_str(); *in; ++in) {
    // work on the unsigned byte value: isalnum() is undefined for negative
    // values, and printing a negative char with "%02X" produced eight hex
    // digits, overflowing the former 4 byte sprintf() buffer
    const unsigned char byte = static_cast<unsigned char>(*in);
    if (isalnum(byte)) {
      encoded += *in;
    }
    else {
      encoded += '%';
      encoded += hexDigits[byte >> 4];
      encoded += hexDigits[byte & 0x0F];
      changed = true;
    }
  }

  if (changed) {
    // unsafe characters were found and replaced
    *str = encoded;
  }
} // urlEncode
2315 | |
// Split URL into protocol, auth-info (user, password), hostname, port,
// document name and query. Every output pointer may be NULL if the caller is
// not interested in that part; parts not present in aURI are set to empty.
// - the text up to the first ':' anywhere in aURI is taken as protocol, up to
//   two '/' following it are skipped
// - auth-info is only looked for when a protocol was found, and is only
//   recognized in the form "user:password@" within the part before the first '/'
// - the document name is returned without its leading '/'
// - the query is everything after the first '?'
// NOTE: no url-decoding is done in this function. Callers decode the parts as
//       needed (the SPLIT_URL_MAIN self test decodes protocol, host, doc, user
//       and password, but not port and query).
void splitURL(const char *aURI,string *aProtocol,string *aHost,
              string *aDoc, string *aUser, string *aPasswd,
              string *aPort, string *aQuery)
{
  // p: current parsing position, q/r: positions of delimiters found
  const char *p,*q,*r;

  p=aURI;
  // extract protocol
  q=strchr(p,':');
  if (q) {
    // protocol found
    if (aProtocol) aProtocol->assign(p,q-p);
    p=q+1; // past colon
    int count = 0;
    while (*p=='/' && count < 2) {
      p++; // past slashes following the colon (two expected, ignore if less are given)
      count++;
    }
    // now identify end of host part (local copy, only used to look for auth info)
    string host;
    q=strchr(p, '/');
    if (!q) {
      // no slash, skip forward to end of string
      q = p + strlen(p);
    }
    host.assign(p, q - p);

    // if protocol specified, check for auth info
    const char *h = host.c_str();
    q=strchr(h,'@');
    r=strchr(h,':');
    // auth info needs both delimiters, with the colon in front of the '@'
    if (q && r && q>r) {
      // auth exists
      if (aUser) aUser->assign(h,r-h);
      if (aPasswd) aPasswd->assign(r+1,q-r-1);
      // skip auth in full string (q and h point into the copy, so only their distance is used)
      p += q + 1 - h;
    }
    else {
      // no auth found
      if (aUser) aUser->erase();
      if (aPasswd) aPasswd->erase();
    }
    // p now points to host part, as expected below
  }
  else {
    // no protocol found
    if (aProtocol) aProtocol->erase();
    // no protocol, no auth
    if (aUser) aUser->erase();
    if (aPasswd) aPasswd->erase();
  }
  // separate hostname and document
  std::string host;
  // - check for path
  q=strchr(p,'/');
  // - if no path, check if there is a CGI param directly after the host name
  if (!q) {
    // doc part left empty in this case
    if (aDoc) aDoc->erase();
    q=strchr(p,'?');
    if (q) {
      // query directly follows host
      host.assign(p, q - p);
      if (aQuery) aQuery->assign(q + 1);
    } else {
      // entire string is considered the host
      host.assign(p);
      if (aQuery) aQuery->erase();
    }
  }
  else {
    // host part stops at slash
    host.assign(p, q - p);
    // in case of '/', do not put slash into docname
    // even if it would be empty (caller expected to add
    // slash as needed)
    p = q + 1; // exclude slash
    // now check for query
    q=strchr(p,'?');
    if (q) {
      // split at question mark
      if (aDoc) aDoc->assign(p, q - p);
      if (aQuery) aQuery->assign(q + 1);
    } else {
      // whole string is document name
      if (aDoc) aDoc->assign(p);
      if (aQuery) aQuery->erase();
    }
  }

  // separate optional port from host part here, before any url-decoding by
  // the caller, because decoding might introduce new : characters into the host name
  size_t colon = host.find(':');
  if (colon != host.npos) {
    if (aHost) aHost->assign(host.substr(0, colon));
    if (aPort) aPort->assign(host.substr(colon + 1));
  } else {
    if (aHost) aHost->assign(host);
    if (aPort) aPort->erase();
  }
} // splitURL
2420 | |
2421 | #ifdef SPLIT_URL_MAIN |
2422 | |
2423 | #include <stdio.h> |
2424 | #include <assert.h> |
2425 | |
2426 | static void test(const std::string &in, const std::string &expected) |
2427 | { |
2428 | string protocol, host, doc, user, password, port, query; |
2429 | char buffer[1024]; |
2430 | |
2431 | splitURL(in.c_str(), &protocol, &host, &doc, &user, &password, &port, &query); |
2432 | |
2433 | // URL-decode each part |
2434 | urlDecode(&protocol); |
2435 | urlDecode(&host); |
2436 | urlDecode(&doc); |
2437 | urlDecode(&user); |
2438 | urlDecode(&password); |
2439 | |
2440 | sprintf(buffer, |
2441 | "prot '%s' user '%s' passwd '%s' host '%s' port '%s' doc '%s' query '%s'", |
2442 | protocol.c_str(), |
2443 | user.c_str(), |
2444 | password.c_str(), |
2445 | host.c_str(), |
2446 | port.c_str(), |
2447 | doc.c_str(), |
2448 | query.c_str()); |
2449 | printf("%s -> %s\n", in.c_str(), buffer); |
2450 | assert(expected == buffer); |
2451 | } |
2452 | |
2453 | int main(int argc, char **argv) |
2454 | { |
2455 | test("http://user:passwd@host/patha/pathb?query", |
2456 | "prot 'http' user 'user' passwd 'passwd' host 'host' port '' doc 'patha/pathb' query 'query'"); |
2457 | test("http://user:passwd@host:port/patha/pathb?query", |
2458 | "prot 'http' user 'user' passwd 'passwd' host 'host' port 'port' doc 'patha/pathb' query 'query'"); |
2459 | test("file:///foo/bar", |
2460 | "prot 'file' user '' passwd '' host '' port '' doc 'foo/bar' query ''"); |
2461 | test("http://host%3a:port?param=value", |
2462 | "prot 'http' user '' passwd '' host 'host:' port 'port' doc '' query 'param=value'"); |
2463 | test("http://host%3a?param=value", |
2464 | "prot 'http' user '' passwd '' host 'host:' port '' doc '' query 'param=value'"); |
2465 | test("foo%24", |
2466 | "prot '' user '' passwd '' host 'foo$' port '' doc '' query ''"); |
2467 | test("foo%2f", |
2468 | "prot '' user '' passwd '' host 'foo/' port '' doc '' query ''"); |
2469 | test("foo%2A", |
2470 | "prot '' user '' passwd '' host 'foo*' port '' doc '' query ''"); |
2471 | test("foo%24bar", |
2472 | "prot '' user '' passwd '' host 'foo$bar' port '' doc '' query ''"); |
2473 | test("%24bar", |
2474 | "prot '' user '' passwd '' host '$bar' port '' doc '' query ''"); |
2475 | test("foo%2", |
2476 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); |
2477 | test("foo%", |
2478 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); |
2479 | test("foo%g", |
2480 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); |
2481 | test("foo%gh", |
2482 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); |
2483 | test("%ghbar", |
2484 | "prot '' user '' passwd '' host 'bar' port '' doc '' query ''"); |
2485 | return 0; |
2486 | } |
2487 | #endif // SPLIT_URL_MAIN |
2488 | |
2489 | #endif //SYSYNC_ENGINE |
2490 | |
2491 | |
2492 | // returns type from meta |
2493 | const char *smlMetaTypeToCharP(SmlMetInfMetInfPtr_t aMetaP) |
2494 | { |
2495 | if (!aMetaP) return NULL__null; // no meta at all |
2496 | return smlPCDataToCharP(aMetaP->type); |
2497 | } // smlMetaTypeToCharP |
2498 | |
2499 | |
2500 | |
2501 | // returns Next Anchor from meta |
2502 | const char *smlMetaNextAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP) |
2503 | { |
2504 | if (!aMetaP) return NULL__null; // no meta at all |
2505 | if (!aMetaP->anchor) return NULL__null; // no anchor at all |
2506 | return smlPCDataToCharP(aMetaP->anchor->next); |
2507 | } // smlMetaAnchorToCharP |
2508 | |
2509 | |
2510 | // returns Last Anchor from meta |
2511 | const char *smlMetaLastAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP) |
2512 | { |
2513 | if (!aMetaP) return NULL__null; // no meta at all |
2514 | if (!aMetaP->anchor) return NULL__null; // no anchor at all |
2515 | return smlPCDataToCharP(aMetaP->anchor->last); |
2516 | } // smlMetaLastAnchorToCharP |
2517 | |
2518 | |
2519 | // returns DevInf pointer if any in specified PCData, NULL otherwise |
2520 | SmlDevInfDevInfPtr_t smlPCDataToDevInfP(const SmlPcdataPtr_t aPCDataP) |
2521 | { |
2522 | if (!aPCDataP) return NULL__null; |
2523 | if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null; |
2524 | if (aPCDataP->extension!=SML_EXT_DEVINF) return NULL__null; |
2525 | return (SmlDevInfDevInfPtr_t)(aPCDataP->content); |
2526 | } // smlPCDataToDevInfP |
2527 | |
2528 | |
2529 | // returns MetInf pointer if any in specified PCData, NULL otherwise |
2530 | SmlMetInfMetInfPtr_t smlPCDataToMetInfP(const SmlPcdataPtr_t aPCDataP) |
2531 | { |
2532 | if (!aPCDataP) return NULL__null; |
2533 | if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null; |
2534 | if (aPCDataP->extension!=SML_EXT_METINF) return NULL__null; |
2535 | return (SmlMetInfMetInfPtr_t)(aPCDataP->content); |
2536 | } // smlPCDataToMetInfP |
2537 | |
2538 | |
2539 | // allocate memory via SyncML toolkit allocation function, but throw |
2540 | // exception if it fails. Used by SML |
2541 | void *_smlMalloc(MemSize_t size) |
2542 | { |
2543 | void *p; |
2544 | |
2545 | p=smlLibMalloc(size); |
2546 | if (!p) SYSYNC_THROW(TMemException("smlLibMalloc() failed"))throw TMemException("smlLibMalloc() failed"); |
2547 | return p; |
2548 | } // _smlMalloc |
2549 | |
2550 | |
2551 | // returns true on successful conversion of PCData string to sInt32 |
2552 | bool smlPCDataToULong(const SmlPcdataPtr_t aPCDataP, uInt32 &aLong) |
2553 | { |
2554 | return StrToULong(smlPCDataToCharP(aPCDataP),aLong); |
2555 | } // smlPCDataToLong |
2556 | |
2557 | // returns true on successful conversion of PCData string to sInt32 |
2558 | bool smlPCDataToLong(const SmlPcdataPtr_t aPCDataP, sInt32 &aLong) |
2559 | { |
2560 | return StrToLong(smlPCDataToCharP(aPCDataP),aLong); |
2561 | } // smlPCDataToLong |
2562 | |
2563 | #ifdef SYSYNC_ENGINE1 |
2564 | // returns true on successful conversion of PCData string to format |
2565 | bool smlPCDataToFormat(const SmlPcdataPtr_t aPCDataP, TFmtTypes &aFmt) |
2566 | { |
2567 | const char *fmt = smlPCDataToCharP(aPCDataP); |
2568 | sInt16 sh; |
2569 | if (*fmt) { |
2570 | if (!StrToEnum(encodingFmtSyncMLNames,numFmtTypes,sh,fmt)) |
2571 | return false; // unknown format |
2572 | aFmt=(TFmtTypes)sh; |
2573 | } |
2574 | else { |
2575 | aFmt=fmt_chr; // no spec = chr |
2576 | } |
2577 | return true; |
2578 | } // smlPCDataToFormat |
2579 | #endif //SYSYNC_ENGINE |
2580 | |
2581 | // build Meta anchor |
2582 | SmlPcdataPtr_t newMetaAnchor(const char *aNextAnchor, const char *aLastAnchor) |
2583 | { |
2584 | SmlPcdataPtr_t metaP; |
2585 | SmlMetInfAnchorPtr_t anchorP; |
2586 | |
2587 | // - create empty meta |
2588 | metaP=newMeta(); |
2589 | // - create new anchor |
2590 | anchorP=SML_NEW(SmlMetInfAnchor_t)((SmlMetInfAnchor_t*) _smlMalloc(sizeof(SmlMetInfAnchor_t))); |
2591 | // - set anchor contents |
2592 | //%%% anchorP->last=newPCDataOptEmptyString(aLastAnchor); // optional, but omitted only if string is NULL (not if only empty) |
2593 | anchorP->last=newPCDataOptString(aLastAnchor); // optional |
2594 | anchorP->next=newPCDataString(aNextAnchor); // mandatory |
2595 | // - set anchor |
2596 | ((SmlMetInfMetInfPtr_t)(metaP->content))->anchor=anchorP; |
2597 | // return |
2598 | return metaP; |
2599 | } // newMetaAnchor |
2600 | |
2601 | |
2602 | // build Meta type |
2603 | SmlPcdataPtr_t newMetaType(const char *aMetaType) |
2604 | { |
2605 | SmlPcdataPtr_t metaP; |
2606 | |
2607 | // - if not type, we don't create a meta at all |
2608 | if (aMetaType==NULL__null || *aMetaType==0) return NULL__null; |
2609 | // - create empty meta |
2610 | metaP=newMeta(); |
2611 | // - set type |
2612 | ((SmlMetInfMetInfPtr_t)(metaP->content))->type=newPCDataString(aMetaType); |
2613 | // return |
2614 | return metaP; |
2615 | } // newMetaType |
2616 | |
2617 | |
2618 | // build empty Meta |
2619 | SmlPcdataPtr_t newMeta(void) |
2620 | { |
2621 | SmlPcdataPtr_t metaP; |
2622 | SmlMetInfMetInfPtr_t metinfP; |
2623 | |
2624 | // - create empty PCData |
2625 | metaP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t))); |
2626 | metaP->contentType=SML_PCDATA_EXTENSION; |
2627 | metaP->extension=SML_EXT_METINF; |
2628 | // - %%% assume length is not relevant for structured content (looks like in mgrutil.c) |
2629 | metaP->length=0; |
2630 | // - create empty meta |
2631 | metinfP = SML_NEW(SmlMetInfMetInf_t)((SmlMetInfMetInf_t*) _smlMalloc(sizeof(SmlMetInfMetInf_t))); |
2632 | metaP->content=metinfP; // link to PCdata |
2633 | // - init meta options |
2634 | metinfP->version=NULL__null; |
2635 | metinfP->format=NULL__null; |
2636 | metinfP->type=NULL__null; |
2637 | metinfP->mark=NULL__null; |
2638 | metinfP->size=NULL__null; |
2639 | metinfP->nextnonce=NULL__null; |
2640 | metinfP->maxmsgsize=NULL__null; |
2641 | metinfP->mem=NULL__null; |
2642 | metinfP->emi=NULL__null; // PCData list |
2643 | metinfP->anchor=NULL__null; |
2644 | // - SyncML 1.1 |
2645 | metinfP->maxobjsize=NULL__null; |
2646 | // - SyncML 1.2 |
2647 | metinfP->flags=0; |
2648 | // return |
2649 | return metaP; |
2650 | } // newMeta |
2651 | |
2652 | |
2653 | // copy meta from existing meta (for data items only |
2654 | // anchor, mem, emi, nonce are not copied!) |
2655 | // Note however that we copy maxobjsize, as we (mis-)use it for ZIPPED_BINDATA_SUPPORT |
2656 | SmlPcdataPtr_t copyMeta(SmlPcdataPtr_t aOldMetaP) |
2657 | { |
2658 | if (!aOldMetaP) return NULL__null; |
2659 | SmlPcdataPtr_t newmetaP=newMeta(); |
2660 | if (!newmetaP) return NULL__null; |
2661 | SmlMetInfMetInfPtr_t oldmetinfP = smlPCDataToMetInfP(aOldMetaP); |
2662 | if (!oldmetinfP) return NULL__null; |
2663 | SmlMetInfMetInfPtr_t newmetInfP = smlPCDataToMetInfP(newmetaP); |
2664 | // - copy meta |
2665 | newmetInfP->version = smlPcdataDup(oldmetinfP->version); |
2666 | newmetInfP->format = smlPcdataDup(oldmetinfP->format); |
2667 | newmetInfP->type = smlPcdataDup(oldmetinfP->type); |
2668 | newmetInfP->mark = smlPcdataDup(oldmetinfP->mark); |
2669 | newmetInfP->size = smlPcdataDup(oldmetinfP->size); |
2670 | newmetInfP->maxobjsize = smlPcdataDup(oldmetinfP->maxobjsize); |
2671 | // return |
2672 | return newmetaP; |
2673 | } // copyMeta |
2674 | |
2675 | |
2676 | |
2677 | |
2678 | // add an item to an item list |
2679 | SmlItemListPtr_t *addItemToList( |
2680 | SmlItemPtr_t aItemP, // existing item data structure, ownership is passed to list |
2681 | SmlItemListPtr_t *aItemListPP // adress of pointer to existing item list or NULL |
2682 | ) |
2683 | { |
2684 | if (aItemListPP && aItemP) { |
2685 | // find last itemlist pointer |
2686 | while (*aItemListPP) { |
2687 | aItemListPP=&((*aItemListPP)->next); |
2688 | } |
2689 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry |
2690 | *aItemListPP = SML_NEW(SmlItemList_t)((SmlItemList_t*) _smlMalloc(sizeof(SmlItemList_t))); |
2691 | (*aItemListPP)->next=NULL__null; |
2692 | (*aItemListPP)->item=aItemP; // insert new item |
2693 | // return pointer to pointer to next element (which is now NULL). |
2694 | // Can be passed in to addPCDataToList() again to append more elements without searching |
2695 | // for end-of-list |
2696 | return &((*aItemListPP)->next); |
2697 | } |
2698 | // nop, return pointer unmodified |
2699 | return aItemListPP; |
2700 | } // addItemToList |
2701 | |
2702 | |
2703 | // add a CTData item to a CTDataList |
2704 | SmlDevInfCTDataListPtr_t *addCTDataToList( |
2705 | SmlDevInfCTDataPtr_t aCTDataP, // existing CTData item data structure, ownership is passed to list |
2706 | SmlDevInfCTDataListPtr_t *aCTDataListPP // adress of pointer to existing item list or NULL |
2707 | ) |
2708 | { |
2709 | if (aCTDataListPP && aCTDataP) { |
2710 | // find last itemlist pointer |
2711 | while (*aCTDataListPP) { |
2712 | aCTDataListPP=&((*aCTDataListPP)->next); |
2713 | } |
2714 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry |
2715 | *aCTDataListPP = SML_NEW(SmlDevInfCTDataList_t)((SmlDevInfCTDataList_t*) _smlMalloc(sizeof(SmlDevInfCTDataList_t ))); |
2716 | (*aCTDataListPP)->next=NULL__null; |
2717 | (*aCTDataListPP)->data=aCTDataP; // insert new data |
2718 | // return pointer to pointer to next element (which is now NULL). |
2719 | // Can be passed in to addPCDataToList() again to append more elements without searching |
2720 | // for end-of-list |
2721 | return &((*aCTDataListPP)->next); |
2722 | } |
2723 | // nop, return pointer unmodified |
2724 | return aCTDataListPP; |
2725 | } // addCTDataToList |
2726 | |
2727 | |
2728 | // add a CTDataProp item to a CTDataPropList |
2729 | SmlDevInfCTDataPropListPtr_t *addCTDataPropToList( |
2730 | SmlDevInfCTDataPropPtr_t aCTDataPropP, // existing CTDataProp item data structure, ownership is passed to list |
2731 | SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL |
2732 | ) |
2733 | { |
2734 | if (aCTDataPropListPP && aCTDataPropP) { |
2735 | // find last itemlist pointer |
2736 | while (*aCTDataPropListPP) { |
2737 | aCTDataPropListPP=&((*aCTDataPropListPP)->next); |
2738 | } |
2739 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry |
2740 | *aCTDataPropListPP = SML_NEW(SmlDevInfCTDataPropList_t)((SmlDevInfCTDataPropList_t*) _smlMalloc(sizeof(SmlDevInfCTDataPropList_t ))); |
2741 | (*aCTDataPropListPP)->next=NULL__null; |
2742 | (*aCTDataPropListPP)->data=aCTDataPropP; // insert new data |
2743 | // return pointer to pointer to next element (which is now NULL). |
2744 | // Can be passed in to addPCDataToList() again to append more elements without searching |
2745 | // for end-of-list |
2746 | return &((*aCTDataPropListPP)->next); |
2747 | } |
2748 | // nop, return pointer unmodified |
2749 | return aCTDataPropListPP; |
2750 | } // addCTDataPropToList |
2751 | |
2752 | |
2753 | // add a CTData describing a property (as returned by newDevInfCTData()) |
2754 | // as a new property without parameters to a CTDataPropList |
2755 | SmlDevInfCTDataPropListPtr_t *addNewPropToList( |
2756 | SmlDevInfCTDataPtr_t aPropCTData, // CTData describing property |
2757 | SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL |
2758 | ) |
2759 | { |
2760 | SmlDevInfCTDataPropPtr_t propdataP = SML_NEW(SmlDevInfCTDataProp_t)((SmlDevInfCTDataProp_t*) _smlMalloc(sizeof(SmlDevInfCTDataProp_t ))); |
2761 | propdataP->param = NULL__null; // no params |
2762 | propdataP->prop = aPropCTData; |
2763 | return addCTDataPropToList(propdataP, aCTDataPropListPP); |
2764 | } // addNewPropToList |
2765 | |
2766 | |
2767 | |
2768 | // add PCData element to a PCData list |
2769 | SmlPcdataListPtr_t *addPCDataToList( |
2770 | SmlPcdataPtr_t aPCDataP, // Existing PCData element to be added, ownership is passed to list |
2771 | SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL |
2772 | ) |
2773 | { |
2774 | if (aPCDataListPP) { |
2775 | // find last PCDataList pointer |
2776 | while (*aPCDataListPP) { |
2777 | aPCDataListPP=&((*aPCDataListPP)->next); |
2778 | } |
2779 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new PCDataList entry |
2780 | *aPCDataListPP = SML_NEW(SmlPcdataList_t)((SmlPcdataList_t*) _smlMalloc(sizeof(SmlPcdataList_t))); |
2781 | (*aPCDataListPP)->next=NULL__null; |
2782 | (*aPCDataListPP)->data=aPCDataP; // insert new item |
2783 | // return pointer to pointer to next element (which is now NULL). |
2784 | // Can be passed in to addPCDataToList() again to append more elements without searching |
2785 | // for end-of-list |
2786 | return &((*aPCDataListPP)->next); |
2787 | } |
2788 | return NULL__null; |
2789 | } // addPCDataToList |
2790 | |
2791 | |
2792 | // add PCData string to a PCData list |
2793 | SmlPcdataListPtr_t *addPCDataStringToList( |
2794 | const char *aString, // String to be added |
2795 | SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL |
2796 | ) |
2797 | { |
2798 | return addPCDataToList(newPCDataString(aString),aPCDataListPP); |
2799 | } // addPCDataStringToList |
2800 | |
2801 | |
2802 | // create new optional location (source or target) |
2803 | // Returns NULL if URI specified is NULL or empty |
2804 | SmlSourcePtr_t newOptLocation( |
2805 | const char *aLocURI, |
2806 | const char *aLocName |
2807 | ) |
2808 | { |
2809 | if (!aLocURI || *aLocURI==0) return NULL__null; |
2810 | else return newLocation(aLocURI,aLocName); |
2811 | } // newOptLocation |
2812 | |
2813 | |
2814 | // create new location (source or target) |
2815 | // always returns location, even if URI and/or name are empty |
2816 | // If name is NULL or empty, only URI is generated |
2817 | SmlSourcePtr_t newLocation( |
2818 | const char *aLocURI, |
2819 | const char *aLocName |
2820 | ) |
2821 | { |
2822 | SmlSourcePtr_t locP; |
2823 | |
2824 | locP = SML_NEW(SmlSource_t)((SmlSource_t*) _smlMalloc(sizeof(SmlSource_t))); |
2825 | // URI is always present (might be empty, though) |
2826 | locP->locURI=newPCDataString(aLocURI); |
2827 | // name only if not empty |
2828 | if (aLocName && *aLocName!=0) |
2829 | locP->locName=newPCDataString(aLocName); |
2830 | else |
2831 | locP->locName=NULL__null; |
2832 | // filter defaults to NULL |
2833 | locP->filter=NULL__null; |
2834 | return locP; |
2835 | } // newLocation |
2836 | |
2837 | |
2838 | // create new empty Item |
2839 | SmlItemPtr_t newItem(void) |
2840 | { |
2841 | SmlItemPtr_t itemP; |
2842 | |
2843 | itemP = SML_NEW(SmlItem_t)((SmlItem_t*) _smlMalloc(sizeof(SmlItem_t))); |
2844 | itemP->target=NULL__null; |
2845 | itemP->source=NULL__null; |
2846 | itemP->meta=NULL__null; |
2847 | itemP->data=NULL__null; |
2848 | // SyncML 1.1, no MoreData set |
2849 | itemP->flags=0; |
2850 | // SyncML 1.2 |
2851 | itemP->targetParent=NULL__null; |
2852 | itemP->sourceParent=NULL__null; |
2853 | // custom data of client |
2854 | itemP->aux=NULL__null; |
2855 | return itemP; |
2856 | } // newItem |
2857 | |
2858 | |
2859 | // create new Item with string-type data |
2860 | SmlItemPtr_t newStringDataItem( |
2861 | const char *aString |
2862 | ) |
2863 | { |
2864 | SmlItemPtr_t itemP=newItem(); |
2865 | itemP->data=newPCDataString(aString); |
2866 | return itemP; |
2867 | } // newStringDataItem |
2868 | |
2869 | |
2870 | // create meta-format PCData |
2871 | SmlPcdataPtr_t newPCDataFormat( |
2872 | TFmtTypes aFmtType, |
2873 | bool aShowDefault |
2874 | ) |
2875 | { |
2876 | if (aFmtType==fmt_chr && !aShowDefault) |
2877 | return NULL__null; // default |
2878 | else |
2879 | return newPCDataString(encodingFmtSyncMLNames[aFmtType]); // show format type |
2880 | } // newPCDataFormat |
2881 | |
2882 | |
2883 | // create new string-type PCData, if NULL or empty string is passed for aData, |
2884 | // NULL is returned (optional info not there) |
2885 | SmlPcdataPtr_t newPCDataFormatted( |
2886 | const uInt8 *aData, // data |
2887 | sInt32 aLength, // length of data, if<=0 then string length is calculated |
2888 | TFmtTypes aFmtType, // encoding Format |
2889 | bool aNeedsOpaque // set opaque needed (string that could confuse XML parsing or even binary) |
2890 | ) |
2891 | { |
2892 | if (!aData) return NULL__null; // no data |
2893 | if (aLength==0) aLength=strlen((const char *)aData); |
2894 | if (aLength==0) return NULL__null; // no data |
2895 | // encode input string if needed |
2896 | SmlPcdataPtr_t pcdataP; |
2897 | char *b64data; |
2898 | uInt32 b64len; |
2899 | switch (aFmtType) { |
2900 | case fmt_b64: |
2901 | // convert to b64 |
2902 | b64len=0; |
2903 | b64data=b64::encode(aData, aLength, &b64len); |
2904 | pcdataP = newPCDataString(b64data,b64len); |
2905 | b64::free(b64data); |
2906 | return pcdataP; |
2907 | default: |
2908 | // just copy into string or opaque/C_DATA string |
2909 | return newPCDataStringX(aData, aNeedsOpaque, aLength); |
2910 | } |
2911 | } // newPCDataEncoded |
2912 | |
2913 | |
2914 | // create new string-type PCData, if NULL or empty string is passed for aString, |
2915 | // NULL is returned (optional info not there) |
2916 | SmlPcdataPtr_t newPCDataOptString( |
2917 | const char *aString, |
2918 | sInt32 aLength // length of string, if<0 then length is calculated |
2919 | ) |
2920 | { |
2921 | if (aString && (*aString!=0)) |
2922 | return newPCDataString(aString,aLength); |
2923 | else |
2924 | return NULL__null; |
2925 | } // newPCDataOptString |
2926 | |
2927 | |
2928 | // create new string-type PCData, if NULL is passed for aString, |
2929 | // NULL is returned (optional info not there) |
2930 | // if empty string is passed, PCData with empty contents will be created |
2931 | SmlPcdataPtr_t newPCDataOptEmptyString( |
2932 | const char *aString, |
2933 | sInt32 aLength // length of string, if<0 then length is calculated |
2934 | ) |
2935 | { |
2936 | if (aString) |
2937 | return newPCDataString(aString,aLength); |
2938 | else |
2939 | return NULL__null; |
2940 | } // newPCDataOptEmptyString |
2941 | |
2942 | |
2943 | // create new string-type PCData, if NULL is passed for aString, |
2944 | // an empty string is created (that is, a PCData with string terminator as |
2945 | // content only, length=0) |
2946 | SmlPcdataPtr_t newPCDataString( |
2947 | const char *aString, |
2948 | sInt32 aLength // length of string, if<0 then length is calculated |
2949 | ) |
2950 | { |
2951 | return newPCDataStringX((const uInt8 *)aString,false,aLength); |
2952 | } // newPCDataString |
2953 | |
2954 | |
2955 | // create new PCData, aOpaque can be used to generate non-string data |
2956 | // Note: empty strings are always coded as non-opaque, even if aOpaque is set |
2957 | SmlPcdataPtr_t newPCDataStringX( |
2958 | const uInt8 *aString, |
2959 | bool aOpaque, // if set, an opaque method (OPAQUE or CDATA) is used |
2960 | sInt32 aLength // length of string, if<0 then length is calculated |
2961 | ) |
2962 | { |
2963 | SmlPcdataPtr_t pcdataP; |
2964 | |
2965 | pcdataP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t))); |
2966 | |
2967 | // determine length |
2968 | if (aLength>=0 && aString) |
2969 | pcdataP->length = aLength; // as specified, and string argument not NULL |
2970 | else |
2971 | pcdataP->length = aString ? strlen((const char *)aString) : 0; // from argument, if NULL -> length=0 |
2972 | // determine type |
2973 | if (aOpaque && aLength!=0) { |
2974 | // Note: due to modification in RTK, this generates |
2975 | // OPAQUE in WBXML and CDATA in XML |
2976 | pcdataP->contentType=SML_PCDATA_OPAQUE; |
2977 | } |
2978 | else { |
2979 | // non-critical string |
2980 | #ifdef SML_STRINGS_AS_OPAQUE |
2981 | pcdataP->contentType=SML_PCDATA_OPAQUE; |
2982 | #else |
2983 | pcdataP->contentType=SML_PCDATA_STRING; |
2984 | #endif |
2985 | } |
2986 | pcdataP->extension=SML_EXT_UNDEFINED; |
2987 | // - allocate data space (ALWAYS with room for a terminator, even if Opaque or empty string) |
2988 | pcdataP->content=smlLibMalloc(pcdataP->length+1); // +1 for terminator, see below |
2989 | // copy data (if any) |
2990 | if (pcdataP->length>0) { |
2991 | // - copy string |
2992 | smlLibMemcpy(pcdataP->content,aString,pcdataP->length); |
2993 | } |
2994 | // set terminator |
2995 | ((char *)(pcdataP->content))[pcdataP->length]=0; // terminate C string |
2996 | // return |
2997 | return pcdataP; |
2998 | } // newPCDataStringX |
2999 | |
3000 | |
3001 | // create new string-type PCData from C++ string |
3002 | SmlPcdataPtr_t newPCDataString( |
3003 | const string &aString |
3004 | ) |
3005 | { |
3006 | return newPCDataString(aString.c_str(),aString.length()); |
3007 | } // newPCDataString(string&) |
3008 | |
3009 | |
3010 | // create new decimal string representation of sInt32 as PCData |
3011 | SmlPcdataPtr_t newPCDataLong( |
3012 | sInt32 aLong |
3013 | ) |
3014 | { |
3015 | const int ssiz=20; |
3016 | char s[ssiz]; |
3017 | |
3018 | snprintf(s,ssiz,"%ld",(long)aLong); |
3019 | return newPCDataString(s); |
3020 | } // newPCDataLong |
3021 | |
3022 | |
3023 | // Nonce generator allowing last-session nonce to be correctly re-generated in next session |
3024 | void generateNonce(string &aNonce, const char *aDevStaticString, sInt32 aSessionStaticID) |
3025 | { |
3026 | md5::SYSYNC_MD5_CTX context; |
3027 | uInt8 digest[16]; |
3028 | md5::Init (&context); |
3029 | // - add in static device string |
3030 | md5::Update (&context, (const uInt8 *)aDevStaticString, strlen(aDevStaticString)); |
3031 | // - add in session static ID in binary format |
3032 | md5::Update (&context, (const uInt8 *)&aSessionStaticID, sizeof(sInt32)); |
3033 | // - done |
3034 | md5::Final (digest, &context); |
3035 | // - make string of first 48 bit of MD5: 48 bits, use 6 bits per char = 8 chars |
3036 | uInt64 dig48 = ((uInt32)digest[0] << 0) | |
3037 | ((uInt32)digest[1] << 8) | |
3038 | ((uInt32)digest[2] << 16) | |
3039 | ((uInt32)digest[3] << 24); |
3040 | aNonce.erase(); |
3041 | for (sInt16 k=0; k<8; k++) { |
3042 | aNonce+=((dig48 & 0x03F) + 0x21); |
3043 | dig48 = dig48 >> 6; |
3044 | } |
3045 | } // generateNonce |
3046 | |
3047 | |
3048 | // create challenge of requested type |
3049 | SmlChalPtr_t newChallenge(TAuthTypes aAuthType, const string &aNextNonce, bool aBinaryAllowed) |
3050 | { |
3051 | SmlChalPtr_t chalP=NULL__null; |
3052 | SmlMetInfMetInfPtr_t metaP; |
3053 | |
3054 | if (aAuthType!=auth_none) { |
3055 | // new challenge record |
3056 | chalP = SML_NEW(SmlChal_t)((SmlChal_t*) _smlMalloc(sizeof(SmlChal_t))); |
3057 | // add empty meta |
3058 | chalP->meta=newMeta(); |
3059 | metaP=(SmlMetInfMetInfPtr_t)(chalP->meta->content); |
3060 | // add type and format |
3061 | // - type |
3062 | metaP->type=newPCDataString(authTypeSyncMLNames[aAuthType]); |
3063 | // - format |
3064 | const char *fmt = NULL__null; |
3065 | switch (aAuthType) { |
3066 | case auth_basic: |
3067 | // always request b64 |
3068 | fmt=encodingFmtSyncMLNames[fmt_b64]; |
3069 | break; |
3070 | case auth_md5: |
3071 | // request b64 only for non-binary capable encoding (that is, XML) |
3072 | /* %%% dont do that, Nokia9210 miserably fails when we do that, |
3073 | * it sends its data B64 encoded, but obviously with bad |
3074 | * data in it. Ericsson T39m seems to do it correctly however. |
3075 | if (!aBinaryAllowed) |
3076 | fmt=encodingFmtSyncMLNames[fmt_b64]; |
3077 | */ |
3078 | // always request b64 for now, seems to be safer with not fully compatible clients |
3079 | fmt=encodingFmtSyncMLNames[fmt_b64]; |
3080 | break; |
3081 | default: break; |
3082 | } |
3083 | metaP->format=newPCDataOptString(fmt); // set format, but not empty |
3084 | // - add nonce if needed |
3085 | if (aAuthType==auth_md5) { |
3086 | // MD5 also might need nonce |
3087 | if (!aNextNonce.empty()) { |
3088 | // add base64 encoded nonce string |
3089 | uInt32 b64len; |
3090 | char *b64=b64::encode((const uInt8 *)aNextNonce.c_str(),aNextNonce.size(),&b64len); |
3091 | metaP->nextnonce=newPCDataString(b64,b64len); |
3092 | b64::free(b64); // return buffer allocated by b64_encode |
3093 | } |
3094 | } |
3095 | } |
3096 | return chalP; |
3097 | } // newChallenge |
3098 | |
3099 | |
3100 | // create new property or param descriptor for CTCap |
3101 | SmlDevInfCTDataPtr_t newDevInfCTData(cAppCharP aName,uInt32 aSize, bool aNoTruncate, uInt32 aMaxOccur, cAppCharP aDataType) |
3102 | { |
3103 | SmlDevInfCTDataPtr_t result = SML_NEW(SmlDevInfCTData_t)((SmlDevInfCTData_t*) _smlMalloc(sizeof(SmlDevInfCTData_t))); |
3104 | // fill descriptor |
3105 | // - name if property or param |
3106 | result->name=newPCDataString(aName); |
3107 | // - no display name so far |
3108 | result->dname=NULL__null; // no display name |
3109 | // - datatype (optional) |
3110 | result->datatype=newPCDataOptString(aDataType); |
3111 | // - max size |
3112 | if (aSize==0) |
3113 | result->maxsize=NULL__null; // no size |
3114 | else |
3115 | result->maxsize=newPCDataLong(aSize); // set size |
3116 | // - no valenum here, will be added later if any |
3117 | result->valenum=NULL__null; // no valenum |
3118 | // SyncML 1.2 |
3119 | if (aMaxOccur==0) |
3120 | result->maxoccur=NULL__null; // no maxoccur |
3121 | else |
3122 | result->maxoccur=newPCDataLong(aMaxOccur); // set maxoccur |
3123 | result->flags = aNoTruncate ? SmlDevInfNoTruncate_f0x0020 : 0; // notruncate flag or none |
3124 | return result; |
3125 | } // newDevInfCTData |
3126 | |
3127 | |
3128 | // frees prototype element and sets calling pointer to NULL |
3129 | void FreeProtoElement(void * &aVoidP) |
3130 | { |
3131 | if (aVoidP) smlFreeProtoElement(aVoidP); |
3132 | aVoidP=NULL__null; |
3133 | } // FreeProtoElement |
3134 | |
3135 | } // namespace sysync |
3136 | |
3137 | // eof |