File: | libsynthesis/src/sysync_SDK/Sources/sysync_utils.cpp |
Warning: | line 1449, column 7 Assigned value is garbage or undefined |
1 | /* | |||
2 | * File: sysync_utils.cpp | |||
3 | * | |||
4 | * Author: Lukas Zeller (luz@plan44.ch) | |||
5 | * | |||
6 | * Provides some helper functions interfacing between SyncML Toolkit | |||
7 | * and C++ | |||
8 | * | |||
9 | * Copyright (c) 2001-2011 by Synthesis AG + plan44.ch | |||
10 | * | |||
11 | * 2001-05-16 : luz : created | |||
12 | * | |||
13 | */ | |||
14 | ||||
15 | #include "prefix_file.h" | |||
16 | #include "sync_include.h" | |||
17 | #include "sysync_utils.h" | |||
18 | ||||
19 | #include "libmem.h" | |||
20 | ||||
21 | ||||
22 | #ifdef SYSYNC_TOOL | |||
23 | #include "syncappbase.h" // for CONSOLEPRINTF | |||
24 | #include "customimplagent.h" // for DBCharSetNames | |||
25 | #endif | |||
26 | ||||
27 | namespace sysync { | |||
28 | ||||
29 | // Support for SySync Diagnostic Tool | |||
30 | #ifdef SYSYNC_TOOL | |||
31 | ||||
32 | // parse RFC 2822 addr spec | |||
33 | int parse2822AddrSpec(int argc, const char *argv[]) | |||
34 | { | |||
35 | if (argc<0) { | |||
36 | // help requested | |||
37 | CONSOLEPRINTF((" addrparse <RFC2822 addr-spec string to parse>"))SySync_ConsolePrintf(stderr, "SYSYNC " " addrparse <RFC2822 addr-spec string to parse>" "\n"); | |||
38 | CONSOLEPRINTF((" Parse name and email address out of a RFC2822-type addr-spec"))SySync_ConsolePrintf(stderr, "SYSYNC " " Parse name and email address out of a RFC2822-type addr-spec" "\n"); | |||
39 | return EXIT_SUCCESS0; | |||
40 | } | |||
41 | // check for argument | |||
42 | if (argc<1) { | |||
43 | CONSOLEPRINTF(("1 argument required"))SySync_ConsolePrintf(stderr, "SYSYNC " "1 argument required" "\n" ); | |||
44 | return EXIT_FAILURE1; | |||
45 | } | |||
46 | // parse | |||
47 | string addrname,addremail; | |||
48 | const char* p=argv[0]; | |||
49 | p=parseRFC2822AddrSpec(p,addrname,addremail); | |||
50 | // show | |||
51 | CONSOLEPRINTF(("Input : %s",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %s" "\n" ,argv[0]); | |||
52 | CONSOLEPRINTF(("Name : %s",addrname.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Name : %s" "\n" ,addrname.c_str()); | |||
53 | CONSOLEPRINTF(("email : %s",addremail.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "email : %s" "\n" ,addremail.c_str()); | |||
54 | CONSOLEPRINTF(("unparsed rest : %s",p))SySync_ConsolePrintf(stderr, "SYSYNC " "unparsed rest : %s" "\n" ,p); | |||
55 | return EXIT_SUCCESS0; | |||
56 | } // parse2822AddrSpec | |||
57 | ||||
58 | ||||
59 | // convert between character sets | |||
60 | int charConv(int argc, const char *argv[]) | |||
61 | { | |||
62 | if (argc<0) { | |||
63 | // help requested | |||
64 | CONSOLEPRINTF((" charconv [<input charset>] <output charset> <C-string to convert>"))SySync_ConsolePrintf(stderr, "SYSYNC " " charconv [<input charset>] <output charset> <C-string to convert>" "\n"); | |||
65 | CONSOLEPRINTF((" Convert from one charset to another. Default input is UTF-8"))SySync_ConsolePrintf(stderr, "SYSYNC " " Convert from one charset to another. Default input is UTF-8" "\n"); | |||
66 | return EXIT_SUCCESS0; | |||
67 | } | |||
68 | ||||
69 | #ifdef __TEST_EQUALITY_OF_CP936_WITH_GB2312__ | |||
70 | // quick test | |||
71 | uInt32 ch_in; | |||
72 | for (ch_in=0x8100; ch_in<=0xFFFF; ch_in++) { | |||
73 | // convert into internal UTF-8 | |||
74 | string s_internal,s_in; | |||
75 | s_in.erase(); | |||
76 | if (ch_in>=0x8100) s_in+=(ch_in >> 8) & 0xFF; | |||
77 | s_in+=(ch_in & 0xFF); | |||
78 | s_internal.erase(); | |||
79 | appendStringAsUTF8( | |||
80 | s_in.c_str(), | |||
81 | s_internal, | |||
82 | chs_gb2312 | |||
83 | ); | |||
84 | // convert into output format | |||
85 | string s_out; | |||
86 | s_out.erase(); | |||
87 | appendUTF8ToString( | |||
88 | s_internal.c_str(), | |||
89 | s_out, | |||
90 | chs_cp936 | |||
91 | ); | |||
92 | // show differences | |||
93 | if (s_in!=s_out && s_out.size()>0 && s_out[0]!=INCONVERTIBLE_PLACEHOLDER'_') { | |||
94 | string s1,s2; | |||
95 | s1.erase(); StrToCStrAppend(s_in.c_str(), s1); | |||
96 | s2.erase(); StrToCStrAppend(s_out.c_str(), s2); | |||
97 | CONSOLEPRINTF(("\"%s\" != \"%s\"",s1.c_str(),s2.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "\"%s\" != \"%s\"" "\n" ,s1.c_str(),s2.c_str()); | |||
98 | } | |||
99 | } | |||
100 | return EXIT_SUCCESS0; | |||
101 | #endif | |||
102 | ||||
103 | // check for argument | |||
104 | if (argc<2) { | |||
105 | CONSOLEPRINTF(("2 or 3 arguments required"))SySync_ConsolePrintf(stderr, "SYSYNC " "2 or 3 arguments required" "\n"); | |||
106 | return EXIT_FAILURE1; | |||
107 | } | |||
108 | int ochsarg=1; | |||
109 | sInt16 enu; | |||
110 | // get input charset | |||
111 | TCharSets charset_in=chs_utf8; | |||
112 | if (argc==3) { | |||
113 | // first arg is input charset | |||
114 | if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[0])) { | |||
115 | CONSOLEPRINTF(("'%s' is not a valid input charset name",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid input charset name" "\n",argv[0]); | |||
116 | return EXIT_FAILURE1; | |||
117 | } | |||
118 | charset_in = (TCharSets)enu; | |||
119 | } | |||
120 | else { | |||
121 | ochsarg=0; // first arg ist input charset | |||
122 | } | |||
123 | // get output charset | |||
124 | TCharSets charset_out; | |||
125 | if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[ochsarg])) { | |||
126 | CONSOLEPRINTF(("'%s' is not a valid output charset name",argv[ochsarg]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid output charset name" "\n",argv[ochsarg]); | |||
127 | return EXIT_FAILURE1; | |||
128 | } | |||
129 | charset_out = (TCharSets)enu; | |||
130 | // get string to convert | |||
131 | string s_in; | |||
132 | s_in.erase(); | |||
133 | CStrToStrAppend(argv[ochsarg+1], s_in); | |||
134 | // convert into internal UTF-8 | |||
135 | string s_internal; | |||
136 | s_internal.erase(); | |||
137 | appendStringAsUTF8( | |||
138 | s_in.c_str(), | |||
139 | s_internal, | |||
140 | charset_in | |||
141 | ); | |||
142 | // convert into output format | |||
143 | string s_out; | |||
144 | s_out.erase(); | |||
145 | appendUTF8ToString( | |||
146 | s_internal.c_str(), | |||
147 | s_out, | |||
148 | charset_out | |||
149 | ); | |||
150 | // show all three | |||
151 | string show; | |||
152 | // - input | |||
153 | show.erase(); StrToCStrAppend(s_in.c_str(), show); | |||
154 | CONSOLEPRINTF(("Input : %-20s = \"%s\"",DBCharSetNames[charset_in], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %-20s = \"%s\"" "\n",DBCharSetNames[charset_in], show.c_str()); | |||
155 | // - internal UTF8 | |||
156 | show.erase(); StrToCStrAppend(s_internal.c_str(), show); | |||
157 | CONSOLEPRINTF(("Internal : %-20s = \"%s\"",DBCharSetNames[chs_utf8], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Internal : %-20s = \"%s\"" "\n",DBCharSetNames[chs_utf8], show.c_str()); | |||
158 | // - output | |||
159 | show.erase(); StrToCStrAppend(s_out.c_str(), show); | |||
160 | CONSOLEPRINTF(("Output : %-20s = \"%s\"",DBCharSetNames[charset_out], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Output : %-20s = \"%s\"" "\n",DBCharSetNames[charset_out], show.c_str()); | |||
161 | return EXIT_SUCCESS0; | |||
162 | } // charConv | |||
163 | ||||
164 | #endif // SYSYNC_TOOL | |||
165 | ||||
166 | ||||
167 | // conversion table from ANSI 0x80..0x9F to UCS4 | |||
168 | const uInt32 Ansi_80_to_9F_to_UCS4[0x20] = { | |||
169 | 0x20AC, 0 ,0x201A,0x0192, 0x201E,0x2026,0x2020,0x2021, // 0x80..0x87 | |||
170 | 0x02C6,0x2030,0x0160,0x2039, 0x0152, 0 ,0x017D, 0 , // 0x88..0x8F | |||
171 | 0 ,0x2018,0x2019,0x201C, 0x201D,0x2022,0x2013,0x2014, // 0x90..0x97 | |||
172 | 0x02DC,0x2122,0x0161,0x203A, 0x0153, 0 ,0x017E,0x0178 // 0x98..0x9F | |||
173 | }; | |||
174 | ||||
175 | // line end mode names | |||
176 | const char * const lineEndModeNames[numLineEndModes] = { | |||
177 | "none", // none specified | |||
178 | "unix", // 0x0A | |||
179 | "mac", // 0x0D | |||
180 | "dos", // 0x0D 0x0A | |||
181 | "cstr", // as in C strings, '\n' which is 0x0A normally (but might be 0x0D on some platforms) | |||
182 | "filemaker" // 0x0B (filemaker tab-separated text format, CR is shown as 0x0B within fields | |||
183 | }; | |||
184 | ||||
185 | ||||
186 | ||||
187 | // literal quoting mode names | |||
188 | const char * const quotingModeNames[numQuotingModes] = { | |||
189 | "none", // none specified | |||
190 | "singlequote", // single quote must be duplicated | |||
191 | "doublequote", // double quote must be duplicated | |||
192 | "backslash" // C-string-style escapes of CR,LF,TAB,BS,\," and ' (but no full c-string escape with \xXX etc.) | |||
193 | }; | |||
194 | ||||
195 | ||||
196 | // Encoding format names for SyncML | |||
197 | const char * const encodingFmtSyncMLNames[numFmtTypes] = { | |||
198 | "chr", // plain chars | |||
199 | "bin", // binary | |||
200 | "b64" // base 64 encoding | |||
201 | }; | |||
202 | // Encoding format names for user | |||
203 | const char * const encodingFmtNames[numFmtTypes] = { | |||
204 | "plain-text", // no encoding (plain text) | |||
205 | "binary", // plain binary (in WBXML only) | |||
206 | "base64" // base 64 encoding | |||
207 | }; | |||
208 | ||||
209 | ||||
210 | // field (property) data type names | |||
211 | const char * const propDataTypeNames[numPropDataTypes] = { | |||
212 | "chr", // Character | |||
213 | "int", // Integer | |||
214 | "bool", // Boolean | |||
215 | "bin", // Binary | |||
216 | "datetime", // Date and time of day | |||
217 | "phonenum", // Phone number | |||
218 | "text", // plain text | |||
219 | "???" // unknown | |||
220 | }; | |||
221 | ||||
222 | ||||
223 | // Auth type names | |||
224 | const char * const authTypeSyncMLNames[numAuthTypes] = { | |||
225 | NULL__null, // no authorisation | |||
226 | "syncml:auth-basic", // basic (B64 encoded user pw string) | |||
227 | "syncml:auth-md5" // Md5 encoded user:pw:nonce | |||
228 | }; | |||
229 | ||||
230 | ||||
231 | // MIME encoding types | |||
232 | const char * const MIMEEncodingNames[numMIMEencodings] = { | |||
233 | "", | |||
234 | "7BIT", | |||
235 | "8BIT", | |||
236 | "BINARY", | |||
237 | "QUOTED-PRINTABLE", | |||
238 | "BASE64", | |||
239 | "B" | |||
240 | }; | |||
241 | ||||
242 | // Charset names for MIME based strings | |||
243 | const char * const MIMECharSetNames[numCharSets] = { | |||
244 | "unknown", | |||
245 | "US-ASCII", | |||
246 | "ANSI", | |||
247 | "ISO-8859-1", | |||
248 | "UTF-8", | |||
249 | "UTF-16", | |||
250 | #ifdef CHINESE_SUPPORT | |||
251 | "GB2312", | |||
252 | "CP936", | |||
253 | #endif | |||
254 | }; | |||
255 | ||||
256 | ||||
257 | #ifdef SYSYNC_ENGINE1 | |||
258 | // generate RFC2822-style address specificiation | |||
259 | // - Common Name will be quoted | |||
260 | // - recipient will be put in angle brackets | |||
261 | void makeRFC2822AddrSpec( | |||
262 | cAppCharP aCommonName, | |||
263 | cAppCharP aRecipient, | |||
264 | string &aRFCAddr | |||
265 | ) | |||
266 | { | |||
267 | if (aCommonName && *aCommonName) { | |||
268 | aRFCAddr='"'; | |||
269 | while (*aCommonName) { | |||
270 | if (*aCommonName=='"') aRFCAddr += "\\\""; | |||
271 | else aRFCAddr += *aCommonName; | |||
272 | aCommonName++; | |||
273 | } | |||
274 | aRFCAddr+="\" <"; | |||
275 | aRFCAddr+=aRecipient; | |||
276 | aRFCAddr+=">"; | |||
277 | } | |||
278 | else { | |||
279 | // plain email address | |||
280 | aRFCAddr=aRecipient; | |||
281 | } | |||
282 | } // makeRFC2822AddrSpec | |||
283 | ||||
284 | ||||
285 | ||||
286 | ||||
287 | // sysytool -f syncserv_odbc.xml addrparse "(Lukas Peter) luz@synthesis.ch (Zeller), gaga" | |||
288 | ||||
289 | // Parse RFC2822-style address specificiation | |||
290 | // - aName will receive name and all (possible) comments | |||
291 | // - aRecipient will receive the (first, in case of a group) email address | |||
292 | cAppCharP parseRFC2822AddrSpec( | |||
293 | cAppCharP aText, | |||
294 | string &aName, | |||
295 | string &aRecipient | |||
296 | ) | |||
297 | { | |||
298 | const char *p; | |||
299 | char c; | |||
300 | ||||
301 | enum { | |||
302 | pstate_sepspace, | |||
303 | pstate_trailing, | |||
304 | pstate_text, | |||
305 | pstate_comment, | |||
306 | pstate_quoted, | |||
307 | pstate_email | |||
308 | } pstate = pstate_trailing; | |||
309 | string text,groupname; | |||
310 | bool textcouldbeemail=true; | |||
311 | bool atfound=false; | |||
312 | aName.erase(); | |||
313 | aRecipient.erase(); | |||
314 | p=aText; | |||
315 | do { | |||
316 | c=*p; | |||
317 | // check end of input | |||
318 | if (c==0) break; // done with the string | |||
319 | // advance to next char | |||
320 | p++; | |||
321 | // check according to state | |||
322 | switch (pstate) { | |||
323 | case pstate_sepspace: | |||
324 | if (c==' ') { | |||
325 | aName+=c; | |||
326 | } | |||
327 | pstate=pstate_trailing; | |||
328 | // otherwise treat like trailing | |||
329 | case pstate_trailing: | |||
330 | textcouldbeemail=aRecipient.empty(); | |||
331 | atfound=false; | |||
332 | // skip trailing WSP first | |||
333 | if (c==' ' || c=='\t' || c=='\n' || c=='\r') break; // simply ignore WSP in trailing mode | |||
334 | else pstate=pstate_text; | |||
335 | // fall trough to do text analysis | |||
336 | case pstate_text: | |||
337 | // now check specials | |||
338 | if (c==',') { c=0; break; } // end of address, cause exit from loop, next will start after comma | |||
339 | else if (c==';') { c=0; break; } // end of group address list, treat it like single address | |||
340 | else if (c=='@' && textcouldbeemail) atfound=true; // flag presence of @ | |||
341 | // check if text could still be a email address by itself | |||
342 | if (textcouldbeemail && !isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') { | |||
343 | textcouldbeemail=false; | |||
344 | if (atfound) { | |||
345 | aRecipient=text; | |||
346 | text.erase(); | |||
347 | } | |||
348 | atfound=false; | |||
349 | } | |||
350 | // now check other specials | |||
351 | if (c=='"') { pstate=pstate_quoted; } // start of quoted string | |||
352 | else if (c=='(') { pstate=pstate_comment; } // start of comment | |||
353 | else if (c=='<') { aRecipient.erase(); pstate=pstate_email; } // start of angle-addr, overrides other recipient texts | |||
354 | else if (c==':') { | |||
355 | groupname=aRecipient; // what we've probably parsed as recipient | |||
356 | groupname+=aName; // plus name so far | |||
357 | groupname+=text; // plus additional text | |||
358 | text.erase(); | |||
359 | aName.erase(); | |||
360 | aRecipient.erase(); | |||
361 | pstate=pstate_trailing; | |||
362 | } // flag presence of a group name (which can be used as name if addr itself does not have one) | |||
363 | else { | |||
364 | // add other text chars to the text | |||
365 | text += c; | |||
366 | } | |||
367 | break; | |||
368 | case pstate_quoted: | |||
369 | if (c=='\\') { | |||
370 | if (*p) c=*p++; else break; // get next char (if any) and add to result untested | |||
371 | } | |||
372 | else if (c=='"') { | |||
373 | // end of quoted string | |||
374 | pstate=pstate_sepspace; | |||
375 | aName+=text; | |||
376 | text.erase(); | |||
377 | break; | |||
378 | } | |||
379 | // add to text | |||
380 | text += c; | |||
381 | break; | |||
382 | case pstate_comment: | |||
383 | if (c==')') { | |||
384 | // end of comment | |||
385 | aName+=text; | |||
386 | text.erase(); | |||
387 | pstate=pstate_sepspace; | |||
388 | break; | |||
389 | } | |||
390 | // add to text | |||
391 | text += c; | |||
392 | break; | |||
393 | case pstate_email: | |||
394 | if (!isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') { | |||
395 | // any non-email char terminates email, not only '>', but only '>' is swallowed | |||
396 | if (c!='>') p--; // re-evaluate char in next state | |||
397 | pstate=pstate_sepspace; | |||
398 | break; | |||
399 | } | |||
400 | // add to email | |||
401 | aRecipient += c; | |||
402 | break; | |||
403 | } // switch | |||
404 | } while (c!=0); | |||
405 | // handle case of pure email address without name and without < > brackets or : | |||
406 | if (aRecipient.empty() && textcouldbeemail && atfound) | |||
407 | aRecipient = text; | |||
408 | else | |||
409 | aName += text; | |||
410 | // if name is (now) empty, but we have a group name, use the group name | |||
411 | if (aName.empty()) aName=groupname; | |||
412 | // remove trailing spaces in aName | |||
413 | string::size_type n=aName.find_last_not_of(' '); | |||
414 | if (n!=string::npos) aName.resize(n+1); | |||
415 | // return where to continue parsing for next addr-spec (if not end of string) | |||
416 | return p; | |||
417 | } // parseRFC2822AddrSpec | |||
418 | ||||
419 | ||||
420 | ||||
421 | // append internal UTF8 string as RFC2047 style encoding | |||
422 | const char *appendUTF8AsRFC2047( | |||
423 | const char *aText, | |||
424 | string &aString | |||
425 | ) | |||
426 | { | |||
427 | const char *p,*q,*r; | |||
428 | char c; | |||
429 | ||||
430 | p=aText; | |||
431 | do { | |||
432 | q=p; // remember start | |||
433 | // find chars until next char that must be stored as encoded word | |||
434 | do { | |||
435 | c=*p; | |||
436 | if (c==0 || (c & 0x80) || (c=='=' && *(p+1)=='?')) break; | |||
437 | p++; | |||
438 | } while(true); | |||
439 | // copy chars outside encoded word directly | |||
440 | if (p-q>0) aString.append(q,p-q); | |||
441 | // check if end of string | |||
442 | if (c==0) break; | |||
443 | // pack some chars into encoded word | |||
444 | // - start word | |||
445 | aString.append("=?utf-8?B?"); // 10 chars start (+ 2 chars will be added at end) | |||
446 | // - encoded data must be 75-12=63 chars or less | |||
447 | // Using B (=b64) encoding, output of 63 chars = 63/4*3 = max 47 chars. | |||
448 | // We use 45 max, as this is evenly divisible by 3 and output is 60 chars | |||
449 | q=p; | |||
450 | while (true) { | |||
451 | // find next space | |||
452 | while (*q && !isspace(*q) && q-p<45) q++; | |||
453 | if (q-p>=45) break; // abort if exhausted already | |||
454 | // find next non-space | |||
455 | r=q; | |||
456 | while (isspace(*r)) r++; | |||
457 | // check if next non-space will start a new word | |||
458 | if (*r & 0x80) { | |||
459 | // we should include the next word as well, if possible without exceeding size | |||
460 | if (r-p<45) { | |||
461 | q=r; | |||
462 | continue; | |||
463 | } | |||
464 | } | |||
465 | break; | |||
466 | } | |||
467 | // encode binary stream and append to string | |||
468 | appendEncoded((const uInt8 *)p,q-p,aString,enc_b); | |||
469 | p=q; | |||
470 | // - end word | |||
471 | aString.append("?="); | |||
472 | } while (true); | |||
473 | return p; | |||
474 | } // appendUTF8AsRFC2047 | |||
475 | ||||
476 | ||||
477 | // parse character string from RFC2047 style encoding to UTF8 internal string | |||
478 | const char *appendRFC2047AsUTF8( | |||
479 | const char *aRFC2047, | |||
480 | stringSize aSize, | |||
481 | string &aString, | |||
482 | TLineEndModes aLEM | |||
483 | ) | |||
484 | { | |||
485 | const char *p,*q,*r,*w; | |||
486 | char c = 0; | |||
487 | const char *eot = aRFC2047+aSize; | |||
488 | ||||
489 | p=aRFC2047; | |||
490 | w=NULL__null; // start of last detected word (to avoid re-scanning) | |||
491 | while (p<eot) { | |||
492 | q=p; // remember start | |||
493 | // find chars until next encoded word | |||
494 | while (p<eot) { | |||
495 | c=*p; | |||
496 | if (c==0 || (p!=w && c=='=' && *(p+1)=='?')) break; | |||
497 | p++; | |||
498 | } | |||
499 | // copy chars outside encoded word directly | |||
500 | aString.append(q,p-q); | |||
501 | // check if end of string | |||
502 | if (p>=eot || c==0) break; | |||
503 | // try to parse encoded word | |||
504 | q=p+2; | |||
505 | scanword: | |||
506 | // q is now where we start to parse word contents | |||
507 | // p is where we would re-start reading normally if current word turns out not to be a word at all | |||
508 | // - remember start of word scan (to avoid re-scanning it) | |||
509 | w=p; | |||
510 | // - get charset | |||
511 | r=q; | |||
512 | while (q<eot && *q!='?' && isgraph(*q)) q++; | |||
513 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally | |||
514 | sInt16 en; | |||
515 | TCharSets charset=chs_unknown; | |||
516 | if (StrToEnum(MIMECharSetNames, numCharSets, en, r, q-r)) charset=(TCharSets)en; | |||
517 | // - get encoding | |||
518 | r=++q; // continue after ? separator | |||
519 | while (q<eot && *q!='?' && isgraph(*q)) q++; | |||
520 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally | |||
521 | TEncodingTypes encoding=enc_8bit; | |||
522 | if (StrToEnum(MIMEEncodingNames, numMIMEencodings, en, r, q-r)) encoding=(TEncodingTypes)en; | |||
523 | // - get data part | |||
524 | r=++q; | |||
525 | while (q+1<eot && *q && *q!=' ' && !(*q=='?' && *(q+1)=='=')) q++; | |||
526 | if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally | |||
527 | // - decode | |||
528 | string decoded; | |||
529 | appendDecoded(r,q-r,decoded,encoding); | |||
530 | // - convert to UTF-8 | |||
531 | appendStringAsUTF8( | |||
532 | decoded.c_str(), | |||
533 | aString, | |||
534 | charset, | |||
535 | aLEM | |||
536 | ); | |||
537 | // - skip word terminator | |||
538 | p=q+2; | |||
539 | // - check for special case of adjacent words | |||
540 | q=p; | |||
541 | while (q<eot && isspace(*q)) q++; | |||
542 | if (q+1<eot && q>p && *q=='=' && *(q+1)=='?') { | |||
543 | // adjacent encoded words, only separated by space -> ignore space | |||
544 | // p is after previous word | |||
545 | q+=2; | |||
546 | // q is after lead-in of next word | |||
547 | goto scanword; | |||
548 | } | |||
549 | // p is where we continue reading | |||
550 | } | |||
551 | return p; | |||
552 | } // appendRFC2047AsUTF8 | |||
553 | ||||
554 | ||||
555 | // decode encoded data and append to string | |||
556 | const char *appendDecoded( | |||
557 | const char *aText, | |||
558 | size_t aSize, | |||
559 | string &aBinString, | |||
560 | TEncodingTypes aEncoding | |||
561 | ) | |||
562 | { | |||
563 | char c; | |||
564 | const char *p=aText; | |||
565 | uInt32 binsz; | |||
566 | uInt8 *binP; | |||
567 | ||||
568 | switch (aEncoding) { | |||
569 | case enc_quoted_printable : | |||
570 | // decode quoted-printable content | |||
571 | while ((c=*p++)) { | |||
572 | // char found | |||
573 | if (c=='=') { | |||
574 | uInt16 code; | |||
575 | char hex[2]; | |||
576 | // check for soft break first | |||
577 | if (*p=='\x0D' || *p=='\x0A') { | |||
578 | // soft break, swallow | |||
579 | if (*p=='\x0D') p++; | |||
580 | if (*p=='\x0A') p++; | |||
581 | continue; | |||
582 | } | |||
583 | // decode | |||
584 | hex[0]=*p; | |||
585 | if (*p) { | |||
586 | p++; | |||
587 | hex[1]=*p; | |||
588 | if (*p) { | |||
589 | p++; | |||
590 | if (HexStrToUShort(hex,code,2)==2) { | |||
591 | c=code; // decoded char | |||
592 | } | |||
593 | else continue; // simply ignore | |||
594 | } | |||
595 | else break; | |||
596 | } | |||
597 | else break; | |||
598 | } | |||
599 | // append char | |||
600 | aBinString+=c; | |||
601 | } | |||
602 | aText=p; | |||
603 | break; | |||
604 | case enc_base64: | |||
605 | case enc_b: | |||
606 | // decode base 64 | |||
607 | binsz=0; | |||
608 | binP = b64::decode(aText, aSize, &binsz); | |||
609 | aBinString.append((const char *)binP,binsz); | |||
610 | b64::free(binP); | |||
611 | aText+=aSize; | |||
612 | break; | |||
613 | case enc_7bit: | |||
614 | case enc_8bit: | |||
615 | // copy no more than size | |||
616 | if (aSize>0) aBinString.reserve(aBinString.size()+aSize); | |||
617 | while (*p && aSize>0) { | |||
618 | aBinString+=*p++; | |||
619 | aSize--; | |||
620 | } | |||
621 | aText=p; | |||
622 | break; | |||
623 | case enc_none: | |||
624 | case enc_binary: | |||
625 | // copy bytes | |||
626 | aBinString.append(aText,aSize); | |||
627 | aText+=aSize; | |||
628 | break; | |||
629 | case numMIMEencodings: | |||
630 | // invalid | |||
631 | break; | |||
632 | } // quoted printable | |||
633 | return aText; | |||
634 | } // appendDecoded | |||
635 | ||||
636 | ||||
637 | ||||
638 | // encode binary stream and append to string | |||
639 | void appendEncoded( | |||
640 | const uInt8 *aBinary, | |||
641 | size_t aSize, | |||
642 | string &aString, | |||
643 | TEncodingTypes aEncoding, | |||
644 | sInt16 aMaxLineSize, | |||
645 | sInt32 aCurrLineSize, | |||
646 | bool aSoftBreaksAsCR, | |||
647 | bool aEncodeBinary | |||
648 | ) | |||
649 | { | |||
650 | char c; | |||
651 | string::size_type linestart; | |||
652 | const uInt8 *p; | |||
653 | bool softbreak; | |||
654 | uInt32 b64len; | |||
655 | char *b64; | |||
656 | bool processed; | |||
657 | ||||
658 | switch (aEncoding) { | |||
659 | case enc_binary : | |||
660 | case enc_none : | |||
661 | case enc_8bit : | |||
662 | case enc_7bit : // assume we have no 8bit chars | |||
663 | // just copy 1:1 | |||
664 | aString.append((const char *)aBinary,aSize); | |||
665 | break; | |||
666 | case enc_quoted_printable: | |||
667 | // quote-printable encoding | |||
668 | // - determine start of last line in aString | |||
669 | // Note: this is because property text will be folded when lines aMaxLineSize | |||
670 | linestart=aString.size()-aCurrLineSize; | |||
671 | for (p=aBinary;p<aBinary+aSize;p++) { // '\0' will not terminate the 'for' loop | |||
672 | c=*p; | |||
673 | if (!aEncodeBinary && !c) break; // still exit at NUL when not encoding real binary data | |||
674 | processed=false; // input data in c is not yet processed | |||
675 | // make sure we do not go over the limit (if one is set) | |||
676 | // - if less than 8 chars (=0D=0A + =\r) are free, soft break the line | |||
677 | softbreak= aMaxLineSize && (aString.size()-linestart>=string::size_type(aMaxLineSize)-8); | |||
678 | if (!aEncodeBinary) { | |||
679 | if (c=='\r') continue; // ignore them | |||
680 | if (c=='\b') continue; // ignore them (optional break indicators, not relevant for QP output) | |||
681 | if (c=='\n') { // - encode line ends | |||
682 | aString.append("=0D=0A"); // special string for Line Ends (CR LF) | |||
683 | processed = true; // c is processed now | |||
684 | softbreak = true; | |||
685 | } // if | |||
686 | } // if | |||
687 | // - handle soft line break (but only if really doing line breaking) | |||
688 | // Also: avoid adding a soft break at the very end of the string | |||
689 | if (softbreak && aMaxLineSize && p+1<aBinary+aSize) { | |||
690 | if (aSoftBreaksAsCR) | |||
691 | aString.append("=\r"); // '\r' signals softbreak for finalizeproperty() | |||
692 | else | |||
693 | aString.append("=\x0D\x0A"); // break line here | |||
694 | // new line starts after softbreak | |||
695 | linestart=aString.size(); | |||
696 | // make sure soft line break is not followed by unencoded space | |||
697 | // (which would look like MIME folding) | |||
698 | if (c==' ' || (processed && p[1]==' ')) { | |||
699 | aString.append("=20"); | |||
700 | if (processed) p++; // if current char was already processed, we need to explicitly skip the space | |||
701 | processed=true; // char is now processed in any case | |||
702 | } // if | |||
703 | } // if | |||
704 | // now encode the char in c if not already processed by now | |||
705 | if (!processed) { | |||
706 | bool encodeIt= | |||
707 | (c=='=') // escape equal sign itself | |||
708 | || (c=='<' && aEncodeBinary) // avoid XML mismatch problems | |||
709 | || (uInt8)c>0x7F | |||
710 | || (uInt8)c<0x20; // '\0' will be encoded as well | |||
711 | if (encodeIt) { // encode all non ASCII chars > 0x7F (and control chars as well) | |||
712 | aString+="="; | |||
713 | aString+=NibbleToHexDigit(c>>4); | |||
714 | aString+=NibbleToHexDigit(c); | |||
715 | } | |||
716 | else | |||
717 | aString+=c; // just copy | |||
718 | } // if | |||
719 | } | |||
720 | break; | |||
721 | case enc_base64: | |||
722 | case enc_b: | |||
723 | // use base64 encoding | |||
724 | if (aSize>0) { | |||
725 | // don't call b64 with size=0! | |||
726 | b64 = b64::encode( | |||
727 | aBinary,aSize, // what to encode | |||
728 | &b64len, // output size | |||
729 | aMaxLineSize, // max line size | |||
730 | aSoftBreaksAsCR | |||
731 | ); | |||
732 | // append to output, if any | |||
733 | if (b64) { | |||
734 | aString.append(b64,b64len); | |||
735 | // release buffer | |||
736 | b64::free(b64); | |||
737 | } | |||
738 | if (aEncoding!=enc_b) { | |||
739 | // make sure it ends with a newline for "base64" (but NOT for "b" as used in RFC2047) | |||
740 | // Note: when used in vCard2.1, that newline is part of the property and show as an | |||
741 | // empty line in the vCard. | |||
742 | aString += aSoftBreaksAsCR ? "\r" : "\x0D\x0A"; | |||
743 | } | |||
744 | } | |||
745 | break; | |||
746 | default: | |||
747 | // do nothing | |||
748 | break; | |||
749 | } // switch | |||
750 | } // appendEncoded | |||
751 | ||||
752 | ||||
753 | #ifdef CHINESE_SUPPORT | |||
754 | // the flatBinTree tables for converting to and from GB2312 | |||
755 | #include "gb2312_tables_inc.cpp" | |||
756 | // the flatBinTree tables for converting to and from CP936 | |||
757 | #include "cp936_tables_inc.cpp" | |||
758 | #endif | |||
759 | ||||
760 | ||||
761 | // add char (possibly multi-byte) as UTF8 to value and apply charset translation if needed | |||
762 | // - returns > 0 if aNumChars was not correct number of bytes needed to convert an entire character; | |||
763 | // return value is number of bytes needed to generate one output character. If return value | |||
764 | // is<>0, no char has been appended to aVal. | |||
765 | uInt16 appendCharsAsUTF8(const char *aChars, string &aVal, TCharSets aCharSet, uInt16 aNumChars) | |||
766 | { | |||
767 | uInt32 ucs4; | |||
768 | // first char | |||
769 | uInt8 c=*aChars; | |||
770 | // this is a 8-bit char | |||
771 | switch(aCharSet) { | |||
772 | case chs_utf8 : | |||
773 | // UTF8 is native charset of the application, simply add | |||
774 | aVal+=c; | |||
775 | break; | |||
776 | case chs_ansi : | |||
777 | case chs_iso_8859_1 : | |||
778 | // do poor man's conversion to UCS4 | |||
779 | // - most ANSI chars are 1:1 mapped | |||
780 | ucs4 = ((uInt8)c & 0xFF); | |||
781 | // - except 0x80..0x9F, use table for these | |||
782 | if (ucs4>=0x80 && ucs4<=0x9F) | |||
783 | ucs4=Ansi_80_to_9F_to_UCS4[ucs4-0x80]; | |||
784 | // - convert to UTF8 | |||
785 | UCS4toUTF8(ucs4,aVal); | |||
786 | break; | |||
787 | #ifdef CHINESE_SUPPORT | |||
788 | case chs_gb2312 : // simplified Chinese GB-2312 charset | |||
789 | // all below 0x80 are passed as-is | |||
790 | if (c<0x80) | |||
791 | aVal+=c; // simply append | |||
792 | else { | |||
793 | // 16-bit GB2312 char | |||
794 | if (aNumChars!=2) | |||
795 | return 2; // we need 2 chars for a successful GB-2312 | |||
796 | // we have 2 bytes, convert them | |||
797 | ucs4 = searchFlatBintree(gb2312_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_'); | |||
798 | // - convert to UTF8 | |||
799 | UCS4toUTF8(ucs4,aVal); | |||
800 | } | |||
801 | break; | |||
802 | case chs_cp936: // simplified chinese Windows codepage CP936 | |||
803 | if (c<0x80) | |||
804 | aVal+=c; // simply append | |||
805 | else { | |||
806 | // 0x0080 (euro sign) or 2-byte CP936 | |||
807 | if (c==0x80) | |||
808 | ucs4=searchFlatBintree(cp936_to_ucs2, 0x0080, INCONVERTIBLE_PLACEHOLDER'_'); | |||
809 | else { | |||
810 | // 16-bit GB2312 char | |||
811 | if (aNumChars!=2) | |||
812 | return 2; // we need 2 chars for a successful CP936 | |||
813 | // we have 2 bytes, convert them | |||
814 | ucs4 = searchFlatBintree(cp936_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_'); | |||
815 | } | |||
816 | // - convert to UTF8 | |||
817 | UCS4toUTF8(ucs4,aVal); | |||
818 | } | |||
819 | break; | |||
820 | #endif | |||
821 | case chs_ascii : // plain 7-bit ASCII | |||
822 | default : // unknown | |||
823 | // only 7-bit allowed | |||
824 | if (c & 0x80) | |||
825 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; | |||
826 | else | |||
827 | aVal+=c; | |||
828 | break; | |||
829 | } // switch | |||
830 | return 0; // ok, converted aNumChars | |||
831 | } // appendCharsAsUTF8 | |||
832 | ||||
833 | ||||
834 | ||||
835 | ||||
836 | // add string as UTF8 to value and apply charset translation if needed | |||
837 | // - if lineEndMode is not lem_none, all sorts of line ends will be converted | |||
838 | // to the specified mode. | |||
839 | void appendStringAsUTF8(const char *s, string &aVal, TCharSets aCharSet, TLineEndModes aLEM, bool aAllowFilemakerCR) | |||
840 | { | |||
841 | char c; | |||
842 | const char *start=s; | |||
843 | if (s) { | |||
844 | while ((c=*s++)!=0) { | |||
845 | if (aLEM!=lem_none) { | |||
846 | // line end handling enabled | |||
847 | if (c==0x0D) { | |||
848 | // could be mac (0x0D) or DOS (0x0D/0x0A) | |||
849 | if (*s==0x0A) { | |||
850 | // this is DOS-type line end | |||
851 | // - consume the 0x0A as well | |||
852 | s++; | |||
853 | // - check for 0x0D 0x0D 0x0A special case (caused by | |||
854 | // DOS-text-file conversion of non-DOS strings) | |||
855 | if (s>=start+3) { | |||
856 | if (*(s-3)==0x0D) { | |||
857 | // char before the DOS-CRLF was a 0x0D as well (and | |||
858 | // has already produced a newline in the output | |||
859 | // --> completely ignore this CRLF | |||
860 | continue; | |||
861 | } | |||
862 | } | |||
863 | } | |||
864 | // is a line end, convert it to platform-lineend | |||
865 | c='\n'; // platform | |||
866 | } | |||
867 | else if (c==0x0A) { | |||
868 | // 0x0A without preceeding 0x0D = unix | |||
869 | c='\n'; // platform | |||
870 | } | |||
871 | else if (c==0x0B && aAllowFilemakerCR) { | |||
872 | // 0x0B is used as lineend in filemaker export and achilformat | |||
873 | c='\n'; | |||
874 | } | |||
875 | // line end converted to platform | |||
876 | if (c=='\n' && aLEM!=lem_cstr) { | |||
877 | // produce specified line end | |||
878 | switch (aLEM) { | |||
879 | case lem_mac : c=0x0D; break; | |||
880 | case lem_unix : c=0x0A; break; | |||
881 | case lem_filemaker : c=0x0B; break; | |||
882 | case lem_dos : | |||
883 | c=0x0A; // LF will be added later | |||
884 | aVal+=0x0D; // add CR | |||
885 | break; | |||
886 | default: break; | |||
887 | } | |||
888 | } | |||
889 | } // line end handling enabled | |||
890 | // normal add | |||
891 | uInt16 i,seqlen=1; // assume logical char consists of single byte | |||
892 | do { | |||
893 | seqlen=appendCharsAsUTF8(s-seqlen,aVal,aCharSet,seqlen); // add char (possibly with UTF8 expansion) to aVal | |||
894 | if (seqlen<=1) break; // done | |||
895 | for (i=1;i<seqlen;i++) { if (*s==0) break; else s++; } | |||
896 | if (i<seqlen) break; // not enough bytes | |||
897 | } while(true); | |||
898 | } | |||
899 | } | |||
900 | } // appendStringAsUTF8 | |||
901 | ||||
902 | ||||
903 | ||||
904 | // same as appendUTF8ToString, but output string is cleared first | |||
905 | bool storeUTF8ToString( | |||
906 | cAppCharP aUTF8, string &aVal, | |||
907 | TCharSets aCharSet, | |||
908 | TLineEndModes aLEM, | |||
909 | TQuotingModes aQuotingMode, | |||
910 | size_t aMaxBytes | |||
911 | ) | |||
912 | { | |||
913 | aVal.erase(); | |||
914 | return appendUTF8ToString(aUTF8,aVal,aCharSet,aLEM,aQuotingMode,aMaxBytes); | |||
915 | } // storeUTF8ToString | |||
916 | ||||
917 | ||||
918 | ||||
919 | // helper for adding chars | |||
920 | static void appendCharToString( | |||
921 | char c, | |||
922 | string &aVal, | |||
923 | TQuotingModes aQuotingMode | |||
924 | ) { | |||
925 | if (aQuotingMode==qm_none) { | |||
926 | aVal+=c; | |||
927 | } | |||
928 | else if (aQuotingMode==qm_backslash) { | |||
929 | // treat CR, LF, BS, TAB, single/doublequote and backslash specially | |||
930 | if (c==0x0D) | |||
931 | aVal+="\\r"; | |||
932 | else if (c==0x0A) | |||
933 | aVal+="\\n"; | |||
934 | else if (c==0x08) | |||
935 | aVal+="\\b"; | |||
936 | else if (c==0x09) | |||
937 | aVal+="\\t"; | |||
938 | else if (c=='"') | |||
939 | aVal+="\\\""; | |||
940 | else if (c=='\'') | |||
941 | aVal+="\\'"; | |||
942 | else if (c=='\\') | |||
943 | aVal+="\\\\"; | |||
944 | else | |||
945 | aVal+=c; | |||
946 | } | |||
947 | else if (aQuotingMode==qm_duplsingle) { | |||
948 | if (c=='\'') aVal+=c; // duplicate | |||
949 | aVal+=c; // normal append | |||
950 | } | |||
951 | else if (aQuotingMode==qm_dupldouble) { | |||
952 | if (c=='"') aVal+=c; // duplicate | |||
953 | aVal+=c; // normal append | |||
954 | } | |||
955 | } // appendCharToString | |||
956 | ||||
957 | ||||
958 | // add UTF8 string to value in custom charset | |||
959 | // - if aLEM is not lem_none, occurrence of any type of Linefeeds | |||
960 | // (LF,CR,CRLF and even CRCRLF) in input string will be | |||
961 | // replaced by the specified line end type | |||
962 | // - aQuotingMode specifies what quoting (for ODBC literals for example) should be used | |||
963 | // - output is clipped after aMaxBytes bytes (if not 0) | |||
964 | // - returns true if all input could be converted, false if output is clipped | |||
965 | bool appendUTF8ToString( | |||
966 | cAppCharP aUTF8, | |||
967 | string &aVal, | |||
968 | TCharSets aCharSet, | |||
969 | TLineEndModes aLEM, | |||
970 | TQuotingModes aQuotingMode, | |||
971 | size_t aMaxBytes | |||
972 | ) | |||
973 | { | |||
974 | uInt32 ucs4; | |||
975 | uInt8 c; | |||
976 | size_t n=0; | |||
977 | cAppCharP p=aUTF8; | |||
978 | cAppCharP start=aUTF8; | |||
979 | ||||
980 | if (!aUTF8) return true; // nothing to copy, copied everything of that! | |||
981 | if (aCharSet==chs_utf8 && aLEM==lem_none && aQuotingMode==qm_none) { | |||
982 | // shortcut: simply append entire string | |||
983 | if (aMaxBytes==0) | |||
984 | aVal+=aUTF8; | |||
985 | else | |||
986 | aVal.append(aUTF8,aMaxBytes); | |||
987 | // advance "processed" pointer behind consumed part of string | |||
988 | p=aUTF8+aVal.size(); | |||
989 | } | |||
990 | else { | |||
991 | // process char by char | |||
992 | while((c=*aUTF8)!=0 && (aMaxBytes==0 || n<aMaxBytes)) { | |||
993 | p=aUTF8; | |||
994 | // check for linefeed conversion | |||
995 | if (aLEM!=lem_none && (c==0x0D || c==0x0A)) { | |||
996 | aUTF8++; | |||
997 | // line end, handling enabled | |||
998 | if (c==0x0D) { | |||
999 | // could be mac (0x0D) or DOS (0x0D/0x0A) | |||
1000 | if (*aUTF8==0x0A) { | |||
1001 | // this is DOS-type line end | |||
1002 | // - consume the 0x0A as well | |||
1003 | aUTF8++; | |||
1004 | // - check for 0x0D 0x0D 0x0A special case (caused by | |||
1005 | // DOS-text-file conversion of non-DOS strings) | |||
1006 | if (aUTF8>=start+3) { | |||
1007 | if (*(aUTF8-3)==0x0D) { | |||
1008 | // char before the DOS-CRLF was a 0x0D as well (and | |||
1009 | // has already produced a newline in the output | |||
1010 | // --> completely ignore this CRLF | |||
1011 | continue; | |||
1012 | } | |||
1013 | } | |||
1014 | } | |||
1015 | // is a line end, convert it to platform-lineend | |||
1016 | c='\n'; // platform | |||
1017 | } | |||
1018 | else { // must be 0x0A | |||
1019 | // 0x0A without preceeding 0x0D = unix | |||
1020 | c='\n'; // platform | |||
1021 | } | |||
1022 | // line end converted to platform | |||
1023 | if (aLEM!=lem_cstr) { | |||
1024 | // produce specified line end | |||
1025 | switch (aLEM) { | |||
1026 | case lem_mac : c=0x0D; break; | |||
1027 | case lem_filemaker : c=0x0B; break; | |||
1028 | case lem_unix : c=0x0A; break; | |||
1029 | case lem_dos : | |||
1030 | c=0x0A; // LF will be added later | |||
1031 | n++; // count it extra | |||
1032 | if (aMaxBytes && n>=aMaxBytes) | |||
1033 | goto stringfull; // no room to complete it, ignore it | |||
1034 | appendCharToString(0x0D,aVal,aQuotingMode); | |||
1035 | break; | |||
1036 | default: break; | |||
1037 | } | |||
1038 | } | |||
1039 | appendCharToString(c,aVal,aQuotingMode); | |||
1040 | n++; // count it | |||
1041 | } // line end, handling enabled | |||
1042 | else { | |||
1043 | // non lineend (or lineend not handled specially) | |||
1044 | if (aCharSet==chs_utf8) { | |||
1045 | aUTF8++; | |||
1046 | // - simply add char | |||
1047 | appendCharToString(c,aVal,aQuotingMode); | |||
1048 | n++; | |||
1049 | } | |||
1050 | else { | |||
1051 | // - make UCS4 | |||
1052 | p=aUTF8; // save previous position to detect if we have processed all | |||
1053 | aUTF8=UTF8toUCS4(aUTF8,ucs4); | |||
1054 | // now we have UCS4 | |||
1055 | if (ucs4==0) { | |||
1056 | // UTF8 resulting in UCS4 null char is not allowed | |||
1057 | ucs4=INCONVERTIBLE_PLACEHOLDER'_'; | |||
1058 | } | |||
1059 | else { | |||
1060 | // convert to specified charset | |||
1061 | switch (aCharSet) { | |||
1062 | case chs_ansi: | |||
1063 | case chs_iso_8859_1: | |||
1064 | if ((ucs4<=0xFF && ucs4>=0xA0) || ucs4<0x80) | |||
1065 | // 00..7F and A0..FF directly map to ANSI | |||
1066 | appendCharToString(ucs4,aVal,aQuotingMode); | |||
1067 | else { | |||
1068 | // search for matching ANSI in table | |||
1069 | uInt8 k; | |||
1070 | for (k=0; k<0x20; k++) { | |||
1071 | if (ucs4==Ansi_80_to_9F_to_UCS4[k]) { | |||
1072 | // found in table | |||
1073 | break; | |||
1074 | } | |||
1075 | } | |||
1076 | if (k<0x20) | |||
1077 | // conversion found | |||
1078 | aVal+=k+0x80; | |||
1079 | else | |||
1080 | // no conversion found in table | |||
1081 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; | |||
1082 | } // not in 1:1 range 0..7F, A0..FF | |||
1083 | n++; | |||
1084 | break; | |||
1085 | #ifdef CHINESE_SUPPORT | |||
1086 | case chs_gb2312 : // simplified Chinese GB-2312 charset | |||
1087 | // all below 0x80 are passed as-is | |||
1088 | if (ucs4<0x80) { | |||
1089 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes | |||
1090 | n++; | |||
1091 | } | |||
1092 | else { | |||
1093 | // convert to 16-bit GB2312 char | |||
1094 | uInt16 gb = searchFlatBintree(ucs2_to_gb2312, ucs4, INCONVERTIBLE_PLACEHOLDER'_'); | |||
1095 | // check if we have space | |||
1096 | if (aMaxBytes!=0 && n+2>aMaxBytes) | |||
1097 | goto stringfull; | |||
1098 | // append as two bytes to output string | |||
1099 | aVal+=gb >> 8; | |||
1100 | aVal+=gb & 0xFF; | |||
1101 | n+=2; | |||
1102 | } | |||
1103 | break; | |||
1104 | case chs_cp936 : // simplified Chinese CP936 windows codepage | |||
1105 | // all below 0x80 are passed as-is | |||
1106 | if (ucs4<0x80) { | |||
1107 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes | |||
1108 | n++; | |||
1109 | } | |||
1110 | else { | |||
1111 | // convert to CP936 16-bit representation | |||
1112 | uInt16 twobytes = searchFlatBintree(ucs2_to_cp936, ucs4, INCONVERTIBLE_PLACEHOLDER'_'); | |||
1113 | // append as two bytes to output string, but only this is a CP936 two-byte at all | |||
1114 | if (twobytes>0x0080) { | |||
1115 | // check if we have space | |||
1116 | if (aMaxBytes!=0 && n+2>aMaxBytes) | |||
1117 | goto stringfull; | |||
1118 | aVal+=twobytes >> 8; // sub-page lead in | |||
1119 | n++; | |||
1120 | } | |||
1121 | aVal+=twobytes & 0xFF; // sub-page code | |||
1122 | n++; | |||
1123 | } | |||
1124 | break; | |||
1125 | #endif | |||
1126 | case chs_ascii: | |||
1127 | // explicit ASCII: convert some special chars to plain ASCII | |||
1128 | if ((ucs4 & 0xFFFFFF80) !=0) { | |||
1129 | // ASCIIfy table to convert umlauts etc. to nearest plain ASCII | |||
1130 | typedef struct { | |||
1131 | uInt32 ucs4; | |||
1132 | uInt8 ascii; | |||
1133 | } TASCIIfyEntry; | |||
1134 | ||||
1135 | static const TASCIIfyEntry ASCIIfyTable[] = { | |||
1136 | { 0x000000C4, 'A' }, // Adieresis | |||
1137 | { 0x000000C5, 'A' }, // Aring | |||
1138 | { 0x000000C7, 'C' }, // Ccedilla | |||
1139 | { 0x000000C9, 'E' }, // Eacute | |||
1140 | { 0x000000D1, 'N' }, // Ntilde | |||
1141 | { 0x000000D6, 'O' }, // Odieresis | |||
1142 | { 0x000000DC, 'U' }, // Udieresis | |||
1143 | { 0x000000E1, 'a' }, // aacute | |||
1144 | { 0x000000E0, 'a' }, // agrave | |||
1145 | { 0x000000E2, 'a' }, // acircumflex | |||
1146 | { 0x000000E4, 'a' }, // adieresis | |||
1147 | { 0x000000E3, 'a' }, // atilde | |||
1148 | { 0x000000E5, 'a' }, // aring | |||
1149 | { 0x000000E7, 'c' }, // ccedilla | |||
1150 | { 0x000000E9, 'e' }, // eacute | |||
1151 | { 0x000000E8, 'e' }, // egrave | |||
1152 | { 0x000000EA, 'e' }, // ecircumflex | |||
1153 | { 0x000000EB, 'e' }, // edieresis | |||
1154 | { 0x000000ED, 'i' }, // iacute | |||
1155 | { 0x000000EC, 'i' }, // igrave | |||
1156 | { 0x000000EE, 'i' }, // icircumflex | |||
1157 | { 0x000000EF, 'i' }, // idieresis | |||
1158 | { 0x000000F1, 'n' }, // ntilde | |||
1159 | { 0x000000F3, 'o' }, // oacute | |||
1160 | { 0x000000F2, 'o' }, // ograve | |||
1161 | { 0x000000F4, 'o' }, // ocircumflex | |||
1162 | { 0x000000F6, 'o' }, // odieresis | |||
1163 | { 0x000000F5, 'o' }, // otilde | |||
1164 | { 0x000000FA, 'u' }, // uacute | |||
1165 | { 0x000000F9, 'u' }, // ugrave | |||
1166 | { 0x000000FB, 'u' }, // ucircumflex | |||
1167 | { 0x000000FC, 'u' }, // udieresis | |||
1168 | { 0x000000DF, 's' }, // germandoubles | |||
1169 | { 0x000000D8, 'O' }, // Oslash | |||
1170 | { 0x000000F8, 'o' }, // oslash | |||
1171 | { 0x000000C0, 'A' }, // Agrave | |||
1172 | { 0x000000C3, 'A' }, // Atilde | |||
1173 | { 0x000000D5, 'O' }, // Otilde | |||
1174 | { 0x00000152, 'O' }, // OE | |||
1175 | { 0x00000153, 'o' }, // oe | |||
1176 | { 0x000000C6, 'A' }, // AE | |||
1177 | { 0x000000E6, 'a' }, // ae | |||
1178 | { 0x000000C2, 'A' }, // Acircumflex | |||
1179 | { 0x000000CA, 'E' }, // Ecircumflex | |||
1180 | { 0x000000C1, 'A' }, // Aacute | |||
1181 | { 0x000000CB, 'E' }, // Edieresis | |||
1182 | { 0x000000C8, 'E' }, // Egrave | |||
1183 | { 0x000000CD, 'I' }, // Iacute | |||
1184 | { 0x000000CC, 'I' }, // Igrave | |||
1185 | { 0x000000CE, 'i' }, // Icircumflex | |||
1186 | { 0x000000CF, 'i' }, // Odieresis | |||
1187 | { 0x000000D3, 'O' }, // Oacute | |||
1188 | { 0x000000D2, 'O' }, // Ograve | |||
1189 | { 0x000000D4, 'O' }, // Ocircumflex | |||
1190 | // terminator | |||
1191 | { 0,0 } | |||
1192 | }; | |||
1193 | ||||
1194 | // search in ASCIIfy table | |||
1195 | uInt16 k=0; | |||
1196 | while (ASCIIfyTable[k].ucs4!=0) { | |||
1197 | if (ucs4==ASCIIfyTable[k].ucs4) { | |||
1198 | // found, fetch ASCII-equivalent | |||
1199 | ucs4=ASCIIfyTable[k].ascii; | |||
1200 | break; // use it | |||
1201 | } | |||
1202 | k++; | |||
1203 | } | |||
1204 | } | |||
1205 | // fall through to default, which does not know ANY non-ASCII | |||
1206 | default: | |||
1207 | // only 7 bit ASCII is allowed | |||
1208 | if ((ucs4 & 0xFFFFFF80) !=0) | |||
1209 | aVal+=INCONVERTIBLE_PLACEHOLDER'_'; | |||
1210 | else | |||
1211 | appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes | |||
1212 | n++; | |||
1213 | break; | |||
1214 | } // switch | |||
1215 | } // valid UCS4 | |||
1216 | } // not already UTF8 | |||
1217 | } // if not lineend | |||
1218 | // processed until here | |||
1219 | p=aUTF8; | |||
1220 | } // while not end of input string | |||
1221 | } // not already UTF8 | |||
1222 | // return true if input string completely consumed | |||
1223 | stringfull: | |||
1224 | return (*p==0); | |||
1225 | } // appendUTF8ToString | |||
1226 | ||||
1227 | ||||
1228 | // convert UTF8 to UCS4 | |||
1229 | // - returns pointer to next char | |||
1230 | // - returns UCS4=0 on error (no char, bad sequence, sequence not complete) | |||
1231 | const char *UTF8toUCS4(const char *aUTF8, uInt32 &aUCS4) | |||
1232 | { | |||
1233 | uInt8 c; | |||
1234 | sInt16 morechars; | |||
1235 | ||||
1236 | if ((c=*aUTF8)!=0) { | |||
1237 | aUTF8++; | |||
1238 | // there is a char | |||
1239 | morechars=0; | |||
1240 | // decode UTF8 lead-in | |||
1241 | if ((c & 0x80) == 0) { | |||
1242 | // single byte | |||
1243 | aUCS4=c; | |||
1244 | morechars=0; | |||
1245 | } | |||
1246 | else if ((c & 0xE0) == 0xC0) { | |||
1247 | // two bytes | |||
1248 | aUCS4=c & 0x1F; | |||
1249 | morechars=1; | |||
1250 | } | |||
1251 | else if ((c & 0xF0) == 0xE0) { | |||
1252 | aUCS4=c & 0x0F; | |||
1253 | morechars=2; | |||
1254 | } | |||
1255 | else if ((c & 0xF8) == 0xF0) { | |||
1256 | aUCS4=c & 0x07; | |||
1257 | morechars=3; | |||
1258 | } | |||
1259 | else if ((c & 0xFC) == 0xF8) { | |||
1260 | aUCS4=c & 0x03; | |||
1261 | morechars=4; | |||
1262 | } | |||
1263 | else if ((c & 0xFE) == 0xFC) { | |||
1264 | aUCS4=c & 0x01; | |||
1265 | morechars=5; | |||
1266 | } | |||
1267 | else { | |||
1268 | // bad char | |||
1269 | aUCS4=0; | |||
1270 | } | |||
1271 | // process additional chars | |||
1272 | while(morechars--) { | |||
1273 | if ((c=*aUTF8)==0) { | |||
1274 | // unfinished sequence | |||
1275 | aUCS4=0; | |||
1276 | break; | |||
1277 | } | |||
1278 | aUTF8++; | |||
1279 | if ((c & 0xC0) != 0x80) { | |||
1280 | // bad additional char | |||
1281 | aUCS4=0; | |||
1282 | break; | |||
1283 | } | |||
1284 | // each additional char adds 6 new bits | |||
1285 | aUCS4 = aUCS4 << 6; // shift existing bits | |||
1286 | aUCS4 |= (c & 0x3F); // add new bits | |||
1287 | } | |||
1288 | } | |||
1289 | else { | |||
1290 | // no char | |||
1291 | aUCS4=0; | |||
1292 | } | |||
1293 | // return pointer to next char | |||
1294 | return aUTF8; | |||
1295 | } // UTF8toUCS4 | |||
1296 | ||||
1297 | ||||
1298 | // convert UCS4 to UTF8 (0 char is not allowed and will be ignored!) | |||
1299 | void UCS4toUTF8(uInt32 aUCS4, string &aUTF8) | |||
1300 | { | |||
1301 | uInt8 c; | |||
1302 | ||||
1303 | // ignore null char | |||
1304 | if (aUCS4==0) return; | |||
1305 | // create UTF8 lead-in | |||
1306 | sInt16 morechars=0; | |||
1307 | if (aUCS4<0x00000080) { | |||
1308 | // one byte | |||
1309 | c=aUCS4; | |||
1310 | } | |||
1311 | else if (aUCS4<0x00000800) { | |||
1312 | // two bytes | |||
1313 | c=0xC0 | ((aUCS4 >> 6) & 0x1F); | |||
1314 | morechars=1; | |||
1315 | } | |||
1316 | else if (aUCS4<0x00010000) { | |||
1317 | // three bytes | |||
1318 | c=0xE0 | ((aUCS4 >> 12) & 0x0F); | |||
1319 | morechars=2; | |||
1320 | } | |||
1321 | else if (aUCS4<0x00200000) { | |||
1322 | // four bytes | |||
1323 | c=0xF0 | ((aUCS4 >> 18) & 0x07); | |||
1324 | morechars=3; | |||
1325 | } | |||
1326 | else if (aUCS4<0x04000000) { | |||
1327 | // five bytes | |||
1328 | c=0xF8 | ((aUCS4 >> 24) & 0x03); | |||
1329 | morechars=4; | |||
1330 | } | |||
1331 | else { | |||
1332 | // six bytes | |||
1333 | c=0xFC | ((aUCS4 >> 30) & 0x01); | |||
1334 | morechars=5; | |||
1335 | } | |||
1336 | // add lead-in | |||
1337 | aUTF8+=c; | |||
1338 | // add rest of sequence | |||
1339 | while (morechars--) { | |||
1340 | c= 0x80 | ((aUCS4 >> (morechars * 6)) & 0x3F); | |||
1341 | aUTF8+=c; | |||
1342 | } | |||
1343 | } // UCS4toUTF8 | |||
1344 | ||||
1345 | ||||
1346 | /* Encoding UTF-16 (excerpt from RFC 2781, paragraph 2.1) | |||
1347 | ||||
1348 | Encoding of a single character from an ISO 10646 character value to | |||
1349 | UTF-16 proceeds as follows. Let U be the character number, no greater | |||
1350 | than 0x10FFFF. | |||
1351 | ||||
1352 | 1) If U < 0x10000, encode U as a 16-bit unsigned integer and | |||
1353 | terminate. | |||
1354 | ||||
1355 | 2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF, | |||
1356 | U' must be less than or equal to 0xFFFFF. That is, U' can be | |||
1357 | represented in 20 bits. | |||
1358 | ||||
1359 | 3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and | |||
1360 | 0xDC00, respectively. These integers each have 10 bits free to | |||
1361 | encode the character value, for a total of 20 bits. | |||
1362 | ||||
1363 | 4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order | |||
1364 | bits of W1 and the 10 low-order bits of U' to the 10 low-order | |||
1365 | bits of W2. Terminate. | |||
1366 | ||||
1367 | Graphically, steps 2 through 4 look like: | |||
1368 | U' = yyyyyyyyyyxxxxxxxxxx | |||
1369 | W1 = 110110yyyyyyyyyy | |||
1370 | W2 = 110111xxxxxxxxxx | |||
1371 | */ | |||
1372 | ||||
1373 | // convert UCS4 to UTF-16 | |||
1374 | // - returns 0 for UNICODE range UCS4 and first word of UTF-16 for non UNICODE | |||
1375 | uInt16 UCS4toUTF16(uInt32 aUCS4, uInt16 &aUTF16) | |||
1376 | { | |||
1377 | if (aUCS4<0x10000) { | |||
1378 | // in unicode range: single UNICODE char | |||
1379 | aUTF16=aUCS4; | |||
1380 | return 0; // no second char | |||
1381 | } | |||
1382 | else { | |||
1383 | // out of UNICODE range | |||
1384 | aUCS4-=0x10000; | |||
1385 | if (aUCS4>0xFFFF) { | |||
1386 | // inconvertible | |||
1387 | aUTF16=INCONVERTIBLE_PLACEHOLDER'_'; | |||
1388 | return 0; | |||
1389 | } | |||
1390 | else { | |||
1391 | // convert to two-word UNICODE / UCS-2 | |||
1392 | aUTF16=0xD800+(aUCS4>>10); | |||
1393 | return 0xDC00+(aUCS4 & 0x03FF); | |||
1394 | } | |||
1395 | } | |||
1396 | } // UCS4toUTF16 | |||
1397 | ||||
1398 | ||||
1399 | ||||
1400 | /* Decoding UTF-16 | |||
1401 | ||||
1402 | Decoding of a single character from UTF-16 to an ISO 10646 character | |||
1403 | value proceeds as follows. Let W1 be the next 16-bit integer in the | |||
1404 | sequence of integers representing the text. Let W2 be the (eventual) | |||
1405 | next integer following W1. | |||
1406 | ||||
1407 | 1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value | |||
1408 | of W1. Terminate. | |||
1409 | ||||
1410 | 2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence | |||
1411 | is in error and no valid character can be obtained using W1. | |||
1412 | Terminate. | |||
1413 | ||||
1414 | 3) If there is no W2 (that is, the sequence ends with W1), or if W2 | |||
1415 | is not between 0xDC00 and 0xDFFF, the sequence is in error. | |||
1416 | Terminate. | |||
1417 | ||||
1418 | 4) Construct a 20-bit unsigned integer U', taking the 10 low-order | |||
1419 | bits of W1 as its 10 high-order bits and the 10 low-order bits of | |||
1420 | W2 as its 10 low-order bits. | |||
1421 | ||||
1422 | 5) Add 0x10000 to U' to obtain the character value U. Terminate. | |||
1423 | ||||
1424 | Note that steps 2 and 3 indicate errors. Error recovery is not | |||
1425 | specified by this document. When terminating with an error in steps 2 | |||
1426 | and 3, it may be wise to set U to the value of W1 to help the caller | |||
1427 | diagnose the error and not lose information. Also note that a string | |||
1428 | decoding algorithm, as opposed to the single-character decoding | |||
1429 | described above, need not terminate upon detection of an error, if | |||
1430 | proper error reporting and/or recovery is provided. | |||
1431 | ||||
1432 | */ | |||
1433 | ||||
1434 | // convert UTF-16 to UCS4 | |||
1435 | // - returns pointer to next char | |||
1436 | // - returns UCS4=0 on error (no char, bad sequence, sequence not complete) | |||
1437 | const uInt16 *UTF16toUCS4(const uInt16 *aUTF16P, uInt32 &aUCS4) | |||
1438 | { | |||
1439 | uInt16 utf16=*aUTF16P++; | |||
1440 | ||||
1441 | if (utf16<0xD800 || utf16>0xDFFF) { | |||
1442 | // single char unicode | |||
1443 | aUCS4=utf16; | |||
1444 | } | |||
1445 | else { | |||
1446 | // could be two-char | |||
1447 | if (utf16<=0xDBFF) { | |||
1448 | // valid first char: check second char | |||
1449 | uInt16 utf16_2 = *aUTF16P; // next | |||
| ||||
1450 | if (utf16_2 && utf16_2>=0xDC00 && utf16_2<=0xDFFF) { | |||
1451 | // second char exists and is valid | |||
1452 | aUTF16P++; // advance now | |||
1453 | aUCS4 = | |||
1454 | ((utf16 & 0x3FF) << 10) + | |||
1455 | (utf16_2 & 0x3FF); | |||
1456 | } | |||
1457 | else | |||
1458 | aUCS4=0; // no char | |||
1459 | } | |||
1460 | else { | |||
1461 | aUCS4=0; // no char | |||
1462 | } | |||
1463 | } | |||
1464 | // return advanced pointer | |||
1465 | return aUTF16P; | |||
1466 | } // UCS4toUTF16 | |||
1467 | ||||
1468 | ||||
1469 | ||||
1470 | ||||
1471 | ||||
1472 | ||||
1473 | // add UTF8 string as UTF-16 byte stream to 8-bit string | |||
1474 | // - if aLEM is not lem_none, occurrence of any type of Linefeeds | |||
1475 | // (LF,CR,CRLF and even CRCRLF) in input string will be | |||
1476 | // replaced by the specified line end type | |||
1477 | // - output is clipped after ByteString reaches aMaxBytes size (if not 0), = approx half as many Unicode chars | |||
1478 | // - returns true if all input could be converted, false if output is clipped | |||
1479 | bool appendUTF8ToUTF16ByteString( | |||
1480 | cAppCharP aUTF8, | |||
1481 | string &aUTF16ByteString, | |||
1482 | bool aBigEndian, | |||
1483 | TLineEndModes aLEM, | |||
1484 | uInt32 aMaxBytes | |||
1485 | ) | |||
1486 | { | |||
1487 | uInt32 ucs4; | |||
1488 | uInt16 utf16=0,utf16_1; | |||
1489 | cAppCharP p; | |||
1490 | ||||
1491 | while (aUTF8 && *aUTF8) { | |||
1492 | // convert next UTF8 char to UCS4 | |||
1493 | p=UTF8toUCS4(aUTF8, ucs4); | |||
1494 | if (ucs4==0) break; // error in UTF8 encoding, exit | |||
1495 | // convert line ends | |||
1496 | if (ucs4 == '\n' && aLEM!=lem_none && aLEM!=lem_cstr) { | |||
1497 | // produce specified line end | |||
1498 | utf16_1=0; | |||
1499 | switch (aLEM) { | |||
1500 | case lem_mac : utf16=0x0D; break; | |||
1501 | case lem_filemaker : utf16=0x0B; break; | |||
1502 | case lem_unix : utf16=0x0A; break; | |||
1503 | case lem_dos : | |||
1504 | utf16_1=0x0D; // CR.. | |||
1505 | utf16=0x0A; // ..then LF | |||
1506 | break; | |||
1507 | default: break; | |||
1508 | } | |||
1509 | } | |||
1510 | else { | |||
1511 | // ordinary char, use UTF16 encoding | |||
1512 | utf16_1 = UCS4toUTF16(ucs4,utf16); | |||
1513 | } | |||
1514 | // check if appending UTF16 would exceed max size specified | |||
1515 | if (aMaxBytes!=0 && aUTF16ByteString.size() + (utf16_1 ? 4 : 2) > aMaxBytes) | |||
1516 | break; | |||
1517 | // we can append, advance input pointer | |||
1518 | aUTF8 = p; | |||
1519 | // now append | |||
1520 | if (aBigEndian) { | |||
1521 | // Big end first, Motorola order | |||
1522 | if (utf16_1) { | |||
1523 | aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF); | |||
1524 | aUTF16ByteString += (char)(utf16_1 & 0xFF); | |||
1525 | } | |||
1526 | aUTF16ByteString += (char)((utf16 >> 8) & 0xFF); | |||
1527 | aUTF16ByteString += (char)(utf16 & 0xFF); | |||
1528 | } | |||
1529 | else { | |||
1530 | // Little end first, Intel order | |||
1531 | if (utf16_1) { | |||
1532 | aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF); | |||
1533 | aUTF16ByteString += (char)(utf16_1 & 0xFF); | |||
1534 | } | |||
1535 | aUTF16ByteString += (char)(utf16 & 0xFF); | |||
1536 | aUTF16ByteString += (char)((utf16 >> 8) & 0xFF); | |||
1537 | } | |||
1538 | } // while | |||
1539 | // true if all input consumed | |||
1540 | return (aUTF8==NULL__null) || (*aUTF8==0); | |||
1541 | } // appendUTF8ToUTF16ByteString | |||
1542 | ||||
1543 | ||||
1544 | // add UTF16 byte string as UTF8 to value | |||
1545 | void appendUTF16AsUTF8( | |||
1546 | const uInt16 *aUTF16, | |||
1547 | uInt32 aNumUTF16Chars, | |||
1548 | bool aBigEndian, | |||
1549 | string &aVal, | |||
1550 | bool aConvertLineEnds, | |||
1551 | bool aAllowFilemakerCR | |||
1552 | ) | |||
1553 | { | |||
1554 | uInt32 ucs4; | |||
1555 | uInt16 utf16pair[2]; | |||
1556 | cAppCharP inP = (cAppCharP)aUTF16; | |||
1557 | bool lastWasCR=false; | |||
1558 | ||||
1559 | while (inP && !(*inP==0 && *(inP+1)==0) && aNumUTF16Chars>0) { | |||
| ||||
1560 | // get two words (in case of surrogate pair) | |||
1561 | if (aBigEndian) { | |||
1562 | // Motorola order | |||
1563 | utf16pair[0]=((*(inP) & 0xFF)<<8) + (*(inP+1) & 0xFF); | |||
1564 | if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+2) & 0xFF)<<8) + (*(inP+3) & 0xFF); | |||
1565 | } | |||
1566 | else { | |||
1567 | // Intel order | |||
1568 | utf16pair[0]=((*(inP+1) & 0xFF)<<8) + (*(inP) & 0xFF); | |||
1569 | if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+3) & 0xFF)<<8) + (*(inP+2) & 0xFF); | |||
1570 | } | |||
1571 | cAppCharP hP = (cAppCharP)UTF16toUCS4(utf16pair, ucs4); | |||
1572 | /* | |||
1573 | PDEBUGPRINTFX(DBG_PARSE+DBG_EXOTIC,( | |||
1574 | "Parsed %ld bytes: *(inP)=0x%02hX, *(inP+1)=0x%02hX, *(inP+2)=0x%02hX, *(inP+3)=0x%02hX, utf16pair[0]=0x%04hX, utf16pair[1]=0x%04hX, ucs4=0x%04lX", | |||
1575 | (uInt32)(hP-(cAppCharP)utf16pair), | |||
1576 | (uInt16)*(inP), (uInt16)*(inP+1), (uInt16)*(inP+2), (uInt16)*(inP+3), | |||
1577 | (uInt16)utf16pair[0], (uInt16)utf16pair[1], | |||
1578 | (uInt32)ucs4 | |||
1579 | )); | |||
1580 | */ | |||
1581 | uInt32 bytes=hP-(cAppCharP)utf16pair; | |||
1582 | inP+=bytes; // next UTF16 to check | |||
1583 | aNumUTF16Chars-=bytes/2; // count down UTF16 chars | |||
1584 | // convert line ends if selected | |||
1585 | if (aConvertLineEnds) { | |||
1586 | if (ucs4 == 0x0D) { | |||
1587 | lastWasCR=true; | |||
1588 | continue; | |||
1589 | } | |||
1590 | else { | |||
1591 | if (ucs4 == 0x0A || (aAllowFilemakerCR && ucs4 == 0x0B)) | |||
1592 | ucs4 = '\n'; // convert to LineEnd | |||
1593 | else if (lastWasCR) | |||
1594 | aVal += '\n'; // insert a LineEnd | |||
1595 | lastWasCR=false; | |||
1596 | } | |||
1597 | } | |||
1598 | // append to UTF-8 string | |||
1599 | UCS4toUTF8(ucs4, aVal); | |||
1600 | } | |||
1601 | if (lastWasCR) | |||
1602 | aVal += '\n'; // input string ended on CR, must be shown in output | |||
1603 | } // appendUTF16AsUTF8 | |||
1604 | ||||
1605 | ||||
1606 | ||||
1607 | ||||
1608 | ||||
1609 | ||||
1610 | #ifdef BINTREE_GENERATOR | |||
1611 | ||||
1612 | // add a key/value pair to the binary tree | |||
1613 | void addToBinTree(TBinTreeNode *&aBinTree, treeval_t aMinKey, treeval_t aMaxKey, treeval_t aKey, treeval_t aValue) | |||
1614 | { | |||
1615 | // start at root | |||
1616 | TBinTreeNode **nextPP = &aBinTree; | |||
1617 | treeval_t cmpval; | |||
1618 | do { | |||
1619 | // create the new decision value from max and min | |||
1620 | cmpval = aMinKey+((aMaxKey-aMinKey) >> 1); | |||
1621 | // create the node if not already there | |||
1622 | if (*nextPP==NULL__null) { | |||
1623 | *nextPP = new TBinTreeNode; | |||
1624 | (*nextPP)->key = cmpval; | |||
1625 | (*nextPP)->nextHigher=NULL__null; | |||
1626 | (*nextPP)->nextLowerOrEqual=NULL__null; | |||
1627 | (*nextPP)->value=0; | |||
1628 | } | |||
1629 | // check if the node CREATED is a leaf node | |||
1630 | // this is the case if max==min | |||
1631 | if (aMaxKey==aMinKey) { | |||
1632 | // save leaf value (possibly overwriting existing leaf value for same code) | |||
1633 | (*nextPP)->value=aValue; | |||
1634 | break; | |||
1635 | } | |||
1636 | // decide which way to go | |||
1637 | if (aKey>cmpval) { | |||
1638 | // go to the "higher" side | |||
1639 | nextPP = &((*nextPP)->nextHigher); | |||
1640 | // determine new minimum | |||
1641 | aMinKey = cmpval+1; // minimum must be higher than cmpval | |||
1642 | } | |||
1643 | else { | |||
1644 | // go to the "lower or equal" side | |||
1645 | nextPP = &((*nextPP)->nextLowerOrEqual); | |||
1646 | // determine new maximum | |||
1647 | aMaxKey = cmpval; // maximum must be lower or equal than cmpval | |||
1648 | } | |||
1649 | } while(true); | |||
1650 | } // addToBinTree | |||
1651 | ||||
1652 | ||||
1653 | // dispose a bintree | |||
1654 | void disposeBinTree(TBinTreeNode *&aBinTree) | |||
1655 | { | |||
1656 | if (!aBinTree) return; | |||
1657 | if (aBinTree->nextHigher) | |||
1658 | disposeBinTree(aBinTree->nextHigher); | |||
1659 | if (aBinTree->nextLowerOrEqual) | |||
1660 | disposeBinTree(aBinTree->nextLowerOrEqual); | |||
1661 | delete aBinTree; | |||
1662 | aBinTree=NULL__null; | |||
1663 | } // disposeBinTree | |||
1664 | ||||
1665 | ||||
1666 | // convert key to value using a flat bintree | |||
1667 | treeval_t searchBintree(TBinTreeNode *aBinTree, treeval_t aKey, treeval_t aUndefValue, treeval_t aMinKey, treeval_t aMaxKey) | |||
1668 | { | |||
1669 | treeval_t cmpval; | |||
1670 | while(aBinTree) { | |||
1671 | // create the new decision value from max and min | |||
1672 | cmpval = aMinKey+((aMaxKey-aMinKey) >> 1); | |||
1673 | // must match stored cmpval | |||
1674 | if (cmpval!=aBinTree->key) | |||
1675 | return aUndefValue; | |||
1676 | // check if next node must be leaf if the tree contains our key, | |||
1677 | // this is the case if max==min | |||
1678 | if (aMaxKey==aMinKey) { | |||
1679 | if (aBinTree->nextHigher!=NULL__null || aBinTree->nextLowerOrEqual!=NULL__null) { | |||
1680 | // no leaf value here, should not be the case ever (we should have | |||
1681 | // encountered a node with no left or right link before this!) | |||
1682 | return aUndefValue; | |||
1683 | } | |||
1684 | else { | |||
1685 | // found a leaf value here | |||
1686 | return aBinTree->value; | |||
1687 | } | |||
1688 | } | |||
1689 | // decide which way to go | |||
1690 | if (aKey>cmpval) { | |||
1691 | // go to the "higher" side = just next element in array, except if we have the special marker here | |||
1692 | if (aBinTree->nextHigher == NULL__null) | |||
1693 | return aUndefValue; // we should go higher-side, but can't -> unknown key | |||
1694 | aBinTree=aBinTree->nextHigher; | |||
1695 | // determine new minimum | |||
1696 | aMinKey = cmpval+1; // minimum must be higher than cmpval | |||
1697 | } | |||
1698 | else { | |||
1699 | // go to the "lower" side = element at index indicated by current element, except if we have the special marker here | |||
1700 | if (aBinTree->nextLowerOrEqual == NULL__null) | |||
1701 | return aUndefValue; // we should go lower-or-equal-side, but can't -> unknown key | |||
1702 | aBinTree=aBinTree->nextLowerOrEqual; | |||
1703 | // determine new maximum | |||
1704 | aMaxKey = cmpval; // maximum must be lower or equal than cmpval | |||
1705 | } | |||
1706 | } | |||
1707 | // if we reach the end of the array, key is not in the tree | |||
1708 | return aUndefValue; | |||
1709 | } // searchBintree | |||
1710 | ||||
1711 | ||||
1712 | ||||
1713 | ||||
1714 | // make a flat form representation of the bintree in a one-dimensional array | |||
1715 | // - higher-side links are implicit (nodes following each other), | |||
1716 | // lower-or-equal-side links are explicit | |||
1717 | static bool flatBinTreeRecursion( | |||
1718 | TBinTreeNode *aBinTree, size_t &aIndex, treeval_t *aFlatArray, size_t aArrSize, treeval_t aLinksStart, treeval_t aLinksEnd | |||
1719 | ) | |||
1720 | { | |||
1721 | // check if array is full | |||
1722 | if (aIndex>=aArrSize) | |||
1723 | return false; | |||
1724 | // examine node to flatten | |||
1725 | if (aBinTree->nextHigher==NULL__null && aBinTree->nextLowerOrEqual==NULL__null) { | |||
1726 | // this is a leaf node, containing only the value | |||
1727 | if (aBinTree->value>=aLinksStart && aBinTree->value<=aLinksEnd) | |||
1728 | return false; // link space and value space overlap | |||
1729 | aFlatArray[aIndex]=aBinTree->value; | |||
1730 | aIndex++; | |||
1731 | } | |||
1732 | else if (aBinTree->nextHigher==NULL__null) { | |||
1733 | // lower-side-only node: set special mark to specify that lower-or-equal side | |||
1734 | // implicitly follows (instead of higher-side) | |||
1735 | aFlatArray[aIndex]=aLinksStart + 1; // no node points to the immediately following node explicitly, so 1 can be used as special marker | |||
1736 | aIndex++; | |||
1737 | // - recurse to generate it | |||
1738 | if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) | |||
1739 | return false; | |||
1740 | } | |||
1741 | else { | |||
1742 | // this is a branch | |||
1743 | // - lower-or-equal side is represented as an index in the array | |||
1744 | aFlatArray[aIndex]=aLinksStart + 0; // default to not-existing (no node points to itself, so 0 can be used as NIL index value) | |||
1745 | // - higher side branch follows immediately | |||
1746 | size_t linkindex = aIndex++; | |||
1747 | // - recurse to generate it | |||
1748 | if (!flatBinTreeRecursion(aBinTree->nextHigher,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) | |||
1749 | return false; | |||
1750 | // - now we have the index where we must insert the lower-or-equal side | |||
1751 | if (aBinTree->nextLowerOrEqual!=NULL__null) { | |||
1752 | // there is a lower-or-equal side | |||
1753 | // - place relative link from original node | |||
1754 | uInt32 rellink=aIndex-linkindex; | |||
1755 | if ((uInt32)aLinksStart+rellink>(uInt32)aLinksEnd-1L) { | |||
1756 | // we need a long link | |||
1757 | // - move generated higher side branch one up | |||
1758 | for (size_t k=aIndex-1; k>linkindex; k--) aFlatArray[k+1]=aFlatArray[k]; | |||
1759 | aIndex++; // we've eaten up one extra entry now | |||
1760 | // - now set long link | |||
1761 | aFlatArray[linkindex]=aLinksEnd-1; // long link marker | |||
1762 | if (rellink>0xFFFF) | |||
1763 | return false; // cannot jump more than 64k | |||
1764 | aFlatArray[linkindex+1]=rellink; // long link | |||
1765 | } | |||
1766 | else { | |||
1767 | // short link is ok | |||
1768 | aFlatArray[linkindex]=aLinksStart+rellink; | |||
1769 | } | |||
1770 | // - now create the lower-or-equal side | |||
1771 | if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd)) | |||
1772 | return false; | |||
1773 | } | |||
1774 | } | |||
1775 | return true; | |||
1776 | } // flatBinTreeRecursion | |||
1777 | ||||
1778 | ||||
1779 | // make a flat form representation of the bintree in a one-dimensional array | |||
1780 | // - higher-side links are implicit (nodes following each other), | |||
1781 | // lower-or-equal-side links are explicit | |||
1782 | bool flatBinTree( | |||
1783 | TBinTreeNode *aBinTree, TConvFlatTree &aFlatTree, size_t aArrSize, | |||
1784 | treeval_t aMinKey, treeval_t aMaxKey, treeval_t aLinksStart, treeval_t aLinksEnd | |||
1785 | ) | |||
1786 | { | |||
1787 | // save tree params | |||
1788 | aFlatTree.numelems=0; | |||
1789 | aFlatTree.minkey=aMinKey; | |||
1790 | aFlatTree.maxkey=aMaxKey; | |||
1791 | aFlatTree.linksstart=aLinksStart; | |||
1792 | aFlatTree.linksend=aLinksEnd; | |||
1793 | // now create actual tree | |||
1794 | size_t index=0; | |||
1795 | if (!flatBinTreeRecursion(aBinTree,index,aFlatTree.elements,aArrSize,aLinksStart,aLinksEnd)) | |||
1796 | return false; | |||
1797 | aFlatTree.numelems=index; // actual length of array | |||
1798 | return true; | |||
1799 | } // flatBinTree | |||
1800 | ||||
1801 | ||||
1802 | ||||
1803 | ||||
1804 | #endif | |||
1805 | ||||
1806 | ||||
1807 | // convert key to value using a flat bintree | |||
1808 | treeval_t searchFlatBintree(const TConvFlatTree &aFlatTree, treeval_t aKey, treeval_t aUndefValue) | |||
1809 | { | |||
1810 | treeval_t cmpval,thisnode; | |||
1811 | size_t index=0; | |||
1812 | // get start min and max | |||
1813 | treeval_t minKey = aFlatTree.minkey; | |||
1814 | treeval_t maxKey = aFlatTree.maxkey; | |||
1815 | // reject out-of-bounds keys immediately | |||
1816 | if (aKey<minKey || aKey>maxKey) | |||
1817 | return aUndefValue; | |||
1818 | do { | |||
1819 | // create the new decision value from max and min | |||
1820 | cmpval = minKey+((maxKey-minKey) >> 1); | |||
1821 | thisnode = aFlatTree.elements[index]; | |||
1822 | // check if next node must be leaf if the tree contains our key, | |||
1823 | // this is the case if max==min | |||
1824 | if (maxKey==minKey) { | |||
1825 | #ifdef BINTREE_GENERATOR | |||
1826 | if (thisnode>=aFlatTree.linksstart && thisnode<=aFlatTree.linksend) { | |||
1827 | // no leaf value here, should not be the case ever (we should have | |||
1828 | // encountered a node with no left or right link before this!) | |||
1829 | return aUndefValue; | |||
1830 | } | |||
1831 | else | |||
1832 | #endif | |||
1833 | { | |||
1834 | // found a leaf value here | |||
1835 | return (treeval_t) thisnode; | |||
1836 | } | |||
1837 | } | |||
1838 | // decide which way to go | |||
1839 | if (aKey>cmpval) { | |||
1840 | // go to the "higher" side = just next element in array, except if we have the special marker here | |||
1841 | if (thisnode == aFlatTree.linksstart+1) | |||
1842 | return aUndefValue; // we should go higher-side, but can't -> unknown key | |||
1843 | // next node is next index (or one more in case this is a long link) | |||
1844 | if (thisnode == aFlatTree.linksend-1) | |||
1845 | index++; | |||
1846 | index++; | |||
1847 | // determine new minimum | |||
1848 | minKey = cmpval+1; // minimum must be higher than cmpval | |||
1849 | } | |||
1850 | else { | |||
1851 | // go to the "lower" side = element at index indicated by current element, except if we have the special marker here | |||
1852 | if (thisnode == aFlatTree.linksstart+1) | |||
1853 | index++; // special case, "lower" side is immediately following because there is no "higher" side | |||
1854 | else { | |||
1855 | #ifdef BINTREE_GENERATOR | |||
1856 | // if node contains a leaf value instead of a link, something is wrong | |||
1857 | if (thisnode<aFlatTree.linksstart || thisnode>aFlatTree.linksend) | |||
1858 | return aUndefValue; // no leaf expected here | |||
1859 | #endif | |||
1860 | if (thisnode==aFlatTree.linksend-1) { | |||
1861 | // long link | |||
1862 | index++; // skip long link marker | |||
1863 | thisnode = aFlatTree.elements[index]; // get link value | |||
1864 | index = index+thisnode; // jump by link value | |||
1865 | } | |||
1866 | else { | |||
1867 | // short link | |||
1868 | index = index+(thisnode-aFlatTree.linksstart); // get index of next node (relative branch) | |||
1869 | } | |||
1870 | if (index==0) | |||
1871 | return aUndefValue; // there is no link | |||
1872 | } | |||
1873 | // determine new maximum | |||
1874 | maxKey = cmpval; // maximum must be lower or equal than cmpval | |||
1875 | } | |||
1876 | } while(index<aFlatTree.numelems); | |||
1877 | // if we reach the end of the array, key is not in the tree | |||
1878 | return aUndefValue; | |||
1879 | } // searchFlatBintree | |||
1880 | ||||
1881 | // MD5 and B64 given string | |||
1882 | void MD5B64(const char *aString, sInt32 aLen, string &aMD5B64) | |||
1883 | { | |||
1884 | // determine input length | |||
1885 | if (aLen<=0) aLen=strlen(aString); | |||
1886 | // calc MD5 | |||
1887 | md5::SYSYNC_MD5_CTX context; | |||
1888 | uInt8 digest[16]; | |||
1889 | md5::Init (&context); | |||
1890 | md5::Update (&context, (const uInt8 *)aString,aLen); | |||
1891 | md5::Final (digest, &context); | |||
1892 | // b64 encode the MD5 digest | |||
1893 | uInt32 b64md5len; | |||
1894 | char *b64md5=b64::encode(digest,16,&b64md5len); | |||
1895 | // assign result | |||
1896 | aMD5B64.assign(b64md5,b64md5len); | |||
1897 | // done | |||
1898 | b64::free(b64md5); // return buffer allocated by b64::encode | |||
1899 | } // MD5B64 | |||
1900 | ||||
1901 | ||||
1902 | // format as Timestamp for use in debug logs | |||
1903 | void StringObjTimestamp(string &aStringObj, lineartime_t aTimer) | |||
1904 | { | |||
1905 | // format the time | |||
1906 | if (aTimer==noLinearTime) { | |||
1907 | aStringObj = "<no time>"; | |||
1908 | return; | |||
1909 | } | |||
1910 | sInt16 y,mo,d,h,mi,s,ms; | |||
1911 | lineartime2date(aTimer,&y,&mo,&d); | |||
1912 | lineartime2time(aTimer,&h,&mi,&s,&ms); | |||
1913 | StringObjPrintf( | |||
1914 | aStringObj, | |||
1915 | "%04d-%02d-%02d %02d:%02d:%02d.%03d", | |||
1916 | y,mo,d,h,mi,s,ms | |||
1917 | ); | |||
1918 | } // StringObjTimestamp | |||
1919 | ||||
1920 | ||||
1921 | // format as hex string | |||
1922 | void StringObjHexString(string &aStringObj, const uInt8 *aBinary, uInt32 aBinSz) | |||
1923 | { | |||
1924 | aStringObj.erase(); | |||
1925 | if (!aBinary) return; | |||
1926 | while (aBinSz>0) { | |||
1927 | AppendHexByte(aStringObj,*aBinary++); | |||
1928 | aBinSz--; | |||
1929 | } | |||
1930 | } // StringObjHexString | |||
1931 | ||||
1932 | ||||
// Add (already encoded!) CGI to an existing URL string.
// - aStringObj  : URL to extend
// - aCGI        : CGI text to append
// - noduplicate : if set, nothing is added when aCGI already occurs in the URL
// Returns true if aCGI was appended, false if it was left out as duplicate.
bool addCGItoString(string &aStringObj, cAppCharP aCGI, bool noduplicate)
{
  // suppress duplicates if requested
  if (noduplicate && aStringObj.find(aCGI)!=string::npos)
    return false; // nothing added
  // add CGI separator if and only if none exists already
  const bool hasSeparator = aStringObj.find("?")!=string::npos;
  if (!hasSeparator)
    aStringObj += '?';
  aStringObj += aCGI;
  return true; // added
}
1945 | ||||
1946 | ||||
1947 | // encode string for being used as a CGI key/value element | |||
1948 | string encodeForCGI(cAppCharP aCGI) | |||
1949 | { | |||
1950 | string cgi; | |||
1951 | cAppCharP p = aCGI; | |||
1952 | while (p && *p) { | |||
1953 | if (*p>0x7E || *p<=0x20 || *p=='%' || *p=='?' || *p=='&' || *p=='#') { | |||
1954 | // CGI encode these | |||
1955 | cgi += '%'; | |||
1956 | AppendHexByte(cgi, *p); | |||
1957 | } | |||
1958 | else { | |||
1959 | // use as-is | |||
1960 | cgi += *p; | |||
1961 | } | |||
1962 | p++; | |||
1963 | } | |||
1964 | return cgi; | |||
1965 | } // encodeForCGI | |||
1966 | ||||
1967 | ||||
// Count the bits set in a 32 bit mask.
// - aMask : value to examine
// Returns the number of 1 bits in aMask.
int countbits(uInt32 aMask)
{
  int count=0;
  // peel off the lowest bit until nothing is left
  for (uInt32 remaining=aMask; remaining!=0; remaining=remaining >> 1) {
    if (remaining & 0x0000001) count++;
  }
  return count;
} // countbits
1979 | ||||
1980 | ||||
// Make a string uppercase in place, character by character, using toupper().
// - aString : string to convert
void StringUpper(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    // toupper() is only defined for values representable as unsigned char (or EOF),
    // so chars >=0x80 must not be passed as (possibly negative) plain char
    aString[k]=(char)toupper((unsigned char)aString[k]);
  }
} // StringUpper
1986 | ||||
1987 | ||||
// Make a string lowercase in place, character by character, using tolower().
// - aString : string to convert
void StringLower(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    // tolower() is only defined for values representable as unsigned char (or EOF),
    // so chars >=0x80 must not be passed as (possibly negative) plain char
    aString[k]=(char)tolower((unsigned char)aString[k]);
  }
} // StringLower
1993 | ||||
1994 | ||||
1995 | // Substitute occurences of pattern with replacement in string | |||
1996 | void StringSubst( | |||
1997 | string &aString, const char *aPattern, const string &aReplacement, | |||
1998 | sInt32 aPatternLen, | |||
1999 | TCharSets aCharSet, TLineEndModes aLEM, | |||
2000 | TQuotingModes aQuotingMode | |||
2001 | ) | |||
2002 | { | |||
2003 | StringSubst( | |||
2004 | aString, aPattern, | |||
2005 | aReplacement.c_str(), | |||
2006 | aPatternLen, | |||
2007 | aReplacement.size(), | |||
2008 | aCharSet, aLEM, aQuotingMode | |||
2009 | ); | |||
2010 | } // StringSubst | |||
2011 | ||||
2012 | ||||
2013 | // Substitute occurences of pattern with replacement in string | |||
2014 | void StringSubst( | |||
2015 | string &aString, const char *aPattern, const char *aReplacement, | |||
2016 | sInt32 aPatternLen, sInt32 aReplacementLen, | |||
2017 | TCharSets aCharSet, TLineEndModes aLEM, | |||
2018 | TQuotingModes aQuotingMode | |||
2019 | ) | |||
2020 | { | |||
2021 | string::size_type i; | |||
2022 | string s; | |||
2023 | i=0; | |||
2024 | if (aPatternLen<0) aPatternLen=strlen(aPattern); | |||
2025 | // convert if needed | |||
2026 | if (!aReplacement) { | |||
2027 | aReplacement=""; // empty string if not specified | |||
2028 | aReplacementLen=0; | |||
2029 | } | |||
2030 | if (aCharSet!=chs_unknown) { | |||
2031 | appendUTF8ToString(aReplacement,s,aCharSet,aLEM,aQuotingMode); | |||
2032 | aReplacement=s.c_str(); | |||
2033 | aReplacementLen=s.size(); | |||
2034 | } | |||
2035 | else { | |||
2036 | if (aReplacementLen<0) aReplacementLen=strlen(aReplacement); | |||
2037 | } | |||
2038 | // now replace | |||
2039 | while((i=aString.find(aPattern,i))!=string::npos) { | |||
2040 | aString.replace(i,aPatternLen,aReplacement); | |||
2041 | i+=aReplacementLen; | |||
2042 | } | |||
2043 | } // StringSubst | |||
2044 | ||||
2045 | ||||
2046 | // Substitute occurences of pattern with replacement in string | |||
2047 | void StringSubst(string &aString, const char *aPattern, const string &aReplacement, sInt32 aPatternLen) | |||
2048 | { | |||
2049 | StringSubst(aString,aPattern,aReplacement.c_str(),aPatternLen,aReplacement.size()); | |||
2050 | } // StringSubst | |||
2051 | ||||
2052 | ||||
2053 | // Substitute occurences of pattern with integer number in string | |||
2054 | void StringSubst(string &aString, const char *aPattern, sInt32 aNumber, sInt32 aPatternLen) | |||
2055 | { | |||
2056 | string s; | |||
2057 | StringObjPrintf(s,"%ld",(long)aNumber); | |||
2058 | StringSubst(aString,aPattern,s,aPatternLen); | |||
2059 | } // StringSubst | |||
2060 | ||||
2061 | ||||
2062 | ||||
2063 | // copy PCdata contents into std::string object | |||
2064 | void smlPCDataToStringObj(const SmlPcdataPtr_t aPcdataP, string &aStringObj) | |||
2065 | { | |||
2066 | if (!aPcdataP || !aPcdataP->content) { | |||
2067 | // no content at all | |||
2068 | aStringObj.erase(); | |||
2069 | } | |||
2070 | else if ( | |||
2071 | // NOTE: Opaque works only with modified syncML toolkit which | |||
2072 | // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING | |||
2073 | aPcdataP->contentType == SML_PCDATA_STRING || | |||
2074 | aPcdataP->contentType == SML_PCDATA_OPAQUE | |||
2075 | ) { | |||
2076 | // string or opaque type | |||
2077 | aStringObj.assign((char *)aPcdataP->content, aPcdataP->length); | |||
2078 | } | |||
2079 | else if (aPcdataP->contentType == SML_PCDATA_EXTENSION) { | |||
2080 | // extension type | |||
2081 | StringObjPrintf(aStringObj,"[PCDATA_EXTENSION Type=%hd]",(sInt16)aPcdataP->extension); | |||
2082 | } | |||
2083 | else { | |||
2084 | // other type | |||
2085 | StringObjPrintf(aStringObj,"[PCDATA Type=%hd]",(sInt16)aPcdataP->contentType); | |||
2086 | } | |||
2087 | } // smlPCDataToStringObj | |||
2088 | ||||
2089 | ||||
2090 | // returns item string or empty string (NEVER NULL) | |||
2091 | const char *smlItemDataToCharP(const SmlItemPtr_t aItemP) | |||
2092 | { | |||
2093 | if (!aItemP) return ""; | |||
2094 | return smlPCDataToCharP(aItemP->data); | |||
2095 | } // smlItemDataToCharP | |||
2096 | ||||
2097 | ||||
2098 | // returns first item string or empty string (NEVER NULL) | |||
2099 | const char *smlFirstItemDataToCharP(const SmlItemListPtr_t aItemListP) | |||
2100 | { | |||
2101 | if (!aItemListP) return ""; | |||
2102 | return smlItemDataToCharP(aItemListP->item); | |||
2103 | } // smlFirstItemDataToCharP | |||
2104 | #endif //SYSYNC_ENGINE | |||
2105 | ||||
2106 | // returns pointer to PCdata contents or null string. If aSizeP!=NULL, length will be stored in *aSize | |||
2107 | const char *smlPCDataToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP) | |||
2108 | { | |||
2109 | const char *str = smlPCDataOptToCharP(aPcdataP, aSizeP); | |||
2110 | if (str) return str; | |||
2111 | return ""; | |||
2112 | } // smlPCDataToCharP | |||
2113 | ||||
2114 | ||||
2115 | // returns pointer to PCdata contents if existing, NULL otherwise. | |||
2116 | // If aSizeP!=NULL, length will be stored in *aSize | |||
2117 | const char *smlPCDataOptToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP) | |||
2118 | { | |||
2119 | if (!aPcdataP || !aPcdataP->content) { | |||
2120 | return NULL__null; // we have no value, it could be empty howevert | |||
2121 | if (aSizeP) *aSizeP=0; | |||
2122 | } | |||
2123 | if (aPcdataP->length==0) { | |||
2124 | // empty content | |||
2125 | if (aSizeP) *aSizeP=0; | |||
2126 | return ""; // return empty string | |||
2127 | } | |||
2128 | else if ( | |||
2129 | // NOTE: Opaque works only with modified syncML toolkit which | |||
2130 | // makes sure opaque content is ALSO TERMINATED LIKE A C-STRING | |||
2131 | aPcdataP->contentType == SML_PCDATA_STRING || | |||
2132 | aPcdataP->contentType == SML_PCDATA_CDATA || // XML only | |||
2133 | aPcdataP->contentType == SML_PCDATA_OPAQUE // WBXML only | |||
2134 | ) { | |||
2135 | // return pointer to content | |||
2136 | if (aSizeP) *aSizeP=aPcdataP->length; | |||
2137 | return (char *) aPcdataP->content; | |||
2138 | } | |||
2139 | else { | |||
2140 | // no string | |||
2141 | if (aSizeP) *aSizeP=11; | |||
2142 | return "[no string]"; | |||
2143 | } | |||
2144 | } // smlPCDataOptToCharP | |||
2145 | ||||
2146 | ||||
2147 | // returns pointer to source or target LocURI | |||
2148 | const char *smlSrcTargLocURIToCharP(const SmlTargetPtr_t aSrcTargP) | |||
2149 | { | |||
2150 | if (!aSrcTargP || !aSrcTargP->locURI) { | |||
2151 | return ""; // empty string | |||
2152 | } | |||
2153 | else { | |||
2154 | // return PCdata string contents | |||
2155 | return smlPCDataToCharP(aSrcTargP->locURI); | |||
2156 | } | |||
2157 | } // smlSrcTargLocURIToCharP | |||
2158 | ||||
2159 | ||||
2160 | // returns pointer to source or target LocName | |||
2161 | const char *smlSrcTargLocNameToCharP(const SmlTargetPtr_t aSrcTargP) | |||
2162 | { | |||
2163 | if (!aSrcTargP || !aSrcTargP->locName) { | |||
2164 | return ""; // empty string | |||
2165 | } | |||
2166 | else { | |||
2167 | // return PCdata string contents | |||
2168 | return smlPCDataToCharP(aSrcTargP->locName); | |||
2169 | } | |||
2170 | } // smlSrcTargLocNameToCharP | |||
2171 | ||||
2172 | ||||
2173 | #ifdef SYSYNC_ENGINE1 | |||
2174 | // returns error code made ready for SyncML sending (that is, remove offset | |||
2175 | // of 10000 if present, and make generic error 500 for non-SyncML errors, | |||
2176 | // and return LOCERR_OK as 200) | |||
2177 | localstatus syncmlError(localstatus aErr) | |||
2178 | { | |||
2179 | if (aErr==LOCERR_OK) return 200; // SyncML ok code | |||
2180 | if (aErr<999) return aErr; // return as is | |||
2181 | if (aErr>=LOCAL_STATUS_CODE+100 && aErr<=999) | |||
2182 | return aErr-LOCAL_STATUS_CODE; // return with offset removed | |||
2183 | // no suitable conversion | |||
2184 | return 500; // return generic "bad" | |||
2185 | } // localError | |||
2186 | ||||
2187 | ||||
2188 | // returns error code made local (that is, offset by 10000 in case aErr is a | |||
2189 | // SyncML status code <10000, and convert 200 into LOCERR_OK) | |||
2190 | localstatus localError(localstatus aErr) | |||
2191 | { | |||
2192 | if (aErr==200 || aErr==0) return LOCERR_OK; | |||
2193 | if (aErr<LOCAL_STATUS_CODE) return aErr+LOCAL_STATUS_CODE; | |||
2194 | return aErr; | |||
2195 | } // localError | |||
2196 | ||||
2197 | ||||
2198 | // returns pure relative URI, if specified relative or absolute to | |||
2199 | // given server URI | |||
2200 | const char *relativeURI(const char *aURI,const char *aServerURI) | |||
2201 | { | |||
2202 | // check for "./" type relative URI | |||
2203 | if (strnncmp(aURI,URI_RELPREFIX"./",2)==0) { | |||
2204 | // relative URI prefixed with "./", just zap the relative part | |||
2205 | return aURI+2; | |||
2206 | } | |||
2207 | else if (aServerURI) { | |||
2208 | // test if absolute URI specifying the right server | |||
2209 | uInt32 n=strlen(aServerURI); | |||
2210 | if (strnncmp(aURI,aServerURI,n)==0) { | |||
2211 | // beginning of URI matches server's URI | |||
2212 | const char *p=aURI+n; | |||
2213 | // skip delimiter, if any | |||
2214 | if (*p=='/') p++; | |||
2215 | // return relative part of URI | |||
2216 | return p; | |||
2217 | } | |||
2218 | } | |||
2219 | // just return unmodified | |||
2220 | return aURI; | |||
2221 | } // relativeURI | |||
2222 | ||||
2223 | ||||
// Split hostname into address and port parts at the first colon.
// - aHost : text to split
// - aAddr : if not NULL, receives the text before the first colon (or all of it)
// - aPort : if not NULL, receives the text after the first colon (or is emptied)
void splitHostname(const char *aHost,string *aAddr,string *aPort)
{
  // a port spec starts at the first colon, if there is one
  const char *colon=strchr(aHost,':');
  if (aAddr) {
    if (colon) aAddr->assign(aHost,colon-aHost);
    else aAddr->assign(aHost);
  }
  if (aPort) {
    if (colon) aPort->assign(colon+1);
    else aPort->erase(); // no port spec
  }
} // splitHostname
2241 | ||||
// Translate %XX into the corresponding character, replacing the contents of *str.
// - str : string to decode, NULL is ignored
// Both upper and lower case hex digits are accepted. Invalid input is skipped silently:
// - a '%' followed by less than two characters ends decoding, dropping the rest
// - an invalid first digit counts as 0
// - an invalid second digit drops the whole three character sequence
void urlDecode(string *str)
{
  // nothing todo?
  if (!str ||
      str->find('%') == string::npos) return;

  string replacement;
  replacement.reserve(str->size());
  const char *in = str->c_str();
  char c;
  while ((c = *in++) != 0) {
    if (c != '%') {
      // ordinary character
      replacement.append(1, c);
      continue;
    }
    unsigned char value = 0;
    // high nibble
    // - tolower() is only defined for values representable as unsigned char
    //   (or EOF), so the plain char must not be passed directly
    c = (char)tolower((unsigned char)*in++);
    if (!c) break;
    if (c >= '0' && c <= '9') {
      value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      value = c - 'a' + 10;
    }
    // (else: silently skip invalid character)
    value *= 16;
    // low nibble
    c = (char)tolower((unsigned char)*in++);
    if (!c) break;
    if (c >= '0' && c <= '9') {
      value += c - '0';
    } else if (c >= 'a' && c <= 'f') {
      value += c - 'a' + 10;
    } else {
      // silently skip invalid character
      continue;
    }
    replacement.append(1, (char)value);
  }
  *str = replacement;
}
2285 | ||||
// Translate every character for which isalnum() is false into %XX (two upper
// case hex digits of the character's byte value), replacing the contents of *str.
// - str : string to encode, NULL is ignored. The string is only assigned to if
//         at least one character had to be encoded.
void urlEncode(string *str)
{
  if (!str) {
    return;
  }

  static const char hexDigits[] = "0123456789ABCDEF";
  string replacement;
  bool encodedSomething = false;
  for (const char *t = str->c_str(); *t != 0; t++) {
    // Work on the byte value: a negative plain char is not a valid argument for
    // isalnum(), and printed as int it would give more than two hex digits.
    const unsigned char byte = (unsigned char)*t;
    if (isalnum(byte)) {
      // safe character, use as-is
      replacement += *t;
    }
    else {
      replacement += '%';
      replacement += hexDigits[byte >> 4];
      replacement += hexDigits[byte & 0x0F];
      encodedSomething = true;
    }
  }

  if (encodedSomething) {
    // We found unsafe characters, so the encoded text replaces the original.
    *str = replacement;
  }
}
2315 | ||||
// Split URL into protocol, auth-info (user, password), hostname, port, document
// name and query. All parts are returned exactly as found in aURI.
// - aURI      : URL to split
// - aProtocol : text before the first colon, empty if aURI contains no colon
// - aUser, aPasswd : auth-info; only recognized if a protocol was found and the
//               host part has the form "user:password@...", emptied otherwise
// - aHost     : host name, without port
// - aPort     : text after the first colon of the host part, empty if none
// - aDoc      : text after the slash ending the host part, up to the query;
//               the slash itself is never included
// - aQuery    : text after the first question mark following the host part
// All output pointers may be NULL if the part is not wanted.
void splitURL(const char *aURI,string *aProtocol,string *aHost,
              string *aDoc, string *aUser, string *aPasswd,
              string *aPort, string *aQuery)
{
  const char *rest = aURI; // part of the URI not yet consumed
  bool authFound = false;

  // extract protocol
  const char *protEnd = strchr(rest,':');
  if (protEnd) {
    // protocol found
    if (aProtocol) aProtocol->assign(rest,protEnd-rest);
    rest = protEnd+1; // past colon
    // past trailing slashes (two expected, ignore if less are given)
    for (int slashes=0; slashes<2 && *rest=='/'; slashes++) rest++;
    // host part (possibly with auth info and port) ends at the next slash, if any
    const char *hostPartEnd = strchr(rest,'/');
    if (!hostPartEnd) hostPartEnd = rest+strlen(rest);
    const string hostPart(rest,hostPartEnd-rest);
    // auth info needs a colon followed (not necessarily directly) by an @
    const string::size_type atPos = hostPart.find('@');
    const string::size_type colonPos = hostPart.find(':');
    if (atPos!=string::npos && colonPos!=string::npos && atPos>colonPos) {
      // auth exists
      if (aUser) aUser->assign(hostPart,0,colonPos);
      if (aPasswd) aPasswd->assign(hostPart,colonPos+1,atPos-colonPos-1);
      // skip auth in full string
      rest += atPos+1;
      authFound = true;
    }
  }
  else {
    // no protocol found (and thus no auth either)
    if (aProtocol) aProtocol->erase();
  }
  if (!authFound) {
    if (aUser) aUser->erase();
    if (aPasswd) aPasswd->erase();
  }

  // rest now starts with the host: separate hostname, document and query
  std::string hostAndPort;
  const char *slash = strchr(rest,'/');
  // - the query is searched after the slash if there is one, directly after the host otherwise
  const char *docStart = slash ? slash+1 : rest;
  const char *question = strchr(docStart,'?');
  if (slash) {
    // host part stops at slash, which is never put into the docname even
    // if that leaves it empty (caller expected to add slash as needed)
    hostAndPort.assign(rest,slash-rest);
    if (aDoc) {
      if (question) aDoc->assign(docStart,question-docStart);
      else aDoc->assign(docStart);
    }
  }
  else {
    // no path: doc part left empty, host ends at the query or at end of string
    if (aDoc) aDoc->erase();
    if (question) hostAndPort.assign(rest,question-rest);
    else hostAndPort.assign(rest);
  }
  if (aQuery) {
    if (question) aQuery->assign(question+1);
    else aQuery->erase();
  }

  // remove optional port from the still undecoded host part (url-decoding it
  // first might introduce new : characters into the host name)
  const string::size_type portSep = hostAndPort.find(':');
  if (portSep == hostAndPort.npos) {
    if (aHost) aHost->assign(hostAndPort);
    if (aPort) aPort->erase();
  }
  else {
    if (aHost) aHost->assign(hostAndPort.substr(0,portSep));
    if (aPort) aPort->assign(hostAndPort.substr(portSep+1));
  }
} // splitURL
2420 | ||||
2421 | #ifdef SPLIT_URL_MAIN | |||
2422 | ||||
2423 | #include <stdio.h> | |||
2424 | #include <assert.h> | |||
2425 | ||||
2426 | static void test(const std::string &in, const std::string &expected) | |||
2427 | { | |||
2428 | string protocol, host, doc, user, password, port, query; | |||
2429 | char buffer[1024]; | |||
2430 | ||||
2431 | splitURL(in.c_str(), &protocol, &host, &doc, &user, &password, &port, &query); | |||
2432 | ||||
2433 | // URL-decode each part | |||
2434 | urlDecode(&protocol); | |||
2435 | urlDecode(&host); | |||
2436 | urlDecode(&doc); | |||
2437 | urlDecode(&user); | |||
2438 | urlDecode(&password); | |||
2439 | ||||
2440 | sprintf(buffer, | |||
2441 | "prot '%s' user '%s' passwd '%s' host '%s' port '%s' doc '%s' query '%s'", | |||
2442 | protocol.c_str(), | |||
2443 | user.c_str(), | |||
2444 | password.c_str(), | |||
2445 | host.c_str(), | |||
2446 | port.c_str(), | |||
2447 | doc.c_str(), | |||
2448 | query.c_str()); | |||
2449 | printf("%s -> %s\n", in.c_str(), buffer); | |||
2450 | assert(expected == buffer); | |||
2451 | } | |||
2452 | ||||
2453 | int main(int argc, char **argv) | |||
2454 | { | |||
2455 | test("http://user:passwd@host/patha/pathb?query", | |||
2456 | "prot 'http' user 'user' passwd 'passwd' host 'host' port '' doc 'patha/pathb' query 'query'"); | |||
2457 | test("http://user:passwd@host:port/patha/pathb?query", | |||
2458 | "prot 'http' user 'user' passwd 'passwd' host 'host' port 'port' doc 'patha/pathb' query 'query'"); | |||
2459 | test("file:///foo/bar", | |||
2460 | "prot 'file' user '' passwd '' host '' port '' doc 'foo/bar' query ''"); | |||
2461 | test("http://host%3a:port?param=value", | |||
2462 | "prot 'http' user '' passwd '' host 'host:' port 'port' doc '' query 'param=value'"); | |||
2463 | test("http://host%3a?param=value", | |||
2464 | "prot 'http' user '' passwd '' host 'host:' port '' doc '' query 'param=value'"); | |||
2465 | test("foo%24", | |||
2466 | "prot '' user '' passwd '' host 'foo$' port '' doc '' query ''"); | |||
2467 | test("foo%2f", | |||
2468 | "prot '' user '' passwd '' host 'foo/' port '' doc '' query ''"); | |||
2469 | test("foo%2A", | |||
2470 | "prot '' user '' passwd '' host 'foo*' port '' doc '' query ''"); | |||
2471 | test("foo%24bar", | |||
2472 | "prot '' user '' passwd '' host 'foo$bar' port '' doc '' query ''"); | |||
2473 | test("%24bar", | |||
2474 | "prot '' user '' passwd '' host '$bar' port '' doc '' query ''"); | |||
2475 | test("foo%2", | |||
2476 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); | |||
2477 | test("foo%", | |||
2478 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); | |||
2479 | test("foo%g", | |||
2480 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); | |||
2481 | test("foo%gh", | |||
2482 | "prot '' user '' passwd '' host 'foo' port '' doc '' query ''"); | |||
2483 | test("%ghbar", | |||
2484 | "prot '' user '' passwd '' host 'bar' port '' doc '' query ''"); | |||
2485 | return 0; | |||
2486 | } | |||
2487 | #endif // SPLIT_URL_MAIN | |||
2488 | ||||
2489 | #endif //SYSYNC_ENGINE | |||
2490 | ||||
2491 | ||||
2492 | // returns type from meta | |||
2493 | const char *smlMetaTypeToCharP(SmlMetInfMetInfPtr_t aMetaP) | |||
2494 | { | |||
2495 | if (!aMetaP) return NULL__null; // no meta at all | |||
2496 | return smlPCDataToCharP(aMetaP->type); | |||
2497 | } // smlMetaTypeToCharP | |||
2498 | ||||
2499 | ||||
2500 | ||||
2501 | // returns Next Anchor from meta | |||
2502 | const char *smlMetaNextAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP) | |||
2503 | { | |||
2504 | if (!aMetaP) return NULL__null; // no meta at all | |||
2505 | if (!aMetaP->anchor) return NULL__null; // no anchor at all | |||
2506 | return smlPCDataToCharP(aMetaP->anchor->next); | |||
2507 | } // smlMetaAnchorToCharP | |||
2508 | ||||
2509 | ||||
2510 | // returns Last Anchor from meta | |||
2511 | const char *smlMetaLastAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP) | |||
2512 | { | |||
2513 | if (!aMetaP) return NULL__null; // no meta at all | |||
2514 | if (!aMetaP->anchor) return NULL__null; // no anchor at all | |||
2515 | return smlPCDataToCharP(aMetaP->anchor->last); | |||
2516 | } // smlMetaLastAnchorToCharP | |||
2517 | ||||
2518 | ||||
2519 | // returns DevInf pointer if any in specified PCData, NULL otherwise | |||
2520 | SmlDevInfDevInfPtr_t smlPCDataToDevInfP(const SmlPcdataPtr_t aPCDataP) | |||
2521 | { | |||
2522 | if (!aPCDataP) return NULL__null; | |||
2523 | if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null; | |||
2524 | if (aPCDataP->extension!=SML_EXT_DEVINF) return NULL__null; | |||
2525 | return (SmlDevInfDevInfPtr_t)(aPCDataP->content); | |||
2526 | } // smlPCDataToDevInfP | |||
2527 | ||||
2528 | ||||
2529 | // returns MetInf pointer if any in specified PCData, NULL otherwise | |||
2530 | SmlMetInfMetInfPtr_t smlPCDataToMetInfP(const SmlPcdataPtr_t aPCDataP) | |||
2531 | { | |||
2532 | if (!aPCDataP) return NULL__null; | |||
2533 | if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null; | |||
2534 | if (aPCDataP->extension!=SML_EXT_METINF) return NULL__null; | |||
2535 | return (SmlMetInfMetInfPtr_t)(aPCDataP->content); | |||
2536 | } // smlPCDataToMetInfP | |||
2537 | ||||
2538 | ||||
2539 | // allocate memory via SyncML toolkit allocation function, but throw | |||
2540 | // exception if it fails. Used by SML | |||
2541 | void *_smlMalloc(MemSize_t size) | |||
2542 | { | |||
2543 | void *p; | |||
2544 | ||||
2545 | p=smlLibMalloc(size); | |||
2546 | if (!p) SYSYNC_THROW(TMemException("smlLibMalloc() failed"))throw TMemException("smlLibMalloc() failed"); | |||
2547 | return p; | |||
2548 | } // _smlMalloc | |||
2549 | ||||
2550 | ||||
2551 | // returns true on successful conversion of PCData string to sInt32 | |||
2552 | bool smlPCDataToULong(const SmlPcdataPtr_t aPCDataP, uInt32 &aLong) | |||
2553 | { | |||
2554 | return StrToULong(smlPCDataToCharP(aPCDataP),aLong); | |||
2555 | } // smlPCDataToLong | |||
2556 | ||||
2557 | // returns true on successful conversion of PCData string to sInt32 | |||
2558 | bool smlPCDataToLong(const SmlPcdataPtr_t aPCDataP, sInt32 &aLong) | |||
2559 | { | |||
2560 | return StrToLong(smlPCDataToCharP(aPCDataP),aLong); | |||
2561 | } // smlPCDataToLong | |||
2562 | ||||
2563 | #ifdef SYSYNC_ENGINE1 | |||
2564 | // returns true on successful conversion of PCData string to format | |||
2565 | bool smlPCDataToFormat(const SmlPcdataPtr_t aPCDataP, TFmtTypes &aFmt) | |||
2566 | { | |||
2567 | const char *fmt = smlPCDataToCharP(aPCDataP); | |||
2568 | sInt16 sh; | |||
2569 | if (*fmt) { | |||
2570 | if (!StrToEnum(encodingFmtSyncMLNames,numFmtTypes,sh,fmt)) | |||
2571 | return false; // unknown format | |||
2572 | aFmt=(TFmtTypes)sh; | |||
2573 | } | |||
2574 | else { | |||
2575 | aFmt=fmt_chr; // no spec = chr | |||
2576 | } | |||
2577 | return true; | |||
2578 | } // smlPCDataToFormat | |||
2579 | #endif //SYSYNC_ENGINE | |||
2580 | ||||
2581 | // build Meta anchor | |||
2582 | SmlPcdataPtr_t newMetaAnchor(const char *aNextAnchor, const char *aLastAnchor) | |||
2583 | { | |||
2584 | SmlPcdataPtr_t metaP; | |||
2585 | SmlMetInfAnchorPtr_t anchorP; | |||
2586 | ||||
2587 | // - create empty meta | |||
2588 | metaP=newMeta(); | |||
2589 | // - create new anchor | |||
2590 | anchorP=SML_NEW(SmlMetInfAnchor_t)((SmlMetInfAnchor_t*) _smlMalloc(sizeof(SmlMetInfAnchor_t))); | |||
2591 | // - set anchor contents | |||
2592 | //%%% anchorP->last=newPCDataOptEmptyString(aLastAnchor); // optional, but omitted only if string is NULL (not if only empty) | |||
2593 | anchorP->last=newPCDataOptString(aLastAnchor); // optional | |||
2594 | anchorP->next=newPCDataString(aNextAnchor); // mandatory | |||
2595 | // - set anchor | |||
2596 | ((SmlMetInfMetInfPtr_t)(metaP->content))->anchor=anchorP; | |||
2597 | // return | |||
2598 | return metaP; | |||
2599 | } // newMetaAnchor | |||
2600 | ||||
2601 | ||||
2602 | // build Meta type | |||
2603 | SmlPcdataPtr_t newMetaType(const char *aMetaType) | |||
2604 | { | |||
2605 | SmlPcdataPtr_t metaP; | |||
2606 | ||||
2607 | // - if not type, we don't create a meta at all | |||
2608 | if (aMetaType==NULL__null || *aMetaType==0) return NULL__null; | |||
2609 | // - create empty meta | |||
2610 | metaP=newMeta(); | |||
2611 | // - set type | |||
2612 | ((SmlMetInfMetInfPtr_t)(metaP->content))->type=newPCDataString(aMetaType); | |||
2613 | // return | |||
2614 | return metaP; | |||
2615 | } // newMetaType | |||
2616 | ||||
2617 | ||||
2618 | // build empty Meta | |||
2619 | SmlPcdataPtr_t newMeta(void) | |||
2620 | { | |||
2621 | SmlPcdataPtr_t metaP; | |||
2622 | SmlMetInfMetInfPtr_t metinfP; | |||
2623 | ||||
2624 | // - create empty PCData | |||
2625 | metaP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t))); | |||
2626 | metaP->contentType=SML_PCDATA_EXTENSION; | |||
2627 | metaP->extension=SML_EXT_METINF; | |||
2628 | // - %%% assume length is not relevant for structured content (looks like in mgrutil.c) | |||
2629 | metaP->length=0; | |||
2630 | // - create empty meta | |||
2631 | metinfP = SML_NEW(SmlMetInfMetInf_t)((SmlMetInfMetInf_t*) _smlMalloc(sizeof(SmlMetInfMetInf_t))); | |||
2632 | metaP->content=metinfP; // link to PCdata | |||
2633 | // - init meta options | |||
2634 | metinfP->version=NULL__null; | |||
2635 | metinfP->format=NULL__null; | |||
2636 | metinfP->type=NULL__null; | |||
2637 | metinfP->mark=NULL__null; | |||
2638 | metinfP->size=NULL__null; | |||
2639 | metinfP->nextnonce=NULL__null; | |||
2640 | metinfP->maxmsgsize=NULL__null; | |||
2641 | metinfP->mem=NULL__null; | |||
2642 | metinfP->emi=NULL__null; // PCData list | |||
2643 | metinfP->anchor=NULL__null; | |||
2644 | // - SyncML 1.1 | |||
2645 | metinfP->maxobjsize=NULL__null; | |||
2646 | // - SyncML 1.2 | |||
2647 | metinfP->flags=0; | |||
2648 | // return | |||
2649 | return metaP; | |||
2650 | } // newMeta | |||
2651 | ||||
2652 | ||||
2653 | // copy meta from existing meta (for data items only | |||
2654 | // anchor, mem, emi, nonce are not copied!) | |||
2655 | // Note however that we copy maxobjsize, as we (mis-)use it for ZIPPED_BINDATA_SUPPORT | |||
2656 | SmlPcdataPtr_t copyMeta(SmlPcdataPtr_t aOldMetaP) | |||
2657 | { | |||
2658 | if (!aOldMetaP) return NULL__null; | |||
2659 | SmlPcdataPtr_t newmetaP=newMeta(); | |||
2660 | if (!newmetaP) return NULL__null; | |||
2661 | SmlMetInfMetInfPtr_t oldmetinfP = smlPCDataToMetInfP(aOldMetaP); | |||
2662 | if (!oldmetinfP) return NULL__null; | |||
2663 | SmlMetInfMetInfPtr_t newmetInfP = smlPCDataToMetInfP(newmetaP); | |||
2664 | // - copy meta | |||
2665 | newmetInfP->version = smlPcdataDup(oldmetinfP->version); | |||
2666 | newmetInfP->format = smlPcdataDup(oldmetinfP->format); | |||
2667 | newmetInfP->type = smlPcdataDup(oldmetinfP->type); | |||
2668 | newmetInfP->mark = smlPcdataDup(oldmetinfP->mark); | |||
2669 | newmetInfP->size = smlPcdataDup(oldmetinfP->size); | |||
2670 | newmetInfP->maxobjsize = smlPcdataDup(oldmetinfP->maxobjsize); | |||
2671 | // return | |||
2672 | return newmetaP; | |||
2673 | } // copyMeta | |||
2674 | ||||
2675 | ||||
2676 | ||||
2677 | ||||
2678 | // add an item to an item list | |||
2679 | SmlItemListPtr_t *addItemToList( | |||
2680 | SmlItemPtr_t aItemP, // existing item data structure, ownership is passed to list | |||
2681 | SmlItemListPtr_t *aItemListPP // adress of pointer to existing item list or NULL | |||
2682 | ) | |||
2683 | { | |||
2684 | if (aItemListPP && aItemP) { | |||
2685 | // find last itemlist pointer | |||
2686 | while (*aItemListPP) { | |||
2687 | aItemListPP=&((*aItemListPP)->next); | |||
2688 | } | |||
2689 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry | |||
2690 | *aItemListPP = SML_NEW(SmlItemList_t)((SmlItemList_t*) _smlMalloc(sizeof(SmlItemList_t))); | |||
2691 | (*aItemListPP)->next=NULL__null; | |||
2692 | (*aItemListPP)->item=aItemP; // insert new item | |||
2693 | // return pointer to pointer to next element (which is now NULL). | |||
2694 | // Can be passed in to addPCDataToList() again to append more elements without searching | |||
2695 | // for end-of-list | |||
2696 | return &((*aItemListPP)->next); | |||
2697 | } | |||
2698 | // nop, return pointer unmodified | |||
2699 | return aItemListPP; | |||
2700 | } // addItemToList | |||
2701 | ||||
2702 | ||||
2703 | // add a CTData item to a CTDataList | |||
2704 | SmlDevInfCTDataListPtr_t *addCTDataToList( | |||
2705 | SmlDevInfCTDataPtr_t aCTDataP, // existing CTData item data structure, ownership is passed to list | |||
2706 | SmlDevInfCTDataListPtr_t *aCTDataListPP // adress of pointer to existing item list or NULL | |||
2707 | ) | |||
2708 | { | |||
2709 | if (aCTDataListPP && aCTDataP) { | |||
2710 | // find last itemlist pointer | |||
2711 | while (*aCTDataListPP) { | |||
2712 | aCTDataListPP=&((*aCTDataListPP)->next); | |||
2713 | } | |||
2714 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry | |||
2715 | *aCTDataListPP = SML_NEW(SmlDevInfCTDataList_t)((SmlDevInfCTDataList_t*) _smlMalloc(sizeof(SmlDevInfCTDataList_t ))); | |||
2716 | (*aCTDataListPP)->next=NULL__null; | |||
2717 | (*aCTDataListPP)->data=aCTDataP; // insert new data | |||
2718 | // return pointer to pointer to next element (which is now NULL). | |||
2719 | // Can be passed in to addPCDataToList() again to append more elements without searching | |||
2720 | // for end-of-list | |||
2721 | return &((*aCTDataListPP)->next); | |||
2722 | } | |||
2723 | // nop, return pointer unmodified | |||
2724 | return aCTDataListPP; | |||
2725 | } // addCTDataToList | |||
2726 | ||||
2727 | ||||
2728 | // add a CTDataProp item to a CTDataPropList | |||
2729 | SmlDevInfCTDataPropListPtr_t *addCTDataPropToList( | |||
2730 | SmlDevInfCTDataPropPtr_t aCTDataPropP, // existing CTDataProp item data structure, ownership is passed to list | |||
2731 | SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL | |||
2732 | ) | |||
2733 | { | |||
2734 | if (aCTDataPropListPP && aCTDataPropP) { | |||
2735 | // find last itemlist pointer | |||
2736 | while (*aCTDataPropListPP) { | |||
2737 | aCTDataPropListPP=&((*aCTDataPropListPP)->next); | |||
2738 | } | |||
2739 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry | |||
2740 | *aCTDataPropListPP = SML_NEW(SmlDevInfCTDataPropList_t)((SmlDevInfCTDataPropList_t*) _smlMalloc(sizeof(SmlDevInfCTDataPropList_t ))); | |||
2741 | (*aCTDataPropListPP)->next=NULL__null; | |||
2742 | (*aCTDataPropListPP)->data=aCTDataPropP; // insert new data | |||
2743 | // return pointer to pointer to next element (which is now NULL). | |||
2744 | // Can be passed in to addPCDataToList() again to append more elements without searching | |||
2745 | // for end-of-list | |||
2746 | return &((*aCTDataPropListPP)->next); | |||
2747 | } | |||
2748 | // nop, return pointer unmodified | |||
2749 | return aCTDataPropListPP; | |||
2750 | } // addCTDataPropToList | |||
2751 | ||||
2752 | ||||
2753 | // add a CTData describing a property (as returned by newDevInfCTData()) | |||
2754 | // as a new property without parameters to a CTDataPropList | |||
2755 | SmlDevInfCTDataPropListPtr_t *addNewPropToList( | |||
2756 | SmlDevInfCTDataPtr_t aPropCTData, // CTData describing property | |||
2757 | SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL | |||
2758 | ) | |||
2759 | { | |||
2760 | SmlDevInfCTDataPropPtr_t propdataP = SML_NEW(SmlDevInfCTDataProp_t)((SmlDevInfCTDataProp_t*) _smlMalloc(sizeof(SmlDevInfCTDataProp_t ))); | |||
2761 | propdataP->param = NULL__null; // no params | |||
2762 | propdataP->prop = aPropCTData; | |||
2763 | return addCTDataPropToList(propdataP, aCTDataPropListPP); | |||
2764 | } // addNewPropToList | |||
2765 | ||||
2766 | ||||
2767 | ||||
2768 | // add PCData element to a PCData list | |||
2769 | SmlPcdataListPtr_t *addPCDataToList( | |||
2770 | SmlPcdataPtr_t aPCDataP, // Existing PCData element to be added, ownership is passed to list | |||
2771 | SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL | |||
2772 | ) | |||
2773 | { | |||
2774 | if (aPCDataListPP) { | |||
2775 | // find last PCDataList pointer | |||
2776 | while (*aPCDataListPP) { | |||
2777 | aPCDataListPP=&((*aPCDataListPP)->next); | |||
2778 | } | |||
2779 | // aItemListPP now points to a NULL pointer which must be replaced by addr of new PCDataList entry | |||
2780 | *aPCDataListPP = SML_NEW(SmlPcdataList_t)((SmlPcdataList_t*) _smlMalloc(sizeof(SmlPcdataList_t))); | |||
2781 | (*aPCDataListPP)->next=NULL__null; | |||
2782 | (*aPCDataListPP)->data=aPCDataP; // insert new item | |||
2783 | // return pointer to pointer to next element (which is now NULL). | |||
2784 | // Can be passed in to addPCDataToList() again to append more elements without searching | |||
2785 | // for end-of-list | |||
2786 | return &((*aPCDataListPP)->next); | |||
2787 | } | |||
2788 | return NULL__null; | |||
2789 | } // addPCDataToList | |||
2790 | ||||
2791 | ||||
2792 | // add PCData string to a PCData list | |||
2793 | SmlPcdataListPtr_t *addPCDataStringToList( | |||
2794 | const char *aString, // String to be added | |||
2795 | SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL | |||
2796 | ) | |||
2797 | { | |||
2798 | return addPCDataToList(newPCDataString(aString),aPCDataListPP); | |||
2799 | } // addPCDataStringToList | |||
2800 | ||||
2801 | ||||
2802 | // create new optional location (source or target) | |||
2803 | // Returns NULL if URI specified is NULL or empty | |||
2804 | SmlSourcePtr_t newOptLocation( | |||
2805 | const char *aLocURI, | |||
2806 | const char *aLocName | |||
2807 | ) | |||
2808 | { | |||
2809 | if (!aLocURI || *aLocURI==0) return NULL__null; | |||
2810 | else return newLocation(aLocURI,aLocName); | |||
2811 | } // newOptLocation | |||
2812 | ||||
2813 | ||||
2814 | // create new location (source or target) | |||
2815 | // always returns location, even if URI and/or name are empty | |||
2816 | // If name is NULL or empty, only URI is generated | |||
2817 | SmlSourcePtr_t newLocation( | |||
2818 | const char *aLocURI, | |||
2819 | const char *aLocName | |||
2820 | ) | |||
2821 | { | |||
2822 | SmlSourcePtr_t locP; | |||
2823 | ||||
2824 | locP = SML_NEW(SmlSource_t)((SmlSource_t*) _smlMalloc(sizeof(SmlSource_t))); | |||
2825 | // URI is always present (might be empty, though) | |||
2826 | locP->locURI=newPCDataString(aLocURI); | |||
2827 | // name only if not empty | |||
2828 | if (aLocName && *aLocName!=0) | |||
2829 | locP->locName=newPCDataString(aLocName); | |||
2830 | else | |||
2831 | locP->locName=NULL__null; | |||
2832 | // filter defaults to NULL | |||
2833 | locP->filter=NULL__null; | |||
2834 | return locP; | |||
2835 | } // newLocation | |||
2836 | ||||
2837 | ||||
2838 | // create new empty Item | |||
2839 | SmlItemPtr_t newItem(void) | |||
2840 | { | |||
2841 | SmlItemPtr_t itemP; | |||
2842 | ||||
2843 | itemP = SML_NEW(SmlItem_t)((SmlItem_t*) _smlMalloc(sizeof(SmlItem_t))); | |||
2844 | itemP->target=NULL__null; | |||
2845 | itemP->source=NULL__null; | |||
2846 | itemP->meta=NULL__null; | |||
2847 | itemP->data=NULL__null; | |||
2848 | // SyncML 1.1, no MoreData set | |||
2849 | itemP->flags=0; | |||
2850 | // SyncML 1.2 | |||
2851 | itemP->targetParent=NULL__null; | |||
2852 | itemP->sourceParent=NULL__null; | |||
2853 | // custom data of client | |||
2854 | itemP->aux=NULL__null; | |||
2855 | return itemP; | |||
2856 | } // newItem | |||
2857 | ||||
2858 | ||||
2859 | // create new Item with string-type data | |||
2860 | SmlItemPtr_t newStringDataItem( | |||
2861 | const char *aString | |||
2862 | ) | |||
2863 | { | |||
2864 | SmlItemPtr_t itemP=newItem(); | |||
2865 | itemP->data=newPCDataString(aString); | |||
2866 | return itemP; | |||
2867 | } // newStringDataItem | |||
2868 | ||||
2869 | ||||
2870 | // create meta-format PCData | |||
2871 | SmlPcdataPtr_t newPCDataFormat( | |||
2872 | TFmtTypes aFmtType, | |||
2873 | bool aShowDefault | |||
2874 | ) | |||
2875 | { | |||
2876 | if (aFmtType==fmt_chr && !aShowDefault) | |||
2877 | return NULL__null; // default | |||
2878 | else | |||
2879 | return newPCDataString(encodingFmtSyncMLNames[aFmtType]); // show format type | |||
2880 | } // newPCDataFormat | |||
2881 | ||||
2882 | ||||
2883 | // create new string-type PCData, if NULL or empty string is passed for aData, | |||
2884 | // NULL is returned (optional info not there) | |||
2885 | SmlPcdataPtr_t newPCDataFormatted( | |||
2886 | const uInt8 *aData, // data | |||
2887 | sInt32 aLength, // length of data, if<=0 then string length is calculated | |||
2888 | TFmtTypes aFmtType, // encoding Format | |||
2889 | bool aNeedsOpaque // set opaque needed (string that could confuse XML parsing or even binary) | |||
2890 | ) | |||
2891 | { | |||
2892 | if (!aData) return NULL__null; // no data | |||
2893 | if (aLength==0) aLength=strlen((const char *)aData); | |||
2894 | if (aLength==0) return NULL__null; // no data | |||
2895 | // encode input string if needed | |||
2896 | SmlPcdataPtr_t pcdataP; | |||
2897 | char *b64data; | |||
2898 | uInt32 b64len; | |||
2899 | switch (aFmtType) { | |||
2900 | case fmt_b64: | |||
2901 | // convert to b64 | |||
2902 | b64len=0; | |||
2903 | b64data=b64::encode(aData, aLength, &b64len); | |||
2904 | pcdataP = newPCDataString(b64data,b64len); | |||
2905 | b64::free(b64data); | |||
2906 | return pcdataP; | |||
2907 | default: | |||
2908 | // just copy into string or opaque/C_DATA string | |||
2909 | return newPCDataStringX(aData, aNeedsOpaque, aLength); | |||
2910 | } | |||
2911 | } // newPCDataEncoded | |||
2912 | ||||
2913 | ||||
2914 | // create new string-type PCData, if NULL or empty string is passed for aString, | |||
2915 | // NULL is returned (optional info not there) | |||
2916 | SmlPcdataPtr_t newPCDataOptString( | |||
2917 | const char *aString, | |||
2918 | sInt32 aLength // length of string, if<0 then length is calculated | |||
2919 | ) | |||
2920 | { | |||
2921 | if (aString && (*aString!=0)) | |||
2922 | return newPCDataString(aString,aLength); | |||
2923 | else | |||
2924 | return NULL__null; | |||
2925 | } // newPCDataOptString | |||
2926 | ||||
2927 | ||||
2928 | // create new string-type PCData, if NULL is passed for aString, | |||
2929 | // NULL is returned (optional info not there) | |||
2930 | // if empty string is passed, PCData with empty contents will be created | |||
2931 | SmlPcdataPtr_t newPCDataOptEmptyString( | |||
2932 | const char *aString, | |||
2933 | sInt32 aLength // length of string, if<0 then length is calculated | |||
2934 | ) | |||
2935 | { | |||
2936 | if (aString) | |||
2937 | return newPCDataString(aString,aLength); | |||
2938 | else | |||
2939 | return NULL__null; | |||
2940 | } // newPCDataOptEmptyString | |||
2941 | ||||
2942 | ||||
2943 | // create new string-type PCData, if NULL is passed for aString, | |||
2944 | // an empty string is created (that is, a PCData with string terminator as | |||
2945 | // content only, length=0) | |||
2946 | SmlPcdataPtr_t newPCDataString( | |||
2947 | const char *aString, | |||
2948 | sInt32 aLength // length of string, if<0 then length is calculated | |||
2949 | ) | |||
2950 | { | |||
2951 | return newPCDataStringX((const uInt8 *)aString,false,aLength); | |||
2952 | } // newPCDataString | |||
2953 | ||||
2954 | ||||
2955 | // create new PCData, aOpaque can be used to generate non-string data | |||
2956 | // Note: empty strings are always coded as non-opaque, even if aOpaque is set | |||
2957 | SmlPcdataPtr_t newPCDataStringX( | |||
2958 | const uInt8 *aString, | |||
2959 | bool aOpaque, // if set, an opaque method (OPAQUE or CDATA) is used | |||
2960 | sInt32 aLength // length of string, if<0 then length is calculated | |||
2961 | ) | |||
2962 | { | |||
2963 | SmlPcdataPtr_t pcdataP; | |||
2964 | ||||
2965 | pcdataP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t))); | |||
2966 | ||||
2967 | // determine length | |||
2968 | if (aLength>=0 && aString) | |||
2969 | pcdataP->length = aLength; // as specified, and string argument not NULL | |||
2970 | else | |||
2971 | pcdataP->length = aString ? strlen((const char *)aString) : 0; // from argument, if NULL -> length=0 | |||
2972 | // determine type | |||
2973 | if (aOpaque && aLength!=0) { | |||
2974 | // Note: due to modification in RTK, this generates | |||
2975 | // OPAQUE in WBXML and CDATA in XML | |||
2976 | pcdataP->contentType=SML_PCDATA_OPAQUE; | |||
2977 | } | |||
2978 | else { | |||
2979 | // non-critical string | |||
2980 | #ifdef SML_STRINGS_AS_OPAQUE | |||
2981 | pcdataP->contentType=SML_PCDATA_OPAQUE; | |||
2982 | #else | |||
2983 | pcdataP->contentType=SML_PCDATA_STRING; | |||
2984 | #endif | |||
2985 | } | |||
2986 | pcdataP->extension=SML_EXT_UNDEFINED; | |||
2987 | // - allocate data space (ALWAYS with room for a terminator, even if Opaque or empty string) | |||
2988 | pcdataP->content=smlLibMalloc(pcdataP->length+1); // +1 for terminator, see below | |||
2989 | // copy data (if any) | |||
2990 | if (pcdataP->length>0) { | |||
2991 | // - copy string | |||
2992 | smlLibMemcpy(pcdataP->content,aString,pcdataP->length); | |||
2993 | } | |||
2994 | // set terminator | |||
2995 | ((char *)(pcdataP->content))[pcdataP->length]=0; // terminate C string | |||
2996 | // return | |||
2997 | return pcdataP; | |||
2998 | } // newPCDataStringX | |||
2999 | ||||
3000 | ||||
3001 | // create new string-type PCData from C++ string | |||
3002 | SmlPcdataPtr_t newPCDataString( | |||
3003 | const string &aString | |||
3004 | ) | |||
3005 | { | |||
3006 | return newPCDataString(aString.c_str(),aString.length()); | |||
3007 | } // newPCDataString(string&) | |||
3008 | ||||
3009 | ||||
3010 | // create new decimal string representation of sInt32 as PCData | |||
3011 | SmlPcdataPtr_t newPCDataLong( | |||
3012 | sInt32 aLong | |||
3013 | ) | |||
3014 | { | |||
3015 | const int ssiz=20; | |||
3016 | char s[ssiz]; | |||
3017 | ||||
3018 | snprintf(s,ssiz,"%ld",(long)aLong); | |||
3019 | return newPCDataString(s); | |||
3020 | } // newPCDataLong | |||
3021 | ||||
3022 | ||||
3023 | // Nonce generator allowing last-session nonce to be correctly re-generated in next session | |||
3024 | void generateNonce(string &aNonce, const char *aDevStaticString, sInt32 aSessionStaticID) | |||
3025 | { | |||
3026 | md5::SYSYNC_MD5_CTX context; | |||
3027 | uInt8 digest[16]; | |||
3028 | md5::Init (&context); | |||
3029 | // - add in static device string | |||
3030 | md5::Update (&context, (const uInt8 *)aDevStaticString, strlen(aDevStaticString)); | |||
3031 | // - add in session static ID in binary format | |||
3032 | md5::Update (&context, (const uInt8 *)&aSessionStaticID, sizeof(sInt32)); | |||
3033 | // - done | |||
3034 | md5::Final (digest, &context); | |||
3035 | // - make string of first 48 bit of MD5: 48 bits, use 6 bits per char = 8 chars | |||
3036 | uInt64 dig48 = ((uInt32)digest[0] << 0) | | |||
3037 | ((uInt32)digest[1] << 8) | | |||
3038 | ((uInt32)digest[2] << 16) | | |||
3039 | ((uInt32)digest[3] << 24); | |||
3040 | aNonce.erase(); | |||
3041 | for (sInt16 k=0; k<8; k++) { | |||
3042 | aNonce+=((dig48 & 0x03F) + 0x21); | |||
3043 | dig48 = dig48 >> 6; | |||
3044 | } | |||
3045 | } // generateNonce | |||
3046 | ||||
3047 | ||||
3048 | // create challenge of requested type | |||
3049 | SmlChalPtr_t newChallenge(TAuthTypes aAuthType, const string &aNextNonce, bool aBinaryAllowed) | |||
3050 | { | |||
3051 | SmlChalPtr_t chalP=NULL__null; | |||
3052 | SmlMetInfMetInfPtr_t metaP; | |||
3053 | ||||
3054 | if (aAuthType!=auth_none) { | |||
3055 | // new challenge record | |||
3056 | chalP = SML_NEW(SmlChal_t)((SmlChal_t*) _smlMalloc(sizeof(SmlChal_t))); | |||
3057 | // add empty meta | |||
3058 | chalP->meta=newMeta(); | |||
3059 | metaP=(SmlMetInfMetInfPtr_t)(chalP->meta->content); | |||
3060 | // add type and format | |||
3061 | // - type | |||
3062 | metaP->type=newPCDataString(authTypeSyncMLNames[aAuthType]); | |||
3063 | // - format | |||
3064 | const char *fmt = NULL__null; | |||
3065 | switch (aAuthType) { | |||
3066 | case auth_basic: | |||
3067 | // always request b64 | |||
3068 | fmt=encodingFmtSyncMLNames[fmt_b64]; | |||
3069 | break; | |||
3070 | case auth_md5: | |||
3071 | // request b64 only for non-binary capable encoding (that is, XML) | |||
3072 | /* %%% dont do that, Nokia9210 miserably fails when we do that, | |||
3073 | * it sends its data B64 encoded, but obviously with bad | |||
3074 | * data in it. Ericsson T39m seems to do it correctly however. | |||
3075 | if (!aBinaryAllowed) | |||
3076 | fmt=encodingFmtSyncMLNames[fmt_b64]; | |||
3077 | */ | |||
3078 | // always request b64 for now, seems to be safer with not fully compatible clients | |||
3079 | fmt=encodingFmtSyncMLNames[fmt_b64]; | |||
3080 | break; | |||
3081 | default: break; | |||
3082 | } | |||
3083 | metaP->format=newPCDataOptString(fmt); // set format, but not empty | |||
3084 | // - add nonce if needed | |||
3085 | if (aAuthType==auth_md5) { | |||
3086 | // MD5 also might need nonce | |||
3087 | if (!aNextNonce.empty()) { | |||
3088 | // add base64 encoded nonce string | |||
3089 | uInt32 b64len; | |||
3090 | char *b64=b64::encode((const uInt8 *)aNextNonce.c_str(),aNextNonce.size(),&b64len); | |||
3091 | metaP->nextnonce=newPCDataString(b64,b64len); | |||
3092 | b64::free(b64); // return buffer allocated by b64_encode | |||
3093 | } | |||
3094 | } | |||
3095 | } | |||
3096 | return chalP; | |||
3097 | } // newChallenge | |||
3098 | ||||
3099 | ||||
3100 | // create new property or param descriptor for CTCap | |||
3101 | SmlDevInfCTDataPtr_t newDevInfCTData(cAppCharP aName,uInt32 aSize, bool aNoTruncate, uInt32 aMaxOccur, cAppCharP aDataType) | |||
3102 | { | |||
3103 | SmlDevInfCTDataPtr_t result = SML_NEW(SmlDevInfCTData_t)((SmlDevInfCTData_t*) _smlMalloc(sizeof(SmlDevInfCTData_t))); | |||
3104 | // fill descriptor | |||
3105 | // - name if property or param | |||
3106 | result->name=newPCDataString(aName); | |||
3107 | // - no display name so far | |||
3108 | result->dname=NULL__null; // no display name | |||
3109 | // - datatype (optional) | |||
3110 | result->datatype=newPCDataOptString(aDataType); | |||
3111 | // - max size | |||
3112 | if (aSize==0) | |||
3113 | result->maxsize=NULL__null; // no size | |||
3114 | else | |||
3115 | result->maxsize=newPCDataLong(aSize); // set size | |||
3116 | // - no valenum here, will be added later if any | |||
3117 | result->valenum=NULL__null; // no valenum | |||
3118 | // SyncML 1.2 | |||
3119 | if (aMaxOccur==0) | |||
3120 | result->maxoccur=NULL__null; // no maxoccur | |||
3121 | else | |||
3122 | result->maxoccur=newPCDataLong(aMaxOccur); // set maxoccur | |||
3123 | result->flags = aNoTruncate ? SmlDevInfNoTruncate_f0x0020 : 0; // notruncate flag or none | |||
3124 | return result; | |||
3125 | } // newDevInfCTData | |||
3126 | ||||
3127 | ||||
3128 | // frees prototype element and sets calling pointer to NULL | |||
3129 | void FreeProtoElement(void * &aVoidP) | |||
3130 | { | |||
3131 | if (aVoidP) smlFreeProtoElement(aVoidP); | |||
3132 | aVoidP=NULL__null; | |||
3133 | } // FreeProtoElement | |||
3134 | ||||
3135 | } // namespace sysync | |||
3136 | ||||
3137 | // eof |