/data/runtests/work/sources/libsynthesis/src/sysync_SDK/Sources/sysync

Bug Summary

File:	libsynthesis/src/sysync_SDK/Sources/sysync_utils.cpp
Warning:	line 1449, column 7 Assigned value is garbage or undefined

Annotated Source Code

* File: sysync_utils.cpp

* Author: Lukas Zeller (luz@plan44.ch)

* Provides some helper functions interfacing between SyncML Toolkit

* and C++

* 2001-05-16 : luz : created

#include "prefix_file.h"

#include "sync_include.h"

#include "sysync_utils.h"

#include "libmem.h"

#ifdef SYSYNC_TOOL

#include "syncappbase.h" // for CONSOLEPRINTF

#include "customimplagent.h" // for DBCharSetNames

#endif

namespace sysync {

// Support for SySync Diagnostic Tool

#ifdef SYSYNC_TOOL

// parse RFC 2822 addr spec
// Diagnostic tool command:
// - argc<0 requests the help text
// - otherwise argv[0] is the addr-spec string handed to parseRFC2822AddrSpec()
// Output goes through CONSOLEPRINTF: the input, the extracted name, the
// extracted email address and the part of the input that was not consumed.
// Returns EXIT_SUCCESS, or EXIT_FAILURE when the argument is missing.
int parse2822AddrSpec(int argc, const char *argv[])
{
  if (argc<0) {
    // help requested
    CONSOLEPRINTF((" addrparse <RFC2822 addr-spec string to parse>"));
    CONSOLEPRINTF((" Parse name and email address out of a RFC2822-type addr-spec"));
    return EXIT_SUCCESS;
  }
  // check for argument
  if (argc<1) {
    CONSOLEPRINTF(("1 argument required"));
    return EXIT_FAILURE;
  }
  // parse
  string addrname,addremail;
  const char *rest=parseRFC2822AddrSpec(argv[0],addrname,addremail);
  // show
  CONSOLEPRINTF(("Input : %s",argv[0]));
  CONSOLEPRINTF(("Name : %s",addrname.c_str()));
  CONSOLEPRINTF(("email : %s",addremail.c_str()));
  CONSOLEPRINTF(("unparsed rest : %s",rest));
  return EXIT_SUCCESS;
} // parse2822AddrSpec

// convert between character sets

int charConv(int argc, const char *argv[])

{

if (argc<0) {

// help requested

CONSOLEPRINTF((" charconv [<input charset>] <output charset> <C-string to convert>"))SySync_ConsolePrintf(stderr, "SYSYNC " " charconv [<input charset>] <output charset> <C-string to convert>"
"\n");

CONSOLEPRINTF((" Convert from one charset to another. Default input is UTF-8"))SySync_ConsolePrintf(stderr, "SYSYNC " " Convert from one charset to another. Default input is UTF-8"
"\n");

return EXIT_SUCCESS0;

}

#ifdef __TEST_EQUALITY_OF_CP936_WITH_GB2312__

// quick test

uInt32 ch_in;

for (ch_in=0x8100; ch_in<=0xFFFF; ch_in++) {

// convert into internal UTF-8

string s_internal,s_in;

s_in.erase();

if (ch_in>=0x8100) s_in+=(ch_in >> 8) & 0xFF;

s_in+=(ch_in & 0xFF);

s_internal.erase();

appendStringAsUTF8(

s_in.c_str(),

s_internal,

chs_gb2312

);

// convert into output format

string s_out;

s_out.erase();

appendUTF8ToString(

s_internal.c_str(),

s_out,

chs_cp936

);

// show differences

if (s_in!=s_out && s_out.size()>0 && s_out[0]!=INCONVERTIBLE_PLACEHOLDER'_') {

string s1,s2;

s1.erase(); StrToCStrAppend(s_in.c_str(), s1);

s2.erase(); StrToCStrAppend(s_out.c_str(), s2);

CONSOLEPRINTF(("\"%s\" != \"%s\"",s1.c_str(),s2.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "\"%s\" != \"%s\"" "\n"
,s1.c_str(),s2.c_str());

}

100

return EXIT_SUCCESS0;

101

#endif

102

103

// check for argument

104

if (argc<2) {

105

CONSOLEPRINTF(("2 or 3 arguments required"))SySync_ConsolePrintf(stderr, "SYSYNC " "2 or 3 arguments required"
"\n");

106

return EXIT_FAILURE1;

107

}

108

int ochsarg=1;

109

sInt16 enu;

110

// get input charset

111

TCharSets charset_in=chs_utf8;

112

if (argc==3) {

113

// first arg is input charset

114

if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[0])) {

115

CONSOLEPRINTF(("'%s' is not a valid input charset name",argv[0]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid input charset name"
"\n",argv[0]);

116

return EXIT_FAILURE1;

117

}

118

charset_in = (TCharSets)enu;

119

}

120

else {

121

ochsarg=0; // first arg ist input charset

122

}

123

// get output charset

124

TCharSets charset_out;

125

if (!StrToEnum(DBCharSetNames, numCharSets, enu, argv[ochsarg])) {

126

CONSOLEPRINTF(("'%s' is not a valid output charset name",argv[ochsarg]))SySync_ConsolePrintf(stderr, "SYSYNC " "'%s' is not a valid output charset name"
"\n",argv[ochsarg]);

127

return EXIT_FAILURE1;

128

}

129

charset_out = (TCharSets)enu;

130

// get string to convert

131

string s_in;

132

s_in.erase();

133

CStrToStrAppend(argv[ochsarg+1], s_in);

134

// convert into internal UTF-8

135

string s_internal;

136

s_internal.erase();

137

appendStringAsUTF8(

138

s_in.c_str(),

139

s_internal,

140

charset_in

141

);

142

// convert into output format

143

string s_out;

144

s_out.erase();

145

appendUTF8ToString(

146

s_internal.c_str(),

147

s_out,

148

charset_out

149

);

150

// show all three

151

string show;

152

// - input

153

show.erase(); StrToCStrAppend(s_in.c_str(), show);

154

CONSOLEPRINTF(("Input : %-20s = \"%s\"",DBCharSetNames[charset_in], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Input : %-20s = \"%s\""
"\n",DBCharSetNames[charset_in], show.c_str());

155

// - internal UTF8

156

show.erase(); StrToCStrAppend(s_internal.c_str(), show);

157

CONSOLEPRINTF(("Internal : %-20s = \"%s\"",DBCharSetNames[chs_utf8], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Internal : %-20s = \"%s\""
"\n",DBCharSetNames[chs_utf8], show.c_str());

158

// - output

159

show.erase(); StrToCStrAppend(s_out.c_str(), show);

160

CONSOLEPRINTF(("Output : %-20s = \"%s\"",DBCharSetNames[charset_out], show.c_str()))SySync_ConsolePrintf(stderr, "SYSYNC " "Output : %-20s = \"%s\""
"\n",DBCharSetNames[charset_out], show.c_str());

161

return EXIT_SUCCESS0;

162

} // charConv

163

164

#endif // SYSYNC_TOOL

165

166

167

// conversion table from ANSI 0x80..0x9F to UCS4

168

const uInt32 Ansi_80_to_9F_to_UCS4[0x20] = {

169

0x20AC, 0 ,0x201A,0x0192, 0x201E,0x2026,0x2020,0x2021, // 0x80..0x87

170

0x02C6,0x2030,0x0160,0x2039, 0x0152, 0 ,0x017D, 0 , // 0x88..0x8F

171

0 ,0x2018,0x2019,0x201C, 0x201D,0x2022,0x2013,0x2014, // 0x90..0x97

172

0x02DC,0x2122,0x0161,0x203A, 0x0153, 0 ,0x017E,0x0178 // 0x98..0x9F

173

};

174

175

// line end mode names

176

const char * const lineEndModeNames[numLineEndModes] = {

177

"none", // none specified

178

"unix", // 0x0A

179

"mac", // 0x0D

180

"dos", // 0x0D 0x0A

181

"cstr", // as in C strings, '\n' which is 0x0A normally (but might be 0x0D on some platforms)

182

"filemaker" // 0x0B (filemaker tab-separated text format, CR is shown as 0x0B within fields

183

};

184

185

186

187

// literal quoting mode names

188

const char * const quotingModeNames[numQuotingModes] = {

189

"none", // none specified

190

"singlequote", // single quote must be duplicated

191

"doublequote", // double quote must be duplicated

192

"backslash" // C-string-style escapes of CR,LF,TAB,BS,\," and ' (but no full c-string escape with \xXX etc.)

193

};

194

195

196

// Encoding format names for SyncML

197

const char * const encodingFmtSyncMLNames[numFmtTypes] = {

198

"chr", // plain chars

199

"bin", // binary

200

"b64" // base 64 encoding

201

};

202

// Encoding format names for user

203

const char * const encodingFmtNames[numFmtTypes] = {

204

"plain-text", // no encoding (plain text)

205

"binary", // plain binary (in WBXML only)

206

"base64" // base 64 encoding

207

};

208

209

210

// field (property) data type names

211

const char * const propDataTypeNames[numPropDataTypes] = {

212

"chr", // Character

213

"int", // Integer

214

"bool", // Boolean

215

"bin", // Binary

216

"datetime", // Date and time of day

217

"phonenum", // Phone number

218

"text", // plain text

219

"???" // unknown

220

};

221

222

223

// Auth type names

224

const char * const authTypeSyncMLNames[numAuthTypes] = {

225

NULL__null, // no authorisation

226

"syncml:auth-basic", // basic (B64 encoded user pw string)

227

"syncml:auth-md5" // Md5 encoded user:pw:nonce

228

};

229

230

231

// MIME encoding types

232

const char * const MIMEEncodingNames[numMIMEencodings] = {

233

"",

234

"7BIT",

235

"8BIT",

236

"BINARY",

237

"QUOTED-PRINTABLE",

238

"BASE64",

239

"B"

240

};

241

242

// Charset names for MIME based strings

243

const char * const MIMECharSetNames[numCharSets] = {

244

"unknown",

245

"US-ASCII",

246

"ANSI",

247

"ISO-8859-1",

248

"UTF-8",

249

"UTF-16",

250

#ifdef CHINESE_SUPPORT

251

"GB2312",

252

"CP936",

253

#endif

254

};

255

256

257

#ifdef SYSYNC_ENGINE1

258

// generate RFC2822-style address specificiation

259

// - Common Name will be quoted

260

// - recipient will be put in angle brackets

261

void makeRFC2822AddrSpec(

262

cAppCharP aCommonName,

263

cAppCharP aRecipient,

264

string &aRFCAddr

265

)

266

{

267

if (aCommonName && *aCommonName) {

268

aRFCAddr='"';

269

while (*aCommonName) {

270

if (*aCommonName=='"') aRFCAddr += "\\\"";

271

else aRFCAddr += *aCommonName;

272

aCommonName++;

273

}

274

aRFCAddr+="\" <";

275

aRFCAddr+=aRecipient;

276

aRFCAddr+=">";

277

}

278

else {

279

// plain email address

280

aRFCAddr=aRecipient;

281

}

282

} // makeRFC2822AddrSpec

283

284

285

286

287

// sysytool -f syncserv_odbc.xml addrparse "(Lukas Peter) luz@synthesis.ch (Zeller), gaga"

288

289

// Parse RFC2822-style address specificiation

290

// - aName will receive name and all (possible) comments

291

// - aRecipient will receive the (first, in case of a group) email address

292

cAppCharP parseRFC2822AddrSpec(

293

cAppCharP aText,

294

string &aName,

295

string &aRecipient

296

)

297

{

298

const char *p;

299

char c;

300

301

enum {

302

pstate_sepspace,

303

pstate_trailing,

304

pstate_text,

305

pstate_comment,

306

pstate_quoted,

307

pstate_email

308

} pstate = pstate_trailing;

309

string text,groupname;

310

bool textcouldbeemail=true;

311

bool atfound=false;

312

aName.erase();

313

aRecipient.erase();

314

p=aText;

315

do {

316

c=*p;

317

// check end of input

318

if (c==0) break; // done with the string

319

// advance to next char

320

p++;

321

// check according to state

322

switch (pstate) {

323

case pstate_sepspace:

324

if (c==' ') {

325

aName+=c;

326

}

327

pstate=pstate_trailing;

328

// otherwise treat like trailing

329

case pstate_trailing:

330

textcouldbeemail=aRecipient.empty();

331

atfound=false;

332

// skip trailing WSP first

333

if (c==' ' || c=='\t' || c=='\n' || c=='\r') break; // simply ignore WSP in trailing mode

334

else pstate=pstate_text;

335

// fall trough to do text analysis

336

case pstate_text:

337

// now check specials

338

if (c==',') { c=0; break; } // end of address, cause exit from loop, next will start after comma

339

else if (c==';') { c=0; break; } // end of group address list, treat it like single address

340

else if (c=='@' && textcouldbeemail) atfound=true; // flag presence of @

341

// check if text could still be a email address by itself

342

if (textcouldbeemail && !isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {

343

textcouldbeemail=false;

344

if (atfound) {

345

aRecipient=text;

346

text.erase();

347

}

348

atfound=false;

349

}

350

// now check other specials

351

if (c=='"') { pstate=pstate_quoted; } // start of quoted string

352

else if (c=='(') { pstate=pstate_comment; } // start of comment

353

else if (c=='<') { aRecipient.erase(); pstate=pstate_email; } // start of angle-addr, overrides other recipient texts

354

else if (c==':') {

355

groupname=aRecipient; // what we've probably parsed as recipient

356

groupname+=aName; // plus name so far

357

groupname+=text; // plus additional text

358

text.erase();

359

aName.erase();

360

aRecipient.erase();

361

pstate=pstate_trailing;

362

} // flag presence of a group name (which can be used as name if addr itself does not have one)

363

else {

364

// add other text chars to the text

365

text += c;

366

}

367

break;

368

case pstate_quoted:

369

if (c=='\\') {

370

if (*p) c=*p++; else break; // get next char (if any) and add to result untested

371

}

372

else if (c=='"') {

373

// end of quoted string

374

pstate=pstate_sepspace;

375

aName+=text;

376

text.erase();

377

break;

378

}

379

// add to text

380

text += c;

381

break;

382

case pstate_comment:

383

if (c==')') {

384

// end of comment

385

aName+=text;

386

text.erase();

387

pstate=pstate_sepspace;

388

break;

389

}

390

// add to text

391

text += c;

392

break;

393

case pstate_email:

394

if (!isalnum(c) && c!='@' && c!='_' && c!='-' && c!='.') {

395

// any non-email char terminates email, not only '>', but only '>' is swallowed

396

if (c!='>') p--; // re-evaluate char in next state

397

pstate=pstate_sepspace;

398

break;

399

}

400

// add to email

401

aRecipient += c;

402

break;

403

} // switch

404

} while (c!=0);

405

// handle case of pure email address without name and without < > brackets or :

406

if (aRecipient.empty() && textcouldbeemail && atfound)

407

aRecipient = text;

408

else

409

aName += text;

410

// if name is (now) empty, but we have a group name, use the group name

411

if (aName.empty()) aName=groupname;

412

// remove trailing spaces in aName

413

string::size_type n=aName.find_last_not_of(' ');

414

if (n!=string::npos) aName.resize(n+1);

415

// return where to continue parsing for next addr-spec (if not end of string)

416

return p;

417

} // parseRFC2822AddrSpec

418

419

420

421

// append internal UTF8 string as RFC2047 style encoding

422

const char *appendUTF8AsRFC2047(

423

const char *aText,

424

string &aString

425

)

426

{

427

const char *p,*q,*r;

428

char c;

429

430

p=aText;

431

do {

432

q=p; // remember start

433

// find chars until next char that must be stored as encoded word

434

do {

435

c=*p;

436

if (c==0 || (c & 0x80) || (c=='=' && *(p+1)=='?')) break;

437

p++;

438

} while(true);

439

// copy chars outside encoded word directly

440

if (p-q>0) aString.append(q,p-q);

441

// check if end of string

442

if (c==0) break;

443

// pack some chars into encoded word

444

// - start word

445

aString.append("=?utf-8?B?"); // 10 chars start (+ 2 chars will be added at end)

446

// - encoded data must be 75-12=63 chars or less

447

// Using B (=b64) encoding, output of 63 chars = 63/4*3 = max 47 chars.

448

// We use 45 max, as this is evenly divisible by 3 and output is 60 chars

449

q=p;

450

while (true) {

451

// find next space

452

while (*q && !isspace(*q) && q-p<45) q++;

453

if (q-p>=45) break; // abort if exhausted already

454

// find next non-space

455

r=q;

456

while (isspace(*r)) r++;

457

// check if next non-space will start a new word

458

if (*r & 0x80) {

459

// we should include the next word as well, if possible without exceeding size

460

if (r-p<45) {

461

q=r;

462

continue;

463

}

464

}

465

break;

466

}

467

// encode binary stream and append to string

468

appendEncoded((const uInt8 *)p,q-p,aString,enc_b);

469

p=q;

470

// - end word

471

aString.append("?=");

472

} while (true);

473

return p;

474

} // appendUTF8AsRFC2047

475

476

477

// parse character string from RFC2047 style encoding to UTF8 internal string

478

const char *appendRFC2047AsUTF8(

479

const char *aRFC2047,

480

stringSize aSize,

481

string &aString,

482

TLineEndModes aLEM

483

)

484

{

485

const char *p,*q,*r,*w;

486

char c = 0;

487

const char *eot = aRFC2047+aSize;

488

489

p=aRFC2047;

490

w=NULL__null; // start of last detected word (to avoid re-scanning)

491

while (p<eot) {

492

q=p; // remember start

493

// find chars until next encoded word

494

while (p<eot) {

495

c=*p;

496

if (c==0 || (p!=w && c=='=' && *(p+1)=='?')) break;

497

p++;

498

}

499

// copy chars outside encoded word directly

500

aString.append(q,p-q);

501

// check if end of string

502

if (p>=eot || c==0) break;

503

// try to parse encoded word

504

q=p+2;

505

scanword:

506

// q is now where we start to parse word contents

507

// p is where we would re-start reading normally if current word turns out not to be a word at all

508

// - remember start of word scan (to avoid re-scanning it)

509

w=p;

510

// - get charset

511

r=q;

512

while (q<eot && *q!='?' && isgraph(*q)) q++;

513

if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally

514

sInt16 en;

515

TCharSets charset=chs_unknown;

516

if (StrToEnum(MIMECharSetNames, numCharSets, en, r, q-r)) charset=(TCharSets)en;

517

// - get encoding

518

r=++q; // continue after ? separator

519

while (q<eot && *q!='?' && isgraph(*q)) q++;

520

if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally

521

TEncodingTypes encoding=enc_8bit;

522

if (StrToEnum(MIMEEncodingNames, numMIMEencodings, en, r, q-r)) encoding=(TEncodingTypes)en;

523

// - get data part

524

r=++q;

525

while (q+1<eot && *q && *q!=' ' && !(*q=='?' && *(q+1)=='=')) q++;

526

if (q>=eot || *q!='?') continue; // is not an encoded word, parse normally

527

// - decode

528

string decoded;

529

appendDecoded(r,q-r,decoded,encoding);

530

// - convert to UTF-8

531

appendStringAsUTF8(

532

decoded.c_str(),

533

aString,

534

charset,

535

aLEM

536

);

537

// - skip word terminator

538

p=q+2;

539

// - check for special case of adjacent words

540

q=p;

541

while (q<eot && isspace(*q)) q++;

542

if (q+1<eot && q>p && *q=='=' && *(q+1)=='?') {

543

// adjacent encoded words, only separated by space -> ignore space

544

// p is after previous word

545

q+=2;

546

// q is after lead-in of next word

547

goto scanword;

548

}

549

// p is where we continue reading

550

}

551

return p;

552

} // appendRFC2047AsUTF8

553

554

555

// decode encoded data and append to string

556

const char *appendDecoded(

557

const char *aText,

558

size_t aSize,

559

string &aBinString,

560

TEncodingTypes aEncoding

561

)

562

{

563

char c;

564

const char *p=aText;

565

uInt32 binsz;

566

uInt8 *binP;

567

568

switch (aEncoding) {

569

case enc_quoted_printable :

570

// decode quoted-printable content

571

while ((c=*p++)) {

572

// char found

573

if (c=='=') {

574

uInt16 code;

575

char hex[2];

576

// check for soft break first

577

if (*p=='\x0D' || *p=='\x0A') {

578

// soft break, swallow

579

if (*p=='\x0D') p++;

580

if (*p=='\x0A') p++;

581

continue;

582

}

583

// decode

584

hex[0]=*p;

585

if (*p) {

586

p++;

587

hex[1]=*p;

588

if (*p) {

589

p++;

590

if (HexStrToUShort(hex,code,2)==2) {

591

c=code; // decoded char

592

}

593

else continue; // simply ignore

594

}

595

else break;

596

}

597

else break;

598

}

599

// append char

600

aBinString+=c;

601

}

602

aText=p;

603

break;

604

case enc_base64:

605

case enc_b:

606

// decode base 64

607

binsz=0;

608

binP = b64::decode(aText, aSize, &binsz);

609

aBinString.append((const char *)binP,binsz);

610

b64::free(binP);

611

aText+=aSize;

612

break;

613

case enc_7bit:

614

case enc_8bit:

615

// copy no more than size

616

if (aSize>0) aBinString.reserve(aBinString.size()+aSize);

617

while (*p && aSize>0) {

618

aBinString+=*p++;

619

aSize--;

620

}

621

aText=p;

622

break;

623

case enc_none:

624

case enc_binary:

625

// copy bytes

626

aBinString.append(aText,aSize);

627

aText+=aSize;

628

break;

629

case numMIMEencodings:

630

// invalid

631

break;

632

} // quoted printable

633

return aText;

634

} // appendDecoded

635

636

637

638

// encode binary stream and append to string

639

void appendEncoded(

640

const uInt8 *aBinary,

641

size_t aSize,

642

string &aString,

643

TEncodingTypes aEncoding,

644

sInt16 aMaxLineSize,

645

sInt32 aCurrLineSize,

646

bool aSoftBreaksAsCR,

647

bool aEncodeBinary

648

)

649

{

650

char c;

651

string::size_type linestart;

652

const uInt8 *p;

653

bool softbreak;

654

uInt32 b64len;

655

char *b64;

656

bool processed;

657

658

switch (aEncoding) {

659

case enc_binary :

660

case enc_none :

661

case enc_8bit :

662

case enc_7bit : // assume we have no 8bit chars

663

// just copy 1:1

664

aString.append((const char *)aBinary,aSize);

665

break;

666

case enc_quoted_printable:

667

// quote-printable encoding

668

// - determine start of last line in aString

669

// Note: this is because property text will be folded when lines aMaxLineSize

670

linestart=aString.size()-aCurrLineSize;

671

for (p=aBinary;p<aBinary+aSize;p++) { // '\0' will not terminate the 'for' loop

672

c=*p;

673

if (!aEncodeBinary && !c) break; // still exit at NUL when not encoding real binary data

674

processed=false; // input data in c is not yet processed

675

// make sure we do not go over the limit (if one is set)

676

// - if less than 8 chars (=0D=0A + =\r) are free, soft break the line

677

softbreak= aMaxLineSize && (aString.size()-linestart>=string::size_type(aMaxLineSize)-8);

678

if (!aEncodeBinary) {

679

if (c=='\r') continue; // ignore them

680

if (c=='\b') continue; // ignore them (optional break indicators, not relevant for QP output)

681

if (c=='\n') { // - encode line ends

682

aString.append("=0D=0A"); // special string for Line Ends (CR LF)

683

processed = true; // c is processed now

684

softbreak = true;

685

} // if

686

} // if

687

// - handle soft line break (but only if really doing line breaking)

688

// Also: avoid adding a soft break at the very end of the string

689

if (softbreak && aMaxLineSize && p+1<aBinary+aSize) {

690

if (aSoftBreaksAsCR)

691

aString.append("=\r"); // '\r' signals softbreak for finalizeproperty()

692

else

693

aString.append("=\x0D\x0A"); // break line here

694

// new line starts after softbreak

695

linestart=aString.size();

696

// make sure soft line break is not followed by unencoded space

697

// (which would look like MIME folding)

698

if (c==' ' || (processed && p[1]==' ')) {

699

aString.append("=20");

700

if (processed) p++; // if current char was already processed, we need to explicitly skip the space

701

processed=true; // char is now processed in any case

702

} // if

703

} // if

704

// now encode the char in c if not already processed by now

705

if (!processed) {

706

bool encodeIt=

707

(c=='=') // escape equal sign itself

708

|| (c=='<' && aEncodeBinary) // avoid XML mismatch problems

709

|| (uInt8)c>0x7F

710

|| (uInt8)c<0x20; // '\0' will be encoded as well

711

if (encodeIt) { // encode all non ASCII chars > 0x7F (and control chars as well)

712

aString+="=";

713

aString+=NibbleToHexDigit(c>>4);

714

aString+=NibbleToHexDigit(c);

715

}

716

else

717

aString+=c; // just copy

718

} // if

719

}

720

break;

721

case enc_base64:

722

case enc_b:

723

// use base64 encoding

724

if (aSize>0) {

725

// don't call b64 with size=0!

726

b64 = b64::encode(

727

aBinary,aSize, // what to encode

728

&b64len, // output size

729

aMaxLineSize, // max line size

730

aSoftBreaksAsCR

731

);

732

// append to output, if any

733

if (b64) {

734

aString.append(b64,b64len);

735

// release buffer

736

b64::free(b64);

737

}

738

if (aEncoding!=enc_b) {

739

// make sure it ends with a newline for "base64" (but NOT for "b" as used in RFC2047)

740

// Note: when used in vCard2.1, that newline is part of the property and show as an

741

// empty line in the vCard.

742

aString += aSoftBreaksAsCR ? "\r" : "\x0D\x0A";

743

}

744

}

745

break;

746

default:

747

// do nothing

748

break;

749

} // switch

750

} // appendEncoded

751

752

753

#ifdef CHINESE_SUPPORT

754

// the flatBinTree tables for converting to and from GB2312

755

#include "gb2312_tables_inc.cpp"

756

// the flatBinTree tables for converting to and from CP936

757

#include "cp936_tables_inc.cpp"

758

#endif

759

760

761

// add char (possibly multi-byte) as UTF8 to value and apply charset translation if needed

762

// - returns > 0 if aNumChars was not correct number of bytes needed to convert an entire character;

763

// return value is number of bytes needed to generate one output character. If return value

764

// is<>0, no char has been appended to aVal.

765

uInt16 appendCharsAsUTF8(const char *aChars, string &aVal, TCharSets aCharSet, uInt16 aNumChars)

766

{

767

uInt32 ucs4;

768

// first char

769

uInt8 c=*aChars;

770

// this is a 8-bit char

771

switch(aCharSet) {

772

case chs_utf8 :

773

// UTF8 is native charset of the application, simply add

774

aVal+=c;

775

break;

776

case chs_ansi :

777

case chs_iso_8859_1 :

778

// do poor man's conversion to UCS4

779

// - most ANSI chars are 1:1 mapped

780

ucs4 = ((uInt8)c & 0xFF);

781

// - except 0x80..0x9F, use table for these

782

if (ucs4>=0x80 && ucs4<=0x9F)

783

ucs4=Ansi_80_to_9F_to_UCS4[ucs4-0x80];

784

// - convert to UTF8

785

UCS4toUTF8(ucs4,aVal);

786

break;

787

#ifdef CHINESE_SUPPORT

788

case chs_gb2312 : // simplified Chinese GB-2312 charset

789

// all below 0x80 are passed as-is

790

if (c<0x80)

791

aVal+=c; // simply append

792

else {

793

// 16-bit GB2312 char

794

if (aNumChars!=2)

795

return 2; // we need 2 chars for a successful GB-2312

796

// we have 2 bytes, convert them

797

ucs4 = searchFlatBintree(gb2312_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');

798

// - convert to UTF8

799

UCS4toUTF8(ucs4,aVal);

800

}

801

break;

802

case chs_cp936: // simplified chinese Windows codepage CP936

803

if (c<0x80)

804

aVal+=c; // simply append

805

else {

806

// 0x0080 (euro sign) or 2-byte CP936

807

if (c==0x80)

808

ucs4=searchFlatBintree(cp936_to_ucs2, 0x0080, INCONVERTIBLE_PLACEHOLDER'_');

809

else {

810

// 16-bit GB2312 char

811

if (aNumChars!=2)

812

return 2; // we need 2 chars for a successful CP936

813

// we have 2 bytes, convert them

814

ucs4 = searchFlatBintree(cp936_to_ucs2, (c<<8) + (uInt8)aChars[1], INCONVERTIBLE_PLACEHOLDER'_');

815

}

816

// - convert to UTF8

817

UCS4toUTF8(ucs4,aVal);

818

}

819

break;

820

#endif

821

case chs_ascii : // plain 7-bit ASCII

822

default : // unknown

823

// only 7-bit allowed

824

if (c & 0x80)

825

aVal+=INCONVERTIBLE_PLACEHOLDER'_';

826

else

827

aVal+=c;

828

break;

829

} // switch

830

return 0; // ok, converted aNumChars

831

} // appendCharsAsUTF8

832

833

834

835

836

// add string as UTF8 to value and apply charset translation if needed

837

// - if lineEndMode is not lem_none, all sorts of line ends will be converted

838

// to the specified mode.

839

void appendStringAsUTF8(const char *s, string &aVal, TCharSets aCharSet, TLineEndModes aLEM, bool aAllowFilemakerCR)

840

{

841

char c;

842

const char *start=s;

843

if (s) {

844

while ((c=*s++)!=0) {

845

if (aLEM!=lem_none) {

846

// line end handling enabled

847

if (c==0x0D) {

848

// could be mac (0x0D) or DOS (0x0D/0x0A)

849

if (*s==0x0A) {

850

// this is DOS-type line end

851

// - consume the 0x0A as well

852

s++;

853

// - check for 0x0D 0x0D 0x0A special case (caused by

854

// DOS-text-file conversion of non-DOS strings)

855

if (s>=start+3) {

856

if (*(s-3)==0x0D) {

857

// char before the DOS-CRLF was a 0x0D as well (and

858

// has already produced a newline in the output

859

// --> completely ignore this CRLF

860

continue;

861

}

862

}

863

}

864

// is a line end, convert it to platform-lineend

865

c='\n'; // platform

866

}

867

else if (c==0x0A) {

868

// 0x0A without preceeding 0x0D = unix

869

c='\n'; // platform

870

}

871

else if (c==0x0B && aAllowFilemakerCR) {

872

// 0x0B is used as lineend in filemaker export and achilformat

873

c='\n';

874

}

875

// line end converted to platform

876

if (c=='\n' && aLEM!=lem_cstr) {

877

// produce specified line end

878

switch (aLEM) {

879

case lem_mac : c=0x0D; break;

880

case lem_unix : c=0x0A; break;

881

case lem_filemaker : c=0x0B; break;

882

case lem_dos :

883

c=0x0A; // LF will be added later

884

aVal+=0x0D; // add CR

885

break;

886

default: break;

887

}

888

}

889

} // line end handling enabled

890

// normal add

891

uInt16 i,seqlen=1; // assume logical char consists of single byte

892

do {

893

seqlen=appendCharsAsUTF8(s-seqlen,aVal,aCharSet,seqlen); // add char (possibly with UTF8 expansion) to aVal

894

if (seqlen<=1) break; // done

895

for (i=1;i<seqlen;i++) { if (*s==0) break; else s++; }

896

if (i<seqlen) break; // not enough bytes

897

} while(true);

898

}

899

}

900

} // appendStringAsUTF8

901

902

903

904

// same as appendUTF8ToString, but output string is cleared first

905

bool storeUTF8ToString(

906

cAppCharP aUTF8, string &aVal,

907

TCharSets aCharSet,

908

TLineEndModes aLEM,

909

TQuotingModes aQuotingMode,

910

size_t aMaxBytes

911

)

912

{

913

aVal.erase();

914

return appendUTF8ToString(aUTF8,aVal,aCharSet,aLEM,aQuotingMode,aMaxBytes);

915

} // storeUTF8ToString

916

917

918

919

// helper for adding chars

920

static void appendCharToString(

921

char c,

922

string &aVal,

923

TQuotingModes aQuotingMode

924

) {

925

if (aQuotingMode==qm_none) {

926

aVal+=c;

927

}

928

else if (aQuotingMode==qm_backslash) {

929

// treat CR, LF, BS, TAB, single/doublequote and backslash specially

930

if (c==0x0D)

931

aVal+="\\r";

932

else if (c==0x0A)

933

aVal+="\\n";

934

else if (c==0x08)

935

aVal+="\\b";

936

else if (c==0x09)

937

aVal+="\\t";

938

else if (c=='"')

939

aVal+="\\\"";

940

else if (c=='\'')

941

aVal+="\\'";

942

else if (c=='\\')

943

aVal+="\\\\";

944

else

945

aVal+=c;

946

}

947

else if (aQuotingMode==qm_duplsingle) {

948

if (c=='\'') aVal+=c; // duplicate

949

aVal+=c; // normal append

950

}

951

else if (aQuotingMode==qm_dupldouble) {

952

if (c=='"') aVal+=c; // duplicate

953

aVal+=c; // normal append

954

}

955

} // appendCharToString

956

957

958

// add UTF8 string to value in custom charset

959

// - if aLEM is not lem_none, occurrence of any type of Linefeeds

960

// (LF,CR,CRLF and even CRCRLF) in input string will be

961

// replaced by the specified line end type

962

// - aQuotingMode specifies what quoting (for ODBC literals for example) should be used

963

// - output is clipped after aMaxBytes bytes (if not 0)

964

// - returns true if all input could be converted, false if output is clipped

965

bool appendUTF8ToString(

966

cAppCharP aUTF8,

967

string &aVal,

968

TCharSets aCharSet,

969

TLineEndModes aLEM,

970

TQuotingModes aQuotingMode,

971

size_t aMaxBytes

972

)

973

{

974

uInt32 ucs4;

975

uInt8 c;

976

size_t n=0;

977

cAppCharP p=aUTF8;

978

cAppCharP start=aUTF8;

979

980

if (!aUTF8) return true; // nothing to copy, copied everything of that!

981

if (aCharSet==chs_utf8 && aLEM==lem_none && aQuotingMode==qm_none) {

982

// shortcut: simply append entire string

983

if (aMaxBytes==0)

984

aVal+=aUTF8;

985

else

986

aVal.append(aUTF8,aMaxBytes);

987

// advance "processed" pointer behind consumed part of string

988

p=aUTF8+aVal.size();

989

}

990

else {

991

// process char by char

992

while((c=*aUTF8)!=0 && (aMaxBytes==0 || n<aMaxBytes)) {

993

p=aUTF8;

994

// check for linefeed conversion

995

if (aLEM!=lem_none && (c==0x0D || c==0x0A)) {

996

aUTF8++;

997

// line end, handling enabled

998

if (c==0x0D) {

999

// could be mac (0x0D) or DOS (0x0D/0x0A)

1000

if (*aUTF8==0x0A) {

1001

// this is DOS-type line end

1002

// - consume the 0x0A as well

1003

aUTF8++;

1004

// - check for 0x0D 0x0D 0x0A special case (caused by

1005

// DOS-text-file conversion of non-DOS strings)

1006

if (aUTF8>=start+3) {

1007

if (*(aUTF8-3)==0x0D) {

1008

// char before the DOS-CRLF was a 0x0D as well (and

1009

// has already produced a newline in the output

1010

// --> completely ignore this CRLF

1011

continue;

1012

}

1013

}

1014

}

1015

// is a line end, convert it to platform-lineend

1016

c='\n'; // platform

1017

}

1018

else { // must be 0x0A

1019

// 0x0A without preceeding 0x0D = unix

1020

c='\n'; // platform

1021

}

1022

// line end converted to platform

1023

if (aLEM!=lem_cstr) {

1024

// produce specified line end

1025

switch (aLEM) {

1026

case lem_mac : c=0x0D; break;

1027

case lem_filemaker : c=0x0B; break;

1028

case lem_unix : c=0x0A; break;

1029

case lem_dos :

1030

c=0x0A; // LF will be added later

1031

n++; // count it extra

1032

if (aMaxBytes && n>=aMaxBytes)

1033

goto stringfull; // no room to complete it, ignore it

1034

appendCharToString(0x0D,aVal,aQuotingMode);

1035

break;

1036

default: break;

1037

}

1038

}

1039

appendCharToString(c,aVal,aQuotingMode);

1040

n++; // count it

1041

} // line end, handling enabled

1042

else {

1043

// non lineend (or lineend not handled specially)

1044

if (aCharSet==chs_utf8) {

1045

aUTF8++;

1046

// - simply add char

1047

appendCharToString(c,aVal,aQuotingMode);

1048

n++;

1049

}

1050

else {

1051

// - make UCS4

1052

p=aUTF8; // save previous position to detect if we have processed all

1053

aUTF8=UTF8toUCS4(aUTF8,ucs4);

1054

// now we have UCS4

1055

if (ucs4==0) {

1056

// UTF8 resulting in UCS4 null char is not allowed

1057

ucs4=INCONVERTIBLE_PLACEHOLDER'_';

1058

}

1059

else {

1060

// convert to specified charset

1061

switch (aCharSet) {

1062

case chs_ansi:

1063

case chs_iso_8859_1:

1064

if ((ucs4<=0xFF && ucs4>=0xA0) || ucs4<0x80)

1065

// 00..7F and A0..FF directly map to ANSI

1066

appendCharToString(ucs4,aVal,aQuotingMode);

1067

else {

1068

// search for matching ANSI in table

1069

uInt8 k;

1070

for (k=0; k<0x20; k++) {

1071

if (ucs4==Ansi_80_to_9F_to_UCS4[k]) {

1072

// found in table

1073

break;

1074

}

1075

}

1076

if (k<0x20)

1077

// conversion found

1078

aVal+=k+0x80;

1079

else

1080

// no conversion found in table

1081

aVal+=INCONVERTIBLE_PLACEHOLDER'_';

1082

} // not in 1:1 range 0..7F, A0..FF

1083

n++;

1084

break;

1085

#ifdef CHINESE_SUPPORT

1086

case chs_gb2312 : // simplified Chinese GB-2312 charset

1087

// all below 0x80 are passed as-is

1088

if (ucs4<0x80) {

1089

appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes

1090

n++;

1091

}

1092

else {

1093

// convert to 16-bit GB2312 char

1094

uInt16 gb = searchFlatBintree(ucs2_to_gb2312, ucs4, INCONVERTIBLE_PLACEHOLDER'_');

1095

// check if we have space

1096

if (aMaxBytes!=0 && n+2>aMaxBytes)

1097

goto stringfull;

1098

// append as two bytes to output string

1099

aVal+=gb >> 8;

1100

aVal+=gb & 0xFF;

1101

n+=2;

1102

}

1103

break;

1104

case chs_cp936 : // simplified Chinese CP936 windows codepage

1105

// all below 0x80 are passed as-is

1106

if (ucs4<0x80) {

1107

appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes

1108

n++;

1109

}

1110

else {

1111

// convert to CP936 16-bit representation

1112

uInt16 twobytes = searchFlatBintree(ucs2_to_cp936, ucs4, INCONVERTIBLE_PLACEHOLDER'_');

1113

// append as two bytes to output string, but only this is a CP936 two-byte at all

1114

if (twobytes>0x0080) {

1115

// check if we have space

1116

if (aMaxBytes!=0 && n+2>aMaxBytes)

1117

goto stringfull;

1118

aVal+=twobytes >> 8; // sub-page lead in

1119

n++;

1120

}

1121

aVal+=twobytes & 0xFF; // sub-page code

1122

n++;

1123

}

1124

break;

1125

#endif

1126

case chs_ascii:

1127

// explicit ASCII: convert some special chars to plain ASCII

1128

if ((ucs4 & 0xFFFFFF80) !=0) {

1129

// ASCIIfy table to convert umlauts etc. to nearest plain ASCII

1130

typedef struct {

1131

uInt32 ucs4;

1132

uInt8 ascii;

1133

} TASCIIfyEntry;

1134

1135

static const TASCIIfyEntry ASCIIfyTable[] = {

1136

{ 0x000000C4, 'A' }, // Adieresis

1137

{ 0x000000C5, 'A' }, // Aring

1138

{ 0x000000C7, 'C' }, // Ccedilla

1139

{ 0x000000C9, 'E' }, // Eacute

1140

{ 0x000000D1, 'N' }, // Ntilde

1141

{ 0x000000D6, 'O' }, // Odieresis

1142

{ 0x000000DC, 'U' }, // Udieresis

1143

{ 0x000000E1, 'a' }, // aacute

1144

{ 0x000000E0, 'a' }, // agrave

1145

{ 0x000000E2, 'a' }, // acircumflex

1146

{ 0x000000E4, 'a' }, // adieresis

1147

{ 0x000000E3, 'a' }, // atilde

1148

{ 0x000000E5, 'a' }, // aring

1149

{ 0x000000E7, 'c' }, // ccedilla

1150

{ 0x000000E9, 'e' }, // eacute

1151

{ 0x000000E8, 'e' }, // egrave

1152

{ 0x000000EA, 'e' }, // ecircumflex

1153

{ 0x000000EB, 'e' }, // edieresis

1154

{ 0x000000ED, 'i' }, // iacute

1155

{ 0x000000EC, 'i' }, // igrave

1156

{ 0x000000EE, 'i' }, // icircumflex

1157

{ 0x000000EF, 'i' }, // idieresis

1158

{ 0x000000F1, 'n' }, // ntilde

1159

{ 0x000000F3, 'o' }, // oacute

1160

{ 0x000000F2, 'o' }, // ograve

1161

{ 0x000000F4, 'o' }, // ocircumflex

1162

{ 0x000000F6, 'o' }, // odieresis

1163

{ 0x000000F5, 'o' }, // otilde

1164

{ 0x000000FA, 'u' }, // uacute

1165

{ 0x000000F9, 'u' }, // ugrave

1166

{ 0x000000FB, 'u' }, // ucircumflex

1167

{ 0x000000FC, 'u' }, // udieresis

1168

{ 0x000000DF, 's' }, // germandoubles

1169

{ 0x000000D8, 'O' }, // Oslash

1170

{ 0x000000F8, 'o' }, // oslash

1171

{ 0x000000C0, 'A' }, // Agrave

1172

{ 0x000000C3, 'A' }, // Atilde

1173

{ 0x000000D5, 'O' }, // Otilde

1174

{ 0x00000152, 'O' }, // OE

1175

{ 0x00000153, 'o' }, // oe

1176

{ 0x000000C6, 'A' }, // AE

1177

{ 0x000000E6, 'a' }, // ae

1178

{ 0x000000C2, 'A' }, // Acircumflex

1179

{ 0x000000CA, 'E' }, // Ecircumflex

1180

{ 0x000000C1, 'A' }, // Aacute

1181

{ 0x000000CB, 'E' }, // Edieresis

1182

{ 0x000000C8, 'E' }, // Egrave

1183

{ 0x000000CD, 'I' }, // Iacute

1184

{ 0x000000CC, 'I' }, // Igrave

1185

{ 0x000000CE, 'i' }, // Icircumflex

1186

{ 0x000000CF, 'i' }, // Odieresis

1187

{ 0x000000D3, 'O' }, // Oacute

1188

{ 0x000000D2, 'O' }, // Ograve

1189

{ 0x000000D4, 'O' }, // Ocircumflex

1190

// terminator

1191

{ 0,0 }

1192

};

1193

1194

// search in ASCIIfy table

1195

uInt16 k=0;

1196

while (ASCIIfyTable[k].ucs4!=0) {

1197

if (ucs4==ASCIIfyTable[k].ucs4) {

1198

// found, fetch ASCII-equivalent

1199

ucs4=ASCIIfyTable[k].ascii;

1200

break; // use it

1201

}

1202

k++;

1203

}

1204

}

1205

// fall through to default, which does not know ANY non-ASCII

1206

default:

1207

// only 7 bit ASCII is allowed

1208

if ((ucs4 & 0xFFFFFF80) !=0)

1209

aVal+=INCONVERTIBLE_PLACEHOLDER'_';

1210

else

1211

appendCharToString(ucs4,aVal,aQuotingMode); // simply append ASCII codes

1212

n++;

1213

break;

1214

} // switch

1215

} // valid UCS4

1216

} // not already UTF8

1217

} // if not lineend

1218

// processed until here

1219

p=aUTF8;

1220

} // while not end of input string

1221

} // not already UTF8

1222

// return true if input string completely consumed

1223

stringfull:

1224

return (*p==0);

1225

} // appendUTF8ToString

1226

1227

1228

// convert UTF8 to UCS4

1229

// - returns pointer to next char

1230

// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)

1231

const char *UTF8toUCS4(const char *aUTF8, uInt32 &aUCS4)

1232

{

1233

uInt8 c;

1234

sInt16 morechars;

1235

1236

if ((c=*aUTF8)!=0) {

1237

aUTF8++;

1238

// there is a char

1239

morechars=0;

1240

// decode UTF8 lead-in

1241

if ((c & 0x80) == 0) {

1242

// single byte

1243

aUCS4=c;

1244

morechars=0;

1245

}

1246

else if ((c & 0xE0) == 0xC0) {

1247

// two bytes

1248

aUCS4=c & 0x1F;

1249

morechars=1;

1250

}

1251

else if ((c & 0xF0) == 0xE0) {

1252

aUCS4=c & 0x0F;

1253

morechars=2;

1254

}

1255

else if ((c & 0xF8) == 0xF0) {

1256

aUCS4=c & 0x07;

1257

morechars=3;

1258

}

1259

else if ((c & 0xFC) == 0xF8) {

1260

aUCS4=c & 0x03;

1261

morechars=4;

1262

}

1263

else if ((c & 0xFE) == 0xFC) {

1264

aUCS4=c & 0x01;

1265

morechars=5;

1266

}

1267

else {

1268

// bad char

1269

aUCS4=0;

1270

}

1271

// process additional chars

1272

while(morechars--) {

1273

if ((c=*aUTF8)==0) {

1274

// unfinished sequence

1275

aUCS4=0;

1276

break;

1277

}

1278

aUTF8++;

1279

if ((c & 0xC0) != 0x80) {

1280

// bad additional char

1281

aUCS4=0;

1282

break;

1283

}

1284

// each additional char adds 6 new bits

1285

aUCS4 = aUCS4 << 6; // shift existing bits

1286

aUCS4 |= (c & 0x3F); // add new bits

1287

}

1288

}

1289

else {

1290

// no char

1291

aUCS4=0;

1292

}

1293

// return pointer to next char

1294

return aUTF8;

1295

} // UTF8toUCS4

1296

1297

1298

// convert UCS4 to UTF8 (0 char is not allowed and will be ignored!)

1299

void UCS4toUTF8(uInt32 aUCS4, string &aUTF8)

1300

{

1301

uInt8 c;

1302

1303

// ignore null char

1304

if (aUCS4==0) return;

1305

// create UTF8 lead-in

1306

sInt16 morechars=0;

1307

if (aUCS4<0x00000080) {

1308

// one byte

1309

c=aUCS4;

1310

}

1311

else if (aUCS4<0x00000800) {

1312

// two bytes

1313

c=0xC0 | ((aUCS4 >> 6) & 0x1F);

1314

morechars=1;

1315

}

1316

else if (aUCS4<0x00010000) {

1317

// three bytes

1318

c=0xE0 | ((aUCS4 >> 12) & 0x0F);

1319

morechars=2;

1320

}

1321

else if (aUCS4<0x00200000) {

1322

// four bytes

1323

c=0xF0 | ((aUCS4 >> 18) & 0x07);

1324

morechars=3;

1325

}

1326

else if (aUCS4<0x04000000) {

1327

// five bytes

1328

c=0xF8 | ((aUCS4 >> 24) & 0x03);

1329

morechars=4;

1330

}

1331

else {

1332

// six bytes

1333

c=0xFC | ((aUCS4 >> 30) & 0x01);

1334

morechars=5;

1335

}

1336

// add lead-in

1337

aUTF8+=c;

1338

// add rest of sequence

1339

while (morechars--) {

1340

c= 0x80 | ((aUCS4 >> (morechars * 6)) & 0x3F);

1341

aUTF8+=c;

1342

}

1343

} // UCS4toUTF8

1344

1345

1346

/* Encoding UTF-16 (excerpt from RFC 2781, paragraph 2.1)

1347

1348

Encoding of a single character from an ISO 10646 character value to

1349

UTF-16 proceeds as follows. Let U be the character number, no greater

1350

than 0x10FFFF.

1351

1352

1) If U < 0x10000, encode U as a 16-bit unsigned integer and

1353

terminate.

1354

1355

2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF,

1356

U' must be less than or equal to 0xFFFFF. That is, U' can be

1357

represented in 20 bits.

1358

1359

3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and

1360

0xDC00, respectively. These integers each have 10 bits free to

1361

encode the character value, for a total of 20 bits.

1362

1363

4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order

1364

bits of W1 and the 10 low-order bits of U' to the 10 low-order

1365

bits of W2. Terminate.

1366

1367

Graphically, steps 2 through 4 look like:

1368

U' = yyyyyyyyyyxxxxxxxxxx

1369

W1 = 110110yyyyyyyyyy

1370

W2 = 110111xxxxxxxxxx

*/

1371

1372

1373

// convert UCS4 to UTF-16

1374

// - returns 0 for UNICODE range UCS4 and first word of UTF-16 for non UNICODE

1375

uInt16 UCS4toUTF16(uInt32 aUCS4, uInt16 &aUTF16)

1376

{

1377

if (aUCS4<0x10000) {

1378

// in unicode range: single UNICODE char

1379

aUTF16=aUCS4;

1380

return 0; // no second char

1381

}

1382

else {

1383

// out of UNICODE range

1384

aUCS4-=0x10000;

1385

if (aUCS4>0xFFFF) {

1386

// inconvertible

1387

aUTF16=INCONVERTIBLE_PLACEHOLDER'_';

1388

return 0;

1389

}

1390

else {

1391

// convert to two-word UNICODE / UCS-2

1392

aUTF16=0xD800+(aUCS4>>10);

1393

return 0xDC00+(aUCS4 & 0x03FF);

1394

}

1395

}

1396

} // UCS4toUTF16

1397

1398

1399

1400

/* Decoding UTF-16

1401

1402

Decoding of a single character from UTF-16 to an ISO 10646 character

1403

value proceeds as follows. Let W1 be the next 16-bit integer in the

1404

sequence of integers representing the text. Let W2 be the (eventual)

1405

next integer following W1.

1406

1407

1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value

1408

of W1. Terminate.

1409

1410

2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence

1411

is in error and no valid character can be obtained using W1.

1412

Terminate.

1413

1414

3) If there is no W2 (that is, the sequence ends with W1), or if W2

1415

is not between 0xDC00 and 0xDFFF, the sequence is in error.

1416

Terminate.

1417

1418

4) Construct a 20-bit unsigned integer U', taking the 10 low-order

1419

bits of W1 as its 10 high-order bits and the 10 low-order bits of

1420

W2 as its 10 low-order bits.

1421

1422

5) Add 0x10000 to U' to obtain the character value U. Terminate.

1423

1424

Note that steps 2 and 3 indicate errors. Error recovery is not

1425

specified by this document. When terminating with an error in steps 2

1426

and 3, it may be wise to set U to the value of W1 to help the caller

1427

diagnose the error and not lose information. Also note that a string

1428

decoding algorithm, as opposed to the single-character decoding

1429

described above, need not terminate upon detection of an error, if

1430

proper error reporting and/or recovery is provided.

*/

1431

1432

1433

1434

// convert UTF-16 to UCS4

1435

// - returns pointer to next char

1436

// - returns UCS4=0 on error (no char, bad sequence, sequence not complete)

1437

const uInt16 *UTF16toUCS4(const uInt16 *aUTF16P, uInt32 &aUCS4)

1438

{

1439

uInt16 utf16=*aUTF16P++;

1440

1441

if (utf16<0xD800 || utf16>0xDFFF) {

←

Assuming 'utf16' is >= 55296

→

←

Assuming 'utf16' is <= 57343

→

←

Taking false branch

→

1442

// single char unicode

1443

aUCS4=utf16;

1444

}

1445

else {

1446

// could be two-char

1447

if (utf16<=0xDBFF) {

←

Assuming 'utf16' is <= 56319

→

←

Taking true branch

→

1448

// valid first char: check second char

1449

uInt16 utf16_2 = *aUTF16P; // next

←

Assigned value is garbage or undefined

1450

if (utf16_2 && utf16_2>=0xDC00 && utf16_2<=0xDFFF) {

1451

// second char exists and is valid

1452

aUTF16P++; // advance now

1453

aUCS4 =

1454

((utf16 & 0x3FF) << 10) +

1455

(utf16_2 & 0x3FF);

1456

}

1457

else

1458

aUCS4=0; // no char

1459

}

1460

else {

1461

aUCS4=0; // no char

1462

}

1463

}

1464

// return advanced pointer

1465

return aUTF16P;

1466

} // UCS4toUTF16

1467

1468

1469

1470

1471

1472

1473

// add UTF8 string as UTF-16 byte stream to 8-bit string

1474

// - if aLEM is not lem_none, occurrence of any type of Linefeeds

1475

// (LF,CR,CRLF and even CRCRLF) in input string will be

1476

// replaced by the specified line end type

1477

// - output is clipped after ByteString reaches aMaxBytes size (if not 0), = approx half as many Unicode chars

1478

// - returns true if all input could be converted, false if output is clipped

1479

bool appendUTF8ToUTF16ByteString(

1480

cAppCharP aUTF8,

1481

string &aUTF16ByteString,

1482

bool aBigEndian,

1483

TLineEndModes aLEM,

1484

uInt32 aMaxBytes

1485

)

1486

{

1487

uInt32 ucs4;

1488

uInt16 utf16=0,utf16_1;

1489

cAppCharP p;

1490

1491

while (aUTF8 && *aUTF8) {

1492

// convert next UTF8 char to UCS4

1493

p=UTF8toUCS4(aUTF8, ucs4);

1494

if (ucs4==0) break; // error in UTF8 encoding, exit

1495

// convert line ends

1496

if (ucs4 == '\n' && aLEM!=lem_none && aLEM!=lem_cstr) {

1497

// produce specified line end

1498

utf16_1=0;

1499

switch (aLEM) {

1500

case lem_mac : utf16=0x0D; break;

1501

case lem_filemaker : utf16=0x0B; break;

1502

case lem_unix : utf16=0x0A; break;

1503

case lem_dos :

1504

utf16_1=0x0D; // CR..

1505

utf16=0x0A; // ..then LF

1506

break;

1507

default: break;

1508

}

1509

}

1510

else {

1511

// ordinary char, use UTF16 encoding

1512

utf16_1 = UCS4toUTF16(ucs4,utf16);

1513

}

1514

// check if appending UTF16 would exceed max size specified

1515

if (aMaxBytes!=0 && aUTF16ByteString.size() + (utf16_1 ? 4 : 2) > aMaxBytes)

1516

break;

1517

// we can append, advance input pointer

1518

aUTF8 = p;

1519

// now append

1520

if (aBigEndian) {

1521

// Big end first, Motorola order

1522

if (utf16_1) {

1523

aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);

1524

aUTF16ByteString += (char)(utf16_1 & 0xFF);

1525

}

1526

aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);

1527

aUTF16ByteString += (char)(utf16 & 0xFF);

1528

}

1529

else {

1530

// Little end first, Intel order

1531

if (utf16_1) {

1532

aUTF16ByteString += (char)((utf16_1 >> 8) & 0xFF);

1533

aUTF16ByteString += (char)(utf16_1 & 0xFF);

1534

}

1535

aUTF16ByteString += (char)(utf16 & 0xFF);

1536

aUTF16ByteString += (char)((utf16 >> 8) & 0xFF);

1537

}

1538

} // while

1539

// true if all input consumed

1540

return (aUTF8==NULL__null) || (*aUTF8==0);

1541

} // appendUTF8ToUTF16ByteString

1542

1543

1544

// add UTF16 byte string as UTF8 to value

1545

void appendUTF16AsUTF8(

1546

const uInt16 *aUTF16,

1547

uInt32 aNumUTF16Chars,

1548

bool aBigEndian,

1549

string &aVal,

1550

bool aConvertLineEnds,

1551

bool aAllowFilemakerCR

1552

)

1553

{

1554

uInt32 ucs4;

1555

uInt16 utf16pair[2];

1556

cAppCharP inP = (cAppCharP)aUTF16;

1557

bool lastWasCR=false;

1558

1559

while (inP && !(*inP==0 && *(inP+1)==0) && aNumUTF16Chars>0) {

Assuming 'inP' is non-null

→

←

Assuming the condition is false

→

←

Assuming 'aNumUTF16Chars' is > 0

→

←

Loop condition is true. Entering loop body

→

1560

// get two words (in case of surrogate pair)

1561

if (aBigEndian) {

←

Assuming 'aBigEndian' is 0

→

←

Taking false branch

→

1562

// Motorola order

1563

utf16pair[0]=((*(inP) & 0xFF)<<8) + (*(inP+1) & 0xFF);

1564

if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+2) & 0xFF)<<8) + (*(inP+3) & 0xFF);

1565

}

1566

else {

1567

// Intel order

1568

utf16pair[0]=((*(inP+1) & 0xFF)<<8) + (*(inP) & 0xFF);

1569

if (aNumUTF16Chars>1) utf16pair[1]=((*(inP+3) & 0xFF)<<8) + (*(inP+2) & 0xFF);

←

Assuming 'aNumUTF16Chars' is <= 1

→

←

Taking false branch

→

1570

}

1571

cAppCharP hP = (cAppCharP)UTF16toUCS4(utf16pair, ucs4);

←

Calling 'UTF16toUCS4'

→

1572

1573

PDEBUGPRINTFX(DBG_PARSE+DBG_EXOTIC,(

1574

"Parsed %ld bytes: *(inP)=0x%02hX, *(inP+1)=0x%02hX, *(inP+2)=0x%02hX, *(inP+3)=0x%02hX, utf16pair[0]=0x%04hX, utf16pair[1]=0x%04hX, ucs4=0x%04lX",

1575

(uInt32)(hP-(cAppCharP)utf16pair),

1576

(uInt16)*(inP), (uInt16)*(inP+1), (uInt16)*(inP+2), (uInt16)*(inP+3),

1577

(uInt16)utf16pair[0], (uInt16)utf16pair[1],

1578

(uInt32)ucs4

1579

));

1580

1581

uInt32 bytes=hP-(cAppCharP)utf16pair;

1582

inP+=bytes; // next UTF16 to check

1583

aNumUTF16Chars-=bytes/2; // count down UTF16 chars

1584

// convert line ends if selected

1585

if (aConvertLineEnds) {

1586

if (ucs4 == 0x0D) {

1587

lastWasCR=true;

1588

continue;

1589

}

1590

else {

1591

if (ucs4 == 0x0A || (aAllowFilemakerCR && ucs4 == 0x0B))

1592

ucs4 = '\n'; // convert to LineEnd

1593

else if (lastWasCR)

1594

aVal += '\n'; // insert a LineEnd

1595

lastWasCR=false;

1596

}

1597

}

1598

// append to UTF-8 string

1599

UCS4toUTF8(ucs4, aVal);

1600

}

1601

if (lastWasCR)

1602

aVal += '\n'; // input string ended on CR, must be shown in output

1603

} // appendUTF16AsUTF8

1604

1605

1606

1607

1608

1609

1610

#ifdef BINTREE_GENERATOR

1611

1612

// add a key/value pair to the binary tree

1613

void addToBinTree(TBinTreeNode *&aBinTree, treeval_t aMinKey, treeval_t aMaxKey, treeval_t aKey, treeval_t aValue)

1614

{

1615

// start at root

1616

TBinTreeNode **nextPP = &aBinTree;

1617

treeval_t cmpval;

1618

do {

1619

// create the new decision value from max and min

1620

cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);

1621

// create the node if not already there

1622

if (*nextPP==NULL__null) {

1623

*nextPP = new TBinTreeNode;

1624

(*nextPP)->key = cmpval;

1625

(*nextPP)->nextHigher=NULL__null;

1626

(*nextPP)->nextLowerOrEqual=NULL__null;

1627

(*nextPP)->value=0;

1628

}

1629

// check if the node CREATED is a leaf node

1630

// this is the case if max==min

1631

if (aMaxKey==aMinKey) {

1632

// save leaf value (possibly overwriting existing leaf value for same code)

1633

(*nextPP)->value=aValue;

1634

break;

1635

}

1636

// decide which way to go

1637

if (aKey>cmpval) {

1638

// go to the "higher" side

1639

nextPP = &((*nextPP)->nextHigher);

1640

// determine new minimum

1641

aMinKey = cmpval+1; // minimum must be higher than cmpval

1642

}

1643

else {

1644

// go to the "lower or equal" side

1645

nextPP = &((*nextPP)->nextLowerOrEqual);

1646

// determine new maximum

1647

aMaxKey = cmpval; // maximum must be lower or equal than cmpval

1648

}

1649

} while(true);

1650

} // addToBinTree

1651

1652

1653

// dispose a bintree

1654

void disposeBinTree(TBinTreeNode *&aBinTree)

1655

{

1656

if (!aBinTree) return;

1657

if (aBinTree->nextHigher)

1658

disposeBinTree(aBinTree->nextHigher);

1659

if (aBinTree->nextLowerOrEqual)

1660

disposeBinTree(aBinTree->nextLowerOrEqual);

1661

delete aBinTree;

1662

aBinTree=NULL__null;

1663

} // disposeBinTree

1664

1665

1666

// convert key to value using a flat bintree

1667

treeval_t searchBintree(TBinTreeNode *aBinTree, treeval_t aKey, treeval_t aUndefValue, treeval_t aMinKey, treeval_t aMaxKey)

1668

{

1669

treeval_t cmpval;

1670

while(aBinTree) {

1671

// create the new decision value from max and min

1672

cmpval = aMinKey+((aMaxKey-aMinKey) >> 1);

1673

// must match stored cmpval

1674

if (cmpval!=aBinTree->key)

1675

return aUndefValue;

1676

// check if next node must be leaf if the tree contains our key,

1677

// this is the case if max==min

1678

if (aMaxKey==aMinKey) {

1679

if (aBinTree->nextHigher!=NULL__null || aBinTree->nextLowerOrEqual!=NULL__null) {

1680

// no leaf value here, should not be the case ever (we should have

1681

// encountered a node with no left or right link before this!)

1682

return aUndefValue;

1683

}

1684

else {

1685

// found a leaf value here

1686

return aBinTree->value;

1687

}

1688

}

1689

// decide which way to go

1690

if (aKey>cmpval) {

1691

// go to the "higher" side = just next element in array, except if we have the special marker here

1692

if (aBinTree->nextHigher == NULL__null)

1693

return aUndefValue; // we should go higher-side, but can't -> unknown key

1694

aBinTree=aBinTree->nextHigher;

1695

// determine new minimum

1696

aMinKey = cmpval+1; // minimum must be higher than cmpval

1697

}

1698

else {

1699

// go to the "lower" side = element at index indicated by current element, except if we have the special marker here

1700

if (aBinTree->nextLowerOrEqual == NULL__null)

1701

return aUndefValue; // we should go lower-or-equal-side, but can't -> unknown key

1702

aBinTree=aBinTree->nextLowerOrEqual;

1703

// determine new maximum

1704

aMaxKey = cmpval; // maximum must be lower or equal than cmpval

1705

}

1706

}

1707

// if we reach the end of the array, key is not in the tree

1708

return aUndefValue;

1709

} // searchBintree

1710

1711

1712

1713

1714

// make a flat form representation of the bintree in a one-dimensional array

1715

// - higher-side links are implicit (nodes following each other),

1716

// lower-or-equal-side links are explicit

1717

static bool flatBinTreeRecursion(

1718

TBinTreeNode *aBinTree, size_t &aIndex, treeval_t *aFlatArray, size_t aArrSize, treeval_t aLinksStart, treeval_t aLinksEnd

1719

)

1720

{

1721

// check if array is full

1722

if (aIndex>=aArrSize)

1723

return false;

1724

// examine node to flatten

1725

if (aBinTree->nextHigher==NULL__null && aBinTree->nextLowerOrEqual==NULL__null) {

1726

// this is a leaf node, containing only the value

1727

if (aBinTree->value>=aLinksStart && aBinTree->value<=aLinksEnd)

1728

return false; // link space and value space overlap

1729

aFlatArray[aIndex]=aBinTree->value;

1730

aIndex++;

1731

}

1732

else if (aBinTree->nextHigher==NULL__null) {

1733

// lower-side-only node: set special mark to specify that lower-or-equal side

1734

// implicitly follows (instead of higher-side)

1735

aFlatArray[aIndex]=aLinksStart + 1; // no node points to the immediately following node explicitly, so 1 can be used as special marker

1736

aIndex++;

1737

// - recurse to generate it

1738

if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))

1739

return false;

1740

}

1741

else {

1742

// this is a branch

1743

// - lower-or-equal side is represented as an index in the array

1744

aFlatArray[aIndex]=aLinksStart + 0; // default to not-existing (no node points to itself, so 0 can be used as NIL index value)

1745

// - higher side branch follows immediately

1746

size_t linkindex = aIndex++;

1747

// - recurse to generate it

1748

if (!flatBinTreeRecursion(aBinTree->nextHigher,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))

1749

return false;

1750

// - now we have the index where we must insert the lower-or-equal side

1751

if (aBinTree->nextLowerOrEqual!=NULL__null) {

1752

// there is a lower-or-equal side

1753

// - place relative link from original node

1754

uInt32 rellink=aIndex-linkindex;

1755

if ((uInt32)aLinksStart+rellink>(uInt32)aLinksEnd-1L) {

1756

// we need a long link

1757

// - move generated higher side branch one up

1758

for (size_t k=aIndex-1; k>linkindex; k--) aFlatArray[k+1]=aFlatArray[k];

1759

aIndex++; // we've eaten up one extra entry now

1760

// - now set long link

1761

aFlatArray[linkindex]=aLinksEnd-1; // long link marker

1762

if (rellink>0xFFFF)

1763

return false; // cannot jump more than 64k

1764

aFlatArray[linkindex+1]=rellink; // long link

1765

}

1766

else {

1767

// short link is ok

1768

aFlatArray[linkindex]=aLinksStart+rellink;

1769

}

1770

// - now create the lower-or-equal side

1771

if (!flatBinTreeRecursion(aBinTree->nextLowerOrEqual,aIndex,aFlatArray,aArrSize,aLinksStart,aLinksEnd))

1772

return false;

1773

}

1774

}

1775

return true;

1776

} // flatBinTreeRecursion

1777

1778

1779

// make a flat form representation of the bintree in a one-dimensional array

1780

// - higher-side links are implicit (nodes following each other),

1781

// lower-or-equal-side links are explicit

1782

bool flatBinTree(

1783

TBinTreeNode *aBinTree, TConvFlatTree &aFlatTree, size_t aArrSize,

1784

treeval_t aMinKey, treeval_t aMaxKey, treeval_t aLinksStart, treeval_t aLinksEnd

1785

)

1786

{

1787

// save tree params

1788

aFlatTree.numelems=0;

1789

aFlatTree.minkey=aMinKey;

1790

aFlatTree.maxkey=aMaxKey;

1791

aFlatTree.linksstart=aLinksStart;

1792

aFlatTree.linksend=aLinksEnd;

1793

// now create actual tree

1794

size_t index=0;

1795

if (!flatBinTreeRecursion(aBinTree,index,aFlatTree.elements,aArrSize,aLinksStart,aLinksEnd))

1796

return false;

1797

aFlatTree.numelems=index; // actual length of array

1798

return true;

1799

} // flatBinTree

1800

1801

1802

1803

1804

#endif

1805

1806

1807

// convert key to value using a flat bintree

1808

treeval_t searchFlatBintree(const TConvFlatTree &aFlatTree, treeval_t aKey, treeval_t aUndefValue)

1809

{

1810

treeval_t cmpval,thisnode;

1811

size_t index=0;

1812

// get start min and max

1813

treeval_t minKey = aFlatTree.minkey;

1814

treeval_t maxKey = aFlatTree.maxkey;

1815

// reject out-of-bounds keys immediately

1816

if (aKey<minKey || aKey>maxKey)

1817

return aUndefValue;

1818

do {

1819

// create the new decision value from max and min

1820

cmpval = minKey+((maxKey-minKey) >> 1);

1821

thisnode = aFlatTree.elements[index];

1822

// check if next node must be leaf if the tree contains our key,

1823

// this is the case if max==min

1824

if (maxKey==minKey) {

1825

#ifdef BINTREE_GENERATOR

1826

if (thisnode>=aFlatTree.linksstart && thisnode<=aFlatTree.linksend) {

1827

// no leaf value here, should not be the case ever (we should have

1828

// encountered a node with no left or right link before this!)

1829

return aUndefValue;

1830

}

1831

else

1832

#endif

1833

{

1834

// found a leaf value here

1835

return (treeval_t) thisnode;

1836

}

1837

}

1838

// decide which way to go

1839

if (aKey>cmpval) {

1840

// go to the "higher" side = just next element in array, except if we have the special marker here

1841

if (thisnode == aFlatTree.linksstart+1)

1842

return aUndefValue; // we should go higher-side, but can't -> unknown key

1843

// next node is next index (or one more in case this is a long link)

1844

if (thisnode == aFlatTree.linksend-1)

1845

index++;

1846

index++;

1847

// determine new minimum

1848

minKey = cmpval+1; // minimum must be higher than cmpval

1849

}

1850

else {

1851

// go to the "lower" side = element at index indicated by current element, except if we have the special marker here

1852

if (thisnode == aFlatTree.linksstart+1)

1853

index++; // special case, "lower" side is immediately following because there is no "higher" side

1854

else {

1855

#ifdef BINTREE_GENERATOR

1856

// if node contains a leaf value instead of a link, something is wrong

1857

if (thisnode<aFlatTree.linksstart || thisnode>aFlatTree.linksend)

1858

return aUndefValue; // no leaf expected here

1859

#endif

1860

if (thisnode==aFlatTree.linksend-1) {

1861

// long link

1862

index++; // skip long link marker

1863

thisnode = aFlatTree.elements[index]; // get link value

1864

index = index+thisnode; // jump by link value

1865

}

1866

else {

1867

// short link

1868

index = index+(thisnode-aFlatTree.linksstart); // get index of next node (relative branch)

1869

}

1870

if (index==0)

1871

return aUndefValue; // there is no link

1872

}

1873

// determine new maximum

1874

maxKey = cmpval; // maximum must be lower or equal than cmpval

1875

}

1876

} while(index<aFlatTree.numelems);

1877

// if we reach the end of the array, key is not in the tree

1878

return aUndefValue;

1879

} // searchFlatBintree

1880

1881

// MD5 and B64 given string

1882

void MD5B64(const char *aString, sInt32 aLen, string &aMD5B64)

1883

{

1884

// determine input length

1885

if (aLen<=0) aLen=strlen(aString);

1886

// calc MD5

1887

md5::SYSYNC_MD5_CTX context;

1888

uInt8 digest[16];

1889

md5::Init (&context);

1890

md5::Update (&context, (const uInt8 *)aString,aLen);

1891

md5::Final (digest, &context);

1892

// b64 encode the MD5 digest

1893

uInt32 b64md5len;

1894

char *b64md5=b64::encode(digest,16,&b64md5len);

1895

// assign result

1896

aMD5B64.assign(b64md5,b64md5len);

1897

// done

1898

b64::free(b64md5); // return buffer allocated by b64::encode

1899

} // MD5B64

1900

1901

1902

// format as Timestamp for use in debug logs

1903

void StringObjTimestamp(string &aStringObj, lineartime_t aTimer)

1904

{

1905

// format the time

1906

if (aTimer==noLinearTime) {

1907

aStringObj = "<no time>";

1908

return;

1909

}

1910

sInt16 y,mo,d,h,mi,s,ms;

1911

lineartime2date(aTimer,&y,&mo,&d);

1912

lineartime2time(aTimer,&h,&mi,&s,&ms);

1913

StringObjPrintf(

1914

aStringObj,

1915

"%04d-%02d-%02d %02d:%02d:%02d.%03d",

1916

y,mo,d,h,mi,s,ms

1917

);

1918

} // StringObjTimestamp

1919

1920

1921

// format as hex string

1922

void StringObjHexString(string &aStringObj, const uInt8 *aBinary, uInt32 aBinSz)

1923

{

1924

aStringObj.erase();

1925

if (!aBinary) return;

1926

while (aBinSz>0) {

1927

AppendHexByte(aStringObj,*aBinary++);

1928

aBinSz--;

1929

}

1930

} // StringObjHexString

1931

1932

1933

// add (already encoded!) CGI to existing URL string

1934

bool addCGItoString(string &aStringObj, cAppCharP aCGI, bool noduplicate)

1935

{

1936

if (!noduplicate || aStringObj.find(aCGI)==string::npos) {

1937

// - Add CGI separator if and only if none exists already

1938

if (aStringObj.find("?")==string::npos)

1939

aStringObj += '?';

1940

aStringObj += aCGI;

1941

return true; // added

1942

}

1943

return false; // nothing added

1944

}

1945

1946

1947

// encode string for being used as a CGI key/value element

1948

string encodeForCGI(cAppCharP aCGI)

1949

{

1950

string cgi;

1951

cAppCharP p = aCGI;

1952

while (p && *p) {

1953

if (*p>0x7E || *p<=0x20 || *p=='%' || *p=='?' || *p=='&' || *p=='#') {

1954

// CGI encode these

1955

cgi += '%';

1956

AppendHexByte(cgi, *p);

1957

}

1958

else {

1959

// use as-is

1960

cgi += *p;

1961

}

1962

p++;

1963

}

1964

return cgi;

1965

} // encodeForCGI

1966

1967

1968

// Count bits

1969

int countbits(uInt32 aMask)

1970

{

1971

int bits=0;

1972

uInt32 mask=0x0000001;

1973

while (mask) {

1974

if (aMask & mask) bits++;

1975

mask=mask << 1;

1976

}

1977

return bits;

1978

} // countbits

1979

1980

1981

// Convert aString to uppercase in place (per byte, using toupper()).
void StringUpper(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    // toupper() is only defined for unsigned char values and EOF, so bytes
    // >=0x80 must not be passed as (possibly negative) plain char
    aString[k]=static_cast<char>(toupper(static_cast<unsigned char>(aString[k])));
  }
} // StringUpper

1986

1987

1988

// Convert aString to lowercase in place (per byte, using tolower()).
void StringLower(string &aString)
{
  for (string::size_type k=0; k<aString.size(); k++) {
    // tolower() is only defined for unsigned char values and EOF, so bytes
    // >=0x80 must not be passed as (possibly negative) plain char
    aString[k]=static_cast<char>(tolower(static_cast<unsigned char>(aString[k])));
  }
} // StringLower

1993

1994

1995

// Substitute occurences of pattern with replacement in string

1996

void StringSubst(

1997

string &aString, const char *aPattern, const string &aReplacement,

1998

sInt32 aPatternLen,

1999

TCharSets aCharSet, TLineEndModes aLEM,

2000

TQuotingModes aQuotingMode

2001

)

2002

{

2003

StringSubst(

2004

aString, aPattern,

2005

aReplacement.c_str(),

2006

aPatternLen,

2007

aReplacement.size(),

2008

aCharSet, aLEM, aQuotingMode

2009

);

2010

} // StringSubst

2011

2012

2013

// Substitute occurences of pattern with replacement in string

2014

void StringSubst(

2015

string &aString, const char *aPattern, const char *aReplacement,

2016

sInt32 aPatternLen, sInt32 aReplacementLen,

2017

TCharSets aCharSet, TLineEndModes aLEM,

2018

TQuotingModes aQuotingMode

2019

)

2020

{

2021

string::size_type i;

2022

string s;

2023

i=0;

2024

if (aPatternLen<0) aPatternLen=strlen(aPattern);

2025

// convert if needed

2026

if (!aReplacement) {

2027

aReplacement=""; // empty string if not specified

2028

aReplacementLen=0;

2029

}

2030

if (aCharSet!=chs_unknown) {

2031

appendUTF8ToString(aReplacement,s,aCharSet,aLEM,aQuotingMode);

2032

aReplacement=s.c_str();

2033

aReplacementLen=s.size();

2034

}

2035

else {

2036

if (aReplacementLen<0) aReplacementLen=strlen(aReplacement);

2037

}

2038

// now replace

2039

while((i=aString.find(aPattern,i))!=string::npos) {

2040

aString.replace(i,aPatternLen,aReplacement);

2041

i+=aReplacementLen;

2042

}

2043

} // StringSubst

2044

2045

2046

// Substitute occurences of pattern with replacement in string

2047

void StringSubst(string &aString, const char *aPattern, const string &aReplacement, sInt32 aPatternLen)

2048

{

2049

StringSubst(aString,aPattern,aReplacement.c_str(),aPatternLen,aReplacement.size());

2050

} // StringSubst

2051

2052

2053

// Substitute occurences of pattern with integer number in string

2054

void StringSubst(string &aString, const char *aPattern, sInt32 aNumber, sInt32 aPatternLen)

2055

{

2056

string s;

2057

StringObjPrintf(s,"%ld",(long)aNumber);

2058

StringSubst(aString,aPattern,s,aPatternLen);

2059

} // StringSubst

2060

2061

2062

2063

// copy PCdata contents into std::string object

2064

void smlPCDataToStringObj(const SmlPcdataPtr_t aPcdataP, string &aStringObj)

2065

{

2066

if (!aPcdataP || !aPcdataP->content) {

2067

// no content at all

2068

aStringObj.erase();

2069

}

2070

else if (

2071

// NOTE: Opaque works only with modified syncML toolkit which

2072

// makes sure opaque content is ALSO TERMINATED LIKE A C-STRING

2073

aPcdataP->contentType == SML_PCDATA_STRING ||

2074

aPcdataP->contentType == SML_PCDATA_OPAQUE

2075

) {

2076

// string or opaque type

2077

aStringObj.assign((char *)aPcdataP->content, aPcdataP->length);

2078

}

2079

else if (aPcdataP->contentType == SML_PCDATA_EXTENSION) {

2080

// extension type

2081

StringObjPrintf(aStringObj,"[PCDATA_EXTENSION Type=%hd]",(sInt16)aPcdataP->extension);

2082

}

2083

else {

2084

// other type

2085

StringObjPrintf(aStringObj,"[PCDATA Type=%hd]",(sInt16)aPcdataP->contentType);

2086

}

2087

} // smlPCDataToStringObj

2088

2089

2090

// returns item string or empty string (NEVER NULL)

2091

const char *smlItemDataToCharP(const SmlItemPtr_t aItemP)

2092

{

2093

if (!aItemP) return "";

2094

return smlPCDataToCharP(aItemP->data);

2095

} // smlItemDataToCharP

2096

2097

2098

// returns first item string or empty string (NEVER NULL)

2099

const char *smlFirstItemDataToCharP(const SmlItemListPtr_t aItemListP)

2100

{

2101

if (!aItemListP) return "";

2102

return smlItemDataToCharP(aItemListP->item);

2103

} // smlFirstItemDataToCharP

2104

#endif //SYSYNC_ENGINE

2105

2106

// returns pointer to PCdata contents or null string. If aSizeP!=NULL, length will be stored in *aSize

2107

const char *smlPCDataToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP)

2108

{

2109

const char *str = smlPCDataOptToCharP(aPcdataP, aSizeP);

2110

if (str) return str;

2111

return "";

2112

} // smlPCDataToCharP

2113

2114

2115

// returns pointer to PCdata contents if existing, NULL otherwise.

2116

// If aSizeP!=NULL, length will be stored in *aSize

2117

const char *smlPCDataOptToCharP(const SmlPcdataPtr_t aPcdataP, stringSize *aSizeP)

2118

{

2119

if (!aPcdataP || !aPcdataP->content) {

2120

return NULL__null; // we have no value, it could be empty howevert

2121

if (aSizeP) *aSizeP=0;

2122

}

2123

if (aPcdataP->length==0) {

2124

// empty content

2125

if (aSizeP) *aSizeP=0;

2126

return ""; // return empty string

2127

}

2128

else if (

2129

// NOTE: Opaque works only with modified syncML toolkit which

2130

// makes sure opaque content is ALSO TERMINATED LIKE A C-STRING

2131

aPcdataP->contentType == SML_PCDATA_STRING ||

2132

aPcdataP->contentType == SML_PCDATA_CDATA || // XML only

2133

aPcdataP->contentType == SML_PCDATA_OPAQUE // WBXML only

2134

) {

2135

// return pointer to content

2136

if (aSizeP) *aSizeP=aPcdataP->length;

2137

return (char *) aPcdataP->content;

2138

}

2139

else {

2140

// no string

2141

if (aSizeP) *aSizeP=11;

2142

return "[no string]";

2143

}

2144

} // smlPCDataOptToCharP

2145

2146

2147

// returns pointer to source or target LocURI

2148

const char *smlSrcTargLocURIToCharP(const SmlTargetPtr_t aSrcTargP)

2149

{

2150

if (!aSrcTargP || !aSrcTargP->locURI) {

2151

return ""; // empty string

2152

}

2153

else {

2154

// return PCdata string contents

2155

return smlPCDataToCharP(aSrcTargP->locURI);

2156

}

2157

} // smlSrcTargLocURIToCharP

2158

2159

2160

// returns pointer to source or target LocName

2161

const char *smlSrcTargLocNameToCharP(const SmlTargetPtr_t aSrcTargP)

2162

{

2163

if (!aSrcTargP || !aSrcTargP->locName) {

2164

return ""; // empty string

2165

}

2166

else {

2167

// return PCdata string contents

2168

return smlPCDataToCharP(aSrcTargP->locName);

2169

}

2170

} // smlSrcTargLocNameToCharP

2171

2172

2173

#ifdef SYSYNC_ENGINE1

2174

// returns error code made ready for SyncML sending (that is, remove offset

2175

// of 10000 if present, and make generic error 500 for non-SyncML errors,

2176

// and return LOCERR_OK as 200)

2177

localstatus syncmlError(localstatus aErr)

2178

{

2179

if (aErr==LOCERR_OK) return 200; // SyncML ok code

2180

if (aErr<999) return aErr; // return as is

2181

if (aErr>=LOCAL_STATUS_CODE+100 && aErr<=999)

2182

return aErr-LOCAL_STATUS_CODE; // return with offset removed

2183

// no suitable conversion

2184

return 500; // return generic "bad"

2185

} // localError

2186

2187

2188

// returns error code made local (that is, offset by 10000 in case aErr is a

2189

// SyncML status code <10000, and convert 200 into LOCERR_OK)

2190

localstatus localError(localstatus aErr)

2191

{

2192

if (aErr==200 || aErr==0) return LOCERR_OK;

2193

if (aErr<LOCAL_STATUS_CODE) return aErr+LOCAL_STATUS_CODE;

2194

return aErr;

2195

} // localError

2196

2197

2198

// returns pure relative URI, if specified relative or absolute to

2199

// given server URI

2200

const char *relativeURI(const char *aURI,const char *aServerURI)

2201

{

2202

// check for "./" type relative URI

2203

if (strnncmp(aURI,URI_RELPREFIX"./",2)==0) {

2204

// relative URI prefixed with "./", just zap the relative part

2205

return aURI+2;

2206

}

2207

else if (aServerURI) {

2208

// test if absolute URI specifying the right server

2209

uInt32 n=strlen(aServerURI);

2210

if (strnncmp(aURI,aServerURI,n)==0) {

2211

// beginning of URI matches server's URI

2212

const char *p=aURI+n;

2213

// skip delimiter, if any

2214

if (*p=='/') p++;

2215

// return relative part of URI

2216

return p;

2217

}

2218

}

2219

// just return unmodified

2220

return aURI;

2221

} // relativeURI

2222

2223

2224

// Split "host:port" into address and port parts at the first ':'.
// - aAddr : if not NULL, receives the part before the colon (or all of aHost)
// - aPort : if not NULL, receives the part after the colon (or is emptied)
void splitHostname(const char *aHost,string *aAddr,string *aPort)
{
  const char *colon=strchr(aHost,':');
  if (!colon) {
    // no port spec
    if (aAddr) aAddr->assign(aHost);
    if (aPort) aPort->erase();
    return;
  }
  // port spec found
  if (aAddr) aAddr->assign(aHost,colon-aHost);
  if (aPort) aPort->assign(colon+1);
} // splitHostname

2241

2242

// Value of a single hex digit (either case), -1 if aChar is no hex digit.
static int urlHexDigitValue(char aChar)
{
  if (aChar>='0' && aChar<='9') return aChar-'0';
  if (aChar>='a' && aChar<='f') return aChar-'a'+10;
  if (aChar>='A' && aChar<='F') return aChar-'A'+10;
  return -1;
}


// Translate %XX into corresponding character in-place.
// - str == NULL or a string without '%' is left untouched
// - '%' always consumes the two characters following it; if they are not both
//   hex digits, the whole escape is silently dropped
// - a '%' with less than two characters following ends decoding, the
//   incomplete escape is dropped
// Decoding works on the C string contents, i.e. stops at the first NUL.
void urlDecode(string *str)
{
  // nothing todo?
  if (!str ||
      str->find('%') == string::npos) return;

  string decoded;
  decoded.reserve(str->size());
  const char *in = str->c_str();
  char c;
  while ((c = *in++) != 0) {
    if (c != '%') {
      decoded.append(&c, 1);
      continue;
    }
    // fetch both escape characters, bail out on premature end of string
    const char hiChar = *in++;
    if (!hiChar) break;
    const char loChar = *in++;
    if (!loChar) break;
    // Only emit a byte when BOTH digits are valid; combining an invalid first
    // digit (counted as 0) with a valid second one would produce a garbage byte.
    const int hi = urlHexDigitValue(hiChar);
    const int lo = urlHexDigitValue(loChar);
    if (hi>=0 && lo>=0) {
      const char decodedChar = static_cast<char>(hi*16+lo);
      decoded.append(&decodedChar, 1);
    }
  }
  *str = decoded;
} // urlDecode

2285

2286

// Replace every non-alphanumeric character by %XX (uppercase hex) in-place.
// - str == NULL is ignored
// - a string consisting of alphanumeric characters only is left untouched
// Encoding works on the C string contents, i.e. stops at the first NUL.
void urlEncode(string *str)
{
  if (!str) {
    return;
  }

  static const char hexDigits[] = "0123456789ABCDEF";
  string encoded;
  bool changed = false;
  for (const char *in = str->c_str(); *in; ++in) {
    // Work on the unsigned byte value: a negative plain char is not a valid
    // argument for isalnum() and would be sign-extended when formatted as hex
    // (8 digits instead of 2, overflowing any 4-byte buffer).
    const unsigned char byte = static_cast<unsigned char>(*in);
    if (isalnum(byte)) {
      encoded += *in;
    }
    else {
      encoded += '%';
      encoded += hexDigits[byte >> 4];
      encoded += hexDigits[byte & 0x0F];
      changed = true;
    }
  }

  if (changed) {
    // Something was escaped because we found unsafe characters
    *str = encoded;
  }
} // urlEncode

2315

2316

// Split URL into protocol, auth-info (user, password), hostname, port,
// document name and query. Every output pointer may be NULL if the caller is
// not interested in that part; parts not present in the URL are emptied.
// - protocol : text before the first ':' (up to two '/' after it are skipped)
// - user/pw  : only recognized after a protocol, and only in the form
//              "user:password@" within the part before the first '/'
// - host/port: host part split at its first ':'
// - doc      : text after the '/' ending the host part, without that slash
// - query    : text after the first '?'
// Note: no part is url-decoded by this function itself.
void splitURL(const char *aURI,string *aProtocol,string *aHost,
              string *aDoc, string *aUser, string *aPasswd,
              string *aPort, string *aQuery)
{
  const char *rest = aURI; // part of the URI not consumed yet

  // extract protocol
  const char *protocolEnd = strchr(rest,':');
  if (!protocolEnd) {
    // no protocol found, and without protocol there is no auth either
    if (aProtocol) aProtocol->erase();
    if (aUser) aUser->erase();
    if (aPasswd) aPasswd->erase();
  }
  else {
    if (aProtocol) aProtocol->assign(rest,protocolEnd-rest);
    rest = protocolEnd+1; // past colon
    // past slashes following the colon (two expected, ignore if less are given)
    for (int slashes=0; slashes<2 && *rest=='/'; slashes++) {
      rest++;
    }
    // the part up to the next slash (or end of string) may contain auth info
    const char *authorityEnd = strchr(rest,'/');
    if (!authorityEnd) {
      authorityEnd = rest+strlen(rest);
    }
    const string authority(rest,authorityEnd-rest);
    const string::size_type atPos = authority.find('@');
    const string::size_type colonPos = authority.find(':');
    const bool hasAuth =
      atPos!=string::npos && colonPos!=string::npos && atPos>colonPos;
    if (hasAuth) {
      if (aUser) aUser->assign(authority,0,colonPos);
      if (aPasswd) aPasswd->assign(authority,colonPos+1,atPos-colonPos-1);
      // skip auth in full string
      rest += atPos+1;
    }
    else {
      if (aUser) aUser->erase();
      if (aPasswd) aPasswd->erase();
    }
  }
  // rest now points to host part

  // separate host[:port] from document and query
  std::string hostAndPort;
  const char *pathSlash = strchr(rest,'/');
  // the query is searched for after the slash, if there is one
  const char *docStart = pathSlash ? pathSlash+1 : rest;
  const char *queryMark = strchr(docStart,'?');
  if (pathSlash) {
    // host part stops at slash; the slash itself is not put into the docname
    // even if that leaves it empty (caller expected to add slash as needed)
    hostAndPort.assign(rest,pathSlash-rest);
    if (aDoc) {
      if (queryMark) aDoc->assign(docStart,queryMark-docStart);
      else aDoc->assign(docStart);
    }
  }
  else {
    // no path: doc part left empty, query may directly follow the host
    if (aDoc) aDoc->erase();
    if (queryMark) hostAndPort.assign(rest,queryMark-rest);
    else hostAndPort.assign(rest);
  }
  if (aQuery) {
    if (queryMark) aQuery->assign(queryMark+1);
    else aQuery->erase();
  }

  // split off the optional port here, on the still undecoded host part,
  // because url-decoding might introduce new : characters into the host name
  const string::size_type portSep = hostAndPort.find(':');
  if (portSep==string::npos) {
    if (aHost) aHost->assign(hostAndPort);
    if (aPort) aPort->erase();
  }
  else {
    if (aHost) aHost->assign(hostAndPort,0,portSep);
    if (aPort) aPort->assign(hostAndPort,portSep+1,string::npos);
  }
} // splitURL

2420

2421

#ifdef SPLIT_URL_MAIN

2422

2423

#include <stdio.h>

2424

#include <assert.h>

2425

2426

static void test(const std::string &in, const std::string &expected)

2427

{

2428

string protocol, host, doc, user, password, port, query;

2429

char buffer[1024];

2430

2431

splitURL(in.c_str(), &protocol, &host, &doc, &user, &password, &port, &query);

2432

2433

// URL-decode each part

2434

urlDecode(&protocol);

2435

urlDecode(&host);

2436

urlDecode(&doc);

2437

urlDecode(&user);

2438

urlDecode(&password);

2439

2440

sprintf(buffer,

2441

"prot '%s' user '%s' passwd '%s' host '%s' port '%s' doc '%s' query '%s'",

2442

protocol.c_str(),

2443

user.c_str(),

2444

password.c_str(),

2445

host.c_str(),

2446

port.c_str(),

2447

doc.c_str(),

2448

query.c_str());

2449

printf("%s -> %s\n", in.c_str(), buffer);

2450

assert(expected == buffer);

2451

}

2452

2453

int main(int argc, char **argv)

2454

{

2455

test("http://user:passwd@host/patha/pathb?query",

2456

"prot 'http' user 'user' passwd 'passwd' host 'host' port '' doc 'patha/pathb' query 'query'");

2457

test("http://user:passwd@host:port/patha/pathb?query",

2458

"prot 'http' user 'user' passwd 'passwd' host 'host' port 'port' doc 'patha/pathb' query 'query'");

2459

test("file:///foo/bar",

2460

"prot 'file' user '' passwd '' host '' port '' doc 'foo/bar' query ''");

2461

test("http://host%3a:port?param=value",

2462

"prot 'http' user '' passwd '' host 'host:' port 'port' doc '' query 'param=value'");

2463

test("http://host%3a?param=value",

2464

"prot 'http' user '' passwd '' host 'host:' port '' doc '' query 'param=value'");

2465

test("foo%24",

2466

"prot '' user '' passwd '' host 'foo$' port '' doc '' query ''");

2467

test("foo%2f",

2468

"prot '' user '' passwd '' host 'foo/' port '' doc '' query ''");

2469

test("foo%2A",

2470

"prot '' user '' passwd '' host 'foo*' port '' doc '' query ''");

2471

test("foo%24bar",

2472

"prot '' user '' passwd '' host 'foo$bar' port '' doc '' query ''");

2473

test("%24bar",

2474

"prot '' user '' passwd '' host '$bar' port '' doc '' query ''");

2475

test("foo%2",

2476

"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");

2477

test("foo%",

2478

"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");

2479

test("foo%g",

2480

"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");

2481

test("foo%gh",

2482

"prot '' user '' passwd '' host 'foo' port '' doc '' query ''");

2483

test("%ghbar",

2484

"prot '' user '' passwd '' host 'bar' port '' doc '' query ''");

2485

return 0;

2486

}

2487

#endif // SPLIT_URL_MAIN

2488

2489

#endif //SYSYNC_ENGINE

2490

2491

2492

// returns type from meta

2493

const char *smlMetaTypeToCharP(SmlMetInfMetInfPtr_t aMetaP)

2494

{

2495

if (!aMetaP) return NULL__null; // no meta at all

2496

return smlPCDataToCharP(aMetaP->type);

2497

} // smlMetaTypeToCharP

2498

2499

2500

2501

// returns Next Anchor from meta

2502

const char *smlMetaNextAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)

2503

{

2504

if (!aMetaP) return NULL__null; // no meta at all

2505

if (!aMetaP->anchor) return NULL__null; // no anchor at all

2506

return smlPCDataToCharP(aMetaP->anchor->next);

2507

} // smlMetaAnchorToCharP

2508

2509

2510

// returns Last Anchor from meta

2511

const char *smlMetaLastAnchorToCharP(SmlMetInfMetInfPtr_t aMetaP)

2512

{

2513

if (!aMetaP) return NULL__null; // no meta at all

2514

if (!aMetaP->anchor) return NULL__null; // no anchor at all

2515

return smlPCDataToCharP(aMetaP->anchor->last);

2516

} // smlMetaLastAnchorToCharP

2517

2518

2519

// returns DevInf pointer if any in specified PCData, NULL otherwise

2520

SmlDevInfDevInfPtr_t smlPCDataToDevInfP(const SmlPcdataPtr_t aPCDataP)

2521

{

2522

if (!aPCDataP) return NULL__null;

2523

if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;

2524

if (aPCDataP->extension!=SML_EXT_DEVINF) return NULL__null;

2525

return (SmlDevInfDevInfPtr_t)(aPCDataP->content);

2526

} // smlPCDataToDevInfP

2527

2528

2529

// returns MetInf pointer if any in specified PCData, NULL otherwise

2530

SmlMetInfMetInfPtr_t smlPCDataToMetInfP(const SmlPcdataPtr_t aPCDataP)

2531

{

2532

if (!aPCDataP) return NULL__null;

2533

if (aPCDataP->contentType!=SML_PCDATA_EXTENSION) return NULL__null;

2534

if (aPCDataP->extension!=SML_EXT_METINF) return NULL__null;

2535

return (SmlMetInfMetInfPtr_t)(aPCDataP->content);

2536

} // smlPCDataToMetInfP

2537

2538

2539

// allocate memory via SyncML toolkit allocation function, but throw

2540

// exception if it fails. Used by SML

2541

void *_smlMalloc(MemSize_t size)

2542

{

2543

void *p;

2544

2545

p=smlLibMalloc(size);

2546

if (!p) SYSYNC_THROW(TMemException("smlLibMalloc() failed"))throw TMemException("smlLibMalloc() failed");

2547

return p;

2548

} // _smlMalloc

2549

2550

2551

// returns true on successful conversion of PCData string to sInt32

2552

bool smlPCDataToULong(const SmlPcdataPtr_t aPCDataP, uInt32 &aLong)

2553

{

2554

return StrToULong(smlPCDataToCharP(aPCDataP),aLong);

2555

} // smlPCDataToLong

2556

2557

// returns true on successful conversion of PCData string to sInt32

2558

bool smlPCDataToLong(const SmlPcdataPtr_t aPCDataP, sInt32 &aLong)

2559

{

2560

return StrToLong(smlPCDataToCharP(aPCDataP),aLong);

2561

} // smlPCDataToLong

2562

2563

#ifdef SYSYNC_ENGINE1

2564

// returns true on successful conversion of PCData string to format

2565

bool smlPCDataToFormat(const SmlPcdataPtr_t aPCDataP, TFmtTypes &aFmt)

2566

{

2567

const char *fmt = smlPCDataToCharP(aPCDataP);

2568

sInt16 sh;

2569

if (*fmt) {

2570

if (!StrToEnum(encodingFmtSyncMLNames,numFmtTypes,sh,fmt))

2571

return false; // unknown format

2572

aFmt=(TFmtTypes)sh;

2573

}

2574

else {

2575

aFmt=fmt_chr; // no spec = chr

2576

}

2577

return true;

2578

} // smlPCDataToFormat

2579

#endif //SYSYNC_ENGINE

2580

2581

// build Meta anchor

2582

SmlPcdataPtr_t newMetaAnchor(const char *aNextAnchor, const char *aLastAnchor)

2583

{

2584

SmlPcdataPtr_t metaP;

2585

SmlMetInfAnchorPtr_t anchorP;

2586

2587

// - create empty meta

2588

metaP=newMeta();

2589

// - create new anchor

2590

anchorP=SML_NEW(SmlMetInfAnchor_t)((SmlMetInfAnchor_t*) _smlMalloc(sizeof(SmlMetInfAnchor_t)));

2591

// - set anchor contents

2592

//%%% anchorP->last=newPCDataOptEmptyString(aLastAnchor); // optional, but omitted only if string is NULL (not if only empty)

2593

anchorP->last=newPCDataOptString(aLastAnchor); // optional

2594

anchorP->next=newPCDataString(aNextAnchor); // mandatory

2595

// - set anchor

2596

((SmlMetInfMetInfPtr_t)(metaP->content))->anchor=anchorP;

2597

// return

2598

return metaP;

2599

} // newMetaAnchor

2600

2601

2602

// build Meta type

2603

SmlPcdataPtr_t newMetaType(const char *aMetaType)

2604

{

2605

SmlPcdataPtr_t metaP;

2606

2607

// - if not type, we don't create a meta at all

2608

if (aMetaType==NULL__null || *aMetaType==0) return NULL__null;

2609

// - create empty meta

2610

metaP=newMeta();

2611

// - set type

2612

((SmlMetInfMetInfPtr_t)(metaP->content))->type=newPCDataString(aMetaType);

2613

// return

2614

return metaP;

2615

} // newMetaType

2616

2617

2618

// build empty Meta

2619

SmlPcdataPtr_t newMeta(void)

2620

{

2621

SmlPcdataPtr_t metaP;

2622

SmlMetInfMetInfPtr_t metinfP;

2623

2624

// - create empty PCData

2625

metaP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));

2626

metaP->contentType=SML_PCDATA_EXTENSION;

2627

metaP->extension=SML_EXT_METINF;

2628

// - %%% assume length is not relevant for structured content (looks like in mgrutil.c)

2629

metaP->length=0;

2630

// - create empty meta

2631

metinfP = SML_NEW(SmlMetInfMetInf_t)((SmlMetInfMetInf_t*) _smlMalloc(sizeof(SmlMetInfMetInf_t)));

2632

metaP->content=metinfP; // link to PCdata

2633

// - init meta options

2634

metinfP->version=NULL__null;

2635

metinfP->format=NULL__null;

2636

metinfP->type=NULL__null;

2637

metinfP->mark=NULL__null;

2638

metinfP->size=NULL__null;

2639

metinfP->nextnonce=NULL__null;

2640

metinfP->maxmsgsize=NULL__null;

2641

metinfP->mem=NULL__null;

2642

metinfP->emi=NULL__null; // PCData list

2643

metinfP->anchor=NULL__null;

2644

// - SyncML 1.1

2645

metinfP->maxobjsize=NULL__null;

2646

// - SyncML 1.2

2647

metinfP->flags=0;

2648

// return

2649

return metaP;

2650

} // newMeta

2651

2652

2653

// copy meta from existing meta (for data items only

2654

// anchor, mem, emi, nonce are not copied!)

2655

// Note however that we copy maxobjsize, as we (mis-)use it for ZIPPED_BINDATA_SUPPORT

2656

SmlPcdataPtr_t copyMeta(SmlPcdataPtr_t aOldMetaP)

2657

{

2658

if (!aOldMetaP) return NULL__null;

2659

SmlPcdataPtr_t newmetaP=newMeta();

2660

if (!newmetaP) return NULL__null;

2661

SmlMetInfMetInfPtr_t oldmetinfP = smlPCDataToMetInfP(aOldMetaP);

2662

if (!oldmetinfP) return NULL__null;

2663

SmlMetInfMetInfPtr_t newmetInfP = smlPCDataToMetInfP(newmetaP);

2664

// - copy meta

2665

newmetInfP->version = smlPcdataDup(oldmetinfP->version);

2666

newmetInfP->format = smlPcdataDup(oldmetinfP->format);

2667

newmetInfP->type = smlPcdataDup(oldmetinfP->type);

2668

newmetInfP->mark = smlPcdataDup(oldmetinfP->mark);

2669

newmetInfP->size = smlPcdataDup(oldmetinfP->size);

2670

newmetInfP->maxobjsize = smlPcdataDup(oldmetinfP->maxobjsize);

2671

// return

2672

return newmetaP;

2673

} // copyMeta

2674

2675

2676

2677

2678

// add an item to an item list

2679

SmlItemListPtr_t *addItemToList(

2680

SmlItemPtr_t aItemP, // existing item data structure, ownership is passed to list

2681

SmlItemListPtr_t *aItemListPP // adress of pointer to existing item list or NULL

2682

)

2683

{

2684

if (aItemListPP && aItemP) {

2685

// find last itemlist pointer

2686

while (*aItemListPP) {

2687

aItemListPP=&((*aItemListPP)->next);

2688

}

2689

// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry

2690

*aItemListPP = SML_NEW(SmlItemList_t)((SmlItemList_t*) _smlMalloc(sizeof(SmlItemList_t)));

2691

(*aItemListPP)->next=NULL__null;

2692

(*aItemListPP)->item=aItemP; // insert new item

2693

// return pointer to pointer to next element (which is now NULL).

2694

// Can be passed in to addPCDataToList() again to append more elements without searching

2695

// for end-of-list

2696

return &((*aItemListPP)->next);

2697

}

2698

// nop, return pointer unmodified

2699

return aItemListPP;

2700

} // addItemToList

2701

2702

2703

// add a CTData item to a CTDataList

2704

SmlDevInfCTDataListPtr_t *addCTDataToList(

2705

SmlDevInfCTDataPtr_t aCTDataP, // existing CTData item data structure, ownership is passed to list

2706

SmlDevInfCTDataListPtr_t *aCTDataListPP // adress of pointer to existing item list or NULL

2707

)

2708

{

2709

if (aCTDataListPP && aCTDataP) {

2710

// find last itemlist pointer

2711

while (*aCTDataListPP) {

2712

aCTDataListPP=&((*aCTDataListPP)->next);

2713

}

2714

// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry

2715

*aCTDataListPP = SML_NEW(SmlDevInfCTDataList_t)((SmlDevInfCTDataList_t*) _smlMalloc(sizeof(SmlDevInfCTDataList_t
)));

2716

(*aCTDataListPP)->next=NULL__null;

2717

(*aCTDataListPP)->data=aCTDataP; // insert new data

2718

// return pointer to pointer to next element (which is now NULL).

2719

// Can be passed in to addPCDataToList() again to append more elements without searching

2720

// for end-of-list

2721

return &((*aCTDataListPP)->next);

2722

}

2723

// nop, return pointer unmodified

2724

return aCTDataListPP;

2725

} // addCTDataToList

2726

2727

2728

// add a CTDataProp item to a CTDataPropList

2729

SmlDevInfCTDataPropListPtr_t *addCTDataPropToList(

2730

SmlDevInfCTDataPropPtr_t aCTDataPropP, // existing CTDataProp item data structure, ownership is passed to list

2731

SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL

2732

)

2733

{

2734

if (aCTDataPropListPP && aCTDataPropP) {

2735

// find last itemlist pointer

2736

while (*aCTDataPropListPP) {

2737

aCTDataPropListPP=&((*aCTDataPropListPP)->next);

2738

}

2739

// aItemListPP now points to a NULL pointer which must be replaced by addr of new ItemList entry

2740

*aCTDataPropListPP = SML_NEW(SmlDevInfCTDataPropList_t)((SmlDevInfCTDataPropList_t*) _smlMalloc(sizeof(SmlDevInfCTDataPropList_t
)));

2741

(*aCTDataPropListPP)->next=NULL__null;

2742

(*aCTDataPropListPP)->data=aCTDataPropP; // insert new data

2743

// return pointer to pointer to next element (which is now NULL).

2744

// Can be passed in to addPCDataToList() again to append more elements without searching

2745

// for end-of-list

2746

return &((*aCTDataPropListPP)->next);

2747

}

2748

// nop, return pointer unmodified

2749

return aCTDataPropListPP;

2750

} // addCTDataPropToList

2751

2752

2753

// add a CTData describing a property (as returned by newDevInfCTData())

2754

// as a new property without parameters to a CTDataPropList

2755

SmlDevInfCTDataPropListPtr_t *addNewPropToList(

2756

SmlDevInfCTDataPtr_t aPropCTData, // CTData describing property

2757

SmlDevInfCTDataPropListPtr_t *aCTDataPropListPP // adress of pointer to existing item list or NULL

2758

)

2759

{

2760

SmlDevInfCTDataPropPtr_t propdataP = SML_NEW(SmlDevInfCTDataProp_t)((SmlDevInfCTDataProp_t*) _smlMalloc(sizeof(SmlDevInfCTDataProp_t
)));

2761

propdataP->param = NULL__null; // no params

2762

propdataP->prop = aPropCTData;

2763

return addCTDataPropToList(propdataP, aCTDataPropListPP);

2764

} // addNewPropToList

2765

2766

2767

2768

// add PCData element to a PCData list

2769

SmlPcdataListPtr_t *addPCDataToList(

2770

SmlPcdataPtr_t aPCDataP, // Existing PCData element to be added, ownership is passed to list

2771

SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL

2772

)

2773

{

2774

if (aPCDataListPP) {

2775

// find last PCDataList pointer

2776

while (*aPCDataListPP) {

2777

aPCDataListPP=&((*aPCDataListPP)->next);

2778

}

2779

// aItemListPP now points to a NULL pointer which must be replaced by addr of new PCDataList entry

2780

*aPCDataListPP = SML_NEW(SmlPcdataList_t)((SmlPcdataList_t*) _smlMalloc(sizeof(SmlPcdataList_t)));

2781

(*aPCDataListPP)->next=NULL__null;

2782

(*aPCDataListPP)->data=aPCDataP; // insert new item

2783

// return pointer to pointer to next element (which is now NULL).

2784

// Can be passed in to addPCDataToList() again to append more elements without searching

2785

// for end-of-list

2786

return &((*aPCDataListPP)->next);

2787

}

2788

return NULL__null;

2789

} // addPCDataToList

2790

2791

2792

// add PCData string to a PCData list

2793

SmlPcdataListPtr_t *addPCDataStringToList(

2794

const char *aString, // String to be added

2795

SmlPcdataListPtr_t *aPCDataListPP // adress of pointer to existing PCData list or NULL

2796

)

2797

{

2798

return addPCDataToList(newPCDataString(aString),aPCDataListPP);

2799

} // addPCDataStringToList

2800

2801

2802

// create new optional location (source or target)

2803

// Returns NULL if URI specified is NULL or empty

2804

SmlSourcePtr_t newOptLocation(

2805

const char *aLocURI,

2806

const char *aLocName

2807

)

2808

{

2809

if (!aLocURI || *aLocURI==0) return NULL__null;

2810

else return newLocation(aLocURI,aLocName);

2811

} // newOptLocation

2812

2813

2814

// create new location (source or target)

2815

// always returns location, even if URI and/or name are empty

2816

// If name is NULL or empty, only URI is generated

2817

SmlSourcePtr_t newLocation(

2818

const char *aLocURI,

2819

const char *aLocName

2820

)

2821

{

2822

SmlSourcePtr_t locP;

2823

2824

locP = SML_NEW(SmlSource_t)((SmlSource_t*) _smlMalloc(sizeof(SmlSource_t)));

2825

// URI is always present (might be empty, though)

2826

locP->locURI=newPCDataString(aLocURI);

2827

// name only if not empty

2828

if (aLocName && *aLocName!=0)

2829

locP->locName=newPCDataString(aLocName);

2830

else

2831

locP->locName=NULL__null;

2832

// filter defaults to NULL

2833

locP->filter=NULL__null;

2834

return locP;

2835

} // newLocation

2836

2837

2838

// create new empty Item

2839

SmlItemPtr_t newItem(void)

2840

{

2841

SmlItemPtr_t itemP;

2842

2843

itemP = SML_NEW(SmlItem_t)((SmlItem_t*) _smlMalloc(sizeof(SmlItem_t)));

2844

itemP->target=NULL__null;

2845

itemP->source=NULL__null;

2846

itemP->meta=NULL__null;

2847

itemP->data=NULL__null;

2848

// SyncML 1.1, no MoreData set

2849

itemP->flags=0;

2850

// SyncML 1.2

2851

itemP->targetParent=NULL__null;

2852

itemP->sourceParent=NULL__null;

2853

// custom data of client

2854

itemP->aux=NULL__null;

2855

return itemP;

2856

} // newItem

2857

2858

2859

// create new Item with string-type data

2860

SmlItemPtr_t newStringDataItem(

2861

const char *aString

2862

)

2863

{

2864

SmlItemPtr_t itemP=newItem();

2865

itemP->data=newPCDataString(aString);

2866

return itemP;

2867

} // newStringDataItem

2868

2869

2870

// create meta-format PCData

2871

SmlPcdataPtr_t newPCDataFormat(

2872

TFmtTypes aFmtType,

2873

bool aShowDefault

2874

)

2875

{

2876

if (aFmtType==fmt_chr && !aShowDefault)

2877

return NULL__null; // default

2878

else

2879

return newPCDataString(encodingFmtSyncMLNames[aFmtType]); // show format type

2880

} // newPCDataFormat

2881

2882

2883

// create new string-type PCData, if NULL or empty string is passed for aData,

2884

// NULL is returned (optional info not there)

2885

SmlPcdataPtr_t newPCDataFormatted(

2886

const uInt8 *aData, // data

2887

sInt32 aLength, // length of data, if<=0 then string length is calculated

2888

TFmtTypes aFmtType, // encoding Format

2889

bool aNeedsOpaque // set opaque needed (string that could confuse XML parsing or even binary)

2890

)

2891

{

2892

if (!aData) return NULL__null; // no data

2893

if (aLength==0) aLength=strlen((const char *)aData);

2894

if (aLength==0) return NULL__null; // no data

2895

// encode input string if needed

2896

SmlPcdataPtr_t pcdataP;

2897

char *b64data;

2898

uInt32 b64len;

2899

switch (aFmtType) {

2900

case fmt_b64:

2901

// convert to b64

2902

b64len=0;

2903

b64data=b64::encode(aData, aLength, &b64len);

2904

pcdataP = newPCDataString(b64data,b64len);

2905

b64::free(b64data);

2906

return pcdataP;

2907

default:

2908

// just copy into string or opaque/C_DATA string

2909

return newPCDataStringX(aData, aNeedsOpaque, aLength);

2910

}

2911

} // newPCDataEncoded

2912

2913

2914

// create new string-type PCData, if NULL or empty string is passed for aString,

2915

// NULL is returned (optional info not there)

2916

SmlPcdataPtr_t newPCDataOptString(

2917

const char *aString,

2918

sInt32 aLength // length of string, if<0 then length is calculated

2919

)

2920

{

2921

if (aString && (*aString!=0))

2922

return newPCDataString(aString,aLength);

2923

else

2924

return NULL__null;

2925

} // newPCDataOptString

2926

2927

2928

// create new string-type PCData, if NULL is passed for aString,

2929

// NULL is returned (optional info not there)

2930

// if empty string is passed, PCData with empty contents will be created

2931

SmlPcdataPtr_t newPCDataOptEmptyString(

2932

const char *aString,

2933

sInt32 aLength // length of string, if<0 then length is calculated

2934

)

2935

{

2936

if (aString)

2937

return newPCDataString(aString,aLength);

2938

else

2939

return NULL__null;

2940

} // newPCDataOptEmptyString

2941

2942

2943

// create new string-type PCData, if NULL is passed for aString,

2944

// an empty string is created (that is, a PCData with string terminator as

2945

// content only, length=0)

2946

SmlPcdataPtr_t newPCDataString(

2947

const char *aString,

2948

sInt32 aLength // length of string, if<0 then length is calculated

2949

)

2950

{

2951

return newPCDataStringX((const uInt8 *)aString,false,aLength);

2952

} // newPCDataString

2953

2954

2955

// create new PCData, aOpaque can be used to generate non-string data

2956

// Note: empty strings are always coded as non-opaque, even if aOpaque is set

2957

SmlPcdataPtr_t newPCDataStringX(

2958

const uInt8 *aString,

2959

bool aOpaque, // if set, an opaque method (OPAQUE or CDATA) is used

2960

sInt32 aLength // length of string, if<0 then length is calculated

2961

)

2962

{

2963

SmlPcdataPtr_t pcdataP;

2964

2965

pcdataP = SML_NEW(SmlPcdata_t)((SmlPcdata_t*) _smlMalloc(sizeof(SmlPcdata_t)));

2966

2967

// determine length

2968

if (aLength>=0 && aString)

2969

pcdataP->length = aLength; // as specified, and string argument not NULL

2970

else

2971

pcdataP->length = aString ? strlen((const char *)aString) : 0; // from argument, if NULL -> length=0

2972

// determine type

2973

if (aOpaque && aLength!=0) {

2974

// Note: due to modification in RTK, this generates

2975

// OPAQUE in WBXML and CDATA in XML

2976

pcdataP->contentType=SML_PCDATA_OPAQUE;

2977

}

2978

else {

2979

// non-critical string

2980

#ifdef SML_STRINGS_AS_OPAQUE

2981

pcdataP->contentType=SML_PCDATA_OPAQUE;

2982

#else

2983

pcdataP->contentType=SML_PCDATA_STRING;

2984

#endif

2985

}

2986

pcdataP->extension=SML_EXT_UNDEFINED;

2987

// - allocate data space (ALWAYS with room for a terminator, even if Opaque or empty string)

2988

pcdataP->content=smlLibMalloc(pcdataP->length+1); // +1 for terminator, see below

2989

// copy data (if any)

2990

if (pcdataP->length>0) {

2991

// - copy string

2992

smlLibMemcpy(pcdataP->content,aString,pcdataP->length);

2993

}

2994

// set terminator

2995

((char *)(pcdataP->content))[pcdataP->length]=0; // terminate C string

2996

// return

2997

return pcdataP;

2998

} // newPCDataStringX

2999

3000

3001

// create new string-type PCData from C++ string

3002

SmlPcdataPtr_t newPCDataString(

3003

const string &aString

3004

)

3005

{

3006

return newPCDataString(aString.c_str(),aString.length());

3007

} // newPCDataString(string&)

3008

3009

3010

// create new decimal string representation of sInt32 as PCData

3011

SmlPcdataPtr_t newPCDataLong(

3012

sInt32 aLong

3013

)

3014

{

3015

const int ssiz=20;

3016

char s[ssiz];

3017

3018

snprintf(s,ssiz,"%ld",(long)aLong);

3019

return newPCDataString(s);

3020

} // newPCDataLong

3021

3022

3023

// Nonce generator allowing last-session nonce to be correctly re-generated in next session

3024

void generateNonce(string &aNonce, const char *aDevStaticString, sInt32 aSessionStaticID)

3025

{

3026

md5::SYSYNC_MD5_CTX context;

3027

uInt8 digest[16];

3028

md5::Init (&context);

3029

// - add in static device string

3030

md5::Update (&context, (const uInt8 *)aDevStaticString, strlen(aDevStaticString));

3031

// - add in session static ID in binary format

3032

md5::Update (&context, (const uInt8 *)&aSessionStaticID, sizeof(sInt32));

3033

// - done

3034

md5::Final (digest, &context);

3035

// - make string of first 48 bit of MD5: 48 bits, use 6 bits per char = 8 chars

3036

uInt64 dig48 = ((uInt32)digest[0] << 0) |

3037

((uInt32)digest[1] << 8) |

3038

((uInt32)digest[2] << 16) |

3039

((uInt32)digest[3] << 24);

3040

aNonce.erase();

3041

for (sInt16 k=0; k<8; k++) {

3042

aNonce+=((dig48 & 0x03F) + 0x21);

3043

dig48 = dig48 >> 6;

3044

}

3045

} // generateNonce

3046

3047

3048

// create challenge of requested type

3049

SmlChalPtr_t newChallenge(TAuthTypes aAuthType, const string &aNextNonce, bool aBinaryAllowed)

3050

{

3051

SmlChalPtr_t chalP=NULL__null;

3052

SmlMetInfMetInfPtr_t metaP;

3053

3054

if (aAuthType!=auth_none) {

3055

// new challenge record

3056

chalP = SML_NEW(SmlChal_t)((SmlChal_t*) _smlMalloc(sizeof(SmlChal_t)));

3057

// add empty meta

3058

chalP->meta=newMeta();

3059

metaP=(SmlMetInfMetInfPtr_t)(chalP->meta->content);

3060

// add type and format

3061

// - type

3062

metaP->type=newPCDataString(authTypeSyncMLNames[aAuthType]);

3063

// - format

3064

const char *fmt = NULL__null;

3065

switch (aAuthType) {

3066

case auth_basic:

3067

// always request b64

3068

fmt=encodingFmtSyncMLNames[fmt_b64];

3069

break;

3070

case auth_md5:

3071

// request b64 only for non-binary capable encoding (that is, XML)

3072

/* %%% dont do that, Nokia9210 miserably fails when we do that,

3073

* it sends its data B64 encoded, but obviously with bad

3074

* data in it. Ericsson T39m seems to do it correctly however.

3075

if (!aBinaryAllowed)

3076

fmt=encodingFmtSyncMLNames[fmt_b64];

3077

3078

// always request b64 for now, seems to be safer with not fully compatible clients

3079

fmt=encodingFmtSyncMLNames[fmt_b64];

3080

break;

3081

default: break;

3082

}

3083

metaP->format=newPCDataOptString(fmt); // set format, but not empty

3084

// - add nonce if needed

3085

if (aAuthType==auth_md5) {

3086

// MD5 also might need nonce

3087

if (!aNextNonce.empty()) {

3088

// add base64 encoded nonce string

3089

uInt32 b64len;

3090

char *b64=b64::encode((const uInt8 *)aNextNonce.c_str(),aNextNonce.size(),&b64len);

3091

metaP->nextnonce=newPCDataString(b64,b64len);

3092

b64::free(b64); // return buffer allocated by b64_encode

3093

}

3094

}

3095

}

3096

return chalP;

3097

} // newChallenge

3098

3099

3100

// create new property or param descriptor for CTCap

3101

SmlDevInfCTDataPtr_t newDevInfCTData(cAppCharP aName,uInt32 aSize, bool aNoTruncate, uInt32 aMaxOccur, cAppCharP aDataType)

3102

{

3103

SmlDevInfCTDataPtr_t result = SML_NEW(SmlDevInfCTData_t)((SmlDevInfCTData_t*) _smlMalloc(sizeof(SmlDevInfCTData_t)));

3104

// fill descriptor

3105

// - name if property or param

3106

result->name=newPCDataString(aName);

3107

// - no display name so far

3108

result->dname=NULL__null; // no display name

3109

// - datatype (optional)

3110

result->datatype=newPCDataOptString(aDataType);

3111

// - max size

3112

if (aSize==0)

3113

result->maxsize=NULL__null; // no size

3114

else

3115

result->maxsize=newPCDataLong(aSize); // set size

3116

// - no valenum here, will be added later if any

3117

result->valenum=NULL__null; // no valenum

3118

// SyncML 1.2

3119

if (aMaxOccur==0)

3120

result->maxoccur=NULL__null; // no maxoccur

3121

else

3122

result->maxoccur=newPCDataLong(aMaxOccur); // set maxoccur

3123

result->flags = aNoTruncate ? SmlDevInfNoTruncate_f0x0020 : 0; // notruncate flag or none

3124

return result;

3125

} // newDevInfCTData

3126

3127

3128

// frees prototype element and sets calling pointer to NULL

3129

void FreeProtoElement(void * &aVoidP)

3130

{

3131

if (aVoidP) smlFreeProtoElement(aVoidP);

3132

aVoidP=NULL__null;

3133

} // FreeProtoElement

3134

3135

} // namespace sysync

3136

3137

// eof