xpcom/string/nsReadableUtils.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsReadableUtils.h"
   8
   9 #include "nsMemory.h"
  10 #include "nsString.h"
  11 #include "nsTArray.h"
  12 #include "nsUTF8Utils.h"
  13
  14 void
  15 LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
  16 {
  17   aDest.Truncate();
  18   LossyAppendUTF16toASCII(aSource, aDest);
  19 }
  20
  21 void
  22 CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
  23 {
  24   aDest.Truncate();
  25   AppendASCIItoUTF16(aSource, aDest);
  26 }
  27
  28 void
  29 LossyCopyUTF16toASCII(const char16_t* aSource, nsACString& aDest)
  30 {
  31   aDest.Truncate();
  32   if (aSource) {
  33     LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  34   }
  35 }
  36
  37 void
  38 CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
  39 {
  40   aDest.Truncate();
  41   if (aSource) {
  42     AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  43   }
  44 }
  45
  46 void
  47 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
  48 {
  49   aDest.Truncate();
  50   AppendUTF16toUTF8(aSource, aDest);
  51 }
  52
  53 void
  54 CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
  55 {
  56   aDest.Truncate();
  57   AppendUTF8toUTF16(aSource, aDest);
  58 }
  59
  60 void
  61 CopyUTF16toUTF8(const char16_t* aSource, nsACString& aDest)
  62 {
  63   aDest.Truncate();
  64   AppendUTF16toUTF8(aSource, aDest);
  65 }
  66
  67 void
  68 CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
  69 {
  70   aDest.Truncate();
  71   AppendUTF8toUTF16(aSource, aDest);
  72 }
  73
  74 void
  75 LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
  76 {
  77   uint32_t old_dest_length = aDest.Length();
  78   aDest.SetLength(old_dest_length + aSource.Length());
  79
  80   nsAString::const_iterator fromBegin, fromEnd;
  81
  82   nsACString::iterator dest;
  83   aDest.BeginWriting(dest);
  84
  85   dest.advance(old_dest_length);
  86
  87   // right now, this won't work on multi-fragment destinations
  88   LossyConvertEncoding16to8 converter(dest.get());
  89
  90   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  91               converter);
  92 }
  93
  94 void
  95 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
  96 {
  97   if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) {
  98     NS_ABORT_OOM(aDest.Length() + aSource.Length());
  99   }
 100 }
 101
 102 bool
 103 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
 104                    const mozilla::fallible_t&)
 105 {
 106   uint32_t old_dest_length = aDest.Length();
 107   if (!aDest.SetLength(old_dest_length + aSource.Length(),
 108                        mozilla::fallible_t())) {
 109     return false;
 110   }
 111
 112   nsACString::const_iterator fromBegin, fromEnd;
 113
 114   nsAString::iterator dest;
 115   aDest.BeginWriting(dest);
 116
 117   dest.advance(old_dest_length);
 118
 119   // right now, this won't work on multi-fragment destinations
 120   LossyConvertEncoding8to16 converter(dest.get());
 121
 122   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 123               converter);
 124   return true;
 125 }
 126
 127 void
 128 LossyAppendUTF16toASCII(const char16_t* aSource, nsACString& aDest)
 129 {
 130   if (aSource) {
 131     LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
 132   }
 133 }
 134
 135 void
 136 AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
 137 {
 138   if (aSource) {
 139     AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
 140   }
 141 }
 142
 143 void
 144 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
 145 {
 146   if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) {
 147     NS_ABORT_OOM(aDest.Length() + aSource.Length());
 148   }
 149 }
 150
 151 bool
 152 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
 153                   const mozilla::fallible_t&)
 154 {
 155   nsAString::const_iterator source_start, source_end;
 156   CalculateUTF8Size calculator;
 157   copy_string(aSource.BeginReading(source_start),
 158               aSource.EndReading(source_end), calculator);
 159
 160   uint32_t count = calculator.Size();
 161
 162   if (count) {
 163     uint32_t old_dest_length = aDest.Length();
 164
 165     // Grow the buffer if we need to.
 166     if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
 167       return false;
 168     }
 169
 170     // All ready? Time to convert
 171
 172     ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
 173     copy_string(aSource.BeginReading(source_start),
 174                 aSource.EndReading(source_end), converter);
 175
 176     NS_ASSERTION(converter.Size() == count,
 177                  "Unexpected disparity between CalculateUTF8Size and "
 178                  "ConvertUTF16toUTF8");
 179   }
 180
 181   return true;
 182 }
 183
 184 void
 185 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
 186 {
 187   if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) {
 188     NS_ABORT_OOM(aDest.Length() + aSource.Length());
 189   }
 190 }
 191
 192 bool
 193 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
 194                   const mozilla::fallible_t&)
 195 {
 196   nsACString::const_iterator source_start, source_end;
 197   CalculateUTF8Length calculator;
 198   copy_string(aSource.BeginReading(source_start),
 199               aSource.EndReading(source_end), calculator);
 200
 201   uint32_t count = calculator.Length();
 202
 203   // Avoid making the string mutable if we're appending an empty string
 204   if (count) {
 205     uint32_t old_dest_length = aDest.Length();
 206
 207     // Grow the buffer if we need to.
 208     if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
 209       return false;
 210     }
 211
 212     // All ready? Time to convert
 213
 214     ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
 215     copy_string(aSource.BeginReading(source_start),
 216                 aSource.EndReading(source_end), converter);
 217
 218     NS_ASSERTION(converter.ErrorEncountered() ||
 219                  converter.Length() == count,
 220                  "CalculateUTF8Length produced the wrong length");
 221
 222     if (converter.ErrorEncountered()) {
 223       NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
 224       aDest.SetLength(old_dest_length);
 225     }
 226   }
 227
 228   return true;
 229 }
 230
 231 void
 232 AppendUTF16toUTF8(const char16_t* aSource, nsACString& aDest)
 233 {
 234   if (aSource) {
 235     AppendUTF16toUTF8(nsDependentString(aSource), aDest);
 236   }
 237 }
 238
 239 void
 240 AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
 241 {
 242   if (aSource) {
 243     AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
 244   }
 245 }
 246
 247
 248 /**
 249  * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
 250  *
 251  * @param aSource an string you will eventually be making a copy of
 252  * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
 253  *
 254  */
 255 template <class FromStringT, class ToCharT>
 256 inline
 257 ToCharT*
 258 AllocateStringCopy(const FromStringT& aSource, ToCharT*)
 259 {
 260   return static_cast<ToCharT*>(nsMemory::Alloc(
 261     (aSource.Length() + 1) * sizeof(ToCharT)));
 262 }
 263
 264
 265 char*
 266 ToNewCString(const nsAString& aSource)
 267 {
 268   char* result = AllocateStringCopy(aSource, (char*)0);
 269   if (!result) {
 270     return nullptr;
 271   }
 272
 273   nsAString::const_iterator fromBegin, fromEnd;
 274   LossyConvertEncoding16to8 converter(result);
 275   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 276               converter).write_terminator();
 277   return result;
 278 }
 279
 280 char*
 281 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
 282 {
 283   nsAString::const_iterator start, end;
 284   CalculateUTF8Size calculator;
 285   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 286               calculator);
 287
 288   if (aUTF8Count) {
 289     *aUTF8Count = calculator.Size();
 290   }
 291
 292   char* result = static_cast<char*>
 293                  (nsMemory::Alloc(calculator.Size() + 1));
 294   if (!result) {
 295     return nullptr;
 296   }
 297
 298   ConvertUTF16toUTF8 converter(result);
 299   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 300               converter).write_terminator();
 301   NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
 302
 303   return result;
 304 }
 305
 306 char*
 307 ToNewCString(const nsACString& aSource)
 308 {
 309   // no conversion needed, just allocate a buffer of the correct length and copy into it
 310
 311   char* result = AllocateStringCopy(aSource, (char*)0);
 312   if (!result) {
 313     return nullptr;
 314   }
 315
 316   nsACString::const_iterator fromBegin, fromEnd;
 317   char* toBegin = result;
 318   *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 319                toBegin) = char(0);
 320   return result;
 321 }
 322
 323 char16_t*
 324 ToNewUnicode(const nsAString& aSource)
 325 {
 326   // no conversion needed, just allocate a buffer of the correct length and copy into it
 327
 328   char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
 329   if (!result) {
 330     return nullptr;
 331   }
 332
 333   nsAString::const_iterator fromBegin, fromEnd;
 334   char16_t* toBegin = result;
 335   *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 336                toBegin) = char16_t(0);
 337   return result;
 338 }
 339
 340 char16_t*
 341 ToNewUnicode(const nsACString& aSource)
 342 {
 343   char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
 344   if (!result) {
 345     return nullptr;
 346   }
 347
 348   nsACString::const_iterator fromBegin, fromEnd;
 349   LossyConvertEncoding8to16 converter(result);
 350   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 351               converter).write_terminator();
 352   return result;
 353 }
 354
 355 uint32_t
 356 CalcUTF8ToUnicodeLength(const nsACString& aSource)
 357 {
 358   nsACString::const_iterator start, end;
 359   CalculateUTF8Length calculator;
 360   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 361               calculator);
 362   return calculator.Length();
 363 }
 364
 365 char16_t*
 366 UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
 367                     uint32_t* aUTF16Count)
 368 {
 369   nsACString::const_iterator start, end;
 370   ConvertUTF8toUTF16 converter(aBuffer);
 371   copy_string(aSource.BeginReading(start),
 372               aSource.EndReading(end),
 373               converter).write_terminator();
 374   if (aUTF16Count) {
 375     *aUTF16Count = converter.Length();
 376   }
 377   return aBuffer;
 378 }
 379
 380 char16_t*
 381 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
 382 {
 383   const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
 384   const size_t buffer_size = (length + 1) * sizeof(char16_t);
 385   char16_t* buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size));
 386   if (!buffer) {
 387     return nullptr;
 388   }
 389
 390   uint32_t copied;
 391   UTF8ToUnicodeBuffer(aSource, buffer, &copied);
 392   NS_ASSERTION(length == copied, "length mismatch");
 393
 394   if (aUTF16Count) {
 395     *aUTF16Count = copied;
 396   }
 397   return buffer;
 398 }
 399
 400 char16_t*
 401 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
 402               uint32_t aLength)
 403 {
 404   nsAString::const_iterator fromBegin, fromEnd;
 405   char16_t* toBegin = aDest;
 406   copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
 407               aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
 408               toBegin);
 409   return aDest;
 410 }
 411
 412 void
 413 CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
 414               const nsAString::const_iterator& aSrcEnd,
 415               nsAString& aDest)
 416 {
 417   nsAString::iterator writer;
 418   aDest.SetLength(Distance(aSrcStart, aSrcEnd));
 419
 420   aDest.BeginWriting(writer);
 421   nsAString::const_iterator fromBegin(aSrcStart);
 422
 423   copy_string(fromBegin, aSrcEnd, writer);
 424 }
 425
 426 void
 427 AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
 428                 const nsAString::const_iterator& aSrcEnd,
 429                 nsAString& aDest)
 430 {
 431   nsAString::iterator writer;
 432   uint32_t oldLength = aDest.Length();
 433   aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
 434
 435   aDest.BeginWriting(writer).advance(oldLength);
 436   nsAString::const_iterator fromBegin(aSrcStart);
 437
 438   copy_string(fromBegin, aSrcEnd, writer);
 439 }
 440
 441 bool
 442 IsASCII(const nsAString& aString)
 443 {
 444   static const char16_t NOT_ASCII = char16_t(~0x007F);
 445
 446
 447   // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
 448
 449   nsAString::const_iterator iter, done_reading;
 450   aString.BeginReading(iter);
 451   aString.EndReading(done_reading);
 452
 453   const char16_t* c = iter.get();
 454   const char16_t* end = done_reading.get();
 455
 456   while (c < end) {
 457     if (*c++ & NOT_ASCII) {
 458       return false;
 459     }
 460   }
 461
 462   return true;
 463 }
 464
 465 bool
 466 IsASCII(const nsACString& aString)
 467 {
 468   static const char NOT_ASCII = char(~0x7F);
 469
 470
 471   // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
 472
 473   nsACString::const_iterator iter, done_reading;
 474   aString.BeginReading(iter);
 475   aString.EndReading(done_reading);
 476
 477   const char* c = iter.get();
 478   const char* end = done_reading.get();
 479
 480   while (c < end) {
 481     if (*c++ & NOT_ASCII) {
 482       return false;
 483     }
 484   }
 485
 486   return true;
 487 }
 488
 489 bool
 490 IsUTF8(const nsACString& aString, bool aRejectNonChar)
 491 {
 492   nsReadingIterator<char> done_reading;
 493   aString.EndReading(done_reading);
 494
 495   int32_t state = 0;
 496   bool overlong = false;
 497   bool surrogate = false;
 498   bool nonchar = false;
 499   uint16_t olupper = 0; // overlong byte upper bound.
 500   uint16_t slower = 0;  // surrogate byte lower bound.
 501
 502   nsReadingIterator<char> iter;
 503   aString.BeginReading(iter);
 504
 505   const char* ptr = iter.get();
 506   const char* end = done_reading.get();
 507   while (ptr < end) {
 508     uint8_t c;
 509
 510     if (0 == state) {
 511       c = *ptr++;
 512
 513       if (UTF8traits::isASCII(c)) {
 514         continue;
 515       }
 516
 517       if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
 518         return false;
 519       } else if (UTF8traits::is2byte(c)) {
 520         state = 1;
 521       } else if (UTF8traits::is3byte(c)) {
 522         state = 2;
 523         if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
 524           overlong = true;
 525           olupper = 0x9F;
 526         } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
 527           surrogate = true;
 528           slower = 0xA0;
 529         } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
 530           nonchar = true;
 531         }
 532       } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
 533         state = 3;
 534         nonchar = true;
 535         if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
 536           overlong = true;
 537           olupper = 0x8F;
 538         } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
 539           // actually not surrogates but codepoints beyond 0x10FFFF
 540           surrogate = true;
 541           slower = 0x90;
 542         }
 543       } else {
 544         return false;  // Not UTF-8 string
 545       }
 546     }
 547
 548     if (nonchar && !aRejectNonChar) {
 549       nonchar = false;
 550     }
 551
 552     while (ptr < end && state) {
 553       c = *ptr++;
 554       --state;
 555
 556       // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
 557       if (nonchar &&
 558           ((!state && c < 0xBE) ||
 559            (state == 1 && c != 0xBF)  ||
 560            (state == 2 && 0x0F != (0x0F & c)))) {
 561         nonchar = false;
 562       }
 563
 564       if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
 565           (surrogate && slower <= c) || (nonchar && !state)) {
 566         return false;  // Not UTF-8 string
 567       }
 568
 569       overlong = surrogate = false;
 570     }
 571   }
 572   return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
 573 }
 574
 575 /**
 576  * A character sink for in-place case conversion.
 577  */
 578 class ConvertToUpperCase
 579 {
 580 public:
 581   typedef char value_type;
 582
 583   uint32_t
 584   write(const char* aSource, uint32_t aSourceLength)
 585   {
 586     char* cp = const_cast<char*>(aSource);
 587     const char* end = aSource + aSourceLength;
 588     while (cp != end) {
 589       char ch = *cp;
 590       if (ch >= 'a' && ch <= 'z') {
 591         *cp = ch - ('a' - 'A');
 592       }
 593       ++cp;
 594     }
 595     return aSourceLength;
 596   }
 597 };
 598
 599 void
 600 ToUpperCase(nsCSubstring& aCString)
 601 {
 602   ConvertToUpperCase converter;
 603   char* start;
 604   converter.write(aCString.BeginWriting(start), aCString.Length());
 605 }
 606
 607 /**
 608  * A character sink for copying with case conversion.
 609  */
 610 class CopyToUpperCase
 611 {
 612 public:
 613   typedef char value_type;
 614
 615   explicit CopyToUpperCase(nsACString::iterator& aDestIter)
 616     : mIter(aDestIter)
 617   {
 618   }
 619
 620   uint32_t
 621   write(const char* aSource, uint32_t aSourceLength)
 622   {
 623     uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
 624     char* cp = mIter.get();
 625     const char* end = aSource + len;
 626     while (aSource != end) {
 627       char ch = *aSource;
 628       if ((ch >= 'a') && (ch <= 'z')) {
 629         *cp = ch - ('a' - 'A');
 630       } else {
 631         *cp = ch;
 632       }
 633       ++aSource;
 634       ++cp;
 635     }
 636     mIter.advance(len);
 637     return len;
 638   }
 639
 640 protected:
 641   nsACString::iterator& mIter;
 642 };
 643
 644 void
 645 ToUpperCase(const nsACString& aSource, nsACString& aDest)
 646 {
 647   nsACString::const_iterator fromBegin, fromEnd;
 648   nsACString::iterator toBegin;
 649   aDest.SetLength(aSource.Length());
 650
 651   CopyToUpperCase converter(aDest.BeginWriting(toBegin));
 652   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 653               converter);
 654 }
 655
 656 /**
 657  * A character sink for case conversion.
 658  */
 659 class ConvertToLowerCase
 660 {
 661 public:
 662   typedef char value_type;
 663
 664   uint32_t
 665   write(const char* aSource, uint32_t aSourceLength)
 666   {
 667     char* cp = const_cast<char*>(aSource);
 668     const char* end = aSource + aSourceLength;
 669     while (cp != end) {
 670       char ch = *cp;
 671       if ((ch >= 'A') && (ch <= 'Z')) {
 672         *cp = ch + ('a' - 'A');
 673       }
 674       ++cp;
 675     }
 676     return aSourceLength;
 677   }
 678 };
 679
 680 void
 681 ToLowerCase(nsCSubstring& aCString)
 682 {
 683   ConvertToLowerCase converter;
 684   char* start;
 685   converter.write(aCString.BeginWriting(start), aCString.Length());
 686 }
 687
 688 /**
 689  * A character sink for copying with case conversion.
 690  */
 691 class CopyToLowerCase
 692 {
 693 public:
 694   typedef char value_type;
 695
 696   explicit CopyToLowerCase(nsACString::iterator& aDestIter)
 697     : mIter(aDestIter)
 698   {
 699   }
 700
 701   uint32_t
 702   write(const char* aSource, uint32_t aSourceLength)
 703   {
 704     uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
 705     char* cp = mIter.get();
 706     const char* end = aSource + len;
 707     while (aSource != end) {
 708       char ch = *aSource;
 709       if ((ch >= 'A') && (ch <= 'Z')) {
 710         *cp = ch + ('a' - 'A');
 711       } else {
 712         *cp = ch;
 713       }
 714       ++aSource;
 715       ++cp;
 716     }
 717     mIter.advance(len);
 718     return len;
 719   }
 720
 721 protected:
 722   nsACString::iterator& mIter;
 723 };
 724
 725 void
 726 ToLowerCase(const nsACString& aSource, nsACString& aDest)
 727 {
 728   nsACString::const_iterator fromBegin, fromEnd;
 729   nsACString::iterator toBegin;
 730   aDest.SetLength(aSource.Length());
 731
 732   CopyToLowerCase converter(aDest.BeginWriting(toBegin));
 733   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
 734               converter);
 735 }
 736
 737 bool
 738 ParseString(const nsACString& aSource, char aDelimiter,
 739             nsTArray<nsCString>& aArray)
 740 {
 741   nsACString::const_iterator start, end;
 742   aSource.BeginReading(start);
 743   aSource.EndReading(end);
 744
 745   uint32_t oldLength = aArray.Length();
 746
 747   for (;;) {
 748     nsACString::const_iterator delimiter = start;
 749     FindCharInReadable(aDelimiter, delimiter, end);
 750
 751     if (delimiter != start) {
 752       if (!aArray.AppendElement(Substring(start, delimiter))) {
 753         aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
 754         return false;
 755       }
 756     }
 757
 758     if (delimiter == end) {
 759       break;
 760     }
 761     start = ++delimiter;
 762     if (start == end) {
 763       break;
 764     }
 765   }
 766
 767   return true;
 768 }
 769
 770 template <class StringT, class IteratorT, class Comparator>
 771 bool
 772 FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
 773                     IteratorT& aSearchEnd, const Comparator& aCompare)
 774 {
 775   bool found_it = false;
 776
 777   // only bother searching at all if we're given a non-empty range to search
 778   if (aSearchStart != aSearchEnd) {
 779     IteratorT aPatternStart, aPatternEnd;
 780     aPattern.BeginReading(aPatternStart);
 781     aPattern.EndReading(aPatternEnd);
 782
 783     // outer loop keeps searching till we find it or run out of string to search
 784     while (!found_it) {
 785       // fast inner loop (that's what it's called, not what it is) looks for a potential match
 786       while (aSearchStart != aSearchEnd &&
 787              aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
 788         ++aSearchStart;
 789       }
 790
 791       // if we broke out of the `fast' loop because we're out of string ... we're done: no match
 792       if (aSearchStart == aSearchEnd) {
 793         break;
 794       }
 795
 796       // otherwise, we're at a potential match, let's see if we really hit one
 797       IteratorT testPattern(aPatternStart);
 798       IteratorT testSearch(aSearchStart);
 799
 800       // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
 801       for (;;) {
 802         // we already compared the first character in the outer loop,
 803         //  so we'll advance before the next comparison
 804         ++testPattern;
 805         ++testSearch;
 806
 807         // if we verified all the way to the end of the pattern, then we found it!
 808         if (testPattern == aPatternEnd) {
 809           found_it = true;
 810           aSearchEnd = testSearch; // return the exact found range through the parameters
 811           break;
 812         }
 813
 814         // if we got to end of the string we're searching before we hit the end of the
 815         //  pattern, we'll never find what we're looking for
 816         if (testSearch == aSearchEnd) {
 817           aSearchStart = aSearchEnd;
 818           break;
 819         }
 820
 821         // else if we mismatched ... it's time to advance to the next search position
 822         //  and get back into the `fast' loop
 823         if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
 824           ++aSearchStart;
 825           break;
 826         }
 827       }
 828     }
 829   }
 830
 831   return found_it;
 832 }
 833
 834 /**
 835  * This searches the entire string from right to left, and returns the first match found, if any.
 836  */
 837 template <class StringT, class IteratorT, class Comparator>
 838 bool
 839 RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
 840                      IteratorT& aSearchEnd, const Comparator& aCompare)
 841 {
 842   IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
 843   aPattern.BeginReading(patternStart);
 844   aPattern.EndReading(patternEnd);
 845
 846   // Point to the last character in the pattern
 847   --patternEnd;
 848   // outer loop keeps searching till we run out of string to search
 849   while (aSearchStart != searchEnd) {
 850     // Point to the end position of the next possible match
 851     --searchEnd;
 852
 853     // Check last character, if a match, explore further from here
 854     if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
 855       // We're at a potential match, let's see if we really hit one
 856       IteratorT testPattern(patternEnd);
 857       IteratorT testSearch(searchEnd);
 858
 859       // inner loop verifies the potential match at the current position
 860       do {
 861         // if we verified all the way to the end of the pattern, then we found it!
 862         if (testPattern == patternStart) {
 863           aSearchStart = testSearch;  // point to start of match
 864           aSearchEnd = ++searchEnd;   // point to end of match
 865           return true;
 866         }
 867
 868         // if we got to end of the string we're searching before we hit the end of the
 869         //  pattern, we'll never find what we're looking for
 870         if (testSearch == aSearchStart) {
 871           aSearchStart = aSearchEnd;
 872           return false;
 873         }
 874
 875         // test previous character for a match
 876         --testPattern;
 877         --testSearch;
 878       } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
 879     }
 880   }
 881
 882   aSearchStart = aSearchEnd;
 883   return false;
 884 }
 885
 886 bool
 887 FindInReadable(const nsAString& aPattern,
 888                nsAString::const_iterator& aSearchStart,
 889                nsAString::const_iterator& aSearchEnd,
 890                const nsStringComparator& aComparator)
 891 {
 892   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 893 }
 894
 895 bool
 896 FindInReadable(const nsACString& aPattern,
 897                nsACString::const_iterator& aSearchStart,
 898                nsACString::const_iterator& aSearchEnd,
 899                const nsCStringComparator& aComparator)
 900 {
 901   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 902 }
 903
 904 bool
 905 CaseInsensitiveFindInReadable(const nsACString& aPattern,
 906                               nsACString::const_iterator& aSearchStart,
 907                               nsACString::const_iterator& aSearchEnd)
 908 {
 909   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
 910                              nsCaseInsensitiveCStringComparator());
 911 }
 912
 913 bool
 914 RFindInReadable(const nsAString& aPattern,
 915                 nsAString::const_iterator& aSearchStart,
 916                 nsAString::const_iterator& aSearchEnd,
 917                 const nsStringComparator& aComparator)
 918 {
 919   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 920 }
 921
 922 bool
 923 RFindInReadable(const nsACString& aPattern,
 924                 nsACString::const_iterator& aSearchStart,
 925                 nsACString::const_iterator& aSearchEnd,
 926                 const nsCStringComparator& aComparator)
 927 {
 928   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 929 }
 930
 931 bool
 932 FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
 933                    const nsAString::const_iterator& aSearchEnd)
 934 {
 935   int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
 936
 937   const char16_t* charFoundAt =
 938     nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
 939   if (charFoundAt) {
 940     aSearchStart.advance(charFoundAt - aSearchStart.get());
 941     return true;
 942   }
 943
 944   aSearchStart.advance(fragmentLength);
 945   return false;
 946 }
 947
 948 bool
 949 FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
 950                    const nsACString::const_iterator& aSearchEnd)
 951 {
 952   int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
 953
 954   const char* charFoundAt =
 955     nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
 956   if (charFoundAt) {
 957     aSearchStart.advance(charFoundAt - aSearchStart.get());
 958     return true;
 959   }
 960
 961   aSearchStart.advance(fragmentLength);
 962   return false;
 963 }
 964
 965 uint32_t
 966 CountCharInReadable(const nsAString& aStr, char16_t aChar)
 967 {
 968   uint32_t count = 0;
 969   nsAString::const_iterator begin, end;
 970
 971   aStr.BeginReading(begin);
 972   aStr.EndReading(end);
 973
 974   while (begin != end) {
 975     if (*begin == aChar) {
 976       ++count;
 977     }
 978     ++begin;
 979   }
 980
 981   return count;
 982 }
 983
 984 uint32_t
 985 CountCharInReadable(const nsACString& aStr, char aChar)
 986 {
 987   uint32_t count = 0;
 988   nsACString::const_iterator begin, end;
 989
 990   aStr.BeginReading(begin);
 991   aStr.EndReading(end);
 992
 993   while (begin != end) {
 994     if (*begin == aChar) {
 995       ++count;
 996     }
 997     ++begin;
 998   }
 999
1000   return count;
1001 }
1002
1003 bool
1004 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
1005                  const nsStringComparator& aComparator)
1006 {
1007   nsAString::size_type src_len = aSource.Length(),
1008                        sub_len = aSubstring.Length();
1009   if (sub_len > src_len) {
1010     return false;
1011   }
1012   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1013 }
1014
1015 bool
1016 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
1017                  const nsCStringComparator& aComparator)
1018 {
1019   nsACString::size_type src_len = aSource.Length(),
1020                         sub_len = aSubstring.Length();
1021   if (sub_len > src_len) {
1022     return false;
1023   }
1024   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1025 }
1026
1027 bool
1028 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
1029                const nsStringComparator& aComparator)
1030 {
1031   nsAString::size_type src_len = aSource.Length(),
1032                        sub_len = aSubstring.Length();
1033   if (sub_len > src_len) {
1034     return false;
1035   }
1036   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1037                                                                aComparator);
1038 }
1039
1040 bool
1041 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
1042                const nsCStringComparator& aComparator)
1043 {
1044   nsACString::size_type src_len = aSource.Length(),
1045                         sub_len = aSubstring.Length();
1046   if (sub_len > src_len) {
1047     return false;
1048   }
1049   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1050                                                                aComparator);
1051 }
1052
1053
1054
// A single zero code unit. EmptyString() wraps it directly as a
// null-terminated UTF-16 buffer, and EmptyCString() reinterprets the same
// storage as a null-terminated char buffer.
static const char16_t empty_buffer[1] = { '\0' };
1056
1057 const nsAFlatString&
1058 EmptyString()
1059 {
1060   static const nsDependentString sEmpty(empty_buffer);
1061
1062   return sEmpty;
1063 }
1064
1065 const nsAFlatCString&
1066 EmptyCString()
1067 {
1068   static const nsDependentCString sEmpty((const char*)empty_buffer);
1069
1070   return sEmpty;
1071 }
1072
1073 const nsAFlatString&
1074 NullString()
1075 {
1076   static const nsXPIDLString sNull;
1077
1078   return sNull;
1079 }
1080
1081 const nsAFlatCString&
1082 NullCString()
1083 {
1084   static const nsXPIDLCString sNull;
1085
1086   return sNull;
1087 }
1088
1089 int32_t
1090 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1091                    const nsASingleFragmentString& aUTF16String)
1092 {
1093   static const uint32_t NOT_ASCII = uint32_t(~0x7F);
1094
1095   const char* u8;
1096   const char* u8end;
1097   aUTF8String.BeginReading(u8);
1098   aUTF8String.EndReading(u8end);
1099
1100   const char16_t* u16;
1101   const char16_t* u16end;
1102   aUTF16String.BeginReading(u16);
1103   aUTF16String.EndReading(u16end);
1104
1105   while (u8 != u8end && u16 != u16end) {
1106     // Cast away the signedness of *u8 to prevent signextension when
1107     // converting to uint32_t
1108     uint32_t c8_32 = (uint8_t)*u8;
1109
1110     if (c8_32 & NOT_ASCII) {
1111       bool err;
1112       c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1113       if (err) {
1114         return INT32_MIN;
1115       }
1116
1117       uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1118       // The above UTF16CharEnumerator::NextChar() calls can
1119       // fail, but if it does for anything other than no data to
1120       // look at (which can't happen here), it returns the
1121       // Unicode replacement character 0xFFFD for the invalid
1122       // data they were fed. Ignore that error and treat invalid
1123       // UTF16 as 0xFFFD.
1124       //
1125       // This matches what our UTF16 to UTF8 conversion code
1126       // does, and thus a UTF8 string that came from an invalid
1127       // UTF16 string will compare equal to the invalid UTF16
1128       // string it came from. Same is true for any other UTF16
1129       // string differs only in the invalid part of the string.
1130
1131       if (c8_32 != c16_32) {
1132         return c8_32 < c16_32 ? -1 : 1;
1133       }
1134     } else {
1135       if (c8_32 != *u16) {
1136         return c8_32 > *u16 ? 1 : -1;
1137       }
1138
1139       ++u8;
1140       ++u16;
1141     }
1142   }
1143
1144   if (u8 != u8end) {
1145     // We get to the end of the UTF16 string, but no to the end of
1146     // the UTF8 string. The UTF8 string is longer than the UTF16
1147     // string
1148
1149     return 1;
1150   }
1151
1152   if (u16 != u16end) {
1153     // We get to the end of the UTF8 string, but no to the end of
1154     // the UTF16 string. The UTF16 string is longer than the UTF8
1155     // string
1156
1157     return -1;
1158   }
1159
1160   // The two strings match.
1161
1162   return 0;
1163 }
1164
1165 void
1166 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
1167 {
1168   NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1169   if (IS_IN_BMP(aSource)) {
1170     aDest.Append(char16_t(aSource));
1171   } else {
1172     aDest.Append(H_SURROGATE(aSource));
1173     aDest.Append(L_SURROGATE(aSource));
1174   }
1175 }