xpcom/string/nsReadableUtils.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsReadableUtils.h"
   8
   9 #include <algorithm>
  10
  11 #include "mozilla/CheckedInt.h"
  12 #include "mozilla/Utf8.h"
  13
  14 #include "nscore.h"
  15 #include "nsString.h"
  16 #include "nsTArray.h"
  17 #include "nsUTF8Utils.h"
  18
  19 using mozilla::Span;
  20
  21 /**
  22  * A helper function that allocates a buffer of the desired character type big
  23  * enough to hold a copy of the supplied string (plus a zero terminator).
  24  *
  25  * @param aSource an string you will eventually be making a copy of
  26  * @return a new buffer which you must free with |free|.
  27  *
  28  */
  29 template <class FromStringT, class CharT>
  30 inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  31   return static_cast<CharT*>(
  32       malloc((size_t(aSource.Length()) + 1) * sizeof(CharT)));
  33 }
  34
  35 char* ToNewCString(const nsAString& aSource) {
  36   char* str = ToNewCString(aSource, mozilla::fallible);
  37   if (!str) {
  38     MOZ_CRASH("Unable to allocate memory");
  39   }
  40   return str;
  41 }
  42
  43 char* ToNewCString(const nsAString& aSource,
  44                    const mozilla::fallible_t& aFallible) {
  45   char* dest = AllocateStringCopy(aSource, (char*)nullptr);
  46   if (!dest) {
  47     return nullptr;
  48   }
  49
  50   auto len = aSource.Length();
  51   LossyConvertUtf16toLatin1(aSource, Span(dest, len));
  52   dest[len] = 0;
  53   return dest;
  54 }
  55
  56 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
  57                       const mozilla::fallible_t& aFallible) {
  58   auto len = aSource.Length();
  59   // The uses of this function seem temporary enough that it's not
  60   // worthwhile to be fancy about the allocation size. Let's just use
  61   // the worst case.
  62   // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and
  63   // then we have the terminator.
  64   // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for
  65   // historical reasons.
  66   mozilla::CheckedInt<uint32_t> destLen(len);
  67   destLen *= 3;
  68   destLen += 1;
  69   if (!destLen.isValid()) {
  70     return nullptr;
  71   }
  72   size_t destLenVal = destLen.value();
  73   char* dest = static_cast<char*>(malloc(destLenVal));
  74   if (!dest) {
  75     return nullptr;
  76   }
  77
  78   size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal));
  79   dest[written] = 0;
  80
  81   if (aUTF8Count) {
  82     *aUTF8Count = written;
  83   }
  84
  85   return dest;
  86 }
  87
  88 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
  89   char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible);
  90   if (!str) {
  91     MOZ_CRASH("Unable to allocate memory");
  92   }
  93   return str;
  94 }
  95
  96 char* ToNewCString(const nsACString& aSource) {
  97   char* str = ToNewCString(aSource, mozilla::fallible);
  98   if (!str) {
  99     MOZ_CRASH("Unable to allocate memory");
 100   }
 101   return str;
 102 }
 103
 104 char* ToNewCString(const nsACString& aSource,
 105                    const mozilla::fallible_t& aFallible) {
 106   // no conversion needed, just allocate a buffer of the correct length and copy
 107   // into it
 108
 109   char* dest = AllocateStringCopy(aSource, (char*)nullptr);
 110   if (!dest) {
 111     return nullptr;
 112   }
 113
 114   auto len = aSource.Length();
 115   memcpy(dest, aSource.BeginReading(), len * sizeof(char));
 116   dest[len] = 0;
 117   return dest;
 118 }
 119
 120 char16_t* ToNewUnicode(const nsAString& aSource) {
 121   char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
 122   if (!str) {
 123     MOZ_CRASH("Unable to allocate memory");
 124   }
 125   return str;
 126 }
 127
 128 char16_t* ToNewUnicode(const nsAString& aSource,
 129                        const mozilla::fallible_t& aFallible) {
 130   // no conversion needed, just allocate a buffer of the correct length and copy
 131   // into it
 132
 133   char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
 134   if (!dest) {
 135     return nullptr;
 136   }
 137
 138   auto len = aSource.Length();
 139   memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
 140   dest[len] = 0;
 141   return dest;
 142 }
 143
 144 char16_t* ToNewUnicode(const nsACString& aSource) {
 145   char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
 146   if (!str) {
 147     MOZ_CRASH("Unable to allocate memory");
 148   }
 149   return str;
 150 }
 151
 152 char16_t* ToNewUnicode(const nsACString& aSource,
 153                        const mozilla::fallible_t& aFallible) {
 154   char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
 155   if (!dest) {
 156     return nullptr;
 157   }
 158
 159   auto len = aSource.Length();
 160   ConvertLatin1toUtf16(aSource, Span(dest, len));
 161   dest[len] = 0;
 162   return dest;
 163 }
 164
 165 char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
 166                            const mozilla::fallible_t& aFallible) {
 167   // Compute length plus one as required by ConvertUTF8toUTF16
 168   uint32_t lengthPlusOne = aSource.Length() + 1;  // Can't overflow
 169
 170   mozilla::CheckedInt<size_t> allocLength(lengthPlusOne);
 171   // Add space for zero-termination
 172   allocLength += 1;
 173   // We need UTF-16 units
 174   allocLength *= sizeof(char16_t);
 175
 176   if (!allocLength.isValid()) {
 177     return nullptr;
 178   }
 179
 180   char16_t* dest = (char16_t*)malloc(allocLength.value());
 181   if (!dest) {
 182     return nullptr;
 183   }
 184
 185   size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne));
 186   dest[written] = 0;
 187
 188   if (aUTF16Count) {
 189     *aUTF16Count = written;
 190   }
 191
 192   return dest;
 193 }
 194
 195 char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
 196   char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible);
 197   if (!str) {
 198     MOZ_CRASH("Unable to allocate memory");
 199   }
 200   return str;
 201 }
 202
 203 char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
 204                         char16_t* aDest, uint32_t aLength) {
 205   MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length());
 206   memcpy(aDest, aSource.BeginReading() + aSrcOffset,
 207          size_t(aLength) * sizeof(char16_t));
 208   return aDest;
 209 }
 210
 211 void ToUpperCase(nsACString& aCString) {
 212   char* cp = aCString.BeginWriting();
 213   char* end = cp + aCString.Length();
 214   while (cp != end) {
 215     char ch = *cp;
 216     if (ch >= 'a' && ch <= 'z') {
 217       *cp = ch - ('a' - 'A');
 218     }
 219     ++cp;
 220   }
 221 }
 222
 223 void ToUpperCase(const nsACString& aSource, nsACString& aDest) {
 224   aDest.SetLength(aSource.Length());
 225   const char* src = aSource.BeginReading();
 226   const char* end = src + aSource.Length();
 227   char* dst = aDest.BeginWriting();
 228   while (src != end) {
 229     char ch = *src;
 230     if (ch >= 'a' && ch <= 'z') {
 231       *dst = ch - ('a' - 'A');
 232     } else {
 233       *dst = ch;
 234     }
 235     ++src;
 236     ++dst;
 237   }
 238 }
 239
 240 void ToLowerCase(nsACString& aCString) {
 241   char* cp = aCString.BeginWriting();
 242   char* end = cp + aCString.Length();
 243   while (cp != end) {
 244     char ch = *cp;
 245     if (ch >= 'A' && ch <= 'Z') {
 246       *cp = ch + ('a' - 'A');
 247     }
 248     ++cp;
 249   }
 250 }
 251
 252 void ToLowerCase(const nsACString& aSource, nsACString& aDest) {
 253   aDest.SetLength(aSource.Length());
 254   const char* src = aSource.BeginReading();
 255   const char* end = src + aSource.Length();
 256   char* dst = aDest.BeginWriting();
 257   while (src != end) {
 258     char ch = *src;
 259     if (ch >= 'A' && ch <= 'Z') {
 260       *dst = ch + ('a' - 'A');
 261     } else {
 262       *dst = ch;
 263     }
 264     ++src;
 265     ++dst;
 266   }
 267 }
 268
 269 void ParseString(const nsACString& aSource, char aDelimiter,
 270                  nsTArray<nsCString>& aArray) {
 271   nsACString::const_iterator start, end;
 272   aSource.BeginReading(start);
 273   aSource.EndReading(end);
 274
 275   for (;;) {
 276     nsACString::const_iterator delimiter = start;
 277     FindCharInReadable(aDelimiter, delimiter, end);
 278
 279     if (delimiter != start) {
 280       aArray.AppendElement(Substring(start, delimiter));
 281     }
 282
 283     if (delimiter == end) {
 284       break;
 285     }
 286     start = ++delimiter;
 287     if (start == end) {
 288       break;
 289     }
 290   }
 291 }
 292
 293 template <class StringT, class IteratorT>
 294 bool FindInReadable_Impl(
 295     const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
 296     nsTStringComparator<typename StringT::char_type> aCompare) {
 297   bool found_it = false;
 298
 299   // only bother searching at all if we're given a non-empty range to search
 300   if (aSearchStart != aSearchEnd) {
 301     IteratorT aPatternStart, aPatternEnd;
 302     aPattern.BeginReading(aPatternStart);
 303     aPattern.EndReading(aPatternEnd);
 304
 305     // outer loop keeps searching till we find it or run out of string to search
 306     while (!found_it) {
 307       // fast inner loop (that's what it's called, not what it is) looks for a
 308       // potential match
 309       while (aSearchStart != aSearchEnd &&
 310              aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
 311         ++aSearchStart;
 312       }
 313
 314       // if we broke out of the `fast' loop because we're out of string ...
 315       // we're done: no match
 316       if (aSearchStart == aSearchEnd) {
 317         break;
 318       }
 319
 320       // otherwise, we're at a potential match, let's see if we really hit one
 321       IteratorT testPattern(aPatternStart);
 322       IteratorT testSearch(aSearchStart);
 323
 324       // slow inner loop verifies the potential match (found by the `fast' loop)
 325       // at the current position
 326       for (;;) {
 327         // we already compared the first character in the outer loop,
 328         //  so we'll advance before the next comparison
 329         ++testPattern;
 330         ++testSearch;
 331
 332         // if we verified all the way to the end of the pattern, then we found
 333         // it!
 334         if (testPattern == aPatternEnd) {
 335           found_it = true;
 336           aSearchEnd = testSearch;  // return the exact found range through the
 337                                     // parameters
 338           break;
 339         }
 340
 341         // if we got to end of the string we're searching before we hit the end
 342         // of the
 343         //  pattern, we'll never find what we're looking for
 344         if (testSearch == aSearchEnd) {
 345           aSearchStart = aSearchEnd;
 346           break;
 347         }
 348
 349         // else if we mismatched ... it's time to advance to the next search
 350         // position
 351         //  and get back into the `fast' loop
 352         if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
 353           ++aSearchStart;
 354           break;
 355         }
 356       }
 357     }
 358   }
 359
 360   return found_it;
 361 }
 362
 363 /**
 364  * This searches the entire string from right to left, and returns the first
 365  * match found, if any.
 366  */
 367 template <class StringT, class IteratorT>
 368 bool RFindInReadable_Impl(
 369     const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
 370     nsTStringComparator<typename StringT::char_type> aCompare) {
 371   IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
 372   aPattern.BeginReading(patternStart);
 373   aPattern.EndReading(patternEnd);
 374
 375   // Point to the last character in the pattern
 376   --patternEnd;
 377   // outer loop keeps searching till we run out of string to search
 378   while (aSearchStart != searchEnd) {
 379     // Point to the end position of the next possible match
 380     --searchEnd;
 381
 382     // Check last character, if a match, explore further from here
 383     if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
 384       // We're at a potential match, let's see if we really hit one
 385       IteratorT testPattern(patternEnd);
 386       IteratorT testSearch(searchEnd);
 387
 388       // inner loop verifies the potential match at the current position
 389       do {
 390         // if we verified all the way to the end of the pattern, then we found
 391         // it!
 392         if (testPattern == patternStart) {
 393           aSearchStart = testSearch;  // point to start of match
 394           aSearchEnd = ++searchEnd;   // point to end of match
 395           return true;
 396         }
 397
 398         // if we got to end of the string we're searching before we hit the end
 399         // of the
 400         //  pattern, we'll never find what we're looking for
 401         if (testSearch == aSearchStart) {
 402           aSearchStart = aSearchEnd;
 403           return false;
 404         }
 405
 406         // test previous character for a match
 407         --testPattern;
 408         --testSearch;
 409       } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
 410     }
 411   }
 412
 413   aSearchStart = aSearchEnd;
 414   return false;
 415 }
 416
 417 bool FindInReadable(const nsAString& aPattern,
 418                     nsAString::const_iterator& aSearchStart,
 419                     nsAString::const_iterator& aSearchEnd,
 420                     nsStringComparator aComparator) {
 421   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 422 }
 423
 424 bool FindInReadable(const nsACString& aPattern,
 425                     nsACString::const_iterator& aSearchStart,
 426                     nsACString::const_iterator& aSearchEnd,
 427                     nsCStringComparator aComparator) {
 428   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 429 }
 430
 431 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
 432                                    nsACString::const_iterator& aSearchStart,
 433                                    nsACString::const_iterator& aSearchEnd) {
 434   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
 435                              nsCaseInsensitiveCStringComparator);
 436 }
 437
 438 bool RFindInReadable(const nsAString& aPattern,
 439                      nsAString::const_iterator& aSearchStart,
 440                      nsAString::const_iterator& aSearchEnd,
 441                      const nsStringComparator aComparator) {
 442   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 443 }
 444
 445 bool RFindInReadable(const nsACString& aPattern,
 446                      nsACString::const_iterator& aSearchStart,
 447                      nsACString::const_iterator& aSearchEnd,
 448                      const nsCStringComparator aComparator) {
 449   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 450 }
 451
 452 bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
 453                         const nsAString::const_iterator& aSearchEnd) {
 454   ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
 455
 456   const char16_t* charFoundAt =
 457       nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
 458   if (charFoundAt) {
 459     aSearchStart.advance(charFoundAt - aSearchStart.get());
 460     return true;
 461   }
 462
 463   aSearchStart.advance(fragmentLength);
 464   return false;
 465 }
 466
 467 bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
 468                         const nsACString::const_iterator& aSearchEnd) {
 469   ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
 470
 471   const char* charFoundAt =
 472       nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
 473   if (charFoundAt) {
 474     aSearchStart.advance(charFoundAt - aSearchStart.get());
 475     return true;
 476   }
 477
 478   aSearchStart.advance(fragmentLength);
 479   return false;
 480 }
 481
 482 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) {
 483   nsAString::size_type src_len = aSource.Length(),
 484                        sub_len = aSubstring.Length();
 485   if (sub_len > src_len) {
 486     return false;
 487   }
 488   return Substring(aSource, 0, sub_len).Equals(aSubstring);
 489 }
 490
 491 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
 492                       nsStringComparator aComparator) {
 493   nsAString::size_type src_len = aSource.Length(),
 494                        sub_len = aSubstring.Length();
 495   if (sub_len > src_len) {
 496     return false;
 497   }
 498   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
 499 }
 500
 501 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) {
 502   nsACString::size_type src_len = aSource.Length(),
 503                         sub_len = aSubstring.Length();
 504   if (sub_len > src_len) {
 505     return false;
 506   }
 507   return Substring(aSource, 0, sub_len).Equals(aSubstring);
 508 }
 509
 510 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
 511                       nsCStringComparator aComparator) {
 512   nsACString::size_type src_len = aSource.Length(),
 513                         sub_len = aSubstring.Length();
 514   if (sub_len > src_len) {
 515     return false;
 516   }
 517   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
 518 }
 519
 520 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) {
 521   nsAString::size_type src_len = aSource.Length(),
 522                        sub_len = aSubstring.Length();
 523   if (sub_len > src_len) {
 524     return false;
 525   }
 526   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
 527 }
 528
 529 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
 530                     nsStringComparator aComparator) {
 531   nsAString::size_type src_len = aSource.Length(),
 532                        sub_len = aSubstring.Length();
 533   if (sub_len > src_len) {
 534     return false;
 535   }
 536   return Substring(aSource, src_len - sub_len, sub_len)
 537       .Equals(aSubstring, aComparator);
 538 }
 539
 540 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) {
 541   nsACString::size_type src_len = aSource.Length(),
 542                         sub_len = aSubstring.Length();
 543   if (sub_len > src_len) {
 544     return false;
 545   }
 546   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
 547 }
 548
 549 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
 550                     nsCStringComparator aComparator) {
 551   nsACString::size_type src_len = aSource.Length(),
 552                         sub_len = aSubstring.Length();
 553   if (sub_len > src_len) {
 554     return false;
 555   }
 556   return Substring(aSource, src_len - sub_len, sub_len)
 557       .Equals(aSubstring, aComparator);
 558 }
 559
 560 static const char16_t empty_buffer[1] = {'\0'};
 561
 562 const nsString& EmptyString() {
 563   static const nsDependentString sEmpty(empty_buffer);
 564
 565   return sEmpty;
 566 }
 567
 568 const nsCString& EmptyCString() {
 569   static const nsDependentCString sEmpty((const char*)empty_buffer);
 570
 571   return sEmpty;
 572 }
 573
 574 const nsString& VoidString() {
 575   static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED);
 576
 577   return sNull;
 578 }
 579
 580 const nsCString& VoidCString() {
 581   static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED);
 582
 583   return sNull;
 584 }
 585
 586 int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
 587                            const nsAString& aUTF16String, bool* aErr) {
 588   const char* u8;
 589   const char* u8end;
 590   aUTF8String.BeginReading(u8);
 591   aUTF8String.EndReading(u8end);
 592
 593   const char16_t* u16;
 594   const char16_t* u16end;
 595   aUTF16String.BeginReading(u16);
 596   aUTF16String.EndReading(u16end);
 597
 598   for (;;) {
 599     if (u8 == u8end) {
 600       if (u16 == u16end) {
 601         return 0;
 602       }
 603       return -1;
 604     }
 605     if (u16 == u16end) {
 606       return 1;
 607     }
 608     // No need for ASCII optimization, since both NextChar()
 609     // calls get inlined.
 610     uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
 611     uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
 612     if (scalar16 == scalar8) {
 613       continue;
 614     }
 615     if (scalar8 < scalar16) {
 616       return -1;
 617     }
 618     return 1;
 619   }
 620 }
 621
 622 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
 623   NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
 624   if (IS_IN_BMP(aSource)) {
 625     aDest.Append(char16_t(aSource));
 626   } else {
 627     aDest.Append(H_SURROGATE(aSource));
 628     aDest.Append(L_SURROGATE(aSource));
 629   }
 630 }