Bumping manifests a=b2g-bump
[gecko.git] / xpcom / string / nsReadableUtils.cpp
blob7cdb8e86c05e967d4d6eebe51a7c2f36457aa525
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsReadableUtils.h"
9 #include "nsMemory.h"
10 #include "nsString.h"
11 #include "nsTArray.h"
12 #include "nsUTF8Utils.h"
14 void
15 LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
17 aDest.Truncate();
18 LossyAppendUTF16toASCII(aSource, aDest);
21 void
22 CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
24 aDest.Truncate();
25 AppendASCIItoUTF16(aSource, aDest);
28 void
29 LossyCopyUTF16toASCII(const char16_t* aSource, nsACString& aDest)
31 aDest.Truncate();
32 if (aSource) {
33 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
37 void
38 CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
40 aDest.Truncate();
41 if (aSource) {
42 AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
46 void
47 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
49 aDest.Truncate();
50 AppendUTF16toUTF8(aSource, aDest);
53 void
54 CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
56 aDest.Truncate();
57 AppendUTF8toUTF16(aSource, aDest);
60 void
61 CopyUTF16toUTF8(const char16_t* aSource, nsACString& aDest)
63 aDest.Truncate();
64 AppendUTF16toUTF8(aSource, aDest);
67 void
68 CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
70 aDest.Truncate();
71 AppendUTF8toUTF16(aSource, aDest);
74 void
75 LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
77 uint32_t old_dest_length = aDest.Length();
78 aDest.SetLength(old_dest_length + aSource.Length());
80 nsAString::const_iterator fromBegin, fromEnd;
82 nsACString::iterator dest;
83 aDest.BeginWriting(dest);
85 dest.advance(old_dest_length);
87 // right now, this won't work on multi-fragment destinations
88 LossyConvertEncoding16to8 converter(dest.get());
90 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
91 converter);
94 void
95 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
97 if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) {
98 NS_ABORT_OOM(aDest.Length() + aSource.Length());
102 bool
103 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
104 const mozilla::fallible_t&)
106 uint32_t old_dest_length = aDest.Length();
107 if (!aDest.SetLength(old_dest_length + aSource.Length(),
108 mozilla::fallible_t())) {
109 return false;
112 nsACString::const_iterator fromBegin, fromEnd;
114 nsAString::iterator dest;
115 aDest.BeginWriting(dest);
117 dest.advance(old_dest_length);
119 // right now, this won't work on multi-fragment destinations
120 LossyConvertEncoding8to16 converter(dest.get());
122 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
123 converter);
124 return true;
127 void
128 LossyAppendUTF16toASCII(const char16_t* aSource, nsACString& aDest)
130 if (aSource) {
131 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
135 void
136 AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
138 if (aSource) {
139 AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
143 void
144 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
146 if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) {
147 NS_ABORT_OOM(aDest.Length() + aSource.Length());
151 bool
152 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
153 const mozilla::fallible_t&)
155 nsAString::const_iterator source_start, source_end;
156 CalculateUTF8Size calculator;
157 copy_string(aSource.BeginReading(source_start),
158 aSource.EndReading(source_end), calculator);
160 uint32_t count = calculator.Size();
162 if (count) {
163 uint32_t old_dest_length = aDest.Length();
165 // Grow the buffer if we need to.
166 if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
167 return false;
170 // All ready? Time to convert
172 ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
173 copy_string(aSource.BeginReading(source_start),
174 aSource.EndReading(source_end), converter);
176 NS_ASSERTION(converter.Size() == count,
177 "Unexpected disparity between CalculateUTF8Size and "
178 "ConvertUTF16toUTF8");
181 return true;
184 void
185 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
187 if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) {
188 NS_ABORT_OOM(aDest.Length() + aSource.Length());
192 bool
193 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
194 const mozilla::fallible_t&)
196 nsACString::const_iterator source_start, source_end;
197 CalculateUTF8Length calculator;
198 copy_string(aSource.BeginReading(source_start),
199 aSource.EndReading(source_end), calculator);
201 uint32_t count = calculator.Length();
203 // Avoid making the string mutable if we're appending an empty string
204 if (count) {
205 uint32_t old_dest_length = aDest.Length();
207 // Grow the buffer if we need to.
208 if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
209 return false;
212 // All ready? Time to convert
214 ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
215 copy_string(aSource.BeginReading(source_start),
216 aSource.EndReading(source_end), converter);
218 NS_ASSERTION(converter.ErrorEncountered() ||
219 converter.Length() == count,
220 "CalculateUTF8Length produced the wrong length");
222 if (converter.ErrorEncountered()) {
223 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
224 aDest.SetLength(old_dest_length);
228 return true;
231 void
232 AppendUTF16toUTF8(const char16_t* aSource, nsACString& aDest)
234 if (aSource) {
235 AppendUTF16toUTF8(nsDependentString(aSource), aDest);
239 void
240 AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
242 if (aSource) {
243 AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
249 * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
251 * @param aSource an string you will eventually be making a copy of
252 * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
255 template <class FromStringT, class ToCharT>
256 inline
257 ToCharT*
258 AllocateStringCopy(const FromStringT& aSource, ToCharT*)
260 return static_cast<ToCharT*>(nsMemory::Alloc(
261 (aSource.Length() + 1) * sizeof(ToCharT)));
265 char*
266 ToNewCString(const nsAString& aSource)
268 char* result = AllocateStringCopy(aSource, (char*)0);
269 if (!result) {
270 return nullptr;
273 nsAString::const_iterator fromBegin, fromEnd;
274 LossyConvertEncoding16to8 converter(result);
275 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
276 converter).write_terminator();
277 return result;
280 char*
281 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
283 nsAString::const_iterator start, end;
284 CalculateUTF8Size calculator;
285 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
286 calculator);
288 if (aUTF8Count) {
289 *aUTF8Count = calculator.Size();
292 char* result = static_cast<char*>
293 (nsMemory::Alloc(calculator.Size() + 1));
294 if (!result) {
295 return nullptr;
298 ConvertUTF16toUTF8 converter(result);
299 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
300 converter).write_terminator();
301 NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
303 return result;
306 char*
307 ToNewCString(const nsACString& aSource)
309 // no conversion needed, just allocate a buffer of the correct length and copy into it
311 char* result = AllocateStringCopy(aSource, (char*)0);
312 if (!result) {
313 return nullptr;
316 nsACString::const_iterator fromBegin, fromEnd;
317 char* toBegin = result;
318 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
319 toBegin) = char(0);
320 return result;
323 char16_t*
324 ToNewUnicode(const nsAString& aSource)
326 // no conversion needed, just allocate a buffer of the correct length and copy into it
328 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
329 if (!result) {
330 return nullptr;
333 nsAString::const_iterator fromBegin, fromEnd;
334 char16_t* toBegin = result;
335 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
336 toBegin) = char16_t(0);
337 return result;
340 char16_t*
341 ToNewUnicode(const nsACString& aSource)
343 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
344 if (!result) {
345 return nullptr;
348 nsACString::const_iterator fromBegin, fromEnd;
349 LossyConvertEncoding8to16 converter(result);
350 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
351 converter).write_terminator();
352 return result;
355 uint32_t
356 CalcUTF8ToUnicodeLength(const nsACString& aSource)
358 nsACString::const_iterator start, end;
359 CalculateUTF8Length calculator;
360 copy_string(aSource.BeginReading(start), aSource.EndReading(end),
361 calculator);
362 return calculator.Length();
365 char16_t*
366 UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
367 uint32_t* aUTF16Count)
369 nsACString::const_iterator start, end;
370 ConvertUTF8toUTF16 converter(aBuffer);
371 copy_string(aSource.BeginReading(start),
372 aSource.EndReading(end),
373 converter).write_terminator();
374 if (aUTF16Count) {
375 *aUTF16Count = converter.Length();
377 return aBuffer;
380 char16_t*
381 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
383 const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
384 const size_t buffer_size = (length + 1) * sizeof(char16_t);
385 char16_t* buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size));
386 if (!buffer) {
387 return nullptr;
390 uint32_t copied;
391 UTF8ToUnicodeBuffer(aSource, buffer, &copied);
392 NS_ASSERTION(length == copied, "length mismatch");
394 if (aUTF16Count) {
395 *aUTF16Count = copied;
397 return buffer;
400 char16_t*
401 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
402 uint32_t aLength)
404 nsAString::const_iterator fromBegin, fromEnd;
405 char16_t* toBegin = aDest;
406 copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
407 aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
408 toBegin);
409 return aDest;
412 void
413 CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
414 const nsAString::const_iterator& aSrcEnd,
415 nsAString& aDest)
417 nsAString::iterator writer;
418 aDest.SetLength(Distance(aSrcStart, aSrcEnd));
420 aDest.BeginWriting(writer);
421 nsAString::const_iterator fromBegin(aSrcStart);
423 copy_string(fromBegin, aSrcEnd, writer);
426 void
427 AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
428 const nsAString::const_iterator& aSrcEnd,
429 nsAString& aDest)
431 nsAString::iterator writer;
432 uint32_t oldLength = aDest.Length();
433 aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
435 aDest.BeginWriting(writer).advance(oldLength);
436 nsAString::const_iterator fromBegin(aSrcStart);
438 copy_string(fromBegin, aSrcEnd, writer);
441 bool
442 IsASCII(const nsAString& aString)
444 static const char16_t NOT_ASCII = char16_t(~0x007F);
447 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
449 nsAString::const_iterator iter, done_reading;
450 aString.BeginReading(iter);
451 aString.EndReading(done_reading);
453 const char16_t* c = iter.get();
454 const char16_t* end = done_reading.get();
456 while (c < end) {
457 if (*c++ & NOT_ASCII) {
458 return false;
462 return true;
465 bool
466 IsASCII(const nsACString& aString)
468 static const char NOT_ASCII = char(~0x7F);
471 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
473 nsACString::const_iterator iter, done_reading;
474 aString.BeginReading(iter);
475 aString.EndReading(done_reading);
477 const char* c = iter.get();
478 const char* end = done_reading.get();
480 while (c < end) {
481 if (*c++ & NOT_ASCII) {
482 return false;
486 return true;
489 bool
490 IsUTF8(const nsACString& aString, bool aRejectNonChar)
492 nsReadingIterator<char> done_reading;
493 aString.EndReading(done_reading);
495 int32_t state = 0;
496 bool overlong = false;
497 bool surrogate = false;
498 bool nonchar = false;
499 uint16_t olupper = 0; // overlong byte upper bound.
500 uint16_t slower = 0; // surrogate byte lower bound.
502 nsReadingIterator<char> iter;
503 aString.BeginReading(iter);
505 const char* ptr = iter.get();
506 const char* end = done_reading.get();
507 while (ptr < end) {
508 uint8_t c;
510 if (0 == state) {
511 c = *ptr++;
513 if (UTF8traits::isASCII(c)) {
514 continue;
517 if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
518 return false;
519 } else if (UTF8traits::is2byte(c)) {
520 state = 1;
521 } else if (UTF8traits::is3byte(c)) {
522 state = 2;
523 if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
524 overlong = true;
525 olupper = 0x9F;
526 } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
527 surrogate = true;
528 slower = 0xA0;
529 } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
530 nonchar = true;
532 } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
533 state = 3;
534 nonchar = true;
535 if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
536 overlong = true;
537 olupper = 0x8F;
538 } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
539 // actually not surrogates but codepoints beyond 0x10FFFF
540 surrogate = true;
541 slower = 0x90;
543 } else {
544 return false; // Not UTF-8 string
548 if (nonchar && !aRejectNonChar) {
549 nonchar = false;
552 while (ptr < end && state) {
553 c = *ptr++;
554 --state;
556 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
557 if (nonchar &&
558 ((!state && c < 0xBE) ||
559 (state == 1 && c != 0xBF) ||
560 (state == 2 && 0x0F != (0x0F & c)))) {
561 nonchar = false;
564 if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
565 (surrogate && slower <= c) || (nonchar && !state)) {
566 return false; // Not UTF-8 string
569 overlong = surrogate = false;
572 return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
/**
 * A character sink for in-place case conversion.
 *
 * |write| rewrites the buffer it is handed: every byte in 'a'..'z' is
 * replaced by its 'A'..'Z' counterpart, all other bytes are left alone.
 */
class ConvertToUpperCase
{
public:
  typedef char value_type;

  /**
   * @param aSource       buffer to convert; modified in place despite the
   *                      const qualifier
   * @param aSourceLength number of bytes to process
   * @return |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* buffer = const_cast<char*>(aSource);
    for (uint32_t i = 0; i != aSourceLength; ++i) {
      const char ch = buffer[i];
      if (ch >= 'a' && ch <= 'z') {
        buffer[i] = ch - ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
599 void
600 ToUpperCase(nsCSubstring& aCString)
602 ConvertToUpperCase converter;
603 char* start;
604 converter.write(aCString.BeginWriting(start), aCString.Length());
608 * A character sink for copying with case conversion.
610 class CopyToUpperCase
612 public:
613 typedef char value_type;
615 explicit CopyToUpperCase(nsACString::iterator& aDestIter)
616 : mIter(aDestIter)
620 uint32_t
621 write(const char* aSource, uint32_t aSourceLength)
623 uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
624 char* cp = mIter.get();
625 const char* end = aSource + len;
626 while (aSource != end) {
627 char ch = *aSource;
628 if ((ch >= 'a') && (ch <= 'z')) {
629 *cp = ch - ('a' - 'A');
630 } else {
631 *cp = ch;
633 ++aSource;
634 ++cp;
636 mIter.advance(len);
637 return len;
640 protected:
641 nsACString::iterator& mIter;
644 void
645 ToUpperCase(const nsACString& aSource, nsACString& aDest)
647 nsACString::const_iterator fromBegin, fromEnd;
648 nsACString::iterator toBegin;
649 aDest.SetLength(aSource.Length());
651 CopyToUpperCase converter(aDest.BeginWriting(toBegin));
652 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
653 converter);
/**
 * A character sink for case conversion.
 *
 * |write| rewrites the buffer it is handed: every byte in 'A'..'Z' is
 * replaced by its 'a'..'z' counterpart, all other bytes are left alone.
 */
class ConvertToLowerCase
{
public:
  typedef char value_type;

  /**
   * @param aSource       buffer to convert; modified in place despite the
   *                      const qualifier
   * @param aSourceLength number of bytes to process
   * @return |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* buffer = const_cast<char*>(aSource);
    for (uint32_t i = 0; i != aSourceLength; ++i) {
      const char ch = buffer[i];
      if ((ch >= 'A') && (ch <= 'Z')) {
        buffer[i] = ch + ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
680 void
681 ToLowerCase(nsCSubstring& aCString)
683 ConvertToLowerCase converter;
684 char* start;
685 converter.write(aCString.BeginWriting(start), aCString.Length());
689 * A character sink for copying with case conversion.
691 class CopyToLowerCase
693 public:
694 typedef char value_type;
696 explicit CopyToLowerCase(nsACString::iterator& aDestIter)
697 : mIter(aDestIter)
701 uint32_t
702 write(const char* aSource, uint32_t aSourceLength)
704 uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
705 char* cp = mIter.get();
706 const char* end = aSource + len;
707 while (aSource != end) {
708 char ch = *aSource;
709 if ((ch >= 'A') && (ch <= 'Z')) {
710 *cp = ch + ('a' - 'A');
711 } else {
712 *cp = ch;
714 ++aSource;
715 ++cp;
717 mIter.advance(len);
718 return len;
721 protected:
722 nsACString::iterator& mIter;
725 void
726 ToLowerCase(const nsACString& aSource, nsACString& aDest)
728 nsACString::const_iterator fromBegin, fromEnd;
729 nsACString::iterator toBegin;
730 aDest.SetLength(aSource.Length());
732 CopyToLowerCase converter(aDest.BeginWriting(toBegin));
733 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
734 converter);
737 bool
738 ParseString(const nsACString& aSource, char aDelimiter,
739 nsTArray<nsCString>& aArray)
741 nsACString::const_iterator start, end;
742 aSource.BeginReading(start);
743 aSource.EndReading(end);
745 uint32_t oldLength = aArray.Length();
747 for (;;) {
748 nsACString::const_iterator delimiter = start;
749 FindCharInReadable(aDelimiter, delimiter, end);
751 if (delimiter != start) {
752 if (!aArray.AppendElement(Substring(start, delimiter))) {
753 aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
754 return false;
758 if (delimiter == end) {
759 break;
761 start = ++delimiter;
762 if (start == end) {
763 break;
767 return true;
/**
 * Searches [|aSearchStart|, |aSearchEnd|) from left to right for the first
 * occurrence of |aPattern|.
 *
 * |aCompare| is called as |aCompare(a, b, 1, 1)| on single characters and is
 * expected to return zero when they match.
 *
 * On success the two iterators are updated to delimit the match. On failure
 * |aSearchStart| is left equal to |aSearchEnd|.
 *
 * An empty pattern is reported as "not found": it has no first character to
 * compare, and letting the loops below run would read past the end of the
 * pattern's data.
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search; out: start of match
 * @param aSearchEnd   in: end of the range to search; out: end of match
 * @param aCompare     single-character comparator
 * @return true if the pattern was found.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                    IteratorT& aSearchEnd, const Comparator& aCompare)
{
  bool found_it = false;

  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart != aSearchEnd) {
    IteratorT aPatternStart, aPatternEnd;
    aPattern.BeginReading(aPatternStart);
    aPattern.EndReading(aPatternEnd);

    // Guard against an empty pattern: dereferencing |aPatternStart| below
    // would otherwise touch memory outside the pattern.
    if (aPatternStart == aPatternEnd) {
      aSearchStart = aSearchEnd;
      return false;
    }

    // outer loop keeps searching till we find it or run out of string to search
    while (!found_it) {
      // fast inner loop (that's what it's called, not what it is) looks for a potential match
      while (aSearchStart != aSearchEnd &&
             aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
        ++aSearchStart;
      }

      // if we broke out of the `fast' loop because we're out of string ... we're done: no match
      if (aSearchStart == aSearchEnd) {
        break;
      }

      // otherwise, we're at a potential match, let's see if we really hit one
      IteratorT testPattern(aPatternStart);
      IteratorT testSearch(aSearchStart);

      // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
      for (;;) {
        // we already compared the first character in the outer loop,
        // so we'll advance before the next comparison
        ++testPattern;
        ++testSearch;

        // if we verified all the way to the end of the pattern, then we found it!
        if (testPattern == aPatternEnd) {
          found_it = true;
          aSearchEnd = testSearch; // return the exact found range through the parameters
          break;
        }

        // if we got to end of the string we're searching before we hit the end of the
        // pattern, we'll never find what we're looking for
        if (testSearch == aSearchEnd) {
          aSearchStart = aSearchEnd;
          break;
        }

        // else if we mismatched ... it's time to advance to the next search position
        // and get back into the `fast' loop
        if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
          ++aSearchStart;
          break;
        }
      }
    }
  }

  return found_it;
}
/**
 * This searches the entire string from right to left, and returns the first
 * match found, if any.
 *
 * |aCompare| is called as |aCompare(a, b, 1, 1)| on single characters and a
 * zero result is treated as a match.
 *
 * On success the two iterators are updated to delimit the match. On failure
 * |aSearchStart| is set equal to |aSearchEnd|.
 *
 * An empty pattern is reported as "not found": it has no last character, and
 * stepping |patternEnd| back would move it in front of the pattern's data.
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search; out: start of match
 * @param aSearchEnd   in: end of the range to search; out: end of match
 * @param aCompare     single-character comparator
 * @return true if the pattern was found.
 */
template <class StringT, class IteratorT, class Comparator>
bool
RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                     IteratorT& aSearchEnd, const Comparator& aCompare)
{
  IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // Guard against an empty pattern: the decrement below would otherwise
  // leave |patternEnd| pointing before the first pattern character.
  if (patternStart == patternEnd) {
    aSearchStart = aSearchEnd;
    return false;
  }

  // Point to the last character in the pattern
  --patternEnd;
  // outer loop keeps searching till we run out of string to search
  while (aSearchStart != searchEnd) {
    // Point to the end position of the next possible match
    --searchEnd;

    // Check last character, if a match, explore further from here
    if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
      // We're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternEnd);
      IteratorT testSearch(searchEnd);

      // inner loop verifies the potential match at the current position
      do {
        // if we verified all the way to the end of the pattern, then we found it!
        if (testPattern == patternStart) {
          aSearchStart = testSearch; // point to start of match
          aSearchEnd = ++searchEnd; // point to end of match
          return true;
        }

        // if we got to end of the string we're searching before we hit the end of the
        // pattern, we'll never find what we're looking for
        if (testSearch == aSearchStart) {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
    }
  }

  aSearchStart = aSearchEnd;
  return false;
}
886 bool
887 FindInReadable(const nsAString& aPattern,
888 nsAString::const_iterator& aSearchStart,
889 nsAString::const_iterator& aSearchEnd,
890 const nsStringComparator& aComparator)
892 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
895 bool
896 FindInReadable(const nsACString& aPattern,
897 nsACString::const_iterator& aSearchStart,
898 nsACString::const_iterator& aSearchEnd,
899 const nsCStringComparator& aComparator)
901 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
904 bool
905 CaseInsensitiveFindInReadable(const nsACString& aPattern,
906 nsACString::const_iterator& aSearchStart,
907 nsACString::const_iterator& aSearchEnd)
909 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
910 nsCaseInsensitiveCStringComparator());
913 bool
914 RFindInReadable(const nsAString& aPattern,
915 nsAString::const_iterator& aSearchStart,
916 nsAString::const_iterator& aSearchEnd,
917 const nsStringComparator& aComparator)
919 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
922 bool
923 RFindInReadable(const nsACString& aPattern,
924 nsACString::const_iterator& aSearchStart,
925 nsACString::const_iterator& aSearchEnd,
926 const nsCStringComparator& aComparator)
928 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
931 bool
932 FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
933 const nsAString::const_iterator& aSearchEnd)
935 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
937 const char16_t* charFoundAt =
938 nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
939 if (charFoundAt) {
940 aSearchStart.advance(charFoundAt - aSearchStart.get());
941 return true;
944 aSearchStart.advance(fragmentLength);
945 return false;
948 bool
949 FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
950 const nsACString::const_iterator& aSearchEnd)
952 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
954 const char* charFoundAt =
955 nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
956 if (charFoundAt) {
957 aSearchStart.advance(charFoundAt - aSearchStart.get());
958 return true;
961 aSearchStart.advance(fragmentLength);
962 return false;
965 uint32_t
966 CountCharInReadable(const nsAString& aStr, char16_t aChar)
968 uint32_t count = 0;
969 nsAString::const_iterator begin, end;
971 aStr.BeginReading(begin);
972 aStr.EndReading(end);
974 while (begin != end) {
975 if (*begin == aChar) {
976 ++count;
978 ++begin;
981 return count;
984 uint32_t
985 CountCharInReadable(const nsACString& aStr, char aChar)
987 uint32_t count = 0;
988 nsACString::const_iterator begin, end;
990 aStr.BeginReading(begin);
991 aStr.EndReading(end);
993 while (begin != end) {
994 if (*begin == aChar) {
995 ++count;
997 ++begin;
1000 return count;
1003 bool
1004 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
1005 const nsStringComparator& aComparator)
1007 nsAString::size_type src_len = aSource.Length(),
1008 sub_len = aSubstring.Length();
1009 if (sub_len > src_len) {
1010 return false;
1012 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1015 bool
1016 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
1017 const nsCStringComparator& aComparator)
1019 nsACString::size_type src_len = aSource.Length(),
1020 sub_len = aSubstring.Length();
1021 if (sub_len > src_len) {
1022 return false;
1024 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1027 bool
1028 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
1029 const nsStringComparator& aComparator)
1031 nsAString::size_type src_len = aSource.Length(),
1032 sub_len = aSubstring.Length();
1033 if (sub_len > src_len) {
1034 return false;
1036 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1037 aComparator);
1040 bool
1041 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
1042 const nsCStringComparator& aComparator)
1044 nsACString::size_type src_len = aSource.Length(),
1045 sub_len = aSubstring.Length();
1046 if (sub_len > src_len) {
1047 return false;
1049 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1050 aComparator);
1055 static const char16_t empty_buffer[1] = { '\0' };
1057 const nsAFlatString&
1058 EmptyString()
1060 static const nsDependentString sEmpty(empty_buffer);
1062 return sEmpty;
1065 const nsAFlatCString&
1066 EmptyCString()
1068 static const nsDependentCString sEmpty((const char*)empty_buffer);
1070 return sEmpty;
1073 const nsAFlatString&
1074 NullString()
1076 static const nsXPIDLString sNull;
1078 return sNull;
1081 const nsAFlatCString&
1082 NullCString()
1084 static const nsXPIDLCString sNull;
1086 return sNull;
1089 int32_t
1090 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1091 const nsASingleFragmentString& aUTF16String)
1093 static const uint32_t NOT_ASCII = uint32_t(~0x7F);
1095 const char* u8;
1096 const char* u8end;
1097 aUTF8String.BeginReading(u8);
1098 aUTF8String.EndReading(u8end);
1100 const char16_t* u16;
1101 const char16_t* u16end;
1102 aUTF16String.BeginReading(u16);
1103 aUTF16String.EndReading(u16end);
1105 while (u8 != u8end && u16 != u16end) {
1106 // Cast away the signedness of *u8 to prevent signextension when
1107 // converting to uint32_t
1108 uint32_t c8_32 = (uint8_t)*u8;
1110 if (c8_32 & NOT_ASCII) {
1111 bool err;
1112 c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1113 if (err) {
1114 return INT32_MIN;
1117 uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1118 // The above UTF16CharEnumerator::NextChar() calls can
1119 // fail, but if it does for anything other than no data to
1120 // look at (which can't happen here), it returns the
1121 // Unicode replacement character 0xFFFD for the invalid
1122 // data they were fed. Ignore that error and treat invalid
1123 // UTF16 as 0xFFFD.
1125 // This matches what our UTF16 to UTF8 conversion code
1126 // does, and thus a UTF8 string that came from an invalid
1127 // UTF16 string will compare equal to the invalid UTF16
1128 // string it came from. Same is true for any other UTF16
1129 // string differs only in the invalid part of the string.
1131 if (c8_32 != c16_32) {
1132 return c8_32 < c16_32 ? -1 : 1;
1134 } else {
1135 if (c8_32 != *u16) {
1136 return c8_32 > *u16 ? 1 : -1;
1139 ++u8;
1140 ++u16;
1144 if (u8 != u8end) {
1145 // We get to the end of the UTF16 string, but no to the end of
1146 // the UTF8 string. The UTF8 string is longer than the UTF16
1147 // string
1149 return 1;
1152 if (u16 != u16end) {
1153 // We get to the end of the UTF8 string, but no to the end of
1154 // the UTF16 string. The UTF16 string is longer than the UTF8
1155 // string
1157 return -1;
1160 // The two strings match.
1162 return 0;
1165 void
1166 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
1168 NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1169 if (IS_IN_BMP(aSource)) {
1170 aDest.Append(char16_t(aSource));
1171 } else {
1172 aDest.Append(H_SURROGATE(aSource));
1173 aDest.Append(L_SURROGATE(aSource));