Bug 1874684 - Part 4: Prefer const references instead of copying Instant values....
[gecko.git] / xpcom / string / nsReadableUtils.cpp
blobfa4c4bc69b75671a8a822f1d819eed6b43cdf181
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsReadableUtils.h"
9 #include <algorithm>
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/Utf8.h"
14 #include "nscore.h"
15 #include "nsString.h"
16 #include "nsTArray.h"
17 #include "nsUTF8Utils.h"
19 using mozilla::Span;
/**
 * Allocates an uninitialized buffer of |CharT| that is big enough to hold a
 * copy of the supplied string plus a zero terminator.
 *
 * The second, unnamed parameter is never read; it only exists so that |CharT|
 * can be deduced from the argument type at the call site.
 *
 * @param aSource a string you will eventually be making a copy of
 * @return a new buffer which you must free with |free|; this is whatever
 *         |malloc| returned, so callers must check it for null.
 */
template <class FromStringT, class CharT>
inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  // Widen to size_t before adding the terminator slot so that the "+ 1" is
  // not performed in the string's own (possibly narrower) length type.
  const size_t unitCount = size_t(aSource.Length()) + 1;
  void* const rawBuffer = malloc(unitCount * sizeof(CharT));
  return static_cast<CharT*>(rawBuffer);
}
35 char* ToNewCString(const nsAString& aSource) {
36 char* str = ToNewCString(aSource, mozilla::fallible);
37 if (!str) {
38 MOZ_CRASH("Unable to allocate memory");
40 return str;
43 char* ToNewCString(const nsAString& aSource,
44 const mozilla::fallible_t& aFallible) {
45 char* dest = AllocateStringCopy(aSource, (char*)nullptr);
46 if (!dest) {
47 return nullptr;
50 auto len = aSource.Length();
51 LossyConvertUtf16toLatin1(aSource, Span(dest, len));
52 dest[len] = 0;
53 return dest;
56 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
57 const mozilla::fallible_t& aFallible) {
58 auto len = aSource.Length();
59 // The uses of this function seem temporary enough that it's not
60 // worthwhile to be fancy about the allocation size. Let's just use
61 // the worst case.
62 // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and
63 // then we have the terminator.
64 // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for
65 // historical reasons.
66 mozilla::CheckedInt<uint32_t> destLen(len);
67 destLen *= 3;
68 destLen += 1;
69 if (!destLen.isValid()) {
70 return nullptr;
72 size_t destLenVal = destLen.value();
73 char* dest = static_cast<char*>(malloc(destLenVal));
74 if (!dest) {
75 return nullptr;
78 size_t written = ConvertUtf16toUtf8(aSource, Span(dest, destLenVal));
79 dest[written] = 0;
81 if (aUTF8Count) {
82 *aUTF8Count = written;
85 return dest;
88 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
89 char* str = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible);
90 if (!str) {
91 MOZ_CRASH("Unable to allocate memory");
93 return str;
96 char* ToNewCString(const nsACString& aSource) {
97 char* str = ToNewCString(aSource, mozilla::fallible);
98 if (!str) {
99 MOZ_CRASH("Unable to allocate memory");
101 return str;
104 char* ToNewCString(const nsACString& aSource,
105 const mozilla::fallible_t& aFallible) {
106 // no conversion needed, just allocate a buffer of the correct length and copy
107 // into it
109 char* dest = AllocateStringCopy(aSource, (char*)nullptr);
110 if (!dest) {
111 return nullptr;
114 auto len = aSource.Length();
115 memcpy(dest, aSource.BeginReading(), len * sizeof(char));
116 dest[len] = 0;
117 return dest;
120 char16_t* ToNewUnicode(const nsAString& aSource) {
121 char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
122 if (!str) {
123 MOZ_CRASH("Unable to allocate memory");
125 return str;
128 char16_t* ToNewUnicode(const nsAString& aSource,
129 const mozilla::fallible_t& aFallible) {
130 // no conversion needed, just allocate a buffer of the correct length and copy
131 // into it
133 char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
134 if (!dest) {
135 return nullptr;
138 auto len = aSource.Length();
139 memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
140 dest[len] = 0;
141 return dest;
144 char16_t* ToNewUnicode(const nsACString& aSource) {
145 char16_t* str = ToNewUnicode(aSource, mozilla::fallible);
146 if (!str) {
147 MOZ_CRASH("Unable to allocate memory");
149 return str;
152 char16_t* ToNewUnicode(const nsACString& aSource,
153 const mozilla::fallible_t& aFallible) {
154 char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
155 if (!dest) {
156 return nullptr;
159 auto len = aSource.Length();
160 ConvertLatin1toUtf16(aSource, Span(dest, len));
161 dest[len] = 0;
162 return dest;
165 char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
166 const mozilla::fallible_t& aFallible) {
167 // Compute length plus one as required by ConvertUTF8toUTF16
168 uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow
170 mozilla::CheckedInt<size_t> allocLength(lengthPlusOne);
171 // Add space for zero-termination
172 allocLength += 1;
173 // We need UTF-16 units
174 allocLength *= sizeof(char16_t);
176 if (!allocLength.isValid()) {
177 return nullptr;
180 char16_t* dest = (char16_t*)malloc(allocLength.value());
181 if (!dest) {
182 return nullptr;
185 size_t written = ConvertUtf8toUtf16(aSource, Span(dest, lengthPlusOne));
186 dest[written] = 0;
188 if (aUTF16Count) {
189 *aUTF16Count = written;
192 return dest;
195 char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
196 char16_t* str = UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible);
197 if (!str) {
198 MOZ_CRASH("Unable to allocate memory");
200 return str;
203 char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
204 char16_t* aDest, uint32_t aLength) {
205 MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length());
206 memcpy(aDest, aSource.BeginReading() + aSrcOffset,
207 size_t(aLength) * sizeof(char16_t));
208 return aDest;
211 void ToUpperCase(nsACString& aCString) {
212 char* cp = aCString.BeginWriting();
213 char* end = cp + aCString.Length();
214 while (cp != end) {
215 char ch = *cp;
216 if (ch >= 'a' && ch <= 'z') {
217 *cp = ch - ('a' - 'A');
219 ++cp;
223 void ToUpperCase(const nsACString& aSource, nsACString& aDest) {
224 aDest.SetLength(aSource.Length());
225 const char* src = aSource.BeginReading();
226 const char* end = src + aSource.Length();
227 char* dst = aDest.BeginWriting();
228 while (src != end) {
229 char ch = *src;
230 if (ch >= 'a' && ch <= 'z') {
231 *dst = ch - ('a' - 'A');
232 } else {
233 *dst = ch;
235 ++src;
236 ++dst;
240 void ToLowerCase(nsACString& aCString) {
241 char* cp = aCString.BeginWriting();
242 char* end = cp + aCString.Length();
243 while (cp != end) {
244 char ch = *cp;
245 if (ch >= 'A' && ch <= 'Z') {
246 *cp = ch + ('a' - 'A');
248 ++cp;
252 void ToLowerCase(const nsACString& aSource, nsACString& aDest) {
253 aDest.SetLength(aSource.Length());
254 const char* src = aSource.BeginReading();
255 const char* end = src + aSource.Length();
256 char* dst = aDest.BeginWriting();
257 while (src != end) {
258 char ch = *src;
259 if (ch >= 'A' && ch <= 'Z') {
260 *dst = ch + ('a' - 'A');
261 } else {
262 *dst = ch;
264 ++src;
265 ++dst;
269 void ParseString(const nsACString& aSource, char aDelimiter,
270 nsTArray<nsCString>& aArray) {
271 nsACString::const_iterator start, end;
272 aSource.BeginReading(start);
273 aSource.EndReading(end);
275 for (;;) {
276 nsACString::const_iterator delimiter = start;
277 FindCharInReadable(aDelimiter, delimiter, end);
279 if (delimiter != start) {
280 aArray.AppendElement(Substring(start, delimiter));
283 if (delimiter == end) {
284 break;
286 start = ++delimiter;
287 if (start == end) {
288 break;
293 template <class StringT, class IteratorT>
294 bool FindInReadable_Impl(
295 const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
296 nsTStringComparator<typename StringT::char_type> aCompare) {
297 bool found_it = false;
299 // only bother searching at all if we're given a non-empty range to search
300 if (aSearchStart != aSearchEnd) {
301 IteratorT aPatternStart, aPatternEnd;
302 aPattern.BeginReading(aPatternStart);
303 aPattern.EndReading(aPatternEnd);
305 // outer loop keeps searching till we find it or run out of string to search
306 while (!found_it) {
307 // fast inner loop (that's what it's called, not what it is) looks for a
308 // potential match
309 while (aSearchStart != aSearchEnd &&
310 aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
311 ++aSearchStart;
314 // if we broke out of the `fast' loop because we're out of string ...
315 // we're done: no match
316 if (aSearchStart == aSearchEnd) {
317 break;
320 // otherwise, we're at a potential match, let's see if we really hit one
321 IteratorT testPattern(aPatternStart);
322 IteratorT testSearch(aSearchStart);
324 // slow inner loop verifies the potential match (found by the `fast' loop)
325 // at the current position
326 for (;;) {
327 // we already compared the first character in the outer loop,
328 // so we'll advance before the next comparison
329 ++testPattern;
330 ++testSearch;
332 // if we verified all the way to the end of the pattern, then we found
333 // it!
334 if (testPattern == aPatternEnd) {
335 found_it = true;
336 aSearchEnd = testSearch; // return the exact found range through the
337 // parameters
338 break;
341 // if we got to end of the string we're searching before we hit the end
342 // of the
343 // pattern, we'll never find what we're looking for
344 if (testSearch == aSearchEnd) {
345 aSearchStart = aSearchEnd;
346 break;
349 // else if we mismatched ... it's time to advance to the next search
350 // position
351 // and get back into the `fast' loop
352 if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
353 ++aSearchStart;
354 break;
360 return found_it;
364 * This searches the entire string from right to left, and returns the first
365 * match found, if any.
367 template <class StringT, class IteratorT>
368 bool RFindInReadable_Impl(
369 const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
370 nsTStringComparator<typename StringT::char_type> aCompare) {
371 IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
372 aPattern.BeginReading(patternStart);
373 aPattern.EndReading(patternEnd);
375 // Point to the last character in the pattern
376 --patternEnd;
377 // outer loop keeps searching till we run out of string to search
378 while (aSearchStart != searchEnd) {
379 // Point to the end position of the next possible match
380 --searchEnd;
382 // Check last character, if a match, explore further from here
383 if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
384 // We're at a potential match, let's see if we really hit one
385 IteratorT testPattern(patternEnd);
386 IteratorT testSearch(searchEnd);
388 // inner loop verifies the potential match at the current position
389 do {
390 // if we verified all the way to the end of the pattern, then we found
391 // it!
392 if (testPattern == patternStart) {
393 aSearchStart = testSearch; // point to start of match
394 aSearchEnd = ++searchEnd; // point to end of match
395 return true;
398 // if we got to end of the string we're searching before we hit the end
399 // of the
400 // pattern, we'll never find what we're looking for
401 if (testSearch == aSearchStart) {
402 aSearchStart = aSearchEnd;
403 return false;
406 // test previous character for a match
407 --testPattern;
408 --testSearch;
409 } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
413 aSearchStart = aSearchEnd;
414 return false;
417 bool FindInReadable(const nsAString& aPattern,
418 nsAString::const_iterator& aSearchStart,
419 nsAString::const_iterator& aSearchEnd,
420 nsStringComparator aComparator) {
421 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
424 bool FindInReadable(const nsACString& aPattern,
425 nsACString::const_iterator& aSearchStart,
426 nsACString::const_iterator& aSearchEnd,
427 nsCStringComparator aComparator) {
428 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
431 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
432 nsACString::const_iterator& aSearchStart,
433 nsACString::const_iterator& aSearchEnd) {
434 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
435 nsCaseInsensitiveCStringComparator);
438 bool RFindInReadable(const nsAString& aPattern,
439 nsAString::const_iterator& aSearchStart,
440 nsAString::const_iterator& aSearchEnd,
441 const nsStringComparator aComparator) {
442 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
445 bool RFindInReadable(const nsACString& aPattern,
446 nsACString::const_iterator& aSearchStart,
447 nsACString::const_iterator& aSearchEnd,
448 const nsCStringComparator aComparator) {
449 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
452 bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
453 const nsAString::const_iterator& aSearchEnd) {
454 ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
456 const char16_t* charFoundAt =
457 nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
458 if (charFoundAt) {
459 aSearchStart.advance(charFoundAt - aSearchStart.get());
460 return true;
463 aSearchStart.advance(fragmentLength);
464 return false;
467 bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
468 const nsACString::const_iterator& aSearchEnd) {
469 ptrdiff_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
471 const char* charFoundAt =
472 nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
473 if (charFoundAt) {
474 aSearchStart.advance(charFoundAt - aSearchStart.get());
475 return true;
478 aSearchStart.advance(fragmentLength);
479 return false;
482 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) {
483 nsAString::size_type src_len = aSource.Length(),
484 sub_len = aSubstring.Length();
485 if (sub_len > src_len) {
486 return false;
488 return Substring(aSource, 0, sub_len).Equals(aSubstring);
491 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
492 nsStringComparator aComparator) {
493 nsAString::size_type src_len = aSource.Length(),
494 sub_len = aSubstring.Length();
495 if (sub_len > src_len) {
496 return false;
498 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
501 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) {
502 nsACString::size_type src_len = aSource.Length(),
503 sub_len = aSubstring.Length();
504 if (sub_len > src_len) {
505 return false;
507 return Substring(aSource, 0, sub_len).Equals(aSubstring);
510 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
511 nsCStringComparator aComparator) {
512 nsACString::size_type src_len = aSource.Length(),
513 sub_len = aSubstring.Length();
514 if (sub_len > src_len) {
515 return false;
517 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
520 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) {
521 nsAString::size_type src_len = aSource.Length(),
522 sub_len = aSubstring.Length();
523 if (sub_len > src_len) {
524 return false;
526 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
529 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
530 nsStringComparator aComparator) {
531 nsAString::size_type src_len = aSource.Length(),
532 sub_len = aSubstring.Length();
533 if (sub_len > src_len) {
534 return false;
536 return Substring(aSource, src_len - sub_len, sub_len)
537 .Equals(aSubstring, aComparator);
540 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) {
541 nsACString::size_type src_len = aSource.Length(),
542 sub_len = aSubstring.Length();
543 if (sub_len > src_len) {
544 return false;
546 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
549 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
550 nsCStringComparator aComparator) {
551 nsACString::size_type src_len = aSource.Length(),
552 sub_len = aSubstring.Length();
553 if (sub_len > src_len) {
554 return false;
556 return Substring(aSource, src_len - sub_len, sub_len)
557 .Equals(aSubstring, aComparator);
// A single zero code unit: the backing storage for the shared empty strings
// returned by EmptyString() and EmptyCString().
static const char16_t empty_buffer[1] = {u'\0'};
562 const nsString& EmptyString() {
563 static const nsDependentString sEmpty(empty_buffer);
565 return sEmpty;
568 const nsCString& EmptyCString() {
569 static const nsDependentCString sEmpty((const char*)empty_buffer);
571 return sEmpty;
574 const nsString& VoidString() {
575 static const nsString sNull(mozilla::detail::StringDataFlags::VOIDED);
577 return sNull;
580 const nsCString& VoidCString() {
581 static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED);
583 return sNull;
586 int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
587 const nsAString& aUTF16String, bool* aErr) {
588 const char* u8;
589 const char* u8end;
590 aUTF8String.BeginReading(u8);
591 aUTF8String.EndReading(u8end);
593 const char16_t* u16;
594 const char16_t* u16end;
595 aUTF16String.BeginReading(u16);
596 aUTF16String.EndReading(u16end);
598 for (;;) {
599 if (u8 == u8end) {
600 if (u16 == u16end) {
601 return 0;
603 return -1;
605 if (u16 == u16end) {
606 return 1;
608 // No need for ASCII optimization, since both NextChar()
609 // calls get inlined.
610 uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
611 uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
612 if (scalar16 == scalar8) {
613 continue;
615 if (scalar8 < scalar16) {
616 return -1;
618 return 1;
622 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
623 NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
624 if (IS_IN_BMP(aSource)) {
625 aDest.Append(char16_t(aSource));
626 } else {
627 aDest.Append(H_SURROGATE(aSource));
628 aDest.Append(L_SURROGATE(aSource));