1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsReadableUtils.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/Utf8.h"
17 #include "nsUTF8Utils.h"
/**
 * A helper function that allocates a buffer of the desired character type big
 * enough to hold a copy of the supplied string (plus a zero terminator).
 *
 * The buffer is returned uninitialized; the caller writes both the characters
 * and the terminator. The unnamed pointer parameter is never read: it only
 * selects the character type of the buffer.
 *
 * @param aSource a string you will eventually be making a copy of
 * @return a new buffer which you must free with |free|, or nullptr when the
 *         allocation fails or the requested size cannot be represented.
 */
template <class FromStringT, class CharT>
inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  // Code units needed: the string's length plus one for the terminator.
  const size_t units = size_t(aSource.Length()) + 1;
  // Reject a count that wrapped around to zero, or whose size in bytes would
  // overflow size_t, instead of silently allocating a too-small buffer.
  if (units == 0 || units > size_t(-1) / sizeof(CharT)) {
    return nullptr;
  }
  return static_cast<CharT*>(malloc(units * sizeof(CharT)));
}
35 char* ToNewCString(const nsAString
& aSource
) {
36 char* str
= ToNewCString(aSource
, mozilla::fallible
);
38 MOZ_CRASH("Unable to allocate memory");
43 char* ToNewCString(const nsAString
& aSource
,
44 const mozilla::fallible_t
& aFallible
) {
45 char* dest
= AllocateStringCopy(aSource
, (char*)nullptr);
50 auto len
= aSource
.Length();
51 LossyConvertUtf16toLatin1(aSource
, Span(dest
, len
));
56 char* ToNewUTF8String(const nsAString
& aSource
, uint32_t* aUTF8Count
,
57 const mozilla::fallible_t
& aFallible
) {
58 auto len
= aSource
.Length();
59 // The uses of this function seem temporary enough that it's not
60 // worthwhile to be fancy about the allocation size. Let's just use
62 // Times 3 plus 1, because ConvertUTF16toUTF8 requires times 3 and
63 // then we have the terminator.
64 // Using CheckedInt<uint32_t>, because aUTF8Count is uint32_t* for
65 // historical reasons.
66 mozilla::CheckedInt
<uint32_t> destLen(len
);
69 if (!destLen
.isValid()) {
72 size_t destLenVal
= destLen
.value();
73 char* dest
= static_cast<char*>(malloc(destLenVal
));
78 size_t written
= ConvertUtf16toUtf8(aSource
, Span(dest
, destLenVal
));
82 *aUTF8Count
= written
;
88 char* ToNewUTF8String(const nsAString
& aSource
, uint32_t* aUTF8Count
) {
89 char* str
= ToNewUTF8String(aSource
, aUTF8Count
, mozilla::fallible
);
91 MOZ_CRASH("Unable to allocate memory");
96 char* ToNewCString(const nsACString
& aSource
) {
97 char* str
= ToNewCString(aSource
, mozilla::fallible
);
99 MOZ_CRASH("Unable to allocate memory");
104 char* ToNewCString(const nsACString
& aSource
,
105 const mozilla::fallible_t
& aFallible
) {
106 // no conversion needed, just allocate a buffer of the correct length and copy
109 char* dest
= AllocateStringCopy(aSource
, (char*)nullptr);
114 auto len
= aSource
.Length();
115 memcpy(dest
, aSource
.BeginReading(), len
* sizeof(char));
120 char16_t
* ToNewUnicode(const nsAString
& aSource
) {
121 char16_t
* str
= ToNewUnicode(aSource
, mozilla::fallible
);
123 MOZ_CRASH("Unable to allocate memory");
128 char16_t
* ToNewUnicode(const nsAString
& aSource
,
129 const mozilla::fallible_t
& aFallible
) {
130 // no conversion needed, just allocate a buffer of the correct length and copy
133 char16_t
* dest
= AllocateStringCopy(aSource
, (char16_t
*)nullptr);
138 auto len
= aSource
.Length();
139 memcpy(dest
, aSource
.BeginReading(), len
* sizeof(char16_t
));
144 char16_t
* ToNewUnicode(const nsACString
& aSource
) {
145 char16_t
* str
= ToNewUnicode(aSource
, mozilla::fallible
);
147 MOZ_CRASH("Unable to allocate memory");
152 char16_t
* ToNewUnicode(const nsACString
& aSource
,
153 const mozilla::fallible_t
& aFallible
) {
154 char16_t
* dest
= AllocateStringCopy(aSource
, (char16_t
*)nullptr);
159 auto len
= aSource
.Length();
160 ConvertLatin1toUtf16(aSource
, Span(dest
, len
));
165 char16_t
* UTF8ToNewUnicode(const nsACString
& aSource
, uint32_t* aUTF16Count
,
166 const mozilla::fallible_t
& aFallible
) {
167 // Compute length plus one as required by ConvertUTF8toUTF16
168 uint32_t lengthPlusOne
= aSource
.Length() + 1; // Can't overflow
170 mozilla::CheckedInt
<size_t> allocLength(lengthPlusOne
);
171 // Add space for zero-termination
173 // We need UTF-16 units
174 allocLength
*= sizeof(char16_t
);
176 if (!allocLength
.isValid()) {
180 char16_t
* dest
= (char16_t
*)malloc(allocLength
.value());
185 size_t written
= ConvertUtf8toUtf16(aSource
, Span(dest
, lengthPlusOne
));
189 *aUTF16Count
= written
;
195 char16_t
* UTF8ToNewUnicode(const nsACString
& aSource
, uint32_t* aUTF16Count
) {
196 char16_t
* str
= UTF8ToNewUnicode(aSource
, aUTF16Count
, mozilla::fallible
);
198 MOZ_CRASH("Unable to allocate memory");
203 char16_t
* CopyUnicodeTo(const nsAString
& aSource
, uint32_t aSrcOffset
,
204 char16_t
* aDest
, uint32_t aLength
) {
205 MOZ_ASSERT(aSrcOffset
+ aLength
<= aSource
.Length());
206 memcpy(aDest
, aSource
.BeginReading() + aSrcOffset
,
207 size_t(aLength
) * sizeof(char16_t
));
211 void ToUpperCase(nsACString
& aCString
) {
212 char* cp
= aCString
.BeginWriting();
213 char* end
= cp
+ aCString
.Length();
216 if (ch
>= 'a' && ch
<= 'z') {
217 *cp
= ch
- ('a' - 'A');
223 void ToUpperCase(const nsACString
& aSource
, nsACString
& aDest
) {
224 aDest
.SetLength(aSource
.Length());
225 const char* src
= aSource
.BeginReading();
226 const char* end
= src
+ aSource
.Length();
227 char* dst
= aDest
.BeginWriting();
230 if (ch
>= 'a' && ch
<= 'z') {
231 *dst
= ch
- ('a' - 'A');
240 void ToLowerCase(nsACString
& aCString
) {
241 char* cp
= aCString
.BeginWriting();
242 char* end
= cp
+ aCString
.Length();
245 if (ch
>= 'A' && ch
<= 'Z') {
246 *cp
= ch
+ ('a' - 'A');
252 void ToLowerCase(const nsACString
& aSource
, nsACString
& aDest
) {
253 aDest
.SetLength(aSource
.Length());
254 const char* src
= aSource
.BeginReading();
255 const char* end
= src
+ aSource
.Length();
256 char* dst
= aDest
.BeginWriting();
259 if (ch
>= 'A' && ch
<= 'Z') {
260 *dst
= ch
+ ('a' - 'A');
269 void ParseString(const nsACString
& aSource
, char aDelimiter
,
270 nsTArray
<nsCString
>& aArray
) {
271 nsACString::const_iterator start
, end
;
272 aSource
.BeginReading(start
);
273 aSource
.EndReading(end
);
276 nsACString::const_iterator delimiter
= start
;
277 FindCharInReadable(aDelimiter
, delimiter
, end
);
279 if (delimiter
!= start
) {
280 aArray
.AppendElement(Substring(start
, delimiter
));
283 if (delimiter
== end
) {
293 template <class StringT
, class IteratorT
>
294 bool FindInReadable_Impl(
295 const StringT
& aPattern
, IteratorT
& aSearchStart
, IteratorT
& aSearchEnd
,
296 nsTStringComparator
<typename
StringT::char_type
> aCompare
) {
297 bool found_it
= false;
299 // only bother searching at all if we're given a non-empty range to search
300 if (aSearchStart
!= aSearchEnd
) {
301 IteratorT aPatternStart
, aPatternEnd
;
302 aPattern
.BeginReading(aPatternStart
);
303 aPattern
.EndReading(aPatternEnd
);
305 // outer loop keeps searching till we find it or run out of string to search
307 // fast inner loop (that's what it's called, not what it is) looks for a
309 while (aSearchStart
!= aSearchEnd
&&
310 aCompare(aPatternStart
.get(), aSearchStart
.get(), 1, 1)) {
314 // if we broke out of the `fast' loop because we're out of string ...
315 // we're done: no match
316 if (aSearchStart
== aSearchEnd
) {
320 // otherwise, we're at a potential match, let's see if we really hit one
321 IteratorT
testPattern(aPatternStart
);
322 IteratorT
testSearch(aSearchStart
);
324 // slow inner loop verifies the potential match (found by the `fast' loop)
325 // at the current position
327 // we already compared the first character in the outer loop,
328 // so we'll advance before the next comparison
332 // if we verified all the way to the end of the pattern, then we found
334 if (testPattern
== aPatternEnd
) {
336 aSearchEnd
= testSearch
; // return the exact found range through the
341 // if we got to end of the string we're searching before we hit the end
343 // pattern, we'll never find what we're looking for
344 if (testSearch
== aSearchEnd
) {
345 aSearchStart
= aSearchEnd
;
349 // else if we mismatched ... it's time to advance to the next search
351 // and get back into the `fast' loop
352 if (aCompare(testPattern
.get(), testSearch
.get(), 1, 1)) {
364 * This searches the entire string from right to left, and returns the first
365 * match found, if any.
367 template <class StringT
, class IteratorT
>
368 bool RFindInReadable_Impl(
369 const StringT
& aPattern
, IteratorT
& aSearchStart
, IteratorT
& aSearchEnd
,
370 nsTStringComparator
<typename
StringT::char_type
> aCompare
) {
371 IteratorT patternStart
, patternEnd
, searchEnd
= aSearchEnd
;
372 aPattern
.BeginReading(patternStart
);
373 aPattern
.EndReading(patternEnd
);
375 // Point to the last character in the pattern
377 // outer loop keeps searching till we run out of string to search
378 while (aSearchStart
!= searchEnd
) {
379 // Point to the end position of the next possible match
382 // Check last character, if a match, explore further from here
383 if (aCompare(patternEnd
.get(), searchEnd
.get(), 1, 1) == 0) {
384 // We're at a potential match, let's see if we really hit one
385 IteratorT
testPattern(patternEnd
);
386 IteratorT
testSearch(searchEnd
);
388 // inner loop verifies the potential match at the current position
390 // if we verified all the way to the end of the pattern, then we found
392 if (testPattern
== patternStart
) {
393 aSearchStart
= testSearch
; // point to start of match
394 aSearchEnd
= ++searchEnd
; // point to end of match
398 // if we got to end of the string we're searching before we hit the end
400 // pattern, we'll never find what we're looking for
401 if (testSearch
== aSearchStart
) {
402 aSearchStart
= aSearchEnd
;
406 // test previous character for a match
409 } while (aCompare(testPattern
.get(), testSearch
.get(), 1, 1) == 0);
413 aSearchStart
= aSearchEnd
;
417 bool FindInReadable(const nsAString
& aPattern
,
418 nsAString::const_iterator
& aSearchStart
,
419 nsAString::const_iterator
& aSearchEnd
,
420 nsStringComparator aComparator
) {
421 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
424 bool FindInReadable(const nsACString
& aPattern
,
425 nsACString::const_iterator
& aSearchStart
,
426 nsACString::const_iterator
& aSearchEnd
,
427 nsCStringComparator aComparator
) {
428 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
431 bool CaseInsensitiveFindInReadable(const nsACString
& aPattern
,
432 nsACString::const_iterator
& aSearchStart
,
433 nsACString::const_iterator
& aSearchEnd
) {
434 return FindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
,
435 nsCaseInsensitiveCStringComparator
);
438 bool RFindInReadable(const nsAString
& aPattern
,
439 nsAString::const_iterator
& aSearchStart
,
440 nsAString::const_iterator
& aSearchEnd
,
441 const nsStringComparator aComparator
) {
442 return RFindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
445 bool RFindInReadable(const nsACString
& aPattern
,
446 nsACString::const_iterator
& aSearchStart
,
447 nsACString::const_iterator
& aSearchEnd
,
448 const nsCStringComparator aComparator
) {
449 return RFindInReadable_Impl(aPattern
, aSearchStart
, aSearchEnd
, aComparator
);
452 bool FindCharInReadable(char16_t aChar
, nsAString::const_iterator
& aSearchStart
,
453 const nsAString::const_iterator
& aSearchEnd
) {
454 ptrdiff_t fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
456 const char16_t
* charFoundAt
=
457 nsCharTraits
<char16_t
>::find(aSearchStart
.get(), fragmentLength
, aChar
);
459 aSearchStart
.advance(charFoundAt
- aSearchStart
.get());
463 aSearchStart
.advance(fragmentLength
);
467 bool FindCharInReadable(char aChar
, nsACString::const_iterator
& aSearchStart
,
468 const nsACString::const_iterator
& aSearchEnd
) {
469 ptrdiff_t fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
471 const char* charFoundAt
=
472 nsCharTraits
<char>::find(aSearchStart
.get(), fragmentLength
, aChar
);
474 aSearchStart
.advance(charFoundAt
- aSearchStart
.get());
478 aSearchStart
.advance(fragmentLength
);
482 bool StringBeginsWith(const nsAString
& aSource
, const nsAString
& aSubstring
) {
483 nsAString::size_type src_len
= aSource
.Length(),
484 sub_len
= aSubstring
.Length();
485 if (sub_len
> src_len
) {
488 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
);
491 bool StringBeginsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
492 nsStringComparator aComparator
) {
493 nsAString::size_type src_len
= aSource
.Length(),
494 sub_len
= aSubstring
.Length();
495 if (sub_len
> src_len
) {
498 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
, aComparator
);
501 bool StringBeginsWith(const nsACString
& aSource
, const nsACString
& aSubstring
) {
502 nsACString::size_type src_len
= aSource
.Length(),
503 sub_len
= aSubstring
.Length();
504 if (sub_len
> src_len
) {
507 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
);
510 bool StringBeginsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
511 nsCStringComparator aComparator
) {
512 nsACString::size_type src_len
= aSource
.Length(),
513 sub_len
= aSubstring
.Length();
514 if (sub_len
> src_len
) {
517 return Substring(aSource
, 0, sub_len
).Equals(aSubstring
, aComparator
);
520 bool StringEndsWith(const nsAString
& aSource
, const nsAString
& aSubstring
) {
521 nsAString::size_type src_len
= aSource
.Length(),
522 sub_len
= aSubstring
.Length();
523 if (sub_len
> src_len
) {
526 return Substring(aSource
, src_len
- sub_len
, sub_len
).Equals(aSubstring
);
529 bool StringEndsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
530 nsStringComparator aComparator
) {
531 nsAString::size_type src_len
= aSource
.Length(),
532 sub_len
= aSubstring
.Length();
533 if (sub_len
> src_len
) {
536 return Substring(aSource
, src_len
- sub_len
, sub_len
)
537 .Equals(aSubstring
, aComparator
);
540 bool StringEndsWith(const nsACString
& aSource
, const nsACString
& aSubstring
) {
541 nsACString::size_type src_len
= aSource
.Length(),
542 sub_len
= aSubstring
.Length();
543 if (sub_len
> src_len
) {
546 return Substring(aSource
, src_len
- sub_len
, sub_len
).Equals(aSubstring
);
549 bool StringEndsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
550 nsCStringComparator aComparator
) {
551 nsACString::size_type src_len
= aSource
.Length(),
552 sub_len
= aSubstring
.Length();
553 if (sub_len
> src_len
) {
556 return Substring(aSource
, src_len
- sub_len
, sub_len
)
557 .Equals(aSubstring
, aComparator
);
560 static const char16_t empty_buffer
[1] = {'\0'};
562 const nsString
& EmptyString() {
563 static const nsDependentString
sEmpty(empty_buffer
);
568 const nsCString
& EmptyCString() {
569 static const nsDependentCString
sEmpty((const char*)empty_buffer
);
574 const nsString
& VoidString() {
575 static const nsString
sNull(mozilla::detail::StringDataFlags::VOIDED
);
580 const nsCString
& VoidCString() {
581 static const nsCString
sNull(mozilla::detail::StringDataFlags::VOIDED
);
586 int32_t CompareUTF8toUTF16(const nsACString
& aUTF8String
,
587 const nsAString
& aUTF16String
, bool* aErr
) {
590 aUTF8String
.BeginReading(u8
);
591 aUTF8String
.EndReading(u8end
);
594 const char16_t
* u16end
;
595 aUTF16String
.BeginReading(u16
);
596 aUTF16String
.EndReading(u16end
);
608 // No need for ASCII optimization, since both NextChar()
609 // calls get inlined.
610 uint32_t scalar8
= UTF8CharEnumerator::NextChar(&u8
, u8end
, aErr
);
611 uint32_t scalar16
= UTF16CharEnumerator::NextChar(&u16
, u16end
, aErr
);
612 if (scalar16
== scalar8
) {
615 if (scalar8
< scalar16
) {
622 void AppendUCS4ToUTF16(const uint32_t aSource
, nsAString
& aDest
) {
623 NS_ASSERTION(IS_VALID_CHAR(aSource
), "Invalid UCS4 char");
624 if (IS_IN_BMP(aSource
)) {
625 aDest
.Append(char16_t(aSource
));
627 aDest
.Append(H_SURROGATE(aSource
));
628 aDest
.Append(L_SURROGATE(aSource
));