1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // IWYU pragma: private, include "nsString.h"
8 #ifndef nsReadableUtils_h___
9 #define nsReadableUtils_h___
12 * I guess the routines in this file are all mis-named.
13 * According to our conventions, they should be |NS_xxx|.
16 #include "mozilla/Assertions.h"
17 #include "nsAString.h"
18 #include "mozilla/TextUtils.h"
20 #include "nsTArrayForwardDeclare.h"
22 // From the nsstring crate
// UTF-8 -> UTF-16 conversion helper called by CopyUTF8toUTF16() and
// AppendUTF8toUTF16() below.
//   aThis     - destination string (callers pass &aDest).
//   aOther    - pointer to the source bytes (callers pass aSource.Elements()).
//   aOtherLen - number of source bytes (callers pass aSource.Length()).
//   aOldLen   - AppendUTF8toUTF16() passes aDest.Length() here; presumably the
//               count of existing code units to keep -- confirm in the crate.
// A false return is treated as an allocation failure by the infallible
// wrappers in this header, which respond by calling AllocFailed().
24 bool nsstring_fallible_append_utf8_impl(nsAString
* aThis
, const char* aOther
,
25 size_t aOtherLen
, size_t aOldLen
);
// Latin1 -> UTF-16 conversion helper called by CopyASCIItoUTF16() and
// AppendASCIItoUTF16() below.
//   aThis           - destination string (callers pass &aDest).
//   aOther          - pointer to the source bytes (aSource.Elements()).
//   aOtherLen       - number of source bytes (aSource.Length()).
//   aOldLen         - CopyASCIItoUTF16() passes 0; AppendASCIItoUTF16() passes
//                     aDest.Length().
//   aAllowShrinking - CopyASCIItoUTF16() passes true; AppendASCIItoUTF16()
//                     passes false. The exact effect is defined in the crate
//                     and is not visible from this header.
// A false return is treated as an allocation failure by the infallible
// wrappers in this header, which respond by calling AllocFailed().
27 bool nsstring_fallible_append_latin1_impl(nsAString
* aThis
, const char* aOther
,
28 size_t aOtherLen
, size_t aOldLen
,
29 bool aAllowShrinking
);
31 bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString
* aThis
,
36 bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString
* aThis
,
40 bool aAllowShrinking
);
42 bool nscstring_fallible_append_utf8_to_latin1_lossy_check(
43 nsACString
* aThis
, const nsACString
* aOther
, size_t aOldLen
);
45 bool nscstring_fallible_append_latin1_to_utf8_check(nsACString
* aThis
,
46 const nsACString
* aOther
,
50 inline size_t Distance(const nsReadingIterator
<char16_t
>& aStart
,
51 const nsReadingIterator
<char16_t
>& aEnd
) {
52 MOZ_ASSERT(aStart
.get() <= aEnd
.get());
53 return static_cast<size_t>(aEnd
.get() - aStart
.get());
56 inline size_t Distance(const nsReadingIterator
<char>& aStart
,
57 const nsReadingIterator
<char>& aEnd
) {
58 MOZ_ASSERT(aStart
.get() <= aEnd
.get());
59 return static_cast<size_t>(aEnd
.get() - aStart
.get());
62 // NOTE: Operations that don't need an operand to be an XPCOM string
63 // are in mozilla/TextUtils.h and mozilla/Utf8.h.
66 // Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
68 [[nodiscard
]] inline bool CopyUTF8toUTF16(mozilla::Span
<const char> aSource
,
70 const mozilla::fallible_t
&) {
71 return nsstring_fallible_append_utf8_impl(&aDest
, aSource
.Elements(),
75 inline void CopyUTF8toUTF16(mozilla::Span
<const char> aSource
,
77 if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource
, aDest
, mozilla::fallible
))) {
78 aDest
.AllocFailed(aSource
.Length());
82 [[nodiscard
]] inline bool AppendUTF8toUTF16(mozilla::Span
<const char> aSource
,
84 const mozilla::fallible_t
&) {
85 return nsstring_fallible_append_utf8_impl(&aDest
, aSource
.Elements(),
86 aSource
.Length(), aDest
.Length());
89 inline void AppendUTF8toUTF16(mozilla::Span
<const char> aSource
,
91 if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource
, aDest
, mozilla::fallible
))) {
92 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
97 // Interpret each incoming unsigned byte value as a Unicode scalar value (not
98 // windows-1252!). The function names say "ASCII" instead of "Latin1" for
101 [[nodiscard
]] inline bool CopyASCIItoUTF16(mozilla::Span
<const char> aSource
,
103 const mozilla::fallible_t
&) {
104 return nsstring_fallible_append_latin1_impl(&aDest
, aSource
.Elements(),
105 aSource
.Length(), 0, true);
108 inline void CopyASCIItoUTF16(mozilla::Span
<const char> aSource
,
110 if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource
, aDest
, mozilla::fallible
))) {
111 aDest
.AllocFailed(aSource
.Length());
115 [[nodiscard
]] inline bool AppendASCIItoUTF16(mozilla::Span
<const char> aSource
,
117 const mozilla::fallible_t
&) {
118 return nsstring_fallible_append_latin1_impl(
119 &aDest
, aSource
.Elements(), aSource
.Length(), aDest
.Length(), false);
122 inline void AppendASCIItoUTF16(mozilla::Span
<const char> aSource
,
124 if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource
, aDest
, mozilla::fallible
))) {
125 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
130 // Unpaired surrogates are replaced with the REPLACEMENT CHARACTER.
132 [[nodiscard
]] inline bool CopyUTF16toUTF8(mozilla::Span
<const char16_t
> aSource
,
134 const mozilla::fallible_t
&) {
135 return nscstring_fallible_append_utf16_to_utf8_impl(
136 &aDest
, aSource
.Elements(), aSource
.Length(), 0);
139 inline void CopyUTF16toUTF8(mozilla::Span
<const char16_t
> aSource
,
141 if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource
, aDest
, mozilla::fallible
))) {
142 aDest
.AllocFailed(aSource
.Length());
146 [[nodiscard
]] inline bool AppendUTF16toUTF8(
147 mozilla::Span
<const char16_t
> aSource
, nsACString
& aDest
,
148 const mozilla::fallible_t
&) {
149 return nscstring_fallible_append_utf16_to_utf8_impl(
150 &aDest
, aSource
.Elements(), aSource
.Length(), aDest
.Length());
153 inline void AppendUTF16toUTF8(mozilla::Span
<const char16_t
> aSource
,
155 if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource
, aDest
, mozilla::fallible
))) {
156 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
161 // If all code points in the input are below U+0100, represents each scalar
162 // value as an unsigned byte. (This is not windows-1252!) If there are code
163 // points above U+00FF, memory-safely produces garbage and will likely start
164 // asserting in future debug builds. The nature of the garbage may differ
165 // based on CPU architecture and must not be relied upon. The names say
166 // "ASCII" instead of "Latin1" for legacy reasons.
168 [[nodiscard
]] inline bool LossyCopyUTF16toASCII(
169 mozilla::Span
<const char16_t
> aSource
, nsACString
& aDest
,
170 const mozilla::fallible_t
&) {
171 return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
172 &aDest
, aSource
.Elements(), aSource
.Length(), 0, true);
175 inline void LossyCopyUTF16toASCII(mozilla::Span
<const char16_t
> aSource
,
177 if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource
, aDest
, mozilla::fallible
))) {
178 aDest
.AllocFailed(aSource
.Length());
182 [[nodiscard
]] inline bool LossyAppendUTF16toASCII(
183 mozilla::Span
<const char16_t
> aSource
, nsACString
& aDest
,
184 const mozilla::fallible_t
&) {
185 return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
186 &aDest
, aSource
.Elements(), aSource
.Length(), aDest
.Length(), false);
189 inline void LossyAppendUTF16toASCII(mozilla::Span
<const char16_t
> aSource
,
192 !LossyAppendUTF16toASCII(aSource
, aDest
, mozilla::fallible
))) {
193 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
198 // Interpret each incoming unsigned byte value as a Unicode scalar value (not
200 // If the input is ASCII, the heap-allocated nsStringBuffer is shared if
203 [[nodiscard
]] inline bool CopyLatin1toUTF8(const nsACString
& aSource
,
205 const mozilla::fallible_t
&) {
206 return nscstring_fallible_append_latin1_to_utf8_check(&aDest
, &aSource
, 0);
209 inline void CopyLatin1toUTF8(const nsACString
& aSource
, nsACString
& aDest
) {
210 if (MOZ_UNLIKELY(!CopyLatin1toUTF8(aSource
, aDest
, mozilla::fallible
))) {
211 aDest
.AllocFailed(aSource
.Length());
215 [[nodiscard
]] inline bool AppendLatin1toUTF8(const nsACString
& aSource
,
217 const mozilla::fallible_t
&) {
218 return nscstring_fallible_append_latin1_to_utf8_check(&aDest
, &aSource
,
222 inline void AppendLatin1toUTF8(const nsACString
& aSource
, nsACString
& aDest
) {
223 if (MOZ_UNLIKELY(!AppendLatin1toUTF8(aSource
, aDest
, mozilla::fallible
))) {
224 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
229 // If all code points in the input are below U+0100, represents each scalar
230 // value as an unsigned byte. (This is not windows-1252!) If there are code
231 // points above U+00FF, memory-safely produces garbage in release builds and
232 // asserts in debug builds. The nature of the garbage may differ
233 // based on CPU architecture and must not be relied upon.
234 // If the input is ASCII, the heap-allocated nsStringBuffer is shared if
237 [[nodiscard
]] inline bool LossyCopyUTF8toLatin1(const nsACString
& aSource
,
239 const mozilla::fallible_t
&) {
240 return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest
, &aSource
,
244 inline void LossyCopyUTF8toLatin1(const nsACString
& aSource
,
246 if (MOZ_UNLIKELY(!LossyCopyUTF8toLatin1(aSource
, aDest
, mozilla::fallible
))) {
247 aDest
.AllocFailed(aSource
.Length());
251 [[nodiscard
]] inline bool LossyAppendUTF8toLatin1(const nsACString
& aSource
,
253 const mozilla::fallible_t
&) {
254 return nscstring_fallible_append_utf8_to_latin1_lossy_check(&aDest
, &aSource
,
258 inline void LossyAppendUTF8toLatin1(const nsACString
& aSource
,
261 !LossyAppendUTF8toLatin1(aSource
, aDest
, mozilla::fallible
))) {
262 aDest
.AllocFailed(aDest
.Length() + aSource
.Length());
267 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
269 * Infallibly allocates and returns a new |char| buffer which you must
271 * Performs a conversion with LossyConvertUTF16toLatin1() writing into the
272 * newly-allocated buffer.
274 * The new buffer is zero-terminated, but that may not help you if |aSource|
275 * contains embedded nulls.
277 * @param aSource a 16-bit wide string
278 * @return a new |char| buffer you must free with |free|.
280 char* ToNewCString(const nsAString
& aSource
);
282 /* A fallible version of ToNewCString. Returns nullptr on failure. */
283 char* ToNewCString(const nsAString
& aSource
,
284 const mozilla::fallible_t
& aFallible
);
287 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
289 * Infallibly allocates and returns a new |char| buffer which you must
292 * The new buffer is zero-terminated, but that may not help you if |aSource|
293 * contains embedded nulls.
295 * @param aSource an 8-bit wide string
296 * @return a new |char| buffer you must free with |free|.
298 char* ToNewCString(const nsACString
& aSource
);
300 /* A fallible version of ToNewCString. Returns nullptr on failure. */
301 char* ToNewCString(const nsACString
& aSource
,
302 const mozilla::fallible_t
& aFallible
);
305 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
307 * Infallibly allocates and returns a new |char| buffer which you must
309 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with
310 * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying
311 * |aSource| to your new buffer.
313 * The new buffer is zero-terminated, but that may not help you if |aSource|
314 * contains embedded nulls.
316 * @param aSource a UTF-16 string (made of char16_t's)
317 * @param aUTF8Count the number of 8-bit units that were returned
318 * @return a new |char| buffer you must free with |free|.
320 char* ToNewUTF8String(const nsAString
& aSource
, uint32_t* aUTF8Count
= nullptr);
322 /* A fallible version of ToNewUTF8String. Returns nullptr on failure. */
323 char* ToNewUTF8String(const nsAString
& aSource
, uint32_t* aUTF8Count
,
324 const mozilla::fallible_t
& aFallible
);
327 * Returns a new |char16_t| buffer containing a zero-terminated copy
330 * Infallibly allocates and returns a new |char16_t| buffer which you must
333 * The new buffer is zero-terminated, but that may not help you if |aSource|
334 * contains embedded nulls.
336 * @param aSource a UTF-16 string
337 * @return a new |char16_t| buffer you must free with |free|.
339 char16_t
* ToNewUnicode(const nsAString
& aSource
);
341 /* A fallible version of ToNewUnicode. Returns nullptr on failure. */
342 char16_t
* ToNewUnicode(const nsAString
& aSource
,
343 const mozilla::fallible_t
& aFallible
);
346 * Returns a new |char16_t| buffer containing a zero-terminated copy
349 * Infallibly allocates and returns a new |char16_t| buffer which you must
352 * Performs an encoding conversion by 0-padding 8-bit wide characters up to
353 * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource|
354 * to your new buffer.
356 * The new buffer is zero-terminated, but that may not help you if |aSource|
357 * contains embedded nulls.
359 * @param aSource a Latin1 string
360 * @return a new |char16_t| buffer you must free with |free|.
362 char16_t
* ToNewUnicode(const nsACString
& aSource
);
364 /* A fallible version of ToNewUnicode. Returns nullptr on failure. */
365 char16_t
* ToNewUnicode(const nsACString
& aSource
,
366 const mozilla::fallible_t
& aFallible
);
369 * Returns a new |char16_t| buffer containing a zero-terminated copy
372 * Infallibly allocates and returns a new |char16_t| buffer which you must
373 * free with |free|. Performs an encoding conversion from UTF-8 to UTF-16
374 * while copying |aSource| to your new buffer. Malformed byte sequences
375 * are replaced with the REPLACEMENT CHARACTER.
377 * The new buffer is zero-terminated, but that may not help you if |aSource|
378 * contains embedded nulls.
380 * @param aSource an 8-bit wide string, UTF-8 encoded
381 * @param aUTF16Count the number of 16-bit units that were returned
382 * @return a new |char16_t| buffer you must free with |free|.
385 char16_t
* UTF8ToNewUnicode(const nsACString
& aSource
,
386 uint32_t* aUTF16Count
= nullptr);
388 /* A fallible version of UTF8ToNewUnicode. Returns nullptr on failure. */
389 char16_t
* UTF8ToNewUnicode(const nsACString
& aSource
, uint32_t* aUTF16Count
,
390 const mozilla::fallible_t
& aFallible
);
393 * Copies |aLength| 16-bit code units from |aSource|, starting at offset
394 * |aSrcOffset|, to the |char16_t| buffer |aDest|.
396 * After this operation |aDest| is not null terminated.
398 * @param aSource a UTF-16 string
399 * @param aSrcOffset start offset in the source string
400 * @param aDest a |char16_t| buffer
401 * @param aLength the number of 16-bit code units to copy
402 * @return pointer to destination buffer - identical to |aDest|
404 char16_t
* CopyUnicodeTo(const nsAString
& aSource
, uint32_t aSrcOffset
,
405 char16_t
* aDest
, uint32_t aLength
);
408 * Replaces unpaired surrogates with U+FFFD in the argument.
410 * Copies a shared string buffer or an otherwise read-only
411 * buffer only if there are unpaired surrogates.
413 [[nodiscard
]] inline bool EnsureUTF16Validity(nsAString
& aString
) {
414 size_t upTo
= mozilla::Utf16ValidUpTo(aString
);
415 size_t len
= aString
.Length();
419 char16_t
* ptr
= aString
.BeginWriting(mozilla::fallible
);
423 auto span
= mozilla::Span(ptr
, len
);
425 mozilla::EnsureUtf16ValiditySpan(span
.From(upTo
+ 1));
// NOTE(review): this declaration carries no documentation in the header.
// Judging by the signature alone it presumably splits aSource at each
// aDelimiter and stores the pieces in aArray; whether existing elements of
// aArray are kept and how empty pieces are treated is not visible here --
// confirm against the implementation before relying on either.
429 void ParseString(const nsACString
& aSource
, char aDelimiter
,
430 nsTArray
<nsCString
>& aArray
);
432 namespace mozilla::detail
{
// Default per-item functor for StringJoinAppend() and StringJoin(): it appends
// aValue to aResult by calling aResult.Append(aValue), applying no
// transformation of its own. Both parameters are deduced (generic lambda), so
// any aResult type offering a matching Append() is accepted.
434 constexpr auto kStringJoinAppendDefault
=
435 [](auto& aResult
, const auto& aValue
) { aResult
.Append(aValue
); };
437 } // namespace mozilla::detail
440 * Join a sequence of items, each optionally transformed to a string, with a
441 * given separator, appending to a given string.
443 * \tparam CharType char or char16_t
444 * \tparam InputRange a range usable with range-based for
445 * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and
446 * an item of InputRange which appends the latter to the former
449 typename CharType
, typename InputRange
,
450 typename Func
= const decltype(mozilla::detail::kStringJoinAppendDefault
)&>
451 void StringJoinAppend(
452 nsTSubstring
<CharType
>& aOutput
,
453 const nsTLiteralString
<CharType
>& aSeparator
, const InputRange
& aInputRange
,
454 Func
&& aFunc
= mozilla::detail::kStringJoinAppendDefault
) {
456 for (const auto& item
: aInputRange
) {
460 aOutput
.Append(aSeparator
);
463 aFunc(aOutput
, item
);
468 * Join a sequence of items, each optionally transformed to a string, with a
469 * given separator, returning a new string.
471 * \tparam CharType char or char16_t
472 * \tparam InputRange a range usable with range-based for
473 * \tparam Func optionally, a functor accepting a nsTSubstring<CharType>& and
474 * an item of InputRange which appends the latter to the former
478 typename CharType
, typename InputRange
,
479 typename Func
= const decltype(mozilla::detail::kStringJoinAppendDefault
)&>
480 auto StringJoin(const nsTLiteralString
<CharType
>& aSeparator
,
481 const InputRange
& aInputRange
,
482 Func
&& aFunc
= mozilla::detail::kStringJoinAppendDefault
) {
483 nsTAutoString
<CharType
> res
;
484 StringJoinAppend(res
, aSeparator
, aInputRange
, std::forward
<Func
>(aFunc
));
489 * Converts case in place in the argument string.
491 void ToUpperCase(nsACString
&);
493 void ToLowerCase(nsACString
&);
495 void ToUpperCase(nsACString
&);
497 void ToLowerCase(nsACString
&);
500 * Converts case from string aSource to aDest.
502 void ToUpperCase(const nsACString
& aSource
, nsACString
& aDest
);
504 void ToLowerCase(const nsACString
& aSource
, nsACString
& aDest
);
507 * Finds the leftmost occurrence of |aPattern|, if any in the range
508 * |aSearchStart|..|aSearchEnd|.
510 * Returns |true| if a match was found, and adjusts |aSearchStart| and
511 * |aSearchEnd| to point to the match. If no match was found, returns |false|
512 * and makes |aSearchStart == aSearchEnd|.
514 * Currently, this is equivalent to the O(m*n) implementation previously on
517 * If we need something faster, then we can implement that later.
520 bool FindInReadable(const nsAString
& aPattern
, nsAString::const_iterator
&,
521 nsAString::const_iterator
&,
522 nsStringComparator
= nsTDefaultStringComparator
);
523 bool FindInReadable(const nsACString
& aPattern
, nsACString::const_iterator
&,
524 nsACString::const_iterator
&,
525 nsCStringComparator
= nsTDefaultStringComparator
);
527 /* sometimes we don't care about where the string was, just that we
529 inline bool FindInReadable(
530 const nsAString
& aPattern
, const nsAString
& aSource
,
531 nsStringComparator aCompare
= nsTDefaultStringComparator
) {
532 nsAString::const_iterator start
, end
;
533 aSource
.BeginReading(start
);
534 aSource
.EndReading(end
);
535 return FindInReadable(aPattern
, start
, end
, aCompare
);
538 inline bool FindInReadable(
539 const nsACString
& aPattern
, const nsACString
& aSource
,
540 nsCStringComparator aCompare
= nsTDefaultStringComparator
) {
541 nsACString::const_iterator start
, end
;
542 aSource
.BeginReading(start
);
543 aSource
.EndReading(end
);
544 return FindInReadable(aPattern
, start
, end
, aCompare
);
547 bool CaseInsensitiveFindInReadable(const nsACString
& aPattern
,
548 nsACString::const_iterator
&,
549 nsACString::const_iterator
&);
552 * Finds the rightmost occurrence of |aPattern|.
553 * Returns |true| if a match was found, and adjusts |aSearchStart| and
554 * |aSearchEnd| to point to the match. If no match was found, returns |false|
555 * and makes |aSearchStart == aSearchEnd|.
557 bool RFindInReadable(const nsAString
& aPattern
, nsAString::const_iterator
&,
558 nsAString::const_iterator
&,
559 nsStringComparator
= nsTDefaultStringComparator
);
560 bool RFindInReadable(const nsACString
& aPattern
, nsACString::const_iterator
&,
561 nsACString::const_iterator
&,
562 nsCStringComparator
= nsTDefaultStringComparator
);
565 * Finds the leftmost occurrence of |aChar|, if any in the range
566 * |aSearchStart|..|aSearchEnd|.
568 * Returns |true| if a match was found, and adjusts |aSearchStart| to
569 * point to the match. If no match was found, returns |false| and
570 * makes |aSearchStart == aSearchEnd|.
572 bool FindCharInReadable(char16_t aChar
, nsAString::const_iterator
& aSearchStart
,
573 const nsAString::const_iterator
& aSearchEnd
);
574 bool FindCharInReadable(char aChar
, nsACString::const_iterator
& aSearchStart
,
575 const nsACString::const_iterator
& aSearchEnd
);
577 bool StringBeginsWith(const nsAString
& aSource
, const nsAString
& aSubstring
);
578 bool StringBeginsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
580 bool StringBeginsWith(const nsACString
& aSource
, const nsACString
& aSubstring
);
581 bool StringBeginsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
582 nsCStringComparator
);
583 bool StringEndsWith(const nsAString
& aSource
, const nsAString
& aSubstring
);
584 bool StringEndsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
586 bool StringEndsWith(const nsACString
& aSource
, const nsACString
& aSubstring
);
587 bool StringEndsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
588 nsCStringComparator
);
590 const nsString
& EmptyString();
591 const nsCString
& EmptyCString();
593 const nsString
& VoidString();
594 const nsCString
& VoidCString();
597 * Compare a UTF-8 string to a UTF-16 string.
599 * Returns 0 if the strings are equal, -1 if aUTF8String is less
600 * than aUTF16String, and 1 in the reverse case. Errors are replaced
601 * with U+FFFD and then the U+FFFD is compared as if it had occurred
602 * in the input. If aErr is not nullptr, *aErr is set to true if
603 * either string had malformed sequences.
605 int32_t CompareUTF8toUTF16(const nsACString
& aUTF8String
,
606 const nsAString
& aUTF16String
, bool* aErr
= nullptr);
608 void AppendUCS4ToUTF16(const uint32_t aSource
, nsAString
& aDest
);
610 #endif // !defined(nsReadableUtils_h___)