1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // IWYU pragma: private, include "nsString.h"
8 #ifndef nsReadableUtils_h___
9 #define nsReadableUtils_h___
12 * I guess the routines in this file are all mis-named.
13 * According to our conventions, they should be |NS_xxx|.
16 #include "mozilla/Assertions.h"
17 #include "nsAString.h"
19 #include "nsTArrayForwardDeclare.h"
22 Distance(const nsReadingIterator
<char16_t
>& aStart
,
23 const nsReadingIterator
<char16_t
>& aEnd
)
25 MOZ_ASSERT(aStart
.get() <= aEnd
.get());
26 return static_cast<size_t>(aEnd
.get() - aStart
.get());
29 Distance(const nsReadingIterator
<char>& aStart
,
30 const nsReadingIterator
<char>& aEnd
)
32 MOZ_ASSERT(aStart
.get() <= aEnd
.get());
33 return static_cast<size_t>(aEnd
.get() - aStart
.get());
36 void LossyCopyUTF16toASCII(const nsAString
& aSource
, nsACString
& aDest
);
37 void CopyASCIItoUTF16(const nsACString
& aSource
, nsAString
& aDest
);
39 void LossyCopyUTF16toASCII(const char16_t
* aSource
, nsACString
& aDest
);
40 void CopyASCIItoUTF16(const char* aSource
, nsAString
& aDest
);
42 void CopyUTF16toUTF8(const nsAString
& aSource
, nsACString
& aDest
);
43 void CopyUTF8toUTF16(const nsACString
& aSource
, nsAString
& aDest
);
45 void CopyUTF16toUTF8(const char16_t
* aSource
, nsACString
& aDest
);
46 void CopyUTF8toUTF16(const char* aSource
, nsAString
& aDest
);
48 void LossyAppendUTF16toASCII(const nsAString
& aSource
, nsACString
& aDest
);
49 void AppendASCIItoUTF16(const nsACString
& aSource
, nsAString
& aDest
);
50 NS_WARN_UNUSED_RESULT
bool AppendASCIItoUTF16(const nsACString
& aSource
,
52 const mozilla::fallible_t
&);
54 void LossyAppendUTF16toASCII(const char16_t
* aSource
, nsACString
& aDest
);
55 void AppendASCIItoUTF16(const char* aSource
, nsAString
& aDest
);
57 void AppendUTF16toUTF8(const nsAString
& aSource
, nsACString
& aDest
);
58 NS_WARN_UNUSED_RESULT
bool AppendUTF16toUTF8(const nsAString
& aSource
,
60 const mozilla::fallible_t
&);
61 void AppendUTF8toUTF16(const nsACString
& aSource
, nsAString
& aDest
);
62 NS_WARN_UNUSED_RESULT
bool AppendUTF8toUTF16(const nsACString
& aSource
,
64 const mozilla::fallible_t
&);
66 void AppendUTF16toUTF8(const char16_t
* aSource
, nsACString
& aDest
);
67 void AppendUTF8toUTF16(const char* aSource
, nsAString
& aDest
);
#ifdef MOZ_USE_CHAR16_WRAPPER
/**
 * Overload for |char16ptr_t| sources, only available when
 * MOZ_USE_CHAR16_WRAPPER is defined. Casts |aSource| to |const char16_t*|
 * and forwards to the raw-pointer overload of AppendUTF16toUTF8.
 *
 * @param aSource the wrapped UTF-16 pointer to read from
 * @param aDest   the 8-bit string handed on to the forwarded call
 */
inline void AppendUTF16toUTF8(char16ptr_t aSource, nsACString& aDest)
{
  AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest);
}
#endif
77 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
79 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
80 * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
81 * This conversion is not well defined; but it reproduces legacy string behavior.
82 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
84 * @param aSource a 16-bit wide string
85 * @return a new |char| buffer you must free with |nsMemory::Free|.
87 char* ToNewCString(const nsAString
& aSource
);
91 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
93 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
94 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
96 * @param aSource an 8-bit wide string
97 * @return a new |char| buffer you must free with |nsMemory::Free|.
99 char* ToNewCString(const nsACString
& aSource
);
102 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
104 * Allocates and returns a new |char| buffer which you must free with
106 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
107 * copying |aSource| to your new buffer.
108 * The new buffer is zero-terminated, but that may not help you if |aSource|
109 * contains embedded nulls.
111 * @param aSource a UTF-16 string (made of char16_t's)
112 * @param aUTF8Count the number of 8-bit units that was returned
113 * @return a new |char| buffer you must free with |nsMemory::Free|.
116 char* ToNewUTF8String(const nsAString
& aSource
, uint32_t* aUTF8Count
= nullptr);
120 * Returns a new |char16_t| buffer containing a zero-terminated copy of
123 * Allocates and returns a new |char16_t| buffer which you must free with
125 * The new buffer is zero-terminated, but that may not help you if |aSource|
126 * contains embedded nulls.
128 * @param aSource a UTF-16 string
129 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
131 char16_t
* ToNewUnicode(const nsAString
& aSource
);
135 * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
137 * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
138 * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
139 * This conversion is not well defined; but it reproduces legacy string behavior.
140 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
142 * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
143 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
145 char16_t
* ToNewUnicode(const nsACString
& aSource
);
148 * Returns the required length for a char16_t buffer holding
149 * a copy of aSource, using UTF-8 to UTF-16 conversion.
150 * The length does NOT include any space for zero-termination.
152 * @param aSource an 8-bit wide string, UTF-8 encoded
153 * @return length of UTF-16 encoded string copy, not zero-terminated
155 uint32_t CalcUTF8ToUnicodeLength(const nsACString
& aSource
);
158 * Copies the source string into the specified buffer, converting UTF-8 to
159 * UTF-16 in the process. The conversion is well defined for valid UTF-8
161 * The copied string will be zero-terminated! Any embedded nulls will be
162 * copied nonetheless. It is the caller's responsibility to ensure the buffer
163 * is large enough to hold the string copy plus one char16_t for
166 * @see CalcUTF8ToUnicodeLength( const nsACString& )
167 * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
169 * @param aSource an 8-bit wide string, UTF-8 encoded
170 * @param aBuffer the buffer holding the converted string copy
171 * @param aUTF16Count receiving optionally the number of 16-bit units that
173 * @return aBuffer pointer, for convenience
175 char16_t
* UTF8ToUnicodeBuffer(const nsACString
& aSource
,
177 uint32_t* aUTF16Count
= nullptr);
180 * Returns a new |char16_t| buffer containing a zero-terminated copy
183 * Allocates and returns a new |char| buffer which you must free with
184 * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
185 * while copying |aSource| to your new buffer. This conversion is well defined
186 * for a valid UTF-8 string. The new buffer is zero-terminated, but that
187 * may not help you if |aSource| contains embedded nulls.
189 * @param aSource an 8-bit wide string, UTF-8 encoded
190 * @param aUTF16Count the number of 16-bit units that was returned
191 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
194 char16_t
* UTF8ToNewUnicode(const nsACString
& aSource
,
195 uint32_t* aUTF16Count
= nullptr);
198 * Copies |aLength| 16-bit code units from the start of |aSource| to the
199 * |char16_t| buffer |aDest|.
201 * After this operation |aDest| is not null terminated.
203 * @param aSource a UTF-16 string
204 * @param aSrcOffset start offset in the source string
205 * @param aDest a |char16_t| buffer
206 * @param aLength the number of 16-bit code units to copy
207 * @return pointer to destination buffer - identical to |aDest|
209 char16_t
* CopyUnicodeTo(const nsAString
& aSource
,
216 * Copies 16-bit characters between iterators |aSrcStart| and
217 * |aSrcEnd| to the writable string |aDest|. Similar to the
218 * |nsString::Mid| method.
220 * After this operation |aDest| is not null terminated.
222 * @param aSrcStart start source iterator
223 * @param aSrcEnd end source iterator
224 * @param aDest destination for the copy
226 void CopyUnicodeTo(const nsAString::const_iterator
& aSrcStart
,
227 const nsAString::const_iterator
& aSrcEnd
,
231 * Appends 16-bit characters between iterators |aSrcStart| and
232 * |aSrcEnd| to the writable string |aDest|.
234 * After this operation |aDest| is not null terminated.
236 * @param aSrcStart start source iterator
237 * @param aSrcEnd end source iterator
238 * @param aDest destination for the copy
240 void AppendUnicodeTo(const nsAString::const_iterator
& aSrcStart
,
241 const nsAString::const_iterator
& aSrcEnd
,
245 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
247 * @param aString a 16-bit wide string to scan
249 bool IsASCII(const nsAString
& aString
);
252 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
254 * @param aString a 8-bit wide string to scan
256 bool IsASCII(const nsACString
& aString
);
259 * Returns |true| if |aString| is a valid UTF-8 string.
260 * XXX This is not bullet-proof and nor an all-purpose UTF-8 validator.
261 * It is mainly written to replace and roughly equivalent to
263 * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
266 * As such, it does not check for non-UTF-8 7bit encodings such as
267 * ISO-2022-JP and HZ.
269 * It rejects sequences with the following errors:
271 * byte sequences that cannot be decoded into characters according to
272 * UTF-8's rules (including cases where the input is part of a valid
273 * UTF-8 sequence but starts or ends mid-character)
274 * overlong sequences (i.e., cases where a character was encoded
275 * non-canonically by using more bytes than necessary)
276 * surrogate codepoints (i.e., the codepoints reserved for
277 representing astral characters in UTF-16)
278 * codepoints above the unicode range (i.e., outside the first 17
279 * planes; higher than U+10FFFF), in accordance with
280 * http://tools.ietf.org/html/rfc3629
281 * when aRejectNonChar is true (the default), any codepoint whose low
282 * 16 bits are 0xFFFE or 0xFFFF
285 * @param aString an 8-bit wide string to scan
286 * @param aRejectNonChar a boolean to control the rejection of utf-8
289 bool IsUTF8(const nsACString
& aString
, bool aRejectNonChar
= true);
291 bool ParseString(const nsACString
& aAstring
, char aDelimiter
,
292 nsTArray
<nsCString
>& aArray
);
295 * Converts case in place in the argument string.
297 void ToUpperCase(nsACString
&);
299 void ToLowerCase(nsACString
&);
301 void ToUpperCase(nsCSubstring
&);
303 void ToLowerCase(nsCSubstring
&);
306 * Converts case from string aSource to aDest.
308 void ToUpperCase(const nsACString
& aSource
, nsACString
& aDest
);
310 void ToLowerCase(const nsACString
& aSource
, nsACString
& aDest
);
313 * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
315 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
316 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
318 * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
319 * If we need something faster, then we can implement that later.
322 bool FindInReadable(const nsAString
& aPattern
, nsAString::const_iterator
&,
323 nsAString::const_iterator
&,
324 const nsStringComparator
& = nsDefaultStringComparator());
325 bool FindInReadable(const nsACString
& aPattern
, nsACString::const_iterator
&,
326 nsACString::const_iterator
&,
327 const nsCStringComparator
& = nsDefaultCStringComparator());
329 /* sometimes we don't care about where the string was, just that we
332 FindInReadable(const nsAString
& aPattern
, const nsAString
& aSource
,
333 const nsStringComparator
& aCompare
= nsDefaultStringComparator())
335 nsAString::const_iterator start
, end
;
336 aSource
.BeginReading(start
);
337 aSource
.EndReading(end
);
338 return FindInReadable(aPattern
, start
, end
, aCompare
);
342 FindInReadable(const nsACString
& aPattern
, const nsACString
& aSource
,
343 const nsCStringComparator
& aCompare
= nsDefaultCStringComparator())
345 nsACString::const_iterator start
, end
;
346 aSource
.BeginReading(start
);
347 aSource
.EndReading(end
);
348 return FindInReadable(aPattern
, start
, end
, aCompare
);
352 bool CaseInsensitiveFindInReadable(const nsACString
& aPattern
,
353 nsACString::const_iterator
&,
354 nsACString::const_iterator
&);
357 * Finds the rightmost occurrence of |aPattern|
358 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
359 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
362 bool RFindInReadable(const nsAString
& aPattern
, nsAString::const_iterator
&,
363 nsAString::const_iterator
&,
364 const nsStringComparator
& = nsDefaultStringComparator());
365 bool RFindInReadable(const nsACString
& aPattern
, nsACString::const_iterator
&,
366 nsACString::const_iterator
&,
367 const nsCStringComparator
& = nsDefaultCStringComparator());
370 * Finds the leftmost occurrence of |aChar|, if any in the range
371 * |aSearchStart|..|aSearchEnd|.
373 * Returns |true| if a match was found, and adjusts |aSearchStart| to
374 * point to the match. If no match was found, returns |false| and
375 * makes |aSearchStart == aSearchEnd|.
377 bool FindCharInReadable(char16_t aChar
, nsAString::const_iterator
& aSearchStart
,
378 const nsAString::const_iterator
& aSearchEnd
);
379 bool FindCharInReadable(char aChar
, nsACString::const_iterator
& aSearchStart
,
380 const nsACString::const_iterator
& aSearchEnd
);
383 * Finds the number of occurrences of |aChar| in the string |aStr|
385 uint32_t CountCharInReadable(const nsAString
& aStr
,
387 uint32_t CountCharInReadable(const nsACString
& aStr
,
390 bool StringBeginsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
391 const nsStringComparator
& aComparator
=
392 nsDefaultStringComparator());
393 bool StringBeginsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
394 const nsCStringComparator
& aComparator
=
395 nsDefaultCStringComparator());
396 bool StringEndsWith(const nsAString
& aSource
, const nsAString
& aSubstring
,
397 const nsStringComparator
& aComparator
=
398 nsDefaultStringComparator());
399 bool StringEndsWith(const nsACString
& aSource
, const nsACString
& aSubstring
,
400 const nsCStringComparator
& aComparator
=
401 nsDefaultCStringComparator());
403 const nsAFlatString
& EmptyString();
404 const nsAFlatCString
& EmptyCString();
406 const nsAFlatString
& NullString();
407 const nsAFlatCString
& NullCString();
410 * Compare a UTF-8 string to an UTF-16 string.
412 * Returns 0 if the strings are equal, -1 if aUTF8String is less
413 * than aUTF16Count, and 1 in the reverse case. In case of fatal
414 * error (eg the strings are not valid UTF8 and UTF16 respectively),
415 * this method will return INT32_MIN.
417 int32_t CompareUTF8toUTF16(const nsASingleFragmentCString
& aUTF8String
,
418 const nsASingleFragmentString
& aUTF16String
);
420 void AppendUCS4ToUTF16(const uint32_t aSource
, nsAString
& aDest
);
/**
 * Resizes |aStr| to |aLen| and reports whether the resize took effect.
 *
 * Calls |aStr.SetLength(aLen)| and then compares |aStr.Length()| with the
 * requested length, so a SetLength that did not produce the requested
 * length shows up as a |false| return.
 *
 * @param aStr any object exposing SetLength(uint32_t) and Length()
 * @param aLen the requested length
 * @return |true| if |aStr.Length()| equals |aLen| after the call
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);
  return (aStr.Length() == aLen);
}
430 #endif // !defined(nsReadableUtils_h___)