Bumping manifests a=b2g-bump
[gecko.git] / xpcom / string / nsReadableUtils.h
blob1e081c47c12cf148ec4497fdd203a7167d7c9561
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // IWYU pragma: private, include "nsString.h"
8 #ifndef nsReadableUtils_h___
9 #define nsReadableUtils_h___
11 /**
12 * I guess the routines in this file are all mis-named.
13 * According to our conventions, they should be |NS_xxx|.
16 #include "mozilla/Assertions.h"
17 #include "nsAString.h"
19 #include "nsTArrayForwardDeclare.h"
21 inline size_t
22 Distance(const nsReadingIterator<char16_t>& aStart,
23 const nsReadingIterator<char16_t>& aEnd)
25 MOZ_ASSERT(aStart.get() <= aEnd.get());
26 return static_cast<size_t>(aEnd.get() - aStart.get());
28 inline size_t
29 Distance(const nsReadingIterator<char>& aStart,
30 const nsReadingIterator<char>& aEnd)
32 MOZ_ASSERT(aStart.get() <= aEnd.get());
33 return static_cast<size_t>(aEnd.get() - aStart.get());
36 void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest);
37 void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
39 void LossyCopyUTF16toASCII(const char16_t* aSource, nsACString& aDest);
40 void CopyASCIItoUTF16(const char* aSource, nsAString& aDest);
42 void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
43 void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
45 void CopyUTF16toUTF8(const char16_t* aSource, nsACString& aDest);
46 void CopyUTF8toUTF16(const char* aSource, nsAString& aDest);
48 void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest);
49 void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
50 NS_WARN_UNUSED_RESULT bool AppendASCIItoUTF16(const nsACString& aSource,
51 nsAString& aDest,
52 const mozilla::fallible_t&);
54 void LossyAppendUTF16toASCII(const char16_t* aSource, nsACString& aDest);
55 void AppendASCIItoUTF16(const char* aSource, nsAString& aDest);
57 void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
58 NS_WARN_UNUSED_RESULT bool AppendUTF16toUTF8(const nsAString& aSource,
59 nsACString& aDest,
60 const mozilla::fallible_t&);
61 void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
62 NS_WARN_UNUSED_RESULT bool AppendUTF8toUTF16(const nsACString& aSource,
63 nsAString& aDest,
64 const mozilla::fallible_t&);
66 void AppendUTF16toUTF8(const char16_t* aSource, nsACString& aDest);
67 void AppendUTF8toUTF16(const char* aSource, nsAString& aDest);
#ifdef MOZ_USE_CHAR16_WRAPPER
/**
 * Overload accepting the |char16ptr_t| wrapper type: casts |aSource| to
 * |const char16_t*| and forwards to the raw-pointer overload of
 * |AppendUTF16toUTF8|.
 *
 * @param aSource the UTF-16 source, wrapped in |char16ptr_t|
 * @param aDest the 8-bit string that receives the appended data
 */
inline void AppendUTF16toUTF8(char16ptr_t aSource, nsACString& aDest)
{
  AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest);
}
#endif
76 /**
77 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
79 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
80 * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
81 * This conversion is not well defined; but it reproduces legacy string behavior.
82 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
84 * @param aSource a 16-bit wide string
85 * @return a new |char| buffer you must free with |nsMemory::Free|.
87 char* ToNewCString(const nsAString& aSource);
90 /**
91 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
93 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
94 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
96 * @param aSource an 8-bit wide string
97 * @return a new |char| buffer you must free with |nsMemory::Free|.
99 char* ToNewCString(const nsACString& aSource);
102 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
104 * Allocates and returns a new |char| buffer which you must free with
105 * |nsMemory::Free|.
106 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
107 * copying |aSource| to your new buffer.
108 * The new buffer is zero-terminated, but that may not help you if |aSource|
109 * contains embedded nulls.
111 * @param aSource a UTF-16 string (made of char16_t's)
112 * @param aUTF8Count the number of 8-bit units that was returned
113 * @return a new |char| buffer you must free with |nsMemory::Free|.
116 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr);
120 * Returns a new |char16_t| buffer containing a zero-terminated copy of
121 * |aSource|.
123 * Allocates and returns a new |char16_t| buffer which you must free with
124 * |nsMemory::Free|.
125 * The new buffer is zero-terminated, but that may not help you if |aSource|
126 * contains embedded nulls.
128 * @param aSource a UTF-16 string
129 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
131 char16_t* ToNewUnicode(const nsAString& aSource);
135 * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
137 * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
138 * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
139 * This conversion is not well defined; but it reproduces legacy string behavior.
140 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
142 * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
143 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
145 char16_t* ToNewUnicode(const nsACString& aSource);
148 * Returns the required length for a char16_t buffer holding
149 * a copy of aSource, using UTF-8 to UTF-16 conversion.
150 * The length does NOT include any space for zero-termination.
152 * @param aSource an 8-bit wide string, UTF-8 encoded
153 * @return length of UTF-16 encoded string copy, not zero-terminated
155 uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource);
158 * Copies the source string into the specified buffer, converting UTF-8 to
159 * UTF-16 in the process. The conversion is well defined for valid UTF-8
160 * strings.
161 * The copied string will be zero-terminated! Any embedded nulls will be
162 * copied nonetheless. It is the caller's responsibility to ensure the buffer
163 * is large enough to hold the string copy plus one char16_t for
164 * zero-termination!
166 * @see CalcUTF8ToUnicodeLength( const nsACString& )
167 * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
169 * @param aSource an 8-bit wide string, UTF-8 encoded
170 * @param aBuffer the buffer holding the converted string copy
171 * @param aUTF16Count receiving optionally the number of 16-bit units that
172 * were copied
173 * @return aBuffer pointer, for convenience
175 char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource,
176 char16_t* aBuffer,
177 uint32_t* aUTF16Count = nullptr);
180 * Returns a new |char16_t| buffer containing a zero-terminated copy
181 * of |aSource|.
183 * Allocates and returns a new |char16_t| buffer which you must free with
184 * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
185 * while copying |aSource| to your new buffer. This conversion is well defined
186 * for a valid UTF-8 string. The new buffer is zero-terminated, but that
187 * may not help you if |aSource| contains embedded nulls.
189 * @param aSource an 8-bit wide string, UTF-8 encoded
190 * @param aUTF16Count the number of 16-bit units that was returned
191 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
192 * (UTF-16 encoded)
194 char16_t* UTF8ToNewUnicode(const nsACString& aSource,
195 uint32_t* aUTF16Count = nullptr);
198 * Copies |aLength| 16-bit code units, starting at offset |aSrcOffset| in |aSource|, to the
199 * |char16_t| buffer |aDest|.
201 * After this operation |aDest| is not null terminated.
203 * @param aSource a UTF-16 string
204 * @param aSrcOffset start offset in the source string
205 * @param aDest a |char16_t| buffer
206 * @param aLength the number of 16-bit code units to copy
207 * @return pointer to destination buffer - identical to |aDest|
209 char16_t* CopyUnicodeTo(const nsAString& aSource,
210 uint32_t aSrcOffset,
211 char16_t* aDest,
212 uint32_t aLength);
216 * Copies 16-bit characters between iterators |aSrcStart| and
217 * |aSrcEnd| to the writable string |aDest|. Similar to the
218 * |nsString::Mid| method.
220 * After this operation |aDest| is not null terminated.
222 * @param aSrcStart start source iterator
223 * @param aSrcEnd end source iterator
224 * @param aDest destination for the copy
226 void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
227 const nsAString::const_iterator& aSrcEnd,
228 nsAString& aDest);
231 * Appends 16-bit characters between iterators |aSrcStart| and
232 * |aSrcEnd| to the writable string |aDest|.
234 * After this operation |aDest| is not null terminated.
236 * @param aSrcStart start source iterator
237 * @param aSrcEnd end source iterator
238 * @param aDest destination for the copy
240 void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
241 const nsAString::const_iterator& aSrcEnd,
242 nsAString& aDest);
245 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range [0x00, 0x7F].
247 * @param aString a 16-bit wide string to scan
249 bool IsASCII(const nsAString& aString);
252 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range [0x00, 0x7F].
254 * @param aString an 8-bit wide string to scan
256 bool IsASCII(const nsACString& aString);
259 * Returns |true| if |aString| is a valid UTF-8 string.
260 * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
261 * It is mainly written to replace, and is roughly equivalent to,
263 * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
265 * (see bug 191541)
266 * As such, it does not check for non-UTF-8 7bit encodings such as
267 * ISO-2022-JP and HZ.
269 * It rejects sequences with the following errors:
271 * byte sequences that cannot be decoded into characters according to
272 * UTF-8's rules (including cases where the input is part of a valid
273 * UTF-8 sequence but starts or ends mid-character)
274 * overlong sequences (i.e., cases where a character was encoded
275 * non-canonically by using more bytes than necessary)
276 * surrogate codepoints (i.e., the codepoints reserved for
277 * representing astral characters in UTF-16)
278 * codepoints above the unicode range (i.e., outside the first 17
279 * planes; higher than U+10FFFF), in accordance with
280 * http://tools.ietf.org/html/rfc3629
281 * when aRejectNonChar is true (the default), any codepoint whose low
282 * 16 bits are 0xFFFE or 0xFFFF
285 * @param aString an 8-bit wide string to scan
286 * @param aRejectNonChar a boolean to control the rejection of utf-8
287 * non characters
289 bool IsUTF8(const nsACString& aString, bool aRejectNonChar = true);
291 bool ParseString(const nsACString& aAstring, char aDelimiter,
292 nsTArray<nsCString>& aArray);
295 * Converts case in place in the argument string.
297 void ToUpperCase(nsACString&);
299 void ToLowerCase(nsACString&);
301 void ToUpperCase(nsCSubstring&);
303 void ToLowerCase(nsCSubstring&);
306 * Converts case from string aSource to aDest.
308 void ToUpperCase(const nsACString& aSource, nsACString& aDest);
310 void ToLowerCase(const nsACString& aSource, nsACString& aDest);
313 * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
315 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
316 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
318 * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
319 * If we need something faster, then we can implement that later.
322 bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
323 nsAString::const_iterator&,
324 const nsStringComparator& = nsDefaultStringComparator());
325 bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
326 nsACString::const_iterator&,
327 const nsCStringComparator& = nsDefaultCStringComparator());
329 /* sometimes we don't care about where the string was, just that we
330 * found it or not */
331 inline bool
332 FindInReadable(const nsAString& aPattern, const nsAString& aSource,
333 const nsStringComparator& aCompare = nsDefaultStringComparator())
335 nsAString::const_iterator start, end;
336 aSource.BeginReading(start);
337 aSource.EndReading(end);
338 return FindInReadable(aPattern, start, end, aCompare);
341 inline bool
342 FindInReadable(const nsACString& aPattern, const nsACString& aSource,
343 const nsCStringComparator& aCompare = nsDefaultCStringComparator())
345 nsACString::const_iterator start, end;
346 aSource.BeginReading(start);
347 aSource.EndReading(end);
348 return FindInReadable(aPattern, start, end, aCompare);
352 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
353 nsACString::const_iterator&,
354 nsACString::const_iterator&);
357 * Finds the rightmost occurrence of |aPattern|
358 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
359 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
362 bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
363 nsAString::const_iterator&,
364 const nsStringComparator& = nsDefaultStringComparator());
365 bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
366 nsACString::const_iterator&,
367 const nsCStringComparator& = nsDefaultCStringComparator());
370 * Finds the leftmost occurrence of |aChar|, if any in the range
371 * |aSearchStart|..|aSearchEnd|.
373 * Returns |true| if a match was found, and adjusts |aSearchStart| to
374 * point to the match. If no match was found, returns |false| and
375 * makes |aSearchStart == aSearchEnd|.
377 bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
378 const nsAString::const_iterator& aSearchEnd);
379 bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
380 const nsACString::const_iterator& aSearchEnd);
383 * Finds the number of occurrences of |aChar| in the string |aStr|
385 uint32_t CountCharInReadable(const nsAString& aStr,
386 char16_t aChar);
387 uint32_t CountCharInReadable(const nsACString& aStr,
388 char aChar);
390 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
391 const nsStringComparator& aComparator =
392 nsDefaultStringComparator());
393 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
394 const nsCStringComparator& aComparator =
395 nsDefaultCStringComparator());
396 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
397 const nsStringComparator& aComparator =
398 nsDefaultStringComparator());
399 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
400 const nsCStringComparator& aComparator =
401 nsDefaultCStringComparator());
403 const nsAFlatString& EmptyString();
404 const nsAFlatCString& EmptyCString();
406 const nsAFlatString& NullString();
407 const nsAFlatCString& NullCString();
410 * Compare a UTF-8 string to an UTF-16 string.
412 * Returns 0 if the strings are equal, -1 if aUTF8String is less
413 * than aUTF16String, and 1 in the reverse case. In case of fatal
414 * error (eg the strings are not valid UTF8 and UTF16 respectively),
415 * this method will return INT32_MIN.
417 int32_t CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
418 const nsASingleFragmentString& aUTF16String);
420 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
/**
 * Asks |aStr| to take on length |aLen| and reports whether it did.
 *
 * Calls |aStr.SetLength(aLen)| and then compares |aStr.Length()| with the
 * requested value, so a resize that did not take effect is reported as
 * |false|. (Presumably this is how callers detect an allocation failure --
 * confirm against the string classes.)
 *
 * @param aStr any object providing |SetLength(uint32_t)| and |Length()|
 * @param aLen the desired length
 * @return |true| if |aStr.Length()| equals |aLen| after the call
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);
  return (aStr.Length() == aLen);
}
430 #endif // !defined(nsReadableUtils_h___)