xpcom/string/nsReadableUtils.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6 // IWYU pragma: private, include "nsString.h"
   7
   8 #ifndef nsReadableUtils_h___
   9 #define nsReadableUtils_h___
  10
  11 /**
  12  * I guess the routines in this file are all mis-named.
  13  * According to our conventions, they should be |NS_xxx|.
  14  */
  15
  16 #include "mozilla/Assertions.h"
  17 #include "nsAString.h"
  18
  19 #include "nsTArrayForwardDeclare.h"
  20
  21 inline size_t
  22 Distance(const nsReadingIterator<char16_t>& aStart,
  23          const nsReadingIterator<char16_t>& aEnd)
  24 {
  25   MOZ_ASSERT(aStart.get() <= aEnd.get());
  26   return static_cast<size_t>(aEnd.get() - aStart.get());
  27 }
  28 inline size_t
  29 Distance(const nsReadingIterator<char>& aStart,
  30          const nsReadingIterator<char>& aEnd)
  31 {
  32   MOZ_ASSERT(aStart.get() <= aEnd.get());
  33   return static_cast<size_t>(aEnd.get() - aStart.get());
  34 }
  35
  /**
   * Encoding conversions that store the converted form of |aSource| in
   * |aDest|.
   *
   * Every conversion is declared twice: once for a string-class source and
   * once for a raw character-pointer source.  The pointer overloads take no
   * length argument, so they presumably expect zero-terminated input -- TODO
   * confirm against the implementation.
   *
   * NOTE(review): going by the names only, the Copy* routines replace
   * whatever |aDest| held (in contrast to the Append* routines declared
   * later in this file), and the "Lossy" prefix marks the UTF-16 to ASCII
   * direction as one that cannot represent every input character.  The
   * definitions are not visible in this header; verify before relying on it.
   */
  // UTF-16 <-> ASCII, string-class sources.
  36 void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest);
  37 void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
  38
  // UTF-16 <-> ASCII, raw-pointer sources.
  39 void LossyCopyUTF16toASCII(const char16_t* aSource, nsACString& aDest);
  40 void CopyASCIItoUTF16(const char* aSource, nsAString& aDest);
  41
  // UTF-16 <-> UTF-8, string-class sources.
  42 void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
  43 void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
  44
  // UTF-16 <-> UTF-8, raw-pointer sources.
  45 void CopyUTF16toUTF8(const char16_t* aSource, nsACString& aDest);
  46 void CopyUTF8toUTF16(const char* aSource, nsAString& aDest);
  47
  /**
   * Encoding conversions whose names indicate that the converted form of
   * |aSource| is appended to |aDest|.
   *
   * Three of the string-class conversions (ASCII to UTF-16, UTF-16 to UTF-8
   * and UTF-8 to UTF-16) also have an overload that takes a trailing
   * |const mozilla::fallible_t&| tag.  Those overloads return |bool| and are
   * marked NS_WARN_UNUSED_RESULT, so callers are expected to inspect the
   * result.
   *
   * NOTE(review): presumably the |bool| reports whether the append
   * succeeded (|false| on allocation failure) while the |void| overloads do
   * not report failure to the caller -- confirm against the implementation.
   * The raw-pointer overloads take no length argument, so they presumably
   * expect zero-terminated input -- TODO confirm.
   */
  // UTF-16 <-> ASCII, string-class sources.
  48 void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest);
  49 void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
  50 NS_WARN_UNUSED_RESULT bool AppendASCIItoUTF16(const nsACString& aSource,
  51                                               nsAString& aDest,
  52                                               const mozilla::fallible_t&);
  53
  // UTF-16 <-> ASCII, raw-pointer sources.
  54 void LossyAppendUTF16toASCII(const char16_t* aSource, nsACString& aDest);
  55 void AppendASCIItoUTF16(const char* aSource, nsAString& aDest);
  56
  // UTF-16 <-> UTF-8, string-class sources.
  57 void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
  58 NS_WARN_UNUSED_RESULT bool AppendUTF16toUTF8(const nsAString& aSource,
  59                                              nsACString& aDest,
  60                                              const mozilla::fallible_t&);
  61 void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
  62 NS_WARN_UNUSED_RESULT bool AppendUTF8toUTF16(const nsACString& aSource,
  63                                              nsAString& aDest,
  64                                              const mozilla::fallible_t&);
  65
  // UTF-16 <-> UTF-8, raw-pointer sources.
  66 void AppendUTF16toUTF8(const char16_t* aSource, nsACString& aDest);
  67 void AppendUTF8toUTF16(const char* aSource, nsAString& aDest);
  68
  #ifdef MOZ_USE_CHAR16_WRAPPER
  /**
   * Convenience overload for |char16ptr_t| sources, declared only when
   * MOZ_USE_CHAR16_WRAPPER is defined.  Converts the wrapper to a plain
   * |const char16_t*| and hands it to the raw-pointer overload.
   *
   * @param aSource the UTF-16 source, wrapped in |char16ptr_t|
   * @param aDest the string passed on as the destination
   */
  inline void
  AppendUTF16toUTF8(char16ptr_t aSource, nsACString& aDest)
  {
    // The explicit cast selects the |const char16_t*| overload instead of
    // re-entering this one.
    const char16_t* const rawSource = static_cast<const char16_t*>(aSource);
    AppendUTF16toUTF8(rawSource, aDest);
  }
  #endif
  75
  76 /**
  77  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  78  *
  79  * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
  80  * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
  81  * This conversion is not well defined; but it reproduces legacy string behavior.
  82  * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
  83  *
  84  * @param aSource a 16-bit wide string
  85  * @return a new |char| buffer you must free with |nsMemory::Free|.
  86  */
  87 char* ToNewCString(const nsAString& aSource);
  88
  89
  90 /**
  91  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  92  *
  93  * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
  94  * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
  95  *
  96  * @param aSource an 8-bit wide string
  97  * @return a new |char| buffer you must free with |nsMemory::Free|.
  98  */
  99 char* ToNewCString(const nsACString& aSource);
 100
 101 /**
 102  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
 103  *
 104  * Allocates and returns a new |char| buffer which you must free with
 105  * |nsMemory::Free|.
 106  * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
 107  * copying |aSource| to your new buffer.
 108  * The new buffer is zero-terminated, but that may not help you if |aSource|
 109  * contains embedded nulls.
 110  *
 111  * @param aSource a UTF-16 string (made of char16_t's)
 112  * @param aUTF8Count the number of 8-bit units that was returned
 113  * @return a new |char| buffer you must free with |nsMemory::Free|.
 114  */
 115
 116 char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr);
 117
 118
 119 /**
 120  * Returns a new |char16_t| buffer containing a zero-terminated copy of
 121  * |aSource|.
 122  *
 123  * Allocates and returns a new |char16_t| buffer which you must free with
 124  * |nsMemory::Free|.
 125  * The new buffer is zero-terminated, but that may not help you if |aSource|
 126  * contains embedded nulls.
 127  *
 128  * @param aSource a UTF-16 string
 129  * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
 130  */
 131 char16_t* ToNewUnicode(const nsAString& aSource);
 132
 133
 134 /**
 135  * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
 136  *
 137  * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
 138  * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
 139  * This conversion is not well defined; but it reproduces legacy string behavior.
 140  * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
 141  *
 142  * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
 143  * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
 144  */
 145 char16_t* ToNewUnicode(const nsACString& aSource);
 146
 147 /**
 148  * Returns the required length for a char16_t buffer holding
 149  * a copy of aSource, using UTF-8 to UTF-16 conversion.
 150  * The length does NOT include any space for zero-termination.
 151  *
 152  * @param aSource an 8-bit wide string, UTF-8 encoded
 153  * @return length of UTF-16 encoded string copy, not zero-terminated
 154  */
 155 uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource);
 156
 157 /**
 158  * Copies the source string into the specified buffer, converting UTF-8 to
 159  * UTF-16 in the process. The conversion is well defined for valid UTF-8
 160  * strings.
 161  * The copied string will be zero-terminated! Any embedded nulls will be
 162  * copied nonetheless. It is the caller's responsibility to ensure the buffer
 163  * is large enough to hold the string copy plus one char16_t for
 164  * zero-termination!
 165  *
 166  * @see CalcUTF8ToUnicodeLength( const nsACString& )
 167  * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
 168  *
 169  * @param aSource an 8-bit wide string, UTF-8 encoded
 170  * @param aBuffer the buffer holding the converted string copy
 171  * @param aUTF16Count receiving optionally the number of 16-bit units that
 172  *                    were copied
 173  * @return aBuffer pointer, for convenience
 174  */
 175 char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource,
 176                               char16_t* aBuffer,
 177                               uint32_t* aUTF16Count = nullptr);
 178
 179 /**
 180  * Returns a new |char16_t| buffer containing a zero-terminated copy
 181  * of |aSource|.
 182  *
 183  * Allocates and returns a new |char16_t| buffer which you must free with
 184  * |nsMemory::Free|.  Performs an encoding conversion from UTF-8 to UTF-16
 185  * while copying |aSource| to your new buffer.  This conversion is well defined
 186  * for a valid UTF-8 string.  The new buffer is zero-terminated, but that
 187  * may not help you if |aSource| contains embedded nulls.
 188  *
 189  * @param aSource an 8-bit wide string, UTF-8 encoded
 190  * @param aUTF16Count the number of 16-bit units that was returned
 191  * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
 192  *         (UTF-16 encoded)
 193  */
 194 char16_t* UTF8ToNewUnicode(const nsACString& aSource,
 195                            uint32_t* aUTF16Count = nullptr);
 196
 197 /**
 198  * Copies |aLength| 16-bit code units from the start of |aSource| to the
 199  * |char16_t| buffer |aDest|.
 200  *
 201  * After this operation |aDest| is not null terminated.
 202  *
 203  * @param aSource a UTF-16 string
 204  * @param aSrcOffset start offset in the source string
 205  * @param aDest a |char16_t| buffer
 206  * @param aLength the number of 16-bit code units to copy
 207  * @return pointer to destination buffer - identical to |aDest|
 208  */
 209 char16_t* CopyUnicodeTo(const nsAString& aSource,
 210                         uint32_t aSrcOffset,
 211                         char16_t* aDest,
 212                         uint32_t aLength);
 213
 214
 215 /**
 216  * Copies 16-bit characters between iterators |aSrcStart| and
 217  * |aSrcEnd| to the writable string |aDest|. Similar to the
 218  * |nsString::Mid| method.
 219  *
 220  * After this operation |aDest| is not null terminated.
 221  *
 222  * @param aSrcStart start source iterator
 223  * @param aSrcEnd end source iterator
 224  * @param aDest destination for the copy
 225  */
 226 void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
 227                    const nsAString::const_iterator& aSrcEnd,
 228                    nsAString& aDest);
 229
 230 /**
 231  * Appends 16-bit characters between iterators |aSrcStart| and
 232  * |aSrcEnd| to the writable string |aDest|.
 233  *
 234  * After this operation |aDest| is not null terminated.
 235  *
 236  * @param aSrcStart start source iterator
 237  * @param aSrcEnd end source iterator
 238  * @param aDest destination for the copy
 239  */
 240 void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
 241                      const nsAString::const_iterator& aSrcEnd,
 242                      nsAString& aDest);
 243
 244 /**
 245  * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00 through 0x7F, inclusive.
 246  *
 247  * @param aString a 16-bit wide string to scan
 248  */
 249 bool IsASCII(const nsAString& aString);
 250
 251 /**
 252  * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00 through 0x7F, inclusive.
 253  *
 254  * @param aString an 8-bit wide string to scan
 255  */
 256 bool IsASCII(const nsACString& aString);
 257
 258 /**
 259  * Returns |true| if |aString| is a valid UTF-8 string.
 260  * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
 261  * It was mainly written to replace, and is roughly equivalent to,
 262  *
 263  *    str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
 264  *
 265  * (see bug 191541)
 266  * As such,  it does not check for non-UTF-8 7bit encodings such as
 267  * ISO-2022-JP and HZ.
 268  *
 269  * It rejects sequences with the following errors:
 270  *
 271  * byte sequences that cannot be decoded into characters according to
 272  *   UTF-8's rules (including cases where the input is part of a valid
 273  *   UTF-8 sequence but starts or ends mid-character)
 274  * overlong sequences (i.e., cases where a character was encoded
 275  *   non-canonically by using more bytes than necessary)
 276  * surrogate codepoints (i.e., the codepoints reserved for
 277  *   representing astral characters in UTF-16)
 278  * codepoints above the unicode range (i.e., outside the first 17
 279  *   planes; higher than U+10FFFF), in accordance with
 280  *   http://tools.ietf.org/html/rfc3629
 281  * when aRejectNonChar is true (the default), any codepoint whose low
 282  *   16 bits are 0xFFFE or 0xFFFF
 283  *
 284  *
 285  * @param aString an 8-bit wide string to scan
 286  * @param aRejectNonChar a boolean to control the rejection of utf-8
 287  *        non characters
 288  */
 289 bool IsUTF8(const nsACString& aString, bool aRejectNonChar = true);
 290
 /**
  * Takes a source string, a single delimiter character and an array of
  * |nsCString| that is passed by non-const reference.
  *
  * NOTE(review): presumably splits |aAstring| at each |aDelimiter| and
  * stores the pieces in |aArray|.  The definition is not visible here, so
  * the meaning of the |bool| result, the treatment of empty fields, and
  * whether existing elements of |aArray| are kept all need to be confirmed
  * against the implementation.
  */
 291 bool ParseString(const nsACString& aAstring, char aDelimiter,
 292                  nsTArray<nsCString>& aArray);
 293
 294 /**
 295  * Converts case in place in the argument string.
 296  */
 297 void ToUpperCase(nsACString&);
 298
 299 void ToLowerCase(nsACString&);
 300
 301 void ToUpperCase(nsCSubstring&);
 302
 303 void ToLowerCase(nsCSubstring&);
 304
 305 /**
 306  * Converts case from string aSource to aDest.
 307  */
 308 void ToUpperCase(const nsACString& aSource, nsACString& aDest);
 309
 310 void ToLowerCase(const nsACString& aSource, nsACString& aDest);
 311
 312 /**
 313  * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
 314  *
 315  * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
 316  * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
 317  *
 318  * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
 319  * If we need something faster, then we can implement that later.
      *
      * The two iterator parameters are unnamed in the declarations below.  In
      * this comment |aSearchStart| is the first iterator argument and
      * |aSearchEnd| the second; both are non-const references because they
      * are updated as described above.  The last parameter is the comparator
      * used for matching and defaults to |nsDefaultStringComparator()| (16-bit
      * overload) or |nsDefaultCStringComparator()| (8-bit overload).
 320  */
 321
 322 bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
 323                     nsAString::const_iterator&,
 324                     const nsStringComparator& = nsDefaultStringComparator());
 325 bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
 326                     nsACString::const_iterator&,
 327                     const nsCStringComparator& = nsDefaultCStringComparator());
 328
 329 /* sometimes we don't care about where the string was, just that we
 330  * found it or not */
 331 inline bool
 332 FindInReadable(const nsAString& aPattern, const nsAString& aSource,
 333                const nsStringComparator& aCompare = nsDefaultStringComparator())
 334 {
 335   nsAString::const_iterator start, end;
 336   aSource.BeginReading(start);
 337   aSource.EndReading(end);
 338   return FindInReadable(aPattern, start, end, aCompare);
 339 }
 340
 341 inline bool
 342 FindInReadable(const nsACString& aPattern, const nsACString& aSource,
 343                const nsCStringComparator& aCompare = nsDefaultCStringComparator())
 344 {
 345   nsACString::const_iterator start, end;
 346   aSource.BeginReading(start);
 347   aSource.EndReading(end);
 348   return FindInReadable(aPattern, start, end, aCompare);
 349 }
 350
 351
 /**
  * Takes an 8-bit pattern and two unnamed |nsACString::const_iterator|
  * references; unlike the |FindInReadable| overloads it has no comparator
  * parameter.
  *
  * NOTE(review): presumably the case-insensitive counterpart of the 8-bit
  * |FindInReadable|, with the two iterators delimiting the search range on
  * entry and the match on a |true| return.  The definition is not visible
  * here -- confirm, including which case-folding rules are applied.
  */
 352 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
 353                                    nsACString::const_iterator&,
 354                                    nsACString::const_iterator&);
 355
 356 /**
 357  * Finds the rightmost occurrence of |aPattern|, if any.
 358  * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
 359  * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
 360  *
      * The two iterator parameters are unnamed in the declarations below.
      * NOTE(review): by analogy with |FindInReadable|, |aSearchStart| is
      * presumably the first iterator argument and |aSearchEnd| the second,
      * together delimiting the range that is searched -- confirm against the
      * implementation.  The last parameter is the comparator and defaults to
      * |nsDefaultStringComparator()| / |nsDefaultCStringComparator()|.
 361  */
 362 bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
 363                      nsAString::const_iterator&,
 364                      const nsStringComparator& = nsDefaultStringComparator());
 365 bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
 366                      nsACString::const_iterator&,
 367                      const nsCStringComparator& = nsDefaultCStringComparator());
 368
 369 /**
 370 * Finds the leftmost occurrence of |aChar|, if any in the range
 371 * |aSearchStart|..|aSearchEnd|.
 372 *
 373 * Returns |true| if a match was found, and adjusts |aSearchStart| to
 374 * point to the match.  If no match was found, returns |false| and
 375 * makes |aSearchStart == aSearchEnd|.
 376 */
 377 bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
 378                         const nsAString::const_iterator& aSearchEnd);
 379 bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
 380                         const nsACString::const_iterator& aSearchEnd);
 381
 382 /**
 383 * Finds the number of occurrences of |aChar| in the string |aStr|
 384 */
 385 uint32_t CountCharInReadable(const nsAString& aStr,
 386                              char16_t aChar);
 387 uint32_t CountCharInReadable(const nsACString& aStr,
 388                              char aChar);
 389
 /**
  * Prefix and suffix tests, each declared for 16-bit (|nsAString|) and
  * 8-bit (|nsACString|) strings.
  *
  * Every overload takes the string to inspect (|aSource|), the candidate
  * |aSubstring| and a comparator that defaults to
  * |nsDefaultStringComparator()| or |nsDefaultCStringComparator()|.
  *
  * NOTE(review): presumably the result is |true| when |aSource| starts
  * (StringBeginsWith) or ends (StringEndsWith) with |aSubstring| according
  * to |aComparator|.  Behaviour for an empty |aSubstring|, or one longer
  * than |aSource|, is not visible in this header -- confirm against the
  * implementation.
  */
 390 bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
 391                       const nsStringComparator& aComparator =
 392                         nsDefaultStringComparator());
 393 bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
 394                       const nsCStringComparator& aComparator =
 395                         nsDefaultCStringComparator());
 396 bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
 397                     const nsStringComparator& aComparator =
 398                       nsDefaultStringComparator());
 399 bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
 400                     const nsCStringComparator& aComparator =
 401                       nsDefaultCStringComparator());
 402
 /**
  * Accessors that return |const| references to |nsAFlatString| and
  * |nsAFlatCString| objects.
  *
  * NOTE(review): by their names, EmptyString()/EmptyCString() presumably
  * yield zero-length strings, and NullString()/NullCString() strings in a
  * distinct "null" state.  Neither that distinction nor the lifetime of the
  * referenced objects is visible in this header -- confirm against the
  * implementation.
  */
 403 const nsAFlatString& EmptyString();
 404 const nsAFlatCString& EmptyCString();
 405
 406 const nsAFlatString& NullString();
 407 const nsAFlatCString& NullCString();
 408
 409 /**
 410 * Compare a UTF-8 string to a UTF-16 string.
 411 *
 412 * Returns 0 if the strings are equal, -1 if aUTF8String is less
 413 * than aUTF16String, and 1 in the reverse case.  In case of fatal
 414 * error (eg the strings are not valid UTF8 and UTF16 respectively),
 415 * this method will return INT32_MIN.
 416 */
 417 int32_t CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
 418                            const nsASingleFragmentString& aUTF16String);
 419
 /**
  * Takes a single 32-bit value |aSource| and a 16-bit string |aDest|.
  *
  * NOTE(review): by its name, presumably appends the UCS-4 code point
  * |aSource| to |aDest| in UTF-16 form.  How values that need a surrogate
  * pair, or values that are not valid code points, are handled is not
  * visible in this header -- confirm against the implementation.
  */
 420 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
 421
 /**
  * Asks |aStr| to take on a length of |aLen| and reports whether it did.
  *
  * Calls |aStr.SetLength(aLen)| and then compares |aStr.Length()| with the
  * requested value, so a SetLength that leaves the length unchanged shows
  * up as a |false| result.
  *
  * @param aStr any object providing SetLength(uint32_t) and Length()
  * @param aLen the length to request
  * @return |true| if |aStr.Length()| equals |aLen| after the request
  */
 template<class T>
 inline bool
 EnsureStringLength(T& aStr, uint32_t aLen)
 {
   aStr.SetLength(aLen);

   if (aStr.Length() == aLen) {
     return true;
   }
   return false;
 }
 429
 430 #endif // !defined(nsReadableUtils_h___)