Bug 1883023 [wpt PR 44879] - Avoid forced layout when finished parsing an empty subfr...
[gecko.git] / mfbt / TextUtils.h
blobec497c52eed23d201ffe8545c7d5a55e2c23be1f
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* Character/text operations. */
9 #ifndef mozilla_TextUtils_h
10 #define mozilla_TextUtils_h
12 #include "mozilla/Assertions.h"
13 #include "mozilla/Latin1.h"
// MOZ_HAS_JSRUST is used as a function-like macro elsewhere in this header
// (|#if MOZ_HAS_JSRUST()|).  A bare |#ifdef| only checks that the macro is
// defined, not what it expands to, so its value is evaluated as well before
// the Rust entry point is declared.  The outer |#ifdef| is kept so the inner
// |#if| is never evaluated while the macro is undefined.
#ifdef MOZ_HAS_JSRUST
#  if MOZ_HAS_JSRUST()
// Can't include mozilla/Encoding.h here.
extern "C" {
// Declared as uint8_t instead of char to match declaration in another header.
size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
}
#  endif
#endif
23 namespace mozilla {
25 // See Utf8.h for IsUtf8() and conversions between UTF-8 and UTF-16.
26 // See Latin1.h for testing UTF-16 and UTF-8 for Latin1ness and
27 // for conversions to and from Latin1.
29 // The overloads below are not templated in order to make
30 // implicit conversions to span work as expected for the Span
31 // overloads.
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(unsigned char aChar) {
  // A byte is ASCII exactly when its most significant bit is clear.
  return (aChar & 0x80) == 0;
}
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(signed char aChar) {
  // Compare the byte's unsigned value so that negative inputs, which are the
  // bytes 0x80..0xFF, are rejected.
  return static_cast<unsigned char>(aChar) < 0x80;
}
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(char aChar) {
  // |char| may be signed or unsigned; judge the byte by its unsigned value so
  // the answer is the same either way.
  return static_cast<unsigned char>(aChar) < 0x80;
}
#ifdef __cpp_char8_t
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(char8_t aChar) {
  // Only provided when the compiler supports char8_t.
  return static_cast<unsigned char>(aChar) < 0x80;
}
#endif
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(char16_t aChar) {
  return aChar <= 0x7F;
}
/** Tests whether |aChar| is ASCII, i.e. lies in the range [0, 0x80). */
inline constexpr bool IsAscii(char32_t aChar) {
  return aChar <= 0x7F;
}
59 /**
60 * Returns |true| iff |aString| contains only ASCII characters, that is,
61 * characters in the range [0x00, 0x80).
63 * @param aString a 8-bit wide string to scan
65 inline bool IsAscii(mozilla::Span<const char> aString) {
66 #if MOZ_HAS_JSRUST()
67 size_t length = aString.Length();
68 const char* ptr = aString.Elements();
69 // For short strings, avoid the function call, since, the SIMD
70 // code won't have a chance to kick in anyway.
71 if (length < mozilla::detail::kShortStringLimitForInlinePaths) {
72 const uint8_t* uptr = reinterpret_cast<const uint8_t*>(ptr);
73 uint8_t accu = 0;
74 for (size_t i = 0; i < length; i++) {
75 accu |= uptr[i];
77 return accu < 0x80;
79 return encoding_mem_is_ascii(ptr, length);
80 #else
81 for (char c : aString) {
82 if (!IsAscii(c)) {
83 return false;
86 return true;
87 #endif
90 /**
91 * Returns |true| iff |aString| contains only ASCII characters, that is,
92 * characters in the range [0x00, 0x80).
94 * @param aString a 16-bit wide string to scan
96 inline bool IsAscii(mozilla::Span<const char16_t> aString) {
97 #if MOZ_HAS_JSRUST()
98 size_t length = aString.Length();
99 const char16_t* ptr = aString.Elements();
100 // For short strings, calling into Rust is a pessimization, and the SIMD
101 // code won't have a chance to kick in anyway.
102 // 16 is a bit larger than logically necessary for this function alone,
103 // but it's important that the limit here matches the limit used in
104 // LossyConvertUtf16toLatin1!
105 if (length < mozilla::detail::kShortStringLimitForInlinePaths) {
106 char16_t accu = 0;
107 for (size_t i = 0; i < length; i++) {
108 accu |= ptr[i];
110 return accu < 0x80;
112 return encoding_mem_is_basic_latin(ptr, length);
113 #else
114 for (char16_t c : aString) {
115 if (!IsAscii(c)) {
116 return false;
119 return true;
120 #endif
/**
 * Tests whether every character of the null-terminated string that |aChar|
 * points to is ASCII, i.e. in the range [0, 0x80).  The scan stops at the
 * first null character or at the first non-ASCII character, whichever comes
 * first.
 */
template <typename Char>
constexpr bool IsAsciiNullTerminated(const Char* aChar) {
  for (; *aChar; ++aChar) {
    if (!IsAscii(*aChar)) {
      return false;
    }
  }
  return true;
}
137 #if MOZ_HAS_JSRUST()
139 * Returns the index of the first non-ASCII byte or
140 * the length of the string if there are none.
142 inline size_t AsciiValidUpTo(mozilla::Span<const char> aString) {
143 return encoding_ascii_valid_up_to(
144 reinterpret_cast<const uint8_t*>(aString.Elements()), aString.Length());
148 * Returns the index of the first unpaired surrogate or
149 * the length of the string if there are none.
151 inline size_t Utf16ValidUpTo(mozilla::Span<const char16_t> aString) {
152 return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
156 * Replaces unpaired surrogates with U+FFFD in the argument.
158 * Note: If you have an nsAString, use EnsureUTF16Validity() from
159 * nsReadableUtils.h instead to avoid unsharing a valid shared
160 * string.
162 inline void EnsureUtf16ValiditySpan(mozilla::Span<char16_t> aString) {
163 encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
167 * Convert ASCII to UTF-16. In debug builds, assert that the input is
168 * ASCII.
170 * The length of aDest must not be less than the length of aSource.
172 inline void ConvertAsciitoUtf16(mozilla::Span<const char> aSource,
173 mozilla::Span<char16_t> aDest) {
174 MOZ_ASSERT(IsAscii(aSource));
175 ConvertLatin1toUtf16(aSource, aDest);
178 #endif // MOZ_HAS_JSRUST
181 * Returns true iff |aChar| matches Ascii Whitespace.
183 * This function is intended to match the Infra standard
184 * (https://infra.spec.whatwg.org/#ascii-whitespace)
186 template <typename Char>
187 constexpr bool IsAsciiWhitespace(Char aChar) {
188 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
189 auto uc = static_cast<UnsignedChar>(aChar);
190 return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20;
194 * Returns true iff |aChar| matches [a-z].
196 * This function is basically what you thought islower was, except its behavior
197 * doesn't depend on the user's current locale.
199 template <typename Char>
200 constexpr bool IsAsciiLowercaseAlpha(Char aChar) {
201 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
202 auto uc = static_cast<UnsignedChar>(aChar);
203 return 'a' <= uc && uc <= 'z';
207 * Returns true iff |aChar| matches [A-Z].
209 * This function is basically what you thought isupper was, except its behavior
210 * doesn't depend on the user's current locale.
212 template <typename Char>
213 constexpr bool IsAsciiUppercaseAlpha(Char aChar) {
214 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
215 auto uc = static_cast<UnsignedChar>(aChar);
216 return 'A' <= uc && uc <= 'Z';
/**
 * Tests whether |aChar| matches [a-zA-Z].
 *
 * Unlike isalpha, the result does not depend on the user's current locale.
 */
template <typename Char>
constexpr bool IsAsciiAlpha(Char aChar) {
  if (IsAsciiLowercaseAlpha(aChar)) {
    return true;
  }
  return IsAsciiUppercaseAlpha(aChar);
}
231 * Returns true iff |aChar| matches [0-9].
233 * This function is basically what you thought isdigit was, except its behavior
234 * doesn't depend on the user's current locale.
236 template <typename Char>
237 constexpr bool IsAsciiDigit(Char aChar) {
238 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
239 auto uc = static_cast<UnsignedChar>(aChar);
240 return '0' <= uc && uc <= '9';
244 * Returns true iff |aChar| matches [0-9a-fA-F].
246 * This function is basically isxdigit, but guaranteed to be only for ASCII.
248 template <typename Char>
249 constexpr bool IsAsciiHexDigit(Char aChar) {
250 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
251 auto uc = static_cast<UnsignedChar>(aChar);
252 return ('0' <= uc && uc <= '9') || ('a' <= uc && uc <= 'f') ||
253 ('A' <= uc && uc <= 'F');
/**
 * Tests whether |aChar| matches [a-zA-Z0-9].
 *
 * Unlike isalnum, the result does not depend on the user's current locale.
 */
template <typename Char>
constexpr bool IsAsciiAlphanumeric(Char aChar) {
  if (IsAsciiDigit(aChar)) {
    return true;
  }
  return IsAsciiAlpha(aChar);
}
268 * Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36.
269 * (This function therefore works for decimal, hexadecimal, etc.).
271 template <typename Char>
272 uint8_t AsciiAlphanumericToNumber(Char aChar) {
273 using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
274 auto uc = static_cast<UnsignedChar>(aChar);
276 if ('0' <= uc && uc <= '9') {
277 return uc - '0';
280 if ('A' <= uc && uc <= 'Z') {
281 return uc - 'A' + 10;
284 // Ideally this function would be constexpr, but unfortunately gcc at least as
285 // of 6.4 forbids non-constexpr function calls in unevaluated constexpr
286 // function calls. See bug 1453456. So for now, just assert and leave the
287 // entire function non-constexpr.
288 MOZ_ASSERT('a' <= uc && uc <= 'z',
289 "non-ASCII alphanumeric character can't be converted to number");
290 return uc - 'a' + 10;
293 } // namespace mozilla
295 #endif /* mozilla_TextUtils_h */