Bug 1746711 Part 2: Ensure the enqueued surface has a color space. r=gfx-reviewers...
[gecko.git] / intl / components / src / String.h
blobf07acd6578fd3f80f65afa2d334d89558ba1a818
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef intl_components_String_h_
6 #define intl_components_String_h_
8 #include "mozilla/Assertions.h"
9 #include "mozilla/Casting.h"
10 #include "mozilla/intl/ICU4CGlue.h"
11 #include "mozilla/intl/ICUError.h"
12 #include "mozilla/PodOperations.h"
13 #include "mozilla/Result.h"
14 #include "mozilla/Span.h"
16 #include "unicode/uchar.h"
17 #include "unicode/unorm2.h"
18 #include "unicode/ustring.h"
19 #include "unicode/utext.h"
20 #include "unicode/utypes.h"
22 namespace mozilla::intl {
24 /**
25 * This component is a Mozilla-focused API for working with strings in
26 * internationalization code.
28 class String final {
29 public:
30 String() = delete;
32 /**
33 * Return the locale-sensitive lower case string of the input.
35 template <typename B>
36 static Result<Ok, ICUError> ToLocaleLowerCase(const char* aLocale,
37 Span<const char16_t> aString,
38 B& aBuffer) {
39 if (!aBuffer.reserve(aString.size())) {
40 return Err(ICUError::OutOfMemory);
42 return FillBufferWithICUCall(
43 aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
44 return u_strToLower(target, length, aString.data(), aString.size(),
45 aLocale, status);
46 });
49 /**
50 * Return the locale-sensitive upper case string of the input.
52 template <typename B>
53 static Result<Ok, ICUError> ToLocaleUpperCase(const char* aLocale,
54 Span<const char16_t> aString,
55 B& aBuffer) {
56 if (!aBuffer.reserve(aString.size())) {
57 return Err(ICUError::OutOfMemory);
59 return FillBufferWithICUCall(
60 aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
61 return u_strToUpper(target, length, aString.data(), aString.size(),
62 aLocale, status);
63 });
/**
 * Selects which Unicode normalization algorithm Normalize() should apply.
 *
 * Background reading:
 * - Unicode Standard, §2.12 Equivalent Sequences
 * - Unicode Standard, §3.11 Normalization Forms
 * - https://unicode.org/reports/tr15/
 */
enum class NormalizationForm {
  /** Normalization Form C. */
  NFC,

  /** Normalization Form D. */
  NFD,

  /** Normalization Form KC. */
  NFKC,

  /** Normalization Form KD. */
  NFKD,
};
/**
 * Outcome flag returned by Normalize(): `Yes` when the input was already in
 * the requested normalization form, `No` when a normalized copy was written
 * to the output buffer.
 */
enum class AlreadyNormalized : bool {
  No,
  Yes,
};
99 /**
100 * Normalize the input string according to requested normalization form.
102 * Returns `AlreadyNormalized::Yes` when the string is already in normalized
103 * form. The output buffer is unchanged in this case. Otherwise returns
104 * `AlreadyNormalized::No` and places the normalized string into the output
105 * buffer.
107 template <typename B>
108 static Result<AlreadyNormalized, ICUError> Normalize(
109 NormalizationForm aForm, Span<const char16_t> aString, B& aBuffer) {
110 // The unorm2_getXXXInstance() methods return a shared instance which must
111 // not be deleted.
112 UErrorCode status = U_ZERO_ERROR;
113 const UNormalizer2* normalizer;
114 switch (aForm) {
115 case NormalizationForm::NFC:
116 normalizer = unorm2_getNFCInstance(&status);
117 break;
118 case NormalizationForm::NFD:
119 normalizer = unorm2_getNFDInstance(&status);
120 break;
121 case NormalizationForm::NFKC:
122 normalizer = unorm2_getNFKCInstance(&status);
123 break;
124 case NormalizationForm::NFKD:
125 normalizer = unorm2_getNFKDInstance(&status);
126 break;
128 if (U_FAILURE(status)) {
129 return Err(ToICUError(status));
132 int32_t spanLengthInt = unorm2_spanQuickCheckYes(normalizer, aString.data(),
133 aString.size(), &status);
134 if (U_FAILURE(status)) {
135 return Err(ToICUError(status));
138 size_t spanLength = AssertedCast<size_t>(spanLengthInt);
139 MOZ_ASSERT(spanLength <= aString.size());
141 // Return if the input string is already normalized.
142 if (spanLength == aString.size()) {
143 return AlreadyNormalized::Yes;
146 if (!aBuffer.reserve(aString.size())) {
147 return Err(ICUError::OutOfMemory);
150 // Copy the already normalized prefix.
151 if (spanLength > 0) {
152 PodCopy(aBuffer.data(), aString.data(), spanLength);
154 aBuffer.written(spanLength);
157 MOZ_TRY(FillBufferWithICUCall(
158 aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
159 Span<const char16_t> remaining = aString.From(spanLength);
160 return unorm2_normalizeSecondAndAppend(normalizer, target, spanLength,
161 length, remaining.data(),
162 remaining.size(), status);
163 }));
165 return AlreadyNormalized::No;
169 * Return true if the code point has the binary property "Cased".
171 static bool IsCased(char32_t codePoint) {
172 return u_hasBinaryProperty(static_cast<UChar32>(codePoint), UCHAR_CASED);
176 * Return true if the code point has the binary property "Case_Ignorable".
178 static bool IsCaseIgnorable(char32_t codePoint) {
179 return u_hasBinaryProperty(static_cast<UChar32>(codePoint),
180 UCHAR_CASE_IGNORABLE);
184 * Return the NFC pairwise composition of the two input characters, if any;
185 * returns 0 (which we know is not a composed char!) if none exists.
187 static char32_t ComposePairNFC(char32_t a, char32_t b) {
188 // unorm2_getNFCInstance returns a static instance that does not have to be
189 // released here. If it fails, we just return 0 (no composition) always.
190 static UErrorCode status = U_ZERO_ERROR;
191 static const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
192 if (U_FAILURE(status)) {
193 return 0;
195 UChar32 ch = unorm2_composePair(normalizer, static_cast<UChar32>(a),
196 static_cast<UChar32>(b));
197 return ch < 0 ? 0 : static_cast<char32_t>(ch);
201 * Put the "raw" (single-level) canonical decomposition of the input char, if
202 * any, into the provided buffer. Canonical decomps are never more than two
203 * chars in length (although full normalization may result in longer output
204 * due to recursion).
205 * Returns the length of the decomposition (0 if none, else 1 or 2).
207 static int DecomposeRawNFD(char32_t ab, char32_t decomp[2]) {
208 // unorm2_getNFCInstance returns a static instance that does not have to be
209 // released here. If it fails, we just return 0 (no decomposition) always.
210 // Although we are using it to query for a decomposition, the mode of the
211 // Normalizer2 is irrelevant here, so we may as well use the same singleton
212 // instance as ComposePairNFC.
213 static UErrorCode status = U_ZERO_ERROR;
214 static const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
215 if (U_FAILURE(status)) {
216 return 0;
219 // Canonical decompositions are never more than two Unicode characters,
220 // or a maximum of 4 utf-16 code units.
221 const unsigned MAX_DECOMP_LENGTH = 4;
222 UErrorCode error = U_ZERO_ERROR;
223 UChar decompUtf16[MAX_DECOMP_LENGTH];
224 int32_t len =
225 unorm2_getRawDecomposition(normalizer, static_cast<UChar32>(ab),
226 decompUtf16, MAX_DECOMP_LENGTH, &error);
227 if (U_FAILURE(error) || len < 0) {
228 return 0;
230 UText text = UTEXT_INITIALIZER;
231 utext_openUChars(&text, decompUtf16, len, &error);
232 MOZ_ASSERT(U_SUCCESS(error));
233 UChar32 ch = UTEXT_NEXT32(&text);
234 len = 0;
235 if (ch != U_SENTINEL) {
236 decomp[0] = static_cast<char32_t>(ch);
237 ++len;
238 ch = UTEXT_NEXT32(&text);
239 if (ch != U_SENTINEL) {
240 decomp[1] = static_cast<char32_t>(ch);
241 ++len;
244 utext_close(&text);
245 return len;
249 * Return the Unicode version, for example "13.0".
251 static Span<const char> GetUnicodeVersion();
254 } // namespace mozilla::intl
256 #endif