Bug 1746711 Part 2: Ensure the enqueued surface has a color space. r=gfx-reviewers...
[gecko.git] / intl / components / src / ICU4CGlue.h
blobaf1590680bd3724aedfa480569cbbd9cbee23e8c
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef intl_components_ICUUtils_h
6 #define intl_components_ICUUtils_h
8 #include "unicode/uenum.h"
9 #include "unicode/utypes.h"
10 #include "mozilla/Buffer.h"
11 #include "mozilla/DebugOnly.h"
12 #include "mozilla/Maybe.h"
13 #include "mozilla/Result.h"
14 #include "mozilla/Span.h"
15 #include "mozilla/Utf8.h"
16 #include "mozilla/Vector.h"
17 #include "mozilla/intl/ICUError.h"
19 // When building standalone js shell, it will include headers from
20 // intl/components if JS_HAS_INTL_API is true (the default value), but js shell
21 // won't include headers from XPCOM, so don't include nsTArray.h when building
22 // standalone js shell.
23 #ifndef JS_STANDALONE
24 # include "nsTArray.h"
25 #endif
27 #include <cstring>
28 #include <iterator>
29 #include <stddef.h>
30 #include <stdint.h>
31 #include <string>
32 #include <string_view>
34 struct UFormattedValue;
35 namespace mozilla::intl {
37 template <typename CharType>
38 static inline CharType* AssertNullTerminatedString(Span<CharType> aSpan) {
39 // Intentionally check one past the last character, because we expect that the
40 // NUL character isn't part of the string.
41 MOZ_ASSERT(*(aSpan.data() + aSpan.size()) == '\0');
43 // Also ensure there aren't any other NUL characters within the string.
44 MOZ_ASSERT(std::char_traits<CharType>::length(aSpan.data()) == aSpan.size());
46 return aSpan.data();
49 static inline const char* AssertNullTerminatedString(std::string_view aView) {
50 // Intentionally check one past the last character, because we expect that the
51 // NUL character isn't part of the string.
52 MOZ_ASSERT(*(aView.data() + aView.size()) == '\0');
54 // Also ensure there aren't any other NUL characters within the string.
55 MOZ_ASSERT(std::strlen(aView.data()) == aView.size());
57 return aView.data();
/**
 * Translate the "und" locale into the empty string, which is how ICU spells
 * its root locale. Any other input is returned unchanged (same pointer).
 */
static inline const char* IcuLocale(const char* aLocale) {
  // Only an exact match of "und" is remapped; longer tags such as "und-Latn"
  // are passed through untouched.
  const bool isUndetermined = std::strcmp(aLocale, "und") == 0;
  return isUndetermined ? "" : aLocale;
}
72 /**
73 * Ensure a locale is null-terminated, and map the "und" locale to an empty
74 * string, which ICU uses internally.
76 static inline const char* IcuLocale(Span<const char> aLocale) {
77 return IcuLocale(AssertNullTerminatedString(aLocale));
80 /**
81 * Ensure a locale in the buffer is null-terminated, and map the "und" locale to
82 * an empty string, which ICU uses internally.
84 static inline const char* IcuLocale(const Buffer<char>& aLocale) {
85 return IcuLocale(Span(aLocale.begin(), aLocale.Length() - 1));
// Result type for ICU-backed operations that yield no value: either Ok or an
// ICUError.
using ICUResult = Result<Ok, ICUError>;

/**
 * Convert a UErrorCode to ICUError. This will correctly apply the OutOfMemory
 * case.
 */
ICUError ToICUError(UErrorCode status);

/**
 * Convert a UErrorCode to ICUResult. This will correctly apply the OutOfMemory
 * case.
 */
ICUResult ToICUResult(UErrorCode status);
103 * The ICU status can complain about a string not being terminated, but this
104 * is fine for this API, as it deals with the mozilla::Span that has a pointer
105 * and a length.
107 static inline bool ICUSuccessForStringSpan(UErrorCode status) {
108 return U_SUCCESS(status) || status == U_STRING_NOT_TERMINATED_WARNING;
/**
 * This class enforces that the unified mozilla::intl methods match the
 * const-ness of the underlying ICU4C API calls. const ICU4C APIs take a const
 * pointer, while mutable ones take a non-const pointer.
 *
 * For const ICU4C calls use:
 *   ICUPointer::GetConst().
 *
 * For non-const ICU4C calls use:
 *   ICUPointer::GetMut().
 *
 * This propagates the `const` specifier from the ICU4C API call to the unified
 * method, where it is enforced by the compiler. This helps ensure a consistent
 * and correct implementation.
 */
template <typename T>
class ICUPointer {
 public:
  explicit ICUPointer(T* aPointer) : mPointer(aPointer) {}

  // Declaring the move operations suppresses the implicit copy operations, so
  // an ICUPointer can be moved but not copied.
  ICUPointer(ICUPointer&& other) noexcept = default;
  ICUPointer& operator=(ICUPointer&& other) noexcept = default;

  // Rebind this wrapper to a different raw pointer. The previously stored
  // pointer is simply overwritten.
  ICUPointer& operator=(T* aPointer) noexcept {
    mPointer = aPointer;
    return *this;
  }

  // Accessor for const ICU4C calls; only callable path on a const ICUPointer.
  const T* GetConst() const { return mPointer; }

  // Accessor for mutating ICU4C calls; requires a non-const ICUPointer.
  T* GetMut() { return mPointer; }

  // True when a non-null pointer is stored.
  explicit operator bool() const { return mPointer != nullptr; }

 private:
  T* mPointer;
};
151 * Calling into ICU with the C-API can be a bit tricky. This function wraps up
152 * the relatively risky operations involving pointers, lengths, and buffers into
153 * a simpler call. This function accepts a lambda that performs the ICU call,
154 * and returns the length of characters in the buffer. When using a temporary
155 * stack-based buffer, the calls can often be done in one trip. However, if
156 * additional memory is needed, this function will call the C-API twice, in
157 * order to first get the size of the result, and then second to copy the result
158 * over to the buffer.
160 template <typename ICUStringFunction, typename Buffer>
161 static ICUResult FillBufferWithICUCall(Buffer& buffer,
162 const ICUStringFunction& strFn) {
163 static_assert(std::is_same_v<typename Buffer::CharType, char16_t> ||
164 std::is_same_v<typename Buffer::CharType, char> ||
165 std::is_same_v<typename Buffer::CharType, uint8_t>);
167 UErrorCode status = U_ZERO_ERROR;
168 int32_t length = strFn(buffer.data(), buffer.capacity(), &status);
169 if (status == U_BUFFER_OVERFLOW_ERROR) {
170 MOZ_ASSERT(length >= 0);
172 if (!buffer.reserve(length)) {
173 return Err(ICUError::OutOfMemory);
176 status = U_ZERO_ERROR;
177 mozilla::DebugOnly<int32_t> length2 = strFn(buffer.data(), length, &status);
178 MOZ_ASSERT(length == length2);
180 if (!ICUSuccessForStringSpan(status)) {
181 return Err(ToICUError(status));
184 buffer.written(length);
186 return Ok{};
190 * Adaptor for mozilla::Vector to implement the Buffer interface.
192 template <typename T, size_t N>
193 class VectorToBufferAdaptor {
194 mozilla::Vector<T, N>& vector;
196 public:
197 using CharType = T;
199 explicit VectorToBufferAdaptor(mozilla::Vector<T, N>& vector)
200 : vector(vector) {}
202 T* data() { return vector.begin(); }
204 size_t capacity() const { return vector.capacity(); }
206 bool reserve(size_t length) { return vector.reserve(length); }
208 void written(size_t length) {
209 mozilla::DebugOnly<bool> result = vector.resizeUninitialized(length);
210 MOZ_ASSERT(result);
215 * An overload of FillBufferWithICUCall that accepts a mozilla::Vector rather
216 * than a Buffer.
218 template <typename ICUStringFunction, size_t InlineSize, typename CharType>
219 static ICUResult FillBufferWithICUCall(Vector<CharType, InlineSize>& vector,
220 const ICUStringFunction& strFn) {
221 VectorToBufferAdaptor buffer(vector);
222 return FillBufferWithICUCall(buffer, strFn);
225 #ifndef JS_STANDALONE
227 * mozilla::intl APIs require sizeable buffers. This class abstracts over
228 * the nsTArray.
230 template <typename T>
231 class nsTArrayToBufferAdapter {
232 public:
233 using CharType = T;
235 // Do not allow copy or move. Move could be added in the future if needed.
236 nsTArrayToBufferAdapter(const nsTArrayToBufferAdapter&) = delete;
237 nsTArrayToBufferAdapter& operator=(const nsTArrayToBufferAdapter&) = delete;
239 explicit nsTArrayToBufferAdapter(nsTArray<CharType>& aArray)
240 : mArray(aArray) {}
243 * Ensures the buffer has enough space to accommodate |size| elements.
245 [[nodiscard]] bool reserve(size_t size) {
246 // Use fallible behavior here.
247 return mArray.SetCapacity(size, fallible);
251 * Returns the raw data inside the buffer.
253 CharType* data() { return mArray.Elements(); }
256 * Returns the count of elements written into the buffer.
258 size_t length() const { return mArray.Length(); }
261 * Returns the buffer's overall capacity.
263 size_t capacity() const { return mArray.Capacity(); }
266 * Resizes the buffer to the given amount of written elements.
268 void written(size_t amount) {
269 MOZ_ASSERT(amount <= mArray.Capacity());
270 // This sets |mArray|'s internal size so that it matches how much was
271 // written. This is necessary because the write happens across FFI
272 // boundaries.
273 mArray.SetLengthAndRetainStorage(amount);
276 private:
277 nsTArray<CharType>& mArray;
// Buffer adapter named for AutoTArray<T, N> callers. It adds no members of its
// own and only inherits the constructors of nsTArrayToBufferAdapter<T>.
template <typename T, size_t N>
class AutoTArrayToBufferAdapter : public nsTArrayToBufferAdapter<T> {
  using nsTArrayToBufferAdapter<T>::nsTArrayToBufferAdapter;
};
286 * An overload of FillBufferWithICUCall that accepts a nsTArray.
288 template <typename ICUStringFunction, typename CharType>
289 static ICUResult FillBufferWithICUCall(nsTArray<CharType>& array,
290 const ICUStringFunction& strFn) {
291 nsTArrayToBufferAdapter<CharType> buffer(array);
292 return FillBufferWithICUCall(buffer, strFn);
295 template <typename ICUStringFunction, typename CharType, size_t N>
296 static ICUResult FillBufferWithICUCall(AutoTArray<CharType, N>& array,
297 const ICUStringFunction& strFn) {
298 AutoTArrayToBufferAdapter<CharType, N> buffer(array);
299 return FillBufferWithICUCall(buffer, strFn);
301 #endif
304 * Fill a UTF-8 or a UTF-16 buffer with a UTF-16 span. ICU4C mostly uses UTF-16
305 * internally, but different consumers may have different situations with their
306 * buffers.
308 template <typename Buffer>
309 [[nodiscard]] bool FillBuffer(Span<const char16_t> utf16Span,
310 Buffer& targetBuffer) {
311 static_assert(std::is_same_v<typename Buffer::CharType, char> ||
312 std::is_same_v<typename Buffer::CharType, unsigned char> ||
313 std::is_same_v<typename Buffer::CharType, char16_t>);
315 if constexpr (std::is_same_v<typename Buffer::CharType, char> ||
316 std::is_same_v<typename Buffer::CharType, unsigned char>) {
317 if (utf16Span.Length() & mozilla::tl::MulOverflowMask<3>::value) {
318 // Tripling the size of the buffer overflows the size_t.
319 return false;
322 if (!targetBuffer.reserve(3 * utf16Span.Length())) {
323 return false;
326 size_t amount = ConvertUtf16toUtf8(
327 utf16Span, Span(reinterpret_cast<char*>(targetBuffer.data()),
328 targetBuffer.capacity()));
330 targetBuffer.written(amount);
332 if constexpr (std::is_same_v<typename Buffer::CharType, char16_t>) {
333 size_t amount = utf16Span.Length();
334 if (!targetBuffer.reserve(amount)) {
335 return false;
337 for (size_t i = 0; i < amount; i++) {
338 targetBuffer.data()[i] = utf16Span[i];
340 targetBuffer.written(amount);
343 return true;
347 * Fill a UTF-8 or a UTF-16 buffer with a UTF-8 span. ICU4C mostly uses UTF-16
348 * internally, but different consumers may have different situations with their
349 * buffers.
351 template <typename Buffer>
352 [[nodiscard]] bool FillBuffer(Span<const char> utf8Span, Buffer& targetBuffer) {
353 static_assert(std::is_same_v<typename Buffer::CharType, char> ||
354 std::is_same_v<typename Buffer::CharType, unsigned char> ||
355 std::is_same_v<typename Buffer::CharType, char16_t>);
357 if constexpr (std::is_same_v<typename Buffer::CharType, char> ||
358 std::is_same_v<typename Buffer::CharType, unsigned char>) {
359 size_t amount = utf8Span.Length();
360 if (!targetBuffer.reserve(amount)) {
361 return false;
363 for (size_t i = 0; i < amount; i++) {
364 targetBuffer.data()[i] =
365 // Static cast in case of a mismatch between `unsigned char` and
366 // `char`
367 static_cast<typename Buffer::CharType>(utf8Span[i]);
369 targetBuffer.written(amount);
371 if constexpr (std::is_same_v<typename Buffer::CharType, char16_t>) {
372 if (!targetBuffer.reserve(utf8Span.Length() + 1)) {
373 return false;
376 size_t amount = ConvertUtf8toUtf16(
377 utf8Span, Span(targetBuffer.data(), targetBuffer.capacity()));
379 targetBuffer.written(amount);
382 return true;
386 * It is convenient for callers to be able to pass in UTF-8 strings to the API.
387 * This function can be used to convert that to a stack-allocated UTF-16
388 * mozilla::Vector that can then be passed into ICU calls. The string will be
389 * null terminated.
391 template <size_t StackSize>
392 [[nodiscard]] static bool FillUTF16Vector(
393 Span<const char> utf8Span,
394 mozilla::Vector<char16_t, StackSize>& utf16TargetVec) {
395 // Per ConvertUtf8toUtf16: The length of aDest must be at least one greater
396 // than the length of aSource. This additional length will be used for null
397 // termination.
398 if (!utf16TargetVec.reserve(utf8Span.Length() + 1)) {
399 return false;
402 // ConvertUtf8toUtf16 fills the buffer with the data, but the length of the
403 // vector is unchanged.
404 size_t length = ConvertUtf8toUtf16(
405 utf8Span, Span(utf16TargetVec.begin(), utf16TargetVec.capacity()));
407 // Assert that the last element is free for writing a null terminator.
408 MOZ_ASSERT(length < utf16TargetVec.capacity());
409 utf16TargetVec.begin()[length] = '\0';
411 // The call to resizeUninitialized notifies the vector of how much was written
412 // exclusive of the null terminated character.
413 return utf16TargetVec.resizeUninitialized(length);
417 * An iterable class that wraps calls to the ICU UEnumeration C API.
419 * Usage:
421 * // Make sure the range expression is non-temporary, otherwise there is a
422 * // risk of undefined behavior:
423 * auto result = Calendar::GetBcp47KeywordValuesForLocale("en-US");
425 * for (auto name : result.unwrap()) {
426 * MOZ_ASSERT(name.unwrap(), "An iterable value exists".);
429 template <typename CharType, typename T, T(Mapper)(const CharType*, int32_t)>
430 class Enumeration {
431 public:
432 class Iterator;
433 friend class Iterator;
435 // Transfer ownership of the UEnumeration in the move constructor.
436 Enumeration(Enumeration&& other) noexcept
437 : mUEnumeration(other.mUEnumeration) {
438 other.mUEnumeration = nullptr;
441 // Transfer ownership of the UEnumeration in the move assignment operator.
442 Enumeration& operator=(Enumeration&& other) noexcept {
443 if (this == &other) {
444 return *this;
446 if (mUEnumeration) {
447 uenum_close(mUEnumeration);
449 mUEnumeration = other.mUEnumeration;
450 other.mUEnumeration = nullptr;
451 return *this;
454 class Iterator {
455 Enumeration& mEnumeration;
456 // `Nothing` signifies that no enumeration has been loaded through ICU yet.
457 Maybe<int32_t> mIteration = Nothing{};
458 const CharType* mNext = nullptr;
459 int32_t mNextLength = 0;
461 public:
462 using value_type = const CharType*;
463 using reference = T;
464 using iterator_category = std::input_iterator_tag;
466 explicit Iterator(Enumeration& aEnumeration, bool aIsBegin)
467 : mEnumeration(aEnumeration) {
468 if (aIsBegin) {
469 AdvanceUEnum();
473 Iterator& operator++() {
474 AdvanceUEnum();
475 return *this;
478 Iterator operator++(int) {
479 Iterator retval = *this;
480 ++(*this);
481 return retval;
484 bool operator==(Iterator other) const {
485 return mIteration == other.mIteration;
488 bool operator!=(Iterator other) const { return !(*this == other); }
490 T operator*() const {
491 // Map the iterated value to something new.
492 return Mapper(mNext, mNextLength);
495 private:
496 void AdvanceUEnum() {
497 if (mIteration.isNothing()) {
498 mIteration = Some(-1);
500 UErrorCode status = U_ZERO_ERROR;
501 if constexpr (std::is_same_v<CharType, char16_t>) {
502 mNext = uenum_unext(mEnumeration.mUEnumeration, &mNextLength, &status);
503 } else {
504 static_assert(std::is_same_v<CharType, char>,
505 "Only char16_t and char are supported by "
506 "mozilla::intl::Enumeration.");
507 mNext = uenum_next(mEnumeration.mUEnumeration, &mNextLength, &status);
509 if (U_FAILURE(status)) {
510 mNext = nullptr;
513 if (mNext) {
514 (*mIteration)++;
515 } else {
516 // The iterator is complete.
517 mIteration = Nothing{};
522 Iterator begin() { return Iterator(*this, true); }
523 Iterator end() { return Iterator(*this, false); }
525 explicit Enumeration(UEnumeration* aUEnumeration)
526 : mUEnumeration(aUEnumeration) {}
528 ~Enumeration() {
529 if (mUEnumeration) {
530 // Only close when the object is being destructed, not moved.
531 uenum_close(mUEnumeration);
535 private:
536 UEnumeration* mUEnumeration = nullptr;
539 template <typename CharType>
540 Result<Span<const CharType>, InternalError> SpanMapper(const CharType* string,
541 int32_t length) {
542 // Return the raw value from this Iterator.
543 if (string == nullptr) {
544 return Err(InternalError{});
546 MOZ_ASSERT(length >= 0);
547 return Span<const CharType>(string, static_cast<size_t>(length));
// The value type SpanMapper returns: a Span over the characters, or an
// InternalError.
template <typename CharType>
using SpanResult = Result<Span<const CharType>, InternalError>;

// An Enumeration whose elements are mapped through SpanMapper, i.e. yielded as
// SpanResult values.
template <typename CharType>
using SpanEnumeration = Enumeration<CharType, SpanResult<CharType>, SpanMapper>;
557 * An iterable class that wraps calls to ICU's available locales API.
559 template <int32_t(CountAvailable)(), const char*(GetAvailable)(int32_t)>
560 class AvailableLocalesEnumeration final {
561 // The overall count of available locales.
562 int32_t mLocalesCount = 0;
564 public:
565 AvailableLocalesEnumeration() { mLocalesCount = CountAvailable(); }
567 class Iterator {
568 public:
569 // std::iterator traits.
570 using iterator_category = std::input_iterator_tag;
571 using value_type = const char*;
572 using difference_type = ptrdiff_t;
573 using pointer = value_type*;
574 using reference = value_type&;
576 private:
577 // The current position in the list of available locales.
578 int32_t mLocalesPos = 0;
580 public:
581 explicit Iterator(int32_t aLocalesPos) : mLocalesPos(aLocalesPos) {}
583 Iterator& operator++() {
584 mLocalesPos++;
585 return *this;
588 Iterator operator++(int) {
589 Iterator result = *this;
590 ++(*this);
591 return result;
594 bool operator==(const Iterator& aOther) const {
595 return mLocalesPos == aOther.mLocalesPos;
598 bool operator!=(const Iterator& aOther) const { return !(*this == aOther); }
600 value_type operator*() const { return GetAvailable(mLocalesPos); }
603 // std::iterator begin() and end() methods.
606 * Return an iterator pointing to the first available locale.
608 Iterator begin() const { return Iterator(0); }
611 * Return an iterator pointing to one past the last available locale.
613 Iterator end() const { return Iterator(mLocalesCount); }
/**
 * A helper class to wrap calling ICU function in cpp file so we don't have to
 * include the ICU header here.
 */
class FormattedResult {
 protected:
  // Only declared here; the definition is not part of this header.
  static Result<Span<const char16_t>, ICUError> ToSpanImpl(
      const UFormattedValue* value);
};
627 * A RAII class to hold the formatted value of format result.
629 * The caller will need to create this AutoFormattedResult on the stack, with
630 * the following parameters:
631 * 1. Native ICU type.
632 * 2. An ICU function which opens the result.
633 * 3. An ICU function which can get the result as UFormattedValue.
634 * 4. An ICU function which closes the result.
636 * After the object is created, caller needs to call IsValid() method to check
637 * if the native object has been created properly, and then passes this
638 * object to other format interfaces.
639 * The format result will be stored in this object, the caller can use ToSpan()
640 * method to get the formatted string.
642 * The methods GetFormatted() and Value() are private methods since they expose
643 * native ICU types. If the caller wants to call these methods, the caller needs
644 * to register itself as a friend class in AutoFormattedResult.
646 * The formatted value and the native ICU object will be released once this
647 * class is destructed.
649 template <typename T, T*(Open)(UErrorCode*),
650 const UFormattedValue*(GetValue)(const T*, UErrorCode*),
651 void(Close)(T*)>
652 class MOZ_RAII AutoFormattedResult : FormattedResult {
653 public:
654 AutoFormattedResult() {
655 mFormatted = Open(&mError);
656 if (U_FAILURE(mError)) {
657 mFormatted = nullptr;
660 ~AutoFormattedResult() {
661 if (mFormatted) {
662 Close(mFormatted);
666 AutoFormattedResult(const AutoFormattedResult& other) = delete;
667 AutoFormattedResult& operator=(const AutoFormattedResult& other) = delete;
669 AutoFormattedResult(AutoFormattedResult&& other) = delete;
670 AutoFormattedResult& operator=(AutoFormattedResult&& other) = delete;
673 * Check if the native UFormattedDateInterval was created successfully.
675 bool IsValid() const { return !!mFormatted; }
678 * Get error code if IsValid() returns false.
680 ICUError GetError() const { return ToICUError(mError); }
683 * Get the formatted result.
685 Result<Span<const char16_t>, ICUError> ToSpan() const {
686 if (!IsValid()) {
687 return Err(GetError());
690 const UFormattedValue* value = Value();
691 if (!value) {
692 return Err(ICUError::InternalError);
695 return ToSpanImpl(value);
698 private:
699 friend class DateIntervalFormat;
700 friend class ListFormat;
701 T* GetFormatted() const { return mFormatted; }
703 const UFormattedValue* Value() const {
704 if (!IsValid()) {
705 return nullptr;
708 UErrorCode status = U_ZERO_ERROR;
709 const UFormattedValue* value = GetValue(mFormatted, &status);
710 if (U_FAILURE(status)) {
711 return nullptr;
714 return value;
717 T* mFormatted = nullptr;
718 UErrorCode mError = U_ZERO_ERROR;
720 } // namespace mozilla::intl
722 #endif /* intl_components_ICUUtils_h */