1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef intl_components_ICUUtils_h
6 #define intl_components_ICUUtils_h
8 #include "unicode/uenum.h"
9 #include "unicode/utypes.h"
10 #include "mozilla/Buffer.h"
11 #include "mozilla/DebugOnly.h"
12 #include "mozilla/Maybe.h"
13 #include "mozilla/Result.h"
14 #include "mozilla/Span.h"
15 #include "mozilla/Utf8.h"
16 #include "mozilla/Vector.h"
17 #include "mozilla/intl/ICUError.h"
19 // When building standalone js shell, it will include headers from
20 // intl/components if JS_HAS_INTL_API is true (the default value), but js shell
21 // won't include headers from XPCOM, so don't include nsTArray.h when building
22 // standalone js shell.
24 # include "nsTArray.h"
32 #include <string_view>
34 struct UFormattedValue
;
35 namespace mozilla::intl
{
37 template <typename CharType
>
38 static inline CharType
* AssertNullTerminatedString(Span
<CharType
> aSpan
) {
39 // Intentionally check one past the last character, because we expect that the
40 // NUL character isn't part of the string.
41 MOZ_ASSERT(*(aSpan
.data() + aSpan
.size()) == '\0');
43 // Also ensure there aren't any other NUL characters within the string.
44 MOZ_ASSERT(std::char_traits
<CharType
>::length(aSpan
.data()) == aSpan
.size());
49 static inline const char* AssertNullTerminatedString(std::string_view aView
) {
50 // Intentionally check one past the last character, because we expect that the
51 // NUL character isn't part of the string.
52 MOZ_ASSERT(*(aView
.data() + aView
.size()) == '\0');
54 // Also ensure there aren't any other NUL characters within the string.
55 MOZ_ASSERT(std::strlen(aView
.data()) == aView
.size());
61 * Map the "und" locale to an empty string, which ICU uses internally.
63 static inline const char* IcuLocale(const char* aLocale
) {
64 // Return the empty string if the input is exactly equal to the string "und".
65 const char* locale
= aLocale
;
66 if (!std::strcmp(locale
, "und")) {
67 locale
= ""; // ICU root locale
73 * Ensure a locale is null-terminated, and map the "und" locale to an empty
74 * string, which ICU uses internally.
76 static inline const char* IcuLocale(Span
<const char> aLocale
) {
77 return IcuLocale(AssertNullTerminatedString(aLocale
));
81 * Ensure a locale in the buffer is null-terminated, and map the "und" locale to
82 * an empty string, which ICU uses internally.
84 static inline const char* IcuLocale(const Buffer
<char>& aLocale
) {
85 return IcuLocale(Span(aLocale
.begin(), aLocale
.Length() - 1));
88 using ICUResult
= Result
<Ok
, ICUError
>;
91 * Convert a UErrorCode to ICUError. This will correctly apply the OutOfMemory
94 ICUError
ToICUError(UErrorCode status
);
97 * Convert a UErrorCode to ICUResult. This will correctly apply the OutOfMemory
100 ICUResult
ToICUResult(UErrorCode status
);
103 * The ICU status can complain about a string not being terminated, but this
104 * is fine for this API, as it deals with the mozilla::Span that has a pointer
107 static inline bool ICUSuccessForStringSpan(UErrorCode status
) {
108 return U_SUCCESS(status
) || status
== U_STRING_NOT_TERMINATED_WARNING
;
112 * This class enforces that the unified mozilla::intl methods match the
113 * const-ness of the underlying ICU4C API calls. const ICU4C APIs take a const
114 * pointer, while mutable ones take a non-const pointer.
116 * For const ICU4C calls use:
117 * ICUPointer::GetConst().
119 * For non-const ICU4C calls use:
120 * ICUPointer::GetMut().
122 * This will propagate the `const` specifier from the ICU4C API call to the
123 * unified method, and it will be enforced by the compiler. This helps ensure
124 * a consistent and correct implementation.
126 template <typename T
>
129 explicit ICUPointer(T
* aPointer
) : mPointer(aPointer
) {}
131 // Only allow moves of ICUPointers, no copies.
132 ICUPointer(ICUPointer
&& other
) noexcept
= default;
133 ICUPointer
& operator=(ICUPointer
&& other
) noexcept
= default;
135 // Implicitly take ownership of a raw pointer through copy assignment.
136 ICUPointer
& operator=(T
* aPointer
) noexcept
{
141 const T
* GetConst() const { return const_cast<const T
*>(mPointer
); }
142 T
* GetMut() { return mPointer
; }
144 explicit operator bool() const { return !!mPointer
; }
151 * Calling into ICU with the C-API can be a bit tricky. This function wraps up
152 * the relatively risky operations involving pointers, lengths, and buffers into
153 * a simpler call. This function accepts a lambda that performs the ICU call,
154 * and returns the length of characters in the buffer. When using a temporary
155 * stack-based buffer, the calls can often be done in one trip. However, if
156 * additional memory is needed, this function will call the C-API twice, in
157 * order to first get the size of the result, and then second to copy the result
158 * over to the buffer.
160 template <typename ICUStringFunction
, typename Buffer
>
161 static ICUResult
FillBufferWithICUCall(Buffer
& buffer
,
162 const ICUStringFunction
& strFn
) {
163 static_assert(std::is_same_v
<typename
Buffer::CharType
, char16_t
> ||
164 std::is_same_v
<typename
Buffer::CharType
, char> ||
165 std::is_same_v
<typename
Buffer::CharType
, uint8_t>);
167 UErrorCode status
= U_ZERO_ERROR
;
168 int32_t length
= strFn(buffer
.data(), buffer
.capacity(), &status
);
169 if (status
== U_BUFFER_OVERFLOW_ERROR
) {
170 MOZ_ASSERT(length
>= 0);
172 if (!buffer
.reserve(length
)) {
173 return Err(ICUError::OutOfMemory
);
176 status
= U_ZERO_ERROR
;
177 mozilla::DebugOnly
<int32_t> length2
= strFn(buffer
.data(), length
, &status
);
178 MOZ_ASSERT(length
== length2
);
180 if (!ICUSuccessForStringSpan(status
)) {
181 return Err(ToICUError(status
));
184 buffer
.written(length
);
190 * Adaptor for mozilla::Vector to implement the Buffer interface.
192 template <typename T
, size_t N
>
193 class VectorToBufferAdaptor
{
194 mozilla::Vector
<T
, N
>& vector
;
199 explicit VectorToBufferAdaptor(mozilla::Vector
<T
, N
>& vector
)
202 T
* data() { return vector
.begin(); }
204 size_t capacity() const { return vector
.capacity(); }
206 bool reserve(size_t length
) { return vector
.reserve(length
); }
208 void written(size_t length
) {
209 mozilla::DebugOnly
<bool> result
= vector
.resizeUninitialized(length
);
215 * An overload of FillBufferWithICUCall that accepts a mozilla::Vector rather
218 template <typename ICUStringFunction
, size_t InlineSize
, typename CharType
>
219 static ICUResult
FillBufferWithICUCall(Vector
<CharType
, InlineSize
>& vector
,
220 const ICUStringFunction
& strFn
) {
221 VectorToBufferAdaptor
buffer(vector
);
222 return FillBufferWithICUCall(buffer
, strFn
);
225 #ifndef JS_STANDALONE
227 * mozilla::intl APIs require sizeable buffers. This class abstracts over
230 template <typename T
>
231 class nsTArrayToBufferAdapter
{
235 // Do not allow copy or move. Move could be added in the future if needed.
236 nsTArrayToBufferAdapter(const nsTArrayToBufferAdapter
&) = delete;
237 nsTArrayToBufferAdapter
& operator=(const nsTArrayToBufferAdapter
&) = delete;
239 explicit nsTArrayToBufferAdapter(nsTArray
<CharType
>& aArray
)
243 * Ensures the buffer has enough space to accommodate |size| elements.
245 [[nodiscard
]] bool reserve(size_t size
) {
246 // Use fallible behavior here.
247 return mArray
.SetCapacity(size
, fallible
);
251 * Returns the raw data inside the buffer.
253 CharType
* data() { return mArray
.Elements(); }
256 * Returns the count of elements written into the buffer.
258 size_t length() const { return mArray
.Length(); }
261 * Returns the buffer's overall capacity.
263 size_t capacity() const { return mArray
.Capacity(); }
266 * Resizes the buffer to the given amount of written elements.
268 void written(size_t amount
) {
269 MOZ_ASSERT(amount
<= mArray
.Capacity());
270 // This sets |mArray|'s internal size so that it matches how much was
271 // written. This is necessary because the write happens across FFI
273 mArray
.SetLengthAndRetainStorage(amount
);
277 nsTArray
<CharType
>& mArray
;
280 template <typename T
, size_t N
>
281 class AutoTArrayToBufferAdapter
: public nsTArrayToBufferAdapter
<T
> {
282 using nsTArrayToBufferAdapter
<T
>::nsTArrayToBufferAdapter
;
286 * An overload of FillBufferWithICUCall that accepts a nsTArray.
288 template <typename ICUStringFunction
, typename CharType
>
289 static ICUResult
FillBufferWithICUCall(nsTArray
<CharType
>& array
,
290 const ICUStringFunction
& strFn
) {
291 nsTArrayToBufferAdapter
<CharType
> buffer(array
);
292 return FillBufferWithICUCall(buffer
, strFn
);
295 template <typename ICUStringFunction
, typename CharType
, size_t N
>
296 static ICUResult
FillBufferWithICUCall(AutoTArray
<CharType
, N
>& array
,
297 const ICUStringFunction
& strFn
) {
298 AutoTArrayToBufferAdapter
<CharType
, N
> buffer(array
);
299 return FillBufferWithICUCall(buffer
, strFn
);
304 * Fill a UTF-8 or a UTF-16 buffer with a UTF-16 span. ICU4C mostly uses UTF-16
305 * internally, but different consumers may have different situations with their
308 template <typename Buffer
>
309 [[nodiscard
]] bool FillBuffer(Span
<const char16_t
> utf16Span
,
310 Buffer
& targetBuffer
) {
311 static_assert(std::is_same_v
<typename
Buffer::CharType
, char> ||
312 std::is_same_v
<typename
Buffer::CharType
, unsigned char> ||
313 std::is_same_v
<typename
Buffer::CharType
, char16_t
>);
315 if constexpr (std::is_same_v
<typename
Buffer::CharType
, char> ||
316 std::is_same_v
<typename
Buffer::CharType
, unsigned char>) {
317 if (utf16Span
.Length() & mozilla::tl::MulOverflowMask
<3>::value
) {
318 // Tripling the size of the buffer overflows the size_t.
322 if (!targetBuffer
.reserve(3 * utf16Span
.Length())) {
326 size_t amount
= ConvertUtf16toUtf8(
327 utf16Span
, Span(reinterpret_cast<char*>(targetBuffer
.data()),
328 targetBuffer
.capacity()));
330 targetBuffer
.written(amount
);
332 if constexpr (std::is_same_v
<typename
Buffer::CharType
, char16_t
>) {
333 size_t amount
= utf16Span
.Length();
334 if (!targetBuffer
.reserve(amount
)) {
337 for (size_t i
= 0; i
< amount
; i
++) {
338 targetBuffer
.data()[i
] = utf16Span
[i
];
340 targetBuffer
.written(amount
);
347 * Fill a UTF-8 or a UTF-16 buffer with a UTF-8 span. ICU4C mostly uses UTF-16
348 * internally, but different consumers may have different situations with their
351 template <typename Buffer
>
352 [[nodiscard
]] bool FillBuffer(Span
<const char> utf8Span
, Buffer
& targetBuffer
) {
353 static_assert(std::is_same_v
<typename
Buffer::CharType
, char> ||
354 std::is_same_v
<typename
Buffer::CharType
, unsigned char> ||
355 std::is_same_v
<typename
Buffer::CharType
, char16_t
>);
357 if constexpr (std::is_same_v
<typename
Buffer::CharType
, char> ||
358 std::is_same_v
<typename
Buffer::CharType
, unsigned char>) {
359 size_t amount
= utf8Span
.Length();
360 if (!targetBuffer
.reserve(amount
)) {
363 for (size_t i
= 0; i
< amount
; i
++) {
364 targetBuffer
.data()[i
] =
365 // Static cast in case of a mismatch between `unsigned char` and
367 static_cast<typename
Buffer::CharType
>(utf8Span
[i
]);
369 targetBuffer
.written(amount
);
371 if constexpr (std::is_same_v
<typename
Buffer::CharType
, char16_t
>) {
372 if (!targetBuffer
.reserve(utf8Span
.Length() + 1)) {
376 size_t amount
= ConvertUtf8toUtf16(
377 utf8Span
, Span(targetBuffer
.data(), targetBuffer
.capacity()));
379 targetBuffer
.written(amount
);
386 * It is convenient for callers to be able to pass in UTF-8 strings to the API.
387 * This function can be used to convert that to a stack-allocated UTF-16
388 * mozilla::Vector that can then be passed into ICU calls. The string will be
391 template <size_t StackSize
>
392 [[nodiscard
]] static bool FillUTF16Vector(
393 Span
<const char> utf8Span
,
394 mozilla::Vector
<char16_t
, StackSize
>& utf16TargetVec
) {
395 // Per ConvertUtf8toUtf16: The length of aDest must be at least one greater
396 // than the length of aSource. This additional length will be used for null
398 if (!utf16TargetVec
.reserve(utf8Span
.Length() + 1)) {
402 // ConvertUtf8toUtf16 fills the buffer with the data, but the length of the
403 // vector is unchanged.
404 size_t length
= ConvertUtf8toUtf16(
405 utf8Span
, Span(utf16TargetVec
.begin(), utf16TargetVec
.capacity()));
407 // Assert that the last element is free for writing a null terminator.
408 MOZ_ASSERT(length
< utf16TargetVec
.capacity());
409 utf16TargetVec
.begin()[length
] = '\0';
411 // The call to resizeUninitialized notifies the vector of how much was written
412 // exclusive of the null terminated character.
413 return utf16TargetVec
.resizeUninitialized(length
);
417 * An iterable class that wraps calls to the ICU UEnumeration C API.
421 * // Make sure the range expression is non-temporary, otherwise there is a
422 * // risk of undefined behavior:
423 * auto result = Calendar::GetBcp47KeywordValuesForLocale("en-US");
425 * for (auto name : result.unwrap()) {
426 * MOZ_ASSERT(name.unwrap(), "An iterable value exists.");
429 template <typename CharType
, typename T
, T(Mapper
)(const CharType
*, int32_t)>
433 friend class Iterator
;
435 // Transfer ownership of the UEnumeration in the move constructor.
436 Enumeration(Enumeration
&& other
) noexcept
437 : mUEnumeration(other
.mUEnumeration
) {
438 other
.mUEnumeration
= nullptr;
441 // Transfer ownership of the UEnumeration in the move assignment operator.
442 Enumeration
& operator=(Enumeration
&& other
) noexcept
{
443 if (this == &other
) {
447 uenum_close(mUEnumeration
);
449 mUEnumeration
= other
.mUEnumeration
;
450 other
.mUEnumeration
= nullptr;
455 Enumeration
& mEnumeration
;
456 // `Nothing` signifies that no enumeration has been loaded through ICU yet.
457 Maybe
<int32_t> mIteration
= Nothing
{};
458 const CharType
* mNext
= nullptr;
459 int32_t mNextLength
= 0;
462 using value_type
= const CharType
*;
464 using iterator_category
= std::input_iterator_tag
;
466 explicit Iterator(Enumeration
& aEnumeration
, bool aIsBegin
)
467 : mEnumeration(aEnumeration
) {
473 Iterator
& operator++() {
478 Iterator
operator++(int) {
479 Iterator retval
= *this;
484 bool operator==(Iterator other
) const {
485 return mIteration
== other
.mIteration
;
488 bool operator!=(Iterator other
) const { return !(*this == other
); }
490 T
operator*() const {
491 // Map the iterated value to something new.
492 return Mapper(mNext
, mNextLength
);
496 void AdvanceUEnum() {
497 if (mIteration
.isNothing()) {
498 mIteration
= Some(-1);
500 UErrorCode status
= U_ZERO_ERROR
;
501 if constexpr (std::is_same_v
<CharType
, char16_t
>) {
502 mNext
= uenum_unext(mEnumeration
.mUEnumeration
, &mNextLength
, &status
);
504 static_assert(std::is_same_v
<CharType
, char>,
505 "Only char16_t and char are supported by "
506 "mozilla::intl::Enumeration.");
507 mNext
= uenum_next(mEnumeration
.mUEnumeration
, &mNextLength
, &status
);
509 if (U_FAILURE(status
)) {
516 // The iterator is complete.
517 mIteration
= Nothing
{};
522 Iterator
begin() { return Iterator(*this, true); }
523 Iterator
end() { return Iterator(*this, false); }
525 explicit Enumeration(UEnumeration
* aUEnumeration
)
526 : mUEnumeration(aUEnumeration
) {}
530 // Only close when the object is being destructed, not moved.
531 uenum_close(mUEnumeration
);
536 UEnumeration
* mUEnumeration
= nullptr;
539 template <typename CharType
>
540 Result
<Span
<const CharType
>, InternalError
> SpanMapper(const CharType
* string
,
542 // Return the raw value from this Iterator.
543 if (string
== nullptr) {
544 return Err(InternalError
{});
546 MOZ_ASSERT(length
>= 0);
547 return Span
<const CharType
>(string
, static_cast<size_t>(length
));
550 template <typename CharType
>
551 using SpanResult
= Result
<Span
<const CharType
>, InternalError
>;
553 template <typename CharType
>
554 using SpanEnumeration
= Enumeration
<CharType
, SpanResult
<CharType
>, SpanMapper
>;
557 * An iterable class that wraps calls to ICU's available locales API.
559 template <int32_t(CountAvailable
)(), const char*(GetAvailable
)(int32_t)>
560 class AvailableLocalesEnumeration final
{
561 // The overall count of available locales.
562 int32_t mLocalesCount
= 0;
565 AvailableLocalesEnumeration() { mLocalesCount
= CountAvailable(); }
// Member types describing this iterator: a single-pass input iterator whose
// values are C strings.
using iterator_category = std::input_iterator_tag;
using value_type = const char*;
using difference_type = ptrdiff_t;
using pointer = value_type*;
using reference = value_type&;
577 // The current position in the list of available locales.
578 int32_t mLocalesPos
= 0;
581 explicit Iterator(int32_t aLocalesPos
) : mLocalesPos(aLocalesPos
) {}
583 Iterator
& operator++() {
588 Iterator
operator++(int) {
589 Iterator result
= *this;
594 bool operator==(const Iterator
& aOther
) const {
595 return mLocalesPos
== aOther
.mLocalesPos
;
598 bool operator!=(const Iterator
& aOther
) const { return !(*this == aOther
); }
600 value_type
operator*() const { return GetAvailable(mLocalesPos
); }
603 // std::iterator begin() and end() methods.
606 * Return an iterator pointing to the first available locale.
608 Iterator
begin() const { return Iterator(0); }
611 * Return an iterator pointing to one past the last available locale.
613 Iterator
end() const { return Iterator(mLocalesCount
); }
617 * A helper class to wrap calling ICU function in cpp file so we don't have to
618 * include the ICU header here.
620 class FormattedResult
{
622 static Result
<Span
<const char16_t
>, ICUError
> ToSpanImpl(
623 const UFormattedValue
* value
);
627 * A RAII class to hold the formatted value of format result.
629 * The caller will need to create this AutoFormattedResult on the stack, with
630 * the following parameters:
631 * 1. Native ICU type.
632 * 2. An ICU function which opens the result.
633 * 3. An ICU function which can get the result as UFormattedValue.
634 * 4. An ICU function which closes the result.
636 * After the object is created, caller needs to call IsValid() method to check
637 * if the native object has been created properly, and then passes this
638 * object to other format interfaces.
639 * The format result will be stored in this object, the caller can use ToSpan()
640 * method to get the formatted string.
642 * The methods GetFormatted() and Value() are private methods since they expose
643 * native ICU types. If the caller wants to call these methods, the caller needs
644 * to register itself as a friend class in AutoFormattedResult.
646 * The formatted value and the native ICU object will be released once this
647 * class is destructed.
649 template <typename T
, T
*(Open
)(UErrorCode
*),
650 const UFormattedValue
*(GetValue
)(const T
*, UErrorCode
*),
652 class MOZ_RAII AutoFormattedResult
: FormattedResult
{
654 AutoFormattedResult() {
655 mFormatted
= Open(&mError
);
656 if (U_FAILURE(mError
)) {
657 mFormatted
= nullptr;
660 ~AutoFormattedResult() {
666 AutoFormattedResult(const AutoFormattedResult
& other
) = delete;
667 AutoFormattedResult
& operator=(const AutoFormattedResult
& other
) = delete;
669 AutoFormattedResult(AutoFormattedResult
&& other
) = delete;
670 AutoFormattedResult
& operator=(AutoFormattedResult
&& other
) = delete;
673 * Check if the native UFormattedDateInterval was created successfully.
675 bool IsValid() const { return !!mFormatted
; }
678 * Get error code if IsValid() returns false.
680 ICUError
GetError() const { return ToICUError(mError
); }
683 * Get the formatted result.
685 Result
<Span
<const char16_t
>, ICUError
> ToSpan() const {
687 return Err(GetError());
690 const UFormattedValue
* value
= Value();
692 return Err(ICUError::InternalError
);
695 return ToSpanImpl(value
);
699 friend class DateIntervalFormat
;
700 friend class ListFormat
;
701 T
* GetFormatted() const { return mFormatted
; }
703 const UFormattedValue
* Value() const {
708 UErrorCode status
= U_ZERO_ERROR
;
709 const UFormattedValue
* value
= GetValue(mFormatted
, &status
);
710 if (U_FAILURE(status
)) {
717 T
* mFormatted
= nullptr;
718 UErrorCode mError
= U_ZERO_ERROR
;
720 } // namespace mozilla::intl
722 #endif /* intl_components_ICUUtils_h */