2 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "nsIScriptableUConv.h"
9 #include "nsScriptableUConv.h"
10 #include "nsComponentManagerUtils.h"
14 using namespace mozilla
;
16 /* Implementation file */
// Invokes the NS_IMPL_ISUPPORTS macro for nsScriptableUnicodeConverter,
// passing nsIScriptableUnicodeConverter as the interface argument.
17 NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter
, nsIScriptableUnicodeConverter
)
// Constructor: initializes mIsInternal to false. No other member is set here.
19 nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
20 : mIsInternal(false) {}
// Destructor: defaulted out of line, so it performs no explicit cleanup of
// its own.
22 nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
// Encodes the UTF-16 string aSrc with mEncoder and writes the resulting bytes
// into _retval.
// Returns NS_ERROR_FAILURE if mEncoder is not set, and NS_ERROR_OUT_OF_MEMORY
// if the worst-case output length is invalid or exceeds UINT32_MAX, or if
// _retval cannot be resized to the number of bytes written.
// NOTE(review): several lines of this function (the return type, some
// guards, the loop structure and the closing braces) are not visible in this
// copy of the file, so the control flow connecting the statements below
// should be confirmed against the complete source.
25 nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString
& aSrc
,
26 nsACString
& _retval
) {
27 if (!mEncoder
) return NS_ERROR_FAILURE
;
29 // We can compute the length without replacement, because
30 // the replacement is only one byte long and a mappable character
31 // would always output something, i.e. at least one byte.
32 // When encoding to ISO-2022-JP, unmappables shouldn't be able
33 // to cause more escape sequences to be emitted than the mappable
34 // worst case where every input character causes an escape into
// NOTE(review): the comment above ends mid-sentence in this copy.
36 CheckedInt
<size_t> needed
=
37 mEncoder
->MaxBufferLengthFromUTF16WithoutReplacement(aSrc
.Length());
38 if (!needed
.isValid() || needed
.value() > UINT32_MAX
) {
39 return NS_ERROR_OUT_OF_MEMORY
;
// Ask _retval for a writable buffer of the worst-case size, passing
// `fallible` to the call.
42 auto dstChars
= _retval
.GetMutableData(needed
.value(), fallible
);
// NOTE(review): the guard for the return below is not visible here;
// presumably it tests whether dstChars was obtained -- confirm.
44 return NS_ERROR_OUT_OF_MEMORY
;
// src wraps the input string; dst is the output buffer viewed as writable
// bytes. totalWritten accumulates the number of bytes produced.
47 auto src
= Span(aSrc
);
48 auto dst
= AsWritableBytes(*dstChars
);
49 size_t totalWritten
= 0;
// Encode without replacement, capturing the result code together with the
// amounts read and written.
51 auto [result
, read
, written
] =
52 mEncoder
->EncodeFromUTF16WithoutReplacement(src
, dst
, false);
// A result that is neither kInputEmpty nor kOutputFull must leave spare room
// in dst (written < dst.Length()); presumably that room is for the one-byte
// replacement named in the assertion message -- the lines that would use it
// are not visible here.
53 if (result
!= kInputEmpty
&& result
!= kOutputFull
) {
54 MOZ_RELEASE_ASSERT(written
< dst
.Length(),
55 "Unmappables with one-byte replacement should not "
56 "exceed mappable worst case.");
59 totalWritten
+= written
;
// Once the encoder reports kInputEmpty, set _retval's length to the number
// of bytes actually written.
60 if (result
== kInputEmpty
) {
61 MOZ_ASSERT(totalWritten
<= UINT32_MAX
);
62 if (!_retval
.SetLength(totalWritten
, fallible
)) {
63 return NS_ERROR_OUT_OF_MEMORY
;
// Move dst past the bytes already written.
68 dst
= dst
.From(written
);
// Finishes the current conversion: encodes an empty input with `true` as the
// final argument to EncodeFromUTF16 into a 13-byte buffer in _retval, sets
// _retval's length to the number of bytes written, and then re-creates the
// decoder and encoder state in place from their own encodings.
// NOTE(review): several lines of this function (the return type, guards, the
// declarations of result/read/written, return statements and closing braces)
// are not visible in this copy, so which statements are conditional should
// be confirmed against the complete source.
73 nsScriptableUnicodeConverter::Finish(nsACString
& _retval
) {
74 // The documentation for this method says it should be called after
75 // ConvertFromUnicode(). However, our own tests called it after
76 // convertFromByteArray(), i.e. when *decoding*.
77 // Assuming that there exist extensions that similarly call
78 // this at the wrong time, let's deal. In general, it is a design
79 // error for this class to handle conversions in both directions.
// NOTE(review): the condition guarding this decoder reset is not visible
// here; presumably it covers the decode-only case described above -- confirm.
82 mDecoder
->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder
);
85 // If we are encoding to ISO-2022-JP, potentially
86 // transition back to the ASCII state. The buffer
87 // needs to be large enough for an additional NCR,
// NOTE(review): the comment above ends mid-sentence in this copy.
89 _retval
.SetLength(13);
90 auto dst
= AsWritableBytes(_retval
.GetMutableData(13));
// src is constructed from nullptr, i.e. there is no further input to encode.
91 Span
<char16_t
> src(nullptr);
95 std::tie(result
, read
, written
, std::ignore
) =
96 mEncoder
->EncodeFromUTF16(src
, dst
, true);
98 MOZ_ASSERT(result
== kInputEmpty
);
// Keep only the bytes the encoder actually produced.
99 _retval
.SetLength(written
);
// Re-initialize the decoder and the encoder in place from their encodings.
101 mDecoder
->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder
);
102 mEncoder
->Encoding()->NewEncoderInto(*mEncoder
);
// Decodes the bytes of aSrc with mDecoder and writes the UTF-16 result into
// _retval.
// Returns NS_ERROR_FAILURE if mDecoder is not set, NS_ERROR_OUT_OF_MEMORY if
// the worst-case UTF-16 length is invalid or exceeds UINT32_MAX or if
// _retval cannot be resized, and NS_ERROR_UDEC_ILLEGALINPUT if the encoding
// is UTF-8 and decoding without replacement does not end with kInputEmpty.
// NOTE(review): several lines of this function (the return type, a guard,
// the declaration of src, the branch structure and the closing braces) are
// not visible in this copy, so the control flow connecting the statements
// below should be confirmed against the complete source.
107 nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString
& aSrc
,
108 nsAString
& _retval
) {
109 if (!mDecoder
) return NS_ERROR_FAILURE
;
111 uint32_t length
= aSrc
.Length();
// Worst-case number of UTF-16 code units for `length` input bytes.
113 CheckedInt
<size_t> needed
= mDecoder
->MaxUTF16BufferLength(length
);
114 if (!needed
.isValid() || needed
.value() > UINT32_MAX
) {
115 return NS_ERROR_OUT_OF_MEMORY
;
// Ask _retval for a writable buffer of the worst-case size, passing
// `fallible` to the call.
118 auto dst
= _retval
.GetMutableData(needed
.value(), fallible
);
// NOTE(review): the guard for the return below is not visible here;
// presumably it tests whether dst was obtained -- confirm.
120 return NS_ERROR_OUT_OF_MEMORY
;
// View the input bytes as a span of uint8_t of `length` elements.
// NOTE(review): the left-hand side of this expression is not visible here;
// later lines use it under the name src.
124 Span(reinterpret_cast<const uint8_t*>(aSrc
.BeginReading()), length
);
128 // The UTF-8 decoder used to throw regardless of the error behavior.
129 // Simulating the old behavior for compatibility with legacy callers.
130 // If callers want control over the behavior, they should switch to
// NOTE(review): the comment above ends mid-sentence in this copy.
132 if (mDecoder
->Encoding() == UTF_8_ENCODING
) {
133 std::tie(result
, read
, written
) =
134 mDecoder
->DecodeToUTF16WithoutReplacement(src
, *dst
, false);
135 if (result
!= kInputEmpty
) {
136 return NS_ERROR_UDEC_ILLEGALINPUT
;
// NOTE(review): presumably the non-UTF-8 branch; the branch header is not
// visible here. The fourth tuple element of the result is discarded.
139 std::tie(result
, read
, written
, std::ignore
) =
140 mDecoder
->DecodeToUTF16(src
, *dst
, false);
// Debug-only checks: all input was consumed and the output fits the
// worst-case estimate.
142 MOZ_ASSERT(result
== kInputEmpty
);
143 MOZ_ASSERT(read
== length
);
144 MOZ_ASSERT(written
<= needed
.value());
// Set _retval's length to the number of code units actually written.
145 if (!_retval
.SetLength(written
, fallible
)) {
146 return NS_ERROR_OUT_OF_MEMORY
;
// Writes the name of mDecoder's encoding into aCharset.
// NOTE(review): the lines surrounding this call (the return type, any check
// that mDecoder is set, the return statement) are not visible in this copy.
152 nsScriptableUnicodeConverter::GetCharset(nsACString
& aCharset
) {
156 mDecoder
->Encoding()->Name(aCharset
);
// Sets the charset by delegating to InitConverter(aCharset) and returning
// its result unchanged.
162 nsScriptableUnicodeConverter::SetCharset(const nsACString
& aCharset
) {
163 return InitConverter(aCharset
);
// Copies the current value of mIsInternal into *aIsInternal.
// aIsInternal is dereferenced without a null check in the visible code.
167 nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal
) {
168 *aIsInternal
= mIsInternal
;
// Stores aIsInternal in mIsInternal.
173 nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal
) {
174 mIsInternal
= aIsInternal
;
// Resolves the label aCharset to an encoding via
// Encoding::ForLabelNoReplacement and sets up the converter members from it:
// mEncoder is created only when the encoding is neither UTF_16LE_ENCODING
// nor UTF_16BE_ENCODING, and mDecoder is created with BOM removal.
// NOTE(review): several lines of this function are not visible in this copy
// (the guard for the NS_ERROR_UCONV_NOCONV return, closing braces, and
// anything after the mDecoder assignment), so the exact control flow should
// be confirmed against the complete source.
178 nsresult
nsScriptableUnicodeConverter::InitConverter(
179 const nsACString
& aCharset
) {
183 auto encoding
= Encoding::ForLabelNoReplacement(aCharset
);
// NOTE(review): the guard for this return is not visible here; presumably
// it fires when no encoding was found for the label -- confirm.
185 return NS_ERROR_UCONV_NOCONV
;
187 if (!(encoding
== UTF_16LE_ENCODING
|| encoding
== UTF_16BE_ENCODING
)) {
188 mEncoder
= encoding
->NewEncoder();
190 mDecoder
= encoding
->NewDecoderWithBOMRemoval();