1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsConverterInputStream.h"
7 #include "nsIInputStream.h"
8 #include "nsReadLine.h"
9 #include "nsStreamUtils.h"
14 using namespace mozilla
;
// Default buffer size. Init() uses it for both the input byte buffer and the
// UTF-16 output buffer when the caller passes a non-positive aBufferSize.
16 #define CONVERTER_BUFFER_SIZE 8192
// Interface list for nsConverterInputStream: the converter, unichar and
// unichar-line input stream interfaces named below.
18 NS_IMPL_ISUPPORTS(nsConverterInputStream
, nsIConverterInputStream
,
19 nsIUnicharInputStream
, nsIUnicharLineInputStream
)
// Prepares the stream for decoding: resolves a charset label to an encoding,
// creates a decoder for it, and allocates the byte and UTF-16 buffers.
//
// aStream          - source byte stream.
// aCharset         - encoding label to decode from.
// aBufferSize      - requested input buffer size; a value <= 0 selects
//                    CONVERTER_BUFFER_SIZE for both buffers.
// aReplacementChar - when zero, decoding errors are treated as fatal
//                    (see mErrorsAreFatal below).
//
// Returns NS_ERROR_OUT_OF_MEMORY if the output size computation overflows or
// either buffer cannot be allocated.
22 nsConverterInputStream::Init(nsIInputStream
* aStream
, const char* aCharset
,
23 int32_t aBufferSize
, char16_t aReplacementChar
) {
// NOTE(review): the condition guarding this assignment is not shown here;
// presumably "UTF-8" is the fallback when aCharset is null -- confirm.
26 label
.AssignLiteral("UTF-8");
31 auto encoding
= Encoding::ForLabelNoReplacement(label
);
// NOTE(review): presumably returned only when no encoding matched the label;
// the guarding check is not visible -- confirm.
33 return NS_ERROR_UCONV_NOCONV
;
35 // Previously, the implementation auto-switched only
36 // between the two UTF-16 variants and only when
37 // initialized with an endianness-unspecific label.
38 mConverter
= encoding
->NewDecoder();
40 size_t outputBufferSize
;
41 if (aBufferSize
<= 0) {
// Non-positive request: fall back to the default size for both buffers.
42 aBufferSize
= CONVERTER_BUFFER_SIZE
;
43 outputBufferSize
= CONVERTER_BUFFER_SIZE
;
45 // NetUtil.jsm assumes that if buffer size equals
46 // the input size, the whole stream will be processed
47 // as one readString. This is not true with encoding_rs,
48 // because encoding_rs might want to see space for a
49 // surrogate pair, so let's compute a larger output
// Ask the decoder how many UTF-16 code units aBufferSize input bytes may
// need; the result is a checked integer so overflow is detected below.
51 CheckedInt
<size_t> needed
= mConverter
->MaxUTF16BufferLength(aBufferSize
);
52 if (!needed
.isValid()) {
53 return NS_ERROR_OUT_OF_MEMORY
;
55 outputBufferSize
= needed
.value();
58 // set up our buffers.
// Both allocations are fallible; failure of either is reported as OOM.
// Note the byte buffer only reserves capacity while the UTF-16 buffer is
// given its full length up front.
59 if (!mByteData
.SetCapacity(aBufferSize
, mozilla::fallible
) ||
60 !mUnicharData
.SetLength(outputBufferSize
, mozilla::fallible
)) {
61 return NS_ERROR_OUT_OF_MEMORY
;
// A zero replacement character means decode errors are fatal.
65 mErrorsAreFatal
= !aReplacementChar
;
// Closes the underlying input stream, if one is set, and drops the line
// buffer used by ReadLine().
70 nsConverterInputStream::Close() {
// With no input stream there is nothing to close, so the status is NS_OK.
71 nsresult rv
= mInput
? mInput
->Close() : NS_OK
;
72 mLineBuffer
= nullptr;
// Copies buffered UTF-16 code units into a caller-supplied buffer.
//
// aBuf       - destination buffer.
// aCount     - limit on the number of code units to copy (compared against
//              the buffered amount below).
// aReadCount - receives the number of code units actually copied.
81 nsConverterInputStream::Read(char16_t
* aBuf
, uint32_t aCount
,
82 uint32_t* aReadCount
) {
83 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// Code units decoded but not yet handed out.
84 uint32_t readCount
= mUnicharDataLength
- mUnicharDataOffset
;
86 // Fill the unichar buffer
// NOTE(review): presumably only reached when nothing is buffered, as in
// ReadSegments()/ReadString(); the guard is not visible here -- confirm.
87 readCount
= Fill(&mLastErrorCode
);
90 return mLastErrorCode
;
// NOTE(review): presumably clamps readCount to aCount; the body of this
// branch is not visible here -- confirm.
93 if (readCount
> aCount
) {
// Copy from the current read position and advance past what was consumed.
96 memcpy(aBuf
, mUnicharData
.Elements() + mUnicharDataOffset
,
97 readCount
* sizeof(char16_t
));
98 mUnicharDataOffset
+= readCount
;
99 *aReadCount
= readCount
;
// Hands buffered UTF-16 code units to a writer callback, possibly in several
// calls, without an intermediate copy.
//
// aWriter    - callback invoked with a pointer into the internal buffer.
// aClosure   - opaque value passed through to aWriter.
// aCount     - upper bound on the number of code units to hand out.
// aReadCount - receives the total number of code units the writer consumed.
104 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter
,
105 void* aClosure
, uint32_t aCount
,
106 uint32_t* aReadCount
) {
107 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// Code units decoded but not yet handed out.
108 uint32_t codeUnitsToWrite
= mUnicharDataLength
- mUnicharDataOffset
;
109 if (0 == codeUnitsToWrite
) {
110 // Fill the unichar buffer
111 codeUnitsToWrite
= Fill(&mLastErrorCode
);
// Still nothing after refilling: report the status recorded by Fill().
112 if (codeUnitsToWrite
== 0) {
114 return mLastErrorCode
;
// Never offer the writer more than the caller asked for.
118 if (codeUnitsToWrite
> aCount
) {
119 codeUnitsToWrite
= aCount
;
122 uint32_t codeUnitsWritten
;
123 uint32_t totalCodeUnitsWritten
= 0;
// Keep calling the writer until everything offered has been consumed.
// NOTE(review): the handling of the writer's return value is not visible
// here; presumably the loop stops on failure -- confirm.
125 while (codeUnitsToWrite
) {
127 aWriter(this, aClosure
, mUnicharData
.Elements() + mUnicharDataOffset
,
128 totalCodeUnitsWritten
, codeUnitsToWrite
, &codeUnitsWritten
);
130 // don't propagate errors to the caller
// Account for what the writer consumed and advance the read position.
134 codeUnitsToWrite
-= codeUnitsWritten
;
135 totalCodeUnitsWritten
+= codeUnitsWritten
;
136 mUnicharDataOffset
+= codeUnitsWritten
;
139 *aReadCount
= totalCodeUnitsWritten
;
// Assigns buffered UTF-16 code units to a string.
//
// aCount     - limit on the number of code units to return (compared against
//              the buffered amount below).
// aString    - receives the decoded text; its previous content is replaced.
// aReadCount - receives the number of code units assigned.
145 nsConverterInputStream::ReadString(uint32_t aCount
, nsAString
& aString
,
146 uint32_t* aReadCount
) {
147 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// Code units decoded but not yet handed out.
148 uint32_t readCount
= mUnicharDataLength
- mUnicharDataOffset
;
149 if (0 == readCount
) {
150 // Fill the unichar buffer
151 readCount
= Fill(&mLastErrorCode
);
// Still nothing after refilling: report the status recorded by Fill().
152 if (readCount
== 0) {
154 return mLastErrorCode
;
// NOTE(review): presumably clamps readCount to aCount; the body of this
// branch is not visible here -- confirm.
157 if (readCount
> aCount
) {
// Assign from the current read position and advance past what was consumed.
160 const char16_t
* buf
= mUnicharData
.Elements() + mUnicharDataOffset
;
161 aString
.Assign(buf
, readCount
);
162 mUnicharDataOffset
+= readCount
;
163 *aReadCount
= readCount
;
// Refills the UTF-16 buffer by reading bytes from mInput and decoding them.
//
// aErrorCode - receives the status of the operation.
//
// Returns the number of UTF-16 code units now available in mUnicharData
// (mUnicharDataLength).
167 uint32_t nsConverterInputStream::Fill(nsresult
* aErrorCode
) {
// NOTE(review): the guard for this branch is not visible here; per the
// comment below it is the "stream already closed" case -- confirm.
169 // We already closed the stream!
170 *aErrorCode
= NS_BASE_STREAM_CLOSED
;
174 if (NS_FAILED(mLastErrorCode
)) {
175 // We failed to completely convert last time, and error-recovery
176 // is disabled. We will fare no better this time, so...
177 *aErrorCode
= mLastErrorCode
;
181 // mUnicharData.Length() is the buffer length, not the fill status.
182 // mUnicharDataLength reflects the current fill status.
183 mUnicharDataLength
= 0;
184 // Whenever we convert, mUnicharData is logically empty.
185 mUnicharDataOffset
= 0;
187 // Continue trying to read from the source stream until we successfully decode
188 // a character or encounter an error, as returning `0` here implies that the
189 // stream is complete.
191 // If the converter has been cleared, we've fully consumed the stream, and
192 // want to report EOF.
193 while (mUnicharDataLength
== 0 && mConverter
) {
// NOTE(review): "equal sizes for the two buffers" below matches only the
// default path in Init(); for a positive aBufferSize, Init() sizes the
// output buffer via MaxUTF16BufferLength(). This comment may be stale.
194 // We assume a many to one conversion and are using equal sizes for
195 // the two buffers. However if an error happens at the very start
196 // of a byte buffer we may end up in a situation where n bytes lead
197 // to n+1 unicode chars. Thus we need to keep track of the leftover
198 // bytes as we convert.
// mLeftOverBytes is the count of undecoded bytes from the previous pass
// (set after decoding below); nb receives the number of newly read bytes.
201 *aErrorCode
= NS_FillArray(mByteData
, mInput
, mLeftOverBytes
, &nb
);
202 if (NS_FAILED(*aErrorCode
)) {
206 NS_ASSERTION(uint32_t(nb
) + mLeftOverBytes
== mByteData
.Length(),
207 "mByteData is lying to us somewhere");
209 // If `NS_FillArray` failed to read any new bytes, this is the last read,
210 // and we're at the end of the stream.
211 bool last
= (nb
== 0);
213 // Now convert as much of the byte buffer to unicode as possible
214 auto src
= AsBytes(Span(mByteData
));
215 auto dst
= Span(mUnicharData
);
217 // Truncation from size_t to uint32_t below is OK, because the sizes
218 // are bounded by the lengths of mByteData and mUnicharData.
// Fatal-error mode uses the decoder variant without replacement; the other
// call is the variant whose fourth result is discarded.
222 if (mErrorsAreFatal
) {
223 std::tie(result
, read
, written
) =
224 mConverter
->DecodeToUTF16WithoutReplacement(src
, dst
, last
);
226 std::tie(result
, read
, written
, std::ignore
) =
227 mConverter
->DecodeToUTF16(src
, dst
, last
);
// Bytes the decoder did not consume are carried over to the next pass.
229 mLeftOverBytes
= mByteData
.Length() - read
;
230 mUnicharDataLength
= written
;
231 // Clear `mConverter` if we reached the end of the stream, as we can't
232 // call methods on it anymore. This will also signal EOF to the caller
233 // through the loop condition.
235 MOZ_ASSERT(mLeftOverBytes
== 0,
236 "Failed to read all bytes on the last pass?");
237 mConverter
= nullptr;
239 // If we got a decode error, we're done.
// Any result other than "input empty" or "output full" is treated as a
// decode error, which is only expected in fatal-error mode.
240 if (result
!= kInputEmpty
&& result
!= kOutputFull
) {
241 MOZ_ASSERT(mErrorsAreFatal
, "How come DecodeToUTF16() reported error?");
242 *aErrorCode
= NS_ERROR_UDEC_ILLEGALINPUT
;
247 return mUnicharDataLength
;
// Reads one line of decoded text by delegating to NS_ReadLine() with this
// stream and its line buffer.
//
// aLine   - passed through to NS_ReadLine() to receive the line.
// aResult - passed through to NS_ReadLine().
251 nsConverterInputStream::ReadLine(nsAString
& aLine
, bool* aResult
) {
// NOTE(review): presumably allocated lazily, only when mLineBuffer is not
// yet set; the guard is not visible here -- confirm.
253 mLineBuffer
= MakeUnique
<nsLineBuffer
<char16_t
>>();
255 return NS_ReadLine(this, mLineBuffer
.get(), aLine
, aResult
);