Backed out changeset 2450366cf7ca (bug 1891629) for causing win msix mochitest failures
[gecko.git] / intl / uconv / nsConverterInputStream.cpp
blob96fad345b29f34b61ed250f48de0dfb9f584c12b
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsConverterInputStream.h"
7 #include "nsIInputStream.h"
8 #include "nsReadLine.h"
9 #include "nsStreamUtils.h"
11 #include <algorithm>
12 #include <tuple>
14 using namespace mozilla;
16 #define CONVERTER_BUFFER_SIZE 8192
// Interface boilerplate for nsConverterInputStream: the class is registered
// with the three interfaces listed below (the converter stream itself, the
// generic unichar input stream, and the line-oriented unichar input stream).
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// expands to the AddRef/Release/QueryInterface implementations - confirm.
18 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
19 nsIUnicharInputStream, nsIUnicharLineInputStream)
21 NS_IMETHODIMP
22 nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
23 int32_t aBufferSize, char16_t aReplacementChar) {
24 nsAutoCString label;
25 if (!aCharset) {
26 label.AssignLiteral("UTF-8");
27 } else {
28 label = aCharset;
31 auto encoding = Encoding::ForLabelNoReplacement(label);
32 if (!encoding) {
33 return NS_ERROR_UCONV_NOCONV;
35 // Previously, the implementation auto-switched only
36 // between the two UTF-16 variants and only when
37 // initialized with an endianness-unspecific label.
38 mConverter = encoding->NewDecoder();
40 size_t outputBufferSize;
41 if (aBufferSize <= 0) {
42 aBufferSize = CONVERTER_BUFFER_SIZE;
43 outputBufferSize = CONVERTER_BUFFER_SIZE;
44 } else {
45 // NetUtil.sys.mjs assumes that if buffer size equals
46 // the input size, the whole stream will be processed
47 // as one readString. This is not true with encoding_rs,
48 // because encoding_rs might want to see space for a
49 // surrogate pair, so let's compute a larger output
50 // buffer length.
51 CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
52 if (!needed.isValid()) {
53 return NS_ERROR_OUT_OF_MEMORY;
55 outputBufferSize = needed.value();
58 // set up our buffers.
59 if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
60 !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
61 return NS_ERROR_OUT_OF_MEMORY;
64 mInput = aStream;
65 mErrorsAreFatal = !aReplacementChar;
66 return NS_OK;
69 NS_IMETHODIMP
70 nsConverterInputStream::Close() {
71 nsresult rv = mInput ? mInput->Close() : NS_OK;
72 mLineBuffer = nullptr;
73 mInput = nullptr;
74 mConverter = nullptr;
75 mByteData.Clear();
76 mUnicharData.Clear();
77 return rv;
80 NS_IMETHODIMP
81 nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
82 uint32_t* aReadCount) {
83 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
84 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
85 if (0 == readCount) {
86 // Fill the unichar buffer
87 readCount = Fill(&mLastErrorCode);
88 if (readCount == 0) {
89 *aReadCount = 0;
90 return mLastErrorCode;
93 if (readCount > aCount) {
94 readCount = aCount;
96 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
97 readCount * sizeof(char16_t));
98 mUnicharDataOffset += readCount;
99 *aReadCount = readCount;
100 return NS_OK;
103 NS_IMETHODIMP
104 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
105 void* aClosure, uint32_t aCount,
106 uint32_t* aReadCount) {
107 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
108 uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
109 if (0 == codeUnitsToWrite) {
110 // Fill the unichar buffer
111 codeUnitsToWrite = Fill(&mLastErrorCode);
112 if (codeUnitsToWrite == 0) {
113 *aReadCount = 0;
114 return mLastErrorCode;
118 if (codeUnitsToWrite > aCount) {
119 codeUnitsToWrite = aCount;
122 uint32_t codeUnitsWritten;
123 uint32_t totalCodeUnitsWritten = 0;
125 while (codeUnitsToWrite) {
126 nsresult rv =
127 aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
128 totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
129 if (NS_FAILED(rv)) {
130 // don't propagate errors to the caller
131 break;
134 codeUnitsToWrite -= codeUnitsWritten;
135 totalCodeUnitsWritten += codeUnitsWritten;
136 mUnicharDataOffset += codeUnitsWritten;
139 *aReadCount = totalCodeUnitsWritten;
141 return NS_OK;
144 NS_IMETHODIMP
145 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
146 uint32_t* aReadCount) {
147 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
148 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
149 if (0 == readCount) {
150 // Fill the unichar buffer
151 readCount = Fill(&mLastErrorCode);
152 if (readCount == 0) {
153 *aReadCount = 0;
154 return mLastErrorCode;
157 if (readCount > aCount) {
158 readCount = aCount;
160 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
161 aString.Assign(buf, readCount);
162 mUnicharDataOffset += readCount;
163 *aReadCount = readCount;
164 return NS_OK;
167 uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
168 if (!mInput) {
169 // We already closed the stream!
170 *aErrorCode = NS_BASE_STREAM_CLOSED;
171 return 0;
174 if (NS_FAILED(mLastErrorCode)) {
175 // We failed to completely convert last time, and error-recovery
176 // is disabled. We will fare no better this time, so...
177 *aErrorCode = mLastErrorCode;
178 return 0;
181 // mUnicharData.Length() is the buffer length, not the fill status.
182 // mUnicharDataLength reflects the current fill status.
183 mUnicharDataLength = 0;
184 // Whenever we convert, mUnicharData is logically empty.
185 mUnicharDataOffset = 0;
187 // Continue trying to read from the source stream until we successfully decode
188 // a character or encounter an error, as returning `0` here implies that the
189 // stream is complete.
191 // If the converter has been cleared, we've fully consumed the stream, and
192 // want to report EOF.
193 while (mUnicharDataLength == 0 && mConverter) {
194 // We assume a many to one conversion and are using equal sizes for
195 // the two buffers. However if an error happens at the very start
196 // of a byte buffer we may end up in a situation where n bytes lead
197 // to n+1 unicode chars. Thus we need to keep track of the leftover
198 // bytes as we convert.
200 uint32_t nb;
201 *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
202 if (NS_FAILED(*aErrorCode)) {
203 return 0;
206 NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
207 "mByteData is lying to us somewhere");
209 // If `NS_FillArray` failed to read any new bytes, this is the last read,
210 // and we're at the end of the stream.
211 bool last = (nb == 0);
213 // Now convert as much of the byte buffer to unicode as possible
214 auto src = AsBytes(Span(mByteData));
215 auto dst = Span(mUnicharData);
217 // Truncation from size_t to uint32_t below is OK, because the sizes
218 // are bounded by the lengths of mByteData and mUnicharData.
219 uint32_t result;
220 size_t read;
221 size_t written;
222 if (mErrorsAreFatal) {
223 std::tie(result, read, written) =
224 mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
225 } else {
226 std::tie(result, read, written, std::ignore) =
227 mConverter->DecodeToUTF16(src, dst, last);
229 mLeftOverBytes = mByteData.Length() - read;
230 mUnicharDataLength = written;
231 // Clear `mConverter` if we reached the end of the stream, as we can't
232 // call methods on it anymore. This will also signal EOF to the caller
233 // through the loop condition.
234 if (last) {
235 MOZ_ASSERT(mLeftOverBytes == 0,
236 "Failed to read all bytes on the last pass?");
237 mConverter = nullptr;
239 // If we got a decode error, we're done.
240 if (result != kInputEmpty && result != kOutputFull) {
241 MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
242 *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
243 return 0;
246 *aErrorCode = NS_OK;
247 return mUnicharDataLength;
250 NS_IMETHODIMP
251 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
252 if (!mLineBuffer) {
253 mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
255 return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);