intl/uconv/nsConverterInputStream.cpp

   1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "nsConverterInputStream.h"
   7 #include "nsIInputStream.h"
   8 #include "nsReadLine.h"
   9 #include "nsStreamUtils.h"
  10
  11 #include <algorithm>
  12 #include <tuple>
  13
  14 using namespace mozilla;
  15
  // Fallback size used by Init() when the caller passes a non-positive
  // aBufferSize: it becomes both the byte-buffer capacity and the length of
  // the UTF-16 output buffer.
  16 #define CONVERTER_BUFFER_SIZE 8192
  17
  // Registers the three interfaces nsConverterInputStream exposes.
  // NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; it is
  // presumably what supplies the nsISupports boilerplate for the listed
  // interfaces -- confirm against the macro's definition.
  18 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
  19                   nsIUnicharInputStream, nsIUnicharLineInputStream)
  20
  21 NS_IMETHODIMP
  22 nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
  23                              int32_t aBufferSize, char16_t aReplacementChar) {
  24   nsAutoCString label;
  25   if (!aCharset) {
  26     label.AssignLiteral("UTF-8");
  27   } else {
  28     label = aCharset;
  29   }
  30
  31   auto encoding = Encoding::ForLabelNoReplacement(label);
  32   if (!encoding) {
  33     return NS_ERROR_UCONV_NOCONV;
  34   }
  35   // Previously, the implementation auto-switched only
  36   // between the two UTF-16 variants and only when
  37   // initialized with an endianness-unspecific label.
  38   mConverter = encoding->NewDecoder();
  39
  40   size_t outputBufferSize;
  41   if (aBufferSize <= 0) {
  42     aBufferSize = CONVERTER_BUFFER_SIZE;
  43     outputBufferSize = CONVERTER_BUFFER_SIZE;
  44   } else {
  45     // NetUtil.jsm assumes that if buffer size equals
  46     // the input size, the whole stream will be processed
  47     // as one readString. This is not true with encoding_rs,
  48     // because encoding_rs might want to see space for a
  49     // surrogate pair, so let's compute a larger output
  50     // buffer length.
  51     CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
  52     if (!needed.isValid()) {
  53       return NS_ERROR_OUT_OF_MEMORY;
  54     }
  55     outputBufferSize = needed.value();
  56   }
  57
  58   // set up our buffers.
  59   if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
  60       !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
  61     return NS_ERROR_OUT_OF_MEMORY;
  62   }
  63
  64   mInput = aStream;
  65   mErrorsAreFatal = !aReplacementChar;
  66   return NS_OK;
  67 }
  68
  69 NS_IMETHODIMP
  70 nsConverterInputStream::Close() {
  71   nsresult rv = mInput ? mInput->Close() : NS_OK;
  72   mLineBuffer = nullptr;
  73   mInput = nullptr;
  74   mConverter = nullptr;
  75   mByteData.Clear();
  76   mUnicharData.Clear();
  77   return rv;
  78 }
  79
  80 NS_IMETHODIMP
  81 nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
  82                              uint32_t* aReadCount) {
  83   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  84   uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
  85   if (0 == readCount) {
  86     // Fill the unichar buffer
  87     readCount = Fill(&mLastErrorCode);
  88     if (readCount == 0) {
  89       *aReadCount = 0;
  90       return mLastErrorCode;
  91     }
  92   }
  93   if (readCount > aCount) {
  94     readCount = aCount;
  95   }
  96   memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
  97          readCount * sizeof(char16_t));
  98   mUnicharDataOffset += readCount;
  99   *aReadCount = readCount;
 100   return NS_OK;
 101 }
 102
 103 NS_IMETHODIMP
 104 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
 105                                      void* aClosure, uint32_t aCount,
 106                                      uint32_t* aReadCount) {
 107   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
 108   uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
 109   if (0 == codeUnitsToWrite) {
 110     // Fill the unichar buffer
 111     codeUnitsToWrite = Fill(&mLastErrorCode);
 112     if (codeUnitsToWrite == 0) {
 113       *aReadCount = 0;
 114       return mLastErrorCode;
 115     }
 116   }
 117
 118   if (codeUnitsToWrite > aCount) {
 119     codeUnitsToWrite = aCount;
 120   }
 121
 122   uint32_t codeUnitsWritten;
 123   uint32_t totalCodeUnitsWritten = 0;
 124
 125   while (codeUnitsToWrite) {
 126     nsresult rv =
 127         aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
 128                 totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
 129     if (NS_FAILED(rv)) {
 130       // don't propagate errors to the caller
 131       break;
 132     }
 133
 134     codeUnitsToWrite -= codeUnitsWritten;
 135     totalCodeUnitsWritten += codeUnitsWritten;
 136     mUnicharDataOffset += codeUnitsWritten;
 137   }
 138
 139   *aReadCount = totalCodeUnitsWritten;
 140
 141   return NS_OK;
 142 }
 143
 144 NS_IMETHODIMP
 145 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
 146                                    uint32_t* aReadCount) {
 147   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
 148   uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
 149   if (0 == readCount) {
 150     // Fill the unichar buffer
 151     readCount = Fill(&mLastErrorCode);
 152     if (readCount == 0) {
 153       *aReadCount = 0;
 154       return mLastErrorCode;
 155     }
 156   }
 157   if (readCount > aCount) {
 158     readCount = aCount;
 159   }
 160   const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
 161   aString.Assign(buf, readCount);
 162   mUnicharDataOffset += readCount;
 163   *aReadCount = readCount;
 164   return NS_OK;
 165 }
 166
 167 uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
 168   if (!mInput) {
 169     // We already closed the stream!
 170     *aErrorCode = NS_BASE_STREAM_CLOSED;
 171     return 0;
 172   }
 173
 174   if (NS_FAILED(mLastErrorCode)) {
 175     // We failed to completely convert last time, and error-recovery
 176     // is disabled.  We will fare no better this time, so...
 177     *aErrorCode = mLastErrorCode;
 178     return 0;
 179   }
 180
 181   // mUnicharData.Length() is the buffer length, not the fill status.
 182   // mUnicharDataLength reflects the current fill status.
 183   mUnicharDataLength = 0;
 184   // Whenever we convert, mUnicharData is logically empty.
 185   mUnicharDataOffset = 0;
 186
 187   // Continue trying to read from the source stream until we successfully decode
 188   // a character or encounter an error, as returning `0` here implies that the
 189   // stream is complete.
 190   //
 191   // If the converter has been cleared, we've fully consumed the stream, and
 192   // want to report EOF.
 193   while (mUnicharDataLength == 0 && mConverter) {
 194     // We assume a many to one conversion and are using equal sizes for
 195     // the two buffers.  However if an error happens at the very start
 196     // of a byte buffer we may end up in a situation where n bytes lead
 197     // to n+1 unicode chars.  Thus we need to keep track of the leftover
 198     // bytes as we convert.
 199
 200     uint32_t nb;
 201     *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
 202     if (NS_FAILED(*aErrorCode)) {
 203       return 0;
 204     }
 205
 206     NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
 207                  "mByteData is lying to us somewhere");
 208
 209     // If `NS_FillArray` failed to read any new bytes, this is the last read,
 210     // and we're at the end of the stream.
 211     bool last = (nb == 0);
 212
 213     // Now convert as much of the byte buffer to unicode as possible
 214     auto src = AsBytes(Span(mByteData));
 215     auto dst = Span(mUnicharData);
 216
 217     // Truncation from size_t to uint32_t below is OK, because the sizes
 218     // are bounded by the lengths of mByteData and mUnicharData.
 219     uint32_t result;
 220     size_t read;
 221     size_t written;
 222     if (mErrorsAreFatal) {
 223       std::tie(result, read, written) =
 224           mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
 225     } else {
 226       std::tie(result, read, written, std::ignore) =
 227           mConverter->DecodeToUTF16(src, dst, last);
 228     }
 229     mLeftOverBytes = mByteData.Length() - read;
 230     mUnicharDataLength = written;
 231     // Clear `mConverter` if we reached the end of the stream, as we can't
 232     // call methods on it anymore. This will also signal EOF to the caller
 233     // through the loop condition.
 234     if (last) {
 235       MOZ_ASSERT(mLeftOverBytes == 0,
 236                  "Failed to read all bytes on the last pass?");
 237       mConverter = nullptr;
 238     }
 239     // If we got a decode error, we're done.
 240     if (result != kInputEmpty && result != kOutputFull) {
 241       MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
 242       *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
 243       return 0;
 244     }
 245   }
 246   *aErrorCode = NS_OK;
 247   return mUnicharDataLength;
 248 }
 249
 250 NS_IMETHODIMP
 251 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
 252   if (!mLineBuffer) {
 253     mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
 254   }
 255   return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
 256 }