Bug 1855360 - Fix the skip-if syntax. a=bustage-fix
[gecko.git] / parser / htmlparser / nsScanner.cpp
blob36a14e1083e391d9d007c58338fb924b925380d8
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=2 sw=2 et tw=78: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // #define __INCREMENTAL 1
9 #include "nsScanner.h"
11 #include "mozilla/Attributes.h"
12 #include "mozilla/DebugOnly.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/UniquePtr.h"
15 #include "nsDebug.h"
16 #include "nsReadableUtils.h"
17 #include "nsUTF8Utils.h" // for LossyConvertEncoding
18 #include "nsCRT.h"
19 #include "nsParser.h"
20 #include "nsCharsetSource.h"
22 nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
23 : mChars(aTerminateChars),
24 mFilter(char16_t(~0)) // All bits set
26 // Build filter that will be used to filter out characters with
27 // bits that none of the terminal chars have. This works very well
28 // because terminal chars often have only the last 4-6 bits set and
29 // normal ascii letters have bit 7 set. Other letters have even higher
30 // bits set.
32 // Calculate filter
33 const char16_t* current = aTerminateChars;
34 char16_t terminalChar = *current;
35 while (terminalChar) {
36 mFilter &= ~terminalChar;
37 ++current;
38 terminalChar = *current;
42 /**
43 * Use this constructor if you want i/o to be based on
44 * a single string you hand in during construction.
45 * This short cut was added for Javascript.
47 * @update gess 5/12/98
48 * @param aMode represents the parser mode (nav, other)
49 * @return
51 nsScanner::nsScanner(const nsAString& anHTMLString, bool aIncremental)
52 : mIncremental(aIncremental) {
53 MOZ_COUNT_CTOR(nsScanner);
55 AppendToBuffer(anHTMLString);
56 MOZ_ASSERT(mMarkPosition == mCurrentPosition);
59 /**
60 * Use this constructor if you want i/o to be based on strings
61 * the scanner receives. If you pass a null filename, you
62 * can still provide data to the scanner via append.
64 nsScanner::nsScanner(nsIURI* aURI) : mURI(aURI), mIncremental(true) {
65 MOZ_COUNT_CTOR(nsScanner);
67 // XXX This is a big hack. We need to initialize the iterators to something.
68 // What matters is that mCurrentPosition == mEndPosition, so that our methods
69 // believe that we are at EOF (see bug 182067). We null out mCurrentPosition
70 // so that we have some hope of catching null pointer dereferences associated
71 // with this hack. --darin
72 memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
73 mMarkPosition = mCurrentPosition;
74 mEndPosition = mCurrentPosition;
76 // XML defaults to UTF-8 and about:blank is UTF-8, too.
77 SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault);
80 nsresult nsScanner::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
81 int32_t aSource) {
82 if (aSource < mCharsetSource) // priority is lower than the current one
83 return NS_OK;
85 mCharsetSource = aSource;
86 nsCString charsetName;
87 aEncoding->Name(charsetName);
88 if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
89 return NS_OK; // no difference, don't change it
92 // different, need to change it
94 mCharset.Assign(charsetName);
96 mUnicodeDecoder = aEncoding->NewDecoderWithBOMRemoval();
98 return NS_OK;
102 * default destructor
104 * @update gess 3/25/98
105 * @param
106 * @return
108 nsScanner::~nsScanner() { MOZ_COUNT_DTOR(nsScanner); }
111 * Resets current offset position of input stream to marked position.
112 * This allows us to back up to this point if the need should arise,
113 * such as when tokenization gets interrupted.
114 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
116 * @update gess 5/12/98
117 * @param
118 * @return
120 void nsScanner::RewindToMark(void) {
121 if (mSlidingBuffer) {
122 mCurrentPosition = mMarkPosition;
127 * Records current offset position in input stream. This allows us
128 * to back up to this point if the need should arise, such as when
129 * tokenization gets interrupted.
131 * @update gess 7/29/98
132 * @param
133 * @return
135 int32_t nsScanner::Mark() {
136 int32_t distance = 0;
137 if (mSlidingBuffer) {
138 nsScannerIterator oldStart;
139 mSlidingBuffer->BeginReading(oldStart);
141 distance = Distance(oldStart, mCurrentPosition);
143 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
144 mSlidingBuffer->BeginReading(mCurrentPosition);
145 mMarkPosition = mCurrentPosition;
148 return distance;
152 * Insert data to our underlying input buffer as
153 * if it were read from an input stream.
155 * @update harishd 01/12/99
156 * @return error code
158 bool nsScanner::UngetReadable(const nsAString& aBuffer) {
159 if (!mSlidingBuffer) {
160 return false;
163 mSlidingBuffer->UngetReadable(aBuffer, mCurrentPosition);
164 mSlidingBuffer->BeginReading(
165 mCurrentPosition); // Insertion invalidated our iterators
166 mSlidingBuffer->EndReading(mEndPosition);
168 return true;
172 * Append data to our underlying input buffer as
173 * if it were read from an input stream.
175 * @update gess4/3/98
176 * @return error code
178 nsresult nsScanner::Append(const nsAString& aBuffer) {
179 if (!AppendToBuffer(aBuffer)) return NS_ERROR_OUT_OF_MEMORY;
180 return NS_OK;
186 * @update gess 5/21/98
187 * @param
188 * @return
190 nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen) {
191 nsresult res = NS_OK;
192 if (mUnicodeDecoder) {
193 mozilla::CheckedInt<size_t> needed =
194 mUnicodeDecoder->MaxUTF16BufferLength(aLen);
195 if (!needed.isValid()) {
196 return NS_ERROR_OUT_OF_MEMORY;
198 mozilla::CheckedInt<uint32_t> allocLen(
199 1); // null terminator due to legacy sadness
200 allocLen += needed.value();
201 if (!allocLen.isValid()) {
202 return NS_ERROR_OUT_OF_MEMORY;
204 nsScannerString::Buffer* buffer =
205 nsScannerString::AllocBuffer(allocLen.value());
206 NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY);
207 char16_t* unichars = buffer->DataStart();
209 uint32_t result;
210 size_t read;
211 size_t written;
212 // Do not use structured binding lest deal with [-Werror=unused-variable]
213 std::tie(result, read, written) =
214 mUnicodeDecoder->DecodeToUTF16WithoutReplacement(
215 AsBytes(mozilla::Span(aBuffer, aLen)),
216 mozilla::Span(unichars, needed.value()),
217 false); // Retain bug about failure to handle EOF
218 MOZ_ASSERT(result != mozilla::kOutputFull);
219 MOZ_ASSERT(read <= aLen);
220 MOZ_ASSERT(written <= needed.value());
221 if (result != mozilla::kInputEmpty) {
222 // Since about:blank is empty, this line runs only for XML. Use a
223 // character that's illegal in XML instead of U+FFFD in order to make
224 // expat flag the error. There is no need to loop and convert more, since
225 // expat will stop here anyway.
226 unichars[written++] = 0xFFFF;
228 buffer->SetDataLength(written);
229 // Don't propagate return code of unicode decoder
230 // since it doesn't reflect on our success or failure
231 // - Ref. bug 87110
232 res = NS_OK;
233 AppendToBuffer(buffer);
234 } else {
235 NS_WARNING("No decoder found.");
236 res = NS_ERROR_FAILURE;
239 return res;
243 * retrieve next char from scanners internal input stream
245 * @update gess 3/25/98
246 * @param
247 * @return error code reflecting read status
249 nsresult nsScanner::GetChar(char16_t& aChar) {
250 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
251 aChar = 0;
252 return NS_ERROR_HTMLPARSER_EOF;
255 aChar = *mCurrentPosition++;
257 return NS_OK;
260 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring,
261 const nsScannerIterator& aStart,
262 const nsScannerIterator& aEnd) {
263 aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
266 void nsScanner::CurrentPosition(nsScannerIterator& aPosition) {
267 aPosition = mCurrentPosition;
270 void nsScanner::EndReading(nsScannerIterator& aPosition) {
271 aPosition = mEndPosition;
274 void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate) {
275 if (mSlidingBuffer) {
276 mCurrentPosition = aPosition;
277 if (aTerminate && (mCurrentPosition == mEndPosition)) {
278 mMarkPosition = mCurrentPosition;
279 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
284 void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf) {
285 if (!mSlidingBuffer) {
286 mSlidingBuffer = mozilla::MakeUnique<nsScannerString>(aBuf);
287 mSlidingBuffer->BeginReading(mCurrentPosition);
288 mMarkPosition = mCurrentPosition;
289 } else {
290 mSlidingBuffer->AppendBuffer(aBuf);
291 if (mCurrentPosition == mEndPosition) {
292 mSlidingBuffer->BeginReading(mCurrentPosition);
295 mSlidingBuffer->EndReading(mEndPosition);
299 * call this to copy bytes out of the scanner that have not yet been consumed
300 * by the tokenization process.
302 * @update gess 5/12/98
303 * @param aCopyBuffer is where the scanner buffer will be copied to
304 * @return true if OK or false on OOM
306 bool nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
307 if (!mSlidingBuffer) {
308 aCopyBuffer.Truncate();
309 return true;
312 nsScannerIterator start, end;
313 start = mCurrentPosition;
314 end = mEndPosition;
316 return CopyUnicodeTo(start, end, aCopyBuffer);
320 * Conduct self test. Actually, selftesting for this class
321 * occurs in the parser selftest.
323 * @update gess 3/25/98
324 * @param
325 * @return
328 void nsScanner::SelfTest(void) {
329 #ifdef _DEBUG
330 #endif