1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * The scanner is a low-level service class that knows
 * how to consume characters out of an (internal) stream.
 * This class also offers a series of utility methods
 * that most tokenizers want, such as readUntil()
 * and SkipWhitespace().
 */
22 #include "nsIParser.h"
23 #include "mozilla/Encoding.h"
24 #include "nsScannerString.h"
25 #include "mozilla/CheckedInt.h"
27 class nsReadEndCondition
{
29 const char16_t
* mChars
;
31 explicit nsReadEndCondition(const char16_t
* aTerminateChars
);
34 nsReadEndCondition(const nsReadEndCondition
& aOther
); // No copying
35 void operator=(const nsReadEndCondition
& aOther
); // No assigning
38 class nsScanner final
{
39 using Encoding
= mozilla::Encoding
;
41 using NotNull
= mozilla::NotNull
<T
>;
45 * Use this constructor for the XML fragment parsing case
47 explicit nsScanner(const nsAString
& anHTMLString
);
50 * Use this constructor if you want i/o to be based on
51 * a file (therefore a stream) or just data you provide via Append().
53 nsScanner(nsString
& aFilename
, bool aCreateStream
);
58 * retrieve next char from internal input stream
60 * @update gess 3/25/98
61 * @param ch is the char to accept new value
62 * @return error code reflecting read status
64 nsresult
GetChar(char16_t
& ch
);
67 * Records current offset position in input stream. This allows us
68 * to back up to this point if the need should arise, such as when
69 * tokenization gets interrupted.
71 * @update gess 5/12/98
/**
 * Resets current offset position of input stream to marked position.
 * This allows us to back up to this point if the need should arise,
 * such as when tokenization gets interrupted.
 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
 * (NOTE(review): "RELEASE" presumably means this method, RewindToMark —
 * confirm.)
 *
 * @update  gess 5/12/98
 */
void RewindToMark();
92 * @update harishd 01/12/99
96 bool UngetReadable(const nsAString
& aBuffer
);
101 * @update gess 5/13/98
105 nsresult
Append(const nsAString
& aBuffer
);
110 * @update gess 5/21/98
114 nsresult
Append(const char* aBuffer
, uint32_t aLen
);
117 * Call this to copy bytes out of the scanner that have not yet been consumed
118 * by the tokenization process.
120 * @update gess 5/12/98
121 * @param aCopyBuffer is where the scanner buffer will be copied to
122 * @return true if OK or false on OOM
124 bool CopyUnusedData(nsString
& aCopyBuffer
);
127 * Retrieve the name of the file that the scanner is reading from.
128 * In some cases, it's just a given name, because the scanner isn't
129 * really reading from a file.
131 * @update gess 5/12/98
134 nsString
& GetFilename(void);
// Static self-test hook: takes no arguments and returns nothing.
// NOTE(review): what it exercises is not visible from this declaration.
static void SelfTest();
/**
 * Use this setter to change the scanner's unicode decoder
 *
 * @update  ftang 3/02/99
 * @param   aEncoding the encoding to decode with (a NotNull pointer)
 * @param   aCharsetSource- where the charset info came from
 */
146 nsresult
SetDocumentCharset(NotNull
<const Encoding
*> aEncoding
,
149 void BindSubstring(nsScannerSubstring
& aSubstring
,
150 const nsScannerIterator
& aStart
,
151 const nsScannerIterator
& aEnd
);
152 void CurrentPosition(nsScannerIterator
& aPosition
);
153 void EndReading(nsScannerIterator
& aPosition
);
154 void SetPosition(nsScannerIterator
& aPosition
, bool aTruncate
= false);
157 * Internal method used to cause the internal buffer to
158 * be filled with data.
162 bool IsIncremental(void) { return mIncremental
; }
163 void SetIncremental(bool anIncrValue
) { mIncremental
= anIncrValue
; }
166 bool AppendToBuffer(nsScannerString::Buffer
* aBuffer
);
167 bool AppendToBuffer(const nsAString
& aStr
) {
168 nsScannerString::Buffer
* buf
= nsScannerString::AllocBufferFromString(aStr
);
169 if (!buf
) return false;
174 nsScannerString
* mSlidingBuffer
;
175 nsScannerIterator mCurrentPosition
; // The position we will next read from in
176 // the scanner buffer
178 mMarkPosition
; // The position last marked (we may rewind to here)
179 nsScannerIterator mEndPosition
; // The current end of the scanner buffer
182 int32_t mCharsetSource
;
184 mozilla::UniquePtr
<mozilla::Decoder
> mUnicodeDecoder
;
187 nsScanner
& operator=(const nsScanner
&); // Not implemented.