1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
10 * The scanner is a low-level service class that knows
11 * how to consume characters out of an (internal) stream.
12 * This class also offers a series of utility methods
13 * that most tokenizers want, such as readUntil()
14 * and SkipWhitespace().
20 #include "nsCharsetSource.h"
23 #include "nsIParser.h"
24 #include "mozilla/Encoding.h"
25 #include "nsScannerString.h"
26 #include "mozilla/CheckedInt.h"
// Small helper type built from a string of terminating characters.
// The visible members are a `const char16_t*` (mChars) and an explicit
// constructor taking `aTerminateChars`; the copy constructor and copy
// assignment are declared only to forbid copying (see trailing comments).
// NOTE(review): presumably describes where a "read until" operation should
// stop — confirm against the implementation file.
28 class nsReadEndCondition
{
30 const char16_t
* mChars
;
32 explicit nsReadEndCondition(const char16_t
* aTerminateChars
);
35 nsReadEndCondition(const nsReadEndCondition
& aOther
); // No copying
36 void operator=(const nsReadEndCondition
& aOther
); // No assigning
// nsScanner is declared `final`, so it cannot be derived from.
// The two aliases below are shorthand for the mozilla:: types that appear
// in the member declarations (e.g. NotNull<const Encoding*>).
39 class nsScanner final
{
40 using Encoding
= mozilla::Encoding
;
42 using NotNull
= mozilla::NotNull
<T
>;
46 * Use this constructor for the XML fragment parsing case
// Constructs a scanner from an in-memory string.
// NOTE(review): aIncremental presumably seeds the flag reported by
// IsIncremental() — confirm in the implementation.
48 nsScanner(const nsAString
& anHTMLString
, bool aIncremental
);
51 * Use this constructor if you want i/o to be based on
52 * a file (therefore a stream) or just data you provide via Append().
// `explicit` prevents an nsIURI* from converting to a scanner implicitly.
// NOTE(review): aURI is presumably what GetURI() later returns — confirm.
54 explicit nsScanner(nsIURI
* aURI
);
59 * Retrieve the next char from the internal input stream.
61 * @update gess 3/25/98
62 * @param ch receives the char that was read
63 * @return error code reflecting read status
// Out-parameter style: the character is delivered through `ch` (non-const
// reference) and the nsresult return value carries the status.
65 nsresult
GetChar(char16_t
& ch
);
68 * Records current offset position in input stream. This allows us
69 * to back up to this point if the need should arise, such as when
70 * tokenization gets interrupted.
72 * @update gess 5/12/98
79 * Resets current offset position of input stream to marked position.
80 * This allows us to back up to this point if the need should arise,
81 * such as when tokenization gets interrupted.
82 * NOTE: IT IS REALLY BAD FORM TO CALL RewindToMark() WITHOUT MARKING A POSITION FIRST!
84 * @update gess 5/12/98
// Takes no arguments and returns nothing.
// NOTE(review): presumably restores the read position from mMarkPosition —
// confirm in the implementation.
88 void RewindToMark(void);
93 * @update harishd 01/12/99
// Takes the text in aBuffer by const reference and returns a bool.
// NOTE(review): presumably re-inserts aBuffer ahead of the unread data and
// reports success/failure — confirm against the implementation.
97 bool UngetReadable(const nsAString
& aBuffer
);
102 * @update gess 5/13/98
// Overload of Append() that takes an nsAString by const reference and
// reports its outcome as an nsresult.
106 nsresult
Append(const nsAString
& aBuffer
);
111 * @update gess 5/21/98
// Overload of Append() that takes a raw char buffer plus its length (aLen)
// and reports its outcome as an nsresult.
// NOTE(review): presumably the bytes are run through mUnicodeDecoder before
// being stored — confirm in the implementation.
115 nsresult
Append(const char* aBuffer
, uint32_t aLen
);
118 * Call this to copy bytes out of the scanner that have not yet been consumed
119 * by the tokenization process.
121 * @update gess 5/12/98
122 * @param aCopyBuffer is where the scanner buffer will be copied to
123 * @return true if OK or false on OOM
// aCopyBuffer is an out-parameter (non-const nsString reference); the bool
// result reports whether the copy succeeded.
125 bool CopyUnusedData(nsString
& aCopyBuffer
);
128 * Retrieve the URI of the file that the scanner is reading from.
129 * In some cases, it's just a given name, because the scanner isn't
130 * really reading from a file.
// Inline const accessor: returns the mURI member as a raw nsIURI pointer.
132 nsIURI
* GetURI(void) const { return mURI
; }
// Static member taking no arguments and returning nothing.
// NOTE(review): what it exercises is not visible from this declaration.
134 static void SelfTest();
137 * Use this setter to change the scanner's unicode decoder
139 * @update ftang 3/02/99
140 * @param aEncoding the encoding whose decoder the scanner should switch to (never null)
141 * @param aCharsetSource- where the charset info came from
144 nsresult
SetDocumentCharset(NotNull
<const Encoding
*> aEncoding
,
// aSubstring is taken by non-const reference (it is the object being
// modified); aStart and aEnd are read-only iterators.
// NOTE(review): presumably binds aSubstring to the range [aStart, aEnd) of
// the scanner buffer — confirm in the implementation.
147 void BindSubstring(nsScannerSubstring
& aSubstring
,
148 const nsScannerIterator
& aStart
,
149 const nsScannerIterator
& aEnd
);
// Out-parameter style: the result is written into aPosition.
// NOTE(review): presumably copies mCurrentPosition — confirm.
150 void CurrentPosition(nsScannerIterator
& aPosition
);
// Out-parameter style: the result is written into aPosition.
// NOTE(review): presumably copies mEndPosition — confirm.
151 void EndReading(nsScannerIterator
& aPosition
);
// Moves the scanner to aPosition. aTruncate defaults to false, so existing
// one-argument callers keep the non-truncating behaviour.
// NOTE(review): what exactly is truncated when aTruncate is true is not
// visible here — confirm in the implementation.
152 void SetPosition(nsScannerIterator
& aPosition
, bool aTruncate
= false);
155 * Internal method used to cause the internal buffer to
156 * be filled with data.
// Inline accessor: reports the current value of mIncremental.
// NOTE(review): only reads state but is not declared const — consider
// const-qualifying it.
160 bool IsIncremental(void) { return mIncremental
; }
// Inline setter: stores anIncrValue into mIncremental.
161 void SetIncremental(bool anIncrValue
) { mIncremental
= anIncrValue
; }
// Overload taking an already-allocated nsScannerString::Buffer.
// NOTE(review): ownership of aBuffer after the call is not visible from this
// declaration — confirm before relying on it.
164 void AppendToBuffer(nsScannerString::Buffer
* aBuffer
);
165 bool AppendToBuffer(const nsAString
& aStr
) {
166 nsScannerString::Buffer
* buf
= nsScannerString::AllocBufferFromString(aStr
);
167 if (!buf
) return false;
// Buffer object held through a UniquePtr, i.e. uniquely owned by this scanner.
172 mozilla::UniquePtr
<nsScannerString
> mSlidingBuffer
;
173 nsScannerIterator mCurrentPosition
; // The position we will next read from in
174 // the scanner buffer
176 mMarkPosition
; // The position last marked (we may rewind to here)
177 nsScannerIterator mEndPosition
; // The current end of the scanner buffer
// URI handed back by GetURI().
178 nsCOMPtr
<nsIURI
> mURI
;
// Initialised in-class to kCharsetUninitialized.
180 int32_t mCharsetSource
= kCharsetUninitialized
;
// Decoder object held through a UniquePtr, i.e. uniquely owned by this scanner.
182 mozilla::UniquePtr
<mozilla::Decoder
> mUnicodeDecoder
;
// Copy assignment is declared but, per the trailing comment, never defined:
// the scanner is not meant to be copy-assigned.
185 nsScanner
& operator=(const nsScanner
&); // Not implemented.