1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef INCREMENTAL_TOKENIZER_H__
8 #define INCREMENTAL_TOKENIZER_H__
10 #include "mozilla/Tokenizer.h"
19 class IncrementalTokenizer
: public TokenizerBase
<char> {
22 * The consumer callback. The function is called for every single token
23 * as found in the input. Failure result returned by this callback stops
24 * the tokenization immediately and bubbles to result of Feed/FinishInput.
26 * Fragment()s of consumed tokens are ensured to remain valid until next call
27 * to Feed/FinishInput and are pointing to a single linear buffer. Hence,
28 * those can be safely used to accumulate the data for processing after
29 * Feed/FinishInput returned.
31 typedef std::function
<nsresult(Token
const&, IncrementalTokenizer
& i
)>
35 * For aWhitespaces and aAdditionalWordChars arguments see TokenizerBase.
38 * A mandatory non-null argument, a function that consumes the tokens as
39 * they come when the tokenizer is fed.
40 * @param aRawMinBuffered
41 * When we have buffered at least aRawMinBuffered data, but there was no
42 * custom token found so far because of too small incremental feed chunks,
43 * deliver the raw data to preserve streaming and to save memory. This only
44 * has effect in OnlyCustomTokenizing mode.
46 explicit IncrementalTokenizer(Consumer
&& aConsumer
,
47 const char* aWhitespaces
= nullptr,
48 const char* aAdditionalWordChars
= nullptr,
49 uint32_t aRawMinBuffered
= 1024);
52 * Pushes the input to be tokenized. These directly call the Consumer
53 * callback on every found token. Result of the Consumer callback is returned
56 * The tokenizer must be initialized with a valid consumer prior call to these
57 * methods. It's not allowed to call Feed/FinishInput from inside the
60 nsresult
FeedInput(const nsACString
& aInput
);
61 nsresult
FeedInput(nsIInputStream
* aInput
, uint32_t aCount
);
62 nsresult
FinishInput();
65 * Can only be called from inside the consumer callback.
67 * When there is still anything to read from the input, tokenize it, store
68 * the token type and value to aToken result and shift the cursor past this
69 * just parsed token. Each call to Next() reads another token from
70 * the input and shifts the cursor.
72 * Returns false if there is not enough data to deterministically recognize
73 * tokens or when the last returned token was EOF.
75 [[nodiscard
]] bool Next(Token
& aToken
);
/**
 * Can only be called from inside the consumer callback.
 *
 * Tells the tokenizer to revert the cursor and stop the async parsing until
 * the next feed of the input.  This is useful when more than one token is
 * needed to decide on the syntax but there is not enough input to get a next
 * token (Next() returned false.)
 */
/**
 * Can only be called from inside the consumer callback.
 *
 * This makes the consumer callback be called again while parsing
 * the input at the previous cursor position again.  This is useful when
 * the tokenizer state (custom tokens, tokenization mode) has changed and
 * we want to re-parse the input.
 */
// Loops over the input with TokenizerBase::Parse and calls the Consumer
// callback.
103 // True when inside the consumer callback, used only for assertions.
// Modifiable only from the Consumer callback, tells the parser to break,
// rollback and wait for more input.
// Modifiable only from the Consumer callback, tells the parser to rollback
// and parse the input again, with (if modified) new settings of the
// tokenizer.
113 // The input buffer. Updated with each call to Feed/FinishInput.
115 // Numerical index pointing at the current cursor position. We don't keep
116 // direct reference to the string buffer since the buffer gets often
118 nsCString::index_type mInputCursor
;
// Reference to the consumer function.
123 } // namespace mozilla