1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * Token-affiliated data structures except for TokenKind (defined in its own
 * header).
 */
12 #ifndef frontend_Token_h
13 #define frontend_Token_h
15 #include "mozilla/Assertions.h" // MOZ_ASSERT
17 #include <stdint.h> // uint32_t
19 #include "frontend/ParserAtom.h" // TaggedParserAtomIndex, TrivialTaggedParserAtomIndex
20 #include "frontend/TokenKind.h" // js::frontend::TokenKind
21 #include "js/RegExpFlags.h" // JS::RegExpFlags
// Code-unit offsets delimiting the token: |begin| is the first code unit and
// |end| is one past the last. Both default to zero.
uint32_t begin{0};  // Offset of the token's first code unit.
uint32_t end{0};    // Offset of 1 past the token's last code unit.
32 TokenPos(uint32_t begin
, uint32_t end
) : begin(begin
), end(end
) {}
34 // Return a TokenPos that covers left, right, and anything in between.
35 static TokenPos
box(const TokenPos
& left
, const TokenPos
& right
) {
36 MOZ_ASSERT(left
.begin
<= left
.end
);
37 MOZ_ASSERT(left
.end
<= right
.begin
);
38 MOZ_ASSERT(right
.begin
<= right
.end
);
39 return TokenPos(left
.begin
, right
.end
);
42 bool operator==(const TokenPos
& bpos
) const {
43 return begin
== bpos
.begin
&& end
== bpos
.end
;
46 bool operator!=(const TokenPos
& bpos
) const {
47 return begin
!= bpos
.begin
|| end
!= bpos
.end
;
50 bool operator<(const TokenPos
& bpos
) const { return begin
< bpos
.begin
; }
52 bool operator<=(const TokenPos
& bpos
) const { return begin
<= bpos
.begin
; }
54 bool operator>(const TokenPos
& bpos
) const { return !(*this <= bpos
); }
56 bool operator>=(const TokenPos
& bpos
) const { return !(*this < bpos
); }
58 bool encloses(const TokenPos
& pos
) const {
59 return begin
<= pos
.begin
&& pos
.end
<= end
;
// Whether a numeric literal contains a '.'. The enumerators are pinned to
// the boolean values so they convert to the obvious truth value.
enum DecimalPoint {
  NoDecimal = false,
  HasDecimal = true,
};
// The only escapes found in IdentifierName are of the Unicode flavor, so a
// two-state flag is enough to describe them.
enum class IdentifierEscapes {
  None,
  SawUnicodeEscape,
};
// Visibility of a name: public or private.
enum class NameVisibility {
  Public,
  Private,
};
70 class TokenStreamShared
;
74 // The lexical grammar of JavaScript has a quirk around the '/' character.
75 // As the spec puts it:
77 // > There are several situations where the identification of lexical input
78 // > elements is sensitive to the syntactic grammar context that is consuming
79 // > the input elements. This requires multiple goal symbols for the lexical
80 // > grammar. [...] The InputElementRegExp goal symbol is used in all
81 // > syntactic grammar contexts where a RegularExpressionLiteral is permitted
// > [...] In all other contexts, InputElementDiv is used as the lexical
// > goal symbol.
85 // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars
87 // What "sensitive to the syntactic grammar context" means is, the parser has
88 // to tell the TokenStream whether to interpret '/' as division or
89 // RegExp. Because only one or the other (or neither) will be legal at that
90 // point in the program, and only the parser knows which one.
92 // But there's a problem: the parser often gets a token, puts it back, then
93 // consumes it later; or (equivalently) peeks at a token, leaves it, peeks
94 // again later, then finally consumes it. Of course we don't actually re-scan
// the token every time; we cache it in the TokenStream. This leads to the
// following rule:
98 // The parser must not pass SlashIsRegExp when getting/peeking at a token
99 // previously scanned with SlashIsDiv; or vice versa.
101 // That way, code that asks for a SlashIsRegExp mode will never get a cached
102 // Div token. But this rule is easy to screw up, because tokens are so often
103 // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is
104 // thousands of lines. We therefore enforce it with the frontend's most
105 // annoying assertion (in verifyConsistentModifier), and provide
106 // Modifier::SlashIsInvalid to help avoid tripping it.
// This enum belongs in TokenStream, but C++, so we define it here and
// typedef it there.
111 // Parse `/` and `/=` as the division operators. (That is, use
112 // InputElementDiv as the goal symbol.)
115 // Parse `/` as the beginning of a RegExp literal. (That is, use
116 // InputElementRegExp.)
119 // Neither a Div token nor a RegExp token is syntactically valid here. When
120 // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see
121 // either one (and throw a SyntaxError either way).
123 // It's OK to use SlashIsInvalid to get a token that was originally scanned
124 // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with
125 // SlashIsInvalid, then getting with another mode--is not OK. If either Div
126 // or RegExp is syntactically valid here, use the appropriate modifier.
129 friend class TokenStreamShared
;
132 /** The type of this token. */
135 /** The token's position in the overall script. */
142 TrivialTaggedParserAtomIndex atom
;
145 /** Numeric literal's value. */
148 /** Does the numeric literal contain a '.'? */
149 DecimalPoint decimalPoint
;
152 /** Regular expression flags; use charBuffer to access source chars. */
153 JS::RegExpFlags reflags
;
157 /** The modifier used to get this token. */
163 void setName(TaggedParserAtomIndex name
) {
164 MOZ_ASSERT(type
== TokenKind::Name
|| type
== TokenKind::PrivateName
);
165 u
.atom
= TrivialTaggedParserAtomIndex::from(name
);
168 void setAtom(TaggedParserAtomIndex atom
) {
169 MOZ_ASSERT(type
== TokenKind::String
|| type
== TokenKind::TemplateHead
||
170 type
== TokenKind::NoSubsTemplate
);
171 u
.atom
= TrivialTaggedParserAtomIndex::from(atom
);
174 void setRegExpFlags(JS::RegExpFlags flags
) {
175 MOZ_ASSERT(type
== TokenKind::RegExp
);
179 void setNumber(double n
, DecimalPoint decimalPoint
) {
180 MOZ_ASSERT(type
== TokenKind::Number
);
182 u
.number
.decimalPoint
= decimalPoint
;
185 // Type-safe accessors
187 TaggedParserAtomIndex
name() const {
188 MOZ_ASSERT(type
== TokenKind::Name
|| type
== TokenKind::PrivateName
);
192 TaggedParserAtomIndex
atom() const {
193 MOZ_ASSERT(type
== TokenKind::String
|| type
== TokenKind::TemplateHead
||
194 type
== TokenKind::NoSubsTemplate
);
198 JS::RegExpFlags
regExpFlags() const {
199 MOZ_ASSERT(type
== TokenKind::RegExp
);
203 double number() const {
204 MOZ_ASSERT(type
== TokenKind::Number
);
205 return u
.number
.value
;
208 DecimalPoint
decimalPoint() const {
209 MOZ_ASSERT(type
== TokenKind::Number
);
210 return u
.number
.decimalPoint
;
214 } // namespace frontend
218 #endif // frontend_Token_h