js/src/frontend/Token.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
   2  * vim: set ts=8 sts=2 et sw=2 tw=80:
   3  * This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * Token-affiliated data structures except for TokenKind (defined in its own
   9  * header).
  10  */
  11
  12 #ifndef frontend_Token_h
  13 #define frontend_Token_h
  14
  15 #include "mozilla/Assertions.h"  // MOZ_ASSERT
  16
  17 #include <stdint.h>  // uint32_t
  18
  19 #include "frontend/ParserAtom.h"  // TaggedParserAtomIndex, TrivialTaggedParserAtomIndex
  20 #include "frontend/TokenKind.h"  // js::frontend::TokenKind
  21 #include "js/RegExpFlags.h"      // JS::RegExpFlags
  22
  23 namespace js {
  24
  25 namespace frontend {
  26
  27 struct TokenPos {
  28   uint32_t begin = 0;  // Offset of the token's first code unit.
  29   uint32_t end = 0;    // Offset of 1 past the token's last code unit.
  30
  31   TokenPos() = default;
  32   TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
  33
  34   // Return a TokenPos that covers left, right, and anything in between.
  35   static TokenPos box(const TokenPos& left, const TokenPos& right) {
  36     MOZ_ASSERT(left.begin <= left.end);
  37     MOZ_ASSERT(left.end <= right.begin);
  38     MOZ_ASSERT(right.begin <= right.end);
  39     return TokenPos(left.begin, right.end);
  40   }
  41
  42   bool operator==(const TokenPos& bpos) const {
  43     return begin == bpos.begin && end == bpos.end;
  44   }
  45
  46   bool operator!=(const TokenPos& bpos) const {
  47     return begin != bpos.begin || end != bpos.end;
  48   }
  49
  50   bool operator<(const TokenPos& bpos) const { return begin < bpos.begin; }
  51
  52   bool operator<=(const TokenPos& bpos) const { return begin <= bpos.begin; }
  53
  54   bool operator>(const TokenPos& bpos) const { return !(*this <= bpos); }
  55
  56   bool operator>=(const TokenPos& bpos) const { return !(*this < bpos); }
  57
  58   bool encloses(const TokenPos& pos) const {
  59     return begin <= pos.begin && pos.end <= end;
  60   }
  61 };
  62
  63 enum DecimalPoint { NoDecimal = false, HasDecimal = true };
  64
  65 // The only escapes found in IdentifierName are of the Unicode flavor.
  66 enum class IdentifierEscapes { None, SawUnicodeEscape };
  67
  68 enum class NameVisibility { Public, Private };
  69
  70 class TokenStreamShared;
  71
  72 struct Token {
  73  private:
  74   // The lexical grammar of JavaScript has a quirk around the '/' character.
  75   // As the spec puts it:
  76   //
  77   // > There are several situations where the identification of lexical input
  78   // > elements is sensitive to the syntactic grammar context that is consuming
  79   // > the input elements. This requires multiple goal symbols for the lexical
  80   // > grammar. [...] The InputElementRegExp goal symbol is used in all
  81   // > syntactic grammar contexts where a RegularExpressionLiteral is permitted
  82   // > [...]  In all other contexts, InputElementDiv is used as the lexical
  83   // > goal symbol.
  84   //
  85   // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars
  86   //
  87   // What "sensitive to the syntactic grammar context" means is, the parser has
  88   // to tell the TokenStream whether to interpret '/' as division or
  89   // RegExp. Because only one or the other (or neither) will be legal at that
  90   // point in the program, and only the parser knows which one.
  91   //
  92   // But there's a problem: the parser often gets a token, puts it back, then
  93   // consumes it later; or (equivalently) peeks at a token, leaves it, peeks
  94   // again later, then finally consumes it. Of course we don't actually re-scan
  95   // the token every time; we cache it in the TokenStream. This leads to the
  96   // following rule:
  97   //
  98   // The parser must not pass SlashIsRegExp when getting/peeking at a token
  99   // previously scanned with SlashIsDiv; or vice versa.
 100   //
 101   // That way, code that asks for a SlashIsRegExp mode will never get a cached
 102   // Div token. But this rule is easy to screw up, because tokens are so often
 103   // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is
 104   // thousands of lines. We therefore enforce it with the frontend's most
 105   // annoying assertion (in verifyConsistentModifier), and provide
 106   // Modifier::SlashIsInvalid to help avoid tripping it.
 107   //
 108   // This enum belongs in TokenStream, but C++, so we define it here and
 109   // typedef it there.
 110   enum Modifier {
 111     // Parse `/` and `/=` as the division operators. (That is, use
 112     // InputElementDiv as the goal symbol.)
 113     SlashIsDiv,
 114
 115     // Parse `/` as the beginning of a RegExp literal. (That is, use
 116     // InputElementRegExp.)
 117     SlashIsRegExp,
 118
 119     // Neither a Div token nor a RegExp token is syntactically valid here. When
 120     // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see
 121     // either one (and throw a SyntaxError either way).
 122     //
 123     // It's OK to use SlashIsInvalid to get a token that was originally scanned
 124     // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with
 125     // SlashIsInvalid, then getting with another mode--is not OK. If either Div
 126     // or RegExp is syntactically valid here, use the appropriate modifier.
 127     SlashIsInvalid,
 128   };
 129   friend class TokenStreamShared;
 130
 131  public:
 132   /** The type of this token. */
 133   TokenKind type;
 134
 135   /** The token's position in the overall script. */
 136   TokenPos pos;
 137
 138   union {
 139    private:
 140     friend struct Token;
 141
 142     TrivialTaggedParserAtomIndex atom;
 143
 144     struct {
 145       /** Numeric literal's value. */
 146       double value;
 147
 148       /** Does the numeric literal contain a '.'? */
 149       DecimalPoint decimalPoint;
 150     } number;
 151
 152     /** Regular expression flags; use charBuffer to access source chars. */
 153     JS::RegExpFlags reflags;
 154   } u;
 155
 156 #ifdef DEBUG
 157   /** The modifier used to get this token. */
 158   Modifier modifier;
 159 #endif
 160
 161   // Mutators
 162
 163   void setName(TaggedParserAtomIndex name) {
 164     MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
 165     u.atom = TrivialTaggedParserAtomIndex::from(name);
 166   }
 167
 168   void setAtom(TaggedParserAtomIndex atom) {
 169     MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
 170                type == TokenKind::NoSubsTemplate);
 171     u.atom = TrivialTaggedParserAtomIndex::from(atom);
 172   }
 173
 174   void setRegExpFlags(JS::RegExpFlags flags) {
 175     MOZ_ASSERT(type == TokenKind::RegExp);
 176     u.reflags = flags;
 177   }
 178
 179   void setNumber(double n, DecimalPoint decimalPoint) {
 180     MOZ_ASSERT(type == TokenKind::Number);
 181     u.number.value = n;
 182     u.number.decimalPoint = decimalPoint;
 183   }
 184
 185   // Type-safe accessors
 186
 187   TaggedParserAtomIndex name() const {
 188     MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
 189     return u.atom;
 190   }
 191
 192   TaggedParserAtomIndex atom() const {
 193     MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
 194                type == TokenKind::NoSubsTemplate);
 195     return u.atom;
 196   }
 197
 198   JS::RegExpFlags regExpFlags() const {
 199     MOZ_ASSERT(type == TokenKind::RegExp);
 200     return u.reflags;
 201   }
 202
 203   double number() const {
 204     MOZ_ASSERT(type == TokenKind::Number);
 205     return u.number.value;
 206   }
 207
 208   DecimalPoint decimalPoint() const {
 209     MOZ_ASSERT(type == TokenKind::Number);
 210     return u.number.decimalPoint;
 211   }
 212 };
 213
 214 }  // namespace frontend
 215
 216 }  // namespace js
 217
 218 #endif  // frontend_Token_h