Bug 623379: runtests: Check for java binary before asc invokes (r=fklockii)
[tamarin-stm.git] / eval / eval-lex.h
blob253381bb0df865b8f96bb48fb03ef2f64c65e58e
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine.].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2008
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 // This file is included into eval.h
41 namespace avmplus {
42 namespace RTC {
44 enum Token { // Token kinds returned by the Lexer entry points; each group below starts at its own fixed base value (0, 100, 200, 300, 400)
45 // Operators
47 // The values assigned for operators are fixed; they are used
48 // to construct the table Compiler::opcodeMapping in eval-parse.cpp.
49 // If you add entries to the operators list you *must* extend that
50 // table.
52 // Keep them alphabetical.
54 T_As = 0,
55 T_Assign,
56 T_BitwiseAnd,
57 T_BitwiseAndAssign,
58 T_BitwiseNot,
59 T_BitwiseOr,
60 T_BitwiseOrAssign,
61 T_BitwiseXor,
62 T_BitwiseXorAssign,
63 T_Delete,
64 T_Divide,
65 T_DivideAssign,
66 T_Equal,
67 T_GreaterThan,
68 T_GreaterThanOrEqual,
69 T_In,
70 T_InstanceOf,
71 T_Is,
72 T_LeftShift,
73 T_LeftShiftAssign,
74 T_LessThan,
75 T_LessThanOrEqual,
76 T_LogicalAnd,
77 T_LogicalAndAssign,
78 T_LogicalOr,
79 T_LogicalOrAssign,
80 T_Minus,
81 T_MinusAssign,
82 T_MinusMinus,
83 T_Multiply,
84 T_MultiplyAssign,
85 T_Not,
86 T_NotEqual,
87 T_Plus,
88 T_PlusAssign,
89 T_PlusPlus,
90 T_Remainder,
91 T_RemainderAssign,
92 T_RightShift,
93 T_RightShiftAssign,
94 T_StrictEqual,
95 T_StrictNotEqual,
96 T_To,
97 T_TypeOf,
98 T_UnsignedRightShift,
99 T_UnsignedRightShiftAssign,
100 T_Void,
102 T_OPERATOR_SENTINEL, // one past T_Void, the last operator value
104 // Sundry punctuation
106 T_LeftParen = 100,
107 T_RightParen,
108 T_Comma,
109 T_Dot,
110 T_DoubleDot,
111 T_TripleDot,
112 T_LeftDotAngle,
113 T_Colon,
114 T_DoubleColon,
115 T_Semicolon,
116 T_Question,
117 T_LeftBracket,
118 T_RightBracket,
119 T_LeftBrace,
120 T_RightBrace,
121 T_AtSign,
122 T_XmlLeftBrace,
123 T_XmlRightBrace,
124 T_XmlEquals,
125 T_XmlLeftAngle,
126 T_XmlRightAngle,
127 T_XmlLeftAngleSlash,
128 T_XmlSlashRightAngle,
130 // Reserved words that are not operators. Commented-out entries are operators, above.
132 /*T_As,*/
133 T_Break = 200,
134 T_Case,
135 T_Catch,
136 T_Class,
137 T_Const,
138 T_Continue,
139 T_Default,
140 /*T_Delete,*/
141 T_Do,
142 T_Dynamic,
143 T_Else,
144 T_Extends,
145 T_False,
146 T_Finally,
147 T_For,
148 T_Function,
149 T_If,
150 T_Implements,
151 T_Import,
152 /*T_In,*/
153 /*T_InstanceOf,*/
154 T_Interface,
155 T_Internal,
156 /*T_Is,*/
157 T_Native,
158 T_New,
159 T_Null,
160 T_Override,
161 T_Package,
162 T_Private,
163 T_Protected,
164 T_Public,
165 T_Return,
166 T_Super,
167 T_Switch,
168 T_This,
169 T_Throw,
170 /*T_To,*/
171 T_True,
172 T_Try,
173 /*T_TypeOf,*/
174 T_Use,
175 T_Var,
176 /*T_Void,*/
177 T_While,
178 T_With,
180 // Identifiers, literals, and XML content tokens
182 T_Identifier = 300,
183 T_IntLiteral,
184 T_UIntLiteral,
185 T_DoubleLiteral,
186 T_RegexpLiteral,
187 T_StringLiteral,
188 T_XmlCDATA, // "<![CDATA[...]]>" (including the punctuation, ditto for the three following tokens)
189 T_XmlComment, // "<!-- ... -->"
190 T_XmlProcessingInstruction, // "<? ... ?>"
191 T_XmlString, // '...' or "..."
192 T_XmlName, // string of XMLName characters
193 T_XmlWhitespaces, // string of XMLWhitespace characters
194 T_XmlText, // string of characters that are not XMLName or XMLWhitespace
196 // meta
198 T_EOS = 400,
199 T_BreakSlash, // client must disambiguate by calling divideOperator() or regexp()
200 T_BreakXml, // <?, <!-- seen but not consumed
201 T_BreakRightAngle, // client must disambiguate by calling rightAngle() or rightShiftOrRelationalOperator()
203 // LAST also serves double duty as NONE
205 T_LAST = 500
208 // Value carrier for tokens that carry values.
210 union TokenValue { // only the member matching the most recently returned token kind is meaningful
211 double d; // T_DoubleLiteral
212 int32_t i; // T_IntLiteral
213 uint32_t u; // T_UIntLiteral
214 Str *s; // T_StringLiteral, T_RegexpLiteral, T_Identifier; also the text of the XML tokens for which xmlAtom() sets valuep->s
219 * Lexical analysis.
221 * A client retrieves a stream of tokens from the lexer by calling
222 * lex() repeatedly. When the special tokens T_BreakSlash and
223 * T_BreakRightAngle are returned the client must disambiguate
224 * the context by calling divideOperator() or regexp() in the former
225 * case and rightAngle() or rightShiftOrRelationalOperator() in the latter.
227 * A few tokens carry values. These values are available through
228 * accessor functions on the lexer when the most recent call to
229 * the lexer returned the particular token in question. In debug
230 * builds there are checks to catch incorrect uses of these APIs.
232 * A line number is maintained by the lexer and made available
233 * through an accessor function. Following the return of a token,
234 * the line number corresponds to the line number of the last
235 * consumed character of the most recently consumed token. The only
236 * multi-line tokens are strings, regular expression literals, and
237 * identifiers containing \<newline> sequences.
240 class Lexer {
241 public:
243 * @param compiler The compiler structure, from which we take flags and allocator
244 * @param src The source text as a string with a trailing NUL; it may contain
245 * embedded NULs but the last is considered a terminator, not part
246 * of the input
247 * @param keyword_or_ident True iff this scanner is simply being used to check
248 * whether an identifier that contains a backslash sequence looks
249 * like a keyword.
251 Lexer(Compiler* compiler, const wchar* src, uint32_t srclen, bool keyword_or_ident=false);
253 Token lex(uint32_t* linep, TokenValue* valuep); // Lex a token
254 Token regexp(uint32_t* linep, TokenValue* valuep); // Following T_BreakSlash, to lex a regex literal
255 Token divideOperator(uint32_t* linep); // Following T_BreakSlash, to lex a division operator
256 Token rightAngle(uint32_t* linep); // Following T_BreakRightAngle, to lex '>' at the end of a type instantiator
257 Token rightShiftOrRelationalOperator(uint32_t* linep); // Following T_BreakRightAngle, to lex a shift or relational operator
260 * Last consumed character must have been c; back up once
262 void xmlPushback(wchar c);
265 * Lex one XML atom.
266 * xmlAtom returns one of:
268 * XmlComment
269 * XmlCDATA
270 * XmlProcessingInstruction
271 * XmlName
272 * XmlWhitespaces
273 * XmlText
274 * XmlString
275 * XmlLeftBrace
276 * XmlRightBrace
277 * XmlEquals
278 * XmlLeftAngle
279 * XmlRightAngle
280 * XmlLeftAngleSlash
281 * XmlSlashRightAngle
283 * For XmlComment, XmlCDATA, XmlProcessingInstruction, XmlName, XmlWhitespaces, XmlText,
284 * and XmlString, valuep->s is set to the actual text.
286 Token xmlAtom(uint32_t* linep, TokenValue* valuep);
288 #ifdef DEBUG
289 void trace(); // enable tracing
290 bool getTrace() const; // retrieve the current tracing flag
291 #endif
293 private:
294 enum {
295 // Special spaces
296 UNICHAR_LS = 0x2028,
297 UNICHAR_PS = 0x2029,
299 // Various Zs characters
300 UNICHAR_Zs1 = 0x1680,
301 UNICHAR_Zs2 = 0x180E,
302 UNICHAR_Zs3 = 0x2000,
303 UNICHAR_Zs4 = 0x2001,
304 UNICHAR_Zs5 = 0x2002,
305 UNICHAR_Zs6 = 0x2003,
306 UNICHAR_Zs7 = 0x2004,
307 UNICHAR_Zs8 = 0x2005,
308 UNICHAR_Zs9 = 0x2006,
309 UNICHAR_Zs10 = 0x2007,
310 UNICHAR_Zs11 = 0x2008,
311 UNICHAR_Zs12 = 0x2009,
312 UNICHAR_Zs13 = 0x200A,
313 UNICHAR_Zs14 = 0x202F,
314 UNICHAR_Zs15 = 0x205F,
315 UNICHAR_Zs16 = 0x3000,
317 // Byte-order marks that act like spaces when not at the beginning of the input
318 UNICHAR_BOM1 = 0xFFFE,
319 UNICHAR_BOM2 = 0xFEFF,
322 enum {
323 // The character among the LS/PS, BOM1/BOM2, and Zs* with the lowest value
324 UNICHAR_LOWEST_ODDSPACE = 0x1680
327 // 8 bits available in the char_attrs table (its entries are uint8_t); each base attribute below is a distinct bit, so several can be set in one entry
328 enum {
329 CHAR_ATTR_OCTAL = 1,
330 CHAR_ATTR_DECIMAL = 2,
331 CHAR_ATTR_HEX = 4,
332 CHAR_ATTR_LETTER = 8,
333 CHAR_ATTR_UNDERBAR = 16,
334 CHAR_ATTR_DOLLAR = 32,
336 CHAR_ATTR_INITIAL = CHAR_ATTR_LETTER | CHAR_ATTR_UNDERBAR | CHAR_ATTR_DOLLAR, // mask: letter, underbar, or dollar bit
337 CHAR_ATTR_SUBSEQUENT = CHAR_ATTR_INITIAL | CHAR_ATTR_DECIMAL // mask: any CHAR_ATTR_INITIAL bit or the decimal bit
340 Token lexImpl();
341 Token regexpImpl();
342 Token divideOperatorImpl();
343 Token rightAngleImpl();
344 Token rightShiftOrRelationalOperatorImpl();
346 Token xmlAtomImpl();
347 Token xmlMarkup(Token t, const char* terminator);
348 Token xmlWhitespaces();
349 Token xmlName();
350 Token xmlString();
351 Token xmlText();
352 bool isXmlNameStart(wchar c);
353 bool isXmlNameSubsequent(wchar c);
355 void lineComment();
356 void blockComment();
358 Token identifier();
360 Token stringLiteral(int delimiter);
362 int escapeSequence();
363 int octalOrNulEscape();
364 int octalEscape(int n);
365 int hexEscape(int n);
366 int unicodeEscape();
368 Token numberLiteral();
369 Token integerLiteral(int base);
370 Token floatingLiteral();
371 void checkNextCharForNumber();
372 bool numberLiteralPrime();
373 void numberFraction(bool has_leading_digits);
374 void numberExponent();
375 bool octalDigits(int k);
376 bool decimalDigits(int k);
377 bool hexDigits(int k);
378 bool digits(int k, int mask);
379 double parseFloat();
380 double parseInt(int base);
382 bool notPartOfIdent(int c);
383 bool isUnicodeIdentifierStart(int c);
384 bool isUnicodeIdentifierPart(int c);
385 #ifdef DEBUG
386 void print(Token t, uint32_t l, TokenValue v);
387 #endif
389 Compiler * const compiler;
390 const wchar* src; // input
391 const wchar* limit; // one past end of input
392 const wchar* idx; // next char in input
393 const wchar* mark; // a remembered position, typically the start of a lexeme (not always valid)
394 uint32_t lineno; // line number of last char of last token returned
395 const bool keyword_or_ident;
396 #ifdef DEBUG
397 Token last_token; // last token returned
398 bool traceflag; // true iff we're tracing
399 #endif
400 TokenValue val; // temporary slot
402 // Character attributes for the ASCII range, bit vectors of the CHAR_ATTR_ values above.
403 static const uint8_t char_attrs[128];