1 /* vim: set sw=4 ts=8 et tw=78: */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is the Narcissus JavaScript engine.
17 * The Initial Developer of the Original Code is
18 * Brendan Eich <brendan@mozilla.org>.
19 * Portions created by the Initial Developer are Copyright (C) 2004
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
39 * Narcissus - JS implemented in JS.
41 * Lexical scanner and parser.
44 // Build a regexp that recognizes operators and punctuators (except newline).
45 var opRegExpSrc = "^";
46 for (i in opTypeNames) {
49 if (opRegExpSrc != "^")
51 opRegExpSrc += i.replace(/[?|^&(){}\[\]+\-*\/\.]/g, "\\$&");
53 var opRegExp = new RegExp(opRegExpSrc);
55 // A regexp to match floating point literals (but not integer literals).
56 var fpRegExp = /^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+|^\.\d+(?:[eE][-+]?\d+)?/;
58 // A regexp to match regexp literals.
59 var reRegExp = /^\/((?:\\.|\[(?:\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/;
61 function Tokenizer(s, f, l) {
63 this.source = String(s);
67 this.scanNewlines = false;
68 this.scanOperand = true;
69 this.filename = f || "";
73 Tokenizer.prototype = {
75 return this.source.substring(this.cursor);
79 return this.peek() == END;
83 return this.tokens[this.tokenIndex];
86 match: function (tt) {
87 return this.get() == tt || this.unget();
90 mustMatch: function (tt) {
92 throw this.newSyntaxError("Missing " + tokens[tt].toLowerCase());
99 next = this.tokens[(this.tokenIndex + this.lookahead) & 3];
100 if (this.scanNewlines && next.lineno != this.lineno)
111 peekOnSameLine: function () {
112 this.scanNewlines = true;
113 var tt = this.peek();
114 this.scanNewlines = false;
120 while (this.lookahead) {
122 this.tokenIndex = (this.tokenIndex + 1) & 3;
123 token = this.tokens[this.tokenIndex];
124 if (token.type != NEWLINE || this.scanNewlines)
129 var input = this.input;
130 var match = (this.scanNewlines ? /^[ \t]+/ : /^\s+/)(input);
132 var spaces = match[0];
133 this.cursor += spaces.length;
134 var newlines = spaces.match(/\n/g);
136 this.lineno += newlines.length;
140 if (!(match = /^\/(?:\*(?:.|\n)*?\*\/|\/.*)/(input)))
142 var comment = match[0];
143 this.cursor += comment.length;
144 newlines = comment.match(/\n/g);
146 this.lineno += newlines.length
149 this.tokenIndex = (this.tokenIndex + 1) & 3;
150 token = this.tokens[this.tokenIndex];
152 this.tokens[this.tokenIndex] = token = {};
155 return token.type = END;
157 if ((match = fpRegExp(input))) {
159 token.value = parseFloat(match[0]);
160 } else if ((match = /^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/(input))) {
162 token.value = parseInt(match[0]);
163 } else if ((match = /^[$_\w]+/(input))) { // FIXME no ES3 unicode
165 token.type = keywords[id] || IDENTIFIER;
167 } else if ((match = /^"(?:\\.|[^"])*"|^'(?:\\.|[^'])*'/(input))) { //"){
169 token.value = eval(match[0]);
170 } else if (this.scanOperand && (match = reRegExp(input))) {
172 token.value = new RegExp(match[1], match[2]);
173 } else if ((match = opRegExp(input))) {
175 if (assignOps[op] && input[op.length] == '=') {
177 token.assignOp = GLOBAL[opTypeNames[op]];
180 token.type = GLOBAL[opTypeNames[op]];
181 if (this.scanOperand &&
182 (token.type == PLUS || token.type == MINUS)) {
183 token.type += UNARY_PLUS - PLUS;
185 token.assignOp = null;
188 } else if (this.scanNewlines && (match = /^\n/(input))) {
189 token.type = NEWLINE;
191 throw this.newSyntaxError("Illegal token");
194 token.start = this.cursor;
195 this.cursor += match[0].length;
196 token.end = this.cursor;
197 token.lineno = this.lineno;
202 if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
203 this.tokenIndex = (this.tokenIndex - 1) & 3;
206 newSyntaxError: function (m) {
207 var e = new SyntaxError(m, this.filename, this.lineno);
208 e.source = this.source;
209 e.cursor = this.cursor;
214 function CompilerContext(inFunction) {
215 this.inFunction = inFunction;
221 var CCp = CompilerContext.prototype;
222 CCp.bracketLevel = CCp.curlyLevel = CCp.parenLevel = CCp.hookLevel = 0;
223 CCp.ecmaStrictMode = CCp.inForLoopInit = false;
225 function Script(t, x) {
226 var n = Statements(t, x);
228 n.funDecls = x.funDecls;
229 n.varDecls = x.varDecls;
233 // Node extends Array, which we extend slightly with a top-of-stack method.
234 Array.prototype.__defineProperty__(
237 return this.length && this[this.length-1];
242 function Node(t, type) {
245 this.type = type || token.type;
246 this.value = token.value;
247 this.lineno = token.lineno;
248 this.start = token.start;
249 this.end = token.end;
252 this.lineno = t.lineno;
256 for (var i = 2; i < arguments.length; i++)
257 this.push(arguments[i]);
260 var Np = Node.prototype = new Array;
261 Np.constructor = Node;
262 Np.toSource = Object.prototype.toSource;
264 // Always use push to add operands to an expression, to update start and end.
265 Np.push = function (kid) {
266 if (kid.start < this.start)
267 this.start = kid.start;
268 if (this.end < kid.end)
270 return Array.prototype.push.call(this, kid);
273 Node.indentLevel = 0;
275 function tokenstr(tt) {
277 return /^\W/.test(t) ? opTypeNames[t] : t.toUpperCase();
280 Np.toString = function () {
282 for (var i in this) {
283 if (this.hasOwnProperty(i) && i != 'type' && i != 'target')
284 a.push({id: i, value: this[i]});
286 a.sort(function (a,b) { return (a.id < b.id) ? -1 : 1; });
287 const INDENTATION = " ";
288 var n = ++Node.indentLevel;
289 var s = "{\n" + INDENTATION.repeat(n) + "type: " + tokenstr(this.type);
290 for (i = 0; i < a.length; i++)
291 s += ",\n" + INDENTATION.repeat(n) + a[i].id + ": " + a[i].value;
292 n = --Node.indentLevel;
293 s += "\n" + INDENTATION.repeat(n) + "}";
297 Np.getSource = function () {
298 return this.tokenizer.source.slice(this.start, this.end);
301 Np.__defineGetter__('filename',
302 function () { return this.tokenizer.filename; });
304 String.prototype.__defineProperty__(
307 var s = "", t = this + s;
315 // Statement stack and nested statement handler.
316 function nest(t, x, node, func, end) {
317 x.stmtStack.push(node);
320 end && t.mustMatch(end);
324 function Statements(t, x) {
325 var n = new Node(t, BLOCK);
327 while (!t.done && t.peek() != RIGHT_CURLY)
328 n.push(Statement(t, x));
333 function Block(t, x) {
334 t.mustMatch(LEFT_CURLY);
335 var n = Statements(t, x);
336 t.mustMatch(RIGHT_CURLY);
340 const DECLARED_FORM = 0, EXPRESSED_FORM = 1, STATEMENT_FORM = 2;
342 function Statement(t, x) {
343 var i, label, n, n2, ss, tt = t.get();
345 // Cases for statements ending in a right curly return early, avoiding the
346 // common semicolon insertion magic after this switch.
349 return FunctionDefinition(t, x, true,
350 (x.stmtStack.length > 1)
355 n = Statements(t, x);
356 t.mustMatch(RIGHT_CURLY);
361 n.condition = ParenExpression(t, x);
363 n.thenPart = Statement(t, x);
364 n.elsePart = t.match(ELSE) ? Statement(t, x) : null;
370 t.mustMatch(LEFT_PAREN);
371 n.discriminant = Expression(t, x);
372 t.mustMatch(RIGHT_PAREN);
376 t.mustMatch(LEFT_CURLY);
377 while ((tt = t.get()) != RIGHT_CURLY) {
380 if (n.defaultIndex >= 0)
381 throw t.newSyntaxError("More than one switch default");
386 n.defaultIndex = n.cases.length;
388 n2.caseLabel = Expression(t, x, COLON);
391 throw t.newSyntaxError("Invalid switch case");
394 n2.statements = new Node(t, BLOCK);
395 while ((tt=t.peek()) != CASE && tt != DEFAULT && tt != RIGHT_CURLY)
396 n2.statements.push(Statement(t, x));
405 t.mustMatch(LEFT_PAREN);
406 if ((tt = t.peek()) != SEMICOLON) {
407 x.inForLoopInit = true;
408 if (tt == VAR || tt == CONST) {
410 n2 = Variables(t, x);
412 n2 = Expression(t, x);
414 x.inForLoopInit = false;
416 if (n2 && t.match(IN)) {
418 if (n2.type == VAR) {
419 if (n2.length != 1) {
420 throw new SyntaxError("Invalid for..in left-hand side",
421 t.filename, n2.lineno);
424 // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
431 n.object = Expression(t, x);
433 n.setup = n2 || null;
434 t.mustMatch(SEMICOLON);
435 n.condition = (t.peek() == SEMICOLON) ? null : Expression(t, x);
436 t.mustMatch(SEMICOLON);
437 n.update = (t.peek() == RIGHT_PAREN) ? null : Expression(t, x);
439 t.mustMatch(RIGHT_PAREN);
440 n.body = nest(t, x, n, Statement);
446 n.condition = ParenExpression(t, x);
447 n.body = nest(t, x, n, Statement);
453 n.body = nest(t, x, n, Statement, WHILE);
454 n.condition = ParenExpression(t, x);
455 if (!x.ecmaStrictMode) {
456 // <script language="JavaScript"> (without version hints) may need
457 // automatic semicolon insertion without a newline after do-while.
458 // See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
467 if (t.peekOnSameLine() == IDENTIFIER) {
469 n.label = t.token.value;
477 throw t.newSyntaxError("Label not found");
478 } while (ss[i].label != label);
481 * Both break and continue to label need to be handled specially
482 * within a labeled loop, so that they target that loop. If not in
483 * a loop, then break targets its labeled statement. Labels can be
484 * nested so we skip all labels immediately enclosing the nearest
485 * non-label statement.
487 while (i < ss.length - 1 && ss[i+1].type == LABEL)
489 if (i < ss.length - 1 && ss[i+1].isLoop)
491 else if (tt == CONTINUE)
492 throw t.newSyntaxError("Invalid continue");
496 throw t.newSyntaxError("Invalid " + ((tt == BREAK)
500 } while (!ss[i].isLoop && (tt != BREAK || ss[i].type != SWITCH));
507 n.tryBlock = Block(t, x);
509 while (t.match(CATCH)) {
511 t.mustMatch(LEFT_PAREN);
512 n2.varName = t.mustMatch(IDENTIFIER).value;
514 if (x.ecmaStrictMode)
515 throw t.newSyntaxError("Illegal catch guard");
516 if (n.catchClauses.length && !n.catchClauses.top().guard)
517 throw t.newSyntaxError("Guarded catch after unguarded");
518 n2.guard = Expression(t, x);
522 t.mustMatch(RIGHT_PAREN);
523 n2.block = Block(t, x);
524 n.catchClauses.push(n2);
526 if (t.match(FINALLY))
527 n.finallyBlock = Block(t, x);
528 if (!n.catchClauses.length && !n.finallyBlock)
529 throw t.newSyntaxError("Invalid try statement");
534 throw t.newSyntaxError(tokens[tt] + " without preceding try");
538 n.exception = Expression(t, x);
543 throw t.newSyntaxError("Invalid return");
545 tt = t.peekOnSameLine();
546 if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
547 n.value = Expression(t, x);
552 n.object = ParenExpression(t, x);
553 n.body = nest(t, x, n, Statement);
567 n = new Node(t, SEMICOLON);
572 if (tt == IDENTIFIER) {
573 t.scanOperand = false;
575 t.scanOperand = true;
577 label = t.token.value;
579 for (i = ss.length-1; i >= 0; --i) {
580 if (ss[i].label == label)
581 throw t.newSyntaxError("Duplicate label");
584 n = new Node(t, LABEL);
586 n.statement = nest(t, x, n, Statement);
591 n = new Node(t, SEMICOLON);
593 n.expression = Expression(t, x);
594 n.end = n.expression.end;
598 if (t.lineno == t.token.lineno) {
599 tt = t.peekOnSameLine();
600 if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
601 throw t.newSyntaxError("Missing ; before statement");
607 function FunctionDefinition(t, x, requireName, functionForm) {
609 if (f.type != FUNCTION)
610 f.type = (f.value == "get") ? GETTER : SETTER;
611 if (t.match(IDENTIFIER))
612 f.name = t.token.value;
613 else if (requireName)
614 throw t.newSyntaxError("Missing function identifier");
616 t.mustMatch(LEFT_PAREN);
619 while ((tt = t.get()) != RIGHT_PAREN) {
620 if (tt != IDENTIFIER)
621 throw t.newSyntaxError("Missing formal parameter");
622 f.params.push(t.token.value);
623 if (t.peek() != RIGHT_PAREN)
627 t.mustMatch(LEFT_CURLY);
628 var x2 = new CompilerContext(true);
629 f.body = Script(t, x2);
630 t.mustMatch(RIGHT_CURLY);
633 f.functionForm = functionForm;
634 if (functionForm == DECLARED_FORM)
639 function Variables(t, x) {
642 t.mustMatch(IDENTIFIER);
643 var n2 = new Node(t);
645 if (t.match(ASSIGN)) {
646 if (t.token.assignOp)
647 throw t.newSyntaxError("Invalid variable initialization");
648 n2.initializer = Expression(t, x, COMMA);
650 n2.readOnly = (n.type == CONST);
653 } while (t.match(COMMA));
657 function ParenExpression(t, x) {
658 t.mustMatch(LEFT_PAREN);
659 var n = Expression(t, x);
660 t.mustMatch(RIGHT_PAREN);
667 ASSIGN: 2, HOOK: 2, COLON: 2,
668 // The above all have to have the same precedence, see bug 330975.
674 EQ: 9, NE: 9, STRICT_EQ: 9, STRICT_NE: 9,
675 LT: 10, LE: 10, GE: 10, GT: 10, IN: 10, INSTANCEOF: 10,
676 LSH: 11, RSH: 11, URSH: 11,
678 MUL: 13, DIV: 13, MOD: 13,
679 DELETE: 14, VOID: 14, TYPEOF: 14, // PRE_INCREMENT: 14, PRE_DECREMENT: 14,
680 NOT: 14, BITWISE_NOT: 14, UNARY_PLUS: 14, UNARY_MINUS: 14,
681 INCREMENT: 15, DECREMENT: 15, // postfix
686 // Map operator type code to precedence.
687 for (i in opPrecedence)
688 opPrecedence[GLOBAL[i]] = opPrecedence[i];
699 EQ: 2, NE: 2, STRICT_EQ: 2, STRICT_NE: 2,
700 LT: 2, LE: 2, GE: 2, GT: 2, IN: 2, INSTANCEOF: 2,
701 LSH: 2, RSH: 2, URSH: 2,
703 MUL: 2, DIV: 2, MOD: 2,
704 DELETE: 1, VOID: 1, TYPEOF: 1, // PRE_INCREMENT: 1, PRE_DECREMENT: 1,
705 NOT: 1, BITWISE_NOT: 1, UNARY_PLUS: 1, UNARY_MINUS: 1,
706 INCREMENT: 1, DECREMENT: 1, // postfix
707 NEW: 1, NEW_WITH_ARGS: 2, DOT: 2, INDEX: 2, CALL: 2,
708 ARRAY_INIT: 1, OBJECT_INIT: 1, GROUP: 1
711 // Map operator type code to arity.
713 opArity[GLOBAL[i]] = opArity[i];
715 function Expression(t, x, stop) {
716 var n, id, tt, operators = [], operands = [];
717 var bl = x.bracketLevel, cl = x.curlyLevel, pl = x.parenLevel,
721 var n = operators.pop();
723 var arity = opArity[op];
725 // Flatten left-associative trees.
726 var left = operands.length >= 2 && operands[operands.length-2];
727 if (left.type == op) {
728 var right = operands.pop();
735 // Always use push to add operands to n, to update start and end.
736 var a = operands.splice(operands.length - arity);
737 for (var i = 0; i < arity; i++)
740 // Include closing bracket or postfix operator in [start,end).
741 if (n.end < t.token.end)
749 while ((tt = t.get()) != END) {
751 x.bracketLevel == bl && x.curlyLevel == cl && x.parenLevel == pl &&
753 // Stop only if tt matches the optional stop parameter, and that
754 // token is not quoted by some kind of bracket.
759 // NB: cannot be empty, Statement handled that.
767 // Use >, not >=, for right-associative ASSIGN and HOOK/COLON.
768 while (opPrecedence[operators.top().type] > opPrecedence[tt] ||
769 (tt == COLON && operators.top().type == ASSIGN)) {
775 throw t.newSyntaxError("Invalid label");
778 operators.push(new Node(t));
780 operands.top().assignOp = t.token.assignOp;
782 ++x.hookLevel; // tt == HOOK
784 t.scanOperand = true;
788 // An in operator should not be parsed if we're parsing the head of
789 // a for (...) loop, unless it is in the then part of a conditional
790 // expression, or parenthesized somehow.
791 if (x.inForLoopInit && !x.hookLevel &&
792 !x.bracketLevel && !x.curlyLevel && !x.parenLevel) {
797 // Treat comma as left-associative so reduce can fold left-heavy
798 // COMMA trees into a single array.
805 case EQ: case NE: case STRICT_EQ: case STRICT_NE:
806 case LT: case LE: case GE: case GT:
808 case LSH: case RSH: case URSH:
809 case PLUS: case MINUS:
810 case MUL: case DIV: case MOD:
814 while (opPrecedence[operators.top().type] >= opPrecedence[tt])
817 t.mustMatch(IDENTIFIER);
818 operands.push(new Node(t, DOT, operands.pop(), new Node(t)));
820 operators.push(new Node(t));
821 t.scanOperand = true;
825 case DELETE: case VOID: case TYPEOF:
826 case NOT: case BITWISE_NOT: case UNARY_PLUS: case UNARY_MINUS:
830 operators.push(new Node(t));
833 case INCREMENT: case DECREMENT:
835 operators.push(new Node(t)); // prefix increment or decrement
837 // Don't cross a line boundary for postfix {in,de}crement.
838 if (t.tokens[(t.tokenIndex + t.lookahead - 1) & 3].lineno !=
843 // Use >, not >=, so postfix has higher precedence than prefix.
844 while (opPrecedence[operators.top().type] > opPrecedence[tt])
846 n = new Node(t, tt, operands.pop());
855 operands.push(FunctionDefinition(t, x, false, EXPRESSED_FORM));
856 t.scanOperand = false;
859 case NULL: case THIS: case TRUE: case FALSE:
860 case IDENTIFIER: case NUMBER: case STRING: case REGEXP:
863 operands.push(new Node(t));
864 t.scanOperand = false;
869 // Array initialiser. Parse using recursive descent, as the
870 // sub-grammar here is not an operator grammar.
871 n = new Node(t, ARRAY_INIT);
872 while ((tt = t.peek()) != RIGHT_BRACKET) {
878 n.push(Expression(t, x, COMMA));
882 t.mustMatch(RIGHT_BRACKET);
884 t.scanOperand = false;
886 // Property indexing operator.
887 operators.push(new Node(t, INDEX));
888 t.scanOperand = true;
894 if (t.scanOperand || x.bracketLevel == bl)
896 while (reduce().type != INDEX)
904 // Object initialiser. As for array initialisers (see above),
905 // parse using recursive descent.
907 n = new Node(t, OBJECT_INIT);
909 if (!t.match(RIGHT_CURLY)) {
912 if ((t.token.value == "get" || t.token.value == "set") &&
913 t.peek() == IDENTIFIER) {
914 if (x.ecmaStrictMode)
915 throw t.newSyntaxError("Illegal property accessor");
916 n.push(FunctionDefinition(t, x, true, EXPRESSED_FORM));
925 if (x.ecmaStrictMode)
926 throw t.newSyntaxError("Illegal trailing ,");
929 throw t.newSyntaxError("Invalid property name");
932 n.push(new Node(t, PROPERTY_INIT, id,
933 Expression(t, x, COMMA)));
935 } while (t.match(COMMA));
936 t.mustMatch(RIGHT_CURLY);
939 t.scanOperand = false;
944 if (!t.scanOperand && x.curlyLevel != cl)
945 throw "PANIC: right curly botch";
950 operators.push(new Node(t, GROUP));
952 while (opPrecedence[operators.top().type] > opPrecedence[NEW])
955 // Handle () now, to regularize the n-ary case for n > 0.
956 // We must set scanOperand in case there are arguments and
957 // the first one is a regexp or unary+/-.
959 t.scanOperand = true;
960 if (t.match(RIGHT_PAREN)) {
963 n.push(operands.pop());
965 n = new Node(t, CALL, operands.pop(),
969 t.scanOperand = false;
973 n.type = NEW_WITH_ARGS;
975 operators.push(new Node(t, CALL));
981 if (t.scanOperand || x.parenLevel == pl)
983 while ((tt = reduce().type) != GROUP && tt != CALL &&
984 tt != NEW_WITH_ARGS) {
989 if (n[1].type != COMMA)
990 n[1] = new Node(t, LIST, n[1]);
997 // Automatic semicolon insertion means we may scan across a newline
998 // and into the beginning of another statement. If so, break out of
999 // the while loop and let the t.scanOperand logic handle errors.
1005 if (x.hookLevel != hl)
1006 throw t.newSyntaxError("Missing : after ?");
1007 if (x.parenLevel != pl)
1008 throw t.newSyntaxError("Missing ) in parenthetical");
1009 if (x.bracketLevel != bl)
1010 throw t.newSyntaxError("Missing ] in index expression");
1012 throw t.newSyntaxError("Missing operand");
1014 // Resume default mode, scanning for operands, not operators.
1015 t.scanOperand = true;
1017 while (operators.length)
1019 return operands.pop();
1022 function parse(s, f, l) {
1023 var t = new Tokenizer(s, f, l);
1024 var x = new CompilerContext(false);
1025 var n = Script(t, x);
1027 throw t.newSyntaxError("Syntax error");