1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
22 PRECEDENCE_ASSIGNMENT
= 1,
25 PRECEDENCE_EQUALITY
= 4,
26 PRECEDENCE_RELATION
= 5,
28 PRECEDENCE_PREFIX
= 7,
32 // The top-level for blocks/ifs is still recursive descent, the expression
33 // parser is a Pratt parser. The basic idea there is to have the precedences
34 // (and associativities) encoded relative to each other and only parse up
35 // until you hit something of that precedence. There's a dispatch table in
36 // expressions_ at the top of parser.cc that describes how each token
37 // dispatches if it's seen as either a prefix or infix operator, and if it's
38 // infix, what its precedence is.
41 // - http://javascript.crockford.com/tdop/tdop.html
42 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
44 // Indexed by Token::Type.
45 ParserHelper
Parser::expressions_
[] = {
46 {NULL
, NULL
, -1}, // INVALID
47 {&Parser::Literal
, NULL
, -1}, // INTEGER
48 {&Parser::Literal
, NULL
, -1}, // STRING
49 {&Parser::Literal
, NULL
, -1}, // TRUE_TOKEN
50 {&Parser::Literal
, NULL
, -1}, // FALSE_TOKEN
51 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
52 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
53 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
54 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
55 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
56 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
57 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
58 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
59 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
60 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
61 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
62 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
63 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
64 {&Parser::Not
, NULL
, -1}, // BANG
65 {&Parser::Group
, NULL
, -1}, // LEFT_PAREN
66 {NULL
, NULL
, -1}, // RIGHT_PAREN
67 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
68 {NULL
, NULL
, -1}, // RIGHT_BRACKET
69 {NULL
, NULL
, -1}, // LEFT_BRACE
70 {NULL
, NULL
, -1}, // RIGHT_BRACE
71 {NULL
, NULL
, -1}, // IF
72 {NULL
, NULL
, -1}, // ELSE
73 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
74 {NULL
, NULL
, -1}, // COMMA
75 {NULL
, NULL
, -1}, // COMMENT
78 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
79 : tokens_(tokens
), err_(err
), cur_(0) {
86 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
88 Parser
p(tokens
, err
);
89 return p
.ParseFile().PassAs
<ParseNode
>();
93 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
95 Parser
p(tokens
, err
);
96 return p
.ParseExpression().Pass();
99 bool Parser::IsAssignment(const ParseNode
* node
) const {
100 return node
&& node
->AsBinaryOp() &&
101 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
102 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
103 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
106 bool Parser::IsStatementBreak(Token::Type token_type
) const {
107 switch (token_type
) {
108 case Token::IDENTIFIER
:
109 case Token::LEFT_BRACE
:
110 case Token::RIGHT_BRACE
:
119 bool Parser::LookAhead(Token::Type type
) {
122 return cur_token().type() == type
;
125 bool Parser::Match(Token::Type type
) {
126 if (!LookAhead(type
))
132 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
133 Token::Type types
[1] = { type
};
134 return Consume(types
, 1, error_message
);
137 Token
Parser::Consume(Token::Type
* types
,
139 const char* error_message
) {
141 // Don't overwrite current error, but make progress through tokens so that
142 // a loop that's expecting a particular token will still terminate.
144 return Token(Location(), Token::INVALID
, base::StringPiece());
147 const char kEOFMsg
[] = "I hit EOF instead.";
149 *err_
= Err(Location(), error_message
, kEOFMsg
);
151 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
152 return Token(Location(), Token::INVALID
, base::StringPiece());
155 for (size_t i
= 0; i
< num_types
; ++i
) {
156 if (cur_token().type() == types
[i
])
157 return tokens_
[cur_
++];
159 *err_
= Err(cur_token(), error_message
);
160 return Token(Location(), Token::INVALID
, base::StringPiece());
163 Token
Parser::Consume() {
164 return tokens_
[cur_
++];
167 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
168 return ParseExpression(0);
171 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
173 return scoped_ptr
<ParseNode
>();
175 Token token
= Consume();
176 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
178 if (prefix
== NULL
) {
180 std::string("Unexpected token '") + token
.value().as_string() +
182 return scoped_ptr
<ParseNode
>();
185 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
189 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
190 precedence
<= expressions_
[cur_token().type()].precedence
) {
192 InfixFunc infix
= expressions_
[token
.type()].infix
;
195 std::string("Unexpected token '") +
196 token
.value().as_string() + std::string("'"));
197 return scoped_ptr
<ParseNode
>();
199 left
= (this->*infix
)(left
.Pass(), token
);
201 return scoped_ptr
<ParseNode
>();
207 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
208 return scoped_ptr
<ParseNode
>(new LiteralNode(token
)).Pass();
211 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
212 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
215 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
216 scoped_ptr
<ParseNode
> expr
= ParseExpression();
218 return scoped_ptr
<ParseNode
>();
219 Consume(Token::RIGHT_PAREN
, "Expected ')'");
223 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
224 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
226 return scoped_ptr
<ParseNode
>();
227 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
228 unary_op
->set_op(token
);
229 unary_op
->set_operand(expr
.Pass());
230 return unary_op
.PassAs
<ParseNode
>();
233 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
234 scoped_ptr
<ParseNode
> list(ParseList(Token::RIGHT_BRACKET
, true));
235 if (!has_error() && !at_end())
236 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
240 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
242 scoped_ptr
<ParseNode
> right
=
243 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
247 "Expected right hand side for '" + token
.value().as_string() + "'");
248 return scoped_ptr
<ParseNode
>();
250 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
251 binary_op
->set_op(token
);
252 binary_op
->set_left(left
.Pass());
253 binary_op
->set_right(right
.Pass());
254 return binary_op
.PassAs
<ParseNode
>();
257 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
259 scoped_ptr
<ListNode
> list(new ListNode
);
260 list
->set_begin_token(token
);
261 list
->set_end_token(token
);
262 scoped_ptr
<BlockNode
> block
;
263 bool has_arg
= false;
264 if (Match(Token::LEFT_PAREN
)) {
265 // Parsing a function call.
267 if (Match(Token::RIGHT_PAREN
)) {
268 // Nothing, just an empty call.
270 list
= ParseList(Token::RIGHT_PAREN
, false);
272 return scoped_ptr
<ParseNode
>();
273 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
275 // Optionally with a scope.
276 if (LookAhead(Token::LEFT_BRACE
)) {
277 block
= ParseBlock();
279 return scoped_ptr
<ParseNode
>();
283 if (!left
&& !has_arg
) {
284 // Not a function call, just a standalone identifier.
285 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
287 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
288 func_call
->set_function(token
);
289 func_call
->set_args(list
.Pass());
291 func_call
->set_block(block
.Pass());
292 return func_call
.PassAs
<ParseNode
>();
295 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
297 if (left
->AsIdentifier() == NULL
) {
298 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
299 return scoped_ptr
<ParseNode
>();
301 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
302 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
303 assign
->set_op(token
);
304 assign
->set_left(left
.Pass());
305 assign
->set_right(value
.Pass());
306 return assign
.PassAs
<ParseNode
>();
309 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
311 // TODO: Maybe support more complex expressions like a[0][0]. This would
312 // require work on the evaluator too.
313 if (left
->AsIdentifier() == NULL
) {
314 *err_
= Err(left
.get(), "May only subscript simple identifiers");
315 return scoped_ptr
<ParseNode
>();
317 scoped_ptr
<ParseNode
> value
= ParseExpression();
318 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
319 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
320 accessor
->set_base(left
->AsIdentifier()->value());
321 accessor
->set_index(value
.Pass());
322 return accessor
.PassAs
<ParseNode
>();
325 // Does not Consume the start or end token.
326 scoped_ptr
<ListNode
> Parser::ParseList(Token::Type stop_before
,
327 bool allow_trailing_comma
) {
328 scoped_ptr
<ListNode
> list(new ListNode
);
329 list
->set_begin_token(cur_token());
330 bool just_got_comma
= false;
331 while (!LookAhead(stop_before
)) {
332 just_got_comma
= false;
333 // Why _OR? We're parsing things that are higher precedence than the ,
334 // that separates the items of the list. , should appear lower than
335 // boolean expressions (the lowest of which is OR), but above assignments.
336 list
->append_item(ParseExpression(PRECEDENCE_OR
));
338 return scoped_ptr
<ListNode
>();
341 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
342 return scoped_ptr
<ListNode
>();
344 just_got_comma
= Match(Token::COMMA
);
346 if (just_got_comma
&& !allow_trailing_comma
) {
347 *err_
= Err(cur_token(), "Trailing comma");
348 return scoped_ptr
<ListNode
>();
350 list
->set_end_token(cur_token());
354 scoped_ptr
<ParseNode
> Parser::ParseFile() {
355 scoped_ptr
<BlockNode
> file(new BlockNode(false));
359 scoped_ptr
<ParseNode
> statement
= ParseStatement();
362 file
->append_statement(statement
.Pass());
364 if (!at_end() && !has_error())
365 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
367 return scoped_ptr
<ParseNode
>();
368 return file
.PassAs
<ParseNode
>();
371 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
372 if (LookAhead(Token::LEFT_BRACE
)) {
373 return ParseBlock().PassAs
<ParseNode
>();
374 } else if (LookAhead(Token::IF
)) {
375 return ParseCondition();
377 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
378 // to allow "pointless" expressions and return ParseExpression() directly.
379 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
381 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
385 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
386 *err_
= Err(token
, "Expecting assignment or function call.");
388 return scoped_ptr
<ParseNode
>();
392 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
394 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
396 return scoped_ptr
<BlockNode
>();
397 scoped_ptr
<BlockNode
> block(new BlockNode(true));
398 block
->set_begin_token(begin_token
);
401 if (LookAhead(Token::RIGHT_BRACE
)) {
402 block
->set_end_token(Consume());
406 scoped_ptr
<ParseNode
> statement
= ParseStatement();
408 return scoped_ptr
<BlockNode
>();
409 block
->append_statement(statement
.Pass());
414 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
415 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
416 Consume(Token::IF
, "Expected 'if'");
417 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
418 condition
->set_condition(ParseExpression());
419 if (IsAssignment(condition
->condition()))
420 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
421 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
422 condition
->set_if_true(ParseBlock().Pass());
423 if (Match(Token::ELSE
))
424 condition
->set_if_false(ParseStatement().Pass());
426 return scoped_ptr
<ParseNode
>();
427 return condition
.PassAs
<ParseNode
>();