Bash-style while loop syntax support
[hiphop-php.git] / hphp / hack / src / parser / full_fidelity_expression_parser.ml
blob50ea4e9bf8fc82f173e13cafcb4a95e740636328
(**
 * Copyright (c) 2016, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the "hack" directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
 *)
11 module WithSyntax(Syntax : Syntax_sig.Syntax_S) = struct
(* Short names for the token, lexer, operator and error modules used
   throughout this file, plus the precedence parser specialised to this
   Syntax and its token type. *)
module Token = Syntax.Token
module SyntaxKind = Full_fidelity_syntax_kind
module TokenKind = Full_fidelity_token_kind
module SourceText = Full_fidelity_source_text
module SyntaxError = Full_fidelity_syntax_error
module Operator = Full_fidelity_operator
module Lexer = Full_fidelity_lexer.WithToken(Syntax.Token)
module Env = Full_fidelity_parser_env
module PrecedenceSyntax = Full_fidelity_precedence_parser
  .WithSyntax(Syntax)
module PrecedenceParser = PrecedenceSyntax
  .WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
module type SCWithKind_S = SmartConstructorsWrappers.SyntaxKind_S

(* Signatures of the sibling parsers, each specialised to this Syntax and
   its lexer. They are the parameter and result signatures of the functor
   defined further down. *)
module type StatementParser_S = Full_fidelity_statement_parser_type
  .WithSyntax(Syntax)
  .WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
  .StatementParser_S

module type DeclarationParser_S = Full_fidelity_declaration_parser_type
  .WithSyntax(Syntax)
  .WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
  .DeclarationParser_S

module type TypeParser_S = Full_fidelity_type_parser_type
  .WithSyntax(Syntax)
  .WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
  .TypeParser_S

module type ExpressionParser_S = Full_fidelity_expression_parser_type
  .WithSyntax(Syntax)
  .WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
  .ExpressionParser_S

module ParserHelperSyntax = Full_fidelity_parser_helpers.WithSyntax(Syntax)
module ParserHelper =
  ParserHelperSyntax.WithLexer(Full_fidelity_lexer.WithToken(Syntax.Token))
51 module WithSmartConstructors (SCI : SCWithKind_S with module Token = Syntax.Token)
52 = struct
54 module WithStatementAndDeclAndTypeParser
55 (StatementParser : StatementParser_S with module SC = SCI)
56 (DeclParser : DeclarationParser_S with module SC = SCI)
57 (TypeParser : TypeParser_S with module SC = SCI)
58 : (ExpressionParser_S with module SC = SCI)
59 = struct
open TokenKind
open Syntax

(* The precedence parser instantiated with the smart constructors SCI. Its
   definitions, and those of the generic parser helpers built on top of it,
   are included so they can be used unqualified below. *)
module Parser = PrecedenceParser.WithSmartConstructors(SCI)
include Parser
include ParserHelper.WithParser(Parser)
(* Classifies whether a term followed by an operator can start an assignment:
   - Prefix_byref_assignment: may be the prefix of a by-ref assignment;
   - Prefix_assignment: may be the prefix of an ordinary assignment;
   - Prefix_none: not the prefix of an assignment. *)
type binary_expression_prefix_kind =
  | Prefix_byref_assignment | Prefix_assignment | Prefix_none
(* Builds a prefix unary expression node from [operator] and [operand] and
   pushes a record of it (the node, the operator kind [kind] and the operand)
   onto the parser's prefix_unary_expression_stack.
   Returns the updated parser together with the new node. *)
let make_and_track_prefix_unary_expression parser operator kind operand =
  let node = make_prefix_unary_expression operator operand in
  let prefix_unary_expression_stack =
    {node; operator_kind = kind; operand} ::
      parser.prefix_unary_expression_stack
  in
  {parser with prefix_unary_expression_stack}, node
(* Looks up the tracking record for [node] on the parser's
   prefix_unary_expression_stack. The comparison is physical equality (==),
   so only the very node value that was pushed is found, not a structurally
   equal copy. Returns None when the node was never tracked. *)
let find_in_prefix_unary_expression_stack parser node =
  List.find_opt (fun {node = n; _} -> n == node)
    parser.prefix_unary_expression_stack
(* Runs [f] on a type parser built from the current state of [parser]
   (env, lexer, errors, context and smart-constructor state), then copies
   those five fields back from the type parser [f] returns.
   The explicit annotation with a universally quantified 'a keeps the helper
   polymorphic in the result type of [f]. *)
let with_type_parser : 'a . t -> (TypeParser.t -> TypeParser.t * 'a) -> t * 'a
= fun parser f ->
  let type_parser =
    TypeParser.make
      parser.env
      parser.lexer
      parser.errors
      parser.context
      parser.sc_state
  in
  let (type_parser, node) = f type_parser in
  let env = TypeParser.env type_parser in
  let lexer = TypeParser.lexer type_parser in
  let errors = TypeParser.errors type_parser in
  let context = TypeParser.context type_parser in
  let sc_state = TypeParser.sc_state type_parser in
  let parser = { parser with env; lexer; errors; context; sc_state } in
  (parser, node)
(* Delegates to TypeParser.parse_generic_type_argument_list_opt.
   Returns (parser, (items, no_arg_is_missing)); the pair is passed through
   unchanged from the type parser. *)
let parse_generic_type_arguments_opt parser =
  with_type_parser parser
    (fun p ->
      let (p, items, no_arg_is_missing) =
        TypeParser.parse_generic_type_argument_list_opt p
      in
      (p, (items, no_arg_is_missing))
    )
(* Runs [f] on a declaration parser built from the current state of [parser]
   (env, lexer, errors, context and smart-constructor state), then copies
   those five fields back from the declaration parser [f] returns.
   Polymorphic in the result type of [f]. *)
let with_decl_parser : 'a . t -> (DeclParser.t -> DeclParser.t * 'a) -> t * 'a
= fun parser f ->
  let decl_parser =
    DeclParser.make
      parser.env
      parser.lexer
      parser.errors
      parser.context
      parser.sc_state
  in
  let (decl_parser, node) = f decl_parser in
  let env = DeclParser.env decl_parser in
  let lexer = DeclParser.lexer decl_parser in
  let errors = DeclParser.errors decl_parser in
  let context = DeclParser.context decl_parser in
  let sc_state = DeclParser.sc_state decl_parser in
  let parser = { parser with env; lexer; errors; context; sc_state } in
  (parser, node)
(* Parses a compound statement by handing the current parser state to the
   statement parser and copying env, lexer, errors, context and
   smart-constructor state back afterwards.
   Returns the updated parser and the statement node. *)
let parse_compound_statement parser =
  let statement_parser =
    StatementParser.make
      parser.env
      parser.lexer
      parser.errors
      parser.context
      parser.sc_state
  in
  let (statement_parser, statement) =
    StatementParser.parse_compound_statement statement_parser in
  let env = StatementParser.env statement_parser in
  let lexer = StatementParser.lexer statement_parser in
  let errors = StatementParser.errors statement_parser in
  let context = StatementParser.context statement_parser in
  let sc_state = StatementParser.sc_state statement_parser in
  let parser = { parser with env; lexer; errors; context; sc_state } in
  (parser, statement)
(* Delegates to DeclParser.parse_parameter_list_opt and flattens the result
   into (parser, left, token, right), passing the three values through
   unchanged. *)
let parse_parameter_list_opt parser =
  let (parser, (left, token, right)) = with_decl_parser parser
    (fun decl_parser ->
      let (parser, left, token, right) =
        DeclParser.parse_parameter_list_opt decl_parser
      in
      parser, (left, token, right)
    )
  in
  (parser, left, token, right)
(* Entry point: parses one term, then whatever operators and operands
   follow it. Returns the updated parser and the expression node. *)
let rec parse_expression parser =
  let (parser, term) = parse_term parser in
  parse_remaining_expression parser term
(* Parses an expression via with_reset_precedence, i.e. without being bound
   by the precedence of the operator currently being parsed. *)
and parse_expression_with_reset_precedence parser =
  with_reset_precedence parser parse_expression
(* Parses an expression via with_operator_precedence, using [operator] to
   set the precedence for the sub-parse. *)
and parse_expression_with_operator_precedence parser operator =
  with_operator_precedence parser operator parse_expression
(* Speculatively runs [f] on [parser].
   Returns Some (parser, result) when the number of recorded errors is the
   same before and after [f]. Returns None when the count changed or when
   [f] raised Failure. *)
and parse_if_no_error parser f =
  let old_errors = List.length (errors parser) in
  try
    let (parser, result) = f parser in
    let new_errors = List.length(errors parser) in
    Option.some_if (old_errors = new_errors) (parser, result)
  with Failure _ -> None
(* Fallback for parse_term: reads the next token as a name.
   - A Name is extended to a qualified name and parsed as a name or
     collection literal expression.
   - Any other token yields error1015. If the token is expected here it is
     left unconsumed and a missing node is returned; otherwise it is
     consumed and returned as a token node. *)
and parse_as_name_or_error parser =
  (* TODO: Are there "reserved" keywords that absolutely cannot start
     an expression? If so, list them above and make them produce an
     error. *)
  let (parser1, token) = next_token_as_name parser in
  match (Token.kind token) with
  | Name ->
    let (parser1, name) =
      scan_remaining_qualified_name parser1 (make_token token) in
    parse_name_or_collection_literal_expression parser1 name
  | kind when Parser.expects_here parser kind ->
    (* ERROR RECOVERY: If we're encountering a token that matches a kind in
     * the previous scope of the expected stack, don't eat it--just mark the
     * name missing and continue parsing, starting from the offending token. *)
    let missing = make_missing parser in
    let parser = with_error parser SyntaxError.error1015 in
    (parser, missing)
  | _ ->
    (* ERROR RECOVERY: If we're encountering anything other than a Name
     * or the next expected kind, eat the offending token.
     * TODO: Increase the coverage of PrecedenceParser.expects_next, so that
     * we wind up eating fewer of the tokens that'll be needed by the outer
     * statement / declaration parsers. *)
    let parser = with_error parser1 SyntaxError.error1015 in
    (parser, make_token token)
(* Parses a single term by dispatching on the kind of the next token.
   The token is read once here to pick the branch. Branches that are passed
   [parser] (the state before the read) presumably read the token again
   themselves; branches that are passed [parser1] continue after the token
   already read. *)
and parse_term parser =
  let (parser1, token) = next_xhp_class_name_or_other_token parser in
  match (Token.kind token) with
  (* TODO: Make these an error in Hack *)
  | ExecutionStringLiteral
  | DecimalLiteral
  | OctalLiteral
  | HexadecimalLiteral
  | BinaryLiteral
  | FloatingLiteral
  | SingleQuotedStringLiteral
  | NowdocStringLiteral
  | DoubleQuotedStringLiteral
  | BooleanLiteral
  | NullLiteral -> (parser1, make_literal_expression (make_token token))
  | HeredocStringLiteral ->
    (* We have a heredoc string literal but it might contain embedded
       expressions. Start over. *)
    let (parser, token, name) = next_docstring_header parser in
    parse_heredoc_string parser token name
  | HeredocStringLiteralHead
  | DoubleQuotedStringLiteralHead ->
    parse_double_quoted_like_string
      parser1 token Lexer.Literal_double_quoted
  | ExecutionStringLiteralHead ->
    parse_double_quoted_like_string
      parser1 token Lexer.Literal_execution_string
  | Variable -> parse_variable_or_lambda parser
  | XHPClassName ->
    parse_name_or_collection_literal_expression parser1 (make_token token)
  | Name ->
    let (parser1, qualified_name) =
      scan_remaining_qualified_name parser1 (make_token token) in
    parse_name_or_collection_literal_expression parser1 qualified_name
  | Backslash ->
    let (parser1, qualified_name) =
      scan_qualified_name parser1 (make_token token) in
    parse_name_or_collection_literal_expression parser1 qualified_name
  | Self
  | Parent -> parse_scope_resolution_or_name parser
  | Static ->
    parse_anon_or_awaitable_or_scope_resolution_or_name parser
  | Yield -> parse_yield_expression parser
  | Dollar -> parse_dollar_expression parser
  | Suspend
    (* TODO: The operand to a suspend is required to be a call to a
    coroutine. Give an error in a later pass if this isn't the case. *)
  | Exclamation
  | PlusPlus
  | MinusMinus
  | Tilde
  | Minus
  | Plus
  | Ampersand
  | Await
  | Clone
  | Print
  | At -> parse_prefix_unary_expression parser
  | LeftParen -> parse_cast_or_parenthesized_or_lambda_expression parser
  | LessThan -> parse_possible_xhp_expression ~consume_trailing_trivia:true parser
  | List -> parse_list_expression parser
  | New -> parse_object_creation_expression parser
  | Array -> parse_array_intrinsic_expression parser
  | Varray -> parse_varray_intrinsic_expression parser
  | Vec -> parse_vector_intrinsic_expression parser
  | Darray -> parse_darray_intrinsic_expression parser
  | Dict -> parse_dictionary_intrinsic_expression parser
  | Keyset -> parse_keyset_intrinsic_expression parser
  | LeftBracket -> parse_array_creation_expression parser
  | Tuple -> parse_tuple_expression parser
  | Shape -> parse_shape_expression parser
  | Function -> parse_anon parser
  | DollarDollar ->
    (parser1, make_pipe_variable_expression (make_token token))
  | Async
  | Coroutine -> parse_anon_or_lambda_or_awaitable parser
  | Include
  | Include_once
  | Require
  | Require_once -> parse_inclusion_expression parser
  | Empty -> parse_empty_expression parser
  | Isset -> parse_isset_expression parser
  | Define -> parse_define_expression parser
  | HaltCompiler -> parse_halt_compiler_expression parser
  | Eval -> parse_eval_expression parser
  | kind when Parser.expects parser kind ->
    (* ERROR RECOVERY: if we've prematurely found a token we're expecting
     * later, mark the expression missing, throw an error, and do not advance
     * the parser. *)
    let missing = make_missing parser in
    let parser = with_error parser SyntaxError.error1015 in
    (parser, missing)
  | TokenKind.EndOfFile
  | _ -> parse_as_name_or_error parser
(* Parses empty(expr). When the keyword is not followed by a left
   parenthesis, the keyword is re-parsed from the original position as an
   ordinary name via parse_as_name_or_error. *)
and parse_empty_expression parser =
  (* TODO: This is a PHP-ism. Open questions:
    * Should we allow a trailing comma? it is not a function call and
      never has more than one argument. See D4273242 for discussion.
    * Is there any restriction on the kind of expression this can be?
    * Should this be an error in strict mode?
    * Should this be in the specification?
    * Empty is case-insensitive; should use of non-lowercase be an error?
  *)
  (* TODO: The original Hack and HHVM parsers accept "empty" as an
  identifier, so we do too; consider whether it should be reserved. *)
  let (parser1, keyword) = assert_token parser Empty in
  if peek_token_kind parser1 = LeftParen then
    let (parser, left) = assert_token parser1 LeftParen in
    let (parser, arg) = parse_expression_with_reset_precedence parser in
    let (parser, right) = require_right_paren parser in
    let result = make_empty_expression keyword left arg right in
    (parser, result)
  else
    parse_as_name_or_error parser
(* Parses eval(expr). When the keyword is not followed by a left
   parenthesis, the keyword is re-parsed from the original position as an
   ordinary name via parse_as_name_or_error. *)
and parse_eval_expression parser =
  (* TODO: This is a PHP-ism. Open questions:
    * Should we allow a trailing comma? it is not a function call and
      never has more than one argument. See D4273242 for discussion.
    * Is there any restriction on the kind of expression this can be?
    * Should this be an error in strict mode?
    * Should this be in the specification?
    * Eval is case-insensitive. Should use of non-lowercase be an error?
  *)
  (* TODO: The original Hack and HHVM parsers accept "eval" as an
  identifier, so we do too; consider whether it should be reserved. *)
  let (parser1, keyword) = assert_token parser Eval in
  if peek_token_kind parser1 = LeftParen then
    let (parser, left) = assert_token parser1 LeftParen in
    let (parser, arg) = parse_expression_with_reset_precedence parser in
    let (parser, right) = require_right_paren parser in
    let result = make_eval_expression keyword left arg right in
    (parser, result)
  else
    parse_as_name_or_error parser
(* Parses isset(args). The parenthesised argument list is read with
   parse_expression_list_opt. When the keyword is not followed by a left
   parenthesis, it is re-parsed from the original position as an ordinary
   name via parse_as_name_or_error. *)
and parse_isset_expression parser =
  (* TODO: This is a PHP-ism. Open questions:
    * Should we allow a trailing comma? See D4273242 for discussion.
    * Is there any restriction on the kind of expression the arguments can be?
    * Should this be an error in strict mode?
    * Should this be in the specification?
    * PHP requires that there be at least one argument; should we require
      that? if so, should we give the error in the parser or a later pass?
    * Isset is case-insensitive. Should use of non-lowercase be an error?
  *)
  (* TODO: The original Hack and HHVM parsers accept "isset" as an
  identifier, so we do too; consider whether it should be reserved. *)

  let (parser1, keyword) = assert_token parser Isset in
  if peek_token_kind parser1 = LeftParen then
    let (parser, left, args, right) = parse_expression_list_opt parser1 in
    let result = make_isset_expression keyword left args right in
    (parser, result)
  else
    parse_as_name_or_error parser
(* Parses define(args). The parenthesised argument list is read with
   parse_expression_list_opt. When the keyword is not followed by a left
   parenthesis, it is re-parsed from the original position as an ordinary
   name via parse_as_name_or_error. *)
and parse_define_expression parser =
  (* TODO: This is a PHP-ism. Open questions:
    * Should we allow a trailing comma? See D4273242 for discussion.
    * Is there any restriction on the kind of expression the arguments can be?
      They must be string, value, bool, but do they have to be compile-time
      constants, for instance?
    * Should this be an error in strict mode? You should use const instead.
    * Should this be in the specification?
    * PHP requires that there be at least two arguments; should we require
      that? if so, should we give the error in the parser or a later pass?
    * is define case-insensitive?
  *)
  (* TODO: The original Hack and HHVM parsers accept "define" as an
  identifier, so we do too; consider whether it should be reserved. *)
  let (parser1, keyword) = assert_token parser Define in
  if peek_token_kind parser1 = LeftParen then
    let (parser, left, args, right) = parse_expression_list_opt parser1 in
    let result = make_define_expression keyword left args right in
    (parser, result)
  else
    parse_as_name_or_error parser
(* Parses a halt-compiler expression with a parenthesised argument list.
   When the keyword is not followed by a left parenthesis, error1019 is
   recorded and the keyword is re-parsed from the original position via
   parse_as_name_or_error. *)
and parse_halt_compiler_expression parser =
  let (parser1, keyword) = assert_token parser HaltCompiler in
  if peek_token_kind parser1 = LeftParen then
    let (parser, left, args, right) = parse_expression_list_opt parser1 in
    parser, make_halt_compiler_expression keyword left args right
  else
    let parser = with_error parser SyntaxError.error1019 in
    parse_as_name_or_error parser
(* Parses the rest of a string literal whose head token has already been
   read; [literal_kind] selects the lexing mode (double-quoted or execution
   string at the call sites in parse_term). *)
and parse_double_quoted_like_string parser head literal_kind =
  parse_string_literal parser head literal_kind
(* Parses the rest of a heredoc string whose head token has already been
   read; [name] is passed to the lexer as part of the heredoc literal
   kind. *)
and parse_heredoc_string parser head name =
  parse_string_literal parser head (Lexer.Literal_heredoc name)
(* Parses a braced expression embedded in a string, starting at the left
   brace. Returns the updated parser and an embedded braced expression
   node. A bare name immediately followed by the right brace is kept as a
   token; anything else is parsed as a full expression. *)
and parse_braced_expression_in_string parser =
  (*
  We are parsing something like "abc{$x}def" or "abc${x}def", and we
  are at the left brace.

  We know that the left brace will not be preceded by trivia. However in the
  second of the two cases mentioned above it is legal for there to be trivia
  following the left brace.  If we are in the first case, we've already
  verified that there is no trailing trivia after the left brace.

  The expression may be followed by arbitrary trivia, including
  newlines and comments. That means that the closing brace may have
  leading trivia. But under no circumstances does the closing brace have
  trailing trivia.

  It's an error for the closing brace to be missing.

  Therefore we lex the left brace normally, parse the expression normally,
  but require that there be a right brace. We do not lex the trailing trivia
  on the right brace.

  ERROR RECOVERY: If the right brace is missing, treat the remainder as
  string text. *)

  let (parser, left_brace) = assert_token parser LeftBrace in
  let (parser1, name_or_keyword_as_name) = next_token_as_name parser in
  let (parser1, right_brace) = next_token_no_trailing parser1 in
  let (parser, expr, right_brace) =
    match Token.kind name_or_keyword_as_name, Token.kind right_brace with
    | Name, RightBrace ->
      let expr = make_token name_or_keyword_as_name in
      let right_brace = make_token right_brace in
      parser1, expr, right_brace
    | _, _ ->
      (* Not a bare name: discard the two-token lookahead and parse a full
         expression starting right after the left brace. *)
      let (parser, expr) = parse_expression_with_reset_precedence parser in
      let (parser1, token) = next_token_no_trailing parser in
      let (parser, right_brace) =
        if (Token.kind token) = RightBrace then
          (parser1, make_token token)
        else
          let missing = make_missing parser in
          let parser = with_error parser SyntaxError.error1006 in
          (parser, missing)
      in
      parser, expr, right_brace
  in
  let node = make_embedded_braced_expression left_brace expr right_brace in
  (parser, node)
(* Parses the remainder of a double-quoted-like string (double-quoted,
   execution or heredoc, selected by [literal_kind]) whose first token
   [head] has already been read.
   Text fragments are merged into single tokens; embedded variables and
   braced expressions become separate nodes. Returns the updated parser and
   a literal expression wrapping either one token or a list of fragments. *)
and parse_string_literal parser head literal_kind =
  (* SPEC

  Double-quoted string literals and heredoc string literals use basically
  the same rules; here we have just the grammar for double-quoted string
  literals.

  string-variable::
    variable-name   offset-or-property-opt

  offset-or-property::
    offset-in-string
    property-in-string

  offset-in-string::
    [   name   ]
    [   variable-name   ]
    [   integer-literal   ]

  property-in-string::
    ->   name

  TODO: What about ?->

  The actual situation is considerably more complex than indicated
  in the specification.

  TODO: Consider updating the specification.

  * The tokens in the grammar above have no leading or trailing trivia.

  * An embedded variable expression may also be enclosed in curly braces;
    however, the $ of the variable expression must follow immediately after
    the left brace.

  * An embedded variable expression inside braces allows trivia between
    the tokens and before the right brace.

  * An embedded variable expression inside braces can be a much more complex
    expression than indicated by the grammar above.  For example,
    {$c->x->y[0]} is good, and {$c[$x instanceof foo ? 0 : 1]} is good,
    but {$c instanceof foo ? $x : $y} is not.  It is not clear to me what
    the legal grammar here is; it seems best in this situation to simply
    parse any expression and do an error pass later.

  * Note that the braced expressions can include double-quoted strings.
    {$c["abc"]} is good, for instance.

  * ${ is illegal in strict mode. In non-strict mode, ${varname is treated
    the same as {$varname, and may be an arbitrary expression.

  * TODO: We need to produce errors if there are unbalanced brackets,
    example: "$x[0" is illegal.

  * TODO: Similarly for any non-valid thing following the left bracket,
    including trivia. example: "$x[ 0]" is illegal.

  *)

  (* Appends [token] to the pending text fragment (the option argument),
     producing a single wider token whose kind reflects whether it now
     starts and/or ends the string. *)
  let merge token = function
    (* TODO: Assert that new head has no leading trivia, old head has no
       trailing trivia. *)
    (* Invariant: A token inside a list of string fragments is always a head,
       body or tail. *)
    (* TODO: Is this invariant what we want? We could preserve the parse of
       the string. That is, something like "a${b}c${d}e" is at present
       represented as head, expr, body, expr, tail.  It could be instead
       head, dollar, left brace, expr, right brace, body, dollar, left
       brace, expr, right brace, tail. Is that better?

       TODO: Similarly we might want to preserve the structure of
       heredoc strings in the parse: that there is a header consisting of
       an identifier, and so on, and then body text, etc. *)
    | Some head ->
      let k = match (Token.kind head, Token.kind token) with
        | (DoubleQuotedStringLiteralHead, DoubleQuotedStringLiteralTail) ->
          DoubleQuotedStringLiteral
        | (ExecutionStringLiteralHead, ExecutionStringLiteralTail) ->
          ExecutionStringLiteral
        | (HeredocStringLiteralHead, HeredocStringLiteralTail) ->
          HeredocStringLiteral
        | (DoubleQuotedStringLiteralHead, _) ->
          DoubleQuotedStringLiteralHead
        | (ExecutionStringLiteralHead, _) ->
          ExecutionStringLiteralHead
        | (HeredocStringLiteralHead, _) ->
          HeredocStringLiteralHead
        | (_, DoubleQuotedStringLiteralTail) ->
          DoubleQuotedStringLiteralTail
        | (_, HeredocStringLiteralTail) ->
          HeredocStringLiteralTail
        | (_, ExecutionStringLiteralTail) ->
          ExecutionStringLiteralTail
        | _ ->
          StringLiteralBody
      in
      let s = Token.source_text head in
      let o = Token.leading_start_offset head in
      let w = (Token.width head) + (Token.width token) in
      let l = Token.leading head in
      let t = Token.trailing token in
      (* TODO: Make a "position" type that is a tuple of source and offset. *)
      Some (Token.make k s o w l t)
    | None ->
      (* No pending fragment: anything that is not already a body or tail
         token is re-tagged as string body text. *)
      let token = match Token.kind token with
        | StringLiteralBody
        | HeredocStringLiteralTail
        | DoubleQuotedStringLiteralTail
        | ExecutionStringLiteralTail ->
          token
        | _ ->
          Token.with_kind token StringLiteralBody
      in
      Some token
  in

  (* Pushes the pending text fragment, if any, onto the accumulator. *)
  let put_opt head acc =
    Option.value_map ~default:acc ~f:(fun h -> make_token h :: acc) head
  in

  (* Parses what follows an embedded variable [token]: a member selection,
     a one-element subscript, or nothing. Up to three tokens are read as
     lookahead; the parser returned is the one positioned after the tokens
     actually used. *)
  let parse_embedded_expression parser token =
    let var_expr = make_variable_expression (make_token token) in
    let (parser1, token1) = next_token_in_string parser literal_kind in
    let (parser2, token2) = next_token_in_string parser1 literal_kind in
    let (parser3, token3) = next_token_in_string parser2 literal_kind in
    match (Token.kind token1, Token.kind token2, Token.kind token3) with
    | (MinusGreaterThan, Name, _) ->
      let expr = make_embedded_member_selection_expression var_expr
        (make_token token1) (make_token token2) in
      (parser2, expr)
    | (LeftBracket, Name, RightBracket) ->
      let expr = make_embedded_subscript_expression var_expr
        (make_token token1)
        (make_token token2)
        (make_token token3) in
      (parser3, expr)
    | (LeftBracket, Variable, RightBracket) ->
      let expr = make_embedded_subscript_expression var_expr
        (make_token token1) (make_variable_expression (make_token token2))
        (make_token token3) in
      (parser3, expr)
    | (LeftBracket, DecimalLiteral, RightBracket)
    | (LeftBracket, OctalLiteral, RightBracket)
    | (LeftBracket, HexadecimalLiteral, RightBracket)
    | (LeftBracket, BinaryLiteral, RightBracket) ->
      let expr = make_embedded_subscript_expression var_expr
        (make_token token1) (make_literal_expression (make_token token2))
        (make_token token3) in
      (parser3, expr)
    | _ -> (parser, var_expr)
  in

  let rec handle_left_brace parser head acc =
    (* Note that here we use next_token_in_string because we need to know
    whether there is trivia between the left brace and the $x which follows.*)
    let (parser1, left_brace) = next_token_in_string parser literal_kind in
    let (_, token) = next_token_in_string parser1 literal_kind in
    (* TODO: What about "{$$}" ? *)
    match Token.kind token with
    | Variable ->
      (* Parse any expression followed by a close brace.
         TODO: We do not actually support all possible expressions;
               see above. Do we want to (1) catch this at parse time,
               (2) catch it in a later pass, or (3) just allow any
               expression here? *)
      let (parser, expr) = parse_braced_expression_in_string parser in
      aux parser None (expr :: (put_opt head acc))
    | _ ->
      (* We do not support {$ inside a string unless the $ begins a
         variable name. Append the { and start again on the $. *)
      (* TODO: Is this right? Suppose we have "{${x}".  Is that the same
         as "{"."${x}" ? Double check this. *)
      (* TODO: Give an error. *)
      (* We got a { not followed by a $. Ignore it. *)
      (* TODO: Give a warning? *)
      aux parser1 (merge left_brace head) acc

  and handle_dollar parser dollar head acc =
    (* We need to parse ${x} as though it was {$x} *)
    (* TODO: This should be an error in strict mode. *)
    (* We must not have trivia between the $ and the {, but we can have
       trivia after the {. That's why we use next_token_in_string here. *)
    let (_, token) = next_token_in_string parser literal_kind in
    match Token.kind token with
    | LeftBrace ->
      (* The thing in the braces has to be an expression that begins
         with a variable, and the variable does *not* begin with a $. It's
         just the word.

         Unlike the {$var} case, there *can* be trivia before the expression,
         which means that trivia is likely the trailing trivia of the brace,
         not leading trivia of the expression. *)
      (* TODO: Enforce these rules by producing an error if they are
         violated. *)
      (* TODO: Make the parse tree for the leading word in the expression
         a variable expression, not a qualified name expression. *)

      let (parser, expr) = parse_braced_expression_in_string parser in
      aux parser None (expr :: (make_token dollar) :: (put_opt head acc))
    | _ ->
      (* We got a $ not followed by a { or variable name. Ignore it. *)
      (* TODO: Give a warning? *)
      aux parser (merge dollar head) acc

  (* Main loop: [head] is the pending text fragment, [acc] the fragments
     and expressions collected so far, most recent first. *)
  and aux parser head acc =
    let (parser1, token) = next_token_in_string parser literal_kind in
    match Token.kind token with
    | HeredocStringLiteralTail
    | DoubleQuotedStringLiteralTail
    | ExecutionStringLiteralTail ->
      parser1, (put_opt (merge token head) acc)
    | LeftBrace ->
      handle_left_brace parser head acc
    | Variable ->
      let (parser, expr) = parse_embedded_expression parser1 token in
      aux parser None (expr :: (put_opt head acc))
    | Dollar ->
      handle_dollar parser1 token head acc
    | _ ->
      aux parser1 (merge token head) acc
  in

  let (parser, results) = aux parser (Some head) [] in
  (* If we've ended up with a single string literal with no internal
     structure, do not represent that as a list with one item. *)
  let results = match results with
    | h :: [] -> h
    | _ -> make_list parser (List.rev results) in
  let result = make_literal_expression results in
  (parser, result)
(* Parses an inclusion expression: the keyword token followed by the
   file-name expression, which is parsed at the precedence of the
   corresponding prefix unary operator. *)
and parse_inclusion_expression parser =
  (* SPEC:
    inclusion-directive:
      require-multiple-directive
      require-once-directive

    require-multiple-directive:
      require  include-filename  ;

    include-filename:
      expression

    require-once-directive:
      require_once  include-filename  ;

    In non-strict mode we allow an inclusion directive (without semi) to be
    used as an expression. It is therefore easier to actually parse this as:

    inclusion-directive:
      inclusion-expression  ;

    inclusion-expression:
      require include-filename
      require_once include-filename

    TODO: We allow "include" and "include_once" as well, which are PHP-isms
    specified as not supported in Hack. Do we need to produce an error in
    strict mode?

    TODO: Produce an error if this is used in an expression context
    in strict mode.
  *)

  let (parser, require) = next_token parser in
  let operator = Operator.prefix_unary_from_token (Token.kind require) in
  let require = make_token require in
  let (parser, filename) = parse_expression_with_operator_precedence
    parser operator in
  let result = make_inclusion_expression require filename in
  (parser, result)
(* Peeks at the next token without consuming it. Returns Some kind when
   Operator.is_trailing_operator_token accepts its kind, None otherwise. *)
and peek_next_kind_if_operator parser =
  let kind = peek_token_kind parser in
  if Operator.is_trailing_operator_token kind then
    Some kind
  else
    None
(* True when the trailing operator for token kind [operator_kind] has a
   precedence strictly below the parser's current precedence. *)
and operator_has_lower_precedence operator_kind parser =
  let operator = Operator.trailing_from_token operator_kind in
  (Operator.precedence operator) < parser.precedence
(* True when the next token is not a trailing operator, or is one whose
   precedence is strictly below the parser's current precedence. *)
and next_is_lower_precedence parser =
  match peek_next_kind_if_operator parser with
  | None -> true
  | Some kind -> operator_has_lower_precedence kind parser
(* Called when [term] is followed by a less-than token that might open a
   generic type argument list. The type arguments are parsed speculatively.
   They are accepted only if no argument is missing, the result is a type
   argument list, and no new errors were recorded. In that case the term is
   treated as a scope resolution on a generic type (when a double colon
   follows) or as a call with explicit type arguments. Otherwise the
   speculative parse is dropped and the less-than is handled as a binary
   operator. *)
and parse_remaining_expression_or_specified_function_call parser term
    prefix_kind =
  let (parser1, (type_arguments, no_arg_is_missing)) =
    parse_generic_type_arguments_opt parser
  in
  if no_arg_is_missing
     && is_type_arguments type_arguments
     && parser.errors = parser1.errors
  then
    let parser, result =
      begin match peek_token_kind parser1 with
      | ColonColon ->
        (* handle a<type-args>::... case *)
        let type_specifier =
          make_generic_type_specifier term type_arguments in
        parse_scope_resolution_expression parser1 type_specifier
      | _ ->
        let (parser, left, args, right) = parse_expression_list_opt parser1 in
        parser, make_function_call_with_type_arguments_expression
          term type_arguments left args right
      end in
    parse_remaining_expression parser result
  else
    parse_remaining_binary_expression parser term prefix_kind
(* Checks if given expression is a PHP variable.
   per PHP grammar:
   https://github.com/php/php-langspec/blob/master/spec/10-expressions.md#grammar-variable
   A variable is an expression that can in principle be used as an lvalue.
   Accepts variable, subscript, member selection and scope resolution
   expressions, and tracked prefix unary expressions whose operator is
   Dollar and whose operand is itself usable as an lvalue. *)
and can_be_used_as_lvalue parser t =
  if is_variable_expression t
     || is_subscript_expression t
     || is_member_selection_expression t
     || is_scope_resolution_expression t
  then true
  else prefix_unary_expression_checker_helper parser t Dollar
(* Checks if given node is prefix unary expression and verifies operator kind.
   Recursively run can_be_used_as_lvalue on the operand.
   Only nodes recorded on the parser's prefix_unary_expression_stack are
   recognised; any other node yields false. *)
and prefix_unary_expression_checker_helper parser t kind =
  match find_in_prefix_unary_expression_stack parser t with
  | Some { operator_kind; operand; _ } ->
    if operator_kind = kind then
      can_be_used_as_lvalue parser operand
    else
      false
  | None -> false
(* checks if expression is a valid right hand side in by-ref assignment
   which is '&'PHP variable, i.e. a tracked prefix unary expression whose
   operator is Ampersand and whose operand can be used as an lvalue *)
and is_byref_assignment_source parser t =
  prefix_unary_expression_checker_helper parser t Ampersand
(*detects if left_term and operator can be treated as a beginning of
  assignment (respecting the precedence of operator on the left of
  left term). Returns
  - Prefix_none - either operator is not one of assignment operators or
  precedence of the operator on the left is higher than precedence of
  assignment.
  - Prefix_assignment - left_term and operator can be interpreted as a
  prefix of assignment
  - Prefix_byref_assignment - left_term and operator can be interpreted as a
  prefix of byref assignment.*)
and check_if_parsable_as_assignment parser left_term operator left_precedence
  =
  (* in PHP precedence of assignment in expression is bumped up to
     recognize cases like !$x = ... or $a == $b || $c = ...
     which should be parsed as !($x = ...) and $a == $b || ($c = ...)
  *)
  if left_precedence >= Operator.precedence_for_assignment_in_expressions then
    Prefix_none
  else match operator with
  (* Plain = after an lvalue may still turn out to be a by-ref assignment,
     so the more permissive kind is reported. *)
  | Equal when can_be_used_as_lvalue parser left_term ->
    Prefix_byref_assignment
  | Equal when is_list_expression left_term -> Prefix_assignment
  | PlusEqual | MinusEqual | StarEqual | SlashEqual |
    StarStarEqual | DotEqual | PercentEqual | AmpersandEqual |
    BarEqual | CaratEqual | LessThanLessThanEqual |
    GreaterThanGreaterThanEqual
    when can_be_used_as_lvalue parser left_term ->
    Prefix_assignment
  | _ -> Prefix_none
and can_term_take_type_args term =
  (* True for names, qualified names, (safe) member selections and scope
     resolutions; the checks run in that order and stop at the first hit. *)
  if is_name term then true
  else if is_qualified_name term then true
  else if is_member_selection_expression term then true
  else if is_safe_member_selection_expression term then true
  else is_scope_resolution_expression term
and parse_remaining_expression parser term =
  (* With term already parsed, peeks at the operator that follows (if there
     is one) and hands over to the parser for that kind of continuation.
     When nothing applicable follows, the term is returned untouched. *)
  match peek_next_kind_if_operator parser with
  | None -> (parser, term)
  | Some token ->
    let assignment_prefix_kind =
      check_if_parsable_as_assignment parser term token parser.precedence in
    (* Parsing stops here only when both of these hold:
       - the operator has lower precedence than the operator on the left;
       - <term> <operator> does not look like the prefix of an assignment
         expression. *)
    let stop_here =
      operator_has_lower_precedence token parser
      && assignment_prefix_kind = Prefix_none in
    if stop_here then (parser, term)
    else begin
      (* Feeds the outcome of a sub-parser back into this function. *)
      let continue_after (parser, result) =
        parse_remaining_expression parser result in
      match token with
      (* A '<' after something that can carry type arguments needs the
         dedicated disambiguation routine. *)
      | LessThan when can_term_take_type_args term ->
        parse_remaining_expression_or_specified_function_call parser term
          assignment_prefix_kind
      (* Binary operators *)
      (* TODO Add an error if PHP and / or / xor are used in Hack. *)
      (* TODO Add an error if PHP style <> is used in Hack. *)
      | And | Or | Xor
      | Plus | Minus | Star | Slash | StarStar
      | Equal | BarEqual | PlusEqual | StarEqual | StarStarEqual
      | SlashEqual | DotEqual | MinusEqual | PercentEqual | CaratEqual
      | AmpersandEqual | LessThanLessThanEqual | GreaterThanGreaterThanEqual
      | EqualEqualEqual | LessThan | GreaterThan | Percent | Dot
      | EqualEqual | AmpersandAmpersand | BarBar
      | ExclamationEqual | LessThanGreaterThan | ExclamationEqualEqual
      | LessThanEqual | LessThanEqualGreaterThan | GreaterThanEqual
      | Ampersand | Bar | LessThanLessThan | GreaterThanGreaterThan
      | Carat | BarGreaterThan | QuestionQuestion | QuestionColon ->
        parse_remaining_binary_expression parser term assignment_prefix_kind
      | Instanceof -> parse_instanceof_expression parser term
      | Is -> parse_is_expression parser term
      | QuestionMinusGreaterThan | MinusGreaterThan ->
        continue_after (parse_member_selection_expression parser term)
      | ColonColon ->
        continue_after (parse_scope_resolution_expression parser term)
      | PlusPlus | MinusMinus -> parse_postfix_unary parser term
      | LeftParen -> parse_function_call parser term
      | LeftBracket | LeftBrace -> parse_subscript parser term
      | Question ->
        let (parser, question) = assert_token parser Question in
        continue_after (parse_conditional_expression parser term question)
      | _ -> (parser, term)
    end
and parse_member_selection_expression parser term =
  (* SPEC:
       member-selection-expression:
         postfix-expression  ->  name
         postfix-expression  ->  variable-name
         postfix-expression  ->  xhp-class-name (DRAFT XHP SPEC)

       null-safe-member-selection-expression:
         postfix-expression  ?->  name
         postfix-expression  ?->  variable-name
         postfix-expression  ?->  xhp-class-name (DRAFT XHP SPEC)

     The braced form $a->{$b} is legal PHP. It is accepted here too, both
     for PHP compatibility and so that good errors can be given.

     TODO: Produce an error if the braced syntax is used in Hack. *)
  let (parser, arrow) = next_token parser in
  let op = make_token arrow in
  (* TODO: We are putting the name / variable into the tree as a token
     leaf, rather than as a name or variable expression. Is that right? *)
  let (parser, name) =
    match peek_token_kind parser with
    | LeftBrace -> parse_braced_expression parser
    | Variable when Env.php5_compat_mode (env parser) ->
      parse_variable_in_php5_compat_mode parser
    | Dollar -> parse_dollar_expression parser
    | _ -> require_xhp_class_name_or_name_or_variable parser in
  (* '->' gives the plain node; any other token consumed above gives the
     null-safe node. *)
  let result =
    match Token.kind arrow with
    | MinusGreaterThan -> make_member_selection_expression term op name
    | _ -> make_safe_member_selection_expression term op name in
  (parser, result)
and parse_variable_in_php5_compat_mode parser =
  (* PHP7 changed how variables are parsed, which was a breaking change:
     (https://wiki.php.net/rfc/uniform_variable_syntax).
     By default the Hack parser uses the PHP7 compatible mode, in which
     variable accesses are interpreted left-to-right. That usually agrees
     with PHP5, except where the '$' operator, member access or scope
     resolution is involved:
       $$a[1][2]  ->  ($$a)[1][2]
       $a->$b[c]  ->  ($a->$b)[c]
       X::$a[b]() ->  (X::$a)[b]()

     To stay backward compatible, the variable/subscript expression is
     parsed as a unit and treated as though it were braced, which enforces
     the PHP5 meaning:
       $$a[1][2]  ->  ${$a[1][2]}
       $a->$b[c]  ->  $a->{$b[c]}
       X::$a[b]() ->  X::{$a[b]}() *)
  let saved_precedence = parser.precedence in
  let indexing_precedence = Operator.precedence Operator.IndexingOperator in
  let (after_expression, expression) =
    parse_expression (with_precedence parser indexing_precedence) in
  (* Put back the precedence the caller was working with. *)
  (with_precedence after_expression saved_precedence, expression)
and parse_subscript parser term =
  (* SPEC
       subscript-expression:
         postfix-expression  [  expression-opt  ]
         postfix-expression  {  expression-opt  }   [Deprecated form] *)
  (* TODO: Produce an error for brace case in a later pass *)
  let (after_open, open_token) = next_token parser in
  let (after_close, close_token) = next_token after_open in
  let open_kind = Token.kind open_token in
  match (open_kind, Token.kind close_token) with
  | (LeftBracket, RightBracket) | (LeftBrace, RightBrace) ->
    (* Empty subscript: the index is a missing node created from the
       parser state that sits between the two delimiters. *)
    let left = make_token open_token in
    let index = make_missing after_open in
    let right = make_token close_token in
    parse_remaining_expression after_close
      (make_subscript_expression term left index right)
  | _ ->
    (* Not immediately closed: discard the speculative second token, parse
       an index expression, then require the matching closing delimiter. *)
    let (parser, index) =
      with_reset_precedence after_open parse_expression in
    let (parser, right) =
      if open_kind = LeftBracket then require_right_bracket parser
      else require_right_brace parser in
    let left = make_token open_token in
    parse_remaining_expression parser
      (make_subscript_expression term left index right)
and parse_expression_list_opt parser =
  (* SPEC

     TODO: This business of allowing ... does not appear in the spec. Add it.

     TODO: Add call-convention-opt to the specification.
     (This work is tracked by task T22582676.)

     TODO: Update grammar for inout parameters.
     (This work is tracked by task T22582715.)

     ERROR RECOVERY: A ... expression is only valid as the last entry of a
     formal parameter list. Here it is nevertheless parsed in any position
     without complaint; a later pass reports the error.

     Note that a trailing comma *is* legal after a ... expression here,
     although it would not be legal in a formal parameter list.

     TODO: Can *any* expression appear after the ... ?

     argument-expression-list:
       argument-expressions   ,-opt
     argument-expressions:
       expression
       ... expression
       call-convention-opt  expression
       argument-expressions  ,  expression *)
  (* The surrounding parentheses are parsed by this function as well. *)
  parse_parenthesized_comma_list_opt_allow_trailing parser
    (fun item_parser ->
      with_reset_precedence item_parser parse_decorated_expression_opt)
and parse_decorated_expression_opt parser =
  (* An expression that starts with '...' or 'inout' is wrapped, together
     with that leading token, in a decorated expression node. Anything else
     is parsed as an ordinary expression. *)
  let parse_decorated parser =
    let (parser, decorator_token) = next_token parser in
    let (parser, operand) = parse_expression parser in
    let decorator = make_token decorator_token in
    (parser, make_decorated_expression decorator operand) in
  match peek_token_kind parser with
  | DotDotDot | Inout -> parse_decorated parser
  | _ -> parse_expression parser
and parse_start_of_type_specifier parser start_token =
  (* Scans a (possibly qualified) name beginning at start_token. The scanned
     name is returned only if a '(' or '<' comes right after it; otherwise
     the result is None. *)
  let start = make_token start_token in
  let (parser, name) =
    match Token.kind start_token with
    | Backslash -> scan_qualified_name parser start
    | _ -> scan_remaining_qualified_name parser start in
  match peek_token_kind parser with
  | LeftParen | LessThan -> Some (parser, name)
  | _ -> None
and parse_designator parser =
  (* SPEC:
       class-type-designator:
         parent
         self
         static
         member-selection-expression
         null-safe-member-selection-expression
         qualified-name
         scope-resolution-expression
         subscript-expression
         variable-name

     TODO: Update the spec to allow qualified-name < type arguments >
     TODO: This will need to be fixed to allow situations where the
     qualified name is also a non-reserved token. *)
  (* Fallback: re-parse from the original position as an expression at the
     precedence of the 'new' operator. *)
  let as_expression () =
    parse_expression_with_operator_precedence parser Operator.NewOperator in
  let (after_token, token) = next_token parser in
  match Token.kind token with
  | Parent | Self ->
    begin match peek_token_kind after_token with
    | LeftParen -> (after_token, make_token token)
    | LessThan ->
      let (after_args, (type_arguments, no_arg_is_missing)) =
        parse_generic_type_arguments_opt after_token in
      (* Accept the generic form only if the arguments are complete, really
         are type arguments, and parsing them left the error list equal to
         the one we started with. *)
      let arguments_are_clean =
        no_arg_is_missing
        && is_type_arguments type_arguments
        && parser.errors = after_args.errors in
      if arguments_are_clean then
        let type_specifier =
          make_generic_type_specifier (make_token token) type_arguments in
        (after_args, type_specifier)
      else as_expression ()
    | _ -> as_expression ()
    end
  | Static when peek_token_kind after_token = LeftParen ->
    (after_token, make_token token)
  | Name | Backslash ->
    begin match parse_start_of_type_specifier after_token token with
    | Some (after_name, name) ->
      (* We want to parse new C() and new C<int>() as types, but
         new C::$x() as an expression. *)
      with_type_parser after_name
        (TypeParser.parse_remaining_type_specifier name)
    | None -> as_expression ()
    end
  | _ -> as_expression ()
  (* TODO: We need to verify in a later pass that the expression is a
     scope resolution (that does not end in class!), a member selection,
     a name, a variable, a property, or an array subscript expression. *)
and parse_object_creation_expression parser =
  (* SPEC
       object-creation-expression:
         new object-creation-what *)
  let (after_new, new_token) = assert_token parser New in
  (* One token of lookahead: 'class' starts an anonymous class and is
     consumed; for anything else the lookahead is thrown away and the
     constructor call is parsed from right after 'new'. *)
  let (after_lookahead, lookahead) = next_token after_new in
  let (parser, new_what) =
    if Token.kind lookahead = Class
    then parse_anonymous_class lookahead after_lookahead
    else parse_constructor_call after_new in
  (parser, make_object_creation_expression new_token new_what)
and parse_anonymous_class class_token parser =
  (* Parses what follows the 'class' keyword of an anonymous class: an
     optional parenthesized argument list, then the optional extends and
     implements clauses, then the class body. *)
  let class_token = make_token class_token in
  let (parser, left, args, right) =
    match peek_token_kind parser with
    | LeftParen -> parse_expression_list_opt parser
    | _ ->
      (* No argument list: all three pieces become missing nodes. *)
      let left = make_missing parser in
      let args = make_missing parser in
      let right = make_missing parser in
      (parser, left, args, right) in
  (* The heritage clauses and the body are handled by the declaration
     parser. *)
  let parse_heritage_and_body decl_parser =
    let (decl_parser, extends_kw, extends_list) =
      DeclParser.parse_classish_extends_opt decl_parser in
    let (decl_parser, implements_kw, implements_list) =
      DeclParser.parse_classish_implements_opt decl_parser in
    let (decl_parser, body) = DeclParser.parse_classish_body decl_parser in
    let pieces =
      (extends_kw, extends_list, implements_kw, implements_list, body) in
    (decl_parser, pieces) in
  let (parser, pieces) = with_decl_parser parser parse_heritage_and_body in
  let (extends_kw, extends_list, implements_kw, implements_list, body) =
    pieces in
  let result =
    make_anonymous_class class_token left args right
      extends_kw extends_list implements_kw implements_list body in
  (parser, result)
and parse_constructor_call parser =
  (* SPEC
       constructor-call:
         class-type-designator  (  argument-expression-list-opt  ) *)
  (* PHP allows the entire expression list to be omitted. *)
  (* TODO: SPEC ERROR: PHP allows the entire expression list to be omitted,
   * but Hack disallows this behavior. (See SyntaxError.error2038.) However,
   * the Hack spec still states that the argument expression list is
   * optional. Update the spec to say that the argument expression list is
   * required. *)
  let (parser, designator) = parse_designator parser in
  let (parser, left, args, right) =
    match peek_token_kind parser with
    | LeftParen -> parse_expression_list_opt parser
    | _ ->
      (* No '(' follows: all three pieces become missing nodes. *)
      let left = make_missing parser in
      let args = make_missing parser in
      let right = make_missing parser in
      (parser, left, args, right) in
  (parser, make_constructor_call designator left args right)
and parse_function_call parser receiver =
  (* SPEC
       function-call-expression:
         postfix-expression  (  argument-expression-list-opt  ) *)
  let (parser, left_paren, arguments, right_paren) =
    parse_expression_list_opt parser in
  let call =
    make_function_call_expression receiver left_paren arguments right_paren in
  (* The call may itself be followed by further operators. *)
  parse_remaining_expression parser call
and parse_variable_or_lambda parser =
  (* A variable directly followed by ==> begins a lambda, which is then
     parsed again from the original position. Otherwise the variable is a
     variable expression on its own. *)
  let (after_variable, variable) = assert_token parser Variable in
  match peek_token_kind after_variable with
  | EqualEqualGreaterThan -> parse_lambda_expression parser
  | _ -> (after_variable, make_variable_expression variable)
and parse_yield_expression parser =
  (* SPEC:
       yield  array-element-initializer
     TODO: Hack allows "yield break".
     TODO: Should this be its own production, or can it be a yield expression?
     TODO: Is this an expression or a statement?
     TODO: Add it to the specification. *)
  let (parser, yield_kw) = assert_token parser Yield in
  match peek_token_kind parser with
  | From ->
    let (parser, from_kw) = assert_token parser From in
    let (parser, operand) = parse_expression parser in
    (parser, make_yield_from_expression yield_kw from_kw operand)
  | next_kind ->
    (* Every other form is a yield expression and differs only in what is
       used as its operand: the 'break' token, a missing node when ';'
       follows directly, or an array element initializer. *)
    let (parser, operand) =
      match next_kind with
      | Break -> assert_token parser Break
      | Semicolon -> (parser, make_missing parser)
      | _ -> parse_array_element_init parser in
    (parser, make_yield_expression yield_kw operand)
and parse_cast_or_parenthesized_or_lambda_expression parser =
  (* Three constructs can begin here and must be told apart: a cast, a
     lambda, and an ordinary parenthesized expression. They are tried in
     that order. *)
  let parse_lambda_or_parenthesized () =
    match possible_lambda_expression parser with
    | None -> parse_parenthesized_expression parser
    | Some (after_signature, signature) ->
      parse_lambda_expression_after_signature after_signature signature in
  match possible_cast_expression parser with
  | None -> parse_lambda_or_parenthesized ()
  | Some (after_cast, left, cast_type, right) ->
    let (parser, operand) =
      parse_expression_with_operator_precedence
        after_cast Operator.CastOperator in
    (parser, make_cast_expression left cast_type right operand)
and token_implies_cast kind =
  (* Given the kind of the token that follows a parenthesized name, answers
     whether that token suggests the parenthesized thing was a cast (true)
     or a parenthesized expression (false). See the comments on
     possible_cast_expression. Every token kind is listed explicitly. *)
  match kind with
  (* Keywords that imply cast *)
  | Abstract | Array | Arraykey | Async | TokenKind.Attribute | Await
  | Bool | Break | Case | Catch | Category | Children | Class | Classname
  | Clone | Const | Construct | Continue | Coroutine | Darray | Dict
  | Default | Define | HaltCompiler | Declare | Destruct | Do | Double
  | Echo | Else | Elseif | Empty | Endfor | Endforeach | Endif | Endwhile
  | Enum | Eval | Extends | Fallthrough | Float | Final | Finally | For
  | Foreach | From | Function | Global | Goto | If | Implements | Include
  | Include_once | Inout | Insteadof | Int | Interface | Isset | Keyset
  | List | Mixed | Namespace | New | Newtype | Noreturn | Num | Object
  | Parent | Print | Private | Protected | Public | Require | Require_once
  | Required | Resource | Return | Self | Shape | Static | String | Super
  | Suspend | Switch | This | Throw | Trait | Try | Tuple | Type | Unset
  | Use | Using | Var | Varray | Vec | Void | Where | While | Yield -> true
  (* Names that imply cast *)
  | Name | Backslash | Variable -> true
  (* Symbols that imply cast *)
  | At | DollarDollar | Exclamation | LeftParen | Minus | MinusMinus
  | Dollar | Plus | PlusPlus | Tilde -> true
  (* Literals that imply cast *)
  | BinaryLiteral | BooleanLiteral | DecimalLiteral
  | DoubleQuotedStringLiteral | DoubleQuotedStringLiteralHead
  | StringLiteralBody | DoubleQuotedStringLiteralTail
  | ExecutionStringLiteral | ExecutionStringLiteralHead
  | ExecutionStringLiteralTail | FloatingLiteral
  | HeredocStringLiteral | HeredocStringLiteralHead
  | HeredocStringLiteralTail | HexadecimalLiteral | NowdocStringLiteral
  | NullLiteral | OctalLiteral | SingleQuotedStringLiteral -> true
  (* Keywords that imply parenthesized expression *)
  | And | As | Instanceof | Is | Or | Xor -> false
  (* Symbols that imply parenthesized expression *)
  | Ampersand | AmpersandAmpersand | AmpersandEqual
  | Bar | BarBar | BarEqual | BarGreaterThan
  | Carat | CaratEqual | Colon | ColonColon | Comma
  | Dot | DotEqual | DotDotDot
  | Equal | EqualEqual | EqualEqualEqual | EqualEqualGreaterThan
  | EqualGreaterThan | ExclamationEqual | LessThanGreaterThan
  | ExclamationEqualEqual
  | GreaterThan | GreaterThanEqual | GreaterThanGreaterThan
  | GreaterThanGreaterThanEqual | LessThanLessThanEqual
  | MinusEqual | MinusGreaterThan
  | Question | QuestionMinusGreaterThan | QuestionQuestion | QuestionColon
  | RightBrace | RightBracket | RightParen | LeftBrace | LeftBracket
  | LessThan | LessThanEqual | LessThanEqualGreaterThan | LessThanLessThan
  | Percent | PercentEqual | PlusEqual | Semicolon
  | Slash | SlashEqual | SlashGreaterThan
  | Star | StarEqual | StarStar | StarStarEqual -> false
  (* Misc *)
  | Markup | LessThanQuestion | QuestionGreaterThan | ErrorToken
  | TokenKind.EndOfFile -> false
  (* TODO: Sort out rules for interactions between casts and XHP. *)
  | LessThanSlash | XHPCategoryName | XHPElementName | XHPClassName
  | XHPStringLiteral | XHPBody | XHPComment -> false
and possible_cast_expression parser =
  (* SPEC:
       cast-expression:
         (  cast-type  ) unary-expression
       cast-type:
         array, bool, double, float, int, object, string, unset or a name

     TODO: This implies that a cast "(name)" can only be a simple name, but
     I would expect that (\Foo\Bar), (:foo), (array<int>), and the like
     should also be legal casts. If we implement that then we will need
     a sophisticated heuristic to determine whether this is a cast or a
     parenthesized expression.

     Casts make the grammar ambiguous: (x)-y is either a subtraction or a
     cast applied to a unary minus. The ambiguity is resolved like this:

     * If the thing in parens is one of the keywords mentioned above, then
       it's a cast.
     * If the token which follows (x) is "as" or "instanceof" then
       it's a parenthesized expression.
     * PHP-ism extension: if the token is "and", "or" or "xor", then it's a
       parenthesized expression.
     * Otherwise, if the token which follows (x) is $$, @, ~, !, (, +, -,
       any name, qualified name, variable name, literal, or keyword then
       it's a cast.
     * Otherwise, it's a parenthesized expression.

     The result is Some (parser, left paren, cast type, right paren) for a
     cast and None otherwise. *)
  let (parser, left_paren) = assert_token parser LeftParen in
  let (parser, type_token) = next_token parser in
  let (parser, right_paren) = next_token parser in
  let is_cast =
    Token.kind right_paren = RightParen
    && begin match Token.kind type_token with
       (* The built-in cast types settle the question on their own. *)
       | Array | Bool | Double | Float | Int | Object | String | Unset ->
         true
       (* For a plain name the following token decides. *)
       | Name -> token_implies_cast (peek_token_kind parser)
       | _ -> false
       end in
  if is_cast then
    Some (parser, left_paren, make_token type_token, make_token right_paren)
  else
    None
and possible_lambda_expression parser =
  (* At this point a left paren is next and a cast has already been ruled
     out. What remains to decide is whether we are looking at the signature
     of a lambda or at a parenthesized expression.

     This is not straightforward. Checking for a colon after the
     parenthesized part, for instance, is not enough:

       $a = $b ? ($x) : ($y)              ($x) is parenthesized expression
       $a = $b ? ($x) : int ==> 1 : ($y)  ($x) is lambda signature

     ERROR RECOVERY:

     The approach is to try parsing a lambda signature. If that succeeds
     *without error* and ==> comes next, it is definitely a lambda and the
     parsed signature is returned. In every other case None is returned and
     the caller treats the input as a parenthesized expression.

     TODO: There could be situations where we have good evidence that a
     lambda is intended but these conditions are not met. Consider
     a more sophisticated recovery strategy.  For example, if we have
     (x)==> then odds are pretty good that a lambda was intended and the
     error should say that ($x)==> was expected. *)
  match parse_if_no_error parser parse_lambda_signature with
  | None -> None
  | Some (after_signature, _) as signature_result ->
    if peek_token_kind after_signature = EqualEqualGreaterThan
    then signature_result
    else None
and parse_lambda_expression parser =
  (* SPEC
       lambda-expression:
         async-opt  lambda-function-signature  ==>  lambda-body

     Besides the optional 'async', an optional 'coroutine' token is also
     accepted, right after it. *)
  let (parser, async_kw) = optional_token parser Async in
  let (parser, coroutine_kw) = optional_token parser Coroutine in
  let (parser, signature) = parse_lambda_signature parser in
  let (parser, arrow) = require_lambda_arrow parser in
  let (parser, body) = parse_lambda_body parser in
  let lambda =
    make_lambda_expression async_kw coroutine_kw signature arrow body in
  (parser, lambda)
and parse_lambda_expression_after_signature parser signature =
  (* Used when the signature has already been parsed while telling the
     lambda apart from a cast; there was no async or coroutine in front of
     it, so both are recorded as missing. *)
  let no_async = make_missing parser in
  let no_coroutine = make_missing parser in
  let (parser, arrow) = require_lambda_arrow parser in
  let (parser, body) = parse_lambda_body parser in
  let lambda =
    make_lambda_expression no_async no_coroutine signature arrow body in
  (parser, lambda)
and parse_lambda_signature parser =
  (* SPEC:
       lambda-function-signature:
         variable-name
         ( anonymous-function-parameter-declaration-list-opt ) /
           anonymous-function-return-opt *)
  let (after_first, first) = next_token parser in
  match Token.kind first with
  | Variable ->
    (* Short form: the signature is just the variable token. *)
    (after_first, make_token first)
  | _ ->
    (* Long form: start over from the original position and parse the
       parameter list followed by the optional return type. *)
    let (parser, left, params, right) = parse_parameter_list_opt parser in
    let (parser, colon, return_type) = parse_optional_return parser in
    (parser, make_lambda_signature left params right colon return_type)
and parse_lambda_body parser =
  (* SPEC:
       lambda-body:
         expression
         compound-statement

     A '{' selects the compound statement form; otherwise an expression is
     parsed with the precedence reset. *)
  match peek_token_kind parser with
  | LeftBrace -> parse_compound_statement parser
  | _ -> with_reset_precedence parser parse_expression
and parse_parenthesized_expression parser =
  (* '(' expression ')'. The inner expression is parsed with the precedence
     reset, and the closing paren is required. *)
  let (parser, open_paren) = assert_token parser LeftParen in
  let (parser, inner) = with_reset_precedence parser parse_expression in
  let (parser, close_paren) = require_right_paren parser in
  (parser, make_parenthesized_expression open_paren inner close_paren)
and parse_postfix_unary parser term =
  (* Consumes the next token as a postfix operator applied to term, then
     carries on with whatever follows the resulting expression. *)
  let (parser, operator_token) = next_token parser in
  let operator = make_token operator_token in
  parse_remaining_expression parser
    (make_postfix_unary_expression term operator)
and parse_prefix_unary_expression parser =
  (* Consumes the next token as a prefix operator, parses its operand at
     that operator's precedence, and builds the prefix unary expression
     through make_and_track_prefix_unary_expression. *)
  (* TODO: Operand to ++ and -- must be an lvalue. *)
  let (parser, operator_token) = next_token parser in
  let operator_kind = Token.kind operator_token in
  let operator = Operator.prefix_unary_from_token operator_kind in
  let operator_node = make_token operator_token in
  let (parser, operand) =
    parse_expression_with_operator_precedence parser operator in
  make_and_track_prefix_unary_expression
    parser operator_node operator_kind operand
and parse_simple_variable parser =
  (* A variable token is taken as is, a '$' starts a dollar expression, and
     anything else goes through require_variable. *)
  match peek_token_kind parser with
  | Dollar -> parse_dollar_expression parser
  | Variable ->
    let (after_variable, variable) = next_token parser in
    (after_variable, make_token variable)
  | _ -> require_variable parser
and parse_dollar_expression parser =
  (* Parses '$' followed by its operand and builds the resulting prefix
     unary expression through make_and_track_prefix_unary_expression. *)
  let (after_dollar, dollar) = assert_token parser Dollar in
  let (after_operand, operand) =
    match peek_token_kind after_dollar with
    | LeftBrace -> parse_braced_expression after_dollar
    | Variable when Env.php5_compat_mode (env after_dollar) ->
      parse_variable_in_php5_compat_mode after_dollar
    | _ ->
      let dollar_operator = Operator.prefix_unary_from_token Dollar in
      parse_expression_with_operator_precedence after_dollar dollar_operator
  in
  make_and_track_prefix_unary_expression after_operand dollar Dollar operand
and parse_instanceof_expression parser left =
  (* SPEC:
       instanceof-expression:
         instanceof-subject  instanceof   instanceof-type-designator

       instanceof-subject:
         expression

       instanceof-type-designator:
         qualified-name
         variable-name

     TODO: The spec is plainly wrong here. This is a bit of a mess and
     there are a number of issues.

     The root of the trouble is that the right hand side can be either a
     type, or an expression that evaluates to a string naming the type.

     According to the spec grammar above, only a qualified name (naming the
     type directly) or a variable of classname type is allowed there. That
     is not what Hack / HHVM accepts. The accepted grammar treats
     "instanceof" as a binary operator with an expression on each side, and
     with lower precedence than ->. Hence

       $x instanceof $y -> z

     must be parsed as ($x instanceof ($y -> z)), and not, as the spec
     grammar implies, (($x instanceof $y) -> z).

     But wait, it gets worse.

     Less-than has lower precedence than instanceof, so
     "$x instanceof foo < 10" should be parsed as
     (($x instanceof foo) < 10). Yet it seems plausible that we might want
     to parse "$x instanceof foo<int>" someday, which would be ambiguous:
     on seeing the < how do we know whether a type is being parsed?

     Moreover, XHP class names must be accepted on the right hand side:

       $x instanceof :foo

     We cannot simply make the grammar

       instanceof-type-designator:
         xhp-class-name
         expression

     because that gives the wrong parse for

       class :foo { static $bar = "abc" }
       class abc { }
       ...
       $x instanceof :foo :: $bar

     which has to be parsed as $x instanceof (:foo :: $bar).

     The solution is as follows.

     First, an XHP class name must be a legal expression. It might seem
     possible to treat an XHP class name as a legal type, or as legal in
     expression context only when immediately followed by ::, but that does
     not work out: both

       $x instanceof :foo :: $bar

     and

       $x instanceof :foo

     have to parse, and the most expedient way to get there is to parse any
     expression on the right and make XHP class names legal expressions.

     So the grammar actually parsed here is:

       instanceof-type-designator:
         expression

     An unfortunate consequence is that the common case, say

       $x instanceof C

     yields a parse node for C that is a name token, not a name token
     wrapped up as a simple type.

     Should we ever need to parse both arbitrary expressions and arbitrary
     types here, we'll have some tricky problems to solve. *)
  let (parser, instanceof_kw) = assert_token parser Instanceof in
  let instanceof_precedence =
    Operator.precedence Operator.InstanceofOperator in
  let (parser, first_right_term) = parse_term parser in
  (* Gather everything to the right that the helper attaches to the first
     term at the precedence of instanceof. *)
  let (parser, right) =
    parse_remaining_binary_expression_helper
      parser first_right_term instanceof_precedence in
  parse_remaining_expression parser
    (make_instanceof_expression left instanceof_kw right)
and parse_is_expression parser left =
  (* SPEC:
       is-expression:
         is-subject  is  type-specifier

       is-subject:
         expression *)
  let (parser, is_kw) = assert_token parser Is in
  (* The right hand side is a type, so it is handed to the type parser. *)
  let (parser, type_specifier) =
    with_type_parser parser TypeParser.parse_type_specifier in
  parse_remaining_expression parser
    (make_is_expression left is_kw type_specifier)
and parse_remaining_binary_expression
  parser left_term assignment_prefix_kind =
  (* On entry a left term is in hand and a binary operator is known to
   * follow it; that operator has equal or higher precedence than whatever
   * is going on in the left term (or we are at the start of an
   * assignment).
   *
   * To see how this works, take
   *
   *   A x B y C
   *
   * with terms A, B, C and operators x, y. It parses either as
   *
   *   (A x B) y C
   *
   * or as
   *
   *   A x (B y C)
   *
   * The first reading applies when x has higher precedence than y, or when
   * both have the same precedence and x is left associative. The second
   * applies when x has lower precedence than y, or x is right associative.
   *
   * The procedure: A is in hand at a low precedence and x follows it. The
   * precedence of x is higher, so B is parsed, and then a helper collects
   * everything to the right of B that is of higher precedence than x (or
   * equal and right-associative).
   *
   * If x is of lower precedence than y (or equal and right-assoc), the
   * helper builds (B y C) as the right term and the result is A x (B y C).
   * Otherwise the helper just hands back B, (A x B) is built, and parsing
   * continues with that as the new left term.
   *)
  let is_rhs_of_assignment = assignment_prefix_kind <> Prefix_none in
  assert (not (next_is_lower_precedence parser) || is_rhs_of_assignment);

  let (after_operator, token) = next_token parser in
  let operator = Operator.trailing_from_token (Token.kind token) in
  (* The ordinary path: parse the right operand and build left op right. *)
  let parse_ordinary_binary () =
    let operator_precedence = Operator.precedence operator in
    let (after_term, first_right_term) =
      if is_rhs_of_assignment then
        (* reset the current precedence to make sure that expression on
           the right hand side of the assignment is fully consumed *)
        with_reset_precedence after_operator parse_term
      else
        parse_term after_operator in
    let (after_right, right_term) =
      parse_remaining_binary_expression_helper
        after_term first_right_term operator_precedence in
    let binary =
      make_binary_expression left_term (make_token token) right_term in
    parse_remaining_expression after_right binary in
  (* On the right hand side of a possible by-ref assignment, check whether
     the next token is '&'. If so, parse the next term; when that term as a
     whole is '&'PHP variable, the expression is parsed as
       ... (left_term = & right_term) ...
     In every other case the ordinary path is taken from just after the
     operator. *)
  let starts_like_byref =
    assignment_prefix_kind = Prefix_byref_assignment
    && Token.kind (peek_token after_operator) = Ampersand in
  if not starts_like_byref then parse_ordinary_binary ()
  else
    let (after_term, right_term) =
      parse_term
        (with_precedence
          after_operator
          Operator.precedence_for_assignment_in_expressions) in
    if is_byref_assignment_source after_term right_term then
      let assignment =
        make_binary_expression left_term (make_token token) right_term in
      (* The finished assignment now plays the role of a right term with
         respect to the precedence that was in force on entry. *)
      let (after_rest, combined) =
        parse_remaining_binary_expression_helper
          after_term assignment parser.precedence in
      parse_remaining_expression after_rest combined
    else
      parse_ordinary_binary ()
and parse_remaining_binary_expression_helper
    parser right_term left_precedence =
  (* Gathers up the terms to the right of an operator that are operands of
     operators of higher precedence than the operator to the left.
     For instance, given A + B * C / D + E with A + already parsed, we want
     B * C / D to become the right side of the +. Here [right_term] is B and
     [left_precedence] is the precedence of +. *)
  let kind = Token.kind (peek_token parser) in
  if not (Operator.is_trailing_operator_token kind) then
    (parser, right_term)
  else
    let right_operator = Operator.trailing_from_token kind in
    let right_precedence = Operator.precedence right_operator in
    let associativity = Operator.associativity right_operator in
    (* Checks for the case  ... $a = ...  where
         'left_precedence' is the precedence of the operation left of $a,
         'right_term' is $a,
         'kind' is the operator that follows right_term.
       If right_term is a valid left hand side for an assignment, the token
       is an assignment operator and left_precedence is less than the bumped
       priority for the assignment, the precedence is reset before parsing
       the right hand side of the assignment so that it is consumed. *)
    let is_parsable_as_assignment =
      check_if_parsable_as_assignment parser right_term kind left_precedence
      <> Prefix_none in
    let binds_to_right_term =
      right_precedence > left_precedence
      || (associativity = Operator.RightAssociative
          && right_precedence = left_precedence)
      || is_parsable_as_assignment in
    if not binds_to_right_term then
      (parser, right_term)
    else
      (* If the expression can be parsed as an assignment, keep track of the
         precedence on the left of the assignment (this is fine because the
         precedence is boosted internally when the rhs of the assignment is
         parsed). This is needed for cases like
           ... + $a = &$b * $c + ...
         which should be parsed as
           (... + ($a = &$b) * $c) + ...
         Starting just after the first '+':
         - the byref assignment is consumed first, as e1;
         - the precedence of '*' is greater than that of the '+'
           (left_precedence), so e1 * $c is consumed as e2;
         - the precedence of the second '+' is less than or equal to that of
           the first '+' (left_precedence), so we stop there, and the result
           up to the second '+' is (... + e2). *)
      let precedence =
        if is_parsable_as_assignment then left_precedence
        else right_precedence in
      let (parser2, right_term) =
        parse_remaining_expression
          (with_precedence parser precedence) right_term in
      (* Restore the precedence the incoming parser had before looping. *)
      let parser3 = with_precedence parser2 parser.precedence in
      parse_remaining_binary_expression_helper
        parser3 right_term left_precedence
and parse_conditional_expression parser test question =
  (* Parses the part of a conditional expression that follows the already
     consumed [test] and [question] nodes.

     POSSIBLE SPEC PROBLEM
     We allow any expression, including assignment expressions, to be in
     the consequence and alternative of a conditional expression, even
     though assignment is lower precedence than ?:. This is legal:
       $a ? $b = $c : $d = $e
     Interestingly, this is illegal in C and Java, which require parens,
     but legal in C#.
  *)
  (* e1 ?: e2 -- where there is no consequence -- is legal.
     However this introduces an ambiguity:
       x ? :y::m : z
     is that
       x ?: y::m : z
     or
       x ? :y::m : z
     We assume the latter.
     TODO: Review this decision.
     TODO: Add this to the XHP draft specification.
  *)
  let missing_consequence =
    peek_token_kind parser = Colon && not (is_next_xhp_class_name parser) in
  let (parser, consequence) =
    if not missing_consequence then
      with_reset_precedence parser parse_expression
    else
      (parser, make_missing parser) in
  let (parser, colon) = require_colon parser in
  let (parser, first_term) = parse_term parser in
  let (parser, alternative) =
    parse_remaining_binary_expression_helper
      parser
      first_term
      (Operator.precedence Operator.ConditionalQuestionOperator) in
  ( parser,
    make_conditional_expression test question consequence colon alternative )
and parse_name_or_collection_literal_expression parser name =
  (* Given an already parsed [name], decides from the next token whether it
     starts a collection literal ("Name {" or "Name<...> {"). If it does not,
     the name and the original parser are returned unchanged. *)
  let plain_name = (parser, name) in
  match peek_token_kind parser with
  | LeftBrace ->
    name
    |> make_simple_type_specifier
    |> parse_collection_literal_expression parser
  | LessThan ->
    let (after_arguments, (type_arguments, no_arg_is_missing)) =
      parse_generic_type_arguments_opt parser in
    (* Only commit to the generic reading when the type arguments parsed
       without adding errors and a left brace follows them. *)
    let is_generic_collection =
      no_arg_is_missing
      && is_type_arguments type_arguments
      && parser.errors = after_arguments.errors
      && peek_token_kind after_arguments = LeftBrace in
    if not is_generic_collection then
      plain_name
    else
      parse_collection_literal_expression
        after_arguments
        (make_generic_type_specifier name type_arguments)
  | _ ->
    plain_name
and parse_collection_literal_expression parser name =
  (* SPEC
     collection-literal:
       key-collection-class-type { cl-initializer-list-with-keys-opt }
       non-key-collection-class-type { cl-initializer-list-without-keys-opt }
       pair-type { cl-element-value , cl-element-value }

     The types are grammatically qualified names; however the specification
     states that they must be as follows:
      * keyed collection type can be Map or ImmMap
      * non-keyed collection type can be Vector, ImmVector, Set or ImmSet
      * pair type can be Pair

     We will not attempt to determine if the names give the name of an
     appropriate type here. That's for the type checker.

     The argument lists are:

      * for keyed, an optional comma-separated list of
        expression => expression pairs
      * for non-keyed, an optional comma-separated list of expressions
      * for pairs, a comma-separated list of exactly two expressions

     In all three cases, the lists may be comma-terminated.
     TODO: This fact is not represented in the specification; it should be.
     This work item is tracked by spec issue #109.
  *)
  let (parser, open_brace, initializers, close_brace) =
    parse_braced_comma_list_opt_allow_trailing parser parse_init_expression in
  (* Validating the name is a collection type happens in a later phase *)
  ( parser,
    make_collection_literal_expression
      name open_brace initializers close_brace )
and parse_init_expression parser =
  (* Parses one collection-literal element: either "expr" or "expr => expr".

     ERROR RECOVERY
     We expect either a list of expr, expr, expr, ... or
     expr => expr, expr => expr, expr => expr, ...
     Rather than require at parse time that the list be all one or the other,
     we allow both, and give an error in the type checker.
  *)
  let (parser, key_or_value) = parse_expression_with_reset_precedence parser in
  let (parser, arrow) = optional_token parser TokenKind.EqualGreaterThan in
  if not (is_missing arrow) then
    let (parser, value) = parse_expression_with_reset_precedence parser in
    (parser, make_element_initializer key_or_value arrow value)
  else
    (parser, key_or_value)
and parse_keyed_element_initializer parser =
  (* Parses "expr => expr"; unlike parse_init_expression, the arrow is
     required here. *)
  let (parser, key) = parse_expression_with_reset_precedence parser in
  let (parser, arrow) = require_arrow parser in
  let (parser, value) = parse_expression_with_reset_precedence parser in
  (parser, make_element_initializer key arrow value)
and parse_list_expression parser =
  (* SPEC:
     list-intrinsic:
       list ( expression-list-opt )
     expression-list:
       expression-opt
       expression-list , expression-opt

     See https://github.com/hhvm/hack-langspec/issues/82

     list-intrinsic must be used as the left-hand operand in a
     simple-assignment-expression of which the right-hand operand
     must be an expression that designates a vector-like array or
     an instance of the class types Vector, ImmVector, or Pair
     (the "source").

     TODO: Produce an error later if the expressions in the list destructuring
     are not lvalues.
  *)
  let (parser, list_keyword) = assert_token parser List in
  let (parser, open_paren, elements, close_paren) =
    parse_parenthesized_comma_list_opt_items_opt
      parser parse_expression_with_reset_precedence in
  (parser, make_list_expression list_keyword open_paren elements close_paren)
(* grammar:
 * array_intrinsic := array ( array-initializer-opt )
 *)
and parse_array_intrinsic_expression parser =
  (* Parses the "array" keyword followed by a parenthesized, optionally
     comma-terminated list of array element initializers. *)
  let (parser, array_keyword) = assert_token parser Array in
  let (parser, open_paren, elements, close_paren) =
    parse_parenthesized_comma_list_opt_allow_trailing
      parser parse_array_element_init in
  ( parser,
    make_array_intrinsic_expression
      array_keyword open_paren elements close_paren )
and parse_bracketed_collection_intrinsic_expression
    parser
    keyword_token
    parse_element_function
    make_intrinsinc_function =
  (* Shared driver for the "keyword [ elements ]" intrinsics.
     [keyword_token] is the token kind that introduces the intrinsic,
     [parse_element_function] parses one element, and
     [make_intrinsinc_function] builds the node from the keyword, the left
     bracket, the element list and the right bracket. *)
  let (after_keyword, keyword) = assert_token parser keyword_token in
  let (after_bracket, left_bracket) =
    optional_token after_keyword LeftBracket in
  if not (is_missing left_bracket) then
    let (parser, members) =
      parse_comma_list_opt_allow_trailing
        after_bracket
        RightBracket
        SyntaxError.error1015
        parse_element_function in
    let (parser, right_bracket) = require_right_bracket parser in
    ( parser,
      make_intrinsinc_function keyword left_bracket members right_bracket )
  else
    (* Fall back to dict being an ordinary name. Perhaps we're calling a
       function whose name is indicated by the keyword_token, for example.
       Note that we restart from the original parser, before the keyword. *)
    parse_as_name_or_error parser
and parse_darray_intrinsic_expression parser =
  (* Parses "darray [ key => value, ... ]" via the shared bracketed
     collection driver.
     TODO: Create the grammar and add it to the spec. *)
  let parse_element = parse_keyed_element_initializer in
  parse_bracketed_collection_intrinsic_expression
    parser Darray parse_element make_darray_intrinsic_expression
and parse_dictionary_intrinsic_expression parser =
  (* Parses "dict [ key => value, ... ]" via the shared bracketed
     collection driver.
     TODO: Create the grammar and add it to the spec.
     TODO: Can the list have a trailing comma? *)
  let parse_element = parse_keyed_element_initializer in
  parse_bracketed_collection_intrinsic_expression
    parser Dict parse_element make_dictionary_intrinsic_expression
and parse_keyset_intrinsic_expression parser =
  (* Parses "keyset [ expr, ... ]" via the shared bracketed collection
     driver. *)
  let parse_element = parse_expression_with_reset_precedence in
  parse_bracketed_collection_intrinsic_expression
    parser Keyset parse_element make_keyset_intrinsic_expression
and parse_varray_intrinsic_expression parser =
  (* Parses "varray [ expr, ... ]" via the shared bracketed collection
     driver.
     TODO: Create the grammar and add it to the spec. *)
  let parse_element = parse_expression_with_reset_precedence in
  parse_bracketed_collection_intrinsic_expression
    parser Varray parse_element make_varray_intrinsic_expression
and parse_vector_intrinsic_expression parser =
  (* Parses a vector intrinsic, "keyword [ expr, ... ]", via the shared
     bracketed collection driver.
     TODO: Create the grammar and add it to the spec.
     TODO: Can the list have a trailing comma? *)
  parse_bracketed_collection_intrinsic_expression
    parser
    (* The keyword token kind was missing from this call, leaving the driver
       one argument short; Vec is presumably the intended kind, by analogy
       with the sibling Darray / Dict / Keyset / Varray wrappers. *)
    Vec
    parse_expression_with_reset_precedence
    make_vector_intrinsic_expression
(* array_creation_expression :=
     [ array-initializer-opt ]
   array-initializer :=
     array-initializer-list ,-opt
   array-initializer-list :=
      array-element-initializer
      array-element-initializer , array-initializer-list
*)
and parse_array_creation_expression parser =
  (* Parses a bracketed, optionally comma-terminated list of array element
     initializers. *)
  let (parser, open_bracket, elements, close_bracket) =
    parse_bracketted_comma_list_opt_allow_trailing
      parser parse_array_element_init in
  (parser, make_array_creation_expression open_bracket elements close_bracket)
(* array-element-initializer :=
 * expression
 * expression => expression
 *)
and parse_array_element_init parser =
  (* Parses one expression and, only when the following token is "=>",
     consumes it together with a second expression. *)
  let (after_first, first) = with_reset_precedence parser parse_expression in
  let (after_token, token) = next_token after_first in
  if Token.kind token = EqualGreaterThan then
    let (after_second, second) =
      with_reset_precedence after_token parse_expression in
    let arrow = make_token token in
    (after_second, make_element_initializer first arrow second)
  else
    (* Not an arrow: leave the peeked token unconsumed. *)
    (after_first, first)
and parse_field_initializer parser =
  (* SPEC
     field-initializer:
       single-quoted-string-literal => expression
       double_quoted_string_literal => expression
       qualified-name => expression
       scope-resolution-expression => expression
  *)
  (* Specification is wrong, and fixing it is being tracked by
   * https://github.com/hhvm/hack-langspec/issues/108
   *)
  (* ERROR RECOVERY: We allow any expression on the left-hand side,
   * even though only some expressions are legal;
   * we will give an error in a later pass
   *)
  let (parser, field_name) = with_reset_precedence parser parse_expression in
  let (parser, arrow) = require_arrow parser in
  let (parser, field_value) = with_reset_precedence parser parse_expression in
  (parser, make_field_initializer field_name arrow field_value)
and parse_shape_expression parser =
  (* SPEC
     shape-literal:
       shape ( field-initializer-list-opt )

     field-initializer-list:
       field-initializers ,-opt

     field-initializers:
       field-initializer
       field-initializers , field-initializer
  *)
  let (parser, shape_keyword) = assert_token parser Shape in
  let (parser, open_paren, field_list, close_paren) =
    parse_parenthesized_comma_list_opt_allow_trailing
      parser parse_field_initializer in
  ( parser,
    make_shape_expression shape_keyword open_paren field_list close_paren )
and parse_tuple_expression parser =
  (* SPEC
     tuple-literal:
       tuple ( expression-list-one-or-more )

     expression-list-one-or-more:
       expression
       expression-list-one-or-more , expression

     TODO: Can the list be comma-terminated? If so, update the spec.
     TODO: We need to produce an error in a later pass if the list is empty.
  *)
  let (parser, tuple_keyword) = assert_token parser Tuple in
  let (parser, open_paren, elements, close_paren) =
    parse_parenthesized_comma_list_opt_allow_trailing
      parser parse_expression_with_reset_precedence in
  (parser, make_tuple_expression tuple_keyword open_paren elements close_paren)
and parse_use_variable parser =
  (* Parses one entry of a use clause: a variable, optionally preceded by
     an ampersand.
     TODO: Is it better that this returns the variable as a *token*, or
     as an *expression* that consists of the token? We do the former. *)
  let (parser, ampersand) = optional_token parser Ampersand in
  let (parser, variable) = require_variable parser in
  if not (is_missing ampersand) then
    make_and_track_prefix_unary_expression parser ampersand Ampersand variable
  else
    (parser, variable)
and parse_anon_or_lambda_or_awaitable parser =
  (* TODO: The original Hack parser accepts "async" as an identifier, and
     so we do too. We might consider making it reserved. *)
  (* Look past any static, async or coroutine modifiers that may be present,
     purely to decide which sub-parser to use. Each sub-parser is handed the
     original parser and deals with the modifiers itself as appropriate. *)
  let skip kind lookahead = fst (optional_token lookahead kind) in
  let after_modifiers =
    parser |> skip Static |> skip Async |> skip Coroutine in
  match peek_token_kind after_modifiers with
  | Function -> parse_anon parser
  | LeftBrace -> parse_async_block parser
  | Variable | LeftParen -> parse_lambda_expression parser
  | _ -> parse_as_name_or_error parser
and parse_async_block parser =
  (*
   * grammar:
   *  awaitable-creation-expression :
   *    async-opt  coroutine-opt  compound-statement
   * TODO awaitable-creation-expression must not be used as the
   *      anonymous-function-body in a lambda-expression
   *)
  let (parser, async_keyword) = optional_token parser Async in
  let (parser, coroutine_keyword) = optional_token parser Coroutine in
  let (parser, block) = parse_compound_statement parser in
  ( parser,
    make_awaitable_creation_expression async_keyword coroutine_keyword block )
and parse_anon_use_opt parser =
  (* SPEC:
     anonymous-function-use-clause:
       use ( use-variable-name-list ,-opt )

     use-variable-name-list:
       variable-name
       use-variable-name-list , variable-name

     TODO: Strict mode requires that it be a list of variables; in
     non-strict mode we allow variables to be decorated with a leading
     & to indicate they are captured by reference. We need to give an
     error in a later pass for this.
  *)
  let (parser, use_token) = optional_token parser Use in
  if not (is_missing use_token) then
    let (parser, open_paren, variables, close_paren) =
      parse_parenthesized_comma_list_opt_allow_trailing
        parser parse_use_variable in
    ( parser,
      make_anonymous_function_use_clause
        use_token open_paren variables close_paren )
  else
    (* No use clause at all: represent it with a missing node. *)
    (parser, make_missing parser)
and parse_optional_return parser =
  (* Parses an optional colon followed by a return type. Returns the parser,
     the colon and the return type; when there is no colon the return type
     is a missing node. *)
  let (parser, colon) = optional_token parser Colon in
  if is_missing colon then
    (parser, colon, make_missing parser)
  else
    let (parser, return_type) =
      with_type_parser parser TypeParser.parse_return_type in
    (parser, colon, return_type)
and parse_anon parser =
  (* SPEC
    anonymous-function-creation-expression:
      static-opt async-opt coroutine-opt  function
      ( anonymous-function-parameter-list-opt  )
      anonymous-function-return-opt
      anonymous-function-use-clauseopt
      compound-statement
  *)
  (* An anonymous function's formal parameter list is the same as a named
     function's formal parameter list except that types are optional.
     The "..." syntax and trailing commas are supported. We'll simply
     parse an optional parameter list; it already takes care of making the
     type annotations optional. *)
  let (parser, static) = optional_token parser Static in
  let (parser, async) = optional_token parser Async in
  let (parser, coroutine) = optional_token parser Coroutine in
  let (parser, fn) = assert_token parser Function in
  let (parser, left_paren, params, right_paren) =
    parse_parameter_list_opt parser in
  let (parser, colon, return_type, use_clause, is_php7) =
    let (parser, use_clause) = parse_anon_use_opt parser in
    if is_missing use_clause then begin
      (* Hack ordering: the return type comes first, then the use clause. *)
      let (parser, colon, return_type) = parse_optional_return parser in
      let (parser, use_clause) = parse_anon_use_opt parser in
      (parser, colon, return_type, use_clause, false)
    end
    else begin
      (* might be PHP7 style lambda where return type follows use clause *)
      let (parser, colon, return_type) = parse_optional_return parser in
      (parser, colon, return_type, use_clause, not (is_missing colon))
    end in
  let (parser, body) = parse_compound_statement parser in
  (* The function keyword node [fn] was missing from both constructor calls
     below, leaving it bound but unused; it is passed here between the
     coroutine modifier and the left parenthesis, presumably its intended
     position given the order in which the pieces are parsed. *)
  let result =
    if is_php7
    then
      make_php7_anonymous_function
        static
        async
        coroutine
        fn
        left_paren
        params
        right_paren
        use_clause
        colon
        return_type
        body
    else
      make_anonymous_function
        static
        async
        coroutine
        fn
        left_paren
        params
        right_paren
        colon
        return_type
        use_clause
        body in
  (parser, result)
and parse_braced_expression parser =
  (* Parses "{ expression }"; the caller guarantees that the next token is
     the left brace. *)
  let (parser, open_brace) = assert_token parser LeftBrace in
  let (parser, inner) = parse_expression_with_reset_precedence parser in
  let (parser, close_brace) = require_right_brace parser in
  (parser, make_braced_expression open_brace inner close_brace)
and require_right_brace_xhp parser =
  (* Like require_right_brace, but the closing brace is scanned as an XHP
     body token. *)
  let (after_token, token) = next_xhp_body_token parser in
  match Token.kind token with
  | TokenKind.RightBrace ->
    (after_token, make_token token)
  | _ ->
    (* ERROR RECOVERY: Create a missing token for the expected token,
       and continue on from the current token. Don't skip it. *)
    let missing = make_missing parser in
    (with_error parser SyntaxError.error1006, missing)
and parse_xhp_body_braced_expression parser =
  (* The difference between a regular braced expression and an
     XHP body braced expression is:
     <foo bar={$x}/*this_is_a_comment*/>{$y}/*this_is_body_text!*/</foo>
  *)
  let (parser, open_brace) = assert_token parser LeftBrace in
  let (parser, inner) = parse_expression_with_reset_precedence parser in
  (* The closing brace is scanned as an XHP body token. *)
  let (parser, close_brace) = require_right_brace_xhp parser in
  (parser, make_braced_expression open_brace inner close_brace)
and parse_xhp_attribute parser =
  (* Parses one XHP attribute. Returns None, with the parser unchanged, when
     the next XHP element token starts neither a spread attribute nor a
     simple one. *)
  let (after_token, token, _) = next_xhp_element_token parser in
  let kind = Token.kind token in
  match kind with
  | XHPElementName ->
    parse_xhp_simple_attribute after_token (make_token token)
  | LeftBrace ->
    (* The spread parser consumes the brace itself, so give it the
       original parser. *)
    parse_xhp_spread_attribute parser
  | _ ->
    (parser, None)
and parse_xhp_spread_attribute parser =
  (* Parses a spread attribute: a brace token, "...", an expression and a
     closing brace. The first token is taken as-is; the caller has already
     checked that it is a left brace. *)
  let (parser, open_brace_token, _) = next_xhp_element_token parser in
  let (parser, ellipsis) = assert_token parser DotDotDot in
  let (parser, spread_operand) =
    parse_expression_with_reset_precedence parser in
  let (parser, close_brace) = require_right_brace parser in
  let open_brace = make_token open_brace_token in
  let attribute =
    make_xhp_spread_attribute
      open_brace ellipsis spread_operand close_brace in
  (parser, Some attribute)
and parse_xhp_simple_attribute parser name =
  (* Parses the "= value" part of an XHP attribute whose [name] node has
     already been built, defensively checking for a well-formed attribute
     assignment. Always returns Some node, possibly with missing parts. *)
  let (parser', token, _) = next_xhp_element_token parser in
  (* Token kinds are compared structurally; the physical operator "!=" is
     not meant for comparing values. *)
  if Token.kind token <> Equal then
    (* ERROR RECOVERY: The = is missing; assume that the name belongs
       to the attribute, but that the remainder is missing, and start
       looking for the next attribute. The offending token is not consumed,
       so both missing nodes are created from the parser we return rather
       than from the one past that token. *)
    let missing_equal = make_missing parser in
    let missing_value = make_missing parser in
    let node = make_xhp_simple_attribute name missing_equal missing_value in
    let parser = with_error parser SyntaxError.error1016 in
    (parser, Some node)
  else
    let equal = make_token token in
    let (parser'', token, _) = next_xhp_element_token parser' in
    match Token.kind token with
    | XHPStringLiteral ->
      let node = make_xhp_simple_attribute name equal (make_token token) in
      (parser'', Some node)
    | LeftBrace ->
      (* Re-parse from just after the "=" so the brace is consumed by the
         braced expression parser. *)
      let (parser, expr) = parse_braced_expression parser' in
      let node = make_xhp_simple_attribute name equal expr in
      (parser, Some node)
    | _ ->
      (* ERROR RECOVERY: The expression is missing; assume that the "name ="
         belongs to the attribute and start looking for the next attribute.
         The unexpected token is not consumed, so the missing value is
         created from the parser just after the "=". *)
      let node =
        make_xhp_simple_attribute name equal (make_missing parser') in
      let parser = with_error parser' SyntaxError.error1017 in
      (parser, Some node)
and parse_xhp_body_element parser =
  (* Parses one element of an XHP body. Returns None, with the parser
     unchanged, when the next body token cannot start an element. *)
  let (after_token, token) = next_xhp_body_token parser in
  let wrap (parser, node) = (parser, Some node) in
  match Token.kind token with
  | XHPComment | XHPBody ->
    (after_token, Some (make_token token))
  | RightBrace ->
    (* If we find a free-floating right-brace in the middle of an XHP body
       that's just fine. It's part of the text. However, it is also likely
       to be a mis-edit, so we'll keep it as a right-brace token so that
       tooling can flag it as suspicious. *)
    (after_token, Some (make_token token))
  | LeftBrace ->
    wrap (parse_xhp_body_braced_expression parser)
  | LessThan ->
    wrap (parse_possible_xhp_expression ~consume_trailing_trivia:false parser)
  | _ ->
    (parser, None)
and parse_xhp_close ~consume_trailing_trivia parser _ =
  (* Parses the closing tag of an XHP element: "</", an element name and
     ">". The last argument is ignored. *)
  let (after_slash, less_than_slash, _) = next_xhp_element_token parser in
  (* Recovery shared by the two early failures: record [error], keep the
     first token, and use missing nodes for the name and the ">". *)
  let bail_out error =
    let parser = with_error after_slash error in
    let less_than_slash_token = make_token less_than_slash in
    let missing_name = make_missing parser in
    let missing_greater_than = make_missing parser in
    ( parser,
      make_xhp_close less_than_slash_token missing_name missing_greater_than )
  in
  if Token.kind less_than_slash <> LessThanSlash then
    (* ERROR RECOVERY: We probably got a < without a following / or name.
       TODO: For now we'll just bail out. We could use a more
       sophisticated strategy here. *)
    bail_out SyntaxError.error1026
  else
    let (after_name, name, _) = next_xhp_element_token after_slash in
    if Token.kind name <> XHPElementName then
      (* ERROR RECOVERY: *)
      bail_out SyntaxError.error1039
    else
      (* TODO: Check that the given and name_text are the same. *)
      let (after_close, greater_than, _) =
        next_xhp_element_token
          ~no_trailing:(not consume_trailing_trivia) after_name in
      if Token.kind greater_than = GreaterThan then
        ( after_close,
          make_xhp_close
            (make_token less_than_slash)
            (make_token name)
            (make_token greater_than) )
      else
        (* ERROR RECOVERY: *)
        let parser = with_error after_name SyntaxError.error1039 in
        let less_than_slash_token = make_token less_than_slash in
        let name_token = make_token name in
        let missing = make_missing parser in
        (parser, make_xhp_close less_than_slash_token name_token missing)
and parse_xhp_expression ~consume_trailing_trivia parser left_angle name name_text =
  (* Parses the rest of an XHP element once the "<" and the element name
     have been consumed: the attributes, then either "/>" or ">" followed
     by the body and the closing tag. *)
  let (after_attrs, attrs) =
    parse_list_until_none parser parse_xhp_attribute in
  let (after_token, token, _) =
    next_xhp_element_token ~no_trailing:true after_attrs in
  (* Builds an element that has an open tag but neither body nor close. *)
  let without_body xhp_open =
    let missing_body = make_missing after_attrs in
    let missing_close = make_missing after_attrs in
    make_xhp_expression xhp_open missing_body missing_close in
  match Token.kind token with
  | SlashGreaterThan ->
    let xhp_open = make_xhp_open left_angle name attrs (make_token token) in
    (after_token, without_body xhp_open)
  | GreaterThan ->
    let xhp_open = make_xhp_open left_angle name attrs (make_token token) in
    let (after_body, xhp_body) =
      parse_list_until_none after_token parse_xhp_body_element in
    let (after_close, xhp_close) =
      parse_xhp_close ~consume_trailing_trivia after_body name_text in
    (after_close, make_xhp_expression xhp_open xhp_body xhp_close)
  | _ ->
    (* ERROR RECOVERY: Assume the unexpected token belongs to whatever
       comes next. *)
    let missing_end = make_missing after_attrs in
    let xhp_open = make_xhp_open left_angle name attrs missing_end in
    let xhp = without_body xhp_open in
    (with_error after_attrs SyntaxError.error1013, xhp)
and parse_possible_xhp_expression ~consume_trailing_trivia parser =
  (* We got a < token where an expression was expected. *)
  let (after_angle, less_than) = assert_token parser LessThan in
  let (after_name, name_token, name_text) =
    next_xhp_element_token after_angle in
  match Token.kind name_token with
  | XHPElementName ->
    parse_xhp_expression
      ~consume_trailing_trivia
      after_name
      less_than
      (make_token name_token)
      name_text
  | _ ->
    (* ERROR RECOVERY
       Hard to say what to do here. We are expecting an expression;
       we could simply produce an error for the < and call that the
       expression. Or we could assume the left side of an inequality is
       missing, give a missing node for the left side, and parse the
       remainder as the right side. We'll go for the former for now. *)
    (with_error after_angle SyntaxError.error1015, less_than)
and parse_anon_or_awaitable_or_scope_resolution_or_name parser =
  (* static is a legal identifier. If the token after it is the scope
     resolution operator, parse the expression as a scope resolution;
     otherwise try to interpret it as an anonymous function (which will
     fall back to a name in case of failure). *)
  match peek_token_kind ~lookahead:1 parser with
  | ColonColon -> parse_scope_resolution_or_name parser
  | _ -> parse_anon_or_lambda_or_awaitable parser
and parse_scope_resolution_or_name parser =
  (* parent, self and static are legal identifiers. If the next
     thing that follows is a scope resolution operator, parse them as
     ordinary tokens, and then we'll pick them up as the operand to the
     scope resolution operator when we call parse_remaining_expression.
     Otherwise, parse them as ordinary names. *)
  let (after_qualifier, qualifier) = next_token parser in
  match peek_token_kind after_qualifier with
  | ColonColon -> (after_qualifier, make_token qualifier)
  | _ -> parse_as_name_or_error parser
and parse_scope_resolution_expression parser qualifier =
  (* SPEC
    scope-resolution-expression:
      scope-resolution-qualifier  ::  name
      scope-resolution-qualifier  ::  class

    scope-resolution-qualifier:
      qualified-name
      variable-name
      self
      parent
      static
  *)
  (* TODO: The left hand side can in fact be any expression in this parser;
     we need to add a later error pass to detect that the left hand side is
     a valid qualifier. *)
  (* TODO: The right hand side, if a name or a variable, is treated as a
     name or variable *token* and not a name or variable *expression*. Is
     that the desired tree topology? Give this more thought; it might impact
     rename refactoring semantics. *)
  let (parser, op) = require_coloncolon parser in
  (* Default handling of the right hand side: a name or a variable,
     otherwise error1048. *)
  let name_or_variable () =
    require_name_or_variable_or_error parser SyntaxError.error1048 in
  let (after_member, member_token) = next_token parser in
  let (parser, name) =
    match Token.kind member_token with
    | Class -> (after_member, make_token member_token)
    | Dollar -> parse_dollar_expression parser
    | LeftBrace -> parse_braced_expression parser
    | Variable ->
      if not (Env.php5_compat_mode (env parser)) then
        name_or_variable ()
      else
        let (after_variable, variable) =
          parse_variable_in_php5_compat_mode parser in
        (* for :: only do PHP5 transform for call expressions
           in other cases fall back to the regular parsing logic *)
        let is_call =
          peek_token_kind after_variable = LeftParen
          (* make sure the left parenthesis means a call
             for the expression we are currently parsing, and
             are not for example for a constructor call whose
             name would be the result of this expression. *)
          && not (operator_has_lower_precedence LeftParen parser) in
        if is_call then (after_variable, variable)
        else name_or_variable ()
    | _ ->
      name_or_variable ()
  in
  (parser, make_scope_resolution_expression qualifier op name)
2606 end (* WithSmartConstructors *)
2607 end (* WithSyntax *)