consider only `define(string, ..)` as a form recognized by the emitter
[hiphop-php.git] / hphp / hack / src / parser / full_fidelity_ast.ml
blob071f44c2172d56781ea269efd0814826b0536f2e
1 (**
2 * Copyright (c) 2016, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
11 (* What we're lowering from *)
12 open Full_fidelity_positioned_syntax
13 type node = Full_fidelity_positioned_syntax.t (* Let's be more explicit *)
14 (* What we're lowering to *)
15 open Ast
17 module SyntaxKind = Full_fidelity_syntax_kind
18 module TK = Full_fidelity_token_kind
19 module PT = Full_fidelity_positioned_token
20 module SourceText = Full_fidelity_source_text
21 open Prim_defs
23 open Core
25 let drop_pstr : int -> pstring -> pstring = fun cnt (pos, str) ->
26 let len = String.length str in
27 pos, if cnt >= len then "" else String.sub str cnt (len - cnt)
29 (* Context of the file being parsed, as global state. *)
30 type state_variables =
31 { language : string ref
32 ; filePath : Relative_path.t ref
33 ; mode : FileInfo.mode ref
34 ; popt : ParserOptions.t ref
35 ; ignorePos : bool ref
36 ; quickMode : bool ref
37 ; suppress_output : bool ref
40 let lowerer_state =
41 { language = ref "UNINITIALIZED"
42 ; filePath = ref Relative_path.default
43 ; mode = ref FileInfo.Mstrict
44 ; popt = ref ParserOptions.default
45 ; ignorePos = ref false
46 ; quickMode = ref false
47 ; suppress_output = ref false
50 let php_file () = !(lowerer_state.mode) == FileInfo.Mphp
52 (* "Local" context. *)
53 type env =
54 { saw_yield : bool ref
55 ; errors : (Pos.t * string) list ref
56 ; max_depth : int
59 type +'a parser = node -> env -> 'a
60 type ('a, 'b) metaparser = 'a parser -> 'b parser
62 let get_pos : node -> Pos.t = fun node ->
63 if !(lowerer_state.ignorePos)
64 then Pos.none
65 else begin
66 let pos_file = !(lowerer_state.filePath) in
67 let text = source_text node in
68 (* TODO(8086635) Figure out where this off-by-one comes from *)
69 let start_offset =
70 (*should be removed after resolving TODO above *)
71 let start_offset = start_offset node in
72 if start_offset = 0 then 0 else start_offset - 1
74 let end_offset = end_offset node in
75 let s_line, s_column = SourceText.offset_to_position text start_offset in
76 let e_line, e_column = SourceText.offset_to_position text end_offset in
77 let pos_start =
78 File_pos.of_line_column_offset
79 ~line:s_line
80 ~column:s_column
81 ~offset:start_offset
83 let pos_end =
84 File_pos.of_line_column_offset
85 ~line:e_line
86 ~column:e_column
87 ~offset:end_offset
89 Pos.make_from_file_pos ~pos_file ~pos_start ~pos_end
90 end
92 exception Lowerer_invariant_failure of string * string
93 let invariant_failure where what =
94 raise (Lowerer_invariant_failure (where, what))
96 exception API_Missing_syntax of string * env * node
97 let missing_syntax : string -> node -> env -> 'a = fun s n env ->
98 raise (API_Missing_syntax (s, env, n))
99 let handle_missing_syntax : string -> node -> env -> 'a = fun s n _ ->
100 let pos = Pos.string (Pos.to_absolute (get_pos n)) in
101 let msg = Printf.sprintf
102 "Missing case in %s.
103 - Pos: %s
104 - Unexpected: '%s'
105 - Kind: %s
109 (text n)
110 (SyntaxKind.to_string (kind n))
112 if not !(lowerer_state.suppress_output) then
113 Printf.eprintf "EXCEPTION\n---------\n%s\n" msg;
114 raise (Failure msg)
116 let runP : 'a parser -> node -> env -> 'a = fun pThing thing env ->
117 try pThing thing env with
118 | API_Missing_syntax (ex, env, n) -> handle_missing_syntax ex n env
119 | e -> raise e
122 (* TODO: Cleanup this hopeless Noop mess *)
123 let mk_noop : stmt list -> stmt list = function
124 | [] -> [Noop]
125 | s -> s
126 let mpStripNoop pThing node env = match pThing node env with
127 | [Noop] -> []
128 | stmtl -> stmtl
130 let mpOptional : ('a, 'a option) metaparser = fun p -> fun node env ->
131 Option.try_with (fun () -> p node env)
132 let mpYielding : ('a, ('a * bool)) metaparser = fun p node env ->
133 let local_ptr = ref false in
134 let result = p node { env with saw_yield = local_ptr } in
135 result, !local_ptr
140 let pos_name node =
141 let name = text node in
142 let local_ignore_pos = !(lowerer_state.ignorePos) in
143 (* Special case for __LINE__; never ignore position for that special name *)
144 if name = "__LINE__" then lowerer_state.ignorePos := false;
145 let p = get_pos node in
146 lowerer_state.ignorePos := local_ignore_pos;
147 p, name
149 let is_ret_by_ref node = not @@ is_missing node
151 let couldMap : 'a . f:'a parser -> 'a list parser = fun ~f -> fun node env ->
152 let rec synmap : 'a . 'a parser -> 'a list parser = fun f node env ->
153 match syntax node with
154 | SyntaxList l -> List.concat_map l ~f:(fun n -> go ~f n env)
155 | ListItem i -> [f i.list_item env]
156 | _ -> [f node env]
157 and go : 'a . f:'a parser -> 'a list parser = fun ~f -> function
158 | node when is_missing node -> fun _env -> []
159 | node -> synmap f node
161 go ~f node env
163 let as_list : node -> node list =
164 let strip_list_item = function
165 | { syntax = ListItem { list_item = i; _ }; _ } -> i
166 | x -> x
167 in function
168 | { syntax = SyntaxList ({syntax = ListItem _; _}::_ as synl); _ } ->
169 List.map ~f:strip_list_item synl
170 | { syntax = SyntaxList synl; _ } -> synl
171 | { syntax = Missing; _ } -> []
172 | syn -> [syn]
174 let missing =
175 let module V = Full_fidelity_positioned_syntax.PositionedSyntaxValue in
176 let open Full_fidelity_source_text in
177 { syntax = Missing
178 ; value =
179 { V.source_text = { text = ""; offset_map = Line_break_map.make "" }
180 ; V.offset = 0
181 ; V.leading_width = 0
182 ; V.width = 0
183 ; V.trailing_width = 0
189 let token_kind : node -> TK.t option = function
190 | { syntax = Token t; _ } -> Some (PT.kind t)
191 | _ -> None
193 let pBop : (expr -> expr -> expr_) parser = fun node env lhs rhs ->
194 match token_kind node with
195 | Some TK.Equal -> Binop (Eq None, lhs, rhs)
196 | Some TK.Bar -> Binop (Bar, lhs, rhs)
197 | Some TK.Ampersand -> Binop (Amp, lhs, rhs)
198 | Some TK.Plus -> Binop (Plus, lhs, rhs)
199 | Some TK.Minus -> Binop (Minus, lhs, rhs)
200 | Some TK.Star -> Binop (Star, lhs, rhs)
201 | Some TK.Or -> Binop (BArbar, lhs, rhs)
202 | Some TK.And -> Binop (AMpamp, lhs, rhs)
203 | Some TK.Xor -> Binop (LogXor, lhs, rhs)
204 | Some TK.Carat -> Binop (Xor, lhs, rhs)
205 | Some TK.Slash -> Binop (Slash, lhs, rhs)
206 | Some TK.Dot -> Binop (Dot, lhs, rhs)
207 | Some TK.Percent -> Binop (Percent, lhs, rhs)
208 | Some TK.LessThan -> Binop (Lt, lhs, rhs)
209 | Some TK.GreaterThan -> Binop (Gt, lhs, rhs)
210 | Some TK.EqualEqual -> Binop (Eqeq, lhs, rhs)
211 | Some TK.LessThanEqual -> Binop (Lte, lhs, rhs)
212 | Some TK.GreaterThanEqual -> Binop (Gte, lhs, rhs)
213 | Some TK.StarStar -> Binop (Starstar, lhs, rhs)
214 | Some TK.ExclamationEqual -> Binop (Diff, lhs, rhs)
215 | Some TK.BarEqual -> Binop (Eq (Some Bar), lhs, rhs)
216 | Some TK.PlusEqual -> Binop (Eq (Some Plus), lhs, rhs)
217 | Some TK.MinusEqual -> Binop (Eq (Some Minus), lhs, rhs)
218 | Some TK.StarEqual -> Binop (Eq (Some Star), lhs, rhs)
219 | Some TK.StarStarEqual -> Binop (Eq (Some Starstar),lhs, rhs)
220 | Some TK.SlashEqual -> Binop (Eq (Some Slash), lhs, rhs)
221 | Some TK.DotEqual -> Binop (Eq (Some Dot), lhs, rhs)
222 | Some TK.PercentEqual -> Binop (Eq (Some Percent), lhs, rhs)
223 | Some TK.CaratEqual -> Binop (Eq (Some Xor), lhs, rhs)
224 | Some TK.AmpersandEqual -> Binop (Eq (Some Amp), lhs, rhs)
225 | Some TK.BarBar -> Binop (BArbar, lhs, rhs)
226 | Some TK.AmpersandAmpersand -> Binop (AMpamp, lhs, rhs)
227 | Some TK.LessThanLessThan -> Binop (Ltlt, lhs, rhs)
228 | Some TK.GreaterThanGreaterThan -> Binop (Gtgt, lhs, rhs)
229 | Some TK.EqualEqualEqual -> Binop (EQeqeq, lhs, rhs)
230 | Some TK.LessThanLessThanEqual -> Binop (Eq (Some Ltlt), lhs, rhs)
231 | Some TK.GreaterThanGreaterThanEqual -> Binop (Eq (Some Gtgt), lhs, rhs)
232 | Some TK.LessThanGreaterThan -> Binop (Diff, lhs, rhs)
233 | Some TK.ExclamationEqualEqual -> Binop (Diff2, lhs, rhs)
234 | Some TK.LessThanEqualGreaterThan -> Binop (Cmp, lhs, rhs)
235 (* The ugly ducklings; In the FFP, `|>` and '??' are parsed as
236 * `BinaryOperator`s, whereas the typed AST has separate constructors for
237 * NullCoalesce, Pipe and Binop. This is why we don't just project onto a
238 * `bop`, but a `expr -> expr -> expr_`.
240 | Some TK.BarGreaterThan -> Pipe (lhs, rhs)
241 | Some TK.QuestionQuestion -> NullCoalesce (lhs, rhs)
242 (* TODO: Figure out why this fails silently when used in a pBlock; probably
243 just caught somewhere *)
244 | _ -> missing_syntax "binary operator" node env
246 let pImportFlavor : import_flavor parser = fun node env ->
247 match token_kind node with
248 | Some TK.Include -> Include
249 | Some TK.Require -> Require
250 | Some TK.Include_once -> IncludeOnce
251 | Some TK.Require_once -> RequireOnce
252 | _ -> missing_syntax "import flavor" node env
254 let pNullFlavor : og_null_flavor parser = fun node env ->
255 match token_kind node with
256 | Some TK.QuestionMinusGreaterThan -> OG_nullsafe
257 | Some TK.MinusGreaterThan -> OG_nullthrows
258 | _ -> missing_syntax "null flavor" node env
260 let pKinds : kind list parser = couldMap ~f:(fun node env ->
261 match token_kind node with
262 | Some TK.Final -> Final
263 | Some TK.Static -> Static
264 | Some TK.Abstract -> Abstract
265 | Some TK.Private -> Private
266 | Some TK.Public -> Public
267 | Some TK.Protected -> Protected
268 | Some TK.Var -> Public
269 | _ -> missing_syntax "kind" node env
272 let syntax_of_token : PositionedToken.t -> node = fun t ->
273 { syntax = Token t
274 ; value = PositionedSyntaxValue.make t.PT.source_text
275 t.PT.offset t.PT.leading_width t.PT.width t.PT.trailing_width
278 (* TODO: Clean up string escaping *)
279 let prepString2 : node list -> node list =
280 let trimLeft t =
281 PT.({ t with leading_width = t.leading_width + 1; width = t.width - 1 })
283 let trimRight t =
284 PT.({ t with trailing_width = t.trailing_width + 1; width = t.width - 1 })
286 function
287 | ({ syntax = Token t; _ }::ss)
288 when t.PT.width > 0 && (PT.text t).[0] = '"' ->
289 let rec unwind = function
290 | [{ syntax = Token t; _ }]
291 when t.PT.width > 0 && (PT.text t).[t.PT.width - 1] = '"' ->
292 let s = syntax_of_token (trimRight t) in
293 if width s > 0 then [s] else []
294 | x :: xs -> x :: unwind xs
295 | _ -> raise (Invalid_argument "Malformed String2 SyntaxList")
297 let s = syntax_of_token (trimLeft t) in
298 if width s > 0 then s :: unwind ss else unwind ss
299 | x -> x (* unchanged *)
301 let mkStr : (string -> string) -> string -> string = fun unescaper content ->
302 let no_quotes = try
303 if String.sub content 0 3 = "<<<" (* The heredoc case *)
304 then String.sub content 3 (String.length content - 4)
305 else String.sub content 1 (String.length content - 2)
306 with
307 | Invalid_argument _ -> content
309 try unescaper no_quotes with
310 | Php_escaping.Invalid_string _ -> raise @@
311 Failure (Printf.sprintf "Malformed string literal <<%s>>" no_quotes)
312 let unempty_str = function
313 | "''" | "\"\"" -> ""
314 | s -> s
315 let unesc_dbl s = unempty_str @@ Php_escaping.unescape_double s
316 let unesc_sgl s = unempty_str @@ Php_escaping.unescape_single s
317 let unesc_xhp s =
318 let whitespace = Str.regexp "[ \t\n\r\012]+" in
319 let s = Str.global_replace whitespace " " s in
320 let quotes = Str.regexp " ?\"\\([^\"]*\\)\" ?" in
321 if Str.string_match quotes s 0
322 then Str.matched_group 1 s
323 else s
324 let unesc_xhp_attr s =
325 let open Str in
326 unesc_dbl @@
327 if string_match (regexp "[ \t\n\r\012]*\"\\(\\(.\\|\n\\)*\\)\"") s 0
328 then matched_group 1 s
329 else s
331 type suspension_kind =
332 | SKSync
333 | SKAsync
334 | SKCoroutine
336 let mk_suspension_kind async_node coroutine_node =
337 match is_missing async_node, is_missing coroutine_node with
338 | true, true -> SKSync
339 | false, true -> SKAsync
340 | true, false -> SKCoroutine
341 | false, false -> raise (Failure "Couroutine functions may not be async")
343 let mk_fun_kind suspension_kind yield =
344 match suspension_kind, yield with
345 | SKSync, true -> FGenerator
346 | SKAsync, true -> FAsyncGenerator
347 | SKSync, false -> FSync
348 | SKAsync, false -> FAsync
349 (* TODO(t17335630): Implement an FCoroutine fun_kind *)
350 | SKCoroutine, false -> assert false
351 | SKCoroutine, true -> raise (Failure "Couroutine functions may not yield")
353 let fun_template yielding node suspension_kind =
354 let p = get_pos node in
355 { f_mode = !(lowerer_state.mode)
356 ; f_tparams = []
357 ; f_constrs = []
358 ; f_ret = None
359 ; f_ret_by_ref = false
360 ; f_name = p, ";anonymous"
361 ; f_params = []
362 ; f_body = []
363 ; f_user_attributes = []
364 ; f_fun_kind = mk_fun_kind suspension_kind yielding
365 ; f_namespace = Namespace_env.empty !(lowerer_state.popt)
366 ; f_span = p
369 let param_template node =
370 { param_hint = None
371 ; param_is_reference = false
372 ; param_is_variadic = false
373 ; param_id = pos_name node
374 ; param_expr = None
375 ; param_modifier = None
376 ; param_user_attributes = []
379 let pShapeFieldName : shape_field_name parser = fun name _env ->
380 match syntax name with
381 | ScopeResolutionExpression
382 { scope_resolution_qualifier; scope_resolution_name; _ } ->
383 SFclass_const
384 ( pos_name scope_resolution_qualifier
385 , pos_name scope_resolution_name
387 | _ -> let p, n = pos_name name in SFlit (p, mkStr unesc_dbl n)
389 let mpShapeExpressionField : ('a, (shape_field_name * 'a)) metaparser =
390 fun hintParser node env ->
391 match syntax node with
392 | FieldInitializer
393 { field_initializer_name = name; field_initializer_value = ty; _ } ->
394 let name = pShapeFieldName name env in
395 let ty = hintParser ty env in
396 name, ty
397 | _ -> missing_syntax "shape field" node env
399 let mpShapeField : ('a, shape_field) metaparser =
400 fun hintParser node env ->
401 match syntax node with
402 | FieldSpecifier { field_question; field_name; field_type; _ } ->
403 let sf_optional = not (is_missing field_question) in
404 let sf_name = pShapeFieldName field_name env in
405 let sf_hint = hintParser field_type env in
406 { sf_optional; sf_name; sf_hint }
407 | _ ->
408 let sf_name, sf_hint = mpShapeExpressionField hintParser node env in
409 (* Shape expressions can never have optional fields. *)
410 { sf_optional = false; sf_name; sf_hint }
412 let rec pHint : hint parser = fun node env ->
413 let rec pHint_ : hint_ parser = fun node env ->
414 match syntax node with
415 (* Dirty hack; CastExpression can have type represented by token *)
416 | Token _
417 | SimpleTypeSpecifier _
418 -> Happly (pos_name node, [])
419 | ShapeTypeSpecifier { shape_type_fields; shape_type_ellipsis; _ } ->
420 let si_allows_unknown_fields = not (is_missing shape_type_ellipsis) in
421 let si_shape_field_list =
422 couldMap ~f:(mpShapeField pHint) shape_type_fields env in
423 Hshape { si_allows_unknown_fields; si_shape_field_list }
424 | TupleTypeSpecifier { tuple_types; _ } ->
425 Htuple (couldMap ~f:pHint tuple_types env)
427 | KeysetTypeSpecifier { keyset_type_keyword = kw; keyset_type_type = ty; _ }
428 | VectorTypeSpecifier { vector_type_keyword = kw; vector_type_type = ty; _ }
429 | ClassnameTypeSpecifier {classname_keyword = kw; classname_type = ty; _ }
430 | TupleTypeExplicitSpecifier
431 { tuple_type_keyword = kw
432 ; tuple_type_types = ty
433 ; _ }
434 | VarrayTypeSpecifier
435 { varray_keyword = kw
436 ; varray_type = ty
437 ; _ }
438 | VectorArrayTypeSpecifier
439 { vector_array_keyword = kw
440 ; vector_array_type = ty
441 ; _ }
442 -> Happly (pos_name kw, couldMap ~f:pHint ty env)
444 | DarrayTypeSpecifier
445 { darray_keyword = kw
446 ; darray_key = key
447 ; darray_value = value
448 ; _ }
449 | MapArrayTypeSpecifier
450 { map_array_keyword = kw
451 ; map_array_key = key
452 ; map_array_value = value
453 ; _ } ->
454 Happly
455 ( pos_name kw
456 , List.map ~f:(fun x -> pHint x env) [ key; value ]
458 | DictionaryTypeSpecifier
459 { dictionary_type_keyword = kw
460 ; dictionary_type_members = members
461 ; _ } -> Happly (pos_name kw, couldMap ~f:pHint members env)
462 | GenericTypeSpecifier { generic_class_type; generic_argument_list } ->
463 Happly
464 ( pos_name generic_class_type
465 , match syntax generic_argument_list with
466 | TypeArguments { type_arguments_types; _ }
467 -> couldMap ~f:pHint type_arguments_types env
468 | _ -> missing_syntax "generic type arguments" generic_argument_list env
470 | NullableTypeSpecifier { nullable_type; _ } ->
471 Hoption (pHint nullable_type env)
472 | SoftTypeSpecifier { soft_type; _ } ->
473 Hsoft (pHint soft_type env)
474 | ClosureTypeSpecifier { closure_parameter_types; closure_return_type; _} ->
475 let param_types =
476 List.map ~f:(fun x -> pHint x env)
477 (as_list closure_parameter_types)
479 let is_variadic_param x = snd x = Htuple [] in
480 Hfun
481 ( List.filter ~f:(fun x -> not (is_variadic_param x)) param_types
482 , List.exists ~f:is_variadic_param param_types
483 , pHint closure_return_type env
485 | TypeConstant { type_constant_left_type; type_constant_right_type; _ } ->
486 let child = pos_name type_constant_right_type in
487 (match pHint_ type_constant_left_type env with
488 | Haccess (b, c, cs) -> Haccess (b, c, cs @ [child])
489 | Happly (b, []) -> Haccess (b, child, [])
490 | _ -> missing_syntax "type constant base" node env
492 | VariadicParameter _ ->
493 (* Clever trick warning: empty tuple types indicating variadic params *)
494 Htuple []
495 | _ -> missing_syntax "type hint" node env
497 get_pos node, pHint_ node env
499 type fun_hdr =
500 { fh_suspension_kind : suspension_kind
501 ; fh_name : pstring
502 ; fh_type_parameters : tparam list
503 ; fh_parameters : fun_param list
504 ; fh_return_type : hint option
505 ; fh_param_modifiers : fun_param list
506 ; fh_ret_by_ref : bool
509 let empty_fun_hdr =
510 { fh_suspension_kind = SKSync
511 ; fh_name = Pos.none, "<ANONYMOUS>"
512 ; fh_type_parameters = []
513 ; fh_parameters = []
514 ; fh_return_type = None
515 ; fh_param_modifiers = []
516 ; fh_ret_by_ref = false
519 let rec pSimpleInitializer node env =
520 match syntax node with
521 | SimpleInitializer { simple_initializer_value; _ } ->
522 pExpr simple_initializer_value env
523 | _ -> missing_syntax "simple initializer" node env
524 and pFunParam : fun_param parser = fun node env ->
525 match syntax node with
526 | ParameterDeclaration
527 { parameter_attribute
528 ; parameter_visibility
529 ; parameter_type
530 ; parameter_name
531 ; parameter_default_value
532 } ->
533 let is_reference, is_variadic, name =
534 match syntax parameter_name with
535 | DecoratedExpression
536 { decorated_expression_decorator; decorated_expression_expression } ->
537 (* There is a chance that the expression might be nested with an
538 additional decorator, check this *)
539 begin match syntax decorated_expression_expression with
540 | DecoratedExpression
541 { decorated_expression_decorator = nested_decorator
542 ; decorated_expression_expression = nested_expression } ->
543 let decorator = text decorated_expression_decorator in
544 let nested_decorator = text nested_decorator in
545 decorator = "&" || nested_decorator = "&",
546 decorator = "..." || nested_decorator = "...",
547 nested_expression
548 | _ ->
549 let decorator = text decorated_expression_decorator in
550 decorator = "&", decorator = "...", decorated_expression_expression
552 | _ -> false, false, parameter_name
554 { param_hint = mpOptional pHint parameter_type env
555 ; param_is_reference = is_reference
556 ; param_is_variadic = is_variadic
557 ; param_id = pos_name name
558 ; param_expr =
559 mpOptional pSimpleInitializer parameter_default_value env
560 ; param_user_attributes = List.concat @@
561 couldMap ~f:pUserAttribute parameter_attribute env
562 (* implicit field via constructor parameter.
563 * This is always None except for constructors and the modifier
564 * can be only Public or Protected or Private.
566 ; param_modifier =
567 let rec go = function
568 | [] -> None
569 | x :: _ when List.mem [Private; Public; Protected] x -> Some x
570 | _ :: xs -> go xs
572 go (pKinds parameter_visibility env)
574 | VariadicParameter _
575 | Token _ when text node = "..."
576 -> { (param_template node) with param_is_variadic = true }
577 | _ -> missing_syntax "function parameter" node env
578 and pUserAttribute : user_attribute list parser = fun node env ->
579 match syntax node with
580 | AttributeSpecification { attribute_specification_attributes; _ } ->
581 couldMap attribute_specification_attributes env ~f:begin function
582 | { syntax = Attribute { attribute_name; attribute_values; _}; _ } ->
583 fun env ->
584 { ua_name = pos_name attribute_name
585 ; ua_params = couldMap ~f:pExpr attribute_values env
587 | node -> missing_syntax "attribute" node
589 | _ -> missing_syntax "attribute specification" node env
590 and pAField : afield parser = fun node env ->
591 match syntax node with
592 | ElementInitializer { element_key; element_value; _ } ->
593 AFkvalue (pExpr element_key env, pExpr element_value env)
594 | _ -> AFvalue (pExpr node env)
595 and pString2: node list -> env -> expr list =
596 let rec aux l env acc =
597 (* in PHP "${x}" in strings is treated as if it was written "$x",
598 here we recognize pattern: Dollar; EmbeddedBracedExpression { QName (Token.Name) }
599 produced by FFP and lower it into Lvar.
601 match l with
602 | [] -> List.rev acc
603 | ({ syntax = Token { PT.kind = TK.Dollar; _ }; _ } as l)::
604 ({ syntax = EmbeddedBracedExpression {
605 embedded_braced_expression_expression = {
606 syntax = QualifiedNameExpression {
607 qualified_name_expression = {
608 syntax = Token { PT.kind = TK.Name; _ }
609 ; _ } as name
610 ; _ }
611 ; _ }
612 ; _ }
613 ; _ } as r)
614 ::tl ->
615 let pos, name = pos_name name in
616 let id = Lvar (pos, "$" ^ name) in
617 let left_pos = get_pos l in
618 let right_pos = get_pos r in
619 let pos =
620 if left_pos = Pos.none || right_pos = Pos.none then Pos.none
621 else
622 (* build final position as:
623 start_pos = start position of Dollar token
624 end_pos = end position pof embedded brace expression *)
625 let pos_file = Pos.filename left_pos in
626 let pos_start = Pos.pos_start left_pos in
627 let pos_end = Pos.pos_end right_pos in
628 Pos.make_from_file_pos ~pos_file ~pos_start ~pos_end
630 aux tl env ((pos, id)::acc)
631 | x::xs -> aux xs env ((pExpr ~top_level:false x env)::acc)
633 fun l env -> aux l env []
634 and pExpr ?top_level:(top_level=true) : expr parser = fun node env ->
635 let rec pExpr_ : expr_ parser = fun node env ->
636 let pos = get_pos node in
637 match syntax node with
638 | LambdaExpression {
639 lambda_async; lambda_coroutine; lambda_signature; lambda_body; _ } ->
640 let suspension_kind =
641 mk_suspension_kind lambda_async lambda_coroutine in
642 let f_params, f_ret =
643 match syntax lambda_signature with
644 | LambdaSignature { lambda_parameters; lambda_type; _ } ->
645 ( couldMap ~f:pFunParam lambda_parameters env
646 , mpOptional pHint lambda_type env
648 | Token _ -> ([param_template lambda_signature], None)
649 | _ -> missing_syntax "lambda signature" lambda_signature env
651 let pBody node env =
652 try mk_noop (pBlock node env) with
653 | _ ->
654 let (p,r) = pExpr node env in
655 [ Return (p, Some (p, r)) ]
657 let f_body, yield = mpYielding pBody lambda_body env in
658 Lfun
659 { (fun_template yield node suspension_kind) with f_ret; f_params; f_body }
661 | BracedExpression { braced_expression_expression = expr; _ }
662 | EmbeddedBracedExpression
663 { embedded_braced_expression_expression = expr; _ }
664 | ParenthesizedExpression { parenthesized_expression_expression = expr; _ }
665 -> pExpr_ expr env
667 | DictionaryIntrinsicExpression
668 { dictionary_intrinsic_keyword = kw
669 ; dictionary_intrinsic_members = members
670 ; _ }
671 | KeysetIntrinsicExpression
672 { keyset_intrinsic_keyword = kw
673 ; keyset_intrinsic_members = members
674 ; _ }
675 | VectorIntrinsicExpression
676 { vector_intrinsic_keyword = kw
677 ; vector_intrinsic_members = members
678 ; _ }
679 | CollectionLiteralExpression
680 { collection_literal_name = kw
681 ; collection_literal_initializers = members
682 ; _ }
683 -> Collection (pos_name kw, couldMap ~f:pAField members env)
685 | ArrayIntrinsicExpression { array_intrinsic_members = members; _ }
686 | ArrayCreationExpression { array_creation_members = members; _ }
688 (* TODO: Or tie in with other intrinsics and post-process to Array *)
689 Array (couldMap ~f:pAField members env)
691 | ListExpression { list_members = members; _ } ->
692 (* TODO: Or tie in with other intrinsics and post-process to List *)
693 let pBinderOrIgnore node env =
694 Option.value ~default:(Pos.none, Omitted) @@ mpOptional pExpr node env
696 List (couldMap ~f:pBinderOrIgnore members env)
698 | EvalExpression { eval_keyword = recv; eval_argument = args; _ }
699 | EmptyExpression { empty_keyword = recv; empty_argument = args; _ }
700 | IssetExpression { isset_keyword = recv; isset_argument_list = args; _ }
701 | TupleExpression
702 { tuple_expression_keyword = recv
703 ; tuple_expression_items = args
704 ; _ }
705 | FunctionCallExpression
706 { function_call_receiver = recv
707 ; function_call_argument_list = args
708 ; _ }
709 -> Call (pExpr recv env, couldMap ~f:pExpr args env, [])
711 | PrintExpression { print_keyword = _recv; print_expression = args } ->
712 (* TODO: Or tie in with FunctionCallExpression et al and post-process *)
713 Call ((pos, Id (pos, "echo")), couldMap ~f:pExpr args env, [])
715 | QualifiedNameExpression { qualified_name_expression } ->
716 Id (pos_name qualified_name_expression)
717 | VariableExpression { variable_expression } ->
718 Lvar (pos_name variable_expression)
720 | PipeVariableExpression _ ->
721 Lvar (pos, "$$")
723 | InclusionExpression { inclusion_require; inclusion_filename } ->
724 Import
725 ( pImportFlavor inclusion_require env
726 , pExpr inclusion_filename env
729 | MemberSelectionExpression
730 { member_object = recv
731 ; member_operator = op
732 ; member_name = name
734 | SafeMemberSelectionExpression
735 { safe_member_object = recv
736 ; safe_member_operator = op
737 ; safe_member_name = name
739 | EmbeddedMemberSelectionExpression
740 { embedded_member_object = recv
741 ; embedded_member_operator = op
742 ; embedded_member_name = name
744 -> Obj_get (pExpr recv env, pExpr name env, pNullFlavor op env)
746 | PrefixUnaryExpression
747 { prefix_unary_operator = operator
748 ; prefix_unary_operand = operand
750 | PostfixUnaryExpression
751 { postfix_unary_operand = operand
752 ; postfix_unary_operator = operator
754 | DecoratedExpression
755 { decorated_expression_expression = operand
756 ; decorated_expression_decorator = operator
759 let expr = pExpr operand env in
761 * FFP does not destinguish between ++$i and $i++ on the level of token
762 * kind annotation. Prevent duplication by switching on `postfix` for
763 * the two operatores for which AST /does/ differentiate between
764 * fixities.
766 let postfix = kind node = SyntaxKind.PostfixUnaryExpression in
767 (match token_kind operator with
768 | Some TK.PlusPlus when postfix -> Unop (Upincr, expr)
769 | Some TK.MinusMinus when postfix -> Unop (Updecr, expr)
770 | Some TK.PlusPlus -> Unop (Uincr, expr)
771 | Some TK.MinusMinus -> Unop (Udecr, expr)
772 | Some TK.Exclamation -> Unop (Unot, expr)
773 | Some TK.Tilde -> Unop (Utild, expr)
774 | Some TK.Plus -> Unop (Uplus, expr)
775 | Some TK.Minus -> Unop (Uminus, expr)
776 | Some TK.Ampersand -> Unop (Uref, expr)
777 | Some TK.DotDotDot -> Unop (Usplat, expr)
778 | Some TK.At -> Unop (Usilence, expr)
779 | Some TK.Await -> Await expr
780 | Some TK.Clone -> Clone expr
781 | Some TK.Dollar ->
782 (match snd expr with
783 | Lvarvar (n, id) -> Lvarvar (n + 1, id)
784 | Lvar id -> Lvarvar (1, id)
785 | _ -> BracedExpr expr
787 | _ -> missing_syntax "unary operator" node env
789 | BinaryExpression
790 { binary_left_operand; binary_operator; binary_right_operand }
792 pBop binary_operator env
793 (pExpr binary_left_operand env)
794 (pExpr binary_right_operand env)
796 | Token _ when top_level -> Id (pos_name node)
797 | Token _ -> String (pos, unesc_dbl (text node))
799 | YieldExpression { yield_operand; _ } when text yield_operand = "break" ->
800 env.saw_yield := true;
801 Yield_break
802 | YieldExpression { yield_operand; _ } ->
803 env.saw_yield := true;
804 Yield (pAField yield_operand env)
806 | DefineExpression { define_keyword; define_argument_list; _ } -> Call
807 ( (let name = pos_name define_keyword in fst name, Id name)
808 , List.map ~f:(fun x -> pExpr x env) (as_list define_argument_list)
809 , []
812 | ScopeResolutionExpression
813 { scope_resolution_qualifier; scope_resolution_name; _ } ->
814 let (_, n) as name = pos_name scope_resolution_name in
815 let qual = pos_name scope_resolution_qualifier in
816 if String.length n > 0 && n.[0] = '$'
817 then Class_get (qual, name)
818 else Class_const (qual, name)
820 | CastExpression { cast_type; cast_operand; _ } ->
821 Cast (pHint cast_type env, pExpr cast_operand env)
822 | ConditionalExpression
823 { conditional_test; conditional_consequence; conditional_alternative; _ }
824 -> Eif
825 ( pExpr conditional_test env
826 , mpOptional pExpr conditional_consequence env
827 , pExpr conditional_alternative env
829 | SubscriptExpression { subscript_receiver; subscript_index; _ } ->
830 Array_get
831 ( pExpr subscript_receiver env
832 , mpOptional pExpr subscript_index env
834 | EmbeddedSubscriptExpression
835 { embedded_subscript_receiver; embedded_subscript_index; _ } ->
836 Array_get
837 ( pExpr embedded_subscript_receiver env
838 , mpOptional pExpr embedded_subscript_index env
840 | ShapeExpression { shape_expression_fields; _ } ->
841 Shape (
842 couldMap ~f:(mpShapeExpressionField pExpr) shape_expression_fields env
844 | ObjectCreationExpression
845 { object_creation_type; object_creation_argument_list; _ } ->
847 ( (match syntax object_creation_type with
848 | GenericTypeSpecifier { generic_class_type; generic_argument_list } ->
849 let name = pos_name generic_class_type in
850 let hints =
851 match syntax generic_argument_list with
852 | TypeArguments { type_arguments_types; _ }
853 -> couldMap ~f:pHint type_arguments_types env
854 | _ ->
855 missing_syntax "generic type arguments" generic_argument_list env
857 fst name, Id_type_arguments (name, hints)
858 | QualifiedNameExpression _
859 | SimpleTypeSpecifier _
860 | Token _
861 -> let name = pos_name object_creation_type in fst name, Id name
862 | _ -> pExpr object_creation_type env
864 , couldMap ~f:pExpr object_creation_argument_list env
865 , []
867 | GenericTypeSpecifier
868 { generic_class_type
869 ; generic_argument_list
870 } ->
871 let name = pos_name generic_class_type in
872 let hints =
873 match syntax generic_argument_list with
874 | TypeArguments { type_arguments_types; _ }
875 -> couldMap ~f:pHint type_arguments_types env
876 | _ ->
877 missing_syntax "generic type arguments" generic_argument_list env
879 Id_type_arguments (name, hints)
880 | LiteralExpression { literal_expression = expr } ->
881 (match syntax expr with
882 | Token _ ->
883 let s = text expr in
884 (* TODO(17796330): Get rid of linter functionality in the lowerer *)
885 if s <> String.lowercase s then Lint.lowercase_constant pos s;
886 (match token_kind expr with
887 | Some TK.DecimalLiteral
888 | Some TK.OctalLiteral
889 | Some TK.HexadecimalLiteral
890 | Some TK.BinaryLiteral -> Int (pos, s)
891 | Some TK.FloatingLiteral -> Float (pos, s)
892 | Some TK.SingleQuotedStringLiteral -> String (pos, mkStr unesc_sgl s)
893 | Some TK.DoubleQuotedStringLiteral
894 | Some TK.HeredocStringLiteral
895 | Some TK.NowdocStringLiteral -> String (pos, mkStr unesc_dbl s)
896 | Some TK.NullLiteral -> Null
897 | Some TK.BooleanLiteral ->
898 (match String.lowercase_ascii s with
899 | "false" -> False
900 | "true" -> True
901 | _ -> missing_syntax ("boolean (not: " ^ s ^ ")") expr env
903 | _ -> missing_syntax "literal" expr env
906 | SyntaxList ts -> String2 (pString2 (prepString2 ts) env)
907 | _ -> missing_syntax "literal expression" expr env
910 | InstanceofExpression
911 { instanceof_left_operand; instanceof_right_operand; _ } ->
912 let ty =
913 match pExpr instanceof_right_operand env with
914 | p, Class_const (pid, (_, "")) -> p, Id pid
915 | ty -> ty
917 InstanceOf (pExpr instanceof_left_operand env, ty)
918 (* TODO: Priority fix? *)
919 (*match pExpr instanceof_left_operand env with
920 | p, Unop (o,e) -> Unop (0, (p, InstanceOf (e, ty)))
921 | e -> InstanceOf (e, ty)
923 | AnonymousFunction
924 { anonymous_async_keyword
925 ; anonymous_coroutine_keyword
926 ; anonymous_parameters
927 ; anonymous_type
928 ; anonymous_use
929 ; anonymous_body
930 ; _ } ->
931 let pArg node env =
932 match syntax node with
933 | PrefixUnaryExpression
934 { prefix_unary_operator = op; prefix_unary_operand = v } ->
935 pos_name v, token_kind op = Some TK.Ampersand
936 | Token _ -> pos_name node, false
937 | _ -> missing_syntax "use variable" node env
939 let pUse node =
940 match syntax node with
941 | AnonymousFunctionUseClause { anonymous_use_variables; _ } ->
942 couldMap ~f:pArg anonymous_use_variables
943 | _ -> fun _env -> []
945 let suspension_kind =
946 mk_suspension_kind
947 anonymous_async_keyword
948 anonymous_coroutine_keyword in
949 let body, yield = mpYielding pBlock anonymous_body env in
950 Efun
951 ( { (fun_template yield node suspension_kind) with
952 f_ret = mpOptional pHint anonymous_type env
953 ; f_params = couldMap ~f:pFunParam anonymous_parameters env
954 ; f_body = mk_noop body
956 , try pUse anonymous_use env with _ -> []
959 | AwaitableCreationExpression
960 { awaitable_async; awaitable_coroutine; awaitable_compound_statement } ->
961 let suspension_kind =
962 mk_suspension_kind awaitable_async awaitable_coroutine in
963 let blk, yld = mpYielding pBlock awaitable_compound_statement env in
964 let body =
965 { (fun_template yld node suspension_kind) with f_body = mk_noop blk }
967 Call ((get_pos node, Lfun body), [], [])
968 | XHPExpression
969 { xhp_open =
970 { syntax = XHPOpen { xhp_open_name; xhp_open_attributes; _ }; _ }
971 ; xhp_body = body
972 ; _ } ->
973 lowerer_state.ignorePos := false;
974 let name =
975 let pos, name = pos_name xhp_open_name in
976 (pos, ":" ^ name)
978 let combine b e =
979 syntax_of_token PT.(
980 make (kind b) (source_text b) (leading_start_offset b)
981 (start_offset e - start_offset b + width e) (leading b) (trailing e)
984 let aggregate_tokens node =
985 let rec search = function (* scroll through non-token things *)
986 | [] -> []
987 | t :: xs when token_kind t = Some TK.XHPComment -> search xs
988 | { syntax = Token b; _ } as t :: xs -> track t b None xs
989 | x :: xs -> x :: search xs
990 and track t b oe = function (* keep going through consecutive tokens *)
991 | { syntax = Token e; _ } :: xs -> track t b (Some e) xs
992 | xs -> Option.value_map oe ~default:t ~f:(combine b) :: search xs
994 search (as_list node)
996 let pEmbedded escaper node env =
997 match syntax node with
998 | Token _ ->
999 let p = get_pos node in
1000 p, String (p, escaper (full_text node))
1001 | _ -> pExpr node env
1003 let pAttr = fun node env ->
1004 match syntax node with
1005 | XHPAttribute { xhp_attribute_name; xhp_attribute_expression; _ } ->
1006 ( pos_name xhp_attribute_name
1007 , pEmbedded unesc_xhp_attr xhp_attribute_expression env
1009 | _ -> missing_syntax "XHP attribute" node env
1012 ( name
1013 , couldMap ~f:pAttr xhp_open_attributes env
1014 , List.map ~f:(fun x -> pEmbedded unesc_xhp x env)
1015 (aggregate_tokens body)
1017 (* FIXME; should this include Missing? ; "| Missing -> Null" *)
1018 | _ -> missing_syntax "expression" node env
1020 (* Since we need positions in XHP, regardless of the ignorePos flag, we
1021 * parenthesise the call to pExpr_ so that the XHP expression case can flip
1022 * the switch. The key part is that `get_pos node` happens before the old
1023 * setting is restored.
1025 * Evaluation order matters here!
1027 let local_ignore_pos = !(lowerer_state.ignorePos) in
1028 let expr_ = pExpr_ node env in
1029 let p = get_pos node in
1030 lowerer_state.ignorePos := local_ignore_pos;
1031 p, expr_
1032 and pBlock : block parser = fun node env ->
1033 match pStmt node env with
1034 | Block block -> List.filter ~f:(fun x -> x <> Noop) block
1035 | stmt -> [stmt]
1036 and pStmt : stmt parser = fun node env ->
1037 match syntax node with
1038 | SwitchStatement { switch_expression; switch_sections; _ } ->
1039 let pSwitchLabel : (block -> case) parser = fun node env cont ->
1040 match syntax node with
1041 | CaseLabel { case_expression; _ } ->
1042 Case (pExpr case_expression env, cont)
1043 | DefaultLabel _ -> Default cont
1044 | _ -> missing_syntax "pSwitchLabel" node env
1046 let pSwitchSection : case list parser = fun node env ->
1047 match syntax node with
1048 | SwitchSection { switch_section_labels; switch_section_statements; _ } ->
1049 let rec null_out cont = function
1050 | [x] -> [x cont]
1051 | (x::xs) -> x [] :: null_out cont xs
1052 | _ -> raise (Failure "Malformed block result")
1054 let blk = couldMap ~f:pStmt switch_section_statements env in
1055 null_out blk (couldMap ~f:pSwitchLabel switch_section_labels env)
1056 | _ -> missing_syntax "switch section" node env
1058 Switch
1059 ( pExpr switch_expression env
1060 , List.concat @@ couldMap ~f:pSwitchSection switch_sections env
1062 | IfStatement
1063 { if_condition; if_statement; if_elseif_clauses; if_else_clause; _ } ->
1064 let pElseIf : (block -> block) parser = fun node env ->
1065 match syntax node with
1066 | ElseifClause { elseif_condition; elseif_statement; _ } ->
1067 fun next_clause ->
1068 [ If
1069 ( pExpr elseif_condition env
1070 , [ pStmt elseif_statement env ]
1071 , next_clause
1074 | _ -> missing_syntax "elseif clause" node env
1077 ( pExpr if_condition env
1078 , [ pStmt if_statement env ]
1079 , List.fold_right ~f:(@@)
1080 (couldMap ~f:pElseIf if_elseif_clauses env)
1081 ~init:[ match syntax if_else_clause with
1082 | ElseClause { else_statement; _ } -> pStmt else_statement env
1083 | Missing -> Noop
1084 | _ -> missing_syntax "else clause" if_else_clause env
1087 | ExpressionStatement { expression_statement_expression; _ } ->
1088 if is_missing expression_statement_expression
1089 then Noop
1090 else Expr (pExpr expression_statement_expression env)
1091 | CompoundStatement { compound_statements; _ } ->
1092 Block (List.filter ~f:(fun x -> x <> Noop) @@
1093 couldMap ~f:pStmt compound_statements env)
1094 | ThrowStatement { throw_expression; _ } -> Throw (pExpr throw_expression env)
1095 | DoStatement { do_body; do_condition; _ } ->
1096 Do ([Block (pBlock do_body env)], pExpr do_condition env)
1097 | WhileStatement { while_condition; while_body; _ } ->
1098 While (pExpr while_condition env, [ pStmt while_body env ])
1099 | ForStatement
1100 { for_initializer; for_control; for_end_of_loop; for_body; _ } ->
1101 let pExprL node env =
1102 (get_pos node, Expr_list (couldMap ~f:pExpr node env))
1105 ( pExprL for_initializer env
1106 , pExprL for_control env
1107 , pExprL for_end_of_loop env
1108 , [Block (pBlock for_body env)]
1110 | ForeachStatement
1111 { foreach_collection
1112 ; foreach_await_keyword
1113 ; foreach_key
1114 ; foreach_value
1115 ; foreach_body
1116 ; _ } ->
1117 Foreach
1118 ( pExpr foreach_collection env
1119 , ( if token_kind foreach_await_keyword = Some TK.Await
1120 then Some (get_pos foreach_await_keyword)
1121 else None
1123 , ( let value = pExpr foreach_value env in
1124 Option.value_map (mpOptional pExpr foreach_key env)
1125 ~default:(As_v value)
1126 ~f:(fun key -> As_kv (key, value))
1128 , [ pStmt foreach_body env ]
1130 | TryStatement
1131 { try_compound_statement; try_catch_clauses; try_finally_clause; _ } ->
1133 ( [ Block (pBlock try_compound_statement env) ]
1134 , couldMap try_catch_clauses env ~f:begin fun node env ->
1135 match syntax node with
1136 | CatchClause { catch_type; catch_variable; catch_body; _ } ->
1137 ( pos_name catch_type
1138 , pos_name catch_variable
1139 , [ Block (mpStripNoop pBlock catch_body env) ]
1141 | _ -> missing_syntax "catch clause" node env
1143 , match syntax try_finally_clause with
1144 | FinallyClause { finally_body; _ } -> [ Block (pBlock finally_body env) ]
1145 | _ -> []
1147 | FunctionStaticStatement { static_declarations; _ } ->
1148 let pStaticDeclarator node env =
1149 match syntax node with
1150 | StaticDeclarator { static_name; static_initializer } ->
1151 let lhs =
1152 match pExpr static_name env with
1153 | p, Id (p', s) -> p, Lvar (p', s)
1154 | x -> x
1156 (match syntax static_initializer with
1157 | SimpleInitializer { simple_initializer_value; _ } ->
1158 ( get_pos static_initializer
1159 , Binop (Eq None, lhs, pExpr simple_initializer_value env)
1161 | _ -> lhs
1163 | _ -> missing_syntax "static declarator" node env
1165 Static_var (couldMap ~f:pStaticDeclarator static_declarations env)
1166 | ReturnStatement { return_expression; _ } -> Return
1167 ( get_pos return_expression
1168 , mpOptional pExpr return_expression env
1170 | Full_fidelity_positioned_syntax.GotoLabel { goto_label_name; _ } ->
1171 let pos = get_pos goto_label_name in
1172 let label_name = text goto_label_name in
1173 Ast.GotoLabel (pos, label_name)
1174 | GotoStatement { goto_statement_label_name; _ } ->
1175 Goto (pos_name goto_statement_label_name)
1176 | EchoStatement { echo_keyword = kw; echo_expressions = exprs; _ }
1177 | UnsetStatement { unset_keyword = kw; unset_variables = exprs; _ }
1178 -> Expr
1179 ( get_pos node
1180 , Call
1181 ( (match syntax kw with
1182 | QualifiedNameExpression _
1183 | SimpleTypeSpecifier _
1184 | Token _
1185 -> let name = pos_name kw in fst name, Id name
1186 | _ -> missing_syntax "id" kw env
1188 , couldMap ~f:pExpr exprs env
1189 , []
1191 | BreakStatement { break_level=level; _ } ->
1192 Break (get_pos node, pBreak_or_continue_level env level)
1193 | ContinueStatement { continue_level=level; _ } ->
1194 Continue (get_pos node, pBreak_or_continue_level env level)
1195 | GlobalStatement { global_variables; _ } ->
1196 Global_var (couldMap ~f:pExpr global_variables env)
1197 | MarkupSection _ -> pMarkup node env
1198 | _ when env.max_depth > 0 ->
1199 (* OCaml optimisers; Forgive them, for they know not what they do!
1201 * The max_depth is only there to stop the *optimised* version from an
1202 * unbounded recursion. Sad times.
1204 Def_inline (pDef node { env with max_depth = env.max_depth - 1 })
1205 | _ -> missing_syntax "statement" node env
1207 and pMarkup node env =
1208 match syntax node with
1209 | MarkupSection { markup_text; markup_expression; _ } ->
1210 let expr =
1211 match syntax markup_expression with
1212 | Missing -> None
1213 | ExpressionStatement {
1214 expression_statement_expression = e
1215 ; _} -> Some (pExpr e env)
1216 | _ -> failwith "expression expected"
1218 Markup ((get_pos node, text markup_text), expr)
1219 | _ -> failwith "invalid node"
1221 and pBreak_or_continue_level env level =
1222 match mpOptional pExpr level env with
1223 | Some (_, Int(_, s)) -> Some (int_of_string s)
1224 | _ -> None
1226 and pTConstraintTy : hint parser = fun node ->
1227 match syntax node with
1228 | TypeConstraint { constraint_type; _ } -> pHint constraint_type
1229 | _ -> missing_syntax "type constraint" node
1231 and pTConstraint : (constraint_kind * hint) parser = fun node env ->
1232 match syntax node with
1233 | TypeConstraint { constraint_keyword; constraint_type } ->
1234 ( (match token_kind constraint_keyword with
1235 | Some TK.As -> Constraint_as
1236 | Some TK.Super -> Constraint_super
1237 | Some TK.Equal -> Constraint_eq
1238 | _ -> missing_syntax "constraint operator" constraint_keyword env
1240 , pHint constraint_type env
1242 | _ -> missing_syntax "type constraint" node env
1244 and pTParaml : tparam list parser = fun node env ->
1245 let pTParam : tparam parser = fun node env ->
1246 match syntax node with
1247 | TypeParameter { type_variance; type_name; type_constraints } ->
1248 ( (match token_kind type_variance with
1249 | Some TK.Plus -> Covariant
1250 | Some TK.Minus -> Contravariant
1251 | _ -> Invariant
1253 , pos_name type_name
1254 , couldMap ~f:pTConstraint type_constraints env
1256 | _ -> missing_syntax "type parameter" node env
1258 match syntax node with
1259 | Missing -> []
1260 | TypeParameters { type_parameters_parameters; _ } ->
1261 couldMap ~f:pTParam type_parameters_parameters env
1262 | _ -> missing_syntax "type parameter list" node env
1264 (* TODO: Translate the where clause *)
1265 and pFunHdr : fun_hdr parser = fun node env ->
1266 match syntax node with
1267 | FunctionDeclarationHeader
1268 { function_async
1269 ; function_coroutine
1270 ; function_ampersand
1271 ; function_name
1272 ; function_type_parameter_list
1273 ; function_parameter_list
1274 ; function_type
1275 ; _ } ->
1276 let fh_parameters = couldMap ~f:pFunParam function_parameter_list env in
1277 let fh_return_type = mpOptional pHint function_type env in
1278 let fh_suspension_kind =
1279 mk_suspension_kind function_async function_coroutine in
1280 { fh_suspension_kind
1281 ; fh_name = pos_name function_name
1282 ; fh_type_parameters = pTParaml function_type_parameter_list env
1283 ; fh_parameters
1284 ; fh_return_type
1285 ; fh_param_modifiers =
1286 List.filter ~f:(fun p -> Option.is_some p.param_modifier) fh_parameters
1287 ; fh_ret_by_ref = is_ret_by_ref function_ampersand
1289 | LambdaSignature { lambda_parameters; lambda_type; _ } ->
1290 { empty_fun_hdr with
1291 fh_parameters = couldMap ~f:pFunParam lambda_parameters env
1292 ; fh_return_type = mpOptional pHint lambda_type env
1294 | Token _ -> empty_fun_hdr
1295 | _ -> missing_syntax "function header" node env
1297 and pClassElt : class_elt list parser = fun node env ->
1298 match syntax node with
1299 | ConstDeclaration
1300 { const_abstract; const_type_specifier; const_declarators; _ } ->
1301 let ty = mpOptional pHint const_type_specifier env in
1302 let res =
1303 couldMap const_declarators env ~f:begin function
1304 | { syntax = ConstantDeclarator
1305 { constant_declarator_name; constant_declarator_initializer }
1306 ; _ } -> fun env ->
1307 ( pos_name constant_declarator_name
1308 (* TODO: Parse error when const is abstract and has inits *)
1309 , if is_missing const_abstract
1310 then mpOptional pSimpleInitializer constant_declarator_initializer env
1311 else None
1313 | node -> missing_syntax "constant declarator" node env
1316 let rec aux absts concrs = function
1317 | (id, None ) :: xs -> aux (AbsConst (ty, id) :: absts) concrs xs
1318 | (id, Some x) :: xs -> aux absts ((id, x) :: concrs) xs
1319 | [] when concrs = [] -> List.rev absts
1320 | [] -> Const (ty, List.rev concrs) :: List.rev absts
1322 aux [] [] res
1323 | TypeConstDeclaration
1324 { type_const_abstract
1325 ; type_const_name
1326 ; type_const_type_constraint
1327 ; type_const_type_specifier
1328 ; _ } ->
1329 [ TypeConst
1330 { tconst_abstract = not (is_missing type_const_abstract)
1331 ; tconst_name = pos_name type_const_name
1332 ; tconst_constraint = mpOptional pTConstraintTy type_const_type_constraint env
1333 ; tconst_type = mpOptional pHint type_const_type_specifier env
1334 ; tconst_span = get_pos node
1337 | PropertyDeclaration
1338 { property_modifiers; property_type; property_declarators; _ } ->
1339 [ ClassVars
1340 ( pKinds property_modifiers env
1341 , mpOptional pHint property_type env
1342 , couldMap property_declarators env ~f:begin fun node env ->
1343 match syntax node with
1344 | PropertyDeclarator { property_name; property_initializer } ->
1345 ( let _, n as name = pos_name property_name in
1346 ( get_pos node
1347 , ( if n.[0] = '$'
1348 then drop_pstr 1 name
1349 else name
1351 , mpOptional pSimpleInitializer property_initializer env
1354 | _ -> missing_syntax "property declarator" node env
1358 | MethodishDeclaration
1359 { methodish_attribute
1360 ; methodish_modifiers
1361 ; methodish_function_decl_header
1362 ; methodish_function_body
1363 ; _ } ->
1364 let classvar_init : fun_param -> stmt * class_elt = fun param ->
1365 let p, _ as cvname = drop_pstr 1 param.param_id in (* Drop the '$' *)
1366 let span =
1367 match param.param_expr with
1368 | Some (pos_end, _) -> Pos.btw p pos_end
1369 | None -> p
1371 ( Expr (p, Binop (Eq None,
1372 (p, Obj_get((p, Lvar (p, "$this")), (p, Id cvname), OG_nullthrows)),
1373 (p, Lvar param.param_id)
1375 , ClassVars
1376 ( Option.to_list param.param_modifier
1377 , param.param_hint
1378 , [span, cvname, None]
1382 let hdr = pFunHdr methodish_function_decl_header env in
1383 let member_init, member_def =
1384 List.unzip @@
1385 List.filter_map hdr.fh_parameters ~f:(fun p ->
1386 Option.map ~f: (fun _ -> classvar_init p) p.param_modifier
1389 let pBody = fun node env ->
1390 if is_missing node then [] else
1391 (* TODO: Give parse error when not abstract, but body is missing *)
1392 List.rev member_init @
1393 match pStmt node env with
1394 | Block [] -> [Noop]
1395 | Block stmtl -> stmtl
1396 | stmt -> [stmt]
1398 let body, body_has_yield = mpYielding pBody methodish_function_body env in
1399 let body =
1400 (* Drop it on the floor in quickMode; we still need to process the body
1401 * to know, e.g. whether it contains a yield.
1403 if !(lowerer_state.quickMode)
1404 then [Noop]
1405 else body
1407 let kind = pKinds methodish_modifiers env in
1408 member_def @ [Method
1409 { m_kind = kind
1410 ; m_tparams = hdr.fh_type_parameters
1411 ; m_constrs = []
1412 ; m_name = hdr.fh_name
1413 ; m_params = hdr.fh_parameters
1414 ; m_body = body
1415 ; m_user_attributes = List.concat @@
1416 couldMap ~f:pUserAttribute methodish_attribute env
1417 ; m_ret = hdr.fh_return_type
1418 ; m_ret_by_ref = hdr.fh_ret_by_ref
1419 ; m_span = get_pos node
1420 ; m_fun_kind = mk_fun_kind hdr.fh_suspension_kind body_has_yield
1422 | TraitUseConflictResolution {
1423 trait_use_conflict_resolution_names;
1424 trait_use_conflict_resolution_clauses;
1426 } ->
1427 let pTraitUseConflictResolutionItem node env =
1428 match syntax node with
1429 | TraitUseConflictResolutionItem
1430 { trait_use_conflict_resolution_item_aliasing_name = aliasing_name;
1431 trait_use_conflict_resolution_item_aliasing_keyword = alias_kw;
1432 trait_use_conflict_resolution_item_aliased_name = aliased_name;
1434 } ->
1435 let aliasing_name, opt_scope_resolution_name =
1436 match syntax aliasing_name with
1437 | ScopeResolutionExpression
1438 { scope_resolution_qualifier; scope_resolution_name; _ } ->
1439 pos_name scope_resolution_qualifier,
1440 Some (pos_name scope_resolution_name)
1441 | _ -> pos_name aliasing_name, None
1443 let alias_type =
1444 match token_kind alias_kw with
1445 | Some TK.As -> CU_as
1446 | Some TK.Insteadof -> CU_insteadof
1447 | _ ->
1448 missing_syntax "trait use conflict resolution item" alias_kw env
1450 ClassUseAlias ((aliasing_name, opt_scope_resolution_name),
1451 pos_name aliased_name,
1452 alias_type)
1453 | _ -> missing_syntax "trait use conflict resolution item" node env
1455 (couldMap ~f:(fun n e ->
1456 ClassUse (pHint n e)) trait_use_conflict_resolution_names env)
1457 @ (couldMap ~f:pTraitUseConflictResolutionItem
1458 trait_use_conflict_resolution_clauses env)
1459 | TraitUse { trait_use_names; _ } ->
1460 couldMap ~f:(fun n e -> ClassUse (pHint n e)) trait_use_names env
1461 | RequireClause { require_kind; require_name; _ } ->
1462 [ ClassTraitRequire
1463 ( (match token_kind require_kind with
1464 | Some TK.Implements -> MustImplement
1465 | Some TK.Extends -> MustExtend
1466 | _ -> missing_syntax "trait require kind" require_kind env
1468 , pHint require_name env
1471 | XHPClassAttributeDeclaration { xhp_attribute_attributes; _ } ->
1472 let pXHPAttr node env =
1473 match syntax node with
1474 | XHPClassAttribute
1475 { xhp_attribute_decl_type = ty
1476 ; xhp_attribute_decl_name = name
1477 ; xhp_attribute_decl_initializer = init
1478 ; xhp_attribute_decl_required = req
1479 } ->
1480 let (p, name) = pos_name name in
1481 XhpAttr
1482 ( mpOptional pHint ty env
1483 , (Pos.none, (p, ":" ^ name), mpOptional pSimpleInitializer init env)
1484 , not (is_missing req)
1485 , match syntax ty with
1486 | XHPEnumType { xhp_enum_values; _ } ->
1487 Some (get_pos ty, couldMap ~f:pExpr xhp_enum_values env)
1488 | _ -> None
1490 | XHPSimpleClassAttribute { xhp_simple_class_attribute_type = attr } ->
1491 XhpAttrUse (get_pos attr, Happly (pos_name attr, []))
1492 | Token _ ->
1493 XhpAttrUse (get_pos node, Happly (pos_name node, []))
1494 | _ -> missing_syntax "XHP attribute" node env
1496 couldMap ~f:pXHPAttr xhp_attribute_attributes env
1497 | XHPChildrenDeclaration { xhp_children_expression; _; } ->
1498 [ XhpChild (pXhpChild xhp_children_expression env) ]
1499 | XHPCategoryDeclaration { xhp_category_categories = cats; _ } ->
1500 let pNameSansPercent node _env = drop_pstr 1 (pos_name node) in
1501 [ XhpCategory (couldMap ~f:pNameSansPercent cats env) ]
1502 | _ -> missing_syntax "expression" node env
1504 and pXhpChild : xhp_child parser = fun node env ->
1505 match syntax node with
1506 | Token t -> ChildName (pos_name node)
1507 | PostfixUnaryExpression { postfix_unary_operand; postfix_unary_operator;} ->
1508 let operand = pXhpChild postfix_unary_operand env in
1509 let operator =
1510 begin
1511 match token_kind postfix_unary_operator with
1512 | Some TK.Question -> ChildQuestion
1513 | Some TK.Plus -> ChildPlus
1514 | Some TK.Star -> ChildStar
1515 | _ -> missing_syntax "xhp children operator" node env
1516 end in
1517 ChildUnary(operand, operator)
1518 | BinaryExpression
1519 { binary_left_operand; binary_right_operand; _ } ->
1520 let left = pXhpChild binary_left_operand env in
1521 let right = pXhpChild binary_right_operand env in
1522 ChildBinary(left, right)
1523 | XHPChildrenParenthesizedList {xhp_children_list_xhp_children; _} ->
1524 let children = as_list xhp_children_list_xhp_children in
1525 let children = List.map ~f:(fun x -> pXhpChild x env) children in
1526 ChildList children
1527 | _ -> missing_syntax "xhp children" node env
1530 (*****************************************************************************(
1531 * Parsing definitions (AST's `def`)
1532 )*****************************************************************************)
1533 and pDef : def parser = fun node env ->
1534 match syntax node with
1535 | FunctionDeclaration
1536 { function_attribute_spec; function_declaration_header; function_body } ->
1537 let hdr = pFunHdr function_declaration_header env in
1538 let block, yield = mpYielding (mpOptional pBlock) function_body env in
1540 { (fun_template yield node hdr.fh_suspension_kind) with
1541 f_tparams = hdr.fh_type_parameters
1542 ; f_ret = hdr.fh_return_type
1543 ; f_name = hdr.fh_name
1544 ; f_params = hdr.fh_parameters
1545 ; f_ret_by_ref = hdr.fh_ret_by_ref
1546 ; f_body =
1547 if !(lowerer_state.quickMode)
1548 then [Noop]
1549 else begin
1550 (* FIXME: Filthy hack to catch UNSAFE *)
1551 let containsUNSAFE =
1552 let re = Str.regexp_string "UNSAFE" in
1553 try Str.search_forward re (full_text function_body) 0 >= 0 with
1554 | Not_found -> false
1556 match block with
1557 | Some [Noop] when containsUNSAFE -> [Unsafe]
1558 | Some [] -> [Noop]
1559 | None -> []
1560 | Some b -> b
1562 ; f_user_attributes =
1563 List.concat @@ couldMap ~f:pUserAttribute function_attribute_spec env
1565 | ClassishDeclaration
1566 { classish_attribute = attr
1567 ; classish_modifiers = mods
1568 ; classish_keyword = kw
1569 ; classish_name = name
1570 ; classish_type_parameters = tparaml
1571 ; classish_extends_list = exts
1572 ; classish_implements_list = impls
1573 ; classish_body =
1574 { syntax = ClassishBody { classish_body_elements = elts; _ }; _ }
1575 ; _ } ->
1576 Class
1577 { c_mode = !(lowerer_state.mode)
1578 ; c_user_attributes = List.concat @@ couldMap ~f:pUserAttribute attr env
1579 ; c_final = List.mem (pKinds mods env) Final
1580 ; c_is_xhp =
1581 (match token_kind name with
1582 | Some TK.XHPElementName | Some TK.XHPClassName -> true
1583 | _ -> false
1585 ; c_name = pos_name name
1586 ; c_tparams = pTParaml tparaml env
1587 ; c_extends = couldMap ~f:pHint exts env
1588 ; c_implements = couldMap ~f:pHint impls env
1589 ; c_body = List.concat (couldMap ~f:pClassElt elts env)
1590 ; c_namespace = Namespace_env.empty !(lowerer_state.popt)
1591 ; c_enum = None
1592 ; c_span = get_pos node
1593 ; c_kind =
1594 let is_abs = Str.(string_match (regexp ".*abstract.*") (text mods) 0) in
1595 match token_kind kw with
1596 | Some TK.Class when is_abs -> Cabstract
1597 | Some TK.Class -> Cnormal
1598 | Some TK.Interface -> Cinterface
1599 | Some TK.Trait -> Ctrait
1600 | Some TK.Enum -> Cenum
1601 | _ -> missing_syntax "class kind" kw env
1603 | ConstDeclaration
1604 { const_type_specifier = ty
1605 ; const_declarators = decls
1606 ; _ } ->
1607 (match List.map ~f:syntax (as_list decls) with
1608 | [ ConstantDeclarator
1609 { constant_declarator_name = name
1610 ; constant_declarator_initializer = init
1612 ] -> Constant
1613 { cst_mode = !(lowerer_state.mode)
1614 ; cst_kind = Cst_const
1615 ; cst_name = pos_name name
1616 ; cst_type = mpOptional pHint ty env
1617 ; cst_value = pSimpleInitializer init env
1618 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1620 | _ -> missing_syntax "constant declaration" decls env
1622 | AliasDeclaration
1623 { alias_attribute_spec = attr
1624 ; alias_keyword = kw
1625 ; alias_name = name
1626 ; alias_generic_parameter = tparams
1627 ; alias_constraint = constr
1628 ; alias_type = hint
1629 ; _ } -> Typedef
1630 { t_id = pos_name name
1631 ; t_tparams = pTParaml tparams env
1632 ; t_constraint = Option.map ~f:snd @@
1633 mpOptional pTConstraint constr env
1634 ; t_user_attributes = List.concat @@
1635 List.map ~f:(fun x -> pUserAttribute x env) (as_list attr)
1636 ; t_namespace = Namespace_env.empty !(lowerer_state.popt)
1637 ; t_mode = !(lowerer_state.mode)
1638 ; t_kind =
1639 match token_kind kw with
1640 | Some TK.Newtype -> NewType (pHint hint env)
1641 | Some TK.Type -> Alias (pHint hint env)
1642 | _ -> missing_syntax "kind" kw env
1644 | EnumDeclaration
1645 { enum_attribute_spec = attrs
1646 ; enum_name = name
1647 ; enum_base = base
1648 ; enum_type = constr
1649 ; enum_enumerators = enums
1650 ; _ } ->
1651 let pEnumerator node =
1652 match syntax node with
1653 | Enumerator { enumerator_name = name; enumerator_value = value; _ } ->
1654 fun env -> Const (None, [pos_name name, pExpr value env])
1655 | _ -> missing_syntax "enumerator" node
1657 Class
1658 { c_mode = !(lowerer_state.mode)
1659 ; c_user_attributes = List.concat @@ couldMap ~f:pUserAttribute attrs env
1660 ; c_final = false
1661 ; c_kind = Cenum
1662 ; c_is_xhp = false
1663 ; c_name = pos_name name
1664 ; c_tparams = []
1665 ; c_extends = []
1666 ; c_implements = []
1667 ; c_body = couldMap enums env ~f:pEnumerator
1668 ; c_namespace = Namespace_env.empty !(lowerer_state.popt)
1669 ; c_span = get_pos node
1670 ; c_enum = Some
1671 { e_base = pHint base env
1672 ; e_constraint = mpOptional pTConstraintTy constr env
1675 | InclusionDirective
1676 { inclusion_expression =
1677 { syntax = InclusionExpression
1678 { inclusion_require = req
1679 ; inclusion_filename = file
1681 ; _ }
1682 ; inclusion_semicolon = _
1683 } ->
1684 let flavor = pImportFlavor req env in
1685 Stmt (Expr (get_pos node, Import (flavor, pExpr file env)))
1686 | NamespaceDeclaration
1687 { namespace_name = name
1688 ; namespace_body =
1689 { syntax = NamespaceBody { namespace_declarations = decls; _ }; _ }
1690 ; _ } -> Namespace
1691 ( pos_name name
1692 , List.map ~f:(fun x -> pDef x env) (as_list decls)
1694 | NamespaceDeclaration { namespace_name = name; _ } ->
1695 Namespace (pos_name name, [])
1696 | NamespaceUseDeclaration
1697 { namespace_use_kind = kind
1698 ; namespace_use_clauses = clauses
1699 ; _ } ->
1700 let f node = match syntax node with
1701 | NamespaceUseClause
1702 { namespace_use_name = name
1703 ; namespace_use_alias = alias
1704 ; _ } ->
1705 let (p, n) as name = pos_name name in
1706 let x = Str.search_forward (Str.regexp "[^\\\\]*$") n 0 in
1707 let key = drop_pstr x name in
1708 let kind =
1709 match syntax kind with
1710 | Missing -> NSClass
1711 | Token { PT.kind = TK.Function; _ } -> NSFun
1712 | Token { PT.kind = TK.Const ; _ } -> NSConst
1713 | _ -> missing_syntax "namespace use kind" kind env
1715 ( kind
1716 , (p, if n.[0] = '\\' then n else "\\" ^ n)
1717 , if is_missing alias
1718 then key
1719 else pos_name alias
1721 | _ -> missing_syntax "namespace use clause" node env
1723 NamespaceUse (List.map ~f (as_list clauses))
1724 | NamespaceGroupUseDeclaration _ -> NamespaceUse []
1725 (* Fail open, assume top-level statement. Not too nice when reporting bugs,
1726 * but if this turns out prohibitive, just `try` this and catch-and-correct
1727 * the raised exception.
1729 | _ -> Stmt (pStmt node env)
1730 let pProgram : program parser = fun node env ->
1731 let rec post_process program =
1732 let span (p : 'a -> bool) =
1733 let rec go yes = function
1734 | (x::xs) when p x -> go (x::yes) xs
1735 | xs -> (List.rev yes, xs)
1736 in go []
1738 let not_namespace = function
1739 | Namespace _ -> false
1740 | _ -> true
1742 match program with
1743 | [] -> []
1744 | (Namespace (n, [])::el) ->
1745 let body, remainder = span not_namespace el in
1746 Namespace (n, body) :: post_process remainder
1747 | (Namespace (n, il)::el) ->
1748 Namespace (n, post_process il) :: post_process el
1749 | (Stmt Noop::el) -> post_process el
1750 | ((Stmt (Expr (_, (Call
1751 ( (_, (Id (_, "define")))
1752 , [ (_, (String name))
1753 ; value
1755 , []
1757 )))) :: el) -> Constant
1758 { cst_mode = !(lowerer_state.mode)
1759 ; cst_kind = Cst_define
1760 ; cst_name = name
1761 ; cst_type = None
1762 ; cst_value = value
1763 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1764 } :: post_process el
1765 | (e::el) -> e :: post_process el
1768 (* The list of top-level things in a file is somewhat special. *)
1769 let rec aux env = function
1770 | [] -> []
1771 (* EOF happens only as the last token in the list. *)
1772 | [{ syntax = EndOfFile _; _ }] -> []
1773 (* There's an incompatibility between the Full-Fidelity (FF) and the AST view
1774 * of the world; `define` is an *expression* in FF, but a *definition* in AST.
1775 * Luckily, `define` only happens at the level of definitions.
1777 | { syntax = ExpressionStatement
1778 { expression_statement_expression =
1779 { syntax = DefineExpression
1780 { define_keyword; define_argument_list = args; _ }
1781 ; _ }
1782 ; _ }
1783 ; _ } :: nodel ->
1784 ( match List.map ~f:(fun x -> pExpr x env) (as_list args) with
1785 | [ _, String name; e ] -> Constant
1786 { cst_mode = !(lowerer_state.mode)
1787 ; cst_kind = Cst_define
1788 ; cst_name = name
1789 ; cst_type = None
1790 ; cst_value = e
1791 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1793 | args ->
1794 let name = pos_name define_keyword in
1795 Stmt (Expr (fst name, Call ((fst name, Id name), args, [])))
1796 ) :: aux env nodel
1797 | node :: nodel -> pDef node env :: aux env nodel
1799 post_process @@ aux env (as_list node)
1801 let pScript node env =
1802 match syntax node with
1803 | Script { script_declarations; _ } -> pProgram script_declarations env
1804 | _ -> missing_syntax "script" node env
1806 (* The full fidelity parser considers all comments "simply" trivia. Some
1807 * comments have meaning, though. This meaning can either be relevant for the
1808 * type checker (like UNSAFE, HH_FIXME, etc.), but also for other uses, like
1809 * Codex, where comments are used for documentation generation.
1811 * Inlining the scrape for comments in the lowering code would be prohibitively
1812 * complicated, but a separate pass is fine.
1815 exception Malformed_trivia of int
1817 type scoured_comment = Pos.t * comment
1818 type scoured_comments = scoured_comment list
1820 let scour_comments
1821 (path : Relative_path.t)
1822 (source_text : Full_fidelity_source_text.t)
1823 (tree : node)
1824 : scoured_comments =
1825 let pos_of_offset =
1826 Full_fidelity_source_text.relative_pos path source_text
1828 let parse
1829 (acc : scoured_comments)
1830 (offset : int)
1831 (str : string)
1832 : scoured_comments =
1833 let fail state n =
1834 let state =
1835 match state with
1836 | `Free -> "Free"
1837 | `LineCmt -> "LineCmt"
1838 | `SawSlash -> "SawSlash"
1839 | `EmbeddedCmt -> "EmbeddedCmt"
1840 | `EndEmbedded -> "EndEmbedded"
1842 if not !(lowerer_state.suppress_output) then
1843 Printf.eprintf "Error parsing trivia in state %s: '%s'\n" state str;
1844 raise (Malformed_trivia n)
1846 let length = String.length str in
1847 let mk tag (start : int) (end_plus_one : int) acc : scoured_comments =
1848 match tag with
1849 | `Line ->
1850 (* Correct for the offset of the comment in the file *)
1851 let start = offset + start in
1852 let end_ = offset + end_plus_one in
1853 let (p, c) as result =
1854 Full_fidelity_source_text.
1855 ( pos_of_offset start end_
1856 , CmtLine (sub source_text start (end_ - start))
1859 result :: acc
1860 | `Block ->
1861 (* Correct for the offset of the comment in the file *)
1862 let start = offset + start in
1863 let end_ = offset + end_plus_one - 1 in
1864 let (p, c) as result =
1865 Full_fidelity_source_text.
1866 (* Should be 'start end_', but keeping broken for fidelity. *)
1867 ( pos_of_offset (end_) (end_ + 1)
1868 , CmtBlock (sub source_text start (end_ - start))
1871 result :: acc
1873 let rec go start state idx : scoured_comments =
1874 if idx = length (* finished? *)
1875 then begin
1876 match state with
1877 | `Free -> acc
1878 | `LineCmt -> mk `Line start length acc
1879 | _ -> fail state start
1880 end else begin
1881 let next = idx + 1 in
1882 match state, str.[idx] with
1883 (* Ending comments produces the comment just scanned *)
1884 | `LineCmt, '\n' -> mk `Line start idx @@ go next `Free next
1885 | `EndEmbedded, '/' -> mk `Block start idx @@ go next `Free next
1886 (* PHP has line comments delimited by a # *)
1887 | `Free, '#' -> go next `LineCmt next
1888 (* All other comment delimiters start with a / *)
1889 | `Free, '/' -> go start `SawSlash next
1890 (* After a / in trivia, we must see either another / or a * *)
1891 | `SawSlash, '/' -> go next `LineCmt next
1892 | `SawSlash, '*' -> go next `EmbeddedCmt next
1893 (* A * without a / does not end an embedded comment *)
1894 | `EmbeddedCmt, '*' -> go start `EndEmbedded next
1895 | `EndEmbedded, '*' -> go start `EndEmbedded next
1896 | `EndEmbedded, _ -> go start `EmbeddedCmt next
1897 (* Whitespace skips everywhere else *)
1898 | _, (' ' | '\t' | '\n') -> go start state next
1899 (* When scanning comments, anything else is accepted *)
1900 | `LineCmt, _ -> go start state next
1901 | `EmbeddedCmt, _ -> go start state next
1902 (* Anything else; bail *)
1903 | _ -> fail state start
1906 go 0 `Free 0
1907 in (* Now that we have a parser *)
1908 let rec aux (acc : scoured_comments) node : scoured_comments =
1909 match syntax node with
1910 | Token _ ->
1911 let acc = parse acc (leading_start_offset node) (leading_text node) in
1912 parse acc (trailing_start_offset node) (trailing_text node)
1913 | _ -> List.fold_left ~f:aux ~init:acc (children node)
1915 aux [] tree
1917 (*****************************************************************************(
1918 * Front-end matter
1919 )*****************************************************************************)
1921 type result =
1922 { fi_mode : FileInfo.mode
1923 ; ast : Ast.program
1924 ; content : string
1925 ; file : Relative_path.t
1926 ; comments : (Pos.t * comment) list
1929 let from_text
1930 ?(elaborate_namespaces = true)
1931 ?(include_line_comments = false)
1932 ?(keep_errors = true)
1933 ?(ignore_pos = false)
1934 ?(quick = false)
1935 ?(suppress_output = false)
1936 ?(lower_coroutines = true)
1937 ?(parser_options = ParserOptions.default)
1938 (file : Relative_path.t)
1939 (source_text : Full_fidelity_source_text.t)
1940 : result =
1941 let open Full_fidelity_syntax_tree in
1942 let tree = make source_text in
1943 let tree =
1944 if lower_coroutines then
1945 Coroutine_lowerer.lower_coroutines tree
1946 else
1947 tree in
1948 let script = Full_fidelity_positioned_syntax.from_tree tree in
1949 let fi_mode = if is_php tree then FileInfo.Mphp else
1950 let mode_string = String.trim (mode tree) in
1951 let mode_word =
1952 try List.hd (Str.split (Str.regexp " +") mode_string) with
1953 | _ -> None
1955 Option.value_map mode_word ~default:FileInfo.Mpartial ~f:(function
1956 | "decl" -> FileInfo.Mdecl
1957 | "strict" -> FileInfo.Mstrict
1958 | ("partial" | "") -> FileInfo.Mpartial
1959 (* TODO: Come up with better mode detection *)
1960 | _ -> FileInfo.Mpartial
1963 lowerer_state.language := language tree;
1964 lowerer_state.filePath := file;
1965 lowerer_state.mode := fi_mode;
1966 lowerer_state.popt := parser_options;
1967 lowerer_state.ignorePos := ignore_pos;
1968 lowerer_state.quickMode := quick;
1969 lowerer_state.suppress_output := suppress_output;
1970 let saw_yield = ref false in
1971 let errors = ref [] in (* The top-level error list. *)
1972 let max_depth = 42 in (* Filthy hack around OCaml bug *)
1973 let ast = runP pScript script { saw_yield; errors; max_depth } in
1974 let ast =
1975 if elaborate_namespaces
1976 then Namespaces.elaborate_defs parser_options ast
1977 else ast
1979 let content = Full_fidelity_source_text.text source_text in
1980 let comments, fixmes =
1981 if not include_line_comments
1982 then [], IMap.empty
1983 else
1984 let comments = scour_comments file source_text script in
1985 let fixmes = IMap.empty (*TODO*) in
1986 comments, fixmes
1988 if keep_errors then begin
1989 Fixmes.HH_FIXMES.add file fixmes;
1990 Option.iter (List.last !errors) Errors.parsing_error
1991 end;
1992 { fi_mode; ast; content; comments; file }
1994 let from_file
1995 ?(elaborate_namespaces = true)
1996 ?(include_line_comments = false)
1997 ?(keep_errors = true)
1998 ?(ignore_pos = false)
1999 ?(quick = false)
2000 ?lower_coroutines
2001 ?(parser_options = ParserOptions.default)
2002 (path : Relative_path.t)
2003 : result =
2004 from_text
2005 ~elaborate_namespaces
2006 ~include_line_comments
2007 ~keep_errors
2008 ~ignore_pos
2009 ~quick
2010 ?lower_coroutines
2011 ~parser_options
2012 path
2013 (Full_fidelity_source_text.from_file path)
2015 (*****************************************************************************(
2016 * Backward compatibility matter (should be short-lived)
2017 )*****************************************************************************)
2019 let legacy (x : result) : Parser_hack.parser_return =
2020 { Parser_hack.file_mode = Some x.fi_mode
2021 ; Parser_hack.comments = x.comments
2022 ; Parser_hack.ast = x.ast
2023 ; Parser_hack.content = x.content
2026 let from_text_with_legacy
2027 ?(elaborate_namespaces = true)
2028 ?(include_line_comments = false)
2029 ?(keep_errors = true)
2030 ?(ignore_pos = false)
2031 ?(quick = false)
2032 ?(suppress_output = false)
2033 ?lower_coroutines
2034 ?(parser_options = ParserOptions.default)
2035 (file : Relative_path.t)
2036 (content : string)
2037 : Parser_hack.parser_return =
2038 legacy @@ from_text
2039 ~elaborate_namespaces
2040 ~include_line_comments
2041 ~keep_errors
2042 ~ignore_pos
2043 ~quick
2044 ~suppress_output
2045 ?lower_coroutines
2046 ~parser_options
2047 file
2048 (Full_fidelity_source_text.make content)
2050 let from_file_with_legacy
2051 ?(elaborate_namespaces = true)
2052 ?(include_line_comments = false)
2053 ?(keep_errors = true)
2054 ?(ignore_pos = false)
2055 ?(quick = false)
2056 ?lower_coroutines
2057 ?(parser_options = ParserOptions.default)
2058 (file : Relative_path.t)
2059 : Parser_hack.parser_return =
2060 legacy @@ from_file
2061 ~elaborate_namespaces
2062 ~include_line_comments
2063 ~keep_errors
2064 ~ignore_pos
2065 ~quick
2066 ?lower_coroutines
2067 ~parser_options
2068 file