Introduce dedicated node to store markup
[hiphop-php.git] / hphp / hack / src / parser / full_fidelity_ast.ml
blob88e08e24bb59c1ac35638dc2b42ce1e8fce0784c
1 (**
2 * Copyright (c) 2016, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
11 (* What we're lowering from *)
12 open Full_fidelity_positioned_syntax
13 type node = Full_fidelity_positioned_syntax.t (* Let's be more explicit *)
14 (* What we're lowering to *)
15 open Ast
17 module SyntaxKind = Full_fidelity_syntax_kind
18 module TK = Full_fidelity_token_kind
19 module PT = Full_fidelity_positioned_token
20 module SourceText = Full_fidelity_source_text
22 open Core
24 let drop_pstr : int -> pstring -> pstring = fun cnt (pos, str) ->
25 let len = String.length str in
26 pos, if cnt >= len then "" else String.sub str cnt (len - cnt)
28 (* Context of the file being parsed, as global state. *)
29 type state_variables =
30 { language : string ref
31 ; filePath : Relative_path.t ref
32 ; mode : FileInfo.mode ref
33 ; popt : ParserOptions.t ref
34 ; ignorePos : bool ref
35 ; quickMode : bool ref
36 ; suppress_output : bool ref
39 let lowerer_state =
40 { language = ref "UNINITIALIZED"
41 ; filePath = ref Relative_path.default
42 ; mode = ref FileInfo.Mstrict
43 ; popt = ref ParserOptions.default
44 ; ignorePos = ref false
45 ; quickMode = ref false
46 ; suppress_output = ref false
49 let php_file () = !(lowerer_state.mode) == FileInfo.Mphp
51 (* "Local" context. *)
52 type env =
53 { saw_yield : bool ref
54 ; errors : (Pos.t * string) list ref
55 ; max_depth : int
58 type +'a parser = node -> env -> 'a
59 type ('a, 'b) metaparser = 'a parser -> 'b parser
61 let get_pos : node -> Pos.t = fun node ->
62 if !(lowerer_state.ignorePos)
63 then Pos.none
64 else begin
65 let pos_file = !(lowerer_state.filePath) in
66 let text = source_text node in
67 (* TODO(8086635) Figure out where this off-by-one comes from *)
68 let start_offset =
69 (*should be removed after resolving TODO above *)
70 let start_offset = start_offset node in
71 if start_offset = 0 then 0 else start_offset - 1
73 let end_offset = end_offset node in
74 let s_line, s_column = SourceText.offset_to_position text start_offset in
75 let e_line, e_column = SourceText.offset_to_position text end_offset in
76 let pos_start =
77 File_pos.of_line_column_offset
78 ~line:s_line
79 ~column:s_column
80 ~offset:start_offset
82 let pos_end =
83 File_pos.of_line_column_offset
84 ~line:e_line
85 ~column:e_column
86 ~offset:end_offset
88 Pos.make_from_file_pos ~pos_file ~pos_start ~pos_end
89 end
91 exception Lowerer_invariant_failure of string * string
92 let invariant_failure where what =
93 raise (Lowerer_invariant_failure (where, what))
95 exception API_Missing_syntax of string * env * node
96 let missing_syntax : string -> node -> env -> 'a = fun s n env ->
97 raise (API_Missing_syntax (s, env, n))
98 let handle_missing_syntax : string -> node -> env -> 'a = fun s n _ ->
99 let pos = Pos.string (Pos.to_absolute (get_pos n)) in
100 let msg = Printf.sprintf
101 "Missing case in %s.
102 - Pos: %s
103 - Unexpected: '%s'
104 - Kind: %s
108 (text n)
109 (SyntaxKind.to_string (kind n))
111 if not !(lowerer_state.suppress_output) then
112 Printf.eprintf "EXCEPTION\n---------\n%s\n" msg;
113 raise (Failure msg)
115 let runP : 'a parser -> node -> env -> 'a = fun pThing thing env ->
116 try pThing thing env with
117 | API_Missing_syntax (ex, env, n) -> handle_missing_syntax ex n env
118 | e -> raise e
121 (* TODO: Cleanup this hopeless Noop mess *)
122 let mk_noop : stmt list -> stmt list = function
123 | [] -> [Noop]
124 | s -> s
125 let mpStripNoop pThing node env = match pThing node env with
126 | [Noop] -> []
127 | stmtl -> stmtl
129 let mpOptional : ('a, 'a option) metaparser = fun p -> fun node env ->
130 Option.try_with (fun () -> p node env)
131 let mpYielding : ('a, ('a * bool)) metaparser = fun p node env ->
132 let local_ptr = ref false in
133 let result = p node { env with saw_yield = local_ptr } in
134 result, !local_ptr
139 let pos_name node =
140 let name = text node in
141 let local_ignore_pos = !(lowerer_state.ignorePos) in
142 (* Special case for __LINE__; never ignore position for that special name *)
143 if name = "__LINE__" then lowerer_state.ignorePos := false;
144 let p = get_pos node in
145 lowerer_state.ignorePos := local_ignore_pos;
146 p, name
148 let is_ret_by_ref node = not @@ is_missing node
150 let couldMap : 'a . f:'a parser -> 'a list parser = fun ~f -> fun node env ->
151 let rec synmap : 'a . 'a parser -> 'a list parser = fun f node env ->
152 match syntax node with
153 | SyntaxList l -> List.concat_map l ~f:(fun n -> go ~f n env)
154 | ListItem i -> [f i.list_item env]
155 | _ -> [f node env]
156 and go : 'a . f:'a parser -> 'a list parser = fun ~f -> function
157 | node when is_missing node -> fun _env -> []
158 | node -> synmap f node
160 go ~f node env
162 let as_list : node -> node list =
163 let strip_list_item = function
164 | { syntax = ListItem { list_item = i; _ }; _ } -> i
165 | x -> x
166 in function
167 | { syntax = SyntaxList ({syntax = ListItem _; _}::_ as synl); _ } ->
168 List.map ~f:strip_list_item synl
169 | { syntax = SyntaxList synl; _ } -> synl
170 | { syntax = Missing; _ } -> []
171 | syn -> [syn]
173 let missing =
174 let module V = Full_fidelity_positioned_syntax.PositionedSyntaxValue in
175 let open Full_fidelity_source_text in
176 { syntax = Missing
177 ; value =
178 { V.source_text = { text = ""; offset_map = Line_break_map.make "" }
179 ; V.offset = 0
180 ; V.leading_width = 0
181 ; V.width = 0
182 ; V.trailing_width = 0
188 let token_kind : node -> TK.t option = function
189 | { syntax = Token t; _ } -> Some (PT.kind t)
190 | _ -> None
192 let pBop : (expr -> expr -> expr_) parser = fun node env lhs rhs ->
193 match token_kind node with
194 | Some TK.Equal -> Binop (Eq None, lhs, rhs)
195 | Some TK.Bar -> Binop (Bar, lhs, rhs)
196 | Some TK.Ampersand -> Binop (Amp, lhs, rhs)
197 | Some TK.Plus -> Binop (Plus, lhs, rhs)
198 | Some TK.Minus -> Binop (Minus, lhs, rhs)
199 | Some TK.Star -> Binop (Star, lhs, rhs)
200 | Some TK.Or -> Binop (BArbar, lhs, rhs)
201 | Some TK.And -> Binop (AMpamp, lhs, rhs)
202 | Some TK.Xor -> Binop (LogXor, lhs, rhs)
203 | Some TK.Carat -> Binop (Xor, lhs, rhs)
204 | Some TK.Slash -> Binop (Slash, lhs, rhs)
205 | Some TK.Dot -> Binop (Dot, lhs, rhs)
206 | Some TK.Percent -> Binop (Percent, lhs, rhs)
207 | Some TK.LessThan -> Binop (Lt, lhs, rhs)
208 | Some TK.GreaterThan -> Binop (Gt, lhs, rhs)
209 | Some TK.EqualEqual -> Binop (Eqeq, lhs, rhs)
210 | Some TK.LessThanEqual -> Binop (Lte, lhs, rhs)
211 | Some TK.GreaterThanEqual -> Binop (Gte, lhs, rhs)
212 | Some TK.StarStar -> Binop (Starstar, lhs, rhs)
213 | Some TK.ExclamationEqual -> Binop (Diff, lhs, rhs)
214 | Some TK.BarEqual -> Binop (Eq (Some Bar), lhs, rhs)
215 | Some TK.PlusEqual -> Binop (Eq (Some Plus), lhs, rhs)
216 | Some TK.MinusEqual -> Binop (Eq (Some Minus), lhs, rhs)
217 | Some TK.StarEqual -> Binop (Eq (Some Star), lhs, rhs)
218 | Some TK.StarStarEqual -> Binop (Eq (Some Starstar),lhs, rhs)
219 | Some TK.SlashEqual -> Binop (Eq (Some Slash), lhs, rhs)
220 | Some TK.DotEqual -> Binop (Eq (Some Dot), lhs, rhs)
221 | Some TK.PercentEqual -> Binop (Eq (Some Percent), lhs, rhs)
222 | Some TK.CaratEqual -> Binop (Eq (Some Xor), lhs, rhs)
223 | Some TK.AmpersandEqual -> Binop (Eq (Some Amp), lhs, rhs)
224 | Some TK.BarBar -> Binop (BArbar, lhs, rhs)
225 | Some TK.AmpersandAmpersand -> Binop (AMpamp, lhs, rhs)
226 | Some TK.LessThanLessThan -> Binop (Ltlt, lhs, rhs)
227 | Some TK.GreaterThanGreaterThan -> Binop (Gtgt, lhs, rhs)
228 | Some TK.EqualEqualEqual -> Binop (EQeqeq, lhs, rhs)
229 | Some TK.LessThanLessThanEqual -> Binop (Eq (Some Ltlt), lhs, rhs)
230 | Some TK.GreaterThanGreaterThanEqual -> Binop (Eq (Some Gtgt), lhs, rhs)
231 | Some TK.LessThanGreaterThan -> Binop (Diff, lhs, rhs)
232 | Some TK.ExclamationEqualEqual -> Binop (Diff2, lhs, rhs)
233 | Some TK.LessThanEqualGreaterThan -> Binop (Cmp, lhs, rhs)
234 (* The ugly ducklings; In the FFP, `|>` and '??' are parsed as
235 * `BinaryOperator`s, whereas the typed AST has separate constructors for
236 * NullCoalesce, Pipe and Binop. This is why we don't just project onto a
237 * `bop`, but a `expr -> expr -> expr_`.
239 | Some TK.BarGreaterThan -> Pipe (lhs, rhs)
240 | Some TK.QuestionQuestion -> NullCoalesce (lhs, rhs)
241 (* TODO: Figure out why this fails silently when used in a pBlock; probably
242 just caught somewhere *)
243 | _ -> missing_syntax "binary operator" node env
245 let pImportFlavor : import_flavor parser = fun node env ->
246 match token_kind node with
247 | Some TK.Include -> Include
248 | Some TK.Require -> Require
249 | Some TK.Include_once -> IncludeOnce
250 | Some TK.Require_once -> RequireOnce
251 | _ -> missing_syntax "import flavor" node env
253 let pNullFlavor : og_null_flavor parser = fun node env ->
254 match token_kind node with
255 | Some TK.QuestionMinusGreaterThan -> OG_nullsafe
256 | Some TK.MinusGreaterThan -> OG_nullthrows
257 | _ -> missing_syntax "null flavor" node env
259 let pKinds : kind list parser = couldMap ~f:(fun node env ->
260 match token_kind node with
261 | Some TK.Final -> Final
262 | Some TK.Static -> Static
263 | Some TK.Abstract -> Abstract
264 | Some TK.Private -> Private
265 | Some TK.Public -> Public
266 | Some TK.Protected -> Protected
267 | Some TK.Var -> Public
268 | _ -> missing_syntax "kind" node env
271 let syntax_of_token : PositionedToken.t -> node = fun t ->
272 { syntax = Token t
273 ; value = PositionedSyntaxValue.make t.PT.source_text
274 t.PT.offset t.PT.leading_width t.PT.width t.PT.trailing_width
277 (* TODO: Clean up string escaping *)
278 let prepString2 : node list -> node list =
279 let trimLeft t =
280 PT.({ t with leading_width = t.leading_width + 1; width = t.width - 1 })
282 let trimRight t =
283 PT.({ t with trailing_width = t.trailing_width + 1; width = t.width - 1 })
285 function
286 | ({ syntax = Token t; _ }::ss)
287 when t.PT.width > 0 && (PT.text t).[0] = '"' ->
288 let rec unwind = function
289 | [{ syntax = Token t; _ }]
290 when t.PT.width > 0 && (PT.text t).[t.PT.width - 1] = '"' ->
291 let s = syntax_of_token (trimRight t) in
292 if width s > 0 then [s] else []
293 | x :: xs -> x :: unwind xs
294 | _ -> raise (Invalid_argument "Malformed String2 SyntaxList")
296 let s = syntax_of_token (trimLeft t) in
297 if width s > 0 then s :: unwind ss else unwind ss
298 | x -> x (* unchanged *)
300 let mkStr : (string -> string) -> string -> string = fun unescaper content ->
301 let no_quotes = try
302 if String.sub content 0 3 = "<<<" (* The heredoc case *)
303 then String.sub content 3 (String.length content - 4)
304 else String.sub content 1 (String.length content - 2)
305 with
306 | Invalid_argument _ -> content
308 try unescaper no_quotes with
309 | Php_escaping.Invalid_string _ -> raise @@
310 Failure (Printf.sprintf "Malformed string literal <<%s>>" no_quotes)
311 let unempty_str = function
312 | "''" | "\"\"" -> ""
313 | s -> s
314 let unesc_dbl s = unempty_str @@ Php_escaping.unescape_double s
315 let unesc_sgl s = unempty_str @@ Php_escaping.unescape_single s
316 let unesc_xhp s =
317 let whitespace = Str.regexp "[ \t\n\r\012]+" in
318 let s = Str.global_replace whitespace " " s in
319 let quotes = Str.regexp " ?\"\\([^\"]*\\)\" ?" in
320 if Str.string_match quotes s 0
321 then Str.matched_group 1 s
322 else s
323 let unesc_xhp_attr s =
324 let open Str in
325 unesc_dbl @@
326 if string_match (regexp "[ \t\n\r\012]*\"\\(\\(.\\|\n\\)*\\)\"") s 0
327 then matched_group 1 s
328 else s
330 type suspension_kind =
331 | SKSync
332 | SKAsync
333 | SKCoroutine
335 let mk_suspension_kind async_node coroutine_node =
336 match is_missing async_node, is_missing coroutine_node with
337 | true, true -> SKSync
338 | false, true -> SKAsync
339 | true, false -> SKCoroutine
340 | false, false -> raise (Failure "Couroutine functions may not be async")
342 let mk_fun_kind suspension_kind yield =
343 match suspension_kind, yield with
344 | SKSync, true -> FGenerator
345 | SKAsync, true -> FAsyncGenerator
346 | SKSync, false -> FSync
347 | SKAsync, false -> FAsync
348 (* TODO(t17335630): Implement an FCoroutine fun_kind *)
349 | SKCoroutine, false -> assert false
350 | SKCoroutine, true -> raise (Failure "Couroutine functions may not yield")
352 let fun_template yielding node suspension_kind =
353 let p = get_pos node in
354 { f_mode = !(lowerer_state.mode)
355 ; f_tparams = []
356 ; f_ret = None
357 ; f_ret_by_ref = false
358 ; f_name = p, ";anonymous"
359 ; f_params = []
360 ; f_body = []
361 ; f_user_attributes = []
362 ; f_fun_kind = mk_fun_kind suspension_kind yielding
363 ; f_namespace = Namespace_env.empty !(lowerer_state.popt)
364 ; f_span = p
367 let param_template node =
368 { param_hint = None
369 ; param_is_reference = false
370 ; param_is_variadic = false
371 ; param_id = pos_name node
372 ; param_expr = None
373 ; param_modifier = None
374 ; param_user_attributes = []
377 let pShapeFieldName : shape_field_name parser = fun name _env ->
378 match syntax name with
379 | ScopeResolutionExpression
380 { scope_resolution_qualifier; scope_resolution_name; _ } ->
381 SFclass_const
382 ( pos_name scope_resolution_qualifier
383 , pos_name scope_resolution_name
385 | _ -> let p, n = pos_name name in SFlit (p, mkStr unesc_dbl n)
387 let mpShapeExpressionField : ('a, (shape_field_name * 'a)) metaparser =
388 fun hintParser node env ->
389 match syntax node with
390 | FieldInitializer
391 { field_initializer_name = name; field_initializer_value = ty; _ } ->
392 let name = pShapeFieldName name env in
393 let ty = hintParser ty env in
394 name, ty
395 | _ -> missing_syntax "shape field" node env
397 let mpShapeField : ('a, shape_field) metaparser =
398 fun hintParser node env ->
399 match syntax node with
400 | FieldSpecifier { field_question; field_name; field_type; _ } ->
401 let sf_optional = not (is_missing field_question) in
402 let sf_name = pShapeFieldName field_name env in
403 let sf_hint = hintParser field_type env in
404 { sf_optional; sf_name; sf_hint }
405 | _ ->
406 let sf_name, sf_hint = mpShapeExpressionField hintParser node env in
407 (* Shape expressions can never have optional fields. *)
408 { sf_optional = false; sf_name; sf_hint }
410 let rec pHint : hint parser = fun node env ->
411 let rec pHint_ : hint_ parser = fun node env ->
412 match syntax node with
413 (* Dirty hack; CastExpression can have type represented by token *)
414 | Token _
415 | SimpleTypeSpecifier _
416 -> Happly (pos_name node, [])
417 | ShapeTypeSpecifier { shape_type_fields; shape_type_ellipsis; _ } ->
418 let si_allows_unknown_fields = not (is_missing shape_type_ellipsis) in
419 let si_shape_field_list =
420 couldMap ~f:(mpShapeField pHint) shape_type_fields env in
421 Hshape { si_allows_unknown_fields; si_shape_field_list }
422 | TupleTypeSpecifier { tuple_types; _ } ->
423 Htuple (couldMap ~f:pHint tuple_types env)
425 | KeysetTypeSpecifier { keyset_type_keyword = kw; keyset_type_type = ty; _ }
426 | VectorTypeSpecifier { vector_type_keyword = kw; vector_type_type = ty; _ }
427 | ClassnameTypeSpecifier {classname_keyword = kw; classname_type = ty; _ }
428 | TupleTypeExplicitSpecifier
429 { tuple_type_keyword = kw
430 ; tuple_type_types = ty
431 ; _ }
432 | VectorArrayTypeSpecifier
433 { vector_array_keyword = kw
434 ; vector_array_type = ty
435 ; _ }
436 -> Happly (pos_name kw, couldMap ~f:pHint ty env)
438 | MapArrayTypeSpecifier
439 { map_array_keyword = kw
440 ; map_array_key = key
441 ; map_array_value = value
442 ; _ } ->
443 Happly
444 ( pos_name kw
445 , List.map ~f:(fun x -> pHint x env) [ key; value ]
447 | DictionaryTypeSpecifier
448 { dictionary_type_keyword = kw
449 ; dictionary_type_members = members
450 ; _ } -> Happly (pos_name kw, couldMap ~f:pHint members env)
451 | GenericTypeSpecifier { generic_class_type; generic_argument_list } ->
452 Happly
453 ( pos_name generic_class_type
454 , match syntax generic_argument_list with
455 | TypeArguments { type_arguments_types; _ }
456 -> couldMap ~f:pHint type_arguments_types env
457 | _ -> missing_syntax "generic type arguments" generic_argument_list env
459 | NullableTypeSpecifier { nullable_type; _ } ->
460 Hoption (pHint nullable_type env)
461 | SoftTypeSpecifier { soft_type; _ } ->
462 Hsoft (pHint soft_type env)
463 | ClosureTypeSpecifier { closure_parameter_types; closure_return_type; _} ->
464 let param_types =
465 List.map ~f:(fun x -> pHint x env)
466 (as_list closure_parameter_types)
468 let is_variadic_param x = snd x = Htuple [] in
469 Hfun
470 ( List.filter ~f:(fun x -> not (is_variadic_param x)) param_types
471 , List.exists ~f:is_variadic_param param_types
472 , pHint closure_return_type env
474 | TypeConstant { type_constant_left_type; type_constant_right_type; _ } ->
475 let child = pos_name type_constant_right_type in
476 (match pHint_ type_constant_left_type env with
477 | Haccess (b, c, cs) -> Haccess (b, c, cs @ [child])
478 | Happly (b, []) -> Haccess (b, child, [])
479 | _ -> missing_syntax "type constant base" node env
481 | VariadicParameter _ ->
482 (* Clever trick warning: empty tuple types indicating variadic params *)
483 Htuple []
484 | _ -> missing_syntax "type hint" node env
486 get_pos node, pHint_ node env
488 type fun_hdr =
489 { fh_suspension_kind : suspension_kind
490 ; fh_name : pstring
491 ; fh_type_parameters : tparam list
492 ; fh_parameters : fun_param list
493 ; fh_return_type : hint option
494 ; fh_param_modifiers : fun_param list
495 ; fh_ret_by_ref : bool
498 let empty_fun_hdr =
499 { fh_suspension_kind = SKSync
500 ; fh_name = Pos.none, "<ANONYMOUS>"
501 ; fh_type_parameters = []
502 ; fh_parameters = []
503 ; fh_return_type = None
504 ; fh_param_modifiers = []
505 ; fh_ret_by_ref = false
508 let rec pSimpleInitializer node env =
509 match syntax node with
510 | SimpleInitializer { simple_initializer_value; _ } ->
511 pExpr simple_initializer_value env
512 | _ -> missing_syntax "simple initializer" node env
513 and pFunParam : fun_param parser = fun node env ->
514 match syntax node with
515 | ParameterDeclaration
516 { parameter_attribute
517 ; parameter_visibility
518 ; parameter_type
519 ; parameter_name
520 ; parameter_default_value
521 } ->
522 let is_reference, is_variadic, name =
523 match syntax parameter_name with
524 | DecoratedExpression
525 { decorated_expression_decorator; decorated_expression_expression } ->
526 (* There is a chance that the expression might be nested with an
527 additional decorator, check this *)
528 begin match syntax decorated_expression_expression with
529 | DecoratedExpression
530 { decorated_expression_decorator = nested_decorator
531 ; decorated_expression_expression = nested_expression } ->
532 let decorator = text decorated_expression_decorator in
533 let nested_decorator = text nested_decorator in
534 decorator = "&" || nested_decorator = "&",
535 decorator = "..." || nested_decorator = "...",
536 nested_expression
537 | _ ->
538 let decorator = text decorated_expression_decorator in
539 decorator = "&", decorator = "...", decorated_expression_expression
541 | _ -> false, false, parameter_name
543 { param_hint = mpOptional pHint parameter_type env
544 ; param_is_reference = is_reference
545 ; param_is_variadic = is_variadic
546 ; param_id = pos_name name
547 ; param_expr =
548 mpOptional pSimpleInitializer parameter_default_value env
549 ; param_user_attributes = List.concat @@
550 couldMap ~f:pUserAttribute parameter_attribute env
551 (* implicit field via constructor parameter.
552 * This is always None except for constructors and the modifier
553 * can be only Public or Protected or Private.
555 ; param_modifier =
556 let rec go = function
557 | [] -> None
558 | x :: _ when List.mem [Private; Public; Protected] x -> Some x
559 | _ :: xs -> go xs
561 go (pKinds parameter_visibility env)
563 | VariadicParameter _
564 | Token _ when text node = "..."
565 -> { (param_template node) with param_is_variadic = true }
566 | _ -> missing_syntax "function parameter" node env
567 and pUserAttribute : user_attribute list parser = fun node env ->
568 match syntax node with
569 | AttributeSpecification { attribute_specification_attributes; _ } ->
570 couldMap attribute_specification_attributes env ~f:begin function
571 | { syntax = Attribute { attribute_name; attribute_values; _}; _ } ->
572 fun env ->
573 { ua_name = pos_name attribute_name
574 ; ua_params = couldMap ~f:pExpr attribute_values env
576 | node -> missing_syntax "attribute" node
578 | _ -> missing_syntax "attribute specification" node env
579 and pAField : afield parser = fun node env ->
580 match syntax node with
581 | ElementInitializer { element_key; element_value; _ } ->
582 AFkvalue (pExpr element_key env, pExpr element_value env)
583 | _ -> AFvalue (pExpr node env)
584 and pString2: node list -> env -> expr list =
585 let rec aux l env acc =
586 (* in PHP "${x}" in strings is treated as if it was written "$x",
587 here we recognize pattern: Dollar; EmbeddedBracedExpression { QName (Token.Name) }
588 produced by FFP and lower it into Lvar.
590 match l with
591 | [] -> List.rev acc
592 | ({ syntax = Token { PT.kind = TK.Dollar; _ }; _ } as l)::
593 ({ syntax = EmbeddedBracedExpression {
594 embedded_braced_expression_expression = {
595 syntax = QualifiedNameExpression {
596 qualified_name_expression = {
597 syntax = Token { PT.kind = TK.Name; _ }
598 ; _ } as name
599 ; _ }
600 ; _ }
601 ; _ }
602 ; _ } as r)
603 ::tl ->
604 let pos, name = pos_name name in
605 let id = Lvar (pos, "$" ^ name) in
606 let left_pos = get_pos l in
607 let right_pos = get_pos r in
608 let pos =
609 if left_pos = Pos.none || right_pos = Pos.none then Pos.none
610 else
611 (* build final position as:
612 start_pos = start position of Dollar token
613 end_pos = end position pof embedded brace expression *)
614 let pos_file = Pos.filename left_pos in
615 let pos_start = Pos.pos_start left_pos in
616 let pos_end = Pos.pos_end right_pos in
617 Pos.make_from_file_pos ~pos_file ~pos_start ~pos_end
619 aux tl env ((pos, id)::acc)
620 | x::xs -> aux xs env ((pExpr ~top_level:false x env)::acc)
622 fun l env -> aux l env []
623 and pExpr ?top_level:(top_level=true) : expr parser = fun node env ->
624 let rec pExpr_ : expr_ parser = fun node env ->
625 let pos = get_pos node in
626 match syntax node with
627 | LambdaExpression {
628 lambda_async; lambda_coroutine; lambda_signature; lambda_body; _ } ->
629 let suspension_kind =
630 mk_suspension_kind lambda_async lambda_coroutine in
631 let f_params, f_ret =
632 match syntax lambda_signature with
633 | LambdaSignature { lambda_parameters; lambda_type; _ } ->
634 ( couldMap ~f:pFunParam lambda_parameters env
635 , mpOptional pHint lambda_type env
637 | Token _ -> ([param_template lambda_signature], None)
638 | _ -> missing_syntax "lambda signature" lambda_signature env
640 let pBody node env =
641 try mk_noop (pBlock node env) with
642 | _ ->
643 let (p,r) = pExpr node env in
644 [ Return (p, Some (p, r)) ]
646 let f_body, yield = mpYielding pBody lambda_body env in
647 Lfun
648 { (fun_template yield node suspension_kind) with f_ret; f_params; f_body }
650 | BracedExpression { braced_expression_expression = expr; _ }
651 | EmbeddedBracedExpression
652 { embedded_braced_expression_expression = expr; _ }
653 | ParenthesizedExpression { parenthesized_expression_expression = expr; _ }
654 -> pExpr_ expr env
656 | DictionaryIntrinsicExpression
657 { dictionary_intrinsic_keyword = kw
658 ; dictionary_intrinsic_members = members
659 ; _ }
660 | KeysetIntrinsicExpression
661 { keyset_intrinsic_keyword = kw
662 ; keyset_intrinsic_members = members
663 ; _ }
664 | VectorIntrinsicExpression
665 { vector_intrinsic_keyword = kw
666 ; vector_intrinsic_members = members
667 ; _ }
668 | CollectionLiteralExpression
669 { collection_literal_name = kw
670 ; collection_literal_initializers = members
671 ; _ }
672 -> Collection (pos_name kw, couldMap ~f:pAField members env)
674 | ArrayIntrinsicExpression { array_intrinsic_members = members; _ }
675 | ArrayCreationExpression { array_creation_members = members; _ }
677 (* TODO: Or tie in with other intrinsics and post-process to Array *)
678 Array (couldMap ~f:pAField members env)
680 | ListExpression { list_members = members; _ } ->
681 (* TODO: Or tie in with other intrinsics and post-process to List *)
682 let pBinderOrIgnore node env =
683 Option.value ~default:(Pos.none, Omitted) @@ mpOptional pExpr node env
685 List (couldMap ~f:pBinderOrIgnore members env)
687 | EvalExpression { eval_keyword = recv; eval_argument = args; _ }
688 | EmptyExpression { empty_keyword = recv; empty_argument = args; _ }
689 | IssetExpression { isset_keyword = recv; isset_argument_list = args; _ }
690 | TupleExpression
691 { tuple_expression_keyword = recv
692 ; tuple_expression_items = args
693 ; _ }
694 | FunctionCallExpression
695 { function_call_receiver = recv
696 ; function_call_argument_list = args
697 ; _ }
698 -> Call (pExpr recv env, couldMap ~f:pExpr args env, [])
700 | PrintExpression { print_keyword = _recv; print_expression = args } ->
701 (* TODO: Or tie in with FunctionCallExpression et al and post-process *)
702 Call ((pos, Id (pos, "echo")), couldMap ~f:pExpr args env, [])
704 | QualifiedNameExpression { qualified_name_expression } ->
705 Id (pos_name qualified_name_expression)
706 | VariableExpression { variable_expression } ->
707 Lvar (pos_name variable_expression)
709 | PipeVariableExpression _ ->
710 Lvar (pos, "$$")
712 | InclusionExpression { inclusion_require; inclusion_filename } ->
713 Import
714 ( pImportFlavor inclusion_require env
715 , pExpr inclusion_filename env
718 | MemberSelectionExpression
719 { member_object = recv
720 ; member_operator = op
721 ; member_name = name
723 | SafeMemberSelectionExpression
724 { safe_member_object = recv
725 ; safe_member_operator = op
726 ; safe_member_name = name
728 | EmbeddedMemberSelectionExpression
729 { embedded_member_object = recv
730 ; embedded_member_operator = op
731 ; embedded_member_name = name
733 -> Obj_get (pExpr recv env, pExpr name env, pNullFlavor op env)
735 | PrefixUnaryExpression
736 { prefix_unary_operator = operator
737 ; prefix_unary_operand = operand
739 | PostfixUnaryExpression
740 { postfix_unary_operand = operand
741 ; postfix_unary_operator = operator
743 | DecoratedExpression
744 { decorated_expression_expression = operand
745 ; decorated_expression_decorator = operator
748 let expr = pExpr operand env in
750 * FFP does not destinguish between ++$i and $i++ on the level of token
751 * kind annotation. Prevent duplication by switching on `postfix` for
752 * the two operatores for which AST /does/ differentiate between
753 * fixities.
755 let postfix = kind node = SyntaxKind.PostfixUnaryExpression in
756 (match token_kind operator with
757 | Some TK.PlusPlus when postfix -> Unop (Upincr, expr)
758 | Some TK.MinusMinus when postfix -> Unop (Updecr, expr)
759 | Some TK.PlusPlus -> Unop (Uincr, expr)
760 | Some TK.MinusMinus -> Unop (Udecr, expr)
761 | Some TK.Exclamation -> Unop (Unot, expr)
762 | Some TK.Tilde -> Unop (Utild, expr)
763 | Some TK.Plus -> Unop (Uplus, expr)
764 | Some TK.Minus -> Unop (Uminus, expr)
765 | Some TK.Ampersand -> Unop (Uref, expr)
766 | Some TK.DotDotDot -> Unop (Usplat, expr)
767 | Some TK.At -> Unop (Usilence, expr)
768 | Some TK.Await -> Await expr
769 | Some TK.Clone -> Clone expr
770 | Some TK.Dollar ->
771 (match snd expr with
772 | Lvarvar (n, id) -> Lvarvar (n + 1, id)
773 | Lvar id -> Lvarvar (1, id)
774 | _ -> BracedExpr expr
776 | _ -> missing_syntax "unary operator" node env
778 | BinaryExpression
779 { binary_left_operand; binary_operator; binary_right_operand }
781 pBop binary_operator env
782 (pExpr binary_left_operand env)
783 (pExpr binary_right_operand env)
785 | Token _ when top_level -> Id (pos_name node)
786 | Token _ -> String (pos, unesc_dbl (text node))
788 | YieldExpression { yield_operand; _ } when text yield_operand = "break" ->
789 env.saw_yield := true;
790 Yield_break
791 | YieldExpression { yield_operand; _ } ->
792 env.saw_yield := true;
793 Yield (pAField yield_operand env)
795 | DefineExpression { define_keyword; define_argument_list; _ } -> Call
796 ( (let name = pos_name define_keyword in fst name, Id name)
797 , List.map ~f:(fun x -> pExpr x env) (as_list define_argument_list)
798 , []
801 | ScopeResolutionExpression
802 { scope_resolution_qualifier; scope_resolution_name; _ } ->
803 let (_, n) as name = pos_name scope_resolution_name in
804 let qual = pos_name scope_resolution_qualifier in
805 if String.length n > 0 && n.[0] = '$'
806 then Class_get (qual, name)
807 else Class_const (qual, name)
809 | CastExpression { cast_type; cast_operand; _ } ->
810 Cast (pHint cast_type env, pExpr cast_operand env)
811 | ConditionalExpression
812 { conditional_test; conditional_consequence; conditional_alternative; _ }
813 -> Eif
814 ( pExpr conditional_test env
815 , mpOptional pExpr conditional_consequence env
816 , pExpr conditional_alternative env
818 | SubscriptExpression { subscript_receiver; subscript_index; _ } ->
819 Array_get
820 ( pExpr subscript_receiver env
821 , mpOptional pExpr subscript_index env
823 | EmbeddedSubscriptExpression
824 { embedded_subscript_receiver; embedded_subscript_index; _ } ->
825 Array_get
826 ( pExpr embedded_subscript_receiver env
827 , mpOptional pExpr embedded_subscript_index env
829 | ShapeExpression { shape_expression_fields; _ } ->
830 Shape (
831 couldMap ~f:(mpShapeExpressionField pExpr) shape_expression_fields env
833 | ObjectCreationExpression
834 { object_creation_type; object_creation_argument_list; _ } ->
836 ( (match syntax object_creation_type with
837 | GenericTypeSpecifier { generic_class_type; generic_argument_list } ->
838 let name = pos_name generic_class_type in
839 let hints =
840 match syntax generic_argument_list with
841 | TypeArguments { type_arguments_types; _ }
842 -> couldMap ~f:pHint type_arguments_types env
843 | _ ->
844 missing_syntax "generic type arguments" generic_argument_list env
846 fst name, Id_type_arguments (name, hints)
847 | QualifiedNameExpression _
848 | SimpleTypeSpecifier _
849 | Token _
850 -> let name = pos_name object_creation_type in fst name, Id name
851 | _ -> pExpr object_creation_type env
853 , couldMap ~f:pExpr object_creation_argument_list env
854 , []
856 | GenericTypeSpecifier
857 { generic_class_type
858 ; generic_argument_list
859 } ->
860 let name = pos_name generic_class_type in
861 let hints =
862 match syntax generic_argument_list with
863 | TypeArguments { type_arguments_types; _ }
864 -> couldMap ~f:pHint type_arguments_types env
865 | _ ->
866 missing_syntax "generic type arguments" generic_argument_list env
868 Id_type_arguments (name, hints)
869 | LiteralExpression { literal_expression = expr } ->
870 (match syntax expr with
871 | Token _ ->
872 let s = text expr in
873 (* TODO(17796330): Get rid of linter functionality in the lowerer *)
874 if s <> String.lowercase s then Lint.lowercase_constant pos s;
875 (match token_kind expr with
876 | Some TK.DecimalLiteral
877 | Some TK.OctalLiteral
878 | Some TK.HexadecimalLiteral
879 | Some TK.BinaryLiteral -> Int (pos, s)
880 | Some TK.FloatingLiteral -> Float (pos, s)
881 | Some TK.SingleQuotedStringLiteral -> String (pos, mkStr unesc_sgl s)
882 | Some TK.DoubleQuotedStringLiteral
883 | Some TK.HeredocStringLiteral
884 | Some TK.NowdocStringLiteral -> String (pos, mkStr unesc_dbl s)
885 | Some TK.NullLiteral -> Null
886 | Some TK.BooleanLiteral ->
887 (match String.lowercase_ascii s with
888 | "false" -> False
889 | "true" -> True
890 | _ -> missing_syntax ("boolean (not: " ^ s ^ ")") expr env
892 | _ -> missing_syntax "literal" expr env
895 | SyntaxList ts -> String2 (pString2 (prepString2 ts) env)
896 | _ -> missing_syntax "literal expression" expr env
899 | InstanceofExpression
900 { instanceof_left_operand; instanceof_right_operand; _ } ->
901 let ty =
902 match pExpr instanceof_right_operand env with
903 | p, Class_const (pid, (_, "")) -> p, Id pid
904 | ty -> ty
906 InstanceOf (pExpr instanceof_left_operand env, ty)
907 (* TODO: Priority fix? *)
908 (*match pExpr instanceof_left_operand env with
909 | p, Unop (o,e) -> Unop (0, (p, InstanceOf (e, ty)))
910 | e -> InstanceOf (e, ty)
912 | AnonymousFunction
913 { anonymous_async_keyword
914 ; anonymous_coroutine_keyword
915 ; anonymous_parameters
916 ; anonymous_type
917 ; anonymous_use
918 ; anonymous_body
919 ; _ } ->
920 let pArg node env =
921 match syntax node with
922 | PrefixUnaryExpression
923 { prefix_unary_operator = op; prefix_unary_operand = v } ->
924 pos_name v, token_kind op = Some TK.Ampersand
925 | Token _ -> pos_name node, false
926 | _ -> missing_syntax "use variable" node env
928 let pUse node =
929 match syntax node with
930 | AnonymousFunctionUseClause { anonymous_use_variables; _ } ->
931 couldMap ~f:pArg anonymous_use_variables
932 | _ -> fun _env -> []
934 let suspension_kind =
935 mk_suspension_kind
936 anonymous_async_keyword
937 anonymous_coroutine_keyword in
938 let body, yield = mpYielding pBlock anonymous_body env in
939 Efun
940 ( { (fun_template yield node suspension_kind) with
941 f_ret = mpOptional pHint anonymous_type env
942 ; f_params = couldMap ~f:pFunParam anonymous_parameters env
943 ; f_body = mk_noop body
945 , try pUse anonymous_use env with _ -> []
948 | AwaitableCreationExpression
949 { awaitable_async; awaitable_coroutine; awaitable_compound_statement } ->
950 let suspension_kind =
951 mk_suspension_kind awaitable_async awaitable_coroutine in
952 let blk, yld = mpYielding pBlock awaitable_compound_statement env in
953 let body =
954 { (fun_template yld node suspension_kind) with f_body = mk_noop blk }
956 Call ((get_pos node, Lfun body), [], [])
957 | XHPExpression
958 { xhp_open =
959 { syntax = XHPOpen { xhp_open_name; xhp_open_attributes; _ }; _ }
960 ; xhp_body = body
961 ; _ } ->
962 lowerer_state.ignorePos := false;
963 let name =
964 let pos, name = pos_name xhp_open_name in
965 (pos, ":" ^ name)
967 let combine b e =
968 syntax_of_token PT.(
969 make (kind b) (source_text b) (leading_start_offset b)
970 (start_offset e - start_offset b + width e) (leading b) (trailing e)
973 let aggregate_tokens node =
974 let rec search = function (* scroll through non-token things *)
975 | [] -> []
976 | t :: xs when token_kind t = Some TK.XHPComment -> search xs
977 | { syntax = Token b; _ } as t :: xs -> track t b None xs
978 | x :: xs -> x :: search xs
979 and track t b oe = function (* keep going through consecutive tokens *)
980 | { syntax = Token e; _ } :: xs -> track t b (Some e) xs
981 | xs -> Option.value_map oe ~default:t ~f:(combine b) :: search xs
983 search (as_list node)
985 let pEmbedded escaper node env =
986 match syntax node with
987 | Token _ ->
988 let p = get_pos node in
989 p, String (p, escaper (full_text node))
990 | _ -> pExpr node env
992 let pAttr = fun node env ->
993 match syntax node with
994 | XHPAttribute { xhp_attribute_name; xhp_attribute_expression; _ } ->
995 ( pos_name xhp_attribute_name
996 , pEmbedded unesc_xhp_attr xhp_attribute_expression env
998 | _ -> missing_syntax "XHP attribute" node env
1001 ( name
1002 , couldMap ~f:pAttr xhp_open_attributes env
1003 , List.map ~f:(fun x -> pEmbedded unesc_xhp x env)
1004 (aggregate_tokens body)
1006 (* FIXME; should this include Missing? ; "| Missing -> Null" *)
1007 | _ -> missing_syntax "expression" node env
1009 (* Since we need positions in XHP, regardless of the ignorePos flag, we
1010 * parenthesise the call to pExpr_ so that the XHP expression case can flip
1011 * the switch. The key part is that `get_pos node` happens before the old
1012 * setting is restored.
1014 * Evaluation order matters here!
1016 let local_ignore_pos = !(lowerer_state.ignorePos) in
1017 let expr_ = pExpr_ node env in
1018 let p = get_pos node in
1019 lowerer_state.ignorePos := local_ignore_pos;
1020 p, expr_
1021 and pBlock : block parser = fun node env ->
1022 match pStmt node env with
1023 | Block block -> List.filter ~f:(fun x -> x <> Noop) block
1024 | stmt -> [stmt]
1025 and pStmt : stmt parser = fun node env ->
1026 match syntax node with
1027 | SwitchStatement { switch_expression; switch_sections; _ } ->
1028 let pSwitchLabel : (block -> case) parser = fun node env cont ->
1029 match syntax node with
1030 | CaseLabel { case_expression; _ } ->
1031 Case (pExpr case_expression env, cont)
1032 | DefaultLabel _ -> Default cont
1033 | _ -> missing_syntax "pSwitchLabel" node env
1035 let pSwitchSection : case list parser = fun node env ->
1036 match syntax node with
1037 | SwitchSection { switch_section_labels; switch_section_statements; _ } ->
1038 let rec null_out cont = function
1039 | [x] -> [x cont]
1040 | (x::xs) -> x [] :: null_out cont xs
1041 | _ -> raise (Failure "Malformed block result")
1043 let blk = couldMap ~f:pStmt switch_section_statements env in
1044 null_out blk (couldMap ~f:pSwitchLabel switch_section_labels env)
1045 | _ -> missing_syntax "switch section" node env
1047 Switch
1048 ( pExpr switch_expression env
1049 , List.concat @@ couldMap ~f:pSwitchSection switch_sections env
1051 | IfStatement
1052 { if_condition; if_statement; if_elseif_clauses; if_else_clause; _ } ->
1053 let pElseIf : (block -> block) parser = fun node env ->
1054 match syntax node with
1055 | ElseifClause { elseif_condition; elseif_statement; _ } ->
1056 fun next_clause ->
1057 [ If
1058 ( pExpr elseif_condition env
1059 , [ pStmt elseif_statement env ]
1060 , next_clause
1063 | _ -> missing_syntax "elseif clause" node env
1066 ( pExpr if_condition env
1067 , [ pStmt if_statement env ]
1068 , List.fold_right ~f:(@@)
1069 (couldMap ~f:pElseIf if_elseif_clauses env)
1070 ~init:[ match syntax if_else_clause with
1071 | ElseClause { else_statement; _ } -> pStmt else_statement env
1072 | Missing -> Noop
1073 | _ -> missing_syntax "else clause" if_else_clause env
1076 | ExpressionStatement { expression_statement_expression; _ } ->
1077 if is_missing expression_statement_expression
1078 then Noop
1079 else Expr (pExpr expression_statement_expression env)
1080 | CompoundStatement { compound_statements; _ } ->
1081 Block (List.filter ~f:(fun x -> x <> Noop) @@
1082 couldMap ~f:pStmt compound_statements env)
1083 | ThrowStatement { throw_expression; _ } -> Throw (pExpr throw_expression env)
1084 | DoStatement { do_body; do_condition; _ } ->
1085 Do ([Block (pBlock do_body env)], pExpr do_condition env)
1086 | WhileStatement { while_condition; while_body; _ } ->
1087 While (pExpr while_condition env, [ pStmt while_body env ])
1088 | ForStatement
1089 { for_initializer; for_control; for_end_of_loop; for_body; _ } ->
1090 let pExprL node env =
1091 (get_pos node, Expr_list (couldMap ~f:pExpr node env))
1094 ( pExprL for_initializer env
1095 , pExprL for_control env
1096 , pExprL for_end_of_loop env
1097 , [Block (pBlock for_body env)]
1099 | ForeachStatement
1100 { foreach_collection
1101 ; foreach_await_keyword
1102 ; foreach_key
1103 ; foreach_value
1104 ; foreach_body
1105 ; _ } ->
1106 Foreach
1107 ( pExpr foreach_collection env
1108 , ( if token_kind foreach_await_keyword = Some TK.Await
1109 then Some (get_pos foreach_await_keyword)
1110 else None
1112 , ( let value = pExpr foreach_value env in
1113 Option.value_map (mpOptional pExpr foreach_key env)
1114 ~default:(As_v value)
1115 ~f:(fun key -> As_kv (key, value))
1117 , [ pStmt foreach_body env ]
1119 | TryStatement
1120 { try_compound_statement; try_catch_clauses; try_finally_clause; _ } ->
1122 ( [ Block (pBlock try_compound_statement env) ]
1123 , couldMap try_catch_clauses env ~f:begin fun node env ->
1124 match syntax node with
1125 | CatchClause { catch_type; catch_variable; catch_body; _ } ->
1126 ( pos_name catch_type
1127 , pos_name catch_variable
1128 , [ Block (mpStripNoop pBlock catch_body env) ]
1130 | _ -> missing_syntax "catch clause" node env
1132 , match syntax try_finally_clause with
1133 | FinallyClause { finally_body; _ } -> [ Block (pBlock finally_body env) ]
1134 | _ -> []
1136 | FunctionStaticStatement { static_declarations; _ } ->
1137 let pStaticDeclarator node env =
1138 match syntax node with
1139 | StaticDeclarator { static_name; static_initializer } ->
1140 let lhs =
1141 match pExpr static_name env with
1142 | p, Id (p', s) -> p, Lvar (p', s)
1143 | x -> x
1145 (match syntax static_initializer with
1146 | SimpleInitializer { simple_initializer_value; _ } ->
1147 ( get_pos static_initializer
1148 , Binop (Eq None, lhs, pExpr simple_initializer_value env)
1150 | _ -> lhs
1152 | _ -> missing_syntax "static declarator" node env
1154 Static_var (couldMap ~f:pStaticDeclarator static_declarations env)
1155 | ReturnStatement { return_expression; _ } -> Return
1156 ( get_pos return_expression
1157 , mpOptional pExpr return_expression env
1159 | Full_fidelity_positioned_syntax.GotoLabel { goto_label_name; _ } ->
1160 let pos = get_pos goto_label_name in
1161 let label_name = text goto_label_name in
1162 Ast.GotoLabel (pos, label_name)
1163 | GotoStatement { goto_statement_label_name; _ } ->
1164 Goto (pos_name goto_statement_label_name)
1165 | EchoStatement { echo_keyword = kw; echo_expressions = exprs; _ }
1166 | UnsetStatement { unset_keyword = kw; unset_variables = exprs; _ }
1167 -> Expr
1168 ( get_pos node
1169 , Call
1170 ( (match syntax kw with
1171 | QualifiedNameExpression _
1172 | SimpleTypeSpecifier _
1173 | Token _
1174 -> let name = pos_name kw in fst name, Id name
1175 | _ -> missing_syntax "id" kw env
1177 , couldMap ~f:pExpr exprs env
1178 , []
1180 | BreakStatement { break_level=level; _ } ->
1181 Break (get_pos node, pBreak_or_continue_level env level)
1182 | ContinueStatement { continue_level=level; _ } ->
1183 Continue (get_pos node, pBreak_or_continue_level env level)
1184 | GlobalStatement { global_variables; _ } ->
1185 Global_var (couldMap ~f:pExpr global_variables env)
1186 | MarkupSection _ -> pMarkup node env
1187 | _ when env.max_depth > 0 ->
1188 (* OCaml optimisers; Forgive them, for they know not what they do!
1190 * The max_depth is only there to stop the *optimised* version from an
1191 * unbounded recursion. Sad times.
1193 Def_inline (pDef node { env with max_depth = env.max_depth - 1 })
1194 | _ -> missing_syntax "statement" node env
1196 and pMarkup node env =
1197 match syntax node with
1198 | MarkupSection { markup_expression; _ } ->
1199 begin match syntax markup_expression with
1200 | Missing ->
1201 (*TODO: properly lower markup sections *)
1202 Noop
1203 | ExpressionStatement {
1204 expression_statement_expression = e
1205 ; _} -> Expr (pExpr e env)
1206 | _ -> failwith "expression expected"
1208 | _ -> failwith "invalid node"
1210 and pBreak_or_continue_level env level =
1211 match mpOptional pExpr level env with
1212 | Some (_, Int(_, s)) -> Some (int_of_string s)
1213 | _ -> None
1215 and pTConstraintTy : hint parser = fun node ->
1216 match syntax node with
1217 | TypeConstraint { constraint_type; _ } -> pHint constraint_type
1218 | _ -> missing_syntax "type constraint" node
1220 and pTConstraint : (constraint_kind * hint) parser = fun node env ->
1221 match syntax node with
1222 | TypeConstraint { constraint_keyword; constraint_type } ->
1223 ( (match token_kind constraint_keyword with
1224 | Some TK.As -> Constraint_as
1225 | Some TK.Super -> Constraint_super
1226 | Some TK.Equal -> Constraint_eq
1227 | _ -> missing_syntax "constraint operator" constraint_keyword env
1229 , pHint constraint_type env
1231 | _ -> missing_syntax "type constraint" node env
1233 and pTParaml : tparam list parser = fun node env ->
1234 let pTParam : tparam parser = fun node env ->
1235 match syntax node with
1236 | TypeParameter { type_variance; type_name; type_constraints } ->
1237 ( (match token_kind type_variance with
1238 | Some TK.Plus -> Covariant
1239 | Some TK.Minus -> Contravariant
1240 | _ -> Invariant
1242 , pos_name type_name
1243 , couldMap ~f:pTConstraint type_constraints env
1245 | _ -> missing_syntax "type parameter" node env
1247 match syntax node with
1248 | Missing -> []
1249 | TypeParameters { type_parameters_parameters; _ } ->
1250 couldMap ~f:pTParam type_parameters_parameters env
1251 | _ -> missing_syntax "type parameter list" node env
1253 (* TODO: Translate the where clause *)
1254 and pFunHdr : fun_hdr parser = fun node env ->
1255 match syntax node with
1256 | FunctionDeclarationHeader
1257 { function_async
1258 ; function_coroutine
1259 ; function_ampersand
1260 ; function_name
1261 ; function_type_parameter_list
1262 ; function_parameter_list
1263 ; function_type
1264 ; _ } ->
1265 let fh_parameters = couldMap ~f:pFunParam function_parameter_list env in
1266 let fh_return_type = mpOptional pHint function_type env in
1267 let fh_suspension_kind =
1268 mk_suspension_kind function_async function_coroutine in
1269 { fh_suspension_kind
1270 ; fh_name = pos_name function_name
1271 ; fh_type_parameters = pTParaml function_type_parameter_list env
1272 ; fh_parameters
1273 ; fh_return_type
1274 ; fh_param_modifiers =
1275 List.filter ~f:(fun p -> Option.is_some p.param_modifier) fh_parameters
1276 ; fh_ret_by_ref = is_ret_by_ref function_ampersand
1278 | LambdaSignature { lambda_parameters; lambda_type; _ } ->
1279 { empty_fun_hdr with
1280 fh_parameters = couldMap ~f:pFunParam lambda_parameters env
1281 ; fh_return_type = mpOptional pHint lambda_type env
1283 | Token _ -> empty_fun_hdr
1284 | _ -> missing_syntax "function header" node env
1286 and pClassElt : class_elt list parser = fun node env ->
1287 match syntax node with
1288 | ConstDeclaration
1289 { const_abstract; const_type_specifier; const_declarators; _ } ->
1290 let ty = mpOptional pHint const_type_specifier env in
1291 let res =
1292 couldMap const_declarators env ~f:begin function
1293 | { syntax = ConstantDeclarator
1294 { constant_declarator_name; constant_declarator_initializer }
1295 ; _ } -> fun env ->
1296 ( pos_name constant_declarator_name
1297 (* TODO: Parse error when const is abstract and has inits *)
1298 , if is_missing const_abstract
1299 then mpOptional pSimpleInitializer constant_declarator_initializer env
1300 else None
1302 | node -> missing_syntax "constant declarator" node env
1305 let rec aux absts concrs = function
1306 | (id, None ) :: xs -> aux (AbsConst (ty, id) :: absts) concrs xs
1307 | (id, Some x) :: xs -> aux absts ((id, x) :: concrs) xs
1308 | [] when concrs = [] -> List.rev absts
1309 | [] -> Const (ty, List.rev concrs) :: List.rev absts
1311 aux [] [] res
1312 | TypeConstDeclaration
1313 { type_const_abstract
1314 ; type_const_name
1315 ; type_const_type_constraint
1316 ; type_const_type_specifier
1317 ; _ } ->
1318 [ TypeConst
1319 { tconst_abstract = not (is_missing type_const_abstract)
1320 ; tconst_name = pos_name type_const_name
1321 ; tconst_constraint = mpOptional pTConstraintTy type_const_type_constraint env
1322 ; tconst_type = mpOptional pHint type_const_type_specifier env
1323 ; tconst_span = get_pos node
1326 | PropertyDeclaration
1327 { property_modifiers; property_type; property_declarators; _ } ->
1328 [ ClassVars
1329 ( pKinds property_modifiers env
1330 , mpOptional pHint property_type env
1331 , couldMap property_declarators env ~f:begin fun node env ->
1332 match syntax node with
1333 | PropertyDeclarator { property_name; property_initializer } ->
1334 ( let _, n as name = pos_name property_name in
1335 ( get_pos node
1336 , ( if n.[0] = '$'
1337 then drop_pstr 1 name
1338 else name
1340 , mpOptional pSimpleInitializer property_initializer env
1343 | _ -> missing_syntax "property declarator" node env
1347 | MethodishDeclaration
1348 { methodish_attribute
1349 ; methodish_modifiers
1350 ; methodish_function_decl_header
1351 ; methodish_function_body
1352 ; _ } ->
1353 let classvar_init : fun_param -> stmt * class_elt = fun param ->
1354 let p, _ as cvname = drop_pstr 1 param.param_id in (* Drop the '$' *)
1355 let span =
1356 match param.param_expr with
1357 | Some (pos_end, _) -> Pos.btw p pos_end
1358 | None -> p
1360 ( Expr (p, Binop (Eq None,
1361 (p, Obj_get((p, Lvar (p, "$this")), (p, Id cvname), OG_nullthrows)),
1362 (p, Lvar param.param_id)
1364 , ClassVars
1365 ( Option.to_list param.param_modifier
1366 , param.param_hint
1367 , [span, cvname, None]
1371 let hdr = pFunHdr methodish_function_decl_header env in
1372 let member_init, member_def =
1373 List.unzip @@
1374 List.filter_map hdr.fh_parameters ~f:(fun p ->
1375 Option.map ~f: (fun _ -> classvar_init p) p.param_modifier
1378 let pBody = fun node env ->
1379 if is_missing node then [] else
1380 (* TODO: Give parse error when not abstract, but body is missing *)
1381 List.rev member_init @
1382 match pStmt node env with
1383 | Block [] -> [Noop]
1384 | Block stmtl -> stmtl
1385 | stmt -> [stmt]
1387 let body, body_has_yield = mpYielding pBody methodish_function_body env in
1388 let body =
1389 (* Drop it on the floor in quickMode; we still need to process the body
1390 * to know, e.g. whether it contains a yield.
1392 if !(lowerer_state.quickMode)
1393 then [Noop]
1394 else body
1396 let kind = pKinds methodish_modifiers env in
1397 member_def @ [Method
1398 { m_kind = kind
1399 ; m_tparams = hdr.fh_type_parameters
1400 ; m_constrs = []
1401 ; m_name = hdr.fh_name
1402 ; m_params = hdr.fh_parameters
1403 ; m_body = body
1404 ; m_user_attributes = List.concat @@
1405 couldMap ~f:pUserAttribute methodish_attribute env
1406 ; m_ret = hdr.fh_return_type
1407 ; m_ret_by_ref = hdr.fh_ret_by_ref
1408 ; m_span = get_pos node
1409 ; m_fun_kind = mk_fun_kind hdr.fh_suspension_kind body_has_yield
1411 | TraitUseConflictResolution {
1412 trait_use_conflict_resolution_names;
1413 trait_use_conflict_resolution_clauses;
1415 } ->
1416 let pTraitUseConflictResolutionItem node env =
1417 match syntax node with
1418 | TraitUseConflictResolutionItem
1419 { trait_use_conflict_resolution_item_aliasing_name = aliasing_name;
1420 trait_use_conflict_resolution_item_aliasing_keyword = alias_kw;
1421 trait_use_conflict_resolution_item_aliased_name = aliased_name;
1423 } ->
1424 let aliasing_name, opt_scope_resolution_name =
1425 match syntax aliasing_name with
1426 | ScopeResolutionExpression
1427 { scope_resolution_qualifier; scope_resolution_name; _ } ->
1428 pos_name scope_resolution_qualifier,
1429 Some (pos_name scope_resolution_name)
1430 | _ -> pos_name aliasing_name, None
1432 let alias_type =
1433 match token_kind alias_kw with
1434 | Some TK.As -> CU_as
1435 | Some TK.Insteadof -> CU_insteadof
1436 | _ ->
1437 missing_syntax "trait use conflict resolution item" alias_kw env
1439 ClassUseAlias ((aliasing_name, opt_scope_resolution_name),
1440 pos_name aliased_name,
1441 alias_type)
1442 | _ -> missing_syntax "trait use conflict resolution item" node env
1444 (couldMap ~f:(fun n e ->
1445 ClassUse (pHint n e)) trait_use_conflict_resolution_names env)
1446 @ (couldMap ~f:pTraitUseConflictResolutionItem
1447 trait_use_conflict_resolution_clauses env)
1448 | TraitUse { trait_use_names; _ } ->
1449 couldMap ~f:(fun n e -> ClassUse (pHint n e)) trait_use_names env
1450 | RequireClause { require_kind; require_name; _ } ->
1451 [ ClassTraitRequire
1452 ( (match token_kind require_kind with
1453 | Some TK.Implements -> MustImplement
1454 | Some TK.Extends -> MustExtend
1455 | _ -> missing_syntax "trait require kind" require_kind env
1457 , pHint require_name env
1460 | XHPClassAttributeDeclaration { xhp_attribute_attributes; _ } ->
1461 let pXHPAttr node env =
1462 match syntax node with
1463 | XHPClassAttribute
1464 { xhp_attribute_decl_type = ty
1465 ; xhp_attribute_decl_name = name
1466 ; xhp_attribute_decl_initializer = init
1467 ; xhp_attribute_decl_required = req
1468 } ->
1469 let (p, name) = pos_name name in
1470 XhpAttr
1471 ( mpOptional pHint ty env
1472 , (Pos.none, (p, ":" ^ name), mpOptional pSimpleInitializer init env)
1473 , not (is_missing req)
1474 , match syntax ty with
1475 | XHPEnumType { xhp_enum_values; _ } ->
1476 Some (get_pos ty, couldMap ~f:pExpr xhp_enum_values env)
1477 | _ -> None
1479 | XHPSimpleClassAttribute { xhp_simple_class_attribute_type = attr } ->
1480 XhpAttrUse (get_pos attr, Happly (pos_name attr, []))
1481 | Token _ ->
1482 XhpAttrUse (get_pos node, Happly (pos_name node, []))
1483 | _ -> missing_syntax "XHP attribute" node env
1485 couldMap ~f:pXHPAttr xhp_attribute_attributes env
1486 | XHPChildrenDeclaration { xhp_children_expression; _; } ->
1487 [ XhpChild (pXhpChild xhp_children_expression env) ]
1488 | XHPCategoryDeclaration { xhp_category_categories = cats; _ } ->
1489 let pNameSansPercent node _env = drop_pstr 1 (pos_name node) in
1490 [ XhpCategory (couldMap ~f:pNameSansPercent cats env) ]
1491 | _ -> missing_syntax "expression" node env
1493 and pXhpChild : xhp_child parser = fun node env ->
1494 match syntax node with
1495 | Token t -> ChildName (pos_name node)
1496 | PostfixUnaryExpression { postfix_unary_operand; postfix_unary_operator;} ->
1497 let operand = pXhpChild postfix_unary_operand env in
1498 let operator =
1499 begin
1500 match token_kind postfix_unary_operator with
1501 | Some TK.Question -> ChildQuestion
1502 | Some TK.Plus -> ChildPlus
1503 | Some TK.Star -> ChildStar
1504 | _ -> missing_syntax "xhp children operator" node env
1505 end in
1506 ChildUnary(operand, operator)
1507 | BinaryExpression
1508 { binary_left_operand; binary_right_operand; _ } ->
1509 let left = pXhpChild binary_left_operand env in
1510 let right = pXhpChild binary_right_operand env in
1511 ChildBinary(left, right)
1512 | XHPChildrenParenthesizedList {xhp_children_list_xhp_children; _} ->
1513 let children = as_list xhp_children_list_xhp_children in
1514 let children = List.map ~f:(fun x -> pXhpChild x env) children in
1515 ChildList children
1516 | _ -> missing_syntax "xhp children" node env
1519 (*****************************************************************************(
1520 * Parsing definitions (AST's `def`)
1521 )*****************************************************************************)
1522 and pDef : def parser = fun node env ->
1523 match syntax node with
1524 | FunctionDeclaration
1525 { function_attribute_spec; function_declaration_header; function_body } ->
1526 let hdr = pFunHdr function_declaration_header env in
1527 let block, yield = mpYielding (mpOptional pBlock) function_body env in
1529 { (fun_template yield node hdr.fh_suspension_kind) with
1530 f_tparams = hdr.fh_type_parameters
1531 ; f_ret = hdr.fh_return_type
1532 ; f_name = hdr.fh_name
1533 ; f_params = hdr.fh_parameters
1534 ; f_ret_by_ref = hdr.fh_ret_by_ref
1535 ; f_body =
1536 if !(lowerer_state.quickMode)
1537 then [Noop]
1538 else begin
1539 (* FIXME: Filthy hack to catch UNSAFE *)
1540 let containsUNSAFE =
1541 let re = Str.regexp_string "UNSAFE" in
1542 try Str.search_forward re (full_text function_body) 0 >= 0 with
1543 | Not_found -> false
1545 match block with
1546 | Some [Noop] when containsUNSAFE -> [Unsafe]
1547 | Some [] -> [Noop]
1548 | None -> []
1549 | Some b -> b
1551 ; f_user_attributes =
1552 List.concat @@ couldMap ~f:pUserAttribute function_attribute_spec env
1554 | ClassishDeclaration
1555 { classish_attribute = attr
1556 ; classish_modifiers = mods
1557 ; classish_keyword = kw
1558 ; classish_name = name
1559 ; classish_type_parameters = tparaml
1560 ; classish_extends_list = exts
1561 ; classish_implements_list = impls
1562 ; classish_body =
1563 { syntax = ClassishBody { classish_body_elements = elts; _ }; _ }
1564 ; _ } ->
1565 Class
1566 { c_mode = !(lowerer_state.mode)
1567 ; c_user_attributes = List.concat @@ couldMap ~f:pUserAttribute attr env
1568 ; c_final = List.mem (pKinds mods env) Final
1569 ; c_is_xhp =
1570 (match token_kind name with
1571 | Some TK.XHPElementName | Some TK.XHPClassName -> true
1572 | _ -> false
1574 ; c_name = pos_name name
1575 ; c_tparams = pTParaml tparaml env
1576 ; c_extends = couldMap ~f:pHint exts env
1577 ; c_implements = couldMap ~f:pHint impls env
1578 ; c_body = List.concat (couldMap ~f:pClassElt elts env)
1579 ; c_namespace = Namespace_env.empty !(lowerer_state.popt)
1580 ; c_enum = None
1581 ; c_span = get_pos node
1582 ; c_kind =
1583 let is_abs = Str.(string_match (regexp ".*abstract.*") (text mods) 0) in
1584 match token_kind kw with
1585 | Some TK.Class when is_abs -> Cabstract
1586 | Some TK.Class -> Cnormal
1587 | Some TK.Interface -> Cinterface
1588 | Some TK.Trait -> Ctrait
1589 | Some TK.Enum -> Cenum
1590 | _ -> missing_syntax "class kind" kw env
1592 | ConstDeclaration
1593 { const_type_specifier = ty
1594 ; const_declarators = decls
1595 ; _ } ->
1596 (match List.map ~f:syntax (as_list decls) with
1597 | [ ConstantDeclarator
1598 { constant_declarator_name = name
1599 ; constant_declarator_initializer = init
1601 ] -> Constant
1602 { cst_mode = !(lowerer_state.mode)
1603 ; cst_kind = Cst_const
1604 ; cst_name = pos_name name
1605 ; cst_type = mpOptional pHint ty env
1606 ; cst_value = pSimpleInitializer init env
1607 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1609 | _ -> missing_syntax "constant declaration" decls env
1611 | AliasDeclaration
1612 { alias_attribute_spec = attr
1613 ; alias_keyword = kw
1614 ; alias_name = name
1615 ; alias_generic_parameter = tparams
1616 ; alias_constraint = constr
1617 ; alias_type = hint
1618 ; _ } -> Typedef
1619 { t_id = pos_name name
1620 ; t_tparams = pTParaml tparams env
1621 ; t_constraint = Option.map ~f:snd @@
1622 mpOptional pTConstraint constr env
1623 ; t_user_attributes = List.concat @@
1624 List.map ~f:(fun x -> pUserAttribute x env) (as_list attr)
1625 ; t_namespace = Namespace_env.empty !(lowerer_state.popt)
1626 ; t_mode = !(lowerer_state.mode)
1627 ; t_kind =
1628 match token_kind kw with
1629 | Some TK.Newtype -> NewType (pHint hint env)
1630 | Some TK.Type -> Alias (pHint hint env)
1631 | _ -> missing_syntax "kind" kw env
1633 | EnumDeclaration
1634 { enum_attribute_spec = attrs
1635 ; enum_name = name
1636 ; enum_base = base
1637 ; enum_type = constr
1638 ; enum_enumerators = enums
1639 ; _ } ->
1640 let pEnumerator node =
1641 match syntax node with
1642 | Enumerator { enumerator_name = name; enumerator_value = value; _ } ->
1643 fun env -> Const (None, [pos_name name, pExpr value env])
1644 | _ -> missing_syntax "enumerator" node
1646 Class
1647 { c_mode = !(lowerer_state.mode)
1648 ; c_user_attributes = List.concat @@ couldMap ~f:pUserAttribute attrs env
1649 ; c_final = false
1650 ; c_kind = Cenum
1651 ; c_is_xhp = false
1652 ; c_name = pos_name name
1653 ; c_tparams = []
1654 ; c_extends = []
1655 ; c_implements = []
1656 ; c_body = couldMap enums env ~f:pEnumerator
1657 ; c_namespace = Namespace_env.empty !(lowerer_state.popt)
1658 ; c_span = get_pos node
1659 ; c_enum = Some
1660 { e_base = pHint base env
1661 ; e_constraint = mpOptional pTConstraintTy constr env
1664 | InclusionDirective
1665 { inclusion_expression =
1666 { syntax = InclusionExpression
1667 { inclusion_require = req
1668 ; inclusion_filename = file
1670 ; _ }
1671 ; inclusion_semicolon = _
1672 } ->
1673 let flavor = pImportFlavor req env in
1674 Stmt (Expr (get_pos node, Import (flavor, pExpr file env)))
1675 | NamespaceDeclaration
1676 { namespace_name = name
1677 ; namespace_body =
1678 { syntax = NamespaceBody { namespace_declarations = decls; _ }; _ }
1679 ; _ } -> Namespace
1680 ( pos_name name
1681 , List.map ~f:(fun x -> pDef x env) (as_list decls)
1683 | NamespaceDeclaration { namespace_name = name; _ } ->
1684 Namespace (pos_name name, [])
1685 | NamespaceUseDeclaration
1686 { namespace_use_kind = kind
1687 ; namespace_use_clauses = clauses
1688 ; _ } ->
1689 let f node = match syntax node with
1690 | NamespaceUseClause
1691 { namespace_use_name = name
1692 ; namespace_use_alias = alias
1693 ; _ } ->
1694 let (p, n) as name = pos_name name in
1695 let x = Str.search_forward (Str.regexp "[^\\\\]*$") n 0 in
1696 let key = drop_pstr x name in
1697 let kind =
1698 match syntax kind with
1699 | Missing -> NSClass
1700 | Token { PT.kind = TK.Function; _ } -> NSFun
1701 | Token { PT.kind = TK.Const ; _ } -> NSConst
1702 | _ -> missing_syntax "namespace use kind" kind env
1704 ( kind
1705 , (p, if n.[0] = '\\' then n else "\\" ^ n)
1706 , if is_missing alias
1707 then key
1708 else pos_name alias
1710 | _ -> missing_syntax "namespace use clause" node env
1712 NamespaceUse (List.map ~f (as_list clauses))
1713 | NamespaceGroupUseDeclaration _ -> NamespaceUse []
1714 (* Fail open, assume top-level statement. Not too nice when reporting bugs,
1715 * but if this turns out prohibitive, just `try` this and catch-and-correct
1716 * the raised exception.
1718 | _ -> Stmt (pStmt node env)
1719 let pProgram : program parser = fun node env ->
1720 let rec post_process program =
1721 let span (p : 'a -> bool) =
1722 let rec go yes = function
1723 | (x::xs) when p x -> go (x::yes) xs
1724 | xs -> (List.rev yes, xs)
1725 in go []
1727 let not_namespace = function
1728 | Namespace _ -> false
1729 | _ -> true
1731 match program with
1732 | [] -> []
1733 | (Namespace (n, [])::el) ->
1734 let body, remainder = span not_namespace el in
1735 Namespace (n, body) :: post_process remainder
1736 | (Namespace (n, il)::el) ->
1737 Namespace (n, post_process il) :: post_process el
1738 | (Stmt Noop::el) -> post_process el
1739 | ((Stmt (Expr (_, (Call
1740 ( (_, (Id (_, "define")))
1741 , [ (_, (String name))
1742 ; value
1744 , []
1746 )))) :: el) -> Constant
1747 { cst_mode = !(lowerer_state.mode)
1748 ; cst_kind = Cst_define
1749 ; cst_name = name
1750 ; cst_type = None
1751 ; cst_value = value
1752 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1753 } :: post_process el
1754 | (e::el) -> e :: post_process el
1757 (* The list of top-level things in a file is somewhat special. *)
1758 let rec aux env = function
1759 | [] -> []
1760 (* EOF happens only as the last token in the list. *)
1761 | [{ syntax = EndOfFile _; _ }] -> []
1762 (* There's an incompatibility between the Full-Fidelity (FF) and the AST view
1763 * of the world; `define` is an *expression* in FF, but a *definition* in AST.
1764 * Luckily, `define` only happens at the level of definitions.
1766 | { syntax = ExpressionStatement
1767 { expression_statement_expression =
1768 { syntax = DefineExpression { define_argument_list = args; _ } ; _ }
1769 ; _ }
1770 ; _ } :: nodel ->
1771 ( match List.map ~f:(fun x -> pExpr x env) (as_list args) with
1772 | [ _, String name; e ] -> Constant
1773 { cst_mode = !(lowerer_state.mode)
1774 ; cst_kind = Cst_define
1775 ; cst_name = name
1776 ; cst_type = None
1777 ; cst_value = e
1778 ; cst_namespace = Namespace_env.empty !(lowerer_state.popt)
1780 | _ -> missing_syntax "DefineExpression:inner" args env
1781 ) :: aux env nodel
1782 | node :: nodel -> pDef node env :: aux env nodel
1784 post_process @@ aux env (as_list node)
1786 let pScript node env =
1787 match syntax node with
1788 | Script { script_declarations; _ } -> pProgram script_declarations env
1789 | _ -> missing_syntax "script" node env
1791 (* The full fidelity parser considers all comments "simply" trivia. Some
1792 * comments have meaning, though. This meaning can either be relevant for the
1793 * type checker (like UNSAFE, HH_FIXME, etc.), but also for other uses, like
1794 * Codex, where comments are used for documentation generation.
1796 * Inlining the scrape for comments in the lowering code would be prohibitively
1797 * complicated, but a separate pass is fine.
1800 exception Malformed_trivia of int
1802 type scoured_comment = Pos.t * string
1803 type scoured_comments = scoured_comment list
1805 let scour_comments
1806 (path : Relative_path.t)
1807 (source_text : Full_fidelity_source_text.t)
1808 (tree : node)
1809 : scoured_comments =
1810 let pos_of_offset =
1811 Full_fidelity_source_text.relative_pos path source_text
1813 let parse
1814 (acc : scoured_comments)
1815 (offset : int)
1816 (str : string)
1817 : scoured_comments =
1818 let fail state n =
1819 let state =
1820 match state with
1821 | `Free -> "Free"
1822 | `LineCmt -> "LineCmt"
1823 | `SawSlash -> "SawSlash"
1824 | `EmbeddedCmt -> "EmbeddedCmt"
1825 | `EndEmbedded -> "EndEmbedded"
1827 if not !(lowerer_state.suppress_output) then
1828 Printf.eprintf "Error parsing trivia in state %s: '%s'\n" state str;
1829 raise (Malformed_trivia n)
1831 let length = String.length str in
1832 let mk tag (start : int) (end_plus_one : int) acc : scoured_comments =
1833 match tag with
1834 | `Line ->
1835 (* Correct for the offset of the comment in the file *)
1836 let start = offset + start in
1837 let end_ = offset + end_plus_one in
1838 let (p, c) as result =
1839 Full_fidelity_source_text.
1840 ( pos_of_offset start end_
1841 , sub source_text start (end_ - start)
1844 result :: acc
1845 | `Block ->
1846 (* Correct for the offset of the comment in the file *)
1847 let start = offset + start in
1848 let end_ = offset + end_plus_one - 1 in
1849 let (p, c) as result =
1850 Full_fidelity_source_text.
1851 (* Should be 'start end_', but keeping broken for fidelity. *)
1852 ( pos_of_offset (end_) (end_ + 1)
1853 , sub source_text start (end_ - start)
1856 result :: acc
1858 let rec go start state idx : scoured_comments =
1859 if idx = length (* finished? *)
1860 then begin
1861 match state with
1862 | `Free -> acc
1863 | `LineCmt -> mk `Line start length acc
1864 | _ -> fail state start
1865 end else begin
1866 let next = idx + 1 in
1867 match state, str.[idx] with
1868 (* Ending comments produces the comment just scanned *)
1869 | `LineCmt, '\n' -> mk `Line start idx @@ go next `Free next
1870 | `EndEmbedded, '/' -> mk `Block start idx @@ go next `Free next
1871 (* PHP has line comments delimited by a # *)
1872 | `Free, '#' -> go next `LineCmt next
1873 (* All other comment delimiters start with a / *)
1874 | `Free, '/' -> go start `SawSlash next
1875 (* After a / in trivia, we must see either another / or a * *)
1876 | `SawSlash, '/' -> go next `LineCmt next
1877 | `SawSlash, '*' -> go next `EmbeddedCmt next
1878 (* A * without a / does not end an embedded comment *)
1879 | `EmbeddedCmt, '*' -> go start `EndEmbedded next
1880 | `EndEmbedded, '*' -> go start `EndEmbedded next
1881 | `EndEmbedded, _ -> go start `EmbeddedCmt next
1882 (* Whitespace skips everywhere else *)
1883 | _, (' ' | '\t' | '\n') -> go start state next
1884 (* When scanning comments, anything else is accepted *)
1885 | `LineCmt, _ -> go start state next
1886 | `EmbeddedCmt, _ -> go start state next
1887 (* Anything else; bail *)
1888 | _ -> fail state start
1891 go 0 `Free 0
1892 in (* Now that we have a parser *)
1893 let rec aux (acc : scoured_comments) node : scoured_comments =
1894 match syntax node with
1895 | Token _ ->
1896 let acc = parse acc (leading_start_offset node) (leading_text node) in
1897 parse acc (trailing_start_offset node) (trailing_text node)
1898 | _ -> List.fold_left ~f:aux ~init:acc (children node)
1900 aux [] tree
1902 (*****************************************************************************(
1903 * Front-end matter
1904 )*****************************************************************************)
1906 type result =
1907 { fi_mode : FileInfo.mode
1908 ; ast : Ast.program
1909 ; content : string
1910 ; file : Relative_path.t
1911 ; comments : (Pos.t * string) list
1914 let from_text
1915 ?(elaborate_namespaces = true)
1916 ?(include_line_comments = false)
1917 ?(keep_errors = true)
1918 ?(ignore_pos = false)
1919 ?(quick = false)
1920 ?(suppress_output = false)
1921 ?(lower_coroutines = true)
1922 ?(parser_options = ParserOptions.default)
1923 (file : Relative_path.t)
1924 (source_text : Full_fidelity_source_text.t)
1925 : result =
1926 let open Full_fidelity_syntax_tree in
1927 let tree = make source_text in
1928 let tree =
1929 if lower_coroutines then
1930 Coroutine_lowerer.lower_coroutines tree
1931 else
1932 tree in
1933 let script = Full_fidelity_positioned_syntax.from_tree tree in
1934 let fi_mode = if is_php tree then FileInfo.Mphp else
1935 let mode_string = String.trim (mode tree) in
1936 let mode_word =
1937 try List.hd (Str.split (Str.regexp " +") mode_string) with
1938 | _ -> None
1940 Option.value_map mode_word ~default:FileInfo.Mpartial ~f:(function
1941 | "decl" -> FileInfo.Mdecl
1942 | "strict" -> FileInfo.Mstrict
1943 | ("partial" | "") -> FileInfo.Mpartial
1944 (* TODO: Come up with better mode detection *)
1945 | _ -> FileInfo.Mpartial
1948 lowerer_state.language := language tree;
1949 lowerer_state.filePath := file;
1950 lowerer_state.mode := fi_mode;
1951 lowerer_state.popt := parser_options;
1952 lowerer_state.ignorePos := ignore_pos;
1953 lowerer_state.quickMode := quick;
1954 lowerer_state.suppress_output := suppress_output;
1955 let saw_yield = ref false in
1956 let errors = ref [] in (* The top-level error list. *)
1957 let max_depth = 42 in (* Filthy hack around OCaml bug *)
1958 let ast = runP pScript script { saw_yield; errors; max_depth } in
1959 let ast =
1960 if elaborate_namespaces
1961 then Namespaces.elaborate_defs parser_options ast
1962 else ast
1964 let content = Full_fidelity_source_text.text source_text in
1965 let comments, fixmes =
1966 if not include_line_comments
1967 then [], IMap.empty
1968 else
1969 let comments = scour_comments file source_text script in
1970 let fixmes = IMap.empty (*TODO*) in
1971 comments, fixmes
1973 if keep_errors then begin
1974 Fixmes.HH_FIXMES.add file fixmes;
1975 Option.iter (List.last !errors) Errors.parsing_error
1976 end;
1977 { fi_mode; ast; content; comments; file }
1979 let from_file
1980 ?(elaborate_namespaces = true)
1981 ?(include_line_comments = false)
1982 ?(keep_errors = true)
1983 ?(ignore_pos = false)
1984 ?(quick = false)
1985 ?lower_coroutines
1986 ?(parser_options = ParserOptions.default)
1987 (path : Relative_path.t)
1988 : result =
1989 from_text
1990 ~elaborate_namespaces
1991 ~include_line_comments
1992 ~keep_errors
1993 ~ignore_pos
1994 ~quick
1995 ?lower_coroutines
1996 ~parser_options
1997 path
1998 (Full_fidelity_source_text.from_file path)
2000 (*****************************************************************************(
2001 * Backward compatibility matter (should be short-lived)
2002 )*****************************************************************************)
2004 let legacy (x : result) : Parser_hack.parser_return =
2005 { Parser_hack.file_mode = Some x.fi_mode
2006 ; Parser_hack.comments = x.comments
2007 ; Parser_hack.ast = x.ast
2008 ; Parser_hack.content = x.content
2011 let from_text_with_legacy
2012 ?(elaborate_namespaces = true)
2013 ?(include_line_comments = false)
2014 ?(keep_errors = true)
2015 ?(ignore_pos = false)
2016 ?(quick = false)
2017 ?(suppress_output = false)
2018 ?lower_coroutines
2019 ?(parser_options = ParserOptions.default)
2020 (file : Relative_path.t)
2021 (content : string)
2022 : Parser_hack.parser_return =
2023 legacy @@ from_text
2024 ~elaborate_namespaces
2025 ~include_line_comments
2026 ~keep_errors
2027 ~ignore_pos
2028 ~quick
2029 ~suppress_output
2030 ?lower_coroutines
2031 ~parser_options
2032 file
2033 (Full_fidelity_source_text.make content)
2035 let from_file_with_legacy
2036 ?(elaborate_namespaces = true)
2037 ?(include_line_comments = false)
2038 ?(keep_errors = true)
2039 ?(ignore_pos = false)
2040 ?(quick = false)
2041 ?lower_coroutines
2042 ?(parser_options = ParserOptions.default)
2043 (file : Relative_path.t)
2044 : Parser_hack.parser_return =
2045 legacy @@ from_file
2046 ~elaborate_namespaces
2047 ~include_line_comments
2048 ~keep_errors
2049 ~ignore_pos
2050 ~quick
2051 ?lower_coroutines
2052 ~parser_options
2053 file