Introduce the EditablePositioned full-fidelity syntax tree
[hiphop-php.git] / hphp / hack / src / parser / lambda_analyzer.ml
blobea1296d7f624fa18a09347c82f21908cf3357f16
1 (**
2 * Copyright (c) 2017, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
10 module List = Core_list
11 module Syntax = Full_fidelity_editable_positioned_syntax
12 module Token = Syntax.Token
13 module SyntaxKind = Full_fidelity_syntax_kind
14 module Utils = Full_fidelity_syntax_utilities.WithSyntax(Syntax)
15 open Syntax
16 open Utils
19 We have a lambda in hand, and the chain of parents in its parse tree.
20 We wish to know all the local variables which appear immediately in any parent
21 that are used anywhere inside the lambda. These are the "outer variables" of
22 the lambda.
24 This is trickier than it sounds. Consider the first lambda here:
26 function foo($a, $e) {
27 $b = 123;
29 ($b, $c, $f) ==> m($a, $b, $c, () ==> { ... $d = $e ... }),
30 () ==> { ... $d = $b; ... });
33 * $a is an outer variable of the lambda; it appears outside and is used inside.
34 * $b is not an outer variable of the lambda. It appears outside, but the $b used
35 inside the lambda is a different $b.
36 * $c is not an outer variable of the lambda; it does not appear outside.
37 * $d is not an outer variable of the lambda, even though it appears outside
38 the lambda and is used inside the lambda.
39 * $e is an outer variable; it appears outside, and is used indirectly inside.
40 * $f is not an outer variable; it does not appear outside.
42 The first problem we're going to solve is finding all the variables used
43 *without recursing into child lambdas*. Variables which are declared as formal
44 parameters are considered used; they are implicitly assigned a value.
46 For our example above:
48 * the variables used by foo are $a, $b and $e
49 * the variables used by the first lambda are $a, $b, $c, $f
50 * the variables used by the second lambda are $d and $e
51 * the variables used by the third lambda are $b and $d
53 TODO: We need to figure out how to make cases like
56 $x = 123;
57 ... () ==> "${x}" ...
60 work correctly. String identity on locals doesn't really work here.
64 let fold_no_lambdas folder acc node =
65 let predicate node =
66 match kind node with
67 | SyntaxKind.LambdaExpression
68 | SyntaxKind.AnonymousFunction -> false
69 | _ -> true in
70 fold_where folder predicate acc node
72 let token_to_string node =
73 match syntax node with
74 | Token t -> Some (Token.text t)
75 | _ -> None
77 let param_name node =
78 match syntax node with
79 | ListItem { list_item = {
80 syntax = ParameterDeclaration { parameter_name; _ }; _
81 }; _ } -> token_to_string parameter_name
82 | _ -> None
84 let get_params node =
85 let rec aux node =
86 match syntax node with
87 | FunctionDeclaration { function_declaration_header; _ } ->
88 aux function_declaration_header
89 | FunctionDeclarationHeader { function_parameter_list; _ } ->
90 function_parameter_list
91 | LambdaExpression { lambda_signature; _ } ->
92 aux lambda_signature
93 | LambdaSignature { lambda_parameters; _ } ->
94 lambda_parameters
95 | AnonymousFunction { anonymous_parameters; _ } ->
96 anonymous_parameters
97 | MethodishDeclaration { methodish_function_decl_header; _ } ->
98 aux methodish_function_decl_header
99 | _ -> make_missing() in
100 let param_list = aux node in
101 let param_list = syntax_node_to_list param_list in
102 let param_list = List.filter_map param_list ~f:param_name in
103 SSet.of_list param_list
105 let all_params parents =
106 let folder acc node =
107 SSet.union acc (get_params node) in
108 List.fold_left parents ~f:folder ~init:SSet.empty
110 let get_body node =
111 match syntax node with
112 | FunctionDeclaration { function_body; _ } -> function_body
113 | LambdaExpression { lambda_body; _ } -> lambda_body
114 | AnonymousFunction { anonymous_body; _ } -> anonymous_body
115 | MethodishDeclaration { methodish_function_body; _ } -> methodish_function_body
116 | _ -> make_missing()
118 (* TODO: This does not consider situations like "${x}" as the use of a local.*)
119 let add_local acc node =
120 match syntax node with
121 | VariableExpression { variable_expression =
122 { syntax = Token token; _ } } ->
123 SSet.add (Token.text token) acc
124 | _ -> acc
126 let local_variables acc node =
127 let params = get_params node in
128 let body = get_body node in
129 let locals = fold_no_lambdas add_local params body in
130 SSet.union acc locals
133 Second problem: what variables appear inside a lambda, including nested lambdas,
134 which are *not* parameters of any lambda?
137 let used_non_params node =
138 let folder acc node parents =
139 (* Note that the parent chain here only goes up to the originally-passed-in
140 node; it does not include the parents of *that* node. Typically the node
141 will be a lambda. We want to examine all children of that lambda, looking
142 for local variables which are not parameters of the current lambda. If
143 there are local variables which are *closed-over parameters of an outer
144 lambda*, that's great; we don't want to exclude them. *)
145 match syntax node with
146 | VariableExpression { variable_expression =
147 { syntax = Token token; _ } } ->
148 let text = Token.text token in
149 (* TODO: This is very expensive and inefficient. Basically on every
150 encounter of a local variable we reconstruct the set of parameters
151 in scope from this parent chain. A more efficient approach would be
152 to have a scope-aware folder which, instead of passing in a list
153 of parents, passes in a set of parameters. *)
154 let params = all_params parents in
155 if SSet.mem text params then
156 acc (* It's a parameter *)
157 else
158 SSet.add text acc (* Not any parameter. Maybe an outer variable. *)
159 | _ -> acc in
160 parented_fold_post folder SSet.empty node
162 let outer_variables parents lambda =
163 let all_outer = List.fold_left parents ~f:local_variables ~init:SSet.empty in
164 let all_used = used_non_params lambda in
165 (* We now know all the local variables created outside the lambda, and we
166 know all the variables that are used that are not any of the lambda's
167 parameters, or child lambda's parameters.
168 The set of outer variables of the lambda is the intersection. *)
169 let outer = SSet.inter all_outer all_used in
170 SSet.elements outer
171 (* Note that we are guaranteed that the list is sorted. *)
173 let partition_used_locals parents params body =
174 let all_used = fold_no_lambdas add_local SSet.empty body in
175 let all_used = SSet.remove "$this" all_used in
176 let decls = syntax_node_to_list params in
177 let all_params = List.filter_map decls ~f:param_name in
178 let all_params = SSet.of_list all_params in
179 let used_params = SSet.inter all_used all_params in
180 let all_outer = List.fold_left parents ~f:local_variables ~init:SSet.empty in
181 let used_outer = SSet.inter all_used all_outer in
182 let inner = SSet.diff all_used (SSet.union used_params used_outer) in
183 (inner, used_outer, used_params)