2 * Copyright (c) 2018, Facebook, Inc.
5 * This source code is licensed under the MIT license found in the
6 * LICENSE file in the "hack" directory of this source tree.
10 module Hh_bucket
= Bucket
14 module Syntax
= Full_fidelity_positioned_syntax
15 module SyntaxKind
= Full_fidelity_syntax_kind
16 module SyntaxTree
= Full_fidelity_syntax_tree.WithSyntax
(Syntax
)
19 * A `SyntaxKind.t`. We can generate a string from a kind, but not the other way
20 * around, so we store the string value given to us in the input directly. Then,
21 * when examining a node to see if it's a match, we'll convert the node's kind
22 * to a string and simply use string comparison.
24 * (Note that multiple syntax kinds map to the same string anyways, so it would
25 * be hard to reverse the mapping.)
27 type node_kind
= NodeKind
of string
30 * An identifier in the pattern used to identify the this node when returning
31 * the results of a match to the caller. This identifier may not be unique in
32 * the list of returned results.
34 type match_name
= MatchName
of string
37 * The name of a child of a node.
39 * For example, a binary expression has three possible child names:
41 * * binary_left_operand
43 * * binary_right_operand
45 * See the FFP schema definition in `src/parser/schema/schema_definition.ml`.
46 * Note that `BinaryExpression` has the prefix of `binary` and children like
47 * `left_operand`. When combined, this forms the full name
48 * `binary_left_operand`.
50 type child_type
= ChildType
of string
53 * A query that can be run on the syntax tree. It will return the matched nodes
54 * of any matched `MatchPattern`s.
58 * Match any node with the given kind, and whose children match the given
65 * Mapping from child name to pattern that the child must satisfy.
66 * Children may be omitted from this map. No child type may be specified
69 children
: (child_type
* pattern
) list
;
72 (* Match any missing node. *)
76 * Return the given node in the result list, assuming that the pattern overall
77 * matches. (This pattern by itself always matches; it's often used with
81 match_name
: match_name
;
85 * Matches a given node if there is descendant node matching the given pattern
86 * anywhere in the subtree underneath the parent node.
88 | DescendantPattern
of {
93 * Matches a list node (such as a statement list) only if all the children at
94 * the given indexes match their respective patterns.
97 children
: (int * pattern
) list
;
98 max_length
: int option;
102 * Matches a given node if its raw text is exactly the specified string. The
103 * "raw" text doesn't include trivia.
105 | RawTextPattern
of {
110 * Matches a given expression node if its type is a subtype of the given type.
111 * TODO: decide if we want to handle exact type equality or supertypes.
112 * TODO: decide what we want to name the pattern for decl types, if we create
116 subtype_of
: Typing_defs.locl
Typing_defs.ty
;
120 * Matches if all of the children patterns match.
123 patterns
: pattern list
;
127 * Matches if any of the children patterns match.
129 * Note that currently:
131 * * This short-circuits, so the after any pattern succeeds, no other
132 * patterns will be evaluated. This means that if two patterns have
133 * `MatchPattern`s somewhere underneath them, then the `MatchPattern`s from
134 * at most one of these patterns will be included in the results.
135 * * The order in which the provided patterns are evaluated is not
136 * specified. (For example, in the future, we may want to order it so that
137 * patterns that rely only on syntactic information are executed before
138 * patterns that rely on type information.)
140 * Consequently, you shouldn't rely on `MatchPattern`s that are nested inside
141 * the constituent patterns.
144 patterns
: pattern list
;
148 * Matches only if the given pattern does not match.
150 * Regardless of whether the child pattern matches or not, any matches that it
151 * produced are thrown away. (In the case that it doesn't match, we don't keep
152 * around "witness" matches explaining why this `NotPattern` didn't match.)
158 type matched_node
= {
159 match_name
: match_name
;
167 * The list of nodes for which a `MatchPattern` matched.
169 matched_nodes
: matched_node list
;
172 type collected_type_map
= Tast_type_collector.collected_type list
Pos.AbsolutePosMap.t
174 (* The environment used in searching a single file. *)
176 tcopt
: TypecheckerOptions.t
;
177 fileinfo
: FileInfo.t
;
178 path
: Relative_path.t
;
179 syntax_tree
: SyntaxTree.t
;
180 collected_types
: collected_type_map
option;
183 let empty_result: result
option = Some
{ matched_nodes
= [] }
185 let merge_results (lhs
: result
option) (rhs
: result
option): result
option =
186 Option.merge lhs rhs ~f
:(fun lhs rhs
->
187 { matched_nodes
= lhs
.matched_nodes
@ rhs
.matched_nodes
}
190 let find_child_with_type
192 (child_type
: child_type
)
195 Syntax.children_names node
196 |> List.findi ~f
:(fun _i actual_child_type
->
197 match child_type
with
198 | ChildType expected_child_type
->
199 expected_child_type
= actual_child_type
203 match child_index with
205 | Some index
-> List.nth
(Syntax.children node
) index
207 let collect_types (env
: env
): env
* collected_type_map
=
209 | { collected_types
= Some collected_types
; _
} ->
210 (env
, collected_types
)
211 | { collected_types
= None
; tcopt
; fileinfo
; path
; _
} ->
212 let tast = ServerIdeUtils.check_fileinfo tcopt path fileinfo
in
213 let collected_types = Tast_type_collector.collect_types tast in
214 let env = { env with collected_types = Some
collected_types } in
215 (env, collected_types)
221 : env * result
option =
223 | NodePattern
{ kind
; children
} ->
224 let kind = match kind with NodeKind
kind -> kind in
225 if (node
|> Syntax.kind |> SyntaxKind.to_string
) <> kind
229 let patterns = List.map children ~f
:(fun (child_type
, pattern
) ->
230 let child_node = find_child_with_type node child_type
in
231 let child_node = Option.value_exn
child_node in
232 (child_node, pattern
)
234 search_and ~
env ~
patterns
236 | MissingNodePattern
->
237 if Syntax.kind node
= SyntaxKind.Missing
238 then (env, empty_result)
241 | MatchPattern
{ match_name
} ->
246 kind = NodeKind
(SyntaxKind.to_string
(Syntax.kind node
));
247 start_offset
= Syntax.start_offset node
;
248 end_offset
= Syntax.end_offset node
;
254 | DescendantPattern
{ pattern
} ->
255 search_descendants ~
env ~pattern ~node
257 | ListPattern
{ children
; max_length
} ->
260 match node
.syntax
with
261 | SyntaxList
syntax_list ->
262 begin match max_length
with
263 | None
-> Some
syntax_list
265 if List.length
syntax_list <= max_length
266 then Some
syntax_list
272 begin match syntax_list with
273 | None
-> (env, None
)
274 | Some
syntax_list ->
275 let open Option.Monad_infix
in
276 let patterns = List.map children ~f
:(fun (index
, pattern
) ->
277 List.nth
syntax_list index
>>| fun child_node ->
278 (child_node, pattern
)
280 begin match Option.all
patterns with
282 search_and ~
env ~
patterns
284 (* We tried to match a pattern for the child at at index N, but the syntax
285 list didn't have an Nth element. *)
290 | RawTextPattern
{ raw_text
} ->
291 if Syntax.text node
= raw_text
292 then (env, empty_result)
295 | TypePattern
{ subtype_of
} ->
296 Line_break_map.reset_global_state
();
297 let pos = Syntax.position
env.path node
in
299 | None
-> (env, None
)
301 let (env, collected_types) = collect_types env in
302 let pos = Pos.to_absolute
pos in
303 let tys = Tast_type_collector.get_from_pos_map
pos collected_types in
304 let is_subtype_of (tast_env
, ty
) =
306 | Typing_defs.LoclTy ty
->
307 Tast_env.can_subtype tast_env ty subtype_of
308 | Typing_defs.DeclTy _ty
->
313 when List.exists
tys ~f
:is_subtype_of ->
319 | AndPattern
{ patterns } ->
320 let patterns = List.map
patterns ~f
:(fun pattern
-> (node
, pattern
)) in
321 search_and ~
env ~
patterns
323 | OrPattern
{ patterns } ->
324 let patterns = List.map
patterns ~f
:(fun pattern
-> (node
, pattern
)) in
325 search_or ~
env ~
patterns
327 | NotPattern
{ pattern
} ->
328 let (env, result) = search_node ~
env ~node ~pattern
in
329 begin match result with
330 | Some _
-> (env, None
)
331 | None
-> (env, empty_result)
334 (* TODO: this will likely have to become more intelligent *)
335 and search_descendants
339 : env * result option =
342 (Syntax.children node
)
344 ~f
:(fun env acc_result child
->
345 let (env, child_result
) = search_node ~
env ~pattern ~node
:child
in
346 let (env, descendants_result
) =
347 search_descendants ~
env ~pattern ~node
:child
in
348 let result = merge_results child_result descendants_result
in
349 (env, (merge_results result acc_result
))
354 ~
(patterns: (Syntax.t
* pattern
) list
)
355 : env * result option =
360 ~f
:(fun env result (node
, pattern
) ->
365 | Some _
as result ->
366 let (env, pattern_result
) = search_node ~
env ~pattern ~node
in
367 match pattern_result
with
368 | None
-> (env, None
)
369 | Some _
as pattern_result
->
370 (env, merge_results result pattern_result
)
375 ~
(patterns: (Syntax.t
* pattern
) list
)
376 : env * result option =
381 ~f
:(fun env result (node
, pattern
) ->
383 | Some _
as result ->
387 search_node ~
env ~pattern ~node
391 (tcopt
: TypecheckerOptions.t
)
393 : (pattern
, string) Result.t
=
395 let open Result.Monad_infix
in
397 let wrap_json_accessor f
=
398 fun x
-> Result.map_error
(f x
)
399 ~f
:Hh_json.Access.access_failure_to_string
402 let get_string x
= wrap_json_accessor (Hh_json.Access.get_string x
) in
403 let get_obj x
= wrap_json_accessor (Hh_json.Access.get_obj x
) in
404 let get_array x
= wrap_json_accessor (Hh_json.Access.get_array x
) in
405 let keytrace_to_string = Hh_json.Access.keytrace_to_string in
406 let error_at_keytrace ~keytrace error_message
=
407 Error
(error_message ^
(keytrace_to_string keytrace
))
410 let rec compile_pattern ~json ~keytrace
: (pattern
, string) Result.t
=
411 get_string "pattern_type" (json
, keytrace
)
413 >>= fun (pattern_type
, pattern_type_keytrace
) ->
414 match pattern_type
with
416 compile_node_pattern ~json ~keytrace
417 | "missing_node_pattern" ->
418 compile_missing_node_pattern ~json ~keytrace
420 compile_match_pattern ~json ~keytrace
421 | "descendant_pattern" ->
422 compile_descendant_pattern ~json ~keytrace
424 compile_list_pattern ~json ~keytrace
425 | "raw_text_pattern" ->
426 compile_raw_text_pattern ~json ~keytrace
428 compile_type_pattern ~json ~keytrace
430 compile_and_pattern ~json ~keytrace
432 compile_or_pattern ~json ~keytrace
434 compile_not_pattern ~json ~keytrace
436 error_at_keytrace ~keytrace
:pattern_type_keytrace
437 (Printf.sprintf
"Unknown pattern type '%s'" pattern_type
)
439 and compile_node_pattern ~json ~keytrace
: (pattern
, string) Result.t
=
440 get_string "kind" (json
, keytrace
)
442 >>= fun (kind, kind_keytrace
) ->
443 let open Schema_definition
in
444 let kind_info = List.find schema ~f
:(fun schema_node
->
445 schema_node
.description
= kind
449 error_at_keytrace ~keytrace
:kind_keytrace
450 (Printf.sprintf
"Kind '%s' doesn't exist" kind)
451 | Some
kind_info -> Ok
kind_info
454 get_obj "children" (json
, keytrace
)
456 >>= fun (children_json
, children_keytrace
) ->
457 (* This has already been verified to be an object above. *)
458 let children = Hh_json.get_object_exn children_json
in
461 (child_keytrace
: Hh_json.Access.keytrace
)
463 : (child_type
, string) Result.t
=
464 (* We're given a field name like `binary_right_operand`, but the field
465 names in the schema are things like `right_operand`, and you have to
466 affix the prefix yourself. For consistency with other tooling, we want
467 to use `binary_right_operand` instead of just `right_operand`. *)
468 let get_prefixed_field_name field_name
=
469 kind_info.prefix ^
"_" ^ field_name
471 let field = List.find
kind_info.fields ~f
:(fun (field_name
, _
) ->
472 (get_prefixed_field_name field_name
) = child_name
)
476 let valid_types = List.map
kind_info.fields ~f
:(fun (field_name
, _
) ->
477 get_prefixed_field_name field_name
479 error_at_keytrace ~keytrace
:child_keytrace
481 ("Unknown child type '%s'; "^^
482 "valid child types for a node of kind '%s' are: %s")
485 (String.concat ~sep
:", " valid_types))
486 | Some _
-> Ok
(ChildType child_name
)
488 let children_patterns =
489 List.mapi
children ~f
:(fun index
(child_name
, pattern_json
) ->
490 let child_keytrace = (string_of_int index
) :: children_keytrace
in
491 get_child_type child_keytrace child_name
>>= fun child_name
->
492 compile_pattern ~json
:pattern_json ~keytrace
:child_keytrace
494 (child_name
, pattern
)
497 all
children_patterns >>| fun children ->
499 kind = NodeKind
kind;
503 and compile_missing_node_pattern ~json
:_json ~keytrace
:_keytrace
=
504 Ok MissingNodePattern
506 and compile_match_pattern ~json ~keytrace
=
507 get_string "match_name" (json
, keytrace
)
508 >>| fun (match_name
, _match_name_keytrace
) ->
510 match_name
= MatchName match_name
;
513 and compile_descendant_pattern ~json ~keytrace
=
514 get_obj "pattern" (json
, keytrace
) >>= fun (pattern
, pattern_keytrace
) ->
515 compile_pattern ~json
:pattern ~keytrace
:pattern_keytrace
>>| fun pattern
->
520 and compile_list_pattern ~json ~keytrace
=
521 let max_length = Hh_json.get_field_opt
522 (Hh_json.Access.get_number_int
"max_length")
526 get_obj "children" (json
, keytrace
)
527 >>= fun (children_json
, children_keytrace
) ->
529 (* This has already been verified to be an object above. *)
530 let children = Hh_json.get_object_exn children_json
in
531 let children_patterns =
532 List.map
children ~f
:(fun (index_str
, pattern_json
) ->
533 let child_keytrace = index_str
:: children_keytrace
in
534 begin match int_of_string_opt index_str
with
535 | Some index
-> Ok index
538 (Printf.sprintf
"Invalid integer key: %s" index_str
)
539 ~keytrace
:child_keytrace
548 error_at_keytrace "Integer key must be non-negative"
549 ~keytrace
:child_keytrace
553 compile_pattern ~json
:pattern_json ~keytrace
:child_keytrace
559 Result.all
children_patterns >>| fun children ->
565 and compile_raw_text_pattern ~json ~keytrace
=
566 get_string "raw_text" (json
, keytrace
)
567 >>| fun (raw_text
, _raw_text_keytrace
) ->
572 and compile_type_pattern ~json ~keytrace
=
573 get_obj "subtype_of" (json
, keytrace
)
574 >>= fun (subtype_of_json
, subtype_of_keytrace
) ->
575 let locl_ty = Typing_print.json_to_locl_ty
577 ~keytrace
:subtype_of_keytrace
583 subtype_of
= locl_ty;
585 | Error
(Typing_defs.Wrong_phase message
)
586 | Error
(Typing_defs.Not_supported message
)
587 | Error
(Typing_defs.Deserialization_error message
) ->
590 and compile_child_patterns_helper ~json ~keytrace
=
591 get_array "patterns" (json
, keytrace
) >>= fun (pattern_list
, pattern_list_keytrace
) ->
592 let compiled_patterns = List.mapi pattern_list
(fun i json
->
593 let keytrace = (string_of_int i
) :: pattern_list_keytrace
in
594 compile_pattern ~json ~
keytrace
596 Result.all
compiled_patterns
598 and compile_and_pattern ~json ~
keytrace =
599 compile_child_patterns_helper ~json ~
keytrace >>| fun patterns ->
604 and compile_or_pattern ~json ~
keytrace =
605 compile_child_patterns_helper ~json ~
keytrace >>| fun patterns ->
610 and compile_not_pattern ~json ~
keytrace =
611 get_obj "pattern" (json
, keytrace) >>= fun (json
, keytrace) ->
612 compile_pattern ~json ~
keytrace >>| fun pattern
->
618 compile_pattern ~json ~
keytrace:[]
620 let result_to_json ~
(sort_results
: bool) (result: result option): Hh_json.json
=
625 let matched_nodes = result.matched_nodes in
628 then List.sort
matched_nodes ~compare
:Pervasives.compare
632 List.map
matched_nodes ~f
:(fun matched_node
->
634 match matched_node
.match_name
635 with MatchName
match_name -> match_name
638 match matched_node
.kind
639 with NodeKind
kind -> kind
642 "match_name", JSON_String
match_name;
643 "kind", JSON_String
kind;
644 "start_offset", Hh_json.int_ matched_node
.start_offset
;
645 "end_offset", Hh_json.int_ matched_node
.end_offset
;
649 "matched_nodes", JSON_Array
matched_nodes;
653 (tcopt
: TypecheckerOptions.t
)
654 (path
: Relative_path.t
)
655 (fileinfo
: FileInfo.t
)
658 let source_text = Full_fidelity_source_text.from_file path
in
659 let syntax_tree = SyntaxTree.make
source_text in
666 collected_types = None
;
669 search_node ~
env ~pattern ~node
:(SyntaxTree.root
env.syntax_tree) in
673 (genv
: ServerEnv.genv
)
675 ~
(sort_results
: bool)
676 ~
(files_to_search
: string list
option)
677 (input
: Hh_json.json
)
678 : (Hh_json.json
, string) Result.t
680 let open Result.Monad_infix
in
681 compile_pattern env.ServerEnv.tcopt input
>>| fun pattern
->
683 let num_files_searched = ref 0 in
684 let last_printed_num_files_searched = ref 0 in
685 let done_searching = ref false in
686 let progress_fn ~total
:_total ~start
:_start ~
(length
: int): unit =
687 let is_bucket_empty = (length
= 0) in
688 if not
!done_searching then begin
689 num_files_searched := !num_files_searched + length
;
691 !num_files_searched - !last_printed_num_files_searched >= 10000
694 Hh_logger.log
"CST search: searched %d files..." !num_files_searched;
695 last_printed_num_files_searched := !num_files_searched;
699 then done_searching := true;
702 let next_files: (Relative_path.t
* FileInfo.t
* pattern
) list
Hh_bucket.next
=
703 let with_file_data path
=
704 let path = Relative_path.create_detect_prefix
path in
705 match Relative_path.Map.get
env.ServerEnv.files_info
path with
706 | Some fileinfo
-> Some
(path, fileinfo
, pattern
)
708 (* We may not have the file information for a file such as one that we
709 ignore in `.hhconfig`. *)
712 match files_to_search
with
713 | Some files_to_search
->
714 let files_to_search =
715 Sys_utils.parse_path_list
files_to_search
716 |> List.filter_map ~f
:with_file_data
719 genv
.ServerEnv.workers
723 let indexer = genv
.ServerEnv.indexer FindUtils.is_php
in
725 let files = indexer () |> List.filter_map ~f
:with_file_data in
726 progress_fn ~total
:0 ~start
:0 ~length
:(List.length
files);
727 Hh_bucket.of_list
files
731 (* Extract the `tcopt` so that we don't close over the entire `env`. *)
732 let tcopt = env.ServerEnv.tcopt in
734 (acc
: (Relative_path.t
* result) list
)
735 (inputs
: (Relative_path.t
* FileInfo.t
* pattern
) list
)
736 : (Relative_path.t
* result) list
=
739 ~f
:(fun acc
(path, fileinfo
, pattern
) ->
741 match search tcopt path fileinfo pattern
with
742 | Some
result -> (path, result) :: acc
745 let stack = Printexc.get_backtrace
() in
746 let prefix = Printf.sprintf
747 "Error while running CST search on path %s:\n"
748 (Relative_path.to_absolute
path)
750 Hh_logger.exc e ~
prefix ~
stack;
755 let results = MultiWorker.call
756 genv
.ServerEnv.workers
759 ~merge
:List.rev_append
765 then List.sort
results ~compare
:Pervasives.compare
769 Hh_json.JSON_Object
(List.map
results ~f
:(fun (path, result) ->
770 (Relative_path.to_absolute
path, result_to_json ~sort_results
(Some
result))