Add type annotations to `clientConnect.ml`
[hiphop-php.git] / hphp / hack / src / server / cstSearchService.ml
blob3d384ab23303839be2762832654b1f5d1c1a69d2
1 (**
2 * Copyright (c) 2018, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the MIT license found in the
6 * LICENSE file in the "hack" directory of this source tree.
8 *)
10 module Hh_bucket = Bucket
11 open Core_kernel
12 open Common
14 module Syntax = Full_fidelity_positioned_syntax
15 module SyntaxKind = Full_fidelity_syntax_kind
16 module SyntaxTree = Full_fidelity_syntax_tree.WithSyntax(Syntax)
18 (**
19 * A `SyntaxKind.t`. We can generate a string from a kind, but not the other way
20 * around, so we store the string value given to us in the input directly. Then,
21 * when examining a node to see if it's a match, we'll convert the node's kind
22 * to a string and simply use string comparison.
24 * (Note that multiple syntax kinds map to the same string anyways, so it would
25 * be hard to reverse the mapping.)
27 type node_kind = NodeKind of string
29 (**
30 * An identifier in the pattern used to identify the this node when returning
31 * the results of a match to the caller. This identifier may not be unique in
32 * the list of returned results.
34 type match_name = MatchName of string
36 (**
37 * The name of a child of a node.
39 * For example, a binary expression has three possible child names:
41 * * binary_left_operand
42 * * binary_operator
43 * * binary_right_operand
45 * See the FFP schema definition in `src/parser/schema/schema_definition.ml`.
46 * Note that `BinaryExpression` has the prefix of `binary` and children like
47 * `left_operand`. When combined, this forms the full name
48 * `binary_left_operand`.
50 type child_type = ChildType of string
52 (**
53 * A query that can be run on the syntax tree. It will return the matched nodes
54 * of any matched `MatchPattern`s.
56 type pattern =
57 (**
58 * Match any node with the given kind, and whose children match the given
59 * patterns.
61 | NodePattern of {
62 kind: node_kind;
64 (**
65 * Mapping from child name to pattern that the child must satisfy.
66 * Children may be omitted from this map. No child type may be specified
67 * more than once.
69 children: (child_type * pattern) list;
72 (* Match any missing node. *)
73 | MissingNodePattern
75 (**
76 * Return the given node in the result list, assuming that the pattern overall
77 * matches. (This pattern by itself always matches; it's often used with
78 * `AndPattern`).
80 | MatchPattern of {
81 match_name: match_name;
84 (**
85 * Matches a given node if there is descendant node matching the given pattern
86 * anywhere in the subtree underneath the parent node.
88 | DescendantPattern of {
89 pattern: pattern;
92 (**
93 * Matches a list node (such as a statement list) only if all the children at
94 * the given indexes match their respective patterns.
96 | ListPattern of {
97 children: (int * pattern) list;
98 max_length: int option;
102 * Matches a given node if its raw text is exactly the specified string. The
103 * "raw" text doesn't include trivia.
105 | RawTextPattern of {
106 raw_text: string;
110 * Matches a given expression node if its type is a subtype of the given type.
111 * TODO: decide if we want to handle exact type equality or supertypes.
112 * TODO: decide what we want to name the pattern for decl types, if we create
113 * one.
115 | TypePattern of {
116 subtype_of: Typing_defs.locl Typing_defs.ty;
120 * Matches if all of the children patterns match.
122 | AndPattern of {
123 patterns: pattern list;
127 * Matches if any of the children patterns match.
129 * Note that currently:
131 * * This short-circuits, so the after any pattern succeeds, no other
132 * patterns will be evaluated. This means that if two patterns have
133 * `MatchPattern`s somewhere underneath them, then the `MatchPattern`s from
134 * at most one of these patterns will be included in the results.
135 * * The order in which the provided patterns are evaluated is not
136 * specified. (For example, in the future, we may want to order it so that
137 * patterns that rely only on syntactic information are executed before
138 * patterns that rely on type information.)
140 * Consequently, you shouldn't rely on `MatchPattern`s that are nested inside
141 * the constituent patterns.
143 | OrPattern of {
144 patterns: pattern list;
148 * Matches only if the given pattern does not match.
150 * Regardless of whether the child pattern matches or not, any matches that it
151 * produced are thrown away. (In the case that it doesn't match, we don't keep
152 * around "witness" matches explaining why this `NotPattern` didn't match.)
154 | NotPattern of {
155 pattern: pattern;
158 type matched_node = {
159 match_name: match_name;
160 kind: node_kind;
161 start_offset: int;
162 end_offset: int;
165 type result = {
167 * The list of nodes for which a `MatchPattern` matched.
169 matched_nodes: matched_node list;
172 type collected_type_map = Tast_type_collector.collected_type list Pos.AbsolutePosMap.t
174 (* The environment used in searching a single file. *)
175 type env = {
176 tcopt: TypecheckerOptions.t;
177 fileinfo: FileInfo.t;
178 path: Relative_path.t;
179 syntax_tree: SyntaxTree.t;
180 collected_types: collected_type_map option;
183 let empty_result: result option = Some { matched_nodes = [] }
185 let merge_results (lhs: result option) (rhs: result option): result option =
186 Option.merge lhs rhs ~f:(fun lhs rhs ->
187 { matched_nodes = lhs.matched_nodes @ rhs.matched_nodes }
190 let find_child_with_type
191 (node: Syntax.t)
192 (child_type: child_type)
193 : Syntax.t option =
194 let child_index =
195 Syntax.children_names node
196 |> List.findi ~f:(fun _i actual_child_type ->
197 match child_type with
198 | ChildType expected_child_type ->
199 expected_child_type = actual_child_type
201 |> Option.map ~f:fst
203 match child_index with
204 | None -> None
205 | Some index -> List.nth (Syntax.children node) index
207 let collect_types (env: env): env * collected_type_map =
208 match env with
209 | { collected_types = Some collected_types; _ } ->
210 (env, collected_types)
211 | { collected_types = None; tcopt; fileinfo; path; _ } ->
212 let tast = ServerIdeUtils.check_fileinfo tcopt path fileinfo in
213 let collected_types = Tast_type_collector.collect_types tast in
214 let env = { env with collected_types = Some collected_types } in
215 (env, collected_types)
217 let rec search_node
218 ~(env: env)
219 ~(pattern: pattern)
220 ~(node: Syntax.t)
221 : env * result option =
222 match pattern with
223 | NodePattern { kind; children } ->
224 let kind = match kind with NodeKind kind -> kind in
225 if (node |> Syntax.kind |> SyntaxKind.to_string) <> kind
226 then (env, None)
227 else
229 let patterns = List.map children ~f:(fun (child_type, pattern) ->
230 let child_node = find_child_with_type node child_type in
231 let child_node = Option.value_exn child_node in
232 (child_node, pattern)
233 ) in
234 search_and ~env ~patterns
236 | MissingNodePattern ->
237 if Syntax.kind node = SyntaxKind.Missing
238 then (env, empty_result)
239 else (env, None)
241 | MatchPattern { match_name } ->
242 let result = {
243 matched_nodes = [
245 match_name;
246 kind = NodeKind (SyntaxKind.to_string (Syntax.kind node));
247 start_offset = Syntax.start_offset node;
248 end_offset = Syntax.end_offset node;
251 } in
252 (env, Some result)
254 | DescendantPattern { pattern } ->
255 search_descendants ~env ~pattern ~node
257 | ListPattern { children; max_length } ->
258 let syntax_list =
259 let open Syntax in
260 match node.syntax with
261 | SyntaxList syntax_list ->
262 begin match max_length with
263 | None -> Some syntax_list
264 | Some max_length ->
265 if List.length syntax_list <= max_length
266 then Some syntax_list
267 else None
269 | _ -> None
272 begin match syntax_list with
273 | None -> (env, None)
274 | Some syntax_list ->
275 let open Option.Monad_infix in
276 let patterns = List.map children ~f:(fun (index, pattern) ->
277 List.nth syntax_list index >>| fun child_node ->
278 (child_node, pattern)
279 ) in
280 begin match Option.all patterns with
281 | Some patterns ->
282 search_and ~env ~patterns
283 | None ->
284 (* We tried to match a pattern for the child at at index N, but the syntax
285 list didn't have an Nth element. *)
286 (env, None)
290 | RawTextPattern { raw_text } ->
291 if Syntax.text node = raw_text
292 then (env, empty_result)
293 else (env, None)
295 | TypePattern { subtype_of } ->
296 Line_break_map.reset_global_state ();
297 let pos = Syntax.position env.path node in
298 begin match pos with
299 | None -> (env, None)
300 | Some pos ->
301 let (env, collected_types) = collect_types env in
302 let pos = Pos.to_absolute pos in
303 let tys = Tast_type_collector.get_from_pos_map pos collected_types in
304 let is_subtype_of (tast_env, ty) =
305 match ty with
306 | Typing_defs.LoclTy ty ->
307 Tast_env.can_subtype tast_env ty subtype_of
308 | Typing_defs.DeclTy _ty ->
309 false
311 match tys with
312 | Some tys
313 when List.exists tys ~f:is_subtype_of ->
314 (env, empty_result)
315 | Some _ | None ->
316 (env, None)
319 | AndPattern { patterns } ->
320 let patterns = List.map patterns ~f:(fun pattern -> (node, pattern)) in
321 search_and ~env ~patterns
323 | OrPattern { patterns } ->
324 let patterns = List.map patterns ~f:(fun pattern -> (node, pattern)) in
325 search_or ~env ~patterns
327 | NotPattern { pattern } ->
328 let (env, result) = search_node ~env ~node ~pattern in
329 begin match result with
330 | Some _ -> (env, None)
331 | None -> (env, empty_result)
334 (* TODO: this will likely have to become more intelligent *)
335 and search_descendants
336 ~(env: env)
337 ~(pattern: pattern)
338 ~(node: Syntax.t)
339 : env * result option =
340 List.fold_left_env
342 (Syntax.children node)
343 ~init:None
344 ~f:(fun env acc_result child ->
345 let (env, child_result) = search_node ~env ~pattern ~node:child in
346 let (env, descendants_result) =
347 search_descendants ~env ~pattern ~node:child in
348 let result = merge_results child_result descendants_result in
349 (env, (merge_results result acc_result))
352 and search_and
353 ~(env: env)
354 ~(patterns: (Syntax.t * pattern) list)
355 : env * result option =
356 List.fold_left_env
358 patterns
359 ~init:empty_result
360 ~f:(fun env result (node, pattern) ->
361 match result with
362 | None ->
363 (* Short-circuit. *)
364 (env, None)
365 | Some _ as result ->
366 let (env, pattern_result) = search_node ~env ~pattern ~node in
367 match pattern_result with
368 | None -> (env, None)
369 | Some _ as pattern_result ->
370 (env, merge_results result pattern_result)
373 and search_or
374 ~(env: env)
375 ~(patterns: (Syntax.t * pattern) list)
376 : env * result option =
377 List.fold_left_env
379 patterns
380 ~init:None
381 ~f:(fun env result (node, pattern) ->
382 match result with
383 | Some _ as result ->
384 (* Short-circuit. *)
385 (env, result)
386 | None ->
387 search_node ~env ~pattern ~node
390 let compile_pattern
391 (tcopt: TypecheckerOptions.t)
392 (json: Hh_json.json)
393 : (pattern, string) Result.t =
394 let open Result in
395 let open Result.Monad_infix in
397 let wrap_json_accessor f =
398 fun x -> Result.map_error (f x)
399 ~f:Hh_json.Access.access_failure_to_string
402 let get_string x = wrap_json_accessor (Hh_json.Access.get_string x) in
403 let get_obj x = wrap_json_accessor (Hh_json.Access.get_obj x) in
404 let get_array x = wrap_json_accessor (Hh_json.Access.get_array x) in
405 let keytrace_to_string = Hh_json.Access.keytrace_to_string in
406 let error_at_keytrace ~keytrace error_message =
407 Error (error_message ^ (keytrace_to_string keytrace))
410 let rec compile_pattern ~json ~keytrace : (pattern, string) Result.t =
411 get_string "pattern_type" (json, keytrace)
413 >>= fun (pattern_type, pattern_type_keytrace) ->
414 match pattern_type with
415 | "node_pattern" ->
416 compile_node_pattern ~json ~keytrace
417 | "missing_node_pattern" ->
418 compile_missing_node_pattern ~json ~keytrace
419 | "match_pattern" ->
420 compile_match_pattern ~json ~keytrace
421 | "descendant_pattern" ->
422 compile_descendant_pattern ~json ~keytrace
423 | "list_pattern" ->
424 compile_list_pattern ~json ~keytrace
425 | "raw_text_pattern" ->
426 compile_raw_text_pattern ~json ~keytrace
427 | "type_pattern" ->
428 compile_type_pattern ~json ~keytrace
429 | "and_pattern" ->
430 compile_and_pattern ~json ~keytrace
431 | "or_pattern" ->
432 compile_or_pattern ~json ~keytrace
433 | "not_pattern" ->
434 compile_not_pattern ~json ~keytrace
435 | pattern_type ->
436 error_at_keytrace ~keytrace:pattern_type_keytrace
437 (Printf.sprintf "Unknown pattern type '%s'" pattern_type)
439 and compile_node_pattern ~json ~keytrace : (pattern, string) Result.t =
440 get_string "kind" (json, keytrace)
442 >>= fun (kind, kind_keytrace) ->
443 let open Schema_definition in
444 let kind_info = List.find schema ~f:(fun schema_node ->
445 schema_node.description = kind
446 ) in
447 match kind_info with
448 | None ->
449 error_at_keytrace ~keytrace:kind_keytrace
450 (Printf.sprintf "Kind '%s' doesn't exist" kind)
451 | Some kind_info -> Ok kind_info
453 >>= fun kind_info ->
454 get_obj "children" (json, keytrace)
456 >>= fun (children_json, children_keytrace) ->
457 (* This has already been verified to be an object above. *)
458 let children = Hh_json.get_object_exn children_json in
460 let get_child_type
461 (child_keytrace: Hh_json.Access.keytrace)
462 (child_name: string)
463 : (child_type, string) Result.t =
464 (* We're given a field name like `binary_right_operand`, but the field
465 names in the schema are things like `right_operand`, and you have to
466 affix the prefix yourself. For consistency with other tooling, we want
467 to use `binary_right_operand` instead of just `right_operand`. *)
468 let get_prefixed_field_name field_name =
469 kind_info.prefix ^ "_" ^ field_name
471 let field = List.find kind_info.fields ~f:(fun (field_name, _) ->
472 (get_prefixed_field_name field_name) = child_name)
474 match field with
475 | None ->
476 let valid_types = List.map kind_info.fields ~f:(fun (field_name, _) ->
477 get_prefixed_field_name field_name
478 ) in
479 error_at_keytrace ~keytrace:child_keytrace
480 (Printf.sprintf
481 ("Unknown child type '%s'; "^^
482 "valid child types for a node of kind '%s' are: %s")
483 child_name
484 kind
485 (String.concat ~sep:", " valid_types))
486 | Some _ -> Ok (ChildType child_name)
488 let children_patterns =
489 List.mapi children ~f:(fun index (child_name, pattern_json) ->
490 let child_keytrace = (string_of_int index) :: children_keytrace in
491 get_child_type child_keytrace child_name >>= fun child_name ->
492 compile_pattern ~json:pattern_json ~keytrace:child_keytrace
493 >>| fun pattern ->
494 (child_name, pattern)
497 all children_patterns >>| fun children ->
498 NodePattern {
499 kind = NodeKind kind;
500 children;
503 and compile_missing_node_pattern ~json:_json ~keytrace:_keytrace =
504 Ok MissingNodePattern
506 and compile_match_pattern ~json ~keytrace =
507 get_string "match_name" (json, keytrace)
508 >>| fun (match_name, _match_name_keytrace) ->
509 MatchPattern {
510 match_name = MatchName match_name;
513 and compile_descendant_pattern ~json ~keytrace =
514 get_obj "pattern" (json, keytrace) >>= fun (pattern, pattern_keytrace) ->
515 compile_pattern ~json:pattern ~keytrace:pattern_keytrace >>| fun pattern ->
516 DescendantPattern {
517 pattern;
520 and compile_list_pattern ~json ~keytrace =
521 let max_length = Hh_json.get_field_opt
522 (Hh_json.Access.get_number_int "max_length")
523 json
526 get_obj "children" (json, keytrace)
527 >>= fun (children_json, children_keytrace) ->
529 (* This has already been verified to be an object above. *)
530 let children = Hh_json.get_object_exn children_json in
531 let children_patterns =
532 List.map children ~f:(fun (index_str, pattern_json) ->
533 let child_keytrace = index_str :: children_keytrace in
534 begin match int_of_string_opt index_str with
535 | Some index -> Ok index
536 | None ->
537 error_at_keytrace
538 (Printf.sprintf "Invalid integer key: %s" index_str)
539 ~keytrace:child_keytrace
541 >>= fun index ->
543 begin
544 if index >= 0
545 then
546 Ok index
547 else
548 error_at_keytrace "Integer key must be non-negative"
549 ~keytrace:child_keytrace
551 >>= fun index ->
553 compile_pattern ~json:pattern_json ~keytrace:child_keytrace
554 >>| fun pattern ->
556 (index, pattern)
559 Result.all children_patterns >>| fun children ->
560 ListPattern {
561 children;
562 max_length;
565 and compile_raw_text_pattern ~json ~keytrace =
566 get_string "raw_text" (json, keytrace)
567 >>| fun (raw_text, _raw_text_keytrace) ->
568 RawTextPattern {
569 raw_text;
572 and compile_type_pattern ~json ~keytrace =
573 get_obj "subtype_of" (json, keytrace)
574 >>= fun (subtype_of_json, subtype_of_keytrace) ->
575 let locl_ty = Typing_print.json_to_locl_ty
576 tcopt
577 ~keytrace:subtype_of_keytrace
578 subtype_of_json
580 match locl_ty with
581 | Ok locl_ty ->
582 Ok (TypePattern {
583 subtype_of = locl_ty;
585 | Error (Typing_defs.Wrong_phase message)
586 | Error (Typing_defs.Not_supported message)
587 | Error (Typing_defs.Deserialization_error message) ->
588 Error message
590 and compile_child_patterns_helper ~json ~keytrace =
591 get_array "patterns" (json, keytrace) >>= fun (pattern_list, pattern_list_keytrace) ->
592 let compiled_patterns = List.mapi pattern_list (fun i json ->
593 let keytrace = (string_of_int i) :: pattern_list_keytrace in
594 compile_pattern ~json ~keytrace
595 ) in
596 Result.all compiled_patterns
598 and compile_and_pattern ~json ~keytrace =
599 compile_child_patterns_helper ~json ~keytrace >>| fun patterns ->
600 AndPattern {
601 patterns;
604 and compile_or_pattern ~json ~keytrace =
605 compile_child_patterns_helper ~json ~keytrace >>| fun patterns ->
606 OrPattern {
607 patterns;
610 and compile_not_pattern ~json ~keytrace =
611 get_obj "pattern" (json, keytrace) >>= fun (json, keytrace) ->
612 compile_pattern ~json ~keytrace >>| fun pattern ->
613 NotPattern {
614 pattern;
618 compile_pattern ~json ~keytrace:[]
620 let result_to_json ~(sort_results: bool) (result: result option): Hh_json.json =
621 let open Hh_json in
622 match result with
623 | None -> JSON_Null
624 | Some result ->
625 let matched_nodes = result.matched_nodes in
626 let matched_nodes =
627 if sort_results
628 then List.sort matched_nodes ~compare:Pervasives.compare
629 else matched_nodes
631 let matched_nodes =
632 List.map matched_nodes ~f:(fun matched_node ->
633 let match_name =
634 match matched_node.match_name
635 with MatchName match_name -> match_name
637 let kind =
638 match matched_node.kind
639 with NodeKind kind -> kind
641 JSON_Object [
642 "match_name", JSON_String match_name;
643 "kind", JSON_String kind;
644 "start_offset", Hh_json.int_ matched_node.start_offset;
645 "end_offset", Hh_json.int_ matched_node.end_offset;
648 JSON_Object [
649 "matched_nodes", JSON_Array matched_nodes;
652 let search
653 (tcopt: TypecheckerOptions.t)
654 (path: Relative_path.t)
655 (fileinfo: FileInfo.t)
656 (pattern: pattern)
657 : result option =
658 let source_text = Full_fidelity_source_text.from_file path in
659 let syntax_tree = SyntaxTree.make source_text in
661 let env = {
662 tcopt;
663 fileinfo;
664 path;
665 syntax_tree;
666 collected_types = None;
667 } in
668 let (_env, result) =
669 search_node ~env ~pattern ~node:(SyntaxTree.root env.syntax_tree) in
670 result
672 let go
673 (genv: ServerEnv.genv)
674 (env: ServerEnv.env)
675 ~(sort_results: bool)
676 ~(files_to_search: string list option)
677 (input: Hh_json.json)
678 : (Hh_json.json, string) Result.t
680 let open Result.Monad_infix in
681 compile_pattern env.ServerEnv.tcopt input >>| fun pattern ->
683 let num_files_searched = ref 0 in
684 let last_printed_num_files_searched = ref 0 in
685 let done_searching = ref false in
686 let progress_fn ~total:_total ~start:_start ~(length: int): unit =
687 let is_bucket_empty = (length = 0) in
688 if not !done_searching then begin
689 num_files_searched := !num_files_searched + length;
690 if (
691 !num_files_searched - !last_printed_num_files_searched >= 10000
692 || is_bucket_empty
693 ) then begin
694 Hh_logger.log "CST search: searched %d files..." !num_files_searched;
695 last_printed_num_files_searched := !num_files_searched;
697 end;
698 if is_bucket_empty
699 then done_searching := true;
702 let next_files: (Relative_path.t * FileInfo.t * pattern) list Hh_bucket.next =
703 let with_file_data path =
704 let path = Relative_path.create_detect_prefix path in
705 match Relative_path.Map.get env.ServerEnv.files_info path with
706 | Some fileinfo -> Some (path, fileinfo, pattern)
707 | None ->
708 (* We may not have the file information for a file such as one that we
709 ignore in `.hhconfig`. *)
710 None
712 match files_to_search with
713 | Some files_to_search ->
714 let files_to_search =
715 Sys_utils.parse_path_list files_to_search
716 |> List.filter_map ~f:with_file_data
718 MultiWorker.next
719 genv.ServerEnv.workers
720 files_to_search
721 ~progress_fn
722 | None ->
723 let indexer = genv.ServerEnv.indexer FindUtils.is_php in
724 begin fun () ->
725 let files = indexer () |> List.filter_map ~f:with_file_data in
726 progress_fn ~total:0 ~start:0 ~length:(List.length files);
727 Hh_bucket.of_list files
731 (* Extract the `tcopt` so that we don't close over the entire `env`. *)
732 let tcopt = env.ServerEnv.tcopt in
733 let job
734 (acc: (Relative_path.t * result) list)
735 (inputs: (Relative_path.t * FileInfo.t * pattern) list)
736 : (Relative_path.t * result) list =
737 List.fold inputs
738 ~init:acc
739 ~f:(fun acc (path, fileinfo, pattern) ->
741 match search tcopt path fileinfo pattern with
742 | Some result -> (path, result) :: acc
743 | None -> acc
744 with e ->
745 let stack = Printexc.get_backtrace () in
746 let prefix = Printf.sprintf
747 "Error while running CST search on path %s:\n"
748 (Relative_path.to_absolute path)
750 Hh_logger.exc e ~prefix ~stack;
751 raise e
755 let results = MultiWorker.call
756 genv.ServerEnv.workers
757 ~job
758 ~neutral:[]
759 ~merge:List.rev_append
760 ~next:next_files
763 let results =
764 if sort_results
765 then List.sort results ~compare:Pervasives.compare
766 else results
769 Hh_json.JSON_Object (List.map results ~f:(fun (path, result) ->
770 (Relative_path.to_absolute path, result_to_json ~sort_results (Some result))