Move all enums/typedefs into common
[hiphop-php.git] / hphp / hack / src / facts / symbols / indexBuilder.ml
blob39b8794799752287eddd125367392f82d02cf883
1 (**
2 * Copyright (c) 2016, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
11 open Core_kernel
12 open SearchUtils
13 open Facts
(* Running totals kept by the parsing code in this file: files_scanned is
   bumped once for every regular file parse_file reads, error_count once for
   every file parse_batch fails to parse.
   NOTE(review): both are updated from inside the parse job; if jobs run in
   separate worker processes the parent's copies may never change — confirm. *)
let files_scanned : int ref = ref 0
let error_count : int ref = ref 0
(* Settings for one run of the index builder, as returned by parse_options. *)
type index_builder_context = {
  (* Folder to scan; the anonymous command-line argument, "." by default. *)
  repo_folder: string;
  (* Destination given with --sqlite, if any. *)
  sqlite_filename: string option;
  (* Destination given with --text, if any. *)
  text_filename: string option;
  (* Destination given with --json, if any. *)
  json_filename: string option;
}
26 (* Combine two results *)
(* Parse one single file and capture information about it.

   Returns one entry per type and one per function that Facts_parser reports
   for [filename]; type entries come first. A directory yields no entries and
   is not counted. Every regular file that is read bumps [files_scanned],
   whether or not any facts were extracted from it.

   NOTE(review): the two empty-list results below (directory, and no facts)
   are the only values that fit the si_results return type here, but were not
   visible in the copy this was reviewed from — confirm against upstream. *)
let parse_file (filename: string): si_results =
  if Sys.is_directory filename then begin
    []
  end else begin
    let text = Core_kernel.In_channel.read_all filename in
    let enable_hh_syntax =
      Hhbc_options.enable_hiphop_syntax !Hhbc_options.compiler_options in
    let enable_xhp =
      Hhbc_options.enable_xhp !Hhbc_options.compiler_options in
    let rp = Relative_path.from_root filename in

    (* Just the facts ma'am *)
    let fact_opt = Facts_parser.from_text
      true true enable_hh_syntax enable_xhp rp text in

    (* Turn the extracted facts into search-index entries *)
    let result =
      match fact_opt with
      | Some facts ->

        (* Identify all classes in the file *)
        let class_keys = InvSMap.keys facts.types in
        let classes_mapped = Core_kernel.List.map class_keys ~f:(fun key -> begin
          let info_opt = InvSMap.get key facts.types in
          let kind = begin
            match info_opt with
            | None -> SI_Unknown
            | Some info -> begin
                match info.kind with
                | TKClass -> SI_Class
                | TKInterface -> SI_Interface
                | TKEnum -> SI_Enum
                | TKTrait -> SI_Trait
                | TKMixed -> SI_Mixed
                | _ -> SI_Unknown
              end
          end in
          {
            si_name = key;
            si_kind = kind;
          }
        end) in

        (* Identify all functions in the file *)
        let functions_mapped = Core_kernel.List.map facts.functions ~f:(fun funcname -> {
          si_name = funcname;
          si_kind = SI_Function;
        }) in

        (* Return unified results *)
        List.append classes_mapped functions_mapped
      | None ->
        []
    in
    files_scanned := !files_scanned + 1;
    result;
  end
(* Worker job: parse every path in [files] and prepend what each one yields
   onto [acc]. A path that does not exist is reported on stderr and skipped.
   A path whose parsing raises is reported on stderr, counted in
   [error_count], and skipped; the exception does not escape. *)
let parse_batch acc files =
  let absorb collected path =
    if not (Path.file_exists (Path.make path)) then begin
      Printf.fprintf stderr "File %s does not exist.\n" path;
      collected
    end else
      try
        List.append (parse_file path) collected
      with exn ->
        error_count := !error_count + 1;
        Printf.fprintf stderr "exception: %s\nfailed to parse \"%s\"\n"
          (Caml.Printexc.to_string exn)
          path;
        collected
  in
  List.fold files ~init:acc ~f:absorb
(* Hand [files] to MultiWorker: parse_batch is the job, results start from
   the empty list, and partial results are merged with List.append. *)
let parallel_parse ~workers files =
  let next = MultiWorker.next workers files in
  MultiWorker.call workers
    ~job:parse_batch
    ~neutral:[]
    ~merge:List.append
    ~next
(* Worker entry point, registered when this module is initialised. There is
   no state to restore, so the restore callback does nothing. *)
let entry =
  let restore () = () in
  WorkerController.register_entry_point ~restore
(* Initialise shared memory with the default configuration and create the
   workers, using the processor count reported by Sys_utils.nproc both as
   SharedMem's num_workers and as MultiWorker's nbr_procs. *)
let init_workers () =
  let nbr_procs = Sys_utils.nproc () in
  let heap_handle =
    SharedMem.init GlobalConfig.default_sharedmem_config ~num_workers:nbr_procs
  in
  MultiWorker.make
    ?call_wrapper:None
    ~saved_state:()
    ~entry
    ~nbr_procs
    ~gc_control:GlobalConfig.gc_control
    ~heap_handle
(* Basic help text: a one-line synopsis headed by the name this program was
   invoked with. *)
let usage =
  let program = Sys.argv.(0) in
  Printf.sprintf
    "Usage: %s [--sqlite file] [--text file] [--json file] [repository]\n"
    program
(* Read the command line into an index_builder_context.

   --sqlite, --text and --json each take a destination file name. The
   anonymous argument names the repository folder and defaults to "."; if
   several are given, the last one wins. Prints the chosen repository on
   stdout before returning. *)
let parse_options (): index_builder_context =
  let sqlite_filename = ref None in
  let text_filename = ref None in
  let json_filename = ref None in
  let repository = ref "." in
  (* The option list never changes while parsing, so a plain list with
     Arg.parse is enough. *)
  let options = [
    "--sqlite",
      Arg.String (fun x -> sqlite_filename := (Some x)),
      "[filename] Save the global index in a Sqlite database";

    "--text",
      Arg.String (fun x -> text_filename := (Some x)),
      "[filename] Save the global index in a finite-state transducer (FST) file";

    "--json",
      Arg.String (fun x -> json_filename := (Some x)),
      "[filename] Save the global index in a JSON file";

  ] in
  Arg.parse options (fun anonymous_arg -> repository := anonymous_arg) usage;

  (* Print what we're about to do *)
  Printf.printf "Building global symbol index for [%s]\n%!"
    !repository;

  (* Parameters for this execution *)
  {
    repo_folder = !repository;
    sqlite_filename = !sqlite_filename;
    text_filename = !text_filename;
    json_filename = !json_filename;
  }
(* Let's use the unix find command which seems to be really quick at this sort of thing.

   Runs [find <path> -name "*.php"] through the shell and returns the lines
   it prints, last-printed first.
   Raises Failure if find does not terminate with exit status 0. *)
let gather_file_list (path: string): string list =
  (* Quote the path: the command goes through the shell, so an unquoted path
     containing spaces or metacharacters would be split or reinterpreted. *)
  let cmdline =
    Printf.sprintf "find %s -name \"*.php\"" (Caml.Filename.quote path) in
  let channel = Unix.open_process_in cmdline in
  (* Reversed so the result keeps the last-line-first order callers have
     always received. *)
  let lines = List.rev (Core_kernel.In_channel.input_lines channel) in
  (* Close outside of [assert]: assertions can be compiled out, which would
     skip closing the channel and reaping the child altogether. *)
  match Unix.close_process_in channel with
  | Unix.WEXITED 0 -> lines
  | Unix.WEXITED _ | Unix.WSIGNALED _ | Unix.WSTOPPED _ ->
    failwith (Printf.sprintf "command did not exit cleanly: %s" cmdline)
(* Run [f ()], then print [name] followed by the elapsed wall-clock seconds
   (one decimal) on stdout and flush. Returns whatever [f] returned; if [f]
   raises, nothing is printed and the exception propagates. *)
let measure_time ~f ~(name: string) =
  let started_at = Unix.gettimeofday () in
  let outcome = f () in
  let elapsed = Unix.gettimeofday () -. started_at in
  Printf.printf "%s [%0.1f secs]\n%!" name elapsed;
  outcome
(* Run one export if a destination was requested: announce how many symbols
   are being written to [label], then time [write filename]. Does nothing
   when no destination was given. *)
let export_if_requested ~(label: string) ~(symbol_count: int) ~write filename_opt =
  match filename_opt with
  | None ->
    ()
  | Some filename ->
    Printf.printf "Writing %d symbols to %s... %!" symbol_count label;
    measure_time ~f:(fun () -> write filename) ~name:""

(* Run the application: read the command line, list the repository's PHP
   files, parse them in parallel, then write each requested export. *)
let main (): unit =
  Daemon.check_entry_point ();
  PidLog.init "/tmp/hh_server/global_index_builder.pids";
  PidLog.log ~reason:"main" (Unix.getpid ());

  (* Gather list of files *)
  let ctxt = parse_options () in
  Printf.printf "Scanning repository %s... %!" ctxt.repo_folder;
  let files = measure_time ~f:(fun () -> gather_file_list ctxt.repo_folder) ~name:"" in

  (* Spawn the parallel parser *)
  Printf.printf "Parsing %d files... %!" (List.length files);
  let workers = Some (init_workers ()) in
  let results = measure_time ~f:(fun () -> parallel_parse ~workers files) ~name:"" in
  let symbol_count = List.length results in

  (* Are we exporting a sqlite file? *)
  export_if_requested ~label:"sqlite" ~symbol_count
    ~write:(fun filename -> SqliteIndexWriter.record_in_db filename results)
    ctxt.sqlite_filename;

  (* Are we exporting a text file? *)
  export_if_requested ~label:"text" ~symbol_count
    ~write:(fun filename -> TextIndexWriter.record_in_textfile filename results)
    ctxt.text_filename;

  (* Are we exporting a json file? *)
  export_if_requested ~label:"json" ~symbol_count
    ~write:(fun filename -> JsonIndexWriter.record_in_jsonfile filename results)
    ctxt.json_filename
(* Program entry: time the whole run, print the closing banner, then "Done". *)
let () =
  measure_time ~f:main ~name:"\n\nGlobal Index Built successfully:";
  Printf.printf "Done%s" ""