Gather a bit more information from sysinfo()
[hiphop-php.git] / hphp / hack / src / facts / symbols / indexBuilder.ml
blobffc5fb06d8e4a64fbe37002743c43a731c672690
1 (*
2 * Copyright (c) 2016, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
11 open Hh_prelude
12 open IndexBuilderTypes
13 open SearchUtils
14 open Facts
16 (* Keep track of all references yet to scan *)
17 let files_scanned = ref 0
19 let error_count = ref 0
21 (* Extract kind, abstract, and final flags *)
22 let get_details_from_info (info_opt : Facts.type_facts option) :
23 si_kind * bool * bool =
24 Facts_parser.(
25 match info_opt with
26 | None -> (SI_Unknown, false, false)
27 | Some info ->
28 let k =
29 match info.kind with
30 | TKClass -> SI_Class
31 | TKInterface -> SI_Interface
32 | TKEnum -> SI_Enum
33 | TKTrait -> SI_Trait
34 | TKMixed -> SI_Mixed
35 | TKRecord -> SI_Unknown
36 | TKTypeAlias -> SI_Typedef
37 | _ -> SI_Unknown
39 let is_abstract = info.flags land flags_abstract > 0 in
40 let is_final = info.flags land flags_final > 0 in
41 (k, is_abstract, is_final))
43 let convert_facts ~(path : Relative_path.t) ~(facts : Facts.facts) : si_capture
45 let relative_path_str = Relative_path.suffix path in
46 (* Identify all classes in the file *)
47 let class_keys = InvSMap.keys facts.types in
48 let classes_mapped =
49 List.map class_keys ~f:(fun key ->
50 let info_opt = InvSMap.find_opt key facts.types in
51 let (kind, is_abstract, is_final) = get_details_from_info info_opt in
53 (* We need to strip away the preceding backslash for hack classes
54 * but leave intact the : for xhp classes. The preceding : symbol
55 * is needed to distinguish which type of a class you want. *)
56 sif_name = Utils.strip_ns key;
57 sif_kind = kind;
58 sif_filepath = relative_path_str;
59 sif_is_abstract = is_abstract;
60 sif_is_final = is_final;
63 (* Identify all functions in the file *)
64 let functions_mapped =
65 List.map facts.functions ~f:(fun funcname ->
67 sif_name = funcname;
68 sif_kind = SI_Function;
69 sif_filepath = relative_path_str;
70 sif_is_abstract = false;
71 sif_is_final = false;
74 (* Handle constants *)
75 let constants_mapped =
76 List.map facts.constants ~f:(fun constantname ->
78 sif_name = constantname;
79 sif_kind = SI_GlobalConstant;
80 sif_filepath = relative_path_str;
81 sif_is_abstract = false;
82 sif_is_final = false;
85 (* Return unified results *)
86 List.append classes_mapped functions_mapped |> List.append constants_mapped
88 (* Parse one single file and capture information about it *)
89 let parse_one_file
90 ~(namespace_map : (string * string) list) ~(path : Relative_path.t) :
91 si_capture =
92 let filename = Relative_path.to_absolute path in
93 let text = In_channel.read_all filename in
94 (* Just the facts ma'am *)
95 Facts_parser.mangle_xhp_mode := false;
96 let fact_opt =
97 Facts_parser.from_text
98 ~php5_compat_mode:false
99 ~hhvm_compat_mode:true
100 ~disable_legacy_soft_typehints:false
101 ~allow_new_attribute_syntax:false
102 ~disable_legacy_attribute_syntax:false
103 ~enable_xhp_class_modifier:false
104 ~disable_xhp_element_mangling:false
105 ~auto_namespace_map:namespace_map
106 ~filename:path
107 ~text
109 (* Iterate through facts and print them out *)
110 let result =
111 match fact_opt with
112 | Some facts -> convert_facts ~path ~facts
113 | None -> []
115 files_scanned := !files_scanned + 1;
116 result
118 (* Parse the file using the existing context*)
119 let parse_file (ctxt : index_builder_context) (path : Relative_path.t) :
120 si_capture =
121 parse_one_file ~path ~namespace_map:ctxt.namespace_map
123 (* Parse a batch of files *)
124 let parse_batch
125 (ctxt : index_builder_context)
126 (acc : si_capture)
127 (files : Relative_path.t list) : si_capture =
128 let repo_path = Path.make ctxt.repo_folder in
129 if ctxt.set_paths_for_worker then (
130 Relative_path.set_path_prefix Relative_path.Root repo_path;
131 Relative_path.set_path_prefix
132 Relative_path.Hhi
133 (Option.value_exn ctxt.hhi_root_folder)
135 List.fold files ~init:acc ~f:(fun acc file ->
137 let res = (parse_file ctxt) file in
138 List.append res acc
139 with
140 | exn ->
141 error_count := !error_count + 1;
142 Hh_logger.log
143 "IndexBuilder exception: %s. Failed to parse [%s]"
144 (Caml.Printexc.to_string exn)
145 (Relative_path.to_absolute file);
146 acc)
148 let parallel_parse
149 ~(workers : MultiWorker.worker list option)
150 (files : Relative_path.t list)
151 (ctxt : index_builder_context) : si_capture =
152 MultiWorker.call
153 workers
154 ~job:(parse_batch ctxt)
155 ~neutral:[]
156 ~merge:List.append
157 ~next:(MultiWorker.next workers files)
159 let entry =
160 WorkerControllerEntryPoint.register ~restore:(fun () ~(worker_id : int) ->
161 Hh_logger.set_id (Printf.sprintf "indexBuilder %d" worker_id))
163 (* Create one worker per cpu *)
164 let init_workers () =
165 let nbr_procs = Sys_utils.nbr_procs in
166 let gc_control = GlobalConfig.gc_control in
167 let config = SharedMem.default_config in
168 let heap_handle = SharedMem.init config ~num_workers:nbr_procs in
169 MultiWorker.make
170 ?call_wrapper:None
171 ~longlived_workers:false
172 ~saved_state:()
173 ~entry
174 nbr_procs
175 ~gc_control
176 ~heap_handle
178 let gather_file_list (path : string) : Relative_path.t list =
179 Find.find ~file_only:true ~filter:FindUtils.file_filter [Path.make path]
180 |> List.map ~f:(fun path -> Relative_path.create_detect_prefix path)
182 (* Run something and measure its duration *)
183 let measure_time ~(silent : bool) ~f ~(name : string) =
184 let start_time = Unix.gettimeofday () in
185 let result = f () in
186 let end_time = Unix.gettimeofday () in
187 if not silent then
188 Hh_logger.log "%s [%0.1f secs]" name (end_time -. start_time);
189 result
191 (* All data is ready. Identify unique namespaces and filepaths *)
192 let convert_capture (incoming : si_capture) : si_scan_result =
193 let result =
195 sisr_capture = incoming;
196 sisr_namespaces = Caml.Hashtbl.create 0;
197 sisr_filepaths = Caml.Hashtbl.create 0;
200 let ns_id = ref 1 in
201 List.iter incoming ~f:(fun s ->
202 (* Find / add namespace *)
203 let (namespace, _name) = Utils.split_ns_from_name s.sif_name in
204 if not (Caml.Hashtbl.mem result.sisr_namespaces namespace) then (
205 Caml.Hashtbl.add result.sisr_namespaces namespace !ns_id;
206 incr ns_id
209 (* Find / add filepath hashes *)
210 if not (Caml.Hashtbl.mem result.sisr_filepaths s.sif_filepath) then
211 let path_hash = SharedMemHash.hash_string s.sif_filepath in
212 Caml.Hashtbl.add result.sisr_filepaths s.sif_filepath path_hash);
213 result
215 let export_to_custom_writer
216 (json_exported_files : string list) (ctxt : index_builder_context) : unit =
217 match (ctxt.custom_service, ctxt.custom_repo_name) with
218 | (Some _, None)
219 | (None, Some _) ->
220 print_endline "API export requires both a service and a repo name."
221 | (None, None) -> ()
222 | (Some service, Some repo_name) ->
223 let name =
224 Printf.sprintf
225 "Exported to custom symbol index writer [%s] [%s] in "
226 service
227 repo_name
229 measure_time
230 ~silent:ctxt.silent
231 ~f:(fun () ->
232 CustomJsonUploader.send_to_custom_writer
233 ~workers:None
234 ~print_file_status:true
235 ~files:json_exported_files
236 ~service
237 ~repo_name
238 ~repo_folder:ctxt.repo_folder)
239 ~name
241 (* Run the index builder project *)
242 let go (ctxt : index_builder_context) (workers : MultiWorker.worker list option)
243 : unit =
244 if Option.is_some ctxt.json_repo_name then
245 (* if json repo is specified, just export to custom writer directly *)
246 let json_exported_files =
247 match ctxt.json_repo_name with
248 | None -> []
249 | Some repo_name ->
250 Sys_utils.collect_paths
251 begin
252 fun filename ->
253 Str.string_match (Str.regexp "[./a-zA-Z0-9_]+.json") filename 0
255 repo_name
257 export_to_custom_writer json_exported_files ctxt
258 else
259 (* Gather list of files *)
260 let name =
261 Printf.sprintf "Scanned repository folder [%s] in " ctxt.repo_folder
263 let hhconfig_path = Path.concat (Path.make ctxt.repo_folder) ".hhconfig" in
264 let files =
265 (* Sanity test. If the folder does not have an .hhconfig file, this is probably
266 * an integration test that's using a fake repository. Don't do anything! *)
267 if Disk.file_exists (Path.to_string hhconfig_path) then
268 let options = ServerArgs.default_options ~root:ctxt.repo_folder in
269 let (hhconfig, _) =
270 ServerConfig.load
271 ~silent:ctxt.silent
272 (Relative_path.create
273 Relative_path.Root
274 (Path.to_string hhconfig_path))
275 options
277 let popt = ServerConfig.parser_options hhconfig in
278 let ctxt =
279 { ctxt with namespace_map = ParserOptions.auto_namespace_map popt }
281 measure_time
282 ~silent:ctxt.silent
283 ~f:(fun () -> gather_file_list ctxt.repo_folder)
284 ~name
285 else (
286 if not ctxt.silent then
287 Hh_logger.log
288 "The repository [%s] lacks an .hhconfig file. Skipping index of repository."
289 ctxt.repo_folder;
293 (* If desired, get the HHI root folder and add all HHI files from there *)
294 let files =
295 if Option.is_some ctxt.hhi_root_folder then
296 let hhi_root_folder_path =
297 Path.to_string (Option.value_exn ctxt.hhi_root_folder)
299 let name =
300 Printf.sprintf "Scanned HHI folder [%s] in " hhi_root_folder_path
302 let hhi_files =
303 measure_time
304 ~silent:ctxt.silent
305 ~f:(fun () -> gather_file_list hhi_root_folder_path)
306 ~name
308 (* Merge lists *)
309 List.append files hhi_files
310 else
311 files
313 (* Spawn the parallel parser *)
314 let name = Printf.sprintf "Parsed %d files in " (List.length files) in
315 let capture =
316 measure_time
317 ~silent:ctxt.silent
318 ~f:(fun () -> parallel_parse ~workers files ctxt)
319 ~name
321 (* Convert the raw capture into results *)
322 let results = convert_capture capture in
323 (* Are we exporting a sqlite file? *)
324 begin
325 match ctxt.sqlite_filename with
326 | None -> ()
327 | Some filename ->
328 let name =
329 Printf.sprintf
330 "Wrote %d symbols to sqlite in "
331 (List.length results.sisr_capture)
333 measure_time
334 ~silent:ctxt.silent
335 ~f:(fun () -> SqliteSymbolIndexWriter.record_in_db filename results)
336 ~name
337 end;
339 (* Are we exporting a text file? *)
340 begin
341 match ctxt.text_filename with
342 | None -> ()
343 | Some filename ->
344 let name =
345 Printf.sprintf
346 "Wrote %d symbols to text in "
347 (List.length results.sisr_capture)
349 measure_time
350 ~silent:ctxt.silent
351 ~f:(fun () ->
352 TextSymbolIndexWriter.record_in_textfile filename results)
353 ~name
354 end;
356 (* Are we exporting a json file? *)
357 let json_exported_files =
358 match ctxt.json_filename with
359 | None -> []
360 | Some filename ->
361 let name =
362 Printf.sprintf
363 "Wrote %d symbols to json in "
364 (List.length results.sisr_capture)
366 measure_time
367 ~silent:ctxt.silent
368 ~f:(fun () ->
369 JsonSymbolIndexWriter.record_in_jsonfiles
370 ctxt.json_chunk_size
371 filename
372 results)
373 ~name
375 (* Are we exporting to a custom writer? *)
376 export_to_custom_writer json_exported_files ctxt