2 * Copyright (c) 2015, Facebook, Inc.
5 * This source code is licensed under the MIT license found in the
6 * LICENSE file in the "hack" directory of this source tree.
15 rageid
: string option;
19 let format_failure (message
: string) (failure
: Lwt_utils.Process_failure.t
) :
21 let open Lwt_utils.Process_failure
in
23 "%s: %s\n%s\nSTDOUT:\n%s\nSTDERR:\n%s\n"
25 (Process.status_to_string failure
.process_status
)
30 let get_stack (pid
, reason
) : string Lwt.t
=
31 let pid = string_of_int
pid in
32 let format msg
= Printf.sprintf
"PSTACK %s (%s)\n%s\n" pid reason msg
in
34 Lwt_utils.exec_checked
Exec_command.Pstack
[| pid |] ~timeout
:60.0
37 let stack = result
.Lwt_utils.Process_success.stdout
in
38 Lwt.return
(format stack)
40 (* pstack is just an alias for gstack, but it's not present on all systems. *)
42 Lwt_utils.exec_checked
Exec_command.Gstack
[| pid |] ~timeout
:60.0
45 let stack = result
.Lwt_utils.Process_success.stdout
in
46 Lwt.return
(format stack)
48 let err = "unable to get stack: " ^ e
.Lwt_utils.Process_failure.stderr
in
49 Lwt.return
(format err))
53 Lwt_utils.exec_checked ~timeout
:20.0 Exec_command.Pgrep
[| "-a"; pattern
|]
56 | Ok
{ Lwt_utils.Process_success.stdout
; _
} ->
57 let re = Str.regexp
{|^\
([0-9]+\
) \
(.*\
)$
|} in
59 String.split_lines stdout
60 |> List.filter_map ~f
:(fun s
->
62 if Str.string_match
re s
0 then
63 let pid = Str.matched_group
1 s
|> int_of_string
in
64 let reason = Str.matched_group
2 s
in
71 | Error _
-> Lwt.return
[]
73 let rage_pstacks (env
: env
) : string Lwt.t
=
74 (* We'll look at all relevant pids: all those from the hh_server
75 binary, and the hh_client binary, and all those in the server pids_file.
76 We put them into a map from pid to "reason" so that each relevant
77 pid is only picked once. The "reason" is a useful string description:
78 pgrep shows the cmdline that spawned a given pid so we use that as
79 a reason; the server pids_file also stores reasons. *)
80 let%lwt hh_server_pids
= pgrep "hh_server" in
81 let%lwt hh_client_pids
= pgrep "hh_client" in
82 let server_pids = PidLog.get_pids
(ServerFiles.pids_file env
.root
) in
83 let pids = IMap.empty
in
85 List.fold hh_server_pids ~init
:pids ~f
:(fun acc
(pid, reason) ->
86 IMap.add
pid reason acc
)
89 List.fold hh_client_pids ~init
:pids ~f
:(fun acc
(pid, reason) ->
90 IMap.add
pid reason acc
)
93 List.fold
server_pids ~init
:pids ~f
:(fun acc
(pid, reason) ->
94 IMap.add
pid reason acc
)
96 let pids = IMap.bindings
pids in
97 (* Pstacks take a while to collect. And some are uninteresting.
98 We'll filter out all scuba, and all but one slave. Keep just
99 one slave in case the workers are stuck for some reason. *)
101 List.fold
pids ~init
:([], false) ~f
:(fun (acc
, has_slave
) (pid, reason) ->
102 if String_utils.is_substring
"scuba for process" reason then
104 else if String_utils.string_starts_with
reason "slave" then
108 ((pid, reason) :: acc
, true)
110 ((pid, reason) :: acc
, has_slave
))
112 (* I don't know why pstacks are slow; I don't know what their
113 bottleneck is. But I observed that doing them in parallel didn't hurt. *)
114 let%lwt stacks
= Lwt_list.map_p
get_stack pids in
115 let stacks = String.concat
stacks ~sep
:"\n\n" in
118 let rage_ps () : string Lwt.t
=
120 -A means "all processes"
121 -F means "extra full output" i.e. lots of fields of output. *)
123 Lwt_utils.exec_checked ~timeout
:20.0 Exec_command.Ps
[| "-AF" |]
126 | Ok
{ Lwt_utils.Process_success.stdout
; _
} -> Lwt.return stdout
127 | Error failure
-> Lwt.return
(format_failure "" failure
)
130 (env
: env
) (hhconfig_version_raw
: Config_file.version
option) :
135 ~f
:(Config_file.version_to_string_opt ~pad
:false)
137 let hhconfig_update =
142 "hg pull -B releases/hack/v%s\nhg update -C remote/releases/hack/v%s"
147 Sys.getenv_opt
"HH_HOME" |> Option.value ~default
:"[unset]"
150 Sys_utils.expanduser
"~/.hack_rc_mode"
151 |> Sys_utils.cat_or_failed
152 |> Option.value ~default
:"[absent]"
154 let%lwt hh_server_version_result
=
155 Lwt_utils.exec_checked
157 (Exec_command.Hh_server
"hh_server")
158 [| "--version"; Path.to_string env
.root
|]
160 let hh_server_version =
161 match hh_server_version_result
with
162 | Ok
{ Lwt_utils.Process_success.stdout
; _
} -> stdout
163 | Error failure
-> format_failure "" failure
167 ( "build_commit_time: %d (%s)\n"
168 ^^
"build_mode: %s\n"
169 ^^
"build_revision: %s\n"
170 ^^
"hhconfig_version: %s\n"
172 ^^
"~/.hack_rc_mode: %s\n"
173 ^^
"executable_name: %s\n"
174 ^^
"\nhh_server --version: %s\n"
176 Build_id.build_commit_time
177 Build_id.build_commit_time_string
179 ( if String.equal
Build_id.build_revision
"" then
182 Build_id.build_revision
)
183 (Option.value version ~default
:"[absent]")
190 Lwt.return
hh_version
192 let rage_hh_server_state (env
: env
) :
193 ((string * string) list
, string) result
Lwt.t
=
195 let json_item_to_pair json_item
=
198 [("name", JSON_String name
); ("contents", JSON_String contents
)]
200 [("contents", JSON_String contents
); ("name", JSON_String name
)] ->
201 ("hh_server_" ^ name
, contents
)
202 | _
-> raise
(Syntax_error
"unexpected item; expected {name:_, contents:_}")
204 let%lwt hh_server_state_result
=
205 Lwt_utils.exec_checked
211 "--autostart-server";
216 Path.to_string env
.root
;
219 match hh_server_state_result
with
221 Lwt.return_error
(format_failure "failed to obtain" failure
)
222 | Ok
{ Lwt_utils.Process_success.stdout
; _
} ->
225 match json_of_string stdout
with
226 | JSON_Array json_items
->
227 Lwt.return_ok
(List.map json_items ~f
:json_item_to_pair)
228 | _
-> raise
(Syntax_error
"unexpected json; expected array")
229 with Syntax_error msg
->
231 (Printf.sprintf
"unable to parse json: %s\n\n%s\n" msg stdout
)
234 let rage_www (env
: env
) : ((string * string) option * string) Lwt.t
=
236 Process.env_to_array
(Process_types.Augment
["HGPLAIN=1"])
239 Lwt_utils.exec_checked
246 "last(public() & :: .)";
250 Path.to_string env
.root
;
253 match www_result
with
255 Lwt.return
(None
, format_failure "Unable to determine mergebase" failure
)
256 | Ok
{ Lwt_utils.Process_success.stdout
; _
} ->
257 let mergebase = stdout
in
258 let%lwt www_diff_result
=
259 Lwt_utils.exec_checked
263 [| "diff"; "-r"; mergebase; "--cwd"; Path.to_string env
.root
|]
265 let%lwt
(patch_item
, patch_instructions
) =
266 match www_diff_result
with
268 Lwt.return
(None
, format_failure "Unable to determine diff" failure
)
269 | Ok
{ Lwt_utils.Process_success.stdout
= hgdiff
; _
} ->
270 if String.is_empty hgdiff
then
271 Lwt.return
(None
, "")
273 let%lwt clowder_result
=
274 Clowder_paste.clowder_upload_and_get_shellscript
278 (match clowder_result
with
281 ( Some
("www_hgdiff.txt", hgdiff
),
283 "hg patch --no-commit www_hgdiff.txt\n\nnote: clowder failed to put:\n%s"
285 | Ok clowder_script
->
287 ( Some
("www_hgdiff.txt", hgdiff
),
288 clowder_script ^
" | hg patch --no-commit -" ))
290 let%lwt hg_st_result
=
291 Lwt_utils.exec_checked
295 [| "status"; "--cwd"; Path.to_string env
.root
|]
298 match hg_st_result
with
299 | Error failure
-> format_failure "Unable to `hg status`" failure
300 | Ok
{ Lwt_utils.Process_success.stdout
; _
} -> "hg status:\n" ^ stdout
305 "hg update -C %s\n\n%s\n\n\n%s"
310 let rage_www_errors (env
: env
) : string Lwt.t
=
311 let%lwt www_errors_result
=
312 Lwt_utils.exec_checked
316 "--from"; "rage"; "--autostart-server"; "false"; Path.to_string env
.root
;
319 let (www_errors_cmd
, www_errors_stdout
, www_errors_stderr
, www_errors_exit
) =
320 match www_errors_result
with
321 | Ok
{ Lwt_utils.Process_success.command_line
; stdout
; stderr
; _
} ->
322 (command_line
, stdout
, stderr
, "exit 0 ok")
325 Lwt_utils.Process_failure.command_line
;
331 (command_line
, stdout
, stderr
, Process.status_to_string process_status
)
335 "%s\n%s\n\nSTDOUT:\n%s\n\nSTDERR:\n%s\n"
341 Lwt.return
www_errors
343 let rage_saved_state (env
: env
) : (string * string) list
Lwt.t
=
345 { Saved_state_loader.Watchman_options.root
= env
.root
; sockname
= None
}
347 let saved_state_check saved_state_type
=
349 let%lwt result_or_timeout
=
353 State_loader_lwt.load_internal
355 ~ignore_hh_version
:false
358 Lwt.return_ok result
);
359 (let%lwt
() = Lwt_unix.sleep
90.0 in
360 Lwt.return_error
());
363 match result_or_timeout
with
364 | Error
() -> Lwt.return_error
"timeout"
365 | Ok
(Ok
(result
, changed_files
, telemetry
)) ->
370 ( List.map changed_files ~f
:Relative_path.suffix
371 |> String.concat ~sep
:"\n" )
372 (Telemetry.to_json telemetry
|> Hh_json.json_to_multiline
) )
373 | Ok
(Error
(load_error
, telemetry
)) ->
377 (Saved_state_loader.medium_user_message_of_error load_error
)
378 (Saved_state_loader.debug_details_of_error load_error
)
379 (Telemetry.to_json telemetry
|> Hh_json.json_to_multiline
))
380 with e
-> Lwt.return_error
(Exception.wrap e
|> Exception.to_string
)
382 let path_to_string path
=
383 let path = Path.to_string
path in
384 let stat = Sys_utils.lstat
path in
385 Printf.sprintf
"%s [%d]" path stat.Unix.st_size
388 let%lwt naming_saved_state
=
389 saved_state_check Saved_state_loader.Naming_table
391 let naming_saved_state =
392 match naming_saved_state with
395 let open Saved_state_loader.Naming_table_saved_state_info
in
397 "naming_table: %s\n\n%s"
398 (path_to_string result
.naming_table_path
)
402 let%lwt regular_saved_state
= saved_state_check Saved_state_loader.Regular
in
403 let regular_saved_state =
404 match regular_saved_state with
407 let open Saved_state_loader.Regular_saved_state_info
in
409 "naming_table: %s\ndeptable: %s\nhot_decls: %s\n\n%s"
410 (path_to_string result
.naming_table_path
)
411 (path_to_string result
.deptable_path
)
412 (path_to_string result
.hot_decls_path
)
417 ("saved_state_naming", naming_saved_state);
418 ("saved_state_regular", regular_saved_state);
421 let rage_tmp_dir () : string Lwt.t
=
422 (* `ls -ld /tmp/hh_server` will show the existence, ownership and permissions of
423 our tmp directory - in case hh_server hasn't been able to work right because it
425 let%lwt dir1_result
=
426 Lwt_utils.exec_checked
429 [| "-ld"; GlobalConfig.tmp_dir
|]
432 match dir1_result
with
433 | Ok
{ Lwt_utils.Process_success.command_line
; stdout
; _
} ->
434 Printf.sprintf
"%s\n\n%s\n\n" command_line stdout
435 | Error failure
-> format_failure "listing tmp directory" failure
437 (* `ls -lR /tmp/hh_server` will do a recursive list of every file and directory within
438 our tmp directory - in case wrong files are there, or in case we lack permissions. *)
439 let%lwt dir2_result
=
440 Lwt_utils.exec_checked
443 [| "-lR"; GlobalConfig.tmp_dir
|]
446 match dir2_result
with
447 | Ok
{ Lwt_utils.Process_success.command_line
; stdout
; _
} ->
448 Printf.sprintf
"%s\n\n%s\n\n" command_line stdout
450 format_failure "listing contents of tmp directory" failure
452 Lwt.return
(dir1 ^
"\n\n" ^
dir2)
454 let rage_experiments_and_config
455 (hhconfig_version_raw
: Config_file.version option) : string list
* string =
456 match hhconfig_version_raw
with
459 let config_overrides = SMap.empty
in
461 ServerLocalConfig.load
463 ~current_version
:version
466 ( local_config.ServerLocalConfig.experiments
,
467 local_config.ServerLocalConfig.experiments_config_meta
)
469 let main (env
: env
) : Exit_status.t
Lwt.t
=
470 let start_time = Unix.gettimeofday
() in
471 Hh_logger.Level.set_min_level
Hh_logger.Level.Error
;
473 (* If user invoked us with `--rageid`, that's their way of saying that they
474 want rageid to be recorded even if they terminate.
475 Unix behavior when a process terminates, is that all its children get
476 reparented onto ID1; also, if the process was a "session leader" then
477 its children and descendents get sent SIGHUP, and their default response
478 is to terminate. So we'll ignore SIGHUP in this case; also, since our
479 stdout+stderr may have been closed, we'll do without them. *)
480 let nohup = Option.is_some env
.rageid
in
481 if nohup then Sys.set_signal
Sys.sighup
Sys.Signal_ignore
;
482 let printf s
= (try Printf.printf "%s\n%!" s
with _
when nohup -> ()) in
483 let eprintf s
= (try Printf.eprintf "%s\n%!" s
with _
when nohup -> ()) in
485 (* helpers for constructing our list of items *)
486 let items : (string * string) list
ref = ref [] in
487 let add item
= items := item
:: !items in
488 (* If the file exists, we'll add it. If the file doesn't exist, we won't.
489 If the file exists but there was a error reading it, we'll report that error. *)
491 let contents = (try Sys_utils.cat fn
with e
-> Exn.to_string e
) in
492 if Sys.file_exists fn
then add (name
, contents)
495 (* stacks of processes *)
496 eprintf "Fetching pstacks (this takes a minute...)";
497 let%lwt pstacks
= rage_pstacks env
in
498 add ("pstacks", pstacks
);
499 let%lwt ps
= rage_ps () in
502 (* hhconfig, hh.conf *)
503 let hhconfig_file = Filename.concat
(Path.to_string env
.root
) ".hhconfig" in
504 add_fn "hhconfig.txt" hhconfig_file;
505 add_fn "hh_conf.txt" ServerLocalConfig.path;
508 let%lwt hash_and_config
= Config_file_lwt.parse_hhconfig
hhconfig_file in
509 let hhconfig_version_raw =
510 match hash_and_config
with
512 | Ok
(_hash
, config
) ->
514 SMap.find_opt
"version" config
|> Config_file.parse_version
518 let hhconfig_version =
519 Option.bind
hhconfig_version_raw ~f
:Config_file.version_to_string_opt
521 let%lwt
hh_version = rage_hh_version env
hhconfig_version_raw in
522 add ("hh_version", hh_version);
524 (* hh_server internal state *)
525 eprintf "Getting current hh state";
526 let%lwt hh_server_state
= rage_hh_server_state env
in
528 match hh_server_state
with
529 | Ok
items -> List.iter
items ~f
:add
530 | Error s
-> add ("hh_server_state", s
)
534 eprintf "Getting current www state";
535 let%lwt
(www_item
, www_instructions
) = rage_www env
in
536 Option.iter www_item ~f
:add;
537 add ("www", www_instructions
);
540 eprintf "Executing hh";
541 let%lwt
www_errors = rage_www_errors env
in
542 add ("www errors", www_errors);
545 eprintf "Checking saved-states";
546 let%lwt saved_state_items
= rage_saved_state env
in
547 List.iter saved_state_items ~f
:add;
550 let (experiments
, experiments_config_meta
) =
551 rage_experiments_and_config hhconfig_version_raw
553 let experiments_content =
555 "EXPERIMENTS\n%s\n\nEXPERIMENTS_CONFIG_META\n%s"
556 (String.concat experiments ~sep
:"\n")
557 experiments_config_meta
559 add ("experiments", experiments_content);
562 add_fn "log_server.txt" (ServerFiles.log_link env
.root
);
563 add_fn "logold_server.txt" (ServerFiles.log_link env
.root ^
".old");
564 add_fn "log_monitor.txt" (ServerFiles.monitor_log_link env
.root
);
565 add_fn "logold_monitor.txt" (ServerFiles.monitor_log_link env
.root ^
".old");
566 add_fn "log_client_lsp.txt" (ServerFiles.client_lsp_log env
.root
);
567 add_fn "logold_client_lsp.txt" (ServerFiles.client_lsp_log env
.root ^
".old");
568 add_fn "log_client_ide.txt" (ServerFiles.client_ide_log env
.root
);
569 add_fn "logold_client_ide.txt" (ServerFiles.client_ide_log env
.root ^
".old");
571 (* temp directories *)
572 eprintf "Looking at hh_server tmp directory";
573 let%lwt tmp_dir
= rage_tmp_dir () in
574 add ("hh_server tmp", tmp_dir
);
576 (* We've assembled everything! now log it. *)
578 Flytrap.create ~title
:("hh_rage: " ^ env
.desc
) ~
items:!items
580 HackEventLogger.Rage.rage
581 ~rageid
:(Option.value env
.rageid ~default
:(Random_id.short_string
()))
587 ~experiments_config_meta
595 Lwt.return
Exit_status.No_error
597 printf ("Flytrap: failed\n" ^ e
);
598 Lwt.return
Exit_status.Uncaught_exception