Don't rely on kill return value for fallback path
[hiphop-php.git] / hphp / hack / src / monitor / serverMonitorUtils.ml
blob44d287bb5acd6366eef1265cfffdff6eeaca49fb
1 (*
2 * Copyright (c) 2015, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the MIT license found in the
6 * LICENSE file in the "hack" directory of this source tree.
8 *)
(** Filesystem paths a monitor instance is configured with. *)
type monitor_config = {
  socket_file: string;
      (** The socket file on which the monitor is listening for connections. *)
  lock_file: string;  (** This lock is held when a monitor is alive. *)
  server_log_file: string;  (** The path to the server log file *)
  monitor_log_file: string;  (** The path to the monitor log file *)
}
(* In an Informant-directed restart, Watchman provided a new
 * mergebase, a new clock, and a list of files changed w.r.t.
 * that mergebase.
 *
 * A new server instance can "resume" from that new mergebase
 * given that it handles the list of files changed w.r.t. that
 * new mergebase, and just starts a watchman subscription
 * beginning with that clock.
 *)
type watchman_mergebase = {
  (* Watchman says current repo mergebase is this. *)
  mergebase_global_rev: int;
  (* ... plus these files changed to represent its current state *)
  files_changed: SSet.t; [@printer SSet.pp_large]
  (* ...as of this clock *)
  watchman_clock: string;
}
[@@deriving show]
(** Render a [watchman_mergebase] as a single-line summary. The set of changed
    files is reported by its cardinality only, not by listing the files. *)
let watchman_mergebase_to_string mergebase =
  let changed_count = SSet.cardinal mergebase.files_changed in
  Printf.sprintf
    "watchman_mergebase (mergebase_global_rev: %d; files_changed count: %d; watchman_clock: %s)"
    mergebase.mergebase_global_rev
    changed_count
    mergebase.watchman_clock
(** Operations the monitor needs from a server implementation in order to
    start it, stop it and observe its exit. *)
module type Server_config = sig
  (** Implementation-specific options handed to [start_server]. *)
  type server_start_options

  (** Start the server. [prior_exit_status], when present, is the exit code of
      the previously running server that exited. *)
  val start_server :
    informant_managed:bool ->
    prior_exit_status:int option ->
    server_start_options ->
    ServerProcess.process_data

  (** Kill the given server process.
      NOTE(review): the exact effect of [~violently] is defined by the
      implementation and is not visible from this signature. *)
  val kill_server : violently:bool -> ServerProcess.process_data -> unit

  (** Wait for the given server process to exit, optionally bounded by
      [timeout_t]. The positional [float] is the kill signal time.
      NOTE(review): the [bool] result presumably reports whether the server
      exited -- confirm against the implementation. *)
  val wait_for_server_exit :
    timeout_t:float option ->
    ServerProcess.process_data ->
    float (* Kill signal time *) ->
    bool

  (** Returns an [int * Unix.process_status] pair for the given server process.
      NOTE(review): presumably the usual waitpid-style (pid, status) -- confirm. *)
  val wait_pid : ServerProcess.process_data -> int * Unix.process_status

  (** Predicate over the start options.
      NOTE(review): judging by the name, tells whether the saved state is
      precomputed -- confirm against the implementation. *)
  val is_saved_state_precomputed : server_start_options -> bool
end
(** Build and launch details of an already-existing process, carried along
    with build-mismatch results (see the constructors that take this type). *)
type build_mismatch_info = {
  existing_version: string;
  existing_build_commit_time: string;
  existing_argv: string list;
  existing_launch_time: float;
}
[@@deriving show]
(** The [build_mismatch_info] describing the current process: build revision
    and commit time from [Build_id], the command line from [Sys.argv], and a
    launch time sampled with [Unix.gettimeofday].

    This is a top-level value, so it is computed once when this module is
    initialized; [existing_launch_time] is therefore the module-initialization
    time, not the time of each use. *)
let current_build_info =
  {
    existing_version = Build_id.build_revision;
    existing_build_commit_time = Build_id.build_commit_time_string;
    existing_argv = Array.to_list Sys.argv;
    existing_launch_time = Unix.gettimeofday ();
  }
(** Why an attempt to connect to the monitor failed. *)
type connect_failure_reason =
  | Connect_timeout  (** The attempt timed out. *)
  | Connect_exception of Exception.t
      (** The attempt failed with the carried exception. *)
(** The step of connecting to the monitor during which a failure occurred. *)
type connect_failure_phase =
  | Connect_open_socket
  | Connect_send_version
  | Connect_send_newline
  | Connect_receive_connection_ok
  | Connect_send_shutdown
[@@deriving show]
(** Details of a failed attempt to connect to the monitor. *)
type connect_to_monitor_failure = {
  server_exists: bool;
      (** This reflects the state of the lock file shortly after the failure happened. *)
  failure_phase: connect_failure_phase;
  failure_reason: connect_failure_reason;
}
(** Ways in which connecting to a server through the monitor can fail. *)
type connection_error =
  | Connect_to_monitor_failure of connect_to_monitor_failure
  | Server_died
  (* Server dormant and can't join the (now full) queue of connections
   * waiting for the next server. *)
  | Server_dormant
  | Server_dormant_out_of_retries
  (* Build ID mismatch indicates that hh_client binary is a different
   * version from hh_server binary, and hence hh_server will shutdown.
   *
   * It may happen due to several reasons:
   * - It is the expected mechanism by which hh_server shuts down upon a
   *   version bump (i.e. it doesn't shutdown until a newer version of the client
   *   pings it).
   * - It can arise also if you've rebuilt Hack yourself and this versionless
   *   hh_client connects to an already-running hh_server.
   * - More rarely, it may happen if chef/fbpkg didn't update binaries on disk
   *   correctly.
   *)
  | Build_id_mismatched of build_mismatch_info option
(** Convert a [connection_error] into a [Telemetry.t].

    Every case records a "kind" string naming the error. For
    [Connect_to_monitor_failure] the result additionally carries
    "server_exists", "phase" (via [show_connect_failure_phase]) and "reason"
    ("timeout" or "exception"), plus "exn" and "exn_stack", which are [Some]
    only when the failure was caused by an exception. The payload of
    [Build_id_mismatched] is not recorded. *)
let connection_error_to_telemetry (e : connection_error) : Telemetry.t =
  let telemetry = Telemetry.create () in
  match e with
  | Server_died ->
    telemetry |> Telemetry.string_ ~key:"kind" ~value:"Server_died"
  | Server_dormant ->
    telemetry |> Telemetry.string_ ~key:"kind" ~value:"Server_dormant"
  | Server_dormant_out_of_retries ->
    telemetry
    |> Telemetry.string_ ~key:"kind" ~value:"Server_dormant_out_of_retries"
  | Build_id_mismatched _ ->
    telemetry |> Telemetry.string_ ~key:"kind" ~value:"Build_id_mismatched"
  | Connect_to_monitor_failure { server_exists; failure_phase; failure_reason }
    ->
    (* Flatten the failure reason into a label plus optional exception
       constructor name and cleaned backtrace. *)
    let (reason, exn, stack) =
      match failure_reason with
      | Connect_timeout -> ("timeout", None, None)
      | Connect_exception e ->
        ( "exception",
          Some (Exception.get_ctor_string e),
          Some (Exception.get_backtrace_string e |> Exception.clean_stack) )
    in
    telemetry
    |> Telemetry.string_ ~key:"kind" ~value:"Connection_to_monitor_Failure"
    |> Telemetry.bool_ ~key:"server_exists" ~value:server_exists
    |> Telemetry.string_
         ~key:"phase"
         ~value:(show_connect_failure_phase failure_phase)
    |> Telemetry.string_ ~key:"reason" ~value:reason
    |> Telemetry.string_opt ~key:"exn" ~value:exn
    |> Telemetry.string_opt ~key:"exn_stack" ~value:stack
(** Connection status values exchanged between client and monitor.

    The order of the constructors is part of the binary protocol supported
    between mismatched client/server versions, so constructors must not be
    removed or reordered. *)
type connection_state =
  | Connection_ok
  | Build_id_mismatch
      (** Build_id_mismatch is never used, but it can't be removed, because
          the sequence of constructors here is part of the binary protocol
          we want to support between mismatched versions of client_server. *)
  | Build_id_mismatch_ex of build_mismatch_info
      (** Build_id_mismatch_ex *is* used. Ex stands for 'extended' *)
  | Build_id_mismatch_v3 of build_mismatch_info * string
      (** Build_id_mismatch_v3 isn't used yet, but might be *)
  | Connection_ok_v2 of string
      (** Connection_ok_v2 isn't used yet, but might be *)
(** Outcome of a shutdown monitor RPC. *)
type shutdown_result =
  | SHUTDOWN_VERIFIED
      (** Request sent and channel hung up, indicating the process has exited. *)
  | SHUTDOWN_UNVERIFIED  (** Request sent, but channel hasn't hung up. *)
(** The message sent to the --waiting-client. *)
let ready : string = "ready"
180 let exit_if_parent_dead () =
181 (* Cross-platform compatible way; parent PID becomes 1 when parent dies. *)
182 if Unix.getppid () = 1 then (
183 Hh_logger.log "Server's parent has died; exiting.\n";
184 Exit.exit Exit_status.Lost_parent_monitor