timeout: defensive handling of all wait() errors
[coreutils/ericb.git] / src / timeout.c
blob62f3d4b6e24abe53064b41183ff3eb5ac84e9569
1 /* timeout -- run a command with bounded time
2 Copyright (C) 2008-2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* timeout - Start a command, and kill it if the specified timeout expires
20 We try to behave like a shell starting a single (foreground) job,
21 and will kill the job if we receive the alarm signal we setup.
22 The exit status of the job is returned, or one of these errors:
23 EXIT_TIMEDOUT 124 job timed out
24 EXIT_CANCELED 125 internal error
25 EXIT_CANNOT_INVOKE 126 error executing job
26 EXIT_ENOENT 127 couldn't find job to exec
28 Caveats:
29 If user specifies the KILL (9) signal is to be sent on timeout,
30 the monitor is killed and so exits with 128+9 rather than 124.
32 If you start a command in the background, which reads from the tty
33 and so is immediately sent SIGTTIN to stop, then the timeout
34 process will ignore this so it can timeout the command as expected.
35 This can be seen with `timeout 10 dd&` for example.
36 However if one brings this group to the foreground with the `fg`
37 command before the timer expires, the command will remain
38 in the sTop state as the shell doesn't send a SIGCONT
39 because the timeout process (group leader) is already running.
40 To get the command running again one can Ctrl-Z, and do fg again.
41 Note one can Ctrl-C the whole job when in this state.
42 I think this could be fixed but I'm not sure the extra
43 complication is justified for this scenario.
45 Written by Pádraig Brady. */
47 #include <config.h>
48 #include <getopt.h>
49 #include <stdio.h>
50 #include <sys/types.h>
51 #include <signal.h>
53 #if HAVE_SYS_WAIT_H
54 # include <sys/wait.h>
55 #endif
/* Fallback wait-status macros, defined only when the headers included
   above did not already provide them.  */
#ifndef WIFSIGNALED
/* Nonzero when the low 16 bits of S, minus one, compare (as unsigned)
   below 0xFF.  */
# define WIFSIGNALED(s) (((s) & 0xFFFF) - 1 < (unsigned int) 0xFF)
#endif
#ifndef WTERMSIG
/* The low seven bits of S.  */
# define WTERMSIG(s) ((s) & 0x7F)
#endif
63 #include "system.h"
64 #include "xstrtol.h"
65 #include "sig2str.h"
66 #include "operand2sig.h"
67 #include "cloexec.h"
68 #include "error.h"
69 #include "long-options.h"
70 #include "quote.h"
#define PROGRAM_NAME "timeout"

#define AUTHORS proper_name_utf8 ("Padraig Brady", "P\303\241draig Brady")

/* Note ETIMEDOUT is 110 on GNU/Linux systems but this is non standard */
#define EXIT_TIMEDOUT 124

/* Internal failure.  */
#define EXIT_CANCELED 125

/* Set to 1 by the signal handler when SIGALRM arrives and tested by main
   once the wait is over.  Because it is written asynchronously from a
   handler it must be a volatile sig_atomic_t.  */
static volatile sig_atomic_t timed_out;
static int term_signal = SIGTERM;  /* same default as kill command.  */
static int monitored_pid;          /* fork() result; 0 in the child.  */
static int sigs_to_ignore[NSIG];   /* so monitor can ignore sigs it resends.  */
/* Long options accepted by getopt_long in main; --signal takes a
   mandatory argument and maps to the short option -s.  The all-zero
   entry terminates the table.  */
static struct option const long_options[] =
{
  {"signal", required_argument, NULL, 's'},
  {NULL, 0, NULL, 0}
};
93 /* send sig to group but not ourselves.
94 * FIXME: Is there a better way to achieve this? */
95 static int
96 send_sig (int where, int sig)
98 sigs_to_ignore[sig] = 1;
99 return kill (where, sig);
102 static void
103 cleanup (int sig)
105 if (sig == SIGALRM)
107 timed_out = 1;
108 sig = term_signal;
110 if (monitored_pid)
112 if (sigs_to_ignore[sig])
114 sigs_to_ignore[sig] = 0;
115 return;
117 send_sig (0, sig);
118 if (sig != SIGKILL && sig != SIGCONT)
119 send_sig (0, SIGCONT);
121 else /* we're the child or the child is not exec'd yet. */
122 _exit (128 + sig);
125 void
126 usage (int status)
128 if (status != EXIT_SUCCESS)
129 fprintf (stderr, _("Try `%s --help' for more information.\n"),
130 program_name);
131 else
133 printf (_("\
134 Usage: %s [OPTION] NUMBER[SUFFIX] COMMAND [ARG]...\n\
135 or: %s [OPTION]\n"), program_name, program_name);
137 fputs (_("\
138 Start COMMAND, and kill it if still running after NUMBER seconds.\n\
139 SUFFIX may be `s' for seconds (the default), `m' for minutes,\n\
140 `h' for hours or `d' for days.\n\
142 "), stdout);
144 fputs (_("\
145 Mandatory arguments to long options are mandatory for short options too.\n\
146 "), stdout);
147 fputs (_("\
148 -s, --signal=SIGNAL\n\
149 specify the signal to be sent on timeout.\n\
150 SIGNAL may be a name like `HUP' or a number.\n\
151 See `kill -l` for a list of signals\n"), stdout);
153 fputs (HELP_OPTION_DESCRIPTION, stdout);
154 fputs (VERSION_OPTION_DESCRIPTION, stdout);
155 fputs (_("\n\
156 If the command times out, then we exit with status 124,\n\
157 otherwise the normal exit status of the command is returned.\n\
158 If no signal is specified, the TERM signal is sent. The TERM signal\n\
159 will kill processes which do not catch this signal. For other processes,\n\
160 it may be necessary to use the KILL (9) signal, since this signal cannot\n\
161 be caught.\n"), stdout);
162 emit_bug_reporting_address ();
164 exit (status);
/* Given an unsigned long value *X, and a suffix character, SUFFIX_CHAR,
   scale *X by the multiplier implied by SUFFIX_CHAR.  SUFFIX_CHAR may
   be the NUL byte or `s' to denote seconds, `m' for minutes, `h' for
   hours, or `d' for days.  If SUFFIX_CHAR is invalid, don't modify *X
   and return false.  If the scaled value would exceed UINT_MAX, don't
   modify *X and return false.  Otherwise return true.  */

static bool
apply_time_suffix (unsigned long *x, char suffix_char)
{
  unsigned int multiplier = 1;

  switch (suffix_char)
    {
    case 0:
    case 's':
      return true;
    case 'd':
      multiplier *= 24;
      /* fall through: a day is 24 hours.  */
    case 'h':
      multiplier *= 60;
      /* fall through: an hour is 60 minutes.  */
    case 'm':
      if (multiplier > UINT_MAX / 60) /* 16 bit overflow */
        return false;
      multiplier *= 60;
      break;
    default:
      return false;
    }

  /* Reject anything that would not fit in an unsigned int once scaled.  */
  if (*x > UINT_MAX / multiplier)
    return false;

  *x *= multiplier;

  return true;
}
205 static void
206 install_signal_handlers (void)
208 struct sigaction sa;
209 sigemptyset(&sa.sa_mask); /* Allow concurrent calls to handler */
210 sa.sa_handler = cleanup;
211 sa.sa_flags = SA_RESTART; /* restart syscalls (like wait() below) */
213 sigaction (SIGALRM, &sa, NULL); /* our timeout. */
214 sigaction (SIGINT, &sa, NULL); /* Ctrl-C at terminal for example. */
215 sigaction (SIGQUIT, &sa, NULL); /* Ctrl-\ at terminal for example. */
216 sigaction (SIGTERM, &sa, NULL); /* if we're killed, stop monitored proc. */
217 sigaction (SIGHUP, &sa, NULL); /* terminal closed for example. */
221 main (int argc, char **argv)
223 unsigned long timeout;
224 char signame[SIG2STR_MAX];
225 int c;
226 char *ep;
228 initialize_main (&argc, &argv);
229 set_program_name (argv[0]);
230 setlocale (LC_ALL, "");
231 bindtextdomain (PACKAGE, LOCALEDIR);
232 textdomain (PACKAGE);
234 initialize_exit_failure (EXIT_CANCELED);
235 atexit (close_stdout);
237 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE_NAME, Version,
238 usage, AUTHORS, (char const *) NULL);
240 while ((c = getopt_long (argc, argv, "+s:", long_options, NULL)) != -1)
242 switch (c)
244 case 's':
245 term_signal = operand2sig (optarg, signame);
246 if (term_signal == -1)
247 usage (EXIT_CANCELED);
248 break;
249 default:
250 usage (EXIT_CANCELED);
251 break;
255 if (argc - optind < 2)
256 usage (EXIT_CANCELED);
258 if (xstrtoul (argv[optind], &ep, 10, &timeout, NULL)
259 /* Invalid interval. Note 0 disables timeout */
260 || (timeout > UINT_MAX)
261 /* Extra chars after the number and an optional s,m,h,d char. */
262 || (*ep && *(ep + 1))
263 /* Check any suffix char and update timeout based on the suffix. */
264 || !apply_time_suffix (&timeout, *ep))
266 error (0, 0, _("invalid time interval %s"), quote (argv[optind]));
267 usage (EXIT_CANCELED);
269 optind++;
271 argc -= optind;
272 argv += optind;
274 /* Ensure we're in our own group so all subprocesses can be killed.
275 Note we don't just put the child in a separate group as
276 then we would need to worry about foreground and background groups
277 and propagating signals between them. */
278 setpgid (0, 0);
280 /* Setup handlers before fork() so that we
281 handle any signals caused by child, without races. */
282 install_signal_handlers ();
283 signal (SIGTTIN, SIG_IGN); /* don't sTop if background child needs tty. */
284 signal (SIGTTOU, SIG_IGN); /* don't sTop if background child needs tty. */
286 monitored_pid = fork ();
287 if (monitored_pid == -1)
289 error (0, errno, _("fork system call failed"));
290 return EXIT_CANCELED;
292 else if (monitored_pid == 0)
293 { /* child */
294 int exit_status;
296 /* exec doesn't reset SIG_IGN -> SIG_DFL. */
297 signal (SIGTTIN, SIG_DFL);
298 signal (SIGTTOU, SIG_DFL);
300 execvp (argv[0], argv); /* FIXME: should we use "sh -c" ... here? */
302 /* exit like sh, env, nohup, ... */
303 exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE);
304 error (0, errno, _("failed to run command %s"), quote (argv[0]));
305 return exit_status;
307 else
309 int status;
311 alarm (timeout);
313 /* We're just waiting for a single process here, so wait() suffices.
314 Note the signal() calls above on GNU/Linux and BSD at least,
315 essentially call the lower level sigaction() with the SA_RESTART flag
316 set, which ensures the following wait call will only return if the
317 child exits, not on this process receiving a signal. Also we're not
318 passing WUNTRACED | WCONTINUED to a waitpid() call and so will not get
319 indication that the child has stopped or continued. */
320 if (wait (&status) == -1)
322 /* shouldn't happen. */
323 error (0, errno, _("error waiting for command"));
324 status = EXIT_CANCELED;
326 else
328 if (WIFEXITED (status))
329 status = WEXITSTATUS (status);
330 else if (WIFSIGNALED (status))
331 status = WTERMSIG (status) + 128; /* what sh does at least. */
332 else
334 /* shouldn't happen. */
335 error (0, 0, _("unknown status from command (0x%X)"), status);
336 status = EXIT_FAILURE;
340 if (timed_out)
341 return EXIT_TIMEDOUT;
342 else
343 return status;