Avoid using the shell to run most external commands
[xapian.git] / xapian-applications / omega / runfilter.cc
blob673c546daeb73475960d1a3e00411339a9548984
1 /** @file runfilter.cc
2 * @brief Run an external filter and capture its output in a std::string.
4 * Copyright (C) 2003,2006,2007,2009,2010,2011,2013,2015 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "runfilter.h"
25 #include <iostream>
26 #include <string>
27 #include <vector>
29 #include <sys/types.h>
30 #include "safeerrno.h"
31 #include "safefcntl.h"
32 #include <cstdio>
33 #ifdef HAVE_SYS_TIME_H
34 # include <sys/time.h>
35 #endif
36 #ifdef HAVE_SYS_RESOURCE_H
37 # include <sys/resource.h>
38 #endif
39 #include "safesysselect.h"
40 #ifdef HAVE_SYS_SOCKET_H
41 # include <sys/socket.h>
42 #endif
43 #include "safesyswait.h"
44 #include "safeunistd.h"
46 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
47 # include <signal.h>
48 #endif
50 #include "freemem.h"
52 #ifdef _MSC_VER
53 # define popen _popen
54 # define pclose _pclose
55 #endif
57 using namespace std;
59 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
// Target passed to kill() by handle_signal() when one of the signals hooked
// by runfilter_init() arrives.  Zero means there is currently nothing to
// kill; the handler resets it to zero after sending SIGKILL.
static pid_t pid_to_kill_on_signal = 0;
62 #ifdef HAVE_SIGACTION
63 static struct sigaction old_hup_handler;
64 static struct sigaction old_int_handler;
65 static struct sigaction old_quit_handler;
66 static struct sigaction old_term_handler;
68 extern "C" {
70 static void
71 handle_signal(int signum)
73 if (pid_to_kill_on_signal) {
74 kill(pid_to_kill_on_signal, SIGKILL);
75 pid_to_kill_on_signal = 0;
77 switch (signum) {
78 case SIGHUP:
79 sigaction(signum, &old_hup_handler, NULL);
80 break;
81 case SIGINT:
82 sigaction(signum, &old_int_handler, NULL);
83 break;
84 case SIGQUIT:
85 sigaction(signum, &old_quit_handler, NULL);
86 break;
87 case SIGTERM:
88 sigaction(signum, &old_term_handler, NULL);
89 break;
90 default:
91 return;
93 raise(signum);
98 void
99 runfilter_init()
101 struct sigaction sa;
102 sa.sa_handler = handle_signal;
103 sigemptyset(&sa.sa_mask);
104 sa.sa_flags = 0;
106 sigaction(SIGHUP, &sa, &old_hup_handler);
107 sigaction(SIGINT, &sa, &old_int_handler);
108 sigaction(SIGQUIT, &sa, &old_quit_handler);
109 sigaction(SIGTERM, &sa, &old_term_handler);
111 #else
112 static sighandler_t old_hup_handler;
113 static sighandler_t old_int_handler;
114 static sighandler_t old_quit_handler;
115 static sighandler_t old_term_handler;
117 extern "C" {
119 static void
120 handle_signal(int signum)
122 if (pid_to_kill_on_signal) {
123 kill(pid_to_kill_on_signal, SIGKILL);
124 pid_to_kill_on_signal = 0;
126 switch (signum) {
127 case SIGHUP:
128 signal(signum, old_hup_handler);
129 break;
130 case SIGINT:
131 signal(signum, old_int_handler);
132 break;
133 case SIGQUIT:
134 signal(signum, old_quit_handler);
135 break;
136 case SIGTERM:
137 signal(signum, old_term_handler);
138 break;
139 default:
140 return;
142 raise(signum);
147 void
148 runfilter_init()
150 old_hup_handler = signal(SIGHUP, handle_signal);
151 old_int_handler = signal(SIGINT, handle_signal);
152 old_quit_handler = signal(SIGQUIT, handle_signal);
153 old_term_handler = signal(SIGTERM, handle_signal);
155 #endif
156 #else
/** No-op variant used when fork() and socketpair() aren't both available:
 *  this build installs no signal handlers.
 */
void
runfilter_init()
{
    // Nothing to set up.
}
161 #endif
163 string
164 stdout_to_string(const string &cmd, bool use_shell)
166 string out;
167 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
168 // We want to be able to get the exit status of the child process.
169 signal(SIGCHLD, SIG_DFL);
171 int fds[2];
172 if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) < 0)
173 throw ReadError("socketpair failed");
175 pid_t child = fork();
176 if (child == 0) {
177 // We're the child process.
179 #ifdef HAVE_SETPGID
180 // Put the child process into its own process group, so that we can
181 // easily kill it and any children it in turn forks if we need to.
182 setpgid(0, 0);
183 pid_to_kill_on_signal = -child;
184 #else
185 pid_to_kill_on_signal = child;
186 #endif
188 // Close the parent's side of the socket pair.
189 close(fds[0]);
191 // Connect stdout to our side of the socket pair.
192 dup2(fds[1], 1);
194 #ifdef HAVE_SETRLIMIT
195 // Impose some pretty generous resource limits to prevent run-away
196 // filter programs from causing problems.
198 // Limit CPU time to 300 seconds (5 minutes).
199 struct rlimit cpu_limit = { 300, RLIM_INFINITY } ;
200 setrlimit(RLIMIT_CPU, &cpu_limit);
202 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
203 // Limit process data to free physical memory.
204 long mem = get_free_physical_memory();
205 if (mem > 0) {
206 struct rlimit ram_limit = {
207 static_cast<rlim_t>(mem),
208 RLIM_INFINITY
210 #ifdef RLIMIT_AS
211 setrlimit(RLIMIT_AS, &ram_limit);
212 #elif defined RLIMIT_VMEM
213 setrlimit(RLIMIT_VMEM, &ram_limit);
214 #else
215 // Only limits the data segment rather than the total address
216 // space, but that's better than nothing.
217 setrlimit(RLIMIT_DATA, &ram_limit);
218 #endif
220 #endif
221 #endif
223 if (use_shell) {
224 execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL);
225 _exit(-1);
228 string s(cmd);
229 vector<const char *> argv;
230 size_t j = 0;
231 while (true) {
232 size_t i = s.find_first_not_of(" \t\n", j);
233 if (i == string::npos) break;
234 j = i;
235 if (s[j] == '\'') {
236 single_quoted:
237 s.erase(j, 1);
238 while (true) {
239 j = s.find('\'', j + 1);
240 if (j == s.npos) {
241 // Unmatched ' in command string.
242 // dash exits 2 in this case, bash exits 1.
243 _exit(2);
245 // Replace four character sequence '\'' with ' - this is
246 // how a single quote inside single quotes gets escaped.
247 if (s[j + 1] != '\\' ||
248 s[j + 2] != '\'' ||
249 s[j + 3] != '\'') {
250 break;
252 s.erase(j + 1, 3);
254 if (j + 1 != s.size()) {
255 char ch = s[j + 1];
256 if (ch != ' ' && ch != '\t' && ch != '\n') {
257 // Handle the expansion of e.g.: --input=%f,html
258 s.erase(j, 1);
259 goto out_of_quotes;
262 } else {
263 out_of_quotes:
264 j = s.find_first_of(" \t\n'", j + 1);
265 // Handle the expansion of e.g.: --input=%f
266 if (j != s.npos && s[j] == '\'') goto single_quoted;
268 if (j != s.npos) {
269 s[j++] = '\0';
271 const char * word = s.c_str() + i;
272 argv.push_back(word);
274 argv.push_back(NULL);
276 execvp(argv[0], const_cast<char **>(&argv[0]));
277 _exit(-1);
280 // We're the parent process.
282 // Close the child's side of the socket pair.
283 close(fds[1]);
284 if (child == -1) {
285 // fork() failed.
286 close(fds[0]);
287 throw ReadError("fork failed");
290 int fd = fds[0];
292 fd_set readfds;
293 FD_ZERO(&readfds);
294 while (true) {
295 // If we wait 300 seconds (5 minutes) without getting data from the
296 // filter, then give up to avoid waiting forever for a filter which
297 // has ended up blocked waiting for something which will never happen.
298 struct timeval tv;
299 tv.tv_sec = 300;
300 tv.tv_usec = 0;
301 FD_SET(fd, &readfds);
302 int r = select(fd + 1, &readfds, NULL, NULL, &tv);
303 if (r <= 0) {
304 if (r < 0) {
305 if (errno == EINTR) {
306 // select() interrupted by a signal, so retry.
307 continue;
309 cerr << "Reading from filter failed (" << strerror(errno) << ")"
310 << endl;
311 } else {
312 cerr << "Filter inactive for too long" << endl;
314 #ifdef HAVE_SETPGID
315 kill(-child, SIGKILL);
316 #else
317 kill(child, SIGKILL);
318 #endif
319 close(fd);
320 int status = 0;
321 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
322 pid_to_kill_on_signal = 0;
323 throw ReadError(status);
326 char buf[4096];
327 ssize_t res = read(fd, buf, sizeof(buf));
328 if (res == 0) break;
329 if (res == -1) {
330 if (errno == EINTR) {
331 // read() interrupted by a signal, so retry.
332 continue;
334 close(fd);
335 #ifdef HAVE_SETPGID
336 kill(-child, SIGKILL);
337 #endif
338 int status = 0;
339 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
340 pid_to_kill_on_signal = 0;
341 throw ReadError(status);
343 out.append(buf, res);
346 close(fd);
347 #ifdef HAVE_SETPGID
348 kill(-child, SIGKILL);
349 #endif
350 int status = 0;
351 while (waitpid(child, &status, 0) < 0) {
352 if (errno != EINTR)
353 throw ReadError("wait pid failed");
355 pid_to_kill_on_signal = 0;
356 #else
357 (void)use_shell;
358 FILE * fh = popen(cmd.c_str(), "r");
359 if (fh == NULL) throw ReadError("popen failed");
360 while (!feof(fh)) {
361 char buf[4096];
362 size_t len = fread(buf, 1, 4096, fh);
363 if (ferror(fh)) {
364 (void)pclose(fh);
365 throw ReadError("fread failed");
367 out.append(buf, len);
369 int status = pclose(fh);
370 #endif
372 if (status != 0) {
373 if (WIFEXITED(status) && WEXITSTATUS(status) == 127) {
374 throw NoSuchFilter();
376 #ifdef SIGXCPU
377 if (WIFSIGNALED(status) && WTERMSIG(status) == SIGXCPU) {
378 cerr << "Filter process consumed too much CPU time" << endl;
380 #endif
381 throw ReadError(status);
383 return out;