[python3] Simplify generated wrapper post-processing
[xapian.git] / xapian-applications / omega / runfilter.cc
bloba85ddfe36ba689ce0f5a58ac1add35391d4e03f2
1 /** @file runfilter.cc
2 * @brief Run an external filter and capture its output in a std::string.
3 */
4 /* Copyright (C) 2003,2006,2007,2009,2010,2011,2013,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "runfilter.h"
25 #include <iostream>
26 #include <string>
27 #include <vector>
29 #include <sys/types.h>
30 #include "safeerrno.h"
31 #include "safefcntl.h"
32 #include <cstdio>
33 #include <cstring>
34 #ifdef HAVE_SYS_TIME_H
35 # include <sys/time.h>
36 #endif
37 #ifdef HAVE_SYS_RESOURCE_H
38 # include <sys/resource.h>
39 #endif
40 #include "safesysselect.h"
41 #ifdef HAVE_SYS_SOCKET_H
42 # include <sys/socket.h>
43 #endif
44 #include "safesyswait.h"
45 #include "safeunistd.h"
47 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
48 # include <signal.h>
49 #endif
51 #include "freemem.h"
52 #include "stringutils.h"
54 #ifdef _MSC_VER
55 # define popen _popen
56 # define pclose _pclose
57 #endif
59 using namespace std;
61 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
/** Decide whether a filter command has to be run via the shell.
 *
 *  @param p  NUL-terminated command string.
 *
 *  @return true if @a p contains any character from a fixed set of shell
 *	    metacharacters, false otherwise (including for an empty string).
 *
 *  Whitespace and single quotes are deliberately not in the set.
 */
bool
command_needs_shell(const char * p)
{
    // Probably overly conservative, but suitable for real-world cases.
    return strpbrk(p, "!\"#$&()*;<>?[\\]^`{|}~") != NULL;
}
/** Split the next word off a command string in place, undoing shell-style
 *  single quoting.
 *
 *  Quote characters are erased from @a s, the four character sequence '\''
 *  inside quotes is reduced to a single literal ', and the whitespace
 *  character which ends the word (or the closing quote, when whitespace or
 *  the end of the string follows it) is overwritten with '\0' so that the
 *  word can be used as a C string.
 *
 *  @param s  The command string, modified in place.
 *  @param j  On entry, the index in @a s to start from.  If s[j] isn't a
 *	      single quote it is taken as part of the word without being
 *	      examined.  On return, the index just after the '\0' which was
 *	      written, or npos if the word ran to the end of @a s.
 *
 *  @return true if any part of the word was single quoted.
 *
 *  An unmatched single quote terminates the process with _exit(2) (the
 *  visible callers run in the forked child).
 */
static bool
unquote(std::string & s, size_t & j)
{
    bool quoted = false;
    // Where the next search starts.  When we don't start on a quote, s[j]
    // is skipped as described above.
    size_t from = j + 1;
    if (s[j] == '\'') {
single_quoted:
	quoted = true;
	// Drop the opening quote.  s[j] is now the first quoted character,
	// which is the closing quote itself for an empty argument (''), so
	// the search must start at j rather than j + 1.
	s.erase(j, 1);
	from = j;
	while (true) {
	    j = s.find('\'', from);
	    if (j == s.npos) {
		// Unmatched ' in command string.
		// dash exits 2 in this case, bash exits 1.
		_exit(2);
	    }
	    // Replace four character sequence '\'' with ' - this is
	    // how a single quote inside single quotes gets escaped.
	    if (s[j + 1] != '\\' ||
		s[j + 2] != '\'' ||
		s[j + 3] != '\'') {
		break;
	    }
	    s.erase(j + 1, 3);
	    // s[j] is now the literal quote, so resume after it.
	    from = j + 1;
	}
	if (j + 1 != s.size()) {
	    const char ch = s[j + 1];
	    if (ch != ' ' && ch != '\t' && ch != '\n') {
		// Handle the expansion of e.g.: --input=%f,html
		s.erase(j, 1);
		// s[j] is now the character which followed the closing
		// quote; it may open another quoted section, so it has to
		// be examined too.
		from = j;
		goto out_of_quotes;
	    }
	}
    } else {
out_of_quotes:
	j = s.find_first_of(" \t\n'", from);
	// Handle the expansion of e.g.: --input=%f
	if (j != s.npos && s[j] == '\'') goto single_quoted;
    }
    if (j != s.npos) {
	s[j++] = '\0';
    }
    return quoted;
}
// What handle_signal() should SIGKILL if a signal arrives, or 0 for nothing.
// stdout_to_string() sets this while a filter is running and clears it once
// the filter has been reaped.  The value is passed straight to kill(), so a
// negated pid addresses a whole process group.
static pid_t pid_to_kill_on_signal;
121 #ifdef HAVE_SIGACTION
// The signal dispositions which were in place when runfilter_init() ran.
// handle_signal() reinstates the matching one before re-raising the signal.
static struct sigaction old_hup_handler;
static struct sigaction old_int_handler;
static struct sigaction old_quit_handler;
static struct sigaction old_term_handler;
127 extern "C" {
129 static void
130 handle_signal(int signum)
132 if (pid_to_kill_on_signal) {
133 kill(pid_to_kill_on_signal, SIGKILL);
134 pid_to_kill_on_signal = 0;
136 switch (signum) {
137 case SIGHUP:
138 sigaction(signum, &old_hup_handler, NULL);
139 break;
140 case SIGINT:
141 sigaction(signum, &old_int_handler, NULL);
142 break;
143 case SIGQUIT:
144 sigaction(signum, &old_quit_handler, NULL);
145 break;
146 case SIGTERM:
147 sigaction(signum, &old_term_handler, NULL);
148 break;
149 default:
150 return;
152 raise(signum);
157 void
158 runfilter_init()
160 struct sigaction sa;
161 sa.sa_handler = handle_signal;
162 sigemptyset(&sa.sa_mask);
163 sa.sa_flags = 0;
165 sigaction(SIGHUP, &sa, &old_hup_handler);
166 sigaction(SIGINT, &sa, &old_int_handler);
167 sigaction(SIGQUIT, &sa, &old_quit_handler);
168 sigaction(SIGTERM, &sa, &old_term_handler);
170 #else
extern "C" {
// The handler type which signal() takes and returns.  Spelled out here
// because the `sighandler_t` name is a GNU extension which other C libraries
// don't necessarily provide.
typedef void (*runfilter_signal_handler)(int);
}

// The handlers which were in place when runfilter_init() ran.
// handle_signal() reinstates the matching one before re-raising the signal.
static runfilter_signal_handler old_hup_handler;
static runfilter_signal_handler old_int_handler;
static runfilter_signal_handler old_quit_handler;
static runfilter_signal_handler old_term_handler;
176 extern "C" {
178 static void
179 handle_signal(int signum)
181 if (pid_to_kill_on_signal) {
182 kill(pid_to_kill_on_signal, SIGKILL);
183 pid_to_kill_on_signal = 0;
185 switch (signum) {
186 case SIGHUP:
187 signal(signum, old_hup_handler);
188 break;
189 case SIGINT:
190 signal(signum, old_int_handler);
191 break;
192 case SIGQUIT:
193 signal(signum, old_quit_handler);
194 break;
195 case SIGTERM:
196 signal(signum, old_term_handler);
197 break;
198 default:
199 return;
201 raise(signum);
206 void
207 runfilter_init()
209 old_hup_handler = signal(SIGHUP, handle_signal);
210 old_int_handler = signal(SIGINT, handle_signal);
211 old_quit_handler = signal(SIGQUIT, handle_signal);
212 old_term_handler = signal(SIGTERM, handle_signal);
214 #endif
215 #else
/** Decide whether a filter command has to be run via the shell.
 *
 *  This is the version for builds without both fork() and socketpair().
 *
 *  @return Always true; the command string isn't looked at.
 */
bool
command_needs_shell(const char *)
{
    // We don't try to avoid the shell on this platform, so don't waste time
    // analysing commands to see if they could.
    return true;
}
/** Initialisation hook for builds without both fork() and socketpair().
 *
 *  There are no signal handlers to install in this configuration, so this
 *  does nothing.
 */
void
runfilter_init()
{
}
228 #endif
230 string
231 stdout_to_string(const string &cmd, bool use_shell, int alt_status)
233 string out;
234 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
235 // We want to be able to get the exit status of the child process.
236 signal(SIGCHLD, SIG_DFL);
238 int fds[2];
239 if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) < 0)
240 throw ReadError("socketpair failed");
242 pid_t child = fork();
243 if (child == 0) {
244 // We're the child process.
246 #ifdef HAVE_SETPGID
247 // Put the child process into its own process group, so that we can
248 // easily kill it and any children it in turn forks if we need to.
249 setpgid(0, 0);
250 pid_to_kill_on_signal = -child;
251 #else
252 pid_to_kill_on_signal = child;
253 #endif
255 // Close the parent's side of the socket pair.
256 close(fds[0]);
258 // Connect stdout to our side of the socket pair.
259 dup2(fds[1], 1);
261 #ifdef HAVE_SETRLIMIT
262 // Impose some pretty generous resource limits to prevent run-away
263 // filter programs from causing problems.
265 // Limit CPU time to 300 seconds (5 minutes).
266 struct rlimit cpu_limit = { 300, RLIM_INFINITY };
267 setrlimit(RLIMIT_CPU, &cpu_limit);
269 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
270 // Limit process data to free physical memory.
271 long mem = get_free_physical_memory();
272 if (mem > 0) {
273 struct rlimit ram_limit = {
274 static_cast<rlim_t>(mem),
275 RLIM_INFINITY
277 #ifdef RLIMIT_AS
278 setrlimit(RLIMIT_AS, &ram_limit);
279 #elif defined RLIMIT_VMEM
280 setrlimit(RLIMIT_VMEM, &ram_limit);
281 #else
282 // Only limits the data segment rather than the total address
283 // space, but that's better than nothing.
284 setrlimit(RLIMIT_DATA, &ram_limit);
285 #endif
287 #endif
288 #endif
290 if (use_shell) {
291 #if !defined HAVE_SETENV && !defined HAVE_PUTENV
292 use_shell_after_all:
293 #endif
294 execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL);
295 _exit(-1);
298 string s(cmd);
299 // Handle any environment variable assignments.
300 // Name must start with alpha or '_', contain only alphanumerics and
301 // '_', and there must be no quoting of either the name or the '='.
302 size_t j = 0;
303 while (true) {
304 j = s.find_first_not_of(" \t\n", j);
305 if (!(C_isalnum(s[j]) || s[j] == '_')) break;
306 size_t i = j;
307 do ++j; while (C_isalnum(s[j]) || s[j] == '_');
308 if (s[j] != '=') {
309 j = i;
310 break;
313 #ifdef HAVE_SETENV
314 size_t eq = j;
315 unquote(s, j);
316 s[eq] = '\0';
317 setenv(&s[i], &s[eq + 1], 1);
318 j = s.find_first_not_of(" \t\n", j);
319 #elif defined HAVE_PUTENV
320 unquote(s, j);
321 putenv(&s[i]);
322 #else
323 goto use_shell_after_all;
324 #endif
327 vector<const char *> argv;
328 while (true) {
329 size_t i = s.find_first_not_of(" \t\n", j);
330 if (i == string::npos) break;
331 bool quoted = unquote(s, j);
332 const char * word = s.c_str() + i;
333 if (!quoted) {
334 // Handle simple cases of redirection.
335 if (strcmp(word, ">/dev/null") == 0) {
336 int fd = open(word + 1, O_WRONLY);
337 if (fd != -1 && fd != 1) dup2(fd, 1);
338 close(fd);
339 continue;
341 if (strcmp(word, "2>/dev/null") == 0) {
342 int fd = open(word + 2, O_WRONLY);
343 if (fd != -1 && fd != 2) dup2(fd, 2);
344 close(fd);
345 continue;
347 if (strcmp(word, "2>&1") == 0) {
348 dup2(1, 2);
349 continue;
351 if (strcmp(word, "1>&2") == 0) {
352 dup2(2, 1);
353 continue;
356 argv.push_back(word);
358 if (argv.empty()) _exit(0);
359 argv.push_back(NULL);
361 execvp(argv[0], const_cast<char **>(&argv[0]));
362 // Emulate shell behaviour and exit with status 127 if the command
363 // isn't found, and status 126 for other problems. In particular, we
364 // rely on 127 below to throw NoSuchFilter.
365 _exit(errno == ENOENT ? 127 : 126);
368 // We're the parent process.
370 // Close the child's side of the socket pair.
371 close(fds[1]);
372 if (child == -1) {
373 // fork() failed.
374 close(fds[0]);
375 throw ReadError("fork failed");
378 int fd = fds[0];
380 fd_set readfds;
381 FD_ZERO(&readfds);
382 while (true) {
383 // If we wait 300 seconds (5 minutes) without getting data from the
384 // filter, then give up to avoid waiting forever for a filter which
385 // has ended up blocked waiting for something which will never happen.
386 struct timeval tv;
387 tv.tv_sec = 300;
388 tv.tv_usec = 0;
389 FD_SET(fd, &readfds);
390 int r = select(fd + 1, &readfds, NULL, NULL, &tv);
391 if (r <= 0) {
392 if (r < 0) {
393 if (errno == EINTR || errno == EAGAIN) {
394 // select() interrupted by a signal, so retry.
395 continue;
397 cerr << "Reading from filter failed (" << strerror(errno) << ")"
398 << endl;
399 } else {
400 cerr << "Filter inactive for too long" << endl;
402 #ifdef HAVE_SETPGID
403 kill(-child, SIGKILL);
404 #else
405 kill(child, SIGKILL);
406 #endif
407 close(fd);
408 int status = 0;
409 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
410 pid_to_kill_on_signal = 0;
411 throw ReadError(status);
414 char buf[4096];
415 ssize_t res = read(fd, buf, sizeof(buf));
416 if (res == 0) break;
417 if (res == -1) {
418 if (errno == EINTR) {
419 // read() interrupted by a signal, so retry.
420 continue;
422 close(fd);
423 #ifdef HAVE_SETPGID
424 kill(-child, SIGKILL);
425 #endif
426 int status = 0;
427 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
428 pid_to_kill_on_signal = 0;
429 throw ReadError(status);
431 out.append(buf, res);
434 close(fd);
435 #ifdef HAVE_SETPGID
436 kill(-child, SIGKILL);
437 #endif
438 int status = 0;
439 while (waitpid(child, &status, 0) < 0) {
440 if (errno != EINTR)
441 throw ReadError("wait pid failed");
443 pid_to_kill_on_signal = 0;
444 #else
445 (void)use_shell;
446 FILE * fh = popen(cmd.c_str(), "r");
447 if (fh == NULL) throw ReadError("popen failed");
448 while (!feof(fh)) {
449 char buf[4096];
450 size_t len = fread(buf, 1, 4096, fh);
451 if (ferror(fh)) {
452 (void)pclose(fh);
453 throw ReadError("fread failed");
455 out.append(buf, len);
457 int status = pclose(fh);
458 #endif
460 if (WIFEXITED(status)) {
461 int exit_status = WEXITSTATUS(status);
462 if (exit_status == 0 || exit_status == alt_status)
463 return out;
464 if (exit_status == 127)
465 throw NoSuchFilter();
467 #ifdef SIGXCPU
468 if (WIFSIGNALED(status) && WTERMSIG(status) == SIGXCPU) {
469 cerr << "Filter process consumed too much CPU time" << endl;
471 #endif
472 throw ReadError(status);