VERSION: Disable GIT_SNAPSHOT for the Samba 4.17.0rc1 release.
[Samba.git] / lib / util / tfork.c
blobd64c7174fbf894aa096851c6a37dd1442dda53cb
1 /*
2 fork on steroids to avoid SIGCHLD and waitpid
4 Copyright (C) Stefan Metzmacher 2010
5 Copyright (C) Ralph Boehme 2017
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/wait.h"
23 #include "system/filesys.h"
24 #include "system/network.h"
25 #include "lib/util/samba_util.h"
26 #include "lib/util/sys_rw.h"
27 #include "lib/util/tfork.h"
28 #include "lib/util/debug.h"
29 #include "lib/util/util_process.h"
31 #ifdef HAVE_PTHREAD
32 #include <pthread.h>
33 #endif
35 #ifdef NDEBUG
36 #undef NDEBUG
37 #endif
38 #include <assert.h>
41 * This is what the process hierarchy looks like:
43 * +----------+
44 * | caller |
45 * +----------+
46 * |
47 * fork
48 * |
49 * v
50 * +----------+
51 * | waiter |
52 * +----------+
53 * |
54 * fork
55 * |
56 * v
57 * +----------+
58 * | worker |
59 * +----------+
62 #ifdef HAVE_VALGRIND_HELGRIND_H
63 #include <valgrind/helgrind.h>
64 #endif
/*
 * When no race-detector header supplied ANNOTATE_BENIGN_RACE_SIZED, fall
 * back to an empty definition so the annotations below compile to nothing.
 */
#ifndef ANNOTATE_BENIGN_RACE_SIZED
#define ANNOTATE_BENIGN_RACE_SIZED(obj, size, description)
#endif

/*
 * Annotate the object that obj points at as being accessed without a real
 * data race. The expansion already ends in a semicolon.
 */
#define TFORK_ANNOTATE_BENIGN_RACE(obj)					\
	ANNOTATE_BENIGN_RACE_SIZED(					\
		(obj), sizeof(*(obj)),					\
		"no race, serialized by tfork_[un]install_sigchld_handler");
/*
 * The (private) state produced by one tfork_create() call. It is handed back
 * to the caller as an opaque handle.
 */
struct tfork {
	/*
	 * Handed out to the caller by tfork_event_fd().
	 */
	int event_fd;

	/*
	 * Used in the caller by tfork_status() to read the worker exit
	 * status; closing this fd tells the waiter to exit.
	 */
	int status_fd;

	/* pid of the intermediate waiter process, -1 while unset. */
	pid_t waiter_pid;
	/* pid of the worker process; 0 in the handle seen by the worker. */
	pid_t worker_pid;
};
/*
 * Per-thread bookkeeping that only exists while a tfork call is in progress.
 * The waiter child fills in waiter_errno and worker_pid; the caller fills in
 * waiter_pid.
 */
struct tfork_state {
	pid_t waiter_pid;
	int waiter_errno;

	pid_t worker_pid;
};
105 * A global state that synchronizes access to handling SIGCHLD and waiting for
106 * childs.
108 struct tfork_signal_state {
109 bool available;
111 #ifdef HAVE_PTHREAD
112 pthread_cond_t cond;
113 pthread_mutex_t mutex;
114 #endif
117 * pid of the waiter child. This points at waiter_pid in either struct
118 * tfork or struct tfork_state, depending on who called
119 * tfork_install_sigchld_handler().
121 * When tfork_install_sigchld_handler() is called the waiter_pid is
122 * still -1 and only set later after fork(), that's why this is must be
123 * a pointer. The signal handler checks this.
125 pid_t *pid;
127 struct sigaction oldact;
128 sigset_t oldset;
131 static struct tfork_signal_state signal_state;
133 #ifdef HAVE_PTHREAD
134 static pthread_once_t tfork_global_is_initialized = PTHREAD_ONCE_INIT;
135 static pthread_key_t tfork_global_key;
136 #else
137 static struct tfork_state *global_state;
138 #endif
140 static void tfork_sigchld_handler(int signum, siginfo_t *si, void *p);
142 #ifdef HAVE_PTHREAD
/*
 * Destructor registered with pthread_key_create() for tfork_global_key:
 * releases the thread's state with anonymous_shared_free().
 */
static void tfork_global_destructor(void *state)
{
	anonymous_shared_free(state);
}
147 #endif
/*
 * Claim exclusive ownership of SIGCHLD handling.
 *
 * With pthreads this blocks on signal_state.cond until signal_state.available
 * is true and then clears the flag. Without pthreads it is a no-op.
 *
 * Returns 0 on success, otherwise the error number reported by the failing
 * pthread call.
 */
static int tfork_acquire_sighandling(void)
{
	int ret = 0;

#ifdef HAVE_PTHREAD
	ret = pthread_mutex_lock(&signal_state.mutex);
	if (ret != 0) {
		return ret;
	}

	while (!signal_state.available) {
		ret = pthread_cond_wait(&signal_state.cond,
					&signal_state.mutex);
		if (ret != 0) {
			/*
			 * Don't return with the mutex held, every later
			 * acquire/release would block on it forever. This
			 * mirrors the error path in
			 * tfork_release_sighandling().
			 */
			pthread_mutex_unlock(&signal_state.mutex);
			return ret;
		}
	}

	signal_state.available = false;

	ret = pthread_mutex_unlock(&signal_state.mutex);
	if (ret != 0) {
		return ret;
	}
#endif

	return ret;
}
/*
 * Give up ownership of SIGCHLD handling: mark it available again and wake one
 * thread waiting in tfork_acquire_sighandling(). Without pthreads this does
 * nothing.
 *
 * Returns 0 on success, otherwise the error number of the failing pthread
 * call.
 */
static int tfork_release_sighandling(void)
{
#ifdef HAVE_PTHREAD
	int err;

	err = pthread_mutex_lock(&signal_state.mutex);
	if (err != 0) {
		return err;
	}

	signal_state.available = true;

	err = pthread_cond_signal(&signal_state.cond);
	if (err != 0) {
		/* Report the signal error, but never keep the mutex. */
		pthread_mutex_unlock(&signal_state.mutex);
		return err;
	}

	return pthread_mutex_unlock(&signal_state.mutex);
#else
	return 0;
#endif
}
205 #ifdef HAVE_PTHREAD
206 static void tfork_atfork_prepare(void)
208 int ret;
210 ret = pthread_mutex_lock(&signal_state.mutex);
211 assert(ret == 0);
214 static void tfork_atfork_parent(void)
216 int ret;
218 ret = pthread_mutex_unlock(&signal_state.mutex);
219 assert(ret == 0);
221 #endif
223 static void tfork_atfork_child(void)
225 int ret;
227 #ifdef HAVE_PTHREAD
228 ret = pthread_mutex_unlock(&signal_state.mutex);
229 assert(ret == 0);
231 ret = pthread_key_delete(tfork_global_key);
232 assert(ret == 0);
234 ret = pthread_key_create(&tfork_global_key, tfork_global_destructor);
235 assert(ret == 0);
238 * There's no data race on the cond variable from the signal state, we
239 * are writing here, but there are no readers yet. Some data race
240 * detection tools report a race, but the readers are in the parent
241 * process.
243 TFORK_ANNOTATE_BENIGN_RACE(&signal_state.cond);
246 * There's no way to destroy a condition variable if there are waiters,
247 * pthread_cond_destroy() will return EBUSY. Just zero out memory and
248 * then initialize again. This is not backed by POSIX but should be ok.
250 ZERO_STRUCT(signal_state.cond);
251 ret = pthread_cond_init(&signal_state.cond, NULL);
252 assert(ret == 0);
253 #endif
255 if (signal_state.pid != NULL) {
257 ret = sigaction(SIGCHLD, &signal_state.oldact, NULL);
258 assert(ret == 0);
260 #ifdef HAVE_PTHREAD
261 ret = pthread_sigmask(SIG_SETMASK, &signal_state.oldset, NULL);
262 #else
263 ret = sigprocmask(SIG_SETMASK, &signal_state.oldset, NULL);
264 #endif
265 assert(ret == 0);
267 signal_state.pid = NULL;
270 signal_state.available = true;
273 static void tfork_global_initialize(void)
275 #ifdef HAVE_PTHREAD
276 int ret;
278 pthread_atfork(tfork_atfork_prepare,
279 tfork_atfork_parent,
280 tfork_atfork_child);
282 ret = pthread_key_create(&tfork_global_key, tfork_global_destructor);
283 assert(ret == 0);
285 ret = pthread_mutex_init(&signal_state.mutex, NULL);
286 assert(ret == 0);
288 ret = pthread_cond_init(&signal_state.cond, NULL);
289 assert(ret == 0);
292 * In a threaded process there's no data race on t->waiter_pid as
293 * we're serializing globally via tfork_acquire_sighandling() and
294 * tfork_release_sighandling().
296 TFORK_ANNOTATE_BENIGN_RACE(&signal_state.pid);
297 #endif
299 signal_state.available = true;
302 static struct tfork_state *tfork_global_get(void)
304 struct tfork_state *state = NULL;
305 #ifdef HAVE_PTHREAD
306 int ret;
307 #endif
309 #ifdef HAVE_PTHREAD
310 state = (struct tfork_state *)pthread_getspecific(tfork_global_key);
311 #else
312 state = global_state;
313 #endif
314 if (state != NULL) {
315 return state;
318 state = (struct tfork_state *)anonymous_shared_allocate(
319 sizeof(struct tfork_state));
320 if (state == NULL) {
321 return NULL;
324 #ifdef HAVE_PTHREAD
325 ret = pthread_setspecific(tfork_global_key, state);
326 if (ret != 0) {
327 anonymous_shared_free(state);
328 return NULL;
330 #endif
331 return state;
334 static void tfork_global_free(void)
336 struct tfork_state *state = NULL;
337 #ifdef HAVE_PTHREAD
338 int ret;
339 #endif
341 #ifdef HAVE_PTHREAD
342 state = (struct tfork_state *)pthread_getspecific(tfork_global_key);
343 #else
344 state = global_state;
345 #endif
346 if (state == NULL) {
347 return;
350 #ifdef HAVE_PTHREAD
351 ret = pthread_setspecific(tfork_global_key, NULL);
352 if (ret != 0) {
353 return;
355 #endif
356 anonymous_shared_free(state);
360 * Only one thread at a time is allowed to handle SIGCHLD signals
362 static int tfork_install_sigchld_handler(pid_t *pid)
364 int ret;
365 struct sigaction act;
366 sigset_t set;
368 ret = tfork_acquire_sighandling();
369 if (ret != 0) {
370 return -1;
373 assert(signal_state.pid == NULL);
374 signal_state.pid = pid;
376 act = (struct sigaction) {
377 .sa_sigaction = tfork_sigchld_handler,
378 .sa_flags = SA_SIGINFO,
381 ret = sigaction(SIGCHLD, &act, &signal_state.oldact);
382 if (ret != 0) {
383 return -1;
386 sigemptyset(&set);
387 sigaddset(&set, SIGCHLD);
388 #ifdef HAVE_PTHREAD
389 ret = pthread_sigmask(SIG_UNBLOCK, &set, &signal_state.oldset);
390 #else
391 ret = sigprocmask(SIG_UNBLOCK, &set, &signal_state.oldset);
392 #endif
393 if (ret != 0) {
394 return -1;
397 return 0;
400 static int tfork_uninstall_sigchld_handler(void)
402 int ret;
404 signal_state.pid = NULL;
406 ret = sigaction(SIGCHLD, &signal_state.oldact, NULL);
407 if (ret != 0) {
408 return -1;
411 #ifdef HAVE_PTHREAD
412 ret = pthread_sigmask(SIG_SETMASK, &signal_state.oldset, NULL);
413 #else
414 ret = sigprocmask(SIG_SETMASK, &signal_state.oldset, NULL);
415 #endif
416 if (ret != 0) {
417 return -1;
420 ret = tfork_release_sighandling();
421 if (ret != 0) {
422 return -1;
425 return 0;
428 static void tfork_sigchld_handler(int signum, siginfo_t *si, void *p)
430 if ((signal_state.pid != NULL) &&
431 (*signal_state.pid != -1) &&
432 (si->si_pid == *signal_state.pid))
434 return;
438 * Not our child, forward to old handler
440 if (signal_state.oldact.sa_flags & SA_SIGINFO) {
441 signal_state.oldact.sa_sigaction(signum, si, p);
442 return;
445 if (signal_state.oldact.sa_handler == SIG_IGN) {
446 return;
448 if (signal_state.oldact.sa_handler == SIG_DFL) {
449 return;
451 signal_state.oldact.sa_handler(signum);
454 static pid_t tfork_start_waiter_and_worker(struct tfork_state *state,
455 int *_event_fd,
456 int *_status_fd)
458 int p[2];
459 int status_sp_caller_fd = -1;
460 int status_sp_waiter_fd = -1;
461 int event_pipe_caller_fd = -1;
462 int event_pipe_waiter_fd = -1;
463 int ready_pipe_caller_fd = -1;
464 int ready_pipe_worker_fd = -1;
465 ssize_t nwritten;
466 ssize_t nread;
467 pid_t pid;
468 int status;
469 int fd;
470 char c;
471 int ret;
473 *_event_fd = -1;
474 *_status_fd = -1;
476 if (state == NULL) {
477 return -1;
480 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, p);
481 if (ret != 0) {
482 return -1;
484 set_close_on_exec(p[0]);
485 set_close_on_exec(p[1]);
486 status_sp_caller_fd = p[0];
487 status_sp_waiter_fd = p[1];
489 ret = pipe(p);
490 if (ret != 0) {
491 close(status_sp_caller_fd);
492 close(status_sp_waiter_fd);
493 return -1;
495 set_close_on_exec(p[0]);
496 set_close_on_exec(p[1]);
497 event_pipe_caller_fd = p[0];
498 event_pipe_waiter_fd = p[1];
501 ret = pipe(p);
502 if (ret != 0) {
503 close(status_sp_caller_fd);
504 close(status_sp_waiter_fd);
505 close(event_pipe_caller_fd);
506 close(event_pipe_waiter_fd);
507 return -1;
509 set_close_on_exec(p[0]);
510 set_close_on_exec(p[1]);
511 ready_pipe_worker_fd = p[0];
512 ready_pipe_caller_fd = p[1];
514 pid = fork();
515 if (pid == -1) {
516 close(status_sp_caller_fd);
517 close(status_sp_waiter_fd);
518 close(event_pipe_caller_fd);
519 close(event_pipe_waiter_fd);
520 close(ready_pipe_caller_fd);
521 close(ready_pipe_worker_fd);
522 return -1;
524 if (pid != 0) {
525 /* The caller */
528 * In a threaded process there's no data race on
529 * state->waiter_pid as we're serializing globally via
530 * tfork_acquire_sighandling() and tfork_release_sighandling().
532 TFORK_ANNOTATE_BENIGN_RACE(&state->waiter_pid);
534 state->waiter_pid = pid;
536 close(status_sp_waiter_fd);
537 close(event_pipe_waiter_fd);
538 close(ready_pipe_worker_fd);
540 set_blocking(event_pipe_caller_fd, false);
543 * wait for the waiter to get ready.
545 nread = sys_read(status_sp_caller_fd, &c, sizeof(char));
546 if (nread != sizeof(char)) {
547 return -1;
551 * Notify the worker to start.
553 nwritten = sys_write(ready_pipe_caller_fd,
554 &(char){0}, sizeof(char));
555 if (nwritten != sizeof(char)) {
556 close(ready_pipe_caller_fd);
557 return -1;
559 close(ready_pipe_caller_fd);
561 *_event_fd = event_pipe_caller_fd;
562 *_status_fd = status_sp_caller_fd;
564 return pid;
567 #ifndef HAVE_PTHREAD
568 /* cleanup sigchld_handler */
569 tfork_atfork_child();
570 #endif
573 * The "waiter" child.
575 setproctitle("tfork waiter process");
576 prctl_set_comment("tfork waiter");
577 CatchSignal(SIGCHLD, SIG_DFL);
579 close(status_sp_caller_fd);
580 close(event_pipe_caller_fd);
581 close(ready_pipe_caller_fd);
583 pid = fork();
584 if (pid == -1) {
585 state->waiter_errno = errno;
586 _exit(0);
588 if (pid == 0) {
590 * The worker child.
593 close(status_sp_waiter_fd);
594 close(event_pipe_waiter_fd);
597 * Wait for the caller to give us a go!
599 nread = sys_read(ready_pipe_worker_fd, &c, sizeof(char));
600 if (nread != sizeof(char)) {
601 _exit(1);
603 close(ready_pipe_worker_fd);
605 return 0;
607 state->worker_pid = pid;
608 setproctitle("tfork waiter process(%d)", pid);
609 prctl_set_comment("tfork(%d)", pid);
611 close(ready_pipe_worker_fd);
614 * We're going to stay around until child2 exits, so lets close all fds
615 * other than the pipe fd we may have inherited from the caller.
617 * Dup event_sp_waiter_fd and status_sp_waiter_fd onto fds 0 and 1 so we
618 * can then call closefrom(2).
620 if (event_pipe_waiter_fd > 0) {
621 int dup_fd = 0;
623 if (status_sp_waiter_fd == 0) {
624 dup_fd = 1;
627 do {
628 fd = dup2(event_pipe_waiter_fd, dup_fd);
629 } while ((fd == -1) && (errno == EINTR));
630 if (fd == -1) {
631 state->waiter_errno = errno;
632 kill(state->worker_pid, SIGKILL);
633 state->worker_pid = -1;
634 _exit(1);
636 event_pipe_waiter_fd = fd;
639 if (status_sp_waiter_fd > 1) {
640 do {
641 fd = dup2(status_sp_waiter_fd, 1);
642 } while ((fd == -1) && (errno == EINTR));
643 if (fd == -1) {
644 state->waiter_errno = errno;
645 kill(state->worker_pid, SIGKILL);
646 state->worker_pid = -1;
647 _exit(1);
649 status_sp_waiter_fd = fd;
652 closefrom(2);
654 /* Tell the caller we're ready */
655 nwritten = sys_write(status_sp_waiter_fd, &(char){0}, sizeof(char));
656 if (nwritten != sizeof(char)) {
657 _exit(1);
660 tfork_global_free();
661 state = NULL;
663 do {
664 ret = waitpid(pid, &status, 0);
665 } while ((ret == -1) && (errno == EINTR));
666 if (ret == -1) {
667 status = errno;
668 kill(pid, SIGKILL);
672 * This writes the worker child exit status via our internal socketpair
673 * so the tfork_status() implementation can read it from its end.
675 nwritten = sys_write(status_sp_waiter_fd, &status, sizeof(status));
676 if (nwritten == -1) {
677 if (errno != EPIPE && errno != ECONNRESET) {
678 _exit(errno);
681 * The caller exitted and didn't call tfork_status().
683 _exit(0);
685 if (nwritten != sizeof(status)) {
686 _exit(1);
690 * This write to the event_fd returned by tfork_event_fd() and notifies
691 * the caller that the worker child is done and he may now call
692 * tfork_status().
694 nwritten = sys_write(event_pipe_waiter_fd, &(char){0}, sizeof(char));
695 if (nwritten != sizeof(char)) {
696 _exit(1);
700 * Wait for our parent (the process that called tfork_create()) to
701 * close() the socketpair fd in tfork_status().
703 * Again, the caller might have exitted without calling tfork_status().
705 nread = sys_read(status_sp_waiter_fd, &c, 1);
706 if (nread == -1) {
707 if (errno == EPIPE || errno == ECONNRESET) {
708 _exit(0);
710 _exit(errno);
712 if (nread != 1) {
713 _exit(255);
716 _exit(0);
/*
 * Kill the waiter process with SIGKILL and collect it with waitpid().
 *
 * A waiter_pid of -1 means no waiter was started and nothing is done.
 * Always returns 0; failing to reap the waiter trips an assert.
 */
static int tfork_create_reap_waiter(pid_t waiter_pid)
{
	int wstatus;
	pid_t reaped;

	if (waiter_pid != -1) {
		kill(waiter_pid, SIGKILL);

		/* Retry the wait for as long as signals interrupt it. */
		for (;;) {
			reaped = waitpid(waiter_pid, &wstatus, 0);
			if ((reaped != -1) || (errno != EINTR)) {
				break;
			}
		}
		assert(reaped == waiter_pid);
	}

	return 0;
}
738 struct tfork *tfork_create(void)
740 struct tfork_state *state = NULL;
741 struct tfork *t = NULL;
742 pid_t pid;
743 int saved_errno;
744 int ret = 0;
746 #ifdef HAVE_PTHREAD
747 ret = pthread_once(&tfork_global_is_initialized,
748 tfork_global_initialize);
749 if (ret != 0) {
750 return NULL;
752 #else
753 tfork_global_initialize();
754 #endif
756 state = tfork_global_get();
757 if (state == NULL) {
758 return NULL;
760 *state = (struct tfork_state) {
761 .waiter_pid = -1,
762 .waiter_errno = ECANCELED,
763 .worker_pid = -1,
766 t = malloc(sizeof(struct tfork));
767 if (t == NULL) {
768 ret = -1;
769 goto cleanup;
772 *t = (struct tfork) {
773 .event_fd = -1,
774 .status_fd = -1,
775 .waiter_pid = -1,
776 .worker_pid = -1,
779 ret = tfork_install_sigchld_handler(&state->waiter_pid);
780 if (ret != 0) {
781 goto cleanup;
784 pid = tfork_start_waiter_and_worker(state,
785 &t->event_fd,
786 &t->status_fd);
787 if (pid == -1) {
788 ret = -1;
789 goto cleanup;
791 if (pid == 0) {
792 /* In the worker */
793 tfork_global_free();
794 t->worker_pid = 0;
795 return t;
799 * In a threaded process there's no data race on t->waiter_pid as
800 * we're serializing globally via tfork_acquire_sighandling() and
801 * tfork_release_sighandling().
803 TFORK_ANNOTATE_BENIGN_RACE(&t->waiter_pid);
805 t->waiter_pid = pid;
806 t->worker_pid = state->worker_pid;
808 cleanup:
809 if (ret == -1) {
810 saved_errno = errno;
812 if (t != NULL) {
813 if (t->status_fd != -1) {
814 close(t->status_fd);
816 if (t->event_fd != -1) {
817 close(t->event_fd);
820 ret = tfork_create_reap_waiter(state->waiter_pid);
821 assert(ret == 0);
823 free(t);
824 t = NULL;
828 ret = tfork_uninstall_sigchld_handler();
829 assert(ret == 0);
831 tfork_global_free();
833 if (ret == -1) {
834 errno = saved_errno;
836 return t;
839 pid_t tfork_child_pid(const struct tfork *t)
841 return t->worker_pid;
844 int tfork_event_fd(struct tfork *t)
846 int fd = t->event_fd;
848 assert(t->event_fd != -1);
849 t->event_fd = -1;
851 return fd;
854 int tfork_status(struct tfork **_t, bool wait)
856 struct tfork *t = *_t;
857 int status;
858 ssize_t nread;
859 int waiter_status;
860 pid_t pid;
861 int ret;
863 if (t == NULL) {
864 return -1;
867 if (wait) {
868 set_blocking(t->status_fd, true);
870 nread = sys_read(t->status_fd, &status, sizeof(int));
871 } else {
872 set_blocking(t->status_fd, false);
874 nread = read(t->status_fd, &status, sizeof(int));
875 if ((nread == -1) &&
876 ((errno == EAGAIN) || (errno == EWOULDBLOCK) || errno == EINTR)) {
877 errno = EAGAIN;
878 return -1;
881 if (nread != sizeof(int)) {
882 return -1;
885 ret = tfork_install_sigchld_handler(&t->waiter_pid);
886 if (ret != 0) {
887 return -1;
891 * This triggers process exit in the waiter.
892 * We write to the fd as well as closing it, as any tforked sibling
893 * processes will also have the writable end of this socket open.
897 size_t nwritten;
898 nwritten = sys_write(t->status_fd, &(char){0}, sizeof(char));
899 if (nwritten != sizeof(char)) {
900 close(t->status_fd);
901 return -1;
904 close(t->status_fd);
906 do {
907 pid = waitpid(t->waiter_pid, &waiter_status, 0);
908 } while ((pid == -1) && (errno == EINTR));
909 assert(pid == t->waiter_pid);
911 if (t->event_fd != -1) {
912 close(t->event_fd);
913 t->event_fd = -1;
916 free(t);
917 t = NULL;
918 *_t = NULL;
920 ret = tfork_uninstall_sigchld_handler();
921 assert(ret == 0);
923 return status;
926 int tfork_destroy(struct tfork **_t)
928 struct tfork *t = *_t;
929 int ret;
931 if (t == NULL) {
932 errno = EINVAL;
933 return -1;
936 kill(t->worker_pid, SIGKILL);
938 ret = tfork_status(_t, true);
939 if (ret == -1) {
940 return -1;
943 return 0;