s3-prefork: Set up a SIGCHLD handler by default
[Samba.git] / source3 / lib / server_prefork.c
blob26288f70fd4e42c6d08783703251d98c665c71de
1 /*
2 Unix SMB/CIFS implementation.
3 Common server globals
5 Copyright (C) Simo Sorce <idra@samba.org> 2011
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "system/time.h"
23 #include "system/shmem.h"
24 #include "system/filesys.h"
25 #include "server_prefork.h"
26 #include "../lib/util/util.h"
27 #include "../lib/util/tevent_unix.h"
struct prefork_pool {

	int listen_fd_size;		/* number of listening sockets */
	int *listen_fds;		/* private copy of the listening fds */

	int lock_fd;			/* fd workers lock to serialize accept() */

	prefork_main_fn_t *main_fn;	/* worker entry point run in each child */
	void *private_data;		/* opaque data passed to main_fn */

	int pool_size;			/* number of slots in 'pool' (max children) */
	struct pf_worker_data *pool;	/* shared (mmap'ed) per-worker slots */

	int allowed_clients;		/* NOTE(review): appears unused in this file;
					 * the per-worker quota lives in
					 * pf_worker_data.allowed_clients — confirm */

	prefork_sigchld_fn_t *sigchld_fn;	/* optional extra SIGCHLD callback */
	void *sigchld_data;		/* opaque data passed to sigchld_fn */
};
48 static bool prefork_setup_sigchld_handler(struct tevent_context *ev_ctx,
49 struct prefork_pool *pfp);
51 static int prefork_pool_destructor(struct prefork_pool *pfp)
53 munmap(pfp->pool, pfp->pool_size * sizeof(struct pf_worker_data));
54 return 0;
57 bool prefork_create_pool(struct tevent_context *ev_ctx, TALLOC_CTX *mem_ctx,
58 int listen_fd_size, int *listen_fds,
59 int min_children, int max_children,
60 prefork_main_fn_t *main_fn, void *private_data,
61 struct prefork_pool **pf_pool)
63 struct prefork_pool *pfp;
64 pid_t pid;
65 time_t now = time(NULL);
66 size_t data_size;
67 int ret;
68 int i;
69 bool ok;
71 pfp = talloc_zero(mem_ctx, struct prefork_pool);
72 if (!pfp) {
73 DEBUG(1, ("Out of memory!\n"));
74 return false;
76 pfp->listen_fd_size = listen_fd_size;
77 pfp->listen_fds = talloc_array(pfp, int, listen_fd_size);
78 if (!pfp->listen_fds) {
79 DEBUG(1, ("Out of memory!\n"));
80 return false;
82 for (i = 0; i < listen_fd_size; i++) {
83 pfp->listen_fds[i] = listen_fds[i];
85 pfp->main_fn = main_fn;
86 pfp->private_data = private_data;
88 pfp->lock_fd = create_unlink_tmp(NULL);
89 if (pfp->lock_fd == -1) {
90 DEBUG(1, ("Failed to create prefork lock fd!\n"));
91 talloc_free(pfp);
92 return false;
95 pfp->pool_size = max_children;
96 data_size = sizeof(struct pf_worker_data) * max_children;
98 pfp->pool = mmap(NULL, data_size, PROT_READ|PROT_WRITE,
99 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
100 if (pfp->pool == MAP_FAILED) {
101 DEBUG(1, ("Failed to mmap memory for prefork pool!\n"));
102 talloc_free(pfp);
103 return false;
105 talloc_set_destructor(pfp, prefork_pool_destructor);
107 for (i = 0; i < min_children; i++) {
109 pfp->pool[i].allowed_clients = 1;
110 pfp->pool[i].started = now;
112 pid = sys_fork();
113 switch (pid) {
114 case -1:
115 DEBUG(1, ("Failed to prefork child n. %d !\n", i));
116 break;
118 case 0: /* THE CHILD */
120 pfp->pool[i].status = PF_WORKER_IDLE;
121 ret = pfp->main_fn(ev_ctx, &pfp->pool[i],
122 pfp->listen_fd_size,
123 pfp->listen_fds,
124 pfp->lock_fd,
125 pfp->private_data);
126 exit(ret);
128 default: /* THE PARENT */
129 pfp->pool[i].pid = pid;
130 break;
134 ok = prefork_setup_sigchld_handler(ev_ctx, pfp);
135 if (!ok) {
136 DEBUG(1, ("Failed to setup SIGCHLD Handler!\n"));
137 talloc_free(pfp);
138 return false;
141 *pf_pool = pfp;
142 return true;
145 /* Provide the new max children number in new_max
146 * (must be larger than current max).
147 * Returns: 0 if all fine
148 * ENOSPC if mremap fails to expand
149 * EINVAL if new_max is invalid
151 int prefork_expand_pool(struct prefork_pool *pfp, int new_max)
153 struct pf_worker_data *pool;
154 size_t old_size;
155 size_t new_size;
157 if (new_max <= pfp->pool_size) {
158 return EINVAL;
161 old_size = sizeof(struct pf_worker_data) * pfp->pool_size;
162 new_size = sizeof(struct pf_worker_data) * new_max;
164 pool = mremap(pfp->pool, old_size, new_size, 0);
165 if (pool == MAP_FAILED) {
166 DEBUG(3, ("Failed to mremap memory for prefork pool!\n"));
167 return ENOSPC;
170 memset(&pool[pfp->pool_size], 0, new_size - old_size);
172 pfp->pool_size = new_max;
174 return 0;
177 int prefork_add_children(struct tevent_context *ev_ctx,
178 struct prefork_pool *pfp,
179 int num_children)
181 pid_t pid;
182 time_t now = time(NULL);
183 int ret;
184 int i, j;
186 for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {
188 if (pfp->pool[i].status != PF_WORKER_NONE) {
189 continue;
192 pfp->pool[i].allowed_clients = 1;
193 pfp->pool[i].started = now;
195 pid = sys_fork();
196 switch (pid) {
197 case -1:
198 DEBUG(1, ("Failed to prefork child n. %d !\n", j));
199 break;
201 case 0: /* THE CHILD */
203 pfp->pool[i].status = PF_WORKER_IDLE;
204 ret = pfp->main_fn(ev_ctx, &pfp->pool[i],
205 pfp->listen_fd_size,
206 pfp->listen_fds,
207 pfp->lock_fd,
208 pfp->private_data);
210 pfp->pool[i].status = PF_WORKER_EXITING;
211 exit(ret);
213 default: /* THE PARENT */
214 pfp->pool[i].pid = pid;
215 j++;
216 break;
220 DEBUG(5, ("Added %d children!\n", j));
222 return j;
struct prefork_oldest {
	int num;		/* index of the worker slot in the pool */
	time_t started;		/* start time used as the sort key */
};

/* qsort comparator, sorts in inverse order: entries with a smaller
 * (older) start time compare greater and end up later in the array. */
static int prefork_sort_oldest(const void *ap, const void *bp)
{
	const struct prefork_oldest *one = (const struct prefork_oldest *)ap;
	const struct prefork_oldest *two = (const struct prefork_oldest *)bp;

	if (one->started < two->started) {
		return 1;
	}
	if (one->started > two->started) {
		return -1;
	}
	return 0;
}
245 int prefork_retire_children(struct prefork_pool *pfp,
246 int num_children, time_t age_limit)
248 time_t now = time(NULL);
249 struct prefork_oldest *oldest;
250 int i, j;
252 oldest = talloc_array(pfp, struct prefork_oldest, pfp->pool_size);
253 if (!oldest) {
254 return -1;
257 for (i = 0; i < pfp->pool_size; i++) {
258 oldest[i].num = i;
259 if (pfp->pool[i].status == PF_WORKER_IDLE) {
260 oldest[i].started = pfp->pool[i].started;
261 } else {
262 oldest[i].started = now;
266 qsort(oldest, pfp->pool_size,
267 sizeof(struct prefork_oldest),
268 prefork_sort_oldest);
270 for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {
271 if (pfp->pool[i].status == PF_WORKER_IDLE &&
272 pfp->pool[i].started <= age_limit) {
273 /* tell the child it's time to give up */
274 DEBUG(5, ("Retiring pid %d!\n", pfp->pool[i].pid));
275 pfp->pool[i].cmds = PF_SRV_MSG_EXIT;
276 kill(pfp->pool[i].pid, SIGHUP);
277 j++;
281 return j;
284 int prefork_count_active_children(struct prefork_pool *pfp, int *total)
286 int i, a, t;
288 a = 0;
289 t = 0;
290 for (i = 0; i < pfp->pool_size; i++) {
291 if (pfp->pool[i].status == PF_WORKER_NONE) {
292 continue;
295 t++;
297 if (pfp->pool[i].num_clients == 0) {
298 continue;
301 a++;
304 *total = t;
305 return a;
308 static void prefork_cleanup_loop(struct prefork_pool *pfp)
310 int status;
311 pid_t pid;
312 int i;
314 /* TODO: should we use a process group id wait instead of looping ? */
315 for (i = 0; i < pfp->pool_size; i++) {
316 if (pfp->pool[i].status == PF_WORKER_NONE ||
317 pfp->pool[i].pid == 0) {
318 continue;
321 pid = sys_waitpid(pfp->pool[i].pid, &status, WNOHANG);
322 if (pid > 0) {
324 if (pfp->pool[i].status != PF_WORKER_EXITING) {
325 DEBUG(3, ("Child (%d) terminated abnormally:"
326 " %d\n", (int)pid, status));
327 } else {
328 DEBUG(10, ("Child (%d) terminated with status:"
329 " %d\n", (int)pid, status));
332 /* reset all fields,
333 * this makes status = PF_WORK_NONE */
334 memset(&pfp->pool[i], 0,
335 sizeof(struct pf_worker_data));
341 void prefork_increase_allowed_clients(struct prefork_pool *pfp, int max)
343 int i;
345 for (i = 0; i < pfp->pool_size; i++) {
346 if (pfp->pool[i].status == PF_WORKER_NONE) {
347 continue;
350 if (pfp->pool[i].allowed_clients < max) {
351 pfp->pool[i].allowed_clients++;
356 void prefork_reset_allowed_clients(struct prefork_pool *pfp)
358 int i;
360 for (i = 0; i < pfp->pool_size; i++) {
361 pfp->pool[i].allowed_clients = 1;
365 void prefork_send_signal_to_all(struct prefork_pool *pfp, int signal_num)
367 int i;
369 for (i = 0; i < pfp->pool_size; i++) {
370 if (pfp->pool[i].status == PF_WORKER_NONE) {
371 continue;
374 kill(pfp->pool[i].pid, signal_num);
378 static void prefork_sigchld_handler(struct tevent_context *ev_ctx,
379 struct tevent_signal *se,
380 int signum, int count,
381 void *siginfo, void *pvt)
383 struct prefork_pool *pfp;
385 pfp = talloc_get_type_abort(pvt, struct prefork_pool);
387 /* run the cleanup function to make sure all dead children are
388 * properly and timely retired. */
389 prefork_cleanup_loop(pfp);
391 if (pfp->sigchld_fn) {
392 pfp->sigchld_fn(ev_ctx, pfp, pfp->sigchld_data);
396 static bool prefork_setup_sigchld_handler(struct tevent_context *ev_ctx,
397 struct prefork_pool *pfp)
399 struct tevent_signal *se;
401 se = tevent_add_signal(ev_ctx, pfp, SIGCHLD, 0,
402 prefork_sigchld_handler, pfp);
403 if (!se) {
404 DEBUG(0, ("Failed to setup SIGCHLD handler!\n"));
405 return false;
408 return true;
411 void prefork_set_sigchld_callback(struct prefork_pool *pfp,
412 prefork_sigchld_fn_t *sigchld_fn,
413 void *private_data)
415 pfp->sigchld_fn = sigchld_fn;
416 pfp->sigchld_data = private_data;
/* ==== Functions used by children ==== */

/* Set to 1 by pf_alarm_cb() when SIGALRM fires; polled by the lock
 * helpers below to detect that their timeout elapsed. */
static SIG_ATOMIC_T pf_alarm;

/* SIGALRM handler: only records that the alarm went off. */
static void pf_alarm_cb(int signum)
{
	pf_alarm = 1;
}
430 * Parameters:
431 * pf - the worker shared data structure
432 * lock_fd - the file descriptor used for locking
433 * timeout - expressed in seconds:
434 * -1 never timeouts,
435 * 0 timeouts immediately
436 * N seconds before timing out
438 * Returns values:
439 * negative errno on fatal error
440 * 0 on success to acquire lock
441 * -1 on timeout/lock held by other
442 * -2 on server msg to terminate
443 * ERRNO on other errors
446 static int prefork_grab_lock(struct pf_worker_data *pf,
447 int lock_fd, int timeout)
449 struct flock lock;
450 int op;
451 int ret;
453 if (pf->cmds == PF_SRV_MSG_EXIT) {
454 return -2;
457 pf_alarm = 0;
459 if (timeout > 0) {
460 CatchSignal(SIGALRM, pf_alarm_cb);
461 alarm(timeout);
464 if (timeout == 0) {
465 op = F_SETLK;
466 } else {
467 op = F_SETLKW;
470 ret = 0;
471 do {
472 ZERO_STRUCT(lock);
473 lock.l_type = F_WRLCK;
474 lock.l_whence = SEEK_SET;
476 ret = fcntl(lock_fd, op, &lock);
477 if (ret == 0) break;
479 ret = errno;
481 if (pf->cmds == PF_SRV_MSG_EXIT) {
482 ret = -2;
483 goto done;
486 switch (ret) {
487 case EINTR:
488 break;
490 case EACCES:
491 case EAGAIN:
492 /* lock held by other proc */
493 ret = -1;
494 goto done;
495 default:
496 goto done;
499 if (pf_alarm == 1) {
500 /* timed out */
501 ret = -1;
502 goto done;
504 } while (timeout != 0);
506 if (ret != 0) {
507 /* We have the Lock */
508 pf->status = PF_WORKER_ACCEPTING;
511 done:
512 if (timeout > 0) {
513 alarm(0);
514 CatchSignal(SIGALRM, SIG_IGN);
517 if (ret > 0) {
518 DEBUG(1, ("Failed to get lock (%d, %s)!\n",
519 ret, strerror(ret)));
521 return ret;
525 * Parameters:
526 * pf - the worker shared data structure
527 * lock_fd - the file descriptor used for locking
528 * timeout - expressed in seconds:
529 * -1 never timeouts,
530 * 0 timeouts immediately
531 * N seconds before timing out
533 * Returns values:
534 * negative errno on fatal error
535 * 0 on success to release lock
536 * -1 on timeout
537 * ERRNO on error
540 static int prefork_release_lock(struct pf_worker_data *pf,
541 int lock_fd, int timeout)
543 struct flock lock;
544 int op;
545 int ret;
547 pf_alarm = 0;
549 if (timeout > 0) {
550 CatchSignal(SIGALRM, pf_alarm_cb);
551 alarm(timeout);
554 if (timeout == 0) {
555 op = F_SETLK;
556 } else {
557 op = F_SETLKW;
560 do {
561 ZERO_STRUCT(lock);
562 lock.l_type = F_UNLCK;
563 lock.l_whence = SEEK_SET;
565 ret = fcntl(lock_fd, op, &lock);
566 if (ret == 0) break;
568 ret = errno;
570 if (ret != EINTR) {
571 goto done;
574 if (pf_alarm == 1) {
575 /* timed out */
576 ret = -1;
577 goto done;
579 } while (timeout != 0);
581 done:
582 if (timeout > 0) {
583 alarm(0);
584 CatchSignal(SIGALRM, SIG_IGN);
587 if (ret > 0) {
588 DEBUG(1, ("Failed to release lock (%d, %s)!\n",
589 ret, strerror(ret)));
591 return ret;
/* ==== async code ==== */

#define PF_ASYNC_LOCK_GRAB	0x01	/* request: acquire the accept lock */
#define PF_ASYNC_LOCK_RELEASE	0x02	/* request: release the accept lock */
#define PF_ASYNC_ACTION_MASK	0x03	/* mask selecting the action bits */
#define PF_ASYNC_LOCK_DONE	0x04	/* flag: request completed (ok or error) */

struct pf_lock_state {
	struct pf_worker_data *pf;	/* worker slot performing the operation */
	int lock_fd;			/* fd the advisory lock lives on */
	int flags;			/* action bits plus PF_ASYNC_LOCK_DONE */
};
607 static void prefork_lock_handler(struct tevent_context *ev,
608 struct tevent_timer *te,
609 struct timeval curtime, void *pvt);
611 static struct tevent_req *prefork_lock_send(TALLOC_CTX *mem_ctx,
612 struct tevent_context *ev,
613 struct pf_worker_data *pf,
614 int lock_fd, int action)
616 struct tevent_req *req;
617 struct pf_lock_state *state;
619 req = tevent_req_create(mem_ctx, &state, struct pf_lock_state);
620 if (!req) {
621 return NULL;
624 state->pf = pf;
625 state->lock_fd = lock_fd;
626 state->flags = action;
628 /* try once immediately */
629 prefork_lock_handler(ev, NULL, tevent_timeval_zero(), req);
630 if (state->flags & PF_ASYNC_LOCK_DONE) {
631 tevent_req_post(req, ev);
634 return req;
637 static void prefork_lock_handler(struct tevent_context *ev,
638 struct tevent_timer *te,
639 struct timeval curtime, void *pvt)
641 struct tevent_req *req;
642 struct pf_lock_state *state;
643 struct timeval tv;
644 int timeout = 0;
645 int ret;
647 req = talloc_get_type_abort(pvt, struct tevent_req);
648 state = tevent_req_data(req, struct pf_lock_state);
650 if (state->pf->num_clients > 0) {
651 timeout = 1;
654 switch (state->flags & PF_ASYNC_ACTION_MASK) {
655 case PF_ASYNC_LOCK_GRAB:
656 ret = prefork_grab_lock(state->pf, state->lock_fd, timeout);
657 break;
658 case PF_ASYNC_LOCK_RELEASE:
659 ret = prefork_release_lock(state->pf, state->lock_fd, timeout);
660 break;
661 default:
662 ret = EINVAL;
663 break;
666 switch (ret) {
667 case 0:
668 state->flags |= PF_ASYNC_LOCK_DONE;
669 tevent_req_done(req);
670 return;
671 case -1:
672 if (timeout) {
673 tv = tevent_timeval_zero();
674 } else {
675 tv = tevent_timeval_current_ofs(0, 100000);
677 te = tevent_add_timer(ev, state, tv,
678 prefork_lock_handler, req);
679 tevent_req_nomem(te, req);
680 return;
681 case -2:
682 /* server tells us to stop */
683 state->flags |= PF_ASYNC_LOCK_DONE;
684 tevent_req_error(req, -2);
685 return;
686 default:
687 state->flags |= PF_ASYNC_LOCK_DONE;
688 tevent_req_error(req, ret);
689 return;
/* Collect the result of prefork_lock_send(): 0 on success, otherwise
 * the error the request was finished with. */
static int prefork_lock_recv(struct tevent_req *req)
{
	int err = 0;
	bool failed;

	failed = tevent_req_is_unix_error(req, &err);
	if (!failed) {
		err = 0;
	}

	tevent_req_received(req);
	return err;
}
struct pf_listen_state {
	struct tevent_context *ev;	/* event context driving the request */
	struct pf_worker_data *pf;	/* this worker's shared slot */

	int listen_fd_size;		/* number of listening sockets */
	int *listen_fds;		/* listening sockets to watch */

	int lock_fd;			/* fd used to serialize accept() */

	struct sockaddr *addr;		/* out: client address from accept() */
	socklen_t *addrlen;		/* in/out: size of 'addr' */

	int accept_fd;			/* out: accepted socket, -1 until then */

	int error;			/* first error encountered, 0 if none */
};
722 static void prefork_listen_lock_done(struct tevent_req *subreq);
723 static void prefork_listen_accept_handler(struct tevent_context *ev,
724 struct tevent_fd *fde,
725 uint16_t flags, void *pvt);
726 static void prefork_listen_release_done(struct tevent_req *subreq);
728 struct tevent_req *prefork_listen_send(TALLOC_CTX *mem_ctx,
729 struct tevent_context *ev,
730 struct pf_worker_data *pf,
731 int listen_fd_size,
732 int *listen_fds,
733 int lock_fd,
734 struct sockaddr *addr,
735 socklen_t *addrlen)
737 struct tevent_req *req, *subreq;
738 struct pf_listen_state *state;
740 req = tevent_req_create(mem_ctx, &state, struct pf_listen_state);
741 if (!req) {
742 return NULL;
745 state->ev = ev;
746 state->pf = pf;
747 state->lock_fd = lock_fd;
748 state->listen_fd_size = listen_fd_size;
749 state->listen_fds = listen_fds;
750 state->addr = addr;
751 state->addrlen = addrlen;
752 state->accept_fd = -1;
753 state->error = 0;
755 subreq = prefork_lock_send(state, state->ev, state->pf,
756 state->lock_fd, PF_ASYNC_LOCK_GRAB);
757 if (tevent_req_nomem(subreq, req)) {
758 return tevent_req_post(req, ev);
761 tevent_req_set_callback(subreq, prefork_listen_lock_done, req);
762 return req;
struct pf_listen_ctx {
	TALLOC_CTX *fde_ctx;	/* parent of all fd events; freed as one to
				 * stop watching every listen fd at once */
	struct tevent_req *req;	/* the prefork_listen request being served */
	int listen_fd;		/* the listening socket this context watches */
};
771 static void prefork_listen_lock_done(struct tevent_req *subreq)
773 struct tevent_req *req;
774 struct pf_listen_state *state;
775 struct pf_listen_ctx *ctx;
776 struct tevent_fd *fde;
777 TALLOC_CTX *fde_ctx;
778 int ret;
779 int i;
781 req = tevent_req_callback_data(subreq, struct tevent_req);
782 state = tevent_req_data(req, struct pf_listen_state);
784 ret = prefork_lock_recv(subreq);
785 if (ret != 0) {
786 tevent_req_error(req, ret);
787 return;
790 fde_ctx = talloc_new(state);
791 if (tevent_req_nomem(fde_ctx, req)) {
792 return;
795 /* next step, accept */
796 for (i = 0; i < state->listen_fd_size; i++) {
797 ctx = talloc(fde_ctx, struct pf_listen_ctx);
798 if (tevent_req_nomem(ctx, req)) {
799 return;
801 ctx->fde_ctx = fde_ctx;
802 ctx->req = req;
803 ctx->listen_fd = state->listen_fds[i];
805 fde = tevent_add_fd(state->ev, fde_ctx,
806 ctx->listen_fd, TEVENT_FD_READ,
807 prefork_listen_accept_handler, ctx);
808 if (tevent_req_nomem(fde, req)) {
809 return;
814 static void prefork_listen_accept_handler(struct tevent_context *ev,
815 struct tevent_fd *fde,
816 uint16_t flags, void *pvt)
818 struct pf_listen_state *state;
819 struct tevent_req *req, *subreq;
820 struct pf_listen_ctx *ctx;
821 int err = 0;
822 int sd = -1;
824 ctx = talloc_get_type_abort(pvt, struct pf_listen_ctx);
825 state = tevent_req_data(ctx->req, struct pf_listen_state);
827 sd = accept(ctx->listen_fd, state->addr, state->addrlen);
828 if (sd == -1) {
829 if (errno == EINTR) {
830 /* keep trying */
831 return;
833 err = errno;
834 DEBUG(6, ("Accept failed! (%d, %s)\n", err, strerror(err)));
838 /* do not track the listen fds anymore */
839 req = ctx->req;
840 talloc_free(ctx->fde_ctx);
841 ctx = NULL;
842 if (err) {
843 tevent_req_error(req, err);
844 return;
847 state->accept_fd = sd;
849 /* release lock now */
850 subreq = prefork_lock_send(state, state->ev, state->pf,
851 state->lock_fd, PF_ASYNC_LOCK_RELEASE);
852 if (tevent_req_nomem(subreq, req)) {
853 return;
855 tevent_req_set_callback(subreq, prefork_listen_release_done, req);
858 static void prefork_listen_release_done(struct tevent_req *subreq)
860 struct tevent_req *req;
861 int ret;
863 req = tevent_req_callback_data(subreq, struct tevent_req);
865 ret = prefork_lock_recv(subreq);
866 if (ret != 0) {
867 tevent_req_error(req, ret);
868 return;
871 tevent_req_done(req);
874 int prefork_listen_recv(struct tevent_req *req, int *fd)
876 struct pf_listen_state *state;
877 int ret;
879 state = tevent_req_data(req, struct pf_listen_state);
881 if (tevent_req_is_unix_error(req, &ret)) {
882 if (state->accept_fd != -1) {
883 close(state->accept_fd);
885 } else {
886 *fd = state->accept_fd;
887 ret = 0;
888 state->pf->status = PF_WORKER_BUSY;
889 state->pf->num_clients++;
892 tevent_req_received(req);
893 return ret;