s3-prefork: add support for multiple listening file descriptors
source3/lib/server_prefork.c
/*
   Unix SMB/CIFS implementation.
   Common server globals

   Copyright (C) Simo Sorce <idra@samba.org>	2011

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "system/time.h"
#include "system/shmem.h"
#include "system/filesys.h"
#include "server_prefork.h"
#include "../lib/util/util.h"
#include "../lib/util/tevent_unix.h"

struct prefork_pool {

	int listen_fd_size;
	int *listen_fds;

	int lock_fd;

	prefork_main_fn_t *main_fn;
	void *private_data;

	int pool_size;
	struct pf_worker_data *pool;

	int allowed_clients;
};

static int prefork_pool_destructor(struct prefork_pool *pfp)
{
	munmap(pfp->pool, pfp->pool_size * sizeof(struct pf_worker_data));
	return 0;
}
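
/*
 * Note: the array of pf_worker_data slots lives in anonymous shared
 * memory (see the mmap() call in prefork_create_pool() below), not on
 * the talloc heap, so that status/cmds/num_clients updates made by a
 * forked child are visible to the parent and vice versa.
 */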

bool prefork_create_pool(struct tevent_context *ev_ctx, TALLOC_CTX *mem_ctx,
			 int listen_fd_size, int *listen_fds,
			 int min_children, int max_children,
			 prefork_main_fn_t *main_fn, void *private_data,
			 struct prefork_pool **pf_pool)
{
	struct prefork_pool *pfp;
	pid_t pid;
	time_t now = time(NULL);
	size_t data_size;
	int ret;
	int i;

	pfp = talloc(mem_ctx, struct prefork_pool);
	if (!pfp) {
		DEBUG(1, ("Out of memory!\n"));
		return false;
	}
	pfp->listen_fd_size = listen_fd_size;
	pfp->listen_fds = talloc_array(pfp, int, listen_fd_size);
	if (!pfp->listen_fds) {
		DEBUG(1, ("Out of memory!\n"));
		talloc_free(pfp);
		return false;
	}
	for (i = 0; i < listen_fd_size; i++) {
		pfp->listen_fds[i] = listen_fds[i];
	}
	pfp->main_fn = main_fn;
	pfp->private_data = private_data;

	pfp->lock_fd = create_unlink_tmp(NULL);
	if (pfp->lock_fd == -1) {
		DEBUG(1, ("Failed to create prefork lock fd!\n"));
		talloc_free(pfp);
		return false;
	}

	pfp->pool_size = max_children;
	data_size = sizeof(struct pf_worker_data) * max_children;

	pfp->pool = mmap(NULL, data_size, PROT_READ|PROT_WRITE,
			 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
	if (pfp->pool == MAP_FAILED) {
		DEBUG(1, ("Failed to mmap memory for prefork pool!\n"));
		talloc_free(pfp);
		return false;
	}
	talloc_set_destructor(pfp, prefork_pool_destructor);

	for (i = 0; i < min_children; i++) {

		pfp->pool[i].allowed_clients = 1;
		pfp->pool[i].started = now;

		pid = sys_fork();
		switch (pid) {
		case -1:
			DEBUG(1, ("Failed to prefork child n. %d !\n", i));
			break;

		case 0: /* THE CHILD */

			pfp->pool[i].status = PF_WORKER_IDLE;
			ret = pfp->main_fn(ev_ctx, &pfp->pool[i],
					   pfp->listen_fd_size,
					   pfp->listen_fds,
					   pfp->lock_fd,
					   pfp->private_data);
			exit(ret);

		default: /* THE PARENT */
			pfp->pool[i].pid = pid;
			break;
		}
	}

	*pf_pool = pfp;
	return true;
}
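
/*
 * Example (illustrative only, not compiled): how a caller might create
 * a pool that serves two listening sockets. The fd variables, the
 * min/max numbers and my_worker_main are hypothetical; a real main_fn
 * must match prefork_main_fn_t and is expected to run the child's own
 * event loop, returning the child's exit code.
 */
#if 0
	int fds[2] = { tcp_fd, unix_fd };
	struct prefork_pool *pool;
	bool ok;

	ok = prefork_create_pool(ev_ctx, mem_ctx,
				 2, fds,		/* all listening fds */
				 5, 25,			/* min/max children */
				 my_worker_main, NULL,
				 &pool);
	if (!ok) {
		exit(1);
	}
#endif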

/* Provide the new max children number in new_max
 * (must be larger than current max).
 * Returns: 0 if all fine
 *	    ENOSPC if mremap fails to expand
 *	    EINVAL if new_max is invalid
 */
int prefork_expand_pool(struct prefork_pool *pfp, int new_max)
{
	struct pf_worker_data *pool;
	size_t old_size;
	size_t new_size;

	if (new_max <= pfp->pool_size) {
		return EINVAL;
	}

	old_size = sizeof(struct pf_worker_data) * pfp->pool_size;
	new_size = sizeof(struct pf_worker_data) * new_max;

	pool = mremap(pfp->pool, old_size, new_size, 0);
	if (pool == MAP_FAILED) {
		DEBUG(3, ("Failed to mremap memory for prefork pool!\n"));
		return ENOSPC;
	}

	memset(&pool[pfp->pool_size], 0, new_size - old_size);

	pfp->pool_size = new_max;

	return 0;
}

int prefork_add_children(struct tevent_context *ev_ctx,
			 struct prefork_pool *pfp,
			 int num_children)
{
	pid_t pid;
	time_t now = time(NULL);
	int ret;
	int i, j;

	for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {

		if (pfp->pool[i].status != PF_WORKER_NONE) {
			continue;
		}

		pfp->pool[i].allowed_clients = 1;
		pfp->pool[i].started = now;

		pid = sys_fork();
		switch (pid) {
		case -1:
			DEBUG(1, ("Failed to prefork child n. %d !\n", j));
			break;

		case 0: /* THE CHILD */

			pfp->pool[i].status = PF_WORKER_IDLE;
			ret = pfp->main_fn(ev_ctx, &pfp->pool[i],
					   pfp->listen_fd_size,
					   pfp->listen_fds,
					   pfp->lock_fd,
					   pfp->private_data);

			pfp->pool[i].status = PF_WORKER_EXITING;
			exit(ret);

		default: /* THE PARENT */
			pfp->pool[i].pid = pid;
			j++;
			break;
		}
	}

	DEBUG(5, ("Added %d children!\n", j));

	return j;
}

struct prefork_oldest {
	int num;
	time_t started;
};

/* sort in inverse order */
static int prefork_sort_oldest(const void *ap, const void *bp)
{
	struct prefork_oldest *a = (struct prefork_oldest *)ap;
	struct prefork_oldest *b = (struct prefork_oldest *)bp;

	if (a->started == b->started) {
		return 0;
	}
	if (a->started < b->started) {
		return 1;
	}
	return -1;
}

int prefork_retire_children(struct prefork_pool *pfp,
			    int num_children, time_t age_limit)
{
	time_t now = time(NULL);
	struct prefork_oldest *oldest;
	int i, j;

	oldest = talloc_array(pfp, struct prefork_oldest, pfp->pool_size);
	if (!oldest) {
		return -1;
	}

	for (i = 0; i < pfp->pool_size; i++) {
		oldest[i].num = i;
		if (pfp->pool[i].status == PF_WORKER_IDLE) {
			oldest[i].started = pfp->pool[i].started;
		} else {
			oldest[i].started = now;
		}
	}

	qsort(oldest, pfp->pool_size,
	      sizeof(struct prefork_oldest),
	      prefork_sort_oldest);

	for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {
		/* walk in sorted order, not raw slot order */
		int n = oldest[i].num;

		if (pfp->pool[n].status == PF_WORKER_IDLE &&
		    pfp->pool[n].started <= age_limit) {
			/* tell the child it's time to give up */
			DEBUG(5, ("Retiring pid %d!\n", pfp->pool[n].pid));
			pfp->pool[n].cmds = PF_SRV_MSG_EXIT;
			kill(pfp->pool[n].pid, SIGHUP);
			j++;
		}
	}
	talloc_free(oldest);

	return j;
}

int prefork_count_active_children(struct prefork_pool *pfp, int *total)
{
	int i, a, t;

	a = 0;
	t = 0;
	for (i = 0; i < pfp->pool_size; i++) {
		if (pfp->pool[i].status == PF_WORKER_NONE) {
			continue;
		}

		t++;

		if (pfp->pool[i].num_clients == 0) {
			continue;
		}

		a++;
	}

	*total = t;
	return a;
}

/* to be used to finally mark a child as dead, so that its slot can
 * be reused */
bool prefork_mark_pid_dead(struct prefork_pool *pfp, pid_t pid)
{
	int i;

	for (i = 0; i < pfp->pool_size; i++) {
		if (pfp->pool[i].pid == pid) {
			if (pfp->pool[i].status != PF_WORKER_EXITING) {
				DEBUG(2, ("pid %d terminated abnormally!\n",
					  (int)pid));
			}

			/* reset all fields,
			 * this makes status = PF_WORKER_NONE */
			memset(&pfp->pool[i], 0,
			       sizeof(struct pf_worker_data));

			return true;
		}
	}

	return false;
}

void prefork_increase_allowed_clients(struct prefork_pool *pfp, int max)
{
	int i;

	for (i = 0; i < pfp->pool_size; i++) {
		if (pfp->pool[i].status == PF_WORKER_NONE) {
			continue;
		}

		if (pfp->pool[i].allowed_clients < max) {
			pfp->pool[i].allowed_clients++;
		}
	}
}

void prefork_reset_allowed_clients(struct prefork_pool *pfp)
{
	int i;

	for (i = 0; i < pfp->pool_size; i++) {
		pfp->pool[i].allowed_clients = 1;
	}
}

void prefork_send_signal_to_all(struct prefork_pool *pfp, int signal_num)
{
	int i;

	for (i = 0; i < pfp->pool_size; i++) {
		if (pfp->pool[i].status == PF_WORKER_NONE) {
			continue;
		}

		kill(pfp->pool[i].pid, signal_num);
	}
}
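
/*
 * Example (illustrative sketch): a periodic maintenance step a parent
 * could build from the helpers above. SPARE_CHILDREN, MAX_CHILDREN and
 * MAX_AGE are hypothetical tuning values, not part of this API.
 */
#if 0
	int active, total;

	active = prefork_count_active_children(pool, &total);
	if (total - active < SPARE_CHILDREN && total < MAX_CHILDREN) {
		/* running low on idle workers, fork some more */
		prefork_add_children(ev_ctx, pool, SPARE_CHILDREN);
	} else if (total - active > SPARE_CHILDREN) {
		/* too many idle workers, retire those idle the longest */
		prefork_retire_children(pool, 1, time(NULL) - MAX_AGE);
	}
#endif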

/* ==== Functions used by children ==== */

static SIG_ATOMIC_T pf_alarm;

static void pf_alarm_cb(int signum)
{
	pf_alarm = 1;
}

/*
 * Parameters:
 * pf - the worker shared data structure
 * lock_fd - the file descriptor used for locking
 * timeout - expressed in seconds:
 *		-1 never times out,
 *		0 times out immediately
 *		N seconds before timing out
 *
 * Return values:
 *	negative errno on fatal error
 *	0 on success to acquire lock
 *	-1 on timeout/lock held by other
 *	-2 on server msg to terminate
 *	ERRNO on other errors
 */

static int prefork_grab_lock(struct pf_worker_data *pf,
			     int lock_fd, int timeout)
{
	struct flock lock;
	int op;
	int ret;

	if (pf->cmds == PF_SRV_MSG_EXIT) {
		return -2;
	}

	pf_alarm = 0;

	if (timeout > 0) {
		CatchSignal(SIGALRM, pf_alarm_cb);
		alarm(timeout);
	}

	if (timeout == 0) {
		op = F_SETLK;
	} else {
		op = F_SETLKW;
	}

	ret = 0;
	do {
		ZERO_STRUCT(lock);
		lock.l_type = F_WRLCK;
		lock.l_whence = SEEK_SET;

		ret = fcntl(lock_fd, op, &lock);
		if (ret == 0) break;

		ret = errno;

		if (pf->cmds == PF_SRV_MSG_EXIT) {
			ret = -2;
			goto done;
		}

		switch (ret) {
		case EINTR:
			break;

		case EACCES:
		case EAGAIN:
			/* lock held by other proc */
			ret = -1;
			goto done;
		default:
			goto done;
		}

		if (pf_alarm == 1) {
			/* timed out */
			ret = -1;
			goto done;
		}
	} while (timeout != 0);

	if (ret == 0) {
		/* we have the lock */
		pf->status = PF_WORKER_ACCEPTING;
	}

done:
	if (timeout > 0) {
		alarm(0);
		CatchSignal(SIGALRM, SIG_IGN);
	}

	if (ret > 0) {
		DEBUG(1, ("Failed to get lock (%d, %s)!\n",
			  ret, strerror(ret)));
	}
	return ret;
}

/*
 * Parameters:
 * pf - the worker shared data structure
 * lock_fd - the file descriptor used for locking
 * timeout - expressed in seconds:
 *		-1 never times out,
 *		0 times out immediately
 *		N seconds before timing out
 *
 * Return values:
 *	negative errno on fatal error
 *	0 on success to release lock
 *	-1 on timeout
 *	ERRNO on error
 */

static int prefork_release_lock(struct pf_worker_data *pf,
				int lock_fd, int timeout)
{
	struct flock lock;
	int op;
	int ret;

	pf_alarm = 0;

	if (timeout > 0) {
		CatchSignal(SIGALRM, pf_alarm_cb);
		alarm(timeout);
	}

	if (timeout == 0) {
		op = F_SETLK;
	} else {
		op = F_SETLKW;
	}

	do {
		ZERO_STRUCT(lock);
		lock.l_type = F_UNLCK;
		lock.l_whence = SEEK_SET;

		ret = fcntl(lock_fd, op, &lock);
		if (ret == 0) break;

		ret = errno;

		if (ret != EINTR) {
			goto done;
		}

		if (pf_alarm == 1) {
			/* timed out */
			ret = -1;
			goto done;
		}
	} while (timeout != 0);

done:
	if (timeout > 0) {
		alarm(0);
		CatchSignal(SIGALRM, SIG_IGN);
	}

	if (ret > 0) {
		DEBUG(1, ("Failed to release lock (%d, %s)!\n",
			  ret, strerror(ret)));
	}
	return ret;
}
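
/*
 * Example (illustrative sketch): the return convention of the two lock
 * helpers above, shown with a blocking grab. The async wrappers below
 * call them with a timeout of 0 (poll once) instead.
 */
#if 0
	ret = prefork_grab_lock(pf, lock_fd, -1);	/* wait for the lock */
	if (ret == -2) {
		exit(0);		/* server asked us to terminate */
	}
	if (ret == 0) {
		/* we own the accept lock: accept one client, then ... */
		prefork_release_lock(pf, lock_fd, -1);
	}
#endif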

/* ==== async code ==== */

#define PF_ASYNC_LOCK_GRAB	0x01
#define PF_ASYNC_LOCK_RELEASE	0x02
#define PF_ASYNC_ACTION_MASK	0x03
#define PF_ASYNC_LOCK_DONE	0x04

struct pf_lock_state {
	struct pf_worker_data *pf;
	int lock_fd;
	int flags;
};

static void prefork_lock_handler(struct tevent_context *ev,
				 struct tevent_timer *te,
				 struct timeval curtime, void *pvt);

static struct tevent_req *prefork_lock_send(TALLOC_CTX *mem_ctx,
					    struct tevent_context *ev,
					    struct pf_worker_data *pf,
					    int lock_fd, int action)
{
	struct tevent_req *req;
	struct pf_lock_state *state;

	req = tevent_req_create(mem_ctx, &state, struct pf_lock_state);
	if (!req) {
		return NULL;
	}

	state->pf = pf;
	state->lock_fd = lock_fd;
	state->flags = action;

	/* try once immediately */
	prefork_lock_handler(ev, NULL, tevent_timeval_zero(), req);
	if (state->flags & PF_ASYNC_LOCK_DONE) {
		tevent_req_post(req, ev);
	}

	return req;
}

static void prefork_lock_handler(struct tevent_context *ev,
				 struct tevent_timer *te,
				 struct timeval curtime, void *pvt)
{
	struct tevent_req *req;
	struct pf_lock_state *state;
	int ret;

	req = talloc_get_type_abort(pvt, struct tevent_req);
	state = tevent_req_data(req, struct pf_lock_state);

	switch (state->flags & PF_ASYNC_ACTION_MASK) {
	case PF_ASYNC_LOCK_GRAB:
		ret = prefork_grab_lock(state->pf, state->lock_fd, 0);
		break;
	case PF_ASYNC_LOCK_RELEASE:
		ret = prefork_release_lock(state->pf, state->lock_fd, 0);
		break;
	default:
		ret = EINVAL;
		break;
	}

	switch (ret) {
	case 0:
		state->flags |= PF_ASYNC_LOCK_DONE;
		tevent_req_done(req);
		return;
	case -1:
		/* lock busy, try again in a second */
		te = tevent_add_timer(ev, state,
				      tevent_timeval_current_ofs(1, 0),
				      prefork_lock_handler, req);
		tevent_req_nomem(te, req);
		return;
	case -2:
		/* server tells us to stop */
		state->flags |= PF_ASYNC_LOCK_DONE;
		tevent_req_error(req, -2);
		return;
	default:
		state->flags |= PF_ASYNC_LOCK_DONE;
		tevent_req_error(req, ret);
		return;
	}
}

static int prefork_lock_recv(struct tevent_req *req)
{
	int ret;

	if (!tevent_req_is_unix_error(req, &ret)) {
		ret = 0;
	}

	tevent_req_received(req);
	return ret;
}
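
/*
 * The listen request below ties these helpers together: grab the accept
 * lock, watch all listening fds at once, accept a connection from
 * whichever fd becomes readable first, then release the lock. This is
 * what lets a pool serve multiple listening file descriptors with a
 * single shared accept lock.
 */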

struct pf_listen_state {
	struct tevent_context *ev;
	struct pf_worker_data *pf;

	int listen_fd_size;
	int *listen_fds;

	int lock_fd;

	struct sockaddr *addr;
	socklen_t *addrlen;

	int accept_fd;

	int error;
};

static void prefork_listen_lock_done(struct tevent_req *subreq);
static void prefork_listen_accept_handler(struct tevent_context *ev,
					  struct tevent_fd *fde,
					  uint16_t flags, void *pvt);
static void prefork_listen_release_done(struct tevent_req *subreq);

struct tevent_req *prefork_listen_send(TALLOC_CTX *mem_ctx,
				       struct tevent_context *ev,
				       struct pf_worker_data *pf,
				       int listen_fd_size,
				       int *listen_fds,
				       int lock_fd,
				       struct sockaddr *addr,
				       socklen_t *addrlen)
{
	struct tevent_req *req, *subreq;
	struct pf_listen_state *state;

	req = tevent_req_create(mem_ctx, &state, struct pf_listen_state);
	if (!req) {
		return NULL;
	}

	state->ev = ev;
	state->pf = pf;
	state->lock_fd = lock_fd;
	state->listen_fd_size = listen_fd_size;
	state->listen_fds = listen_fds;
	state->addr = addr;
	state->addrlen = addrlen;
	state->accept_fd = -1;
	state->error = 0;

	subreq = prefork_lock_send(state, state->ev, state->pf,
				   state->lock_fd, PF_ASYNC_LOCK_GRAB);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}

	tevent_req_set_callback(subreq, prefork_listen_lock_done, req);
	return req;
}

struct pf_listen_ctx {
	TALLOC_CTX *fde_ctx;
	struct tevent_req *req;
	int listen_fd;
};

static void prefork_listen_lock_done(struct tevent_req *subreq)
{
	struct tevent_req *req;
	struct pf_listen_state *state;
	struct pf_listen_ctx *ctx;
	struct tevent_fd *fde;
	TALLOC_CTX *fde_ctx;
	int ret;
	int i;

	req = tevent_req_callback_data(subreq, struct tevent_req);
	state = tevent_req_data(req, struct pf_listen_state);

	ret = prefork_lock_recv(subreq);
	if (ret != 0) {
		tevent_req_error(req, ret);
		return;
	}

	fde_ctx = talloc_new(state);
	if (tevent_req_nomem(fde_ctx, req)) {
		return;
	}

	/* next step, accept */
	for (i = 0; i < state->listen_fd_size; i++) {
		ctx = talloc(fde_ctx, struct pf_listen_ctx);
		if (tevent_req_nomem(ctx, req)) {
			return;
		}
		ctx->fde_ctx = fde_ctx;
		ctx->req = req;
		ctx->listen_fd = state->listen_fds[i];

		fde = tevent_add_fd(state->ev, fde_ctx,
				    ctx->listen_fd, TEVENT_FD_READ,
				    prefork_listen_accept_handler, ctx);
		if (tevent_req_nomem(fde, req)) {
			return;
		}
	}
}

static void prefork_listen_accept_handler(struct tevent_context *ev,
					  struct tevent_fd *fde,
					  uint16_t flags, void *pvt)
{
	struct pf_listen_state *state;
	struct tevent_req *req, *subreq;
	struct pf_listen_ctx *ctx;
	int err = 0;
	int sd = -1;

	ctx = talloc_get_type_abort(pvt, struct pf_listen_ctx);
	state = tevent_req_data(ctx->req, struct pf_listen_state);

	sd = accept(ctx->listen_fd, state->addr, state->addrlen);
	if (sd == -1) {
		if (errno == EINTR) {
			/* keep trying */
			return;
		}
		err = errno;
		DEBUG(6, ("Accept failed! (%d, %s)\n", err, strerror(err)));
	}

	/* do not track the listen fds anymore */
	req = ctx->req;
	talloc_free(ctx->fde_ctx);
	ctx = NULL;
	if (err) {
		tevent_req_error(req, err);
		return;
	}

	state->accept_fd = sd;

	/* release lock now */
	subreq = prefork_lock_send(state, state->ev, state->pf,
				   state->lock_fd, PF_ASYNC_LOCK_RELEASE);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}
	tevent_req_set_callback(subreq, prefork_listen_release_done, req);
}

static void prefork_listen_release_done(struct tevent_req *subreq)
{
	struct tevent_req *req;
	int ret;

	req = tevent_req_callback_data(subreq, struct tevent_req);

	ret = prefork_lock_recv(subreq);
	if (ret != 0) {
		tevent_req_error(req, ret);
		return;
	}

	tevent_req_done(req);
}

int prefork_listen_recv(struct tevent_req *req, int *fd)
{
	struct pf_listen_state *state;
	int ret;

	state = tevent_req_data(req, struct pf_listen_state);

	if (tevent_req_is_unix_error(req, &ret)) {
		if (state->accept_fd != -1) {
			close(state->accept_fd);
		}
	} else {
		*fd = state->accept_fd;
		ret = 0;
		state->pf->status = PF_WORKER_BUSY;
		state->pf->num_clients++;
	}

	tevent_req_received(req);
	return ret;
}
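
/*
 * Example (illustrative sketch): a worker's accept loop on top of the
 * prefork_listen_send/recv pair. my_accept_done, handle_client and the
 * surrounding variables are hypothetical; the pattern is the standard
 * tevent_req send/recv callback style.
 */
#if 0
static struct sockaddr_storage cli_addr;	/* must outlive the request */
static socklen_t cli_addrlen = sizeof(cli_addr);

static void my_accept_done(struct tevent_req *req)
{
	int client_fd;
	int ret;

	ret = prefork_listen_recv(req, &client_fd);
	TALLOC_FREE(req);
	if (ret != 0) {
		/* on -2 the server asked this worker to exit */
		return;
	}

	handle_client(client_fd);
}

	/* in the worker's main_fn, queue one accept at a time: */
	req = prefork_listen_send(mem_ctx, ev, pf,
				  listen_fd_size, listen_fds, lock_fd,
				  (struct sockaddr *)&cli_addr,
				  &cli_addrlen);
	if (req == NULL) {
		return ENOMEM;
	}
	tevent_req_set_callback(req, my_accept_done, NULL);
#endif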