s3-prefork: Improve heuristics
[Samba.git] / source3 / lib / server_prefork.c
blobd63e6a1673874690d83affe3c3a72ebe09a05ff6
1 /*
2 Unix SMB/CIFS implementation.
3 Common server globals
5 Copyright (C) Simo Sorce <idra@samba.org> 2011
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "system/time.h"
23 #include "system/shmem.h"
24 #include "system/filesys.h"
25 #include "server_prefork.h"
26 #include "../lib/util/util.h"
27 #include "../lib/util/tevent_unix.h"
29 struct prefork_pool {
31 int listen_fd_size;
32 int *listen_fds;
34 prefork_main_fn_t *main_fn;
35 void *private_data;
37 int pool_size;
38 struct pf_worker_data *pool;
40 int allowed_clients;
42 prefork_sigchld_fn_t *sigchld_fn;
43 void *sigchld_data;
46 static bool prefork_setup_sigchld_handler(struct tevent_context *ev_ctx,
47 struct prefork_pool *pfp);
49 static int prefork_pool_destructor(struct prefork_pool *pfp)
51 anonymous_shared_free(pfp->pool);
52 return 0;
55 bool prefork_create_pool(TALLOC_CTX *mem_ctx,
56 struct tevent_context *ev_ctx,
57 struct messaging_context *msg_ctx,
58 int listen_fd_size, int *listen_fds,
59 int min_children, int max_children,
60 prefork_main_fn_t *main_fn, void *private_data,
61 struct prefork_pool **pf_pool)
63 struct prefork_pool *pfp;
64 pid_t pid;
65 time_t now = time(NULL);
66 size_t data_size;
67 int ret;
68 int i;
69 bool ok;
71 pfp = talloc_zero(mem_ctx, struct prefork_pool);
72 if (!pfp) {
73 DEBUG(1, ("Out of memory!\n"));
74 return false;
76 pfp->listen_fd_size = listen_fd_size;
77 pfp->listen_fds = talloc_array(pfp, int, listen_fd_size);
78 if (!pfp->listen_fds) {
79 DEBUG(1, ("Out of memory!\n"));
80 return false;
82 for (i = 0; i < listen_fd_size; i++) {
83 pfp->listen_fds[i] = listen_fds[i];
85 pfp->main_fn = main_fn;
86 pfp->private_data = private_data;
88 pfp->pool_size = max_children;
89 data_size = sizeof(struct pf_worker_data) * max_children;
91 pfp->pool = anonymous_shared_allocate(data_size);
92 if (pfp->pool == NULL) {
93 DEBUG(1, ("Failed to mmap memory for prefork pool!\n"));
94 talloc_free(pfp);
95 return false;
97 talloc_set_destructor(pfp, prefork_pool_destructor);
99 for (i = 0; i < min_children; i++) {
101 pfp->pool[i].allowed_clients = 1;
102 pfp->pool[i].started = now;
104 pid = sys_fork();
105 switch (pid) {
106 case -1:
107 DEBUG(1, ("Failed to prefork child n. %d !\n", i));
108 break;
110 case 0: /* THE CHILD */
112 pfp->pool[i].status = PF_WORKER_ALIVE;
113 ret = pfp->main_fn(ev_ctx, msg_ctx,
114 &pfp->pool[i], i + 1,
115 pfp->listen_fd_size,
116 pfp->listen_fds,
117 pfp->private_data);
118 exit(ret);
120 default: /* THE PARENT */
121 pfp->pool[i].pid = pid;
122 break;
126 ok = prefork_setup_sigchld_handler(ev_ctx, pfp);
127 if (!ok) {
128 DEBUG(1, ("Failed to setup SIGCHLD Handler!\n"));
129 talloc_free(pfp);
130 return false;
133 *pf_pool = pfp;
134 return true;
137 /* Provide the new max children number in new_max
138 * (must be larger than current max).
139 * Returns: 0 if all fine
140 * ENOSPC if mremap fails to expand
141 * EINVAL if new_max is invalid
143 int prefork_expand_pool(struct prefork_pool *pfp, int new_max)
145 struct prefork_pool *pool;
146 size_t old_size;
147 size_t new_size;
148 int ret;
150 if (new_max <= pfp->pool_size) {
151 return EINVAL;
154 old_size = sizeof(struct pf_worker_data) * pfp->pool_size;
155 new_size = sizeof(struct pf_worker_data) * new_max;
157 pool = anonymous_shared_resize(&pfp->pool, new_size, false);
158 if (pool == NULL) {
159 ret = errno;
160 DEBUG(3, ("Failed to mremap memory (%d: %s)!\n",
161 ret, strerror(ret)));
162 return ret;
165 memset(&pool[pfp->pool_size], 0, new_size - old_size);
167 pfp->pool_size = new_max;
169 return 0;
172 int prefork_add_children(struct tevent_context *ev_ctx,
173 struct messaging_context *msg_ctx,
174 struct prefork_pool *pfp,
175 int num_children)
177 pid_t pid;
178 time_t now = time(NULL);
179 int ret;
180 int i, j;
182 for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {
184 if (pfp->pool[i].status != PF_WORKER_NONE) {
185 continue;
188 pfp->pool[i].allowed_clients = 1;
189 pfp->pool[i].started = now;
191 pid = sys_fork();
192 switch (pid) {
193 case -1:
194 DEBUG(1, ("Failed to prefork child n. %d !\n", j));
195 break;
197 case 0: /* THE CHILD */
199 pfp->pool[i].status = PF_WORKER_ALIVE;
200 ret = pfp->main_fn(ev_ctx, msg_ctx,
201 &pfp->pool[i], i + 1,
202 pfp->listen_fd_size,
203 pfp->listen_fds,
204 pfp->private_data);
206 pfp->pool[i].status = PF_WORKER_EXITING;
207 exit(ret);
209 default: /* THE PARENT */
210 pfp->pool[i].pid = pid;
211 j++;
212 break;
216 DEBUG(5, ("Added %d children!\n", j));
218 return j;
/* One entry per pool slot: the slot index and the start time used as
 * the sort key. */
struct prefork_oldest {
	int num;
	time_t started;
};

/*
 * qsort() comparator, sorting in inverse order: the entry with the
 * larger (more recent) start time comes first.
 * Returns 1 if a started before b, -1 if after, 0 if equal.
 */
static int prefork_sort_oldest(const void *ap, const void *bp)
{
	const struct prefork_oldest *lhs = ap;
	const struct prefork_oldest *rhs = bp;

	return (lhs->started < rhs->started) - (lhs->started > rhs->started);
}
241 int prefork_retire_children(struct prefork_pool *pfp,
242 int num_children, time_t age_limit)
244 time_t now = time(NULL);
245 struct prefork_oldest *oldest;
246 int i, j;
248 oldest = talloc_array(pfp, struct prefork_oldest, pfp->pool_size);
249 if (!oldest) {
250 return -1;
253 for (i = 0; i < pfp->pool_size; i++) {
254 oldest[i].num = i;
255 if (pfp->pool[i].status == PF_WORKER_ALIVE ||
256 pfp->pool[i].status == PF_WORKER_ACCEPTING) {
257 oldest[i].started = pfp->pool[i].started;
258 } else {
259 oldest[i].started = now;
263 qsort(oldest, pfp->pool_size,
264 sizeof(struct prefork_oldest),
265 prefork_sort_oldest);
267 for (i = 0, j = 0; i < pfp->pool_size && j < num_children; i++) {
268 if ((pfp->pool[i].status == PF_WORKER_ALIVE ||
269 pfp->pool[i].status == PF_WORKER_ACCEPTING) &&
270 pfp->pool[i].started <= age_limit) {
271 /* tell the child it's time to give up */
272 DEBUG(5, ("Retiring pid %d!\n", pfp->pool[i].pid));
273 pfp->pool[i].cmds = PF_SRV_MSG_EXIT;
274 kill(pfp->pool[i].pid, SIGHUP);
275 j++;
279 return j;
282 int prefork_count_children(struct prefork_pool *pfp, int *active)
284 int i, a, t;
286 a = 0;
287 t = 0;
288 for (i = 0; i < pfp->pool_size; i++) {
289 if (pfp->pool[i].status == PF_WORKER_NONE) {
290 continue;
293 t++;
295 if ((pfp->pool[i].status == PF_WORKER_EXITING) ||
296 (pfp->pool[i].num_clients <= 0)) {
297 continue;
300 a++;
303 if (active) {
304 *active = a;
306 return t;
309 static void prefork_cleanup_loop(struct prefork_pool *pfp)
311 int status;
312 pid_t pid;
313 int i;
315 /* TODO: should we use a process group id wait instead of looping ? */
316 for (i = 0; i < pfp->pool_size; i++) {
317 if (pfp->pool[i].status == PF_WORKER_NONE ||
318 pfp->pool[i].pid == 0) {
319 continue;
322 pid = sys_waitpid(pfp->pool[i].pid, &status, WNOHANG);
323 if (pid > 0) {
325 if (pfp->pool[i].status != PF_WORKER_EXITING) {
326 DEBUG(3, ("Child (%d) terminated abnormally:"
327 " %d\n", (int)pid, status));
328 } else {
329 DEBUG(10, ("Child (%d) terminated with status:"
330 " %d\n", (int)pid, status));
333 /* reset all fields,
334 * this makes status = PF_WORK_NONE */
335 memset(&pfp->pool[i], 0,
336 sizeof(struct pf_worker_data));
342 int prefork_count_allowed_connections(struct prefork_pool *pfp)
344 int c;
345 int i;
347 c = 0;
348 for (i = 0; i < pfp->pool_size; i++) {
349 if (pfp->pool[i].status == PF_WORKER_NONE ||
350 pfp->pool[i].status == PF_WORKER_EXITING) {
351 continue;
354 if (pfp->pool[i].num_clients < 0) {
355 continue;
358 c += pfp->pool[i].allowed_clients - pfp->pool[i].num_clients;
361 return c;
364 void prefork_increase_allowed_clients(struct prefork_pool *pfp, int max)
366 int i;
368 for (i = 0; i < pfp->pool_size; i++) {
369 if (pfp->pool[i].status == PF_WORKER_NONE ||
370 pfp->pool[i].status == PF_WORKER_EXITING) {
371 continue;
374 if (pfp->pool[i].num_clients < 0) {
375 continue;
378 if (pfp->pool[i].allowed_clients < max) {
379 pfp->pool[i].allowed_clients++;
384 void prefork_decrease_allowed_clients(struct prefork_pool *pfp)
386 int i;
388 for (i = 0; i < pfp->pool_size; i++) {
389 if (pfp->pool[i].status == PF_WORKER_NONE ||
390 pfp->pool[i].status == PF_WORKER_EXITING) {
391 continue;
394 if (pfp->pool[i].num_clients < 0) {
395 continue;
398 if (pfp->pool[i].allowed_clients > 1) {
399 pfp->pool[i].allowed_clients--;
404 void prefork_reset_allowed_clients(struct prefork_pool *pfp)
406 int i;
408 for (i = 0; i < pfp->pool_size; i++) {
409 pfp->pool[i].allowed_clients = 1;
413 void prefork_send_signal_to_all(struct prefork_pool *pfp, int signal_num)
415 int i;
417 for (i = 0; i < pfp->pool_size; i++) {
418 if (pfp->pool[i].status == PF_WORKER_NONE) {
419 continue;
422 kill(pfp->pool[i].pid, signal_num);
426 static void prefork_sigchld_handler(struct tevent_context *ev_ctx,
427 struct tevent_signal *se,
428 int signum, int count,
429 void *siginfo, void *pvt)
431 struct prefork_pool *pfp;
433 pfp = talloc_get_type_abort(pvt, struct prefork_pool);
435 /* run the cleanup function to make sure all dead children are
436 * properly and timely retired. */
437 prefork_cleanup_loop(pfp);
439 if (pfp->sigchld_fn) {
440 pfp->sigchld_fn(ev_ctx, pfp, pfp->sigchld_data);
444 static bool prefork_setup_sigchld_handler(struct tevent_context *ev_ctx,
445 struct prefork_pool *pfp)
447 struct tevent_signal *se;
449 se = tevent_add_signal(ev_ctx, pfp, SIGCHLD, 0,
450 prefork_sigchld_handler, pfp);
451 if (!se) {
452 DEBUG(0, ("Failed to setup SIGCHLD handler!\n"));
453 return false;
456 return true;
459 void prefork_set_sigchld_callback(struct prefork_pool *pfp,
460 prefork_sigchld_fn_t *sigchld_fn,
461 void *private_data)
463 pfp->sigchld_fn = sigchld_fn;
464 pfp->sigchld_data = private_data;
467 /* ==== Functions used by children ==== */
/* State of one prefork_listen_send()/prefork_listen_recv() request. */
struct pf_listen_state {
	struct tevent_context *ev;
	/* this worker's slot in the pool table */
	struct pf_worker_data *pf;

	/* the sockets being listened on */
	int listen_fd_size;
	int *listen_fds;

	/* accepted connection, -1 while there is none */
	int accept_fd;

	/* local and remote address of the accepted connection */
	struct tsocket_address *srv_addr;
	struct tsocket_address *cli_addr;

	/* errno-style failure code, 0 if there was no error */
	int error;
};
484 struct pf_listen_ctx {
485 TALLOC_CTX *fde_ctx;
486 struct tevent_req *req;
487 int listen_fd;
490 static void prefork_listen_accept_handler(struct tevent_context *ev,
491 struct tevent_fd *fde,
492 uint16_t flags, void *pvt);
494 struct tevent_req *prefork_listen_send(TALLOC_CTX *mem_ctx,
495 struct tevent_context *ev,
496 struct pf_worker_data *pf,
497 int listen_fd_size,
498 int *listen_fds)
500 struct tevent_req *req;
501 struct pf_listen_state *state;
502 struct pf_listen_ctx *ctx;
503 struct tevent_fd *fde;
504 TALLOC_CTX *fde_ctx;
505 int i;
507 req = tevent_req_create(mem_ctx, &state, struct pf_listen_state);
508 if (!req) {
509 return NULL;
512 state->ev = ev;
513 state->pf = pf;
514 state->listen_fd_size = listen_fd_size;
515 state->listen_fds = listen_fds;
516 state->accept_fd = -1;
517 state->error = 0;
519 fde_ctx = talloc_new(state);
520 if (tevent_req_nomem(fde_ctx, req)) {
521 return tevent_req_post(req, ev);
524 /* race on accept */
525 for (i = 0; i < state->listen_fd_size; i++) {
526 ctx = talloc(fde_ctx, struct pf_listen_ctx);
527 if (tevent_req_nomem(ctx, req)) {
528 return tevent_req_post(req, ev);
530 ctx->fde_ctx = fde_ctx;
531 ctx->req = req;
532 ctx->listen_fd = state->listen_fds[i];
534 fde = tevent_add_fd(state->ev, fde_ctx,
535 ctx->listen_fd, TEVENT_FD_READ,
536 prefork_listen_accept_handler, ctx);
537 if (tevent_req_nomem(fde, req)) {
538 return tevent_req_post(req, ev);
542 pf->status = PF_WORKER_ACCEPTING;
544 return req;
547 static void prefork_listen_accept_handler(struct tevent_context *ev,
548 struct tevent_fd *fde,
549 uint16_t flags, void *pvt)
551 struct pf_listen_state *state;
552 struct tevent_req *req;
553 struct pf_listen_ctx *ctx;
554 struct sockaddr_storage addr;
555 socklen_t addrlen;
556 int err = 0;
557 int sd = -1;
558 int ret;
560 ctx = talloc_get_type_abort(pvt, struct pf_listen_ctx);
561 req = ctx->req;
562 state = tevent_req_data(ctx->req, struct pf_listen_state);
564 if (state->pf->cmds == PF_SRV_MSG_EXIT) {
565 /* We have been asked to exit, so drop here and the next
566 * child will pick it up */
567 state->pf->status = PF_WORKER_EXITING;
568 state->error = EINTR;
569 goto done;
572 ZERO_STRUCT(addr);
573 addrlen = sizeof(addr);
574 sd = accept(ctx->listen_fd, (struct sockaddr *)&addr, &addrlen);
575 if (sd == -1) {
576 err = errno;
577 DEBUG(6, ("Accept failed! (%d, %s)\n", err, strerror(err)));
580 /* do not track the listen fds anymore */
581 talloc_free(ctx->fde_ctx);
582 ctx = NULL;
583 if (err) {
584 state->error = err;
585 goto done;
588 state->accept_fd = sd;
590 ret = tsocket_address_bsd_from_sockaddr(state,
591 (struct sockaddr *)(void *)&addr,
592 addrlen, &state->cli_addr);
593 if (ret < 0) {
594 state->error = errno;
595 goto done;
598 ZERO_STRUCT(addr);
599 addrlen = sizeof(addr);
600 ret = getsockname(sd, (struct sockaddr *)(void *)&addr, &addrlen);
601 if (ret < 0) {
602 state->error = errno;
603 goto done;
606 ret = tsocket_address_bsd_from_sockaddr(state,
607 (struct sockaddr *)(void *)&addr,
608 addrlen, &state->srv_addr);
609 if (ret < 0) {
610 state->error = errno;
611 goto done;
614 done:
615 tevent_req_done(req);
618 int prefork_listen_recv(struct tevent_req *req,
619 TALLOC_CTX *mem_ctx, int *fd,
620 struct tsocket_address **srv_addr,
621 struct tsocket_address **cli_addr)
623 struct pf_listen_state *state;
624 int ret = 0;
626 state = tevent_req_data(req, struct pf_listen_state);
628 if (state->error) {
629 ret = state->error;
630 } else {
631 tevent_req_is_unix_error(req, &ret);
634 if (ret) {
635 if (state->accept_fd != -1) {
636 close(state->accept_fd);
638 } else {
639 *fd = state->accept_fd;
640 *srv_addr = talloc_move(mem_ctx, &state->srv_addr);
641 *cli_addr = talloc_move(mem_ctx, &state->cli_addr);
642 state->pf->num_clients++;
644 if (state->pf->status == PF_WORKER_ACCEPTING) {
645 state->pf->status = PF_WORKER_ALIVE;
648 tevent_req_received(req);
649 return ret;