2 Unix SMB/CIFS implementation.
4 process model: prefork (n client connections per process)
6 Copyright (C) Andrew Tridgell 1992-2005
7 Copyright (C) James J Myers 2003 <myersjj@samba.org>
8 Copyright (C) Stefan (metze) Metzmacher 2004
9 Copyright (C) Andrew Bartlett 2008 <abartlet@samba.org>
10 Copyright (C) David Disseldorp 2008 <ddiss@sgi.com>
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 * The pre-fork process model distributes the server workload amongst several
27 * designated worker processes (e.g. 'prefork-worker-ldap-0',
28 * 'prefork-worker-ldap-1', etc). The number of worker processes is controlled
29 * by the 'prefork children' conf setting. The worker processes are controlled
30 * by a prefork master process (e.g. 'prefork-master-ldap'). The prefork master
31 * doesn't handle the server workload (i.e. processing messages) itself, but is
32 * responsible for restarting workers if they exit unexpectedly. The top-level
33 * samba process is responsible for restarting the master process if it exits.
38 #include "lib/events/events.h"
39 #include "lib/messaging/messaging.h"
40 #include "lib/socket/socket.h"
41 #include "samba/process_model.h"
42 #include "cluster/cluster.h"
43 #include "param/param.h"
45 #include "lib/util/tfork.h"
46 #include "lib/messaging/irpc.h"
47 #include "lib/util/util_process.h"
48 #include "server_util.h"
50 #define min(a, b) (((a) < (b)) ? (a) : (b))
52 NTSTATUS
process_model_prefork_init(void);
53 static void prefork_new_task(
54 struct tevent_context
*ev
,
55 struct loadparm_context
*lp_ctx
,
56 const char *service_name
,
57 struct task_server
*(*new_task_fn
)(struct tevent_context
*,
58 struct loadparm_context
*lp_ctx
,
63 const struct service_details
*service_details
,
65 static void prefork_fork_worker(struct task_server
*task
,
66 struct tevent_context
*ev
,
67 struct tevent_context
*ev2
,
68 struct loadparm_context
*lp_ctx
,
69 const struct service_details
*service_details
,
70 const char *service_name
,
72 unsigned restart_delay
,
73 struct process_details
*pd
);
74 static void prefork_child_pipe_handler(struct tevent_context
*ev
,
75 struct tevent_fd
*fde
,
78 static void setup_handlers(struct tevent_context
*ev
,
79 struct loadparm_context
*lp_ctx
,
83 * State needed to restart the master process or a worker process if they
86 struct master_restart_context
{
87 struct task_server
*(*new_task_fn
)(struct tevent_context
*,
88 struct loadparm_context
*lp_ctx
,
95 struct worker_restart_context
{
96 unsigned int instance
;
97 struct task_server
*task
;
98 struct tevent_context
*ev2
;
102 struct restart_context
{
103 struct loadparm_context
*lp_ctx
;
106 const struct service_details
*service_details
;
107 const char *service_name
;
108 unsigned restart_delay
;
109 struct master_restart_context
*master
;
110 struct worker_restart_context
*worker
;
113 static void sighup_signal_handler(struct tevent_context
*ev
,
114 struct tevent_signal
*se
,
115 int signum
, int count
, void *siginfo
,
118 reopen_logs_internal();
121 static void sigterm_signal_handler(struct tevent_context
*ev
,
122 struct tevent_signal
*se
,
123 int signum
, int count
, void *siginfo
,
127 if (getpgrp() == getpid()) {
129 * We're the process group leader, send
130 * SIGTERM to our process group.
132 DBG_NOTICE("SIGTERM: killing children\n");
133 kill(-getpgrp(), SIGTERM
);
136 DBG_NOTICE("Exiting pid %d on SIGTERM\n", getpid());
142 called when the process model is selected
144 static void prefork_model_init(void)
148 static void prefork_reload_after_fork(void)
152 ldb_wrap_fork_hook();
153 /* Must be done after a fork() to reset messaging contexts. */
154 status
= imessaging_reinit_all();
155 if (!NT_STATUS_IS_OK(status
)) {
156 smb_panic("Failed to re-initialise imessaging after fork");
158 force_check_log_size();
162 * clean up any messaging associated with the old process.
165 static void irpc_cleanup(
166 struct loadparm_context
*lp_ctx
,
167 struct tevent_context
*ev
,
170 TALLOC_CTX
*mem_ctx
= talloc_new(NULL
);
171 struct imessaging_context
*msg_ctx
= NULL
;
172 NTSTATUS status
= NT_STATUS_OK
;
174 if (mem_ctx
== NULL
) {
175 DBG_ERR("OOM cleaning up irpc\n");
178 msg_ctx
= imessaging_client_init(mem_ctx
, lp_ctx
, ev
);
179 if (msg_ctx
== NULL
) {
180 DBG_ERR("Unable to create imessaging_context\n");
181 TALLOC_FREE(mem_ctx
);
184 status
= imessaging_process_cleanup(msg_ctx
, pid
);
185 if (!NT_STATUS_IS_OK(status
)) {
186 DBG_ERR("imessaging_process_cleanup returned (%s)\n",
188 TALLOC_FREE(mem_ctx
);
192 TALLOC_FREE(mem_ctx
);
196 * handle EOF on the parent-to-all-children pipe in the child, i.e.
197 * the parent has died and its end of the pipe has been closed.
198 * The child handles this by exiting as well.
200 static void prefork_pipe_handler(struct tevent_context
*event_ctx
,
201 struct tevent_fd
*fde
, uint16_t flags
,
204 struct loadparm_context
*lp_ctx
= NULL
;
208 * free the fde which removes the event and stops it firing again
213 * Clean up any irpc end points this process had.
216 lp_ctx
= talloc_get_type_abort(private_data
, struct loadparm_context
);
217 irpc_cleanup(lp_ctx
, event_ctx
, pid
);
219 DBG_NOTICE("Child %d exiting\n", getpid());
220 TALLOC_FREE(event_ctx
);
226 * Called by the top-level samba process to create a new prefork master process
228 static void prefork_fork_master(
229 struct tevent_context
*ev
,
230 struct loadparm_context
*lp_ctx
,
231 const char *service_name
,
232 struct task_server
*(*new_task_fn
)(struct tevent_context
*,
233 struct loadparm_context
*lp_ctx
,
238 const struct service_details
*service_details
,
239 unsigned restart_delay
,
243 struct tfork
* t
= NULL
;
246 struct tevent_context
*ev2
;
247 struct task_server
*task
= NULL
;
248 struct process_details pd
= initial_process_details
;
249 struct samba_tevent_trace_state
*samba_tevent_trace_state
= NULL
;
254 smb_panic("failure in tfork\n");
257 DBG_NOTICE("Forking [%s] pre-fork master process\n", service_name
);
258 pid
= tfork_child_pid(t
);
260 struct tevent_fd
*fde
= NULL
;
261 int fd
= tfork_event_fd(t
);
262 struct restart_context
*rc
= NULL
;
264 /* Register a pipe handler that gets called when the prefork
265 * master process terminates.
267 rc
= talloc_zero(ev
, struct restart_context
);
269 smb_panic("OOM allocating restart context\n");
273 rc
->service_name
= service_name
;
274 rc
->service_details
= service_details
;
275 rc
->from_parent_fd
= from_parent_fd
;
276 rc
->restart_delay
= restart_delay
;
277 rc
->master
= talloc_zero(rc
, struct master_restart_context
);
278 if (rc
->master
== NULL
) {
279 smb_panic("OOM allocating master restart context\n");
282 rc
->master
->new_task_fn
= new_task_fn
;
283 rc
->master
->private_data
= private_data
;
286 ev
, ev
, fd
, TEVENT_FD_READ
, prefork_child_pipe_handler
, rc
);
288 smb_panic("Failed to add child pipe handler, "
291 tevent_fd_set_auto_close(fde
);
297 process_set_title("%s[master]", "task[%s] pre-fork master", service_name
);
300 * this will free all the listening sockets and all state that
301 * is not associated with this new connection
303 if (tevent_re_initialise(ev
) != 0) {
304 smb_panic("Failed to re-initialise tevent after fork");
306 prefork_reload_after_fork();
307 setup_handlers(ev
, lp_ctx
, from_parent_fd
);
309 if (service_details
->inhibit_pre_fork
) {
311 ev
, lp_ctx
, cluster_id(pid
, 0), private_data
, NULL
);
313 * The task does not support pre-fork
315 if (task
!= NULL
&& service_details
->post_fork
!= NULL
) {
316 service_details
->post_fork(task
, &pd
);
318 if (task
!= NULL
&& service_details
->before_loop
!= NULL
) {
319 service_details
->before_loop(task
);
321 tevent_loop_wait(ev
);
327 * This is now the child code. We need a completely new event_context
330 ev2
= s4_event_context_init(NULL
);
332 samba_tevent_trace_state
= create_samba_tevent_trace_state(ev2
);
333 if (samba_tevent_trace_state
== NULL
) {
339 tevent_set_trace_callback(ev2
,
340 samba_tevent_trace_callback
,
341 samba_tevent_trace_state
);
343 /* setup this new connection: process will bind to its sockets etc
345 * While we can use ev for the child, which has been re-initialised
346 * above, we must run the new task under ev2, otherwise the children would
347 * be listening on the sockets. Also we don't want the top level
348 * process accepting and handling requests, it's responsible for
349 * monitoring and controlling the child work processes.
351 task
= new_task_fn(ev2
, lp_ctx
, cluster_id(pid
, 0), private_data
, NULL
);
359 * Register an irpc name that can be used by the samba-tool processes
363 struct talloc_ctx
*ctx
= talloc_new(NULL
);
366 DBG_ERR("Out of memory\n");
369 name
= talloc_asprintf(ctx
, "prefork-master-%s", service_name
);
370 irpc_add_name(task
->msg_ctx
, name
);
375 int default_children
;
376 default_children
= lpcfg_prefork_children(lp_ctx
);
377 num_children
= lpcfg_parm_int(lp_ctx
, NULL
, "prefork children",
378 service_name
, default_children
);
380 if (num_children
== 0) {
381 DBG_WARNING("Number of pre-fork children for %s is zero, "
382 "NO worker processes will be started for %s\n",
383 service_name
, service_name
);
385 DBG_NOTICE("Forking %d %s worker processes\n",
386 num_children
, service_name
);
389 * the prefork master creates its own control pipe, so the prefork
390 * workers can detect if the master exits (in which case an EOF gets
391 * written). (Whereas from_parent_fd is the control pipe from the
392 * top-level process that the prefork master listens on)
396 ret
= pipe(control_pipe
);
398 smb_panic("Unable to create worker control pipe\n");
400 smb_set_close_on_exec(control_pipe
[0]);
401 smb_set_close_on_exec(control_pipe
[1]);
405 * Note, we call this before the first
406 * prefork_fork_worker() in order to have
408 * task_init(master) -> before_loop(master)
409 * -> post_fork(worker) -> before_loop(worker)
411 * Otherwise we would have different behaviors
412 * between the first prefork_fork_worker() loop
413 * and the restarting of dead workers
415 if (task
!= NULL
&& service_details
->before_loop
!= NULL
) {
416 struct task_server
*task_copy
= NULL
;
419 * We need to use ev as parent in order to
420 * keep everything alive during the loop
422 task_copy
= talloc(ev
, struct task_server
);
423 if (task_copy
== NULL
) {
431 * In order to allow the before_loop() hook
432 * to register messages or event handlers,
433 * we need to fix up task->event_ctx
434 * and create a new task->msg_ctx
436 task_copy
->event_ctx
= ev
;
437 task_copy
->msg_ctx
= imessaging_init(task_copy
,
439 task_copy
->server_id
,
440 task_copy
->event_ctx
);
441 if (task_copy
->msg_ctx
== NULL
) {
446 service_details
->before_loop(task_copy
);
450 * We are now free to spawn some worker processes
452 for (i
=0; i
< num_children
; i
++) {
453 prefork_fork_worker(task
,
466 * Make sure the messaging context
467 * used by the workers is no longer
468 * active on ev2, otherwise we
469 * would have memory leaks, because
470 * we queue incoming messages
471 * and never process them via ev2.
473 imessaging_dgm_unref_ev(ev2
);
475 /* Don't listen on the sockets we just gave to the children */
476 tevent_loop_wait(ev
);
477 imessaging_dgm_unref_ev(ev
);
479 /* We need to keep ev2 until we're finished for the messaging to work */
484 static void prefork_restart_fn(struct tevent_context
*ev
,
485 struct tevent_timer
*te
,
490 * Restarts a child process if it exits unexpectedly
492 static bool prefork_restart(struct tevent_context
*ev
,
493 struct restart_context
*rc
)
495 struct tevent_timer
*te
= NULL
;
497 if (rc
->restart_delay
> 0) {
498 DBG_ERR("Restarting [%s] pre-fork %s in (%d) seconds\n",
500 (rc
->master
== NULL
) ? "worker" : "master",
505 * Always use an async timer event. If
506 * rc->restart_delay is zero this is the
507 * same as an immediate event and will be
508 * called as soon as we go back into the
511 te
= tevent_add_timer(ev
,
513 tevent_timeval_current_ofs(rc
->restart_delay
, 0),
517 DBG_ERR("tevent_add_timer fail [%s] pre-fork event %s\n",
519 (rc
->master
== NULL
) ? "worker" : "master");
520 /* Caller needs to free rc. */
523 /* Caller must not free rc - it's in use. */
527 static void prefork_restart_fn(struct tevent_context
*ev
,
528 struct tevent_timer
*te
,
532 unsigned max_backoff
= 0;
533 unsigned backoff
= 0;
534 unsigned default_value
= 0;
535 struct restart_context
*rc
= talloc_get_type(private_data
,
536 struct restart_context
);
537 unsigned restart_delay
= rc
->restart_delay
;
542 * If the child process is constantly exiting, then restarting it can
543 * consume a lot of resources. In which case, we want to backoff a bit
544 * before respawning it
546 default_value
= lpcfg_prefork_backoff_increment(rc
->lp_ctx
);
547 backoff
= lpcfg_parm_int(rc
->lp_ctx
,
549 "prefork backoff increment",
553 default_value
= lpcfg_prefork_maximum_backoff(rc
->lp_ctx
);
554 max_backoff
= lpcfg_parm_int(rc
->lp_ctx
,
556 "prefork maximum backoff",
560 restart_delay
+= backoff
;
561 restart_delay
= min(restart_delay
, max_backoff
);
563 if (rc
->master
!= NULL
) {
564 DBG_ERR("Restarting [%s] pre-fork master\n", rc
->service_name
);
565 prefork_fork_master(ev
,
568 rc
->master
->new_task_fn
,
569 rc
->master
->private_data
,
573 } else if (rc
->worker
!= NULL
) {
574 struct process_details pd
= initial_process_details
;
575 DBG_ERR("Restarting [%s] pre-fork worker(%d)\n",
577 rc
->worker
->instance
);
578 pd
.instances
= rc
->worker
->instance
;
579 prefork_fork_worker(rc
->worker
->task
,
585 rc
->worker
->control_pipe
,
589 /* tfork allocates tfork structures with malloc */
590 tfork_destroy(&rc
->t
);
596 handle EOF on the child pipe in the parent, so we know when a
597 process terminates without using SIGCHLD or waiting on all possible pids.
599 We need to ensure we do not ignore SIGCHLD because we need it to
600 work to get a valid error code from samba_runcmd_*().
602 static void prefork_child_pipe_handler(struct tevent_context
*ev
,
603 struct tevent_fd
*fde
,
607 struct restart_context
*rc
= NULL
;
610 bool rc_inuse
= false;
612 /* free the fde which removes the event and stops it firing again */
615 /* the child has closed the pipe, assume it's dead */
617 rc
= talloc_get_type_abort(private_data
, struct restart_context
);
618 pid
= tfork_child_pid(rc
->t
);
621 irpc_cleanup(rc
->lp_ctx
, ev
, pid
);
622 status
= tfork_status(&rc
->t
, false);
624 DBG_ERR("Parent %d, Child %d terminated, "
625 "unable to get status code from tfork\n",
627 rc_inuse
= prefork_restart(ev
, rc
);
628 } else if (WIFEXITED(status
)) {
629 status
= WEXITSTATUS(status
);
630 DBG_ERR("Parent %d, Child %d exited with status %d\n",
631 getpid(), pid
, status
);
633 rc_inuse
= prefork_restart(ev
, rc
);
635 } else if (WIFSIGNALED(status
)) {
636 status
= WTERMSIG(status
);
637 DBG_ERR("Parent %d, Child %d terminated with signal %d\n",
638 getpid(), pid
, status
);
639 if (status
== SIGABRT
|| status
== SIGBUS
|| status
== SIGFPE
||
640 status
== SIGILL
|| status
== SIGSYS
|| status
== SIGSEGV
||
643 rc_inuse
= prefork_restart(ev
, rc
);
647 /* tfork allocates tfork structures with malloc */
648 tfork_destroy(&rc
->t
);
656 called when a listening socket becomes readable.
658 static void prefork_accept_connection(
659 struct tevent_context
*ev
,
660 struct loadparm_context
*lp_ctx
,
661 struct socket_context
*listen_socket
,
662 void (*new_conn
)(struct tevent_context
*,
663 struct loadparm_context
*,
664 struct socket_context
*,
669 void *process_context
)
672 struct socket_context
*connected_socket
;
673 pid_t pid
= getpid();
675 /* accept an incoming connection. */
676 status
= socket_accept(listen_socket
, &connected_socket
);
677 if (!NT_STATUS_IS_OK(status
)) {
679 * For prefork we can ignore STATUS_MORE_ENTRIES, as once a
680 * connection becomes available all waiting processes are
681 * woken, but only one gets work to process.
682 * AKA the thundering herd.
683 * In the short term this should not be an issue as the number
684 * of workers should be a small multiple of the number of cpus.
685 * In the longer term socket_accept needs to implement a
686 * mutex/semaphore (like apache does) to serialise the accepts
688 if (!NT_STATUS_EQUAL(status
, STATUS_MORE_ENTRIES
)) {
689 DBG_ERR("Worker process (%d), error in accept [%s]\n",
690 getpid(), nt_errstr(status
));
695 talloc_steal(private_data
, connected_socket
);
697 new_conn(ev
, lp_ctx
, connected_socket
,
698 cluster_id(pid
, socket_get_fd(connected_socket
)),
699 private_data
, process_context
);
702 static void setup_handlers(
703 struct tevent_context
*ev
,
704 struct loadparm_context
*lp_ctx
,
707 struct tevent_fd
*fde
= NULL
;
708 struct tevent_signal
*se
= NULL
;
710 fde
= tevent_add_fd(ev
, ev
, from_parent_fd
, TEVENT_FD_READ
,
711 prefork_pipe_handler
, lp_ctx
);
713 smb_panic("Failed to add fd handler after fork");
716 se
= tevent_add_signal(ev
,
720 sighup_signal_handler
,
723 smb_panic("Failed to add SIGHUP handler after fork");
726 se
= tevent_add_signal(ev
,
730 sigterm_signal_handler
,
733 smb_panic("Failed to add SIGTERM handler after fork");
738 * Called by the prefork master to create a new prefork worker process
740 static void prefork_fork_worker(struct task_server
*task
,
741 struct tevent_context
*ev
,
742 struct tevent_context
*ev2
,
743 struct loadparm_context
*lp_ctx
,
744 const struct service_details
*service_details
,
745 const char *service_name
,
747 unsigned restart_delay
,
748 struct process_details
*pd
)
750 struct tfork
*w
= NULL
;
755 smb_panic("failure in tfork\n");
758 pid
= tfork_child_pid(w
);
760 struct tevent_fd
*fde
= NULL
;
761 int fd
= tfork_event_fd(w
);
762 struct restart_context
*rc
= NULL
;
765 * we're the parent (prefork master), so store enough info to
766 * restart the worker/child if it exits unexpectedly
768 rc
= talloc_zero(ev
, struct restart_context
);
770 smb_panic("OOM allocating restart context\n");
774 rc
->service_name
= service_name
;
775 rc
->service_details
= service_details
;
776 rc
->restart_delay
= restart_delay
;
778 rc
->worker
= talloc_zero(rc
, struct worker_restart_context
);
779 if (rc
->worker
== NULL
) {
780 smb_panic("OOM allocating master restart context\n");
782 rc
->worker
->ev2
= ev2
;
783 rc
->worker
->instance
= pd
->instances
;
784 rc
->worker
->task
= task
;
785 rc
->worker
->control_pipe
[0] = control_pipe
[0];
786 rc
->worker
->control_pipe
[1] = control_pipe
[1];
789 ev
, ev
, fd
, TEVENT_FD_READ
, prefork_child_pipe_handler
, rc
);
791 smb_panic("Failed to add child pipe handler, "
794 tevent_fd_set_auto_close(fde
);
798 * we're the child (prefork-worker). We never write to the
799 * control pipe, but listen on the read end in case our parent
800 * (the pre-fork master) exits
802 close(control_pipe
[1]);
803 setup_handlers(ev2
, lp_ctx
, control_pipe
[0]);
810 imessaging_dgm_unref_ev(ev
);
813 process_set_title("%s(%d)",
814 "task[%s] pre-forked worker(%d)",
818 prefork_reload_after_fork();
819 if (service_details
->post_fork
!= NULL
) {
820 service_details
->post_fork(task
, pd
);
823 struct talloc_ctx
*ctx
= talloc_new(NULL
);
826 smb_panic("OOM allocating talloc context\n");
828 name
= talloc_asprintf(ctx
,
829 "prefork-worker-%s-%d",
832 irpc_add_name(task
->msg_ctx
, name
);
835 if (service_details
->before_loop
!= NULL
) {
836 service_details
->before_loop(task
);
838 tevent_loop_wait(ev2
);
839 imessaging_dgm_unref_ev(ev2
);
845 * called to create a new server task
847 static void prefork_new_task(
848 struct tevent_context
*ev
,
849 struct loadparm_context
*lp_ctx
,
850 const char *service_name
,
851 struct task_server
*(*new_task_fn
)(struct tevent_context
*,
852 struct loadparm_context
*lp_ctx
,
853 struct server_id
, void *, void *),
855 const struct service_details
*service_details
,
858 prefork_fork_master(ev
,
870 * called when a task terminates
872 static void prefork_terminate_task(struct tevent_context
*ev
,
873 struct loadparm_context
*lp_ctx
,
876 void *process_context
)
878 DBG_DEBUG("called with reason[%s]\n", reason
);
888 * called when a connection completes
890 static void prefork_terminate_connection(struct tevent_context
*ev
,
891 struct loadparm_context
*lp_ctx
,
893 void *process_context
)
897 /* called to set a title of a task or connection */
898 static void prefork_set_title(struct tevent_context
*ev
, const char *title
)
902 static const struct model_ops prefork_ops
= {
904 .model_init
= prefork_model_init
,
905 .accept_connection
= prefork_accept_connection
,
906 .new_task
= prefork_new_task
,
907 .terminate_task
= prefork_terminate_task
,
908 .terminate_connection
= prefork_terminate_connection
,
909 .set_title
= prefork_set_title
,
913 * initialise the prefork process model, registering ourselves with the
914 * process model subsystem
916 NTSTATUS
process_model_prefork_init(void)
918 return register_process_model(&prefork_ops
);