ctdb-daemon: Exit if eventd goes away
[Samba.git] / ctdb / server / eventscript.c
blob4a680044c2593571e5e5855b34bc046a51064508
1 /*
2 event script handling
4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/filesys.h"
22 #include "system/network.h"
23 #include "system/wait.h"
24 #include "system/dir.h"
25 #include "system/locale.h"
26 #include "system/time.h"
27 #include "system/dir.h"
29 #include <talloc.h>
30 #include <tevent.h>
32 #include "lib/util/dlinklist.h"
33 #include "lib/util/debug.h"
34 #include "lib/util/samba_util.h"
35 #include "lib/util/sys_rw.h"
37 #include "ctdb_private.h"
39 #include "common/rb_tree.h"
40 #include "common/common.h"
41 #include "common/logging.h"
42 #include "common/reqid.h"
43 #include "common/sock_io.h"
45 #include "protocol/protocol_api.h"
/*
 * Setting up event daemon
 */
/*
 * Everything ctdbd needs in order to launch ctdb_eventd and talk to it.
 */
struct eventd_context {
	struct tevent_context *ev;	/* event loop, copied from ctdb->ev */
	const char *path;		/* event daemon binary to exec */
	const char *script_dir;		/* handed to the daemon with -e */
	const char *pidfile;		/* not assigned in the code visible here */
	const char *socket;		/* "<socket dir>/eventd.sock" */
	const char *debug_hung_script;	/* handed over with -D, NULL disables */

	/* server state */
	pid_t eventd_pid;		/* -1 while no daemon is tracked */
	struct tevent_fd *eventd_fde;	/* watches the startup pipe */

	/* client state */
	struct reqid_context *idr;	/* request id allocator */
	struct sock_queue *queue;	/* NULL until connected */
	struct eventd_client_state *calls; /* list of outstanding requests */
};
69 static bool eventd_context_init(TALLOC_CTX *mem_ctx,
70 struct ctdb_context *ctdb,
71 struct eventd_context **out)
73 struct eventd_context *ectx;
74 const char *eventd = CTDB_HELPER_BINDIR "/ctdb_eventd";
75 const char *debug_hung_script = CTDB_ETCDIR "/debug-hung-script.sh";
76 const char *value;
77 char *socket;
78 int ret;
80 ectx = talloc_zero(mem_ctx, struct eventd_context);
81 if (ectx == NULL) {
82 return false;
85 ectx->ev = ctdb->ev;
87 value = getenv("CTDB_EVENTD");
88 if (value != NULL) {
89 eventd = value;
92 ectx->path = talloc_strdup(ectx, eventd);
93 if (ectx->path == NULL) {
94 talloc_free(ectx);
95 return false;
98 ectx->script_dir = ctdb->event_script_dir;
100 socket = talloc_strdup(ectx, ctdb_get_socketname(ctdb));
101 if (socket == NULL) {
102 talloc_free(ectx);
103 return false;
106 ectx->socket = talloc_asprintf(ectx, "%s/eventd.sock",
107 dirname(socket));
108 if (ectx->socket == NULL) {
109 talloc_free(ectx);
110 return false;
113 talloc_free(socket);
115 value = getenv("CTDB_DEBUG_HUNG_SCRIPT");
116 if (value != NULL) {
117 if (value[0] == '\0') {
118 debug_hung_script = NULL;
119 } else {
120 debug_hung_script = value;
124 if (debug_hung_script != NULL) {
125 ectx->debug_hung_script = talloc_strdup(ectx,
126 debug_hung_script);
127 if (ectx->debug_hung_script == NULL) {
128 talloc_free(ectx);
129 return false;
133 ret = reqid_init(ectx, 1, &ectx->idr);
134 if (ret != 0) {
135 talloc_free(ectx);
136 return false;
139 ectx->eventd_pid = -1;
141 *out = ectx;
142 return true;
/* Progress of a single wait for the event daemon's startup notification. */
struct eventd_startup_state {
	bool done;	/* set by either handler to end the wait loop */
	int ret;	/* 0 on success, otherwise an errno-style code */
	int fd;		/* read end of the startup pipe */
};
151 static void eventd_startup_timeout_handler(struct tevent_context *ev,
152 struct tevent_timer *te,
153 struct timeval t,
154 void *private_data)
156 struct eventd_startup_state *state =
157 (struct eventd_startup_state *) private_data;
159 state->done = true;
160 state->ret = ETIMEDOUT;
163 static void eventd_startup_handler(struct tevent_context *ev,
164 struct tevent_fd *fde, uint16_t flags,
165 void *private_data)
167 struct eventd_startup_state *state =
168 (struct eventd_startup_state *)private_data;
169 unsigned int data;
170 ssize_t num_read;
172 num_read = sys_read(state->fd, &data, sizeof(data));
173 if (num_read == sizeof(data)) {
174 if (data == 0) {
175 state->ret = 0;
176 } else {
177 state->ret = EIO;
179 } else if (num_read == 0) {
180 state->ret = EPIPE;
181 } else if (num_read == -1) {
182 state->ret = errno;
183 } else {
184 state->ret = EINVAL;
187 state->done = true;
191 static int wait_for_daemon_startup(struct tevent_context *ev,
192 int fd)
194 TALLOC_CTX *mem_ctx;
195 struct tevent_timer *timer;
196 struct tevent_fd *fde;
197 struct eventd_startup_state state = {
198 .done = false,
199 .ret = 0,
200 .fd = fd,
203 mem_ctx = talloc_new(ev);
204 if (mem_ctx == NULL) {
205 return ENOMEM;
208 timer = tevent_add_timer(ev,
209 mem_ctx,
210 tevent_timeval_current_ofs(10, 0),
211 eventd_startup_timeout_handler,
212 &state);
213 if (timer == NULL) {
214 talloc_free(mem_ctx);
215 return ENOMEM;
218 fde = tevent_add_fd(ev,
219 mem_ctx,
221 TEVENT_FD_READ,
222 eventd_startup_handler,
223 &state);
224 if (fde == NULL) {
225 talloc_free(mem_ctx);
226 return ENOMEM;
229 while (! state.done) {
230 tevent_loop_once(ev);
233 talloc_free(mem_ctx);
235 return state.ret;
/*
 * Start and stop event daemon
 */
/* Both are defined further down but are needed by ctdb_start_eventd(). */
static bool eventd_client_connect(struct eventd_context *ectx);
static void eventd_dead_handler(struct tevent_context *ev,
				struct tevent_fd *fde,
				uint16_t flags,
				void *private_data);
248 int ctdb_start_eventd(struct ctdb_context *ctdb)
250 struct eventd_context *ectx;
251 const char **argv;
252 int fd[2];
253 pid_t pid;
254 int ret;
255 bool status;
257 if (ctdb->ectx == NULL) {
258 status = eventd_context_init(ctdb, ctdb, &ctdb->ectx);
259 if (! status) {
260 DEBUG(DEBUG_ERR,
261 ("Failed to initialize eventd context\n"));
262 return -1;
266 ectx = ctdb->ectx;
268 if (! sock_clean(ectx->socket)) {
269 return -1;
272 ret = pipe(fd);
273 if (ret != 0) {
274 return -1;
277 argv = talloc_array(ectx, const char *, 16);
278 if (argv == NULL) {
279 close(fd[0]);
280 close(fd[1]);
281 return -1;
284 argv[0] = ectx->path;
285 argv[1] = "-e";
286 argv[2] = ectx->script_dir;
287 argv[3] = "-s";
288 argv[4] = ectx->socket;
289 argv[5] = "-P";
290 argv[6] = talloc_asprintf(argv, "%d", ctdb->ctdbd_pid);
291 argv[7] = "-l";
292 argv[8] = getenv("CTDB_LOGGING");
293 argv[9] = "-d";
294 argv[10] = debug_level_to_string(DEBUGLEVEL);
295 argv[11] = "-S";
296 argv[12] = talloc_asprintf(argv, "%d", fd[1]);
297 if (ectx->debug_hung_script == NULL) {
298 argv[13] = NULL;
299 argv[14] = NULL;
300 } else {
301 argv[13] = "-D";
302 argv[14] = ectx->debug_hung_script;
304 argv[15] = NULL;
306 if (argv[6] == NULL || argv[12] == NULL) {
307 close(fd[0]);
308 close(fd[1]);
309 talloc_free(argv);
310 return -1;
313 D_NOTICE("Starting event daemon "
314 "%s %s %s %s %s %s %s %s %s %s %s %s %s\n",
315 argv[0], argv[1], argv[2], argv[3], argv[4], argv[5],
316 argv[6], argv[7], argv[8], argv[9], argv[10],
317 argv[11], argv[12]);
319 pid = ctdb_fork(ctdb);
320 if (pid == -1) {
321 close(fd[0]);
322 close(fd[1]);
323 talloc_free(argv);
324 return -1;
327 if (pid == 0) {
328 close(fd[0]);
329 ret = execv(argv[0], discard_const(argv));
330 if (ret == -1) {
331 _exit(errno);
333 _exit(0);
336 talloc_free(argv);
337 close(fd[1]);
339 ret = wait_for_daemon_startup(ctdb->ev, fd[0]);
340 if (ret != 0) {
341 ctdb_kill(ctdb, pid, SIGKILL);
342 close(fd[0]);
343 D_ERR("Failed to initialize event daemon (%d)\n", ret);
344 return -1;
347 ectx->eventd_fde = tevent_add_fd(ctdb->ev, ectx, fd[0],
348 TEVENT_FD_READ,
349 eventd_dead_handler, ectx);
350 if (ectx->eventd_fde == NULL) {
351 ctdb_kill(ctdb, pid, SIGKILL);
352 close(fd[0]);
353 return -1;
356 tevent_fd_set_auto_close(ectx->eventd_fde);
357 ectx->eventd_pid = pid;
359 status = eventd_client_connect(ectx);
360 if (! status) {
361 DEBUG(DEBUG_ERR, ("Failed to connect to event daemon\n"));
362 ctdb_stop_eventd(ctdb);
363 return -1;
366 return 0;
/*
 * fd callback registered by ctdb_start_eventd() on the read end of the
 * startup pipe.  Once the daemon has started, activity on that pipe is
 * treated as the daemon having gone away, and ctdbd exits.
 */
static void eventd_dead_handler(struct tevent_context *ev,
				struct tevent_fd *fde,
				uint16_t flags,
				void *private_data)
{
	D_ERR("Eventd went away - exiting\n");
	exit(1);
}
377 void ctdb_stop_eventd(struct ctdb_context *ctdb)
379 struct eventd_context *ectx = ctdb->ectx;
381 if (ectx == NULL) {
382 return;
385 TALLOC_FREE(ectx->eventd_fde);
386 if (ectx->eventd_pid != -1) {
387 kill(ectx->eventd_pid, SIGTERM);
388 ectx->eventd_pid = -1;
390 TALLOC_FREE(ctdb->ectx);
/*
 * Connect to event daemon
 */
/* One request that has been sent to the event daemon. */
struct eventd_client_state {
	struct eventd_client_state *prev, *next; /* links in ectx->calls */

	struct eventd_context *ectx;
	/* called with the matching reply */
	void (*callback)(struct ctdb_event_reply *reply, void *private_data);
	void *private_data;

	uint32_t reqid;		/* id used to match the reply */
	uint8_t *buf;		/* marshalled request */
	size_t buflen;		/* size of buf */
};
/* Defined below; referenced by the connect and write paths. */
static void eventd_client_read(uint8_t *buf,
			       size_t buflen,
			       void *private_data);
static int eventd_client_state_destructor(struct eventd_client_state *state);
413 static bool eventd_client_connect(struct eventd_context *ectx)
415 int fd;
417 if (ectx->queue != NULL) {
418 return true;
421 fd = sock_connect(ectx->socket);
422 if (fd == -1) {
423 return false;
426 ectx->queue = sock_queue_setup(ectx, ectx->ev, fd,
427 eventd_client_read, ectx);
428 if (ectx->queue == NULL) {
429 close(fd);
430 return false;
433 return true;
436 static int eventd_client_write(struct eventd_context *ectx,
437 TALLOC_CTX *mem_ctx,
438 struct ctdb_event_request *request,
439 void (*callback)(struct ctdb_event_reply *reply,
440 void *private_data),
441 void *private_data)
443 struct eventd_client_state *state;
444 int ret;
446 if (! eventd_client_connect(ectx)) {
447 return -1;
450 state = talloc_zero(mem_ctx, struct eventd_client_state);
451 if (state == NULL) {
452 return -1;
455 state->ectx = ectx;
456 state->callback = callback;
457 state->private_data = private_data;
459 state->reqid = reqid_new(ectx->idr, state);
460 if (state->reqid == REQID_INVALID) {
461 talloc_free(state);
462 return -1;
465 talloc_set_destructor(state, eventd_client_state_destructor);
467 sock_packet_header_set_reqid(&request->header, state->reqid);
469 state->buflen = ctdb_event_request_len(request);
470 state->buf = talloc_size(state, state->buflen);
471 if (state->buf == NULL) {
472 talloc_free(state);
473 return -1;
476 ret = ctdb_event_request_push(request, state->buf, &state->buflen);
477 if (ret != 0) {
478 talloc_free(state);
479 return -1;
482 ret = sock_queue_write(ectx->queue, state->buf, state->buflen);
483 if (ret != 0) {
484 talloc_free(state);
485 return -1;
488 DLIST_ADD(ectx->calls, state);
490 return 0;
493 static int eventd_client_state_destructor(struct eventd_client_state *state)
495 struct eventd_context *ectx = state->ectx;
497 reqid_remove(ectx->idr, state->reqid);
498 DLIST_REMOVE(ectx->calls, state);
499 return 0;
502 static void eventd_client_read(uint8_t *buf, size_t buflen,
503 void *private_data)
505 struct eventd_context *ectx = talloc_get_type_abort(
506 private_data, struct eventd_context);
507 struct eventd_client_state *state;
508 struct ctdb_event_reply *reply;
509 int ret;
511 if (buf == NULL) {
512 /* connection lost */
513 TALLOC_FREE(ectx->queue);
514 return;
517 reply = talloc_zero(ectx, struct ctdb_event_reply);
518 if (reply == NULL) {
519 return;
522 ret = ctdb_event_reply_pull(buf, buflen, reply, reply);
523 if (ret != 0) {
524 D_ERR("Invalid packet received, ret=%d\n", ret);
525 talloc_free(reply);
526 return;
529 if (buflen != reply->header.length) {
530 D_ERR("Packet size mismatch %zu != %"PRIu32"\n",
531 buflen, reply->header.length);
532 talloc_free(reply);
533 return;
536 state = reqid_find(ectx->idr, reply->header.reqid,
537 struct eventd_client_state);
538 if (state == NULL) {
539 talloc_free(reply);
540 return;
543 if (state->reqid != reply->header.reqid) {
544 talloc_free(reply);
545 return;
548 state = talloc_steal(reply, state);
549 state->callback(reply, state->private_data);
550 talloc_free(reply);
/*
 * Run an event
 */
/* State for one "run event" request. */
struct eventd_client_run_state {
	struct eventd_context *ectx;
	/* called with the result carried by the daemon's reply */
	void (*callback)(int result, void *private_data);
	void *private_data;
};
/* Reply handler for eventd_client_run(); defined below. */
static void eventd_client_run_done(struct ctdb_event_reply *reply,
				   void *private_data);
566 static int eventd_client_run(struct eventd_context *ectx,
567 TALLOC_CTX *mem_ctx,
568 void (*callback)(int result,
569 void *private_data),
570 void *private_data,
571 enum ctdb_event event,
572 const char *arg_str,
573 uint32_t timeout)
575 struct eventd_client_run_state *state;
576 struct ctdb_event_request request;
577 struct ctdb_event_request_run rdata;
578 int ret;
580 state = talloc_zero(mem_ctx, struct eventd_client_run_state);
581 if (state == NULL) {
582 return -1;
585 state->ectx = ectx;
586 state->callback = callback;
587 state->private_data = private_data;
589 rdata.event = event;
590 rdata.timeout = timeout;
591 rdata.arg_str = arg_str;
593 request.rdata.command = CTDB_EVENT_COMMAND_RUN;
594 request.rdata.data.run = &rdata;
596 ret = eventd_client_write(ectx, state, &request,
597 eventd_client_run_done, state);
598 if (ret != 0) {
599 talloc_free(state);
600 return ret;
603 return 0;
606 static void eventd_client_run_done(struct ctdb_event_reply *reply,
607 void *private_data)
609 struct eventd_client_run_state *state = talloc_get_type_abort(
610 private_data, struct eventd_client_run_state);
612 state = talloc_steal(state->ectx, state);
613 state->callback(reply->rdata.result, state->private_data);
614 talloc_free(state);
/*
 * CTDB event script functions
 */
621 int ctdb_event_script_run(struct ctdb_context *ctdb,
622 TALLOC_CTX *mem_ctx,
623 void (*callback)(struct ctdb_context *ctdb,
624 int result, void *private_data),
625 void *private_data,
626 enum ctdb_event event,
627 const char *fmt, va_list ap)
628 PRINTF_ATTRIBUTE(6,0);
630 struct ctdb_event_script_run_state {
631 struct ctdb_context *ctdb;
632 void (*callback)(struct ctdb_context *ctdb, int result,
633 void *private_data);
634 void *private_data;
635 enum ctdb_event event;
/* Helpers used by ctdb_event_script_run(); all are defined below. */
static bool check_options(enum ctdb_event call, const char *options);
static bool event_allowed_during_recovery(enum ctdb_event event);
static void ctdb_event_script_run_done(int result, void *private_data);
642 int ctdb_event_script_run(struct ctdb_context *ctdb,
643 TALLOC_CTX *mem_ctx,
644 void (*callback)(struct ctdb_context *ctdb,
645 int result, void *private_data),
646 void *private_data,
647 enum ctdb_event event,
648 const char *fmt, va_list ap)
650 struct ctdb_event_script_run_state *state;
651 char *arg_str;
652 int ret;
654 if ( (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) &&
655 (! event_allowed_during_recovery(event)) ) {
656 DEBUG(DEBUG_ERR,
657 ("Refusing to run event '%s' while in recovery\n",
658 ctdb_eventscript_call_names[event]));
659 return -1;
662 state = talloc_zero(mem_ctx, struct ctdb_event_script_run_state);
663 if (state == NULL) {
664 return -1;
667 state->ctdb = ctdb;
668 state->callback = callback;
669 state->private_data = private_data;
670 state->event = event;
672 if (fmt != NULL) {
673 arg_str = talloc_vasprintf(state, fmt, ap);
674 if (arg_str == NULL) {
675 talloc_free(state);
676 return -1;
678 } else {
679 arg_str = NULL;
682 if (! check_options(event, arg_str)) {
683 DEBUG(DEBUG_ERR,
684 ("Bad event script arguments '%s' for '%s'\n",
685 arg_str, ctdb_eventscript_call_names[event]));
686 talloc_free(arg_str);
687 return -1;
690 ret = eventd_client_run(ctdb->ectx, state,
691 ctdb_event_script_run_done, state,
692 event, arg_str, ctdb->tunable.script_timeout);
693 if (ret != 0) {
694 talloc_free(state);
695 return ret;
698 DEBUG(DEBUG_INFO,
699 (__location__ " Running event %s with arguments %s\n",
700 ctdb_eventscript_call_names[event], arg_str));
702 talloc_free(arg_str);
703 return 0;
706 static void ctdb_event_script_run_done(int result, void *private_data)
708 struct ctdb_event_script_run_state *state = talloc_get_type_abort(
709 private_data, struct ctdb_event_script_run_state);
711 if (result == -ETIME) {
712 switch (state->event) {
713 case CTDB_EVENT_START_RECOVERY:
714 case CTDB_EVENT_RECOVERED:
715 case CTDB_EVENT_TAKE_IP:
716 case CTDB_EVENT_RELEASE_IP:
717 DEBUG(DEBUG_ERR,
718 ("Ignoring hung script for %s event\n",
719 ctdb_eventscript_call_names[state->event]));
720 result = 0;
721 break;
723 default:
724 break;
728 state = talloc_steal(state->ctdb, state);
729 state->callback(state->ctdb, result, state->private_data);
730 talloc_free(state);
/*
 * Count the words in options, where words are separated by runs of
 * spaces and tabs.  A NULL string contains no words.
 */
static unsigned int count_words(const char *options)
{
	static const char blanks[] = " \t";
	unsigned int count = 0;
	const char *p;

	if (options == NULL) {
		return 0;
	}

	/* start at the first word; after each word skip the blanks behind it */
	for (p = options + strspn(options, blanks);
	     *p != '\0';
	     p += strspn(p, blanks)) {
		count++;
		p += strcspn(p, blanks);
	}

	return count;
}
751 static bool check_options(enum ctdb_event call, const char *options)
753 switch (call) {
754 /* These all take no arguments. */
755 case CTDB_EVENT_INIT:
756 case CTDB_EVENT_SETUP:
757 case CTDB_EVENT_STARTUP:
758 case CTDB_EVENT_START_RECOVERY:
759 case CTDB_EVENT_RECOVERED:
760 case CTDB_EVENT_MONITOR:
761 case CTDB_EVENT_SHUTDOWN:
762 case CTDB_EVENT_IPREALLOCATED:
763 return count_words(options) == 0;
765 case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
766 case CTDB_EVENT_RELEASE_IP:
767 return count_words(options) == 3;
769 case CTDB_EVENT_UPDATE_IP: /* old interface, new interface, IP address, netmask bits. */
770 return count_words(options) == 4;
772 default:
773 DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_event %u\n", call));
774 return false;
778 /* only specific events are allowed while in recovery */
779 static bool event_allowed_during_recovery(enum ctdb_event event)
781 const enum ctdb_event allowed_events[] = {
782 CTDB_EVENT_INIT,
783 CTDB_EVENT_SETUP,
784 CTDB_EVENT_START_RECOVERY,
785 CTDB_EVENT_SHUTDOWN,
786 CTDB_EVENT_RELEASE_IP,
787 CTDB_EVENT_IPREALLOCATED,
789 int i;
791 for (i = 0; i < ARRAY_SIZE(allowed_events); i++) {
792 if (event == allowed_events[i]) {
793 return true;
797 return false;
801 run the event script in the background, calling the callback when
802 finished. If mem_ctx is freed, callback will never be called.
804 int ctdb_event_script_callback(struct ctdb_context *ctdb,
805 TALLOC_CTX *mem_ctx,
806 void (*callback)(struct ctdb_context *, int, void *),
807 void *private_data,
808 enum ctdb_event call,
809 const char *fmt, ...)
811 va_list ap;
812 int ret;
814 va_start(ap, fmt);
815 ret = ctdb_event_script_run(ctdb, mem_ctx, callback, private_data,
816 call, fmt, ap);
817 va_end(ap);
819 return ret;
/* Completion flag and result for a synchronous event script run. */
struct ctdb_event_script_args_state {
	bool done;	/* set once the callback has run */
	int status;	/* result passed to the callback */
};
828 static void ctdb_event_script_args_done(struct ctdb_context *ctdb,
829 int status, void *private_data)
831 struct ctdb_event_script_args_state *s =
832 (struct ctdb_event_script_args_state *)private_data;
834 s->done = true;
835 s->status = status;
839 run the event script, waiting for it to complete. Used when the caller
840 doesn't want to continue till the event script has finished.
842 int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_event call,
843 const char *fmt, ...)
845 va_list ap;
846 int ret;
847 struct ctdb_event_script_args_state state = {
848 .status = -1,
849 .done = false,
852 va_start(ap, fmt);
853 ret = ctdb_event_script_run(ctdb, ctdb,
854 ctdb_event_script_args_done, &state,
855 call, fmt, ap);
856 va_end(ap);
857 if (ret != 0) {
858 return ret;
861 while (! state.done) {
862 tevent_loop_once(ctdb->ev);
865 if (state.status == -ETIME) {
866 /* Don't ban self if CTDB is starting up or shutting down */
867 if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) {
868 DEBUG(DEBUG_ERR,
869 (__location__ " eventscript for '%s' timed out."
870 " Immediately banning ourself for %d seconds\n",
871 ctdb_eventscript_call_names[call],
872 ctdb->tunable.recovery_ban_period));
873 ctdb_ban_self(ctdb);
877 return state.status;
880 int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_event call)
882 /* GCC complains about empty format string, so use %s and "". */
883 return ctdb_event_script_args(ctdb, call, NULL);