ctdb-daemon: Handle failure immediately, do housekeeping later
[Samba.git] / ctdb / server / eventscript.c
blobf555625996ecd60245c93c9c7bd0e3e13019be32
1 /*
2 event script handling
4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/filesys.h"
22 #include "system/network.h"
23 #include "system/wait.h"
24 #include "system/dir.h"
25 #include "system/locale.h"
26 #include "system/time.h"
28 #include <talloc.h>
29 #include <tevent.h>
31 #include "lib/util/dlinklist.h"
32 #include "lib/util/debug.h"
33 #include "lib/util/samba_util.h"
35 #include "ctdb_private.h"
37 #include "common/rb_tree.h"
38 #include "common/system.h"
39 #include "common/common.h"
40 #include "common/logging.h"
/* Forward declaration: timer handler called when the child times out. */
static void ctdb_event_script_timeout(struct tevent_context *ev_ctx,
				      struct tevent_timer *timer,
				      struct timeval now, void *private_data);
/*
 * Completion callback record.  It is referenced from the event script
 * state (->callback) and linked into ctdb->script_callbacks through
 * next/prev.
 */
struct event_script_callback {
	struct event_script_callback *next;
	struct event_script_callback *prev;
	struct ctdb_context *ctdb;

	/* Warning: calling fn can free this structure! */
	void (*fn)(struct ctdb_context *, int, void *);
	/* Opaque pointer handed back to fn as its last argument. */
	void *private_data;
};
57 struct ctdb_event_script_state {
58 struct ctdb_context *ctdb;
59 struct event_script_callback *callback;
60 pid_t child;
61 int fd[2];
62 enum ctdb_event call;
63 const char *options;
64 struct timeval timeout;
66 unsigned int current;
67 struct ctdb_script_list_old *scripts;
70 static struct ctdb_script *get_current_script(struct ctdb_event_script_state *state)
72 return &state->scripts->scripts[state->current];
75 /* called from ctdb_logging when we have received output on STDERR from
76 * one of the eventscripts
78 static void log_event_script_output(const char *str, uint16_t len, void *p)
80 struct ctdb_event_script_state *state
81 = talloc_get_type(p, struct ctdb_event_script_state);
82 struct ctdb_script *current;
83 unsigned int slen, min;
85 /* We may have been aborted to run something else. Discard */
86 if (state->scripts == NULL) {
87 return;
90 current = get_current_script(state);
92 /* Append, but don't overfill buffer. It starts zero-filled. */
93 slen = strlen(current->output);
94 min = MIN(len, sizeof(current->output) - slen - 1);
96 memcpy(current->output + slen, str, min);
99 int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb,
100 uint32_t call_type,
101 TDB_DATA *outdata)
103 if (call_type >= CTDB_EVENT_MAX) {
104 return -1;
107 if (ctdb->last_status[call_type] == NULL) {
108 /* If it's never been run, return nothing so they can tell. */
109 outdata->dsize = 0;
110 } else {
111 outdata->dsize = talloc_get_size(ctdb->last_status[call_type]);
112 outdata->dptr = (uint8_t *)ctdb->last_status[call_type];
114 return 0;
117 /* To ignore directory entry return 0, else return non-zero */
118 static int script_filter(const struct dirent *de)
120 int namelen = strlen(de->d_name);
122 /* Ignore . and .. */
123 if (namelen < 3) {
124 return 0;
127 /* Skip temporary files left behind by emacs */
128 if (de->d_name[namelen-1] == '~') {
129 return 0;
132 /* Filename should start with [0-9][0-9]. */
133 if (!isdigit(de->d_name[0]) || !isdigit(de->d_name[1]) ||
134 de->d_name[2] != '.') {
135 return 0;
138 if (namelen > MAX_SCRIPT_NAME) {
139 return 0;
142 return 1;
145 /* Return true if OK, otherwise set errno. */
146 static bool check_executable(const char *dir, const char *name)
148 char *full;
149 struct stat st;
151 full = talloc_asprintf(NULL, "%s/%s", dir, name);
152 if (!full)
153 return false;
155 if (stat(full, &st) != 0) {
156 DEBUG(DEBUG_ERR,("Could not stat event script %s: %s\n",
157 full, strerror(errno)));
158 talloc_free(full);
159 return false;
162 if (!(st.st_mode & S_IXUSR)) {
163 DEBUG(DEBUG_DEBUG,("Event script %s is not executable. Ignoring this event script\n", full));
164 errno = ENOEXEC;
165 talloc_free(full);
166 return false;
169 talloc_free(full);
170 return true;
173 static struct ctdb_script_list_old *ctdb_get_script_list(
174 struct ctdb_context *ctdb,
175 TALLOC_CTX *mem_ctx)
177 struct dirent **namelist;
178 struct ctdb_script_list_old *scripts;
179 int i, count;
181 /* scan all directory entries and insert all valid scripts into the
182 tree
184 count = scandir(ctdb->event_script_dir, &namelist, script_filter, alphasort);
185 if (count == -1) {
186 DEBUG(DEBUG_CRIT, ("Failed to read event script directory '%s' - %s\n",
187 ctdb->event_script_dir, strerror(errno)));
188 return NULL;
191 /* Overallocates by one, but that's OK */
192 scripts = talloc_zero_size(mem_ctx,
193 sizeof(*scripts)
194 + sizeof(scripts->scripts[0]) * count);
195 if (scripts == NULL) {
196 DEBUG(DEBUG_ERR, (__location__ " Failed to allocate scripts\n"));
197 goto done;
199 scripts->num_scripts = count;
201 for (i = 0; i < count; i++) {
202 struct ctdb_script *s = &scripts->scripts[i];
204 if (strlcpy(s->name, namelist[i]->d_name, sizeof(s->name)) >=
205 sizeof(s->name)) {
206 s->status = -ENAMETOOLONG;
207 continue;
210 s->status = 0;
211 if (!check_executable(ctdb->event_script_dir,
212 namelist[i]->d_name)) {
213 s->status = -errno;
217 done:
218 for (i=0; i<count; i++) {
219 free(namelist[i]);
221 free(namelist);
222 return scripts;
226 /* There cannot be more than 10 arguments to command helper. */
227 #define MAX_HELPER_ARGS (10)
229 static bool child_helper_args(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
230 enum ctdb_event call,
231 const char *options,
232 struct ctdb_script *current, int fd,
233 int *argc, const char ***argv)
235 const char **tmp;
236 int n, i;
237 char *t, *saveptr, *opt;
239 tmp = talloc_array(mem_ctx, const char *, 10+1);
240 if (tmp == NULL) goto failed;
242 tmp[0] = talloc_asprintf(tmp, "%d", fd);
243 tmp[1] = talloc_asprintf(tmp, "%s/%s", ctdb->event_script_dir, current->name);
244 tmp[2] = talloc_asprintf(tmp, "%s", ctdb_eventscript_call_names[call]);
245 n = 3;
247 /* Split options into individual arguments */
248 opt = talloc_strdup(mem_ctx, options);
249 if (opt == NULL) {
250 goto failed;
253 t = strtok_r(opt, " ", &saveptr);
254 while (t != NULL) {
255 tmp[n++] = talloc_strdup(tmp, t);
256 if (n > MAX_HELPER_ARGS) {
257 goto args_failed;
259 t = strtok_r(NULL, " ", &saveptr);
262 for (i=0; i<n; i++) {
263 if (tmp[i] == NULL) {
264 goto failed;
268 /* Last argument should be NULL */
269 tmp[n++] = NULL;
271 *argc = n;
272 *argv = tmp;
273 return true;
276 args_failed:
277 DEBUG(DEBUG_ERR, (__location__ " too many arguments '%s' to eventscript '%s'\n",
278 options, ctdb_eventscript_call_names[call]));
280 failed:
281 if (tmp) {
282 talloc_free(tmp);
284 return false;
288 static void ctdb_event_script_handler(struct tevent_context *ev,
289 struct tevent_fd *fde,
290 uint16_t flags, void *p);
292 static char helper_prog[PATH_MAX+1] = "";
294 static int fork_child_for_script(struct ctdb_context *ctdb,
295 struct ctdb_event_script_state *state)
297 int r;
298 struct tevent_fd *fde;
299 struct ctdb_script *current = get_current_script(state);
300 int argc;
301 const char **argv;
303 if (!ctdb_set_helper("event helper", helper_prog, sizeof(helper_prog),
304 "CTDB_EVENT_HELPER",
305 CTDB_HELPER_BINDIR, "ctdb_event_helper")) {
306 ctdb_die(ctdb, __location__
307 " Unable to set event helper\n");
310 current->start = timeval_current();
312 r = pipe(state->fd);
313 if (r != 0) {
314 DEBUG(DEBUG_ERR, (__location__ " pipe failed for child eventscript process\n"));
315 return -errno;
318 /* Arguments for helper */
319 if (!child_helper_args(state, ctdb, state->call, state->options, current,
320 state->fd[1], &argc, &argv)) {
321 DEBUG(DEBUG_ERR, (__location__ " failed to create arguments for eventscript helper\n"));
322 r = -ENOMEM;
323 close(state->fd[0]);
324 close(state->fd[1]);
325 return r;
328 if (!ctdb_vfork_with_logging(state, ctdb, current->name,
329 helper_prog, argc, argv,
330 log_event_script_output,
331 state, &state->child)) {
332 talloc_free(argv);
333 r = -errno;
334 close(state->fd[0]);
335 close(state->fd[1]);
336 return r;
339 talloc_free(argv);
341 close(state->fd[1]);
342 set_close_on_exec(state->fd[0]);
344 /* Set ourselves up to be called when that's done. */
345 fde = tevent_add_fd(ctdb->ev, state, state->fd[0], TEVENT_FD_READ,
346 ctdb_event_script_handler, state);
347 tevent_fd_set_auto_close(fde);
349 return 0;
353 Summarize status of this run of scripts.
355 static int script_status(struct ctdb_script_list_old *scripts)
357 unsigned int i;
359 for (i = 0; i < scripts->num_scripts; i++) {
360 switch (scripts->scripts[i].status) {
361 case -ENAMETOOLONG:
362 case -ENOENT:
363 case -ENOEXEC:
364 /* Disabled or missing; that's OK. */
365 break;
366 case 0:
367 /* No problem. */
368 break;
369 default:
370 return scripts->scripts[i].status;
374 /* All OK! */
375 return 0;
378 /* called when child is finished */
379 static void ctdb_event_script_handler(struct tevent_context *ev,
380 struct tevent_fd *fde,
381 uint16_t flags, void *p)
383 struct ctdb_event_script_state *state =
384 talloc_get_type(p, struct ctdb_event_script_state);
385 struct ctdb_script *current = get_current_script(state);
386 struct ctdb_context *ctdb = state->ctdb;
387 int r, status;
389 if (ctdb == NULL) {
390 DEBUG(DEBUG_ERR,("Eventscript finished but ctdb is NULL\n"));
391 return;
394 r = sys_read(state->fd[0], &current->status, sizeof(current->status));
395 if (r < 0) {
396 current->status = -errno;
397 } else if (r == 0) {
398 current->status = -EINTR;
399 } else if (r != sizeof(current->status)) {
400 current->status = -EIO;
403 current->finished = timeval_current();
404 /* valgrind gets overloaded if we run next script as it's still doing
405 * post-execution analysis, so kill finished child here. */
406 if (ctdb->valgrinding) {
407 ctdb_kill(ctdb, state->child, SIGKILL);
410 state->child = 0;
412 status = script_status(state->scripts);
414 /* Aborted or finished all scripts? We're done. */
415 if (status != 0 || state->current+1 == state->scripts->num_scripts) {
416 if (status != 0) {
417 DEBUG(DEBUG_INFO,
418 ("Eventscript %s %s finished with state %d\n",
419 ctdb_eventscript_call_names[state->call],
420 state->options, status));
423 talloc_free(state);
424 return;
427 /* Forget about that old fd. */
428 talloc_free(fde);
430 /* Next script! */
431 state->current++;
432 current++;
433 current->status = fork_child_for_script(ctdb, state);
434 if (current->status != 0) {
435 /* This calls the callback. */
436 talloc_free(state);
440 struct debug_hung_script_state {
441 struct ctdb_context *ctdb;
442 pid_t child;
443 enum ctdb_event call;
446 static int debug_hung_script_state_destructor(struct debug_hung_script_state *state)
448 if (state->child) {
449 ctdb_kill(state->ctdb, state->child, SIGKILL);
451 return 0;
454 static void debug_hung_script_timeout(struct tevent_context *ev, struct tevent_timer *te,
455 struct timeval t, void *p)
457 struct debug_hung_script_state *state =
458 talloc_get_type(p, struct debug_hung_script_state);
460 talloc_free(state);
463 static void debug_hung_script_done(struct tevent_context *ev, struct tevent_fd *fde,
464 uint16_t flags, void *p)
466 struct debug_hung_script_state *state =
467 talloc_get_type(p, struct debug_hung_script_state);
469 talloc_free(state);
472 static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct debug_hung_script_state *state)
474 pid_t pid;
475 const char * debug_hung_script = CTDB_ETCDIR "/debug-hung-script.sh";
476 int fd[2];
477 struct tevent_timer *ttimer;
478 struct tevent_fd *tfd;
479 const char **argv;
480 int i;
482 if (pipe(fd) < 0) {
483 DEBUG(DEBUG_ERR,("Failed to create pipe fd for debug hung script\n"));
484 return;
487 if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) {
488 debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT");
491 argv = talloc_array(state, const char *, 5);
493 argv[0] = talloc_asprintf(argv, "%d", fd[1]);
494 argv[1] = talloc_strdup(argv, debug_hung_script);
495 argv[2] = talloc_asprintf(argv, "%d", state->child);
496 argv[3] = talloc_strdup(argv, ctdb_eventscript_call_names[state->call]);
497 argv[4] = NULL;
499 for (i=0; i<4; i++) {
500 if (argv[i] == NULL) {
501 close(fd[0]);
502 close(fd[1]);
503 talloc_free(argv);
504 return;
509 if (!ctdb_vfork_with_logging(state, ctdb, "Hung-script",
510 helper_prog, 5, argv, NULL, NULL, &pid)) {
511 DEBUG(DEBUG_ERR,("Failed to fork a child to track hung event script\n"));
512 talloc_free(argv);
513 close(fd[0]);
514 close(fd[1]);
515 return;
518 talloc_free(argv);
519 close(fd[1]);
521 ttimer = tevent_add_timer(ctdb->ev, state,
522 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
523 debug_hung_script_timeout, state);
524 if (ttimer == NULL) {
525 close(fd[0]);
526 return;
529 tfd = tevent_add_fd(ctdb->ev, state, fd[0], TEVENT_FD_READ,
530 debug_hung_script_done, state);
531 if (tfd == NULL) {
532 talloc_free(ttimer);
533 close(fd[0]);
534 return;
536 tevent_fd_set_auto_close(tfd);
539 /* called when child times out */
540 static void ctdb_event_script_timeout(struct tevent_context *ev,
541 struct tevent_timer *te,
542 struct timeval t, void *p)
544 struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
545 struct ctdb_context *ctdb = state->ctdb;
546 struct ctdb_script *current = get_current_script(state);
547 struct debug_hung_script_state *debug_state;
549 DEBUG(DEBUG_ERR,("Event script '%s %s %s' timed out after %.1fs, pid: %d\n",
550 current->name, ctdb_eventscript_call_names[state->call], state->options,
551 timeval_elapsed(&current->start),
552 state->child));
554 /* ignore timeouts for these events */
555 switch (state->call) {
556 case CTDB_EVENT_START_RECOVERY:
557 case CTDB_EVENT_RECOVERED:
558 case CTDB_EVENT_TAKE_IP:
559 case CTDB_EVENT_RELEASE_IP:
560 state->scripts->scripts[state->current].status = 0;
561 DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
562 break;
563 default:
564 state->scripts->scripts[state->current].status = -ETIME;
567 debug_state = talloc_zero(ctdb, struct debug_hung_script_state);
568 if (debug_state == NULL) {
569 talloc_free(state);
570 return;
573 /* Save information useful for running debug hung script, so
574 * eventscript state can be freed.
576 debug_state->ctdb = ctdb;
577 debug_state->child = state->child;
578 debug_state->call = state->call;
580 /* This destructor will actually kill the hung event script */
581 talloc_set_destructor(debug_state, debug_hung_script_state_destructor);
583 state->child = 0;
584 talloc_free(state);
586 ctdb_run_debug_hung_script(ctdb, debug_state);
590 destroy an event script: kill it if ->child != 0.
592 static int event_script_destructor(struct ctdb_event_script_state *state)
594 int status;
595 struct event_script_callback *callback;
597 if (state->child) {
598 DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
600 if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) {
601 DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
605 /* If we were the current monitor, we no longer are. */
606 if (state->ctdb->current_monitor == state) {
607 state->ctdb->current_monitor = NULL;
610 /* Save our scripts as the last executed status, if we have them.
611 * See ctdb_event_script_callback_v where we abort monitor event. */
612 if (state->scripts) {
613 talloc_free(state->ctdb->last_status[state->call]);
614 state->ctdb->last_status[state->call] = state->scripts;
615 if (state->current < state->ctdb->last_status[state->call]->num_scripts) {
616 state->ctdb->last_status[state->call]->num_scripts = state->current+1;
620 /* Use last status as result, or "OK" if none. */
621 if (state->ctdb->last_status[state->call]) {
622 status = script_status(state->ctdb->last_status[state->call]);
623 } else {
624 status = 0;
627 state->ctdb->active_events--;
628 if (state->ctdb->active_events < 0) {
629 ctdb_fatal(state->ctdb, "Active events < 0");
632 /* This is allowed to free us; talloc will prevent double free anyway,
633 * but beware if you call this outside the destructor!
634 * the callback hangs off a different context so we walk the list
635 * of "active" callbacks until we find the one state points to.
636 * if we cant find it it means the callback has been removed.
638 for (callback = state->ctdb->script_callbacks; callback != NULL; callback = callback->next) {
639 if (callback == state->callback) {
640 break;
644 state->callback = NULL;
646 if (callback) {
647 /* Make sure destructor doesn't free itself! */
648 talloc_steal(NULL, callback);
649 callback->fn(state->ctdb, status, callback->private_data);
650 talloc_free(callback);
653 return 0;
/* Count the words in options; words are separated by spaces or tabs. */
static unsigned int count_words(const char *options)
{
	static const char blanks[] = " \t";
	unsigned int nwords = 0;
	const char *p = options + strspn(options, blanks);

	/* p always sits on the first character of a word, or on the NUL. */
	for (; *p != '\0'; nwords++) {
		p += strcspn(p, blanks);
		p += strspn(p, blanks);
	}

	return nwords;
}
669 static bool check_options(enum ctdb_event call, const char *options)
671 switch (call) {
672 /* These all take no arguments. */
673 case CTDB_EVENT_INIT:
674 case CTDB_EVENT_SETUP:
675 case CTDB_EVENT_STARTUP:
676 case CTDB_EVENT_START_RECOVERY:
677 case CTDB_EVENT_RECOVERED:
678 case CTDB_EVENT_MONITOR:
679 case CTDB_EVENT_SHUTDOWN:
680 case CTDB_EVENT_IPREALLOCATED:
681 return count_words(options) == 0;
683 case CTDB_EVENT_TAKE_IP: /* interface, IP address, netmask bits. */
684 case CTDB_EVENT_RELEASE_IP:
685 return count_words(options) == 3;
687 case CTDB_EVENT_UPDATE_IP: /* old interface, new interface, IP address, netmask bits. */
688 return count_words(options) == 4;
690 default:
691 DEBUG(DEBUG_ERR,(__location__ "Unknown ctdb_event %u\n", call));
692 return false;
696 static int remove_callback(struct event_script_callback *callback)
698 DLIST_REMOVE(callback->ctdb->script_callbacks, callback);
699 return 0;
/* A callback invocation deferred to a tevent immediate event. */
struct schedule_callback_state {
	struct ctdb_context *ctdb;
	/* Function to call; may be NULL, in which case nothing is called. */
	void (*callback)(struct ctdb_context *, int, void *);
	void *private_data;
	/* Status value passed to callback. */
	int status;
	/* The immediate event; allocated as a child of this structure. */
	struct tevent_immediate *im;
};
710 static void schedule_callback_handler(struct tevent_context *ctx,
711 struct tevent_immediate *im,
712 void *private_data)
714 struct schedule_callback_state *state =
715 talloc_get_type_abort(private_data,
716 struct schedule_callback_state);
718 if (state->callback != NULL) {
719 state->callback(state->ctdb, state->status,
720 state->private_data);
722 talloc_free(state);
725 static int
726 schedule_callback_immediate(struct ctdb_context *ctdb,
727 void (*callback)(struct ctdb_context *,
728 int, void *),
729 void *private_data,
730 int status)
732 struct schedule_callback_state *state;
733 struct tevent_immediate *im;
735 state = talloc_zero(ctdb, struct schedule_callback_state);
736 if (state == NULL) {
737 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
738 return -1;
740 im = tevent_create_immediate(state);
741 if (im == NULL) {
742 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
743 talloc_free(state);
744 return -1;
747 state->ctdb = ctdb;
748 state->callback = callback;
749 state->private_data = private_data;
750 state->status = status;
751 state->im = im;
753 tevent_schedule_immediate(im, ctdb->ev,
754 schedule_callback_handler, state);
755 return 0;
759 run the event script in the background, calling the callback when
760 finished
762 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
763 const void *mem_ctx,
764 void (*callback)(struct ctdb_context *, int, void *),
765 void *private_data,
766 enum ctdb_event call,
767 const char *fmt, va_list ap)
768 PRINTF_ATTRIBUTE(6,0);
770 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
771 const void *mem_ctx,
772 void (*callback)(struct ctdb_context *, int, void *),
773 void *private_data,
774 enum ctdb_event call,
775 const char *fmt, va_list ap)
777 struct ctdb_event_script_state *state;
779 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
780 /* we guarantee that only some specifically allowed event scripts are run
781 while in recovery */
782 const enum ctdb_event allowed_calls[] = {
783 CTDB_EVENT_INIT,
784 CTDB_EVENT_SETUP,
785 CTDB_EVENT_START_RECOVERY,
786 CTDB_EVENT_SHUTDOWN,
787 CTDB_EVENT_RELEASE_IP,
788 CTDB_EVENT_IPREALLOCATED,
790 int i;
791 for (i=0;i<ARRAY_SIZE(allowed_calls);i++) {
792 if (call == allowed_calls[i]) break;
794 if (i == ARRAY_SIZE(allowed_calls)) {
795 DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
796 ctdb_eventscript_call_names[call]));
797 return -1;
801 /* Do not run new monitor events if some event is already
802 * running, unless the running event is a monitor event, in
803 * which case running a new one should cancel the old one. */
804 if (call == CTDB_EVENT_MONITOR &&
805 ctdb->active_events > 0 &&
806 ctdb->current_monitor == NULL) {
807 if (callback != NULL) {
808 callback(ctdb, -ECANCELED, private_data);
810 return 0;
813 /* Kill off any running monitor events to run this event. */
814 if (ctdb->current_monitor) {
815 struct ctdb_event_script_state *ms = talloc_get_type(ctdb->current_monitor, struct ctdb_event_script_state);
817 /* Cancel current monitor callback state only if monitoring
818 * context ctdb->monitor->monitor_context has not been freed */
819 if (ms->callback != NULL && !ctdb_stopped_monitoring(ctdb)) {
820 ms->callback->fn(ctdb, -ECANCELED, ms->callback->private_data);
821 talloc_free(ms->callback);
824 /* Discard script status so we don't save to last_status */
825 talloc_free(ctdb->current_monitor->scripts);
826 ctdb->current_monitor->scripts = NULL;
827 talloc_free(ctdb->current_monitor);
828 ctdb->current_monitor = NULL;
831 state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
832 CTDB_NO_MEMORY(ctdb, state);
834 /* The callback isn't done if the context is freed. */
835 state->callback = talloc(mem_ctx, struct event_script_callback);
836 CTDB_NO_MEMORY(ctdb, state->callback);
837 DLIST_ADD(ctdb->script_callbacks, state->callback);
838 talloc_set_destructor(state->callback, remove_callback);
839 state->callback->ctdb = ctdb;
840 state->callback->fn = callback;
841 state->callback->private_data = private_data;
843 state->ctdb = ctdb;
844 state->call = call;
845 state->options = talloc_vasprintf(state, fmt, ap);
846 state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
847 state->scripts = NULL;
848 if (state->options == NULL) {
849 DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
850 talloc_free(state);
851 return -1;
853 if (!check_options(state->call, state->options)) {
854 DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for '%s'\n",
855 state->options,
856 ctdb_eventscript_call_names[state->call]));
857 talloc_free(state);
858 return -1;
861 DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
862 ctdb_eventscript_call_names[state->call],
863 state->options));
865 /* This is not a child of state, since we save it in destructor. */
866 state->scripts = ctdb_get_script_list(ctdb, ctdb);
867 if (state->scripts == NULL) {
868 talloc_free(state);
869 return -1;
871 state->current = 0;
872 state->child = 0;
874 /* Nothing to do? */
875 if (state->scripts->num_scripts == 0) {
876 int ret = schedule_callback_immediate(ctdb, callback,
877 private_data, 0);
878 talloc_free(state);
879 if (ret != 0) {
880 DEBUG(DEBUG_ERR,
881 ("Unable to schedule callback for 0 scripts\n"));
882 return 1;
884 return 0;
887 state->scripts->scripts[0].status = fork_child_for_script(ctdb, state);
888 if (state->scripts->scripts[0].status != 0) {
889 talloc_free(state);
890 return -1;
893 if (call == CTDB_EVENT_MONITOR) {
894 ctdb->current_monitor = state;
897 ctdb->active_events++;
899 talloc_set_destructor(state, event_script_destructor);
901 if (!timeval_is_zero(&state->timeout)) {
902 tevent_add_timer(ctdb->ev, state,
903 timeval_current_ofs(state->timeout.tv_sec,
904 state->timeout.tv_usec),
905 ctdb_event_script_timeout, state);
906 } else {
907 DEBUG(DEBUG_ERR, (__location__ " eventscript %s %s called with no timeout\n",
908 ctdb_eventscript_call_names[state->call],
909 state->options));
912 return 0;
917 run the event script in the background, calling the callback when
918 finished. If mem_ctx is freed, callback will never be called.
920 int ctdb_event_script_callback(struct ctdb_context *ctdb,
921 TALLOC_CTX *mem_ctx,
922 void (*callback)(struct ctdb_context *, int, void *),
923 void *private_data,
924 enum ctdb_event call,
925 const char *fmt, ...)
927 va_list ap;
928 int ret;
930 va_start(ap, fmt);
931 ret = ctdb_event_script_callback_v(ctdb, mem_ctx, callback, private_data, call, fmt, ap);
932 va_end(ap);
934 return ret;
/* Result slot filled in by event_script_callback(). */
struct callback_status {
	/* Set to true once the callback has been called. */
	bool done;
	/* Status the callback was called with. */
	int status;
};
944 called when ctdb_event_script() finishes
946 static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data)
948 struct callback_status *s = (struct callback_status *)private_data;
949 s->done = true;
950 s->status = status;
954 run the event script, waiting for it to complete. Used when the caller
955 doesn't want to continue till the event script has finished.
957 int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_event call,
958 const char *fmt, ...)
960 va_list ap;
961 int ret;
962 struct callback_status status = {
963 .status = -1,
964 .done = false,
967 va_start(ap, fmt);
968 ret = ctdb_event_script_callback_v(ctdb, ctdb,
969 event_script_callback, &status, call, fmt, ap);
970 va_end(ap);
971 if (ret != 0) {
972 return ret;
975 while (status.done == false && tevent_loop_once(ctdb->ev) == 0) /* noop */;
977 if (status.status == -ETIME) {
978 DEBUG(DEBUG_ERR, (__location__ " eventscript for '%s' timed out."
979 " Immediately banning ourself for %d seconds\n",
980 ctdb_eventscript_call_names[call],
981 ctdb->tunable.recovery_ban_period));
983 /* Don't ban self if CTDB is starting up or shutting down */
984 if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) {
985 ctdb_ban_self(ctdb);
989 return status.status;
992 int ctdb_event_script(struct ctdb_context *ctdb, enum ctdb_event call)
994 /* GCC complains about empty format string, so use %s and "". */
995 return ctdb_event_script_args(ctdb, call, "%s", "");
/* State carried by a forced eventscript run until it is replied to. */
struct eventscript_callback_state {
	/* The control request to reply to when the run has finished. */
	struct ctdb_req_control_old *c;
};
1003 called when a forced eventscript run has finished
1005 static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
1006 void *private_data)
1008 const char *errmsg = NULL;
1010 struct eventscript_callback_state *state =
1011 talloc_get_type(private_data, struct eventscript_callback_state);
1013 if (status != 0) {
1014 if (status == -ECANCELED) {
1015 DEBUG(DEBUG_WARNING,
1016 (__location__ " Eventscript cancelled\n"));
1017 errmsg = "cancelled";
1018 } else {
1019 DEBUG(DEBUG_ERR,
1020 (__location__ " Failed to run eventscripts\n"));
1024 ctdb_request_control_reply(ctdb, state->c, NULL, status, errmsg);
1025 /* This will free the struct ctdb_event_script_state we are in! */
1026 talloc_free(state);
1027 return;
1031 /* Returns rest of string, or NULL if no match. */
1032 static const char *get_call(const char *p, enum ctdb_event *call)
1034 unsigned int len;
1036 /* Skip any initial whitespace. */
1037 p += strspn(p, " \t");
1039 /* See if we match any. */
1040 for (*call = 0; *call < CTDB_EVENT_MAX; (*call)++) {
1041 len = strlen(ctdb_eventscript_call_names[*call]);
1042 if (strncmp(p, ctdb_eventscript_call_names[*call], len) == 0) {
1043 /* If end of string or whitespace, we're done. */
1044 if (strcspn(p + len, " \t") == 0) {
1045 return p + len;
1049 return NULL;
1053 A control to force running of the eventscripts from the ctdb client tool
1055 int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
1056 struct ctdb_req_control_old *c,
1057 TDB_DATA indata, bool *async_reply)
1059 int ret;
1060 struct eventscript_callback_state *state;
1061 const char *options;
1062 enum ctdb_event call;
1064 /* Figure out what call they want. */
1065 options = get_call((const char *)indata.dptr, &call);
1066 if (!options) {
1067 DEBUG(DEBUG_ERR, (__location__ " Invalid event name \"%s\"\n", (const char *)indata.dptr));
1068 return -1;
1071 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
1072 DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
1073 return -1;
1076 state = talloc(ctdb->event_script_ctx, struct eventscript_callback_state);
1077 CTDB_NO_MEMORY(ctdb, state);
1079 state->c = talloc_steal(state, c);
1081 DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr));
1083 ret = ctdb_event_script_callback(ctdb,
1084 ctdb, run_eventscripts_callback, state,
1085 call, "%s", options);
1087 if (ret != 0) {
1088 DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts with arguments %s\n", indata.dptr));
1089 talloc_free(state);
1090 return -1;
1093 /* tell ctdb_control.c that we will be replying asynchronously */
1094 *async_reply = true;
1096 return 0;
1101 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata)
1103 const char *script;
1104 struct stat st;
1105 char *filename;
1106 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1108 script = (char *)indata.dptr;
1109 if (indata.dsize == 0) {
1110 DEBUG(DEBUG_ERR,(__location__ " No script specified.\n"));
1111 talloc_free(tmp_ctx);
1112 return -1;
1114 if (indata.dptr[indata.dsize - 1] != '\0') {
1115 DEBUG(DEBUG_ERR,(__location__ " String is not null terminated.\n"));
1116 talloc_free(tmp_ctx);
1117 return -1;
1119 if (index(script,'/') != NULL) {
1120 DEBUG(DEBUG_ERR,(__location__ " Script name contains '/'. Failed to enable script %s\n", script));
1121 talloc_free(tmp_ctx);
1122 return -1;
1126 if (stat(ctdb->event_script_dir, &st) != 0 &&
1127 errno == ENOENT) {
1128 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
1129 talloc_free(tmp_ctx);
1130 return -1;
1134 filename = talloc_asprintf(tmp_ctx, "%s/%s", ctdb->event_script_dir, script);
1135 if (filename == NULL) {
1136 DEBUG(DEBUG_ERR,(__location__ " Failed to create script path\n"));
1137 talloc_free(tmp_ctx);
1138 return -1;
1141 if (stat(filename, &st) != 0) {
1142 DEBUG(DEBUG_ERR,("Could not stat event script %s. Failed to enable script.\n", filename));
1143 talloc_free(tmp_ctx);
1144 return -1;
1147 if (chmod(filename, st.st_mode | S_IXUSR) == -1) {
1148 DEBUG(DEBUG_ERR,("Could not chmod %s. Failed to enable script.\n", filename));
1149 talloc_free(tmp_ctx);
1150 return -1;
1153 talloc_free(tmp_ctx);
1154 return 0;
1157 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata)
1159 const char *script;
1160 struct stat st;
1161 char *filename;
1162 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1164 script = (char *)indata.dptr;
1165 if (indata.dsize == 0) {
1166 DEBUG(DEBUG_ERR,(__location__ " No script specified.\n"));
1167 talloc_free(tmp_ctx);
1168 return -1;
1170 if (indata.dptr[indata.dsize - 1] != '\0') {
1171 DEBUG(DEBUG_ERR,(__location__ " String is not null terminated.\n"));
1172 talloc_free(tmp_ctx);
1173 return -1;
1175 if (index(script,'/') != NULL) {
1176 DEBUG(DEBUG_ERR,(__location__ " Script name contains '/'. Failed to disable script %s\n", script));
1177 talloc_free(tmp_ctx);
1178 return -1;
1182 if (stat(ctdb->event_script_dir, &st) != 0 &&
1183 errno == ENOENT) {
1184 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
1185 talloc_free(tmp_ctx);
1186 return -1;
1190 filename = talloc_asprintf(tmp_ctx, "%s/%s", ctdb->event_script_dir, script);
1191 if (filename == NULL) {
1192 DEBUG(DEBUG_ERR,(__location__ " Failed to create script path\n"));
1193 talloc_free(tmp_ctx);
1194 return -1;
1197 if (stat(filename, &st) != 0) {
1198 DEBUG(DEBUG_ERR,("Could not stat event script %s. Failed to disable script.\n", filename));
1199 talloc_free(tmp_ctx);
1200 return -1;
1203 if (chmod(filename, st.st_mode & ~(S_IXUSR|S_IXGRP|S_IXOTH)) == -1) {
1204 DEBUG(DEBUG_ERR,("Could not chmod %s. Failed to disable script.\n", filename));
1205 talloc_free(tmp_ctx);
1206 return -1;
1209 talloc_free(tmp_ctx);
1210 return 0;