ctdb/server/ctdb_monitor.c

   1 /*
   2    monitoring links to all other nodes to detect dead nodes
   3
   4
   5    Copyright (C) Ronnie Sahlberg 2007
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/filesys.h"
  23 #include "system/wait.h"
  24 #include "../include/ctdb_private.h"
  25
  26 struct ctdb_monitor_state {
  27         uint32_t monitoring_mode;
  28         TALLOC_CTX *monitor_context;
  29         uint32_t next_interval;
  30 };
  31
  32 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
  33                               struct timeval t, void *private_data);
  34
  35 /*
  36   setup the notification script
  37 */
  38 int ctdb_set_notification_script(struct ctdb_context *ctdb, const char *script)
  39 {
  40         ctdb->notification_script = talloc_strdup(ctdb, script);
  41         CTDB_NO_MEMORY(ctdb, ctdb->notification_script);
  42         return 0;
  43 }
  44
  45 static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
  46 {
  47         struct stat st;
  48         int ret;
  49         char *cmd;
  50
  51         if (stat(ctdb->notification_script, &st) != 0) {
  52                 DEBUG(DEBUG_ERR,("Could not stat notification script %s. Can not send notifications.\n", ctdb->notification_script));
  53                 return -1;
  54         }
  55         if (!(st.st_mode & S_IXUSR)) {
  56                 DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
  57                 return -1;
  58         }
  59
  60         cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
  61         CTDB_NO_MEMORY(ctdb, cmd);
  62
  63         ret = system(cmd);
  64         /* if the system() call was successful, translate ret into the
  65            return code from the command
  66         */
  67         if (ret != -1) {
  68                 ret = WEXITSTATUS(ret);
  69         }
  70         if (ret != 0) {
  71                 DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
  72         }
  73
  74         return ret;
  75 }
  76
  77 void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
  78 {
  79         pid_t child;
  80
  81         if (ctdb->notification_script == NULL) {
  82                 return;
  83         }
  84
  85         child = ctdb_fork(ctdb);
  86         if (child == (pid_t)-1) {
  87                 DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
  88                 return;
  89         }
  90         if (child == 0) {
  91                 int ret;
  92
  93                 ctdb_set_process_name("ctdb_notification");
  94                 debug_extra = talloc_asprintf(NULL, "notification-%s:", event);
  95                 ret = ctdb_run_notification_script_child(ctdb, event);
  96                 if (ret != 0) {
  97                         DEBUG(DEBUG_ERR,(__location__ " Notification script failed\n"));
  98                 }
  99                 _exit(0);
 100         }
 101
 102         return;
 103 }
 104
 105 /*
 106   called when a health monitoring event script finishes
 107  */
 108 static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 109 {
 110         struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 111         TDB_DATA data;
 112         struct ctdb_node_flag_change c;
 113         uint32_t next_interval;
 114         int ret;
 115         TDB_DATA rddata;
 116         struct srvid_request rd;
 117         const char *state_str = NULL;
 118
 119         c.pnn = ctdb->pnn;
 120         c.old_flags = node->flags;
 121
 122         rd.pnn   = ctdb->pnn;
 123         rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
 124
 125         rddata.dptr = (uint8_t *)&rd;
 126         rddata.dsize = sizeof(rd);
 127
 128         if (status == -ECANCELED) {
 129                 DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
 130                 goto after_change_status;
 131         }
 132
 133         if (status == -ETIME) {
 134                 ctdb->event_script_timeouts++;
 135
 136                 if (ctdb->event_script_timeouts >= ctdb->tunable.script_timeout_count) {
 137                         DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_timeout_count));
 138                 } else {
 139                         /* We pretend this is OK. */
 140                         goto after_change_status;
 141                 }
 142         }
 143
 144         if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
 145                 DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
 146                 node->flags |= NODE_FLAGS_UNHEALTHY;
 147                 ctdb->monitor->next_interval = 5;
 148
 149                 ctdb_run_notification_script(ctdb, "unhealthy");
 150         } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 151                 DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
 152                 node->flags &= ~NODE_FLAGS_UNHEALTHY;
 153                 ctdb->monitor->next_interval = 5;
 154
 155                 ctdb_run_notification_script(ctdb, "healthy");
 156         }
 157
 158 after_change_status:
 159         next_interval = ctdb->monitor->next_interval;
 160
 161         ctdb->monitor->next_interval *= 2;
 162         if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
 163                 ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
 164         }
 165
 166         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 167                                 timeval_current_ofs(next_interval, 0),
 168                                 ctdb_check_health, ctdb);
 169
 170         if (c.old_flags == node->flags) {
 171                 return;
 172         }
 173
 174         c.new_flags = node->flags;
 175
 176         data.dptr = (uint8_t *)&c;
 177         data.dsize = sizeof(c);
 178
 179         /* ask the recovery daemon to push these changes out to all nodes */
 180         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 181                                  CTDB_SRVID_PUSH_NODE_FLAGS, data);
 182
 183         if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
 184                 state_str = "UNHEALTHY";
 185         } else {
 186                 state_str = "HEALTHY";
 187         }
 188
 189         /* ask the recmaster to reallocate all addresses */
 190         DEBUG(DEBUG_ERR,("Node became %s. Ask recovery master %u to perform ip reallocation\n",
 191                          state_str, ctdb->recovery_master));
 192         ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
 193         if (ret != 0) {
 194                 DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
 195         }
 196 }
 197
 198
 199 /*
 200   called when the startup event script finishes
 201  */
 202 static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
 203 {
 204         if (status != 0) {
 205                 DEBUG(DEBUG_ERR,("startup event failed\n"));
 206         } else if (status == 0) {
 207                 DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
 208                 ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
 209                 ctdb->monitor->next_interval = 2;
 210                 ctdb_run_notification_script(ctdb, "startup");
 211         }
 212
 213         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 214                         timeval_current_ofs(ctdb->monitor->next_interval, 0),
 215                         ctdb_check_health, ctdb);
 216 }
 217
 218
 219 /*
 220   wait until we have finished initial recoveries before we start the
 221   monitoring events
 222  */
 223 static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te,
 224                               struct timeval t, void *private_data)
 225 {
 226         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 227         int ret;
 228         static int count = 0;
 229
 230         count++;
 231
 232         if (count < 60 || count%600 == 0) {
 233                 DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
 234                 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
 235                         DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
 236                 }
 237         }
 238
 239         if (ctdb->vnn_map->generation == INVALID_GENERATION) {
 240                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 241
 242                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 243                                      timeval_current_ofs(1, 0),
 244                                      ctdb_wait_until_recovered, ctdb);
 245                 return;
 246         }
 247
 248         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 249                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 250
 251                 DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
 252                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 253                                      timeval_current_ofs(1, 0),
 254                                      ctdb_wait_until_recovered, ctdb);
 255                 return;
 256         }
 257
 258
 259         if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
 260                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 261
 262                 DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
 263
 264                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 265                                      timeval_current_ofs(1, 0),
 266                                      ctdb_wait_until_recovered, ctdb);
 267                 return;
 268         }
 269
 270         if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
 271                 DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
 272                                   "until the next recovery\n"));
 273                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 274                                      timeval_current_ofs(1, 0),
 275                                      ctdb_wait_until_recovered, ctdb);
 276                 return;
 277         }
 278
 279         ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
 280         ret = ctdb_recheck_persistent_health(ctdb);
 281         if (ret != 0) {
 282                 ctdb->db_persistent_check_errors++;
 283                 if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
 284                         DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
 285                               (__location__ "ctdb_recheck_persistent_health() "
 286                               "failed (%llu of %llu times) - retry later\n",
 287                               (unsigned long long)ctdb->db_persistent_check_errors,
 288                               (unsigned long long)ctdb->max_persistent_check_errors));
 289                         event_add_timed(ctdb->ev,
 290                                         ctdb->monitor->monitor_context,
 291                                         timeval_current_ofs(1, 0),
 292                                         ctdb_wait_until_recovered, ctdb);
 293                         return;
 294                 }
 295                 DEBUG(DEBUG_ALERT,(__location__
 296                                   "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
 297                                   (unsigned long long)ctdb->db_persistent_check_errors));
 298                 ctdb_shutdown_sequence(ctdb, 11);
 299                 /* In case above returns due to duplicate shutdown */
 300                 return;
 301         }
 302         ctdb->db_persistent_check_errors = 0;
 303
 304         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 305                              timeval_current(),
 306                              ctdb_check_health, ctdb);
 307 }
 308
 309
 310 /*
 311   see if the event scripts think we are healthy
 312  */
 313 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 314                               struct timeval t, void *private_data)
 315 {
 316         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 317         int ret = 0;
 318
 319         if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
 320                 DEBUG(DEBUG_NOTICE,("Not yet in startup runstate. Wait one more second\n"));
 321                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 322                                 timeval_current_ofs(1, 0),
 323                                 ctdb_check_health, ctdb);
 324                 return;
 325         }
 326
 327         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
 328             (ctdb->monitor->monitoring_mode == CTDB_MONITORING_DISABLED &&
 329              ctdb->runstate == CTDB_RUNSTATE_RUNNING)) {
 330                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 331                                 timeval_current_ofs(ctdb->monitor->next_interval, 0),
 332                                 ctdb_check_health, ctdb);
 333                 return;
 334         }
 335
 336         if (ctdb->runstate == CTDB_RUNSTATE_STARTUP) {
 337                 DEBUG(DEBUG_NOTICE,("Recoveries finished. Running the \"startup\" event.\n"));
 338                 ret = ctdb_event_script_callback(ctdb,
 339                                                  ctdb->monitor->monitor_context, ctdb_startup_callback,
 340                                                  ctdb, false,
 341                                                  CTDB_EVENT_STARTUP, "%s", "");
 342         } else {
 343                 int i;
 344                 int skip_monitoring = 0;
 345
 346                 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 347                         skip_monitoring = 1;
 348                         DEBUG(DEBUG_ERR,("Skip monitoring during recovery\n"));
 349                 }
 350                 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
 351                         if (ctdb->freeze_handles[i] != NULL) {
 352                                 DEBUG(DEBUG_ERR,("Skip monitoring since databases are frozen\n"));
 353                                 skip_monitoring = 1;
 354                                 break;
 355                         }
 356                 }
 357                 if (skip_monitoring != 0) {
 358                         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 359                                         timeval_current_ofs(ctdb->monitor->next_interval, 0),
 360                                         ctdb_check_health, ctdb);
 361                         return;
 362                 } else {
 363                         ret = ctdb_event_script_callback(ctdb,
 364                                         ctdb->monitor->monitor_context, ctdb_health_callback,
 365                                         ctdb, false,
 366                                         CTDB_EVENT_MONITOR, "%s", "");
 367                 }
 368         }
 369
 370         if (ret != 0) {
 371                 DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
 372                 ctdb->monitor->next_interval = 5;
 373                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 374                         timeval_current_ofs(5, 0),
 375                         ctdb_check_health, ctdb);
 376         }
 377 }
 378
 379 /*
 380   (Temporaily) Disabling monitoring will stop the monitor event scripts
 381   from running   but node health checks will still occur
 382 */
 383 void ctdb_disable_monitoring(struct ctdb_context *ctdb)
 384 {
 385         ctdb->monitor->monitoring_mode = CTDB_MONITORING_DISABLED;
 386         DEBUG(DEBUG_INFO,("Monitoring has been disabled\n"));
 387 }
 388
 389 /*
 390    Re-enable running monitor events after they have been disabled
 391  */
 392 void ctdb_enable_monitoring(struct ctdb_context *ctdb)
 393 {
 394         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_ACTIVE;
 395         ctdb->monitor->next_interval = 5;
 396         DEBUG(DEBUG_INFO,("Monitoring has been enabled\n"));
 397 }
 398
 399 /* stop any monitoring
 400    this should only be done when shutting down the daemon
 401 */
 402 void ctdb_stop_monitoring(struct ctdb_context *ctdb)
 403 {
 404         talloc_free(ctdb->monitor->monitor_context);
 405         ctdb->monitor->monitor_context = NULL;
 406
 407         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_DISABLED;
 408         ctdb->monitor->next_interval = 5;
 409         DEBUG(DEBUG_NOTICE,("Monitoring has been stopped\n"));
 410 }
 411
 412 /*
 413   start watching for nodes that might be dead
 414  */
 415 void ctdb_start_monitoring(struct ctdb_context *ctdb)
 416 {
 417         if (ctdb->monitor != NULL) {
 418                 return;
 419         }
 420
 421         ctdb->monitor = talloc(ctdb, struct ctdb_monitor_state);
 422         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor);
 423
 424         ctdb->monitor->next_interval = 5;
 425
 426         ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
 427         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
 428
 429         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 430                              timeval_current_ofs(1, 0),
 431                              ctdb_wait_until_recovered, ctdb);
 432
 433         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_ACTIVE;
 434         DEBUG(DEBUG_NOTICE,("Monitoring has been started\n"));
 435 }
 436
 437
 438 /*
 439   modify flags on a node
 440  */
 441 int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 442 {
 443         struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
 444         struct ctdb_node *node;
 445         uint32_t old_flags;
 446
 447         if (c->pnn >= ctdb->num_nodes) {
 448                 DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
 449                 return -1;
 450         }
 451
 452         node         = ctdb->nodes[c->pnn];
 453         old_flags    = node->flags;
 454         if (c->pnn != ctdb->pnn) {
 455                 c->old_flags  = node->flags;
 456         }
 457         node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
 458         node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
 459
 460         /* we dont let other nodes modify our STOPPED status */
 461         if (c->pnn == ctdb->pnn) {
 462                 node->flags &= ~NODE_FLAGS_STOPPED;
 463                 if (old_flags & NODE_FLAGS_STOPPED) {
 464                         node->flags |= NODE_FLAGS_STOPPED;
 465                 }
 466         }
 467
 468         /* we dont let other nodes modify our BANNED status */
 469         if (c->pnn == ctdb->pnn) {
 470                 node->flags &= ~NODE_FLAGS_BANNED;
 471                 if (old_flags & NODE_FLAGS_BANNED) {
 472                         node->flags |= NODE_FLAGS_BANNED;
 473                 }
 474         }
 475
 476         if (node->flags == c->old_flags) {
 477                 DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
 478                 return 0;
 479         }
 480
 481         DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
 482
 483         if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
 484                 DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
 485                                   c->pnn));
 486                 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 487         }
 488
 489         /* tell the recovery daemon something has changed */
 490         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 491                                  CTDB_SRVID_SET_NODE_FLAGS, indata);
 492
 493         /* if we have become banned, we should go into recovery mode */
 494         if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
 495                 ctdb_local_node_got_banned(ctdb);
 496         }
 497
 498         return 0;
 499 }
 500
 501 /*
 502   return the monitoring mode
 503  */
 504 int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb)
 505 {
 506         if (ctdb->monitor == NULL) {
 507                 return CTDB_MONITORING_DISABLED;
 508         }
 509         return ctdb->monitor->monitoring_mode;
 510 }
 511
 512 /*
 513  * Check if monitoring has been stopped
 514  */
 515 bool ctdb_stopped_monitoring(struct ctdb_context *ctdb)
 516 {
 517         return (ctdb->monitor->monitor_context == NULL ? true : false);
 518 }