ctdb/server/ctdb_monitor.c

   1 /*
   2    monitoring links to all other nodes to detect dead nodes
   3
   4
   5    Copyright (C) Ronnie Sahlberg 2007
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/filesys.h"
  23 #include "system/wait.h"
  24 #include "../include/ctdb_private.h"
  25
  26 struct ctdb_monitor_state {
  27         uint32_t monitoring_mode;
  28         TALLOC_CTX *monitor_context;
  29         uint32_t next_interval;
  30 };
  31
  32 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
  33                               struct timeval t, void *private_data);
  34
  35 /*
  36   setup the notification script
  37 */
  38 int ctdb_set_notification_script(struct ctdb_context *ctdb, const char *script)
  39 {
  40         ctdb->notification_script = talloc_strdup(ctdb, script);
  41         CTDB_NO_MEMORY(ctdb, ctdb->notification_script);
  42         return 0;
  43 }
  44
  45 static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
  46 {
  47         struct stat st;
  48         int ret;
  49         char *cmd;
  50
  51         if (stat(ctdb->notification_script, &st) != 0) {
  52                 DEBUG(DEBUG_ERR,("Could not stat notification script %s. Can not send notifications.\n", ctdb->notification_script));
  53                 return -1;
  54         }
  55         if (!(st.st_mode & S_IXUSR)) {
  56                 DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
  57                 return -1;
  58         }
  59
  60         cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
  61         CTDB_NO_MEMORY(ctdb, cmd);
  62
  63         ret = system(cmd);
  64         /* if the system() call was successful, translate ret into the
  65            return code from the command
  66         */
  67         if (ret != -1) {
  68                 ret = WEXITSTATUS(ret);
  69         }
  70         if (ret != 0) {
  71                 DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
  72         }
  73
  74         return ret;
  75 }
  76
  77 void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
  78 {
  79         pid_t child;
  80
  81         if (ctdb->notification_script == NULL) {
  82                 return;
  83         }
  84
  85         child = ctdb_fork(ctdb);
  86         if (child == (pid_t)-1) {
  87                 DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
  88                 return;
  89         }
  90         if (child == 0) {
  91                 int ret;
  92
  93                 ctdb_set_process_name("ctdb_notification");
  94                 debug_extra = talloc_asprintf(NULL, "notification-%s:", event);
  95                 ret = ctdb_run_notification_script_child(ctdb, event);
  96                 if (ret != 0) {
  97                         DEBUG(DEBUG_ERR,(__location__ " Notification script failed\n"));
  98                 }
  99                 _exit(0);
 100         }
 101
 102         return;
 103 }
 104
 105 /*
 106   called when a health monitoring event script finishes
 107  */
 108 static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 109 {
 110         struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 111         TDB_DATA data;
 112         struct ctdb_node_flag_change c;
 113         uint32_t next_interval;
 114         int ret;
 115         TDB_DATA rddata;
 116         struct srvid_request rd;
 117         const char *state_str = NULL;
 118
 119         c.pnn = ctdb->pnn;
 120         c.old_flags = node->flags;
 121
 122         rd.pnn   = ctdb->pnn;
 123         rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
 124
 125         rddata.dptr = (uint8_t *)&rd;
 126         rddata.dsize = sizeof(rd);
 127
 128         if (status == -ECANCELED) {
 129                 DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
 130                 goto after_change_status;
 131         }
 132
 133         if (status == -ETIME) {
 134                 ctdb->event_script_timeouts++;
 135
 136                 if (ctdb->event_script_timeouts >= ctdb->tunable.script_timeout_count) {
 137                         DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_timeout_count));
 138                 } else {
 139                         /* We pretend this is OK. */
 140                         goto after_change_status;
 141                 }
 142         }
 143
 144         if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
 145                 DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
 146                 node->flags |= NODE_FLAGS_UNHEALTHY;
 147                 ctdb->monitor->next_interval = 5;
 148
 149                 ctdb_run_notification_script(ctdb, "unhealthy");
 150         } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
 151                 DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
 152                 node->flags &= ~NODE_FLAGS_UNHEALTHY;
 153                 ctdb->monitor->next_interval = 5;
 154
 155                 ctdb_run_notification_script(ctdb, "healthy");
 156         }
 157
 158 after_change_status:
 159         next_interval = ctdb->monitor->next_interval;
 160
 161         ctdb->monitor->next_interval *= 2;
 162         if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
 163                 ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
 164         }
 165
 166         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 167                                 timeval_current_ofs(next_interval, 0),
 168                                 ctdb_check_health, ctdb);
 169
 170         if (c.old_flags == node->flags) {
 171                 return;
 172         }
 173
 174         c.new_flags = node->flags;
 175
 176         data.dptr = (uint8_t *)&c;
 177         data.dsize = sizeof(c);
 178
 179         /* ask the recovery daemon to push these changes out to all nodes */
 180         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 181                                  CTDB_SRVID_PUSH_NODE_FLAGS, data);
 182
 183         if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
 184                 state_str = "UNHEALTHY";
 185         } else {
 186                 state_str = "HEALTHY";
 187         }
 188
 189         /* ask the recmaster to reallocate all addresses */
 190         DEBUG(DEBUG_ERR,("Node became %s. Ask recovery master %u to perform ip reallocation\n",
 191                          state_str, ctdb->recovery_master));
 192         ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
 193         if (ret != 0) {
 194                 DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
 195         }
 196 }
 197
 198
 199 static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
 200                              struct timeval t, void *private_data);
 201 /*
 202   called when the startup event script finishes
 203  */
 204 static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
 205 {
 206         if (status != 0) {
 207                 DEBUG(DEBUG_ERR,("startup event failed\n"));
 208                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 209                                 timeval_current_ofs(5, 0),
 210                                 ctdb_run_startup, ctdb);
 211                 return;
 212         }
 213
 214         DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
 215         ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
 216         ctdb->monitor->next_interval = 2;
 217         ctdb_run_notification_script(ctdb, "startup");
 218
 219         ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
 220
 221         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 222                         timeval_current_ofs(ctdb->monitor->next_interval, 0),
 223                         ctdb_check_health, ctdb);
 224 }
 225
 226 static void ctdb_run_startup(struct event_context *ev, struct timed_event *te,
 227                              struct timeval t, void *private_data)
 228 {
 229         struct ctdb_context *ctdb = talloc_get_type(private_data,
 230                                                     struct ctdb_context);
 231         int ret;
 232
 233         /* This is necessary to avoid the "startup" event colliding
 234          * with the "ipreallocated" event from the takeover run
 235          * following the first recovery.  We might as well serialise
 236          * these things if we can.
 237          */
 238         if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
 239                 DEBUG(DEBUG_NOTICE,
 240                       ("Not yet in startup runstate. Wait one more second\n"));
 241                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 242                                 timeval_current_ofs(1, 0),
 243                                 ctdb_run_startup, ctdb);
 244                 return;
 245         }
 246
 247         DEBUG(DEBUG_NOTICE,("Running the \"startup\" event.\n"));
 248         ret = ctdb_event_script_callback(ctdb,
 249                                          ctdb->monitor->monitor_context,
 250                                          ctdb_startup_callback,
 251                                          ctdb, CTDB_EVENT_STARTUP, "%s", "");
 252
 253         if (ret != 0) {
 254                 DEBUG(DEBUG_ERR,("Unable to launch startup event script\n"));
 255                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 256                                 timeval_current_ofs(5, 0),
 257                                 ctdb_run_startup, ctdb);
 258         }
 259 }
 260
 261 /*
 262   wait until we have finished initial recoveries before we start the
 263   monitoring events
 264  */
 265 static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te,
 266                               struct timeval t, void *private_data)
 267 {
 268         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 269         int ret;
 270         static int count = 0;
 271
 272         count++;
 273
 274         if (count < 60 || count%600 == 0) {
 275                 DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
 276                 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
 277                         DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
 278                 }
 279         }
 280
 281         if (ctdb->vnn_map->generation == INVALID_GENERATION) {
 282                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 283
 284                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 285                                      timeval_current_ofs(1, 0),
 286                                      ctdb_wait_until_recovered, ctdb);
 287                 return;
 288         }
 289
 290         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 291                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 292
 293                 DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
 294                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 295                                      timeval_current_ofs(1, 0),
 296                                      ctdb_wait_until_recovered, ctdb);
 297                 return;
 298         }
 299
 300
 301         if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
 302                 ctdb->db_persistent_startup_generation = INVALID_GENERATION;
 303
 304                 DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
 305
 306                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 307                                      timeval_current_ofs(1, 0),
 308                                      ctdb_wait_until_recovered, ctdb);
 309                 return;
 310         }
 311
 312         if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
 313                 DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
 314                                   "until the next recovery\n"));
 315                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 316                                      timeval_current_ofs(1, 0),
 317                                      ctdb_wait_until_recovered, ctdb);
 318                 return;
 319         }
 320
 321         ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
 322         ret = ctdb_recheck_persistent_health(ctdb);
 323         if (ret != 0) {
 324                 ctdb->db_persistent_check_errors++;
 325                 if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
 326                         DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
 327                               (__location__ "ctdb_recheck_persistent_health() "
 328                               "failed (%llu of %llu times) - retry later\n",
 329                               (unsigned long long)ctdb->db_persistent_check_errors,
 330                               (unsigned long long)ctdb->max_persistent_check_errors));
 331                         event_add_timed(ctdb->ev,
 332                                         ctdb->monitor->monitor_context,
 333                                         timeval_current_ofs(1, 0),
 334                                         ctdb_wait_until_recovered, ctdb);
 335                         return;
 336                 }
 337                 DEBUG(DEBUG_ALERT,(__location__
 338                                   "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
 339                                   (unsigned long long)ctdb->db_persistent_check_errors));
 340                 ctdb_shutdown_sequence(ctdb, 11);
 341                 /* In case above returns due to duplicate shutdown */
 342                 return;
 343         }
 344         ctdb->db_persistent_check_errors = 0;
 345
 346         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 347                         timeval_current(), ctdb_run_startup, ctdb);
 348 }
 349
 350
 351 /*
 352   see if the event scripts think we are healthy
 353  */
 354 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
 355                               struct timeval t, void *private_data)
 356 {
 357         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
 358         bool skip_monitoring = false;
 359         int ret = 0;
 360
 361         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
 362             ctdb->monitor->monitoring_mode == CTDB_MONITORING_DISABLED) {
 363                 skip_monitoring = true;
 364         } else {
 365                 int i;
 366                 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
 367                         if (ctdb->freeze_handles[i] != NULL) {
 368                                 DEBUG(DEBUG_ERR,
 369                                       ("Skip monitoring since databases are frozen\n"));
 370                                 skip_monitoring = true;
 371                                 break;
 372                         }
 373                 }
 374         }
 375
 376         if (skip_monitoring) {
 377                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 378                                 timeval_current_ofs(ctdb->monitor->next_interval, 0),
 379                                 ctdb_check_health, ctdb);
 380                 return;
 381         }
 382
 383         ret = ctdb_event_script_callback(ctdb,
 384                                          ctdb->monitor->monitor_context,
 385                                          ctdb_health_callback,
 386                                          ctdb, CTDB_EVENT_MONITOR, "%s", "");
 387         if (ret != 0) {
 388                 DEBUG(DEBUG_ERR,("Unable to launch monitor event script\n"));
 389                 ctdb->monitor->next_interval = 5;
 390                 event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 391                                 timeval_current_ofs(5, 0),
 392                                 ctdb_check_health, ctdb);
 393         }
 394 }
 395
 396 /*
 397   (Temporaily) Disabling monitoring will stop the monitor event scripts
 398   from running   but node health checks will still occur
 399 */
 400 void ctdb_disable_monitoring(struct ctdb_context *ctdb)
 401 {
 402         ctdb->monitor->monitoring_mode = CTDB_MONITORING_DISABLED;
 403         DEBUG(DEBUG_INFO,("Monitoring has been disabled\n"));
 404 }
 405
 406 /*
 407    Re-enable running monitor events after they have been disabled
 408  */
 409 void ctdb_enable_monitoring(struct ctdb_context *ctdb)
 410 {
 411         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_ACTIVE;
 412         ctdb->monitor->next_interval = 5;
 413         DEBUG(DEBUG_INFO,("Monitoring has been enabled\n"));
 414 }
 415
 416 /* stop any monitoring
 417    this should only be done when shutting down the daemon
 418 */
 419 void ctdb_stop_monitoring(struct ctdb_context *ctdb)
 420 {
 421         talloc_free(ctdb->monitor->monitor_context);
 422         ctdb->monitor->monitor_context = NULL;
 423
 424         ctdb->monitor->monitoring_mode  = CTDB_MONITORING_DISABLED;
 425         ctdb->monitor->next_interval = 5;
 426         DEBUG(DEBUG_NOTICE,("Monitoring has been stopped\n"));
 427 }
 428
 429 /*
 430   start watching for nodes that might be dead
 431  */
 432 void ctdb_wait_for_first_recovery(struct ctdb_context *ctdb)
 433 {
 434         ctdb_set_runstate(ctdb, CTDB_RUNSTATE_FIRST_RECOVERY);
 435
 436         ctdb->monitor = talloc(ctdb, struct ctdb_monitor_state);
 437         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor);
 438
 439         ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
 440         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
 441
 442         event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 443                         timeval_current_ofs(1, 0),
 444                         ctdb_wait_until_recovered, ctdb);
 445 }
 446
 447
 448 /*
 449   modify flags on a node
 450  */
 451 int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 452 {
 453         struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
 454         struct ctdb_node *node;
 455         uint32_t old_flags;
 456
 457         if (c->pnn >= ctdb->num_nodes) {
 458                 DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
 459                 return -1;
 460         }
 461
 462         node         = ctdb->nodes[c->pnn];
 463         old_flags    = node->flags;
 464         if (c->pnn != ctdb->pnn) {
 465                 c->old_flags  = node->flags;
 466         }
 467         node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
 468         node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
 469
 470         /* we dont let other nodes modify our STOPPED status */
 471         if (c->pnn == ctdb->pnn) {
 472                 node->flags &= ~NODE_FLAGS_STOPPED;
 473                 if (old_flags & NODE_FLAGS_STOPPED) {
 474                         node->flags |= NODE_FLAGS_STOPPED;
 475                 }
 476         }
 477
 478         /* we dont let other nodes modify our BANNED status */
 479         if (c->pnn == ctdb->pnn) {
 480                 node->flags &= ~NODE_FLAGS_BANNED;
 481                 if (old_flags & NODE_FLAGS_BANNED) {
 482                         node->flags |= NODE_FLAGS_BANNED;
 483                 }
 484         }
 485
 486         if (node->flags == c->old_flags) {
 487                 DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
 488                 return 0;
 489         }
 490
 491         DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
 492
 493         if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
 494                 DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
 495                                   c->pnn));
 496                 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 497         }
 498
 499         /* tell the recovery daemon something has changed */
 500         ctdb_daemon_send_message(ctdb, ctdb->pnn,
 501                                  CTDB_SRVID_SET_NODE_FLAGS, indata);
 502
 503         /* if we have become banned, we should go into recovery mode */
 504         if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
 505                 ctdb_local_node_got_banned(ctdb);
 506         }
 507
 508         return 0;
 509 }
 510
 511 /*
 512   return the monitoring mode
 513  */
 514 int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb)
 515 {
 516         if (ctdb->monitor == NULL) {
 517                 return CTDB_MONITORING_DISABLED;
 518         }
 519         return ctdb->monitor->monitoring_mode;
 520 }
 521
 522 /*
 523  * Check if monitoring has been stopped
 524  */
 525 bool ctdb_stopped_monitoring(struct ctdb_context *ctdb)
 526 {
 527         return (ctdb->monitor->monitor_context == NULL ? true : false);
 528 }