2 monitoring links to all other nodes to detect dead nodes
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "../include/ctdb_private.h"
28 see if any nodes are dead
/*
  Timed-event callback that walks ctdb->nodes[0 .. num_nodes-1] and
  probes the other nodes with keepalives.

  ev, te, t     - event-loop arguments; none of them is referenced in
                  the part of the body that is visible here
  private_data  - converted to the struct ctdb_context with
                  talloc_get_type()

  What the visible code does, in order, for each node:
   - tests NODE_FLAGS_DELETED in node->flags
   - tests whether node->pnn equals ctdb->pnn
   - for a node flagged NODE_FLAGS_DISCONNECTED, calls
     ctdb_node_connected() when node->rx_cnt is non-zero
   - tests node->rx_cnt == 0
   - when node->dead_count >= ctdb->tunable.keepalive_limit, logs a
     DEBUG_NOTICE message and still calls ctdb_send_keepalive()
   - logs at DEBUG_DEBUG and calls ctdb_send_keepalive()
  After the loop it registers itself again with event_add_timed() on
  ctdb->ev, under ctdb->keepalive_ctx, at
  timeval_current_ofs(keepalive_interval, 0).

  NOTE(review): this copy of the function is incomplete. Tokens are
  split across lines, the leading integers look like fused-in line
  numbers, and the bodies and closing braces of several conditionals
  (plus the declaration of i) are not present. Presumably the first
  two tests skip the node and the rx_cnt test maintains dead_count,
  but none of that can be confirmed from here - restore the function
  from a clean copy before changing its logic.
*/
30 static void ctdb_check_for_dead_nodes(struct event_context
*ev
, struct timed_event
*te
,
31 struct timeval t
, void *private_data
)
33 struct ctdb_context
*ctdb
= talloc_get_type(private_data
, struct ctdb_context
);
36 /* send a keepalive to all other nodes, unless */
/* one pass over every slot of the node table */
37 for (i
=0;i
<ctdb
->num_nodes
;i
++) {
38 struct ctdb_node
*node
= ctdb
->nodes
[i
];
/* deleted-node test; its body is not visible (presumably a skip - TODO confirm) */
40 if (node
->flags
& NODE_FLAGS_DELETED
) {
/* same pnn as this context, i.e. the local node; body not visible (presumably a skip - TODO confirm) */
44 if (node
->pnn
== ctdb
->pnn
) {
/* a disconnected node is only checked for incoming traffic */
48 if (node
->flags
& NODE_FLAGS_DISCONNECTED
) {
49 /* it might have come alive again */
50 if (node
->rx_cnt
!= 0) {
51 ctdb_node_connected(node
);
/* nothing counted in rx_cnt; the statements guarded by this test are not visible */
57 if (node
->rx_cnt
== 0) {
/* limit reached: log it, then send a keepalive anyway */
65 if (node
->dead_count
>= ctdb
->tunable
.keepalive_limit
) {
66 DEBUG(DEBUG_NOTICE
,("dead count reached for node %u\n", node
->pnn
));
68 ctdb_send_keepalive(ctdb
, node
->pnn
);
69 /* maybe tell the transport layer to kill the
75 DEBUG(DEBUG_DEBUG
,("sending keepalive to %u\n", node
->pnn
));
76 ctdb_send_keepalive(ctdb
, node
->pnn
);
81 event_add_timed(ctdb
->ev
, ctdb
->keepalive_ctx
,
82 timeval_current_ofs(ctdb
->tunable
.keepalive_interval
, 0),
83 ctdb_check_for_dead_nodes
, ctdb
);
87 void ctdb_start_keepalive(struct ctdb_context
*ctdb
)
89 struct timed_event
*te
;
91 ctdb
->keepalive_ctx
= talloc_new(ctdb
);
92 CTDB_NO_MEMORY_FATAL(ctdb
, ctdb
->keepalive_ctx
);
94 te
= event_add_timed(ctdb
->ev
, ctdb
->keepalive_ctx
,
95 timeval_current_ofs(ctdb
->tunable
.keepalive_interval
, 0),
96 ctdb_check_for_dead_nodes
, ctdb
);
97 CTDB_NO_MEMORY_FATAL(ctdb
, te
);
99 DEBUG(DEBUG_NOTICE
,("Keepalive monitoring has been started\n"));
102 void ctdb_stop_keepalive(struct ctdb_context
*ctdb
)
104 talloc_free(ctdb
->keepalive_ctx
);
105 ctdb
->keepalive_ctx
= NULL
;