/*
   monitoring links to all other nodes to detect dead nodes

   Copyright (C) Ronnie Sahlberg 2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
22 #include "system/filesys.h"
23 #include "system/network.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/debug.h"
31 #include "lib/util/samba_util.h"
33 #include "ctdb_private.h"
35 #include "common/common.h"
36 #include "common/logging.h"
/*
  see if any nodes are dead
 */
/*
  Timer handler that walks ctdb->nodes[0 .. num_nodes-1] and sends
  keepalives to other nodes.

  private_data is converted to the struct ctdb_context with
  talloc_get_type(); ev, te and t are not referenced in the visible lines.

  What the visible lines show, per node:
   - a test for NODE_FLAGS_DELETED and a test for node->pnn == ctdb->pnn
     (our own node);
   - for a node flagged NODE_FLAGS_DISCONNECTED, ctdb_node_connected() is
     called when its rx_cnt is non-zero;
   - a test for rx_cnt == 0;
   - when dead_count >= tunable.keepalive_limit, a DEBUG_NOTICE message is
     logged and ctdb_send_keepalive() is called for the node;
   - a DEBUG_DEBUG message followed by ctdb_send_keepalive() for the node.

  Finally the handler is added again with tevent_add_timer() on
  ctdb->keepalive_ctx, due at
  timeval_current_ofs(tunable.keepalive_interval, 0).

  NOTE(review): this listing has lost lines (the embedded line numbers
  skip, and closing braces / branch bodies are absent), so how each test
  above affects control flow and the counters cannot be confirmed from
  here - check against the complete file before relying on this summary.
 */
42 static void ctdb_check_for_dead_nodes(struct tevent_context
*ev
,
43 struct tevent_timer
*te
,
44 struct timeval t
, void *private_data
)
46 struct ctdb_context
*ctdb
= talloc_get_type(private_data
, struct ctdb_context
);
49 /* send a keepalive to all other nodes, unless */
50 for (i
=0;i
<ctdb
->num_nodes
;i
++) {
51 struct ctdb_node
*node
= ctdb
->nodes
[i
];
/* deleted-node test; the branch body is not visible in this listing */
53 if (node
->flags
& NODE_FLAGS_DELETED
) {
/* own-node test; the branch body is not visible in this listing */
57 if (node
->pnn
== ctdb
->pnn
) {
61 if (node
->flags
& NODE_FLAGS_DISCONNECTED
) {
62 /* it might have come alive again */
63 if (node
->rx_cnt
!= 0) {
64 ctdb_node_connected(node
);
70 if (node
->rx_cnt
== 0) {
/* dead_count has reached the keepalive_limit tunable */
78 if (node
->dead_count
>= ctdb
->tunable
.keepalive_limit
) {
79 DEBUG(DEBUG_NOTICE
,("dead count reached for node %u\n", node
->pnn
));
81 ctdb_send_keepalive(ctdb
, node
->pnn
);
82 /* maybe tell the transport layer to kill the
88 DEBUG(DEBUG_DEBUG
,("sending keepalive to %u\n", node
->pnn
));
89 ctdb_send_keepalive(ctdb
, node
->pnn
);
94 tevent_add_timer(ctdb
->ev
, ctdb
->keepalive_ctx
,
95 timeval_current_ofs(ctdb
->tunable
.keepalive_interval
, 0),
96 ctdb_check_for_dead_nodes
, ctdb
);
100 void ctdb_start_keepalive(struct ctdb_context
*ctdb
)
102 struct tevent_timer
*te
;
104 ctdb
->keepalive_ctx
= talloc_new(ctdb
);
105 CTDB_NO_MEMORY_FATAL(ctdb
, ctdb
->keepalive_ctx
);
107 te
= tevent_add_timer(ctdb
->ev
, ctdb
->keepalive_ctx
,
108 timeval_current_ofs(ctdb
->tunable
.keepalive_interval
, 0),
109 ctdb_check_for_dead_nodes
, ctdb
);
110 CTDB_NO_MEMORY_FATAL(ctdb
, te
);
112 DEBUG(DEBUG_NOTICE
,("Keepalive monitoring has been started\n"));
115 void ctdb_stop_keepalive(struct ctdb_context
*ctdb
)
117 talloc_free(ctdb
->keepalive_ctx
);
118 ctdb
->keepalive_ctx
= NULL
;