2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
22 #include "system/filesys.h"
27 #include "lib/util/dlinklist.h"
28 #include "lib/util/debug.h"
29 #include "lib/util/samba_util.h"
31 #include "ctdb_private.h"
32 #include "ctdb_client.h"
34 #include "common/common.h"
35 #include "common/logging.h"
38 choose the transport we will use
/*
 * Record the transport to use on the ctdb context.
 *
 * Duplicates the `transport` string with talloc_strdup(), passing ctdb as the
 * talloc context, and stores the copy in ctdb->transport.  The result is then
 * handed to CTDB_NO_MEMORY() (presumably an allocation-failure check; the
 * macro is defined outside this excerpt).
 *
 * NOTE(review): the braces and the return statement of this function are not
 * visible in this excerpt, so the return value is not documented here.
 */
40 int ctdb_set_transport(struct ctdb_context
*ctdb
, const char *transport
)
42 ctdb
->transport
= talloc_strdup(ctdb
, transport
);
43 CTDB_NO_MEMORY(ctdb
, ctdb
->transport
);
49 Check whether an ip is a valid node ip
50 Returns the node id for this ip address or -1
/*
 * Look up a node by IP address.
 *
 * Walks ctdb->nodes[0 .. ctdb->num_nodes-1].  For each entry it first tests
 * the NODE_FLAGS_DELETED bit and then compares the entry's address against
 * `nodeip` using ctdb_same_ip().
 *
 * NOTE(review): the declaration of `nodeid`, the bodies of both `if`
 * statements and every return statement are missing from this excerpt.
 * Presumably deleted entries are skipped and the matching node's id is
 * returned, with -1 when nothing matches - confirm against the complete
 * source.
 */
52 int ctdb_ip_to_nodeid(struct ctdb_context
*ctdb
, const ctdb_sock_addr
*nodeip
)
56 for (nodeid
=0;nodeid
<ctdb
->num_nodes
;nodeid
++) {
57 if (ctdb
->nodes
[nodeid
]->flags
& NODE_FLAGS_DELETED
) {
60 if (ctdb_same_ip(&ctdb
->nodes
[nodeid
]->address
, nodeip
)) {
69 choose the recovery lock file
/*
 * Set (or replace) the recovery lock file path stored on the context.
 *
 * Visible steps:
 *  - if ctdb->recovery_lock_file is already set, it is released with
 *    talloc_free() and the pointer is reset to NULL;
 *  - a DEBUG_ALERT message announces that recovery lock checking is being
 *    disabled;
 *  - `file` is duplicated with talloc_strdup() into ctdb->recovery_lock_file
 *    and the result is passed to CTDB_NO_MEMORY().
 *
 * NOTE(review): the condition that guards the DEBUG_ALERT message, any early
 * return after it, and the final return statement are not visible in this
 * excerpt.
 */
71 int ctdb_set_recovery_lock_file(struct ctdb_context
*ctdb
, const char *file
)
/* drop any previously configured path before storing the new one */
73 if (ctdb
->recovery_lock_file
!= NULL
) {
74 talloc_free(ctdb
->recovery_lock_file
);
75 ctdb
->recovery_lock_file
= NULL
;
79 DEBUG(DEBUG_ALERT
,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
83 ctdb
->recovery_lock_file
= talloc_strdup(ctdb
, file
);
84 CTDB_NO_MEMORY(ctdb
, ctdb
->recovery_lock_file
);
89 /* Load a nodes list file into a nodes array */
/*
 * Build an array of struct ctdb_node pointers from a node map.
 *
 * Visible steps:
 *  - a zero-initialised array of node_map->num node pointers is allocated
 *    with talloc_zero_array() on `mem_ctx` and stored through `nodes`;
 *  - node_map->num is stored through `num_nodes`;
 *  - for each map entry a zeroed struct ctdb_node is allocated as a talloc
 *    child of the array, its address is copied from the map entry, and its
 *    name is formatted as "<address>:<port>" via ctdb_addr_to_str() and
 *    ctdb_addr_to_port();
 *  - flags are copied from the map entry; when NODE_FLAGS_DELETED is not set
 *    they are replaced by NODE_FLAGS_UNHEALTHY; NODE_FLAGS_DISCONNECTED is
 *    OR-ed in; dead_count is set to 0.
 *
 * NOTE(review): the remainder of the parameter list (`mem_ctx` and
 * `num_nodes` are used but their declarations are not shown), the
 * declaration of `i`, several statements inside the loop, the closing braces
 * and the return statement are missing from this excerpt.  Because the brace
 * lines are missing, it cannot be confirmed here whether the
 * NODE_FLAGS_DISCONNECTED update sits inside or after the preceding `if`.
 */
90 static int convert_node_map_to_list(struct ctdb_context
*ctdb
,
92 struct ctdb_node_map_old
*node_map
,
93 struct ctdb_node
***nodes
,
98 *nodes
= talloc_zero_array(mem_ctx
,
99 struct ctdb_node
*, node_map
->num
);
100 CTDB_NO_MEMORY(ctdb
, *nodes
);
101 *num_nodes
= node_map
->num
;
103 for (i
= 0; i
< node_map
->num
; i
++) {
104 struct ctdb_node
*node
;
/* each node is a talloc child of the array it is listed in */
106 node
= talloc_zero(*nodes
, struct ctdb_node
);
107 CTDB_NO_MEMORY(ctdb
, node
);
110 node
->address
= node_map
->nodes
[i
].addr
;
111 node
->name
= talloc_asprintf(node
, "%s:%u",
112 ctdb_addr_to_str(&node
->address
),
113 ctdb_addr_to_port(&node
->address
));
115 node
->flags
= node_map
->nodes
[i
].flags
;
116 if (!(node
->flags
& NODE_FLAGS_DELETED
)) {
117 node
->flags
= NODE_FLAGS_UNHEALTHY
;
119 node
->flags
|= NODE_FLAGS_DISCONNECTED
;
123 node
->dead_count
= 0;
129 /* Load the nodes list from a file */
/*
 * (Re)load the node list of the context from ctdb->nodes_file.
 *
 * Visible steps:
 *  - the file named by ctdb->nodes_file is read with ctdb_read_nodes_file()
 *    and the result is tested against NULL;
 *  - the current ctdb->nodes array is released with TALLOC_FREE();
 *  - convert_node_map_to_list() fills ctdb->nodes and ctdb->num_nodes, with
 *    ctdb passed as both the context and the talloc memory context;
 *  - node_map is released with talloc_free() at two places, and a DEBUG_ERR
 *    "Failed to load nodes file" message is logged.
 *
 * NOTE(review): the declaration of `ret`, the checks on it, the jumps between
 * the success and failure paths and the rest of the DEBUG_ERR argument list
 * are missing from this excerpt, so the exact control flow (including what
 * happens after the error message) cannot be documented here.
 */
130 void ctdb_load_nodes_file(struct ctdb_context
*ctdb
)
132 struct ctdb_node_map_old
*node_map
;
135 node_map
= ctdb_read_nodes_file(ctdb
, ctdb
->nodes_file
);
136 if (node_map
== NULL
) {
/* discard the previous node array before building the new one */
140 TALLOC_FREE(ctdb
->nodes
);
141 ret
= convert_node_map_to_list(ctdb
, ctdb
, node_map
,
142 &ctdb
->nodes
, &ctdb
->num_nodes
);
147 talloc_free(node_map
);
151 DEBUG(DEBUG_ERR
, ("Failed to load nodes file \"%s\"\n",
153 talloc_free(node_map
);
158 setup the local node address
/*
 * Set the local node address on the context from a textual address.
 *
 * Visible steps:
 *  - a ctdb_sock_addr is allocated with talloc() on ctdb and stored in
 *    ctdb->address, then passed to CTDB_NO_MEMORY();
 *  - `address` is parsed into it with ctdb_parse_address(); a non-zero
 *    result enters a failure branch;
 *  - ctdb->name is formatted as "<address>:<port>" from the parsed address
 *    using ctdb_addr_to_str() and ctdb_addr_to_port().
 *
 * NOTE(review): the body of the parse-failure branch and all return
 * statements are missing from this excerpt.
 */
160 int ctdb_set_address(struct ctdb_context
*ctdb
, const char *address
)
162 ctdb
->address
= talloc(ctdb
, ctdb_sock_addr
);
163 CTDB_NO_MEMORY(ctdb
, ctdb
->address
);
165 if (ctdb_parse_address(ctdb
, address
, ctdb
->address
) != 0) {
169 ctdb
->name
= talloc_asprintf(ctdb
, "%s:%u",
170 ctdb_addr_to_str(ctdb
->address
),
171 ctdb_addr_to_port(ctdb
->address
));
177 return the number of active nodes
/*
 * Scan the node array for nodes that are not inactive.
 *
 * Iterates over ctdb->nodes[0 .. ctdb->num_nodes-1] and selects the entries
 * whose flags have no NODE_FLAGS_INACTIVE bit set.
 *
 * NOTE(review): the counter declaration, the body of the `if` and the return
 * statement are missing from this excerpt; presumably the selected entries
 * are counted and the count is returned - confirm against the complete
 * source.
 */
179 uint32_t ctdb_get_num_active_nodes(struct ctdb_context
*ctdb
)
183 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
184 if (!(ctdb
->nodes
[i
]->flags
& NODE_FLAGS_INACTIVE
)) {
193 called when we need to process a packet. This can be a requeued packet
194 after a lockwait, or a real packet from another node
/*
 * Process one incoming (or requeued) protocol packet.
 *
 * Visible steps:
 *  - a temporary talloc context is created under ctdb and `hdr` is moved
 *    onto it with talloc_steal(); the temporary context is released with
 *    talloc_free() at the end of the function;
 *  - the request id, operation, length, source and destination node of the
 *    packet are logged at DEBUG_DEBUG level;
 *  - a first switch on hdr->operation covers CTDB_REPLY_CALL,
 *    CTDB_REQ_DMASTER and CTDB_REPLY_DMASTER (further labels may be missing
 *    from this excerpt).  For those operations the NODE_FLAGS_BANNED bit of
 *    the local node (ctdb->nodes[ctdb->pnn]) is tested, and the generation
 *    in the packet header is compared with ctdb->vnn_map->generation; both
 *    conditions log a DEBUG_DEBUG message;
 *  - a second switch on hdr->operation increments a statistics counter via
 *    CTDB_INCREMENT_STAT() and calls the matching handler:
 *    ctdb_request_call(), ctdb_reply_call(), ctdb_reply_error(),
 *    ctdb_request_dmaster(), ctdb_reply_dmaster(), ctdb_request_message(),
 *    ctdb_request_control() or ctdb_reply_control().  For
 *    CTDB_REQ_KEEPALIVE only the keepalive_packets_recv counter update is
 *    visible;
 *  - an unknown operation is reported at DEBUG_CRIT level.
 *
 * NOTE(review): this excerpt is missing the declaration of tmp_ctx, at least
 * one `case` label (the one preceding the ctdb_request_call() call), the
 * `break`/jump statements, the closing braces and fragments of the two
 * DEBUG_DEBUG format strings.  What happens after the banned and
 * generation-mismatch messages is therefore not visible.  The block comment
 * opened at original line 228 also has no visible terminator here.
 */
196 void ctdb_input_pkt(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
200 /* place the packet as a child of the tmp_ctx. We then use
201 talloc_free() below to free it. If any of the calls want
202 to keep it, then they will steal it somewhere else, and the
203 talloc_free() will only free the tmp_ctx */
204 tmp_ctx
= talloc_new(ctdb
);
205 talloc_steal(tmp_ctx
, hdr
);
207 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb request %u of type %u length %u from "
208 "node %u to %u\n", hdr
->reqid
, hdr
->operation
, hdr
->length
,
209 hdr
->srcnode
, hdr
->destnode
));
211 switch (hdr
->operation
) {
213 case CTDB_REPLY_CALL
:
214 case CTDB_REQ_DMASTER
:
215 case CTDB_REPLY_DMASTER
:
216 /* we don't allow these calls when banned */
217 if (ctdb
->nodes
[ctdb
->pnn
]->flags
& NODE_FLAGS_BANNED
) {
218 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb operation %u"
220 " length %u from node %u to %u while node"
222 hdr
->operation
, hdr
->reqid
,
224 hdr
->srcnode
, hdr
->destnode
));
228 /* for ctdb_call inter-node operations verify that the
229 remote node that sent us the call is running in the
230 same generation instance as this node
232 if (ctdb
->vnn_map
->generation
!= hdr
->generation
) {
233 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb operation %u"
235 " length %u from node %u to %u had an"
236 " invalid generation id:%u while our"
237 " generation id is:%u\n",
238 hdr
->operation
, hdr
->reqid
,
240 hdr
->srcnode
, hdr
->destnode
,
241 hdr
->generation
, ctdb
->vnn_map
->generation
));
246 switch (hdr
->operation
) {
248 CTDB_INCREMENT_STAT(ctdb
, node
.req_call
);
249 ctdb_request_call(ctdb
, hdr
);
252 case CTDB_REPLY_CALL
:
253 CTDB_INCREMENT_STAT(ctdb
, node
.reply_call
);
254 ctdb_reply_call(ctdb
, hdr
);
257 case CTDB_REPLY_ERROR
:
258 CTDB_INCREMENT_STAT(ctdb
, node
.reply_error
);
259 ctdb_reply_error(ctdb
, hdr
);
262 case CTDB_REQ_DMASTER
:
263 CTDB_INCREMENT_STAT(ctdb
, node
.req_dmaster
);
264 ctdb_request_dmaster(ctdb
, hdr
);
267 case CTDB_REPLY_DMASTER
:
268 CTDB_INCREMENT_STAT(ctdb
, node
.reply_dmaster
);
269 ctdb_reply_dmaster(ctdb
, hdr
);
272 case CTDB_REQ_MESSAGE
:
273 CTDB_INCREMENT_STAT(ctdb
, node
.req_message
);
274 ctdb_request_message(ctdb
, hdr
);
277 case CTDB_REQ_CONTROL
:
278 CTDB_INCREMENT_STAT(ctdb
, node
.req_control
);
279 ctdb_request_control(ctdb
, hdr
);
282 case CTDB_REPLY_CONTROL
:
283 CTDB_INCREMENT_STAT(ctdb
, node
.reply_control
);
284 ctdb_reply_control(ctdb
, hdr
);
287 case CTDB_REQ_KEEPALIVE
:
288 CTDB_INCREMENT_STAT(ctdb
, keepalive_packets_recv
);
292 DEBUG(DEBUG_CRIT
,("%s: Packet with unknown operation %u\n",
293 __location__
, hdr
->operation
));
298 talloc_free(tmp_ctx
);
303 called by the transport layer when a node is dead
/*
 * Handle a node being reported dead.
 *
 * Visible steps:
 *  - if the node already has NODE_FLAGS_DISCONNECTED set, a DEBUG_INFO
 *    message says so;
 *  - the context's num_connected counter is decremented, the node gets
 *    NODE_FLAGS_DISCONNECTED and NODE_FLAGS_UNHEALTHY set, and its
 *    dead_count is reset to 0;
 *  - a DEBUG_NOTICE message is logged and ctdb_daemon_cancel_controls() is
 *    called for the node;
 *  - if the context has no transport methods (methods == NULL) a DEBUG_ERR
 *    message is logged; the transport's restart() hook is invoked for the
 *    node.
 *
 * NOTE(review): the closing braces and any return statements inside the two
 * `if` bodies are missing from this excerpt; presumably both conditions
 * return early - confirm against the complete source.
 */
305 void ctdb_node_dead(struct ctdb_node
*node
)
307 if (node
->flags
& NODE_FLAGS_DISCONNECTED
) {
308 DEBUG(DEBUG_INFO
,("%s: node %s is already marked disconnected: %u connected\n",
309 node
->ctdb
->name
, node
->name
,
310 node
->ctdb
->num_connected
));
313 node
->ctdb
->num_connected
--;
314 node
->flags
|= NODE_FLAGS_DISCONNECTED
| NODE_FLAGS_UNHEALTHY
;
316 node
->dead_count
= 0;
318 DEBUG(DEBUG_NOTICE
,("%s: node %s is dead: %u connected\n",
319 node
->ctdb
->name
, node
->name
, node
->ctdb
->num_connected
));
320 ctdb_daemon_cancel_controls(node
->ctdb
, node
);
322 if (node
->ctdb
->methods
== NULL
) {
323 DEBUG(DEBUG_ERR
,(__location__
" Can not restart transport while shutting down daemon.\n"));
327 node
->ctdb
->methods
->restart(node
);
331 called by the transport layer when a node is connected
/*
 * Handle a node being reported connected.
 *
 * Visible steps:
 *  - if NODE_FLAGS_DISCONNECTED is not set on the node, a DEBUG_INFO message
 *    says it is already marked connected;
 *  - the context's num_connected counter is incremented, the node's
 *    dead_count is reset to 0, NODE_FLAGS_DISCONNECTED is cleared and
 *    NODE_FLAGS_UNHEALTHY is set;
 *  - a "connected to" message with the new connection count is logged.
 *
 * NOTE(review): the closing brace (and any return) of the `if`, and the
 * opening of the final log call (macro name and level), are missing from
 * this excerpt.
 */
333 void ctdb_node_connected(struct ctdb_node
*node
)
335 if (!(node
->flags
& NODE_FLAGS_DISCONNECTED
)) {
336 DEBUG(DEBUG_INFO
,("%s: node %s is already marked connected: %u connected\n",
337 node
->ctdb
->name
, node
->name
,
338 node
->ctdb
->num_connected
));
341 node
->ctdb
->num_connected
++;
342 node
->dead_count
= 0;
343 node
->flags
&= ~NODE_FLAGS_DISCONNECTED
;
344 node
->flags
|= NODE_FLAGS_UNHEALTHY
;
346 ("%s: connected to %s - %u connected\n",
347 node
->ctdb
->name
, node
->name
, node
->ctdb
->num_connected
));
/*
 * Member declarations of a record pairing a ctdb context with a packet
 * header.
 *
 * NOTE(review): the enclosing struct header and closing brace are missing
 * from this excerpt.  These are presumably the members of struct queue_next,
 * whose `ctdb` and `hdr` fields are read by queue_next_trigger() - confirm
 * against the complete source.
 */
351 struct ctdb_context
*ctdb
;
352 struct ctdb_req_header
*hdr
;
357 triggered when a deferred packet is due
/*
 * Timer callback that delivers a deferred packet.
 *
 * `private_data` is converted to a struct queue_next pointer with
 * talloc_get_type(), and the stored header is passed to ctdb_input_pkt()
 * together with the stored context.  The `ev`, `te` and `t` arguments are
 * not used in the visible lines.
 *
 * NOTE(review): the braces and any statements after the ctdb_input_pkt()
 * call (for example releasing `q`) are missing from this excerpt.
 */
359 static void queue_next_trigger(struct tevent_context
*ev
,
360 struct tevent_timer
*te
,
361 struct timeval t
, void *private_data
)
363 struct queue_next
*q
= talloc_get_type(private_data
, struct queue_next
);
364 ctdb_input_pkt(q
->ctdb
, q
->hdr
);
369 defer a packet, so it is processed on the next event loop
370 this is used for sending packets to ourselves
/*
 * Defer delivery of a packet addressed to the local node.
 *
 * Visible steps:
 *  - a struct queue_next is allocated with talloc() on ctdb; a DEBUG_ERR
 *    message covers the allocation failure;
 *  - the packet is copied with talloc_memdup() (hdr->length bytes, allocated
 *    on ctdb) into q->hdr; a DEBUG_ERR message covers a failed copy;
 *  - a timer with a zero timeval is registered on ctdb->ev with
 *    tevent_add_timer(), using `q` as both the talloc context of the timer
 *    and the callback data, and queue_next_trigger() as the callback.
 *
 * NOTE(review): the NULL check on `q`, the assignment of q->ctdb, and the
 * returns on the failure paths are missing from this excerpt.  Both a direct
 * ctdb_input_pkt() call and the tevent_add_timer() call are visible, while
 * the original lines around them (386, 389, 391) are not.  Presumably a
 * preprocessor conditional selects only one of the two - confirm against the
 * complete source.
 */
372 static void ctdb_defer_packet(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
374 struct queue_next
*q
;
375 q
= talloc(ctdb
, struct queue_next
);
377 DEBUG(DEBUG_ERR
,(__location__
" Failed to allocate deferred packet\n"));
381 q
->hdr
= talloc_memdup(ctdb
, hdr
, hdr
->length
);
382 if (q
->hdr
== NULL
) {
383 DEBUG(DEBUG_ERR
,("Error copying deferred packet to self\n"));
387 /* use this to put packets directly into our recv function */
388 ctdb_input_pkt(q
->ctdb
, q
->hdr
);
390 tevent_add_timer(ctdb
->ev
, q
, timeval_zero(), queue_next_trigger
, q
);
396 broadcast a packet to all nodes
/*
 * Send a packet to every node in ctdb->nodes.
 *
 * For each entry the NODE_FLAGS_DELETED bit is tested, hdr->destnode is
 * overwritten with that node's pnn and the packet is queued with
 * ctdb_queue_packet().  The caller's header is modified in place: destnode
 * is rewritten on each iteration.
 *
 * NOTE(review): the declaration of `i` and the body of the `if` are missing
 * from this excerpt; presumably deleted nodes are skipped - confirm against
 * the complete source.
 */
398 static void ctdb_broadcast_packet_all(struct ctdb_context
*ctdb
,
399 struct ctdb_req_header
*hdr
)
402 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
403 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
406 hdr
->destnode
= ctdb
->nodes
[i
]->pnn
;
407 ctdb_queue_packet(ctdb
, hdr
);
412 broadcast a packet to all nodes in the current vnnmap
/*
 * Send a packet to every node listed in the current vnn map.
 *
 * For each of the ctdb->vnn_map->size entries, hdr->destnode is overwritten
 * with ctdb->vnn_map->map[i] and the packet is queued with
 * ctdb_queue_packet().  The caller's header is modified in place.
 *
 * NOTE(review): the declaration of `i` and the braces are missing from this
 * excerpt.
 */
414 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context
*ctdb
,
415 struct ctdb_req_header
*hdr
)
418 for (i
=0;i
<ctdb
->vnn_map
->size
;i
++) {
419 hdr
->destnode
= ctdb
->vnn_map
->map
[i
];
420 ctdb_queue_packet(ctdb
, hdr
);
425 broadcast a packet to all connected nodes
/*
 * Send a packet to every node that is not marked disconnected.
 *
 * For each entry of ctdb->nodes the NODE_FLAGS_DELETED bit is tested first.
 * Entries without NODE_FLAGS_DISCONNECTED then have their pnn written to
 * hdr->destnode and the packet is queued with ctdb_queue_packet().  The
 * caller's header is modified in place.
 *
 * NOTE(review): the declaration of `i` and the body of the
 * NODE_FLAGS_DELETED test are missing from this excerpt; presumably deleted
 * nodes are skipped - confirm against the complete source.
 */
427 static void ctdb_broadcast_packet_connected(struct ctdb_context
*ctdb
,
428 struct ctdb_req_header
*hdr
)
431 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
432 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
435 if (!(ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DISCONNECTED
)) {
436 hdr
->destnode
= ctdb
->nodes
[i
]->pnn
;
437 ctdb_queue_packet(ctdb
, hdr
);
443 queue a packet or die
/*
 * Queue a packet for delivery to hdr->destnode.
 *
 * Visible steps:
 *  - the special destinations CTDB_BROADCAST_ALL, CTDB_BROADCAST_VNNMAP and
 *    CTDB_BROADCAST_CONNECTED are handed to the matching
 *    ctdb_broadcast_packet_*() helper;
 *  - the node_packets_sent statistic is incremented;
 *  - the destination is validated with ctdb_validate_pnn(); an invalid
 *    destination is logged at DEBUG_CRIT level;
 *  - the destination node is looked up in ctdb->nodes[]; a node with
 *    NODE_FLAGS_DELETED set is logged at DEBUG_ERR level;
 *  - a packet addressed to the local pnn goes to ctdb_defer_packet();
 *  - a missing transport (ctdb->methods == NULL) is logged at DEBUG_ALERT
 *    level;
 *  - the packet is handed to the transport's queue_pkt() hook, and a
 *    non-zero result triggers ctdb_fatal().
 *
 * NOTE(review): the returns that end each special case or error branch, the
 * closing braces and the remaining arguments of the DEBUG_CRIT call are
 * missing from this excerpt, so the early-exit behaviour is presumed rather
 * than visible.
 */
445 void ctdb_queue_packet(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
447 struct ctdb_node
*node
;
449 switch (hdr
->destnode
) {
450 case CTDB_BROADCAST_ALL
:
451 ctdb_broadcast_packet_all(ctdb
, hdr
);
453 case CTDB_BROADCAST_VNNMAP
:
454 ctdb_broadcast_packet_vnnmap(ctdb
, hdr
);
456 case CTDB_BROADCAST_CONNECTED
:
457 ctdb_broadcast_packet_connected(ctdb
, hdr
);
461 CTDB_INCREMENT_STAT(ctdb
, node_packets_sent
);
463 if (!ctdb_validate_pnn(ctdb
, hdr
->destnode
)) {
464 DEBUG(DEBUG_CRIT
,(__location__
" cant send to node %u that does not exist\n",
469 node
= ctdb
->nodes
[hdr
->destnode
];
471 if (node
->flags
& NODE_FLAGS_DELETED
) {
472 DEBUG(DEBUG_ERR
, (__location__
" Can not queue packet to DELETED node %d\n", hdr
->destnode
));
/* packets for the local node go through the deferred path */
476 if (node
->pnn
== ctdb
->pnn
) {
477 ctdb_defer_packet(ctdb
, hdr
);
481 if (ctdb
->methods
== NULL
) {
482 DEBUG(DEBUG_ALERT
, (__location__
" Can not queue packet. "
483 "Transport is DOWN\n"));
488 if (ctdb
->methods
->queue_pkt(node
, (uint8_t *)hdr
, hdr
->length
) != 0) {
489 ctdb_fatal(ctdb
, "Unable to queue packet\n");
497 a valgrind hack to allow us to get opcode specific backtraces
498 very ugly, and relies on no compiler optimisation!
/*
 * Queue a packet, taking an additional opcode argument.
 *
 * The visible lines define a DO_OP(x) macro that expands to a `case` label
 * calling ctdb_queue_packet(ctdb, hdr) followed by `break`, plus a plain
 * ctdb_queue_packet(ctdb, hdr) call.
 *
 * NOTE(review): original lines 504-604 are missing from this excerpt.
 * Presumably they hold a switch on `opcode` built from DO_OP() entries, with
 * the final call as its default - confirm against the complete source.
 */
500 void ctdb_queue_packet_opcode(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
, unsigned opcode
)
503 #define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
605 ctdb_queue_packet(ctdb
, hdr
);