2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/util/dlinklist.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "../include/ctdb_private.h"
28 choose the transport we will use
30 int ctdb_set_transport(struct ctdb_context
*ctdb
, const char *transport
)
32 ctdb
->transport
= talloc_strdup(ctdb
, transport
);
33 CTDB_NO_MEMORY(ctdb
, ctdb
->transport
);
39 Check whether an ip is a valid node ip
40 Returns the node id for this ip address or -1
42 int ctdb_ip_to_nodeid(struct ctdb_context
*ctdb
, const char *nodeip
)
46 for (nodeid
=0;nodeid
<ctdb
->num_nodes
;nodeid
++) {
47 if (ctdb
->nodes
[nodeid
]->flags
& NODE_FLAGS_DELETED
) {
50 if (!strcmp(ctdb
->nodes
[nodeid
]->address
.address
, nodeip
)) {
59 choose the recovery lock file
61 int ctdb_set_recovery_lock_file(struct ctdb_context
*ctdb
, const char *file
)
63 if (ctdb
->recovery_lock_file
!= NULL
) {
64 talloc_free(ctdb
->recovery_lock_file
);
65 ctdb
->recovery_lock_file
= NULL
;
69 DEBUG(DEBUG_ALERT
,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
73 ctdb
->recovery_lock_file
= talloc_strdup(ctdb
, file
);
74 CTDB_NO_MEMORY(ctdb
, ctdb
->recovery_lock_file
);
80 add a node to the list of nodes
82 static int ctdb_add_node(struct ctdb_context
*ctdb
, char *nstr
)
84 struct ctdb_node
*node
, **nodep
;
86 nodep
= talloc_realloc(ctdb
, ctdb
->nodes
, struct ctdb_node
*, ctdb
->num_nodes
+1);
87 CTDB_NO_MEMORY(ctdb
, nodep
);
90 nodep
= &ctdb
->nodes
[ctdb
->num_nodes
];
91 (*nodep
) = talloc_zero(ctdb
->nodes
, struct ctdb_node
);
92 CTDB_NO_MEMORY(ctdb
, *nodep
);
95 if (ctdb_parse_address(ctdb
, node
, nstr
, &node
->address
) != 0) {
99 node
->name
= talloc_asprintf(node
, "%s:%u",
100 node
->address
.address
,
102 /* this assumes that the nodes are kept in sorted order, and no gaps */
103 node
->pnn
= ctdb
->num_nodes
;
105 /* nodes start out disconnected and unhealthy */
106 node
->flags
= (NODE_FLAGS_DISCONNECTED
| NODE_FLAGS_UNHEALTHY
);
109 node
->dead_count
= 0;
115 add an entry for a "deleted" node to the list of nodes.
116 a "deleted" node is a node that is commented out from the nodes file.
117 this is used to prevent that subsequent nodes in the nodes list
118 change their pnn value if a node is "delete" by commenting it out and then
119 using "ctdb reloadnodes" at runtime.
121 static int ctdb_add_deleted_node(struct ctdb_context
*ctdb
)
123 struct ctdb_node
*node
, **nodep
;
125 nodep
= talloc_realloc(ctdb
, ctdb
->nodes
, struct ctdb_node
*, ctdb
->num_nodes
+1);
126 CTDB_NO_MEMORY(ctdb
, nodep
);
129 nodep
= &ctdb
->nodes
[ctdb
->num_nodes
];
130 (*nodep
) = talloc_zero(ctdb
->nodes
, struct ctdb_node
);
131 CTDB_NO_MEMORY(ctdb
, *nodep
);
134 if (ctdb_parse_address(ctdb
, node
, "0.0.0.0", &node
->address
) != 0) {
135 DEBUG(DEBUG_ERR
,("Failed to setup deleted node %d\n", ctdb
->num_nodes
));
139 node
->name
= talloc_strdup(node
, "0.0.0.0:0");
141 /* this assumes that the nodes are kept in sorted order, and no gaps */
142 node
->pnn
= ctdb
->num_nodes
;
144 /* this node is permanently deleted/disconnected */
145 node
->flags
= NODE_FLAGS_DELETED
|NODE_FLAGS_DISCONNECTED
;
148 node
->dead_count
= 0;
155 setup the node list from a file
157 static int ctdb_set_nlist(struct ctdb_context
*ctdb
, const char *nlist
)
163 talloc_free(ctdb
->nodes
);
167 lines
= file_lines_load(nlist
, &nlines
, 0, ctdb
);
169 ctdb_set_error(ctdb
, "Failed to load nlist '%s'\n", nlist
);
172 while (nlines
> 0 && strcmp(lines
[nlines
-1], "") == 0) {
176 for (i
=0; i
< nlines
; i
++) {
180 /* strip leading spaces */
181 while((*node
== ' ') || (*node
== '\t')) {
185 if (ctdb_add_deleted_node(ctdb
) != 0) {
191 if (strcmp(node
, "") == 0) {
194 if (ctdb_add_node(ctdb
, node
) != 0) {
204 void ctdb_load_nodes_file(struct ctdb_context
*ctdb
)
208 ret
= ctdb_set_nlist(ctdb
, ctdb
->nodes_file
);
210 DEBUG(DEBUG_ALERT
,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb
)));
216 setup the local node address
218 int ctdb_set_address(struct ctdb_context
*ctdb
, const char *address
)
220 if (ctdb_parse_address(ctdb
, ctdb
, address
, &ctdb
->address
) != 0) {
224 ctdb
->name
= talloc_asprintf(ctdb
, "%s:%u",
225 ctdb
->address
.address
,
232 return the number of active nodes
234 uint32_t ctdb_get_num_active_nodes(struct ctdb_context
*ctdb
)
238 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
239 if (!(ctdb
->nodes
[i
]->flags
& NODE_FLAGS_INACTIVE
)) {
248 called when we need to process a packet. This can be a requeued packet
249 after a lockwait, or a real packet from another node
251 void ctdb_input_pkt(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
255 /* place the packet as a child of the tmp_ctx. We then use
256 talloc_free() below to free it. If any of the calls want
257 to keep it, then they will steal it somewhere else, and the
258 talloc_free() will only free the tmp_ctx */
259 tmp_ctx
= talloc_new(ctdb
);
260 talloc_steal(tmp_ctx
, hdr
);
262 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb request %u of type %u length %u from "
263 "node %u to %u\n", hdr
->reqid
, hdr
->operation
, hdr
->length
,
264 hdr
->srcnode
, hdr
->destnode
));
266 switch (hdr
->operation
) {
268 case CTDB_REPLY_CALL
:
269 case CTDB_REQ_DMASTER
:
270 case CTDB_REPLY_DMASTER
:
271 /* we dont allow these calls when banned */
272 if (ctdb
->nodes
[ctdb
->pnn
]->flags
& NODE_FLAGS_BANNED
) {
273 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb operation %u"
275 " length %u from node %u to %u while node"
277 hdr
->operation
, hdr
->reqid
,
279 hdr
->srcnode
, hdr
->destnode
));
283 /* for ctdb_call inter-node operations verify that the
284 remote node that sent us the call is running in the
285 same generation instance as this node
287 if (ctdb
->vnn_map
->generation
!= hdr
->generation
) {
288 DEBUG(DEBUG_DEBUG
,(__location__
" ctdb operation %u"
290 " length %u from node %u to %u had an"
291 " invalid generation id:%u while our"
292 " generation id is:%u\n",
293 hdr
->operation
, hdr
->reqid
,
295 hdr
->srcnode
, hdr
->destnode
,
296 hdr
->generation
, ctdb
->vnn_map
->generation
));
301 switch (hdr
->operation
) {
303 CTDB_INCREMENT_STAT(ctdb
, node
.req_call
);
304 ctdb_request_call(ctdb
, hdr
);
307 case CTDB_REPLY_CALL
:
308 CTDB_INCREMENT_STAT(ctdb
, node
.reply_call
);
309 ctdb_reply_call(ctdb
, hdr
);
312 case CTDB_REPLY_ERROR
:
313 CTDB_INCREMENT_STAT(ctdb
, node
.reply_error
);
314 ctdb_reply_error(ctdb
, hdr
);
317 case CTDB_REQ_DMASTER
:
318 CTDB_INCREMENT_STAT(ctdb
, node
.req_dmaster
);
319 ctdb_request_dmaster(ctdb
, hdr
);
322 case CTDB_REPLY_DMASTER
:
323 CTDB_INCREMENT_STAT(ctdb
, node
.reply_dmaster
);
324 ctdb_reply_dmaster(ctdb
, hdr
);
327 case CTDB_REQ_MESSAGE
:
328 CTDB_INCREMENT_STAT(ctdb
, node
.req_message
);
329 ctdb_request_message(ctdb
, hdr
);
332 case CTDB_REQ_CONTROL
:
333 CTDB_INCREMENT_STAT(ctdb
, node
.req_control
);
334 ctdb_request_control(ctdb
, hdr
);
337 case CTDB_REPLY_CONTROL
:
338 CTDB_INCREMENT_STAT(ctdb
, node
.reply_control
);
339 ctdb_reply_control(ctdb
, hdr
);
342 case CTDB_REQ_KEEPALIVE
:
343 CTDB_INCREMENT_STAT(ctdb
, keepalive_packets_recv
);
347 DEBUG(DEBUG_CRIT
,("%s: Packet with unknown operation %u\n",
348 __location__
, hdr
->operation
));
353 talloc_free(tmp_ctx
);
358 called by the transport layer when a node is dead
360 void ctdb_node_dead(struct ctdb_node
*node
)
362 if (node
->flags
& NODE_FLAGS_DISCONNECTED
) {
363 DEBUG(DEBUG_INFO
,("%s: node %s is already marked disconnected: %u connected\n",
364 node
->ctdb
->name
, node
->name
,
365 node
->ctdb
->num_connected
));
368 node
->ctdb
->num_connected
--;
369 node
->flags
|= NODE_FLAGS_DISCONNECTED
| NODE_FLAGS_UNHEALTHY
;
371 node
->dead_count
= 0;
373 DEBUG(DEBUG_NOTICE
,("%s: node %s is dead: %u connected\n",
374 node
->ctdb
->name
, node
->name
, node
->ctdb
->num_connected
));
375 ctdb_daemon_cancel_controls(node
->ctdb
, node
);
377 if (node
->ctdb
->methods
== NULL
) {
378 DEBUG(DEBUG_ERR
,(__location__
" Can not restart transport while shutting down daemon.\n"));
382 node
->ctdb
->methods
->restart(node
);
386 called by the transport layer when a node is connected
388 void ctdb_node_connected(struct ctdb_node
*node
)
390 if (!(node
->flags
& NODE_FLAGS_DISCONNECTED
)) {
391 DEBUG(DEBUG_INFO
,("%s: node %s is already marked connected: %u connected\n",
392 node
->ctdb
->name
, node
->name
,
393 node
->ctdb
->num_connected
));
396 node
->ctdb
->num_connected
++;
397 node
->dead_count
= 0;
398 node
->flags
&= ~NODE_FLAGS_DISCONNECTED
;
399 node
->flags
|= NODE_FLAGS_UNHEALTHY
;
401 ("%s: connected to %s - %u connected\n",
402 node
->ctdb
->name
, node
->name
, node
->ctdb
->num_connected
));
/*
  a packet waiting to be handed to ctdb_input_pkt() from the event loop
*/
struct queue_next {
	struct ctdb_context *ctdb;
	struct ctdb_req_header *hdr;
};
412 triggered when a deferred packet is due
414 static void queue_next_trigger(struct event_context
*ev
, struct timed_event
*te
,
415 struct timeval t
, void *private_data
)
417 struct queue_next
*q
= talloc_get_type(private_data
, struct queue_next
);
418 ctdb_input_pkt(q
->ctdb
, q
->hdr
);
423 defer a packet, so it is processed on the next event loop
424 this is used for sending packets to ourselves
426 static void ctdb_defer_packet(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
428 struct queue_next
*q
;
429 q
= talloc(ctdb
, struct queue_next
);
431 DEBUG(DEBUG_ERR
,(__location__
" Failed to allocate deferred packet\n"));
435 q
->hdr
= talloc_memdup(ctdb
, hdr
, hdr
->length
);
436 if (q
->hdr
== NULL
) {
437 DEBUG(DEBUG_ERR
,("Error copying deferred packet to self\n"));
441 /* use this to put packets directly into our recv function */
442 ctdb_input_pkt(q
->ctdb
, q
->hdr
);
444 event_add_timed(ctdb
->ev
, q
, timeval_zero(), queue_next_trigger
, q
);
450 broadcast a packet to all nodes
452 static void ctdb_broadcast_packet_all(struct ctdb_context
*ctdb
,
453 struct ctdb_req_header
*hdr
)
456 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
457 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
460 hdr
->destnode
= ctdb
->nodes
[i
]->pnn
;
461 ctdb_queue_packet(ctdb
, hdr
);
466 broadcast a packet to all nodes in the current vnnmap
468 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context
*ctdb
,
469 struct ctdb_req_header
*hdr
)
472 for (i
=0;i
<ctdb
->vnn_map
->size
;i
++) {
473 hdr
->destnode
= ctdb
->vnn_map
->map
[i
];
474 ctdb_queue_packet(ctdb
, hdr
);
479 broadcast a packet to all connected nodes
481 static void ctdb_broadcast_packet_connected(struct ctdb_context
*ctdb
,
482 struct ctdb_req_header
*hdr
)
485 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
486 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
489 if (!(ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DISCONNECTED
)) {
490 hdr
->destnode
= ctdb
->nodes
[i
]->pnn
;
491 ctdb_queue_packet(ctdb
, hdr
);
497 queue a packet or die
499 void ctdb_queue_packet(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
)
501 struct ctdb_node
*node
;
503 switch (hdr
->destnode
) {
504 case CTDB_BROADCAST_ALL
:
505 ctdb_broadcast_packet_all(ctdb
, hdr
);
507 case CTDB_BROADCAST_VNNMAP
:
508 ctdb_broadcast_packet_vnnmap(ctdb
, hdr
);
510 case CTDB_BROADCAST_CONNECTED
:
511 ctdb_broadcast_packet_connected(ctdb
, hdr
);
515 CTDB_INCREMENT_STAT(ctdb
, node_packets_sent
);
517 if (!ctdb_validate_pnn(ctdb
, hdr
->destnode
)) {
518 DEBUG(DEBUG_CRIT
,(__location__
" cant send to node %u that does not exist\n",
523 node
= ctdb
->nodes
[hdr
->destnode
];
525 if (node
->flags
& NODE_FLAGS_DELETED
) {
526 DEBUG(DEBUG_ERR
, (__location__
" Can not queue packet to DELETED node %d\n", hdr
->destnode
));
530 if (node
->pnn
== ctdb
->pnn
) {
531 ctdb_defer_packet(ctdb
, hdr
);
535 if (ctdb
->methods
== NULL
) {
536 DEBUG(DEBUG_ALERT
, (__location__
" Can not queue packet. "
537 "Transport is DOWN\n"));
542 if (ctdb
->methods
->queue_pkt(node
, (uint8_t *)hdr
, hdr
->length
) != 0) {
543 ctdb_fatal(ctdb
, "Unable to queue packet\n");
551 a valgrind hack to allow us to get opcode specific backtraces
552 very ugly, and relies on no compiler optimisation!
554 void ctdb_queue_packet_opcode(struct ctdb_context
*ctdb
, struct ctdb_req_header
*hdr
, unsigned opcode
)
557 #define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
659 ctdb_queue_packet(ctdb
, hdr
);