ctdb-daemon: Set node PNN in one place
[Samba.git] / ctdb / server / ctdb_server.c
blobbd731954bb524ec1738c625cd4c9984bd1bef963
1 /*
2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "includes.h"
21 #include "tdb.h"
22 #include "lib/util/dlinklist.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "../include/ctdb_private.h"
28 choose the transport we will use
30 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
32 ctdb->transport = talloc_strdup(ctdb, transport);
33 CTDB_NO_MEMORY(ctdb, ctdb->transport);
35 return 0;
39 Check whether an ip is a valid node ip
40 Returns the node id for this ip address or -1
42 int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip)
44 int nodeid;
46 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
47 if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
48 continue;
50 if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) {
51 return nodeid;
55 return -1;
59 choose the recovery lock file
61 int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
63 if (ctdb->recovery_lock_file != NULL) {
64 talloc_free(ctdb->recovery_lock_file);
65 ctdb->recovery_lock_file = NULL;
68 if (file == NULL) {
69 DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
70 return 0;
73 ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
74 CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file);
76 return 0;
80 add a node to the list of nodes
82 static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
84 struct ctdb_node *node, **nodep;
86 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
87 CTDB_NO_MEMORY(ctdb, nodep);
89 ctdb->nodes = nodep;
90 nodep = &ctdb->nodes[ctdb->num_nodes];
91 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
92 CTDB_NO_MEMORY(ctdb, *nodep);
93 node = *nodep;
95 if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
96 return -1;
98 node->ctdb = ctdb;
99 node->name = talloc_asprintf(node, "%s:%u",
100 node->address.address,
101 node->address.port);
102 /* this assumes that the nodes are kept in sorted order, and no gaps */
103 node->pnn = ctdb->num_nodes;
105 /* nodes start out disconnected and unhealthy */
106 node->flags = (NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY);
108 ctdb->num_nodes++;
109 node->dead_count = 0;
111 return 0;
115 add an entry for a "deleted" node to the list of nodes.
116 a "deleted" node is a node that is commented out from the nodes file.
117 this is used to prevent that subsequent nodes in the nodes list
118 change their pnn value if a node is "delete" by commenting it out and then
119 using "ctdb reloadnodes" at runtime.
121 static int ctdb_add_deleted_node(struct ctdb_context *ctdb)
123 struct ctdb_node *node, **nodep;
125 nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
126 CTDB_NO_MEMORY(ctdb, nodep);
128 ctdb->nodes = nodep;
129 nodep = &ctdb->nodes[ctdb->num_nodes];
130 (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
131 CTDB_NO_MEMORY(ctdb, *nodep);
132 node = *nodep;
134 if (ctdb_parse_address(ctdb, node, "0.0.0.0", &node->address) != 0) {
135 DEBUG(DEBUG_ERR,("Failed to setup deleted node %d\n", ctdb->num_nodes));
136 return -1;
138 node->ctdb = ctdb;
139 node->name = talloc_strdup(node, "0.0.0.0:0");
141 /* this assumes that the nodes are kept in sorted order, and no gaps */
142 node->pnn = ctdb->num_nodes;
144 /* this node is permanently deleted/disconnected */
145 node->flags = NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED;
147 ctdb->num_nodes++;
148 node->dead_count = 0;
150 return 0;
155 setup the node list from a file
157 static int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
159 char **lines;
160 int nlines;
161 int i;
163 talloc_free(ctdb->nodes);
164 ctdb->nodes = NULL;
165 ctdb->num_nodes = 0;
167 lines = file_lines_load(nlist, &nlines, 0, ctdb);
168 if (lines == NULL) {
169 ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
170 return -1;
172 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
173 nlines--;
176 for (i=0; i < nlines; i++) {
177 char *node;
179 node = lines[i];
180 /* strip leading spaces */
181 while((*node == ' ') || (*node == '\t')) {
182 node++;
184 if (*node == '#') {
185 if (ctdb_add_deleted_node(ctdb) != 0) {
186 talloc_free(lines);
187 return -1;
189 continue;
191 if (strcmp(node, "") == 0) {
192 continue;
194 if (ctdb_add_node(ctdb, node) != 0) {
195 talloc_free(lines);
196 return -1;
200 talloc_free(lines);
201 return 0;
204 void ctdb_load_nodes_file(struct ctdb_context *ctdb)
206 int ret;
208 ret = ctdb_set_nlist(ctdb, ctdb->nodes_file);
209 if (ret == -1) {
210 DEBUG(DEBUG_ALERT,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb)));
211 exit(1);
216 setup the local node address
218 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
220 if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) {
221 return -1;
224 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
225 ctdb->address.address,
226 ctdb->address.port);
227 return 0;
232 return the number of active nodes
234 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
236 int i;
237 uint32_t count=0;
238 for (i=0; i < ctdb->num_nodes; i++) {
239 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
240 count++;
243 return count;
248 called when we need to process a packet. This can be a requeued packet
249 after a lockwait, or a real packet from another node
251 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
253 TALLOC_CTX *tmp_ctx;
255 /* place the packet as a child of the tmp_ctx. We then use
256 talloc_free() below to free it. If any of the calls want
257 to keep it, then they will steal it somewhere else, and the
258 talloc_free() will only free the tmp_ctx */
259 tmp_ctx = talloc_new(ctdb);
260 talloc_steal(tmp_ctx, hdr);
262 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
263 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
264 hdr->srcnode, hdr->destnode));
266 switch (hdr->operation) {
267 case CTDB_REQ_CALL:
268 case CTDB_REPLY_CALL:
269 case CTDB_REQ_DMASTER:
270 case CTDB_REPLY_DMASTER:
271 /* we dont allow these calls when banned */
272 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
273 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
274 " request %u"
275 " length %u from node %u to %u while node"
276 " is banned\n",
277 hdr->operation, hdr->reqid,
278 hdr->length,
279 hdr->srcnode, hdr->destnode));
280 goto done;
283 /* for ctdb_call inter-node operations verify that the
284 remote node that sent us the call is running in the
285 same generation instance as this node
287 if (ctdb->vnn_map->generation != hdr->generation) {
288 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
289 " request %u"
290 " length %u from node %u to %u had an"
291 " invalid generation id:%u while our"
292 " generation id is:%u\n",
293 hdr->operation, hdr->reqid,
294 hdr->length,
295 hdr->srcnode, hdr->destnode,
296 hdr->generation, ctdb->vnn_map->generation));
297 goto done;
301 switch (hdr->operation) {
302 case CTDB_REQ_CALL:
303 CTDB_INCREMENT_STAT(ctdb, node.req_call);
304 ctdb_request_call(ctdb, hdr);
305 break;
307 case CTDB_REPLY_CALL:
308 CTDB_INCREMENT_STAT(ctdb, node.reply_call);
309 ctdb_reply_call(ctdb, hdr);
310 break;
312 case CTDB_REPLY_ERROR:
313 CTDB_INCREMENT_STAT(ctdb, node.reply_error);
314 ctdb_reply_error(ctdb, hdr);
315 break;
317 case CTDB_REQ_DMASTER:
318 CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
319 ctdb_request_dmaster(ctdb, hdr);
320 break;
322 case CTDB_REPLY_DMASTER:
323 CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
324 ctdb_reply_dmaster(ctdb, hdr);
325 break;
327 case CTDB_REQ_MESSAGE:
328 CTDB_INCREMENT_STAT(ctdb, node.req_message);
329 ctdb_request_message(ctdb, hdr);
330 break;
332 case CTDB_REQ_CONTROL:
333 CTDB_INCREMENT_STAT(ctdb, node.req_control);
334 ctdb_request_control(ctdb, hdr);
335 break;
337 case CTDB_REPLY_CONTROL:
338 CTDB_INCREMENT_STAT(ctdb, node.reply_control);
339 ctdb_reply_control(ctdb, hdr);
340 break;
342 case CTDB_REQ_KEEPALIVE:
343 CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
344 break;
346 default:
347 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
348 __location__, hdr->operation));
349 break;
352 done:
353 talloc_free(tmp_ctx);
358 called by the transport layer when a node is dead
360 void ctdb_node_dead(struct ctdb_node *node)
362 if (node->flags & NODE_FLAGS_DISCONNECTED) {
363 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
364 node->ctdb->name, node->name,
365 node->ctdb->num_connected));
366 return;
368 node->ctdb->num_connected--;
369 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
370 node->rx_cnt = 0;
371 node->dead_count = 0;
373 DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n",
374 node->ctdb->name, node->name, node->ctdb->num_connected));
375 ctdb_daemon_cancel_controls(node->ctdb, node);
377 if (node->ctdb->methods == NULL) {
378 DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n"));
379 return;
382 node->ctdb->methods->restart(node);
386 called by the transport layer when a node is connected
388 void ctdb_node_connected(struct ctdb_node *node)
390 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
391 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
392 node->ctdb->name, node->name,
393 node->ctdb->num_connected));
394 return;
396 node->ctdb->num_connected++;
397 node->dead_count = 0;
398 node->flags &= ~NODE_FLAGS_DISCONNECTED;
399 node->flags |= NODE_FLAGS_UNHEALTHY;
400 DEBUG(DEBUG_NOTICE,
401 ("%s: connected to %s - %u connected\n",
402 node->ctdb->name, node->name, node->ctdb->num_connected));
/*
  State carried by a deferred packet until its timed event fires.
*/
struct queue_next {
	struct ctdb_context *ctdb;	/* context the packet is delivered to */
	struct ctdb_req_header *hdr;	/* copy of the deferred packet */
};
412 triggered when a deferred packet is due
414 static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
415 struct timeval t, void *private_data)
417 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
418 ctdb_input_pkt(q->ctdb, q->hdr);
419 talloc_free(q);
423 defer a packet, so it is processed on the next event loop
424 this is used for sending packets to ourselves
426 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
428 struct queue_next *q;
429 q = talloc(ctdb, struct queue_next);
430 if (q == NULL) {
431 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
432 return;
434 q->ctdb = ctdb;
435 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
436 if (q->hdr == NULL) {
437 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
438 return;
440 #if 0
441 /* use this to put packets directly into our recv function */
442 ctdb_input_pkt(q->ctdb, q->hdr);
443 #else
444 event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
445 #endif
450 broadcast a packet to all nodes
452 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
453 struct ctdb_req_header *hdr)
455 int i;
456 for (i=0; i < ctdb->num_nodes; i++) {
457 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
458 continue;
460 hdr->destnode = ctdb->nodes[i]->pnn;
461 ctdb_queue_packet(ctdb, hdr);
466 broadcast a packet to all nodes in the current vnnmap
468 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
469 struct ctdb_req_header *hdr)
471 int i;
472 for (i=0;i<ctdb->vnn_map->size;i++) {
473 hdr->destnode = ctdb->vnn_map->map[i];
474 ctdb_queue_packet(ctdb, hdr);
479 broadcast a packet to all connected nodes
481 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
482 struct ctdb_req_header *hdr)
484 int i;
485 for (i=0; i < ctdb->num_nodes; i++) {
486 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
487 continue;
489 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
490 hdr->destnode = ctdb->nodes[i]->pnn;
491 ctdb_queue_packet(ctdb, hdr);
497 queue a packet or die
499 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
501 struct ctdb_node *node;
503 switch (hdr->destnode) {
504 case CTDB_BROADCAST_ALL:
505 ctdb_broadcast_packet_all(ctdb, hdr);
506 return;
507 case CTDB_BROADCAST_VNNMAP:
508 ctdb_broadcast_packet_vnnmap(ctdb, hdr);
509 return;
510 case CTDB_BROADCAST_CONNECTED:
511 ctdb_broadcast_packet_connected(ctdb, hdr);
512 return;
515 CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
517 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
518 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
519 hdr->destnode));
520 return;
523 node = ctdb->nodes[hdr->destnode];
525 if (node->flags & NODE_FLAGS_DELETED) {
526 DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
527 return;
530 if (node->pnn == ctdb->pnn) {
531 ctdb_defer_packet(ctdb, hdr);
532 return;
535 if (ctdb->methods == NULL) {
536 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
537 "Transport is DOWN\n"));
538 return;
541 node->tx_cnt++;
542 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
543 ctdb_fatal(ctdb, "Unable to queue packet\n");
551 a valgrind hack to allow us to get opcode specific backtraces
552 very ugly, and relies on no compiler optimisation!
554 void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
556 switch (opcode) {
557 #define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
558 DO_OP(1);
559 DO_OP(2);
560 DO_OP(3);
561 DO_OP(4);
562 DO_OP(5);
563 DO_OP(6);
564 DO_OP(7);
565 DO_OP(8);
566 DO_OP(9);
567 DO_OP(10);
568 DO_OP(11);
569 DO_OP(12);
570 DO_OP(13);
571 DO_OP(14);
572 DO_OP(15);
573 DO_OP(16);
574 DO_OP(17);
575 DO_OP(18);
576 DO_OP(19);
577 DO_OP(20);
578 DO_OP(21);
579 DO_OP(22);
580 DO_OP(23);
581 DO_OP(24);
582 DO_OP(25);
583 DO_OP(26);
584 DO_OP(27);
585 DO_OP(28);
586 DO_OP(29);
587 DO_OP(30);
588 DO_OP(31);
589 DO_OP(32);
590 DO_OP(33);
591 DO_OP(34);
592 DO_OP(35);
593 DO_OP(36);
594 DO_OP(37);
595 DO_OP(38);
596 DO_OP(39);
597 DO_OP(40);
598 DO_OP(41);
599 DO_OP(42);
600 DO_OP(43);
601 DO_OP(44);
602 DO_OP(45);
603 DO_OP(46);
604 DO_OP(47);
605 DO_OP(48);
606 DO_OP(49);
607 DO_OP(50);
608 DO_OP(51);
609 DO_OP(52);
610 DO_OP(53);
611 DO_OP(54);
612 DO_OP(55);
613 DO_OP(56);
614 DO_OP(57);
615 DO_OP(58);
616 DO_OP(59);
617 DO_OP(60);
618 DO_OP(61);
619 DO_OP(62);
620 DO_OP(63);
621 DO_OP(64);
622 DO_OP(65);
623 DO_OP(66);
624 DO_OP(67);
625 DO_OP(68);
626 DO_OP(69);
627 DO_OP(70);
628 DO_OP(71);
629 DO_OP(72);
630 DO_OP(73);
631 DO_OP(74);
632 DO_OP(75);
633 DO_OP(76);
634 DO_OP(77);
635 DO_OP(78);
636 DO_OP(79);
637 DO_OP(80);
638 DO_OP(81);
639 DO_OP(82);
640 DO_OP(83);
641 DO_OP(84);
642 DO_OP(85);
643 DO_OP(86);
644 DO_OP(87);
645 DO_OP(88);
646 DO_OP(89);
647 DO_OP(90);
648 DO_OP(91);
649 DO_OP(92);
650 DO_OP(93);
651 DO_OP(94);
652 DO_OP(95);
653 DO_OP(96);
654 DO_OP(97);
655 DO_OP(98);
656 DO_OP(99);
657 DO_OP(100);
658 default:
659 ctdb_queue_packet(ctdb, hdr);
660 break;