WHATSNEW: Clear release notes for Samba 4.4.0pre1.
[Samba.git] / ctdb / server / ctdb_server.c
blob81ef361b225e30c393dbffac97272da138499865
1 /*
2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "includes.h"
21 #include "tdb.h"
22 #include "lib/util/dlinklist.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "../include/ctdb_private.h"
28 choose the transport we will use
30 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
32 ctdb->transport = talloc_strdup(ctdb, transport);
33 CTDB_NO_MEMORY(ctdb, ctdb->transport);
35 return 0;
39 Check whether an ip is a valid node ip
40 Returns the node id for this ip address or -1
42 int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip)
44 int nodeid;
46 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
47 if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
48 continue;
50 if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) {
51 return nodeid;
55 return -1;
59 choose the recovery lock file
61 int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
63 if (ctdb->recovery_lock_file != NULL) {
64 talloc_free(ctdb->recovery_lock_file);
65 ctdb->recovery_lock_file = NULL;
68 if (file == NULL) {
69 DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
70 return 0;
73 ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
74 CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file);
76 return 0;
79 /* Load a nodes list file into a nodes array */
80 static int convert_node_map_to_list(struct ctdb_context *ctdb,
81 TALLOC_CTX *mem_ctx,
82 struct ctdb_node_map *node_map,
83 struct ctdb_node ***nodes,
84 uint32_t *num_nodes)
86 int i;
88 *nodes = talloc_zero_array(mem_ctx,
89 struct ctdb_node *, node_map->num);
90 CTDB_NO_MEMORY(ctdb, *nodes);
91 *num_nodes = node_map->num;
93 for (i = 0; i < node_map->num; i++) {
94 struct ctdb_node *node;
96 node = talloc_zero(*nodes, struct ctdb_node);
97 CTDB_NO_MEMORY(ctdb, node);
98 (*nodes)[i] = node;
100 node->address = node_map->nodes[i].addr;
101 node->name = talloc_asprintf(node, "%s:%u",
102 ctdb_addr_to_str(&node->address),
103 ctdb_addr_to_port(&node->address));
105 node->flags = node_map->nodes[i].flags;
106 if (!(node->flags & NODE_FLAGS_DELETED)) {
107 node->flags = NODE_FLAGS_UNHEALTHY;
109 node->flags |= NODE_FLAGS_DISCONNECTED;
111 node->pnn = i;
112 node->ctdb = ctdb;
113 node->dead_count = 0;
116 return 0;
119 /* Load the nodes list from a file */
120 void ctdb_load_nodes_file(struct ctdb_context *ctdb)
122 struct ctdb_node_map *node_map;
123 int ret;
125 node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
126 if (node_map == NULL) {
127 goto fail;
130 TALLOC_FREE(ctdb->nodes);
131 ret = convert_node_map_to_list(ctdb, ctdb, node_map,
132 &ctdb->nodes, &ctdb->num_nodes);
133 if (ret == -1) {
134 goto fail;
137 talloc_free(node_map);
138 return;
140 fail:
141 DEBUG(DEBUG_ERR, ("Failed to load nodes file \"%s\"\n",
142 ctdb->nodes_file));
143 talloc_free(node_map);
144 exit(1);
148 setup the local node address
150 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
152 ctdb->address = talloc(ctdb, ctdb_sock_addr);
153 CTDB_NO_MEMORY(ctdb, ctdb->address);
155 if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
156 return -1;
159 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
160 ctdb_addr_to_str(ctdb->address),
161 ctdb_addr_to_port(ctdb->address));
162 return 0;
167 return the number of active nodes
169 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
171 int i;
172 uint32_t count=0;
173 for (i=0; i < ctdb->num_nodes; i++) {
174 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
175 count++;
178 return count;
183 called when we need to process a packet. This can be a requeued packet
184 after a lockwait, or a real packet from another node
186 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
188 TALLOC_CTX *tmp_ctx;
190 /* place the packet as a child of the tmp_ctx. We then use
191 talloc_free() below to free it. If any of the calls want
192 to keep it, then they will steal it somewhere else, and the
193 talloc_free() will only free the tmp_ctx */
194 tmp_ctx = talloc_new(ctdb);
195 talloc_steal(tmp_ctx, hdr);
197 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
198 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
199 hdr->srcnode, hdr->destnode));
201 switch (hdr->operation) {
202 case CTDB_REQ_CALL:
203 case CTDB_REPLY_CALL:
204 case CTDB_REQ_DMASTER:
205 case CTDB_REPLY_DMASTER:
206 /* we dont allow these calls when banned */
207 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
208 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
209 " request %u"
210 " length %u from node %u to %u while node"
211 " is banned\n",
212 hdr->operation, hdr->reqid,
213 hdr->length,
214 hdr->srcnode, hdr->destnode));
215 goto done;
218 /* for ctdb_call inter-node operations verify that the
219 remote node that sent us the call is running in the
220 same generation instance as this node
222 if (ctdb->vnn_map->generation != hdr->generation) {
223 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
224 " request %u"
225 " length %u from node %u to %u had an"
226 " invalid generation id:%u while our"
227 " generation id is:%u\n",
228 hdr->operation, hdr->reqid,
229 hdr->length,
230 hdr->srcnode, hdr->destnode,
231 hdr->generation, ctdb->vnn_map->generation));
232 goto done;
236 switch (hdr->operation) {
237 case CTDB_REQ_CALL:
238 CTDB_INCREMENT_STAT(ctdb, node.req_call);
239 ctdb_request_call(ctdb, hdr);
240 break;
242 case CTDB_REPLY_CALL:
243 CTDB_INCREMENT_STAT(ctdb, node.reply_call);
244 ctdb_reply_call(ctdb, hdr);
245 break;
247 case CTDB_REPLY_ERROR:
248 CTDB_INCREMENT_STAT(ctdb, node.reply_error);
249 ctdb_reply_error(ctdb, hdr);
250 break;
252 case CTDB_REQ_DMASTER:
253 CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
254 ctdb_request_dmaster(ctdb, hdr);
255 break;
257 case CTDB_REPLY_DMASTER:
258 CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
259 ctdb_reply_dmaster(ctdb, hdr);
260 break;
262 case CTDB_REQ_MESSAGE:
263 CTDB_INCREMENT_STAT(ctdb, node.req_message);
264 ctdb_request_message(ctdb, hdr);
265 break;
267 case CTDB_REQ_CONTROL:
268 CTDB_INCREMENT_STAT(ctdb, node.req_control);
269 ctdb_request_control(ctdb, hdr);
270 break;
272 case CTDB_REPLY_CONTROL:
273 CTDB_INCREMENT_STAT(ctdb, node.reply_control);
274 ctdb_reply_control(ctdb, hdr);
275 break;
277 case CTDB_REQ_KEEPALIVE:
278 CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
279 break;
281 default:
282 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
283 __location__, hdr->operation));
284 break;
287 done:
288 talloc_free(tmp_ctx);
293 called by the transport layer when a node is dead
295 void ctdb_node_dead(struct ctdb_node *node)
297 if (node->flags & NODE_FLAGS_DISCONNECTED) {
298 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
299 node->ctdb->name, node->name,
300 node->ctdb->num_connected));
301 return;
303 node->ctdb->num_connected--;
304 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
305 node->rx_cnt = 0;
306 node->dead_count = 0;
308 DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n",
309 node->ctdb->name, node->name, node->ctdb->num_connected));
310 ctdb_daemon_cancel_controls(node->ctdb, node);
312 if (node->ctdb->methods == NULL) {
313 DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n"));
314 return;
317 node->ctdb->methods->restart(node);
321 called by the transport layer when a node is connected
323 void ctdb_node_connected(struct ctdb_node *node)
325 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
326 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
327 node->ctdb->name, node->name,
328 node->ctdb->num_connected));
329 return;
331 node->ctdb->num_connected++;
332 node->dead_count = 0;
333 node->flags &= ~NODE_FLAGS_DISCONNECTED;
334 node->flags |= NODE_FLAGS_UNHEALTHY;
335 DEBUG(DEBUG_NOTICE,
336 ("%s: connected to %s - %u connected\n",
337 node->ctdb->name, node->name, node->ctdb->num_connected));
/*
  State for one packet whose processing has been deferred to a later
  pass of the event loop.
*/
struct queue_next {
	/* context the packet will be processed in */
	struct ctdb_context *ctdb;
	/* the packet itself */
	struct ctdb_req_header *hdr;
};
347 triggered when a deferred packet is due
349 static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
350 struct timeval t, void *private_data)
352 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
353 ctdb_input_pkt(q->ctdb, q->hdr);
354 talloc_free(q);
358 defer a packet, so it is processed on the next event loop
359 this is used for sending packets to ourselves
361 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
363 struct queue_next *q;
364 q = talloc(ctdb, struct queue_next);
365 if (q == NULL) {
366 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
367 return;
369 q->ctdb = ctdb;
370 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
371 if (q->hdr == NULL) {
372 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
373 return;
375 #if 0
376 /* use this to put packets directly into our recv function */
377 ctdb_input_pkt(q->ctdb, q->hdr);
378 #else
379 event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
380 #endif
385 broadcast a packet to all nodes
387 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
388 struct ctdb_req_header *hdr)
390 int i;
391 for (i=0; i < ctdb->num_nodes; i++) {
392 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
393 continue;
395 hdr->destnode = ctdb->nodes[i]->pnn;
396 ctdb_queue_packet(ctdb, hdr);
401 broadcast a packet to all nodes in the current vnnmap
403 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
404 struct ctdb_req_header *hdr)
406 int i;
407 for (i=0;i<ctdb->vnn_map->size;i++) {
408 hdr->destnode = ctdb->vnn_map->map[i];
409 ctdb_queue_packet(ctdb, hdr);
414 broadcast a packet to all connected nodes
416 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
417 struct ctdb_req_header *hdr)
419 int i;
420 for (i=0; i < ctdb->num_nodes; i++) {
421 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
422 continue;
424 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
425 hdr->destnode = ctdb->nodes[i]->pnn;
426 ctdb_queue_packet(ctdb, hdr);
432 queue a packet or die
434 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
436 struct ctdb_node *node;
438 switch (hdr->destnode) {
439 case CTDB_BROADCAST_ALL:
440 ctdb_broadcast_packet_all(ctdb, hdr);
441 return;
442 case CTDB_BROADCAST_VNNMAP:
443 ctdb_broadcast_packet_vnnmap(ctdb, hdr);
444 return;
445 case CTDB_BROADCAST_CONNECTED:
446 ctdb_broadcast_packet_connected(ctdb, hdr);
447 return;
450 CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
452 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
453 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
454 hdr->destnode));
455 return;
458 node = ctdb->nodes[hdr->destnode];
460 if (node->flags & NODE_FLAGS_DELETED) {
461 DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
462 return;
465 if (node->pnn == ctdb->pnn) {
466 ctdb_defer_packet(ctdb, hdr);
467 return;
470 if (ctdb->methods == NULL) {
471 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
472 "Transport is DOWN\n"));
473 return;
476 node->tx_cnt++;
477 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
478 ctdb_fatal(ctdb, "Unable to queue packet\n");
/*
  A valgrind hack to allow us to get opcode specific backtraces.
  Very ugly, and relies on no compiler optimisation!

  Every opcode from 1 to 100 gets its own call site of
  ctdb_queue_packet(), so the call site in a backtrace identifies the
  opcode.  Functionally this is the same as calling
  ctdb_queue_packet(ctdb, hdr) directly: every case, including the
  default, queues the packet exactly once.
*/
void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
{
	switch (opcode) {
#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
		DO_OP(1);
		DO_OP(2);
		DO_OP(3);
		DO_OP(4);
		DO_OP(5);
		DO_OP(6);
		DO_OP(7);
		DO_OP(8);
		DO_OP(9);
		DO_OP(10);
		DO_OP(11);
		DO_OP(12);
		DO_OP(13);
		DO_OP(14);
		DO_OP(15);
		DO_OP(16);
		DO_OP(17);
		DO_OP(18);
		DO_OP(19);
		DO_OP(20);
		DO_OP(21);
		DO_OP(22);
		DO_OP(23);
		DO_OP(24);
		DO_OP(25);
		DO_OP(26);
		DO_OP(27);
		DO_OP(28);
		DO_OP(29);
		DO_OP(30);
		DO_OP(31);
		DO_OP(32);
		DO_OP(33);
		DO_OP(34);
		DO_OP(35);
		DO_OP(36);
		DO_OP(37);
		DO_OP(38);
		DO_OP(39);
		DO_OP(40);
		DO_OP(41);
		DO_OP(42);
		DO_OP(43);
		DO_OP(44);
		DO_OP(45);
		DO_OP(46);
		DO_OP(47);
		DO_OP(48);
		DO_OP(49);
		DO_OP(50);
		DO_OP(51);
		DO_OP(52);
		DO_OP(53);
		DO_OP(54);
		DO_OP(55);
		DO_OP(56);
		DO_OP(57);
		DO_OP(58);
		DO_OP(59);
		DO_OP(60);
		DO_OP(61);
		DO_OP(62);
		DO_OP(63);
		DO_OP(64);
		DO_OP(65);
		DO_OP(66);
		DO_OP(67);
		DO_OP(68);
		DO_OP(69);
		DO_OP(70);
		DO_OP(71);
		DO_OP(72);
		DO_OP(73);
		DO_OP(74);
		DO_OP(75);
		DO_OP(76);
		DO_OP(77);
		DO_OP(78);
		DO_OP(79);
		DO_OP(80);
		DO_OP(81);
		DO_OP(82);
		DO_OP(83);
		DO_OP(84);
		DO_OP(85);
		DO_OP(86);
		DO_OP(87);
		DO_OP(88);
		DO_OP(89);
		DO_OP(90);
		DO_OP(91);
		DO_OP(92);
		DO_OP(93);
		DO_OP(94);
		DO_OP(95);
		DO_OP(96);
		DO_OP(97);
		DO_OP(98);
		DO_OP(99);
		DO_OP(100);
#undef DO_OP
	default:
		ctdb_queue_packet(ctdb, hdr);
		break;
	}
}