idmap_hash: mirror the NT_STATUS_NONE_MAPPED/STATUS_SOME_UNMAPPED logic from idmap_au...
[Samba.git] / ctdb / server / ctdb_server.c
blobec6480c5067f8766fdd9748f0c663139dbb5bc36
1 /*
2 ctdb main protocol code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
24 #include <talloc.h>
25 #include <tevent.h>
27 #include "lib/util/dlinklist.h"
28 #include "lib/util/debug.h"
29 #include "lib/util/samba_util.h"
31 #include "ctdb_private.h"
32 #include "ctdb_client.h"
34 #include "common/common.h"
35 #include "common/logging.h"
38 choose the transport we will use
40 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
42 ctdb->transport = talloc_strdup(ctdb, transport);
43 CTDB_NO_MEMORY(ctdb, ctdb->transport);
45 return 0;
48 /* Return the node structure for nodeip, NULL if nodeip is invalid */
49 struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb,
50 const ctdb_sock_addr *nodeip)
52 unsigned int nodeid;
54 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
55 if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
56 continue;
58 if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) {
59 return ctdb->nodes[nodeid];
63 return NULL;
66 /* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */
67 uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb,
68 const ctdb_sock_addr *nodeip)
70 struct ctdb_node *node;
72 node = ctdb_ip_to_node(ctdb, nodeip);
73 if (node == NULL) {
74 return CTDB_UNKNOWN_PNN;
77 return node->pnn;
80 /* Load a nodes list file into a nodes array */
81 static int convert_node_map_to_list(struct ctdb_context *ctdb,
82 TALLOC_CTX *mem_ctx,
83 struct ctdb_node_map_old *node_map,
84 struct ctdb_node ***nodes,
85 uint32_t *num_nodes)
87 unsigned int i;
89 *nodes = talloc_zero_array(mem_ctx,
90 struct ctdb_node *, node_map->num);
91 CTDB_NO_MEMORY(ctdb, *nodes);
92 *num_nodes = node_map->num;
94 for (i = 0; i < node_map->num; i++) {
95 struct ctdb_node *node;
97 node = talloc_zero(*nodes, struct ctdb_node);
98 CTDB_NO_MEMORY(ctdb, node);
99 (*nodes)[i] = node;
101 node->address = node_map->nodes[i].addr;
102 node->name = talloc_asprintf(node, "%s:%u",
103 ctdb_addr_to_str(&node->address),
104 ctdb_addr_to_port(&node->address));
106 node->flags = node_map->nodes[i].flags;
107 if (!(node->flags & NODE_FLAGS_DELETED)) {
108 node->flags = NODE_FLAGS_UNHEALTHY;
110 node->flags |= NODE_FLAGS_DISCONNECTED;
112 node->pnn = i;
113 node->ctdb = ctdb;
114 node->dead_count = 0;
117 return 0;
120 /* Load the nodes list from a file */
121 void ctdb_load_nodes_file(struct ctdb_context *ctdb)
123 struct ctdb_node_map_old *node_map;
124 int ret;
126 node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
127 if (node_map == NULL) {
128 goto fail;
131 TALLOC_FREE(ctdb->nodes);
132 ret = convert_node_map_to_list(ctdb, ctdb, node_map,
133 &ctdb->nodes, &ctdb->num_nodes);
134 if (ret == -1) {
135 goto fail;
138 talloc_free(node_map);
139 return;
141 fail:
142 DEBUG(DEBUG_ERR, ("Failed to load nodes file \"%s\"\n",
143 ctdb->nodes_file));
144 talloc_free(node_map);
145 exit(1);
149 setup the local node address
151 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
153 ctdb->address = talloc(ctdb, ctdb_sock_addr);
154 CTDB_NO_MEMORY(ctdb, ctdb->address);
156 if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
157 return -1;
160 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
161 ctdb_addr_to_str(ctdb->address),
162 ctdb_addr_to_port(ctdb->address));
163 return 0;
168 return the number of active nodes
170 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
172 unsigned int i;
173 uint32_t count=0;
174 for (i=0; i < ctdb->num_nodes; i++) {
175 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
176 count++;
179 return count;
184 called when we need to process a packet. This can be a requeued packet
185 after a lockwait, or a real packet from another node
187 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
189 TALLOC_CTX *tmp_ctx;
191 /* place the packet as a child of the tmp_ctx. We then use
192 talloc_free() below to free it. If any of the calls want
193 to keep it, then they will steal it somewhere else, and the
194 talloc_free() will only free the tmp_ctx */
195 tmp_ctx = talloc_new(ctdb);
196 talloc_steal(tmp_ctx, hdr);
198 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
199 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
200 hdr->srcnode, hdr->destnode));
202 switch (hdr->operation) {
203 case CTDB_REQ_CALL:
204 case CTDB_REPLY_CALL:
205 case CTDB_REQ_DMASTER:
206 case CTDB_REPLY_DMASTER:
207 /* we don't allow these calls when banned */
208 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
209 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
210 " request %u"
211 " length %u from node %u to %u while node"
212 " is banned\n",
213 hdr->operation, hdr->reqid,
214 hdr->length,
215 hdr->srcnode, hdr->destnode));
216 goto done;
219 /* for ctdb_call inter-node operations verify that the
220 remote node that sent us the call is running in the
221 same generation instance as this node
223 if (ctdb->vnn_map->generation != hdr->generation) {
224 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
225 " request %u"
226 " length %u from node %u to %u had an"
227 " invalid generation id:%u while our"
228 " generation id is:%u\n",
229 hdr->operation, hdr->reqid,
230 hdr->length,
231 hdr->srcnode, hdr->destnode,
232 hdr->generation, ctdb->vnn_map->generation));
233 goto done;
237 switch (hdr->operation) {
238 case CTDB_REQ_CALL:
239 CTDB_INCREMENT_STAT(ctdb, node.req_call);
240 ctdb_request_call(ctdb, hdr);
241 break;
243 case CTDB_REPLY_CALL:
244 CTDB_INCREMENT_STAT(ctdb, node.reply_call);
245 ctdb_reply_call(ctdb, hdr);
246 break;
248 case CTDB_REPLY_ERROR:
249 CTDB_INCREMENT_STAT(ctdb, node.reply_error);
250 ctdb_reply_error(ctdb, hdr);
251 break;
253 case CTDB_REQ_DMASTER:
254 CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
255 ctdb_request_dmaster(ctdb, hdr);
256 break;
258 case CTDB_REPLY_DMASTER:
259 CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
260 ctdb_reply_dmaster(ctdb, hdr);
261 break;
263 case CTDB_REQ_MESSAGE:
264 CTDB_INCREMENT_STAT(ctdb, node.req_message);
265 ctdb_request_message(ctdb, hdr);
266 break;
268 case CTDB_REQ_CONTROL:
269 CTDB_INCREMENT_STAT(ctdb, node.req_control);
270 ctdb_request_control(ctdb, hdr);
271 break;
273 case CTDB_REPLY_CONTROL:
274 CTDB_INCREMENT_STAT(ctdb, node.reply_control);
275 ctdb_reply_control(ctdb, hdr);
276 break;
278 case CTDB_REQ_KEEPALIVE:
279 CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
280 ctdb_request_keepalive(ctdb, hdr);
281 break;
283 case CTDB_REQ_TUNNEL:
284 CTDB_INCREMENT_STAT(ctdb, node.req_tunnel);
285 ctdb_request_tunnel(ctdb, hdr);
286 break;
288 default:
289 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
290 __location__, hdr->operation));
291 break;
294 done:
295 talloc_free(tmp_ctx);
300 called by the transport layer when a node is dead
302 void ctdb_node_dead(struct ctdb_node *node)
304 if (node->ctdb->methods == NULL) {
305 DBG_ERR("Can not restart transport while shutting down\n");
306 return;
308 node->ctdb->methods->restart(node);
310 if (node->flags & NODE_FLAGS_DISCONNECTED) {
311 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
312 node->ctdb->name, node->name,
313 node->ctdb->num_connected));
314 return;
316 node->ctdb->num_connected--;
317 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
318 node->rx_cnt = 0;
319 node->dead_count = 0;
321 DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n",
322 node->ctdb->name, node->name, node->ctdb->num_connected));
323 ctdb_daemon_cancel_controls(node->ctdb, node);
327 called by the transport layer when a node is connected
329 void ctdb_node_connected(struct ctdb_node *node)
331 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
332 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
333 node->ctdb->name, node->name,
334 node->ctdb->num_connected));
335 return;
337 node->ctdb->num_connected++;
338 node->dead_count = 0;
339 node->flags &= ~NODE_FLAGS_DISCONNECTED;
340 DEBUG(DEBUG_ERR,
341 ("%s: connected to %s - %u connected\n",
342 node->ctdb->name, node->name, node->ctdb->num_connected));
/*
  State for one deferred packet: the context to process it in and the
  packet header to process.
*/
struct queue_next {
	struct ctdb_context *ctdb;	/* context passed to ctdb_input_pkt() */
	struct ctdb_req_header *hdr;	/* the packet to process */
};
352 triggered when a deferred packet is due
354 static void queue_next_trigger(struct tevent_context *ev,
355 struct tevent_timer *te,
356 struct timeval t, void *private_data)
358 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
359 ctdb_input_pkt(q->ctdb, q->hdr);
360 talloc_free(q);
364 defer a packet, so it is processed on the next event loop
365 this is used for sending packets to ourselves
367 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
369 struct queue_next *q;
370 q = talloc(ctdb, struct queue_next);
371 if (q == NULL) {
372 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
373 return;
375 q->ctdb = ctdb;
376 q->hdr = talloc_memdup(q, hdr, hdr->length);
377 if (q->hdr == NULL) {
378 talloc_free(q);
379 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
380 return;
382 #if 0
383 /* use this to put packets directly into our recv function */
384 ctdb_input_pkt(q->ctdb, q->hdr);
385 #else
386 tevent_add_timer(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
387 #endif
392 broadcast a packet to all nodes
394 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
395 struct ctdb_req_header *hdr)
397 unsigned int i;
398 for (i=0; i < ctdb->num_nodes; i++) {
399 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
400 continue;
402 hdr->destnode = ctdb->nodes[i]->pnn;
403 ctdb_queue_packet(ctdb, hdr);
408 broadcast a packet to all active nodes
410 static void ctdb_broadcast_packet_active(struct ctdb_context *ctdb,
411 struct ctdb_req_header *hdr)
413 unsigned int i;
414 for (i = 0; i < ctdb->num_nodes; i++) {
415 if (ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE) {
416 continue;
419 hdr->destnode = ctdb->nodes[i]->pnn;
420 ctdb_queue_packet(ctdb, hdr);
425 broadcast a packet to all connected nodes
427 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
428 struct ctdb_req_header *hdr)
430 unsigned int i;
431 for (i=0; i < ctdb->num_nodes; i++) {
432 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
433 continue;
435 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
436 hdr->destnode = ctdb->nodes[i]->pnn;
437 ctdb_queue_packet(ctdb, hdr);
443 queue a packet or die
445 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
447 struct ctdb_node *node;
449 switch (hdr->destnode) {
450 case CTDB_BROADCAST_ALL:
451 ctdb_broadcast_packet_all(ctdb, hdr);
452 return;
453 case CTDB_BROADCAST_ACTIVE:
454 ctdb_broadcast_packet_active(ctdb, hdr);
455 return;
456 case CTDB_BROADCAST_CONNECTED:
457 ctdb_broadcast_packet_connected(ctdb, hdr);
458 return;
461 CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
463 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
464 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
465 hdr->destnode));
466 return;
469 node = ctdb->nodes[hdr->destnode];
471 if (node->flags & NODE_FLAGS_DELETED) {
472 DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
473 return;
476 if (node->pnn == ctdb->pnn) {
477 ctdb_defer_packet(ctdb, hdr);
478 return;
481 if (ctdb->methods == NULL) {
482 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
483 "Transport is DOWN\n"));
484 return;
487 node->tx_cnt++;
488 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
489 ctdb_fatal(ctdb, "Unable to queue packet\n");
/*
  A valgrind hack to allow us to get opcode specific backtraces.
  Very ugly, and relies on no compiler optimisation!

  Every opcode value ends up in the same ctdb_queue_packet(ctdb, hdr)
  call; opcodes 1..100 merely reach it through their own case label.
  NOTE(review): presumably each opcode needs its own call site and
  source line for the backtraces to differ, so keep one DO_OP per line.
*/
void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
{
	switch (opcode) {
#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
		DO_OP(1);
		DO_OP(2);
		DO_OP(3);
		DO_OP(4);
		DO_OP(5);
		DO_OP(6);
		DO_OP(7);
		DO_OP(8);
		DO_OP(9);
		DO_OP(10);
		DO_OP(11);
		DO_OP(12);
		DO_OP(13);
		DO_OP(14);
		DO_OP(15);
		DO_OP(16);
		DO_OP(17);
		DO_OP(18);
		DO_OP(19);
		DO_OP(20);
		DO_OP(21);
		DO_OP(22);
		DO_OP(23);
		DO_OP(24);
		DO_OP(25);
		DO_OP(26);
		DO_OP(27);
		DO_OP(28);
		DO_OP(29);
		DO_OP(30);
		DO_OP(31);
		DO_OP(32);
		DO_OP(33);
		DO_OP(34);
		DO_OP(35);
		DO_OP(36);
		DO_OP(37);
		DO_OP(38);
		DO_OP(39);
		DO_OP(40);
		DO_OP(41);
		DO_OP(42);
		DO_OP(43);
		DO_OP(44);
		DO_OP(45);
		DO_OP(46);
		DO_OP(47);
		DO_OP(48);
		DO_OP(49);
		DO_OP(50);
		DO_OP(51);
		DO_OP(52);
		DO_OP(53);
		DO_OP(54);
		DO_OP(55);
		DO_OP(56);
		DO_OP(57);
		DO_OP(58);
		DO_OP(59);
		DO_OP(60);
		DO_OP(61);
		DO_OP(62);
		DO_OP(63);
		DO_OP(64);
		DO_OP(65);
		DO_OP(66);
		DO_OP(67);
		DO_OP(68);
		DO_OP(69);
		DO_OP(70);
		DO_OP(71);
		DO_OP(72);
		DO_OP(73);
		DO_OP(74);
		DO_OP(75);
		DO_OP(76);
		DO_OP(77);
		DO_OP(78);
		DO_OP(79);
		DO_OP(80);
		DO_OP(81);
		DO_OP(82);
		DO_OP(83);
		DO_OP(84);
		DO_OP(85);
		DO_OP(86);
		DO_OP(87);
		DO_OP(88);
		DO_OP(89);
		DO_OP(90);
		DO_OP(91);
		DO_OP(92);
		DO_OP(93);
		DO_OP(94);
		DO_OP(95);
		DO_OP(96);
		DO_OP(97);
		DO_OP(98);
		DO_OP(99);
		DO_OP(100);
	default:
		ctdb_queue_packet(ctdb, hdr);
		break;
	}
}