ldb_tdb: Use mem_ctx and so avoid leak onto long-term memory on duplicated add.
[Samba.git] / ctdb / server / ctdb_server.c
blob8e31038cc9567d8ceb02799ff87adc58238b3098
/*
   ctdb main protocol code

   Copyright (C) Andrew Tridgell  2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
24 #include <talloc.h>
25 #include <tevent.h>
27 #include "lib/util/dlinklist.h"
28 #include "lib/util/debug.h"
29 #include "lib/util/samba_util.h"
31 #include "ctdb_private.h"
32 #include "ctdb_client.h"
34 #include "common/common.h"
35 #include "common/logging.h"
38 choose the transport we will use
40 int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
42 ctdb->transport = talloc_strdup(ctdb, transport);
43 CTDB_NO_MEMORY(ctdb, ctdb->transport);
45 return 0;
49 Check whether an ip is a valid node ip
50 Returns the node id for this ip address or -1
52 int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip)
54 int nodeid;
56 for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
57 if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) {
58 continue;
60 if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) {
61 return nodeid;
65 return -1;
68 /* Load a nodes list file into a nodes array */
69 static int convert_node_map_to_list(struct ctdb_context *ctdb,
70 TALLOC_CTX *mem_ctx,
71 struct ctdb_node_map_old *node_map,
72 struct ctdb_node ***nodes,
73 uint32_t *num_nodes)
75 int i;
77 *nodes = talloc_zero_array(mem_ctx,
78 struct ctdb_node *, node_map->num);
79 CTDB_NO_MEMORY(ctdb, *nodes);
80 *num_nodes = node_map->num;
82 for (i = 0; i < node_map->num; i++) {
83 struct ctdb_node *node;
85 node = talloc_zero(*nodes, struct ctdb_node);
86 CTDB_NO_MEMORY(ctdb, node);
87 (*nodes)[i] = node;
89 node->address = node_map->nodes[i].addr;
90 node->name = talloc_asprintf(node, "%s:%u",
91 ctdb_addr_to_str(&node->address),
92 ctdb_addr_to_port(&node->address));
94 node->flags = node_map->nodes[i].flags;
95 if (!(node->flags & NODE_FLAGS_DELETED)) {
96 node->flags = NODE_FLAGS_UNHEALTHY;
98 node->flags |= NODE_FLAGS_DISCONNECTED;
100 node->pnn = i;
101 node->ctdb = ctdb;
102 node->dead_count = 0;
105 return 0;
108 /* Load the nodes list from a file */
109 void ctdb_load_nodes_file(struct ctdb_context *ctdb)
111 struct ctdb_node_map_old *node_map;
112 int ret;
114 node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
115 if (node_map == NULL) {
116 goto fail;
119 TALLOC_FREE(ctdb->nodes);
120 ret = convert_node_map_to_list(ctdb, ctdb, node_map,
121 &ctdb->nodes, &ctdb->num_nodes);
122 if (ret == -1) {
123 goto fail;
126 talloc_free(node_map);
127 return;
129 fail:
130 DEBUG(DEBUG_ERR, ("Failed to load nodes file \"%s\"\n",
131 ctdb->nodes_file));
132 talloc_free(node_map);
133 exit(1);
137 setup the local node address
139 int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
141 ctdb->address = talloc(ctdb, ctdb_sock_addr);
142 CTDB_NO_MEMORY(ctdb, ctdb->address);
144 if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
145 return -1;
148 ctdb->name = talloc_asprintf(ctdb, "%s:%u",
149 ctdb_addr_to_str(ctdb->address),
150 ctdb_addr_to_port(ctdb->address));
151 return 0;
156 return the number of active nodes
158 uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
160 int i;
161 uint32_t count=0;
162 for (i=0; i < ctdb->num_nodes; i++) {
163 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) {
164 count++;
167 return count;
172 called when we need to process a packet. This can be a requeued packet
173 after a lockwait, or a real packet from another node
175 void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
177 TALLOC_CTX *tmp_ctx;
179 /* place the packet as a child of the tmp_ctx. We then use
180 talloc_free() below to free it. If any of the calls want
181 to keep it, then they will steal it somewhere else, and the
182 talloc_free() will only free the tmp_ctx */
183 tmp_ctx = talloc_new(ctdb);
184 talloc_steal(tmp_ctx, hdr);
186 DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from "
187 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
188 hdr->srcnode, hdr->destnode));
190 switch (hdr->operation) {
191 case CTDB_REQ_CALL:
192 case CTDB_REPLY_CALL:
193 case CTDB_REQ_DMASTER:
194 case CTDB_REPLY_DMASTER:
195 /* we don't allow these calls when banned */
196 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) {
197 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
198 " request %u"
199 " length %u from node %u to %u while node"
200 " is banned\n",
201 hdr->operation, hdr->reqid,
202 hdr->length,
203 hdr->srcnode, hdr->destnode));
204 goto done;
207 /* for ctdb_call inter-node operations verify that the
208 remote node that sent us the call is running in the
209 same generation instance as this node
211 if (ctdb->vnn_map->generation != hdr->generation) {
212 DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u"
213 " request %u"
214 " length %u from node %u to %u had an"
215 " invalid generation id:%u while our"
216 " generation id is:%u\n",
217 hdr->operation, hdr->reqid,
218 hdr->length,
219 hdr->srcnode, hdr->destnode,
220 hdr->generation, ctdb->vnn_map->generation));
221 goto done;
225 switch (hdr->operation) {
226 case CTDB_REQ_CALL:
227 CTDB_INCREMENT_STAT(ctdb, node.req_call);
228 ctdb_request_call(ctdb, hdr);
229 break;
231 case CTDB_REPLY_CALL:
232 CTDB_INCREMENT_STAT(ctdb, node.reply_call);
233 ctdb_reply_call(ctdb, hdr);
234 break;
236 case CTDB_REPLY_ERROR:
237 CTDB_INCREMENT_STAT(ctdb, node.reply_error);
238 ctdb_reply_error(ctdb, hdr);
239 break;
241 case CTDB_REQ_DMASTER:
242 CTDB_INCREMENT_STAT(ctdb, node.req_dmaster);
243 ctdb_request_dmaster(ctdb, hdr);
244 break;
246 case CTDB_REPLY_DMASTER:
247 CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster);
248 ctdb_reply_dmaster(ctdb, hdr);
249 break;
251 case CTDB_REQ_MESSAGE:
252 CTDB_INCREMENT_STAT(ctdb, node.req_message);
253 ctdb_request_message(ctdb, hdr);
254 break;
256 case CTDB_REQ_CONTROL:
257 CTDB_INCREMENT_STAT(ctdb, node.req_control);
258 ctdb_request_control(ctdb, hdr);
259 break;
261 case CTDB_REPLY_CONTROL:
262 CTDB_INCREMENT_STAT(ctdb, node.reply_control);
263 ctdb_reply_control(ctdb, hdr);
264 break;
266 case CTDB_REQ_KEEPALIVE:
267 CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv);
268 ctdb_request_keepalive(ctdb, hdr);
269 break;
271 case CTDB_REQ_TUNNEL:
272 CTDB_INCREMENT_STAT(ctdb, node.req_tunnel);
273 ctdb_request_tunnel(ctdb, hdr);
274 break;
276 default:
277 DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n",
278 __location__, hdr->operation));
279 break;
282 done:
283 talloc_free(tmp_ctx);
288 called by the transport layer when a node is dead
290 void ctdb_node_dead(struct ctdb_node *node)
292 if (node->flags & NODE_FLAGS_DISCONNECTED) {
293 DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n",
294 node->ctdb->name, node->name,
295 node->ctdb->num_connected));
296 return;
298 node->ctdb->num_connected--;
299 node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
300 node->rx_cnt = 0;
301 node->dead_count = 0;
303 DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n",
304 node->ctdb->name, node->name, node->ctdb->num_connected));
305 ctdb_daemon_cancel_controls(node->ctdb, node);
307 if (node->ctdb->methods == NULL) {
308 DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n"));
309 return;
312 node->ctdb->methods->restart(node);
316 called by the transport layer when a node is connected
318 void ctdb_node_connected(struct ctdb_node *node)
320 if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
321 DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n",
322 node->ctdb->name, node->name,
323 node->ctdb->num_connected));
324 return;
326 node->ctdb->num_connected++;
327 node->dead_count = 0;
328 node->flags &= ~NODE_FLAGS_DISCONNECTED;
329 node->flags |= NODE_FLAGS_UNHEALTHY;
330 DEBUG(DEBUG_ERR,
331 ("%s: connected to %s - %u connected\n",
332 node->ctdb->name, node->name, node->ctdb->num_connected));
/*
  State for a packet whose processing has been deferred to a timer event.
*/
struct queue_next {
	/* context handed to ctdb_input_pkt() when the timer fires */
	struct ctdb_context *ctdb;
	/* the packet handed to ctdb_input_pkt() when the timer fires */
	struct ctdb_req_header *hdr;
};
342 triggered when a deferred packet is due
344 static void queue_next_trigger(struct tevent_context *ev,
345 struct tevent_timer *te,
346 struct timeval t, void *private_data)
348 struct queue_next *q = talloc_get_type(private_data, struct queue_next);
349 ctdb_input_pkt(q->ctdb, q->hdr);
350 talloc_free(q);
354 defer a packet, so it is processed on the next event loop
355 this is used for sending packets to ourselves
357 static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
359 struct queue_next *q;
360 q = talloc(ctdb, struct queue_next);
361 if (q == NULL) {
362 DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n"));
363 return;
365 q->ctdb = ctdb;
366 q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
367 if (q->hdr == NULL) {
368 DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n"));
369 return;
371 #if 0
372 /* use this to put packets directly into our recv function */
373 ctdb_input_pkt(q->ctdb, q->hdr);
374 #else
375 tevent_add_timer(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
376 #endif
381 broadcast a packet to all nodes
383 static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
384 struct ctdb_req_header *hdr)
386 int i;
387 for (i=0; i < ctdb->num_nodes; i++) {
388 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
389 continue;
391 hdr->destnode = ctdb->nodes[i]->pnn;
392 ctdb_queue_packet(ctdb, hdr);
397 broadcast a packet to all nodes in the current vnnmap
399 static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
400 struct ctdb_req_header *hdr)
402 int i;
403 for (i=0;i<ctdb->vnn_map->size;i++) {
404 hdr->destnode = ctdb->vnn_map->map[i];
405 ctdb_queue_packet(ctdb, hdr);
410 broadcast a packet to all connected nodes
412 static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
413 struct ctdb_req_header *hdr)
415 int i;
416 for (i=0; i < ctdb->num_nodes; i++) {
417 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
418 continue;
420 if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
421 hdr->destnode = ctdb->nodes[i]->pnn;
422 ctdb_queue_packet(ctdb, hdr);
428 queue a packet or die
430 void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
432 struct ctdb_node *node;
434 switch (hdr->destnode) {
435 case CTDB_BROADCAST_ALL:
436 ctdb_broadcast_packet_all(ctdb, hdr);
437 return;
438 case CTDB_BROADCAST_VNNMAP:
439 ctdb_broadcast_packet_vnnmap(ctdb, hdr);
440 return;
441 case CTDB_BROADCAST_CONNECTED:
442 ctdb_broadcast_packet_connected(ctdb, hdr);
443 return;
446 CTDB_INCREMENT_STAT(ctdb, node_packets_sent);
448 if (!ctdb_validate_pnn(ctdb, hdr->destnode)) {
449 DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n",
450 hdr->destnode));
451 return;
454 node = ctdb->nodes[hdr->destnode];
456 if (node->flags & NODE_FLAGS_DELETED) {
457 DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode));
458 return;
461 if (node->pnn == ctdb->pnn) {
462 ctdb_defer_packet(ctdb, hdr);
463 return;
466 if (ctdb->methods == NULL) {
467 DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. "
468 "Transport is DOWN\n"));
469 return;
472 node->tx_cnt++;
473 if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
474 ctdb_fatal(ctdb, "Unable to queue packet\n");
/*
  A valgrind hack that gives each opcode its own backtrace.

  Every opcode from 1 to 100 gets its own call site for
  ctdb_queue_packet(); any other opcode shares the default call site.
  This is very ugly and relies on the compiler not optimising the
  separate call sites away, so keep one DO_OP per line.
*/
void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode)
{
	switch (opcode) {
#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break
		DO_OP(1);
		DO_OP(2);
		DO_OP(3);
		DO_OP(4);
		DO_OP(5);
		DO_OP(6);
		DO_OP(7);
		DO_OP(8);
		DO_OP(9);
		DO_OP(10);
		DO_OP(11);
		DO_OP(12);
		DO_OP(13);
		DO_OP(14);
		DO_OP(15);
		DO_OP(16);
		DO_OP(17);
		DO_OP(18);
		DO_OP(19);
		DO_OP(20);
		DO_OP(21);
		DO_OP(22);
		DO_OP(23);
		DO_OP(24);
		DO_OP(25);
		DO_OP(26);
		DO_OP(27);
		DO_OP(28);
		DO_OP(29);
		DO_OP(30);
		DO_OP(31);
		DO_OP(32);
		DO_OP(33);
		DO_OP(34);
		DO_OP(35);
		DO_OP(36);
		DO_OP(37);
		DO_OP(38);
		DO_OP(39);
		DO_OP(40);
		DO_OP(41);
		DO_OP(42);
		DO_OP(43);
		DO_OP(44);
		DO_OP(45);
		DO_OP(46);
		DO_OP(47);
		DO_OP(48);
		DO_OP(49);
		DO_OP(50);
		DO_OP(51);
		DO_OP(52);
		DO_OP(53);
		DO_OP(54);
		DO_OP(55);
		DO_OP(56);
		DO_OP(57);
		DO_OP(58);
		DO_OP(59);
		DO_OP(60);
		DO_OP(61);
		DO_OP(62);
		DO_OP(63);
		DO_OP(64);
		DO_OP(65);
		DO_OP(66);
		DO_OP(67);
		DO_OP(68);
		DO_OP(69);
		DO_OP(70);
		DO_OP(71);
		DO_OP(72);
		DO_OP(73);
		DO_OP(74);
		DO_OP(75);
		DO_OP(76);
		DO_OP(77);
		DO_OP(78);
		DO_OP(79);
		DO_OP(80);
		DO_OP(81);
		DO_OP(82);
		DO_OP(83);
		DO_OP(84);
		DO_OP(85);
		DO_OP(86);
		DO_OP(87);
		DO_OP(88);
		DO_OP(89);
		DO_OP(90);
		DO_OP(91);
		DO_OP(92);
		DO_OP(93);
		DO_OP(94);
		DO_OP(95);
		DO_OP(96);
		DO_OP(97);
		DO_OP(98);
		DO_OP(99);
		DO_OP(100);
	default:
		ctdb_queue_packet(ctdb, hdr);
		break;
	}
}