ctdb-takeover: Do not kill smbd processes on releasing IP
[Samba.git] / ctdb / server / ctdb_takeover.c
blobb32772f88cdad4cb0ae602b8319e326d7c3cc88e
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/* Timeout value built by timeval_current_ofs() from the takeover_timeout
 * tunable.  The expansion refers to a variable named "ctdb" that must be
 * in scope at the point of use. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument given to timeval_current_ofs() when the gratuitous ARP
 * timer is re-armed. */
#define CTDB_ARP_INTERVAL 1
/* Number of rounds ctdb_control_send_arp() performs before it frees its
 * state and stops re-arming itself. */
#define CTDB_ARP_REPEAT 3
/* A local network interface known to this daemon (element of ctdb->ifaces). */
struct ctdb_interface {
	/* doubly-linked list linkage */
	struct ctdb_interface *prev, *next;
	/* interface name; matched with strcmp() against VNN interface lists */
	const char *name;
	/* only interfaces with link_up set are chosen to host an address */
	bool link_up;
	/* incremented when a VNN is assigned to this interface and
	 * decremented when it is unassigned */
	uint32_t references;
};
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
60 if (vnn->iface) {
61 return vnn->iface->name;
64 return "__none__";
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 /* Verify that we don't have an entry for this ip yet */
72 for (i=ctdb->ifaces;i;i=i->next) {
73 if (strcmp(i->name, iface) == 0) {
74 return 0;
78 /* create a new structure for this interface */
79 i = talloc_zero(ctdb, struct ctdb_interface);
80 CTDB_NO_MEMORY_FATAL(ctdb, i);
81 i->name = talloc_strdup(i, iface);
82 CTDB_NO_MEMORY(ctdb, i->name);
84 i->link_up = true;
86 DLIST_ADD(ctdb->ifaces, i);
88 return 0;
91 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
92 const char *name)
94 int n;
96 for (n = 0; vnn->ifaces[n] != NULL; n++) {
97 if (strcmp(name, vnn->ifaces[n]) == 0) {
98 return true;
102 return false;
105 /* If any interfaces now have no possible IPs then delete them. This
106 * implementation is naive (i.e. simple) rather than clever
107 * (i.e. complex). Given that this is run on delip and that operation
108 * is rare, this doesn't need to be efficient - it needs to be
109 * foolproof. One alternative is reference counting, where the logic
110 * is distributed and can, therefore, be broken in multiple places.
111 * Another alternative is to build a red-black tree of interfaces that
112 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
113 * once) and then walking ctdb->ifaces once and deleting those not in
114 * the tree. Let's go to one of those if the naive implementation
115 * causes problems... :-)
117 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
118 struct ctdb_vnn *vnn)
120 struct ctdb_interface *i, *next;
122 /* For each interface, check if there's an IP using it. */
123 for (i = ctdb->ifaces; i != NULL; i = next) {
124 struct ctdb_vnn *tv;
125 bool found;
126 next = i->next;
128 /* Only consider interfaces named in the given VNN. */
129 if (!vnn_has_interface_with_name(vnn, i->name)) {
130 continue;
133 /* Is the "single IP" on this interface? */
134 if ((ctdb->single_ip_vnn != NULL) &&
135 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
136 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
137 /* Found, next interface please... */
138 continue;
140 /* Search for a vnn with this interface. */
141 found = false;
142 for (tv=ctdb->vnn; tv; tv=tv->next) {
143 if (vnn_has_interface_with_name(tv, i->name)) {
144 found = true;
145 break;
149 if (!found) {
150 /* None of the VNNs are using this interface. */
151 DLIST_REMOVE(ctdb->ifaces, i);
152 talloc_free(i);
158 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
159 const char *iface)
161 struct ctdb_interface *i;
163 for (i=ctdb->ifaces;i;i=i->next) {
164 if (strcmp(i->name, iface) == 0) {
165 return i;
169 return NULL;
172 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
175 int i;
176 struct ctdb_interface *cur = NULL;
177 struct ctdb_interface *best = NULL;
179 for (i=0; vnn->ifaces[i]; i++) {
181 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
182 if (cur == NULL) {
183 continue;
186 if (!cur->link_up) {
187 continue;
190 if (best == NULL) {
191 best = cur;
192 continue;
195 if (cur->references < best->references) {
196 best = cur;
197 continue;
201 return best;
204 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
205 struct ctdb_vnn *vnn)
207 struct ctdb_interface *best = NULL;
209 if (vnn->iface) {
210 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
211 "still assigned to iface '%s'\n",
212 ctdb_addr_to_str(&vnn->public_address),
213 ctdb_vnn_iface_string(vnn)));
214 return 0;
217 best = ctdb_vnn_best_iface(ctdb, vnn);
218 if (best == NULL) {
219 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
220 "cannot assign to iface any iface\n",
221 ctdb_addr_to_str(&vnn->public_address)));
222 return -1;
225 vnn->iface = best;
226 best->references++;
227 vnn->pnn = ctdb->pnn;
229 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
230 "now assigned to iface '%s' refs[%d]\n",
231 ctdb_addr_to_str(&vnn->public_address),
232 ctdb_vnn_iface_string(vnn),
233 best->references));
234 return 0;
237 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
238 struct ctdb_vnn *vnn)
240 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241 "now unassigned (old iface '%s' refs[%d])\n",
242 ctdb_addr_to_str(&vnn->public_address),
243 ctdb_vnn_iface_string(vnn),
244 vnn->iface?vnn->iface->references:0));
245 if (vnn->iface) {
246 vnn->iface->references--;
248 vnn->iface = NULL;
249 if (vnn->pnn == ctdb->pnn) {
250 vnn->pnn = -1;
254 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
255 struct ctdb_vnn *vnn)
257 int i;
259 /* Nodes that are not RUNNING can not host IPs */
260 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 return false;
264 if (vnn->delete_pending) {
265 return false;
268 if (vnn->iface && vnn->iface->link_up) {
269 return true;
272 for (i=0; vnn->ifaces[i]; i++) {
273 struct ctdb_interface *cur;
275 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
276 if (cur == NULL) {
277 continue;
280 if (cur->link_up) {
281 return true;
285 return false;
288 struct ctdb_takeover_arp {
289 struct ctdb_context *ctdb;
290 uint32_t count;
291 ctdb_sock_addr addr;
292 struct ctdb_tcp_array *tcparray;
293 struct ctdb_vnn *vnn;
298 lists of tcp endpoints
300 struct ctdb_tcp_list {
301 struct ctdb_tcp_list *prev, *next;
302 struct ctdb_connection connection;
306 list of clients to kill on IP release
308 struct ctdb_client_ip {
309 struct ctdb_client_ip *prev, *next;
310 struct ctdb_context *ctdb;
311 ctdb_sock_addr addr;
312 uint32_t client_id;
317 send a gratuitous arp
319 static void ctdb_control_send_arp(struct tevent_context *ev,
320 struct tevent_timer *te,
321 struct timeval t, void *private_data)
323 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
324 struct ctdb_takeover_arp);
325 int i, ret;
326 struct ctdb_tcp_array *tcparray;
327 const char *iface = ctdb_vnn_iface_string(arp->vnn);
329 ret = ctdb_sys_send_arp(&arp->addr, iface);
330 if (ret != 0) {
331 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
332 iface, strerror(errno)));
335 tcparray = arp->tcparray;
336 if (tcparray) {
337 for (i=0;i<tcparray->num;i++) {
338 struct ctdb_connection *tcon;
340 tcon = &tcparray->connections[i];
341 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
342 (unsigned)ntohs(tcon->dst.ip.sin_port),
343 ctdb_addr_to_str(&tcon->src),
344 (unsigned)ntohs(tcon->src.ip.sin_port)));
345 ret = ctdb_sys_send_tcp(
346 &tcon->src,
347 &tcon->dst,
348 0, 0, 0);
349 if (ret != 0) {
350 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
351 ctdb_addr_to_str(&tcon->src)));
356 arp->count++;
358 if (arp->count == CTDB_ARP_REPEAT) {
359 talloc_free(arp);
360 return;
363 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
364 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
365 ctdb_control_send_arp, arp);
368 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
369 struct ctdb_vnn *vnn)
371 struct ctdb_takeover_arp *arp;
372 struct ctdb_tcp_array *tcparray;
374 if (!vnn->takeover_ctx) {
375 vnn->takeover_ctx = talloc_new(vnn);
376 if (!vnn->takeover_ctx) {
377 return -1;
381 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
382 if (!arp) {
383 return -1;
386 arp->ctdb = ctdb;
387 arp->addr = vnn->public_address;
388 arp->vnn = vnn;
390 tcparray = vnn->tcp_array;
391 if (tcparray) {
392 /* add all of the known tcp connections for this IP to the
393 list of tcp connections to send tickle acks for */
394 arp->tcparray = talloc_steal(arp, tcparray);
396 vnn->tcp_array = NULL;
397 vnn->tcp_update_needed = true;
400 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
401 timeval_zero(), ctdb_control_send_arp, arp);
403 return 0;
406 struct takeover_callback_state {
407 struct ctdb_req_control_old *c;
408 ctdb_sock_addr *addr;
409 struct ctdb_vnn *vnn;
/* State handed to ctdb_do_takeip_callback() by ctdb_do_takeip(). */
struct ctdb_do_takeip_state {
	/* control request to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* VNN being taken over */
	struct ctdb_vnn *vnn;
};
418 called when takeip event finishes
420 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
421 void *private_data)
423 struct ctdb_do_takeip_state *state =
424 talloc_get_type(private_data, struct ctdb_do_takeip_state);
425 int32_t ret;
426 TDB_DATA data;
428 if (status != 0) {
429 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
431 if (status == -ETIME) {
432 ctdb_ban_self(ctdb);
434 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
435 ctdb_addr_to_str(&state->vnn->public_address),
436 ctdb_vnn_iface_string(state->vnn)));
437 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
439 node->flags |= NODE_FLAGS_UNHEALTHY;
440 talloc_free(state);
441 return;
444 if (ctdb->do_checkpublicip) {
446 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
447 if (ret != 0) {
448 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 talloc_free(state);
450 return;
455 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
456 data.dsize = strlen((char *)data.dptr) + 1;
457 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
459 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
462 /* the control succeeded */
463 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
464 talloc_free(state);
465 return;
468 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
470 state->vnn->update_in_flight = false;
471 return 0;
475 take over an ip address
477 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
478 struct ctdb_req_control_old *c,
479 struct ctdb_vnn *vnn)
481 int ret;
482 struct ctdb_do_takeip_state *state;
484 if (vnn->update_in_flight) {
485 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
486 "update for this IP already in flight\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits));
489 return -1;
492 ret = ctdb_vnn_assign_iface(ctdb, vnn);
493 if (ret != 0) {
494 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
495 "assign a usable interface\n",
496 ctdb_addr_to_str(&vnn->public_address),
497 vnn->public_netmask_bits));
498 return -1;
501 state = talloc(vnn, struct ctdb_do_takeip_state);
502 CTDB_NO_MEMORY(ctdb, state);
504 state->c = talloc_steal(ctdb, c);
505 state->vnn = vnn;
507 vnn->update_in_flight = true;
508 talloc_set_destructor(state, ctdb_takeip_destructor);
510 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
511 ctdb_addr_to_str(&vnn->public_address),
512 vnn->public_netmask_bits,
513 ctdb_vnn_iface_string(vnn)));
515 ret = ctdb_event_script_callback(ctdb,
516 state,
517 ctdb_do_takeip_callback,
518 state,
519 CTDB_EVENT_TAKE_IP,
520 "%s %s %u",
521 ctdb_vnn_iface_string(vnn),
522 ctdb_addr_to_str(&vnn->public_address),
523 vnn->public_netmask_bits);
525 if (ret != 0) {
526 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
527 ctdb_addr_to_str(&vnn->public_address),
528 ctdb_vnn_iface_string(vnn)));
529 talloc_free(state);
530 return -1;
533 return 0;
/* State handed to ctdb_do_updateip_callback() by ctdb_do_updateip(). */
struct ctdb_do_updateip_state {
	/* control request to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the move */
	struct ctdb_interface *old;
	/* VNN being moved */
	struct ctdb_vnn *vnn;
};
543 called when updateip event finishes
545 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
546 void *private_data)
548 struct ctdb_do_updateip_state *state =
549 talloc_get_type(private_data, struct ctdb_do_updateip_state);
550 int32_t ret;
552 if (status != 0) {
553 if (status == -ETIME) {
554 ctdb_ban_self(ctdb);
556 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
557 ctdb_addr_to_str(&state->vnn->public_address),
558 state->old->name,
559 ctdb_vnn_iface_string(state->vnn)));
562 * All we can do is reset the old interface
563 * and let the next run fix it
565 ctdb_vnn_unassign_iface(ctdb, state->vnn);
566 state->vnn->iface = state->old;
567 state->vnn->iface->references++;
569 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
570 talloc_free(state);
571 return;
574 if (ctdb->do_checkpublicip) {
576 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
577 if (ret != 0) {
578 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
579 talloc_free(state);
580 return;
585 /* the control succeeded */
586 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
587 talloc_free(state);
588 return;
591 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
593 state->vnn->update_in_flight = false;
594 return 0;
598 update (move) an ip address
600 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
601 struct ctdb_req_control_old *c,
602 struct ctdb_vnn *vnn)
604 int ret;
605 struct ctdb_do_updateip_state *state;
606 struct ctdb_interface *old = vnn->iface;
607 const char *new_name;
609 if (vnn->update_in_flight) {
610 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
611 "update for this IP already in flight\n",
612 ctdb_addr_to_str(&vnn->public_address),
613 vnn->public_netmask_bits));
614 return -1;
617 ctdb_vnn_unassign_iface(ctdb, vnn);
618 ret = ctdb_vnn_assign_iface(ctdb, vnn);
619 if (ret != 0) {
620 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
621 "assin a usable interface (old iface '%s')\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 vnn->public_netmask_bits,
624 old->name));
625 return -1;
628 new_name = ctdb_vnn_iface_string(vnn);
629 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
630 /* A benign update from one interface onto itself.
631 * no need to run the eventscripts in this case, just return
632 * success.
634 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
635 return 0;
638 state = talloc(vnn, struct ctdb_do_updateip_state);
639 CTDB_NO_MEMORY(ctdb, state);
641 state->c = talloc_steal(ctdb, c);
642 state->old = old;
643 state->vnn = vnn;
645 vnn->update_in_flight = true;
646 talloc_set_destructor(state, ctdb_updateip_destructor);
648 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
649 "interface %s to %s\n",
650 ctdb_addr_to_str(&vnn->public_address),
651 vnn->public_netmask_bits,
652 old->name,
653 new_name));
655 ret = ctdb_event_script_callback(ctdb,
656 state,
657 ctdb_do_updateip_callback,
658 state,
659 CTDB_EVENT_UPDATE_IP,
660 "%s %s %s %u",
661 state->old->name,
662 new_name,
663 ctdb_addr_to_str(&vnn->public_address),
664 vnn->public_netmask_bits);
665 if (ret != 0) {
666 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
667 ctdb_addr_to_str(&vnn->public_address),
668 old->name, new_name));
669 talloc_free(state);
670 return -1;
673 return 0;
677 Find the vnn of the node that has a public ip address
678 returns -1 if the address is not known as a public address
680 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
682 struct ctdb_vnn *vnn;
684 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
685 if (ctdb_same_ip(&vnn->public_address, addr)) {
686 return vnn;
690 return NULL;
694 take over an ip address
696 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
697 struct ctdb_req_control_old *c,
698 TDB_DATA indata,
699 bool *async_reply)
701 int ret;
702 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
703 struct ctdb_vnn *vnn;
704 bool have_ip = false;
705 bool do_updateip = false;
706 bool do_takeip = false;
707 struct ctdb_interface *best_iface = NULL;
709 if (pip->pnn != ctdb->pnn) {
710 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
711 "with pnn %d, but we're node %d\n",
712 ctdb_addr_to_str(&pip->addr),
713 pip->pnn, ctdb->pnn));
714 return -1;
717 /* update out vnn list */
718 vnn = find_public_ip_vnn(ctdb, &pip->addr);
719 if (vnn == NULL) {
720 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
721 ctdb_addr_to_str(&pip->addr)));
722 return 0;
725 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
726 have_ip = ctdb_sys_have_ip(&pip->addr);
728 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
729 if (best_iface == NULL) {
730 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
731 "a usable interface (old %s, have_ip %d)\n",
732 ctdb_addr_to_str(&vnn->public_address),
733 vnn->public_netmask_bits,
734 ctdb_vnn_iface_string(vnn),
735 have_ip));
736 return -1;
739 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
740 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
741 have_ip = false;
745 if (vnn->iface == NULL && have_ip) {
746 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
747 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
748 ctdb_addr_to_str(&vnn->public_address)));
749 return 0;
752 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
753 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
754 "and we have it on iface[%s], but it was assigned to node %d"
755 "and we are node %d, banning ourself\n",
756 ctdb_addr_to_str(&vnn->public_address),
757 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
758 ctdb_ban_self(ctdb);
759 return -1;
762 if (vnn->pnn == -1 && have_ip) {
763 vnn->pnn = ctdb->pnn;
764 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
765 "and we already have it on iface[%s], update local daemon\n",
766 ctdb_addr_to_str(&vnn->public_address),
767 ctdb_vnn_iface_string(vnn)));
768 return 0;
771 if (vnn->iface) {
772 if (vnn->iface != best_iface) {
773 if (!vnn->iface->link_up) {
774 do_updateip = true;
775 } else if (vnn->iface->references > (best_iface->references + 1)) {
776 /* only move when the rebalance gains something */
777 do_updateip = true;
782 if (!have_ip) {
783 if (do_updateip) {
784 ctdb_vnn_unassign_iface(ctdb, vnn);
785 do_updateip = false;
787 do_takeip = true;
790 if (do_takeip) {
791 ret = ctdb_do_takeip(ctdb, c, vnn);
792 if (ret != 0) {
793 return -1;
795 } else if (do_updateip) {
796 ret = ctdb_do_updateip(ctdb, c, vnn);
797 if (ret != 0) {
798 return -1;
800 } else {
802 * The interface is up and the kernel known the ip
803 * => do nothing
805 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn)));
809 return 0;
812 /* tell ctdb_control.c that we will be replying asynchronously */
813 *async_reply = true;
815 return 0;
818 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
820 DLIST_REMOVE(ctdb->vnn, vnn);
821 ctdb_vnn_unassign_iface(ctdb, vnn);
822 ctdb_remove_orphaned_ifaces(ctdb, vnn);
823 talloc_free(vnn);
827 called when releaseip event finishes
829 static void release_ip_callback(struct ctdb_context *ctdb, int status,
830 void *private_data)
832 struct takeover_callback_state *state =
833 talloc_get_type(private_data, struct takeover_callback_state);
834 TDB_DATA data;
836 if (status == -ETIME) {
837 ctdb_ban_self(ctdb);
840 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
841 if (ctdb_sys_have_ip(state->addr)) {
842 DEBUG(DEBUG_ERR,
843 ("IP %s still hosted during release IP callback, failing\n",
844 ctdb_addr_to_str(state->addr)));
845 ctdb_request_control_reply(ctdb, state->c,
846 NULL, -1, NULL);
847 talloc_free(state);
848 return;
852 /* send a message to all clients of this node telling them
853 that the cluster has been reconfigured and they should
854 release any sockets on this IP */
855 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
856 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
857 data.dsize = strlen((char *)data.dptr)+1;
859 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
861 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
863 ctdb_vnn_unassign_iface(ctdb, state->vnn);
865 /* Process the IP if it has been marked for deletion */
866 if (state->vnn->delete_pending) {
867 do_delete_ip(ctdb, state->vnn);
868 state->vnn = NULL;
871 /* the control succeeded */
872 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
873 talloc_free(state);
876 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
878 if (state->vnn != NULL) {
879 state->vnn->update_in_flight = false;
881 return 0;
885 release an ip address
887 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
888 struct ctdb_req_control_old *c,
889 TDB_DATA indata,
890 bool *async_reply)
892 int ret;
893 struct takeover_callback_state *state;
894 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
895 struct ctdb_vnn *vnn;
896 char *iface;
898 /* update our vnn list */
899 vnn = find_public_ip_vnn(ctdb, &pip->addr);
900 if (vnn == NULL) {
901 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
902 ctdb_addr_to_str(&pip->addr)));
903 return 0;
905 vnn->pnn = pip->pnn;
907 /* stop any previous arps */
908 talloc_free(vnn->takeover_ctx);
909 vnn->takeover_ctx = NULL;
911 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
912 * lazy multicast to drop an IP from any node that isn't the
913 * intended new node. The following causes makes ctdbd ignore
914 * a release for any address it doesn't host.
916 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
917 if (!ctdb_sys_have_ip(&pip->addr)) {
918 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
919 ctdb_addr_to_str(&pip->addr),
920 vnn->public_netmask_bits,
921 ctdb_vnn_iface_string(vnn)));
922 ctdb_vnn_unassign_iface(ctdb, vnn);
923 return 0;
925 } else {
926 if (vnn->iface == NULL) {
927 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
928 ctdb_addr_to_str(&pip->addr),
929 vnn->public_netmask_bits));
930 return 0;
934 /* There is a potential race between take_ip and us because we
935 * update the VNN via a callback that run when the
936 * eventscripts have been run. Avoid the race by allowing one
937 * update to be in flight at a time.
939 if (vnn->update_in_flight) {
940 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
941 "update for this IP already in flight\n",
942 ctdb_addr_to_str(&vnn->public_address),
943 vnn->public_netmask_bits));
944 return -1;
947 iface = strdup(ctdb_vnn_iface_string(vnn));
949 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
950 ctdb_addr_to_str(&pip->addr),
951 vnn->public_netmask_bits,
952 iface,
953 pip->pnn));
955 state = talloc(ctdb, struct takeover_callback_state);
956 if (state == NULL) {
957 ctdb_set_error(ctdb, "Out of memory at %s:%d",
958 __FILE__, __LINE__);
959 free(iface);
960 return -1;
963 state->c = talloc_steal(state, c);
964 state->addr = talloc(state, ctdb_sock_addr);
965 if (state->addr == NULL) {
966 ctdb_set_error(ctdb, "Out of memory at %s:%d",
967 __FILE__, __LINE__);
968 free(iface);
969 talloc_free(state);
970 return -1;
972 *state->addr = pip->addr;
973 state->vnn = vnn;
975 vnn->update_in_flight = true;
976 talloc_set_destructor(state, ctdb_releaseip_destructor);
978 ret = ctdb_event_script_callback(ctdb,
979 state, release_ip_callback, state,
980 CTDB_EVENT_RELEASE_IP,
981 "%s %s %u",
982 iface,
983 ctdb_addr_to_str(&pip->addr),
984 vnn->public_netmask_bits);
985 free(iface);
986 if (ret != 0) {
987 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
988 ctdb_addr_to_str(&pip->addr),
989 ctdb_vnn_iface_string(vnn)));
990 talloc_free(state);
991 return -1;
994 /* tell the control that we will be reply asynchronously */
995 *async_reply = true;
996 return 0;
999 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1000 ctdb_sock_addr *addr,
1001 unsigned mask, const char *ifaces,
1002 bool check_address)
1004 struct ctdb_vnn *vnn;
1005 uint32_t num = 0;
1006 char *tmp;
1007 const char *iface;
1008 int i;
1009 int ret;
1011 tmp = strdup(ifaces);
1012 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1013 if (!ctdb_sys_check_iface_exists(iface)) {
1014 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1015 free(tmp);
1016 return -1;
1019 free(tmp);
1021 /* Verify that we don't have an entry for this ip yet */
1022 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1023 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1024 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1025 ctdb_addr_to_str(addr)));
1026 return -1;
1030 /* create a new vnn structure for this ip address */
1031 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1032 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1033 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1034 tmp = talloc_strdup(vnn, ifaces);
1035 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1036 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1037 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1038 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1039 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1040 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1041 num++;
1043 talloc_free(tmp);
1044 vnn->ifaces[num] = NULL;
1045 vnn->public_address = *addr;
1046 vnn->public_netmask_bits = mask;
1047 vnn->pnn = -1;
1048 if (check_address) {
1049 if (ctdb_sys_have_ip(addr)) {
1050 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1051 vnn->pnn = ctdb->pnn;
1055 for (i=0; vnn->ifaces[i]; i++) {
1056 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1057 if (ret != 0) {
1058 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1059 "for public_address[%s]\n",
1060 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1061 talloc_free(vnn);
1062 return -1;
1066 DLIST_ADD(ctdb->vnn, vnn);
1068 return 0;
1072 setup the public address lists from a file
1074 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1076 char **lines;
1077 int nlines;
1078 int i;
1080 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1081 if (lines == NULL) {
1082 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1083 return -1;
1085 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1086 nlines--;
1089 for (i=0;i<nlines;i++) {
1090 unsigned mask;
1091 ctdb_sock_addr addr;
1092 const char *addrstr;
1093 const char *ifaces;
1094 char *tok, *line;
1096 line = lines[i];
1097 while ((*line == ' ') || (*line == '\t')) {
1098 line++;
1100 if (*line == '#') {
1101 continue;
1103 if (strcmp(line, "") == 0) {
1104 continue;
1106 tok = strtok(line, " \t");
1107 addrstr = tok;
1108 tok = strtok(NULL, " \t");
1109 if (tok == NULL) {
1110 if (NULL == ctdb->default_public_interface) {
1111 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1112 i+1));
1113 talloc_free(lines);
1114 return -1;
1116 ifaces = ctdb->default_public_interface;
1117 } else {
1118 ifaces = tok;
1121 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1122 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1123 talloc_free(lines);
1124 return -1;
1126 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1127 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1128 talloc_free(lines);
1129 return -1;
1134 talloc_free(lines);
1135 return 0;
1138 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1139 const char *iface,
1140 const char *ip)
1142 struct ctdb_vnn *svnn;
1143 struct ctdb_interface *cur = NULL;
1144 bool ok;
1145 int ret;
1147 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1148 CTDB_NO_MEMORY(ctdb, svnn);
1150 svnn->ifaces = talloc_array(svnn, const char *, 2);
1151 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1152 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1153 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1154 svnn->ifaces[1] = NULL;
1156 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1157 if (!ok) {
1158 talloc_free(svnn);
1159 return -1;
1162 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1163 if (ret != 0) {
1164 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1165 "for single_ip[%s]\n",
1166 svnn->ifaces[0],
1167 ctdb_addr_to_str(&svnn->public_address)));
1168 talloc_free(svnn);
1169 return -1;
1172 /* assume the single public ip interface is initially "good" */
1173 cur = ctdb_find_iface(ctdb, iface);
1174 if (cur == NULL) {
1175 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1176 return -1;
1178 cur->link_up = true;
1180 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1181 if (ret != 0) {
1182 talloc_free(svnn);
1183 return -1;
1186 ctdb->single_ip_vnn = svnn;
1187 return 0;
1190 static void *add_ip_callback(void *parm, void *data)
1192 struct public_ip_list *this_ip = parm;
1193 struct public_ip_list *prev_ip = data;
1195 if (prev_ip == NULL) {
1196 return parm;
1198 if (this_ip->pnn == -1) {
1199 this_ip->pnn = prev_ip->pnn;
1202 return parm;
1205 static int getips_count_callback(void *param, void *data)
1207 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1208 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1210 new_ip->next = *ip_list;
1211 *ip_list = new_ip;
1212 return 0;
/* Forward declaration: defined later in this file, used by
 * ctdb_reload_remote_public_ips() below. */
static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
				       struct ctdb_public_ip_list_old *ips,
				       uint32_t pnn);
1219 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1220 struct ipalloc_state *ipalloc_state,
1221 struct ctdb_node_map_old *nodemap)
1223 int j;
1224 int ret;
1226 if (ipalloc_state->num != nodemap->num) {
1227 DEBUG(DEBUG_ERR,
1228 (__location__
1229 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1230 ipalloc_state->num, nodemap->num));
1231 return -1;
1234 for (j=0; j<nodemap->num; j++) {
1235 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1236 continue;
1239 /* Retrieve the list of known public IPs from the node */
1240 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1241 TAKEOVER_TIMEOUT(),
1243 ipalloc_state->known_public_ips,
1245 &ipalloc_state->known_public_ips[j]);
1246 if (ret != 0) {
1247 DEBUG(DEBUG_ERR,
1248 ("Failed to read known public IPs from node: %u\n",
1249 j));
1250 return -1;
1253 if (ctdb->do_checkpublicip) {
1254 verify_remote_ip_allocation(ctdb,
1255 ipalloc_state->known_public_ips[j],
1259 /* Retrieve the list of available public IPs from the node */
1260 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1261 TAKEOVER_TIMEOUT(),
1263 ipalloc_state->available_public_ips,
1264 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1265 &ipalloc_state->available_public_ips[j]);
1266 if (ret != 0) {
1267 DEBUG(DEBUG_ERR,
1268 ("Failed to read available public IPs from node: %u\n",
1269 j));
1270 return -1;
1274 return 0;
1277 static struct public_ip_list *
1278 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1280 int i, j;
1281 struct public_ip_list *ip_list;
1282 struct ctdb_public_ip_list_old *public_ips;
1284 TALLOC_FREE(ctdb->ip_tree);
1285 ctdb->ip_tree = trbt_create(ctdb, 0);
1287 for (i=0; i < ctdb->num_nodes; i++) {
1288 public_ips = ipalloc_state->known_public_ips[i];
1290 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1291 continue;
1294 /* there were no public ips for this node */
1295 if (public_ips == NULL) {
1296 continue;
1299 for (j=0; j < public_ips->num; j++) {
1300 struct public_ip_list *tmp_ip;
1302 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1303 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1304 /* Do not use information about IP addresses hosted
1305 * on other nodes, it may not be accurate */
1306 if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
1307 tmp_ip->pnn = public_ips->ips[j].pnn;
1308 } else {
1309 tmp_ip->pnn = -1;
1311 tmp_ip->addr = public_ips->ips[j].addr;
1312 tmp_ip->next = NULL;
1314 trbt_insertarray32_callback(ctdb->ip_tree,
1315 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1316 add_ip_callback,
1317 tmp_ip);
1321 ip_list = NULL;
1322 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1324 return ip_list;
1327 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1329 int i;
1331 for (i=0;i<nodemap->num;i++) {
1332 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1333 /* Found one completely healthy node */
1334 return false;
1338 return true;
/* State shared between get_tunable_from_nodes() and its callbacks */
struct get_tunable_callback_data {
	/* Name of the tunable being fetched (used in log messages) */
	const char *tunable;
	/* talloc array receiving one value per node, indexed by PNN */
	uint32_t *out;
	/* Set by the callbacks when the whole fetch must be abandoned */
	bool fatal;
};
1347 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1348 int32_t res, TDB_DATA outdata,
1349 void *callback)
1351 struct get_tunable_callback_data *cd =
1352 (struct get_tunable_callback_data *)callback;
1353 int size;
1355 if (res != 0) {
1356 /* Already handled in fail callback */
1357 return;
1360 if (outdata.dsize != sizeof(uint32_t)) {
1361 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1362 cd->tunable, pnn, (int)sizeof(uint32_t),
1363 (int)outdata.dsize));
1364 cd->fatal = true;
1365 return;
1368 size = talloc_array_length(cd->out);
1369 if (pnn >= size) {
1370 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1371 cd->tunable, pnn, size));
1372 return;
1376 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1379 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1380 int32_t res, TDB_DATA outdata,
1381 void *callback)
1383 struct get_tunable_callback_data *cd =
1384 (struct get_tunable_callback_data *)callback;
1386 switch (res) {
1387 case -ETIME:
1388 DEBUG(DEBUG_ERR,
1389 ("Timed out getting tunable \"%s\" from node %d\n",
1390 cd->tunable, pnn));
1391 cd->fatal = true;
1392 break;
1393 case -EINVAL:
1394 case -1:
1395 DEBUG(DEBUG_WARNING,
1396 ("Tunable \"%s\" not implemented on node %d\n",
1397 cd->tunable, pnn));
1398 break;
1399 default:
1400 DEBUG(DEBUG_ERR,
1401 ("Unexpected error getting tunable \"%s\" from node %d\n",
1402 cd->tunable, pnn));
1403 cd->fatal = true;
1407 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1408 TALLOC_CTX *tmp_ctx,
1409 struct ctdb_node_map_old *nodemap,
1410 const char *tunable,
1411 uint32_t default_value)
1413 TDB_DATA data;
1414 struct ctdb_control_get_tunable *t;
1415 uint32_t *nodes;
1416 uint32_t *tvals;
1417 struct get_tunable_callback_data callback_data;
1418 int i;
1420 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1421 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1422 for (i=0; i<nodemap->num; i++) {
1423 tvals[i] = default_value;
1426 callback_data.out = tvals;
1427 callback_data.tunable = tunable;
1428 callback_data.fatal = false;
1430 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1431 data.dptr = talloc_size(tmp_ctx, data.dsize);
1432 t = (struct ctdb_control_get_tunable *)data.dptr;
1433 t->length = strlen(tunable)+1;
1434 memcpy(t->name, tunable, t->length);
1435 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1436 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1437 nodes, 0, TAKEOVER_TIMEOUT(),
1438 false, data,
1439 get_tunable_callback,
1440 get_tunable_fail_callback,
1441 &callback_data) != 0) {
1442 if (callback_data.fatal) {
1443 talloc_free(tvals);
1444 tvals = NULL;
1447 talloc_free(nodes);
1448 talloc_free(data.dptr);
1450 return tvals;
1453 /* Set internal flags for IP allocation:
1454 * Clear ip flags
1455 * Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1456 * Set NOIPHOST ip flag for each INACTIVE node
1457 * if all nodes are disabled:
1458 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1459 * else
1460 * Set NOIPHOST ip flags for disabled nodes
1462 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1463 struct ctdb_node_map_old *nodemap,
1464 uint32_t *tval_noiptakeover,
1465 uint32_t *tval_noiphostonalldisabled)
1467 int i;
1469 for (i=0;i<nodemap->num;i++) {
1470 /* Can not take IPs on node with NoIPTakeover set */
1471 if (tval_noiptakeover[i] != 0) {
1472 ipalloc_state->noiptakeover[i] = true;
1475 /* Can not host IPs on INACTIVE node */
1476 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1477 ipalloc_state->noiphost[i] = true;
1481 if (all_nodes_are_disabled(nodemap)) {
1482 /* If all nodes are disabled, can not host IPs on node
1483 * with NoIPHostOnAllDisabled set
1485 for (i=0;i<nodemap->num;i++) {
1486 if (tval_noiphostonalldisabled[i] != 0) {
1487 ipalloc_state->noiphost[i] = true;
1490 } else {
1491 /* If some nodes are not disabled, then can not host
1492 * IPs on DISABLED node
1494 for (i=0;i<nodemap->num;i++) {
1495 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1496 ipalloc_state->noiphost[i] = true;
/*
 * Fetch the NoIPTakeover and NoIPHostOnAllDisabled tunables from the
 * connected nodes (defaulting to 0) and derive the per-node
 * noiptakeover/noiphost flags in ipalloc_state from them.
 *
 * The temporary value arrays are allocated off ipalloc_state.
 * Returns false if either tunable could not be fetched, true
 * otherwise.
 */
static bool set_ipflags(struct ctdb_context *ctdb,
			struct ipalloc_state *ipalloc_state,
			struct ctdb_node_map_old *nodemap)
{
	uint32_t *takeover_vals;
	uint32_t *hostdisabled_vals;

	takeover_vals = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
					       "NoIPTakeover", 0);
	if (takeover_vals == NULL) {
		return false;
	}

	hostdisabled_vals = get_tunable_from_nodes(ctdb, ipalloc_state,
						   nodemap,
						   "NoIPHostOnAllDisabled", 0);
	if (hostdisabled_vals == NULL) {
		/* takeover_vals stays attached to ipalloc_state and is
		 * released when the caller frees that */
		return false;
	}

	set_ipflags_internal(ipalloc_state, nodemap,
			     takeover_vals, hostdisabled_vals);

	talloc_free(takeover_vals);
	talloc_free(hostdisabled_vals);

	return true;
}
1533 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1534 TALLOC_CTX *mem_ctx)
1536 struct ipalloc_state *ipalloc_state =
1537 talloc_zero(mem_ctx, struct ipalloc_state);
1538 if (ipalloc_state == NULL) {
1539 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1540 return NULL;
1543 ipalloc_state->num = ctdb->num_nodes;
1544 ipalloc_state->known_public_ips =
1545 talloc_zero_array(ipalloc_state,
1546 struct ctdb_public_ip_list_old *,
1547 ipalloc_state->num);
1548 if (ipalloc_state->known_public_ips == NULL) {
1549 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1550 talloc_free(ipalloc_state);
1551 return NULL;
1553 ipalloc_state->available_public_ips =
1554 talloc_zero_array(ipalloc_state,
1555 struct ctdb_public_ip_list_old *,
1556 ipalloc_state->num);
1557 if (ipalloc_state->available_public_ips == NULL) {
1558 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1559 talloc_free(ipalloc_state);
1560 return NULL;
1562 ipalloc_state->noiptakeover =
1563 talloc_zero_array(ipalloc_state,
1564 bool,
1565 ipalloc_state->num);
1566 if (ipalloc_state->noiptakeover == NULL) {
1567 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1568 talloc_free(ipalloc_state);
1569 return NULL;
1571 ipalloc_state->noiphost =
1572 talloc_zero_array(ipalloc_state,
1573 bool,
1574 ipalloc_state->num);
1575 if (ipalloc_state->noiphost == NULL) {
1576 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1577 talloc_free(ipalloc_state);
1578 return NULL;
1581 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1582 ipalloc_state->algorithm = IPALLOC_LCP2;
1583 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1584 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1585 } else {
1586 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1589 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1591 return ipalloc_state;
1594 struct iprealloc_callback_data {
1595 bool *retry_nodes;
1596 int retry_count;
1597 client_async_callback fail_callback;
1598 void *fail_callback_data;
1599 struct ctdb_node_map_old *nodemap;
1602 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1603 int32_t res, TDB_DATA outdata,
1604 void *callback)
1606 int numnodes;
1607 struct iprealloc_callback_data *cd =
1608 (struct iprealloc_callback_data *)callback;
1610 numnodes = talloc_array_length(cd->retry_nodes);
1611 if (pnn > numnodes) {
1612 DEBUG(DEBUG_ERR,
1613 ("ipreallocated failure from node %d, "
1614 "but only %d nodes in nodemap\n",
1615 pnn, numnodes));
1616 return;
1619 /* Can't run the "ipreallocated" event on a INACTIVE node */
1620 if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1621 DEBUG(DEBUG_WARNING,
1622 ("ipreallocated failed on inactive node %d, ignoring\n",
1623 pnn));
1624 return;
1627 switch (res) {
1628 case -ETIME:
1629 /* If the control timed out then that's a real error,
1630 * so call the real fail callback
1632 if (cd->fail_callback) {
1633 cd->fail_callback(ctdb, pnn, res, outdata,
1634 cd->fail_callback_data);
1635 } else {
1636 DEBUG(DEBUG_WARNING,
1637 ("iprealloc timed out but no callback registered\n"));
1639 break;
1640 default:
1641 /* If not a timeout then either the ipreallocated
1642 * eventscript (or some setup) failed. This might
1643 * have failed because the IPREALLOCATED control isn't
1644 * implemented - right now there is no way of knowing
1645 * because the error codes are all folded down to -1.
1646 * Consider retrying using EVENTSCRIPT control...
1648 DEBUG(DEBUG_WARNING,
1649 ("ipreallocated failure from node %d, flagging retry\n",
1650 pnn));
1651 cd->retry_nodes[pnn] = true;
1652 cd->retry_count++;
1656 struct takeover_callback_data {
1657 bool *node_failed;
1658 client_async_callback fail_callback;
1659 void *fail_callback_data;
1660 struct ctdb_node_map_old *nodemap;
1663 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1664 uint32_t node_pnn, int32_t res,
1665 TDB_DATA outdata, void *callback_data)
1667 struct takeover_callback_data *cd =
1668 talloc_get_type_abort(callback_data,
1669 struct takeover_callback_data);
1670 int i;
1672 for (i = 0; i < cd->nodemap->num; i++) {
1673 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1674 break;
1678 if (i == cd->nodemap->num) {
1679 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1680 return;
1683 if (!cd->node_failed[i]) {
1684 cd->node_failed[i] = true;
1685 cd->fail_callback(ctdb, node_pnn, res, outdata,
1686 cd->fail_callback_data);
1691 * Recalculate the allocation of public IPs to nodes and have the
1692 * nodes host their allocated addresses.
1694 * - Allocate memory for IP allocation state, including per node
1695 * arrays
1696 * - Populate IP allocation algorithm in IP allocation state
1697 * - Populate local value of tunable NoIPFailback in IP allocation
1698 state - this is really a cluster-wide configuration variable and
1699 only the value form the master node is used
1700 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1701 * connected nodes - this is done separately so tunable values can
1702 * be faked in unit testing
1703 * - Populate NoIPTakover tunable in IP allocation state
1704 * - Populate NoIPHost in IP allocation state, derived from node flags
1705 * and NoIPHostOnAllDisabled tunable
1706 * - Retrieve and populate known and available IP lists in IP
1707 * allocation state
1708 * - If no available IP addresses then early exit
1709 * - Build list of (known IPs, currently assigned node)
1710 * - Populate list of nodes to force rebalance - internal structure,
1711 * currently no way to fetch, only used by LCP2 for nodes that have
1712 * had new IP addresses added
1713 * - Run IP allocation algorithm
1714 * - Send RELEASE_IP to all nodes for IPs they should not host
1715 * - Send TAKE_IP to all nodes for IPs they should host
1716 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1718 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1719 uint32_t *force_rebalance_nodes,
1720 client_async_callback fail_callback, void *callback_data)
1722 int i, j, ret;
1723 struct ctdb_public_ip ip;
1724 uint32_t *nodes;
1725 struct public_ip_list *all_ips, *tmp_ip;
1726 TDB_DATA data;
1727 struct timeval timeout;
1728 struct client_async_data *async_data;
1729 struct ctdb_client_control_state *state;
1730 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1731 struct ipalloc_state *ipalloc_state;
1732 struct takeover_callback_data *takeover_data;
1733 struct iprealloc_callback_data iprealloc_data;
1734 bool *retry_data;
1735 bool can_host_ips;
1738 * ip failover is completely disabled, just send out the
1739 * ipreallocated event.
1741 if (ctdb->tunable.disable_ip_failover != 0) {
1742 goto ipreallocated;
1745 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1746 if (ipalloc_state == NULL) {
1747 talloc_free(tmp_ctx);
1748 return -1;
1751 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1752 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1753 talloc_free(tmp_ctx);
1754 return -1;
1757 /* Fetch known/available public IPs from each active node */
1758 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1759 if (ret != 0) {
1760 talloc_free(tmp_ctx);
1761 return -1;
1764 /* Short-circuit IP allocation if no node has available IPs */
1765 can_host_ips = false;
1766 for (i=0; i < ipalloc_state->num; i++) {
1767 if (ipalloc_state->available_public_ips[i] != NULL) {
1768 can_host_ips = true;
1771 if (!can_host_ips) {
1772 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1773 return 0;
1776 /* since nodes only know about those public addresses that
1777 can be served by that particular node, no single node has
1778 a full list of all public addresses that exist in the cluster.
1779 Walk over all node structures and create a merged list of
1780 all public addresses that exist in the cluster.
1782 keep the tree of ips around as ctdb->ip_tree
1784 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1785 ipalloc_state->all_ips = all_ips;
1787 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1789 /* Do the IP reassignment calculations */
1790 ipalloc(ipalloc_state);
1792 /* Now tell all nodes to release any public IPs should not
1793 * host. This will be a NOOP on nodes that don't currently
1794 * hold the given IP.
1796 takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1797 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1799 takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1800 bool, nodemap->num);
1801 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1802 takeover_data->fail_callback = fail_callback;
1803 takeover_data->fail_callback_data = callback_data;
1804 takeover_data->nodemap = nodemap;
1806 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1807 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1809 async_data->fail_callback = takeover_run_fail_callback;
1810 async_data->callback_data = takeover_data;
1812 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1814 /* Send a RELEASE_IP to all nodes that should not be hosting
1815 * each IP. For each IP, all but one of these will be
1816 * redundant. However, the redundant ones are used to tell
1817 * nodes which node should be hosting the IP so that commands
1818 * like "ctdb ip" can display a particular nodes idea of who
1819 * is hosting what. */
1820 for (i=0;i<nodemap->num;i++) {
1821 /* don't talk to unconnected nodes, but do talk to banned nodes */
1822 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1823 continue;
1826 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1827 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1828 /* This node should be serving this
1829 vnn so don't tell it to release the ip
1831 continue;
1833 ip.pnn = tmp_ip->pnn;
1834 ip.addr = tmp_ip->addr;
1836 timeout = TAKEOVER_TIMEOUT();
1837 data.dsize = sizeof(ip);
1838 data.dptr = (uint8_t *)&ip;
1839 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1840 0, CTDB_CONTROL_RELEASE_IP, 0,
1841 data, async_data,
1842 &timeout, NULL);
1843 if (state == NULL) {
1844 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1845 talloc_free(tmp_ctx);
1846 return -1;
1849 ctdb_client_async_add(async_data, state);
1852 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1853 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1854 talloc_free(tmp_ctx);
1855 return -1;
1857 talloc_free(async_data);
1860 /* For each IP, send a TAKOVER_IP to the node that should be
1861 * hosting it. Many of these will often be redundant (since
1862 * the allocation won't have changed) but they can be useful
1863 * to recover from inconsistencies. */
1864 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1865 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1867 async_data->fail_callback = fail_callback;
1868 async_data->callback_data = callback_data;
1870 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1871 if (tmp_ip->pnn == -1) {
1872 /* this IP won't be taken over */
1873 continue;
1876 ip.pnn = tmp_ip->pnn;
1877 ip.addr = tmp_ip->addr;
1879 timeout = TAKEOVER_TIMEOUT();
1880 data.dsize = sizeof(ip);
1881 data.dptr = (uint8_t *)&ip;
1882 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1883 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1884 data, async_data, &timeout, NULL);
1885 if (state == NULL) {
1886 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1887 talloc_free(tmp_ctx);
1888 return -1;
1891 ctdb_client_async_add(async_data, state);
1893 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1894 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1895 talloc_free(tmp_ctx);
1896 return -1;
1899 ipreallocated:
1901 * Tell all nodes to run eventscripts to process the
1902 * "ipreallocated" event. This can do a lot of things,
1903 * including restarting services to reconfigure them if public
1904 * IPs have moved. Once upon a time this event only used to
1905 * update natgw.
1907 retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1908 CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1909 iprealloc_data.retry_nodes = retry_data;
1910 iprealloc_data.retry_count = 0;
1911 iprealloc_data.fail_callback = fail_callback;
1912 iprealloc_data.fail_callback_data = callback_data;
1913 iprealloc_data.nodemap = nodemap;
1915 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1916 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1917 nodes, 0, TAKEOVER_TIMEOUT(),
1918 false, tdb_null,
1919 NULL, iprealloc_fail_callback,
1920 &iprealloc_data);
1921 if (ret != 0) {
1922 /* If the control failed then we should retry to any
1923 * nodes flagged by iprealloc_fail_callback using the
1924 * EVENTSCRIPT control. This is a best-effort at
1925 * backward compatiblity when running a mixed cluster
1926 * where some nodes have not yet been upgraded to
1927 * support the IPREALLOCATED control.
1929 DEBUG(DEBUG_WARNING,
1930 ("Retry ipreallocated to some nodes using eventscript control\n"));
1932 nodes = talloc_array(tmp_ctx, uint32_t,
1933 iprealloc_data.retry_count);
1934 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1936 j = 0;
1937 for (i=0; i<nodemap->num; i++) {
1938 if (iprealloc_data.retry_nodes[i]) {
1939 nodes[j] = i;
1940 j++;
1944 data.dptr = discard_const("ipreallocated");
1945 data.dsize = strlen((char *)data.dptr) + 1;
1946 ret = ctdb_client_async_control(ctdb,
1947 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1948 nodes, 0, TAKEOVER_TIMEOUT(),
1949 false, data,
1950 NULL, fail_callback,
1951 callback_data);
1952 if (ret != 0) {
1953 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1957 talloc_free(tmp_ctx);
1958 return ret;
1963 destroy a ctdb_client_ip structure
1965 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1967 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1968 ctdb_addr_to_str(&ip->addr),
1969 ntohs(ip->addr.ip.sin_port),
1970 ip->client_id));
1972 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1973 return 0;
1977 called by a client to inform us of a TCP connection that it is managing
1978 that should tickled with an ACK when IP takeover is done
1980 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1981 TDB_DATA indata)
1983 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1984 struct ctdb_connection *tcp_sock = NULL;
1985 struct ctdb_tcp_list *tcp;
1986 struct ctdb_connection t;
1987 int ret;
1988 TDB_DATA data;
1989 struct ctdb_client_ip *ip;
1990 struct ctdb_vnn *vnn;
1991 ctdb_sock_addr addr;
1993 /* If we don't have public IPs, tickles are useless */
1994 if (ctdb->vnn == NULL) {
1995 return 0;
1998 tcp_sock = (struct ctdb_connection *)indata.dptr;
2000 addr = tcp_sock->src;
2001 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2002 addr = tcp_sock->dst;
2003 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2005 ZERO_STRUCT(addr);
2006 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2007 vnn = find_public_ip_vnn(ctdb, &addr);
2008 if (vnn == NULL) {
2009 switch (addr.sa.sa_family) {
2010 case AF_INET:
2011 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2012 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2013 ctdb_addr_to_str(&addr)));
2015 break;
2016 case AF_INET6:
2017 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2018 ctdb_addr_to_str(&addr)));
2019 break;
2020 default:
2021 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2024 return 0;
2027 if (vnn->pnn != ctdb->pnn) {
2028 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2029 ctdb_addr_to_str(&addr),
2030 client_id, client->pid));
2031 /* failing this call will tell smbd to die */
2032 return -1;
2035 ip = talloc(client, struct ctdb_client_ip);
2036 CTDB_NO_MEMORY(ctdb, ip);
2038 ip->ctdb = ctdb;
2039 ip->addr = addr;
2040 ip->client_id = client_id;
2041 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2042 DLIST_ADD(ctdb->client_ip_list, ip);
2044 tcp = talloc(client, struct ctdb_tcp_list);
2045 CTDB_NO_MEMORY(ctdb, tcp);
2047 tcp->connection.src = tcp_sock->src;
2048 tcp->connection.dst = tcp_sock->dst;
2050 DLIST_ADD(client->tcp_list, tcp);
2052 t.src = tcp_sock->src;
2053 t.dst = tcp_sock->dst;
2055 data.dptr = (uint8_t *)&t;
2056 data.dsize = sizeof(t);
2058 switch (addr.sa.sa_family) {
2059 case AF_INET:
2060 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2061 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2062 ctdb_addr_to_str(&tcp_sock->src),
2063 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2064 break;
2065 case AF_INET6:
2066 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2067 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2068 ctdb_addr_to_str(&tcp_sock->src),
2069 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2070 break;
2071 default:
2072 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2076 /* tell all nodes about this tcp connection */
2077 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2078 CTDB_CONTROL_TCP_ADD,
2079 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2080 if (ret != 0) {
2081 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2082 return -1;
2085 return 0;
2089 find a tcp address on a list
2091 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2092 struct ctdb_connection *tcp)
2094 int i;
2096 if (array == NULL) {
2097 return NULL;
2100 for (i=0;i<array->num;i++) {
2101 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2102 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2103 return &array->connections[i];
2106 return NULL;
2112 called by a daemon to inform us of a TCP connection that one of its
2113 clients managing that should tickled with an ACK when IP takeover is
2114 done
2116 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2118 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2119 struct ctdb_tcp_array *tcparray;
2120 struct ctdb_connection tcp;
2121 struct ctdb_vnn *vnn;
2123 /* If we don't have public IPs, tickles are useless */
2124 if (ctdb->vnn == NULL) {
2125 return 0;
2128 vnn = find_public_ip_vnn(ctdb, &p->dst);
2129 if (vnn == NULL) {
2130 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2131 ctdb_addr_to_str(&p->dst)));
2133 return -1;
2137 tcparray = vnn->tcp_array;
2139 /* If this is the first tickle */
2140 if (tcparray == NULL) {
2141 tcparray = talloc(vnn, struct ctdb_tcp_array);
2142 CTDB_NO_MEMORY(ctdb, tcparray);
2143 vnn->tcp_array = tcparray;
2145 tcparray->num = 0;
2146 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2147 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2149 tcparray->connections[tcparray->num].src = p->src;
2150 tcparray->connections[tcparray->num].dst = p->dst;
2151 tcparray->num++;
2153 if (tcp_update_needed) {
2154 vnn->tcp_update_needed = true;
2156 return 0;
2160 /* Do we already have this tickle ?*/
2161 tcp.src = p->src;
2162 tcp.dst = p->dst;
2163 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2164 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2165 ctdb_addr_to_str(&tcp.dst),
2166 ntohs(tcp.dst.ip.sin_port),
2167 vnn->pnn));
2168 return 0;
2171 /* A new tickle, we must add it to the array */
2172 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2173 struct ctdb_connection,
2174 tcparray->num+1);
2175 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2177 tcparray->connections[tcparray->num].src = p->src;
2178 tcparray->connections[tcparray->num].dst = p->dst;
2179 tcparray->num++;
2181 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2182 ctdb_addr_to_str(&tcp.dst),
2183 ntohs(tcp.dst.ip.sin_port),
2184 vnn->pnn));
2186 if (tcp_update_needed) {
2187 vnn->tcp_update_needed = true;
2190 return 0;
2194 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2196 struct ctdb_connection *tcpp;
2198 if (vnn == NULL) {
2199 return;
2202 /* if the array is empty we cant remove it
2203 and we don't need to do anything
2205 if (vnn->tcp_array == NULL) {
2206 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2207 ctdb_addr_to_str(&conn->dst),
2208 ntohs(conn->dst.ip.sin_port)));
2209 return;
2213 /* See if we know this connection
2214 if we don't know this connection then we dont need to do anything
2216 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2217 if (tcpp == NULL) {
2218 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2219 ctdb_addr_to_str(&conn->dst),
2220 ntohs(conn->dst.ip.sin_port)));
2221 return;
2225 /* We need to remove this entry from the array.
2226 Instead of allocating a new array and copying data to it
2227 we cheat and just copy the last entry in the existing array
2228 to the entry that is to be removed and just shring the
2229 ->num field
2231 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2232 vnn->tcp_array->num--;
2234 /* If we deleted the last entry we also need to remove the entire array
2236 if (vnn->tcp_array->num == 0) {
2237 talloc_free(vnn->tcp_array);
2238 vnn->tcp_array = NULL;
2241 vnn->tcp_update_needed = true;
2243 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2244 ctdb_addr_to_str(&conn->src),
2245 ntohs(conn->src.ip.sin_port)));
2250 called by a daemon to inform us of a TCP connection that one of its
2251 clients used are no longer needed in the tickle database
2253 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2255 struct ctdb_vnn *vnn;
2256 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2258 /* If we don't have public IPs, tickles are useless */
2259 if (ctdb->vnn == NULL) {
2260 return 0;
2263 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2264 if (vnn == NULL) {
2265 DEBUG(DEBUG_ERR,
2266 (__location__ " unable to find public address %s\n",
2267 ctdb_addr_to_str(&conn->dst)));
2268 return 0;
2271 ctdb_remove_connection(vnn, conn);
2273 return 0;
2278 Called when another daemon starts - causes all tickles for all
2279 public addresses we are serving to be sent to the new node on the
2280 next check. This actually causes the next scheduled call to
2281 tdb_update_tcp_tickles() to update all nodes. This is simple and
2282 doesn't require careful error handling.
2284 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2286 struct ctdb_vnn *vnn;
2288 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2289 (unsigned long) pnn));
2291 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2292 vnn->tcp_update_needed = true;
2295 return 0;
2300 called when a client structure goes away - hook to remove
2301 elements from the tcp_list in all daemons
2303 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2305 while (client->tcp_list) {
2306 struct ctdb_vnn *vnn;
2307 struct ctdb_tcp_list *tcp = client->tcp_list;
2308 struct ctdb_connection *conn = &tcp->connection;
2310 DLIST_REMOVE(client->tcp_list, tcp);
2312 vnn = find_public_ip_vnn(client->ctdb,
2313 &conn->dst);
2314 if (vnn == NULL) {
2315 DEBUG(DEBUG_ERR,
2316 (__location__ " unable to find public address %s\n",
2317 ctdb_addr_to_str(&conn->dst)));
2318 continue;
2321 /* If the IP address is hosted on this node then
2322 * remove the connection. */
2323 if (vnn->pnn == client->ctdb->pnn) {
2324 ctdb_remove_connection(vnn, conn);
2327 /* Otherwise this function has been called because the
2328 * server IP address has been released to another node
2329 * and the client has exited. This means that we
2330 * should not delete the connection information. The
2331 * takeover node processes connections too. */
2336 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2338 struct ctdb_vnn *vnn;
2339 int count = 0;
2341 if (ctdb->tunable.disable_ip_failover == 1) {
2342 return;
2345 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2346 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2347 ctdb_vnn_unassign_iface(ctdb, vnn);
2348 continue;
2350 if (!vnn->iface) {
2351 continue;
2354 /* Don't allow multiple releases at once. Some code,
2355 * particularly ctdb_tickle_sentenced_connections() is
2356 * not re-entrant */
2357 if (vnn->update_in_flight) {
2358 DEBUG(DEBUG_WARNING,
2359 (__location__
2360 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2361 ctdb_addr_to_str(&vnn->public_address),
2362 vnn->public_netmask_bits,
2363 ctdb_vnn_iface_string(vnn)));
2364 continue;
2366 vnn->update_in_flight = true;
2368 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2369 ctdb_addr_to_str(&vnn->public_address),
2370 vnn->public_netmask_bits,
2371 ctdb_vnn_iface_string(vnn)));
2373 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2374 ctdb_vnn_iface_string(vnn),
2375 ctdb_addr_to_str(&vnn->public_address),
2376 vnn->public_netmask_bits);
2377 ctdb_vnn_unassign_iface(ctdb, vnn);
2378 vnn->update_in_flight = false;
2379 count++;
2382 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2387 get list of public IPs
2389 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2390 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2392 int i, num, len;
2393 struct ctdb_public_ip_list_old *ips;
2394 struct ctdb_vnn *vnn;
2395 bool only_available = false;
2397 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2398 only_available = true;
2401 /* count how many public ip structures we have */
2402 num = 0;
2403 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2404 num++;
2407 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2408 num*sizeof(struct ctdb_public_ip);
2409 ips = talloc_zero_size(outdata, len);
2410 CTDB_NO_MEMORY(ctdb, ips);
2412 i = 0;
2413 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2414 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2415 continue;
2417 ips->ips[i].pnn = vnn->pnn;
2418 ips->ips[i].addr = vnn->public_address;
2419 i++;
2421 ips->num = i;
2422 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2423 i*sizeof(struct ctdb_public_ip);
2425 outdata->dsize = len;
2426 outdata->dptr = (uint8_t *)ips;
2428 return 0;
2432 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2433 struct ctdb_req_control_old *c,
2434 TDB_DATA indata,
2435 TDB_DATA *outdata)
2437 int i, num, len;
2438 ctdb_sock_addr *addr;
2439 struct ctdb_public_ip_info_old *info;
2440 struct ctdb_vnn *vnn;
2442 addr = (ctdb_sock_addr *)indata.dptr;
2444 vnn = find_public_ip_vnn(ctdb, addr);
2445 if (vnn == NULL) {
2446 /* if it is not a public ip it could be our 'single ip' */
2447 if (ctdb->single_ip_vnn) {
2448 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2449 vnn = ctdb->single_ip_vnn;
2453 if (vnn == NULL) {
2454 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2455 "'%s'not a public address\n",
2456 ctdb_addr_to_str(addr)));
2457 return -1;
2460 /* count how many public ip structures we have */
2461 num = 0;
2462 for (;vnn->ifaces[num];) {
2463 num++;
2466 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2467 num*sizeof(struct ctdb_iface);
2468 info = talloc_zero_size(outdata, len);
2469 CTDB_NO_MEMORY(ctdb, info);
2471 info->ip.addr = vnn->public_address;
2472 info->ip.pnn = vnn->pnn;
2473 info->active_idx = 0xFFFFFFFF;
2475 for (i=0; vnn->ifaces[i]; i++) {
2476 struct ctdb_interface *cur;
2478 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2479 if (cur == NULL) {
2480 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2481 vnn->ifaces[i]));
2482 return -1;
2484 if (vnn->iface == cur) {
2485 info->active_idx = i;
2487 strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
2488 info->ifaces[i].link_state = cur->link_up;
2489 info->ifaces[i].references = cur->references;
2491 info->num = i;
2492 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2493 i*sizeof(struct ctdb_iface);
2495 outdata->dsize = len;
2496 outdata->dptr = (uint8_t *)info;
2498 return 0;
2501 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2502 struct ctdb_req_control_old *c,
2503 TDB_DATA *outdata)
2505 int i, num, len;
2506 struct ctdb_iface_list_old *ifaces;
2507 struct ctdb_interface *cur;
2509 /* count how many public ip structures we have */
2510 num = 0;
2511 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2512 num++;
2515 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2516 num*sizeof(struct ctdb_iface);
2517 ifaces = talloc_zero_size(outdata, len);
2518 CTDB_NO_MEMORY(ctdb, ifaces);
2520 i = 0;
2521 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2522 strcpy(ifaces->ifaces[i].name, cur->name);
2523 ifaces->ifaces[i].link_state = cur->link_up;
2524 ifaces->ifaces[i].references = cur->references;
2525 i++;
2527 ifaces->num = i;
2528 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2529 i*sizeof(struct ctdb_iface);
2531 outdata->dsize = len;
2532 outdata->dptr = (uint8_t *)ifaces;
2534 return 0;
2537 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2538 struct ctdb_req_control_old *c,
2539 TDB_DATA indata)
2541 struct ctdb_iface *info;
2542 struct ctdb_interface *iface;
2543 bool link_up = false;
2545 info = (struct ctdb_iface *)indata.dptr;
2547 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2548 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2549 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2550 len, len, info->name));
2551 return -1;
2554 switch (info->link_state) {
2555 case 0:
2556 link_up = false;
2557 break;
2558 case 1:
2559 link_up = true;
2560 break;
2561 default:
2562 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2563 (unsigned int)info->link_state));
2564 return -1;
2567 if (info->references != 0) {
2568 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2569 (unsigned int)info->references));
2570 return -1;
2573 iface = ctdb_find_iface(ctdb, info->name);
2574 if (iface == NULL) {
2575 return -1;
2578 if (link_up == iface->link_up) {
2579 return 0;
2582 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2583 ("iface[%s] has changed it's link status %s => %s\n",
2584 iface->name,
2585 iface->link_up?"up":"down",
2586 link_up?"up":"down"));
2588 iface->link_up = link_up;
2589 return 0;
2594 structure containing the listening socket and the list of tcp connections
2595 that the ctdb daemon is to kill
2597 struct ctdb_kill_tcp {
2598 struct ctdb_vnn *vnn;
2599 struct ctdb_context *ctdb;
2600 int capture_fd;
2601 struct tevent_fd *fde;
2602 trbt_tree_t *connections;
2603 void *private_data;
2607 a tcp connection that is to be killed
2609 struct ctdb_killtcp_con {
2610 ctdb_sock_addr src_addr;
2611 ctdb_sock_addr dst_addr;
2612 int count;
2613 struct ctdb_kill_tcp *killtcp;
2616 /* this function is used to create a key to represent this socketpair
2617 in the killtcp tree.
2618 this key is used to insert and lookup matching socketpairs that are
2619 to be tickled and RST
2621 #define KILLTCP_KEYLEN 10
2622 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2624 static uint32_t key[KILLTCP_KEYLEN];
2626 bzero(key, sizeof(key));
2628 if (src->sa.sa_family != dst->sa.sa_family) {
2629 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2630 return key;
2633 switch (src->sa.sa_family) {
2634 case AF_INET:
2635 key[0] = dst->ip.sin_addr.s_addr;
2636 key[1] = src->ip.sin_addr.s_addr;
2637 key[2] = dst->ip.sin_port;
2638 key[3] = src->ip.sin_port;
2639 break;
2640 case AF_INET6: {
2641 uint32_t *dst6_addr32 =
2642 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
2643 uint32_t *src6_addr32 =
2644 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
2645 key[0] = dst6_addr32[3];
2646 key[1] = src6_addr32[3];
2647 key[2] = dst6_addr32[2];
2648 key[3] = src6_addr32[2];
2649 key[4] = dst6_addr32[1];
2650 key[5] = src6_addr32[1];
2651 key[6] = dst6_addr32[0];
2652 key[7] = src6_addr32[0];
2653 key[8] = dst->ip6.sin6_port;
2654 key[9] = src->ip6.sin6_port;
2655 break;
2657 default:
2658 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2659 return key;
2662 return key;
2666 called when we get a read event on the raw socket
2668 static void capture_tcp_handler(struct tevent_context *ev,
2669 struct tevent_fd *fde,
2670 uint16_t flags, void *private_data)
2672 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2673 struct ctdb_killtcp_con *con;
2674 ctdb_sock_addr src, dst;
2675 uint32_t ack_seq, seq;
2677 if (!(flags & TEVENT_FD_READ)) {
2678 return;
2681 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2682 killtcp->private_data,
2683 &src, &dst,
2684 &ack_seq, &seq) != 0) {
2685 /* probably a non-tcp ACK packet */
2686 return;
2689 /* check if we have this guy in our list of connections
2690 to kill
2692 con = trbt_lookuparray32(killtcp->connections,
2693 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2694 if (con == NULL) {
2695 /* no this was some other packet we can just ignore */
2696 return;
2699 /* This one has been tickled !
2700 now reset him and remove him from the list.
2702 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2703 ntohs(con->dst_addr.ip.sin_port),
2704 ctdb_addr_to_str(&con->src_addr),
2705 ntohs(con->src_addr.ip.sin_port)));
2707 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2708 talloc_free(con);
2712 /* when traversing the list of all tcp connections to send tickle acks to
2713 (so that we can capture the ack coming back and kill the connection
2714 by a RST)
2715 this callback is called for each connection we are currently trying to kill
2717 static int tickle_connection_traverse(void *param, void *data)
2719 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2721 /* have tried too many times, just give up */
2722 if (con->count >= 5) {
2723 /* can't delete in traverse: reparent to delete_cons */
2724 talloc_steal(param, con);
2725 return 0;
2728 /* othervise, try tickling it again */
2729 con->count++;
2730 ctdb_sys_send_tcp(
2731 (ctdb_sock_addr *)&con->dst_addr,
2732 (ctdb_sock_addr *)&con->src_addr,
2733 0, 0, 0);
2734 return 0;
2739 called every second until all sentenced connections have been reset
2741 static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
2742 struct tevent_timer *te,
2743 struct timeval t, void *private_data)
2745 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2746 void *delete_cons = talloc_new(NULL);
2748 /* loop over all connections sending tickle ACKs */
2749 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2751 /* now we've finished traverse, it's safe to do deletion. */
2752 talloc_free(delete_cons);
2754 /* If there are no more connections to kill we can remove the
2755 entire killtcp structure
2757 if ( (killtcp->connections == NULL) ||
2758 (killtcp->connections->root == NULL) ) {
2759 talloc_free(killtcp);
2760 return;
2763 /* try tickling them again in a seconds time
2765 tevent_add_timer(killtcp->ctdb->ev, killtcp,
2766 timeval_current_ofs(1, 0),
2767 ctdb_tickle_sentenced_connections, killtcp);
2771 destroy the killtcp structure
2773 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2775 struct ctdb_vnn *tmpvnn;
2777 /* verify that this vnn is still active */
2778 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2779 if (tmpvnn == killtcp->vnn) {
2780 break;
2784 if (tmpvnn == NULL) {
2785 return 0;
2788 if (killtcp->vnn->killtcp != killtcp) {
2789 return 0;
2792 killtcp->vnn->killtcp = NULL;
2794 return 0;
/*
  Insert callback for the killtcp tree: the new entry ('parm')
  unconditionally replaces whatever was stored before ('data').

  The old entry is deliberately not freed here.
 */
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
2811 add a tcp socket to the list of connections we want to RST
2813 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2814 ctdb_sock_addr *s,
2815 ctdb_sock_addr *d)
2817 ctdb_sock_addr src, dst;
2818 struct ctdb_kill_tcp *killtcp;
2819 struct ctdb_killtcp_con *con;
2820 struct ctdb_vnn *vnn;
2822 ctdb_canonicalize_ip(s, &src);
2823 ctdb_canonicalize_ip(d, &dst);
2825 vnn = find_public_ip_vnn(ctdb, &dst);
2826 if (vnn == NULL) {
2827 vnn = find_public_ip_vnn(ctdb, &src);
2829 if (vnn == NULL) {
2830 /* if it is not a public ip it could be our 'single ip' */
2831 if (ctdb->single_ip_vnn) {
2832 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2833 vnn = ctdb->single_ip_vnn;
2837 if (vnn == NULL) {
2838 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2839 return -1;
2842 killtcp = vnn->killtcp;
2844 /* If this is the first connection to kill we must allocate
2845 a new structure
2847 if (killtcp == NULL) {
2848 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
2849 CTDB_NO_MEMORY(ctdb, killtcp);
2851 killtcp->vnn = vnn;
2852 killtcp->ctdb = ctdb;
2853 killtcp->capture_fd = -1;
2854 killtcp->connections = trbt_create(killtcp, 0);
2856 vnn->killtcp = killtcp;
2857 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2862 /* create a structure that describes this connection we want to
2863 RST and store it in killtcp->connections
2865 con = talloc(killtcp, struct ctdb_killtcp_con);
2866 CTDB_NO_MEMORY(ctdb, con);
2867 con->src_addr = src;
2868 con->dst_addr = dst;
2869 con->count = 0;
2870 con->killtcp = killtcp;
2873 trbt_insertarray32_callback(killtcp->connections,
2874 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2875 add_killtcp_callback, con);
2878 If we don't have a socket to listen on yet we must create it
2880 if (killtcp->capture_fd == -1) {
2881 const char *iface = ctdb_vnn_iface_string(vnn);
2882 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2883 if (killtcp->capture_fd == -1) {
2884 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2885 "socket on iface '%s' for killtcp (%s)\n",
2886 iface, strerror(errno)));
2887 goto failed;
2892 if (killtcp->fde == NULL) {
2893 killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
2894 killtcp->capture_fd,
2895 TEVENT_FD_READ,
2896 capture_tcp_handler, killtcp);
2897 tevent_fd_set_auto_close(killtcp->fde);
2899 /* We also need to set up some events to tickle all these connections
2900 until they are all reset
2902 tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2903 ctdb_tickle_sentenced_connections, killtcp);
2906 /* tickle him once now */
2907 ctdb_sys_send_tcp(
2908 &con->dst_addr,
2909 &con->src_addr,
2910 0, 0, 0);
2912 return 0;
2914 failed:
2915 talloc_free(vnn->killtcp);
2916 vnn->killtcp = NULL;
2917 return -1;
2921 kill a TCP connection.
2923 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2925 struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
2927 return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
2931 called by a daemon to inform us of the entire list of TCP tickles for
2932 a particular public address.
2933 this control should only be sent by the node that is currently serving
2934 that public address.
2936 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2938 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2939 struct ctdb_tcp_array *tcparray;
2940 struct ctdb_vnn *vnn;
2942 /* We must at least have tickles.num or else we cant verify the size
2943 of the received data blob
2945 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2946 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2947 return -1;
2950 /* verify that the size of data matches what we expect */
2951 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2952 + sizeof(struct ctdb_connection) * list->num) {
2953 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2954 return -1;
2957 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2958 ctdb_addr_to_str(&list->addr)));
2960 vnn = find_public_ip_vnn(ctdb, &list->addr);
2961 if (vnn == NULL) {
2962 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2963 ctdb_addr_to_str(&list->addr)));
2965 return 1;
2968 if (vnn->pnn == ctdb->pnn) {
2969 DEBUG(DEBUG_INFO,
2970 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2971 ctdb_addr_to_str(&list->addr)));
2972 return 0;
2975 /* remove any old ticklelist we might have */
2976 talloc_free(vnn->tcp_array);
2977 vnn->tcp_array = NULL;
2979 tcparray = talloc(vnn, struct ctdb_tcp_array);
2980 CTDB_NO_MEMORY(ctdb, tcparray);
2982 tcparray->num = list->num;
2984 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2985 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2987 memcpy(tcparray->connections, &list->connections[0],
2988 sizeof(struct ctdb_connection)*tcparray->num);
2990 /* We now have a new fresh tickle list array for this vnn */
2991 vnn->tcp_array = tcparray;
2993 return 0;
2997 called to return the full list of tickles for the puclic address associated
2998 with the provided vnn
3000 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3002 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3003 struct ctdb_tickle_list_old *list;
3004 struct ctdb_tcp_array *tcparray;
3005 int num;
3006 struct ctdb_vnn *vnn;
3008 vnn = find_public_ip_vnn(ctdb, addr);
3009 if (vnn == NULL) {
3010 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3011 ctdb_addr_to_str(addr)));
3013 return 1;
3016 tcparray = vnn->tcp_array;
3017 if (tcparray) {
3018 num = tcparray->num;
3019 } else {
3020 num = 0;
3023 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
3024 + sizeof(struct ctdb_connection) * num;
3026 outdata->dptr = talloc_size(outdata, outdata->dsize);
3027 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3028 list = (struct ctdb_tickle_list_old *)outdata->dptr;
3030 list->addr = *addr;
3031 list->num = num;
3032 if (num) {
3033 memcpy(&list->connections[0], tcparray->connections,
3034 sizeof(struct ctdb_connection) * num);
3037 return 0;
3042 set the list of all tcp tickles for a public address
3044 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
3045 ctdb_sock_addr *addr,
3046 struct ctdb_tcp_array *tcparray)
3048 int ret, num;
3049 TDB_DATA data;
3050 struct ctdb_tickle_list_old *list;
3052 if (tcparray) {
3053 num = tcparray->num;
3054 } else {
3055 num = 0;
3058 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
3059 sizeof(struct ctdb_connection) * num;
3060 data.dptr = talloc_size(ctdb, data.dsize);
3061 CTDB_NO_MEMORY(ctdb, data.dptr);
3063 list = (struct ctdb_tickle_list_old *)data.dptr;
3064 list->addr = *addr;
3065 list->num = num;
3066 if (tcparray) {
3067 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
3070 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
3071 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3072 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3073 if (ret != 0) {
3074 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3075 return -1;
3078 talloc_free(data.dptr);
3080 return ret;
3085 perform tickle updates if required
3087 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
3088 struct tevent_timer *te,
3089 struct timeval t, void *private_data)
3091 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3092 int ret;
3093 struct ctdb_vnn *vnn;
3095 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3096 /* we only send out updates for public addresses that
3097 we have taken over
3099 if (ctdb->pnn != vnn->pnn) {
3100 continue;
3102 /* We only send out the updates if we need to */
3103 if (!vnn->tcp_update_needed) {
3104 continue;
3106 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
3107 &vnn->public_address,
3108 vnn->tcp_array);
3109 if (ret != 0) {
3110 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3111 ctdb_addr_to_str(&vnn->public_address)));
3112 } else {
3113 DEBUG(DEBUG_INFO,
3114 ("Sent tickle update for public address %s\n",
3115 ctdb_addr_to_str(&vnn->public_address)));
3116 vnn->tcp_update_needed = false;
3120 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3121 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3122 ctdb_update_tcp_tickles, ctdb);
3126 start periodic update of tcp tickles
3128 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3130 ctdb->tickle_update_context = talloc_new(ctdb);
3132 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3133 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3134 ctdb_update_tcp_tickles, ctdb);
3140 struct control_gratious_arp {
3141 struct ctdb_context *ctdb;
3142 ctdb_sock_addr addr;
3143 const char *iface;
3144 int count;
3148 send a control_gratuitous arp
3150 static void send_gratious_arp(struct tevent_context *ev,
3151 struct tevent_timer *te,
3152 struct timeval t, void *private_data)
3154 int ret;
3155 struct control_gratious_arp *arp = talloc_get_type(private_data,
3156 struct control_gratious_arp);
3158 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3159 if (ret != 0) {
3160 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3161 arp->iface, strerror(errno)));
3165 arp->count++;
3166 if (arp->count == CTDB_ARP_REPEAT) {
3167 talloc_free(arp);
3168 return;
3171 tevent_add_timer(arp->ctdb->ev, arp,
3172 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3173 send_gratious_arp, arp);
3178 send a gratious arp
3180 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3182 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
3183 struct control_gratious_arp *arp;
3185 /* verify the size of indata */
3186 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3187 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3188 (unsigned)indata.dsize,
3189 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
3190 return -1;
3192 if (indata.dsize !=
3193 ( offsetof(struct ctdb_addr_info_old, iface)
3194 + gratious_arp->len ) ){
3196 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3197 "but should be %u bytes\n",
3198 (unsigned)indata.dsize,
3199 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
3200 return -1;
3204 arp = talloc(ctdb, struct control_gratious_arp);
3205 CTDB_NO_MEMORY(ctdb, arp);
3207 arp->ctdb = ctdb;
3208 arp->addr = gratious_arp->addr;
3209 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3210 CTDB_NO_MEMORY(ctdb, arp->iface);
3211 arp->count = 0;
3213 tevent_add_timer(arp->ctdb->ev, arp,
3214 timeval_zero(), send_gratious_arp, arp);
3216 return 0;
3219 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3221 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3222 int ret;
3224 /* verify the size of indata */
3225 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3226 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3227 return -1;
3229 if (indata.dsize !=
3230 ( offsetof(struct ctdb_addr_info_old, iface)
3231 + pub->len ) ){
3233 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3234 "but should be %u bytes\n",
3235 (unsigned)indata.dsize,
3236 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3237 return -1;
3240 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
3242 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3244 if (ret != 0) {
3245 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3246 return -1;
3249 return 0;
/* State carried from ctdb_control_del_public_address() to delete_ip_callback() */
struct delete_ip_callback_state {
	struct ctdb_req_control_old *c;	/* control request still to be answered */
};
3257 called when releaseip event finishes for del_public_address
3259 static void delete_ip_callback(struct ctdb_context *ctdb,
3260 int32_t status, TDB_DATA data,
3261 const char *errormsg,
3262 void *private_data)
3264 struct delete_ip_callback_state *state =
3265 talloc_get_type(private_data, struct delete_ip_callback_state);
3267 /* If release failed then fail. */
3268 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
3269 talloc_free(private_data);
3272 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
3273 struct ctdb_req_control_old *c,
3274 TDB_DATA indata, bool *async_reply)
3276 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3277 struct ctdb_vnn *vnn;
3279 /* verify the size of indata */
3280 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3281 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3282 return -1;
3284 if (indata.dsize !=
3285 ( offsetof(struct ctdb_addr_info_old, iface)
3286 + pub->len ) ){
3288 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3289 "but should be %u bytes\n",
3290 (unsigned)indata.dsize,
3291 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3292 return -1;
3295 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
3297 /* walk over all public addresses until we find a match */
3298 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3299 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3300 if (vnn->pnn == ctdb->pnn) {
3301 struct delete_ip_callback_state *state;
3302 struct ctdb_public_ip *ip;
3303 TDB_DATA data;
3304 int ret;
3306 vnn->delete_pending = true;
3308 state = talloc(ctdb,
3309 struct delete_ip_callback_state);
3310 CTDB_NO_MEMORY(ctdb, state);
3311 state->c = c;
3313 ip = talloc(state, struct ctdb_public_ip);
3314 if (ip == NULL) {
3315 DEBUG(DEBUG_ERR,
3316 (__location__ " Out of memory\n"));
3317 talloc_free(state);
3318 return -1;
3320 ip->pnn = -1;
3321 ip->addr = pub->addr;
3323 data.dsize = sizeof(struct ctdb_public_ip);
3324 data.dptr = (unsigned char *)ip;
3326 ret = ctdb_daemon_send_control(ctdb,
3327 ctdb_get_pnn(ctdb),
3329 CTDB_CONTROL_RELEASE_IP,
3330 0, 0,
3331 data,
3332 delete_ip_callback,
3333 state);
3334 if (ret == -1) {
3335 DEBUG(DEBUG_ERR,
3336 (__location__ "Unable to send "
3337 "CTDB_CONTROL_RELEASE_IP\n"));
3338 talloc_free(state);
3339 return -1;
3342 state->c = talloc_steal(state, c);
3343 *async_reply = true;
3344 } else {
3345 /* This IP is not hosted on the
3346 * current node so just delete it
3347 * now. */
3348 do_delete_ip(ctdb, vnn);
3351 return 0;
3355 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3356 ctdb_addr_to_str(&pub->addr)));
3357 return -1;
/* State carried from ctdb_control_ipreallocated() to its event callback */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* control request still to be answered */
};
3365 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3366 int status, void *p)
3368 struct ipreallocated_callback_state *state =
3369 talloc_get_type(p, struct ipreallocated_callback_state);
3371 if (status != 0) {
3372 DEBUG(DEBUG_ERR,
3373 (" \"ipreallocated\" event script failed (status %d)\n",
3374 status));
3375 if (status == -ETIME) {
3376 ctdb_ban_self(ctdb);
3380 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3381 talloc_free(state);
3384 /* A control to run the ipreallocated event */
3385 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3386 struct ctdb_req_control_old *c,
3387 bool *async_reply)
3389 int ret;
3390 struct ipreallocated_callback_state *state;
3392 state = talloc(ctdb, struct ipreallocated_callback_state);
3393 CTDB_NO_MEMORY(ctdb, state);
3395 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3397 ret = ctdb_event_script_callback(ctdb, state,
3398 ctdb_ipreallocated_callback, state,
3399 CTDB_EVENT_IPREALLOCATED,
3400 "%s", "");
3402 if (ret != 0) {
3403 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3404 talloc_free(state);
3405 return -1;
3408 /* tell the control that we will be reply asynchronously */
3409 state->c = talloc_steal(state, c);
3410 *async_reply = true;
3412 return 0;
3416 /* This function is called from the recovery daemon to verify that a remote
3417 node has the expected ip allocation.
3418 This is verified against ctdb->ip_tree
3420 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3421 struct ctdb_public_ip_list_old *ips,
3422 uint32_t pnn)
3424 struct public_ip_list *tmp_ip;
3425 int i;
3427 if (ctdb->ip_tree == NULL) {
3428 /* don't know the expected allocation yet, assume remote node
3429 is correct. */
3430 return 0;
3433 if (ips == NULL) {
3434 return 0;
3437 for (i=0; i<ips->num; i++) {
3438 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3439 if (tmp_ip == NULL) {
3440 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
3441 return -1;
3444 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3445 continue;
3448 if (tmp_ip->pnn != ips->ips[i].pnn) {
3449 DEBUG(DEBUG_ERR,
3450 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3451 pnn,
3452 ctdb_addr_to_str(&ips->ips[i].addr),
3453 ips->ips[i].pnn, tmp_ip->pnn));
3454 return -1;
3458 return 0;
3461 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3463 struct public_ip_list *tmp_ip;
3465 /* IP tree is never built if DisableIPFailover is set */
3466 if (ctdb->tunable.disable_ip_failover != 0) {
3467 return 0;
3470 if (ctdb->ip_tree == NULL) {
3471 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3472 return -1;
3475 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3476 if (tmp_ip == NULL) {
3477 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3478 return -1;
3481 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3482 tmp_ip->pnn = ip->pnn;
3484 return 0;
3487 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3489 TALLOC_FREE(ctdb->ip_tree);
/* State of one "reload IPs" operation carried out by a child process */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	struct ctdb_req_control_old *c;	/* request answered from the destructor */
	int status;			/* status sent in that reply */
	int fd[2];			/* fd[0] is read for the child's one-byte result;
					   presumably a pipe - confirm at the creation site */
	pid_t child;			/* child process, killed from the destructor */
	struct tevent_fd *fde;
};
3501 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3503 if (h == h->ctdb->reload_ips) {
3504 h->ctdb->reload_ips = NULL;
3506 if (h->c != NULL) {
3507 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3508 h->c = NULL;
3510 ctdb_kill(h->ctdb, h->child, SIGKILL);
3511 return 0;
3514 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3515 struct tevent_timer *te,
3516 struct timeval t, void *private_data)
3518 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3520 talloc_free(h);
3523 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3524 struct tevent_fd *fde,
3525 uint16_t flags, void *private_data)
3527 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3529 char res;
3530 int ret;
3532 ret = sys_read(h->fd[0], &res, 1);
3533 if (ret < 1 || res != 0) {
3534 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3535 res = 1;
3537 h->status = res;
3539 talloc_free(h);
3542 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3544 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3545 struct ctdb_public_ip_list_old *ips;
3546 struct ctdb_vnn *vnn;
3547 struct client_async_data *async_data;
3548 struct timeval timeout;
3549 TDB_DATA data;
3550 struct ctdb_client_control_state *state;
3551 bool first_add;
3552 int i, ret;
3554 CTDB_NO_MEMORY(ctdb, mem_ctx);
3556 /* Read IPs from local node */
3557 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3558 CTDB_CURRENT_NODE, mem_ctx, &ips);
3559 if (ret != 0) {
3560 DEBUG(DEBUG_ERR,
3561 ("Unable to fetch public IPs from local node\n"));
3562 talloc_free(mem_ctx);
3563 return -1;
3566 /* Read IPs file - this is safe since this is a child process */
3567 ctdb->vnn = NULL;
3568 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3569 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3570 talloc_free(mem_ctx);
3571 return -1;
3574 async_data = talloc_zero(mem_ctx, struct client_async_data);
3575 CTDB_NO_MEMORY(ctdb, async_data);
3577 /* Compare IPs between node and file for IPs to be deleted */
3578 for (i = 0; i < ips->num; i++) {
3579 /* */
3580 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3581 if (ctdb_same_ip(&vnn->public_address,
3582 &ips->ips[i].addr)) {
3583 /* IP is still in file */
3584 break;
3588 if (vnn == NULL) {
3589 /* Delete IP ips->ips[i] */
3590 struct ctdb_addr_info_old *pub;
3592 DEBUG(DEBUG_NOTICE,
3593 ("IP %s no longer configured, deleting it\n",
3594 ctdb_addr_to_str(&ips->ips[i].addr)));
3596 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3597 CTDB_NO_MEMORY(ctdb, pub);
3599 pub->addr = ips->ips[i].addr;
3600 pub->mask = 0;
3601 pub->len = 0;
3603 timeout = TAKEOVER_TIMEOUT();
3605 data.dsize = offsetof(struct ctdb_addr_info_old,
3606 iface) + pub->len;
3607 data.dptr = (uint8_t *)pub;
3609 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3610 CTDB_CONTROL_DEL_PUBLIC_IP,
3611 0, data, async_data,
3612 &timeout, NULL);
3613 if (state == NULL) {
3614 DEBUG(DEBUG_ERR,
3615 (__location__
3616 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3617 goto failed;
3620 ctdb_client_async_add(async_data, state);
3624 /* Compare IPs between node and file for IPs to be added */
3625 first_add = true;
3626 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3627 for (i = 0; i < ips->num; i++) {
3628 if (ctdb_same_ip(&vnn->public_address,
3629 &ips->ips[i].addr)) {
3630 /* IP already on node */
3631 break;
3634 if (i == ips->num) {
3635 /* Add IP ips->ips[i] */
3636 struct ctdb_addr_info_old *pub;
3637 const char *ifaces = NULL;
3638 uint32_t len;
3639 int iface = 0;
3641 DEBUG(DEBUG_NOTICE,
3642 ("New IP %s configured, adding it\n",
3643 ctdb_addr_to_str(&vnn->public_address)));
3644 if (first_add) {
3645 uint32_t pnn = ctdb_get_pnn(ctdb);
3647 data.dsize = sizeof(pnn);
3648 data.dptr = (uint8_t *)&pnn;
3650 ret = ctdb_client_send_message(
3651 ctdb,
3652 CTDB_BROADCAST_CONNECTED,
3653 CTDB_SRVID_REBALANCE_NODE,
3654 data);
3655 if (ret != 0) {
3656 DEBUG(DEBUG_WARNING,
3657 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3660 first_add = false;
3663 ifaces = vnn->ifaces[0];
3664 iface = 1;
3665 while (vnn->ifaces[iface] != NULL) {
3666 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3667 vnn->ifaces[iface]);
3668 iface++;
3671 len = strlen(ifaces) + 1;
3672 pub = talloc_zero_size(mem_ctx,
3673 offsetof(struct ctdb_addr_info_old, iface) + len);
3674 CTDB_NO_MEMORY(ctdb, pub);
3676 pub->addr = vnn->public_address;
3677 pub->mask = vnn->public_netmask_bits;
3678 pub->len = len;
3679 memcpy(&pub->iface[0], ifaces, pub->len);
3681 timeout = TAKEOVER_TIMEOUT();
3683 data.dsize = offsetof(struct ctdb_addr_info_old,
3684 iface) + pub->len;
3685 data.dptr = (uint8_t *)pub;
3687 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3688 CTDB_CONTROL_ADD_PUBLIC_IP,
3689 0, data, async_data,
3690 &timeout, NULL);
3691 if (state == NULL) {
3692 DEBUG(DEBUG_ERR,
3693 (__location__
3694 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3695 goto failed;
3698 ctdb_client_async_add(async_data, state);
3702 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3703 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3704 goto failed;
3707 talloc_free(mem_ctx);
3708 return 0;
3710 failed:
3711 talloc_free(mem_ctx);
3712 return -1;
3715 /* This control is sent to force the node to re-read the public addresses file
3716 and drop any addresses we should nnot longer host, and add new addresses
3717 that we are now able to host
3719 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3721 struct ctdb_reloadips_handle *h;
3722 pid_t parent = getpid();
3724 if (ctdb->reload_ips != NULL) {
3725 talloc_free(ctdb->reload_ips);
3726 ctdb->reload_ips = NULL;
3729 h = talloc(ctdb, struct ctdb_reloadips_handle);
3730 CTDB_NO_MEMORY(ctdb, h);
3731 h->ctdb = ctdb;
3732 h->c = NULL;
3733 h->status = -1;
3735 if (pipe(h->fd) == -1) {
3736 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3737 talloc_free(h);
3738 return -1;
3741 h->child = ctdb_fork(ctdb);
3742 if (h->child == (pid_t)-1) {
3743 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3744 close(h->fd[0]);
3745 close(h->fd[1]);
3746 talloc_free(h);
3747 return -1;
3750 /* child process */
3751 if (h->child == 0) {
3752 signed char res = 0;
3754 close(h->fd[0]);
3755 debug_extra = talloc_asprintf(NULL, "reloadips:");
3757 prctl_set_comment("ctdb_reloadips");
3758 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3759 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3760 res = -1;
3761 } else {
3762 res = ctdb_reloadips_child(ctdb);
3763 if (res != 0) {
3764 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3768 sys_write(h->fd[1], &res, 1);
3769 /* make sure we die when our parent dies */
3770 while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
3771 sleep(5);
3773 _exit(0);
3776 h->c = talloc_steal(h, c);
3778 close(h->fd[1]);
3779 set_close_on_exec(h->fd[0]);
3781 talloc_set_destructor(h, ctdb_reloadips_destructor);
3784 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3785 ctdb_reloadips_child_handler, (void *)h);
3786 tevent_fd_set_auto_close(h->fde);
3788 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3789 ctdb_reloadips_timeout_event, h);
3791 /* we reply later */
3792 *async_reply = true;
3793 return 0;