ctdb-daemon: Avoid spurious error sending ARPs for released IP
[Samba.git] / ctdb / server / ctdb_takeover.c
blob81c733a9c486032dcfc04ce416d51efd007b8033
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "protocol/protocol_util.h"
38 #include "ctdb_private.h"
39 #include "ctdb_client.h"
41 #include "common/reqid.h"
42 #include "common/system.h"
43 #include "common/system_socket.h"
44 #include "common/common.h"
45 #include "common/logging.h"
47 #include "server/ctdb_config.h"
49 #include "server/ipalloc.h"
/*
 * Timeout value built from the takeover_timeout tunable.  The macro
 * expects a variable named "ctdb" to be in scope at the point of use.
 */
#define TAKEOVER_TIMEOUT() \
	(timeval_current_ofs(ctdb->tunable.takeover_timeout, 0))

/*
 * Gratuitous ARP scheduling: CTDB_ARP_INTERVAL is the first argument
 * given to timeval_current_ofs() when re-arming the ARP timer and
 * CTDB_ARP_REPEAT is the number of sends after which the ARP state is
 * freed.
 */
#define CTDB_ARP_INTERVAL	1
#define CTDB_ARP_REPEAT		3
/* A local network interface that public addresses can be assigned to */
struct ctdb_interface {
	/* Linkage for the list rooted at ctdb->ifaces */
	struct ctdb_interface *prev, *next;

	/* Interface name */
	const char *name;

	/* Link state; entries are created with the link marked up */
	bool link_up;

	/* Number of public addresses currently assigned to this interface */
	uint32_t references;
};
/* One entry in a VNN's list of interfaces it is allowed to use */
struct vnn_interface {
	/* Linkage for the list rooted at vnn->ifaces */
	struct vnn_interface *prev, *next;

	/* The interface this entry refers to */
	struct ctdb_interface *iface;
};
68 /* state associated with a public ip address */
69 struct ctdb_vnn {
70 struct ctdb_vnn *prev, *next;
72 struct ctdb_interface *iface;
73 struct vnn_interface *ifaces;
74 ctdb_sock_addr public_address;
75 uint8_t public_netmask_bits;
78 * The node number that is serving this public address - set
79 * to CTDB_UNKNOWN_PNN if node is serving it
81 uint32_t pnn;
83 /* List of clients to tickle for this public address */
84 struct ctdb_tcp_array *tcp_array;
86 /* whether we need to update the other nodes with changes to our list
87 of connected clients */
88 bool tcp_update_needed;
90 /* a context to hang sending gratious arp events off */
91 TALLOC_CTX *takeover_ctx;
93 /* Set to true any time an update to this VNN is in flight.
94 This helps to avoid races. */
95 bool update_in_flight;
97 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
98 * address then this flag is set. It will be deleted in the
99 * release IP callback. */
100 bool delete_pending;
103 static const char *iface_string(const struct ctdb_interface *iface)
105 return (iface != NULL ? iface->name : "__none__");
108 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
110 return iface_string(vnn->iface);
/*
 * Forward declaration: ctdb_add_local_iface() uses this lookup before
 * its definition further down in the file.
 */
static struct ctdb_interface *
ctdb_find_iface(struct ctdb_context *ctdb, const char *iface);
116 static struct ctdb_interface *
117 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
119 struct ctdb_interface *i;
121 if (strlen(iface) > CTDB_IFACE_SIZE) {
122 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
123 return NULL;
126 /* Verify that we don't have an entry for this ip yet */
127 i = ctdb_find_iface(ctdb, iface);
128 if (i != NULL) {
129 return i;
132 /* create a new structure for this interface */
133 i = talloc_zero(ctdb, struct ctdb_interface);
134 if (i == NULL) {
135 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
136 return NULL;
138 i->name = talloc_strdup(i, iface);
139 if (i->name == NULL) {
140 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
141 talloc_free(i);
142 return NULL;
145 i->link_up = true;
147 DLIST_ADD(ctdb->ifaces, i);
149 return i;
152 static bool vnn_has_interface(struct ctdb_vnn *vnn,
153 const struct ctdb_interface *iface)
155 struct vnn_interface *i;
157 for (i = vnn->ifaces; i != NULL; i = i->next) {
158 if (iface == i->iface) {
159 return true;
163 return false;
166 /* If any interfaces now have no possible IPs then delete them. This
167 * implementation is naive (i.e. simple) rather than clever
168 * (i.e. complex). Given that this is run on delip and that operation
169 * is rare, this doesn't need to be efficient - it needs to be
170 * foolproof. One alternative is reference counting, where the logic
171 * is distributed and can, therefore, be broken in multiple places.
172 * Another alternative is to build a red-black tree of interfaces that
173 * can have addresses (by walking ctdb->vnn once) and then walking
174 * ctdb->ifaces once and deleting those not in the tree. Let's go to
175 * one of those if the naive implementation causes problems... :-)
177 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
178 struct ctdb_vnn *vnn)
180 struct ctdb_interface *i, *next;
182 /* For each interface, check if there's an IP using it. */
183 for (i = ctdb->ifaces; i != NULL; i = next) {
184 struct ctdb_vnn *tv;
185 bool found;
186 next = i->next;
188 /* Only consider interfaces named in the given VNN. */
189 if (!vnn_has_interface(vnn, i)) {
190 continue;
193 /* Search for a vnn with this interface. */
194 found = false;
195 for (tv=ctdb->vnn; tv; tv=tv->next) {
196 if (vnn_has_interface(tv, i)) {
197 found = true;
198 break;
202 if (!found) {
203 /* None of the VNNs are using this interface. */
204 DLIST_REMOVE(ctdb->ifaces, i);
205 talloc_free(i);
211 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
212 const char *iface)
214 struct ctdb_interface *i;
216 for (i=ctdb->ifaces;i;i=i->next) {
217 if (strcmp(i->name, iface) == 0) {
218 return i;
222 return NULL;
225 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
226 struct ctdb_vnn *vnn)
228 struct vnn_interface *i;
229 struct ctdb_interface *cur = NULL;
230 struct ctdb_interface *best = NULL;
232 for (i = vnn->ifaces; i != NULL; i = i->next) {
234 cur = i->iface;
236 if (!cur->link_up) {
237 continue;
240 if (best == NULL) {
241 best = cur;
242 continue;
245 if (cur->references < best->references) {
246 best = cur;
247 continue;
251 return best;
254 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
255 struct ctdb_vnn *vnn)
257 struct ctdb_interface *best = NULL;
259 if (vnn->iface) {
260 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
261 "still assigned to iface '%s'\n",
262 ctdb_addr_to_str(&vnn->public_address),
263 ctdb_vnn_iface_string(vnn)));
264 return 0;
267 best = ctdb_vnn_best_iface(ctdb, vnn);
268 if (best == NULL) {
269 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
270 "cannot assign to iface any iface\n",
271 ctdb_addr_to_str(&vnn->public_address)));
272 return -1;
275 vnn->iface = best;
276 best->references++;
277 vnn->pnn = ctdb->pnn;
279 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
280 "now assigned to iface '%s' refs[%d]\n",
281 ctdb_addr_to_str(&vnn->public_address),
282 ctdb_vnn_iface_string(vnn),
283 best->references));
284 return 0;
287 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
288 struct ctdb_vnn *vnn)
290 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
291 "now unassigned (old iface '%s' refs[%d])\n",
292 ctdb_addr_to_str(&vnn->public_address),
293 ctdb_vnn_iface_string(vnn),
294 vnn->iface?vnn->iface->references:0));
295 if (vnn->iface) {
296 vnn->iface->references--;
298 vnn->iface = NULL;
299 if (vnn->pnn == ctdb->pnn) {
300 vnn->pnn = CTDB_UNKNOWN_PNN;
304 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
305 struct ctdb_vnn *vnn)
307 uint32_t flags;
308 struct vnn_interface *i;
310 /* Nodes that are not RUNNING can not host IPs */
311 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
312 return false;
315 flags = ctdb->nodes[ctdb->pnn]->flags;
316 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
317 return false;
320 if (vnn->delete_pending) {
321 return false;
324 if (vnn->iface && vnn->iface->link_up) {
325 return true;
328 for (i = vnn->ifaces; i != NULL; i = i->next) {
329 if (i->iface->link_up) {
330 return true;
334 return false;
337 struct ctdb_takeover_arp {
338 struct ctdb_context *ctdb;
339 uint32_t count;
340 ctdb_sock_addr addr;
341 struct ctdb_tcp_array *tcparray;
342 struct ctdb_vnn *vnn;
347 lists of tcp endpoints
349 struct ctdb_tcp_list {
350 struct ctdb_tcp_list *prev, *next;
351 struct ctdb_connection connection;
355 list of clients to kill on IP release
357 struct ctdb_client_ip {
358 struct ctdb_client_ip *prev, *next;
359 struct ctdb_context *ctdb;
360 ctdb_sock_addr addr;
361 uint32_t client_id;
366 send a gratuitous arp
368 static void ctdb_control_send_arp(struct tevent_context *ev,
369 struct tevent_timer *te,
370 struct timeval t, void *private_data)
372 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
373 struct ctdb_takeover_arp);
374 int ret;
375 struct ctdb_tcp_array *tcparray;
376 const char *iface;
378 /* IP address might have been released between sends */
379 if (arp->vnn->iface == NULL) {
380 DBG_INFO("Cancelling ARP send for released IP %s\n",
381 ctdb_addr_to_str(&arp->vnn->public_address));
382 talloc_free(arp);
383 return;
386 iface = ctdb_vnn_iface_string(arp->vnn);
387 ret = ctdb_sys_send_arp(&arp->addr, iface);
388 if (ret != 0) {
389 DBG_ERR("Failed to send ARP on interface %s: %s\n",
390 iface, strerror(ret));
393 tcparray = arp->tcparray;
394 if (tcparray) {
395 unsigned int i;
397 for (i=0;i<tcparray->num;i++) {
398 struct ctdb_connection *tcon;
399 char buf[128];
401 tcon = &tcparray->connections[i];
402 ret = ctdb_connection_to_buf(buf,
403 sizeof(buf),
404 tcon,
405 true,
406 " -> ");
407 if (ret != 0) {
408 strlcpy(buf, "UNKNOWN", sizeof(buf));
410 D_INFO("Send TCP tickle ACK: %s\n", buf);
411 ret = ctdb_sys_send_tcp(
412 &tcon->src,
413 &tcon->dst,
414 0, 0, 0);
415 if (ret != 0) {
416 DBG_ERR("Failed to send TCP tickle ACK: %s\n",
417 buf);
422 arp->count++;
424 if (arp->count == CTDB_ARP_REPEAT) {
425 talloc_free(arp);
426 return;
429 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
430 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
431 ctdb_control_send_arp, arp);
434 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
435 struct ctdb_vnn *vnn)
437 struct ctdb_takeover_arp *arp;
438 struct ctdb_tcp_array *tcparray;
440 if (!vnn->takeover_ctx) {
441 vnn->takeover_ctx = talloc_new(vnn);
442 if (!vnn->takeover_ctx) {
443 return -1;
447 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
448 if (!arp) {
449 return -1;
452 arp->ctdb = ctdb;
453 arp->addr = vnn->public_address;
454 arp->vnn = vnn;
456 tcparray = vnn->tcp_array;
457 if (tcparray) {
458 /* add all of the known tcp connections for this IP to the
459 list of tcp connections to send tickle acks for */
460 arp->tcparray = talloc_steal(arp, tcparray);
462 vnn->tcp_array = NULL;
463 vnn->tcp_update_needed = true;
466 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
467 timeval_zero(), ctdb_control_send_arp, arp);
469 return 0;
/* State handed to the takeip event callback */
struct ctdb_do_takeip_state {
	/* Control request to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* VNN being taken over */
	struct ctdb_vnn *vnn;
};
478 called when takeip event finishes
480 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
481 void *private_data)
483 struct ctdb_do_takeip_state *state =
484 talloc_get_type(private_data, struct ctdb_do_takeip_state);
485 int32_t ret;
486 TDB_DATA data;
488 if (status != 0) {
489 if (status == -ETIMEDOUT) {
490 ctdb_ban_self(ctdb);
492 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
493 ctdb_addr_to_str(&state->vnn->public_address),
494 ctdb_vnn_iface_string(state->vnn)));
495 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
497 talloc_free(state);
498 return;
501 if (ctdb->do_checkpublicip) {
503 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
504 if (ret != 0) {
505 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
506 talloc_free(state);
507 return;
512 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
513 data.dsize = strlen((char *)data.dptr) + 1;
514 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
516 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
519 /* the control succeeded */
520 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
521 talloc_free(state);
522 return;
525 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
527 state->vnn->update_in_flight = false;
528 return 0;
532 take over an ip address
534 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
535 struct ctdb_req_control_old *c,
536 struct ctdb_vnn *vnn)
538 int ret;
539 struct ctdb_do_takeip_state *state;
541 if (vnn->update_in_flight) {
542 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
543 "update for this IP already in flight\n",
544 ctdb_addr_to_str(&vnn->public_address),
545 vnn->public_netmask_bits));
546 return -1;
549 ret = ctdb_vnn_assign_iface(ctdb, vnn);
550 if (ret != 0) {
551 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
552 "assign a usable interface\n",
553 ctdb_addr_to_str(&vnn->public_address),
554 vnn->public_netmask_bits));
555 return -1;
558 state = talloc(vnn, struct ctdb_do_takeip_state);
559 CTDB_NO_MEMORY(ctdb, state);
561 state->c = NULL;
562 state->vnn = vnn;
564 vnn->update_in_flight = true;
565 talloc_set_destructor(state, ctdb_takeip_destructor);
567 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
568 ctdb_addr_to_str(&vnn->public_address),
569 vnn->public_netmask_bits,
570 ctdb_vnn_iface_string(vnn)));
572 ret = ctdb_event_script_callback(ctdb,
573 state,
574 ctdb_do_takeip_callback,
575 state,
576 CTDB_EVENT_TAKE_IP,
577 "%s %s %u",
578 ctdb_vnn_iface_string(vnn),
579 ctdb_addr_to_str(&vnn->public_address),
580 vnn->public_netmask_bits);
582 if (ret != 0) {
583 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
584 ctdb_addr_to_str(&vnn->public_address),
585 ctdb_vnn_iface_string(vnn)));
586 talloc_free(state);
587 return -1;
590 state->c = talloc_steal(ctdb, c);
591 return 0;
/* State handed to the updateip event callback */
struct ctdb_do_updateip_state {
	/* Control request to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* Interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;

	/* VNN being moved */
	struct ctdb_vnn *vnn;
};
601 called when updateip event finishes
603 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
604 void *private_data)
606 struct ctdb_do_updateip_state *state =
607 talloc_get_type(private_data, struct ctdb_do_updateip_state);
609 if (status != 0) {
610 if (status == -ETIMEDOUT) {
611 ctdb_ban_self(ctdb);
613 DEBUG(DEBUG_ERR,
614 ("Failed update of IP %s from interface %s to %s\n",
615 ctdb_addr_to_str(&state->vnn->public_address),
616 iface_string(state->old),
617 ctdb_vnn_iface_string(state->vnn)));
620 * All we can do is reset the old interface
621 * and let the next run fix it
623 ctdb_vnn_unassign_iface(ctdb, state->vnn);
624 state->vnn->iface = state->old;
625 state->vnn->iface->references++;
627 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
628 talloc_free(state);
629 return;
632 /* the control succeeded */
633 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
634 talloc_free(state);
635 return;
638 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
640 state->vnn->update_in_flight = false;
641 return 0;
645 update (move) an ip address
647 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
648 struct ctdb_req_control_old *c,
649 struct ctdb_vnn *vnn)
651 int ret;
652 struct ctdb_do_updateip_state *state;
653 struct ctdb_interface *old = vnn->iface;
654 const char *old_name = iface_string(old);
655 const char *new_name;
657 if (vnn->update_in_flight) {
658 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
659 "update for this IP already in flight\n",
660 ctdb_addr_to_str(&vnn->public_address),
661 vnn->public_netmask_bits));
662 return -1;
665 ctdb_vnn_unassign_iface(ctdb, vnn);
666 ret = ctdb_vnn_assign_iface(ctdb, vnn);
667 if (ret != 0) {
668 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
669 "assign a usable interface (old iface '%s')\n",
670 ctdb_addr_to_str(&vnn->public_address),
671 vnn->public_netmask_bits,
672 old_name));
673 return -1;
676 if (old == vnn->iface) {
677 /* A benign update from one interface onto itself.
678 * no need to run the eventscripts in this case, just return
679 * success.
681 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
682 return 0;
685 state = talloc(vnn, struct ctdb_do_updateip_state);
686 CTDB_NO_MEMORY(ctdb, state);
688 state->c = NULL;
689 state->old = old;
690 state->vnn = vnn;
692 vnn->update_in_flight = true;
693 talloc_set_destructor(state, ctdb_updateip_destructor);
695 new_name = ctdb_vnn_iface_string(vnn);
696 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
697 "interface %s to %s\n",
698 ctdb_addr_to_str(&vnn->public_address),
699 vnn->public_netmask_bits,
700 old_name,
701 new_name));
703 ret = ctdb_event_script_callback(ctdb,
704 state,
705 ctdb_do_updateip_callback,
706 state,
707 CTDB_EVENT_UPDATE_IP,
708 "%s %s %s %u",
709 old_name,
710 new_name,
711 ctdb_addr_to_str(&vnn->public_address),
712 vnn->public_netmask_bits);
713 if (ret != 0) {
714 DEBUG(DEBUG_ERR,
715 ("Failed update IP %s from interface %s to %s\n",
716 ctdb_addr_to_str(&vnn->public_address),
717 old_name, new_name));
718 talloc_free(state);
719 return -1;
722 state->c = talloc_steal(ctdb, c);
723 return 0;
727 Find the vnn of the node that has a public ip address
728 returns -1 if the address is not known as a public address
730 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
732 struct ctdb_vnn *vnn;
734 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
735 if (ctdb_same_ip(&vnn->public_address, addr)) {
736 return vnn;
740 return NULL;
744 take over an ip address
746 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
747 struct ctdb_req_control_old *c,
748 TDB_DATA indata,
749 bool *async_reply)
751 int ret;
752 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
753 struct ctdb_vnn *vnn;
754 bool have_ip = false;
755 bool do_updateip = false;
756 bool do_takeip = false;
757 struct ctdb_interface *best_iface = NULL;
759 if (pip->pnn != ctdb->pnn) {
760 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
761 "with pnn %d, but we're node %d\n",
762 ctdb_addr_to_str(&pip->addr),
763 pip->pnn, ctdb->pnn));
764 return -1;
767 /* update out vnn list */
768 vnn = find_public_ip_vnn(ctdb, &pip->addr);
769 if (vnn == NULL) {
770 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
771 ctdb_addr_to_str(&pip->addr)));
772 return 0;
775 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
776 have_ip = ctdb_sys_have_ip(&pip->addr);
778 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
779 if (best_iface == NULL) {
780 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
781 "a usable interface (old %s, have_ip %d)\n",
782 ctdb_addr_to_str(&vnn->public_address),
783 vnn->public_netmask_bits,
784 ctdb_vnn_iface_string(vnn),
785 have_ip));
786 return -1;
789 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
790 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
791 "and we have it on iface[%s], but it was assigned to node %d"
792 "and we are node %d, banning ourself\n",
793 ctdb_addr_to_str(&vnn->public_address),
794 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
795 ctdb_ban_self(ctdb);
796 return -1;
799 if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
800 /* This will cause connections to be reset and
801 * reestablished. However, this is a very unusual
802 * situation and doing this will completely repair the
803 * inconsistency in the VNN.
805 DEBUG(DEBUG_WARNING,
806 (__location__
807 " Doing updateip for IP %s already on an interface\n",
808 ctdb_addr_to_str(&vnn->public_address)));
809 do_updateip = true;
812 if (vnn->iface) {
813 if (vnn->iface != best_iface) {
814 if (!vnn->iface->link_up) {
815 do_updateip = true;
816 } else if (vnn->iface->references > (best_iface->references + 1)) {
817 /* only move when the rebalance gains something */
818 do_updateip = true;
823 if (!have_ip) {
824 if (do_updateip) {
825 ctdb_vnn_unassign_iface(ctdb, vnn);
826 do_updateip = false;
828 do_takeip = true;
831 if (do_takeip) {
832 ret = ctdb_do_takeip(ctdb, c, vnn);
833 if (ret != 0) {
834 return -1;
836 } else if (do_updateip) {
837 ret = ctdb_do_updateip(ctdb, c, vnn);
838 if (ret != 0) {
839 return -1;
841 } else {
843 * The interface is up and the kernel known the ip
844 * => do nothing
846 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
847 ctdb_addr_to_str(&pip->addr),
848 vnn->public_netmask_bits,
849 ctdb_vnn_iface_string(vnn)));
850 return 0;
853 /* tell ctdb_control.c that we will be replying asynchronously */
854 *async_reply = true;
856 return 0;
859 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
861 DLIST_REMOVE(ctdb->vnn, vnn);
862 ctdb_vnn_unassign_iface(ctdb, vnn);
863 ctdb_remove_orphaned_ifaces(ctdb, vnn);
864 talloc_free(vnn);
867 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
868 struct ctdb_vnn *vnn,
869 ctdb_sock_addr *addr)
871 TDB_DATA data;
873 /* Send a message to all clients of this node telling them
874 * that the cluster has been reconfigured and they should
875 * close any connections on this IP address
877 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
878 data.dsize = strlen((char *)data.dptr)+1;
879 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
880 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
882 ctdb_vnn_unassign_iface(ctdb, vnn);
884 /* Process the IP if it has been marked for deletion */
885 if (vnn->delete_pending) {
886 do_delete_ip(ctdb, vnn);
887 return NULL;
890 return vnn;
893 struct release_ip_callback_state {
894 struct ctdb_req_control_old *c;
895 ctdb_sock_addr *addr;
896 struct ctdb_vnn *vnn;
897 uint32_t target_pnn;
901 called when releaseip event finishes
903 static void release_ip_callback(struct ctdb_context *ctdb, int status,
904 void *private_data)
906 struct release_ip_callback_state *state =
907 talloc_get_type(private_data, struct release_ip_callback_state);
909 if (status == -ETIMEDOUT) {
910 ctdb_ban_self(ctdb);
913 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
914 if (ctdb_sys_have_ip(state->addr)) {
915 DEBUG(DEBUG_ERR,
916 ("IP %s still hosted during release IP callback, failing\n",
917 ctdb_addr_to_str(state->addr)));
918 ctdb_request_control_reply(ctdb, state->c,
919 NULL, -1, NULL);
920 talloc_free(state);
921 return;
925 state->vnn->pnn = state->target_pnn;
926 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
928 /* the control succeeded */
929 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
930 talloc_free(state);
933 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
935 if (state->vnn != NULL) {
936 state->vnn->update_in_flight = false;
938 return 0;
942 release an ip address
944 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
945 struct ctdb_req_control_old *c,
946 TDB_DATA indata,
947 bool *async_reply)
949 int ret;
950 struct release_ip_callback_state *state;
951 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
952 struct ctdb_vnn *vnn;
953 const char *iface;
955 /* update our vnn list */
956 vnn = find_public_ip_vnn(ctdb, &pip->addr);
957 if (vnn == NULL) {
958 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
959 ctdb_addr_to_str(&pip->addr)));
960 return 0;
963 /* stop any previous arps */
964 talloc_free(vnn->takeover_ctx);
965 vnn->takeover_ctx = NULL;
967 /* RELEASE_IP controls are sent to all nodes that should not
968 * be hosting a particular IP. This serves 2 purposes. The
969 * first is to help resolve any inconsistencies. If a node
970 * does unexpectly host an IP then it will be released. The
971 * 2nd is to use a "redundant release" to tell non-takeover
972 * nodes where an IP is moving to. This is how "ctdb ip" can
973 * report the (likely) location of an IP by only asking the
974 * local node. Redundant releases need to update the PNN but
975 * are otherwise ignored.
977 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
978 if (!ctdb_sys_have_ip(&pip->addr)) {
979 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
980 ctdb_addr_to_str(&pip->addr),
981 vnn->public_netmask_bits,
982 ctdb_vnn_iface_string(vnn)));
983 vnn->pnn = pip->pnn;
984 ctdb_vnn_unassign_iface(ctdb, vnn);
985 return 0;
987 } else {
988 if (vnn->iface == NULL) {
989 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
990 ctdb_addr_to_str(&pip->addr),
991 vnn->public_netmask_bits));
992 vnn->pnn = pip->pnn;
993 return 0;
997 /* There is a potential race between take_ip and us because we
998 * update the VNN via a callback that run when the
999 * eventscripts have been run. Avoid the race by allowing one
1000 * update to be in flight at a time.
1002 if (vnn->update_in_flight) {
1003 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
1004 "update for this IP already in flight\n",
1005 ctdb_addr_to_str(&vnn->public_address),
1006 vnn->public_netmask_bits));
1007 return -1;
1010 iface = ctdb_vnn_iface_string(vnn);
1012 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
1013 ctdb_addr_to_str(&pip->addr),
1014 vnn->public_netmask_bits,
1015 iface,
1016 pip->pnn));
1018 state = talloc(ctdb, struct release_ip_callback_state);
1019 if (state == NULL) {
1020 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1021 __FILE__, __LINE__);
1022 return -1;
1025 state->c = NULL;
1026 state->addr = talloc(state, ctdb_sock_addr);
1027 if (state->addr == NULL) {
1028 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1029 __FILE__, __LINE__);
1030 talloc_free(state);
1031 return -1;
1033 *state->addr = pip->addr;
1034 state->target_pnn = pip->pnn;
1035 state->vnn = vnn;
1037 vnn->update_in_flight = true;
1038 talloc_set_destructor(state, ctdb_releaseip_destructor);
1040 ret = ctdb_event_script_callback(ctdb,
1041 state, release_ip_callback, state,
1042 CTDB_EVENT_RELEASE_IP,
1043 "%s %s %u",
1044 iface,
1045 ctdb_addr_to_str(&pip->addr),
1046 vnn->public_netmask_bits);
1047 if (ret != 0) {
1048 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1049 ctdb_addr_to_str(&pip->addr),
1050 ctdb_vnn_iface_string(vnn)));
1051 talloc_free(state);
1052 return -1;
1055 /* tell the control that we will be reply asynchronously */
1056 *async_reply = true;
1057 state->c = talloc_steal(state, c);
1058 return 0;
1061 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1062 ctdb_sock_addr *addr,
1063 unsigned mask, const char *ifaces,
1064 bool check_address)
1066 struct ctdb_vnn *vnn;
1067 char *tmp;
1068 const char *iface;
1070 /* Verify that we don't have an entry for this IP yet */
1071 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1072 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1073 DEBUG(DEBUG_ERR,
1074 ("Duplicate public IP address '%s'\n",
1075 ctdb_addr_to_str(addr)));
1076 return -1;
1080 /* Create a new VNN structure for this IP address */
1081 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1082 if (vnn == NULL) {
1083 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1084 return -1;
1086 tmp = talloc_strdup(vnn, ifaces);
1087 if (tmp == NULL) {
1088 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1089 talloc_free(vnn);
1090 return -1;
1092 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1093 struct vnn_interface *vnn_iface;
1094 struct ctdb_interface *i;
1095 if (!ctdb_sys_check_iface_exists(iface)) {
1096 DEBUG(DEBUG_ERR,
1097 ("Unknown interface %s for public address %s\n",
1098 iface, ctdb_addr_to_str(addr)));
1099 talloc_free(vnn);
1100 return -1;
1103 i = ctdb_add_local_iface(ctdb, iface);
1104 if (i == NULL) {
1105 DEBUG(DEBUG_ERR,
1106 ("Failed to add interface '%s' "
1107 "for public address %s\n",
1108 iface, ctdb_addr_to_str(addr)));
1109 talloc_free(vnn);
1110 return -1;
1113 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1114 if (vnn_iface == NULL) {
1115 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1116 talloc_free(vnn);
1117 return -1;
1120 vnn_iface->iface = i;
1121 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1123 talloc_free(tmp);
1124 vnn->public_address = *addr;
1125 vnn->public_netmask_bits = mask;
1126 vnn->pnn = -1;
1128 DLIST_ADD(ctdb->vnn, vnn);
1130 return 0;
1134 setup the public address lists from a file
1136 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1138 bool ok;
1139 char **lines;
1140 int nlines;
1141 int i;
1143 /* If no public addresses file given then try the default */
1144 if (ctdb->public_addresses_file == NULL) {
1145 const char *b = getenv("CTDB_BASE");
1146 if (b == NULL) {
1147 DBG_ERR("CTDB_BASE not set\n");
1148 return -1;
1150 ctdb->public_addresses_file = talloc_asprintf(
1151 ctdb, "%s/%s", b, "public_addresses");
1152 if (ctdb->public_addresses_file == NULL) {
1153 DBG_ERR("Out of memory\n");
1154 return -1;
1158 /* If the file doesn't exist then warn and do nothing */
1159 ok = file_exist(ctdb->public_addresses_file);
1160 if (!ok) {
1161 D_WARNING("Not loading public addresses, no file %s\n",
1162 ctdb->public_addresses_file);
1163 return 0;
1166 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1167 if (lines == NULL) {
1168 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1169 return -1;
1171 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1172 nlines--;
1175 for (i=0;i<nlines;i++) {
1176 unsigned mask;
1177 ctdb_sock_addr addr;
1178 const char *addrstr;
1179 const char *ifaces;
1180 char *tok, *line;
1181 int ret;
1183 line = lines[i];
1184 while ((*line == ' ') || (*line == '\t')) {
1185 line++;
1187 if (*line == '#') {
1188 continue;
1190 if (strcmp(line, "") == 0) {
1191 continue;
1193 tok = strtok(line, " \t");
1194 addrstr = tok;
1196 tok = strtok(NULL, " \t");
1197 if (tok == NULL) {
1198 D_ERR("No interface specified at line %u "
1199 "of public addresses file\n", i+1);
1200 talloc_free(lines);
1201 return -1;
1203 ifaces = tok;
1205 if (addrstr == NULL) {
1206 D_ERR("Badly formed line %u in public address list\n",
1207 i+1);
1208 talloc_free(lines);
1209 return -1;
1212 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1213 if (ret != 0) {
1214 D_ERR("Badly formed line %u in public address list\n",
1215 i+1);
1216 talloc_free(lines);
1217 return -1;
1220 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1221 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1222 talloc_free(lines);
1223 return -1;
1228 D_NOTICE("Loaded public addresses from %s\n",
1229 ctdb->public_addresses_file);
1231 talloc_free(lines);
1232 return 0;
1236 destroy a ctdb_client_ip structure
1238 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1240 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1241 ctdb_addr_to_str(&ip->addr),
1242 ntohs(ip->addr.ip.sin_port),
1243 ip->client_id));
1245 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1246 return 0;
1250 called by a client to inform us of a TCP connection that it is managing
1251 that should tickled with an ACK when IP takeover is done
1253 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1254 TDB_DATA indata)
1256 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1257 struct ctdb_connection *tcp_sock = NULL;
1258 struct ctdb_tcp_list *tcp;
1259 struct ctdb_connection t;
1260 int ret;
1261 TDB_DATA data;
1262 struct ctdb_client_ip *ip;
1263 struct ctdb_vnn *vnn;
1264 ctdb_sock_addr src_addr;
1265 ctdb_sock_addr dst_addr;
1267 /* If we don't have public IPs, tickles are useless */
1268 if (ctdb->vnn == NULL) {
1269 return 0;
1272 tcp_sock = (struct ctdb_connection *)indata.dptr;
1274 src_addr = tcp_sock->src;
1275 ctdb_canonicalize_ip(&src_addr, &tcp_sock->src);
1276 ZERO_STRUCT(src_addr);
1277 memcpy(&src_addr, &tcp_sock->src, sizeof(src_addr));
1279 dst_addr = tcp_sock->dst;
1280 ctdb_canonicalize_ip(&dst_addr, &tcp_sock->dst);
1281 ZERO_STRUCT(dst_addr);
1282 memcpy(&dst_addr, &tcp_sock->dst, sizeof(dst_addr));
1284 vnn = find_public_ip_vnn(ctdb, &dst_addr);
1285 if (vnn == NULL) {
1286 char *src_addr_str = NULL;
1287 char *dst_addr_str = NULL;
1289 switch (dst_addr.sa.sa_family) {
1290 case AF_INET:
1291 if (ntohl(dst_addr.ip.sin_addr.s_addr) == INADDR_LOOPBACK) {
1292 /* ignore ... */
1293 return 0;
1295 break;
1296 case AF_INET6:
1297 break;
1298 default:
1299 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n",
1300 dst_addr.sa.sa_family));
1301 return 0;
1304 src_addr_str = ctdb_sock_addr_to_string(client, &src_addr, false);
1305 dst_addr_str = ctdb_sock_addr_to_string(client, &dst_addr, false);
1306 DEBUG(DEBUG_ERR,(
1307 "Could not register TCP connection from "
1308 "%s to %s (not a public address) (port %u) "
1309 "(client_id %u pid %u).\n",
1310 src_addr_str,
1311 dst_addr_str,
1312 ctdb_sock_addr_port(&dst_addr),
1313 client_id, client->pid));
1314 TALLOC_FREE(src_addr_str);
1315 TALLOC_FREE(dst_addr_str);
1316 return 0;
1319 if (vnn->pnn != ctdb->pnn) {
1320 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1321 ctdb_addr_to_str(&dst_addr),
1322 client_id, client->pid));
1323 /* failing this call will tell smbd to die */
1324 return -1;
1327 ip = talloc(client, struct ctdb_client_ip);
1328 CTDB_NO_MEMORY(ctdb, ip);
1330 ip->ctdb = ctdb;
1331 ip->addr = dst_addr;
1332 ip->client_id = client_id;
1333 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1334 DLIST_ADD(ctdb->client_ip_list, ip);
1336 tcp = talloc(client, struct ctdb_tcp_list);
1337 CTDB_NO_MEMORY(ctdb, tcp);
1339 tcp->connection.src = tcp_sock->src;
1340 tcp->connection.dst = tcp_sock->dst;
1342 DLIST_ADD(client->tcp_list, tcp);
1344 t.src = tcp_sock->src;
1345 t.dst = tcp_sock->dst;
1347 data.dptr = (uint8_t *)&t;
1348 data.dsize = sizeof(t);
1350 switch (dst_addr.sa.sa_family) {
1351 case AF_INET:
1352 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1353 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1354 ctdb_addr_to_str(&tcp_sock->src),
1355 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1356 break;
1357 case AF_INET6:
1358 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1359 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1360 ctdb_addr_to_str(&tcp_sock->src),
1361 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1362 break;
1363 default:
1364 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n",
1365 dst_addr.sa.sa_family));
1369 /* tell all nodes about this tcp connection */
1370 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1371 CTDB_CONTROL_TCP_ADD,
1372 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1373 if (ret != 0) {
1374 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1375 return -1;
1378 return 0;
1382 find a tcp address on a list
1384 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1385 struct ctdb_connection *tcp)
1387 unsigned int i;
1389 if (array == NULL) {
1390 return NULL;
1393 for (i=0;i<array->num;i++) {
1394 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1395 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1396 return &array->connections[i];
1399 return NULL;
1405 called by a daemon to inform us of a TCP connection that one of its
1406 clients managing that should tickled with an ACK when IP takeover is
1407 done
1409 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1411 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1412 struct ctdb_tcp_array *tcparray;
1413 struct ctdb_connection tcp;
1414 struct ctdb_vnn *vnn;
1416 /* If we don't have public IPs, tickles are useless */
1417 if (ctdb->vnn == NULL) {
1418 return 0;
1421 vnn = find_public_ip_vnn(ctdb, &p->dst);
1422 if (vnn == NULL) {
1423 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1424 ctdb_addr_to_str(&p->dst)));
1426 return -1;
1430 tcparray = vnn->tcp_array;
1432 /* If this is the first tickle */
1433 if (tcparray == NULL) {
1434 tcparray = talloc(vnn, struct ctdb_tcp_array);
1435 CTDB_NO_MEMORY(ctdb, tcparray);
1436 vnn->tcp_array = tcparray;
1438 tcparray->num = 0;
1439 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1440 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1442 tcparray->connections[tcparray->num].src = p->src;
1443 tcparray->connections[tcparray->num].dst = p->dst;
1444 tcparray->num++;
1446 if (tcp_update_needed) {
1447 vnn->tcp_update_needed = true;
1449 return 0;
1453 /* Do we already have this tickle ?*/
1454 tcp.src = p->src;
1455 tcp.dst = p->dst;
1456 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1457 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1458 ctdb_addr_to_str(&tcp.dst),
1459 ntohs(tcp.dst.ip.sin_port),
1460 vnn->pnn));
1461 return 0;
1464 /* A new tickle, we must add it to the array */
1465 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1466 struct ctdb_connection,
1467 tcparray->num+1);
1468 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1470 tcparray->connections[tcparray->num].src = p->src;
1471 tcparray->connections[tcparray->num].dst = p->dst;
1472 tcparray->num++;
1474 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1475 ctdb_addr_to_str(&tcp.dst),
1476 ntohs(tcp.dst.ip.sin_port),
1477 vnn->pnn));
1479 if (tcp_update_needed) {
1480 vnn->tcp_update_needed = true;
1483 return 0;
1487 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1489 struct ctdb_connection *tcpp;
1491 if (vnn == NULL) {
1492 return;
1495 /* if the array is empty we cant remove it
1496 and we don't need to do anything
1498 if (vnn->tcp_array == NULL) {
1499 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1500 ctdb_addr_to_str(&conn->dst),
1501 ntohs(conn->dst.ip.sin_port)));
1502 return;
1506 /* See if we know this connection
1507 if we don't know this connection then we don't need to do anything
1509 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1510 if (tcpp == NULL) {
1511 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1512 ctdb_addr_to_str(&conn->dst),
1513 ntohs(conn->dst.ip.sin_port)));
1514 return;
1518 /* We need to remove this entry from the array.
1519 Instead of allocating a new array and copying data to it
1520 we cheat and just copy the last entry in the existing array
1521 to the entry that is to be removed and just shring the
1522 ->num field
1524 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1525 vnn->tcp_array->num--;
1527 /* If we deleted the last entry we also need to remove the entire array
1529 if (vnn->tcp_array->num == 0) {
1530 talloc_free(vnn->tcp_array);
1531 vnn->tcp_array = NULL;
1534 vnn->tcp_update_needed = true;
1536 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1537 ctdb_addr_to_str(&conn->src),
1538 ntohs(conn->src.ip.sin_port)));
1543 called by a daemon to inform us of a TCP connection that one of its
1544 clients used are no longer needed in the tickle database
1546 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1548 struct ctdb_vnn *vnn;
1549 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1551 /* If we don't have public IPs, tickles are useless */
1552 if (ctdb->vnn == NULL) {
1553 return 0;
1556 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1557 if (vnn == NULL) {
1558 DEBUG(DEBUG_ERR,
1559 (__location__ " unable to find public address %s\n",
1560 ctdb_addr_to_str(&conn->dst)));
1561 return 0;
1564 ctdb_remove_connection(vnn, conn);
1566 return 0;
1570 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1571 bool force);
1574 Called when another daemon starts - causes all tickles for all
1575 public addresses we are serving to be sent to the new node on the
1576 next check. This actually causes the tickles to be sent to the
1577 other node immediately. In case there is an error, the periodic
1578 timer will send the updates on timer event. This is simple and
1579 doesn't require careful error handling.
1581 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1583 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1584 (unsigned long) pnn));
1586 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1587 return 0;
1592 called when a client structure goes away - hook to remove
1593 elements from the tcp_list in all daemons
1595 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1597 while (client->tcp_list) {
1598 struct ctdb_vnn *vnn;
1599 struct ctdb_tcp_list *tcp = client->tcp_list;
1600 struct ctdb_connection *conn = &tcp->connection;
1602 DLIST_REMOVE(client->tcp_list, tcp);
1604 vnn = find_public_ip_vnn(client->ctdb,
1605 &conn->dst);
1606 if (vnn == NULL) {
1607 DEBUG(DEBUG_ERR,
1608 (__location__ " unable to find public address %s\n",
1609 ctdb_addr_to_str(&conn->dst)));
1610 continue;
1613 /* If the IP address is hosted on this node then
1614 * remove the connection. */
1615 if (vnn->pnn == client->ctdb->pnn) {
1616 ctdb_remove_connection(vnn, conn);
1619 /* Otherwise this function has been called because the
1620 * server IP address has been released to another node
1621 * and the client has exited. This means that we
1622 * should not delete the connection information. The
1623 * takeover node processes connections too. */
1628 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1630 struct ctdb_vnn *vnn, *next;
1631 int count = 0;
1633 if (ctdb_config.failover_disabled == 1) {
1634 return;
1637 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1638 /* vnn can be freed below in release_ip_post() */
1639 next = vnn->next;
1641 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1642 ctdb_vnn_unassign_iface(ctdb, vnn);
1643 continue;
1646 /* Don't allow multiple releases at once. Some code,
1647 * particularly ctdb_tickle_sentenced_connections() is
1648 * not re-entrant */
1649 if (vnn->update_in_flight) {
1650 DEBUG(DEBUG_WARNING,
1651 (__location__
1652 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1653 ctdb_addr_to_str(&vnn->public_address),
1654 vnn->public_netmask_bits,
1655 ctdb_vnn_iface_string(vnn)));
1656 continue;
1658 vnn->update_in_flight = true;
1660 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1661 ctdb_addr_to_str(&vnn->public_address),
1662 vnn->public_netmask_bits,
1663 ctdb_vnn_iface_string(vnn)));
1665 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1666 ctdb_vnn_iface_string(vnn),
1667 ctdb_addr_to_str(&vnn->public_address),
1668 vnn->public_netmask_bits);
1669 /* releaseip timeouts are converted to success, so to
1670 * detect failures just check if the IP address is
1671 * still there...
1673 if (ctdb_sys_have_ip(&vnn->public_address)) {
1674 DEBUG(DEBUG_ERR,
1675 (__location__
1676 " IP address %s not released\n",
1677 ctdb_addr_to_str(&vnn->public_address)));
1678 vnn->update_in_flight = false;
1679 continue;
1682 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1683 if (vnn != NULL) {
1684 vnn->update_in_flight = false;
1686 count++;
1689 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1694 get list of public IPs
1696 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1697 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1699 int i, num, len;
1700 struct ctdb_public_ip_list_old *ips;
1701 struct ctdb_vnn *vnn;
1702 bool only_available = false;
1704 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1705 only_available = true;
1708 /* count how many public ip structures we have */
1709 num = 0;
1710 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1711 num++;
1714 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1715 num*sizeof(struct ctdb_public_ip);
1716 ips = talloc_zero_size(outdata, len);
1717 CTDB_NO_MEMORY(ctdb, ips);
1719 i = 0;
1720 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1721 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1722 continue;
1724 ips->ips[i].pnn = vnn->pnn;
1725 ips->ips[i].addr = vnn->public_address;
1726 i++;
1728 ips->num = i;
1729 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1730 i*sizeof(struct ctdb_public_ip);
1732 outdata->dsize = len;
1733 outdata->dptr = (uint8_t *)ips;
1735 return 0;
1739 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1740 struct ctdb_req_control_old *c,
1741 TDB_DATA indata,
1742 TDB_DATA *outdata)
1744 int i, num, len;
1745 ctdb_sock_addr *addr;
1746 struct ctdb_public_ip_info_old *info;
1747 struct ctdb_vnn *vnn;
1748 struct vnn_interface *iface;
1750 addr = (ctdb_sock_addr *)indata.dptr;
1752 vnn = find_public_ip_vnn(ctdb, addr);
1753 if (vnn == NULL) {
1754 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1755 "'%s'not a public address\n",
1756 ctdb_addr_to_str(addr)));
1757 return -1;
1760 /* count how many public ip structures we have */
1761 num = 0;
1762 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1763 num++;
1766 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1767 num*sizeof(struct ctdb_iface);
1768 info = talloc_zero_size(outdata, len);
1769 CTDB_NO_MEMORY(ctdb, info);
1771 info->ip.addr = vnn->public_address;
1772 info->ip.pnn = vnn->pnn;
1773 info->active_idx = 0xFFFFFFFF;
1775 i = 0;
1776 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1777 struct ctdb_interface *cur;
1779 cur = iface->iface;
1780 if (vnn->iface == cur) {
1781 info->active_idx = i;
1783 strncpy(info->ifaces[i].name, cur->name,
1784 sizeof(info->ifaces[i].name));
1785 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1786 info->ifaces[i].link_state = cur->link_up;
1787 info->ifaces[i].references = cur->references;
1789 i++;
1791 info->num = i;
1792 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1793 i*sizeof(struct ctdb_iface);
1795 outdata->dsize = len;
1796 outdata->dptr = (uint8_t *)info;
1798 return 0;
1801 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1802 struct ctdb_req_control_old *c,
1803 TDB_DATA *outdata)
1805 int i, num, len;
1806 struct ctdb_iface_list_old *ifaces;
1807 struct ctdb_interface *cur;
1809 /* count how many public ip structures we have */
1810 num = 0;
1811 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1812 num++;
1815 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1816 num*sizeof(struct ctdb_iface);
1817 ifaces = talloc_zero_size(outdata, len);
1818 CTDB_NO_MEMORY(ctdb, ifaces);
1820 i = 0;
1821 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1822 strncpy(ifaces->ifaces[i].name, cur->name,
1823 sizeof(ifaces->ifaces[i].name));
1824 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1825 ifaces->ifaces[i].link_state = cur->link_up;
1826 ifaces->ifaces[i].references = cur->references;
1827 i++;
1829 ifaces->num = i;
1830 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1831 i*sizeof(struct ctdb_iface);
1833 outdata->dsize = len;
1834 outdata->dptr = (uint8_t *)ifaces;
1836 return 0;
1839 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1840 struct ctdb_req_control_old *c,
1841 TDB_DATA indata)
1843 struct ctdb_iface *info;
1844 struct ctdb_interface *iface;
1845 bool link_up = false;
1847 info = (struct ctdb_iface *)indata.dptr;
1849 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1850 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1851 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1852 len, len, info->name));
1853 return -1;
1856 switch (info->link_state) {
1857 case 0:
1858 link_up = false;
1859 break;
1860 case 1:
1861 link_up = true;
1862 break;
1863 default:
1864 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1865 (unsigned int)info->link_state));
1866 return -1;
1869 if (info->references != 0) {
1870 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1871 (unsigned int)info->references));
1872 return -1;
1875 iface = ctdb_find_iface(ctdb, info->name);
1876 if (iface == NULL) {
1877 return -1;
1880 if (link_up == iface->link_up) {
1881 return 0;
1884 DEBUG(DEBUG_ERR,
1885 ("iface[%s] has changed it's link status %s => %s\n",
1886 iface->name,
1887 iface->link_up?"up":"down",
1888 link_up?"up":"down"));
1890 iface->link_up = link_up;
1891 return 0;
1896 called by a daemon to inform us of the entire list of TCP tickles for
1897 a particular public address.
1898 this control should only be sent by the node that is currently serving
1899 that public address.
1901 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1903 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1904 struct ctdb_tcp_array *tcparray;
1905 struct ctdb_vnn *vnn;
1907 /* We must at least have tickles.num or else we cant verify the size
1908 of the received data blob
1910 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1911 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1912 return -1;
1915 /* verify that the size of data matches what we expect */
1916 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1917 + sizeof(struct ctdb_connection) * list->num) {
1918 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1919 return -1;
1922 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1923 ctdb_addr_to_str(&list->addr)));
1925 vnn = find_public_ip_vnn(ctdb, &list->addr);
1926 if (vnn == NULL) {
1927 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1928 ctdb_addr_to_str(&list->addr)));
1930 return 1;
1933 if (vnn->pnn == ctdb->pnn) {
1934 DEBUG(DEBUG_INFO,
1935 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1936 ctdb_addr_to_str(&list->addr)));
1937 return 0;
1940 /* remove any old ticklelist we might have */
1941 talloc_free(vnn->tcp_array);
1942 vnn->tcp_array = NULL;
1944 tcparray = talloc(vnn, struct ctdb_tcp_array);
1945 CTDB_NO_MEMORY(ctdb, tcparray);
1947 tcparray->num = list->num;
1949 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1950 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1952 memcpy(tcparray->connections, &list->connections[0],
1953 sizeof(struct ctdb_connection)*tcparray->num);
1955 /* We now have a new fresh tickle list array for this vnn */
1956 vnn->tcp_array = tcparray;
1958 return 0;
1962 called to return the full list of tickles for the puclic address associated
1963 with the provided vnn
1965 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1967 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1968 struct ctdb_tickle_list_old *list;
1969 struct ctdb_tcp_array *tcparray;
1970 unsigned int num, i;
1971 struct ctdb_vnn *vnn;
1972 unsigned port;
1974 vnn = find_public_ip_vnn(ctdb, addr);
1975 if (vnn == NULL) {
1976 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1977 ctdb_addr_to_str(addr)));
1979 return 1;
1982 port = ctdb_addr_to_port(addr);
1984 tcparray = vnn->tcp_array;
1985 num = 0;
1986 if (tcparray != NULL) {
1987 if (port == 0) {
1988 /* All connections */
1989 num = tcparray->num;
1990 } else {
1991 /* Count connections for port */
1992 for (i = 0; i < tcparray->num; i++) {
1993 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1994 num++;
2000 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2001 + sizeof(struct ctdb_connection) * num;
2003 outdata->dptr = talloc_size(outdata, outdata->dsize);
2004 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2005 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2007 list->addr = *addr;
2008 list->num = num;
2010 if (num == 0) {
2011 return 0;
2014 num = 0;
2015 for (i = 0; i < tcparray->num; i++) {
2016 if (port == 0 || \
2017 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2018 list->connections[num] = tcparray->connections[i];
2019 num++;
2023 return 0;
2028 set the list of all tcp tickles for a public address
2030 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2031 ctdb_sock_addr *addr,
2032 struct ctdb_tcp_array *tcparray)
2034 int ret, num;
2035 TDB_DATA data;
2036 struct ctdb_tickle_list_old *list;
2038 if (tcparray) {
2039 num = tcparray->num;
2040 } else {
2041 num = 0;
2044 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2045 sizeof(struct ctdb_connection) * num;
2046 data.dptr = talloc_size(ctdb, data.dsize);
2047 CTDB_NO_MEMORY(ctdb, data.dptr);
2049 list = (struct ctdb_tickle_list_old *)data.dptr;
2050 list->addr = *addr;
2051 list->num = num;
2052 if (tcparray) {
2053 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2056 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2057 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2058 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2059 if (ret != 0) {
2060 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2061 return -1;
2064 talloc_free(data.dptr);
2066 return ret;
2069 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2070 bool force)
2072 struct ctdb_vnn *vnn;
2073 int ret;
2075 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2076 /* we only send out updates for public addresses that
2077 we have taken over
2079 if (ctdb->pnn != vnn->pnn) {
2080 continue;
2083 /* We only send out the updates if we need to */
2084 if (!force && !vnn->tcp_update_needed) {
2085 continue;
2088 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2089 &vnn->public_address,
2090 vnn->tcp_array);
2091 if (ret != 0) {
2092 D_ERR("Failed to send the tickle update for ip %s\n",
2093 ctdb_addr_to_str(&vnn->public_address));
2094 vnn->tcp_update_needed = true;
2095 } else {
2096 D_INFO("Sent tickle update for ip %s\n",
2097 ctdb_addr_to_str(&vnn->public_address));
2098 vnn->tcp_update_needed = false;
2105 perform tickle updates if required
2107 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2108 struct tevent_timer *te,
2109 struct timeval t, void *private_data)
2111 struct ctdb_context *ctdb = talloc_get_type(
2112 private_data, struct ctdb_context);
2114 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2116 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2117 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2118 ctdb_update_tcp_tickles, ctdb);
2122 start periodic update of tcp tickles
2124 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2126 ctdb->tickle_update_context = talloc_new(ctdb);
2128 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2129 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2130 ctdb_update_tcp_tickles, ctdb);
2136 struct control_gratious_arp {
2137 struct ctdb_context *ctdb;
2138 ctdb_sock_addr addr;
2139 const char *iface;
2140 int count;
2144 send a control_gratuitous arp
2146 static void send_gratious_arp(struct tevent_context *ev,
2147 struct tevent_timer *te,
2148 struct timeval t, void *private_data)
2150 int ret;
2151 struct control_gratious_arp *arp = talloc_get_type(private_data,
2152 struct control_gratious_arp);
2154 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2155 if (ret != 0) {
2156 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2157 arp->iface, strerror(ret));
2161 arp->count++;
2162 if (arp->count == CTDB_ARP_REPEAT) {
2163 talloc_free(arp);
2164 return;
2167 tevent_add_timer(arp->ctdb->ev, arp,
2168 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2169 send_gratious_arp, arp);
2174 send a gratious arp
2176 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2178 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2179 struct control_gratious_arp *arp;
2181 /* verify the size of indata */
2182 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2183 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2184 (unsigned)indata.dsize,
2185 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2186 return -1;
2188 if (indata.dsize !=
2189 ( offsetof(struct ctdb_addr_info_old, iface)
2190 + gratious_arp->len ) ){
2192 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2193 "but should be %u bytes\n",
2194 (unsigned)indata.dsize,
2195 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2196 return -1;
2200 arp = talloc(ctdb, struct control_gratious_arp);
2201 CTDB_NO_MEMORY(ctdb, arp);
2203 arp->ctdb = ctdb;
2204 arp->addr = gratious_arp->addr;
2205 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2206 CTDB_NO_MEMORY(ctdb, arp->iface);
2207 arp->count = 0;
2209 tevent_add_timer(arp->ctdb->ev, arp,
2210 timeval_zero(), send_gratious_arp, arp);
2212 return 0;
2215 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2217 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2218 int ret;
2220 /* verify the size of indata */
2221 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2222 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2223 return -1;
2225 if (indata.dsize !=
2226 ( offsetof(struct ctdb_addr_info_old, iface)
2227 + pub->len ) ){
2229 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2230 "but should be %u bytes\n",
2231 (unsigned)indata.dsize,
2232 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2233 return -1;
2236 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2238 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2240 if (ret != 0) {
2241 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2242 return -1;
2245 return 0;
2248 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2250 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2251 struct ctdb_vnn *vnn;
2253 /* verify the size of indata */
2254 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2255 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2256 return -1;
2258 if (indata.dsize !=
2259 ( offsetof(struct ctdb_addr_info_old, iface)
2260 + pub->len ) ){
2262 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2263 "but should be %u bytes\n",
2264 (unsigned)indata.dsize,
2265 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2266 return -1;
2269 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2271 /* walk over all public addresses until we find a match */
2272 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2273 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2274 if (vnn->pnn == ctdb->pnn) {
2275 /* This IP is currently being hosted.
2276 * Defer the deletion until the next
2277 * takeover run. "ctdb reloadips" will
2278 * always cause a takeover run. "ctdb
2279 * delip" will now need an explicit
2280 * "ctdb ipreallocated" afterwards. */
2281 vnn->delete_pending = true;
2282 } else {
2283 /* This IP is not hosted on the
2284 * current node so just delete it
2285 * now. */
2286 do_delete_ip(ctdb, vnn);
2289 return 0;
2293 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2294 ctdb_addr_to_str(&pub->addr)));
2295 return -1;
/* state carried from ctdb_control_ipreallocated() to its callback */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* request to reply to */
};
2303 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2304 int status, void *p)
2306 struct ipreallocated_callback_state *state =
2307 talloc_get_type(p, struct ipreallocated_callback_state);
2309 if (status != 0) {
2310 DEBUG(DEBUG_ERR,
2311 (" \"ipreallocated\" event script failed (status %d)\n",
2312 status));
2313 if (status == -ETIMEDOUT) {
2314 ctdb_ban_self(ctdb);
2318 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2319 talloc_free(state);
2322 /* A control to run the ipreallocated event */
2323 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2324 struct ctdb_req_control_old *c,
2325 bool *async_reply)
2327 int ret;
2328 struct ipreallocated_callback_state *state;
2330 state = talloc(ctdb, struct ipreallocated_callback_state);
2331 CTDB_NO_MEMORY(ctdb, state);
2333 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2335 ret = ctdb_event_script_callback(ctdb, state,
2336 ctdb_ipreallocated_callback, state,
2337 CTDB_EVENT_IPREALLOCATED,
2338 "%s", "");
2340 if (ret != 0) {
2341 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2342 talloc_free(state);
2343 return -1;
2346 /* tell the control that we will be reply asynchronously */
2347 state->c = talloc_steal(state, c);
2348 *async_reply = true;
2350 return 0;
/* state of a reloadips run that is carried out by a child process */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	struct ctdb_req_control_old *c;	/* request to reply to, NULL once replied */
	int status;			/* status sent in the reply */
	int fd[2];			/* fd[0] is read for the child's result */
	pid_t child;			/* killed when the handle is destroyed */
	struct tevent_fd *fde;
};
2363 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2365 if (h == h->ctdb->reload_ips) {
2366 h->ctdb->reload_ips = NULL;
2368 if (h->c != NULL) {
2369 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2370 h->c = NULL;
2372 ctdb_kill(h->ctdb, h->child, SIGKILL);
2373 return 0;
2376 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2377 struct tevent_timer *te,
2378 struct timeval t, void *private_data)
2380 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2382 talloc_free(h);
2385 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2386 struct tevent_fd *fde,
2387 uint16_t flags, void *private_data)
2389 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2391 char res;
2392 int ret;
2394 ret = sys_read(h->fd[0], &res, 1);
2395 if (ret < 1 || res != 0) {
2396 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2397 res = 1;
2399 h->status = res;
2401 talloc_free(h);
2404 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2406 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2407 struct ctdb_public_ip_list_old *ips;
2408 struct ctdb_vnn *vnn;
2409 struct client_async_data *async_data;
2410 struct timeval timeout;
2411 TDB_DATA data;
2412 struct ctdb_client_control_state *state;
2413 bool first_add;
2414 unsigned int i;
2415 int ret;
2417 CTDB_NO_MEMORY(ctdb, mem_ctx);
2419 /* Read IPs from local node */
2420 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2421 CTDB_CURRENT_NODE, mem_ctx, &ips);
2422 if (ret != 0) {
2423 DEBUG(DEBUG_ERR,
2424 ("Unable to fetch public IPs from local node\n"));
2425 talloc_free(mem_ctx);
2426 return -1;
2429 /* Read IPs file - this is safe since this is a child process */
2430 ctdb->vnn = NULL;
2431 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2432 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2433 talloc_free(mem_ctx);
2434 return -1;
2437 async_data = talloc_zero(mem_ctx, struct client_async_data);
2438 CTDB_NO_MEMORY(ctdb, async_data);
2440 /* Compare IPs between node and file for IPs to be deleted */
2441 for (i = 0; i < ips->num; i++) {
2442 /* */
2443 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2444 if (ctdb_same_ip(&vnn->public_address,
2445 &ips->ips[i].addr)) {
2446 /* IP is still in file */
2447 break;
2451 if (vnn == NULL) {
2452 /* Delete IP ips->ips[i] */
2453 struct ctdb_addr_info_old *pub;
2455 DEBUG(DEBUG_NOTICE,
2456 ("IP %s no longer configured, deleting it\n",
2457 ctdb_addr_to_str(&ips->ips[i].addr)));
2459 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2460 CTDB_NO_MEMORY(ctdb, pub);
2462 pub->addr = ips->ips[i].addr;
2463 pub->mask = 0;
2464 pub->len = 0;
2466 timeout = TAKEOVER_TIMEOUT();
2468 data.dsize = offsetof(struct ctdb_addr_info_old,
2469 iface) + pub->len;
2470 data.dptr = (uint8_t *)pub;
2472 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2473 CTDB_CONTROL_DEL_PUBLIC_IP,
2474 0, data, async_data,
2475 &timeout, NULL);
2476 if (state == NULL) {
2477 DEBUG(DEBUG_ERR,
2478 (__location__
2479 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2480 goto failed;
2483 ctdb_client_async_add(async_data, state);
2487 /* Compare IPs between node and file for IPs to be added */
2488 first_add = true;
2489 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2490 for (i = 0; i < ips->num; i++) {
2491 if (ctdb_same_ip(&vnn->public_address,
2492 &ips->ips[i].addr)) {
2493 /* IP already on node */
2494 break;
2497 if (i == ips->num) {
2498 /* Add IP ips->ips[i] */
2499 struct ctdb_addr_info_old *pub;
2500 const char *ifaces = NULL;
2501 uint32_t len;
2502 struct vnn_interface *iface = NULL;
2504 DEBUG(DEBUG_NOTICE,
2505 ("New IP %s configured, adding it\n",
2506 ctdb_addr_to_str(&vnn->public_address)));
2507 if (first_add) {
2508 uint32_t pnn = ctdb_get_pnn(ctdb);
2510 data.dsize = sizeof(pnn);
2511 data.dptr = (uint8_t *)&pnn;
2513 ret = ctdb_client_send_message(
2514 ctdb,
2515 CTDB_BROADCAST_CONNECTED,
2516 CTDB_SRVID_REBALANCE_NODE,
2517 data);
2518 if (ret != 0) {
2519 DEBUG(DEBUG_WARNING,
2520 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2523 first_add = false;
2526 ifaces = vnn->ifaces->iface->name;
2527 iface = vnn->ifaces->next;
2528 while (iface != NULL) {
2529 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2530 iface->iface->name);
2531 iface = iface->next;
2534 len = strlen(ifaces) + 1;
2535 pub = talloc_zero_size(mem_ctx,
2536 offsetof(struct ctdb_addr_info_old, iface) + len);
2537 CTDB_NO_MEMORY(ctdb, pub);
2539 pub->addr = vnn->public_address;
2540 pub->mask = vnn->public_netmask_bits;
2541 pub->len = len;
2542 memcpy(&pub->iface[0], ifaces, pub->len);
2544 timeout = TAKEOVER_TIMEOUT();
2546 data.dsize = offsetof(struct ctdb_addr_info_old,
2547 iface) + pub->len;
2548 data.dptr = (uint8_t *)pub;
2550 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2551 CTDB_CONTROL_ADD_PUBLIC_IP,
2552 0, data, async_data,
2553 &timeout, NULL);
2554 if (state == NULL) {
2555 DEBUG(DEBUG_ERR,
2556 (__location__
2557 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2558 goto failed;
2561 ctdb_client_async_add(async_data, state);
2565 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2566 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2567 goto failed;
2570 talloc_free(mem_ctx);
2571 return 0;
2573 failed:
2574 talloc_free(mem_ctx);
2575 return -1;
2578 /* This control is sent to force the node to re-read the public addresses file
2579 and drop any addresses we should nnot longer host, and add new addresses
2580 that we are now able to host
2582 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2584 struct ctdb_reloadips_handle *h;
2585 pid_t parent = getpid();
2587 if (ctdb->reload_ips != NULL) {
2588 talloc_free(ctdb->reload_ips);
2589 ctdb->reload_ips = NULL;
2592 h = talloc(ctdb, struct ctdb_reloadips_handle);
2593 CTDB_NO_MEMORY(ctdb, h);
2594 h->ctdb = ctdb;
2595 h->c = NULL;
2596 h->status = -1;
2598 if (pipe(h->fd) == -1) {
2599 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2600 talloc_free(h);
2601 return -1;
2604 h->child = ctdb_fork(ctdb);
2605 if (h->child == (pid_t)-1) {
2606 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2607 close(h->fd[0]);
2608 close(h->fd[1]);
2609 talloc_free(h);
2610 return -1;
2613 /* child process */
2614 if (h->child == 0) {
2615 signed char res = 0;
2617 close(h->fd[0]);
2619 prctl_set_comment("ctdb_reloadips");
2620 if (switch_from_server_to_client(ctdb) != 0) {
2621 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2622 res = -1;
2623 } else {
2624 res = ctdb_reloadips_child(ctdb);
2625 if (res != 0) {
2626 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2630 sys_write(h->fd[1], &res, 1);
2631 ctdb_wait_for_process_to_exit(parent);
2632 _exit(0);
2635 h->c = talloc_steal(h, c);
2637 close(h->fd[1]);
2638 set_close_on_exec(h->fd[0]);
2640 talloc_set_destructor(h, ctdb_reloadips_destructor);
2643 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2644 ctdb_reloadips_child_handler, (void *)h);
2645 tevent_fd_set_auto_close(h->fde);
2647 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2648 ctdb_reloadips_timeout_event, h);
2650 /* we reply later */
2651 *async_reply = true;
2652 return 0;