VERSION: Disable GIT_SNAPSHOT for the 4.8.12 release.
[Samba.git] / ctdb / server / ctdb_takeover.c
blobcd240875ba2e4cc48424de59b2fe5e93f9855bb6
/*
   ctdb ip takeover code

   Copyright (C) Ronnie Sahlberg  2007
   Copyright (C) Andrew Tridgell  2007
   Copyright (C) Martin Schwenke  2011

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
45 #include "server/ipalloc.h"
/*
 * Builds a struct timeval with timeval_current_ofs() from the
 * takeover_timeout tunable.  Expands to an expression that uses a
 * variable named "ctdb", which must be in scope at the point of use.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument handed to timeval_current_ofs() when the gratuitous
 * ARP timer is re-armed. */
#define CTDB_ARP_INTERVAL 1

/* Once this many ARP rounds have been sent for an address the ARP
 * state is freed instead of being re-armed. */
#define CTDB_ARP_REPEAT 3
/*
 * One local network interface that public addresses can be placed on.
 * Entries are kept on the ctdb->ifaces list.
 */
struct ctdb_interface {
	/* list linkage */
	struct ctdb_interface *prev, *next;
	/* interface name, compared with strcmp() on lookup */
	const char *name;
	/* interfaces with link_up == false are not chosen for addresses */
	bool link_up;
	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
/*
 * List node recording that a public address may be hosted on a given
 * interface.  Each struct ctdb_vnn owns a list of these.
 */
struct vnn_interface {
	/* list linkage */
	struct vnn_interface *prev, *next;
	/* the candidate interface */
	struct ctdb_interface *iface;
};
64 /* state associated with a public ip address */
65 struct ctdb_vnn {
66 struct ctdb_vnn *prev, *next;
68 struct ctdb_interface *iface;
69 struct vnn_interface *ifaces;
70 ctdb_sock_addr public_address;
71 uint8_t public_netmask_bits;
73 /* the node number that is serving this public address, if any.
74 If no node serves this ip it is set to -1 */
75 int32_t pnn;
77 /* List of clients to tickle for this public address */
78 struct ctdb_tcp_array *tcp_array;
80 /* whether we need to update the other nodes with changes to our list
81 of connected clients */
82 bool tcp_update_needed;
84 /* a context to hang sending gratious arp events off */
85 TALLOC_CTX *takeover_ctx;
87 /* Set to true any time an update to this VNN is in flight.
88 This helps to avoid races. */
89 bool update_in_flight;
91 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92 * address then this flag is set. It will be deleted in the
93 * release IP callback. */
94 bool delete_pending;
97 static const char *iface_string(const struct ctdb_interface *iface)
99 return (iface != NULL ? iface->name : "__none__");
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
104 return iface_string(vnn->iface);
107 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
108 const char *iface);
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
113 struct ctdb_interface *i;
115 if (strlen(iface) > CTDB_IFACE_SIZE) {
116 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
117 return NULL;
120 /* Verify that we don't have an entry for this ip yet */
121 i = ctdb_find_iface(ctdb, iface);
122 if (i != NULL) {
123 return i;
126 /* create a new structure for this interface */
127 i = talloc_zero(ctdb, struct ctdb_interface);
128 if (i == NULL) {
129 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
130 return NULL;
132 i->name = talloc_strdup(i, iface);
133 if (i->name == NULL) {
134 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
135 talloc_free(i);
136 return NULL;
139 i->link_up = true;
141 DLIST_ADD(ctdb->ifaces, i);
143 return i;
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147 const struct ctdb_interface *iface)
149 struct vnn_interface *i;
151 for (i = vnn->ifaces; i != NULL; i = i->next) {
152 if (iface == i->iface) {
153 return true;
157 return false;
160 /* If any interfaces now have no possible IPs then delete them. This
161 * implementation is naive (i.e. simple) rather than clever
162 * (i.e. complex). Given that this is run on delip and that operation
163 * is rare, this doesn't need to be efficient - it needs to be
164 * foolproof. One alternative is reference counting, where the logic
165 * is distributed and can, therefore, be broken in multiple places.
166 * Another alternative is to build a red-black tree of interfaces that
167 * can have addresses (by walking ctdb->vnn once) and then walking
168 * ctdb->ifaces once and deleting those not in the tree. Let's go to
169 * one of those if the naive implementation causes problems... :-)
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172 struct ctdb_vnn *vnn)
174 struct ctdb_interface *i, *next;
176 /* For each interface, check if there's an IP using it. */
177 for (i = ctdb->ifaces; i != NULL; i = next) {
178 struct ctdb_vnn *tv;
179 bool found;
180 next = i->next;
182 /* Only consider interfaces named in the given VNN. */
183 if (!vnn_has_interface(vnn, i)) {
184 continue;
187 /* Search for a vnn with this interface. */
188 found = false;
189 for (tv=ctdb->vnn; tv; tv=tv->next) {
190 if (vnn_has_interface(tv, i)) {
191 found = true;
192 break;
196 if (!found) {
197 /* None of the VNNs are using this interface. */
198 DLIST_REMOVE(ctdb->ifaces, i);
199 talloc_free(i);
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
206 const char *iface)
208 struct ctdb_interface *i;
210 for (i=ctdb->ifaces;i;i=i->next) {
211 if (strcmp(i->name, iface) == 0) {
212 return i;
216 return NULL;
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220 struct ctdb_vnn *vnn)
222 struct vnn_interface *i;
223 struct ctdb_interface *cur = NULL;
224 struct ctdb_interface *best = NULL;
226 for (i = vnn->ifaces; i != NULL; i = i->next) {
228 cur = i->iface;
230 if (!cur->link_up) {
231 continue;
234 if (best == NULL) {
235 best = cur;
236 continue;
239 if (cur->references < best->references) {
240 best = cur;
241 continue;
245 return best;
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249 struct ctdb_vnn *vnn)
251 struct ctdb_interface *best = NULL;
253 if (vnn->iface) {
254 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255 "still assigned to iface '%s'\n",
256 ctdb_addr_to_str(&vnn->public_address),
257 ctdb_vnn_iface_string(vnn)));
258 return 0;
261 best = ctdb_vnn_best_iface(ctdb, vnn);
262 if (best == NULL) {
263 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264 "cannot assign to iface any iface\n",
265 ctdb_addr_to_str(&vnn->public_address)));
266 return -1;
269 vnn->iface = best;
270 best->references++;
271 vnn->pnn = ctdb->pnn;
273 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274 "now assigned to iface '%s' refs[%d]\n",
275 ctdb_addr_to_str(&vnn->public_address),
276 ctdb_vnn_iface_string(vnn),
277 best->references));
278 return 0;
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282 struct ctdb_vnn *vnn)
284 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285 "now unassigned (old iface '%s' refs[%d])\n",
286 ctdb_addr_to_str(&vnn->public_address),
287 ctdb_vnn_iface_string(vnn),
288 vnn->iface?vnn->iface->references:0));
289 if (vnn->iface) {
290 vnn->iface->references--;
292 vnn->iface = NULL;
293 if (vnn->pnn == ctdb->pnn) {
294 vnn->pnn = -1;
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299 struct ctdb_vnn *vnn)
301 struct vnn_interface *i;
303 /* Nodes that are not RUNNING can not host IPs */
304 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
305 return false;
308 if (vnn->delete_pending) {
309 return false;
312 if (vnn->iface && vnn->iface->link_up) {
313 return true;
316 for (i = vnn->ifaces; i != NULL; i = i->next) {
317 if (i->iface->link_up) {
318 return true;
322 return false;
325 struct ctdb_takeover_arp {
326 struct ctdb_context *ctdb;
327 uint32_t count;
328 ctdb_sock_addr addr;
329 struct ctdb_tcp_array *tcparray;
330 struct ctdb_vnn *vnn;
335 lists of tcp endpoints
337 struct ctdb_tcp_list {
338 struct ctdb_tcp_list *prev, *next;
339 struct ctdb_connection connection;
343 list of clients to kill on IP release
345 struct ctdb_client_ip {
346 struct ctdb_client_ip *prev, *next;
347 struct ctdb_context *ctdb;
348 ctdb_sock_addr addr;
349 uint32_t client_id;
354 send a gratuitous arp
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357 struct tevent_timer *te,
358 struct timeval t, void *private_data)
360 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
361 struct ctdb_takeover_arp);
362 int i, ret;
363 struct ctdb_tcp_array *tcparray;
364 const char *iface = ctdb_vnn_iface_string(arp->vnn);
366 ret = ctdb_sys_send_arp(&arp->addr, iface);
367 if (ret != 0) {
368 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369 iface, strerror(errno)));
372 tcparray = arp->tcparray;
373 if (tcparray) {
374 for (i=0;i<tcparray->num;i++) {
375 struct ctdb_connection *tcon;
377 tcon = &tcparray->connections[i];
378 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379 (unsigned)ntohs(tcon->dst.ip.sin_port),
380 ctdb_addr_to_str(&tcon->src),
381 (unsigned)ntohs(tcon->src.ip.sin_port)));
382 ret = ctdb_sys_send_tcp(
383 &tcon->src,
384 &tcon->dst,
385 0, 0, 0);
386 if (ret != 0) {
387 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388 ctdb_addr_to_str(&tcon->src)));
393 arp->count++;
395 if (arp->count == CTDB_ARP_REPEAT) {
396 talloc_free(arp);
397 return;
400 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402 ctdb_control_send_arp, arp);
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406 struct ctdb_vnn *vnn)
408 struct ctdb_takeover_arp *arp;
409 struct ctdb_tcp_array *tcparray;
411 if (!vnn->takeover_ctx) {
412 vnn->takeover_ctx = talloc_new(vnn);
413 if (!vnn->takeover_ctx) {
414 return -1;
418 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
419 if (!arp) {
420 return -1;
423 arp->ctdb = ctdb;
424 arp->addr = vnn->public_address;
425 arp->vnn = vnn;
427 tcparray = vnn->tcp_array;
428 if (tcparray) {
429 /* add all of the known tcp connections for this IP to the
430 list of tcp connections to send tickle acks for */
431 arp->tcparray = talloc_steal(arp, tcparray);
433 vnn->tcp_array = NULL;
434 vnn->tcp_update_needed = true;
437 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438 timeval_zero(), ctdb_control_send_arp, arp);
440 return 0;
/* State handed to ctdb_do_takeip_callback() */
struct ctdb_do_takeip_state {
	/* the control request to reply to */
	struct ctdb_req_control_old *c;
	/* the address being taken over */
	struct ctdb_vnn *vnn;
};
449 called when takeip event finishes
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
452 void *private_data)
454 struct ctdb_do_takeip_state *state =
455 talloc_get_type(private_data, struct ctdb_do_takeip_state);
456 int32_t ret;
457 TDB_DATA data;
459 if (status != 0) {
460 if (status == -ETIME) {
461 ctdb_ban_self(ctdb);
463 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464 ctdb_addr_to_str(&state->vnn->public_address),
465 ctdb_vnn_iface_string(state->vnn)));
466 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
468 talloc_free(state);
469 return;
472 if (ctdb->do_checkpublicip) {
474 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475 if (ret != 0) {
476 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
477 talloc_free(state);
478 return;
483 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484 data.dsize = strlen((char *)data.dptr) + 1;
485 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
487 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
490 /* the control succeeded */
491 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
492 talloc_free(state);
493 return;
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
498 state->vnn->update_in_flight = false;
499 return 0;
503 take over an ip address
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506 struct ctdb_req_control_old *c,
507 struct ctdb_vnn *vnn)
509 int ret;
510 struct ctdb_do_takeip_state *state;
512 if (vnn->update_in_flight) {
513 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514 "update for this IP already in flight\n",
515 ctdb_addr_to_str(&vnn->public_address),
516 vnn->public_netmask_bits));
517 return -1;
520 ret = ctdb_vnn_assign_iface(ctdb, vnn);
521 if (ret != 0) {
522 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523 "assign a usable interface\n",
524 ctdb_addr_to_str(&vnn->public_address),
525 vnn->public_netmask_bits));
526 return -1;
529 state = talloc(vnn, struct ctdb_do_takeip_state);
530 CTDB_NO_MEMORY(ctdb, state);
532 state->c = NULL;
533 state->vnn = vnn;
535 vnn->update_in_flight = true;
536 talloc_set_destructor(state, ctdb_takeip_destructor);
538 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539 ctdb_addr_to_str(&vnn->public_address),
540 vnn->public_netmask_bits,
541 ctdb_vnn_iface_string(vnn)));
543 ret = ctdb_event_script_callback(ctdb,
544 state,
545 ctdb_do_takeip_callback,
546 state,
547 CTDB_EVENT_TAKE_IP,
548 "%s %s %u",
549 ctdb_vnn_iface_string(vnn),
550 ctdb_addr_to_str(&vnn->public_address),
551 vnn->public_netmask_bits);
553 if (ret != 0) {
554 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555 ctdb_addr_to_str(&vnn->public_address),
556 ctdb_vnn_iface_string(vnn)));
557 talloc_free(state);
558 return -1;
561 state->c = talloc_steal(ctdb, c);
562 return 0;
/* State handed to ctdb_do_updateip_callback() */
struct ctdb_do_updateip_state {
	/* the control request to reply to */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the move, may be NULL */
	struct ctdb_interface *old;
	/* the address being moved */
	struct ctdb_vnn *vnn;
};
572 called when updateip event finishes
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575 void *private_data)
577 struct ctdb_do_updateip_state *state =
578 talloc_get_type(private_data, struct ctdb_do_updateip_state);
580 if (status != 0) {
581 if (status == -ETIME) {
582 ctdb_ban_self(ctdb);
584 DEBUG(DEBUG_ERR,
585 ("Failed update of IP %s from interface %s to %s\n",
586 ctdb_addr_to_str(&state->vnn->public_address),
587 iface_string(state->old),
588 ctdb_vnn_iface_string(state->vnn)));
591 * All we can do is reset the old interface
592 * and let the next run fix it
594 ctdb_vnn_unassign_iface(ctdb, state->vnn);
595 state->vnn->iface = state->old;
596 state->vnn->iface->references++;
598 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
599 talloc_free(state);
600 return;
603 /* the control succeeded */
604 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
605 talloc_free(state);
606 return;
609 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
611 state->vnn->update_in_flight = false;
612 return 0;
616 update (move) an ip address
618 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
619 struct ctdb_req_control_old *c,
620 struct ctdb_vnn *vnn)
622 int ret;
623 struct ctdb_do_updateip_state *state;
624 struct ctdb_interface *old = vnn->iface;
625 const char *old_name = iface_string(old);
626 const char *new_name;
628 if (vnn->update_in_flight) {
629 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
630 "update for this IP already in flight\n",
631 ctdb_addr_to_str(&vnn->public_address),
632 vnn->public_netmask_bits));
633 return -1;
636 ctdb_vnn_unassign_iface(ctdb, vnn);
637 ret = ctdb_vnn_assign_iface(ctdb, vnn);
638 if (ret != 0) {
639 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
640 "assign a usable interface (old iface '%s')\n",
641 ctdb_addr_to_str(&vnn->public_address),
642 vnn->public_netmask_bits,
643 old_name));
644 return -1;
647 if (old == vnn->iface) {
648 /* A benign update from one interface onto itself.
649 * no need to run the eventscripts in this case, just return
650 * success.
652 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
653 return 0;
656 state = talloc(vnn, struct ctdb_do_updateip_state);
657 CTDB_NO_MEMORY(ctdb, state);
659 state->c = NULL;
660 state->old = old;
661 state->vnn = vnn;
663 vnn->update_in_flight = true;
664 talloc_set_destructor(state, ctdb_updateip_destructor);
666 new_name = ctdb_vnn_iface_string(vnn);
667 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
668 "interface %s to %s\n",
669 ctdb_addr_to_str(&vnn->public_address),
670 vnn->public_netmask_bits,
671 old_name,
672 new_name));
674 ret = ctdb_event_script_callback(ctdb,
675 state,
676 ctdb_do_updateip_callback,
677 state,
678 CTDB_EVENT_UPDATE_IP,
679 "%s %s %s %u",
680 old_name,
681 new_name,
682 ctdb_addr_to_str(&vnn->public_address),
683 vnn->public_netmask_bits);
684 if (ret != 0) {
685 DEBUG(DEBUG_ERR,
686 ("Failed update IP %s from interface %s to %s\n",
687 ctdb_addr_to_str(&vnn->public_address),
688 old_name, new_name));
689 talloc_free(state);
690 return -1;
693 state->c = talloc_steal(ctdb, c);
694 return 0;
698 Find the vnn of the node that has a public ip address
699 returns -1 if the address is not known as a public address
701 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
703 struct ctdb_vnn *vnn;
705 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
706 if (ctdb_same_ip(&vnn->public_address, addr)) {
707 return vnn;
711 return NULL;
715 take over an ip address
717 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
718 struct ctdb_req_control_old *c,
719 TDB_DATA indata,
720 bool *async_reply)
722 int ret;
723 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
724 struct ctdb_vnn *vnn;
725 bool have_ip = false;
726 bool do_updateip = false;
727 bool do_takeip = false;
728 struct ctdb_interface *best_iface = NULL;
730 if (pip->pnn != ctdb->pnn) {
731 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
732 "with pnn %d, but we're node %d\n",
733 ctdb_addr_to_str(&pip->addr),
734 pip->pnn, ctdb->pnn));
735 return -1;
738 /* update out vnn list */
739 vnn = find_public_ip_vnn(ctdb, &pip->addr);
740 if (vnn == NULL) {
741 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
742 ctdb_addr_to_str(&pip->addr)));
743 return 0;
746 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
747 have_ip = ctdb_sys_have_ip(&pip->addr);
749 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
750 if (best_iface == NULL) {
751 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
752 "a usable interface (old %s, have_ip %d)\n",
753 ctdb_addr_to_str(&vnn->public_address),
754 vnn->public_netmask_bits,
755 ctdb_vnn_iface_string(vnn),
756 have_ip));
757 return -1;
760 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
761 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762 "and we have it on iface[%s], but it was assigned to node %d"
763 "and we are node %d, banning ourself\n",
764 ctdb_addr_to_str(&vnn->public_address),
765 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
766 ctdb_ban_self(ctdb);
767 return -1;
770 if (vnn->pnn == -1 && have_ip) {
771 /* This will cause connections to be reset and
772 * reestablished. However, this is a very unusual
773 * situation and doing this will completely repair the
774 * inconsistency in the VNN.
776 DEBUG(DEBUG_WARNING,
777 (__location__
778 " Doing updateip for IP %s already on an interface\n",
779 ctdb_addr_to_str(&vnn->public_address)));
780 do_updateip = true;
783 if (vnn->iface) {
784 if (vnn->iface != best_iface) {
785 if (!vnn->iface->link_up) {
786 do_updateip = true;
787 } else if (vnn->iface->references > (best_iface->references + 1)) {
788 /* only move when the rebalance gains something */
789 do_updateip = true;
794 if (!have_ip) {
795 if (do_updateip) {
796 ctdb_vnn_unassign_iface(ctdb, vnn);
797 do_updateip = false;
799 do_takeip = true;
802 if (do_takeip) {
803 ret = ctdb_do_takeip(ctdb, c, vnn);
804 if (ret != 0) {
805 return -1;
807 } else if (do_updateip) {
808 ret = ctdb_do_updateip(ctdb, c, vnn);
809 if (ret != 0) {
810 return -1;
812 } else {
814 * The interface is up and the kernel known the ip
815 * => do nothing
817 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn)));
821 return 0;
824 /* tell ctdb_control.c that we will be replying asynchronously */
825 *async_reply = true;
827 return 0;
830 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
832 DLIST_REMOVE(ctdb->vnn, vnn);
833 ctdb_vnn_unassign_iface(ctdb, vnn);
834 ctdb_remove_orphaned_ifaces(ctdb, vnn);
835 talloc_free(vnn);
838 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
839 struct ctdb_vnn *vnn,
840 ctdb_sock_addr *addr)
842 TDB_DATA data;
844 /* Send a message to all clients of this node telling them
845 * that the cluster has been reconfigured and they should
846 * close any connections on this IP address
848 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
849 data.dsize = strlen((char *)data.dptr)+1;
850 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
851 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
853 ctdb_vnn_unassign_iface(ctdb, vnn);
855 /* Process the IP if it has been marked for deletion */
856 if (vnn->delete_pending) {
857 do_delete_ip(ctdb, vnn);
858 return NULL;
861 return vnn;
864 struct release_ip_callback_state {
865 struct ctdb_req_control_old *c;
866 ctdb_sock_addr *addr;
867 struct ctdb_vnn *vnn;
868 uint32_t target_pnn;
872 called when releaseip event finishes
874 static void release_ip_callback(struct ctdb_context *ctdb, int status,
875 void *private_data)
877 struct release_ip_callback_state *state =
878 talloc_get_type(private_data, struct release_ip_callback_state);
880 if (status == -ETIME) {
881 ctdb_ban_self(ctdb);
884 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
885 if (ctdb_sys_have_ip(state->addr)) {
886 DEBUG(DEBUG_ERR,
887 ("IP %s still hosted during release IP callback, failing\n",
888 ctdb_addr_to_str(state->addr)));
889 ctdb_request_control_reply(ctdb, state->c,
890 NULL, -1, NULL);
891 talloc_free(state);
892 return;
896 state->vnn->pnn = state->target_pnn;
897 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
899 /* the control succeeded */
900 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
901 talloc_free(state);
904 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
906 if (state->vnn != NULL) {
907 state->vnn->update_in_flight = false;
909 return 0;
913 release an ip address
915 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
916 struct ctdb_req_control_old *c,
917 TDB_DATA indata,
918 bool *async_reply)
920 int ret;
921 struct release_ip_callback_state *state;
922 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
923 struct ctdb_vnn *vnn;
924 const char *iface;
926 /* update our vnn list */
927 vnn = find_public_ip_vnn(ctdb, &pip->addr);
928 if (vnn == NULL) {
929 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
930 ctdb_addr_to_str(&pip->addr)));
931 return 0;
934 /* stop any previous arps */
935 talloc_free(vnn->takeover_ctx);
936 vnn->takeover_ctx = NULL;
938 /* RELEASE_IP controls are sent to all nodes that should not
939 * be hosting a particular IP. This serves 2 purposes. The
940 * first is to help resolve any inconsistencies. If a node
941 * does unexpectly host an IP then it will be released. The
942 * 2nd is to use a "redundant release" to tell non-takeover
943 * nodes where an IP is moving to. This is how "ctdb ip" can
944 * report the (likely) location of an IP by only asking the
945 * local node. Redundant releases need to update the PNN but
946 * are otherwise ignored.
948 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
949 if (!ctdb_sys_have_ip(&pip->addr)) {
950 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
951 ctdb_addr_to_str(&pip->addr),
952 vnn->public_netmask_bits,
953 ctdb_vnn_iface_string(vnn)));
954 vnn->pnn = pip->pnn;
955 ctdb_vnn_unassign_iface(ctdb, vnn);
956 return 0;
958 } else {
959 if (vnn->iface == NULL) {
960 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
961 ctdb_addr_to_str(&pip->addr),
962 vnn->public_netmask_bits));
963 vnn->pnn = pip->pnn;
964 return 0;
968 /* There is a potential race between take_ip and us because we
969 * update the VNN via a callback that run when the
970 * eventscripts have been run. Avoid the race by allowing one
971 * update to be in flight at a time.
973 if (vnn->update_in_flight) {
974 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
975 "update for this IP already in flight\n",
976 ctdb_addr_to_str(&vnn->public_address),
977 vnn->public_netmask_bits));
978 return -1;
981 iface = ctdb_vnn_iface_string(vnn);
983 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
984 ctdb_addr_to_str(&pip->addr),
985 vnn->public_netmask_bits,
986 iface,
987 pip->pnn));
989 state = talloc(ctdb, struct release_ip_callback_state);
990 if (state == NULL) {
991 ctdb_set_error(ctdb, "Out of memory at %s:%d",
992 __FILE__, __LINE__);
993 return -1;
996 state->c = NULL;
997 state->addr = talloc(state, ctdb_sock_addr);
998 if (state->addr == NULL) {
999 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1000 __FILE__, __LINE__);
1001 talloc_free(state);
1002 return -1;
1004 *state->addr = pip->addr;
1005 state->target_pnn = pip->pnn;
1006 state->vnn = vnn;
1008 vnn->update_in_flight = true;
1009 talloc_set_destructor(state, ctdb_releaseip_destructor);
1011 ret = ctdb_event_script_callback(ctdb,
1012 state, release_ip_callback, state,
1013 CTDB_EVENT_RELEASE_IP,
1014 "%s %s %u",
1015 iface,
1016 ctdb_addr_to_str(&pip->addr),
1017 vnn->public_netmask_bits);
1018 if (ret != 0) {
1019 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1020 ctdb_addr_to_str(&pip->addr),
1021 ctdb_vnn_iface_string(vnn)));
1022 talloc_free(state);
1023 return -1;
1026 /* tell the control that we will be reply asynchronously */
1027 *async_reply = true;
1028 state->c = talloc_steal(state, c);
1029 return 0;
1032 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1033 ctdb_sock_addr *addr,
1034 unsigned mask, const char *ifaces,
1035 bool check_address)
1037 struct ctdb_vnn *vnn;
1038 char *tmp;
1039 const char *iface;
1041 /* Verify that we don't have an entry for this IP yet */
1042 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1043 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1044 DEBUG(DEBUG_ERR,
1045 ("Duplicate public IP address '%s'\n",
1046 ctdb_addr_to_str(addr)));
1047 return -1;
1051 /* Create a new VNN structure for this IP address */
1052 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1053 if (vnn == NULL) {
1054 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1055 return -1;
1057 tmp = talloc_strdup(vnn, ifaces);
1058 if (tmp == NULL) {
1059 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1060 talloc_free(vnn);
1061 return -1;
1063 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1064 struct vnn_interface *vnn_iface;
1065 struct ctdb_interface *i;
1066 if (!ctdb_sys_check_iface_exists(iface)) {
1067 DEBUG(DEBUG_ERR,
1068 ("Unknown interface %s for public address %s\n",
1069 iface, ctdb_addr_to_str(addr)));
1070 talloc_free(vnn);
1071 return -1;
1074 i = ctdb_add_local_iface(ctdb, iface);
1075 if (i == NULL) {
1076 DEBUG(DEBUG_ERR,
1077 ("Failed to add interface '%s' "
1078 "for public address %s\n",
1079 iface, ctdb_addr_to_str(addr)));
1080 talloc_free(vnn);
1081 return -1;
1084 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1085 if (vnn_iface == NULL) {
1086 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1087 talloc_free(vnn);
1088 return -1;
1091 vnn_iface->iface = i;
1092 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1094 talloc_free(tmp);
1095 vnn->public_address = *addr;
1096 vnn->public_netmask_bits = mask;
1097 vnn->pnn = -1;
1099 DLIST_ADD(ctdb->vnn, vnn);
1101 return 0;
1105 setup the public address lists from a file
1107 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1109 char **lines;
1110 int nlines;
1111 int i;
1113 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1114 if (lines == NULL) {
1115 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1116 return -1;
1118 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1119 nlines--;
1122 for (i=0;i<nlines;i++) {
1123 unsigned mask;
1124 ctdb_sock_addr addr;
1125 const char *addrstr;
1126 const char *ifaces;
1127 char *tok, *line;
1129 line = lines[i];
1130 while ((*line == ' ') || (*line == '\t')) {
1131 line++;
1133 if (*line == '#') {
1134 continue;
1136 if (strcmp(line, "") == 0) {
1137 continue;
1139 tok = strtok(line, " \t");
1140 addrstr = tok;
1141 tok = strtok(NULL, " \t");
1142 if (tok == NULL) {
1143 if (NULL == ctdb->default_public_interface) {
1144 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1145 i+1));
1146 talloc_free(lines);
1147 return -1;
1149 ifaces = ctdb->default_public_interface;
1150 } else {
1151 ifaces = tok;
1154 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1155 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1156 talloc_free(lines);
1157 return -1;
1159 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1160 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1161 talloc_free(lines);
1162 return -1;
1167 talloc_free(lines);
1168 return 0;
1172 destroy a ctdb_client_ip structure
1174 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1176 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1177 ctdb_addr_to_str(&ip->addr),
1178 ntohs(ip->addr.ip.sin_port),
1179 ip->client_id));
1181 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1182 return 0;
1186 called by a client to inform us of a TCP connection that it is managing
1187 that should tickled with an ACK when IP takeover is done
1189 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1190 TDB_DATA indata)
1192 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1193 struct ctdb_connection *tcp_sock = NULL;
1194 struct ctdb_tcp_list *tcp;
1195 struct ctdb_connection t;
1196 int ret;
1197 TDB_DATA data;
1198 struct ctdb_client_ip *ip;
1199 struct ctdb_vnn *vnn;
1200 ctdb_sock_addr addr;
1202 /* If we don't have public IPs, tickles are useless */
1203 if (ctdb->vnn == NULL) {
1204 return 0;
1207 tcp_sock = (struct ctdb_connection *)indata.dptr;
1209 addr = tcp_sock->src;
1210 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1211 addr = tcp_sock->dst;
1212 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1214 ZERO_STRUCT(addr);
1215 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1216 vnn = find_public_ip_vnn(ctdb, &addr);
1217 if (vnn == NULL) {
1218 switch (addr.sa.sa_family) {
1219 case AF_INET:
1220 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1221 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1222 ctdb_addr_to_str(&addr)));
1224 break;
1225 case AF_INET6:
1226 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1227 ctdb_addr_to_str(&addr)));
1228 break;
1229 default:
1230 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1233 return 0;
1236 if (vnn->pnn != ctdb->pnn) {
1237 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1238 ctdb_addr_to_str(&addr),
1239 client_id, client->pid));
1240 /* failing this call will tell smbd to die */
1241 return -1;
1244 ip = talloc(client, struct ctdb_client_ip);
1245 CTDB_NO_MEMORY(ctdb, ip);
1247 ip->ctdb = ctdb;
1248 ip->addr = addr;
1249 ip->client_id = client_id;
1250 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1251 DLIST_ADD(ctdb->client_ip_list, ip);
1253 tcp = talloc(client, struct ctdb_tcp_list);
1254 CTDB_NO_MEMORY(ctdb, tcp);
1256 tcp->connection.src = tcp_sock->src;
1257 tcp->connection.dst = tcp_sock->dst;
1259 DLIST_ADD(client->tcp_list, tcp);
1261 t.src = tcp_sock->src;
1262 t.dst = tcp_sock->dst;
1264 data.dptr = (uint8_t *)&t;
1265 data.dsize = sizeof(t);
1267 switch (addr.sa.sa_family) {
1268 case AF_INET:
1269 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1270 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1271 ctdb_addr_to_str(&tcp_sock->src),
1272 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1273 break;
1274 case AF_INET6:
1275 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1276 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1277 ctdb_addr_to_str(&tcp_sock->src),
1278 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1279 break;
1280 default:
1281 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1285 /* tell all nodes about this tcp connection */
1286 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1287 CTDB_CONTROL_TCP_ADD,
1288 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1289 if (ret != 0) {
1290 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1291 return -1;
1294 return 0;
1298 find a tcp address on a list
1300 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1301 struct ctdb_connection *tcp)
1303 int i;
1305 if (array == NULL) {
1306 return NULL;
1309 for (i=0;i<array->num;i++) {
1310 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1311 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1312 return &array->connections[i];
1315 return NULL;
1321 called by a daemon to inform us of a TCP connection that one of its
1322 clients managing that should tickled with an ACK when IP takeover is
1323 done
1325 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1327 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1328 struct ctdb_tcp_array *tcparray;
1329 struct ctdb_connection tcp;
1330 struct ctdb_vnn *vnn;
1332 /* If we don't have public IPs, tickles are useless */
1333 if (ctdb->vnn == NULL) {
1334 return 0;
1337 vnn = find_public_ip_vnn(ctdb, &p->dst);
1338 if (vnn == NULL) {
1339 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1340 ctdb_addr_to_str(&p->dst)));
1342 return -1;
1346 tcparray = vnn->tcp_array;
1348 /* If this is the first tickle */
1349 if (tcparray == NULL) {
1350 tcparray = talloc(vnn, struct ctdb_tcp_array);
1351 CTDB_NO_MEMORY(ctdb, tcparray);
1352 vnn->tcp_array = tcparray;
1354 tcparray->num = 0;
1355 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1356 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1358 tcparray->connections[tcparray->num].src = p->src;
1359 tcparray->connections[tcparray->num].dst = p->dst;
1360 tcparray->num++;
1362 if (tcp_update_needed) {
1363 vnn->tcp_update_needed = true;
1365 return 0;
1369 /* Do we already have this tickle ?*/
1370 tcp.src = p->src;
1371 tcp.dst = p->dst;
1372 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1373 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1374 ctdb_addr_to_str(&tcp.dst),
1375 ntohs(tcp.dst.ip.sin_port),
1376 vnn->pnn));
1377 return 0;
1380 /* A new tickle, we must add it to the array */
1381 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1382 struct ctdb_connection,
1383 tcparray->num+1);
1384 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1386 tcparray->connections[tcparray->num].src = p->src;
1387 tcparray->connections[tcparray->num].dst = p->dst;
1388 tcparray->num++;
1390 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1391 ctdb_addr_to_str(&tcp.dst),
1392 ntohs(tcp.dst.ip.sin_port),
1393 vnn->pnn));
1395 if (tcp_update_needed) {
1396 vnn->tcp_update_needed = true;
1399 return 0;
1403 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1405 struct ctdb_connection *tcpp;
1407 if (vnn == NULL) {
1408 return;
1411 /* if the array is empty we cant remove it
1412 and we don't need to do anything
1414 if (vnn->tcp_array == NULL) {
1415 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1416 ctdb_addr_to_str(&conn->dst),
1417 ntohs(conn->dst.ip.sin_port)));
1418 return;
1422 /* See if we know this connection
1423 if we don't know this connection then we dont need to do anything
1425 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1426 if (tcpp == NULL) {
1427 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1428 ctdb_addr_to_str(&conn->dst),
1429 ntohs(conn->dst.ip.sin_port)));
1430 return;
1434 /* We need to remove this entry from the array.
1435 Instead of allocating a new array and copying data to it
1436 we cheat and just copy the last entry in the existing array
1437 to the entry that is to be removed and just shring the
1438 ->num field
1440 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1441 vnn->tcp_array->num--;
1443 /* If we deleted the last entry we also need to remove the entire array
1445 if (vnn->tcp_array->num == 0) {
1446 talloc_free(vnn->tcp_array);
1447 vnn->tcp_array = NULL;
1450 vnn->tcp_update_needed = true;
1452 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1453 ctdb_addr_to_str(&conn->src),
1454 ntohs(conn->src.ip.sin_port)));
1459 called by a daemon to inform us of a TCP connection that one of its
1460 clients used are no longer needed in the tickle database
1462 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1464 struct ctdb_vnn *vnn;
1465 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1467 /* If we don't have public IPs, tickles are useless */
1468 if (ctdb->vnn == NULL) {
1469 return 0;
1472 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1473 if (vnn == NULL) {
1474 DEBUG(DEBUG_ERR,
1475 (__location__ " unable to find public address %s\n",
1476 ctdb_addr_to_str(&conn->dst)));
1477 return 0;
1480 ctdb_remove_connection(vnn, conn);
1482 return 0;
1486 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1487 bool force);
1490 Called when another daemon starts - causes all tickles for all
1491 public addresses we are serving to be sent to the new node on the
1492 next check. This actually causes the tickles to be sent to the
1493 other node immediately. In case there is an error, the periodic
1494 timer will send the updates on timer event. This is simple and
1495 doesn't require careful error handling.
1497 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1499 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1500 (unsigned long) pnn));
1502 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1503 return 0;
1508 called when a client structure goes away - hook to remove
1509 elements from the tcp_list in all daemons
1511 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1513 while (client->tcp_list) {
1514 struct ctdb_vnn *vnn;
1515 struct ctdb_tcp_list *tcp = client->tcp_list;
1516 struct ctdb_connection *conn = &tcp->connection;
1518 DLIST_REMOVE(client->tcp_list, tcp);
1520 vnn = find_public_ip_vnn(client->ctdb,
1521 &conn->dst);
1522 if (vnn == NULL) {
1523 DEBUG(DEBUG_ERR,
1524 (__location__ " unable to find public address %s\n",
1525 ctdb_addr_to_str(&conn->dst)));
1526 continue;
1529 /* If the IP address is hosted on this node then
1530 * remove the connection. */
1531 if (vnn->pnn == client->ctdb->pnn) {
1532 ctdb_remove_connection(vnn, conn);
1535 /* Otherwise this function has been called because the
1536 * server IP address has been released to another node
1537 * and the client has exited. This means that we
1538 * should not delete the connection information. The
1539 * takeover node processes connections too. */
1544 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1546 struct ctdb_vnn *vnn, *next;
1547 int count = 0;
1549 if (ctdb->tunable.disable_ip_failover == 1) {
1550 return;
1553 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1554 /* vnn can be freed below in release_ip_post() */
1555 next = vnn->next;
1557 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1558 ctdb_vnn_unassign_iface(ctdb, vnn);
1559 continue;
1562 /* Don't allow multiple releases at once. Some code,
1563 * particularly ctdb_tickle_sentenced_connections() is
1564 * not re-entrant */
1565 if (vnn->update_in_flight) {
1566 DEBUG(DEBUG_WARNING,
1567 (__location__
1568 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1569 ctdb_addr_to_str(&vnn->public_address),
1570 vnn->public_netmask_bits,
1571 ctdb_vnn_iface_string(vnn)));
1572 continue;
1574 vnn->update_in_flight = true;
1576 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1577 ctdb_addr_to_str(&vnn->public_address),
1578 vnn->public_netmask_bits,
1579 ctdb_vnn_iface_string(vnn)));
1581 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1582 ctdb_vnn_iface_string(vnn),
1583 ctdb_addr_to_str(&vnn->public_address),
1584 vnn->public_netmask_bits);
1585 /* releaseip timeouts are converted to success, so to
1586 * detect failures just check if the IP address is
1587 * still there...
1589 if (ctdb_sys_have_ip(&vnn->public_address)) {
1590 DEBUG(DEBUG_ERR,
1591 (__location__
1592 " IP address %s not released\n",
1593 ctdb_addr_to_str(&vnn->public_address)));
1594 vnn->update_in_flight = false;
1595 continue;
1598 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1599 if (vnn != NULL) {
1600 vnn->update_in_flight = false;
1602 count++;
1605 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1610 get list of public IPs
1612 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1613 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1615 int i, num, len;
1616 struct ctdb_public_ip_list_old *ips;
1617 struct ctdb_vnn *vnn;
1618 bool only_available = false;
1620 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1621 only_available = true;
1624 /* count how many public ip structures we have */
1625 num = 0;
1626 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1627 num++;
1630 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1631 num*sizeof(struct ctdb_public_ip);
1632 ips = talloc_zero_size(outdata, len);
1633 CTDB_NO_MEMORY(ctdb, ips);
1635 i = 0;
1636 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1637 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1638 continue;
1640 ips->ips[i].pnn = vnn->pnn;
1641 ips->ips[i].addr = vnn->public_address;
1642 i++;
1644 ips->num = i;
1645 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1646 i*sizeof(struct ctdb_public_ip);
1648 outdata->dsize = len;
1649 outdata->dptr = (uint8_t *)ips;
1651 return 0;
1655 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1656 struct ctdb_req_control_old *c,
1657 TDB_DATA indata,
1658 TDB_DATA *outdata)
1660 int i, num, len;
1661 ctdb_sock_addr *addr;
1662 struct ctdb_public_ip_info_old *info;
1663 struct ctdb_vnn *vnn;
1664 struct vnn_interface *iface;
1666 addr = (ctdb_sock_addr *)indata.dptr;
1668 vnn = find_public_ip_vnn(ctdb, addr);
1669 if (vnn == NULL) {
1670 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1671 "'%s'not a public address\n",
1672 ctdb_addr_to_str(addr)));
1673 return -1;
1676 /* count how many public ip structures we have */
1677 num = 0;
1678 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1679 num++;
1682 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1683 num*sizeof(struct ctdb_iface);
1684 info = talloc_zero_size(outdata, len);
1685 CTDB_NO_MEMORY(ctdb, info);
1687 info->ip.addr = vnn->public_address;
1688 info->ip.pnn = vnn->pnn;
1689 info->active_idx = 0xFFFFFFFF;
1691 i = 0;
1692 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1693 struct ctdb_interface *cur;
1695 cur = iface->iface;
1696 if (vnn->iface == cur) {
1697 info->active_idx = i;
1699 strncpy(info->ifaces[i].name, cur->name,
1700 sizeof(info->ifaces[i].name));
1701 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1702 info->ifaces[i].link_state = cur->link_up;
1703 info->ifaces[i].references = cur->references;
1705 i++;
1707 info->num = i;
1708 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1709 i*sizeof(struct ctdb_iface);
1711 outdata->dsize = len;
1712 outdata->dptr = (uint8_t *)info;
1714 return 0;
1717 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1718 struct ctdb_req_control_old *c,
1719 TDB_DATA *outdata)
1721 int i, num, len;
1722 struct ctdb_iface_list_old *ifaces;
1723 struct ctdb_interface *cur;
1725 /* count how many public ip structures we have */
1726 num = 0;
1727 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1728 num++;
1731 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1732 num*sizeof(struct ctdb_iface);
1733 ifaces = talloc_zero_size(outdata, len);
1734 CTDB_NO_MEMORY(ctdb, ifaces);
1736 i = 0;
1737 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1738 strncpy(ifaces->ifaces[i].name, cur->name,
1739 sizeof(ifaces->ifaces[i].name));
1740 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1741 ifaces->ifaces[i].link_state = cur->link_up;
1742 ifaces->ifaces[i].references = cur->references;
1743 i++;
1745 ifaces->num = i;
1746 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1747 i*sizeof(struct ctdb_iface);
1749 outdata->dsize = len;
1750 outdata->dptr = (uint8_t *)ifaces;
1752 return 0;
1755 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1756 struct ctdb_req_control_old *c,
1757 TDB_DATA indata)
1759 struct ctdb_iface *info;
1760 struct ctdb_interface *iface;
1761 bool link_up = false;
1763 info = (struct ctdb_iface *)indata.dptr;
1765 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1766 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1767 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1768 len, len, info->name));
1769 return -1;
1772 switch (info->link_state) {
1773 case 0:
1774 link_up = false;
1775 break;
1776 case 1:
1777 link_up = true;
1778 break;
1779 default:
1780 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1781 (unsigned int)info->link_state));
1782 return -1;
1785 if (info->references != 0) {
1786 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1787 (unsigned int)info->references));
1788 return -1;
1791 iface = ctdb_find_iface(ctdb, info->name);
1792 if (iface == NULL) {
1793 return -1;
1796 if (link_up == iface->link_up) {
1797 return 0;
1800 DEBUG(DEBUG_ERR,
1801 ("iface[%s] has changed it's link status %s => %s\n",
1802 iface->name,
1803 iface->link_up?"up":"down",
1804 link_up?"up":"down"));
1806 iface->link_up = link_up;
1807 return 0;
1812 called by a daemon to inform us of the entire list of TCP tickles for
1813 a particular public address.
1814 this control should only be sent by the node that is currently serving
1815 that public address.
1817 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1819 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1820 struct ctdb_tcp_array *tcparray;
1821 struct ctdb_vnn *vnn;
1823 /* We must at least have tickles.num or else we cant verify the size
1824 of the received data blob
1826 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1827 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1828 return -1;
1831 /* verify that the size of data matches what we expect */
1832 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1833 + sizeof(struct ctdb_connection) * list->num) {
1834 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1835 return -1;
1838 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1839 ctdb_addr_to_str(&list->addr)));
1841 vnn = find_public_ip_vnn(ctdb, &list->addr);
1842 if (vnn == NULL) {
1843 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1844 ctdb_addr_to_str(&list->addr)));
1846 return 1;
1849 if (vnn->pnn == ctdb->pnn) {
1850 DEBUG(DEBUG_INFO,
1851 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1852 ctdb_addr_to_str(&list->addr)));
1853 return 0;
1856 /* remove any old ticklelist we might have */
1857 talloc_free(vnn->tcp_array);
1858 vnn->tcp_array = NULL;
1860 tcparray = talloc(vnn, struct ctdb_tcp_array);
1861 CTDB_NO_MEMORY(ctdb, tcparray);
1863 tcparray->num = list->num;
1865 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1866 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1868 memcpy(tcparray->connections, &list->connections[0],
1869 sizeof(struct ctdb_connection)*tcparray->num);
1871 /* We now have a new fresh tickle list array for this vnn */
1872 vnn->tcp_array = tcparray;
1874 return 0;
1878 called to return the full list of tickles for the puclic address associated
1879 with the provided vnn
1881 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1883 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1884 struct ctdb_tickle_list_old *list;
1885 struct ctdb_tcp_array *tcparray;
1886 int num, i;
1887 struct ctdb_vnn *vnn;
1888 unsigned port;
1890 vnn = find_public_ip_vnn(ctdb, addr);
1891 if (vnn == NULL) {
1892 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1893 ctdb_addr_to_str(addr)));
1895 return 1;
1898 port = ctdb_addr_to_port(addr);
1900 tcparray = vnn->tcp_array;
1901 num = 0;
1902 if (tcparray != NULL) {
1903 if (port == 0) {
1904 /* All connections */
1905 num = tcparray->num;
1906 } else {
1907 /* Count connections for port */
1908 for (i = 0; i < tcparray->num; i++) {
1909 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1910 num++;
1916 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1917 + sizeof(struct ctdb_connection) * num;
1919 outdata->dptr = talloc_size(outdata, outdata->dsize);
1920 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1921 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1923 list->addr = *addr;
1924 list->num = num;
1926 if (num == 0) {
1927 return 0;
1930 num = 0;
1931 for (i = 0; i < tcparray->num; i++) {
1932 if (port == 0 || \
1933 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1934 list->connections[num] = tcparray->connections[i];
1935 num++;
1939 return 0;
1944 set the list of all tcp tickles for a public address
1946 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1947 ctdb_sock_addr *addr,
1948 struct ctdb_tcp_array *tcparray)
1950 int ret, num;
1951 TDB_DATA data;
1952 struct ctdb_tickle_list_old *list;
1954 if (tcparray) {
1955 num = tcparray->num;
1956 } else {
1957 num = 0;
1960 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1961 sizeof(struct ctdb_connection) * num;
1962 data.dptr = talloc_size(ctdb, data.dsize);
1963 CTDB_NO_MEMORY(ctdb, data.dptr);
1965 list = (struct ctdb_tickle_list_old *)data.dptr;
1966 list->addr = *addr;
1967 list->num = num;
1968 if (tcparray) {
1969 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
1972 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
1973 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1974 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1975 if (ret != 0) {
1976 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1977 return -1;
1980 talloc_free(data.dptr);
1982 return ret;
1985 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1986 bool force)
1988 struct ctdb_vnn *vnn;
1989 int ret;
1991 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1992 /* we only send out updates for public addresses that
1993 we have taken over
1995 if (ctdb->pnn != vnn->pnn) {
1996 continue;
1999 /* We only send out the updates if we need to */
2000 if (!force && !vnn->tcp_update_needed) {
2001 continue;
2004 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2005 &vnn->public_address,
2006 vnn->tcp_array);
2007 if (ret != 0) {
2008 D_ERR("Failed to send the tickle update for ip %s\n",
2009 ctdb_addr_to_str(&vnn->public_address));
2010 vnn->tcp_update_needed = true;
2011 } else {
2012 D_INFO("Sent tickle update for ip %s\n",
2013 ctdb_addr_to_str(&vnn->public_address));
2014 vnn->tcp_update_needed = false;
2021 perform tickle updates if required
2023 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2024 struct tevent_timer *te,
2025 struct timeval t, void *private_data)
2027 struct ctdb_context *ctdb = talloc_get_type(
2028 private_data, struct ctdb_context);
2030 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2032 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2033 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2034 ctdb_update_tcp_tickles, ctdb);
2038 start periodic update of tcp tickles
2040 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2042 ctdb->tickle_update_context = talloc_new(ctdb);
2044 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2045 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2046 ctdb_update_tcp_tickles, ctdb);
2052 struct control_gratious_arp {
2053 struct ctdb_context *ctdb;
2054 ctdb_sock_addr addr;
2055 const char *iface;
2056 int count;
2060 send a control_gratuitous arp
2062 static void send_gratious_arp(struct tevent_context *ev,
2063 struct tevent_timer *te,
2064 struct timeval t, void *private_data)
2066 int ret;
2067 struct control_gratious_arp *arp = talloc_get_type(private_data,
2068 struct control_gratious_arp);
2070 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2071 if (ret != 0) {
2072 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2073 arp->iface, strerror(errno)));
2077 arp->count++;
2078 if (arp->count == CTDB_ARP_REPEAT) {
2079 talloc_free(arp);
2080 return;
2083 tevent_add_timer(arp->ctdb->ev, arp,
2084 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2085 send_gratious_arp, arp);
2090 send a gratious arp
2092 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2094 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2095 struct control_gratious_arp *arp;
2097 /* verify the size of indata */
2098 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2099 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2100 (unsigned)indata.dsize,
2101 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2102 return -1;
2104 if (indata.dsize !=
2105 ( offsetof(struct ctdb_addr_info_old, iface)
2106 + gratious_arp->len ) ){
2108 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2109 "but should be %u bytes\n",
2110 (unsigned)indata.dsize,
2111 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2112 return -1;
2116 arp = talloc(ctdb, struct control_gratious_arp);
2117 CTDB_NO_MEMORY(ctdb, arp);
2119 arp->ctdb = ctdb;
2120 arp->addr = gratious_arp->addr;
2121 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2122 CTDB_NO_MEMORY(ctdb, arp->iface);
2123 arp->count = 0;
2125 tevent_add_timer(arp->ctdb->ev, arp,
2126 timeval_zero(), send_gratious_arp, arp);
2128 return 0;
2131 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2133 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2134 int ret;
2136 /* verify the size of indata */
2137 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2138 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2139 return -1;
2141 if (indata.dsize !=
2142 ( offsetof(struct ctdb_addr_info_old, iface)
2143 + pub->len ) ){
2145 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2146 "but should be %u bytes\n",
2147 (unsigned)indata.dsize,
2148 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2149 return -1;
2152 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2154 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2156 if (ret != 0) {
2157 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2158 return -1;
2161 return 0;
2164 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2166 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2167 struct ctdb_vnn *vnn;
2169 /* verify the size of indata */
2170 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2171 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2172 return -1;
2174 if (indata.dsize !=
2175 ( offsetof(struct ctdb_addr_info_old, iface)
2176 + pub->len ) ){
2178 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2179 "but should be %u bytes\n",
2180 (unsigned)indata.dsize,
2181 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2182 return -1;
2185 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2187 /* walk over all public addresses until we find a match */
2188 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2189 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2190 if (vnn->pnn == ctdb->pnn) {
2191 /* This IP is currently being hosted.
2192 * Defer the deletion until the next
2193 * takeover run. "ctdb reloadips" will
2194 * always cause a takeover run. "ctdb
2195 * delip" will now need an explicit
2196 * "ctdb ipreallocated" afterwards. */
2197 vnn->delete_pending = true;
2198 } else {
2199 /* This IP is not hosted on the
2200 * current node so just delete it
2201 * now. */
2202 do_delete_ip(ctdb, vnn);
2205 return 0;
2209 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2210 ctdb_addr_to_str(&pub->addr)));
2211 return -1;
/* state carried from ctdb_control_ipreallocated() to its callback */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* request to reply to once the event finishes */
};
2219 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2220 int status, void *p)
2222 struct ipreallocated_callback_state *state =
2223 talloc_get_type(p, struct ipreallocated_callback_state);
2225 if (status != 0) {
2226 DEBUG(DEBUG_ERR,
2227 (" \"ipreallocated\" event script failed (status %d)\n",
2228 status));
2229 if (status == -ETIME) {
2230 ctdb_ban_self(ctdb);
2234 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2235 talloc_free(state);
2238 /* A control to run the ipreallocated event */
2239 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2240 struct ctdb_req_control_old *c,
2241 bool *async_reply)
2243 int ret;
2244 struct ipreallocated_callback_state *state;
2246 state = talloc(ctdb, struct ipreallocated_callback_state);
2247 CTDB_NO_MEMORY(ctdb, state);
2249 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2251 ret = ctdb_event_script_callback(ctdb, state,
2252 ctdb_ipreallocated_callback, state,
2253 CTDB_EVENT_IPREALLOCATED,
2254 "%s", "");
2256 if (ret != 0) {
2257 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2258 talloc_free(state);
2259 return -1;
2262 /* tell the control that we will be reply asynchronously */
2263 state->c = talloc_steal(state, c);
2264 *async_reply = true;
2266 return 0;
/* state for one in-progress reload of the public IPs */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* owning daemon context */
	struct ctdb_req_control_old *c;	/* control to reply to when the handle is destroyed */
	int status;			/* status sent in that reply */
	int fd[2];			/* fd[0] is read for the child's one-byte result */
	pid_t child;			/* child process, killed when the handle is destroyed */
	struct tevent_fd *fde;
};
2279 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2281 if (h == h->ctdb->reload_ips) {
2282 h->ctdb->reload_ips = NULL;
2284 if (h->c != NULL) {
2285 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2286 h->c = NULL;
2288 ctdb_kill(h->ctdb, h->child, SIGKILL);
2289 return 0;
2292 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2293 struct tevent_timer *te,
2294 struct timeval t, void *private_data)
2296 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2298 talloc_free(h);
2301 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2302 struct tevent_fd *fde,
2303 uint16_t flags, void *private_data)
2305 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2307 char res;
2308 int ret;
2310 ret = sys_read(h->fd[0], &res, 1);
2311 if (ret < 1 || res != 0) {
2312 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2313 res = 1;
2315 h->status = res;
2317 talloc_free(h);
2320 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2322 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2323 struct ctdb_public_ip_list_old *ips;
2324 struct ctdb_vnn *vnn;
2325 struct client_async_data *async_data;
2326 struct timeval timeout;
2327 TDB_DATA data;
2328 struct ctdb_client_control_state *state;
2329 bool first_add;
2330 int i, ret;
2332 CTDB_NO_MEMORY(ctdb, mem_ctx);
2334 /* Read IPs from local node */
2335 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2336 CTDB_CURRENT_NODE, mem_ctx, &ips);
2337 if (ret != 0) {
2338 DEBUG(DEBUG_ERR,
2339 ("Unable to fetch public IPs from local node\n"));
2340 talloc_free(mem_ctx);
2341 return -1;
2344 /* Read IPs file - this is safe since this is a child process */
2345 ctdb->vnn = NULL;
2346 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2347 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2348 talloc_free(mem_ctx);
2349 return -1;
2352 async_data = talloc_zero(mem_ctx, struct client_async_data);
2353 CTDB_NO_MEMORY(ctdb, async_data);
2355 /* Compare IPs between node and file for IPs to be deleted */
2356 for (i = 0; i < ips->num; i++) {
2357 /* */
2358 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2359 if (ctdb_same_ip(&vnn->public_address,
2360 &ips->ips[i].addr)) {
2361 /* IP is still in file */
2362 break;
2366 if (vnn == NULL) {
2367 /* Delete IP ips->ips[i] */
2368 struct ctdb_addr_info_old *pub;
2370 DEBUG(DEBUG_NOTICE,
2371 ("IP %s no longer configured, deleting it\n",
2372 ctdb_addr_to_str(&ips->ips[i].addr)));
2374 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2375 CTDB_NO_MEMORY(ctdb, pub);
2377 pub->addr = ips->ips[i].addr;
2378 pub->mask = 0;
2379 pub->len = 0;
2381 timeout = TAKEOVER_TIMEOUT();
2383 data.dsize = offsetof(struct ctdb_addr_info_old,
2384 iface) + pub->len;
2385 data.dptr = (uint8_t *)pub;
2387 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2388 CTDB_CONTROL_DEL_PUBLIC_IP,
2389 0, data, async_data,
2390 &timeout, NULL);
2391 if (state == NULL) {
2392 DEBUG(DEBUG_ERR,
2393 (__location__
2394 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2395 goto failed;
2398 ctdb_client_async_add(async_data, state);
2402 /* Compare IPs between node and file for IPs to be added */
2403 first_add = true;
2404 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2405 for (i = 0; i < ips->num; i++) {
2406 if (ctdb_same_ip(&vnn->public_address,
2407 &ips->ips[i].addr)) {
2408 /* IP already on node */
2409 break;
2412 if (i == ips->num) {
2413 /* Add IP ips->ips[i] */
2414 struct ctdb_addr_info_old *pub;
2415 const char *ifaces = NULL;
2416 uint32_t len;
2417 struct vnn_interface *iface = NULL;
2419 DEBUG(DEBUG_NOTICE,
2420 ("New IP %s configured, adding it\n",
2421 ctdb_addr_to_str(&vnn->public_address)));
2422 if (first_add) {
2423 uint32_t pnn = ctdb_get_pnn(ctdb);
2425 data.dsize = sizeof(pnn);
2426 data.dptr = (uint8_t *)&pnn;
2428 ret = ctdb_client_send_message(
2429 ctdb,
2430 CTDB_BROADCAST_CONNECTED,
2431 CTDB_SRVID_REBALANCE_NODE,
2432 data);
2433 if (ret != 0) {
2434 DEBUG(DEBUG_WARNING,
2435 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2438 first_add = false;
2441 ifaces = vnn->ifaces->iface->name;
2442 iface = vnn->ifaces->next;
2443 while (iface != NULL) {
2444 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2445 iface->iface->name);
2446 iface = iface->next;
2449 len = strlen(ifaces) + 1;
2450 pub = talloc_zero_size(mem_ctx,
2451 offsetof(struct ctdb_addr_info_old, iface) + len);
2452 CTDB_NO_MEMORY(ctdb, pub);
2454 pub->addr = vnn->public_address;
2455 pub->mask = vnn->public_netmask_bits;
2456 pub->len = len;
2457 memcpy(&pub->iface[0], ifaces, pub->len);
2459 timeout = TAKEOVER_TIMEOUT();
2461 data.dsize = offsetof(struct ctdb_addr_info_old,
2462 iface) + pub->len;
2463 data.dptr = (uint8_t *)pub;
2465 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2466 CTDB_CONTROL_ADD_PUBLIC_IP,
2467 0, data, async_data,
2468 &timeout, NULL);
2469 if (state == NULL) {
2470 DEBUG(DEBUG_ERR,
2471 (__location__
2472 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2473 goto failed;
2476 ctdb_client_async_add(async_data, state);
2480 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2481 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2482 goto failed;
2485 talloc_free(mem_ctx);
2486 return 0;
2488 failed:
2489 talloc_free(mem_ctx);
2490 return -1;
2493 /* This control is sent to force the node to re-read the public addresses file
2494 and drop any addresses we should nnot longer host, and add new addresses
2495 that we are now able to host
2497 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2499 struct ctdb_reloadips_handle *h;
2500 pid_t parent = getpid();
2502 if (ctdb->reload_ips != NULL) {
2503 talloc_free(ctdb->reload_ips);
2504 ctdb->reload_ips = NULL;
2507 h = talloc(ctdb, struct ctdb_reloadips_handle);
2508 CTDB_NO_MEMORY(ctdb, h);
2509 h->ctdb = ctdb;
2510 h->c = NULL;
2511 h->status = -1;
2513 if (pipe(h->fd) == -1) {
2514 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2515 talloc_free(h);
2516 return -1;
2519 h->child = ctdb_fork(ctdb);
2520 if (h->child == (pid_t)-1) {
2521 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2522 close(h->fd[0]);
2523 close(h->fd[1]);
2524 talloc_free(h);
2525 return -1;
2528 /* child process */
2529 if (h->child == 0) {
2530 signed char res = 0;
2532 close(h->fd[0]);
2534 prctl_set_comment("ctdb_reloadips");
2535 if (switch_from_server_to_client(ctdb) != 0) {
2536 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2537 res = -1;
2538 } else {
2539 res = ctdb_reloadips_child(ctdb);
2540 if (res != 0) {
2541 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2545 sys_write(h->fd[1], &res, 1);
2546 ctdb_wait_for_process_to_exit(parent);
2547 _exit(0);
2550 h->c = talloc_steal(h, c);
2552 close(h->fd[1]);
2553 set_close_on_exec(h->fd[0]);
2555 talloc_set_destructor(h, ctdb_reloadips_destructor);
2558 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2559 ctdb_reloadips_child_handler, (void *)h);
2560 tevent_fd_set_auto_close(h->fde);
2562 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2563 ctdb_reloadips_timeout_event, h);
2565 /* we reply later */
2566 *async_reply = true;
2567 return 0;