VERSION: Bump version up to 4.10.12.
[Samba.git] / ctdb / server / ctdb_takeover.c
blob424d0d6ff4aec4241cae0a40ccc9e860aac52f91
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "protocol/protocol_util.h"
38 #include "ctdb_private.h"
39 #include "ctdb_client.h"
41 #include "common/rb_tree.h"
42 #include "common/reqid.h"
43 #include "common/system.h"
44 #include "common/system_socket.h"
45 #include "common/common.h"
46 #include "common/logging.h"
48 #include "server/ctdb_config.h"
50 #include "server/ipalloc.h"
52 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
54 #define CTDB_ARP_INTERVAL 1
55 #define CTDB_ARP_REPEAT 3
/*
 * A local network interface that public addresses can be hosted on.
 * Entries are kept on a doubly linked list (prev/next).
 */
struct ctdb_interface {
	struct ctdb_interface *prev, *next;
	/* interface name */
	const char *name;
	/* link state; set to true when the entry is created */
	bool link_up;
	/* number of VNNs currently assigned to this interface */
	uint32_t references;
};
/* One entry in a VNN's list of candidate interfaces */
struct vnn_interface {
	struct vnn_interface *prev, *next;
	struct ctdb_interface *iface;
};
69 /* state associated with a public ip address */
70 struct ctdb_vnn {
71 struct ctdb_vnn *prev, *next;
73 struct ctdb_interface *iface;
74 struct vnn_interface *ifaces;
75 ctdb_sock_addr public_address;
76 uint8_t public_netmask_bits;
78 /* the node number that is serving this public address, if any.
79 If no node serves this ip it is set to -1 */
80 int32_t pnn;
82 /* List of clients to tickle for this public address */
83 struct ctdb_tcp_array *tcp_array;
85 /* whether we need to update the other nodes with changes to our list
86 of connected clients */
87 bool tcp_update_needed;
89 /* a context to hang sending gratious arp events off */
90 TALLOC_CTX *takeover_ctx;
92 /* Set to true any time an update to this VNN is in flight.
93 This helps to avoid races. */
94 bool update_in_flight;
96 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
97 * address then this flag is set. It will be deleted in the
98 * release IP callback. */
99 bool delete_pending;
102 static const char *iface_string(const struct ctdb_interface *iface)
104 return (iface != NULL ? iface->name : "__none__");
107 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
109 return iface_string(vnn->iface);
112 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
113 const char *iface);
115 static struct ctdb_interface *
116 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
118 struct ctdb_interface *i;
120 if (strlen(iface) > CTDB_IFACE_SIZE) {
121 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
122 return NULL;
125 /* Verify that we don't have an entry for this ip yet */
126 i = ctdb_find_iface(ctdb, iface);
127 if (i != NULL) {
128 return i;
131 /* create a new structure for this interface */
132 i = talloc_zero(ctdb, struct ctdb_interface);
133 if (i == NULL) {
134 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
135 return NULL;
137 i->name = talloc_strdup(i, iface);
138 if (i->name == NULL) {
139 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
140 talloc_free(i);
141 return NULL;
144 i->link_up = true;
146 DLIST_ADD(ctdb->ifaces, i);
148 return i;
151 static bool vnn_has_interface(struct ctdb_vnn *vnn,
152 const struct ctdb_interface *iface)
154 struct vnn_interface *i;
156 for (i = vnn->ifaces; i != NULL; i = i->next) {
157 if (iface == i->iface) {
158 return true;
162 return false;
165 /* If any interfaces now have no possible IPs then delete them. This
166 * implementation is naive (i.e. simple) rather than clever
167 * (i.e. complex). Given that this is run on delip and that operation
168 * is rare, this doesn't need to be efficient - it needs to be
169 * foolproof. One alternative is reference counting, where the logic
170 * is distributed and can, therefore, be broken in multiple places.
171 * Another alternative is to build a red-black tree of interfaces that
172 * can have addresses (by walking ctdb->vnn once) and then walking
173 * ctdb->ifaces once and deleting those not in the tree. Let's go to
174 * one of those if the naive implementation causes problems... :-)
176 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
177 struct ctdb_vnn *vnn)
179 struct ctdb_interface *i, *next;
181 /* For each interface, check if there's an IP using it. */
182 for (i = ctdb->ifaces; i != NULL; i = next) {
183 struct ctdb_vnn *tv;
184 bool found;
185 next = i->next;
187 /* Only consider interfaces named in the given VNN. */
188 if (!vnn_has_interface(vnn, i)) {
189 continue;
192 /* Search for a vnn with this interface. */
193 found = false;
194 for (tv=ctdb->vnn; tv; tv=tv->next) {
195 if (vnn_has_interface(tv, i)) {
196 found = true;
197 break;
201 if (!found) {
202 /* None of the VNNs are using this interface. */
203 DLIST_REMOVE(ctdb->ifaces, i);
204 talloc_free(i);
210 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
211 const char *iface)
213 struct ctdb_interface *i;
215 for (i=ctdb->ifaces;i;i=i->next) {
216 if (strcmp(i->name, iface) == 0) {
217 return i;
221 return NULL;
224 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
225 struct ctdb_vnn *vnn)
227 struct vnn_interface *i;
228 struct ctdb_interface *cur = NULL;
229 struct ctdb_interface *best = NULL;
231 for (i = vnn->ifaces; i != NULL; i = i->next) {
233 cur = i->iface;
235 if (!cur->link_up) {
236 continue;
239 if (best == NULL) {
240 best = cur;
241 continue;
244 if (cur->references < best->references) {
245 best = cur;
246 continue;
250 return best;
253 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
254 struct ctdb_vnn *vnn)
256 struct ctdb_interface *best = NULL;
258 if (vnn->iface) {
259 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
260 "still assigned to iface '%s'\n",
261 ctdb_addr_to_str(&vnn->public_address),
262 ctdb_vnn_iface_string(vnn)));
263 return 0;
266 best = ctdb_vnn_best_iface(ctdb, vnn);
267 if (best == NULL) {
268 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
269 "cannot assign to iface any iface\n",
270 ctdb_addr_to_str(&vnn->public_address)));
271 return -1;
274 vnn->iface = best;
275 best->references++;
276 vnn->pnn = ctdb->pnn;
278 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
279 "now assigned to iface '%s' refs[%d]\n",
280 ctdb_addr_to_str(&vnn->public_address),
281 ctdb_vnn_iface_string(vnn),
282 best->references));
283 return 0;
286 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
287 struct ctdb_vnn *vnn)
289 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
290 "now unassigned (old iface '%s' refs[%d])\n",
291 ctdb_addr_to_str(&vnn->public_address),
292 ctdb_vnn_iface_string(vnn),
293 vnn->iface?vnn->iface->references:0));
294 if (vnn->iface) {
295 vnn->iface->references--;
297 vnn->iface = NULL;
298 if (vnn->pnn == ctdb->pnn) {
299 vnn->pnn = -1;
303 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
304 struct ctdb_vnn *vnn)
306 uint32_t flags;
307 struct vnn_interface *i;
309 /* Nodes that are not RUNNING can not host IPs */
310 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
311 return false;
314 flags = ctdb->nodes[ctdb->pnn]->flags;
315 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
316 return false;
319 if (vnn->delete_pending) {
320 return false;
323 if (vnn->iface && vnn->iface->link_up) {
324 return true;
327 for (i = vnn->ifaces; i != NULL; i = i->next) {
328 if (i->iface->link_up) {
329 return true;
333 return false;
336 struct ctdb_takeover_arp {
337 struct ctdb_context *ctdb;
338 uint32_t count;
339 ctdb_sock_addr addr;
340 struct ctdb_tcp_array *tcparray;
341 struct ctdb_vnn *vnn;
346 lists of tcp endpoints
348 struct ctdb_tcp_list {
349 struct ctdb_tcp_list *prev, *next;
350 struct ctdb_connection connection;
354 list of clients to kill on IP release
356 struct ctdb_client_ip {
357 struct ctdb_client_ip *prev, *next;
358 struct ctdb_context *ctdb;
359 ctdb_sock_addr addr;
360 uint32_t client_id;
365 send a gratuitous arp
367 static void ctdb_control_send_arp(struct tevent_context *ev,
368 struct tevent_timer *te,
369 struct timeval t, void *private_data)
371 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
372 struct ctdb_takeover_arp);
373 int i, ret;
374 struct ctdb_tcp_array *tcparray;
375 const char *iface = ctdb_vnn_iface_string(arp->vnn);
377 ret = ctdb_sys_send_arp(&arp->addr, iface);
378 if (ret != 0) {
379 DBG_ERR("Failed to send ARP on interface %s: %s\n",
380 iface, strerror(ret));
383 tcparray = arp->tcparray;
384 if (tcparray) {
385 for (i=0;i<tcparray->num;i++) {
386 struct ctdb_connection *tcon;
388 tcon = &tcparray->connections[i];
389 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
390 (unsigned)ntohs(tcon->dst.ip.sin_port),
391 ctdb_addr_to_str(&tcon->src),
392 (unsigned)ntohs(tcon->src.ip.sin_port)));
393 ret = ctdb_sys_send_tcp(
394 &tcon->src,
395 &tcon->dst,
396 0, 0, 0);
397 if (ret != 0) {
398 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
399 ctdb_addr_to_str(&tcon->src)));
404 arp->count++;
406 if (arp->count == CTDB_ARP_REPEAT) {
407 talloc_free(arp);
408 return;
411 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
412 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
413 ctdb_control_send_arp, arp);
416 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
417 struct ctdb_vnn *vnn)
419 struct ctdb_takeover_arp *arp;
420 struct ctdb_tcp_array *tcparray;
422 if (!vnn->takeover_ctx) {
423 vnn->takeover_ctx = talloc_new(vnn);
424 if (!vnn->takeover_ctx) {
425 return -1;
429 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
430 if (!arp) {
431 return -1;
434 arp->ctdb = ctdb;
435 arp->addr = vnn->public_address;
436 arp->vnn = vnn;
438 tcparray = vnn->tcp_array;
439 if (tcparray) {
440 /* add all of the known tcp connections for this IP to the
441 list of tcp connections to send tickle acks for */
442 arp->tcparray = talloc_steal(arp, tcparray);
444 vnn->tcp_array = NULL;
445 vnn->tcp_update_needed = true;
448 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
449 timeval_zero(), ctdb_control_send_arp, arp);
451 return 0;
/* State carried from ctdb_do_takeip to its event script callback */
struct ctdb_do_takeip_state {
	/* control request to reply to; NULL until the event is queued */
	struct ctdb_req_control_old *c;
	struct ctdb_vnn *vnn;
};
460 called when takeip event finishes
462 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
463 void *private_data)
465 struct ctdb_do_takeip_state *state =
466 talloc_get_type(private_data, struct ctdb_do_takeip_state);
467 int32_t ret;
468 TDB_DATA data;
470 if (status != 0) {
471 if (status == -ETIMEDOUT) {
472 ctdb_ban_self(ctdb);
474 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
475 ctdb_addr_to_str(&state->vnn->public_address),
476 ctdb_vnn_iface_string(state->vnn)));
477 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
479 talloc_free(state);
480 return;
483 if (ctdb->do_checkpublicip) {
485 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
486 if (ret != 0) {
487 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
488 talloc_free(state);
489 return;
494 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
495 data.dsize = strlen((char *)data.dptr) + 1;
496 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
498 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
501 /* the control succeeded */
502 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
503 talloc_free(state);
504 return;
507 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
509 state->vnn->update_in_flight = false;
510 return 0;
514 take over an ip address
516 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
517 struct ctdb_req_control_old *c,
518 struct ctdb_vnn *vnn)
520 int ret;
521 struct ctdb_do_takeip_state *state;
523 if (vnn->update_in_flight) {
524 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
525 "update for this IP already in flight\n",
526 ctdb_addr_to_str(&vnn->public_address),
527 vnn->public_netmask_bits));
528 return -1;
531 ret = ctdb_vnn_assign_iface(ctdb, vnn);
532 if (ret != 0) {
533 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
534 "assign a usable interface\n",
535 ctdb_addr_to_str(&vnn->public_address),
536 vnn->public_netmask_bits));
537 return -1;
540 state = talloc(vnn, struct ctdb_do_takeip_state);
541 CTDB_NO_MEMORY(ctdb, state);
543 state->c = NULL;
544 state->vnn = vnn;
546 vnn->update_in_flight = true;
547 talloc_set_destructor(state, ctdb_takeip_destructor);
549 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
550 ctdb_addr_to_str(&vnn->public_address),
551 vnn->public_netmask_bits,
552 ctdb_vnn_iface_string(vnn)));
554 ret = ctdb_event_script_callback(ctdb,
555 state,
556 ctdb_do_takeip_callback,
557 state,
558 CTDB_EVENT_TAKE_IP,
559 "%s %s %u",
560 ctdb_vnn_iface_string(vnn),
561 ctdb_addr_to_str(&vnn->public_address),
562 vnn->public_netmask_bits);
564 if (ret != 0) {
565 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
566 ctdb_addr_to_str(&vnn->public_address),
567 ctdb_vnn_iface_string(vnn)));
568 talloc_free(state);
569 return -1;
572 state->c = talloc_steal(ctdb, c);
573 return 0;
/* State carried from ctdb_do_updateip to its event script callback */
struct ctdb_do_updateip_state {
	/* control request to reply to; NULL until the event is queued */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;
	struct ctdb_vnn *vnn;
};
583 called when updateip event finishes
585 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
586 void *private_data)
588 struct ctdb_do_updateip_state *state =
589 talloc_get_type(private_data, struct ctdb_do_updateip_state);
591 if (status != 0) {
592 if (status == -ETIMEDOUT) {
593 ctdb_ban_self(ctdb);
595 DEBUG(DEBUG_ERR,
596 ("Failed update of IP %s from interface %s to %s\n",
597 ctdb_addr_to_str(&state->vnn->public_address),
598 iface_string(state->old),
599 ctdb_vnn_iface_string(state->vnn)));
602 * All we can do is reset the old interface
603 * and let the next run fix it
605 ctdb_vnn_unassign_iface(ctdb, state->vnn);
606 state->vnn->iface = state->old;
607 state->vnn->iface->references++;
609 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
610 talloc_free(state);
611 return;
614 /* the control succeeded */
615 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
616 talloc_free(state);
617 return;
620 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
622 state->vnn->update_in_flight = false;
623 return 0;
627 update (move) an ip address
629 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
630 struct ctdb_req_control_old *c,
631 struct ctdb_vnn *vnn)
633 int ret;
634 struct ctdb_do_updateip_state *state;
635 struct ctdb_interface *old = vnn->iface;
636 const char *old_name = iface_string(old);
637 const char *new_name;
639 if (vnn->update_in_flight) {
640 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
641 "update for this IP already in flight\n",
642 ctdb_addr_to_str(&vnn->public_address),
643 vnn->public_netmask_bits));
644 return -1;
647 ctdb_vnn_unassign_iface(ctdb, vnn);
648 ret = ctdb_vnn_assign_iface(ctdb, vnn);
649 if (ret != 0) {
650 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
651 "assign a usable interface (old iface '%s')\n",
652 ctdb_addr_to_str(&vnn->public_address),
653 vnn->public_netmask_bits,
654 old_name));
655 return -1;
658 if (old == vnn->iface) {
659 /* A benign update from one interface onto itself.
660 * no need to run the eventscripts in this case, just return
661 * success.
663 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
664 return 0;
667 state = talloc(vnn, struct ctdb_do_updateip_state);
668 CTDB_NO_MEMORY(ctdb, state);
670 state->c = NULL;
671 state->old = old;
672 state->vnn = vnn;
674 vnn->update_in_flight = true;
675 talloc_set_destructor(state, ctdb_updateip_destructor);
677 new_name = ctdb_vnn_iface_string(vnn);
678 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
679 "interface %s to %s\n",
680 ctdb_addr_to_str(&vnn->public_address),
681 vnn->public_netmask_bits,
682 old_name,
683 new_name));
685 ret = ctdb_event_script_callback(ctdb,
686 state,
687 ctdb_do_updateip_callback,
688 state,
689 CTDB_EVENT_UPDATE_IP,
690 "%s %s %s %u",
691 old_name,
692 new_name,
693 ctdb_addr_to_str(&vnn->public_address),
694 vnn->public_netmask_bits);
695 if (ret != 0) {
696 DEBUG(DEBUG_ERR,
697 ("Failed update IP %s from interface %s to %s\n",
698 ctdb_addr_to_str(&vnn->public_address),
699 old_name, new_name));
700 talloc_free(state);
701 return -1;
704 state->c = talloc_steal(ctdb, c);
705 return 0;
709 Find the vnn of the node that has a public ip address
710 returns -1 if the address is not known as a public address
712 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
714 struct ctdb_vnn *vnn;
716 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
717 if (ctdb_same_ip(&vnn->public_address, addr)) {
718 return vnn;
722 return NULL;
726 take over an ip address
728 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
729 struct ctdb_req_control_old *c,
730 TDB_DATA indata,
731 bool *async_reply)
733 int ret;
734 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
735 struct ctdb_vnn *vnn;
736 bool have_ip = false;
737 bool do_updateip = false;
738 bool do_takeip = false;
739 struct ctdb_interface *best_iface = NULL;
741 if (pip->pnn != ctdb->pnn) {
742 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
743 "with pnn %d, but we're node %d\n",
744 ctdb_addr_to_str(&pip->addr),
745 pip->pnn, ctdb->pnn));
746 return -1;
749 /* update out vnn list */
750 vnn = find_public_ip_vnn(ctdb, &pip->addr);
751 if (vnn == NULL) {
752 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
753 ctdb_addr_to_str(&pip->addr)));
754 return 0;
757 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
758 have_ip = ctdb_sys_have_ip(&pip->addr);
760 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
761 if (best_iface == NULL) {
762 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
763 "a usable interface (old %s, have_ip %d)\n",
764 ctdb_addr_to_str(&vnn->public_address),
765 vnn->public_netmask_bits,
766 ctdb_vnn_iface_string(vnn),
767 have_ip));
768 return -1;
771 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
772 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
773 "and we have it on iface[%s], but it was assigned to node %d"
774 "and we are node %d, banning ourself\n",
775 ctdb_addr_to_str(&vnn->public_address),
776 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
777 ctdb_ban_self(ctdb);
778 return -1;
781 if (vnn->pnn == -1 && have_ip) {
782 /* This will cause connections to be reset and
783 * reestablished. However, this is a very unusual
784 * situation and doing this will completely repair the
785 * inconsistency in the VNN.
787 DEBUG(DEBUG_WARNING,
788 (__location__
789 " Doing updateip for IP %s already on an interface\n",
790 ctdb_addr_to_str(&vnn->public_address)));
791 do_updateip = true;
794 if (vnn->iface) {
795 if (vnn->iface != best_iface) {
796 if (!vnn->iface->link_up) {
797 do_updateip = true;
798 } else if (vnn->iface->references > (best_iface->references + 1)) {
799 /* only move when the rebalance gains something */
800 do_updateip = true;
805 if (!have_ip) {
806 if (do_updateip) {
807 ctdb_vnn_unassign_iface(ctdb, vnn);
808 do_updateip = false;
810 do_takeip = true;
813 if (do_takeip) {
814 ret = ctdb_do_takeip(ctdb, c, vnn);
815 if (ret != 0) {
816 return -1;
818 } else if (do_updateip) {
819 ret = ctdb_do_updateip(ctdb, c, vnn);
820 if (ret != 0) {
821 return -1;
823 } else {
825 * The interface is up and the kernel known the ip
826 * => do nothing
828 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
829 ctdb_addr_to_str(&pip->addr),
830 vnn->public_netmask_bits,
831 ctdb_vnn_iface_string(vnn)));
832 return 0;
835 /* tell ctdb_control.c that we will be replying asynchronously */
836 *async_reply = true;
838 return 0;
841 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
843 DLIST_REMOVE(ctdb->vnn, vnn);
844 ctdb_vnn_unassign_iface(ctdb, vnn);
845 ctdb_remove_orphaned_ifaces(ctdb, vnn);
846 talloc_free(vnn);
849 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
850 struct ctdb_vnn *vnn,
851 ctdb_sock_addr *addr)
853 TDB_DATA data;
855 /* Send a message to all clients of this node telling them
856 * that the cluster has been reconfigured and they should
857 * close any connections on this IP address
859 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
860 data.dsize = strlen((char *)data.dptr)+1;
861 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
862 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
864 ctdb_vnn_unassign_iface(ctdb, vnn);
866 /* Process the IP if it has been marked for deletion */
867 if (vnn->delete_pending) {
868 do_delete_ip(ctdb, vnn);
869 return NULL;
872 return vnn;
875 struct release_ip_callback_state {
876 struct ctdb_req_control_old *c;
877 ctdb_sock_addr *addr;
878 struct ctdb_vnn *vnn;
879 uint32_t target_pnn;
883 called when releaseip event finishes
885 static void release_ip_callback(struct ctdb_context *ctdb, int status,
886 void *private_data)
888 struct release_ip_callback_state *state =
889 talloc_get_type(private_data, struct release_ip_callback_state);
891 if (status == -ETIMEDOUT) {
892 ctdb_ban_self(ctdb);
895 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
896 if (ctdb_sys_have_ip(state->addr)) {
897 DEBUG(DEBUG_ERR,
898 ("IP %s still hosted during release IP callback, failing\n",
899 ctdb_addr_to_str(state->addr)));
900 ctdb_request_control_reply(ctdb, state->c,
901 NULL, -1, NULL);
902 talloc_free(state);
903 return;
907 state->vnn->pnn = state->target_pnn;
908 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
910 /* the control succeeded */
911 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
912 talloc_free(state);
915 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
917 if (state->vnn != NULL) {
918 state->vnn->update_in_flight = false;
920 return 0;
924 release an ip address
926 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
927 struct ctdb_req_control_old *c,
928 TDB_DATA indata,
929 bool *async_reply)
931 int ret;
932 struct release_ip_callback_state *state;
933 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
934 struct ctdb_vnn *vnn;
935 const char *iface;
937 /* update our vnn list */
938 vnn = find_public_ip_vnn(ctdb, &pip->addr);
939 if (vnn == NULL) {
940 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
941 ctdb_addr_to_str(&pip->addr)));
942 return 0;
945 /* stop any previous arps */
946 talloc_free(vnn->takeover_ctx);
947 vnn->takeover_ctx = NULL;
949 /* RELEASE_IP controls are sent to all nodes that should not
950 * be hosting a particular IP. This serves 2 purposes. The
951 * first is to help resolve any inconsistencies. If a node
952 * does unexpectly host an IP then it will be released. The
953 * 2nd is to use a "redundant release" to tell non-takeover
954 * nodes where an IP is moving to. This is how "ctdb ip" can
955 * report the (likely) location of an IP by only asking the
956 * local node. Redundant releases need to update the PNN but
957 * are otherwise ignored.
959 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
960 if (!ctdb_sys_have_ip(&pip->addr)) {
961 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
962 ctdb_addr_to_str(&pip->addr),
963 vnn->public_netmask_bits,
964 ctdb_vnn_iface_string(vnn)));
965 vnn->pnn = pip->pnn;
966 ctdb_vnn_unassign_iface(ctdb, vnn);
967 return 0;
969 } else {
970 if (vnn->iface == NULL) {
971 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
972 ctdb_addr_to_str(&pip->addr),
973 vnn->public_netmask_bits));
974 vnn->pnn = pip->pnn;
975 return 0;
979 /* There is a potential race between take_ip and us because we
980 * update the VNN via a callback that run when the
981 * eventscripts have been run. Avoid the race by allowing one
982 * update to be in flight at a time.
984 if (vnn->update_in_flight) {
985 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
986 "update for this IP already in flight\n",
987 ctdb_addr_to_str(&vnn->public_address),
988 vnn->public_netmask_bits));
989 return -1;
992 iface = ctdb_vnn_iface_string(vnn);
994 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
995 ctdb_addr_to_str(&pip->addr),
996 vnn->public_netmask_bits,
997 iface,
998 pip->pnn));
1000 state = talloc(ctdb, struct release_ip_callback_state);
1001 if (state == NULL) {
1002 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1003 __FILE__, __LINE__);
1004 return -1;
1007 state->c = NULL;
1008 state->addr = talloc(state, ctdb_sock_addr);
1009 if (state->addr == NULL) {
1010 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1011 __FILE__, __LINE__);
1012 talloc_free(state);
1013 return -1;
1015 *state->addr = pip->addr;
1016 state->target_pnn = pip->pnn;
1017 state->vnn = vnn;
1019 vnn->update_in_flight = true;
1020 talloc_set_destructor(state, ctdb_releaseip_destructor);
1022 ret = ctdb_event_script_callback(ctdb,
1023 state, release_ip_callback, state,
1024 CTDB_EVENT_RELEASE_IP,
1025 "%s %s %u",
1026 iface,
1027 ctdb_addr_to_str(&pip->addr),
1028 vnn->public_netmask_bits);
1029 if (ret != 0) {
1030 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1031 ctdb_addr_to_str(&pip->addr),
1032 ctdb_vnn_iface_string(vnn)));
1033 talloc_free(state);
1034 return -1;
1037 /* tell the control that we will be reply asynchronously */
1038 *async_reply = true;
1039 state->c = talloc_steal(state, c);
1040 return 0;
1043 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1044 ctdb_sock_addr *addr,
1045 unsigned mask, const char *ifaces,
1046 bool check_address)
1048 struct ctdb_vnn *vnn;
1049 char *tmp;
1050 const char *iface;
1052 /* Verify that we don't have an entry for this IP yet */
1053 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1054 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1055 DEBUG(DEBUG_ERR,
1056 ("Duplicate public IP address '%s'\n",
1057 ctdb_addr_to_str(addr)));
1058 return -1;
1062 /* Create a new VNN structure for this IP address */
1063 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1064 if (vnn == NULL) {
1065 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1066 return -1;
1068 tmp = talloc_strdup(vnn, ifaces);
1069 if (tmp == NULL) {
1070 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1071 talloc_free(vnn);
1072 return -1;
1074 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1075 struct vnn_interface *vnn_iface;
1076 struct ctdb_interface *i;
1077 if (!ctdb_sys_check_iface_exists(iface)) {
1078 DEBUG(DEBUG_ERR,
1079 ("Unknown interface %s for public address %s\n",
1080 iface, ctdb_addr_to_str(addr)));
1081 talloc_free(vnn);
1082 return -1;
1085 i = ctdb_add_local_iface(ctdb, iface);
1086 if (i == NULL) {
1087 DEBUG(DEBUG_ERR,
1088 ("Failed to add interface '%s' "
1089 "for public address %s\n",
1090 iface, ctdb_addr_to_str(addr)));
1091 talloc_free(vnn);
1092 return -1;
1095 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1096 if (vnn_iface == NULL) {
1097 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1098 talloc_free(vnn);
1099 return -1;
1102 vnn_iface->iface = i;
1103 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1105 talloc_free(tmp);
1106 vnn->public_address = *addr;
1107 vnn->public_netmask_bits = mask;
1108 vnn->pnn = -1;
1110 DLIST_ADD(ctdb->vnn, vnn);
1112 return 0;
1116 setup the public address lists from a file
1118 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1120 bool ok;
1121 char **lines;
1122 int nlines;
1123 int i;
1125 /* If no public addresses file given then try the default */
1126 if (ctdb->public_addresses_file == NULL) {
1127 const char *b = getenv("CTDB_BASE");
1128 if (b == NULL) {
1129 DBG_ERR("CTDB_BASE not set\n");
1130 return -1;
1132 ctdb->public_addresses_file = talloc_asprintf(
1133 ctdb, "%s/%s", b, "public_addresses");
1134 if (ctdb->public_addresses_file == NULL) {
1135 DBG_ERR("Out of memory\n");
1136 return -1;
1140 /* If the file doesn't exist then warn and do nothing */
1141 ok = file_exist(ctdb->public_addresses_file);
1142 if (!ok) {
1143 D_WARNING("Not loading public addresses, no file %s\n",
1144 ctdb->public_addresses_file);
1145 return 0;
1148 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1149 if (lines == NULL) {
1150 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1151 return -1;
1153 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1154 nlines--;
1157 for (i=0;i<nlines;i++) {
1158 unsigned mask;
1159 ctdb_sock_addr addr;
1160 const char *addrstr;
1161 const char *ifaces;
1162 char *tok, *line;
1163 int ret;
1165 line = lines[i];
1166 while ((*line == ' ') || (*line == '\t')) {
1167 line++;
1169 if (*line == '#') {
1170 continue;
1172 if (strcmp(line, "") == 0) {
1173 continue;
1175 tok = strtok(line, " \t");
1176 addrstr = tok;
1178 tok = strtok(NULL, " \t");
1179 if (tok == NULL) {
1180 D_ERR("No interface specified at line %u "
1181 "of public addresses file\n", i+1);
1182 talloc_free(lines);
1183 return -1;
1185 ifaces = tok;
1187 if (addrstr == NULL) {
1188 D_ERR("Badly formed line %u in public address list\n",
1189 i+1);
1190 talloc_free(lines);
1191 return -1;
1194 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1195 if (ret != 0) {
1196 D_ERR("Badly formed line %u in public address list\n",
1197 i+1);
1198 talloc_free(lines);
1199 return -1;
1202 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1203 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1204 talloc_free(lines);
1205 return -1;
1210 D_NOTICE("Loaded public addresses from %s\n",
1211 ctdb->public_addresses_file);
1213 talloc_free(lines);
1214 return 0;
1218 destroy a ctdb_client_ip structure
1220 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1222 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1223 ctdb_addr_to_str(&ip->addr),
1224 ntohs(ip->addr.ip.sin_port),
1225 ip->client_id));
1227 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1228 return 0;
1232 called by a client to inform us of a TCP connection that it is managing
1233 that should tickled with an ACK when IP takeover is done
1235 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1236 TDB_DATA indata)
1238 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1239 struct ctdb_connection *tcp_sock = NULL;
1240 struct ctdb_tcp_list *tcp;
1241 struct ctdb_connection t;
1242 int ret;
1243 TDB_DATA data;
1244 struct ctdb_client_ip *ip;
1245 struct ctdb_vnn *vnn;
1246 ctdb_sock_addr addr;
1248 /* If we don't have public IPs, tickles are useless */
1249 if (ctdb->vnn == NULL) {
1250 return 0;
1253 tcp_sock = (struct ctdb_connection *)indata.dptr;
1255 addr = tcp_sock->src;
1256 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1257 addr = tcp_sock->dst;
1258 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1260 ZERO_STRUCT(addr);
1261 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1262 vnn = find_public_ip_vnn(ctdb, &addr);
1263 if (vnn == NULL) {
1264 switch (addr.sa.sa_family) {
1265 case AF_INET:
1266 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1267 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1268 ctdb_addr_to_str(&addr)));
1270 break;
1271 case AF_INET6:
1272 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1273 ctdb_addr_to_str(&addr)));
1274 break;
1275 default:
1276 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1279 return 0;
1282 if (vnn->pnn != ctdb->pnn) {
1283 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1284 ctdb_addr_to_str(&addr),
1285 client_id, client->pid));
1286 /* failing this call will tell smbd to die */
1287 return -1;
1290 ip = talloc(client, struct ctdb_client_ip);
1291 CTDB_NO_MEMORY(ctdb, ip);
1293 ip->ctdb = ctdb;
1294 ip->addr = addr;
1295 ip->client_id = client_id;
1296 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1297 DLIST_ADD(ctdb->client_ip_list, ip);
1299 tcp = talloc(client, struct ctdb_tcp_list);
1300 CTDB_NO_MEMORY(ctdb, tcp);
1302 tcp->connection.src = tcp_sock->src;
1303 tcp->connection.dst = tcp_sock->dst;
1305 DLIST_ADD(client->tcp_list, tcp);
1307 t.src = tcp_sock->src;
1308 t.dst = tcp_sock->dst;
1310 data.dptr = (uint8_t *)&t;
1311 data.dsize = sizeof(t);
1313 switch (addr.sa.sa_family) {
1314 case AF_INET:
1315 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1316 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1317 ctdb_addr_to_str(&tcp_sock->src),
1318 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1319 break;
1320 case AF_INET6:
1321 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1322 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1323 ctdb_addr_to_str(&tcp_sock->src),
1324 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1325 break;
1326 default:
1327 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1331 /* tell all nodes about this tcp connection */
1332 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1333 CTDB_CONTROL_TCP_ADD,
1334 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1335 if (ret != 0) {
1336 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1337 return -1;
1340 return 0;
1344 find a tcp address on a list
1346 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1347 struct ctdb_connection *tcp)
1349 int i;
1351 if (array == NULL) {
1352 return NULL;
1355 for (i=0;i<array->num;i++) {
1356 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1357 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1358 return &array->connections[i];
1361 return NULL;
1367 called by a daemon to inform us of a TCP connection that one of its
1368 clients managing that should tickled with an ACK when IP takeover is
1369 done
1371 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1373 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1374 struct ctdb_tcp_array *tcparray;
1375 struct ctdb_connection tcp;
1376 struct ctdb_vnn *vnn;
1378 /* If we don't have public IPs, tickles are useless */
1379 if (ctdb->vnn == NULL) {
1380 return 0;
1383 vnn = find_public_ip_vnn(ctdb, &p->dst);
1384 if (vnn == NULL) {
1385 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1386 ctdb_addr_to_str(&p->dst)));
1388 return -1;
1392 tcparray = vnn->tcp_array;
1394 /* If this is the first tickle */
1395 if (tcparray == NULL) {
1396 tcparray = talloc(vnn, struct ctdb_tcp_array);
1397 CTDB_NO_MEMORY(ctdb, tcparray);
1398 vnn->tcp_array = tcparray;
1400 tcparray->num = 0;
1401 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1402 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1404 tcparray->connections[tcparray->num].src = p->src;
1405 tcparray->connections[tcparray->num].dst = p->dst;
1406 tcparray->num++;
1408 if (tcp_update_needed) {
1409 vnn->tcp_update_needed = true;
1411 return 0;
1415 /* Do we already have this tickle ?*/
1416 tcp.src = p->src;
1417 tcp.dst = p->dst;
1418 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1419 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1420 ctdb_addr_to_str(&tcp.dst),
1421 ntohs(tcp.dst.ip.sin_port),
1422 vnn->pnn));
1423 return 0;
1426 /* A new tickle, we must add it to the array */
1427 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1428 struct ctdb_connection,
1429 tcparray->num+1);
1430 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1432 tcparray->connections[tcparray->num].src = p->src;
1433 tcparray->connections[tcparray->num].dst = p->dst;
1434 tcparray->num++;
1436 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1437 ctdb_addr_to_str(&tcp.dst),
1438 ntohs(tcp.dst.ip.sin_port),
1439 vnn->pnn));
1441 if (tcp_update_needed) {
1442 vnn->tcp_update_needed = true;
1445 return 0;
1449 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1451 struct ctdb_connection *tcpp;
1453 if (vnn == NULL) {
1454 return;
1457 /* if the array is empty we cant remove it
1458 and we don't need to do anything
1460 if (vnn->tcp_array == NULL) {
1461 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1462 ctdb_addr_to_str(&conn->dst),
1463 ntohs(conn->dst.ip.sin_port)));
1464 return;
1468 /* See if we know this connection
1469 if we don't know this connection then we dont need to do anything
1471 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1472 if (tcpp == NULL) {
1473 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1474 ctdb_addr_to_str(&conn->dst),
1475 ntohs(conn->dst.ip.sin_port)));
1476 return;
1480 /* We need to remove this entry from the array.
1481 Instead of allocating a new array and copying data to it
1482 we cheat and just copy the last entry in the existing array
1483 to the entry that is to be removed and just shring the
1484 ->num field
1486 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1487 vnn->tcp_array->num--;
1489 /* If we deleted the last entry we also need to remove the entire array
1491 if (vnn->tcp_array->num == 0) {
1492 talloc_free(vnn->tcp_array);
1493 vnn->tcp_array = NULL;
1496 vnn->tcp_update_needed = true;
1498 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1499 ctdb_addr_to_str(&conn->src),
1500 ntohs(conn->src.ip.sin_port)));
1505 called by a daemon to inform us of a TCP connection that one of its
1506 clients used are no longer needed in the tickle database
1508 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1510 struct ctdb_vnn *vnn;
1511 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1513 /* If we don't have public IPs, tickles are useless */
1514 if (ctdb->vnn == NULL) {
1515 return 0;
1518 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1519 if (vnn == NULL) {
1520 DEBUG(DEBUG_ERR,
1521 (__location__ " unable to find public address %s\n",
1522 ctdb_addr_to_str(&conn->dst)));
1523 return 0;
1526 ctdb_remove_connection(vnn, conn);
1528 return 0;
1532 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1533 bool force);
1536 Called when another daemon starts - causes all tickles for all
1537 public addresses we are serving to be sent to the new node on the
1538 next check. This actually causes the tickles to be sent to the
1539 other node immediately. In case there is an error, the periodic
1540 timer will send the updates on timer event. This is simple and
1541 doesn't require careful error handling.
1543 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1545 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1546 (unsigned long) pnn));
1548 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1549 return 0;
1554 called when a client structure goes away - hook to remove
1555 elements from the tcp_list in all daemons
1557 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1559 while (client->tcp_list) {
1560 struct ctdb_vnn *vnn;
1561 struct ctdb_tcp_list *tcp = client->tcp_list;
1562 struct ctdb_connection *conn = &tcp->connection;
1564 DLIST_REMOVE(client->tcp_list, tcp);
1566 vnn = find_public_ip_vnn(client->ctdb,
1567 &conn->dst);
1568 if (vnn == NULL) {
1569 DEBUG(DEBUG_ERR,
1570 (__location__ " unable to find public address %s\n",
1571 ctdb_addr_to_str(&conn->dst)));
1572 continue;
1575 /* If the IP address is hosted on this node then
1576 * remove the connection. */
1577 if (vnn->pnn == client->ctdb->pnn) {
1578 ctdb_remove_connection(vnn, conn);
1581 /* Otherwise this function has been called because the
1582 * server IP address has been released to another node
1583 * and the client has exited. This means that we
1584 * should not delete the connection information. The
1585 * takeover node processes connections too. */
1590 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1592 struct ctdb_vnn *vnn, *next;
1593 int count = 0;
1595 if (ctdb_config.failover_disabled == 1) {
1596 return;
1599 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1600 /* vnn can be freed below in release_ip_post() */
1601 next = vnn->next;
1603 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1604 ctdb_vnn_unassign_iface(ctdb, vnn);
1605 continue;
1608 /* Don't allow multiple releases at once. Some code,
1609 * particularly ctdb_tickle_sentenced_connections() is
1610 * not re-entrant */
1611 if (vnn->update_in_flight) {
1612 DEBUG(DEBUG_WARNING,
1613 (__location__
1614 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1615 ctdb_addr_to_str(&vnn->public_address),
1616 vnn->public_netmask_bits,
1617 ctdb_vnn_iface_string(vnn)));
1618 continue;
1620 vnn->update_in_flight = true;
1622 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1623 ctdb_addr_to_str(&vnn->public_address),
1624 vnn->public_netmask_bits,
1625 ctdb_vnn_iface_string(vnn)));
1627 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1628 ctdb_vnn_iface_string(vnn),
1629 ctdb_addr_to_str(&vnn->public_address),
1630 vnn->public_netmask_bits);
1631 /* releaseip timeouts are converted to success, so to
1632 * detect failures just check if the IP address is
1633 * still there...
1635 if (ctdb_sys_have_ip(&vnn->public_address)) {
1636 DEBUG(DEBUG_ERR,
1637 (__location__
1638 " IP address %s not released\n",
1639 ctdb_addr_to_str(&vnn->public_address)));
1640 vnn->update_in_flight = false;
1641 continue;
1644 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1645 if (vnn != NULL) {
1646 vnn->update_in_flight = false;
1648 count++;
1651 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1656 get list of public IPs
1658 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1659 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1661 int i, num, len;
1662 struct ctdb_public_ip_list_old *ips;
1663 struct ctdb_vnn *vnn;
1664 bool only_available = false;
1666 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1667 only_available = true;
1670 /* count how many public ip structures we have */
1671 num = 0;
1672 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1673 num++;
1676 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1677 num*sizeof(struct ctdb_public_ip);
1678 ips = talloc_zero_size(outdata, len);
1679 CTDB_NO_MEMORY(ctdb, ips);
1681 i = 0;
1682 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1683 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1684 continue;
1686 ips->ips[i].pnn = vnn->pnn;
1687 ips->ips[i].addr = vnn->public_address;
1688 i++;
1690 ips->num = i;
1691 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1692 i*sizeof(struct ctdb_public_ip);
1694 outdata->dsize = len;
1695 outdata->dptr = (uint8_t *)ips;
1697 return 0;
1701 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1702 struct ctdb_req_control_old *c,
1703 TDB_DATA indata,
1704 TDB_DATA *outdata)
1706 int i, num, len;
1707 ctdb_sock_addr *addr;
1708 struct ctdb_public_ip_info_old *info;
1709 struct ctdb_vnn *vnn;
1710 struct vnn_interface *iface;
1712 addr = (ctdb_sock_addr *)indata.dptr;
1714 vnn = find_public_ip_vnn(ctdb, addr);
1715 if (vnn == NULL) {
1716 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1717 "'%s'not a public address\n",
1718 ctdb_addr_to_str(addr)));
1719 return -1;
1722 /* count how many public ip structures we have */
1723 num = 0;
1724 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1725 num++;
1728 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1729 num*sizeof(struct ctdb_iface);
1730 info = talloc_zero_size(outdata, len);
1731 CTDB_NO_MEMORY(ctdb, info);
1733 info->ip.addr = vnn->public_address;
1734 info->ip.pnn = vnn->pnn;
1735 info->active_idx = 0xFFFFFFFF;
1737 i = 0;
1738 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1739 struct ctdb_interface *cur;
1741 cur = iface->iface;
1742 if (vnn->iface == cur) {
1743 info->active_idx = i;
1745 strncpy(info->ifaces[i].name, cur->name,
1746 sizeof(info->ifaces[i].name));
1747 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1748 info->ifaces[i].link_state = cur->link_up;
1749 info->ifaces[i].references = cur->references;
1751 i++;
1753 info->num = i;
1754 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1755 i*sizeof(struct ctdb_iface);
1757 outdata->dsize = len;
1758 outdata->dptr = (uint8_t *)info;
1760 return 0;
1763 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1764 struct ctdb_req_control_old *c,
1765 TDB_DATA *outdata)
1767 int i, num, len;
1768 struct ctdb_iface_list_old *ifaces;
1769 struct ctdb_interface *cur;
1771 /* count how many public ip structures we have */
1772 num = 0;
1773 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1774 num++;
1777 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1778 num*sizeof(struct ctdb_iface);
1779 ifaces = talloc_zero_size(outdata, len);
1780 CTDB_NO_MEMORY(ctdb, ifaces);
1782 i = 0;
1783 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1784 strncpy(ifaces->ifaces[i].name, cur->name,
1785 sizeof(ifaces->ifaces[i].name));
1786 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1787 ifaces->ifaces[i].link_state = cur->link_up;
1788 ifaces->ifaces[i].references = cur->references;
1789 i++;
1791 ifaces->num = i;
1792 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1793 i*sizeof(struct ctdb_iface);
1795 outdata->dsize = len;
1796 outdata->dptr = (uint8_t *)ifaces;
1798 return 0;
1801 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1802 struct ctdb_req_control_old *c,
1803 TDB_DATA indata)
1805 struct ctdb_iface *info;
1806 struct ctdb_interface *iface;
1807 bool link_up = false;
1809 info = (struct ctdb_iface *)indata.dptr;
1811 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1812 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1813 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1814 len, len, info->name));
1815 return -1;
1818 switch (info->link_state) {
1819 case 0:
1820 link_up = false;
1821 break;
1822 case 1:
1823 link_up = true;
1824 break;
1825 default:
1826 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1827 (unsigned int)info->link_state));
1828 return -1;
1831 if (info->references != 0) {
1832 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1833 (unsigned int)info->references));
1834 return -1;
1837 iface = ctdb_find_iface(ctdb, info->name);
1838 if (iface == NULL) {
1839 return -1;
1842 if (link_up == iface->link_up) {
1843 return 0;
1846 DEBUG(DEBUG_ERR,
1847 ("iface[%s] has changed it's link status %s => %s\n",
1848 iface->name,
1849 iface->link_up?"up":"down",
1850 link_up?"up":"down"));
1852 iface->link_up = link_up;
1853 return 0;
1858 called by a daemon to inform us of the entire list of TCP tickles for
1859 a particular public address.
1860 this control should only be sent by the node that is currently serving
1861 that public address.
1863 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1865 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1866 struct ctdb_tcp_array *tcparray;
1867 struct ctdb_vnn *vnn;
1869 /* We must at least have tickles.num or else we cant verify the size
1870 of the received data blob
1872 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1873 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1874 return -1;
1877 /* verify that the size of data matches what we expect */
1878 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1879 + sizeof(struct ctdb_connection) * list->num) {
1880 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1881 return -1;
1884 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1885 ctdb_addr_to_str(&list->addr)));
1887 vnn = find_public_ip_vnn(ctdb, &list->addr);
1888 if (vnn == NULL) {
1889 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1890 ctdb_addr_to_str(&list->addr)));
1892 return 1;
1895 if (vnn->pnn == ctdb->pnn) {
1896 DEBUG(DEBUG_INFO,
1897 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1898 ctdb_addr_to_str(&list->addr)));
1899 return 0;
1902 /* remove any old ticklelist we might have */
1903 talloc_free(vnn->tcp_array);
1904 vnn->tcp_array = NULL;
1906 tcparray = talloc(vnn, struct ctdb_tcp_array);
1907 CTDB_NO_MEMORY(ctdb, tcparray);
1909 tcparray->num = list->num;
1911 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1912 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1914 memcpy(tcparray->connections, &list->connections[0],
1915 sizeof(struct ctdb_connection)*tcparray->num);
1917 /* We now have a new fresh tickle list array for this vnn */
1918 vnn->tcp_array = tcparray;
1920 return 0;
1924 called to return the full list of tickles for the puclic address associated
1925 with the provided vnn
1927 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1929 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1930 struct ctdb_tickle_list_old *list;
1931 struct ctdb_tcp_array *tcparray;
1932 int num, i;
1933 struct ctdb_vnn *vnn;
1934 unsigned port;
1936 vnn = find_public_ip_vnn(ctdb, addr);
1937 if (vnn == NULL) {
1938 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1939 ctdb_addr_to_str(addr)));
1941 return 1;
1944 port = ctdb_addr_to_port(addr);
1946 tcparray = vnn->tcp_array;
1947 num = 0;
1948 if (tcparray != NULL) {
1949 if (port == 0) {
1950 /* All connections */
1951 num = tcparray->num;
1952 } else {
1953 /* Count connections for port */
1954 for (i = 0; i < tcparray->num; i++) {
1955 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1956 num++;
1962 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1963 + sizeof(struct ctdb_connection) * num;
1965 outdata->dptr = talloc_size(outdata, outdata->dsize);
1966 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1967 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1969 list->addr = *addr;
1970 list->num = num;
1972 if (num == 0) {
1973 return 0;
1976 num = 0;
1977 for (i = 0; i < tcparray->num; i++) {
1978 if (port == 0 || \
1979 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1980 list->connections[num] = tcparray->connections[i];
1981 num++;
1985 return 0;
1990 set the list of all tcp tickles for a public address
1992 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1993 ctdb_sock_addr *addr,
1994 struct ctdb_tcp_array *tcparray)
1996 int ret, num;
1997 TDB_DATA data;
1998 struct ctdb_tickle_list_old *list;
2000 if (tcparray) {
2001 num = tcparray->num;
2002 } else {
2003 num = 0;
2006 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2007 sizeof(struct ctdb_connection) * num;
2008 data.dptr = talloc_size(ctdb, data.dsize);
2009 CTDB_NO_MEMORY(ctdb, data.dptr);
2011 list = (struct ctdb_tickle_list_old *)data.dptr;
2012 list->addr = *addr;
2013 list->num = num;
2014 if (tcparray) {
2015 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2018 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2019 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2020 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2021 if (ret != 0) {
2022 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2023 return -1;
2026 talloc_free(data.dptr);
2028 return ret;
2031 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2032 bool force)
2034 struct ctdb_vnn *vnn;
2035 int ret;
2037 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2038 /* we only send out updates for public addresses that
2039 we have taken over
2041 if (ctdb->pnn != vnn->pnn) {
2042 continue;
2045 /* We only send out the updates if we need to */
2046 if (!force && !vnn->tcp_update_needed) {
2047 continue;
2050 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2051 &vnn->public_address,
2052 vnn->tcp_array);
2053 if (ret != 0) {
2054 D_ERR("Failed to send the tickle update for ip %s\n",
2055 ctdb_addr_to_str(&vnn->public_address));
2056 vnn->tcp_update_needed = true;
2057 } else {
2058 D_INFO("Sent tickle update for ip %s\n",
2059 ctdb_addr_to_str(&vnn->public_address));
2060 vnn->tcp_update_needed = false;
2067 perform tickle updates if required
2069 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2070 struct tevent_timer *te,
2071 struct timeval t, void *private_data)
2073 struct ctdb_context *ctdb = talloc_get_type(
2074 private_data, struct ctdb_context);
2076 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2078 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2079 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2080 ctdb_update_tcp_tickles, ctdb);
2084 start periodic update of tcp tickles
2086 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2088 ctdb->tickle_update_context = talloc_new(ctdb);
2090 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2091 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2092 ctdb_update_tcp_tickles, ctdb);
2098 struct control_gratious_arp {
2099 struct ctdb_context *ctdb;
2100 ctdb_sock_addr addr;
2101 const char *iface;
2102 int count;
2106 send a control_gratuitous arp
2108 static void send_gratious_arp(struct tevent_context *ev,
2109 struct tevent_timer *te,
2110 struct timeval t, void *private_data)
2112 int ret;
2113 struct control_gratious_arp *arp = talloc_get_type(private_data,
2114 struct control_gratious_arp);
2116 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2117 if (ret != 0) {
2118 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2119 arp->iface, strerror(ret));
2123 arp->count++;
2124 if (arp->count == CTDB_ARP_REPEAT) {
2125 talloc_free(arp);
2126 return;
2129 tevent_add_timer(arp->ctdb->ev, arp,
2130 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2131 send_gratious_arp, arp);
2136 send a gratious arp
2138 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2140 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2141 struct control_gratious_arp *arp;
2143 /* verify the size of indata */
2144 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2145 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2146 (unsigned)indata.dsize,
2147 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2148 return -1;
2150 if (indata.dsize !=
2151 ( offsetof(struct ctdb_addr_info_old, iface)
2152 + gratious_arp->len ) ){
2154 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2155 "but should be %u bytes\n",
2156 (unsigned)indata.dsize,
2157 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2158 return -1;
2162 arp = talloc(ctdb, struct control_gratious_arp);
2163 CTDB_NO_MEMORY(ctdb, arp);
2165 arp->ctdb = ctdb;
2166 arp->addr = gratious_arp->addr;
2167 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2168 CTDB_NO_MEMORY(ctdb, arp->iface);
2169 arp->count = 0;
2171 tevent_add_timer(arp->ctdb->ev, arp,
2172 timeval_zero(), send_gratious_arp, arp);
2174 return 0;
2177 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2179 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2180 int ret;
2182 /* verify the size of indata */
2183 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2184 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2185 return -1;
2187 if (indata.dsize !=
2188 ( offsetof(struct ctdb_addr_info_old, iface)
2189 + pub->len ) ){
2191 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2192 "but should be %u bytes\n",
2193 (unsigned)indata.dsize,
2194 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2195 return -1;
2198 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2200 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2202 if (ret != 0) {
2203 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2204 return -1;
2207 return 0;
2210 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2212 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2213 struct ctdb_vnn *vnn;
2215 /* verify the size of indata */
2216 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2217 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2218 return -1;
2220 if (indata.dsize !=
2221 ( offsetof(struct ctdb_addr_info_old, iface)
2222 + pub->len ) ){
2224 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2225 "but should be %u bytes\n",
2226 (unsigned)indata.dsize,
2227 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2228 return -1;
2231 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2233 /* walk over all public addresses until we find a match */
2234 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2235 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2236 if (vnn->pnn == ctdb->pnn) {
2237 /* This IP is currently being hosted.
2238 * Defer the deletion until the next
2239 * takeover run. "ctdb reloadips" will
2240 * always cause a takeover run. "ctdb
2241 * delip" will now need an explicit
2242 * "ctdb ipreallocated" afterwards. */
2243 vnn->delete_pending = true;
2244 } else {
2245 /* This IP is not hosted on the
2246 * current node so just delete it
2247 * now. */
2248 do_delete_ip(ctdb, vnn);
2251 return 0;
2255 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2256 ctdb_addr_to_str(&pub->addr)));
2257 return -1;
/* State handed to the "ipreallocated" event completion callback */
struct ipreallocated_callback_state {
	/* the control request that still needs a reply */
	struct ctdb_req_control_old *c;
};
2265 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2266 int status, void *p)
2268 struct ipreallocated_callback_state *state =
2269 talloc_get_type(p, struct ipreallocated_callback_state);
2271 if (status != 0) {
2272 DEBUG(DEBUG_ERR,
2273 (" \"ipreallocated\" event script failed (status %d)\n",
2274 status));
2275 if (status == -ETIMEDOUT) {
2276 ctdb_ban_self(ctdb);
2280 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2281 talloc_free(state);
2284 /* A control to run the ipreallocated event */
2285 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2286 struct ctdb_req_control_old *c,
2287 bool *async_reply)
2289 int ret;
2290 struct ipreallocated_callback_state *state;
2292 state = talloc(ctdb, struct ipreallocated_callback_state);
2293 CTDB_NO_MEMORY(ctdb, state);
2295 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2297 ret = ctdb_event_script_callback(ctdb, state,
2298 ctdb_ipreallocated_callback, state,
2299 CTDB_EVENT_IPREALLOCATED,
2300 "%s", "");
2302 if (ret != 0) {
2303 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2304 talloc_free(state);
2305 return -1;
2308 /* tell the control that we will be reply asynchronously */
2309 state->c = talloc_steal(state, c);
2310 *async_reply = true;
2312 return 0;
/* Bookkeeping for one in-progress "reloadips" child process */
struct ctdb_reloadips_handle {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* pending control request, answered when the handle is destroyed */
	struct ctdb_req_control_old *c;
	/* status reported in that reply */
	int status;
	/* descriptor pair; fd[0] is read to get the child's result */
	int fd[2];
	/* pid of the child, killed when the handle is destroyed */
	pid_t child;
	/* event handle -- NOTE(review): presumably watches fd[0]; confirm */
	struct tevent_fd *fde;
};
2325 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2327 if (h == h->ctdb->reload_ips) {
2328 h->ctdb->reload_ips = NULL;
2330 if (h->c != NULL) {
2331 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2332 h->c = NULL;
2334 ctdb_kill(h->ctdb, h->child, SIGKILL);
2335 return 0;
2338 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2339 struct tevent_timer *te,
2340 struct timeval t, void *private_data)
2342 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2344 talloc_free(h);
2347 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2348 struct tevent_fd *fde,
2349 uint16_t flags, void *private_data)
2351 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2353 char res;
2354 int ret;
2356 ret = sys_read(h->fd[0], &res, 1);
2357 if (ret < 1 || res != 0) {
2358 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2359 res = 1;
2361 h->status = res;
2363 talloc_free(h);
2366 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2368 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2369 struct ctdb_public_ip_list_old *ips;
2370 struct ctdb_vnn *vnn;
2371 struct client_async_data *async_data;
2372 struct timeval timeout;
2373 TDB_DATA data;
2374 struct ctdb_client_control_state *state;
2375 bool first_add;
2376 int i, ret;
2378 CTDB_NO_MEMORY(ctdb, mem_ctx);
2380 /* Read IPs from local node */
2381 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2382 CTDB_CURRENT_NODE, mem_ctx, &ips);
2383 if (ret != 0) {
2384 DEBUG(DEBUG_ERR,
2385 ("Unable to fetch public IPs from local node\n"));
2386 talloc_free(mem_ctx);
2387 return -1;
2390 /* Read IPs file - this is safe since this is a child process */
2391 ctdb->vnn = NULL;
2392 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2393 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2394 talloc_free(mem_ctx);
2395 return -1;
2398 async_data = talloc_zero(mem_ctx, struct client_async_data);
2399 CTDB_NO_MEMORY(ctdb, async_data);
2401 /* Compare IPs between node and file for IPs to be deleted */
2402 for (i = 0; i < ips->num; i++) {
2403 /* */
2404 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2405 if (ctdb_same_ip(&vnn->public_address,
2406 &ips->ips[i].addr)) {
2407 /* IP is still in file */
2408 break;
2412 if (vnn == NULL) {
2413 /* Delete IP ips->ips[i] */
2414 struct ctdb_addr_info_old *pub;
2416 DEBUG(DEBUG_NOTICE,
2417 ("IP %s no longer configured, deleting it\n",
2418 ctdb_addr_to_str(&ips->ips[i].addr)));
2420 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2421 CTDB_NO_MEMORY(ctdb, pub);
2423 pub->addr = ips->ips[i].addr;
2424 pub->mask = 0;
2425 pub->len = 0;
2427 timeout = TAKEOVER_TIMEOUT();
2429 data.dsize = offsetof(struct ctdb_addr_info_old,
2430 iface) + pub->len;
2431 data.dptr = (uint8_t *)pub;
2433 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2434 CTDB_CONTROL_DEL_PUBLIC_IP,
2435 0, data, async_data,
2436 &timeout, NULL);
2437 if (state == NULL) {
2438 DEBUG(DEBUG_ERR,
2439 (__location__
2440 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2441 goto failed;
2444 ctdb_client_async_add(async_data, state);
2448 /* Compare IPs between node and file for IPs to be added */
2449 first_add = true;
2450 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2451 for (i = 0; i < ips->num; i++) {
2452 if (ctdb_same_ip(&vnn->public_address,
2453 &ips->ips[i].addr)) {
2454 /* IP already on node */
2455 break;
2458 if (i == ips->num) {
2459 /* Add IP ips->ips[i] */
2460 struct ctdb_addr_info_old *pub;
2461 const char *ifaces = NULL;
2462 uint32_t len;
2463 struct vnn_interface *iface = NULL;
2465 DEBUG(DEBUG_NOTICE,
2466 ("New IP %s configured, adding it\n",
2467 ctdb_addr_to_str(&vnn->public_address)));
2468 if (first_add) {
2469 uint32_t pnn = ctdb_get_pnn(ctdb);
2471 data.dsize = sizeof(pnn);
2472 data.dptr = (uint8_t *)&pnn;
2474 ret = ctdb_client_send_message(
2475 ctdb,
2476 CTDB_BROADCAST_CONNECTED,
2477 CTDB_SRVID_REBALANCE_NODE,
2478 data);
2479 if (ret != 0) {
2480 DEBUG(DEBUG_WARNING,
2481 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2484 first_add = false;
2487 ifaces = vnn->ifaces->iface->name;
2488 iface = vnn->ifaces->next;
2489 while (iface != NULL) {
2490 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2491 iface->iface->name);
2492 iface = iface->next;
2495 len = strlen(ifaces) + 1;
2496 pub = talloc_zero_size(mem_ctx,
2497 offsetof(struct ctdb_addr_info_old, iface) + len);
2498 CTDB_NO_MEMORY(ctdb, pub);
2500 pub->addr = vnn->public_address;
2501 pub->mask = vnn->public_netmask_bits;
2502 pub->len = len;
2503 memcpy(&pub->iface[0], ifaces, pub->len);
2505 timeout = TAKEOVER_TIMEOUT();
2507 data.dsize = offsetof(struct ctdb_addr_info_old,
2508 iface) + pub->len;
2509 data.dptr = (uint8_t *)pub;
2511 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2512 CTDB_CONTROL_ADD_PUBLIC_IP,
2513 0, data, async_data,
2514 &timeout, NULL);
2515 if (state == NULL) {
2516 DEBUG(DEBUG_ERR,
2517 (__location__
2518 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2519 goto failed;
2522 ctdb_client_async_add(async_data, state);
2526 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2527 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2528 goto failed;
2531 talloc_free(mem_ctx);
2532 return 0;
2534 failed:
2535 talloc_free(mem_ctx);
2536 return -1;
2539 /* This control is sent to force the node to re-read the public addresses file
2540 and drop any addresses we should nnot longer host, and add new addresses
2541 that we are now able to host
2543 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2545 struct ctdb_reloadips_handle *h;
2546 pid_t parent = getpid();
2548 if (ctdb->reload_ips != NULL) {
2549 talloc_free(ctdb->reload_ips);
2550 ctdb->reload_ips = NULL;
2553 h = talloc(ctdb, struct ctdb_reloadips_handle);
2554 CTDB_NO_MEMORY(ctdb, h);
2555 h->ctdb = ctdb;
2556 h->c = NULL;
2557 h->status = -1;
2559 if (pipe(h->fd) == -1) {
2560 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2561 talloc_free(h);
2562 return -1;
2565 h->child = ctdb_fork(ctdb);
2566 if (h->child == (pid_t)-1) {
2567 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2568 close(h->fd[0]);
2569 close(h->fd[1]);
2570 talloc_free(h);
2571 return -1;
2574 /* child process */
2575 if (h->child == 0) {
2576 signed char res = 0;
2578 close(h->fd[0]);
2580 prctl_set_comment("ctdb_reloadips");
2581 if (switch_from_server_to_client(ctdb) != 0) {
2582 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2583 res = -1;
2584 } else {
2585 res = ctdb_reloadips_child(ctdb);
2586 if (res != 0) {
2587 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2591 sys_write(h->fd[1], &res, 1);
2592 ctdb_wait_for_process_to_exit(parent);
2593 _exit(0);
2596 h->c = talloc_steal(h, c);
2598 close(h->fd[1]);
2599 set_close_on_exec(h->fd[0]);
2601 talloc_set_destructor(h, ctdb_reloadips_destructor);
2604 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2605 ctdb_reloadips_child_handler, (void *)h);
2606 tevent_fd_set_auto_close(h->fde);
2608 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2609 ctdb_reloadips_timeout_event, h);
2611 /* we reply later */
2612 *async_reply = true;
2613 return 0;