ctdb-daemon: Drop unused function ctdb_local_node_got_banned()
[Samba.git] / ctdb / server / ctdb_takeover.c
blob3ac569a953a764d4e2f84185d24af2b4aeb8629f
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "protocol/protocol_util.h"
38 #include "ctdb_private.h"
39 #include "ctdb_client.h"
41 #include "common/rb_tree.h"
42 #include "common/reqid.h"
43 #include "common/system.h"
44 #include "common/system_socket.h"
45 #include "common/common.h"
46 #include "common/logging.h"
48 #include "server/ctdb_config.h"
50 #include "server/ipalloc.h"
/*
 * Time value derived from the takeover_timeout tunable.  The expansion
 * refers to a variable named "ctdb", which must be in scope wherever the
 * macro is used.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* First argument to timeval_current_ofs() when the ARP timer is re-armed */
#define CTDB_ARP_INTERVAL 1

/* Number of times the ARP/tickle callback runs before it stops itself */
#define CTDB_ARP_REPEAT 3
/* A local network interface that public addresses can be hosted on */
struct ctdb_interface {
	/* Doubly-linked list linkage (list head is ctdb->ifaces) */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;

	/* Interface name; used as the lookup key */
	const char *name;

	/* Interfaces with link_up == false are not chosen to host an IP */
	bool link_up;

	/* Count of public addresses currently assigned to this interface */
	uint32_t references;
};
/* One entry in a public address's list of candidate interfaces */
struct vnn_interface {
	/* Doubly-linked list linkage (list head is ctdb_vnn.ifaces) */
	struct vnn_interface *prev;
	struct vnn_interface *next;

	/* The candidate interface itself */
	struct ctdb_interface *iface;
};
69 /* state associated with a public ip address */
70 struct ctdb_vnn {
71 struct ctdb_vnn *prev, *next;
73 struct ctdb_interface *iface;
74 struct vnn_interface *ifaces;
75 ctdb_sock_addr public_address;
76 uint8_t public_netmask_bits;
79 * The node number that is serving this public address - set
80 * to CTDB_UNKNOWN_PNN if node is serving it
82 uint32_t pnn;
84 /* List of clients to tickle for this public address */
85 struct ctdb_tcp_array *tcp_array;
87 /* whether we need to update the other nodes with changes to our list
88 of connected clients */
89 bool tcp_update_needed;
91 /* a context to hang sending gratious arp events off */
92 TALLOC_CTX *takeover_ctx;
94 /* Set to true any time an update to this VNN is in flight.
95 This helps to avoid races. */
96 bool update_in_flight;
98 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
99 * address then this flag is set. It will be deleted in the
100 * release IP callback. */
101 bool delete_pending;
104 static const char *iface_string(const struct ctdb_interface *iface)
106 return (iface != NULL ? iface->name : "__none__");
109 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
111 return iface_string(vnn->iface);
114 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
115 const char *iface);
117 static struct ctdb_interface *
118 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
120 struct ctdb_interface *i;
122 if (strlen(iface) > CTDB_IFACE_SIZE) {
123 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
124 return NULL;
127 /* Verify that we don't have an entry for this ip yet */
128 i = ctdb_find_iface(ctdb, iface);
129 if (i != NULL) {
130 return i;
133 /* create a new structure for this interface */
134 i = talloc_zero(ctdb, struct ctdb_interface);
135 if (i == NULL) {
136 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
137 return NULL;
139 i->name = talloc_strdup(i, iface);
140 if (i->name == NULL) {
141 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
142 talloc_free(i);
143 return NULL;
146 i->link_up = true;
148 DLIST_ADD(ctdb->ifaces, i);
150 return i;
153 static bool vnn_has_interface(struct ctdb_vnn *vnn,
154 const struct ctdb_interface *iface)
156 struct vnn_interface *i;
158 for (i = vnn->ifaces; i != NULL; i = i->next) {
159 if (iface == i->iface) {
160 return true;
164 return false;
167 /* If any interfaces now have no possible IPs then delete them. This
168 * implementation is naive (i.e. simple) rather than clever
169 * (i.e. complex). Given that this is run on delip and that operation
170 * is rare, this doesn't need to be efficient - it needs to be
171 * foolproof. One alternative is reference counting, where the logic
172 * is distributed and can, therefore, be broken in multiple places.
173 * Another alternative is to build a red-black tree of interfaces that
174 * can have addresses (by walking ctdb->vnn once) and then walking
175 * ctdb->ifaces once and deleting those not in the tree. Let's go to
176 * one of those if the naive implementation causes problems... :-)
178 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
179 struct ctdb_vnn *vnn)
181 struct ctdb_interface *i, *next;
183 /* For each interface, check if there's an IP using it. */
184 for (i = ctdb->ifaces; i != NULL; i = next) {
185 struct ctdb_vnn *tv;
186 bool found;
187 next = i->next;
189 /* Only consider interfaces named in the given VNN. */
190 if (!vnn_has_interface(vnn, i)) {
191 continue;
194 /* Search for a vnn with this interface. */
195 found = false;
196 for (tv=ctdb->vnn; tv; tv=tv->next) {
197 if (vnn_has_interface(tv, i)) {
198 found = true;
199 break;
203 if (!found) {
204 /* None of the VNNs are using this interface. */
205 DLIST_REMOVE(ctdb->ifaces, i);
206 talloc_free(i);
212 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
213 const char *iface)
215 struct ctdb_interface *i;
217 for (i=ctdb->ifaces;i;i=i->next) {
218 if (strcmp(i->name, iface) == 0) {
219 return i;
223 return NULL;
226 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
227 struct ctdb_vnn *vnn)
229 struct vnn_interface *i;
230 struct ctdb_interface *cur = NULL;
231 struct ctdb_interface *best = NULL;
233 for (i = vnn->ifaces; i != NULL; i = i->next) {
235 cur = i->iface;
237 if (!cur->link_up) {
238 continue;
241 if (best == NULL) {
242 best = cur;
243 continue;
246 if (cur->references < best->references) {
247 best = cur;
248 continue;
252 return best;
255 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
256 struct ctdb_vnn *vnn)
258 struct ctdb_interface *best = NULL;
260 if (vnn->iface) {
261 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
262 "still assigned to iface '%s'\n",
263 ctdb_addr_to_str(&vnn->public_address),
264 ctdb_vnn_iface_string(vnn)));
265 return 0;
268 best = ctdb_vnn_best_iface(ctdb, vnn);
269 if (best == NULL) {
270 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
271 "cannot assign to iface any iface\n",
272 ctdb_addr_to_str(&vnn->public_address)));
273 return -1;
276 vnn->iface = best;
277 best->references++;
278 vnn->pnn = ctdb->pnn;
280 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
281 "now assigned to iface '%s' refs[%d]\n",
282 ctdb_addr_to_str(&vnn->public_address),
283 ctdb_vnn_iface_string(vnn),
284 best->references));
285 return 0;
288 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
289 struct ctdb_vnn *vnn)
291 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
292 "now unassigned (old iface '%s' refs[%d])\n",
293 ctdb_addr_to_str(&vnn->public_address),
294 ctdb_vnn_iface_string(vnn),
295 vnn->iface?vnn->iface->references:0));
296 if (vnn->iface) {
297 vnn->iface->references--;
299 vnn->iface = NULL;
300 if (vnn->pnn == ctdb->pnn) {
301 vnn->pnn = CTDB_UNKNOWN_PNN;
305 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
306 struct ctdb_vnn *vnn)
308 uint32_t flags;
309 struct vnn_interface *i;
311 /* Nodes that are not RUNNING can not host IPs */
312 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
313 return false;
316 flags = ctdb->nodes[ctdb->pnn]->flags;
317 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
318 return false;
321 if (vnn->delete_pending) {
322 return false;
325 if (vnn->iface && vnn->iface->link_up) {
326 return true;
329 for (i = vnn->ifaces; i != NULL; i = i->next) {
330 if (i->iface->link_up) {
331 return true;
335 return false;
338 struct ctdb_takeover_arp {
339 struct ctdb_context *ctdb;
340 uint32_t count;
341 ctdb_sock_addr addr;
342 struct ctdb_tcp_array *tcparray;
343 struct ctdb_vnn *vnn;
348 lists of tcp endpoints
350 struct ctdb_tcp_list {
351 struct ctdb_tcp_list *prev, *next;
352 struct ctdb_connection connection;
356 list of clients to kill on IP release
358 struct ctdb_client_ip {
359 struct ctdb_client_ip *prev, *next;
360 struct ctdb_context *ctdb;
361 ctdb_sock_addr addr;
362 uint32_t client_id;
367 send a gratuitous arp
369 static void ctdb_control_send_arp(struct tevent_context *ev,
370 struct tevent_timer *te,
371 struct timeval t, void *private_data)
373 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
374 struct ctdb_takeover_arp);
375 int ret;
376 struct ctdb_tcp_array *tcparray;
377 const char *iface = ctdb_vnn_iface_string(arp->vnn);
379 ret = ctdb_sys_send_arp(&arp->addr, iface);
380 if (ret != 0) {
381 DBG_ERR("Failed to send ARP on interface %s: %s\n",
382 iface, strerror(ret));
385 tcparray = arp->tcparray;
386 if (tcparray) {
387 unsigned int i;
389 for (i=0;i<tcparray->num;i++) {
390 struct ctdb_connection *tcon;
392 tcon = &tcparray->connections[i];
393 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
394 (unsigned)ntohs(tcon->dst.ip.sin_port),
395 ctdb_addr_to_str(&tcon->src),
396 (unsigned)ntohs(tcon->src.ip.sin_port)));
397 ret = ctdb_sys_send_tcp(
398 &tcon->src,
399 &tcon->dst,
400 0, 0, 0);
401 if (ret != 0) {
402 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
403 ctdb_addr_to_str(&tcon->src)));
408 arp->count++;
410 if (arp->count == CTDB_ARP_REPEAT) {
411 talloc_free(arp);
412 return;
415 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
416 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
417 ctdb_control_send_arp, arp);
420 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
421 struct ctdb_vnn *vnn)
423 struct ctdb_takeover_arp *arp;
424 struct ctdb_tcp_array *tcparray;
426 if (!vnn->takeover_ctx) {
427 vnn->takeover_ctx = talloc_new(vnn);
428 if (!vnn->takeover_ctx) {
429 return -1;
433 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
434 if (!arp) {
435 return -1;
438 arp->ctdb = ctdb;
439 arp->addr = vnn->public_address;
440 arp->vnn = vnn;
442 tcparray = vnn->tcp_array;
443 if (tcparray) {
444 /* add all of the known tcp connections for this IP to the
445 list of tcp connections to send tickle acks for */
446 arp->tcparray = talloc_steal(arp, tcparray);
448 vnn->tcp_array = NULL;
449 vnn->tcp_update_needed = true;
452 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
453 timeval_zero(), ctdb_control_send_arp, arp);
455 return 0;
/* Context carried from ctdb_do_takeip() to its event script callback */
struct ctdb_do_takeip_state {
	/* Control to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* Public address being taken over */
	struct ctdb_vnn *vnn;
};
464 called when takeip event finishes
466 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
467 void *private_data)
469 struct ctdb_do_takeip_state *state =
470 talloc_get_type(private_data, struct ctdb_do_takeip_state);
471 int32_t ret;
472 TDB_DATA data;
474 if (status != 0) {
475 if (status == -ETIMEDOUT) {
476 ctdb_ban_self(ctdb);
478 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
479 ctdb_addr_to_str(&state->vnn->public_address),
480 ctdb_vnn_iface_string(state->vnn)));
481 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
483 talloc_free(state);
484 return;
487 if (ctdb->do_checkpublicip) {
489 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
490 if (ret != 0) {
491 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
492 talloc_free(state);
493 return;
498 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
499 data.dsize = strlen((char *)data.dptr) + 1;
500 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
502 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
505 /* the control succeeded */
506 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
507 talloc_free(state);
508 return;
511 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
513 state->vnn->update_in_flight = false;
514 return 0;
518 take over an ip address
520 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
521 struct ctdb_req_control_old *c,
522 struct ctdb_vnn *vnn)
524 int ret;
525 struct ctdb_do_takeip_state *state;
527 if (vnn->update_in_flight) {
528 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
529 "update for this IP already in flight\n",
530 ctdb_addr_to_str(&vnn->public_address),
531 vnn->public_netmask_bits));
532 return -1;
535 ret = ctdb_vnn_assign_iface(ctdb, vnn);
536 if (ret != 0) {
537 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
538 "assign a usable interface\n",
539 ctdb_addr_to_str(&vnn->public_address),
540 vnn->public_netmask_bits));
541 return -1;
544 state = talloc(vnn, struct ctdb_do_takeip_state);
545 CTDB_NO_MEMORY(ctdb, state);
547 state->c = NULL;
548 state->vnn = vnn;
550 vnn->update_in_flight = true;
551 talloc_set_destructor(state, ctdb_takeip_destructor);
553 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
554 ctdb_addr_to_str(&vnn->public_address),
555 vnn->public_netmask_bits,
556 ctdb_vnn_iface_string(vnn)));
558 ret = ctdb_event_script_callback(ctdb,
559 state,
560 ctdb_do_takeip_callback,
561 state,
562 CTDB_EVENT_TAKE_IP,
563 "%s %s %u",
564 ctdb_vnn_iface_string(vnn),
565 ctdb_addr_to_str(&vnn->public_address),
566 vnn->public_netmask_bits);
568 if (ret != 0) {
569 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
570 ctdb_addr_to_str(&vnn->public_address),
571 ctdb_vnn_iface_string(vnn)));
572 talloc_free(state);
573 return -1;
576 state->c = talloc_steal(ctdb, c);
577 return 0;
/* Context carried from ctdb_do_updateip() to its event script callback */
struct ctdb_do_updateip_state {
	/* Control to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* Interface the address was on before the move; may be NULL */
	struct ctdb_interface *old;

	/* Public address being moved */
	struct ctdb_vnn *vnn;
};
587 called when updateip event finishes
589 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
590 void *private_data)
592 struct ctdb_do_updateip_state *state =
593 talloc_get_type(private_data, struct ctdb_do_updateip_state);
595 if (status != 0) {
596 if (status == -ETIMEDOUT) {
597 ctdb_ban_self(ctdb);
599 DEBUG(DEBUG_ERR,
600 ("Failed update of IP %s from interface %s to %s\n",
601 ctdb_addr_to_str(&state->vnn->public_address),
602 iface_string(state->old),
603 ctdb_vnn_iface_string(state->vnn)));
606 * All we can do is reset the old interface
607 * and let the next run fix it
609 ctdb_vnn_unassign_iface(ctdb, state->vnn);
610 state->vnn->iface = state->old;
611 state->vnn->iface->references++;
613 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
614 talloc_free(state);
615 return;
618 /* the control succeeded */
619 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
620 talloc_free(state);
621 return;
624 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
626 state->vnn->update_in_flight = false;
627 return 0;
631 update (move) an ip address
633 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
634 struct ctdb_req_control_old *c,
635 struct ctdb_vnn *vnn)
637 int ret;
638 struct ctdb_do_updateip_state *state;
639 struct ctdb_interface *old = vnn->iface;
640 const char *old_name = iface_string(old);
641 const char *new_name;
643 if (vnn->update_in_flight) {
644 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
645 "update for this IP already in flight\n",
646 ctdb_addr_to_str(&vnn->public_address),
647 vnn->public_netmask_bits));
648 return -1;
651 ctdb_vnn_unassign_iface(ctdb, vnn);
652 ret = ctdb_vnn_assign_iface(ctdb, vnn);
653 if (ret != 0) {
654 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
655 "assign a usable interface (old iface '%s')\n",
656 ctdb_addr_to_str(&vnn->public_address),
657 vnn->public_netmask_bits,
658 old_name));
659 return -1;
662 if (old == vnn->iface) {
663 /* A benign update from one interface onto itself.
664 * no need to run the eventscripts in this case, just return
665 * success.
667 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
668 return 0;
671 state = talloc(vnn, struct ctdb_do_updateip_state);
672 CTDB_NO_MEMORY(ctdb, state);
674 state->c = NULL;
675 state->old = old;
676 state->vnn = vnn;
678 vnn->update_in_flight = true;
679 talloc_set_destructor(state, ctdb_updateip_destructor);
681 new_name = ctdb_vnn_iface_string(vnn);
682 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
683 "interface %s to %s\n",
684 ctdb_addr_to_str(&vnn->public_address),
685 vnn->public_netmask_bits,
686 old_name,
687 new_name));
689 ret = ctdb_event_script_callback(ctdb,
690 state,
691 ctdb_do_updateip_callback,
692 state,
693 CTDB_EVENT_UPDATE_IP,
694 "%s %s %s %u",
695 old_name,
696 new_name,
697 ctdb_addr_to_str(&vnn->public_address),
698 vnn->public_netmask_bits);
699 if (ret != 0) {
700 DEBUG(DEBUG_ERR,
701 ("Failed update IP %s from interface %s to %s\n",
702 ctdb_addr_to_str(&vnn->public_address),
703 old_name, new_name));
704 talloc_free(state);
705 return -1;
708 state->c = talloc_steal(ctdb, c);
709 return 0;
713 Find the vnn of the node that has a public ip address
714 returns -1 if the address is not known as a public address
716 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
718 struct ctdb_vnn *vnn;
720 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
721 if (ctdb_same_ip(&vnn->public_address, addr)) {
722 return vnn;
726 return NULL;
730 take over an ip address
732 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
733 struct ctdb_req_control_old *c,
734 TDB_DATA indata,
735 bool *async_reply)
737 int ret;
738 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
739 struct ctdb_vnn *vnn;
740 bool have_ip = false;
741 bool do_updateip = false;
742 bool do_takeip = false;
743 struct ctdb_interface *best_iface = NULL;
745 if (pip->pnn != ctdb->pnn) {
746 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
747 "with pnn %d, but we're node %d\n",
748 ctdb_addr_to_str(&pip->addr),
749 pip->pnn, ctdb->pnn));
750 return -1;
753 /* update out vnn list */
754 vnn = find_public_ip_vnn(ctdb, &pip->addr);
755 if (vnn == NULL) {
756 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
757 ctdb_addr_to_str(&pip->addr)));
758 return 0;
761 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
762 have_ip = ctdb_sys_have_ip(&pip->addr);
764 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
765 if (best_iface == NULL) {
766 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
767 "a usable interface (old %s, have_ip %d)\n",
768 ctdb_addr_to_str(&vnn->public_address),
769 vnn->public_netmask_bits,
770 ctdb_vnn_iface_string(vnn),
771 have_ip));
772 return -1;
775 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
776 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
777 "and we have it on iface[%s], but it was assigned to node %d"
778 "and we are node %d, banning ourself\n",
779 ctdb_addr_to_str(&vnn->public_address),
780 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
781 ctdb_ban_self(ctdb);
782 return -1;
785 if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
786 /* This will cause connections to be reset and
787 * reestablished. However, this is a very unusual
788 * situation and doing this will completely repair the
789 * inconsistency in the VNN.
791 DEBUG(DEBUG_WARNING,
792 (__location__
793 " Doing updateip for IP %s already on an interface\n",
794 ctdb_addr_to_str(&vnn->public_address)));
795 do_updateip = true;
798 if (vnn->iface) {
799 if (vnn->iface != best_iface) {
800 if (!vnn->iface->link_up) {
801 do_updateip = true;
802 } else if (vnn->iface->references > (best_iface->references + 1)) {
803 /* only move when the rebalance gains something */
804 do_updateip = true;
809 if (!have_ip) {
810 if (do_updateip) {
811 ctdb_vnn_unassign_iface(ctdb, vnn);
812 do_updateip = false;
814 do_takeip = true;
817 if (do_takeip) {
818 ret = ctdb_do_takeip(ctdb, c, vnn);
819 if (ret != 0) {
820 return -1;
822 } else if (do_updateip) {
823 ret = ctdb_do_updateip(ctdb, c, vnn);
824 if (ret != 0) {
825 return -1;
827 } else {
829 * The interface is up and the kernel known the ip
830 * => do nothing
832 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833 ctdb_addr_to_str(&pip->addr),
834 vnn->public_netmask_bits,
835 ctdb_vnn_iface_string(vnn)));
836 return 0;
839 /* tell ctdb_control.c that we will be replying asynchronously */
840 *async_reply = true;
842 return 0;
845 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
847 DLIST_REMOVE(ctdb->vnn, vnn);
848 ctdb_vnn_unassign_iface(ctdb, vnn);
849 ctdb_remove_orphaned_ifaces(ctdb, vnn);
850 talloc_free(vnn);
853 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
854 struct ctdb_vnn *vnn,
855 ctdb_sock_addr *addr)
857 TDB_DATA data;
859 /* Send a message to all clients of this node telling them
860 * that the cluster has been reconfigured and they should
861 * close any connections on this IP address
863 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
864 data.dsize = strlen((char *)data.dptr)+1;
865 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
866 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
868 ctdb_vnn_unassign_iface(ctdb, vnn);
870 /* Process the IP if it has been marked for deletion */
871 if (vnn->delete_pending) {
872 do_delete_ip(ctdb, vnn);
873 return NULL;
876 return vnn;
879 struct release_ip_callback_state {
880 struct ctdb_req_control_old *c;
881 ctdb_sock_addr *addr;
882 struct ctdb_vnn *vnn;
883 uint32_t target_pnn;
887 called when releaseip event finishes
889 static void release_ip_callback(struct ctdb_context *ctdb, int status,
890 void *private_data)
892 struct release_ip_callback_state *state =
893 talloc_get_type(private_data, struct release_ip_callback_state);
895 if (status == -ETIMEDOUT) {
896 ctdb_ban_self(ctdb);
899 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
900 if (ctdb_sys_have_ip(state->addr)) {
901 DEBUG(DEBUG_ERR,
902 ("IP %s still hosted during release IP callback, failing\n",
903 ctdb_addr_to_str(state->addr)));
904 ctdb_request_control_reply(ctdb, state->c,
905 NULL, -1, NULL);
906 talloc_free(state);
907 return;
911 state->vnn->pnn = state->target_pnn;
912 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
914 /* the control succeeded */
915 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
916 talloc_free(state);
919 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
921 if (state->vnn != NULL) {
922 state->vnn->update_in_flight = false;
924 return 0;
928 release an ip address
930 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
931 struct ctdb_req_control_old *c,
932 TDB_DATA indata,
933 bool *async_reply)
935 int ret;
936 struct release_ip_callback_state *state;
937 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
938 struct ctdb_vnn *vnn;
939 const char *iface;
941 /* update our vnn list */
942 vnn = find_public_ip_vnn(ctdb, &pip->addr);
943 if (vnn == NULL) {
944 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
945 ctdb_addr_to_str(&pip->addr)));
946 return 0;
949 /* stop any previous arps */
950 talloc_free(vnn->takeover_ctx);
951 vnn->takeover_ctx = NULL;
953 /* RELEASE_IP controls are sent to all nodes that should not
954 * be hosting a particular IP. This serves 2 purposes. The
955 * first is to help resolve any inconsistencies. If a node
956 * does unexpectly host an IP then it will be released. The
957 * 2nd is to use a "redundant release" to tell non-takeover
958 * nodes where an IP is moving to. This is how "ctdb ip" can
959 * report the (likely) location of an IP by only asking the
960 * local node. Redundant releases need to update the PNN but
961 * are otherwise ignored.
963 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
964 if (!ctdb_sys_have_ip(&pip->addr)) {
965 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
966 ctdb_addr_to_str(&pip->addr),
967 vnn->public_netmask_bits,
968 ctdb_vnn_iface_string(vnn)));
969 vnn->pnn = pip->pnn;
970 ctdb_vnn_unassign_iface(ctdb, vnn);
971 return 0;
973 } else {
974 if (vnn->iface == NULL) {
975 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
976 ctdb_addr_to_str(&pip->addr),
977 vnn->public_netmask_bits));
978 vnn->pnn = pip->pnn;
979 return 0;
983 /* There is a potential race between take_ip and us because we
984 * update the VNN via a callback that run when the
985 * eventscripts have been run. Avoid the race by allowing one
986 * update to be in flight at a time.
988 if (vnn->update_in_flight) {
989 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
990 "update for this IP already in flight\n",
991 ctdb_addr_to_str(&vnn->public_address),
992 vnn->public_netmask_bits));
993 return -1;
996 iface = ctdb_vnn_iface_string(vnn);
998 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
999 ctdb_addr_to_str(&pip->addr),
1000 vnn->public_netmask_bits,
1001 iface,
1002 pip->pnn));
1004 state = talloc(ctdb, struct release_ip_callback_state);
1005 if (state == NULL) {
1006 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1007 __FILE__, __LINE__);
1008 return -1;
1011 state->c = NULL;
1012 state->addr = talloc(state, ctdb_sock_addr);
1013 if (state->addr == NULL) {
1014 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1015 __FILE__, __LINE__);
1016 talloc_free(state);
1017 return -1;
1019 *state->addr = pip->addr;
1020 state->target_pnn = pip->pnn;
1021 state->vnn = vnn;
1023 vnn->update_in_flight = true;
1024 talloc_set_destructor(state, ctdb_releaseip_destructor);
1026 ret = ctdb_event_script_callback(ctdb,
1027 state, release_ip_callback, state,
1028 CTDB_EVENT_RELEASE_IP,
1029 "%s %s %u",
1030 iface,
1031 ctdb_addr_to_str(&pip->addr),
1032 vnn->public_netmask_bits);
1033 if (ret != 0) {
1034 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1035 ctdb_addr_to_str(&pip->addr),
1036 ctdb_vnn_iface_string(vnn)));
1037 talloc_free(state);
1038 return -1;
1041 /* tell the control that we will be reply asynchronously */
1042 *async_reply = true;
1043 state->c = talloc_steal(state, c);
1044 return 0;
1047 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1048 ctdb_sock_addr *addr,
1049 unsigned mask, const char *ifaces,
1050 bool check_address)
1052 struct ctdb_vnn *vnn;
1053 char *tmp;
1054 const char *iface;
1056 /* Verify that we don't have an entry for this IP yet */
1057 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1058 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1059 DEBUG(DEBUG_ERR,
1060 ("Duplicate public IP address '%s'\n",
1061 ctdb_addr_to_str(addr)));
1062 return -1;
1066 /* Create a new VNN structure for this IP address */
1067 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1068 if (vnn == NULL) {
1069 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1070 return -1;
1072 tmp = talloc_strdup(vnn, ifaces);
1073 if (tmp == NULL) {
1074 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1075 talloc_free(vnn);
1076 return -1;
1078 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1079 struct vnn_interface *vnn_iface;
1080 struct ctdb_interface *i;
1081 if (!ctdb_sys_check_iface_exists(iface)) {
1082 DEBUG(DEBUG_ERR,
1083 ("Unknown interface %s for public address %s\n",
1084 iface, ctdb_addr_to_str(addr)));
1085 talloc_free(vnn);
1086 return -1;
1089 i = ctdb_add_local_iface(ctdb, iface);
1090 if (i == NULL) {
1091 DEBUG(DEBUG_ERR,
1092 ("Failed to add interface '%s' "
1093 "for public address %s\n",
1094 iface, ctdb_addr_to_str(addr)));
1095 talloc_free(vnn);
1096 return -1;
1099 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1100 if (vnn_iface == NULL) {
1101 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1102 talloc_free(vnn);
1103 return -1;
1106 vnn_iface->iface = i;
1107 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1109 talloc_free(tmp);
1110 vnn->public_address = *addr;
1111 vnn->public_netmask_bits = mask;
1112 vnn->pnn = -1;
1114 DLIST_ADD(ctdb->vnn, vnn);
1116 return 0;
1120 setup the public address lists from a file
1122 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1124 bool ok;
1125 char **lines;
1126 int nlines;
1127 int i;
1129 /* If no public addresses file given then try the default */
1130 if (ctdb->public_addresses_file == NULL) {
1131 const char *b = getenv("CTDB_BASE");
1132 if (b == NULL) {
1133 DBG_ERR("CTDB_BASE not set\n");
1134 return -1;
1136 ctdb->public_addresses_file = talloc_asprintf(
1137 ctdb, "%s/%s", b, "public_addresses");
1138 if (ctdb->public_addresses_file == NULL) {
1139 DBG_ERR("Out of memory\n");
1140 return -1;
1144 /* If the file doesn't exist then warn and do nothing */
1145 ok = file_exist(ctdb->public_addresses_file);
1146 if (!ok) {
1147 D_WARNING("Not loading public addresses, no file %s\n",
1148 ctdb->public_addresses_file);
1149 return 0;
1152 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1153 if (lines == NULL) {
1154 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1155 return -1;
1157 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1158 nlines--;
1161 for (i=0;i<nlines;i++) {
1162 unsigned mask;
1163 ctdb_sock_addr addr;
1164 const char *addrstr;
1165 const char *ifaces;
1166 char *tok, *line;
1167 int ret;
1169 line = lines[i];
1170 while ((*line == ' ') || (*line == '\t')) {
1171 line++;
1173 if (*line == '#') {
1174 continue;
1176 if (strcmp(line, "") == 0) {
1177 continue;
1179 tok = strtok(line, " \t");
1180 addrstr = tok;
1182 tok = strtok(NULL, " \t");
1183 if (tok == NULL) {
1184 D_ERR("No interface specified at line %u "
1185 "of public addresses file\n", i+1);
1186 talloc_free(lines);
1187 return -1;
1189 ifaces = tok;
1191 if (addrstr == NULL) {
1192 D_ERR("Badly formed line %u in public address list\n",
1193 i+1);
1194 talloc_free(lines);
1195 return -1;
1198 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1199 if (ret != 0) {
1200 D_ERR("Badly formed line %u in public address list\n",
1201 i+1);
1202 talloc_free(lines);
1203 return -1;
1206 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1207 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1208 talloc_free(lines);
1209 return -1;
1214 D_NOTICE("Loaded public addresses from %s\n",
1215 ctdb->public_addresses_file);
1217 talloc_free(lines);
1218 return 0;
1222 destroy a ctdb_client_ip structure
1224 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1226 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1227 ctdb_addr_to_str(&ip->addr),
1228 ntohs(ip->addr.ip.sin_port),
1229 ip->client_id));
1231 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1232 return 0;
1236 called by a client to inform us of a TCP connection that it is managing
1237 that should tickled with an ACK when IP takeover is done
1239 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1240 TDB_DATA indata)
1242 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1243 struct ctdb_connection *tcp_sock = NULL;
1244 struct ctdb_tcp_list *tcp;
1245 struct ctdb_connection t;
1246 int ret;
1247 TDB_DATA data;
1248 struct ctdb_client_ip *ip;
1249 struct ctdb_vnn *vnn;
1250 ctdb_sock_addr src_addr;
1251 ctdb_sock_addr dst_addr;
1253 /* If we don't have public IPs, tickles are useless */
1254 if (ctdb->vnn == NULL) {
1255 return 0;
1258 tcp_sock = (struct ctdb_connection *)indata.dptr;
1260 src_addr = tcp_sock->src;
1261 ctdb_canonicalize_ip(&src_addr, &tcp_sock->src);
1262 ZERO_STRUCT(src_addr);
1263 memcpy(&src_addr, &tcp_sock->src, sizeof(src_addr));
1265 dst_addr = tcp_sock->dst;
1266 ctdb_canonicalize_ip(&dst_addr, &tcp_sock->dst);
1267 ZERO_STRUCT(dst_addr);
1268 memcpy(&dst_addr, &tcp_sock->dst, sizeof(dst_addr));
1270 vnn = find_public_ip_vnn(ctdb, &dst_addr);
1271 if (vnn == NULL) {
1272 char *src_addr_str = NULL;
1273 char *dst_addr_str = NULL;
1275 switch (dst_addr.sa.sa_family) {
1276 case AF_INET:
1277 if (ntohl(dst_addr.ip.sin_addr.s_addr) == INADDR_LOOPBACK) {
1278 /* ignore ... */
1279 return 0;
1281 break;
1282 case AF_INET6:
1283 break;
1284 default:
1285 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n",
1286 dst_addr.sa.sa_family));
1287 return 0;
1290 src_addr_str = ctdb_sock_addr_to_string(client, &src_addr, false);
1291 dst_addr_str = ctdb_sock_addr_to_string(client, &dst_addr, false);
1292 DEBUG(DEBUG_ERR,(
1293 "Could not register TCP connection from "
1294 "%s to %s (not a public address) (port %u) "
1295 "(client_id %u pid %u).\n",
1296 src_addr_str,
1297 dst_addr_str,
1298 ctdb_sock_addr_port(&dst_addr),
1299 client_id, client->pid));
1300 TALLOC_FREE(src_addr_str);
1301 TALLOC_FREE(dst_addr_str);
1302 return 0;
1305 if (vnn->pnn != ctdb->pnn) {
1306 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1307 ctdb_addr_to_str(&dst_addr),
1308 client_id, client->pid));
1309 /* failing this call will tell smbd to die */
1310 return -1;
1313 ip = talloc(client, struct ctdb_client_ip);
1314 CTDB_NO_MEMORY(ctdb, ip);
1316 ip->ctdb = ctdb;
1317 ip->addr = dst_addr;
1318 ip->client_id = client_id;
1319 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1320 DLIST_ADD(ctdb->client_ip_list, ip);
1322 tcp = talloc(client, struct ctdb_tcp_list);
1323 CTDB_NO_MEMORY(ctdb, tcp);
1325 tcp->connection.src = tcp_sock->src;
1326 tcp->connection.dst = tcp_sock->dst;
1328 DLIST_ADD(client->tcp_list, tcp);
1330 t.src = tcp_sock->src;
1331 t.dst = tcp_sock->dst;
1333 data.dptr = (uint8_t *)&t;
1334 data.dsize = sizeof(t);
1336 switch (dst_addr.sa.sa_family) {
1337 case AF_INET:
1338 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1339 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1340 ctdb_addr_to_str(&tcp_sock->src),
1341 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1342 break;
1343 case AF_INET6:
1344 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1345 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1346 ctdb_addr_to_str(&tcp_sock->src),
1347 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1348 break;
1349 default:
1350 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n",
1351 dst_addr.sa.sa_family));
1355 /* tell all nodes about this tcp connection */
1356 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1357 CTDB_CONTROL_TCP_ADD,
1358 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1359 if (ret != 0) {
1360 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1361 return -1;
1364 return 0;
1368 find a tcp address on a list
1370 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1371 struct ctdb_connection *tcp)
1373 unsigned int i;
1375 if (array == NULL) {
1376 return NULL;
1379 for (i=0;i<array->num;i++) {
1380 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1381 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1382 return &array->connections[i];
1385 return NULL;
1391 called by a daemon to inform us of a TCP connection that one of its
1392 clients managing that should tickled with an ACK when IP takeover is
1393 done
1395 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1397 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1398 struct ctdb_tcp_array *tcparray;
1399 struct ctdb_connection tcp;
1400 struct ctdb_vnn *vnn;
1402 /* If we don't have public IPs, tickles are useless */
1403 if (ctdb->vnn == NULL) {
1404 return 0;
1407 vnn = find_public_ip_vnn(ctdb, &p->dst);
1408 if (vnn == NULL) {
1409 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1410 ctdb_addr_to_str(&p->dst)));
1412 return -1;
1416 tcparray = vnn->tcp_array;
1418 /* If this is the first tickle */
1419 if (tcparray == NULL) {
1420 tcparray = talloc(vnn, struct ctdb_tcp_array);
1421 CTDB_NO_MEMORY(ctdb, tcparray);
1422 vnn->tcp_array = tcparray;
1424 tcparray->num = 0;
1425 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1426 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1428 tcparray->connections[tcparray->num].src = p->src;
1429 tcparray->connections[tcparray->num].dst = p->dst;
1430 tcparray->num++;
1432 if (tcp_update_needed) {
1433 vnn->tcp_update_needed = true;
1435 return 0;
1439 /* Do we already have this tickle ?*/
1440 tcp.src = p->src;
1441 tcp.dst = p->dst;
1442 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1443 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1444 ctdb_addr_to_str(&tcp.dst),
1445 ntohs(tcp.dst.ip.sin_port),
1446 vnn->pnn));
1447 return 0;
1450 /* A new tickle, we must add it to the array */
1451 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1452 struct ctdb_connection,
1453 tcparray->num+1);
1454 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1456 tcparray->connections[tcparray->num].src = p->src;
1457 tcparray->connections[tcparray->num].dst = p->dst;
1458 tcparray->num++;
1460 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1461 ctdb_addr_to_str(&tcp.dst),
1462 ntohs(tcp.dst.ip.sin_port),
1463 vnn->pnn));
1465 if (tcp_update_needed) {
1466 vnn->tcp_update_needed = true;
1469 return 0;
1473 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1475 struct ctdb_connection *tcpp;
1477 if (vnn == NULL) {
1478 return;
1481 /* if the array is empty we cant remove it
1482 and we don't need to do anything
1484 if (vnn->tcp_array == NULL) {
1485 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1486 ctdb_addr_to_str(&conn->dst),
1487 ntohs(conn->dst.ip.sin_port)));
1488 return;
1492 /* See if we know this connection
1493 if we don't know this connection then we dont need to do anything
1495 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1496 if (tcpp == NULL) {
1497 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1498 ctdb_addr_to_str(&conn->dst),
1499 ntohs(conn->dst.ip.sin_port)));
1500 return;
1504 /* We need to remove this entry from the array.
1505 Instead of allocating a new array and copying data to it
1506 we cheat and just copy the last entry in the existing array
1507 to the entry that is to be removed and just shring the
1508 ->num field
1510 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1511 vnn->tcp_array->num--;
1513 /* If we deleted the last entry we also need to remove the entire array
1515 if (vnn->tcp_array->num == 0) {
1516 talloc_free(vnn->tcp_array);
1517 vnn->tcp_array = NULL;
1520 vnn->tcp_update_needed = true;
1522 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1523 ctdb_addr_to_str(&conn->src),
1524 ntohs(conn->src.ip.sin_port)));
1529 called by a daemon to inform us of a TCP connection that one of its
1530 clients used are no longer needed in the tickle database
1532 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1534 struct ctdb_vnn *vnn;
1535 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1537 /* If we don't have public IPs, tickles are useless */
1538 if (ctdb->vnn == NULL) {
1539 return 0;
1542 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1543 if (vnn == NULL) {
1544 DEBUG(DEBUG_ERR,
1545 (__location__ " unable to find public address %s\n",
1546 ctdb_addr_to_str(&conn->dst)));
1547 return 0;
1550 ctdb_remove_connection(vnn, conn);
1552 return 0;
1556 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1557 bool force);
1560 Called when another daemon starts - causes all tickles for all
1561 public addresses we are serving to be sent to the new node on the
1562 next check. This actually causes the tickles to be sent to the
1563 other node immediately. In case there is an error, the periodic
1564 timer will send the updates on timer event. This is simple and
1565 doesn't require careful error handling.
1567 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1569 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1570 (unsigned long) pnn));
1572 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1573 return 0;
1578 called when a client structure goes away - hook to remove
1579 elements from the tcp_list in all daemons
1581 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1583 while (client->tcp_list) {
1584 struct ctdb_vnn *vnn;
1585 struct ctdb_tcp_list *tcp = client->tcp_list;
1586 struct ctdb_connection *conn = &tcp->connection;
1588 DLIST_REMOVE(client->tcp_list, tcp);
1590 vnn = find_public_ip_vnn(client->ctdb,
1591 &conn->dst);
1592 if (vnn == NULL) {
1593 DEBUG(DEBUG_ERR,
1594 (__location__ " unable to find public address %s\n",
1595 ctdb_addr_to_str(&conn->dst)));
1596 continue;
1599 /* If the IP address is hosted on this node then
1600 * remove the connection. */
1601 if (vnn->pnn == client->ctdb->pnn) {
1602 ctdb_remove_connection(vnn, conn);
1605 /* Otherwise this function has been called because the
1606 * server IP address has been released to another node
1607 * and the client has exited. This means that we
1608 * should not delete the connection information. The
1609 * takeover node processes connections too. */
1614 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1616 struct ctdb_vnn *vnn, *next;
1617 int count = 0;
1619 if (ctdb_config.failover_disabled == 1) {
1620 return;
1623 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1624 /* vnn can be freed below in release_ip_post() */
1625 next = vnn->next;
1627 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1628 ctdb_vnn_unassign_iface(ctdb, vnn);
1629 continue;
1632 /* Don't allow multiple releases at once. Some code,
1633 * particularly ctdb_tickle_sentenced_connections() is
1634 * not re-entrant */
1635 if (vnn->update_in_flight) {
1636 DEBUG(DEBUG_WARNING,
1637 (__location__
1638 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1639 ctdb_addr_to_str(&vnn->public_address),
1640 vnn->public_netmask_bits,
1641 ctdb_vnn_iface_string(vnn)));
1642 continue;
1644 vnn->update_in_flight = true;
1646 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1647 ctdb_addr_to_str(&vnn->public_address),
1648 vnn->public_netmask_bits,
1649 ctdb_vnn_iface_string(vnn)));
1651 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1652 ctdb_vnn_iface_string(vnn),
1653 ctdb_addr_to_str(&vnn->public_address),
1654 vnn->public_netmask_bits);
1655 /* releaseip timeouts are converted to success, so to
1656 * detect failures just check if the IP address is
1657 * still there...
1659 if (ctdb_sys_have_ip(&vnn->public_address)) {
1660 DEBUG(DEBUG_ERR,
1661 (__location__
1662 " IP address %s not released\n",
1663 ctdb_addr_to_str(&vnn->public_address)));
1664 vnn->update_in_flight = false;
1665 continue;
1668 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1669 if (vnn != NULL) {
1670 vnn->update_in_flight = false;
1672 count++;
1675 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1680 get list of public IPs
1682 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1683 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1685 int i, num, len;
1686 struct ctdb_public_ip_list_old *ips;
1687 struct ctdb_vnn *vnn;
1688 bool only_available = false;
1690 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1691 only_available = true;
1694 /* count how many public ip structures we have */
1695 num = 0;
1696 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1697 num++;
1700 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1701 num*sizeof(struct ctdb_public_ip);
1702 ips = talloc_zero_size(outdata, len);
1703 CTDB_NO_MEMORY(ctdb, ips);
1705 i = 0;
1706 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1707 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1708 continue;
1710 ips->ips[i].pnn = vnn->pnn;
1711 ips->ips[i].addr = vnn->public_address;
1712 i++;
1714 ips->num = i;
1715 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1716 i*sizeof(struct ctdb_public_ip);
1718 outdata->dsize = len;
1719 outdata->dptr = (uint8_t *)ips;
1721 return 0;
1725 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1726 struct ctdb_req_control_old *c,
1727 TDB_DATA indata,
1728 TDB_DATA *outdata)
1730 int i, num, len;
1731 ctdb_sock_addr *addr;
1732 struct ctdb_public_ip_info_old *info;
1733 struct ctdb_vnn *vnn;
1734 struct vnn_interface *iface;
1736 addr = (ctdb_sock_addr *)indata.dptr;
1738 vnn = find_public_ip_vnn(ctdb, addr);
1739 if (vnn == NULL) {
1740 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1741 "'%s'not a public address\n",
1742 ctdb_addr_to_str(addr)));
1743 return -1;
1746 /* count how many public ip structures we have */
1747 num = 0;
1748 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1749 num++;
1752 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1753 num*sizeof(struct ctdb_iface);
1754 info = talloc_zero_size(outdata, len);
1755 CTDB_NO_MEMORY(ctdb, info);
1757 info->ip.addr = vnn->public_address;
1758 info->ip.pnn = vnn->pnn;
1759 info->active_idx = 0xFFFFFFFF;
1761 i = 0;
1762 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1763 struct ctdb_interface *cur;
1765 cur = iface->iface;
1766 if (vnn->iface == cur) {
1767 info->active_idx = i;
1769 strncpy(info->ifaces[i].name, cur->name,
1770 sizeof(info->ifaces[i].name));
1771 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1772 info->ifaces[i].link_state = cur->link_up;
1773 info->ifaces[i].references = cur->references;
1775 i++;
1777 info->num = i;
1778 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1779 i*sizeof(struct ctdb_iface);
1781 outdata->dsize = len;
1782 outdata->dptr = (uint8_t *)info;
1784 return 0;
1787 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1788 struct ctdb_req_control_old *c,
1789 TDB_DATA *outdata)
1791 int i, num, len;
1792 struct ctdb_iface_list_old *ifaces;
1793 struct ctdb_interface *cur;
1795 /* count how many public ip structures we have */
1796 num = 0;
1797 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1798 num++;
1801 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1802 num*sizeof(struct ctdb_iface);
1803 ifaces = talloc_zero_size(outdata, len);
1804 CTDB_NO_MEMORY(ctdb, ifaces);
1806 i = 0;
1807 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1808 strncpy(ifaces->ifaces[i].name, cur->name,
1809 sizeof(ifaces->ifaces[i].name));
1810 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1811 ifaces->ifaces[i].link_state = cur->link_up;
1812 ifaces->ifaces[i].references = cur->references;
1813 i++;
1815 ifaces->num = i;
1816 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1817 i*sizeof(struct ctdb_iface);
1819 outdata->dsize = len;
1820 outdata->dptr = (uint8_t *)ifaces;
1822 return 0;
1825 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1826 struct ctdb_req_control_old *c,
1827 TDB_DATA indata)
1829 struct ctdb_iface *info;
1830 struct ctdb_interface *iface;
1831 bool link_up = false;
1833 info = (struct ctdb_iface *)indata.dptr;
1835 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1836 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1837 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1838 len, len, info->name));
1839 return -1;
1842 switch (info->link_state) {
1843 case 0:
1844 link_up = false;
1845 break;
1846 case 1:
1847 link_up = true;
1848 break;
1849 default:
1850 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1851 (unsigned int)info->link_state));
1852 return -1;
1855 if (info->references != 0) {
1856 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1857 (unsigned int)info->references));
1858 return -1;
1861 iface = ctdb_find_iface(ctdb, info->name);
1862 if (iface == NULL) {
1863 return -1;
1866 if (link_up == iface->link_up) {
1867 return 0;
1870 DEBUG(DEBUG_ERR,
1871 ("iface[%s] has changed it's link status %s => %s\n",
1872 iface->name,
1873 iface->link_up?"up":"down",
1874 link_up?"up":"down"));
1876 iface->link_up = link_up;
1877 return 0;
1882 called by a daemon to inform us of the entire list of TCP tickles for
1883 a particular public address.
1884 this control should only be sent by the node that is currently serving
1885 that public address.
1887 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1889 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1890 struct ctdb_tcp_array *tcparray;
1891 struct ctdb_vnn *vnn;
1893 /* We must at least have tickles.num or else we cant verify the size
1894 of the received data blob
1896 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1897 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1898 return -1;
1901 /* verify that the size of data matches what we expect */
1902 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1903 + sizeof(struct ctdb_connection) * list->num) {
1904 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1905 return -1;
1908 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1909 ctdb_addr_to_str(&list->addr)));
1911 vnn = find_public_ip_vnn(ctdb, &list->addr);
1912 if (vnn == NULL) {
1913 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1914 ctdb_addr_to_str(&list->addr)));
1916 return 1;
1919 if (vnn->pnn == ctdb->pnn) {
1920 DEBUG(DEBUG_INFO,
1921 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1922 ctdb_addr_to_str(&list->addr)));
1923 return 0;
1926 /* remove any old ticklelist we might have */
1927 talloc_free(vnn->tcp_array);
1928 vnn->tcp_array = NULL;
1930 tcparray = talloc(vnn, struct ctdb_tcp_array);
1931 CTDB_NO_MEMORY(ctdb, tcparray);
1933 tcparray->num = list->num;
1935 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1936 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1938 memcpy(tcparray->connections, &list->connections[0],
1939 sizeof(struct ctdb_connection)*tcparray->num);
1941 /* We now have a new fresh tickle list array for this vnn */
1942 vnn->tcp_array = tcparray;
1944 return 0;
1948 called to return the full list of tickles for the puclic address associated
1949 with the provided vnn
1951 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1953 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1954 struct ctdb_tickle_list_old *list;
1955 struct ctdb_tcp_array *tcparray;
1956 unsigned int num, i;
1957 struct ctdb_vnn *vnn;
1958 unsigned port;
1960 vnn = find_public_ip_vnn(ctdb, addr);
1961 if (vnn == NULL) {
1962 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1963 ctdb_addr_to_str(addr)));
1965 return 1;
1968 port = ctdb_addr_to_port(addr);
1970 tcparray = vnn->tcp_array;
1971 num = 0;
1972 if (tcparray != NULL) {
1973 if (port == 0) {
1974 /* All connections */
1975 num = tcparray->num;
1976 } else {
1977 /* Count connections for port */
1978 for (i = 0; i < tcparray->num; i++) {
1979 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1980 num++;
1986 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1987 + sizeof(struct ctdb_connection) * num;
1989 outdata->dptr = talloc_size(outdata, outdata->dsize);
1990 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1991 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1993 list->addr = *addr;
1994 list->num = num;
1996 if (num == 0) {
1997 return 0;
2000 num = 0;
2001 for (i = 0; i < tcparray->num; i++) {
2002 if (port == 0 || \
2003 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2004 list->connections[num] = tcparray->connections[i];
2005 num++;
2009 return 0;
2014 set the list of all tcp tickles for a public address
2016 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2017 ctdb_sock_addr *addr,
2018 struct ctdb_tcp_array *tcparray)
2020 int ret, num;
2021 TDB_DATA data;
2022 struct ctdb_tickle_list_old *list;
2024 if (tcparray) {
2025 num = tcparray->num;
2026 } else {
2027 num = 0;
2030 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2031 sizeof(struct ctdb_connection) * num;
2032 data.dptr = talloc_size(ctdb, data.dsize);
2033 CTDB_NO_MEMORY(ctdb, data.dptr);
2035 list = (struct ctdb_tickle_list_old *)data.dptr;
2036 list->addr = *addr;
2037 list->num = num;
2038 if (tcparray) {
2039 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2042 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2043 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2044 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2045 if (ret != 0) {
2046 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2047 return -1;
2050 talloc_free(data.dptr);
2052 return ret;
2055 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2056 bool force)
2058 struct ctdb_vnn *vnn;
2059 int ret;
2061 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2062 /* we only send out updates for public addresses that
2063 we have taken over
2065 if (ctdb->pnn != vnn->pnn) {
2066 continue;
2069 /* We only send out the updates if we need to */
2070 if (!force && !vnn->tcp_update_needed) {
2071 continue;
2074 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2075 &vnn->public_address,
2076 vnn->tcp_array);
2077 if (ret != 0) {
2078 D_ERR("Failed to send the tickle update for ip %s\n",
2079 ctdb_addr_to_str(&vnn->public_address));
2080 vnn->tcp_update_needed = true;
2081 } else {
2082 D_INFO("Sent tickle update for ip %s\n",
2083 ctdb_addr_to_str(&vnn->public_address));
2084 vnn->tcp_update_needed = false;
2091 perform tickle updates if required
2093 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2094 struct tevent_timer *te,
2095 struct timeval t, void *private_data)
2097 struct ctdb_context *ctdb = talloc_get_type(
2098 private_data, struct ctdb_context);
2100 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2102 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2103 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2104 ctdb_update_tcp_tickles, ctdb);
2108 start periodic update of tcp tickles
2110 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2112 ctdb->tickle_update_context = talloc_new(ctdb);
2114 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2115 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2116 ctdb_update_tcp_tickles, ctdb);
2122 struct control_gratious_arp {
2123 struct ctdb_context *ctdb;
2124 ctdb_sock_addr addr;
2125 const char *iface;
2126 int count;
2130 send a control_gratuitous arp
2132 static void send_gratious_arp(struct tevent_context *ev,
2133 struct tevent_timer *te,
2134 struct timeval t, void *private_data)
2136 int ret;
2137 struct control_gratious_arp *arp = talloc_get_type(private_data,
2138 struct control_gratious_arp);
2140 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2141 if (ret != 0) {
2142 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2143 arp->iface, strerror(ret));
2147 arp->count++;
2148 if (arp->count == CTDB_ARP_REPEAT) {
2149 talloc_free(arp);
2150 return;
2153 tevent_add_timer(arp->ctdb->ev, arp,
2154 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2155 send_gratious_arp, arp);
2160 send a gratious arp
2162 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2164 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2165 struct control_gratious_arp *arp;
2167 /* verify the size of indata */
2168 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2169 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2170 (unsigned)indata.dsize,
2171 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2172 return -1;
2174 if (indata.dsize !=
2175 ( offsetof(struct ctdb_addr_info_old, iface)
2176 + gratious_arp->len ) ){
2178 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2179 "but should be %u bytes\n",
2180 (unsigned)indata.dsize,
2181 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2182 return -1;
2186 arp = talloc(ctdb, struct control_gratious_arp);
2187 CTDB_NO_MEMORY(ctdb, arp);
2189 arp->ctdb = ctdb;
2190 arp->addr = gratious_arp->addr;
2191 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2192 CTDB_NO_MEMORY(ctdb, arp->iface);
2193 arp->count = 0;
2195 tevent_add_timer(arp->ctdb->ev, arp,
2196 timeval_zero(), send_gratious_arp, arp);
2198 return 0;
2201 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2203 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2204 int ret;
2206 /* verify the size of indata */
2207 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2208 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2209 return -1;
2211 if (indata.dsize !=
2212 ( offsetof(struct ctdb_addr_info_old, iface)
2213 + pub->len ) ){
2215 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2216 "but should be %u bytes\n",
2217 (unsigned)indata.dsize,
2218 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2219 return -1;
2222 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2224 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2226 if (ret != 0) {
2227 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2228 return -1;
2231 return 0;
2234 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2236 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2237 struct ctdb_vnn *vnn;
2239 /* verify the size of indata */
2240 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2241 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2242 return -1;
2244 if (indata.dsize !=
2245 ( offsetof(struct ctdb_addr_info_old, iface)
2246 + pub->len ) ){
2248 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2249 "but should be %u bytes\n",
2250 (unsigned)indata.dsize,
2251 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2252 return -1;
2255 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2257 /* walk over all public addresses until we find a match */
2258 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2259 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2260 if (vnn->pnn == ctdb->pnn) {
2261 /* This IP is currently being hosted.
2262 * Defer the deletion until the next
2263 * takeover run. "ctdb reloadips" will
2264 * always cause a takeover run. "ctdb
2265 * delip" will now need an explicit
2266 * "ctdb ipreallocated" afterwards. */
2267 vnn->delete_pending = true;
2268 } else {
2269 /* This IP is not hosted on the
2270 * current node so just delete it
2271 * now. */
2272 do_delete_ip(ctdb, vnn);
2275 return 0;
2279 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2280 ctdb_addr_to_str(&pub->addr)));
2281 return -1;
/*
  State carried from ctdb_control_ipreallocated() to its completion
  callback.
 */
struct ipreallocated_callback_state {
	/* The control request that gets its reply from the callback */
	struct ctdb_req_control_old *c;
};
2289 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2290 int status, void *p)
2292 struct ipreallocated_callback_state *state =
2293 talloc_get_type(p, struct ipreallocated_callback_state);
2295 if (status != 0) {
2296 DEBUG(DEBUG_ERR,
2297 (" \"ipreallocated\" event script failed (status %d)\n",
2298 status));
2299 if (status == -ETIMEDOUT) {
2300 ctdb_ban_self(ctdb);
2304 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2305 talloc_free(state);
2308 /* A control to run the ipreallocated event */
2309 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2310 struct ctdb_req_control_old *c,
2311 bool *async_reply)
2313 int ret;
2314 struct ipreallocated_callback_state *state;
2316 state = talloc(ctdb, struct ipreallocated_callback_state);
2317 CTDB_NO_MEMORY(ctdb, state);
2319 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2321 ret = ctdb_event_script_callback(ctdb, state,
2322 ctdb_ipreallocated_callback, state,
2323 CTDB_EVENT_IPREALLOCATED,
2324 "%s", "");
2326 if (ret != 0) {
2327 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2328 talloc_free(state);
2329 return -1;
2332 /* tell the control that we will be reply asynchronously */
2333 state->c = talloc_steal(state, c);
2334 *async_reply = true;
2336 return 0;
/*
  Bookkeeping for one in-progress "reloadips" operation carried out by
  a child process.
 */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* daemon context */
	struct ctdb_req_control_old *c;	/* request replied to by the destructor, if set */
	int status;			/* status sent in that reply */
	int fd[2];			/* fd[0] is read by the child handler for the result */
	pid_t child;			/* pid sent SIGKILL by the destructor */
	struct tevent_fd *fde;		/* NOTE(review): presumably the event watching fd[0] - confirm */
};
2349 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2351 if (h == h->ctdb->reload_ips) {
2352 h->ctdb->reload_ips = NULL;
2354 if (h->c != NULL) {
2355 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2356 h->c = NULL;
2358 ctdb_kill(h->ctdb, h->child, SIGKILL);
2359 return 0;
2362 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2363 struct tevent_timer *te,
2364 struct timeval t, void *private_data)
2366 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2368 talloc_free(h);
2371 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2372 struct tevent_fd *fde,
2373 uint16_t flags, void *private_data)
2375 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2377 char res;
2378 int ret;
2380 ret = sys_read(h->fd[0], &res, 1);
2381 if (ret < 1 || res != 0) {
2382 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2383 res = 1;
2385 h->status = res;
2387 talloc_free(h);
2390 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2392 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2393 struct ctdb_public_ip_list_old *ips;
2394 struct ctdb_vnn *vnn;
2395 struct client_async_data *async_data;
2396 struct timeval timeout;
2397 TDB_DATA data;
2398 struct ctdb_client_control_state *state;
2399 bool first_add;
2400 unsigned int i;
2401 int ret;
2403 CTDB_NO_MEMORY(ctdb, mem_ctx);
2405 /* Read IPs from local node */
2406 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2407 CTDB_CURRENT_NODE, mem_ctx, &ips);
2408 if (ret != 0) {
2409 DEBUG(DEBUG_ERR,
2410 ("Unable to fetch public IPs from local node\n"));
2411 talloc_free(mem_ctx);
2412 return -1;
2415 /* Read IPs file - this is safe since this is a child process */
2416 ctdb->vnn = NULL;
2417 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2418 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2419 talloc_free(mem_ctx);
2420 return -1;
2423 async_data = talloc_zero(mem_ctx, struct client_async_data);
2424 CTDB_NO_MEMORY(ctdb, async_data);
2426 /* Compare IPs between node and file for IPs to be deleted */
2427 for (i = 0; i < ips->num; i++) {
2428 /* */
2429 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2430 if (ctdb_same_ip(&vnn->public_address,
2431 &ips->ips[i].addr)) {
2432 /* IP is still in file */
2433 break;
2437 if (vnn == NULL) {
2438 /* Delete IP ips->ips[i] */
2439 struct ctdb_addr_info_old *pub;
2441 DEBUG(DEBUG_NOTICE,
2442 ("IP %s no longer configured, deleting it\n",
2443 ctdb_addr_to_str(&ips->ips[i].addr)));
2445 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2446 CTDB_NO_MEMORY(ctdb, pub);
2448 pub->addr = ips->ips[i].addr;
2449 pub->mask = 0;
2450 pub->len = 0;
2452 timeout = TAKEOVER_TIMEOUT();
2454 data.dsize = offsetof(struct ctdb_addr_info_old,
2455 iface) + pub->len;
2456 data.dptr = (uint8_t *)pub;
2458 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2459 CTDB_CONTROL_DEL_PUBLIC_IP,
2460 0, data, async_data,
2461 &timeout, NULL);
2462 if (state == NULL) {
2463 DEBUG(DEBUG_ERR,
2464 (__location__
2465 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2466 goto failed;
2469 ctdb_client_async_add(async_data, state);
2473 /* Compare IPs between node and file for IPs to be added */
2474 first_add = true;
2475 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2476 for (i = 0; i < ips->num; i++) {
2477 if (ctdb_same_ip(&vnn->public_address,
2478 &ips->ips[i].addr)) {
2479 /* IP already on node */
2480 break;
2483 if (i == ips->num) {
2484 /* Add IP ips->ips[i] */
2485 struct ctdb_addr_info_old *pub;
2486 const char *ifaces = NULL;
2487 uint32_t len;
2488 struct vnn_interface *iface = NULL;
2490 DEBUG(DEBUG_NOTICE,
2491 ("New IP %s configured, adding it\n",
2492 ctdb_addr_to_str(&vnn->public_address)));
2493 if (first_add) {
2494 uint32_t pnn = ctdb_get_pnn(ctdb);
2496 data.dsize = sizeof(pnn);
2497 data.dptr = (uint8_t *)&pnn;
2499 ret = ctdb_client_send_message(
2500 ctdb,
2501 CTDB_BROADCAST_CONNECTED,
2502 CTDB_SRVID_REBALANCE_NODE,
2503 data);
2504 if (ret != 0) {
2505 DEBUG(DEBUG_WARNING,
2506 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2509 first_add = false;
2512 ifaces = vnn->ifaces->iface->name;
2513 iface = vnn->ifaces->next;
2514 while (iface != NULL) {
2515 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2516 iface->iface->name);
2517 iface = iface->next;
2520 len = strlen(ifaces) + 1;
2521 pub = talloc_zero_size(mem_ctx,
2522 offsetof(struct ctdb_addr_info_old, iface) + len);
2523 CTDB_NO_MEMORY(ctdb, pub);
2525 pub->addr = vnn->public_address;
2526 pub->mask = vnn->public_netmask_bits;
2527 pub->len = len;
2528 memcpy(&pub->iface[0], ifaces, pub->len);
2530 timeout = TAKEOVER_TIMEOUT();
2532 data.dsize = offsetof(struct ctdb_addr_info_old,
2533 iface) + pub->len;
2534 data.dptr = (uint8_t *)pub;
2536 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2537 CTDB_CONTROL_ADD_PUBLIC_IP,
2538 0, data, async_data,
2539 &timeout, NULL);
2540 if (state == NULL) {
2541 DEBUG(DEBUG_ERR,
2542 (__location__
2543 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2544 goto failed;
2547 ctdb_client_async_add(async_data, state);
2551 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2552 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2553 goto failed;
2556 talloc_free(mem_ctx);
2557 return 0;
2559 failed:
2560 talloc_free(mem_ctx);
2561 return -1;
2564 /* This control is sent to force the node to re-read the public addresses file
2565 and drop any addresses we should nnot longer host, and add new addresses
2566 that we are now able to host
2568 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2570 struct ctdb_reloadips_handle *h;
2571 pid_t parent = getpid();
2573 if (ctdb->reload_ips != NULL) {
2574 talloc_free(ctdb->reload_ips);
2575 ctdb->reload_ips = NULL;
2578 h = talloc(ctdb, struct ctdb_reloadips_handle);
2579 CTDB_NO_MEMORY(ctdb, h);
2580 h->ctdb = ctdb;
2581 h->c = NULL;
2582 h->status = -1;
2584 if (pipe(h->fd) == -1) {
2585 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2586 talloc_free(h);
2587 return -1;
2590 h->child = ctdb_fork(ctdb);
2591 if (h->child == (pid_t)-1) {
2592 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2593 close(h->fd[0]);
2594 close(h->fd[1]);
2595 talloc_free(h);
2596 return -1;
2599 /* child process */
2600 if (h->child == 0) {
2601 signed char res = 0;
2603 close(h->fd[0]);
2605 prctl_set_comment("ctdb_reloadips");
2606 if (switch_from_server_to_client(ctdb) != 0) {
2607 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2608 res = -1;
2609 } else {
2610 res = ctdb_reloadips_child(ctdb);
2611 if (res != 0) {
2612 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2616 sys_write(h->fd[1], &res, 1);
2617 ctdb_wait_for_process_to_exit(parent);
2618 _exit(0);
2621 h->c = talloc_steal(h, c);
2623 close(h->fd[1]);
2624 set_close_on_exec(h->fd[0]);
2626 talloc_set_destructor(h, ctdb_reloadips_destructor);
2629 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2630 ctdb_reloadips_child_handler, (void *)h);
2631 tevent_fd_set_auto_close(h->fde);
2633 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2634 ctdb_reloadips_timeout_event, h);
2636 /* we reply later */
2637 *async_reply = true;
2638 return 0;