ctdb-server: Drop unnecessary copy of destination address
[Samba.git] / ctdb / server / ctdb_takeover.c
blobe333105e6335272fed17a1d76fafe7ba46a0d3e8
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "protocol/protocol_util.h"
38 #include "ctdb_private.h"
39 #include "ctdb_client.h"
41 #include "common/reqid.h"
42 #include "common/system.h"
43 #include "common/system_socket.h"
44 #include "common/common.h"
45 #include "common/logging.h"
47 #include "server/ctdb_config.h"
49 #include "server/ipalloc.h"
51 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
53 #define CTDB_ARP_INTERVAL 1
54 #define CTDB_ARP_REPEAT 3
/* A local network interface that public addresses can be hosted on */
struct ctdb_interface {
	/* Doubly-linked list linkage (entries live on ctdb->ifaces) */
	struct ctdb_interface *prev, *next;
	/* Interface name */
	const char *name;
	/* Link state; new entries start out as up */
	bool link_up;
	/* Number of public addresses currently assigned to this interface */
	uint32_t references;
};
/* One element of a VNN's list of candidate interfaces */
struct vnn_interface {
	/* Doubly-linked list linkage (entries live on vnn->ifaces) */
	struct vnn_interface *prev, *next;
	/* The interface this entry refers to */
	struct ctdb_interface *iface;
};
68 /* state associated with a public ip address */
69 struct ctdb_vnn {
70 struct ctdb_vnn *prev, *next;
72 struct ctdb_interface *iface;
73 struct vnn_interface *ifaces;
74 ctdb_sock_addr public_address;
75 uint8_t public_netmask_bits;
78 * The node number that is serving this public address - set
79 * to CTDB_UNKNOWN_PNN if node is serving it
81 uint32_t pnn;
83 /* List of clients to tickle for this public address */
84 struct ctdb_tcp_array *tcp_array;
86 /* whether we need to update the other nodes with changes to our list
87 of connected clients */
88 bool tcp_update_needed;
90 /* a context to hang sending gratious arp events off */
91 TALLOC_CTX *takeover_ctx;
93 /* Set to true any time an update to this VNN is in flight.
94 This helps to avoid races. */
95 bool update_in_flight;
97 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
98 * address then this flag is set. It will be deleted in the
99 * release IP callback. */
100 bool delete_pending;
103 static const char *iface_string(const struct ctdb_interface *iface)
105 return (iface != NULL ? iface->name : "__none__");
108 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
110 return iface_string(vnn->iface);
113 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
114 const char *iface);
116 static struct ctdb_interface *
117 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
119 struct ctdb_interface *i;
121 if (strlen(iface) > CTDB_IFACE_SIZE) {
122 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
123 return NULL;
126 /* Verify that we don't have an entry for this ip yet */
127 i = ctdb_find_iface(ctdb, iface);
128 if (i != NULL) {
129 return i;
132 /* create a new structure for this interface */
133 i = talloc_zero(ctdb, struct ctdb_interface);
134 if (i == NULL) {
135 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
136 return NULL;
138 i->name = talloc_strdup(i, iface);
139 if (i->name == NULL) {
140 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
141 talloc_free(i);
142 return NULL;
145 i->link_up = true;
147 DLIST_ADD(ctdb->ifaces, i);
149 return i;
152 static bool vnn_has_interface(struct ctdb_vnn *vnn,
153 const struct ctdb_interface *iface)
155 struct vnn_interface *i;
157 for (i = vnn->ifaces; i != NULL; i = i->next) {
158 if (iface == i->iface) {
159 return true;
163 return false;
166 /* If any interfaces now have no possible IPs then delete them. This
167 * implementation is naive (i.e. simple) rather than clever
168 * (i.e. complex). Given that this is run on delip and that operation
169 * is rare, this doesn't need to be efficient - it needs to be
170 * foolproof. One alternative is reference counting, where the logic
171 * is distributed and can, therefore, be broken in multiple places.
172 * Another alternative is to build a red-black tree of interfaces that
173 * can have addresses (by walking ctdb->vnn once) and then walking
174 * ctdb->ifaces once and deleting those not in the tree. Let's go to
175 * one of those if the naive implementation causes problems... :-)
177 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
178 struct ctdb_vnn *vnn)
180 struct ctdb_interface *i, *next;
182 /* For each interface, check if there's an IP using it. */
183 for (i = ctdb->ifaces; i != NULL; i = next) {
184 struct ctdb_vnn *tv;
185 bool found;
186 next = i->next;
188 /* Only consider interfaces named in the given VNN. */
189 if (!vnn_has_interface(vnn, i)) {
190 continue;
193 /* Search for a vnn with this interface. */
194 found = false;
195 for (tv=ctdb->vnn; tv; tv=tv->next) {
196 if (vnn_has_interface(tv, i)) {
197 found = true;
198 break;
202 if (!found) {
203 /* None of the VNNs are using this interface. */
204 DLIST_REMOVE(ctdb->ifaces, i);
205 talloc_free(i);
211 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
212 const char *iface)
214 struct ctdb_interface *i;
216 for (i=ctdb->ifaces;i;i=i->next) {
217 if (strcmp(i->name, iface) == 0) {
218 return i;
222 return NULL;
225 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
226 struct ctdb_vnn *vnn)
228 struct vnn_interface *i;
229 struct ctdb_interface *cur = NULL;
230 struct ctdb_interface *best = NULL;
232 for (i = vnn->ifaces; i != NULL; i = i->next) {
234 cur = i->iface;
236 if (!cur->link_up) {
237 continue;
240 if (best == NULL) {
241 best = cur;
242 continue;
245 if (cur->references < best->references) {
246 best = cur;
247 continue;
251 return best;
254 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
255 struct ctdb_vnn *vnn)
257 struct ctdb_interface *best = NULL;
259 if (vnn->iface) {
260 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
261 "still assigned to iface '%s'\n",
262 ctdb_addr_to_str(&vnn->public_address),
263 ctdb_vnn_iface_string(vnn)));
264 return 0;
267 best = ctdb_vnn_best_iface(ctdb, vnn);
268 if (best == NULL) {
269 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
270 "cannot assign to iface any iface\n",
271 ctdb_addr_to_str(&vnn->public_address)));
272 return -1;
275 vnn->iface = best;
276 best->references++;
277 vnn->pnn = ctdb->pnn;
279 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
280 "now assigned to iface '%s' refs[%d]\n",
281 ctdb_addr_to_str(&vnn->public_address),
282 ctdb_vnn_iface_string(vnn),
283 best->references));
284 return 0;
287 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
288 struct ctdb_vnn *vnn)
290 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
291 "now unassigned (old iface '%s' refs[%d])\n",
292 ctdb_addr_to_str(&vnn->public_address),
293 ctdb_vnn_iface_string(vnn),
294 vnn->iface?vnn->iface->references:0));
295 if (vnn->iface) {
296 vnn->iface->references--;
298 vnn->iface = NULL;
299 if (vnn->pnn == ctdb->pnn) {
300 vnn->pnn = CTDB_UNKNOWN_PNN;
304 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
305 struct ctdb_vnn *vnn)
307 uint32_t flags;
308 struct vnn_interface *i;
310 /* Nodes that are not RUNNING can not host IPs */
311 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
312 return false;
315 flags = ctdb->nodes[ctdb->pnn]->flags;
316 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
317 return false;
320 if (vnn->delete_pending) {
321 return false;
324 if (vnn->iface && vnn->iface->link_up) {
325 return true;
328 for (i = vnn->ifaces; i != NULL; i = i->next) {
329 if (i->iface->link_up) {
330 return true;
334 return false;
337 struct ctdb_takeover_arp {
338 struct ctdb_context *ctdb;
339 uint32_t count;
340 ctdb_sock_addr addr;
341 struct ctdb_tcp_array *tcparray;
342 struct ctdb_vnn *vnn;
347 lists of tcp endpoints
349 struct ctdb_tcp_list {
350 struct ctdb_tcp_list *prev, *next;
351 struct ctdb_client *client;
352 struct ctdb_connection connection;
356 send a gratuitous arp
358 static void ctdb_control_send_arp(struct tevent_context *ev,
359 struct tevent_timer *te,
360 struct timeval t, void *private_data)
362 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
363 struct ctdb_takeover_arp);
364 int ret;
365 struct ctdb_tcp_array *tcparray;
366 const char *iface;
368 /* IP address might have been released between sends */
369 if (arp->vnn->iface == NULL) {
370 DBG_INFO("Cancelling ARP send for released IP %s\n",
371 ctdb_addr_to_str(&arp->vnn->public_address));
372 talloc_free(arp);
373 return;
376 iface = ctdb_vnn_iface_string(arp->vnn);
377 ret = ctdb_sys_send_arp(&arp->addr, iface);
378 if (ret != 0) {
379 DBG_ERR("Failed to send ARP on interface %s: %s\n",
380 iface, strerror(ret));
383 tcparray = arp->tcparray;
384 if (tcparray) {
385 unsigned int i;
387 for (i=0;i<tcparray->num;i++) {
388 struct ctdb_connection *tcon;
389 char buf[128];
391 tcon = &tcparray->connections[i];
392 ret = ctdb_connection_to_buf(buf,
393 sizeof(buf),
394 tcon,
395 false,
396 " -> ");
397 if (ret != 0) {
398 strlcpy(buf, "UNKNOWN", sizeof(buf));
400 D_INFO("Send TCP tickle ACK: %s\n", buf);
401 ret = ctdb_sys_send_tcp(
402 &tcon->src,
403 &tcon->dst,
404 0, 0, 0);
405 if (ret != 0) {
406 DBG_ERR("Failed to send TCP tickle ACK: %s\n",
407 buf);
412 arp->count++;
414 if (arp->count == CTDB_ARP_REPEAT) {
415 talloc_free(arp);
416 return;
419 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
420 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
421 ctdb_control_send_arp, arp);
424 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
425 struct ctdb_vnn *vnn)
427 struct ctdb_takeover_arp *arp;
428 struct ctdb_tcp_array *tcparray;
430 if (!vnn->takeover_ctx) {
431 vnn->takeover_ctx = talloc_new(vnn);
432 if (!vnn->takeover_ctx) {
433 return -1;
437 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
438 if (!arp) {
439 return -1;
442 arp->ctdb = ctdb;
443 arp->addr = vnn->public_address;
444 arp->vnn = vnn;
446 tcparray = vnn->tcp_array;
447 if (tcparray) {
448 /* add all of the known tcp connections for this IP to the
449 list of tcp connections to send tickle acks for */
450 arp->tcparray = talloc_steal(arp, tcparray);
452 vnn->tcp_array = NULL;
453 vnn->tcp_update_needed = true;
456 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
457 timeval_zero(), ctdb_control_send_arp, arp);
459 return 0;
/* State for an in-flight "takeip" event */
struct ctdb_do_takeip_state {
	/* The control to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* The public address being taken over */
	struct ctdb_vnn *vnn;
};
468 called when takeip event finishes
470 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
471 void *private_data)
473 struct ctdb_do_takeip_state *state =
474 talloc_get_type(private_data, struct ctdb_do_takeip_state);
475 int32_t ret;
476 TDB_DATA data;
478 if (status != 0) {
479 if (status == -ETIMEDOUT) {
480 ctdb_ban_self(ctdb);
482 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
483 ctdb_addr_to_str(&state->vnn->public_address),
484 ctdb_vnn_iface_string(state->vnn)));
485 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
487 talloc_free(state);
488 return;
491 if (ctdb->do_checkpublicip) {
493 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
494 if (ret != 0) {
495 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
496 talloc_free(state);
497 return;
502 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
503 data.dsize = strlen((char *)data.dptr) + 1;
504 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
506 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
509 /* the control succeeded */
510 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
511 talloc_free(state);
512 return;
515 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
517 state->vnn->update_in_flight = false;
518 return 0;
522 take over an ip address
524 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
525 struct ctdb_req_control_old *c,
526 struct ctdb_vnn *vnn)
528 int ret;
529 struct ctdb_do_takeip_state *state;
531 if (vnn->update_in_flight) {
532 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
533 "update for this IP already in flight\n",
534 ctdb_addr_to_str(&vnn->public_address),
535 vnn->public_netmask_bits));
536 return -1;
539 ret = ctdb_vnn_assign_iface(ctdb, vnn);
540 if (ret != 0) {
541 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
542 "assign a usable interface\n",
543 ctdb_addr_to_str(&vnn->public_address),
544 vnn->public_netmask_bits));
545 return -1;
548 state = talloc(vnn, struct ctdb_do_takeip_state);
549 CTDB_NO_MEMORY(ctdb, state);
551 state->c = NULL;
552 state->vnn = vnn;
554 vnn->update_in_flight = true;
555 talloc_set_destructor(state, ctdb_takeip_destructor);
557 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
558 ctdb_addr_to_str(&vnn->public_address),
559 vnn->public_netmask_bits,
560 ctdb_vnn_iface_string(vnn)));
562 ret = ctdb_event_script_callback(ctdb,
563 state,
564 ctdb_do_takeip_callback,
565 state,
566 CTDB_EVENT_TAKE_IP,
567 "%s %s %u",
568 ctdb_vnn_iface_string(vnn),
569 ctdb_addr_to_str(&vnn->public_address),
570 vnn->public_netmask_bits);
572 if (ret != 0) {
573 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
574 ctdb_addr_to_str(&vnn->public_address),
575 ctdb_vnn_iface_string(vnn)));
576 talloc_free(state);
577 return -1;
580 state->c = talloc_steal(ctdb, c);
581 return 0;
/* State for an in-flight "updateip" event */
struct ctdb_do_updateip_state {
	/* The control to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* Interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;
	/* The public address being moved */
	struct ctdb_vnn *vnn;
};
591 called when updateip event finishes
593 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
594 void *private_data)
596 struct ctdb_do_updateip_state *state =
597 talloc_get_type(private_data, struct ctdb_do_updateip_state);
599 if (status != 0) {
600 if (status == -ETIMEDOUT) {
601 ctdb_ban_self(ctdb);
603 DEBUG(DEBUG_ERR,
604 ("Failed update of IP %s from interface %s to %s\n",
605 ctdb_addr_to_str(&state->vnn->public_address),
606 iface_string(state->old),
607 ctdb_vnn_iface_string(state->vnn)));
610 * All we can do is reset the old interface
611 * and let the next run fix it
613 ctdb_vnn_unassign_iface(ctdb, state->vnn);
614 state->vnn->iface = state->old;
615 state->vnn->iface->references++;
617 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
618 talloc_free(state);
619 return;
622 /* the control succeeded */
623 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
624 talloc_free(state);
625 return;
628 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
630 state->vnn->update_in_flight = false;
631 return 0;
635 update (move) an ip address
637 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
638 struct ctdb_req_control_old *c,
639 struct ctdb_vnn *vnn)
641 int ret;
642 struct ctdb_do_updateip_state *state;
643 struct ctdb_interface *old = vnn->iface;
644 const char *old_name = iface_string(old);
645 const char *new_name;
647 if (vnn->update_in_flight) {
648 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
649 "update for this IP already in flight\n",
650 ctdb_addr_to_str(&vnn->public_address),
651 vnn->public_netmask_bits));
652 return -1;
655 ctdb_vnn_unassign_iface(ctdb, vnn);
656 ret = ctdb_vnn_assign_iface(ctdb, vnn);
657 if (ret != 0) {
658 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
659 "assign a usable interface (old iface '%s')\n",
660 ctdb_addr_to_str(&vnn->public_address),
661 vnn->public_netmask_bits,
662 old_name));
663 return -1;
666 if (old == vnn->iface) {
667 /* A benign update from one interface onto itself.
668 * no need to run the eventscripts in this case, just return
669 * success.
671 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
672 return 0;
675 state = talloc(vnn, struct ctdb_do_updateip_state);
676 CTDB_NO_MEMORY(ctdb, state);
678 state->c = NULL;
679 state->old = old;
680 state->vnn = vnn;
682 vnn->update_in_flight = true;
683 talloc_set_destructor(state, ctdb_updateip_destructor);
685 new_name = ctdb_vnn_iface_string(vnn);
686 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
687 "interface %s to %s\n",
688 ctdb_addr_to_str(&vnn->public_address),
689 vnn->public_netmask_bits,
690 old_name,
691 new_name));
693 ret = ctdb_event_script_callback(ctdb,
694 state,
695 ctdb_do_updateip_callback,
696 state,
697 CTDB_EVENT_UPDATE_IP,
698 "%s %s %s %u",
699 old_name,
700 new_name,
701 ctdb_addr_to_str(&vnn->public_address),
702 vnn->public_netmask_bits);
703 if (ret != 0) {
704 DEBUG(DEBUG_ERR,
705 ("Failed update IP %s from interface %s to %s\n",
706 ctdb_addr_to_str(&vnn->public_address),
707 old_name, new_name));
708 talloc_free(state);
709 return -1;
712 state->c = talloc_steal(ctdb, c);
713 return 0;
717 Find the vnn of the node that has a public ip address
718 returns -1 if the address is not known as a public address
720 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
722 struct ctdb_vnn *vnn;
724 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
725 if (ctdb_same_ip(&vnn->public_address, addr)) {
726 return vnn;
730 return NULL;
734 take over an ip address
736 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
737 struct ctdb_req_control_old *c,
738 TDB_DATA indata,
739 bool *async_reply)
741 int ret;
742 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
743 struct ctdb_vnn *vnn;
744 bool have_ip = false;
745 bool do_updateip = false;
746 bool do_takeip = false;
747 struct ctdb_interface *best_iface = NULL;
749 if (pip->pnn != ctdb->pnn) {
750 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
751 "with pnn %d, but we're node %d\n",
752 ctdb_addr_to_str(&pip->addr),
753 pip->pnn, ctdb->pnn));
754 return -1;
757 /* update out vnn list */
758 vnn = find_public_ip_vnn(ctdb, &pip->addr);
759 if (vnn == NULL) {
760 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
761 ctdb_addr_to_str(&pip->addr)));
762 return 0;
765 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
766 have_ip = ctdb_sys_have_ip(&pip->addr);
768 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
769 if (best_iface == NULL) {
770 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
771 "a usable interface (old %s, have_ip %d)\n",
772 ctdb_addr_to_str(&vnn->public_address),
773 vnn->public_netmask_bits,
774 ctdb_vnn_iface_string(vnn),
775 have_ip));
776 return -1;
779 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
780 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
781 "and we have it on iface[%s], but it was assigned to node %d"
782 "and we are node %d, banning ourself\n",
783 ctdb_addr_to_str(&vnn->public_address),
784 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
785 ctdb_ban_self(ctdb);
786 return -1;
789 if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
790 /* This will cause connections to be reset and
791 * reestablished. However, this is a very unusual
792 * situation and doing this will completely repair the
793 * inconsistency in the VNN.
795 DEBUG(DEBUG_WARNING,
796 (__location__
797 " Doing updateip for IP %s already on an interface\n",
798 ctdb_addr_to_str(&vnn->public_address)));
799 do_updateip = true;
802 if (vnn->iface) {
803 if (vnn->iface != best_iface) {
804 if (!vnn->iface->link_up) {
805 do_updateip = true;
806 } else if (vnn->iface->references > (best_iface->references + 1)) {
807 /* only move when the rebalance gains something */
808 do_updateip = true;
813 if (!have_ip) {
814 if (do_updateip) {
815 ctdb_vnn_unassign_iface(ctdb, vnn);
816 do_updateip = false;
818 do_takeip = true;
821 if (do_takeip) {
822 ret = ctdb_do_takeip(ctdb, c, vnn);
823 if (ret != 0) {
824 return -1;
826 } else if (do_updateip) {
827 ret = ctdb_do_updateip(ctdb, c, vnn);
828 if (ret != 0) {
829 return -1;
831 } else {
833 * The interface is up and the kernel known the ip
834 * => do nothing
836 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
837 ctdb_addr_to_str(&pip->addr),
838 vnn->public_netmask_bits,
839 ctdb_vnn_iface_string(vnn)));
840 return 0;
843 /* tell ctdb_control.c that we will be replying asynchronously */
844 *async_reply = true;
846 return 0;
849 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
851 DLIST_REMOVE(ctdb->vnn, vnn);
852 ctdb_vnn_unassign_iface(ctdb, vnn);
853 ctdb_remove_orphaned_ifaces(ctdb, vnn);
854 talloc_free(vnn);
857 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
858 struct ctdb_vnn *vnn,
859 ctdb_sock_addr *addr)
861 TDB_DATA data;
863 /* Send a message to all clients of this node telling them
864 * that the cluster has been reconfigured and they should
865 * close any connections on this IP address
867 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
868 data.dsize = strlen((char *)data.dptr)+1;
869 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
870 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
872 ctdb_vnn_unassign_iface(ctdb, vnn);
874 /* Process the IP if it has been marked for deletion */
875 if (vnn->delete_pending) {
876 do_delete_ip(ctdb, vnn);
877 return NULL;
880 return vnn;
883 struct release_ip_callback_state {
884 struct ctdb_req_control_old *c;
885 ctdb_sock_addr *addr;
886 struct ctdb_vnn *vnn;
887 uint32_t target_pnn;
891 called when releaseip event finishes
893 static void release_ip_callback(struct ctdb_context *ctdb, int status,
894 void *private_data)
896 struct release_ip_callback_state *state =
897 talloc_get_type(private_data, struct release_ip_callback_state);
899 if (status == -ETIMEDOUT) {
900 ctdb_ban_self(ctdb);
903 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
904 if (ctdb_sys_have_ip(state->addr)) {
905 DEBUG(DEBUG_ERR,
906 ("IP %s still hosted during release IP callback, failing\n",
907 ctdb_addr_to_str(state->addr)));
908 ctdb_request_control_reply(ctdb, state->c,
909 NULL, -1, NULL);
910 talloc_free(state);
911 return;
915 state->vnn->pnn = state->target_pnn;
916 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
918 /* the control succeeded */
919 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
920 talloc_free(state);
923 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
925 if (state->vnn != NULL) {
926 state->vnn->update_in_flight = false;
928 return 0;
932 release an ip address
934 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
935 struct ctdb_req_control_old *c,
936 TDB_DATA indata,
937 bool *async_reply)
939 int ret;
940 struct release_ip_callback_state *state;
941 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
942 struct ctdb_vnn *vnn;
943 const char *iface;
945 /* update our vnn list */
946 vnn = find_public_ip_vnn(ctdb, &pip->addr);
947 if (vnn == NULL) {
948 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
949 ctdb_addr_to_str(&pip->addr)));
950 return 0;
953 /* stop any previous arps */
954 talloc_free(vnn->takeover_ctx);
955 vnn->takeover_ctx = NULL;
957 /* RELEASE_IP controls are sent to all nodes that should not
958 * be hosting a particular IP. This serves 2 purposes. The
959 * first is to help resolve any inconsistencies. If a node
960 * does unexpectedly host an IP then it will be released. The
961 * 2nd is to use a "redundant release" to tell non-takeover
962 * nodes where an IP is moving to. This is how "ctdb ip" can
963 * report the (likely) location of an IP by only asking the
964 * local node. Redundant releases need to update the PNN but
965 * are otherwise ignored.
967 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
968 if (!ctdb_sys_have_ip(&pip->addr)) {
969 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
970 ctdb_addr_to_str(&pip->addr),
971 vnn->public_netmask_bits,
972 ctdb_vnn_iface_string(vnn)));
973 vnn->pnn = pip->pnn;
974 ctdb_vnn_unassign_iface(ctdb, vnn);
975 return 0;
977 } else {
978 if (vnn->iface == NULL) {
979 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
980 ctdb_addr_to_str(&pip->addr),
981 vnn->public_netmask_bits));
982 vnn->pnn = pip->pnn;
983 return 0;
987 /* There is a potential race between take_ip and us because we
988 * update the VNN via a callback that run when the
989 * eventscripts have been run. Avoid the race by allowing one
990 * update to be in flight at a time.
992 if (vnn->update_in_flight) {
993 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
994 "update for this IP already in flight\n",
995 ctdb_addr_to_str(&vnn->public_address),
996 vnn->public_netmask_bits));
997 return -1;
1000 iface = ctdb_vnn_iface_string(vnn);
1002 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
1003 ctdb_addr_to_str(&pip->addr),
1004 vnn->public_netmask_bits,
1005 iface,
1006 pip->pnn));
1008 state = talloc(ctdb, struct release_ip_callback_state);
1009 if (state == NULL) {
1010 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1011 __FILE__, __LINE__);
1012 return -1;
1015 state->c = NULL;
1016 state->addr = talloc(state, ctdb_sock_addr);
1017 if (state->addr == NULL) {
1018 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1019 __FILE__, __LINE__);
1020 talloc_free(state);
1021 return -1;
1023 *state->addr = pip->addr;
1024 state->target_pnn = pip->pnn;
1025 state->vnn = vnn;
1027 vnn->update_in_flight = true;
1028 talloc_set_destructor(state, ctdb_releaseip_destructor);
1030 ret = ctdb_event_script_callback(ctdb,
1031 state, release_ip_callback, state,
1032 CTDB_EVENT_RELEASE_IP,
1033 "%s %s %u",
1034 iface,
1035 ctdb_addr_to_str(&pip->addr),
1036 vnn->public_netmask_bits);
1037 if (ret != 0) {
1038 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1039 ctdb_addr_to_str(&pip->addr),
1040 ctdb_vnn_iface_string(vnn)));
1041 talloc_free(state);
1042 return -1;
1045 /* tell the control that we will be reply asynchronously */
1046 *async_reply = true;
1047 state->c = talloc_steal(state, c);
1048 return 0;
1051 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1052 ctdb_sock_addr *addr,
1053 unsigned mask, const char *ifaces,
1054 bool check_address)
1056 struct ctdb_vnn *vnn;
1057 char *tmp;
1058 const char *iface;
1060 /* Verify that we don't have an entry for this IP yet */
1061 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1062 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1063 D_ERR("Duplicate public IP address '%s'\n",
1064 ctdb_addr_to_str(addr));
1065 return -1;
1069 /* Create a new VNN structure for this IP address */
1070 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1071 if (vnn == NULL) {
1072 DBG_ERR("Memory allocation error\n");
1073 return -1;
1075 tmp = talloc_strdup(vnn, ifaces);
1076 if (tmp == NULL) {
1077 DBG_ERR("Memory allocation error\n");
1078 talloc_free(vnn);
1079 return -1;
1081 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1082 struct vnn_interface *vnn_iface;
1083 struct ctdb_interface *i;
1085 if (!ctdb_sys_check_iface_exists(iface)) {
1086 D_ERR("Unknown interface %s for public address %s\n",
1087 iface,
1088 ctdb_addr_to_str(addr));
1089 talloc_free(vnn);
1090 return -1;
1093 i = ctdb_add_local_iface(ctdb, iface);
1094 if (i == NULL) {
1095 D_ERR("Failed to add interface '%s' "
1096 "for public address %s\n",
1097 iface,
1098 ctdb_addr_to_str(addr));
1099 talloc_free(vnn);
1100 return -1;
1103 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1104 if (vnn_iface == NULL) {
1105 DBG_ERR("Memory allocation error\n");
1106 talloc_free(vnn);
1107 return -1;
1110 vnn_iface->iface = i;
1111 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1113 talloc_free(tmp);
1114 vnn->public_address = *addr;
1115 vnn->public_netmask_bits = mask;
1116 vnn->pnn = -1;
1118 DLIST_ADD(ctdb->vnn, vnn);
1120 return 0;
1124 setup the public address lists from a file
1126 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1128 bool ok;
1129 char **lines;
1130 int nlines;
1131 int i;
1133 /* If no public addresses file given then try the default */
1134 if (ctdb->public_addresses_file == NULL) {
1135 const char *b = getenv("CTDB_BASE");
1136 if (b == NULL) {
1137 DBG_ERR("CTDB_BASE not set\n");
1138 return -1;
1140 ctdb->public_addresses_file = talloc_asprintf(
1141 ctdb, "%s/%s", b, "public_addresses");
1142 if (ctdb->public_addresses_file == NULL) {
1143 DBG_ERR("Out of memory\n");
1144 return -1;
1148 /* If the file doesn't exist then warn and do nothing */
1149 ok = file_exist(ctdb->public_addresses_file);
1150 if (!ok) {
1151 D_WARNING("Not loading public addresses, no file %s\n",
1152 ctdb->public_addresses_file);
1153 return 0;
1156 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1157 if (lines == NULL) {
1158 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1159 return -1;
1161 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1162 nlines--;
1165 for (i=0;i<nlines;i++) {
1166 unsigned mask;
1167 ctdb_sock_addr addr;
1168 const char *addrstr;
1169 const char *ifaces;
1170 char *tok, *line;
1171 int ret;
1173 line = lines[i];
1174 while ((*line == ' ') || (*line == '\t')) {
1175 line++;
1177 if (*line == '#') {
1178 continue;
1180 if (strcmp(line, "") == 0) {
1181 continue;
1183 tok = strtok(line, " \t");
1184 addrstr = tok;
1186 tok = strtok(NULL, " \t");
1187 if (tok == NULL) {
1188 D_ERR("No interface specified at line %u "
1189 "of public addresses file\n", i+1);
1190 talloc_free(lines);
1191 return -1;
1193 ifaces = tok;
1195 if (addrstr == NULL) {
1196 D_ERR("Badly formed line %u in public address list\n",
1197 i+1);
1198 talloc_free(lines);
1199 return -1;
1202 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1203 if (ret != 0) {
1204 D_ERR("Badly formed line %u in public address list\n",
1205 i+1);
1206 talloc_free(lines);
1207 return -1;
1210 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1211 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1212 talloc_free(lines);
1213 return -1;
1218 D_NOTICE("Loaded public addresses from %s\n",
1219 ctdb->public_addresses_file);
1221 talloc_free(lines);
1222 return 0;
1226 destroy a ctdb_tcp_list structure
1228 static int ctdb_tcp_list_destructor(struct ctdb_tcp_list *tcp)
1230 struct ctdb_client *client = tcp->client;
1231 struct ctdb_connection *conn = &tcp->connection;
1232 char conn_str[132] = { 0, };
1233 int ret;
1235 ret = ctdb_connection_to_buf(conn_str,
1236 sizeof(conn_str),
1237 conn,
1238 false,
1239 " -> ");
1240 if (ret != 0) {
1241 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1244 D_DEBUG("removing client TCP connection %s "
1245 "(client_id %u pid %d)\n",
1246 conn_str, client->client_id, client->pid);
1248 DLIST_REMOVE(client->tcp_list, tcp);
1251 * We don't call ctdb_remove_connection(vnn, conn) here
1252 * as we want the caller to decide if it's called
1253 * directly (local only) or indirectly via a
1254 * CTDB_CONTROL_TCP_REMOVE broadcast
1257 return 0;
1261 called by a client to inform us of a TCP connection that it is managing
1262 that should tickled with an ACK when IP takeover is done
1264 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1265 TDB_DATA indata)
1267 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1268 struct ctdb_connection *tcp_sock = NULL;
1269 struct ctdb_tcp_list *tcp;
1270 struct ctdb_connection t;
1271 int ret;
1272 TDB_DATA data;
1273 struct ctdb_vnn *vnn;
1274 char conn_str[132] = { 0, };
1276 /* If we don't have public IPs, tickles are useless */
1277 if (ctdb->vnn == NULL) {
1278 return 0;
1281 tcp_sock = (struct ctdb_connection *)indata.dptr;
1283 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1284 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1286 ret = ctdb_connection_to_buf(conn_str,
1287 sizeof(conn_str),
1288 tcp_sock,
1289 false,
1290 " -> ");
1291 if (ret != 0) {
1292 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1295 vnn = find_public_ip_vnn(ctdb, &tcp_sock->dst);
1296 if (vnn == NULL) {
1297 D_ERR("Could not register TCP connection %s - "
1298 "not a public address (client_id %u pid %u)\n",
1299 conn_str, client_id, client->pid);
1300 return 0;
1303 if (vnn->pnn != ctdb->pnn) {
1304 D_ERR("Attempt to register tcp client for IP %s we don't hold - "
1305 "failing (client_id %u pid %u)\n",
1306 ctdb_addr_to_str(&tcp_sock->dst),
1307 client_id, client->pid);
1308 /* failing this call will tell smbd to die */
1309 return -1;
1312 tcp = talloc(client, struct ctdb_tcp_list);
1313 CTDB_NO_MEMORY(ctdb, tcp);
1314 tcp->client = client;
1316 tcp->connection.src = tcp_sock->src;
1317 tcp->connection.dst = tcp_sock->dst;
1319 DLIST_ADD(client->tcp_list, tcp);
1320 talloc_set_destructor(tcp, ctdb_tcp_list_destructor);
1322 t.src = tcp_sock->src;
1323 t.dst = tcp_sock->dst;
1325 data.dptr = (uint8_t *)&t;
1326 data.dsize = sizeof(t);
1328 D_INFO("Registered TCP connection %s (client_id %u pid %u)\n",
1329 conn_str, client_id, client->pid);
1331 /* tell all nodes about this tcp connection */
1332 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1333 CTDB_CONTROL_TCP_ADD,
1334 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1335 if (ret != 0) {
1336 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1337 return -1;
1340 return 0;
1343 static bool ctdb_client_remove_tcp(struct ctdb_client *client,
1344 const struct ctdb_connection *conn)
1346 struct ctdb_tcp_list *tcp = NULL;
1347 struct ctdb_tcp_list *tcp_next = NULL;
1348 bool found = false;
1350 for (tcp = client->tcp_list; tcp != NULL; tcp = tcp_next) {
1351 bool same;
1353 tcp_next = tcp->next;
1355 same = ctdb_connection_same(conn, &tcp->connection);
1356 if (!same) {
1357 continue;
1360 TALLOC_FREE(tcp);
1361 found = true;
1364 return found;
1368 called by a client to inform us of a TCP connection that was disconnected
1370 int32_t ctdb_control_tcp_client_disconnected(struct ctdb_context *ctdb,
1371 uint32_t client_id,
1372 TDB_DATA indata)
1374 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1375 struct ctdb_connection *tcp_sock = NULL;
1376 int ret;
1377 TDB_DATA data;
1378 char conn_str[132] = { 0, };
1379 bool found = false;
1381 tcp_sock = (struct ctdb_connection *)indata.dptr;
1383 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1384 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1386 ret = ctdb_connection_to_buf(conn_str,
1387 sizeof(conn_str),
1388 tcp_sock,
1389 false,
1390 " -> ");
1391 if (ret != 0) {
1392 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1395 found = ctdb_client_remove_tcp(client, tcp_sock);
1396 if (!found) {
1397 DBG_DEBUG("TCP connection %s not found "
1398 "(client_id %u pid %u).\n",
1399 conn_str, client_id, client->pid);
1400 return 0;
1403 D_INFO("deregistered TCP connection %s "
1404 "(client_id %u pid %u)\n",
1405 conn_str, client_id, client->pid);
1407 data.dptr = (uint8_t *)tcp_sock;
1408 data.dsize = sizeof(*tcp_sock);
1410 /* tell all nodes about this tcp connection is gone */
1411 ret = ctdb_daemon_send_control(ctdb,
1412 CTDB_BROADCAST_CONNECTED,
1414 CTDB_CONTROL_TCP_REMOVE,
1416 CTDB_CTRL_FLAG_NOREPLY,
1417 data,
1418 NULL,
1419 NULL);
1420 if (ret != 0) {
1421 DBG_ERR("Failed to send CTDB_CONTROL_TCP_REMOVE: %s\n",
1422 conn_str);
1423 return -1;
1426 return 0;
1430 called by a client to inform us of a TCP connection was passed to a different
1431 "client" (typically with multichannel to another smbd process).
1433 int32_t ctdb_control_tcp_client_passed(struct ctdb_context *ctdb,
1434 uint32_t client_id,
1435 TDB_DATA indata)
1437 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1438 struct ctdb_connection *tcp_sock = NULL;
1439 int ret;
1440 char conn_str[132] = { 0, };
1441 bool found = false;
1443 tcp_sock = (struct ctdb_connection *)indata.dptr;
1445 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1446 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1448 ret = ctdb_connection_to_buf(conn_str,
1449 sizeof(conn_str),
1450 tcp_sock,
1451 false,
1452 " -> ");
1453 if (ret != 0) {
1454 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1457 found = ctdb_client_remove_tcp(client, tcp_sock);
1458 if (!found) {
1459 DBG_DEBUG("TCP connection from %s not found "
1460 "(client_id %u pid %u).\n",
1461 conn_str, client_id, client->pid);
1462 return 0;
1465 D_INFO("TCP connection from %s "
1466 "(client_id %u pid %u) passed to another client\n",
1467 conn_str, client_id, client->pid);
1470 * We don't call CTDB_CONTROL_TCP_REMOVE
1471 * nor ctdb_remove_connection() as the connection
1472 * is still alive, but handled by another client
1475 return 0;
1479 find a tcp address on a list
1481 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1482 struct ctdb_connection *tcp)
1484 unsigned int i;
1486 if (array == NULL) {
1487 return NULL;
1490 for (i=0;i<array->num;i++) {
1491 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1492 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1493 return &array->connections[i];
1496 return NULL;
1502 called by a daemon to inform us of a TCP connection that one of its
1503 clients managing that should tickled with an ACK when IP takeover is
1504 done
1506 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1508 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1509 struct ctdb_tcp_array *tcparray;
1510 struct ctdb_connection tcp;
1511 struct ctdb_vnn *vnn;
1513 /* If we don't have public IPs, tickles are useless */
1514 if (ctdb->vnn == NULL) {
1515 return 0;
1518 vnn = find_public_ip_vnn(ctdb, &p->dst);
1519 if (vnn == NULL) {
1520 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1521 ctdb_addr_to_str(&p->dst)));
1523 return -1;
1527 tcparray = vnn->tcp_array;
1529 /* If this is the first tickle */
1530 if (tcparray == NULL) {
1531 tcparray = talloc(vnn, struct ctdb_tcp_array);
1532 CTDB_NO_MEMORY(ctdb, tcparray);
1533 vnn->tcp_array = tcparray;
1535 tcparray->num = 0;
1536 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1537 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1539 tcparray->connections[tcparray->num].src = p->src;
1540 tcparray->connections[tcparray->num].dst = p->dst;
1541 tcparray->num++;
1543 if (tcp_update_needed) {
1544 vnn->tcp_update_needed = true;
1546 return 0;
1550 /* Do we already have this tickle ?*/
1551 tcp.src = p->src;
1552 tcp.dst = p->dst;
1553 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1554 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1555 ctdb_addr_to_str(&tcp.dst),
1556 ntohs(tcp.dst.ip.sin_port),
1557 vnn->pnn));
1558 return 0;
1561 /* A new tickle, we must add it to the array */
1562 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1563 struct ctdb_connection,
1564 tcparray->num+1);
1565 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1567 tcparray->connections[tcparray->num].src = p->src;
1568 tcparray->connections[tcparray->num].dst = p->dst;
1569 tcparray->num++;
1571 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1572 ctdb_addr_to_str(&tcp.dst),
1573 ntohs(tcp.dst.ip.sin_port),
1574 vnn->pnn));
1576 if (tcp_update_needed) {
1577 vnn->tcp_update_needed = true;
1580 return 0;
1584 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1586 struct ctdb_connection *tcpp;
1588 if (vnn == NULL) {
1589 return;
1592 /* if the array is empty we can't remove it
1593 and we don't need to do anything
1595 if (vnn->tcp_array == NULL) {
1596 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1597 ctdb_addr_to_str(&conn->dst),
1598 ntohs(conn->dst.ip.sin_port)));
1599 return;
1603 /* See if we know this connection
1604 if we don't know this connection then we don't need to do anything
1606 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1607 if (tcpp == NULL) {
1608 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1609 ctdb_addr_to_str(&conn->dst),
1610 ntohs(conn->dst.ip.sin_port)));
1611 return;
1615 /* We need to remove this entry from the array.
1616 Instead of allocating a new array and copying data to it
1617 we cheat and just copy the last entry in the existing array
1618 to the entry that is to be removed and just shring the
1619 ->num field
1621 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1622 vnn->tcp_array->num--;
1624 /* If we deleted the last entry we also need to remove the entire array
1626 if (vnn->tcp_array->num == 0) {
1627 talloc_free(vnn->tcp_array);
1628 vnn->tcp_array = NULL;
1631 vnn->tcp_update_needed = true;
1633 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1634 ctdb_addr_to_str(&conn->src),
1635 ntohs(conn->src.ip.sin_port)));
1640 called by a daemon to inform us of a TCP connection that one of its
1641 clients used are no longer needed in the tickle database
1643 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1645 struct ctdb_vnn *vnn;
1646 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1648 /* If we don't have public IPs, tickles are useless */
1649 if (ctdb->vnn == NULL) {
1650 return 0;
1653 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1654 if (vnn == NULL) {
1655 DEBUG(DEBUG_ERR,
1656 (__location__ " unable to find public address %s\n",
1657 ctdb_addr_to_str(&conn->dst)));
1658 return 0;
1661 ctdb_remove_connection(vnn, conn);
1663 return 0;
1667 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1668 bool force);
1671 Called when another daemon starts - causes all tickles for all
1672 public addresses we are serving to be sent to the new node on the
1673 next check. This actually causes the tickles to be sent to the
1674 other node immediately. In case there is an error, the periodic
1675 timer will send the updates on timer event. This is simple and
1676 doesn't require careful error handling.
1678 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1680 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1681 (unsigned long) pnn));
1683 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1684 return 0;
1689 called when a client structure goes away - hook to remove
1690 elements from the tcp_list in all daemons
1692 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1694 while (client->tcp_list) {
1695 struct ctdb_vnn *vnn;
1696 struct ctdb_tcp_list *tcp = client->tcp_list;
1697 struct ctdb_connection *conn = &tcp->connection;
1699 vnn = find_public_ip_vnn(client->ctdb,
1700 &conn->dst);
1702 /* If the IP address is hosted on this node then
1703 * remove the connection. */
1704 if (vnn != NULL && vnn->pnn == client->ctdb->pnn) {
1705 ctdb_remove_connection(vnn, conn);
1708 /* Otherwise this function has been called because the
1709 * server IP address has been released to another node
1710 * and the client has exited. This means that we
1711 * should not delete the connection information. The
1712 * takeover node processes connections too. */
1715 * The destructor removes from the list
1717 TALLOC_FREE(tcp);
1722 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1724 struct ctdb_vnn *vnn, *next;
1725 int count = 0;
1727 if (ctdb_config.failover_disabled == 1) {
1728 return;
1731 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1732 /* vnn can be freed below in release_ip_post() */
1733 next = vnn->next;
1735 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1736 ctdb_vnn_unassign_iface(ctdb, vnn);
1737 continue;
1740 /* Don't allow multiple releases at once. Some code,
1741 * particularly ctdb_tickle_sentenced_connections() is
1742 * not re-entrant */
1743 if (vnn->update_in_flight) {
1744 DEBUG(DEBUG_WARNING,
1745 (__location__
1746 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1747 ctdb_addr_to_str(&vnn->public_address),
1748 vnn->public_netmask_bits,
1749 ctdb_vnn_iface_string(vnn)));
1750 continue;
1752 vnn->update_in_flight = true;
1754 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1755 ctdb_addr_to_str(&vnn->public_address),
1756 vnn->public_netmask_bits,
1757 ctdb_vnn_iface_string(vnn)));
1759 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1760 ctdb_vnn_iface_string(vnn),
1761 ctdb_addr_to_str(&vnn->public_address),
1762 vnn->public_netmask_bits);
1763 /* releaseip timeouts are converted to success, so to
1764 * detect failures just check if the IP address is
1765 * still there...
1767 if (ctdb_sys_have_ip(&vnn->public_address)) {
1768 DEBUG(DEBUG_ERR,
1769 (__location__
1770 " IP address %s not released\n",
1771 ctdb_addr_to_str(&vnn->public_address)));
1772 vnn->update_in_flight = false;
1773 continue;
1776 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1777 if (vnn != NULL) {
1778 vnn->update_in_flight = false;
1780 count++;
1783 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1788 get list of public IPs
1790 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1791 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1793 int i, num, len;
1794 struct ctdb_public_ip_list_old *ips;
1795 struct ctdb_vnn *vnn;
1796 bool only_available = false;
1798 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1799 only_available = true;
1802 /* count how many public ip structures we have */
1803 num = 0;
1804 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1805 num++;
1808 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1809 num*sizeof(struct ctdb_public_ip);
1810 ips = talloc_zero_size(outdata, len);
1811 CTDB_NO_MEMORY(ctdb, ips);
1813 i = 0;
1814 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1815 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1816 continue;
1818 ips->ips[i].pnn = vnn->pnn;
1819 ips->ips[i].addr = vnn->public_address;
1820 i++;
1822 ips->num = i;
1823 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1824 i*sizeof(struct ctdb_public_ip);
1826 outdata->dsize = len;
1827 outdata->dptr = (uint8_t *)ips;
1829 return 0;
1833 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1834 struct ctdb_req_control_old *c,
1835 TDB_DATA indata,
1836 TDB_DATA *outdata)
1838 int i, num, len;
1839 ctdb_sock_addr *addr;
1840 struct ctdb_public_ip_info_old *info;
1841 struct ctdb_vnn *vnn;
1842 struct vnn_interface *iface;
1844 addr = (ctdb_sock_addr *)indata.dptr;
1846 vnn = find_public_ip_vnn(ctdb, addr);
1847 if (vnn == NULL) {
1848 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1849 "'%s'not a public address\n",
1850 ctdb_addr_to_str(addr)));
1851 return -1;
1854 /* count how many public ip structures we have */
1855 num = 0;
1856 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1857 num++;
1860 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1861 num*sizeof(struct ctdb_iface);
1862 info = talloc_zero_size(outdata, len);
1863 CTDB_NO_MEMORY(ctdb, info);
1865 info->ip.addr = vnn->public_address;
1866 info->ip.pnn = vnn->pnn;
1867 info->active_idx = 0xFFFFFFFF;
1869 i = 0;
1870 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1871 struct ctdb_interface *cur;
1873 cur = iface->iface;
1874 if (vnn->iface == cur) {
1875 info->active_idx = i;
1877 strncpy(info->ifaces[i].name, cur->name,
1878 sizeof(info->ifaces[i].name));
1879 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1880 info->ifaces[i].link_state = cur->link_up;
1881 info->ifaces[i].references = cur->references;
1883 i++;
1885 info->num = i;
1886 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1887 i*sizeof(struct ctdb_iface);
1889 outdata->dsize = len;
1890 outdata->dptr = (uint8_t *)info;
1892 return 0;
1895 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1896 struct ctdb_req_control_old *c,
1897 TDB_DATA *outdata)
1899 int i, num, len;
1900 struct ctdb_iface_list_old *ifaces;
1901 struct ctdb_interface *cur;
1903 /* count how many public ip structures we have */
1904 num = 0;
1905 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1906 num++;
1909 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1910 num*sizeof(struct ctdb_iface);
1911 ifaces = talloc_zero_size(outdata, len);
1912 CTDB_NO_MEMORY(ctdb, ifaces);
1914 i = 0;
1915 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1916 strncpy(ifaces->ifaces[i].name, cur->name,
1917 sizeof(ifaces->ifaces[i].name));
1918 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1919 ifaces->ifaces[i].link_state = cur->link_up;
1920 ifaces->ifaces[i].references = cur->references;
1921 i++;
1923 ifaces->num = i;
1924 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1925 i*sizeof(struct ctdb_iface);
1927 outdata->dsize = len;
1928 outdata->dptr = (uint8_t *)ifaces;
1930 return 0;
1933 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1934 struct ctdb_req_control_old *c,
1935 TDB_DATA indata)
1937 struct ctdb_iface *info;
1938 struct ctdb_interface *iface;
1939 bool link_up = false;
1941 info = (struct ctdb_iface *)indata.dptr;
1943 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1944 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1945 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1946 len, len, info->name));
1947 return -1;
1950 switch (info->link_state) {
1951 case 0:
1952 link_up = false;
1953 break;
1954 case 1:
1955 link_up = true;
1956 break;
1957 default:
1958 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1959 (unsigned int)info->link_state));
1960 return -1;
1963 if (info->references != 0) {
1964 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1965 (unsigned int)info->references));
1966 return -1;
1969 iface = ctdb_find_iface(ctdb, info->name);
1970 if (iface == NULL) {
1971 return -1;
1974 if (link_up == iface->link_up) {
1975 return 0;
1978 DEBUG(DEBUG_ERR,
1979 ("iface[%s] has changed it's link status %s => %s\n",
1980 iface->name,
1981 iface->link_up?"up":"down",
1982 link_up?"up":"down"));
1984 iface->link_up = link_up;
1985 return 0;
1990 called by a daemon to inform us of the entire list of TCP tickles for
1991 a particular public address.
1992 this control should only be sent by the node that is currently serving
1993 that public address.
1995 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1997 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1998 struct ctdb_tcp_array *tcparray;
1999 struct ctdb_vnn *vnn;
2001 /* We must at least have tickles.num or else we can't verify the size
2002 of the received data blob
2004 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2005 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2006 return -1;
2009 /* verify that the size of data matches what we expect */
2010 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2011 + sizeof(struct ctdb_connection) * list->num) {
2012 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2013 return -1;
2016 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2017 ctdb_addr_to_str(&list->addr)));
2019 vnn = find_public_ip_vnn(ctdb, &list->addr);
2020 if (vnn == NULL) {
2021 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2022 ctdb_addr_to_str(&list->addr)));
2024 return 1;
2027 if (vnn->pnn == ctdb->pnn) {
2028 DEBUG(DEBUG_INFO,
2029 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2030 ctdb_addr_to_str(&list->addr)));
2031 return 0;
2034 /* remove any old ticklelist we might have */
2035 talloc_free(vnn->tcp_array);
2036 vnn->tcp_array = NULL;
2038 tcparray = talloc(vnn, struct ctdb_tcp_array);
2039 CTDB_NO_MEMORY(ctdb, tcparray);
2041 tcparray->num = list->num;
2043 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2044 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2046 memcpy(tcparray->connections, &list->connections[0],
2047 sizeof(struct ctdb_connection)*tcparray->num);
2049 /* We now have a new fresh tickle list array for this vnn */
2050 vnn->tcp_array = tcparray;
2052 return 0;
2056 called to return the full list of tickles for the puclic address associated
2057 with the provided vnn
2059 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2061 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2062 struct ctdb_tickle_list_old *list;
2063 struct ctdb_tcp_array *tcparray;
2064 unsigned int num, i;
2065 struct ctdb_vnn *vnn;
2066 unsigned port;
2068 vnn = find_public_ip_vnn(ctdb, addr);
2069 if (vnn == NULL) {
2070 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2071 ctdb_addr_to_str(addr)));
2073 return 1;
2076 port = ctdb_addr_to_port(addr);
2078 tcparray = vnn->tcp_array;
2079 num = 0;
2080 if (tcparray != NULL) {
2081 if (port == 0) {
2082 /* All connections */
2083 num = tcparray->num;
2084 } else {
2085 /* Count connections for port */
2086 for (i = 0; i < tcparray->num; i++) {
2087 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2088 num++;
2094 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2095 + sizeof(struct ctdb_connection) * num;
2097 outdata->dptr = talloc_size(outdata, outdata->dsize);
2098 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2099 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2101 list->addr = *addr;
2102 list->num = num;
2104 if (num == 0) {
2105 return 0;
2108 num = 0;
2109 for (i = 0; i < tcparray->num; i++) {
2110 if (port == 0 || \
2111 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2112 list->connections[num] = tcparray->connections[i];
2113 num++;
2117 return 0;
2122 set the list of all tcp tickles for a public address
2124 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2125 ctdb_sock_addr *addr,
2126 struct ctdb_tcp_array *tcparray)
2128 int ret, num;
2129 TDB_DATA data;
2130 struct ctdb_tickle_list_old *list;
2132 if (tcparray) {
2133 num = tcparray->num;
2134 } else {
2135 num = 0;
2138 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2139 sizeof(struct ctdb_connection) * num;
2140 data.dptr = talloc_size(ctdb, data.dsize);
2141 CTDB_NO_MEMORY(ctdb, data.dptr);
2143 list = (struct ctdb_tickle_list_old *)data.dptr;
2144 list->addr = *addr;
2145 list->num = num;
2146 if (tcparray) {
2147 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2150 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2151 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2152 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2153 if (ret != 0) {
2154 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2155 return -1;
2158 talloc_free(data.dptr);
2160 return ret;
2163 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2164 bool force)
2166 struct ctdb_vnn *vnn;
2167 int ret;
2169 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2170 /* we only send out updates for public addresses that
2171 we have taken over
2173 if (ctdb->pnn != vnn->pnn) {
2174 continue;
2177 /* We only send out the updates if we need to */
2178 if (!force && !vnn->tcp_update_needed) {
2179 continue;
2182 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2183 &vnn->public_address,
2184 vnn->tcp_array);
2185 if (ret != 0) {
2186 D_ERR("Failed to send the tickle update for ip %s\n",
2187 ctdb_addr_to_str(&vnn->public_address));
2188 vnn->tcp_update_needed = true;
2189 } else {
2190 D_INFO("Sent tickle update for ip %s\n",
2191 ctdb_addr_to_str(&vnn->public_address));
2192 vnn->tcp_update_needed = false;
2199 perform tickle updates if required
2201 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2202 struct tevent_timer *te,
2203 struct timeval t, void *private_data)
2205 struct ctdb_context *ctdb = talloc_get_type(
2206 private_data, struct ctdb_context);
2208 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2210 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2211 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2212 ctdb_update_tcp_tickles, ctdb);
2216 start periodic update of tcp tickles
2218 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2220 ctdb->tickle_update_context = talloc_new(ctdb);
2222 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2223 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2224 ctdb_update_tcp_tickles, ctdb);
2230 struct control_gratious_arp {
2231 struct ctdb_context *ctdb;
2232 ctdb_sock_addr addr;
2233 const char *iface;
2234 int count;
2238 send a control_gratuitous arp
2240 static void send_gratious_arp(struct tevent_context *ev,
2241 struct tevent_timer *te,
2242 struct timeval t, void *private_data)
2244 int ret;
2245 struct control_gratious_arp *arp = talloc_get_type(private_data,
2246 struct control_gratious_arp);
2248 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2249 if (ret != 0) {
2250 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2251 arp->iface, strerror(ret));
2255 arp->count++;
2256 if (arp->count == CTDB_ARP_REPEAT) {
2257 talloc_free(arp);
2258 return;
2261 tevent_add_timer(arp->ctdb->ev, arp,
2262 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2263 send_gratious_arp, arp);
2268 send a gratious arp
2270 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2272 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2273 struct control_gratious_arp *arp;
2275 /* verify the size of indata */
2276 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2277 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2278 (unsigned)indata.dsize,
2279 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2280 return -1;
2282 if (indata.dsize !=
2283 ( offsetof(struct ctdb_addr_info_old, iface)
2284 + gratious_arp->len ) ){
2286 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2287 "but should be %u bytes\n",
2288 (unsigned)indata.dsize,
2289 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2290 return -1;
2294 arp = talloc(ctdb, struct control_gratious_arp);
2295 CTDB_NO_MEMORY(ctdb, arp);
2297 arp->ctdb = ctdb;
2298 arp->addr = gratious_arp->addr;
2299 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2300 CTDB_NO_MEMORY(ctdb, arp->iface);
2301 arp->count = 0;
2303 tevent_add_timer(arp->ctdb->ev, arp,
2304 timeval_zero(), send_gratious_arp, arp);
2306 return 0;
2309 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2311 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2312 int ret;
2314 /* verify the size of indata */
2315 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2316 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2317 return -1;
2319 if (indata.dsize !=
2320 ( offsetof(struct ctdb_addr_info_old, iface)
2321 + pub->len ) ){
2323 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2324 "but should be %u bytes\n",
2325 (unsigned)indata.dsize,
2326 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2327 return -1;
2330 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2332 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2334 if (ret != 0) {
2335 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2336 return -1;
2339 return 0;
2342 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2344 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2345 struct ctdb_vnn *vnn;
2347 /* verify the size of indata */
2348 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2349 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2350 return -1;
2352 if (indata.dsize !=
2353 ( offsetof(struct ctdb_addr_info_old, iface)
2354 + pub->len ) ){
2356 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2357 "but should be %u bytes\n",
2358 (unsigned)indata.dsize,
2359 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2360 return -1;
2363 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2365 /* walk over all public addresses until we find a match */
2366 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2367 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2368 if (vnn->pnn == ctdb->pnn) {
2369 /* This IP is currently being hosted.
2370 * Defer the deletion until the next
2371 * takeover run. "ctdb reloadips" will
2372 * always cause a takeover run. "ctdb
2373 * delip" will now need an explicit
2374 * "ctdb ipreallocated" afterwards. */
2375 vnn->delete_pending = true;
2376 } else {
2377 /* This IP is not hosted on the
2378 * current node so just delete it
2379 * now. */
2380 do_delete_ip(ctdb, vnn);
2383 return 0;
2387 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2388 ctdb_addr_to_str(&pub->addr)));
2389 return -1;
/* State handed to ctdb_ipreallocated_callback() */
struct ipreallocated_callback_state {
	/* Control request that is replied to when the event completes */
	struct ctdb_req_control_old *c;
};
2397 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2398 int status, void *p)
2400 struct ipreallocated_callback_state *state =
2401 talloc_get_type(p, struct ipreallocated_callback_state);
2403 if (status != 0) {
2404 DEBUG(DEBUG_ERR,
2405 (" \"ipreallocated\" event script failed (status %d)\n",
2406 status));
2407 if (status == -ETIMEDOUT) {
2408 ctdb_ban_self(ctdb);
2412 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2413 talloc_free(state);
2416 /* A control to run the ipreallocated event */
2417 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2418 struct ctdb_req_control_old *c,
2419 bool *async_reply)
2421 int ret;
2422 struct ipreallocated_callback_state *state;
2424 state = talloc(ctdb, struct ipreallocated_callback_state);
2425 CTDB_NO_MEMORY(ctdb, state);
2427 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2429 ret = ctdb_event_script_callback(ctdb, state,
2430 ctdb_ipreallocated_callback, state,
2431 CTDB_EVENT_IPREALLOCATED,
2432 "%s", "");
2434 if (ret != 0) {
2435 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2436 talloc_free(state);
2437 return -1;
2440 /* tell the control that we will be reply asynchronously */
2441 state->c = talloc_steal(state, c);
2442 *async_reply = true;
2444 return 0;
/* State of one "reloadips" run carried out by a child process */
struct ctdb_reloadips_handle {
	/* Daemon context */
	struct ctdb_context *ctdb;
	/* Pending control request; replied to from the destructor */
	struct ctdb_req_control_old *c;
	/* Status sent in that reply */
	int status;
	/* Descriptor pair; the child handler reads the result from fd[0] */
	int fd[2];
	/* Child process; killed from the destructor */
	pid_t child;
	/* fd event for this handle */
	struct tevent_fd *fde;
};
2457 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2459 if (h == h->ctdb->reload_ips) {
2460 h->ctdb->reload_ips = NULL;
2462 if (h->c != NULL) {
2463 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2464 h->c = NULL;
2466 ctdb_kill(h->ctdb, h->child, SIGKILL);
2467 return 0;
2470 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2471 struct tevent_timer *te,
2472 struct timeval t, void *private_data)
2474 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2476 talloc_free(h);
2479 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2480 struct tevent_fd *fde,
2481 uint16_t flags, void *private_data)
2483 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2485 char res;
2486 int ret;
2488 ret = sys_read(h->fd[0], &res, 1);
2489 if (ret < 1 || res != 0) {
2490 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2491 res = 1;
2493 h->status = res;
2495 talloc_free(h);
2498 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2500 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2501 struct ctdb_public_ip_list_old *ips;
2502 struct ctdb_vnn *vnn;
2503 struct client_async_data *async_data;
2504 struct timeval timeout;
2505 TDB_DATA data;
2506 struct ctdb_client_control_state *state;
2507 bool first_add;
2508 unsigned int i;
2509 int ret;
2511 CTDB_NO_MEMORY(ctdb, mem_ctx);
2513 /* Read IPs from local node */
2514 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2515 CTDB_CURRENT_NODE, mem_ctx, &ips);
2516 if (ret != 0) {
2517 DEBUG(DEBUG_ERR,
2518 ("Unable to fetch public IPs from local node\n"));
2519 talloc_free(mem_ctx);
2520 return -1;
2523 /* Read IPs file - this is safe since this is a child process */
2524 ctdb->vnn = NULL;
2525 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2526 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2527 talloc_free(mem_ctx);
2528 return -1;
2531 async_data = talloc_zero(mem_ctx, struct client_async_data);
2532 CTDB_NO_MEMORY(ctdb, async_data);
2534 /* Compare IPs between node and file for IPs to be deleted */
2535 for (i = 0; i < ips->num; i++) {
2536 /* */
2537 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2538 if (ctdb_same_ip(&vnn->public_address,
2539 &ips->ips[i].addr)) {
2540 /* IP is still in file */
2541 break;
2545 if (vnn == NULL) {
2546 /* Delete IP ips->ips[i] */
2547 struct ctdb_addr_info_old *pub;
2549 DEBUG(DEBUG_NOTICE,
2550 ("IP %s no longer configured, deleting it\n",
2551 ctdb_addr_to_str(&ips->ips[i].addr)));
2553 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2554 CTDB_NO_MEMORY(ctdb, pub);
2556 pub->addr = ips->ips[i].addr;
2557 pub->mask = 0;
2558 pub->len = 0;
2560 timeout = TAKEOVER_TIMEOUT();
2562 data.dsize = offsetof(struct ctdb_addr_info_old,
2563 iface) + pub->len;
2564 data.dptr = (uint8_t *)pub;
2566 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2567 CTDB_CONTROL_DEL_PUBLIC_IP,
2568 0, data, async_data,
2569 &timeout, NULL);
2570 if (state == NULL) {
2571 DEBUG(DEBUG_ERR,
2572 (__location__
2573 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2574 goto failed;
2577 ctdb_client_async_add(async_data, state);
2581 /* Compare IPs between node and file for IPs to be added */
2582 first_add = true;
2583 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2584 for (i = 0; i < ips->num; i++) {
2585 if (ctdb_same_ip(&vnn->public_address,
2586 &ips->ips[i].addr)) {
2587 /* IP already on node */
2588 break;
2591 if (i == ips->num) {
2592 /* Add IP ips->ips[i] */
2593 struct ctdb_addr_info_old *pub;
2594 const char *ifaces = NULL;
2595 uint32_t len;
2596 struct vnn_interface *iface = NULL;
2598 DEBUG(DEBUG_NOTICE,
2599 ("New IP %s configured, adding it\n",
2600 ctdb_addr_to_str(&vnn->public_address)));
2601 if (first_add) {
2602 uint32_t pnn = ctdb_get_pnn(ctdb);
2604 data.dsize = sizeof(pnn);
2605 data.dptr = (uint8_t *)&pnn;
2607 ret = ctdb_client_send_message(
2608 ctdb,
2609 CTDB_BROADCAST_CONNECTED,
2610 CTDB_SRVID_REBALANCE_NODE,
2611 data);
2612 if (ret != 0) {
2613 DEBUG(DEBUG_WARNING,
2614 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2617 first_add = false;
2620 ifaces = vnn->ifaces->iface->name;
2621 iface = vnn->ifaces->next;
2622 while (iface != NULL) {
2623 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2624 iface->iface->name);
2625 iface = iface->next;
2628 len = strlen(ifaces) + 1;
2629 pub = talloc_zero_size(mem_ctx,
2630 offsetof(struct ctdb_addr_info_old, iface) + len);
2631 CTDB_NO_MEMORY(ctdb, pub);
2633 pub->addr = vnn->public_address;
2634 pub->mask = vnn->public_netmask_bits;
2635 pub->len = len;
2636 memcpy(&pub->iface[0], ifaces, pub->len);
2638 timeout = TAKEOVER_TIMEOUT();
2640 data.dsize = offsetof(struct ctdb_addr_info_old,
2641 iface) + pub->len;
2642 data.dptr = (uint8_t *)pub;
2644 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2645 CTDB_CONTROL_ADD_PUBLIC_IP,
2646 0, data, async_data,
2647 &timeout, NULL);
2648 if (state == NULL) {
2649 DEBUG(DEBUG_ERR,
2650 (__location__
2651 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2652 goto failed;
2655 ctdb_client_async_add(async_data, state);
2659 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2660 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2661 goto failed;
2664 talloc_free(mem_ctx);
2665 return 0;
2667 failed:
2668 talloc_free(mem_ctx);
2669 return -1;
2672 /* This control is sent to force the node to re-read the public addresses file
2673 and drop any addresses we should nnot longer host, and add new addresses
2674 that we are now able to host
2676 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2678 struct ctdb_reloadips_handle *h;
2679 pid_t parent = getpid();
2681 if (ctdb->reload_ips != NULL) {
2682 talloc_free(ctdb->reload_ips);
2683 ctdb->reload_ips = NULL;
2686 h = talloc(ctdb, struct ctdb_reloadips_handle);
2687 CTDB_NO_MEMORY(ctdb, h);
2688 h->ctdb = ctdb;
2689 h->c = NULL;
2690 h->status = -1;
2692 if (pipe(h->fd) == -1) {
2693 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2694 talloc_free(h);
2695 return -1;
2698 h->child = ctdb_fork(ctdb);
2699 if (h->child == (pid_t)-1) {
2700 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2701 close(h->fd[0]);
2702 close(h->fd[1]);
2703 talloc_free(h);
2704 return -1;
2707 /* child process */
2708 if (h->child == 0) {
2709 signed char res = 0;
2711 close(h->fd[0]);
2713 prctl_set_comment("ctdb_reloadips");
2714 if (switch_from_server_to_client(ctdb) != 0) {
2715 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2716 res = -1;
2717 } else {
2718 res = ctdb_reloadips_child(ctdb);
2719 if (res != 0) {
2720 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2724 sys_write(h->fd[1], &res, 1);
2725 ctdb_wait_for_process_to_exit(parent);
2726 _exit(0);
2729 h->c = talloc_steal(h, c);
2731 close(h->fd[1]);
2732 set_close_on_exec(h->fd[0]);
2734 talloc_set_destructor(h, ctdb_reloadips_destructor);
2737 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2738 ctdb_reloadips_child_handler, (void *)h);
2739 tevent_fd_set_auto_close(h->fde);
2741 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2742 ctdb_reloadips_timeout_event, h);
2744 /* we reply later */
2745 *async_reply = true;
2746 return 0;