vfs_fruit: Don't unlink the main file
[Samba.git] / ctdb / server / ctdb_takeover.c
bloba7aa8db5372ff9eb34f6160d45b2aeb1073bf3e3
/*
   ctdb ip takeover code

   Copyright (C) Ronnie Sahlberg 2007
   Copyright (C) Andrew Tridgell 2007
   Copyright (C) Martin Schwenke 2011

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
45 #include "server/ipalloc.h"
/*
 * Timeout derived from the takeover_timeout tunable.  The expansion
 * refers to a variable named "ctdb" that must exist in the calling scope.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* Seconds part of the delay between two rounds of gratuitous ARPs */
#define CTDB_ARP_INTERVAL 1
/* Number of rounds after which the ARP/tickle sequence stops */
#define CTDB_ARP_REPEAT 3
/* A local network interface that public addresses can be hosted on */
struct ctdb_interface {
	/* doubly-linked list membership (ctdb->ifaces) */
	struct ctdb_interface *prev, *next;
	/* interface name */
	const char *name;
	/* link state; interfaces are created with this set to true */
	bool link_up;
	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
/* One entry in the list of interfaces that a public address may use */
struct vnn_interface {
	/* doubly-linked list membership (vnn->ifaces) */
	struct vnn_interface *prev, *next;
	/* the interface this entry refers to */
	struct ctdb_interface *iface;
};
64 /* state associated with a public ip address */
65 struct ctdb_vnn {
66 struct ctdb_vnn *prev, *next;
68 struct ctdb_interface *iface;
69 struct vnn_interface *ifaces;
70 ctdb_sock_addr public_address;
71 uint8_t public_netmask_bits;
73 /* the node number that is serving this public address, if any.
74 If no node serves this ip it is set to -1 */
75 int32_t pnn;
77 /* List of clients to tickle for this public address */
78 struct ctdb_tcp_array *tcp_array;
80 /* whether we need to update the other nodes with changes to our list
81 of connected clients */
82 bool tcp_update_needed;
84 /* a context to hang sending gratious arp events off */
85 TALLOC_CTX *takeover_ctx;
87 /* Set to true any time an update to this VNN is in flight.
88 This helps to avoid races. */
89 bool update_in_flight;
91 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92 * address then this flag is set. It will be deleted in the
93 * release IP callback. */
94 bool delete_pending;
97 static const char *iface_string(const struct ctdb_interface *iface)
99 return (iface != NULL ? iface->name : "__none__");
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
104 return iface_string(vnn->iface);
107 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
108 const char *iface);
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
113 struct ctdb_interface *i;
115 if (strlen(iface) > CTDB_IFACE_SIZE) {
116 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
117 return NULL;
120 /* Verify that we don't have an entry for this ip yet */
121 i = ctdb_find_iface(ctdb, iface);
122 if (i != NULL) {
123 return i;
126 /* create a new structure for this interface */
127 i = talloc_zero(ctdb, struct ctdb_interface);
128 if (i == NULL) {
129 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
130 return NULL;
132 i->name = talloc_strdup(i, iface);
133 if (i->name == NULL) {
134 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
135 talloc_free(i);
136 return NULL;
139 i->link_up = true;
141 DLIST_ADD(ctdb->ifaces, i);
143 return i;
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147 const struct ctdb_interface *iface)
149 struct vnn_interface *i;
151 for (i = vnn->ifaces; i != NULL; i = i->next) {
152 if (iface == i->iface) {
153 return true;
157 return false;
160 /* If any interfaces now have no possible IPs then delete them. This
161 * implementation is naive (i.e. simple) rather than clever
162 * (i.e. complex). Given that this is run on delip and that operation
163 * is rare, this doesn't need to be efficient - it needs to be
164 * foolproof. One alternative is reference counting, where the logic
165 * is distributed and can, therefore, be broken in multiple places.
166 * Another alternative is to build a red-black tree of interfaces that
167 * can have addresses (by walking ctdb->vnn once) and then walking
168 * ctdb->ifaces once and deleting those not in the tree. Let's go to
169 * one of those if the naive implementation causes problems... :-)
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172 struct ctdb_vnn *vnn)
174 struct ctdb_interface *i, *next;
176 /* For each interface, check if there's an IP using it. */
177 for (i = ctdb->ifaces; i != NULL; i = next) {
178 struct ctdb_vnn *tv;
179 bool found;
180 next = i->next;
182 /* Only consider interfaces named in the given VNN. */
183 if (!vnn_has_interface(vnn, i)) {
184 continue;
187 /* Search for a vnn with this interface. */
188 found = false;
189 for (tv=ctdb->vnn; tv; tv=tv->next) {
190 if (vnn_has_interface(tv, i)) {
191 found = true;
192 break;
196 if (!found) {
197 /* None of the VNNs are using this interface. */
198 DLIST_REMOVE(ctdb->ifaces, i);
199 talloc_free(i);
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
206 const char *iface)
208 struct ctdb_interface *i;
210 for (i=ctdb->ifaces;i;i=i->next) {
211 if (strcmp(i->name, iface) == 0) {
212 return i;
216 return NULL;
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220 struct ctdb_vnn *vnn)
222 struct vnn_interface *i;
223 struct ctdb_interface *cur = NULL;
224 struct ctdb_interface *best = NULL;
226 for (i = vnn->ifaces; i != NULL; i = i->next) {
228 cur = i->iface;
230 if (!cur->link_up) {
231 continue;
234 if (best == NULL) {
235 best = cur;
236 continue;
239 if (cur->references < best->references) {
240 best = cur;
241 continue;
245 return best;
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249 struct ctdb_vnn *vnn)
251 struct ctdb_interface *best = NULL;
253 if (vnn->iface) {
254 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255 "still assigned to iface '%s'\n",
256 ctdb_addr_to_str(&vnn->public_address),
257 ctdb_vnn_iface_string(vnn)));
258 return 0;
261 best = ctdb_vnn_best_iface(ctdb, vnn);
262 if (best == NULL) {
263 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264 "cannot assign to iface any iface\n",
265 ctdb_addr_to_str(&vnn->public_address)));
266 return -1;
269 vnn->iface = best;
270 best->references++;
271 vnn->pnn = ctdb->pnn;
273 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274 "now assigned to iface '%s' refs[%d]\n",
275 ctdb_addr_to_str(&vnn->public_address),
276 ctdb_vnn_iface_string(vnn),
277 best->references));
278 return 0;
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282 struct ctdb_vnn *vnn)
284 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285 "now unassigned (old iface '%s' refs[%d])\n",
286 ctdb_addr_to_str(&vnn->public_address),
287 ctdb_vnn_iface_string(vnn),
288 vnn->iface?vnn->iface->references:0));
289 if (vnn->iface) {
290 vnn->iface->references--;
292 vnn->iface = NULL;
293 if (vnn->pnn == ctdb->pnn) {
294 vnn->pnn = -1;
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299 struct ctdb_vnn *vnn)
301 struct vnn_interface *i;
303 /* Nodes that are not RUNNING can not host IPs */
304 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
305 return false;
308 if (vnn->delete_pending) {
309 return false;
312 if (vnn->iface && vnn->iface->link_up) {
313 return true;
316 for (i = vnn->ifaces; i != NULL; i = i->next) {
317 if (i->iface->link_up) {
318 return true;
322 return false;
325 struct ctdb_takeover_arp {
326 struct ctdb_context *ctdb;
327 uint32_t count;
328 ctdb_sock_addr addr;
329 struct ctdb_tcp_array *tcparray;
330 struct ctdb_vnn *vnn;
335 lists of tcp endpoints
337 struct ctdb_tcp_list {
338 struct ctdb_tcp_list *prev, *next;
339 struct ctdb_connection connection;
343 list of clients to kill on IP release
345 struct ctdb_client_ip {
346 struct ctdb_client_ip *prev, *next;
347 struct ctdb_context *ctdb;
348 ctdb_sock_addr addr;
349 uint32_t client_id;
354 send a gratuitous arp
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357 struct tevent_timer *te,
358 struct timeval t, void *private_data)
360 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
361 struct ctdb_takeover_arp);
362 int i, ret;
363 struct ctdb_tcp_array *tcparray;
364 const char *iface = ctdb_vnn_iface_string(arp->vnn);
366 ret = ctdb_sys_send_arp(&arp->addr, iface);
367 if (ret != 0) {
368 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369 iface, strerror(errno)));
372 tcparray = arp->tcparray;
373 if (tcparray) {
374 for (i=0;i<tcparray->num;i++) {
375 struct ctdb_connection *tcon;
377 tcon = &tcparray->connections[i];
378 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379 (unsigned)ntohs(tcon->dst.ip.sin_port),
380 ctdb_addr_to_str(&tcon->src),
381 (unsigned)ntohs(tcon->src.ip.sin_port)));
382 ret = ctdb_sys_send_tcp(
383 &tcon->src,
384 &tcon->dst,
385 0, 0, 0);
386 if (ret != 0) {
387 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388 ctdb_addr_to_str(&tcon->src)));
393 arp->count++;
395 if (arp->count == CTDB_ARP_REPEAT) {
396 talloc_free(arp);
397 return;
400 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402 ctdb_control_send_arp, arp);
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406 struct ctdb_vnn *vnn)
408 struct ctdb_takeover_arp *arp;
409 struct ctdb_tcp_array *tcparray;
411 if (!vnn->takeover_ctx) {
412 vnn->takeover_ctx = talloc_new(vnn);
413 if (!vnn->takeover_ctx) {
414 return -1;
418 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
419 if (!arp) {
420 return -1;
423 arp->ctdb = ctdb;
424 arp->addr = vnn->public_address;
425 arp->vnn = vnn;
427 tcparray = vnn->tcp_array;
428 if (tcparray) {
429 /* add all of the known tcp connections for this IP to the
430 list of tcp connections to send tickle acks for */
431 arp->tcparray = talloc_steal(arp, tcparray);
433 vnn->tcp_array = NULL;
434 vnn->tcp_update_needed = true;
437 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438 timeval_zero(), ctdb_control_send_arp, arp);
440 return 0;
/* State carried from ctdb_do_takeip() to its event script callback */
struct ctdb_do_takeip_state {
	/* control request to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* VNN being taken over */
	struct ctdb_vnn *vnn;
};
449 called when takeip event finishes
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
452 void *private_data)
454 struct ctdb_do_takeip_state *state =
455 talloc_get_type(private_data, struct ctdb_do_takeip_state);
456 int32_t ret;
457 TDB_DATA data;
459 if (status != 0) {
460 if (status == -ETIME) {
461 ctdb_ban_self(ctdb);
463 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464 ctdb_addr_to_str(&state->vnn->public_address),
465 ctdb_vnn_iface_string(state->vnn)));
466 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
468 talloc_free(state);
469 return;
472 if (ctdb->do_checkpublicip) {
474 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475 if (ret != 0) {
476 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
477 talloc_free(state);
478 return;
483 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484 data.dsize = strlen((char *)data.dptr) + 1;
485 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
487 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
490 /* the control succeeded */
491 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
492 talloc_free(state);
493 return;
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
498 state->vnn->update_in_flight = false;
499 return 0;
503 take over an ip address
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506 struct ctdb_req_control_old *c,
507 struct ctdb_vnn *vnn)
509 int ret;
510 struct ctdb_do_takeip_state *state;
512 if (vnn->update_in_flight) {
513 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514 "update for this IP already in flight\n",
515 ctdb_addr_to_str(&vnn->public_address),
516 vnn->public_netmask_bits));
517 return -1;
520 ret = ctdb_vnn_assign_iface(ctdb, vnn);
521 if (ret != 0) {
522 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523 "assign a usable interface\n",
524 ctdb_addr_to_str(&vnn->public_address),
525 vnn->public_netmask_bits));
526 return -1;
529 state = talloc(vnn, struct ctdb_do_takeip_state);
530 CTDB_NO_MEMORY(ctdb, state);
532 state->c = NULL;
533 state->vnn = vnn;
535 vnn->update_in_flight = true;
536 talloc_set_destructor(state, ctdb_takeip_destructor);
538 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539 ctdb_addr_to_str(&vnn->public_address),
540 vnn->public_netmask_bits,
541 ctdb_vnn_iface_string(vnn)));
543 ret = ctdb_event_script_callback(ctdb,
544 state,
545 ctdb_do_takeip_callback,
546 state,
547 CTDB_EVENT_TAKE_IP,
548 "%s %s %u",
549 ctdb_vnn_iface_string(vnn),
550 ctdb_addr_to_str(&vnn->public_address),
551 vnn->public_netmask_bits);
553 if (ret != 0) {
554 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555 ctdb_addr_to_str(&vnn->public_address),
556 ctdb_vnn_iface_string(vnn)));
557 talloc_free(state);
558 return -1;
561 state->c = talloc_steal(ctdb, c);
562 return 0;
/* State carried from ctdb_do_updateip() to its event script callback */
struct ctdb_do_updateip_state {
	/* control request to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;
	/* VNN being moved */
	struct ctdb_vnn *vnn;
};
572 called when updateip event finishes
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575 void *private_data)
577 struct ctdb_do_updateip_state *state =
578 talloc_get_type(private_data, struct ctdb_do_updateip_state);
579 int32_t ret;
581 if (status != 0) {
582 if (status == -ETIME) {
583 ctdb_ban_self(ctdb);
585 DEBUG(DEBUG_ERR,
586 ("Failed update of IP %s from interface %s to %s\n",
587 ctdb_addr_to_str(&state->vnn->public_address),
588 iface_string(state->old),
589 ctdb_vnn_iface_string(state->vnn)));
592 * All we can do is reset the old interface
593 * and let the next run fix it
595 ctdb_vnn_unassign_iface(ctdb, state->vnn);
596 state->vnn->iface = state->old;
597 state->vnn->iface->references++;
599 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
600 talloc_free(state);
601 return;
604 if (ctdb->do_checkpublicip) {
606 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
607 if (ret != 0) {
608 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
609 talloc_free(state);
610 return;
615 /* the control succeeded */
616 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
617 talloc_free(state);
618 return;
621 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
623 state->vnn->update_in_flight = false;
624 return 0;
628 update (move) an ip address
630 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
631 struct ctdb_req_control_old *c,
632 struct ctdb_vnn *vnn)
634 int ret;
635 struct ctdb_do_updateip_state *state;
636 struct ctdb_interface *old = vnn->iface;
637 const char *old_name = iface_string(old);
638 const char *new_name;
640 if (vnn->update_in_flight) {
641 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
642 "update for this IP already in flight\n",
643 ctdb_addr_to_str(&vnn->public_address),
644 vnn->public_netmask_bits));
645 return -1;
648 ctdb_vnn_unassign_iface(ctdb, vnn);
649 ret = ctdb_vnn_assign_iface(ctdb, vnn);
650 if (ret != 0) {
651 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
652 "assign a usable interface (old iface '%s')\n",
653 ctdb_addr_to_str(&vnn->public_address),
654 vnn->public_netmask_bits,
655 old_name));
656 return -1;
659 if (old == vnn->iface) {
660 /* A benign update from one interface onto itself.
661 * no need to run the eventscripts in this case, just return
662 * success.
664 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
665 return 0;
668 state = talloc(vnn, struct ctdb_do_updateip_state);
669 CTDB_NO_MEMORY(ctdb, state);
671 state->c = NULL;
672 state->old = old;
673 state->vnn = vnn;
675 vnn->update_in_flight = true;
676 talloc_set_destructor(state, ctdb_updateip_destructor);
678 new_name = ctdb_vnn_iface_string(vnn);
679 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
680 "interface %s to %s\n",
681 ctdb_addr_to_str(&vnn->public_address),
682 vnn->public_netmask_bits,
683 old_name,
684 new_name));
686 ret = ctdb_event_script_callback(ctdb,
687 state,
688 ctdb_do_updateip_callback,
689 state,
690 CTDB_EVENT_UPDATE_IP,
691 "%s %s %s %u",
692 old_name,
693 new_name,
694 ctdb_addr_to_str(&vnn->public_address),
695 vnn->public_netmask_bits);
696 if (ret != 0) {
697 DEBUG(DEBUG_ERR,
698 ("Failed update IP %s from interface %s to %s\n",
699 ctdb_addr_to_str(&vnn->public_address),
700 old_name, new_name));
701 talloc_free(state);
702 return -1;
705 state->c = talloc_steal(ctdb, c);
706 return 0;
710 Find the vnn of the node that has a public ip address
711 returns -1 if the address is not known as a public address
713 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
715 struct ctdb_vnn *vnn;
717 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
718 if (ctdb_same_ip(&vnn->public_address, addr)) {
719 return vnn;
723 return NULL;
727 take over an ip address
729 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
730 struct ctdb_req_control_old *c,
731 TDB_DATA indata,
732 bool *async_reply)
734 int ret;
735 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
736 struct ctdb_vnn *vnn;
737 bool have_ip = false;
738 bool do_updateip = false;
739 bool do_takeip = false;
740 struct ctdb_interface *best_iface = NULL;
742 if (pip->pnn != ctdb->pnn) {
743 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
744 "with pnn %d, but we're node %d\n",
745 ctdb_addr_to_str(&pip->addr),
746 pip->pnn, ctdb->pnn));
747 return -1;
750 /* update out vnn list */
751 vnn = find_public_ip_vnn(ctdb, &pip->addr);
752 if (vnn == NULL) {
753 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
754 ctdb_addr_to_str(&pip->addr)));
755 return 0;
758 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
759 have_ip = ctdb_sys_have_ip(&pip->addr);
761 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
762 if (best_iface == NULL) {
763 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
764 "a usable interface (old %s, have_ip %d)\n",
765 ctdb_addr_to_str(&vnn->public_address),
766 vnn->public_netmask_bits,
767 ctdb_vnn_iface_string(vnn),
768 have_ip));
769 return -1;
772 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
773 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774 "and we have it on iface[%s], but it was assigned to node %d"
775 "and we are node %d, banning ourself\n",
776 ctdb_addr_to_str(&vnn->public_address),
777 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
778 ctdb_ban_self(ctdb);
779 return -1;
782 if (vnn->pnn == -1 && have_ip) {
783 /* This will cause connections to be reset and
784 * reestablished. However, this is a very unusual
785 * situation and doing this will completely repair the
786 * inconsistency in the VNN.
788 DEBUG(DEBUG_WARNING,
789 (__location__
790 " Doing updateip for IP %s already on an interface\n",
791 ctdb_addr_to_str(&vnn->public_address)));
792 do_updateip = true;
795 if (vnn->iface) {
796 if (vnn->iface != best_iface) {
797 if (!vnn->iface->link_up) {
798 do_updateip = true;
799 } else if (vnn->iface->references > (best_iface->references + 1)) {
800 /* only move when the rebalance gains something */
801 do_updateip = true;
806 if (!have_ip) {
807 if (do_updateip) {
808 ctdb_vnn_unassign_iface(ctdb, vnn);
809 do_updateip = false;
811 do_takeip = true;
814 if (do_takeip) {
815 ret = ctdb_do_takeip(ctdb, c, vnn);
816 if (ret != 0) {
817 return -1;
819 } else if (do_updateip) {
820 ret = ctdb_do_updateip(ctdb, c, vnn);
821 if (ret != 0) {
822 return -1;
824 } else {
826 * The interface is up and the kernel known the ip
827 * => do nothing
829 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
830 ctdb_addr_to_str(&pip->addr),
831 vnn->public_netmask_bits,
832 ctdb_vnn_iface_string(vnn)));
833 return 0;
836 /* tell ctdb_control.c that we will be replying asynchronously */
837 *async_reply = true;
839 return 0;
842 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
844 DLIST_REMOVE(ctdb->vnn, vnn);
845 ctdb_vnn_unassign_iface(ctdb, vnn);
846 ctdb_remove_orphaned_ifaces(ctdb, vnn);
847 talloc_free(vnn);
850 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
851 struct ctdb_vnn *vnn,
852 ctdb_sock_addr *addr)
854 TDB_DATA data;
856 /* Send a message to all clients of this node telling them
857 * that the cluster has been reconfigured and they should
858 * close any connections on this IP address
860 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
861 data.dsize = strlen((char *)data.dptr)+1;
862 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
863 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
865 ctdb_vnn_unassign_iface(ctdb, vnn);
867 /* Process the IP if it has been marked for deletion */
868 if (vnn->delete_pending) {
869 do_delete_ip(ctdb, vnn);
870 return NULL;
873 return vnn;
876 struct release_ip_callback_state {
877 struct ctdb_req_control_old *c;
878 ctdb_sock_addr *addr;
879 struct ctdb_vnn *vnn;
880 uint32_t target_pnn;
884 called when releaseip event finishes
886 static void release_ip_callback(struct ctdb_context *ctdb, int status,
887 void *private_data)
889 struct release_ip_callback_state *state =
890 talloc_get_type(private_data, struct release_ip_callback_state);
892 if (status == -ETIME) {
893 ctdb_ban_self(ctdb);
896 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
897 if (ctdb_sys_have_ip(state->addr)) {
898 DEBUG(DEBUG_ERR,
899 ("IP %s still hosted during release IP callback, failing\n",
900 ctdb_addr_to_str(state->addr)));
901 ctdb_request_control_reply(ctdb, state->c,
902 NULL, -1, NULL);
903 talloc_free(state);
904 return;
908 state->vnn->pnn = state->target_pnn;
909 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
911 /* the control succeeded */
912 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
913 talloc_free(state);
916 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
918 if (state->vnn != NULL) {
919 state->vnn->update_in_flight = false;
921 return 0;
925 release an ip address
927 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
928 struct ctdb_req_control_old *c,
929 TDB_DATA indata,
930 bool *async_reply)
932 int ret;
933 struct release_ip_callback_state *state;
934 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
935 struct ctdb_vnn *vnn;
936 const char *iface;
938 /* update our vnn list */
939 vnn = find_public_ip_vnn(ctdb, &pip->addr);
940 if (vnn == NULL) {
941 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
942 ctdb_addr_to_str(&pip->addr)));
943 return 0;
946 /* stop any previous arps */
947 talloc_free(vnn->takeover_ctx);
948 vnn->takeover_ctx = NULL;
950 /* RELEASE_IP controls are sent to all nodes that should not
951 * be hosting a particular IP. This serves 2 purposes. The
952 * first is to help resolve any inconsistencies. If a node
953 * does unexpectly host an IP then it will be released. The
954 * 2nd is to use a "redundant release" to tell non-takeover
955 * nodes where an IP is moving to. This is how "ctdb ip" can
956 * report the (likely) location of an IP by only asking the
957 * local node. Redundant releases need to update the PNN but
958 * are otherwise ignored.
960 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
961 if (!ctdb_sys_have_ip(&pip->addr)) {
962 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
963 ctdb_addr_to_str(&pip->addr),
964 vnn->public_netmask_bits,
965 ctdb_vnn_iface_string(vnn)));
966 vnn->pnn = pip->pnn;
967 ctdb_vnn_unassign_iface(ctdb, vnn);
968 return 0;
970 } else {
971 if (vnn->iface == NULL) {
972 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
973 ctdb_addr_to_str(&pip->addr),
974 vnn->public_netmask_bits));
975 vnn->pnn = pip->pnn;
976 return 0;
980 /* There is a potential race between take_ip and us because we
981 * update the VNN via a callback that run when the
982 * eventscripts have been run. Avoid the race by allowing one
983 * update to be in flight at a time.
985 if (vnn->update_in_flight) {
986 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
987 "update for this IP already in flight\n",
988 ctdb_addr_to_str(&vnn->public_address),
989 vnn->public_netmask_bits));
990 return -1;
993 iface = ctdb_vnn_iface_string(vnn);
995 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
996 ctdb_addr_to_str(&pip->addr),
997 vnn->public_netmask_bits,
998 iface,
999 pip->pnn));
1001 state = talloc(ctdb, struct release_ip_callback_state);
1002 if (state == NULL) {
1003 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1004 __FILE__, __LINE__);
1005 return -1;
1008 state->c = NULL;
1009 state->addr = talloc(state, ctdb_sock_addr);
1010 if (state->addr == NULL) {
1011 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1012 __FILE__, __LINE__);
1013 talloc_free(state);
1014 return -1;
1016 *state->addr = pip->addr;
1017 state->target_pnn = pip->pnn;
1018 state->vnn = vnn;
1020 vnn->update_in_flight = true;
1021 talloc_set_destructor(state, ctdb_releaseip_destructor);
1023 ret = ctdb_event_script_callback(ctdb,
1024 state, release_ip_callback, state,
1025 CTDB_EVENT_RELEASE_IP,
1026 "%s %s %u",
1027 iface,
1028 ctdb_addr_to_str(&pip->addr),
1029 vnn->public_netmask_bits);
1030 if (ret != 0) {
1031 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1032 ctdb_addr_to_str(&pip->addr),
1033 ctdb_vnn_iface_string(vnn)));
1034 talloc_free(state);
1035 return -1;
1038 /* tell the control that we will be reply asynchronously */
1039 *async_reply = true;
1040 state->c = talloc_steal(state, c);
1041 return 0;
1044 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1045 ctdb_sock_addr *addr,
1046 unsigned mask, const char *ifaces,
1047 bool check_address)
1049 struct ctdb_vnn *vnn;
1050 char *tmp;
1051 const char *iface;
1053 /* Verify that we don't have an entry for this IP yet */
1054 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1055 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1056 DEBUG(DEBUG_ERR,
1057 ("Duplicate public IP address '%s'\n",
1058 ctdb_addr_to_str(addr)));
1059 return -1;
1063 /* Create a new VNN structure for this IP address */
1064 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1065 if (vnn == NULL) {
1066 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1067 return -1;
1069 tmp = talloc_strdup(vnn, ifaces);
1070 if (tmp == NULL) {
1071 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1072 talloc_free(vnn);
1073 return -1;
1075 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1076 struct vnn_interface *vnn_iface;
1077 struct ctdb_interface *i;
1078 if (!ctdb_sys_check_iface_exists(iface)) {
1079 DEBUG(DEBUG_ERR,
1080 ("Unknown interface %s for public address %s\n",
1081 iface, ctdb_addr_to_str(addr)));
1082 talloc_free(vnn);
1083 return -1;
1086 i = ctdb_add_local_iface(ctdb, iface);
1087 if (i == NULL) {
1088 DEBUG(DEBUG_ERR,
1089 ("Failed to add interface '%s' "
1090 "for public address %s\n",
1091 iface, ctdb_addr_to_str(addr)));
1092 talloc_free(vnn);
1093 return -1;
1096 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1097 if (vnn_iface == NULL) {
1098 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1099 talloc_free(vnn);
1100 return -1;
1103 vnn_iface->iface = i;
1104 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1106 talloc_free(tmp);
1107 vnn->public_address = *addr;
1108 vnn->public_netmask_bits = mask;
1109 vnn->pnn = -1;
1111 DLIST_ADD(ctdb->vnn, vnn);
1113 return 0;
1117 setup the public address lists from a file
1119 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1121 char **lines;
1122 int nlines;
1123 int i;
1125 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1126 if (lines == NULL) {
1127 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1128 return -1;
1130 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1131 nlines--;
1134 for (i=0;i<nlines;i++) {
1135 unsigned mask;
1136 ctdb_sock_addr addr;
1137 const char *addrstr;
1138 const char *ifaces;
1139 char *tok, *line;
1141 line = lines[i];
1142 while ((*line == ' ') || (*line == '\t')) {
1143 line++;
1145 if (*line == '#') {
1146 continue;
1148 if (strcmp(line, "") == 0) {
1149 continue;
1151 tok = strtok(line, " \t");
1152 addrstr = tok;
1153 tok = strtok(NULL, " \t");
1154 if (tok == NULL) {
1155 if (NULL == ctdb->default_public_interface) {
1156 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1157 i+1));
1158 talloc_free(lines);
1159 return -1;
1161 ifaces = ctdb->default_public_interface;
1162 } else {
1163 ifaces = tok;
1166 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1167 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1168 talloc_free(lines);
1169 return -1;
1171 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1172 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1173 talloc_free(lines);
1174 return -1;
1179 talloc_free(lines);
1180 return 0;
1184 destroy a ctdb_client_ip structure
1186 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1188 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1189 ctdb_addr_to_str(&ip->addr),
1190 ntohs(ip->addr.ip.sin_port),
1191 ip->client_id));
1193 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1194 return 0;
1198 called by a client to inform us of a TCP connection that it is managing
1199 that should tickled with an ACK when IP takeover is done
1201 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1202 TDB_DATA indata)
1204 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1205 struct ctdb_connection *tcp_sock = NULL;
1206 struct ctdb_tcp_list *tcp;
1207 struct ctdb_connection t;
1208 int ret;
1209 TDB_DATA data;
1210 struct ctdb_client_ip *ip;
1211 struct ctdb_vnn *vnn;
1212 ctdb_sock_addr addr;
1214 /* If we don't have public IPs, tickles are useless */
1215 if (ctdb->vnn == NULL) {
1216 return 0;
1219 tcp_sock = (struct ctdb_connection *)indata.dptr;
1221 addr = tcp_sock->src;
1222 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1223 addr = tcp_sock->dst;
1224 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1226 ZERO_STRUCT(addr);
1227 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1228 vnn = find_public_ip_vnn(ctdb, &addr);
1229 if (vnn == NULL) {
1230 switch (addr.sa.sa_family) {
1231 case AF_INET:
1232 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1233 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1234 ctdb_addr_to_str(&addr)));
1236 break;
1237 case AF_INET6:
1238 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1239 ctdb_addr_to_str(&addr)));
1240 break;
1241 default:
1242 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1245 return 0;
1248 if (vnn->pnn != ctdb->pnn) {
1249 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1250 ctdb_addr_to_str(&addr),
1251 client_id, client->pid));
1252 /* failing this call will tell smbd to die */
1253 return -1;
1256 ip = talloc(client, struct ctdb_client_ip);
1257 CTDB_NO_MEMORY(ctdb, ip);
1259 ip->ctdb = ctdb;
1260 ip->addr = addr;
1261 ip->client_id = client_id;
1262 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1263 DLIST_ADD(ctdb->client_ip_list, ip);
1265 tcp = talloc(client, struct ctdb_tcp_list);
1266 CTDB_NO_MEMORY(ctdb, tcp);
1268 tcp->connection.src = tcp_sock->src;
1269 tcp->connection.dst = tcp_sock->dst;
1271 DLIST_ADD(client->tcp_list, tcp);
1273 t.src = tcp_sock->src;
1274 t.dst = tcp_sock->dst;
1276 data.dptr = (uint8_t *)&t;
1277 data.dsize = sizeof(t);
1279 switch (addr.sa.sa_family) {
1280 case AF_INET:
1281 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1282 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1283 ctdb_addr_to_str(&tcp_sock->src),
1284 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1285 break;
1286 case AF_INET6:
1287 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1288 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1289 ctdb_addr_to_str(&tcp_sock->src),
1290 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1291 break;
1292 default:
1293 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1297 /* tell all nodes about this tcp connection */
1298 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1299 CTDB_CONTROL_TCP_ADD,
1300 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1301 if (ret != 0) {
1302 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1303 return -1;
1306 return 0;
1310 find a tcp address on a list
1312 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1313 struct ctdb_connection *tcp)
1315 int i;
1317 if (array == NULL) {
1318 return NULL;
1321 for (i=0;i<array->num;i++) {
1322 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1323 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1324 return &array->connections[i];
1327 return NULL;
1333 called by a daemon to inform us of a TCP connection that one of its
1334 clients managing that should tickled with an ACK when IP takeover is
1335 done
1337 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1339 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1340 struct ctdb_tcp_array *tcparray;
1341 struct ctdb_connection tcp;
1342 struct ctdb_vnn *vnn;
1344 /* If we don't have public IPs, tickles are useless */
1345 if (ctdb->vnn == NULL) {
1346 return 0;
1349 vnn = find_public_ip_vnn(ctdb, &p->dst);
1350 if (vnn == NULL) {
1351 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1352 ctdb_addr_to_str(&p->dst)));
1354 return -1;
1358 tcparray = vnn->tcp_array;
1360 /* If this is the first tickle */
1361 if (tcparray == NULL) {
1362 tcparray = talloc(vnn, struct ctdb_tcp_array);
1363 CTDB_NO_MEMORY(ctdb, tcparray);
1364 vnn->tcp_array = tcparray;
1366 tcparray->num = 0;
1367 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1368 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1370 tcparray->connections[tcparray->num].src = p->src;
1371 tcparray->connections[tcparray->num].dst = p->dst;
1372 tcparray->num++;
1374 if (tcp_update_needed) {
1375 vnn->tcp_update_needed = true;
1377 return 0;
1381 /* Do we already have this tickle ?*/
1382 tcp.src = p->src;
1383 tcp.dst = p->dst;
1384 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1385 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1386 ctdb_addr_to_str(&tcp.dst),
1387 ntohs(tcp.dst.ip.sin_port),
1388 vnn->pnn));
1389 return 0;
1392 /* A new tickle, we must add it to the array */
1393 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1394 struct ctdb_connection,
1395 tcparray->num+1);
1396 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1398 tcparray->connections[tcparray->num].src = p->src;
1399 tcparray->connections[tcparray->num].dst = p->dst;
1400 tcparray->num++;
1402 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1403 ctdb_addr_to_str(&tcp.dst),
1404 ntohs(tcp.dst.ip.sin_port),
1405 vnn->pnn));
1407 if (tcp_update_needed) {
1408 vnn->tcp_update_needed = true;
1411 return 0;
1415 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1417 struct ctdb_connection *tcpp;
1419 if (vnn == NULL) {
1420 return;
1423 /* if the array is empty we cant remove it
1424 and we don't need to do anything
1426 if (vnn->tcp_array == NULL) {
1427 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1428 ctdb_addr_to_str(&conn->dst),
1429 ntohs(conn->dst.ip.sin_port)));
1430 return;
1434 /* See if we know this connection
1435 if we don't know this connection then we dont need to do anything
1437 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1438 if (tcpp == NULL) {
1439 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1440 ctdb_addr_to_str(&conn->dst),
1441 ntohs(conn->dst.ip.sin_port)));
1442 return;
1446 /* We need to remove this entry from the array.
1447 Instead of allocating a new array and copying data to it
1448 we cheat and just copy the last entry in the existing array
1449 to the entry that is to be removed and just shring the
1450 ->num field
1452 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1453 vnn->tcp_array->num--;
1455 /* If we deleted the last entry we also need to remove the entire array
1457 if (vnn->tcp_array->num == 0) {
1458 talloc_free(vnn->tcp_array);
1459 vnn->tcp_array = NULL;
1462 vnn->tcp_update_needed = true;
1464 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1465 ctdb_addr_to_str(&conn->src),
1466 ntohs(conn->src.ip.sin_port)));
1471 called by a daemon to inform us of a TCP connection that one of its
1472 clients used are no longer needed in the tickle database
1474 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1476 struct ctdb_vnn *vnn;
1477 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1479 /* If we don't have public IPs, tickles are useless */
1480 if (ctdb->vnn == NULL) {
1481 return 0;
1484 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1485 if (vnn == NULL) {
1486 DEBUG(DEBUG_ERR,
1487 (__location__ " unable to find public address %s\n",
1488 ctdb_addr_to_str(&conn->dst)));
1489 return 0;
1492 ctdb_remove_connection(vnn, conn);
1494 return 0;
/* Forward declaration: the definition appears further down in this file */
static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, bool force);
1502 Called when another daemon starts - causes all tickles for all
1503 public addresses we are serving to be sent to the new node on the
1504 next check. This actually causes the tickles to be sent to the
1505 other node immediately. In case there is an error, the periodic
1506 timer will send the updates on timer event. This is simple and
1507 doesn't require careful error handling.
1509 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1511 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1512 (unsigned long) pnn));
1514 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1515 return 0;
1520 called when a client structure goes away - hook to remove
1521 elements from the tcp_list in all daemons
1523 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1525 while (client->tcp_list) {
1526 struct ctdb_vnn *vnn;
1527 struct ctdb_tcp_list *tcp = client->tcp_list;
1528 struct ctdb_connection *conn = &tcp->connection;
1530 DLIST_REMOVE(client->tcp_list, tcp);
1532 vnn = find_public_ip_vnn(client->ctdb,
1533 &conn->dst);
1534 if (vnn == NULL) {
1535 DEBUG(DEBUG_ERR,
1536 (__location__ " unable to find public address %s\n",
1537 ctdb_addr_to_str(&conn->dst)));
1538 continue;
1541 /* If the IP address is hosted on this node then
1542 * remove the connection. */
1543 if (vnn->pnn == client->ctdb->pnn) {
1544 ctdb_remove_connection(vnn, conn);
1547 /* Otherwise this function has been called because the
1548 * server IP address has been released to another node
1549 * and the client has exited. This means that we
1550 * should not delete the connection information. The
1551 * takeover node processes connections too. */
1556 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1558 struct ctdb_vnn *vnn, *next;
1559 int count = 0;
1561 if (ctdb->tunable.disable_ip_failover == 1) {
1562 return;
1565 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1566 /* vnn can be freed below in release_ip_post() */
1567 next = vnn->next;
1569 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1570 ctdb_vnn_unassign_iface(ctdb, vnn);
1571 continue;
1574 /* Don't allow multiple releases at once. Some code,
1575 * particularly ctdb_tickle_sentenced_connections() is
1576 * not re-entrant */
1577 if (vnn->update_in_flight) {
1578 DEBUG(DEBUG_WARNING,
1579 (__location__
1580 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1581 ctdb_addr_to_str(&vnn->public_address),
1582 vnn->public_netmask_bits,
1583 ctdb_vnn_iface_string(vnn)));
1584 continue;
1586 vnn->update_in_flight = true;
1588 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1589 ctdb_addr_to_str(&vnn->public_address),
1590 vnn->public_netmask_bits,
1591 ctdb_vnn_iface_string(vnn)));
1593 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1594 ctdb_vnn_iface_string(vnn),
1595 ctdb_addr_to_str(&vnn->public_address),
1596 vnn->public_netmask_bits);
1597 /* releaseip timeouts are converted to success, so to
1598 * detect failures just check if the IP address is
1599 * still there...
1601 if (ctdb_sys_have_ip(&vnn->public_address)) {
1602 DEBUG(DEBUG_ERR,
1603 (__location__
1604 " IP address %s not released\n",
1605 ctdb_addr_to_str(&vnn->public_address)));
1606 vnn->update_in_flight = false;
1607 continue;
1610 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1611 if (vnn != NULL) {
1612 vnn->update_in_flight = false;
1614 count++;
1617 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1622 get list of public IPs
1624 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1625 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1627 int i, num, len;
1628 struct ctdb_public_ip_list_old *ips;
1629 struct ctdb_vnn *vnn;
1630 bool only_available = false;
1632 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1633 only_available = true;
1636 /* count how many public ip structures we have */
1637 num = 0;
1638 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1639 num++;
1642 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1643 num*sizeof(struct ctdb_public_ip);
1644 ips = talloc_zero_size(outdata, len);
1645 CTDB_NO_MEMORY(ctdb, ips);
1647 i = 0;
1648 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1649 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1650 continue;
1652 ips->ips[i].pnn = vnn->pnn;
1653 ips->ips[i].addr = vnn->public_address;
1654 i++;
1656 ips->num = i;
1657 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1658 i*sizeof(struct ctdb_public_ip);
1660 outdata->dsize = len;
1661 outdata->dptr = (uint8_t *)ips;
1663 return 0;
1667 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1668 struct ctdb_req_control_old *c,
1669 TDB_DATA indata,
1670 TDB_DATA *outdata)
1672 int i, num, len;
1673 ctdb_sock_addr *addr;
1674 struct ctdb_public_ip_info_old *info;
1675 struct ctdb_vnn *vnn;
1676 struct vnn_interface *iface;
1678 addr = (ctdb_sock_addr *)indata.dptr;
1680 vnn = find_public_ip_vnn(ctdb, addr);
1681 if (vnn == NULL) {
1682 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1683 "'%s'not a public address\n",
1684 ctdb_addr_to_str(addr)));
1685 return -1;
1688 /* count how many public ip structures we have */
1689 num = 0;
1690 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1691 num++;
1694 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1695 num*sizeof(struct ctdb_iface);
1696 info = talloc_zero_size(outdata, len);
1697 CTDB_NO_MEMORY(ctdb, info);
1699 info->ip.addr = vnn->public_address;
1700 info->ip.pnn = vnn->pnn;
1701 info->active_idx = 0xFFFFFFFF;
1703 i = 0;
1704 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1705 struct ctdb_interface *cur;
1707 cur = iface->iface;
1708 if (vnn->iface == cur) {
1709 info->active_idx = i;
1711 strncpy(info->ifaces[i].name, cur->name,
1712 sizeof(info->ifaces[i].name));
1713 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1714 info->ifaces[i].link_state = cur->link_up;
1715 info->ifaces[i].references = cur->references;
1717 i++;
1719 info->num = i;
1720 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1721 i*sizeof(struct ctdb_iface);
1723 outdata->dsize = len;
1724 outdata->dptr = (uint8_t *)info;
1726 return 0;
1729 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1730 struct ctdb_req_control_old *c,
1731 TDB_DATA *outdata)
1733 int i, num, len;
1734 struct ctdb_iface_list_old *ifaces;
1735 struct ctdb_interface *cur;
1737 /* count how many public ip structures we have */
1738 num = 0;
1739 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1740 num++;
1743 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1744 num*sizeof(struct ctdb_iface);
1745 ifaces = talloc_zero_size(outdata, len);
1746 CTDB_NO_MEMORY(ctdb, ifaces);
1748 i = 0;
1749 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1750 strncpy(ifaces->ifaces[i].name, cur->name,
1751 sizeof(ifaces->ifaces[i].name));
1752 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1753 ifaces->ifaces[i].link_state = cur->link_up;
1754 ifaces->ifaces[i].references = cur->references;
1755 i++;
1757 ifaces->num = i;
1758 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1759 i*sizeof(struct ctdb_iface);
1761 outdata->dsize = len;
1762 outdata->dptr = (uint8_t *)ifaces;
1764 return 0;
1767 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1768 struct ctdb_req_control_old *c,
1769 TDB_DATA indata)
1771 struct ctdb_iface *info;
1772 struct ctdb_interface *iface;
1773 bool link_up = false;
1775 info = (struct ctdb_iface *)indata.dptr;
1777 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1778 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1779 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1780 len, len, info->name));
1781 return -1;
1784 switch (info->link_state) {
1785 case 0:
1786 link_up = false;
1787 break;
1788 case 1:
1789 link_up = true;
1790 break;
1791 default:
1792 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1793 (unsigned int)info->link_state));
1794 return -1;
1797 if (info->references != 0) {
1798 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1799 (unsigned int)info->references));
1800 return -1;
1803 iface = ctdb_find_iface(ctdb, info->name);
1804 if (iface == NULL) {
1805 return -1;
1808 if (link_up == iface->link_up) {
1809 return 0;
1812 DEBUG(DEBUG_ERR,
1813 ("iface[%s] has changed it's link status %s => %s\n",
1814 iface->name,
1815 iface->link_up?"up":"down",
1816 link_up?"up":"down"));
1818 iface->link_up = link_up;
1819 return 0;
1824 called by a daemon to inform us of the entire list of TCP tickles for
1825 a particular public address.
1826 this control should only be sent by the node that is currently serving
1827 that public address.
1829 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1831 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1832 struct ctdb_tcp_array *tcparray;
1833 struct ctdb_vnn *vnn;
1835 /* We must at least have tickles.num or else we cant verify the size
1836 of the received data blob
1838 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1839 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1840 return -1;
1843 /* verify that the size of data matches what we expect */
1844 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1845 + sizeof(struct ctdb_connection) * list->num) {
1846 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1847 return -1;
1850 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1851 ctdb_addr_to_str(&list->addr)));
1853 vnn = find_public_ip_vnn(ctdb, &list->addr);
1854 if (vnn == NULL) {
1855 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1856 ctdb_addr_to_str(&list->addr)));
1858 return 1;
1861 if (vnn->pnn == ctdb->pnn) {
1862 DEBUG(DEBUG_INFO,
1863 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1864 ctdb_addr_to_str(&list->addr)));
1865 return 0;
1868 /* remove any old ticklelist we might have */
1869 talloc_free(vnn->tcp_array);
1870 vnn->tcp_array = NULL;
1872 tcparray = talloc(vnn, struct ctdb_tcp_array);
1873 CTDB_NO_MEMORY(ctdb, tcparray);
1875 tcparray->num = list->num;
1877 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1878 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1880 memcpy(tcparray->connections, &list->connections[0],
1881 sizeof(struct ctdb_connection)*tcparray->num);
1883 /* We now have a new fresh tickle list array for this vnn */
1884 vnn->tcp_array = tcparray;
1886 return 0;
1890 called to return the full list of tickles for the puclic address associated
1891 with the provided vnn
1893 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1895 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1896 struct ctdb_tickle_list_old *list;
1897 struct ctdb_tcp_array *tcparray;
1898 int num, i;
1899 struct ctdb_vnn *vnn;
1900 unsigned port;
1902 vnn = find_public_ip_vnn(ctdb, addr);
1903 if (vnn == NULL) {
1904 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1905 ctdb_addr_to_str(addr)));
1907 return 1;
1910 port = ctdb_addr_to_port(addr);
1912 tcparray = vnn->tcp_array;
1913 num = 0;
1914 if (tcparray != NULL) {
1915 if (port == 0) {
1916 /* All connections */
1917 num = tcparray->num;
1918 } else {
1919 /* Count connections for port */
1920 for (i = 0; i < tcparray->num; i++) {
1921 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1922 num++;
1928 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1929 + sizeof(struct ctdb_connection) * num;
1931 outdata->dptr = talloc_size(outdata, outdata->dsize);
1932 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1933 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1935 list->addr = *addr;
1936 list->num = num;
1938 if (num == 0) {
1939 return 0;
1942 num = 0;
1943 for (i = 0; i < tcparray->num; i++) {
1944 if (port == 0 || \
1945 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1946 list->connections[num] = tcparray->connections[i];
1947 num++;
1951 return 0;
1956 set the list of all tcp tickles for a public address
1958 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1959 ctdb_sock_addr *addr,
1960 struct ctdb_tcp_array *tcparray)
1962 int ret, num;
1963 TDB_DATA data;
1964 struct ctdb_tickle_list_old *list;
1966 if (tcparray) {
1967 num = tcparray->num;
1968 } else {
1969 num = 0;
1972 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1973 sizeof(struct ctdb_connection) * num;
1974 data.dptr = talloc_size(ctdb, data.dsize);
1975 CTDB_NO_MEMORY(ctdb, data.dptr);
1977 list = (struct ctdb_tickle_list_old *)data.dptr;
1978 list->addr = *addr;
1979 list->num = num;
1980 if (tcparray) {
1981 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
1984 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
1985 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1986 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1987 if (ret != 0) {
1988 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1989 return -1;
1992 talloc_free(data.dptr);
1994 return ret;
1997 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1998 bool force)
2000 struct ctdb_vnn *vnn;
2001 int ret;
2003 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2004 /* we only send out updates for public addresses that
2005 we have taken over
2007 if (ctdb->pnn != vnn->pnn) {
2008 continue;
2011 /* We only send out the updates if we need to */
2012 if (!force && !vnn->tcp_update_needed) {
2013 continue;
2016 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2017 &vnn->public_address,
2018 vnn->tcp_array);
2019 if (ret != 0) {
2020 D_ERR("Failed to send the tickle update for ip %s\n",
2021 ctdb_addr_to_str(&vnn->public_address));
2022 vnn->tcp_update_needed = true;
2023 } else {
2024 D_INFO("Sent tickle update for ip %s\n",
2025 ctdb_addr_to_str(&vnn->public_address));
2026 vnn->tcp_update_needed = false;
2033 perform tickle updates if required
2035 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2036 struct tevent_timer *te,
2037 struct timeval t, void *private_data)
2039 struct ctdb_context *ctdb = talloc_get_type(
2040 private_data, struct ctdb_context);
2042 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2044 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2045 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2046 ctdb_update_tcp_tickles, ctdb);
2050 start periodic update of tcp tickles
2052 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2054 ctdb->tickle_update_context = talloc_new(ctdb);
2056 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2057 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2058 ctdb_update_tcp_tickles, ctdb);
2064 struct control_gratious_arp {
2065 struct ctdb_context *ctdb;
2066 ctdb_sock_addr addr;
2067 const char *iface;
2068 int count;
2072 send a control_gratuitous arp
2074 static void send_gratious_arp(struct tevent_context *ev,
2075 struct tevent_timer *te,
2076 struct timeval t, void *private_data)
2078 int ret;
2079 struct control_gratious_arp *arp = talloc_get_type(private_data,
2080 struct control_gratious_arp);
2082 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2083 if (ret != 0) {
2084 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2085 arp->iface, strerror(errno)));
2089 arp->count++;
2090 if (arp->count == CTDB_ARP_REPEAT) {
2091 talloc_free(arp);
2092 return;
2095 tevent_add_timer(arp->ctdb->ev, arp,
2096 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2097 send_gratious_arp, arp);
2102 send a gratious arp
2104 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2106 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2107 struct control_gratious_arp *arp;
2109 /* verify the size of indata */
2110 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2111 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2112 (unsigned)indata.dsize,
2113 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2114 return -1;
2116 if (indata.dsize !=
2117 ( offsetof(struct ctdb_addr_info_old, iface)
2118 + gratious_arp->len ) ){
2120 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2121 "but should be %u bytes\n",
2122 (unsigned)indata.dsize,
2123 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2124 return -1;
2128 arp = talloc(ctdb, struct control_gratious_arp);
2129 CTDB_NO_MEMORY(ctdb, arp);
2131 arp->ctdb = ctdb;
2132 arp->addr = gratious_arp->addr;
2133 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2134 CTDB_NO_MEMORY(ctdb, arp->iface);
2135 arp->count = 0;
2137 tevent_add_timer(arp->ctdb->ev, arp,
2138 timeval_zero(), send_gratious_arp, arp);
2140 return 0;
2143 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2145 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2146 int ret;
2148 /* verify the size of indata */
2149 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2150 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2151 return -1;
2153 if (indata.dsize !=
2154 ( offsetof(struct ctdb_addr_info_old, iface)
2155 + pub->len ) ){
2157 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2158 "but should be %u bytes\n",
2159 (unsigned)indata.dsize,
2160 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2161 return -1;
2164 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2166 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2168 if (ret != 0) {
2169 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2170 return -1;
2173 return 0;
2176 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2178 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2179 struct ctdb_vnn *vnn;
2181 /* verify the size of indata */
2182 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2183 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2184 return -1;
2186 if (indata.dsize !=
2187 ( offsetof(struct ctdb_addr_info_old, iface)
2188 + pub->len ) ){
2190 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2191 "but should be %u bytes\n",
2192 (unsigned)indata.dsize,
2193 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2194 return -1;
2197 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2199 /* walk over all public addresses until we find a match */
2200 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2201 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2202 if (vnn->pnn == ctdb->pnn) {
2203 /* This IP is currently being hosted.
2204 * Defer the deletion until the next
2205 * takeover run. "ctdb reloadips" will
2206 * always cause a takeover run. "ctdb
2207 * delip" will now need an explicit
2208 * "ctdb ipreallocated" afterwards. */
2209 vnn->delete_pending = true;
2210 } else {
2211 /* This IP is not hosted on the
2212 * current node so just delete it
2213 * now. */
2214 do_delete_ip(ctdb, vnn);
2217 return 0;
2221 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2222 ctdb_addr_to_str(&pub->addr)));
2223 return -1;
/* state carried from the "ipreallocated" control to its event callback */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* control request still awaiting its reply */
};
2231 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2232 int status, void *p)
2234 struct ipreallocated_callback_state *state =
2235 talloc_get_type(p, struct ipreallocated_callback_state);
2237 if (status != 0) {
2238 DEBUG(DEBUG_ERR,
2239 (" \"ipreallocated\" event script failed (status %d)\n",
2240 status));
2241 if (status == -ETIME) {
2242 ctdb_ban_self(ctdb);
2246 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2247 talloc_free(state);
2250 /* A control to run the ipreallocated event */
2251 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2252 struct ctdb_req_control_old *c,
2253 bool *async_reply)
2255 int ret;
2256 struct ipreallocated_callback_state *state;
2258 state = talloc(ctdb, struct ipreallocated_callback_state);
2259 CTDB_NO_MEMORY(ctdb, state);
2261 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2263 ret = ctdb_event_script_callback(ctdb, state,
2264 ctdb_ipreallocated_callback, state,
2265 CTDB_EVENT_IPREALLOCATED,
2266 "%s", "");
2268 if (ret != 0) {
2269 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2270 talloc_free(state);
2271 return -1;
2274 /* tell the control that we will be reply asynchronously */
2275 state->c = talloc_steal(state, c);
2276 *async_reply = true;
2278 return 0;
/* state for one in-progress "reload public IPs" operation */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;		/* owning daemon context */
	struct ctdb_req_control_old *c;		/* control to reply to, NULL once replied */
	int status;				/* status sent back in the reply */
	int fd[2];				/* fd[0] is read for the child's one-byte result */
	pid_t child;				/* pid of the helper child process */
	struct tevent_fd *fde;			/* fd event associated with this operation */
};
2291 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2293 if (h == h->ctdb->reload_ips) {
2294 h->ctdb->reload_ips = NULL;
2296 if (h->c != NULL) {
2297 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2298 h->c = NULL;
2300 ctdb_kill(h->ctdb, h->child, SIGKILL);
2301 return 0;
2304 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2305 struct tevent_timer *te,
2306 struct timeval t, void *private_data)
2308 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2310 talloc_free(h);
2313 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2314 struct tevent_fd *fde,
2315 uint16_t flags, void *private_data)
2317 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2319 char res;
2320 int ret;
2322 ret = sys_read(h->fd[0], &res, 1);
2323 if (ret < 1 || res != 0) {
2324 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2325 res = 1;
2327 h->status = res;
2329 talloc_free(h);
2332 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2334 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2335 struct ctdb_public_ip_list_old *ips;
2336 struct ctdb_vnn *vnn;
2337 struct client_async_data *async_data;
2338 struct timeval timeout;
2339 TDB_DATA data;
2340 struct ctdb_client_control_state *state;
2341 bool first_add;
2342 int i, ret;
2344 CTDB_NO_MEMORY(ctdb, mem_ctx);
2346 /* Read IPs from local node */
2347 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2348 CTDB_CURRENT_NODE, mem_ctx, &ips);
2349 if (ret != 0) {
2350 DEBUG(DEBUG_ERR,
2351 ("Unable to fetch public IPs from local node\n"));
2352 talloc_free(mem_ctx);
2353 return -1;
2356 /* Read IPs file - this is safe since this is a child process */
2357 ctdb->vnn = NULL;
2358 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2359 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2360 talloc_free(mem_ctx);
2361 return -1;
2364 async_data = talloc_zero(mem_ctx, struct client_async_data);
2365 CTDB_NO_MEMORY(ctdb, async_data);
2367 /* Compare IPs between node and file for IPs to be deleted */
2368 for (i = 0; i < ips->num; i++) {
2369 /* */
2370 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2371 if (ctdb_same_ip(&vnn->public_address,
2372 &ips->ips[i].addr)) {
2373 /* IP is still in file */
2374 break;
2378 if (vnn == NULL) {
2379 /* Delete IP ips->ips[i] */
2380 struct ctdb_addr_info_old *pub;
2382 DEBUG(DEBUG_NOTICE,
2383 ("IP %s no longer configured, deleting it\n",
2384 ctdb_addr_to_str(&ips->ips[i].addr)));
2386 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2387 CTDB_NO_MEMORY(ctdb, pub);
2389 pub->addr = ips->ips[i].addr;
2390 pub->mask = 0;
2391 pub->len = 0;
2393 timeout = TAKEOVER_TIMEOUT();
2395 data.dsize = offsetof(struct ctdb_addr_info_old,
2396 iface) + pub->len;
2397 data.dptr = (uint8_t *)pub;
2399 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2400 CTDB_CONTROL_DEL_PUBLIC_IP,
2401 0, data, async_data,
2402 &timeout, NULL);
2403 if (state == NULL) {
2404 DEBUG(DEBUG_ERR,
2405 (__location__
2406 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2407 goto failed;
2410 ctdb_client_async_add(async_data, state);
2414 /* Compare IPs between node and file for IPs to be added */
2415 first_add = true;
2416 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2417 for (i = 0; i < ips->num; i++) {
2418 if (ctdb_same_ip(&vnn->public_address,
2419 &ips->ips[i].addr)) {
2420 /* IP already on node */
2421 break;
2424 if (i == ips->num) {
2425 /* Add IP ips->ips[i] */
2426 struct ctdb_addr_info_old *pub;
2427 const char *ifaces = NULL;
2428 uint32_t len;
2429 struct vnn_interface *iface = NULL;
2431 DEBUG(DEBUG_NOTICE,
2432 ("New IP %s configured, adding it\n",
2433 ctdb_addr_to_str(&vnn->public_address)));
2434 if (first_add) {
2435 uint32_t pnn = ctdb_get_pnn(ctdb);
2437 data.dsize = sizeof(pnn);
2438 data.dptr = (uint8_t *)&pnn;
2440 ret = ctdb_client_send_message(
2441 ctdb,
2442 CTDB_BROADCAST_CONNECTED,
2443 CTDB_SRVID_REBALANCE_NODE,
2444 data);
2445 if (ret != 0) {
2446 DEBUG(DEBUG_WARNING,
2447 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2450 first_add = false;
2453 ifaces = vnn->ifaces->iface->name;
2454 iface = vnn->ifaces->next;
2455 while (iface != NULL) {
2456 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2457 iface->iface->name);
2458 iface = iface->next;
2461 len = strlen(ifaces) + 1;
2462 pub = talloc_zero_size(mem_ctx,
2463 offsetof(struct ctdb_addr_info_old, iface) + len);
2464 CTDB_NO_MEMORY(ctdb, pub);
2466 pub->addr = vnn->public_address;
2467 pub->mask = vnn->public_netmask_bits;
2468 pub->len = len;
2469 memcpy(&pub->iface[0], ifaces, pub->len);
2471 timeout = TAKEOVER_TIMEOUT();
2473 data.dsize = offsetof(struct ctdb_addr_info_old,
2474 iface) + pub->len;
2475 data.dptr = (uint8_t *)pub;
2477 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2478 CTDB_CONTROL_ADD_PUBLIC_IP,
2479 0, data, async_data,
2480 &timeout, NULL);
2481 if (state == NULL) {
2482 DEBUG(DEBUG_ERR,
2483 (__location__
2484 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2485 goto failed;
2488 ctdb_client_async_add(async_data, state);
2492 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2493 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2494 goto failed;
2497 talloc_free(mem_ctx);
2498 return 0;
2500 failed:
2501 talloc_free(mem_ctx);
2502 return -1;
2505 /* This control is sent to force the node to re-read the public addresses file
2506 and drop any addresses we should nnot longer host, and add new addresses
2507 that we are now able to host
2509 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2511 struct ctdb_reloadips_handle *h;
2512 pid_t parent = getpid();
2514 if (ctdb->reload_ips != NULL) {
2515 talloc_free(ctdb->reload_ips);
2516 ctdb->reload_ips = NULL;
2519 h = talloc(ctdb, struct ctdb_reloadips_handle);
2520 CTDB_NO_MEMORY(ctdb, h);
2521 h->ctdb = ctdb;
2522 h->c = NULL;
2523 h->status = -1;
2525 if (pipe(h->fd) == -1) {
2526 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2527 talloc_free(h);
2528 return -1;
2531 h->child = ctdb_fork(ctdb);
2532 if (h->child == (pid_t)-1) {
2533 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2534 close(h->fd[0]);
2535 close(h->fd[1]);
2536 talloc_free(h);
2537 return -1;
2540 /* child process */
2541 if (h->child == 0) {
2542 signed char res = 0;
2544 close(h->fd[0]);
2546 prctl_set_comment("ctdb_reloadips");
2547 if (switch_from_server_to_client(ctdb) != 0) {
2548 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2549 res = -1;
2550 } else {
2551 res = ctdb_reloadips_child(ctdb);
2552 if (res != 0) {
2553 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2557 sys_write(h->fd[1], &res, 1);
2558 ctdb_wait_for_process_to_exit(parent);
2559 _exit(0);
2562 h->c = talloc_steal(h, c);
2564 close(h->fd[1]);
2565 set_close_on_exec(h->fd[0]);
2567 talloc_set_destructor(h, ctdb_reloadips_destructor);
2570 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2571 ctdb_reloadips_child_handler, (void *)h);
2572 tevent_fd_set_auto_close(h->fde);
2574 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2575 ctdb_reloadips_timeout_event, h);
2577 /* we reply later */
2578 *async_reply = true;
2579 return 0;