4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface
{
52 struct ctdb_interface
*prev
, *next
;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn
*vnn
)
61 return vnn
->iface
->name
;
67 static int ctdb_add_local_iface(struct ctdb_context
*ctdb
, const char *iface
)
69 struct ctdb_interface
*i
;
71 /* Verify that we don't have an entry for this interface yet */
72 for (i
=ctdb
->ifaces
;i
;i
=i
->next
) {
73 if (strcmp(i
->name
, iface
) == 0) {
78 /* create a new structure for this interface */
79 i
= talloc_zero(ctdb
, struct ctdb_interface
);
80 CTDB_NO_MEMORY_FATAL(ctdb
, i
);
81 i
->name
= talloc_strdup(i
, iface
);
82 CTDB_NO_MEMORY(ctdb
, i
->name
);
86 DLIST_ADD(ctdb
->ifaces
, i
);
91 static bool vnn_has_interface_with_name(struct ctdb_vnn
*vnn
,
96 for (n
= 0; vnn
->ifaces
[n
] != NULL
; n
++) {
97 if (strcmp(name
, vnn
->ifaces
[n
]) == 0) {
105 /* If any interfaces now have no possible IPs then delete them. This
106 * implementation is naive (i.e. simple) rather than clever
107 * (i.e. complex). Given that this is run on delip and that operation
108 * is rare, this doesn't need to be efficient - it needs to be
109 * foolproof. One alternative is reference counting, where the logic
110 * is distributed and can, therefore, be broken in multiple places.
111 * Another alternative is to build a red-black tree of interfaces that
112 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
113 * once) and then to walk ctdb->ifaces once, deleting those not in
114 * the tree. Let's go to one of those if the naive implementation
115 * causes problems... :-)
117 static void ctdb_remove_orphaned_ifaces(struct ctdb_context
*ctdb
,
118 struct ctdb_vnn
*vnn
)
120 struct ctdb_interface
*i
, *next
;
122 /* For each interface, check if there's an IP using it. */
123 for (i
= ctdb
->ifaces
; i
!= NULL
; i
= next
) {
128 /* Only consider interfaces named in the given VNN. */
129 if (!vnn_has_interface_with_name(vnn
, i
->name
)) {
133 /* Is the "single IP" on this interface? */
134 if ((ctdb
->single_ip_vnn
!= NULL
) &&
135 (ctdb
->single_ip_vnn
->ifaces
[0] != NULL
) &&
136 (strcmp(i
->name
, ctdb
->single_ip_vnn
->ifaces
[0]) == 0)) {
137 /* Found, next interface please... */
140 /* Search for a vnn with this interface. */
142 for (tv
=ctdb
->vnn
; tv
; tv
=tv
->next
) {
143 if (vnn_has_interface_with_name(tv
, i
->name
)) {
150 /* None of the VNNs are using this interface. */
151 DLIST_REMOVE(ctdb
->ifaces
, i
);
158 static struct ctdb_interface
*ctdb_find_iface(struct ctdb_context
*ctdb
,
161 struct ctdb_interface
*i
;
163 for (i
=ctdb
->ifaces
;i
;i
=i
->next
) {
164 if (strcmp(i
->name
, iface
) == 0) {
172 static struct ctdb_interface
*ctdb_vnn_best_iface(struct ctdb_context
*ctdb
,
173 struct ctdb_vnn
*vnn
)
176 struct ctdb_interface
*cur
= NULL
;
177 struct ctdb_interface
*best
= NULL
;
179 for (i
=0; vnn
->ifaces
[i
]; i
++) {
181 cur
= ctdb_find_iface(ctdb
, vnn
->ifaces
[i
]);
195 if (cur
->references
< best
->references
) {
204 static int32_t ctdb_vnn_assign_iface(struct ctdb_context
*ctdb
,
205 struct ctdb_vnn
*vnn
)
207 struct ctdb_interface
*best
= NULL
;
210 DEBUG(DEBUG_INFO
, (__location__
" public address '%s' "
211 "still assigned to iface '%s'\n",
212 ctdb_addr_to_str(&vnn
->public_address
),
213 ctdb_vnn_iface_string(vnn
)));
217 best
= ctdb_vnn_best_iface(ctdb
, vnn
);
219 DEBUG(DEBUG_ERR
, (__location__
" public address '%s' "
220 "cannot assign to iface any iface\n",
221 ctdb_addr_to_str(&vnn
->public_address
)));
227 vnn
->pnn
= ctdb
->pnn
;
229 DEBUG(DEBUG_INFO
, (__location__
" public address '%s' "
230 "now assigned to iface '%s' refs[%d]\n",
231 ctdb_addr_to_str(&vnn
->public_address
),
232 ctdb_vnn_iface_string(vnn
),
237 static void ctdb_vnn_unassign_iface(struct ctdb_context
*ctdb
,
238 struct ctdb_vnn
*vnn
)
240 DEBUG(DEBUG_INFO
, (__location__
" public address '%s' "
241 "now unassigned (old iface '%s' refs[%d])\n",
242 ctdb_addr_to_str(&vnn
->public_address
),
243 ctdb_vnn_iface_string(vnn
),
244 vnn
->iface
?vnn
->iface
->references
:0));
246 vnn
->iface
->references
--;
249 if (vnn
->pnn
== ctdb
->pnn
) {
254 static bool ctdb_vnn_available(struct ctdb_context
*ctdb
,
255 struct ctdb_vnn
*vnn
)
259 /* Nodes that are not RUNNING can not host IPs */
260 if (ctdb
->runstate
!= CTDB_RUNSTATE_RUNNING
) {
264 if (vnn
->delete_pending
) {
268 if (vnn
->iface
&& vnn
->iface
->link_up
) {
272 for (i
=0; vnn
->ifaces
[i
]; i
++) {
273 struct ctdb_interface
*cur
;
275 cur
= ctdb_find_iface(ctdb
, vnn
->ifaces
[i
]);
288 struct ctdb_takeover_arp
{
289 struct ctdb_context
*ctdb
;
292 struct ctdb_tcp_array
*tcparray
;
293 struct ctdb_vnn
*vnn
;
298 lists of tcp endpoints
300 struct ctdb_tcp_list
{
301 struct ctdb_tcp_list
*prev
, *next
;
302 struct ctdb_connection connection
;
306 list of clients to kill on IP release
308 struct ctdb_client_ip
{
309 struct ctdb_client_ip
*prev
, *next
;
310 struct ctdb_context
*ctdb
;
317 send a gratuitous arp
319 static void ctdb_control_send_arp(struct tevent_context
*ev
,
320 struct tevent_timer
*te
,
321 struct timeval t
, void *private_data
)
323 struct ctdb_takeover_arp
*arp
= talloc_get_type(private_data
,
324 struct ctdb_takeover_arp
);
326 struct ctdb_tcp_array
*tcparray
;
327 const char *iface
= ctdb_vnn_iface_string(arp
->vnn
);
329 ret
= ctdb_sys_send_arp(&arp
->addr
, iface
);
331 DEBUG(DEBUG_CRIT
,(__location__
" sending of arp failed on iface '%s' (%s)\n",
332 iface
, strerror(errno
)));
335 tcparray
= arp
->tcparray
;
337 for (i
=0;i
<tcparray
->num
;i
++) {
338 struct ctdb_connection
*tcon
;
340 tcon
= &tcparray
->connections
[i
];
341 DEBUG(DEBUG_INFO
,("sending tcp tickle ack for %u->%s:%u\n",
342 (unsigned)ntohs(tcon
->dst
.ip
.sin_port
),
343 ctdb_addr_to_str(&tcon
->src
),
344 (unsigned)ntohs(tcon
->src
.ip
.sin_port
)));
345 ret
= ctdb_sys_send_tcp(
350 DEBUG(DEBUG_CRIT
,(__location__
" Failed to send tcp tickle ack for %s\n",
351 ctdb_addr_to_str(&tcon
->src
)));
358 if (arp
->count
== CTDB_ARP_REPEAT
) {
363 tevent_add_timer(arp
->ctdb
->ev
, arp
->vnn
->takeover_ctx
,
364 timeval_current_ofs(CTDB_ARP_INTERVAL
, 100000),
365 ctdb_control_send_arp
, arp
);
368 static int32_t ctdb_announce_vnn_iface(struct ctdb_context
*ctdb
,
369 struct ctdb_vnn
*vnn
)
371 struct ctdb_takeover_arp
*arp
;
372 struct ctdb_tcp_array
*tcparray
;
374 if (!vnn
->takeover_ctx
) {
375 vnn
->takeover_ctx
= talloc_new(vnn
);
376 if (!vnn
->takeover_ctx
) {
381 arp
= talloc_zero(vnn
->takeover_ctx
, struct ctdb_takeover_arp
);
387 arp
->addr
= vnn
->public_address
;
390 tcparray
= vnn
->tcp_array
;
392 /* add all of the known tcp connections for this IP to the
393 list of tcp connections to send tickle acks for */
394 arp
->tcparray
= talloc_steal(arp
, tcparray
);
396 vnn
->tcp_array
= NULL
;
397 vnn
->tcp_update_needed
= true;
400 tevent_add_timer(arp
->ctdb
->ev
, vnn
->takeover_ctx
,
401 timeval_zero(), ctdb_control_send_arp
, arp
);
406 struct takeover_callback_state
{
407 struct ctdb_req_control_old
*c
;
408 ctdb_sock_addr
*addr
;
409 struct ctdb_vnn
*vnn
;
412 struct ctdb_do_takeip_state
{
413 struct ctdb_req_control_old
*c
;
414 struct ctdb_vnn
*vnn
;
418 called when takeip event finishes
420 static void ctdb_do_takeip_callback(struct ctdb_context
*ctdb
, int status
,
423 struct ctdb_do_takeip_state
*state
=
424 talloc_get_type(private_data
, struct ctdb_do_takeip_state
);
429 struct ctdb_node
*node
= ctdb
->nodes
[ctdb
->pnn
];
431 if (status
== -ETIME
) {
434 DEBUG(DEBUG_ERR
,(__location__
" Failed to takeover IP %s on interface %s\n",
435 ctdb_addr_to_str(&state
->vnn
->public_address
),
436 ctdb_vnn_iface_string(state
->vnn
)));
437 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, status
, NULL
);
439 node
->flags
|= NODE_FLAGS_UNHEALTHY
;
444 if (ctdb
->do_checkpublicip
) {
446 ret
= ctdb_announce_vnn_iface(ctdb
, state
->vnn
);
448 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, -1, NULL
);
455 data
.dptr
= (uint8_t *)ctdb_addr_to_str(&state
->vnn
->public_address
);
456 data
.dsize
= strlen((char *)data
.dptr
) + 1;
457 DEBUG(DEBUG_INFO
,(__location__
" sending TAKE_IP for '%s'\n", data
.dptr
));
459 ctdb_daemon_send_message(ctdb
, ctdb
->pnn
, CTDB_SRVID_TAKE_IP
, data
);
462 /* the control succeeded */
463 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, 0, NULL
);
468 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state
*state
)
470 state
->vnn
->update_in_flight
= false;
475 take over an ip address
477 static int32_t ctdb_do_takeip(struct ctdb_context
*ctdb
,
478 struct ctdb_req_control_old
*c
,
479 struct ctdb_vnn
*vnn
)
482 struct ctdb_do_takeip_state
*state
;
484 if (vnn
->update_in_flight
) {
485 DEBUG(DEBUG_NOTICE
,("Takeover of IP %s/%u rejected "
486 "update for this IP already in flight\n",
487 ctdb_addr_to_str(&vnn
->public_address
),
488 vnn
->public_netmask_bits
));
492 ret
= ctdb_vnn_assign_iface(ctdb
, vnn
);
494 DEBUG(DEBUG_ERR
,("Takeover of IP %s/%u failed to "
495 "assign a usable interface\n",
496 ctdb_addr_to_str(&vnn
->public_address
),
497 vnn
->public_netmask_bits
));
501 state
= talloc(vnn
, struct ctdb_do_takeip_state
);
502 CTDB_NO_MEMORY(ctdb
, state
);
504 state
->c
= talloc_steal(ctdb
, c
);
507 vnn
->update_in_flight
= true;
508 talloc_set_destructor(state
, ctdb_takeip_destructor
);
510 DEBUG(DEBUG_NOTICE
,("Takeover of IP %s/%u on interface %s\n",
511 ctdb_addr_to_str(&vnn
->public_address
),
512 vnn
->public_netmask_bits
,
513 ctdb_vnn_iface_string(vnn
)));
515 ret
= ctdb_event_script_callback(ctdb
,
517 ctdb_do_takeip_callback
,
521 ctdb_vnn_iface_string(vnn
),
522 ctdb_addr_to_str(&vnn
->public_address
),
523 vnn
->public_netmask_bits
);
526 DEBUG(DEBUG_ERR
,(__location__
" Failed to takeover IP %s on interface %s\n",
527 ctdb_addr_to_str(&vnn
->public_address
),
528 ctdb_vnn_iface_string(vnn
)));
536 struct ctdb_do_updateip_state
{
537 struct ctdb_req_control_old
*c
;
538 struct ctdb_interface
*old
;
539 struct ctdb_vnn
*vnn
;
543 called when updateip event finishes
545 static void ctdb_do_updateip_callback(struct ctdb_context
*ctdb
, int status
,
548 struct ctdb_do_updateip_state
*state
=
549 talloc_get_type(private_data
, struct ctdb_do_updateip_state
);
553 if (status
== -ETIME
) {
556 DEBUG(DEBUG_ERR
,(__location__
" Failed to move IP %s from interface %s to %s\n",
557 ctdb_addr_to_str(&state
->vnn
->public_address
),
559 ctdb_vnn_iface_string(state
->vnn
)));
562 * All we can do is reset the old interface
563 * and let the next run fix it
565 ctdb_vnn_unassign_iface(ctdb
, state
->vnn
);
566 state
->vnn
->iface
= state
->old
;
567 state
->vnn
->iface
->references
++;
569 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, status
, NULL
);
574 if (ctdb
->do_checkpublicip
) {
576 ret
= ctdb_announce_vnn_iface(ctdb
, state
->vnn
);
578 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, -1, NULL
);
585 /* the control succeeded */
586 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, 0, NULL
);
591 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state
*state
)
593 state
->vnn
->update_in_flight
= false;
598 update (move) an ip address
600 static int32_t ctdb_do_updateip(struct ctdb_context
*ctdb
,
601 struct ctdb_req_control_old
*c
,
602 struct ctdb_vnn
*vnn
)
605 struct ctdb_do_updateip_state
*state
;
606 struct ctdb_interface
*old
= vnn
->iface
;
607 const char *new_name
;
609 if (vnn
->update_in_flight
) {
610 DEBUG(DEBUG_NOTICE
,("Update of IP %s/%u rejected "
611 "update for this IP already in flight\n",
612 ctdb_addr_to_str(&vnn
->public_address
),
613 vnn
->public_netmask_bits
));
617 ctdb_vnn_unassign_iface(ctdb
, vnn
);
618 ret
= ctdb_vnn_assign_iface(ctdb
, vnn
);
620 DEBUG(DEBUG_ERR
,("update of IP %s/%u failed to "
621 "assin a usable interface (old iface '%s')\n",
622 ctdb_addr_to_str(&vnn
->public_address
),
623 vnn
->public_netmask_bits
,
628 new_name
= ctdb_vnn_iface_string(vnn
);
629 if (old
->name
!= NULL
&& new_name
!= NULL
&& !strcmp(old
->name
, new_name
)) {
630 /* A benign update from one interface onto itself.
631 * no need to run the eventscripts in this case, just return
634 ctdb_request_control_reply(ctdb
, c
, NULL
, 0, NULL
);
638 state
= talloc(vnn
, struct ctdb_do_updateip_state
);
639 CTDB_NO_MEMORY(ctdb
, state
);
641 state
->c
= talloc_steal(ctdb
, c
);
645 vnn
->update_in_flight
= true;
646 talloc_set_destructor(state
, ctdb_updateip_destructor
);
648 DEBUG(DEBUG_NOTICE
,("Update of IP %s/%u from "
649 "interface %s to %s\n",
650 ctdb_addr_to_str(&vnn
->public_address
),
651 vnn
->public_netmask_bits
,
655 ret
= ctdb_event_script_callback(ctdb
,
657 ctdb_do_updateip_callback
,
659 CTDB_EVENT_UPDATE_IP
,
663 ctdb_addr_to_str(&vnn
->public_address
),
664 vnn
->public_netmask_bits
);
666 DEBUG(DEBUG_ERR
,(__location__
" Failed update IP %s from interface %s to %s\n",
667 ctdb_addr_to_str(&vnn
->public_address
),
668 old
->name
, new_name
));
677 Find the vnn of the node that has a public ip address
678 returns NULL if the address is not known as a public address
680 static struct ctdb_vnn
*find_public_ip_vnn(struct ctdb_context
*ctdb
, ctdb_sock_addr
*addr
)
682 struct ctdb_vnn
*vnn
;
684 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
685 if (ctdb_same_ip(&vnn
->public_address
, addr
)) {
694 take over an ip address
696 int32_t ctdb_control_takeover_ip(struct ctdb_context
*ctdb
,
697 struct ctdb_req_control_old
*c
,
702 struct ctdb_public_ip
*pip
= (struct ctdb_public_ip
*)indata
.dptr
;
703 struct ctdb_vnn
*vnn
;
704 bool have_ip
= false;
705 bool do_updateip
= false;
706 bool do_takeip
= false;
707 struct ctdb_interface
*best_iface
= NULL
;
709 if (pip
->pnn
!= ctdb
->pnn
) {
710 DEBUG(DEBUG_ERR
,(__location__
" takeoverip called for an ip '%s' "
711 "with pnn %d, but we're node %d\n",
712 ctdb_addr_to_str(&pip
->addr
),
713 pip
->pnn
, ctdb
->pnn
));
717 /* update our vnn list */
718 vnn
= find_public_ip_vnn(ctdb
, &pip
->addr
);
720 DEBUG(DEBUG_INFO
,("takeoverip called for an ip '%s' that is not a public address\n",
721 ctdb_addr_to_str(&pip
->addr
)));
725 if (ctdb
->tunable
.disable_ip_failover
== 0 && ctdb
->do_checkpublicip
) {
726 have_ip
= ctdb_sys_have_ip(&pip
->addr
);
728 best_iface
= ctdb_vnn_best_iface(ctdb
, vnn
);
729 if (best_iface
== NULL
) {
730 DEBUG(DEBUG_ERR
,("takeoverip of IP %s/%u failed to find"
731 "a usable interface (old %s, have_ip %d)\n",
732 ctdb_addr_to_str(&vnn
->public_address
),
733 vnn
->public_netmask_bits
,
734 ctdb_vnn_iface_string(vnn
),
739 if (vnn
->iface
== NULL
&& vnn
->pnn
== -1 && have_ip
&& best_iface
!= NULL
) {
740 DEBUG(DEBUG_ERR
,("Taking over newly created ip\n"));
745 if (vnn
->iface
== NULL
&& have_ip
) {
746 DEBUG(DEBUG_CRIT
,(__location__
" takeoverip of IP %s is known to the kernel, "
747 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
748 ctdb_addr_to_str(&vnn
->public_address
)));
752 if (vnn
->pnn
!= ctdb
->pnn
&& have_ip
&& vnn
->pnn
!= -1) {
753 DEBUG(DEBUG_CRIT
,(__location__
" takeoverip of IP %s is known to the kernel, "
754 "and we have it on iface[%s], but it was assigned to node %d"
755 "and we are node %d, banning ourself\n",
756 ctdb_addr_to_str(&vnn
->public_address
),
757 ctdb_vnn_iface_string(vnn
), vnn
->pnn
, ctdb
->pnn
));
762 if (vnn
->pnn
== -1 && have_ip
) {
763 vnn
->pnn
= ctdb
->pnn
;
764 DEBUG(DEBUG_CRIT
,(__location__
" takeoverip of IP %s is known to the kernel, "
765 "and we already have it on iface[%s], update local daemon\n",
766 ctdb_addr_to_str(&vnn
->public_address
),
767 ctdb_vnn_iface_string(vnn
)));
772 if (vnn
->iface
!= best_iface
) {
773 if (!vnn
->iface
->link_up
) {
775 } else if (vnn
->iface
->references
> (best_iface
->references
+ 1)) {
776 /* only move when the rebalance gains something */
784 ctdb_vnn_unassign_iface(ctdb
, vnn
);
791 ret
= ctdb_do_takeip(ctdb
, c
, vnn
);
795 } else if (do_updateip
) {
796 ret
= ctdb_do_updateip(ctdb
, c
, vnn
);
802 * The interface is up and the kernel knows the ip
805 DEBUG(DEBUG_INFO
,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
806 ctdb_addr_to_str(&pip
->addr
),
807 vnn
->public_netmask_bits
,
808 ctdb_vnn_iface_string(vnn
)));
812 /* tell ctdb_control.c that we will be replying asynchronously */
818 static void do_delete_ip(struct ctdb_context
*ctdb
, struct ctdb_vnn
*vnn
)
820 DLIST_REMOVE(ctdb
->vnn
, vnn
);
821 ctdb_vnn_unassign_iface(ctdb
, vnn
);
822 ctdb_remove_orphaned_ifaces(ctdb
, vnn
);
827 called when releaseip event finishes
829 static void release_ip_callback(struct ctdb_context
*ctdb
, int status
,
832 struct takeover_callback_state
*state
=
833 talloc_get_type(private_data
, struct takeover_callback_state
);
836 if (status
== -ETIME
) {
840 if (ctdb
->tunable
.disable_ip_failover
== 0 && ctdb
->do_checkpublicip
) {
841 if (ctdb_sys_have_ip(state
->addr
)) {
843 ("IP %s still hosted during release IP callback, failing\n",
844 ctdb_addr_to_str(state
->addr
)));
845 ctdb_request_control_reply(ctdb
, state
->c
,
852 /* send a message to all clients of this node telling them
853 that the cluster has been reconfigured and they should
854 release any sockets on this IP */
855 data
.dptr
= (uint8_t *)talloc_strdup(state
, ctdb_addr_to_str(state
->addr
));
856 CTDB_NO_MEMORY_VOID(ctdb
, data
.dptr
);
857 data
.dsize
= strlen((char *)data
.dptr
)+1;
859 DEBUG(DEBUG_INFO
,(__location__
" sending RELEASE_IP for '%s'\n", data
.dptr
));
861 ctdb_daemon_send_message(ctdb
, ctdb
->pnn
, CTDB_SRVID_RELEASE_IP
, data
);
863 ctdb_vnn_unassign_iface(ctdb
, state
->vnn
);
865 /* Process the IP if it has been marked for deletion */
866 if (state
->vnn
->delete_pending
) {
867 do_delete_ip(ctdb
, state
->vnn
);
871 /* the control succeeded */
872 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, 0, NULL
);
876 static int ctdb_releaseip_destructor(struct takeover_callback_state
*state
)
878 if (state
->vnn
!= NULL
) {
879 state
->vnn
->update_in_flight
= false;
885 release an ip address
887 int32_t ctdb_control_release_ip(struct ctdb_context
*ctdb
,
888 struct ctdb_req_control_old
*c
,
893 struct takeover_callback_state
*state
;
894 struct ctdb_public_ip
*pip
= (struct ctdb_public_ip
*)indata
.dptr
;
895 struct ctdb_vnn
*vnn
;
898 /* update our vnn list */
899 vnn
= find_public_ip_vnn(ctdb
, &pip
->addr
);
901 DEBUG(DEBUG_INFO
,("releaseip called for an ip '%s' that is not a public address\n",
902 ctdb_addr_to_str(&pip
->addr
)));
907 /* stop any previous arps */
908 talloc_free(vnn
->takeover_ctx
);
909 vnn
->takeover_ctx
= NULL
;
911 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
912 * lazy multicast to drop an IP from any node that isn't the
913 * intended new node. The following makes ctdbd ignore
914 * a release for any address it doesn't host.
916 if (ctdb
->tunable
.disable_ip_failover
== 0 && ctdb
->do_checkpublicip
) {
917 if (!ctdb_sys_have_ip(&pip
->addr
)) {
918 DEBUG(DEBUG_DEBUG
,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
919 ctdb_addr_to_str(&pip
->addr
),
920 vnn
->public_netmask_bits
,
921 ctdb_vnn_iface_string(vnn
)));
922 ctdb_vnn_unassign_iface(ctdb
, vnn
);
926 if (vnn
->iface
== NULL
) {
927 DEBUG(DEBUG_DEBUG
,("Redundant release of IP %s/%u (ip not held)\n",
928 ctdb_addr_to_str(&pip
->addr
),
929 vnn
->public_netmask_bits
));
934 /* There is a potential race between take_ip and us because we
935 * update the VNN via a callback that runs when the
936 * eventscripts have been run. Avoid the race by allowing one
937 * update to be in flight at a time.
939 if (vnn
->update_in_flight
) {
940 DEBUG(DEBUG_NOTICE
,("Release of IP %s/%u rejected "
941 "update for this IP already in flight\n",
942 ctdb_addr_to_str(&vnn
->public_address
),
943 vnn
->public_netmask_bits
));
947 iface
= strdup(ctdb_vnn_iface_string(vnn
));
949 DEBUG(DEBUG_NOTICE
,("Release of IP %s/%u on interface %s node:%d\n",
950 ctdb_addr_to_str(&pip
->addr
),
951 vnn
->public_netmask_bits
,
955 state
= talloc(ctdb
, struct takeover_callback_state
);
957 ctdb_set_error(ctdb
, "Out of memory at %s:%d",
963 state
->c
= talloc_steal(state
, c
);
964 state
->addr
= talloc(state
, ctdb_sock_addr
);
965 if (state
->addr
== NULL
) {
966 ctdb_set_error(ctdb
, "Out of memory at %s:%d",
972 *state
->addr
= pip
->addr
;
975 vnn
->update_in_flight
= true;
976 talloc_set_destructor(state
, ctdb_releaseip_destructor
);
978 ret
= ctdb_event_script_callback(ctdb
,
979 state
, release_ip_callback
, state
,
980 CTDB_EVENT_RELEASE_IP
,
983 ctdb_addr_to_str(&pip
->addr
),
984 vnn
->public_netmask_bits
);
987 DEBUG(DEBUG_ERR
,(__location__
" Failed to release IP %s on interface %s\n",
988 ctdb_addr_to_str(&pip
->addr
),
989 ctdb_vnn_iface_string(vnn
)));
994 /* tell the control that we will reply asynchronously */
999 static int ctdb_add_public_address(struct ctdb_context
*ctdb
,
1000 ctdb_sock_addr
*addr
,
1001 unsigned mask
, const char *ifaces
,
1004 struct ctdb_vnn
*vnn
;
1011 tmp
= strdup(ifaces
);
1012 for (iface
= strtok(tmp
, ","); iface
; iface
= strtok(NULL
, ",")) {
1013 if (!ctdb_sys_check_iface_exists(iface
)) {
1014 DEBUG(DEBUG_CRIT
,("Interface %s does not exist. Can not add public-address : %s\n", iface
, ctdb_addr_to_str(addr
)));
1021 /* Verify that we don't have an entry for this ip yet */
1022 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
1023 if (ctdb_same_sockaddr(addr
, &vnn
->public_address
)) {
1024 DEBUG(DEBUG_CRIT
,("Same ip '%s' specified multiple times in the public address list \n",
1025 ctdb_addr_to_str(addr
)));
1030 /* create a new vnn structure for this ip address */
1031 vnn
= talloc_zero(ctdb
, struct ctdb_vnn
);
1032 CTDB_NO_MEMORY_FATAL(ctdb
, vnn
);
1033 vnn
->ifaces
= talloc_array(vnn
, const char *, num
+ 2);
1034 tmp
= talloc_strdup(vnn
, ifaces
);
1035 CTDB_NO_MEMORY_FATAL(ctdb
, tmp
);
1036 for (iface
= strtok(tmp
, ","); iface
; iface
= strtok(NULL
, ",")) {
1037 vnn
->ifaces
= talloc_realloc(vnn
, vnn
->ifaces
, const char *, num
+ 2);
1038 CTDB_NO_MEMORY_FATAL(ctdb
, vnn
->ifaces
);
1039 vnn
->ifaces
[num
] = talloc_strdup(vnn
, iface
);
1040 CTDB_NO_MEMORY_FATAL(ctdb
, vnn
->ifaces
[num
]);
1044 vnn
->ifaces
[num
] = NULL
;
1045 vnn
->public_address
= *addr
;
1046 vnn
->public_netmask_bits
= mask
;
1048 if (check_address
) {
1049 if (ctdb_sys_have_ip(addr
)) {
1050 DEBUG(DEBUG_ERR
,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr
), ctdb
->pnn
));
1051 vnn
->pnn
= ctdb
->pnn
;
1055 for (i
=0; vnn
->ifaces
[i
]; i
++) {
1056 ret
= ctdb_add_local_iface(ctdb
, vnn
->ifaces
[i
]);
1058 DEBUG(DEBUG_CRIT
, (__location__
" failed to add iface[%s] "
1059 "for public_address[%s]\n",
1060 vnn
->ifaces
[i
], ctdb_addr_to_str(addr
)));
1066 DLIST_ADD(ctdb
->vnn
, vnn
);
1072 setup the public address lists from a file
1074 int ctdb_set_public_addresses(struct ctdb_context
*ctdb
, bool check_addresses
)
1080 lines
= file_lines_load(ctdb
->public_addresses_file
, &nlines
, 0, ctdb
);
1081 if (lines
== NULL
) {
1082 ctdb_set_error(ctdb
, "Failed to load public address list '%s'\n", ctdb
->public_addresses_file
);
1085 while (nlines
> 0 && strcmp(lines
[nlines
-1], "") == 0) {
1089 for (i
=0;i
<nlines
;i
++) {
1091 ctdb_sock_addr addr
;
1092 const char *addrstr
;
1097 while ((*line
== ' ') || (*line
== '\t')) {
1103 if (strcmp(line
, "") == 0) {
1106 tok
= strtok(line
, " \t");
1108 tok
= strtok(NULL
, " \t");
1110 if (NULL
== ctdb
->default_public_interface
) {
1111 DEBUG(DEBUG_CRIT
,("No default public interface and no interface specified at line %u of public address list\n",
1116 ifaces
= ctdb
->default_public_interface
;
1121 if (!addrstr
|| !parse_ip_mask(addrstr
, ifaces
, &addr
, &mask
)) {
1122 DEBUG(DEBUG_CRIT
,("Badly formed line %u in public address list\n", i
+1));
1126 if (ctdb_add_public_address(ctdb
, &addr
, mask
, ifaces
, check_addresses
)) {
1127 DEBUG(DEBUG_CRIT
,("Failed to add line %u to the public address list\n", i
+1));
1138 int ctdb_set_single_public_ip(struct ctdb_context
*ctdb
,
1142 struct ctdb_vnn
*svnn
;
1143 struct ctdb_interface
*cur
= NULL
;
1147 svnn
= talloc_zero(ctdb
, struct ctdb_vnn
);
1148 CTDB_NO_MEMORY(ctdb
, svnn
);
1150 svnn
->ifaces
= talloc_array(svnn
, const char *, 2);
1151 CTDB_NO_MEMORY(ctdb
, svnn
->ifaces
);
1152 svnn
->ifaces
[0] = talloc_strdup(svnn
->ifaces
, iface
);
1153 CTDB_NO_MEMORY(ctdb
, svnn
->ifaces
[0]);
1154 svnn
->ifaces
[1] = NULL
;
1156 ok
= parse_ip(ip
, iface
, 0, &svnn
->public_address
);
1162 ret
= ctdb_add_local_iface(ctdb
, svnn
->ifaces
[0]);
1164 DEBUG(DEBUG_CRIT
, (__location__
" failed to add iface[%s] "
1165 "for single_ip[%s]\n",
1167 ctdb_addr_to_str(&svnn
->public_address
)));
1172 /* assume the single public ip interface is initially "good" */
1173 cur
= ctdb_find_iface(ctdb
, iface
);
1175 DEBUG(DEBUG_CRIT
,("Can not find public interface %s used by --single-public-ip", iface
));
1178 cur
->link_up
= true;
1180 ret
= ctdb_vnn_assign_iface(ctdb
, svnn
);
1186 ctdb
->single_ip_vnn
= svnn
;
1190 static void *add_ip_callback(void *parm
, void *data
)
1192 struct public_ip_list
*this_ip
= parm
;
1193 struct public_ip_list
*prev_ip
= data
;
1195 if (prev_ip
== NULL
) {
1198 if (this_ip
->pnn
== -1) {
1199 this_ip
->pnn
= prev_ip
->pnn
;
1205 static int getips_count_callback(void *param
, void *data
)
1207 struct public_ip_list
**ip_list
= (struct public_ip_list
**)param
;
1208 struct public_ip_list
*new_ip
= (struct public_ip_list
*)data
;
1210 new_ip
->next
= *ip_list
;
1215 static int verify_remote_ip_allocation(struct ctdb_context
*ctdb
,
1216 struct ctdb_public_ip_list_old
*ips
,
1219 static int ctdb_reload_remote_public_ips(struct ctdb_context
*ctdb
,
1220 struct ipalloc_state
*ipalloc_state
,
1221 struct ctdb_node_map_old
*nodemap
)
1226 if (ipalloc_state
->num
!= nodemap
->num
) {
1229 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1230 ipalloc_state
->num
, nodemap
->num
));
1234 for (j
=0; j
<nodemap
->num
; j
++) {
1235 if (nodemap
->nodes
[j
].flags
& NODE_FLAGS_INACTIVE
) {
1239 /* Retrieve the list of known public IPs from the node */
1240 ret
= ctdb_ctrl_get_public_ips_flags(ctdb
,
1243 ipalloc_state
->known_public_ips
,
1245 &ipalloc_state
->known_public_ips
[j
]);
1248 ("Failed to read known public IPs from node: %u\n",
1253 if (ctdb
->do_checkpublicip
) {
1254 verify_remote_ip_allocation(ctdb
,
1255 ipalloc_state
->known_public_ips
[j
],
1259 /* Retrieve the list of available public IPs from the node */
1260 ret
= ctdb_ctrl_get_public_ips_flags(ctdb
,
1263 ipalloc_state
->available_public_ips
,
1264 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE
,
1265 &ipalloc_state
->available_public_ips
[j
]);
1268 ("Failed to read available public IPs from node: %u\n",
1277 static struct public_ip_list
*
1278 create_merged_ip_list(struct ctdb_context
*ctdb
, struct ipalloc_state
*ipalloc_state
)
1281 struct public_ip_list
*ip_list
;
1282 struct ctdb_public_ip_list_old
*public_ips
;
1284 TALLOC_FREE(ctdb
->ip_tree
);
1285 ctdb
->ip_tree
= trbt_create(ctdb
, 0);
1287 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
1288 public_ips
= ipalloc_state
->known_public_ips
[i
];
1290 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
1294 /* there were no public ips for this node */
1295 if (public_ips
== NULL
) {
1299 for (j
=0; j
< public_ips
->num
; j
++) {
1300 struct public_ip_list
*tmp_ip
;
1302 tmp_ip
= talloc_zero(ctdb
->ip_tree
, struct public_ip_list
);
1303 CTDB_NO_MEMORY_NULL(ctdb
, tmp_ip
);
1304 /* Do not use information about IP addresses hosted
1305 * on other nodes, it may not be accurate */
1306 if (public_ips
->ips
[j
].pnn
== ctdb
->nodes
[i
]->pnn
) {
1307 tmp_ip
->pnn
= public_ips
->ips
[j
].pnn
;
1311 tmp_ip
->addr
= public_ips
->ips
[j
].addr
;
1312 tmp_ip
->next
= NULL
;
1314 trbt_insertarray32_callback(ctdb
->ip_tree
,
1315 IP_KEYLEN
, ip_key(&public_ips
->ips
[j
].addr
),
1322 trbt_traversearray32(ctdb
->ip_tree
, IP_KEYLEN
, getips_count_callback
, &ip_list
);
1327 static bool all_nodes_are_disabled(struct ctdb_node_map_old
*nodemap
)
1331 for (i
=0;i
<nodemap
->num
;i
++) {
1332 if (!(nodemap
->nodes
[i
].flags
& (NODE_FLAGS_INACTIVE
|NODE_FLAGS_DISABLED
))) {
1333 /* Found one completely healthy node */
1341 struct get_tunable_callback_data
{
1342 const char *tunable
;
1347 static void get_tunable_callback(struct ctdb_context
*ctdb
, uint32_t pnn
,
1348 int32_t res
, TDB_DATA outdata
,
1351 struct get_tunable_callback_data
*cd
=
1352 (struct get_tunable_callback_data
*)callback
;
1356 /* Already handled in fail callback */
1360 if (outdata
.dsize
!= sizeof(uint32_t)) {
1361 DEBUG(DEBUG_ERR
,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1362 cd
->tunable
, pnn
, (int)sizeof(uint32_t),
1363 (int)outdata
.dsize
));
1368 size
= talloc_array_length(cd
->out
);
1370 DEBUG(DEBUG_ERR
,("Got %s reply from node %d but nodemap only has %d entries\n",
1371 cd
->tunable
, pnn
, size
));
1376 cd
->out
[pnn
] = *(uint32_t *)outdata
.dptr
;
1379 static void get_tunable_fail_callback(struct ctdb_context
*ctdb
, uint32_t pnn
,
1380 int32_t res
, TDB_DATA outdata
,
1383 struct get_tunable_callback_data
*cd
=
1384 (struct get_tunable_callback_data
*)callback
;
1389 ("Timed out getting tunable \"%s\" from node %d\n",
1395 DEBUG(DEBUG_WARNING
,
1396 ("Tunable \"%s\" not implemented on node %d\n",
1401 ("Unexpected error getting tunable \"%s\" from node %d\n",
1407 static uint32_t *get_tunable_from_nodes(struct ctdb_context
*ctdb
,
1408 TALLOC_CTX
*tmp_ctx
,
1409 struct ctdb_node_map_old
*nodemap
,
1410 const char *tunable
,
1411 uint32_t default_value
)
1414 struct ctdb_control_get_tunable
*t
;
1417 struct get_tunable_callback_data callback_data
;
1420 tvals
= talloc_array(tmp_ctx
, uint32_t, nodemap
->num
);
1421 CTDB_NO_MEMORY_NULL(ctdb
, tvals
);
1422 for (i
=0; i
<nodemap
->num
; i
++) {
1423 tvals
[i
] = default_value
;
1426 callback_data
.out
= tvals
;
1427 callback_data
.tunable
= tunable
;
1428 callback_data
.fatal
= false;
1430 data
.dsize
= offsetof(struct ctdb_control_get_tunable
, name
) + strlen(tunable
) + 1;
1431 data
.dptr
= talloc_size(tmp_ctx
, data
.dsize
);
1432 t
= (struct ctdb_control_get_tunable
*)data
.dptr
;
1433 t
->length
= strlen(tunable
)+1;
1434 memcpy(t
->name
, tunable
, t
->length
);
1435 nodes
= list_of_connected_nodes(ctdb
, nodemap
, tmp_ctx
, true);
1436 if (ctdb_client_async_control(ctdb
, CTDB_CONTROL_GET_TUNABLE
,
1437 nodes
, 0, TAKEOVER_TIMEOUT(),
1439 get_tunable_callback
,
1440 get_tunable_fail_callback
,
1441 &callback_data
) != 0) {
1442 if (callback_data
.fatal
) {
1448 talloc_free(data
.dptr
);
1453 /* Set internal flags for IP allocation:
1455 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1456 * Set NOIPHOST ip flag for each INACTIVE node
1457 * if all nodes are disabled:
1458 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1460 * Set NOIPHOST ip flags for disabled nodes
1462 static void set_ipflags_internal(struct ipalloc_state
*ipalloc_state
,
1463 struct ctdb_node_map_old
*nodemap
,
1464 uint32_t *tval_noiptakeover
,
1465 uint32_t *tval_noiphostonalldisabled
)
1469 for (i
=0;i
<nodemap
->num
;i
++) {
1470 /* Can not take IPs on node with NoIPTakeover set */
1471 if (tval_noiptakeover
[i
] != 0) {
1472 ipalloc_state
->noiptakeover
[i
] = true;
1475 /* Can not host IPs on INACTIVE node */
1476 if (nodemap
->nodes
[i
].flags
& NODE_FLAGS_INACTIVE
) {
1477 ipalloc_state
->noiphost
[i
] = true;
1481 if (all_nodes_are_disabled(nodemap
)) {
1482 /* If all nodes are disabled, can not host IPs on node
1483 * with NoIPHostOnAllDisabled set
1485 for (i
=0;i
<nodemap
->num
;i
++) {
1486 if (tval_noiphostonalldisabled
[i
] != 0) {
1487 ipalloc_state
->noiphost
[i
] = true;
1491 /* If some nodes are not disabled, then can not host
1492 * IPs on DISABLED node
1494 for (i
=0;i
<nodemap
->num
;i
++) {
1495 if (nodemap
->nodes
[i
].flags
& NODE_FLAGS_DISABLED
) {
1496 ipalloc_state
->noiphost
[i
] = true;
1502 static bool set_ipflags(struct ctdb_context
*ctdb
,
1503 struct ipalloc_state
*ipalloc_state
,
1504 struct ctdb_node_map_old
*nodemap
)
1506 uint32_t *tval_noiptakeover
;
1507 uint32_t *tval_noiphostonalldisabled
;
1509 tval_noiptakeover
= get_tunable_from_nodes(ctdb
, ipalloc_state
, nodemap
,
1511 if (tval_noiptakeover
== NULL
) {
1515 tval_noiphostonalldisabled
=
1516 get_tunable_from_nodes(ctdb
, ipalloc_state
, nodemap
,
1517 "NoIPHostOnAllDisabled", 0);
1518 if (tval_noiphostonalldisabled
== NULL
) {
1519 /* Caller frees tmp_ctx */
1523 set_ipflags_internal(ipalloc_state
, nodemap
,
1525 tval_noiphostonalldisabled
);
1527 talloc_free(tval_noiptakeover
);
1528 talloc_free(tval_noiphostonalldisabled
);
1533 static struct ipalloc_state
* ipalloc_state_init(struct ctdb_context
*ctdb
,
1534 TALLOC_CTX
*mem_ctx
)
1536 struct ipalloc_state
*ipalloc_state
=
1537 talloc_zero(mem_ctx
, struct ipalloc_state
);
1538 if (ipalloc_state
== NULL
) {
1539 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
1543 ipalloc_state
->num
= ctdb
->num_nodes
;
1544 ipalloc_state
->known_public_ips
=
1545 talloc_zero_array(ipalloc_state
,
1546 struct ctdb_public_ip_list_old
*,
1547 ipalloc_state
->num
);
1548 if (ipalloc_state
->known_public_ips
== NULL
) {
1549 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
1550 talloc_free(ipalloc_state
);
1553 ipalloc_state
->available_public_ips
=
1554 talloc_zero_array(ipalloc_state
,
1555 struct ctdb_public_ip_list_old
*,
1556 ipalloc_state
->num
);
1557 if (ipalloc_state
->available_public_ips
== NULL
) {
1558 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
1559 talloc_free(ipalloc_state
);
1562 ipalloc_state
->noiptakeover
=
1563 talloc_zero_array(ipalloc_state
,
1565 ipalloc_state
->num
);
1566 if (ipalloc_state
->noiptakeover
== NULL
) {
1567 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
1568 talloc_free(ipalloc_state
);
1571 ipalloc_state
->noiphost
=
1572 talloc_zero_array(ipalloc_state
,
1574 ipalloc_state
->num
);
1575 if (ipalloc_state
->noiphost
== NULL
) {
1576 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
1577 talloc_free(ipalloc_state
);
1581 if (1 == ctdb
->tunable
.lcp2_public_ip_assignment
) {
1582 ipalloc_state
->algorithm
= IPALLOC_LCP2
;
1583 } else if (1 == ctdb
->tunable
.deterministic_public_ips
) {
1584 ipalloc_state
->algorithm
= IPALLOC_DETERMINISTIC
;
1586 ipalloc_state
->algorithm
= IPALLOC_NONDETERMINISTIC
;
1589 ipalloc_state
->no_ip_failback
= ctdb
->tunable
.no_ip_failback
;
1591 return ipalloc_state
;
1594 struct iprealloc_callback_data
{
1597 client_async_callback fail_callback
;
1598 void *fail_callback_data
;
1599 struct ctdb_node_map_old
*nodemap
;
1602 static void iprealloc_fail_callback(struct ctdb_context
*ctdb
, uint32_t pnn
,
1603 int32_t res
, TDB_DATA outdata
,
1607 struct iprealloc_callback_data
*cd
=
1608 (struct iprealloc_callback_data
*)callback
;
1610 numnodes
= talloc_array_length(cd
->retry_nodes
);
1611 if (pnn
> numnodes
) {
1613 ("ipreallocated failure from node %d, "
1614 "but only %d nodes in nodemap\n",
1619 /* Can't run the "ipreallocated" event on a INACTIVE node */
1620 if (cd
->nodemap
->nodes
[pnn
].flags
& NODE_FLAGS_INACTIVE
) {
1621 DEBUG(DEBUG_WARNING
,
1622 ("ipreallocated failed on inactive node %d, ignoring\n",
1629 /* If the control timed out then that's a real error,
1630 * so call the real fail callback
1632 if (cd
->fail_callback
) {
1633 cd
->fail_callback(ctdb
, pnn
, res
, outdata
,
1634 cd
->fail_callback_data
);
1636 DEBUG(DEBUG_WARNING
,
1637 ("iprealloc timed out but no callback registered\n"));
1641 /* If not a timeout then either the ipreallocated
1642 * eventscript (or some setup) failed. This might
1643 * have failed because the IPREALLOCATED control isn't
1644 * implemented - right now there is no way of knowing
1645 * because the error codes are all folded down to -1.
1646 * Consider retrying using EVENTSCRIPT control...
1648 DEBUG(DEBUG_WARNING
,
1649 ("ipreallocated failure from node %d, flagging retry\n",
1651 cd
->retry_nodes
[pnn
] = true;
1656 struct takeover_callback_data
{
1658 client_async_callback fail_callback
;
1659 void *fail_callback_data
;
1660 struct ctdb_node_map_old
*nodemap
;
1663 static void takeover_run_fail_callback(struct ctdb_context
*ctdb
,
1664 uint32_t node_pnn
, int32_t res
,
1665 TDB_DATA outdata
, void *callback_data
)
1667 struct takeover_callback_data
*cd
=
1668 talloc_get_type_abort(callback_data
,
1669 struct takeover_callback_data
);
1672 for (i
= 0; i
< cd
->nodemap
->num
; i
++) {
1673 if (node_pnn
== cd
->nodemap
->nodes
[i
].pnn
) {
1678 if (i
== cd
->nodemap
->num
) {
1679 DEBUG(DEBUG_ERR
, (__location__
" invalid PNN %u\n", node_pnn
));
1683 if (!cd
->node_failed
[i
]) {
1684 cd
->node_failed
[i
] = true;
1685 cd
->fail_callback(ctdb
, node_pnn
, res
, outdata
,
1686 cd
->fail_callback_data
);
1691 * Recalculate the allocation of public IPs to nodes and have the
1692 * nodes host their allocated addresses.
1694 * - Allocate memory for IP allocation state, including per node
1696 * - Populate IP allocation algorithm in IP allocation state
1697 * - Populate local value of tunable NoIPFailback in IP allocation
1698 state - this is really a cluster-wide configuration variable and
1699 only the value form the master node is used
1700 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1701 * connected nodes - this is done separately so tunable values can
1702 * be faked in unit testing
1703 * - Populate NoIPTakover tunable in IP allocation state
1704 * - Populate NoIPHost in IP allocation state, derived from node flags
1705 * and NoIPHostOnAllDisabled tunable
1706 * - Retrieve and populate known and available IP lists in IP
1708 * - If no available IP addresses then early exit
1709 * - Build list of (known IPs, currently assigned node)
1710 * - Populate list of nodes to force rebalance - internal structure,
1711 * currently no way to fetch, only used by LCP2 for nodes that have
1712 * had new IP addresses added
1713 * - Run IP allocation algorithm
1714 * - Send RELEASE_IP to all nodes for IPs they should not host
1715 * - Send TAKE_IP to all nodes for IPs they should host
1716 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1718 int ctdb_takeover_run(struct ctdb_context
*ctdb
, struct ctdb_node_map_old
*nodemap
,
1719 uint32_t *force_rebalance_nodes
,
1720 client_async_callback fail_callback
, void *callback_data
)
1723 struct ctdb_public_ip ip
;
1725 struct public_ip_list
*all_ips
, *tmp_ip
;
1727 struct timeval timeout
;
1728 struct client_async_data
*async_data
;
1729 struct ctdb_client_control_state
*state
;
1730 TALLOC_CTX
*tmp_ctx
= talloc_new(ctdb
);
1731 struct ipalloc_state
*ipalloc_state
;
1732 struct takeover_callback_data
*takeover_data
;
1733 struct iprealloc_callback_data iprealloc_data
;
1738 * ip failover is completely disabled, just send out the
1739 * ipreallocated event.
1741 if (ctdb
->tunable
.disable_ip_failover
!= 0) {
1745 ipalloc_state
= ipalloc_state_init(ctdb
, tmp_ctx
);
1746 if (ipalloc_state
== NULL
) {
1747 talloc_free(tmp_ctx
);
1751 if (!set_ipflags(ctdb
, ipalloc_state
, nodemap
)) {
1752 DEBUG(DEBUG_ERR
,("Failed to set IP flags - aborting takeover run\n"));
1753 talloc_free(tmp_ctx
);
1757 /* Fetch known/available public IPs from each active node */
1758 ret
= ctdb_reload_remote_public_ips(ctdb
, ipalloc_state
, nodemap
);
1760 talloc_free(tmp_ctx
);
1764 /* Short-circuit IP allocation if no node has available IPs */
1765 can_host_ips
= false;
1766 for (i
=0; i
< ipalloc_state
->num
; i
++) {
1767 if (ipalloc_state
->available_public_ips
[i
] != NULL
) {
1768 can_host_ips
= true;
1771 if (!can_host_ips
) {
1772 DEBUG(DEBUG_WARNING
,("No nodes available to host public IPs yet\n"));
1776 /* since nodes only know about those public addresses that
1777 can be served by that particular node, no single node has
1778 a full list of all public addresses that exist in the cluster.
1779 Walk over all node structures and create a merged list of
1780 all public addresses that exist in the cluster.
1782 keep the tree of ips around as ctdb->ip_tree
1784 all_ips
= create_merged_ip_list(ctdb
, ipalloc_state
);
1785 ipalloc_state
->all_ips
= all_ips
;
1787 ipalloc_state
->force_rebalance_nodes
= force_rebalance_nodes
;
1789 /* Do the IP reassignment calculations */
1790 ipalloc(ipalloc_state
);
1792 /* Now tell all nodes to release any public IPs should not
1793 * host. This will be a NOOP on nodes that don't currently
1794 * hold the given IP.
1796 takeover_data
= talloc_zero(tmp_ctx
, struct takeover_callback_data
);
1797 CTDB_NO_MEMORY_FATAL(ctdb
, takeover_data
);
1799 takeover_data
->node_failed
= talloc_zero_array(tmp_ctx
,
1800 bool, nodemap
->num
);
1801 CTDB_NO_MEMORY_FATAL(ctdb
, takeover_data
->node_failed
);
1802 takeover_data
->fail_callback
= fail_callback
;
1803 takeover_data
->fail_callback_data
= callback_data
;
1804 takeover_data
->nodemap
= nodemap
;
1806 async_data
= talloc_zero(tmp_ctx
, struct client_async_data
);
1807 CTDB_NO_MEMORY_FATAL(ctdb
, async_data
);
1809 async_data
->fail_callback
= takeover_run_fail_callback
;
1810 async_data
->callback_data
= takeover_data
;
1812 ZERO_STRUCT(ip
); /* Avoid valgrind warnings for union */
1814 /* Send a RELEASE_IP to all nodes that should not be hosting
1815 * each IP. For each IP, all but one of these will be
1816 * redundant. However, the redundant ones are used to tell
1817 * nodes which node should be hosting the IP so that commands
1818 * like "ctdb ip" can display a particular nodes idea of who
1819 * is hosting what. */
1820 for (i
=0;i
<nodemap
->num
;i
++) {
1821 /* don't talk to unconnected nodes, but do talk to banned nodes */
1822 if (nodemap
->nodes
[i
].flags
& NODE_FLAGS_DISCONNECTED
) {
1826 for (tmp_ip
=all_ips
;tmp_ip
;tmp_ip
=tmp_ip
->next
) {
1827 if (tmp_ip
->pnn
== nodemap
->nodes
[i
].pnn
) {
1828 /* This node should be serving this
1829 vnn so don't tell it to release the ip
1833 ip
.pnn
= tmp_ip
->pnn
;
1834 ip
.addr
= tmp_ip
->addr
;
1836 timeout
= TAKEOVER_TIMEOUT();
1837 data
.dsize
= sizeof(ip
);
1838 data
.dptr
= (uint8_t *)&ip
;
1839 state
= ctdb_control_send(ctdb
, nodemap
->nodes
[i
].pnn
,
1840 0, CTDB_CONTROL_RELEASE_IP
, 0,
1843 if (state
== NULL
) {
1844 DEBUG(DEBUG_ERR
,(__location__
" Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap
->nodes
[i
].pnn
));
1845 talloc_free(tmp_ctx
);
1849 ctdb_client_async_add(async_data
, state
);
1852 if (ctdb_client_async_wait(ctdb
, async_data
) != 0) {
1853 DEBUG(DEBUG_ERR
,(__location__
" Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1854 talloc_free(tmp_ctx
);
1857 talloc_free(async_data
);
1860 /* For each IP, send a TAKOVER_IP to the node that should be
1861 * hosting it. Many of these will often be redundant (since
1862 * the allocation won't have changed) but they can be useful
1863 * to recover from inconsistencies. */
1864 async_data
= talloc_zero(tmp_ctx
, struct client_async_data
);
1865 CTDB_NO_MEMORY_FATAL(ctdb
, async_data
);
1867 async_data
->fail_callback
= fail_callback
;
1868 async_data
->callback_data
= callback_data
;
1870 for (tmp_ip
=all_ips
;tmp_ip
;tmp_ip
=tmp_ip
->next
) {
1871 if (tmp_ip
->pnn
== -1) {
1872 /* this IP won't be taken over */
1876 ip
.pnn
= tmp_ip
->pnn
;
1877 ip
.addr
= tmp_ip
->addr
;
1879 timeout
= TAKEOVER_TIMEOUT();
1880 data
.dsize
= sizeof(ip
);
1881 data
.dptr
= (uint8_t *)&ip
;
1882 state
= ctdb_control_send(ctdb
, tmp_ip
->pnn
,
1883 0, CTDB_CONTROL_TAKEOVER_IP
, 0,
1884 data
, async_data
, &timeout
, NULL
);
1885 if (state
== NULL
) {
1886 DEBUG(DEBUG_ERR
,(__location__
" Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip
->pnn
));
1887 talloc_free(tmp_ctx
);
1891 ctdb_client_async_add(async_data
, state
);
1893 if (ctdb_client_async_wait(ctdb
, async_data
) != 0) {
1894 DEBUG(DEBUG_ERR
,(__location__
" Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1895 talloc_free(tmp_ctx
);
1901 * Tell all nodes to run eventscripts to process the
1902 * "ipreallocated" event. This can do a lot of things,
1903 * including restarting services to reconfigure them if public
1904 * IPs have moved. Once upon a time this event only used to
1907 retry_data
= talloc_zero_array(tmp_ctx
, bool, nodemap
->num
);
1908 CTDB_NO_MEMORY_FATAL(ctdb
, retry_data
);
1909 iprealloc_data
.retry_nodes
= retry_data
;
1910 iprealloc_data
.retry_count
= 0;
1911 iprealloc_data
.fail_callback
= fail_callback
;
1912 iprealloc_data
.fail_callback_data
= callback_data
;
1913 iprealloc_data
.nodemap
= nodemap
;
1915 nodes
= list_of_connected_nodes(ctdb
, nodemap
, tmp_ctx
, true);
1916 ret
= ctdb_client_async_control(ctdb
, CTDB_CONTROL_IPREALLOCATED
,
1917 nodes
, 0, TAKEOVER_TIMEOUT(),
1919 NULL
, iprealloc_fail_callback
,
1922 /* If the control failed then we should retry to any
1923 * nodes flagged by iprealloc_fail_callback using the
1924 * EVENTSCRIPT control. This is a best-effort at
1925 * backward compatiblity when running a mixed cluster
1926 * where some nodes have not yet been upgraded to
1927 * support the IPREALLOCATED control.
1929 DEBUG(DEBUG_WARNING
,
1930 ("Retry ipreallocated to some nodes using eventscript control\n"));
1932 nodes
= talloc_array(tmp_ctx
, uint32_t,
1933 iprealloc_data
.retry_count
);
1934 CTDB_NO_MEMORY_FATAL(ctdb
, nodes
);
1937 for (i
=0; i
<nodemap
->num
; i
++) {
1938 if (iprealloc_data
.retry_nodes
[i
]) {
1944 data
.dptr
= discard_const("ipreallocated");
1945 data
.dsize
= strlen((char *)data
.dptr
) + 1;
1946 ret
= ctdb_client_async_control(ctdb
,
1947 CTDB_CONTROL_RUN_EVENTSCRIPTS
,
1948 nodes
, 0, TAKEOVER_TIMEOUT(),
1950 NULL
, fail_callback
,
1953 DEBUG(DEBUG_ERR
, (__location__
" failed to send control to run eventscripts with \"ipreallocated\"\n"));
1957 talloc_free(tmp_ctx
);
1963 destroy a ctdb_client_ip structure
1965 static int ctdb_client_ip_destructor(struct ctdb_client_ip
*ip
)
1967 DEBUG(DEBUG_DEBUG
,("destroying client tcp for %s:%u (client_id %u)\n",
1968 ctdb_addr_to_str(&ip
->addr
),
1969 ntohs(ip
->addr
.ip
.sin_port
),
1972 DLIST_REMOVE(ip
->ctdb
->client_ip_list
, ip
);
1977 called by a client to inform us of a TCP connection that it is managing
1978 that should tickled with an ACK when IP takeover is done
1980 int32_t ctdb_control_tcp_client(struct ctdb_context
*ctdb
, uint32_t client_id
,
1983 struct ctdb_client
*client
= reqid_find(ctdb
->idr
, client_id
, struct ctdb_client
);
1984 struct ctdb_connection
*tcp_sock
= NULL
;
1985 struct ctdb_tcp_list
*tcp
;
1986 struct ctdb_connection t
;
1989 struct ctdb_client_ip
*ip
;
1990 struct ctdb_vnn
*vnn
;
1991 ctdb_sock_addr addr
;
1993 /* If we don't have public IPs, tickles are useless */
1994 if (ctdb
->vnn
== NULL
) {
1998 tcp_sock
= (struct ctdb_connection
*)indata
.dptr
;
2000 addr
= tcp_sock
->src
;
2001 ctdb_canonicalize_ip(&addr
, &tcp_sock
->src
);
2002 addr
= tcp_sock
->dst
;
2003 ctdb_canonicalize_ip(&addr
, &tcp_sock
->dst
);
2006 memcpy(&addr
, &tcp_sock
->dst
, sizeof(addr
));
2007 vnn
= find_public_ip_vnn(ctdb
, &addr
);
2009 switch (addr
.sa
.sa_family
) {
2011 if (ntohl(addr
.ip
.sin_addr
.s_addr
) != INADDR_LOOPBACK
) {
2012 DEBUG(DEBUG_ERR
,("Could not add client IP %s. This is not a public address.\n",
2013 ctdb_addr_to_str(&addr
)));
2017 DEBUG(DEBUG_ERR
,("Could not add client IP %s. This is not a public ipv6 address.\n",
2018 ctdb_addr_to_str(&addr
)));
2021 DEBUG(DEBUG_ERR
,(__location__
" Unknown family type %d\n", addr
.sa
.sa_family
));
2027 if (vnn
->pnn
!= ctdb
->pnn
) {
2028 DEBUG(DEBUG_ERR
,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2029 ctdb_addr_to_str(&addr
),
2030 client_id
, client
->pid
));
2031 /* failing this call will tell smbd to die */
2035 ip
= talloc(client
, struct ctdb_client_ip
);
2036 CTDB_NO_MEMORY(ctdb
, ip
);
2040 ip
->client_id
= client_id
;
2041 talloc_set_destructor(ip
, ctdb_client_ip_destructor
);
2042 DLIST_ADD(ctdb
->client_ip_list
, ip
);
2044 tcp
= talloc(client
, struct ctdb_tcp_list
);
2045 CTDB_NO_MEMORY(ctdb
, tcp
);
2047 tcp
->connection
.src
= tcp_sock
->src
;
2048 tcp
->connection
.dst
= tcp_sock
->dst
;
2050 DLIST_ADD(client
->tcp_list
, tcp
);
2052 t
.src
= tcp_sock
->src
;
2053 t
.dst
= tcp_sock
->dst
;
2055 data
.dptr
= (uint8_t *)&t
;
2056 data
.dsize
= sizeof(t
);
2058 switch (addr
.sa
.sa_family
) {
2060 DEBUG(DEBUG_INFO
,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2061 (unsigned)ntohs(tcp_sock
->dst
.ip
.sin_port
),
2062 ctdb_addr_to_str(&tcp_sock
->src
),
2063 (unsigned)ntohs(tcp_sock
->src
.ip
.sin_port
), client_id
, client
->pid
));
2066 DEBUG(DEBUG_INFO
,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2067 (unsigned)ntohs(tcp_sock
->dst
.ip6
.sin6_port
),
2068 ctdb_addr_to_str(&tcp_sock
->src
),
2069 (unsigned)ntohs(tcp_sock
->src
.ip6
.sin6_port
), client_id
, client
->pid
));
2072 DEBUG(DEBUG_ERR
,(__location__
" Unknown family %d\n", addr
.sa
.sa_family
));
2076 /* tell all nodes about this tcp connection */
2077 ret
= ctdb_daemon_send_control(ctdb
, CTDB_BROADCAST_CONNECTED
, 0,
2078 CTDB_CONTROL_TCP_ADD
,
2079 0, CTDB_CTRL_FLAG_NOREPLY
, data
, NULL
, NULL
);
2081 DEBUG(DEBUG_ERR
,(__location__
" Failed to send CTDB_CONTROL_TCP_ADD\n"));
2089 find a tcp address on a list
2091 static struct ctdb_connection
*ctdb_tcp_find(struct ctdb_tcp_array
*array
,
2092 struct ctdb_connection
*tcp
)
2096 if (array
== NULL
) {
2100 for (i
=0;i
<array
->num
;i
++) {
2101 if (ctdb_same_sockaddr(&array
->connections
[i
].src
, &tcp
->src
) &&
2102 ctdb_same_sockaddr(&array
->connections
[i
].dst
, &tcp
->dst
)) {
2103 return &array
->connections
[i
];
2112 called by a daemon to inform us of a TCP connection that one of its
2113 clients managing that should tickled with an ACK when IP takeover is
2116 int32_t ctdb_control_tcp_add(struct ctdb_context
*ctdb
, TDB_DATA indata
, bool tcp_update_needed
)
2118 struct ctdb_connection
*p
= (struct ctdb_connection
*)indata
.dptr
;
2119 struct ctdb_tcp_array
*tcparray
;
2120 struct ctdb_connection tcp
;
2121 struct ctdb_vnn
*vnn
;
2123 /* If we don't have public IPs, tickles are useless */
2124 if (ctdb
->vnn
== NULL
) {
2128 vnn
= find_public_ip_vnn(ctdb
, &p
->dst
);
2130 DEBUG(DEBUG_INFO
,(__location__
" got TCP_ADD control for an address which is not a public address '%s'\n",
2131 ctdb_addr_to_str(&p
->dst
)));
2137 tcparray
= vnn
->tcp_array
;
2139 /* If this is the first tickle */
2140 if (tcparray
== NULL
) {
2141 tcparray
= talloc(vnn
, struct ctdb_tcp_array
);
2142 CTDB_NO_MEMORY(ctdb
, tcparray
);
2143 vnn
->tcp_array
= tcparray
;
2146 tcparray
->connections
= talloc_size(tcparray
, sizeof(struct ctdb_connection
));
2147 CTDB_NO_MEMORY(ctdb
, tcparray
->connections
);
2149 tcparray
->connections
[tcparray
->num
].src
= p
->src
;
2150 tcparray
->connections
[tcparray
->num
].dst
= p
->dst
;
2153 if (tcp_update_needed
) {
2154 vnn
->tcp_update_needed
= true;
2160 /* Do we already have this tickle ?*/
2163 if (ctdb_tcp_find(tcparray
, &tcp
) != NULL
) {
2164 DEBUG(DEBUG_DEBUG
,("Already had tickle info for %s:%u for vnn:%u\n",
2165 ctdb_addr_to_str(&tcp
.dst
),
2166 ntohs(tcp
.dst
.ip
.sin_port
),
2171 /* A new tickle, we must add it to the array */
2172 tcparray
->connections
= talloc_realloc(tcparray
, tcparray
->connections
,
2173 struct ctdb_connection
,
2175 CTDB_NO_MEMORY(ctdb
, tcparray
->connections
);
2177 tcparray
->connections
[tcparray
->num
].src
= p
->src
;
2178 tcparray
->connections
[tcparray
->num
].dst
= p
->dst
;
2181 DEBUG(DEBUG_INFO
,("Added tickle info for %s:%u from vnn %u\n",
2182 ctdb_addr_to_str(&tcp
.dst
),
2183 ntohs(tcp
.dst
.ip
.sin_port
),
2186 if (tcp_update_needed
) {
2187 vnn
->tcp_update_needed
= true;
2194 static void ctdb_remove_connection(struct ctdb_vnn
*vnn
, struct ctdb_connection
*conn
)
2196 struct ctdb_connection
*tcpp
;
2202 /* if the array is empty we cant remove it
2203 and we don't need to do anything
2205 if (vnn
->tcp_array
== NULL
) {
2206 DEBUG(DEBUG_INFO
,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2207 ctdb_addr_to_str(&conn
->dst
),
2208 ntohs(conn
->dst
.ip
.sin_port
)));
2213 /* See if we know this connection
2214 if we don't know this connection then we dont need to do anything
2216 tcpp
= ctdb_tcp_find(vnn
->tcp_array
, conn
);
2218 DEBUG(DEBUG_INFO
,("Trying to remove tickle that doesnt exist %s:%u\n",
2219 ctdb_addr_to_str(&conn
->dst
),
2220 ntohs(conn
->dst
.ip
.sin_port
)));
2225 /* We need to remove this entry from the array.
2226 Instead of allocating a new array and copying data to it
2227 we cheat and just copy the last entry in the existing array
2228 to the entry that is to be removed and just shring the
2231 *tcpp
= vnn
->tcp_array
->connections
[vnn
->tcp_array
->num
- 1];
2232 vnn
->tcp_array
->num
--;
2234 /* If we deleted the last entry we also need to remove the entire array
2236 if (vnn
->tcp_array
->num
== 0) {
2237 talloc_free(vnn
->tcp_array
);
2238 vnn
->tcp_array
= NULL
;
2241 vnn
->tcp_update_needed
= true;
2243 DEBUG(DEBUG_INFO
,("Removed tickle info for %s:%u\n",
2244 ctdb_addr_to_str(&conn
->src
),
2245 ntohs(conn
->src
.ip
.sin_port
)));
2250 called by a daemon to inform us of a TCP connection that one of its
2251 clients used are no longer needed in the tickle database
2253 int32_t ctdb_control_tcp_remove(struct ctdb_context
*ctdb
, TDB_DATA indata
)
2255 struct ctdb_vnn
*vnn
;
2256 struct ctdb_connection
*conn
= (struct ctdb_connection
*)indata
.dptr
;
2258 /* If we don't have public IPs, tickles are useless */
2259 if (ctdb
->vnn
== NULL
) {
2263 vnn
= find_public_ip_vnn(ctdb
, &conn
->dst
);
2266 (__location__
" unable to find public address %s\n",
2267 ctdb_addr_to_str(&conn
->dst
)));
2271 ctdb_remove_connection(vnn
, conn
);
2278 Called when another daemon starts - causes all tickles for all
2279 public addresses we are serving to be sent to the new node on the
2280 next check. This actually causes the next scheduled call to
2281 tdb_update_tcp_tickles() to update all nodes. This is simple and
2282 doesn't require careful error handling.
2284 int32_t ctdb_control_startup(struct ctdb_context
*ctdb
, uint32_t pnn
)
2286 struct ctdb_vnn
*vnn
;
2288 DEBUG(DEBUG_INFO
, ("Received startup control from node %lu\n",
2289 (unsigned long) pnn
));
2291 for (vnn
= ctdb
->vnn
; vnn
!= NULL
; vnn
= vnn
->next
) {
2292 vnn
->tcp_update_needed
= true;
2300 called when a client structure goes away - hook to remove
2301 elements from the tcp_list in all daemons
2303 void ctdb_takeover_client_destructor_hook(struct ctdb_client
*client
)
2305 while (client
->tcp_list
) {
2306 struct ctdb_vnn
*vnn
;
2307 struct ctdb_tcp_list
*tcp
= client
->tcp_list
;
2308 struct ctdb_connection
*conn
= &tcp
->connection
;
2310 DLIST_REMOVE(client
->tcp_list
, tcp
);
2312 vnn
= find_public_ip_vnn(client
->ctdb
,
2316 (__location__
" unable to find public address %s\n",
2317 ctdb_addr_to_str(&conn
->dst
)));
2321 /* If the IP address is hosted on this node then
2322 * remove the connection. */
2323 if (vnn
->pnn
== client
->ctdb
->pnn
) {
2324 ctdb_remove_connection(vnn
, conn
);
2327 /* Otherwise this function has been called because the
2328 * server IP address has been released to another node
2329 * and the client has exited. This means that we
2330 * should not delete the connection information. The
2331 * takeover node processes connections too. */
2336 void ctdb_release_all_ips(struct ctdb_context
*ctdb
)
2338 struct ctdb_vnn
*vnn
;
2341 if (ctdb
->tunable
.disable_ip_failover
== 1) {
2345 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
2346 if (!ctdb_sys_have_ip(&vnn
->public_address
)) {
2347 ctdb_vnn_unassign_iface(ctdb
, vnn
);
2354 /* Don't allow multiple releases at once. Some code,
2355 * particularly ctdb_tickle_sentenced_connections() is
2357 if (vnn
->update_in_flight
) {
2358 DEBUG(DEBUG_WARNING
,
2360 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2361 ctdb_addr_to_str(&vnn
->public_address
),
2362 vnn
->public_netmask_bits
,
2363 ctdb_vnn_iface_string(vnn
)));
2366 vnn
->update_in_flight
= true;
2368 DEBUG(DEBUG_INFO
,("Release of IP %s/%u on interface %s node:-1\n",
2369 ctdb_addr_to_str(&vnn
->public_address
),
2370 vnn
->public_netmask_bits
,
2371 ctdb_vnn_iface_string(vnn
)));
2373 ctdb_event_script_args(ctdb
, CTDB_EVENT_RELEASE_IP
, "%s %s %u",
2374 ctdb_vnn_iface_string(vnn
),
2375 ctdb_addr_to_str(&vnn
->public_address
),
2376 vnn
->public_netmask_bits
);
2377 ctdb_vnn_unassign_iface(ctdb
, vnn
);
2378 vnn
->update_in_flight
= false;
2382 DEBUG(DEBUG_NOTICE
,(__location__
" Released %d public IPs\n", count
));
2387 get list of public IPs
2389 int32_t ctdb_control_get_public_ips(struct ctdb_context
*ctdb
,
2390 struct ctdb_req_control_old
*c
, TDB_DATA
*outdata
)
2393 struct ctdb_public_ip_list_old
*ips
;
2394 struct ctdb_vnn
*vnn
;
2395 bool only_available
= false;
2397 if (c
->flags
& CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE
) {
2398 only_available
= true;
2401 /* count how many public ip structures we have */
2403 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
2407 len
= offsetof(struct ctdb_public_ip_list_old
, ips
) +
2408 num
*sizeof(struct ctdb_public_ip
);
2409 ips
= talloc_zero_size(outdata
, len
);
2410 CTDB_NO_MEMORY(ctdb
, ips
);
2413 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
2414 if (only_available
&& !ctdb_vnn_available(ctdb
, vnn
)) {
2417 ips
->ips
[i
].pnn
= vnn
->pnn
;
2418 ips
->ips
[i
].addr
= vnn
->public_address
;
2422 len
= offsetof(struct ctdb_public_ip_list_old
, ips
) +
2423 i
*sizeof(struct ctdb_public_ip
);
2425 outdata
->dsize
= len
;
2426 outdata
->dptr
= (uint8_t *)ips
;
2432 int32_t ctdb_control_get_public_ip_info(struct ctdb_context
*ctdb
,
2433 struct ctdb_req_control_old
*c
,
2438 ctdb_sock_addr
*addr
;
2439 struct ctdb_public_ip_info_old
*info
;
2440 struct ctdb_vnn
*vnn
;
2442 addr
= (ctdb_sock_addr
*)indata
.dptr
;
2444 vnn
= find_public_ip_vnn(ctdb
, addr
);
2446 /* if it is not a public ip it could be our 'single ip' */
2447 if (ctdb
->single_ip_vnn
) {
2448 if (ctdb_same_ip(&ctdb
->single_ip_vnn
->public_address
, addr
)) {
2449 vnn
= ctdb
->single_ip_vnn
;
2454 DEBUG(DEBUG_ERR
,(__location__
" Could not get public ip info, "
2455 "'%s'not a public address\n",
2456 ctdb_addr_to_str(addr
)));
2460 /* count how many public ip structures we have */
2462 for (;vnn
->ifaces
[num
];) {
2466 len
= offsetof(struct ctdb_public_ip_info_old
, ifaces
) +
2467 num
*sizeof(struct ctdb_iface
);
2468 info
= talloc_zero_size(outdata
, len
);
2469 CTDB_NO_MEMORY(ctdb
, info
);
2471 info
->ip
.addr
= vnn
->public_address
;
2472 info
->ip
.pnn
= vnn
->pnn
;
2473 info
->active_idx
= 0xFFFFFFFF;
2475 for (i
=0; vnn
->ifaces
[i
]; i
++) {
2476 struct ctdb_interface
*cur
;
2478 cur
= ctdb_find_iface(ctdb
, vnn
->ifaces
[i
]);
2480 DEBUG(DEBUG_CRIT
, (__location__
" internal error iface[%s] unknown\n",
2484 if (vnn
->iface
== cur
) {
2485 info
->active_idx
= i
;
2487 strncpy(info
->ifaces
[i
].name
, cur
->name
, sizeof(info
->ifaces
[i
].name
)-1);
2488 info
->ifaces
[i
].link_state
= cur
->link_up
;
2489 info
->ifaces
[i
].references
= cur
->references
;
2492 len
= offsetof(struct ctdb_public_ip_info_old
, ifaces
) +
2493 i
*sizeof(struct ctdb_iface
);
2495 outdata
->dsize
= len
;
2496 outdata
->dptr
= (uint8_t *)info
;
2501 int32_t ctdb_control_get_ifaces(struct ctdb_context
*ctdb
,
2502 struct ctdb_req_control_old
*c
,
2506 struct ctdb_iface_list_old
*ifaces
;
2507 struct ctdb_interface
*cur
;
2509 /* count how many public ip structures we have */
2511 for (cur
=ctdb
->ifaces
;cur
;cur
=cur
->next
) {
2515 len
= offsetof(struct ctdb_iface_list_old
, ifaces
) +
2516 num
*sizeof(struct ctdb_iface
);
2517 ifaces
= talloc_zero_size(outdata
, len
);
2518 CTDB_NO_MEMORY(ctdb
, ifaces
);
2521 for (cur
=ctdb
->ifaces
;cur
;cur
=cur
->next
) {
2522 strcpy(ifaces
->ifaces
[i
].name
, cur
->name
);
2523 ifaces
->ifaces
[i
].link_state
= cur
->link_up
;
2524 ifaces
->ifaces
[i
].references
= cur
->references
;
2528 len
= offsetof(struct ctdb_iface_list_old
, ifaces
) +
2529 i
*sizeof(struct ctdb_iface
);
2531 outdata
->dsize
= len
;
2532 outdata
->dptr
= (uint8_t *)ifaces
;
2537 int32_t ctdb_control_set_iface_link(struct ctdb_context
*ctdb
,
2538 struct ctdb_req_control_old
*c
,
2541 struct ctdb_iface
*info
;
2542 struct ctdb_interface
*iface
;
2543 bool link_up
= false;
2545 info
= (struct ctdb_iface
*)indata
.dptr
;
2547 if (info
->name
[CTDB_IFACE_SIZE
] != '\0') {
2548 int len
= strnlen(info
->name
, CTDB_IFACE_SIZE
);
2549 DEBUG(DEBUG_ERR
, (__location__
" name[%*.*s] not terminated\n",
2550 len
, len
, info
->name
));
2554 switch (info
->link_state
) {
2562 DEBUG(DEBUG_ERR
, (__location__
" link_state[%u] invalid\n",
2563 (unsigned int)info
->link_state
));
2567 if (info
->references
!= 0) {
2568 DEBUG(DEBUG_ERR
, (__location__
" references[%u] should be 0\n",
2569 (unsigned int)info
->references
));
2573 iface
= ctdb_find_iface(ctdb
, info
->name
);
2574 if (iface
== NULL
) {
2578 if (link_up
== iface
->link_up
) {
2582 DEBUG(iface
->link_up
?DEBUG_ERR
:DEBUG_NOTICE
,
2583 ("iface[%s] has changed it's link status %s => %s\n",
2585 iface
->link_up
?"up":"down",
2586 link_up
?"up":"down"));
2588 iface
->link_up
= link_up
;
2594 structure containing the listening socket and the list of tcp connections
2595 that the ctdb daemon is to kill
2597 struct ctdb_kill_tcp
{
2598 struct ctdb_vnn
*vnn
;
2599 struct ctdb_context
*ctdb
;
2601 struct tevent_fd
*fde
;
2602 trbt_tree_t
*connections
;
2607 a tcp connection that is to be killed
2609 struct ctdb_killtcp_con
{
2610 ctdb_sock_addr src_addr
;
2611 ctdb_sock_addr dst_addr
;
2613 struct ctdb_kill_tcp
*killtcp
;
2616 /* this function is used to create a key to represent this socketpair
2617 in the killtcp tree.
2618 this key is used to insert and lookup matching socketpairs that are
2619 to be tickled and RST
2621 #define KILLTCP_KEYLEN 10
2622 static uint32_t *killtcp_key(ctdb_sock_addr
*src
, ctdb_sock_addr
*dst
)
2624 static uint32_t key
[KILLTCP_KEYLEN
];
2626 bzero(key
, sizeof(key
));
2628 if (src
->sa
.sa_family
!= dst
->sa
.sa_family
) {
2629 DEBUG(DEBUG_ERR
, (__location__
" ERROR, different families passed :%u vs %u\n", src
->sa
.sa_family
, dst
->sa
.sa_family
));
2633 switch (src
->sa
.sa_family
) {
2635 key
[0] = dst
->ip
.sin_addr
.s_addr
;
2636 key
[1] = src
->ip
.sin_addr
.s_addr
;
2637 key
[2] = dst
->ip
.sin_port
;
2638 key
[3] = src
->ip
.sin_port
;
2641 uint32_t *dst6_addr32
=
2642 (uint32_t *)&(dst
->ip6
.sin6_addr
.s6_addr
);
2643 uint32_t *src6_addr32
=
2644 (uint32_t *)&(src
->ip6
.sin6_addr
.s6_addr
);
2645 key
[0] = dst6_addr32
[3];
2646 key
[1] = src6_addr32
[3];
2647 key
[2] = dst6_addr32
[2];
2648 key
[3] = src6_addr32
[2];
2649 key
[4] = dst6_addr32
[1];
2650 key
[5] = src6_addr32
[1];
2651 key
[6] = dst6_addr32
[0];
2652 key
[7] = src6_addr32
[0];
2653 key
[8] = dst
->ip6
.sin6_port
;
2654 key
[9] = src
->ip6
.sin6_port
;
2658 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family passed :%u\n", src
->sa
.sa_family
));
2666 called when we get a read event on the raw socket
2668 static void capture_tcp_handler(struct tevent_context
*ev
,
2669 struct tevent_fd
*fde
,
2670 uint16_t flags
, void *private_data
)
2672 struct ctdb_kill_tcp
*killtcp
= talloc_get_type(private_data
, struct ctdb_kill_tcp
);
2673 struct ctdb_killtcp_con
*con
;
2674 ctdb_sock_addr src
, dst
;
2675 uint32_t ack_seq
, seq
;
2677 if (!(flags
& TEVENT_FD_READ
)) {
2681 if (ctdb_sys_read_tcp_packet(killtcp
->capture_fd
,
2682 killtcp
->private_data
,
2684 &ack_seq
, &seq
) != 0) {
2685 /* probably a non-tcp ACK packet */
2689 /* check if we have this guy in our list of connections
2692 con
= trbt_lookuparray32(killtcp
->connections
,
2693 KILLTCP_KEYLEN
, killtcp_key(&src
, &dst
));
2695 /* no this was some other packet we can just ignore */
2699 /* This one has been tickled !
2700 now reset him and remove him from the list.
2702 DEBUG(DEBUG_INFO
, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2703 ntohs(con
->dst_addr
.ip
.sin_port
),
2704 ctdb_addr_to_str(&con
->src_addr
),
2705 ntohs(con
->src_addr
.ip
.sin_port
)));
2707 ctdb_sys_send_tcp(&con
->dst_addr
, &con
->src_addr
, ack_seq
, seq
, 1);
2712 /* when traversing the list of all tcp connections to send tickle acks to
2713 (so that we can capture the ack coming back and kill the connection
2715 this callback is called for each connection we are currently trying to kill
2717 static int tickle_connection_traverse(void *param
, void *data
)
2719 struct ctdb_killtcp_con
*con
= talloc_get_type(data
, struct ctdb_killtcp_con
);
2721 /* have tried too many times, just give up */
2722 if (con
->count
>= 5) {
2723 /* can't delete in traverse: reparent to delete_cons */
2724 talloc_steal(param
, con
);
2728 /* othervise, try tickling it again */
2731 (ctdb_sock_addr
*)&con
->dst_addr
,
2732 (ctdb_sock_addr
*)&con
->src_addr
,
2739 called every second until all sentenced connections have been reset
2741 static void ctdb_tickle_sentenced_connections(struct tevent_context
*ev
,
2742 struct tevent_timer
*te
,
2743 struct timeval t
, void *private_data
)
2745 struct ctdb_kill_tcp
*killtcp
= talloc_get_type(private_data
, struct ctdb_kill_tcp
);
2746 void *delete_cons
= talloc_new(NULL
);
2748 /* loop over all connections sending tickle ACKs */
2749 trbt_traversearray32(killtcp
->connections
, KILLTCP_KEYLEN
, tickle_connection_traverse
, delete_cons
);
2751 /* now we've finished traverse, it's safe to do deletion. */
2752 talloc_free(delete_cons
);
2754 /* If there are no more connections to kill we can remove the
2755 entire killtcp structure
2757 if ( (killtcp
->connections
== NULL
) ||
2758 (killtcp
->connections
->root
== NULL
) ) {
2759 talloc_free(killtcp
);
2763 /* try tickling them again in a seconds time
2765 tevent_add_timer(killtcp
->ctdb
->ev
, killtcp
,
2766 timeval_current_ofs(1, 0),
2767 ctdb_tickle_sentenced_connections
, killtcp
);
2771 destroy the killtcp structure
2773 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp
*killtcp
)
2775 struct ctdb_vnn
*tmpvnn
;
2777 /* verify that this vnn is still active */
2778 for (tmpvnn
= killtcp
->ctdb
->vnn
; tmpvnn
; tmpvnn
= tmpvnn
->next
) {
2779 if (tmpvnn
== killtcp
->vnn
) {
2784 if (tmpvnn
== NULL
) {
2788 if (killtcp
->vnn
->killtcp
!= killtcp
) {
2792 killtcp
->vnn
->killtcp
= NULL
;
/* nothing fancy here, just unconditionally replace any existing
   connection structure with the new one.

   don't even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new data
   is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	return data;
}
2811 add a tcp socket to the list of connections we want to RST
2813 static int ctdb_killtcp_add_connection(struct ctdb_context
*ctdb
,
2817 ctdb_sock_addr src
, dst
;
2818 struct ctdb_kill_tcp
*killtcp
;
2819 struct ctdb_killtcp_con
*con
;
2820 struct ctdb_vnn
*vnn
;
2822 ctdb_canonicalize_ip(s
, &src
);
2823 ctdb_canonicalize_ip(d
, &dst
);
2825 vnn
= find_public_ip_vnn(ctdb
, &dst
);
2827 vnn
= find_public_ip_vnn(ctdb
, &src
);
2830 /* if it is not a public ip it could be our 'single ip' */
2831 if (ctdb
->single_ip_vnn
) {
2832 if (ctdb_same_ip(&ctdb
->single_ip_vnn
->public_address
, &dst
)) {
2833 vnn
= ctdb
->single_ip_vnn
;
2838 DEBUG(DEBUG_ERR
,(__location__
" Could not killtcp, not a public address\n"));
2842 killtcp
= vnn
->killtcp
;
2844 /* If this is the first connection to kill we must allocate
2847 if (killtcp
== NULL
) {
2848 killtcp
= talloc_zero(vnn
, struct ctdb_kill_tcp
);
2849 CTDB_NO_MEMORY(ctdb
, killtcp
);
2852 killtcp
->ctdb
= ctdb
;
2853 killtcp
->capture_fd
= -1;
2854 killtcp
->connections
= trbt_create(killtcp
, 0);
2856 vnn
->killtcp
= killtcp
;
2857 talloc_set_destructor(killtcp
, ctdb_killtcp_destructor
);
2862 /* create a structure that describes this connection we want to
2863 RST and store it in killtcp->connections
2865 con
= talloc(killtcp
, struct ctdb_killtcp_con
);
2866 CTDB_NO_MEMORY(ctdb
, con
);
2867 con
->src_addr
= src
;
2868 con
->dst_addr
= dst
;
2870 con
->killtcp
= killtcp
;
2873 trbt_insertarray32_callback(killtcp
->connections
,
2874 KILLTCP_KEYLEN
, killtcp_key(&con
->dst_addr
, &con
->src_addr
),
2875 add_killtcp_callback
, con
);
2878 If we don't have a socket to listen on yet we must create it
2880 if (killtcp
->capture_fd
== -1) {
2881 const char *iface
= ctdb_vnn_iface_string(vnn
);
2882 killtcp
->capture_fd
= ctdb_sys_open_capture_socket(iface
, &killtcp
->private_data
);
2883 if (killtcp
->capture_fd
== -1) {
2884 DEBUG(DEBUG_CRIT
,(__location__
" Failed to open capturing "
2885 "socket on iface '%s' for killtcp (%s)\n",
2886 iface
, strerror(errno
)));
2892 if (killtcp
->fde
== NULL
) {
2893 killtcp
->fde
= tevent_add_fd(ctdb
->ev
, killtcp
,
2894 killtcp
->capture_fd
,
2896 capture_tcp_handler
, killtcp
);
2897 tevent_fd_set_auto_close(killtcp
->fde
);
2899 /* We also need to set up some events to tickle all these connections
2900 until they are all reset
2902 tevent_add_timer(ctdb
->ev
, killtcp
, timeval_current_ofs(1, 0),
2903 ctdb_tickle_sentenced_connections
, killtcp
);
2906 /* tickle him once now */
2915 talloc_free(vnn
->killtcp
);
2916 vnn
->killtcp
= NULL
;
2921 kill a TCP connection.
2923 int32_t ctdb_control_kill_tcp(struct ctdb_context
*ctdb
, TDB_DATA indata
)
2925 struct ctdb_connection
*killtcp
= (struct ctdb_connection
*)indata
.dptr
;
2927 return ctdb_killtcp_add_connection(ctdb
, &killtcp
->src
, &killtcp
->dst
);
2931 called by a daemon to inform us of the entire list of TCP tickles for
2932 a particular public address.
2933 this control should only be sent by the node that is currently serving
2934 that public address.
2936 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context
*ctdb
, TDB_DATA indata
)
2938 struct ctdb_tickle_list_old
*list
= (struct ctdb_tickle_list_old
*)indata
.dptr
;
2939 struct ctdb_tcp_array
*tcparray
;
2940 struct ctdb_vnn
*vnn
;
2942 /* We must at least have tickles.num or else we cant verify the size
2943 of the received data blob
2945 if (indata
.dsize
< offsetof(struct ctdb_tickle_list_old
, connections
)) {
2946 DEBUG(DEBUG_ERR
,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2950 /* verify that the size of data matches what we expect */
2951 if (indata
.dsize
< offsetof(struct ctdb_tickle_list_old
, connections
)
2952 + sizeof(struct ctdb_connection
) * list
->num
) {
2953 DEBUG(DEBUG_ERR
,("Bad indata in ctdb_tickle_list\n"));
2957 DEBUG(DEBUG_INFO
, ("Received tickle update for public address %s\n",
2958 ctdb_addr_to_str(&list
->addr
)));
2960 vnn
= find_public_ip_vnn(ctdb
, &list
->addr
);
2962 DEBUG(DEBUG_INFO
,(__location__
" Could not set tcp tickle list, '%s' is not a public address\n",
2963 ctdb_addr_to_str(&list
->addr
)));
2968 if (vnn
->pnn
== ctdb
->pnn
) {
2970 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2971 ctdb_addr_to_str(&list
->addr
)));
2975 /* remove any old ticklelist we might have */
2976 talloc_free(vnn
->tcp_array
);
2977 vnn
->tcp_array
= NULL
;
2979 tcparray
= talloc(vnn
, struct ctdb_tcp_array
);
2980 CTDB_NO_MEMORY(ctdb
, tcparray
);
2982 tcparray
->num
= list
->num
;
2984 tcparray
->connections
= talloc_array(tcparray
, struct ctdb_connection
, tcparray
->num
);
2985 CTDB_NO_MEMORY(ctdb
, tcparray
->connections
);
2987 memcpy(tcparray
->connections
, &list
->connections
[0],
2988 sizeof(struct ctdb_connection
)*tcparray
->num
);
2990 /* We now have a new fresh tickle list array for this vnn */
2991 vnn
->tcp_array
= tcparray
;
2997 called to return the full list of tickles for the puclic address associated
2998 with the provided vnn
3000 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context
*ctdb
, TDB_DATA indata
, TDB_DATA
*outdata
)
3002 ctdb_sock_addr
*addr
= (ctdb_sock_addr
*)indata
.dptr
;
3003 struct ctdb_tickle_list_old
*list
;
3004 struct ctdb_tcp_array
*tcparray
;
3006 struct ctdb_vnn
*vnn
;
3008 vnn
= find_public_ip_vnn(ctdb
, addr
);
3010 DEBUG(DEBUG_ERR
,(__location__
" Could not get tcp tickle list, '%s' is not a public address\n",
3011 ctdb_addr_to_str(addr
)));
3016 tcparray
= vnn
->tcp_array
;
3018 num
= tcparray
->num
;
3023 outdata
->dsize
= offsetof(struct ctdb_tickle_list_old
, connections
)
3024 + sizeof(struct ctdb_connection
) * num
;
3026 outdata
->dptr
= talloc_size(outdata
, outdata
->dsize
);
3027 CTDB_NO_MEMORY(ctdb
, outdata
->dptr
);
3028 list
= (struct ctdb_tickle_list_old
*)outdata
->dptr
;
3033 memcpy(&list
->connections
[0], tcparray
->connections
,
3034 sizeof(struct ctdb_connection
) * num
);
3042 set the list of all tcp tickles for a public address
3044 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context
*ctdb
,
3045 ctdb_sock_addr
*addr
,
3046 struct ctdb_tcp_array
*tcparray
)
3050 struct ctdb_tickle_list_old
*list
;
3053 num
= tcparray
->num
;
3058 data
.dsize
= offsetof(struct ctdb_tickle_list_old
, connections
) +
3059 sizeof(struct ctdb_connection
) * num
;
3060 data
.dptr
= talloc_size(ctdb
, data
.dsize
);
3061 CTDB_NO_MEMORY(ctdb
, data
.dptr
);
3063 list
= (struct ctdb_tickle_list_old
*)data
.dptr
;
3067 memcpy(&list
->connections
[0], tcparray
->connections
, sizeof(struct ctdb_connection
) * num
);
3070 ret
= ctdb_daemon_send_control(ctdb
, CTDB_BROADCAST_ALL
, 0,
3071 CTDB_CONTROL_SET_TCP_TICKLE_LIST
,
3072 0, CTDB_CTRL_FLAG_NOREPLY
, data
, NULL
, NULL
);
3074 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control for set tcp tickles failed\n"));
3078 talloc_free(data
.dptr
);
3085 perform tickle updates if required
3087 static void ctdb_update_tcp_tickles(struct tevent_context
*ev
,
3088 struct tevent_timer
*te
,
3089 struct timeval t
, void *private_data
)
3091 struct ctdb_context
*ctdb
= talloc_get_type(private_data
, struct ctdb_context
);
3093 struct ctdb_vnn
*vnn
;
3095 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
3096 /* we only send out updates for public addresses that
3099 if (ctdb
->pnn
!= vnn
->pnn
) {
3102 /* We only send out the updates if we need to */
3103 if (!vnn
->tcp_update_needed
) {
3106 ret
= ctdb_send_set_tcp_tickles_for_ip(ctdb
,
3107 &vnn
->public_address
,
3110 DEBUG(DEBUG_ERR
,("Failed to send the tickle update for public address %s\n",
3111 ctdb_addr_to_str(&vnn
->public_address
)));
3114 ("Sent tickle update for public address %s\n",
3115 ctdb_addr_to_str(&vnn
->public_address
)));
3116 vnn
->tcp_update_needed
= false;
3120 tevent_add_timer(ctdb
->ev
, ctdb
->tickle_update_context
,
3121 timeval_current_ofs(ctdb
->tunable
.tickle_update_interval
, 0),
3122 ctdb_update_tcp_tickles
, ctdb
);
3126 start periodic update of tcp tickles
3128 void ctdb_start_tcp_tickle_update(struct ctdb_context
*ctdb
)
3130 ctdb
->tickle_update_context
= talloc_new(ctdb
);
3132 tevent_add_timer(ctdb
->ev
, ctdb
->tickle_update_context
,
3133 timeval_current_ofs(ctdb
->tunable
.tickle_update_interval
, 0),
3134 ctdb_update_tcp_tickles
, ctdb
);
3140 struct control_gratious_arp
{
3141 struct ctdb_context
*ctdb
;
3142 ctdb_sock_addr addr
;
3148 send a control_gratuitous arp
3150 static void send_gratious_arp(struct tevent_context
*ev
,
3151 struct tevent_timer
*te
,
3152 struct timeval t
, void *private_data
)
3155 struct control_gratious_arp
*arp
= talloc_get_type(private_data
,
3156 struct control_gratious_arp
);
3158 ret
= ctdb_sys_send_arp(&arp
->addr
, arp
->iface
);
3160 DEBUG(DEBUG_ERR
,(__location__
" sending of gratious arp on iface '%s' failed (%s)\n",
3161 arp
->iface
, strerror(errno
)));
3166 if (arp
->count
== CTDB_ARP_REPEAT
) {
3171 tevent_add_timer(arp
->ctdb
->ev
, arp
,
3172 timeval_current_ofs(CTDB_ARP_INTERVAL
, 0),
3173 send_gratious_arp
, arp
);
3180 int32_t ctdb_control_send_gratious_arp(struct ctdb_context
*ctdb
, TDB_DATA indata
)
3182 struct ctdb_addr_info_old
*gratious_arp
= (struct ctdb_addr_info_old
*)indata
.dptr
;
3183 struct control_gratious_arp
*arp
;
3185 /* verify the size of indata */
3186 if (indata
.dsize
< offsetof(struct ctdb_addr_info_old
, iface
)) {
3187 DEBUG(DEBUG_ERR
,(__location__
" Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3188 (unsigned)indata
.dsize
,
3189 (unsigned)offsetof(struct ctdb_addr_info_old
, iface
)));
3193 ( offsetof(struct ctdb_addr_info_old
, iface
)
3194 + gratious_arp
->len
) ){
3196 DEBUG(DEBUG_ERR
,(__location__
" Wrong size of indata. Was %u bytes "
3197 "but should be %u bytes\n",
3198 (unsigned)indata
.dsize
,
3199 (unsigned)(offsetof(struct ctdb_addr_info_old
, iface
)+gratious_arp
->len
)));
3204 arp
= talloc(ctdb
, struct control_gratious_arp
);
3205 CTDB_NO_MEMORY(ctdb
, arp
);
3208 arp
->addr
= gratious_arp
->addr
;
3209 arp
->iface
= talloc_strdup(arp
, gratious_arp
->iface
);
3210 CTDB_NO_MEMORY(ctdb
, arp
->iface
);
3213 tevent_add_timer(arp
->ctdb
->ev
, arp
,
3214 timeval_zero(), send_gratious_arp
, arp
);
3219 int32_t ctdb_control_add_public_address(struct ctdb_context
*ctdb
, TDB_DATA indata
)
3221 struct ctdb_addr_info_old
*pub
= (struct ctdb_addr_info_old
*)indata
.dptr
;
3224 /* verify the size of indata */
3225 if (indata
.dsize
< offsetof(struct ctdb_addr_info_old
, iface
)) {
3226 DEBUG(DEBUG_ERR
,(__location__
" Too small indata to hold a ctdb_addr_info structure\n"));
3230 ( offsetof(struct ctdb_addr_info_old
, iface
)
3233 DEBUG(DEBUG_ERR
,(__location__
" Wrong size of indata. Was %u bytes "
3234 "but should be %u bytes\n",
3235 (unsigned)indata
.dsize
,
3236 (unsigned)(offsetof(struct ctdb_addr_info_old
, iface
)+pub
->len
)));
3240 DEBUG(DEBUG_NOTICE
,("Add IP %s\n", ctdb_addr_to_str(&pub
->addr
)));
3242 ret
= ctdb_add_public_address(ctdb
, &pub
->addr
, pub
->mask
, &pub
->iface
[0], true);
3245 DEBUG(DEBUG_ERR
,(__location__
" Failed to add public address\n"));
/* carries the pending control request until the release-ip result is known */
struct delete_ip_callback_state {
	struct ctdb_req_control_old *c;
};
3257 called when releaseip event finishes for del_public_address
3259 static void delete_ip_callback(struct ctdb_context
*ctdb
,
3260 int32_t status
, TDB_DATA data
,
3261 const char *errormsg
,
3264 struct delete_ip_callback_state
*state
=
3265 talloc_get_type(private_data
, struct delete_ip_callback_state
);
3267 /* If release failed then fail. */
3268 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, status
, errormsg
);
3269 talloc_free(private_data
);
3272 int32_t ctdb_control_del_public_address(struct ctdb_context
*ctdb
,
3273 struct ctdb_req_control_old
*c
,
3274 TDB_DATA indata
, bool *async_reply
)
3276 struct ctdb_addr_info_old
*pub
= (struct ctdb_addr_info_old
*)indata
.dptr
;
3277 struct ctdb_vnn
*vnn
;
3279 /* verify the size of indata */
3280 if (indata
.dsize
< offsetof(struct ctdb_addr_info_old
, iface
)) {
3281 DEBUG(DEBUG_ERR
,(__location__
" Too small indata to hold a ctdb_addr_info structure\n"));
3285 ( offsetof(struct ctdb_addr_info_old
, iface
)
3288 DEBUG(DEBUG_ERR
,(__location__
" Wrong size of indata. Was %u bytes "
3289 "but should be %u bytes\n",
3290 (unsigned)indata
.dsize
,
3291 (unsigned)(offsetof(struct ctdb_addr_info_old
, iface
)+pub
->len
)));
3295 DEBUG(DEBUG_NOTICE
,("Delete IP %s\n", ctdb_addr_to_str(&pub
->addr
)));
3297 /* walk over all public addresses until we find a match */
3298 for (vnn
=ctdb
->vnn
;vnn
;vnn
=vnn
->next
) {
3299 if (ctdb_same_ip(&vnn
->public_address
, &pub
->addr
)) {
3300 if (vnn
->pnn
== ctdb
->pnn
) {
3301 struct delete_ip_callback_state
*state
;
3302 struct ctdb_public_ip
*ip
;
3306 vnn
->delete_pending
= true;
3308 state
= talloc(ctdb
,
3309 struct delete_ip_callback_state
);
3310 CTDB_NO_MEMORY(ctdb
, state
);
3313 ip
= talloc(state
, struct ctdb_public_ip
);
3316 (__location__
" Out of memory\n"));
3321 ip
->addr
= pub
->addr
;
3323 data
.dsize
= sizeof(struct ctdb_public_ip
);
3324 data
.dptr
= (unsigned char *)ip
;
3326 ret
= ctdb_daemon_send_control(ctdb
,
3329 CTDB_CONTROL_RELEASE_IP
,
3336 (__location__
"Unable to send "
3337 "CTDB_CONTROL_RELEASE_IP\n"));
3342 state
->c
= talloc_steal(state
, c
);
3343 *async_reply
= true;
3345 /* This IP is not hosted on the
3346 * current node so just delete it
3348 do_delete_ip(ctdb
, vnn
);
3355 DEBUG(DEBUG_ERR
,("Delete IP of unknown public IP address %s\n",
3356 ctdb_addr_to_str(&pub
->addr
)));
/* carries the pending control request until the "ipreallocated" event ends */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;
};
3365 static void ctdb_ipreallocated_callback(struct ctdb_context
*ctdb
,
3366 int status
, void *p
)
3368 struct ipreallocated_callback_state
*state
=
3369 talloc_get_type(p
, struct ipreallocated_callback_state
);
3373 (" \"ipreallocated\" event script failed (status %d)\n",
3375 if (status
== -ETIME
) {
3376 ctdb_ban_self(ctdb
);
3380 ctdb_request_control_reply(ctdb
, state
->c
, NULL
, status
, NULL
);
3384 /* A control to run the ipreallocated event */
3385 int32_t ctdb_control_ipreallocated(struct ctdb_context
*ctdb
,
3386 struct ctdb_req_control_old
*c
,
3390 struct ipreallocated_callback_state
*state
;
3392 state
= talloc(ctdb
, struct ipreallocated_callback_state
);
3393 CTDB_NO_MEMORY(ctdb
, state
);
3395 DEBUG(DEBUG_INFO
,(__location__
" Running \"ipreallocated\" event\n"));
3397 ret
= ctdb_event_script_callback(ctdb
, state
,
3398 ctdb_ipreallocated_callback
, state
,
3399 CTDB_EVENT_IPREALLOCATED
,
3403 DEBUG(DEBUG_ERR
,("Failed to run \"ipreallocated\" event \n"));
3408 /* tell the control that we will be reply asynchronously */
3409 state
->c
= talloc_steal(state
, c
);
3410 *async_reply
= true;
3416 /* This function is called from the recovery daemon to verify that a remote
3417 node has the expected ip allocation.
3418 This is verified against ctdb->ip_tree
3420 static int verify_remote_ip_allocation(struct ctdb_context
*ctdb
,
3421 struct ctdb_public_ip_list_old
*ips
,
3424 struct public_ip_list
*tmp_ip
;
3427 if (ctdb
->ip_tree
== NULL
) {
3428 /* don't know the expected allocation yet, assume remote node
3437 for (i
=0; i
<ips
->num
; i
++) {
3438 tmp_ip
= trbt_lookuparray32(ctdb
->ip_tree
, IP_KEYLEN
, ip_key(&ips
->ips
[i
].addr
));
3439 if (tmp_ip
== NULL
) {
3440 DEBUG(DEBUG_ERR
,("Node %u has new or unknown public IP %s\n", pnn
, ctdb_addr_to_str(&ips
->ips
[i
].addr
)));
3444 if (tmp_ip
->pnn
== -1 || ips
->ips
[i
].pnn
== -1) {
3448 if (tmp_ip
->pnn
!= ips
->ips
[i
].pnn
) {
3450 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3452 ctdb_addr_to_str(&ips
->ips
[i
].addr
),
3453 ips
->ips
[i
].pnn
, tmp_ip
->pnn
));
3461 int update_ip_assignment_tree(struct ctdb_context
*ctdb
, struct ctdb_public_ip
*ip
)
3463 struct public_ip_list
*tmp_ip
;
3465 /* IP tree is never built if DisableIPFailover is set */
3466 if (ctdb
->tunable
.disable_ip_failover
!= 0) {
3470 if (ctdb
->ip_tree
== NULL
) {
3471 DEBUG(DEBUG_ERR
,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3475 tmp_ip
= trbt_lookuparray32(ctdb
->ip_tree
, IP_KEYLEN
, ip_key(&ip
->addr
));
3476 if (tmp_ip
== NULL
) {
3477 DEBUG(DEBUG_ERR
,(__location__
" Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip
->addr
)));
3481 DEBUG(DEBUG_NOTICE
,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip
->addr
), tmp_ip
->pnn
, ip
->pnn
));
3482 tmp_ip
->pnn
= ip
->pnn
;
3487 void clear_ip_assignment_tree(struct ctdb_context
*ctdb
)
3489 TALLOC_FREE(ctdb
->ip_tree
);
/* state for one in-progress "reload public ips" request */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	struct ctdb_req_control_old *c;	/* request to reply to when done */
	int status;			/* result sent back in the reply */
	int fd[2];			/* pipe the child reports its result on */
	pid_t child;			/* worker process */
	struct tevent_fd *fde;		/* read event on fd[0] */
};
3501 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle
*h
)
3503 if (h
== h
->ctdb
->reload_ips
) {
3504 h
->ctdb
->reload_ips
= NULL
;
3507 ctdb_request_control_reply(h
->ctdb
, h
->c
, NULL
, h
->status
, NULL
);
3510 ctdb_kill(h
->ctdb
, h
->child
, SIGKILL
);
3514 static void ctdb_reloadips_timeout_event(struct tevent_context
*ev
,
3515 struct tevent_timer
*te
,
3516 struct timeval t
, void *private_data
)
3518 struct ctdb_reloadips_handle
*h
= talloc_get_type(private_data
, struct ctdb_reloadips_handle
);
3523 static void ctdb_reloadips_child_handler(struct tevent_context
*ev
,
3524 struct tevent_fd
*fde
,
3525 uint16_t flags
, void *private_data
)
3527 struct ctdb_reloadips_handle
*h
= talloc_get_type(private_data
, struct ctdb_reloadips_handle
);
3532 ret
= sys_read(h
->fd
[0], &res
, 1);
3533 if (ret
< 1 || res
!= 0) {
3534 DEBUG(DEBUG_ERR
, (__location__
" Reloadips child process returned error\n"));
3542 static int ctdb_reloadips_child(struct ctdb_context
*ctdb
)
3544 TALLOC_CTX
*mem_ctx
= talloc_new(NULL
);
3545 struct ctdb_public_ip_list_old
*ips
;
3546 struct ctdb_vnn
*vnn
;
3547 struct client_async_data
*async_data
;
3548 struct timeval timeout
;
3550 struct ctdb_client_control_state
*state
;
3554 CTDB_NO_MEMORY(ctdb
, mem_ctx
);
3556 /* Read IPs from local node */
3557 ret
= ctdb_ctrl_get_public_ips(ctdb
, TAKEOVER_TIMEOUT(),
3558 CTDB_CURRENT_NODE
, mem_ctx
, &ips
);
3561 ("Unable to fetch public IPs from local node\n"));
3562 talloc_free(mem_ctx
);
3566 /* Read IPs file - this is safe since this is a child process */
3568 if (ctdb_set_public_addresses(ctdb
, false) != 0) {
3569 DEBUG(DEBUG_ERR
,("Failed to re-read public addresses file\n"));
3570 talloc_free(mem_ctx
);
3574 async_data
= talloc_zero(mem_ctx
, struct client_async_data
);
3575 CTDB_NO_MEMORY(ctdb
, async_data
);
3577 /* Compare IPs between node and file for IPs to be deleted */
3578 for (i
= 0; i
< ips
->num
; i
++) {
3580 for (vnn
= ctdb
->vnn
; vnn
; vnn
= vnn
->next
) {
3581 if (ctdb_same_ip(&vnn
->public_address
,
3582 &ips
->ips
[i
].addr
)) {
3583 /* IP is still in file */
3589 /* Delete IP ips->ips[i] */
3590 struct ctdb_addr_info_old
*pub
;
3593 ("IP %s no longer configured, deleting it\n",
3594 ctdb_addr_to_str(&ips
->ips
[i
].addr
)));
3596 pub
= talloc_zero(mem_ctx
, struct ctdb_addr_info_old
);
3597 CTDB_NO_MEMORY(ctdb
, pub
);
3599 pub
->addr
= ips
->ips
[i
].addr
;
3603 timeout
= TAKEOVER_TIMEOUT();
3605 data
.dsize
= offsetof(struct ctdb_addr_info_old
,
3607 data
.dptr
= (uint8_t *)pub
;
3609 state
= ctdb_control_send(ctdb
, CTDB_CURRENT_NODE
, 0,
3610 CTDB_CONTROL_DEL_PUBLIC_IP
,
3611 0, data
, async_data
,
3613 if (state
== NULL
) {
3616 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3620 ctdb_client_async_add(async_data
, state
);
3624 /* Compare IPs between node and file for IPs to be added */
3626 for (vnn
= ctdb
->vnn
; vnn
; vnn
= vnn
->next
) {
3627 for (i
= 0; i
< ips
->num
; i
++) {
3628 if (ctdb_same_ip(&vnn
->public_address
,
3629 &ips
->ips
[i
].addr
)) {
3630 /* IP already on node */
3634 if (i
== ips
->num
) {
3635 /* Add IP ips->ips[i] */
3636 struct ctdb_addr_info_old
*pub
;
3637 const char *ifaces
= NULL
;
3642 ("New IP %s configured, adding it\n",
3643 ctdb_addr_to_str(&vnn
->public_address
)));
3645 uint32_t pnn
= ctdb_get_pnn(ctdb
);
3647 data
.dsize
= sizeof(pnn
);
3648 data
.dptr
= (uint8_t *)&pnn
;
3650 ret
= ctdb_client_send_message(
3652 CTDB_BROADCAST_CONNECTED
,
3653 CTDB_SRVID_REBALANCE_NODE
,
3656 DEBUG(DEBUG_WARNING
,
3657 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3663 ifaces
= vnn
->ifaces
[0];
3665 while (vnn
->ifaces
[iface
] != NULL
) {
3666 ifaces
= talloc_asprintf(vnn
, "%s,%s", ifaces
,
3667 vnn
->ifaces
[iface
]);
3671 len
= strlen(ifaces
) + 1;
3672 pub
= talloc_zero_size(mem_ctx
,
3673 offsetof(struct ctdb_addr_info_old
, iface
) + len
);
3674 CTDB_NO_MEMORY(ctdb
, pub
);
3676 pub
->addr
= vnn
->public_address
;
3677 pub
->mask
= vnn
->public_netmask_bits
;
3679 memcpy(&pub
->iface
[0], ifaces
, pub
->len
);
3681 timeout
= TAKEOVER_TIMEOUT();
3683 data
.dsize
= offsetof(struct ctdb_addr_info_old
,
3685 data
.dptr
= (uint8_t *)pub
;
3687 state
= ctdb_control_send(ctdb
, CTDB_CURRENT_NODE
, 0,
3688 CTDB_CONTROL_ADD_PUBLIC_IP
,
3689 0, data
, async_data
,
3691 if (state
== NULL
) {
3694 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3698 ctdb_client_async_add(async_data
, state
);
3702 if (ctdb_client_async_wait(ctdb
, async_data
) != 0) {
3703 DEBUG(DEBUG_ERR
,(__location__
" Add/delete IPs failed\n"));
3707 talloc_free(mem_ctx
);
3711 talloc_free(mem_ctx
);
/* This control is sent to force the node to re-read the public addresses file
   and drop any addresses we should no longer host, and add new addresses
   that we are now able to host
*/
3719 int32_t ctdb_control_reload_public_ips(struct ctdb_context
*ctdb
, struct ctdb_req_control_old
*c
, bool *async_reply
)
3721 struct ctdb_reloadips_handle
*h
;
3722 pid_t parent
= getpid();
3724 if (ctdb
->reload_ips
!= NULL
) {
3725 talloc_free(ctdb
->reload_ips
);
3726 ctdb
->reload_ips
= NULL
;
3729 h
= talloc(ctdb
, struct ctdb_reloadips_handle
);
3730 CTDB_NO_MEMORY(ctdb
, h
);
3735 if (pipe(h
->fd
) == -1) {
3736 DEBUG(DEBUG_ERR
,("Failed to create pipe for ctdb_freeze_lock\n"));
3741 h
->child
= ctdb_fork(ctdb
);
3742 if (h
->child
== (pid_t
)-1) {
3743 DEBUG(DEBUG_ERR
, ("Failed to fork a child for reloadips\n"));
3751 if (h
->child
== 0) {
3752 signed char res
= 0;
3755 debug_extra
= talloc_asprintf(NULL
, "reloadips:");
3757 prctl_set_comment("ctdb_reloadips");
3758 if (switch_from_server_to_client(ctdb
, "reloadips-child") != 0) {
3759 DEBUG(DEBUG_CRIT
,("ERROR: Failed to switch reloadips child into client mode\n"));
3762 res
= ctdb_reloadips_child(ctdb
);
3764 DEBUG(DEBUG_ERR
,("Failed to reload ips on local node\n"));
3768 sys_write(h
->fd
[1], &res
, 1);
3769 /* make sure we die when our parent dies */
3770 while (ctdb_kill(ctdb
, parent
, 0) == 0 || errno
!= ESRCH
) {
3776 h
->c
= talloc_steal(h
, c
);
3779 set_close_on_exec(h
->fd
[0]);
3781 talloc_set_destructor(h
, ctdb_reloadips_destructor
);
3784 h
->fde
= tevent_add_fd(ctdb
->ev
, h
, h
->fd
[0], TEVENT_FD_READ
,
3785 ctdb_reloadips_child_handler
, (void *)h
);
3786 tevent_fd_set_auto_close(h
->fde
);
3788 tevent_add_timer(ctdb
->ev
, h
, timeval_current_ofs(120, 0),
3789 ctdb_reloadips_timeout_event
, h
);
3791 /* we reply later */
3792 *async_reply
= true;