2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/network.h"
30 #include "system/filesys.h"
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "protocol/protocol_util.h"
46 #include "client/client.h"
48 #include "common/logging.h"
50 #include "server/ipalloc.h"
/* Timeout, in seconds, used for individual controls.  Starts at 9 and is
 * overwritten with the takeover_timeout value from the tunable list once
 * the tunables have been fetched (see takeover_tunables_done). */
52 static int takeover_timeout
= 9;
/* Timeout value takeover_timeout seconds ahead, re-evaluated at each use.
 * NOTE(review): presumably an absolute time relative to "now" - confirm
 * against timeval_current_ofs(). */
54 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
60 static bool generic_recv(struct tevent_req
*req
, int *perr
)
64 if (tevent_req_is_unix_error(req
, &err
)) {
74 static enum ipalloc_algorithm
75 determine_algorithm(const struct ctdb_tunable_list
*tunables
)
77 switch (tunables
->ip_alloc_algorithm
) {
79 return IPALLOC_DETERMINISTIC
;
81 return IPALLOC_NONDETERMINISTIC
;
89 /**********************************************************************/
91 struct get_public_ips_state
{
94 struct ctdb_public_ip_list
*ips
;
95 uint32_t *ban_credits
;
98 static void get_public_ips_done(struct tevent_req
*subreq
);
100 static struct tevent_req
*get_public_ips_send(
102 struct tevent_context
*ev
,
103 struct ctdb_client_context
*client
,
105 int count
, int num_nodes
,
106 uint32_t *ban_credits
,
109 struct tevent_req
*req
, *subreq
;
110 struct get_public_ips_state
*state
;
111 struct ctdb_req_control request
;
113 req
= tevent_req_create(mem_ctx
, &state
, struct get_public_ips_state
);
119 state
->count
= count
;
120 state
->ban_credits
= ban_credits
;
122 state
->ips
= talloc_zero_array(state
,
123 struct ctdb_public_ip_list
,
125 if (tevent_req_nomem(state
->ips
, req
)) {
126 return tevent_req_post(req
, ev
);
129 /* Short circuit if no nodes being asked for IPs */
130 if (state
->count
== 0) {
131 tevent_req_done(req
);
132 return tevent_req_post(req
, ev
);
135 ctdb_req_control_get_public_ips(&request
, available_only
);
136 subreq
= ctdb_client_control_multi_send(mem_ctx
, ev
, client
,
139 TIMEOUT(), &request
);
140 if (tevent_req_nomem(subreq
, req
)) {
141 return tevent_req_post(req
, ev
);
143 tevent_req_set_callback(subreq
, get_public_ips_done
, req
);
148 static void get_public_ips_done(struct tevent_req
*subreq
)
150 struct tevent_req
*req
= tevent_req_callback_data(
151 subreq
, struct tevent_req
);
152 struct get_public_ips_state
*state
= tevent_req_data(
153 req
, struct get_public_ips_state
);
154 struct ctdb_reply_control
**reply
;
157 bool status
, found_errors
;
159 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
, &err_list
,
163 for (i
= 0; i
< state
->count
; i
++) {
164 if (err_list
[i
] != 0) {
165 uint32_t pnn
= state
->pnns
[i
];
167 D_ERR("control GET_PUBLIC_IPS failed on "
168 "node %u, ret=%d\n", pnn
, err_list
[i
]);
170 state
->ban_credits
[pnn
]++;
174 tevent_req_error(req
, ret
);
178 found_errors
= false;
179 for (i
= 0; i
< state
->count
; i
++) {
181 struct ctdb_public_ip_list
*ips
;
183 pnn
= state
->pnns
[i
];
184 ret
= ctdb_reply_control_get_public_ips(reply
[i
], state
->ips
,
187 D_ERR("control GET_PUBLIC_IPS failed on "
189 state
->ban_credits
[pnn
]++;
194 D_INFO("Fetched public IPs from node %u\n", pnn
);
195 state
->ips
[pnn
] = *ips
;
199 tevent_req_error(req
, EIO
);
205 tevent_req_done(req
);
208 static bool get_public_ips_recv(struct tevent_req
*req
, int *perr
,
210 struct ctdb_public_ip_list
**ips
)
212 struct get_public_ips_state
*state
= tevent_req_data(
213 req
, struct get_public_ips_state
);
216 if (tevent_req_is_unix_error(req
, &err
)) {
223 *ips
= talloc_steal(mem_ctx
, state
->ips
);
228 /**********************************************************************/
230 struct release_ip_state
{
235 uint32_t *ban_credits
;
238 struct release_ip_one_state
{
239 struct tevent_req
*req
;
245 static void release_ip_done(struct tevent_req
*subreq
);
247 static struct tevent_req
*release_ip_send(TALLOC_CTX
*mem_ctx
,
248 struct tevent_context
*ev
,
249 struct ctdb_client_context
*client
,
252 struct timeval timeout
,
253 struct public_ip_list
*all_ips
,
254 uint32_t *ban_credits
)
256 struct tevent_req
*req
, *subreq
;
257 struct release_ip_state
*state
;
258 struct ctdb_req_control request
;
259 struct public_ip_list
*tmp_ip
;
261 req
= tevent_req_create(mem_ctx
, &state
, struct release_ip_state
);
267 state
->num_replies
= 0;
268 state
->num_fails
= 0;
269 state
->ban_credits
= ban_credits
;
271 /* Send a RELEASE_IP to all nodes that should not be hosting
272 * each IP. For each IP, all but one of these will be
273 * redundant. However, the redundant ones are used to tell
274 * nodes which node should be hosting the IP so that commands
275 * like "ctdb ip" can display a particular node's idea of who
276 * is hosting what. */
277 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
278 struct release_ip_one_state
*substate
;
279 struct ctdb_public_ip ip
;
282 substate
= talloc_zero(state
, struct release_ip_one_state
);
283 if (tevent_req_nomem(substate
, req
)) {
284 return tevent_req_post(req
, ev
);
287 substate
->pnns
= talloc_zero_array(substate
, uint32_t, count
);
288 if (tevent_req_nomem(substate
->pnns
, req
)) {
289 return tevent_req_post(req
, ev
);
295 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
298 if (tevent_req_nomem(substate
->ip_str
, req
)) {
299 return tevent_req_post(req
, ev
);
302 for (i
= 0; i
< count
; i
++) {
303 uint32_t pnn
= pnns
[i
];
305 /* Skip this node if IP is not known */
306 if (! bitmap_query(tmp_ip
->known_on
, pnn
)) {
310 /* If pnn is not the node that should be
311 * hosting the IP then add it to the list of
312 * nodes that need to do a release. */
313 if (tmp_ip
->pnn
!= pnn
) {
314 substate
->pnns
[substate
->count
] = pnn
;
319 if (substate
->count
== 0) {
320 /* No releases to send for this address... */
321 TALLOC_FREE(substate
);
325 ip
.pnn
= tmp_ip
->pnn
;
326 ip
.addr
= tmp_ip
->addr
;
327 ctdb_req_control_release_ip(&request
, &ip
);
328 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
331 timeout
,/* cumulative */
333 if (tevent_req_nomem(subreq
, req
)) {
334 return tevent_req_post(req
, ev
);
336 tevent_req_set_callback(subreq
, release_ip_done
, substate
);
341 /* None sent, finished... */
342 if (state
->num_sent
== 0) {
343 tevent_req_done(req
);
344 return tevent_req_post(req
, ev
);
350 static void release_ip_done(struct tevent_req
*subreq
)
352 struct release_ip_one_state
*substate
= tevent_req_callback_data(
353 subreq
, struct release_ip_one_state
);
354 struct tevent_req
*req
= substate
->req
;
355 struct release_ip_state
*state
= tevent_req_data(
356 req
, struct release_ip_state
);
359 bool status
, found_errors
;
361 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
366 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
367 substate
->ip_str
, substate
->count
);
371 /* Get some clear error messages out of err_list and count
374 found_errors
= false;
375 for (i
= 0; i
< substate
->count
; i
++) {
376 int err
= err_list
[i
];
378 uint32_t pnn
= substate
->pnns
[i
];
380 D_ERR("RELEASE_IP %s failed on node %u, "
381 "ret=%d\n", substate
->ip_str
, pnn
, err
);
383 state
->ban_credits
[pnn
]++;
384 state
->err_any
= err
;
388 if (! found_errors
) {
389 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
390 substate
->ip_str
, ret
);
391 state
->err_any
= EIO
;
397 talloc_free(substate
);
399 state
->num_replies
++;
401 if (state
->num_replies
< state
->num_sent
) {
402 /* Not all replies received, don't go further */
406 if (state
->num_fails
> 0) {
407 tevent_req_error(req
, state
->err_any
);
411 tevent_req_done(req
);
414 static bool release_ip_recv(struct tevent_req
*req
, int *perr
)
416 return generic_recv(req
, perr
);
419 /**********************************************************************/
421 struct take_ip_state
{
426 uint32_t *ban_credits
;
429 struct take_ip_one_state
{
430 struct tevent_req
*req
;
435 static void take_ip_done(struct tevent_req
*subreq
);
437 static struct tevent_req
*take_ip_send(TALLOC_CTX
*mem_ctx
,
438 struct tevent_context
*ev
,
439 struct ctdb_client_context
*client
,
440 struct timeval timeout
,
441 struct public_ip_list
*all_ips
,
442 uint32_t *ban_credits
)
444 struct tevent_req
*req
, *subreq
;
445 struct take_ip_state
*state
;
446 struct ctdb_req_control request
;
447 struct public_ip_list
*tmp_ip
;
449 req
= tevent_req_create(mem_ctx
, &state
, struct take_ip_state
);
455 state
->num_replies
= 0;
456 state
->num_fails
= 0;
457 state
->ban_credits
= ban_credits
;
459 /* For each IP, send a TAKEOVER_IP to the node that should be
460 * hosting it. Many of these will often be redundant (since
461 * the allocation won't have changed) but they can be useful
462 * to recover from inconsistencies. */
463 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
464 struct take_ip_one_state
*substate
;
465 struct ctdb_public_ip ip
;
467 if (tmp_ip
->pnn
== CTDB_UNKNOWN_PNN
) {
468 /* IP will be unassigned */
472 substate
= talloc_zero(state
, struct take_ip_one_state
);
473 if (tevent_req_nomem(substate
, req
)) {
474 return tevent_req_post(req
, ev
);
478 substate
->pnn
= tmp_ip
->pnn
;
480 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
483 if (tevent_req_nomem(substate
->ip_str
, req
)) {
484 return tevent_req_post(req
, ev
);
487 ip
.pnn
= tmp_ip
->pnn
;
488 ip
.addr
= tmp_ip
->addr
;
489 ctdb_req_control_takeover_ip(&request
, &ip
);
490 subreq
= ctdb_client_control_send(
491 state
, ev
, client
, tmp_ip
->pnn
,
492 timeout
, /* cumulative */
494 if (tevent_req_nomem(subreq
, req
)) {
495 return tevent_req_post(req
, ev
);
497 tevent_req_set_callback(subreq
, take_ip_done
, substate
);
502 /* None sent, finished... */
503 if (state
->num_sent
== 0) {
504 tevent_req_done(req
);
505 return tevent_req_post(req
, ev
);
511 static void take_ip_done(struct tevent_req
*subreq
)
513 struct take_ip_one_state
*substate
= tevent_req_callback_data(
514 subreq
, struct take_ip_one_state
);
515 struct tevent_req
*req
= substate
->req
;
516 struct ctdb_reply_control
*reply
;
517 struct take_ip_state
*state
= tevent_req_data(
518 req
, struct take_ip_state
);
522 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
526 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
527 substate
->ip_str
, substate
->pnn
, ret
);
531 ret
= ctdb_reply_control_takeover_ip(reply
);
533 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
534 substate
->ip_str
, substate
->pnn
, ret
);
538 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
539 substate
->ip_str
, substate
->pnn
);
543 state
->ban_credits
[substate
->pnn
]++;
545 state
->err_any
= ret
;
548 talloc_free(substate
);
550 state
->num_replies
++;
552 if (state
->num_replies
< state
->num_sent
) {
553 /* Not all replies received, don't go further */
557 if (state
->num_fails
> 0) {
558 tevent_req_error(req
, state
->err_any
);
562 tevent_req_done(req
);
565 static bool take_ip_recv(struct tevent_req
*req
, int *perr
)
567 return generic_recv(req
, perr
);
570 /**********************************************************************/
572 struct ipreallocated_state
{
575 uint32_t *ban_credits
;
578 static void ipreallocated_done(struct tevent_req
*subreq
);
580 static struct tevent_req
*ipreallocated_send(TALLOC_CTX
*mem_ctx
,
581 struct tevent_context
*ev
,
582 struct ctdb_client_context
*client
,
585 struct timeval timeout
,
586 uint32_t *ban_credits
)
588 struct tevent_req
*req
, *subreq
;
589 struct ipreallocated_state
*state
;
590 struct ctdb_req_control request
;
592 req
= tevent_req_create(mem_ctx
, &state
, struct ipreallocated_state
);
598 state
->count
= count
;
599 state
->ban_credits
= ban_credits
;
601 ctdb_req_control_ipreallocated(&request
);
602 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
604 timeout
, /* cumulative */
606 if (tevent_req_nomem(subreq
, req
)) {
607 return tevent_req_post(req
, ev
);
609 tevent_req_set_callback(subreq
, ipreallocated_done
, req
);
614 static void ipreallocated_done(struct tevent_req
*subreq
)
616 struct tevent_req
*req
= tevent_req_callback_data(
617 subreq
, struct tevent_req
);
618 struct ipreallocated_state
*state
= tevent_req_data(
619 req
, struct ipreallocated_state
);
620 int *err_list
= NULL
;
622 bool status
, found_errors
;
624 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
629 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state
->count
);
630 tevent_req_done(req
);
634 /* Get some clear error messages out of err_list and count
637 found_errors
= false;
638 for (i
= 0; i
< state
->count
; i
++) {
639 int err
= err_list
[i
];
641 uint32_t pnn
= state
->pnns
[i
];
643 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
646 state
->ban_credits
[pnn
]++;
651 if (! found_errors
) {
652 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret
);
655 tevent_req_error(req
, ret
);
658 static bool ipreallocated_recv(struct tevent_req
*req
, int *perr
)
660 return generic_recv(req
, perr
);
663 /**********************************************************************/
666 * Recalculate the allocation of public IPs to nodes and have the
667 * nodes host their allocated addresses.
671 * - Initialise IP allocation state. Pass:
672 * + algorithm to be used;
673 * + various tunables (NoIPTakeover, NoIPFailback)
674 * + list of nodes to force rebalance (internal structure, currently
675 * no way to fetch, only used by LCP2 for nodes that have had new
676 * IP addresses added).
677 * - Set IP flags for IP allocation based on node map
678 * - Retrieve known and available IP addresses (done separately so
679 * values can be faked in unit testing)
680 * - Use ipalloc_set_public_ips() to set known and available IP
681 * addresses for allocation
682 * - If cluster can't host IP addresses then jump to IPREALLOCATED
683 * - Run IP allocation algorithm
684 * - Send RELEASE_IP to all nodes for IPs they should not host
685 * - Send TAKEOVER_IP to all nodes for IPs they should host
686 * - Send IPREALLOCATED to all nodes
689 struct takeover_state
{
690 struct tevent_context
*ev
;
691 struct ctdb_client_context
*client
;
692 struct timeval timeout
;
693 unsigned int num_nodes
;
694 uint32_t *pnns_connected
;
696 uint32_t *pnns_active
;
699 uint32_t *force_rebalance_nodes
;
700 struct ctdb_tunable_list
*tun_list
;
701 struct ipalloc_state
*ipalloc_state
;
702 struct ctdb_public_ip_list
*known_ips
;
703 struct public_ip_list
*all_ips
;
704 uint32_t *ban_credits
;
/* Forward declarations for the pieces of the takeover request.  Each
 * *_done function is the completion callback for one subrequest.
 * takeover_ipreallocated() starts the IPREALLOCATED stage and
 * takeover_failed() is the failure path used by the later stages, which
 * may assign banning credits before failing the request. */
707 static void takeover_tunables_done(struct tevent_req
*subreq
);
708 static void takeover_nodemap_done(struct tevent_req
*subreq
);
709 static void takeover_known_ips_done(struct tevent_req
*subreq
);
710 static void takeover_avail_ips_done(struct tevent_req
*subreq
);
711 static void takeover_release_ip_done(struct tevent_req
*subreq
);
712 static void takeover_take_ip_done(struct tevent_req
*subreq
);
713 static void takeover_ipreallocated(struct tevent_req
*req
);
714 static void takeover_ipreallocated_done(struct tevent_req
*subreq
);
715 static void takeover_failed(struct tevent_req
*subreq
, int ret
);
716 static void takeover_failed_done(struct tevent_req
*subreq
);
718 static struct tevent_req
*takeover_send(TALLOC_CTX
*mem_ctx
,
719 struct tevent_context
*ev
,
720 struct ctdb_client_context
*client
,
721 uint32_t *force_rebalance_nodes
)
723 struct tevent_req
*req
, *subreq
;
724 struct takeover_state
*state
;
725 struct ctdb_req_control request
;
727 req
= tevent_req_create(mem_ctx
, &state
, struct takeover_state
);
733 state
->client
= client
;
734 state
->force_rebalance_nodes
= force_rebalance_nodes
;
735 state
->destnode
= ctdb_client_pnn(client
);
737 ctdb_req_control_get_all_tunables(&request
);
738 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
739 state
->destnode
, TIMEOUT(),
741 if (tevent_req_nomem(subreq
, req
)) {
742 return tevent_req_post(req
, ev
);
744 tevent_req_set_callback(subreq
, takeover_tunables_done
, req
);
749 static void takeover_tunables_done(struct tevent_req
*subreq
)
751 struct tevent_req
*req
= tevent_req_callback_data(
752 subreq
, struct tevent_req
);
753 struct takeover_state
*state
= tevent_req_data(
754 req
, struct takeover_state
);
755 struct ctdb_reply_control
*reply
;
756 struct ctdb_req_control request
;
760 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
763 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
764 tevent_req_error(req
, ret
);
768 ret
= ctdb_reply_control_get_all_tunables(reply
, state
,
771 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
772 tevent_req_error(req
, ret
);
778 takeover_timeout
= state
->tun_list
->takeover_timeout
;
780 ctdb_req_control_get_nodemap(&request
);
781 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
782 state
->destnode
, TIMEOUT(),
784 if (tevent_req_nomem(subreq
, req
)) {
787 tevent_req_set_callback(subreq
, takeover_nodemap_done
, req
);
790 static void takeover_nodemap_done(struct tevent_req
*subreq
)
792 struct tevent_req
*req
= tevent_req_callback_data(
793 subreq
, struct tevent_req
);
794 struct takeover_state
*state
= tevent_req_data(
795 req
, struct takeover_state
);
796 struct ctdb_reply_control
*reply
;
799 struct ctdb_node_map
*nodemap
;
802 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
805 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
806 state
->destnode
, ret
);
807 tevent_req_error(req
, ret
);
811 ret
= ctdb_reply_control_get_nodemap(reply
, state
, &nodemap
);
813 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret
);
814 tevent_req_error(req
, ret
);
818 state
->num_nodes
= nodemap
->num
;
820 state
->num_connected
= list_of_connected_nodes(nodemap
,
821 CTDB_UNKNOWN_PNN
, state
,
822 &state
->pnns_connected
);
823 if (state
->num_connected
<= 0) {
824 tevent_req_error(req
, ENOMEM
);
828 state
->num_active
= list_of_active_nodes(nodemap
,
829 CTDB_UNKNOWN_PNN
, state
,
830 &state
->pnns_active
);
831 if (state
->num_active
<= 0) {
832 tevent_req_error(req
, ENOMEM
);
836 /* Default timeout for early jump to IPREALLOCATED. See below
837 * for explanation of 3 times...
839 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
841 state
->ban_credits
= talloc_zero_array(state
, uint32_t,
843 if (tevent_req_nomem(state
->ban_credits
, req
)) {
847 ptr
= getenv("CTDB_DISABLE_IP_FAILOVER");
849 /* IP failover is completely disabled so just send out
850 * ipreallocated event.
852 takeover_ipreallocated(req
);
856 state
->ipalloc_state
=
858 state
, state
->num_nodes
,
859 determine_algorithm(state
->tun_list
),
860 (state
->tun_list
->no_ip_takeover
!= 0),
861 (state
->tun_list
->no_ip_failback
!= 0),
862 state
->force_rebalance_nodes
);
863 if (tevent_req_nomem(state
->ipalloc_state
, req
)) {
867 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
868 state
->pnns_connected
, state
->num_connected
,
869 state
->num_nodes
, state
->ban_credits
,
871 if (tevent_req_nomem(subreq
, req
)) {
875 tevent_req_set_callback(subreq
, takeover_known_ips_done
, req
);
878 static void takeover_known_ips_done(struct tevent_req
*subreq
)
880 struct tevent_req
*req
= tevent_req_callback_data(
881 subreq
, struct tevent_req
);
882 struct takeover_state
*state
= tevent_req_data(
883 req
, struct takeover_state
);
886 uint32_t *pnns
= NULL
;
889 status
= get_public_ips_recv(subreq
, &ret
, state
, &state
->known_ips
);
893 D_ERR("Failed to fetch known public IPs\n");
894 takeover_failed(req
, ret
);
898 /* Get available IPs from active nodes that actually have known IPs */
900 pnns
= talloc_zero_array(state
, uint32_t, state
->num_active
);
901 if (tevent_req_nomem(pnns
, req
)) {
906 for (i
= 0; i
< state
->num_active
; i
++) {
907 uint32_t pnn
= state
->pnns_active
[i
];
909 /* If pnn has IPs then fetch available IPs from it */
910 if (state
->known_ips
[pnn
].num
> 0) {
916 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
918 state
->num_nodes
, state
->ban_credits
,
920 if (tevent_req_nomem(subreq
, req
)) {
924 tevent_req_set_callback(subreq
, takeover_avail_ips_done
, req
);
927 static void takeover_avail_ips_done(struct tevent_req
*subreq
)
929 struct tevent_req
*req
= tevent_req_callback_data(
930 subreq
, struct tevent_req
);
931 struct takeover_state
*state
= tevent_req_data(
932 req
, struct takeover_state
);
935 struct ctdb_public_ip_list
*available_ips
;
937 status
= get_public_ips_recv(subreq
, &ret
, state
, &available_ips
);
941 D_ERR("Failed to fetch available public IPs\n");
942 takeover_failed(req
, ret
);
946 ipalloc_set_public_ips(state
->ipalloc_state
,
947 state
->known_ips
, available_ips
);
949 if (! ipalloc_can_host_ips(state
->ipalloc_state
)) {
950 D_NOTICE("No nodes available to host public IPs yet\n");
951 takeover_ipreallocated(req
);
955 /* Do the IP reassignment calculations */
956 state
->all_ips
= ipalloc(state
->ipalloc_state
);
957 if (tevent_req_nomem(state
->all_ips
, req
)) {
961 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
962 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
963 * seconds. However, RELEASE_IP can take longer due to TCP
964 * connection killing, so sometimes needs more time.
965 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
966 * seconds across all 3 stages. No explicit expiry checks are
967 * needed before each stage because tevent is smart enough to
968 * fire the timeouts even if they are in the past. Initialise
969 * this here so it explicitly covers the stages we're
970 * interested in but, in particular, not the time taken by the
973 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
975 subreq
= release_ip_send(state
, state
->ev
, state
->client
,
976 state
->pnns_connected
, state
->num_connected
,
977 state
->timeout
, state
->all_ips
,
979 if (tevent_req_nomem(subreq
, req
)) {
982 tevent_req_set_callback(subreq
, takeover_release_ip_done
, req
);
985 static void takeover_release_ip_done(struct tevent_req
*subreq
)
987 struct tevent_req
*req
= tevent_req_callback_data(
988 subreq
, struct tevent_req
);
989 struct takeover_state
*state
= tevent_req_data(
990 req
, struct takeover_state
);
994 status
= release_ip_recv(subreq
, &ret
);
998 takeover_failed(req
, ret
);
1002 /* All released, now for takeovers */
1004 subreq
= take_ip_send(state
, state
->ev
, state
->client
,
1005 state
->timeout
, state
->all_ips
,
1006 state
->ban_credits
);
1007 if (tevent_req_nomem(subreq
, req
)) {
1010 tevent_req_set_callback(subreq
, takeover_take_ip_done
, req
);
1013 static void takeover_take_ip_done(struct tevent_req
*subreq
)
1015 struct tevent_req
*req
= tevent_req_callback_data(
1016 subreq
, struct tevent_req
);
1020 status
= take_ip_recv(subreq
, &ret
);
1021 TALLOC_FREE(subreq
);
1024 takeover_failed(req
, ret
);
1028 takeover_ipreallocated(req
);
1031 static void takeover_ipreallocated(struct tevent_req
*req
)
1033 struct takeover_state
*state
= tevent_req_data(
1034 req
, struct takeover_state
);
1035 struct tevent_req
*subreq
;
1037 subreq
= ipreallocated_send(state
, state
->ev
, state
->client
,
1038 state
->pnns_connected
,
1039 state
->num_connected
,
1041 state
->ban_credits
);
1042 if (tevent_req_nomem(subreq
, req
)) {
1045 tevent_req_set_callback(subreq
, takeover_ipreallocated_done
, req
);
1048 static void takeover_ipreallocated_done(struct tevent_req
*subreq
)
1050 struct tevent_req
*req
= tevent_req_callback_data(
1051 subreq
, struct tevent_req
);
1055 status
= ipreallocated_recv(subreq
, &ret
);
1056 TALLOC_FREE(subreq
);
1059 takeover_failed(req
, ret
);
1063 tevent_req_done(req
);
1066 struct takeover_failed_state
{
1067 struct tevent_req
*req
;
1071 void takeover_failed(struct tevent_req
*req
, int ret
)
1073 struct takeover_state
*state
= tevent_req_data(
1074 req
, struct takeover_state
);
1075 struct tevent_req
*subreq
;
1076 uint32_t max_pnn
= CTDB_UNKNOWN_PNN
;
1077 unsigned int max_credits
= 0;
1080 /* Check that bans are enabled */
1081 if (state
->tun_list
->enable_bans
== 0) {
1082 tevent_req_error(req
, ret
);
1086 for (pnn
= 0; pnn
< state
->num_nodes
; pnn
++) {
1087 if (state
->ban_credits
[pnn
] > max_credits
) {
1089 max_credits
= state
->ban_credits
[pnn
];
1093 if (max_credits
> 0) {
1094 struct ctdb_req_message message
;
1095 struct takeover_failed_state
*substate
;
1097 D_WARNING("Assigning banning credits to node %u\n", max_pnn
);
1099 substate
= talloc_zero(state
, struct takeover_failed_state
);
1100 if (tevent_req_nomem(substate
, req
)) {
1103 substate
->req
= req
;
1104 substate
->ret
= ret
;
1106 message
.srvid
= CTDB_SRVID_BANNING
;
1107 message
.data
.pnn
= max_pnn
;
1109 subreq
= ctdb_client_message_send(
1110 state
, state
->ev
, state
->client
,
1111 ctdb_client_pnn(state
->client
),
1113 if (subreq
== NULL
) {
1114 D_ERR("failed to assign banning credits\n");
1115 tevent_req_error(req
, ret
);
1118 tevent_req_set_callback(subreq
, takeover_failed_done
, substate
);
1120 tevent_req_error(req
, ret
);
1124 static void takeover_failed_done(struct tevent_req
*subreq
)
1126 struct takeover_failed_state
*substate
= tevent_req_callback_data(
1127 subreq
, struct takeover_failed_state
);
1128 struct tevent_req
*req
= substate
->req
;
1132 status
= ctdb_client_message_recv(subreq
, &ret
);
1133 TALLOC_FREE(subreq
);
1135 D_ERR("failed to assign banning credits, ret=%d\n", ret
);
1138 ret
= substate
->ret
;
1139 talloc_free(substate
);
1140 tevent_req_error(req
, ret
);
1143 static void takeover_recv(struct tevent_req
*req
, int *perr
)
1145 generic_recv(req
, perr
);
1148 static uint32_t *parse_node_list(TALLOC_CTX
*mem_ctx
, const char* s
)
1155 ret
= strv_split(mem_ctx
, &strv
, s
, ",");
1157 D_ERR("out of memory\n");
1161 num
= strv_count(strv
);
1163 nodes
= talloc_array(mem_ctx
, uint32_t, num
);
1164 if (nodes
== NULL
) {
1165 D_ERR("out of memory\n");
1170 for (i
= 0; i
< num
; i
++) {
1171 t
= strv_next(strv
, t
);
1178 static void usage(const char *progname
)
1181 "\nUsage: %s <output-fd> <ctdb-socket-path> "
1182 "[<force-rebalance-nodes>]\n",
1187 * Arguments - write fd, socket path, optional list of nodes to force rebalance
1189 int main(int argc
, const char *argv
[])
1192 const char *sockpath
;
1193 TALLOC_CTX
*mem_ctx
;
1194 struct tevent_context
*ev
;
1195 struct ctdb_client_context
*client
;
1197 struct tevent_req
*req
;
1198 uint32_t *force_rebalance_nodes
= NULL
;
1200 if (argc
< 3 || argc
> 4) {
1205 write_fd
= atoi(argv
[1]);
1208 mem_ctx
= talloc_new(NULL
);
1209 if (mem_ctx
== NULL
) {
1210 fprintf(stderr
, "talloc_new() failed\n");
1216 force_rebalance_nodes
= parse_node_list(mem_ctx
, argv
[3]);
1217 if (force_rebalance_nodes
== NULL
) {
1224 ret
= logging_init(mem_ctx
, NULL
, NULL
, "ctdb-takeover");
1227 "ctdb-takeover: Unable to initialize logging\n");
1231 ev
= tevent_context_init(mem_ctx
);
1233 D_ERR("tevent_context_init() failed\n");
1238 ret
= ctdb_client_init(mem_ctx
, ev
, sockpath
, &client
);
1240 D_ERR("ctdb_client_init() failed, ret=%d\n", ret
);
1244 req
= takeover_send(mem_ctx
, ev
, client
, force_rebalance_nodes
);
1246 D_ERR("takeover_send() failed\n");
1251 if (! tevent_req_poll(req
, ev
)) {
1252 D_ERR("tevent_req_poll() failed\n");
1257 takeover_recv(req
, &ret
);
1260 D_ERR("takeover run failed, ret=%d\n", ret
);
1264 sys_write_v(write_fd
, &ret
, sizeof(ret
));
1266 talloc_free(mem_ctx
);