2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/network.h"
30 #include "system/filesys.h"
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "protocol/protocol_util.h"
46 #include "client/client.h"
48 #include "common/logging.h"
50 #include "server/ipalloc.h"
/* Timeout, in seconds, applied to individual controls via TIMEOUT().
 * Starts at this built-in default and is overwritten with
 * tun_list->takeover_timeout once the tunables have been fetched
 * (see takeover_tunables_done()). */
52 static int takeover_timeout
= 9;
/* Expands to timeval_current_ofs() with takeover_timeout as the first
 * argument and 0 as the second; evaluated afresh at each use, so it
 * picks up the tunable value after it has been loaded. */
54 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
60 static bool generic_recv(struct tevent_req
*req
, int *perr
)
64 if (tevent_req_is_unix_error(req
, &err
)) {
74 static enum ipalloc_algorithm
75 determine_algorithm(const struct ctdb_tunable_list
*tunables
)
77 switch (tunables
->ip_alloc_algorithm
) {
79 return IPALLOC_DETERMINISTIC
;
81 return IPALLOC_NONDETERMINISTIC
;
89 /**********************************************************************/
91 struct get_public_ips_state
{
94 struct ctdb_public_ip_list
*ips
;
95 uint32_t *ban_credits
;
/* Forward declaration: completion callback installed by
 * get_public_ips_send() on its multi-node GET_PUBLIC_IPS control. */
98 static void get_public_ips_done(struct tevent_req
*subreq
);
100 static struct tevent_req
*get_public_ips_send(
102 struct tevent_context
*ev
,
103 struct ctdb_client_context
*client
,
105 int count
, int num_nodes
,
106 uint32_t *ban_credits
,
109 struct tevent_req
*req
, *subreq
;
110 struct get_public_ips_state
*state
;
111 struct ctdb_req_control request
;
113 req
= tevent_req_create(mem_ctx
, &state
, struct get_public_ips_state
);
119 state
->count
= count
;
120 state
->ban_credits
= ban_credits
;
122 state
->ips
= talloc_zero_array(state
,
123 struct ctdb_public_ip_list
,
125 if (tevent_req_nomem(state
->ips
, req
)) {
126 return tevent_req_post(req
, ev
);
129 /* Short circuit if no nodes being asked for IPs */
130 if (state
->count
== 0) {
131 tevent_req_done(req
);
132 return tevent_req_post(req
, ev
);
135 ctdb_req_control_get_public_ips(&request
, available_only
);
136 subreq
= ctdb_client_control_multi_send(mem_ctx
, ev
, client
,
139 TIMEOUT(), &request
);
140 if (tevent_req_nomem(subreq
, req
)) {
141 return tevent_req_post(req
, ev
);
143 tevent_req_set_callback(subreq
, get_public_ips_done
, req
);
148 static void get_public_ips_done(struct tevent_req
*subreq
)
150 struct tevent_req
*req
= tevent_req_callback_data(
151 subreq
, struct tevent_req
);
152 struct get_public_ips_state
*state
= tevent_req_data(
153 req
, struct get_public_ips_state
);
154 struct ctdb_reply_control
**reply
;
157 bool status
, found_errors
;
159 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
, &err_list
,
163 found_errors
= false;
164 for (i
= 0; i
< state
->count
; i
++) {
165 if (err_list
[i
] != 0) {
166 uint32_t pnn
= state
->pnns
[i
];
168 D_ERR("control GET_PUBLIC_IPS failed on "
169 "node %u, ret=%d\n", pnn
, err_list
[i
]);
171 state
->ban_credits
[pnn
]++;
176 tevent_req_error(req
, ret
);
180 found_errors
= false;
181 for (i
= 0; i
< state
->count
; i
++) {
183 struct ctdb_public_ip_list
*ips
;
185 pnn
= state
->pnns
[i
];
186 ret
= ctdb_reply_control_get_public_ips(reply
[i
], state
->ips
,
189 D_ERR("control GET_PUBLIC_IPS failed on "
191 state
->ban_credits
[pnn
]++;
196 D_INFO("Fetched public IPs from node %u\n", pnn
);
197 state
->ips
[pnn
] = *ips
;
201 tevent_req_error(req
, EIO
);
207 tevent_req_done(req
);
210 static bool get_public_ips_recv(struct tevent_req
*req
, int *perr
,
212 struct ctdb_public_ip_list
**ips
)
214 struct get_public_ips_state
*state
= tevent_req_data(
215 req
, struct get_public_ips_state
);
218 if (tevent_req_is_unix_error(req
, &err
)) {
225 *ips
= talloc_steal(mem_ctx
, state
->ips
);
230 /**********************************************************************/
232 struct release_ip_state
{
237 uint32_t *ban_credits
;
240 struct release_ip_one_state
{
241 struct tevent_req
*req
;
/* Forward declaration: completion callback installed by
 * release_ip_send() on each per-address RELEASE_IP multi-node control.
 * The callback data is the per-address release_ip_one_state, not the
 * top-level request. */
247 static void release_ip_done(struct tevent_req
*subreq
);
249 static struct tevent_req
*release_ip_send(TALLOC_CTX
*mem_ctx
,
250 struct tevent_context
*ev
,
251 struct ctdb_client_context
*client
,
254 struct timeval timeout
,
255 struct public_ip_list
*all_ips
,
256 uint32_t *ban_credits
)
258 struct tevent_req
*req
, *subreq
;
259 struct release_ip_state
*state
;
260 struct ctdb_req_control request
;
261 struct public_ip_list
*tmp_ip
;
263 req
= tevent_req_create(mem_ctx
, &state
, struct release_ip_state
);
269 state
->num_replies
= 0;
270 state
->num_fails
= 0;
271 state
->ban_credits
= ban_credits
;
273 /* Send a RELEASE_IP to all nodes that should not be hosting
274 * each IP. For each IP, all but one of these will be
275 * redundant. However, the redundant ones are used to tell
276 * nodes which node should be hosting the IP so that commands
277 * like "ctdb ip" can display a particular node's idea of who
278 * is hosting what. */
279 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
280 struct release_ip_one_state
*substate
;
281 struct ctdb_public_ip ip
;
284 substate
= talloc_zero(state
, struct release_ip_one_state
);
285 if (tevent_req_nomem(substate
, req
)) {
286 return tevent_req_post(req
, ev
);
289 substate
->pnns
= talloc_zero_array(substate
, uint32_t, count
);
290 if (tevent_req_nomem(substate
->pnns
, req
)) {
291 return tevent_req_post(req
, ev
);
297 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
300 if (tevent_req_nomem(substate
->ip_str
, req
)) {
301 return tevent_req_post(req
, ev
);
304 for (i
= 0; i
< count
; i
++) {
305 uint32_t pnn
= pnns
[i
];
307 /* Skip this node if IP is not known */
308 if (! bitmap_query(tmp_ip
->known_on
, pnn
)) {
312 /* If pnn is not the node that should be
313 * hosting the IP then add it to the list of
314 * nodes that need to do a release. */
315 if (tmp_ip
->pnn
!= pnn
) {
316 substate
->pnns
[substate
->count
] = pnn
;
321 if (substate
->count
== 0) {
322 /* No releases to send for this address... */
323 TALLOC_FREE(substate
);
327 ip
.pnn
= tmp_ip
->pnn
;
328 ip
.addr
= tmp_ip
->addr
;
329 ctdb_req_control_release_ip(&request
, &ip
);
330 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
333 timeout
,/* cumulative */
335 if (tevent_req_nomem(subreq
, req
)) {
336 return tevent_req_post(req
, ev
);
338 tevent_req_set_callback(subreq
, release_ip_done
, substate
);
343 /* None sent, finished... */
344 if (state
->num_sent
== 0) {
345 tevent_req_done(req
);
346 return tevent_req_post(req
, ev
);
352 static void release_ip_done(struct tevent_req
*subreq
)
354 struct release_ip_one_state
*substate
= tevent_req_callback_data(
355 subreq
, struct release_ip_one_state
);
356 struct tevent_req
*req
= substate
->req
;
357 struct release_ip_state
*state
= tevent_req_data(
358 req
, struct release_ip_state
);
361 bool status
, found_errors
;
363 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
368 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
369 substate
->ip_str
, substate
->count
);
373 /* Get some clear error messages out of err_list and count
376 found_errors
= false;
377 for (i
= 0; i
< substate
->count
; i
++) {
378 int err
= err_list
[i
];
380 uint32_t pnn
= substate
->pnns
[i
];
382 D_ERR("RELEASE_IP %s failed on node %u, "
383 "ret=%d\n", substate
->ip_str
, pnn
, err
);
385 state
->ban_credits
[pnn
]++;
386 state
->err_any
= err
;
390 if (! found_errors
) {
391 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
392 substate
->ip_str
, ret
);
393 state
->err_any
= EIO
;
399 talloc_free(substate
);
401 state
->num_replies
++;
403 if (state
->num_replies
< state
->num_sent
) {
404 /* Not all replies received, don't go further */
408 if (state
->num_fails
> 0) {
409 tevent_req_error(req
, state
->err_any
);
413 tevent_req_done(req
);
/*
 * Collect the result of a request created by release_ip_send().
 *
 * req  - the request being completed
 * perr - passed straight through to generic_recv()
 *
 * Returns whatever generic_recv() returns for this request.
 */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
421 /**********************************************************************/
423 struct take_ip_state
{
428 uint32_t *ban_credits
;
431 struct take_ip_one_state
{
432 struct tevent_req
*req
;
/* Forward declaration: completion callback installed by take_ip_send()
 * on each single-node TAKEOVER_IP control.  The callback data is the
 * per-address take_ip_one_state, not the top-level request. */
437 static void take_ip_done(struct tevent_req
*subreq
);
439 static struct tevent_req
*take_ip_send(TALLOC_CTX
*mem_ctx
,
440 struct tevent_context
*ev
,
441 struct ctdb_client_context
*client
,
442 struct timeval timeout
,
443 struct public_ip_list
*all_ips
,
444 uint32_t *ban_credits
)
446 struct tevent_req
*req
, *subreq
;
447 struct take_ip_state
*state
;
448 struct ctdb_req_control request
;
449 struct public_ip_list
*tmp_ip
;
451 req
= tevent_req_create(mem_ctx
, &state
, struct take_ip_state
);
457 state
->num_replies
= 0;
458 state
->num_fails
= 0;
459 state
->ban_credits
= ban_credits
;
461 /* For each IP, send a TAKEOVER_IP to the node that should be
462 * hosting it. Many of these will often be redundant (since
463 * the allocation won't have changed) but they can be useful
464 * to recover from inconsistencies. */
465 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
466 struct take_ip_one_state
*substate
;
467 struct ctdb_public_ip ip
;
469 if (tmp_ip
->pnn
== -1) {
470 /* IP will be unassigned */
474 substate
= talloc_zero(state
, struct take_ip_one_state
);
475 if (tevent_req_nomem(substate
, req
)) {
476 return tevent_req_post(req
, ev
);
480 substate
->pnn
= tmp_ip
->pnn
;
482 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
485 if (tevent_req_nomem(substate
->ip_str
, req
)) {
486 return tevent_req_post(req
, ev
);
489 ip
.pnn
= tmp_ip
->pnn
;
490 ip
.addr
= tmp_ip
->addr
;
491 ctdb_req_control_takeover_ip(&request
, &ip
);
492 subreq
= ctdb_client_control_send(
493 state
, ev
, client
, tmp_ip
->pnn
,
494 timeout
, /* cumulative */
496 if (tevent_req_nomem(subreq
, req
)) {
497 return tevent_req_post(req
, ev
);
499 tevent_req_set_callback(subreq
, take_ip_done
, substate
);
504 /* None sent, finished... */
505 if (state
->num_sent
== 0) {
506 tevent_req_done(req
);
507 return tevent_req_post(req
, ev
);
513 static void take_ip_done(struct tevent_req
*subreq
)
515 struct take_ip_one_state
*substate
= tevent_req_callback_data(
516 subreq
, struct take_ip_one_state
);
517 struct tevent_req
*req
= substate
->req
;
518 struct ctdb_reply_control
*reply
;
519 struct take_ip_state
*state
= tevent_req_data(
520 req
, struct take_ip_state
);
524 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
528 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
529 substate
->ip_str
, substate
->pnn
, ret
);
533 ret
= ctdb_reply_control_takeover_ip(reply
);
535 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
536 substate
->ip_str
, substate
->pnn
, ret
);
540 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
541 substate
->ip_str
, substate
->pnn
);
545 state
->ban_credits
[substate
->pnn
]++;
547 state
->err_any
= ret
;
550 talloc_free(substate
);
552 state
->num_replies
++;
554 if (state
->num_replies
< state
->num_sent
) {
555 /* Not all replies received, don't go further */
559 if (state
->num_fails
> 0) {
560 tevent_req_error(req
, state
->err_any
);
564 tevent_req_done(req
);
/*
 * Collect the result of a request created by take_ip_send().
 *
 * req  - the request being completed
 * perr - passed straight through to generic_recv()
 *
 * Returns whatever generic_recv() returns for this request.
 */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
572 /**********************************************************************/
574 struct ipreallocated_state
{
577 uint32_t *ban_credits
;
/* Forward declaration: completion callback installed by
 * ipreallocated_send() on its multi-node IPREALLOCATED control. */
580 static void ipreallocated_done(struct tevent_req
*subreq
);
582 static struct tevent_req
*ipreallocated_send(TALLOC_CTX
*mem_ctx
,
583 struct tevent_context
*ev
,
584 struct ctdb_client_context
*client
,
587 struct timeval timeout
,
588 uint32_t *ban_credits
)
590 struct tevent_req
*req
, *subreq
;
591 struct ipreallocated_state
*state
;
592 struct ctdb_req_control request
;
594 req
= tevent_req_create(mem_ctx
, &state
, struct ipreallocated_state
);
600 state
->count
= count
;
601 state
->ban_credits
= ban_credits
;
603 ctdb_req_control_ipreallocated(&request
);
604 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
606 timeout
, /* cumulative */
608 if (tevent_req_nomem(subreq
, req
)) {
609 return tevent_req_post(req
, ev
);
611 tevent_req_set_callback(subreq
, ipreallocated_done
, req
);
616 static void ipreallocated_done(struct tevent_req
*subreq
)
618 struct tevent_req
*req
= tevent_req_callback_data(
619 subreq
, struct tevent_req
);
620 struct ipreallocated_state
*state
= tevent_req_data(
621 req
, struct ipreallocated_state
);
622 int *err_list
= NULL
;
624 bool status
, found_errors
;
626 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
631 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state
->count
);
632 tevent_req_done(req
);
636 /* Get some clear error messages out of err_list and count
639 found_errors
= false;
640 for (i
= 0; i
< state
->count
; i
++) {
641 int err
= err_list
[i
];
643 uint32_t pnn
= state
->pnns
[i
];
645 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
648 state
->ban_credits
[pnn
]++;
653 if (! found_errors
) {
654 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret
);
657 tevent_req_error(req
, ret
);
/*
 * Collect the result of a request created by ipreallocated_send().
 *
 * req  - the request being completed
 * perr - passed straight through to generic_recv()
 *
 * Returns whatever generic_recv() returns for this request.
 */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
665 /**********************************************************************/
668 * Recalculate the allocation of public IPs to nodes and have the
669 * nodes host their allocated addresses.
673 * - Initialise IP allocation state. Pass:
674 * + algorithm to be used;
675 * + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
676 * + list of nodes to force rebalance (internal structure, currently
677 * no way to fetch, only used by LCP2 for nodes that have had new
678 * IP addresses added).
679 * - Set IP flags for IP allocation based on node map
680 * - Retrieve known and available IP addresses (done separately so
681 * values can be faked in unit testing)
682 * - Use ipalloc_set_public_ips() to set known and available IP
683 * addresses for allocation
684 * - If cluster can't host IP addresses then jump to IPREALLOCATED
685 * - Run IP allocation algorithm
686 * - Send RELEASE_IP to all nodes for IPs they should not host
687 * - Send TAKEOVER_IP to all nodes for IPs they should host
688 * - Send IPREALLOCATED to all nodes
691 struct takeover_state
{
692 struct tevent_context
*ev
;
693 struct ctdb_client_context
*client
;
694 struct timeval timeout
;
696 uint32_t *pnns_connected
;
698 uint32_t *pnns_active
;
701 uint32_t *force_rebalance_nodes
;
702 struct ctdb_tunable_list
*tun_list
;
703 struct ipalloc_state
*ipalloc_state
;
704 struct ctdb_public_ip_list
*known_ips
;
705 struct public_ip_list
*all_ips
;
706 uint32_t *ban_credits
;
/*
 * Stages of a takeover run.
 *
 * The *_done functions are installed as tevent callbacks on the
 * sub-request of the corresponding stage and therefore receive that
 * sub-request.  They are chained in this order:
 *
 *   takeover_send()
 *     -> takeover_tunables_done()
 *     -> takeover_nodemap_done()
 *     -> takeover_known_ips_done()
 *     -> takeover_avail_ips_done()
 *     -> takeover_release_ip_done()
 *     -> takeover_take_ip_done()
 *     -> takeover_ipreallocated() -> takeover_ipreallocated_done()
 *
 * takeover_nodemap_done() and takeover_avail_ips_done() may jump
 * directly to takeover_ipreallocated().
 *
 * takeover_ipreallocated() and takeover_failed() are not callbacks:
 * they are called directly and take the top-level takeover request.
 * takeover_failed() may send a banning message, which is completed by
 * takeover_failed_done().
 */
static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
static void takeover_failed(struct tevent_req *req, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
720 static struct tevent_req
*takeover_send(TALLOC_CTX
*mem_ctx
,
721 struct tevent_context
*ev
,
722 struct ctdb_client_context
*client
,
723 uint32_t *force_rebalance_nodes
)
725 struct tevent_req
*req
, *subreq
;
726 struct takeover_state
*state
;
727 struct ctdb_req_control request
;
729 req
= tevent_req_create(mem_ctx
, &state
, struct takeover_state
);
735 state
->client
= client
;
736 state
->force_rebalance_nodes
= force_rebalance_nodes
;
737 state
->destnode
= ctdb_client_pnn(client
);
739 ctdb_req_control_get_all_tunables(&request
);
740 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
741 state
->destnode
, TIMEOUT(),
743 if (tevent_req_nomem(subreq
, req
)) {
744 return tevent_req_post(req
, ev
);
746 tevent_req_set_callback(subreq
, takeover_tunables_done
, req
);
751 static void takeover_tunables_done(struct tevent_req
*subreq
)
753 struct tevent_req
*req
= tevent_req_callback_data(
754 subreq
, struct tevent_req
);
755 struct takeover_state
*state
= tevent_req_data(
756 req
, struct takeover_state
);
757 struct ctdb_reply_control
*reply
;
758 struct ctdb_req_control request
;
762 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
765 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
766 tevent_req_error(req
, ret
);
770 ret
= ctdb_reply_control_get_all_tunables(reply
, state
,
773 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
774 tevent_req_error(req
, ret
);
780 takeover_timeout
= state
->tun_list
->takeover_timeout
;
782 ctdb_req_control_get_nodemap(&request
);
783 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
784 state
->destnode
, TIMEOUT(),
786 if (tevent_req_nomem(subreq
, req
)) {
789 tevent_req_set_callback(subreq
, takeover_nodemap_done
, req
);
792 static void takeover_nodemap_done(struct tevent_req
*subreq
)
794 struct tevent_req
*req
= tevent_req_callback_data(
795 subreq
, struct tevent_req
);
796 struct takeover_state
*state
= tevent_req_data(
797 req
, struct takeover_state
);
798 struct ctdb_reply_control
*reply
;
801 struct ctdb_node_map
*nodemap
;
803 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
806 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
807 state
->destnode
, ret
);
808 tevent_req_error(req
, ret
);
812 ret
= ctdb_reply_control_get_nodemap(reply
, state
, &nodemap
);
814 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret
);
815 tevent_req_error(req
, ret
);
819 state
->num_nodes
= nodemap
->num
;
821 state
->num_connected
= list_of_connected_nodes(nodemap
,
822 CTDB_UNKNOWN_PNN
, state
,
823 &state
->pnns_connected
);
824 if (state
->num_connected
<= 0) {
825 tevent_req_error(req
, ENOMEM
);
829 state
->num_active
= list_of_active_nodes(nodemap
,
830 CTDB_UNKNOWN_PNN
, state
,
831 &state
->pnns_active
);
832 if (state
->num_active
<= 0) {
833 tevent_req_error(req
, ENOMEM
);
837 /* Default timeout for early jump to IPREALLOCATED. See below
838 * for explanation of 3 times...
840 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
842 state
->ban_credits
= talloc_zero_array(state
, uint32_t,
844 if (tevent_req_nomem(state
->ban_credits
, req
)) {
848 if (state
->tun_list
->disable_ip_failover
!= 0) {
849 /* IP failover is completely disabled so just send out
850 * ipreallocated event.
852 takeover_ipreallocated(req
);
856 state
->ipalloc_state
=
858 state
, state
->num_nodes
,
859 determine_algorithm(state
->tun_list
),
860 (state
->tun_list
->no_ip_takeover
!= 0),
861 (state
->tun_list
->no_ip_failback
!= 0),
862 (state
->tun_list
->no_ip_host_on_all_disabled
!= 0),
863 state
->force_rebalance_nodes
);
864 if (tevent_req_nomem(state
->ipalloc_state
, req
)) {
868 ipalloc_set_node_flags(state
->ipalloc_state
, nodemap
);
870 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
871 state
->pnns_connected
, state
->num_connected
,
872 state
->num_nodes
, state
->ban_credits
,
874 if (tevent_req_nomem(subreq
, req
)) {
878 tevent_req_set_callback(subreq
, takeover_known_ips_done
, req
);
881 static void takeover_known_ips_done(struct tevent_req
*subreq
)
883 struct tevent_req
*req
= tevent_req_callback_data(
884 subreq
, struct tevent_req
);
885 struct takeover_state
*state
= tevent_req_data(
886 req
, struct takeover_state
);
889 uint32_t *pnns
= NULL
;
892 status
= get_public_ips_recv(subreq
, &ret
, state
, &state
->known_ips
);
896 D_ERR("Failed to fetch known public IPs\n");
897 takeover_failed(req
, ret
);
901 /* Get available IPs from active nodes that actually have known IPs */
903 pnns
= talloc_zero_array(state
, uint32_t, state
->num_active
);
904 if (tevent_req_nomem(pnns
, req
)) {
909 for (i
= 0; i
< state
->num_active
; i
++) {
910 uint32_t pnn
= state
->pnns_active
[i
];
912 /* If pnn has IPs then fetch available IPs from it */
913 if (state
->known_ips
[pnn
].num
> 0) {
919 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
921 state
->num_nodes
, state
->ban_credits
,
923 if (tevent_req_nomem(subreq
, req
)) {
927 tevent_req_set_callback(subreq
, takeover_avail_ips_done
, req
);
930 static void takeover_avail_ips_done(struct tevent_req
*subreq
)
932 struct tevent_req
*req
= tevent_req_callback_data(
933 subreq
, struct tevent_req
);
934 struct takeover_state
*state
= tevent_req_data(
935 req
, struct takeover_state
);
938 struct ctdb_public_ip_list
*available_ips
;
940 status
= get_public_ips_recv(subreq
, &ret
, state
, &available_ips
);
944 D_ERR("Failed to fetch available public IPs\n");
945 takeover_failed(req
, ret
);
949 ipalloc_set_public_ips(state
->ipalloc_state
,
950 state
->known_ips
, available_ips
);
952 if (! ipalloc_can_host_ips(state
->ipalloc_state
)) {
953 D_NOTICE("No nodes available to host public IPs yet\n");
954 takeover_ipreallocated(req
);
958 /* Do the IP reassignment calculations */
959 state
->all_ips
= ipalloc(state
->ipalloc_state
);
960 if (tevent_req_nomem(state
->all_ips
, req
)) {
964 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
965 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
966 * seconds. However, RELEASE_IP can take longer due to TCP
967 * connection killing, so sometimes needs more time.
968 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
969 * seconds across all 3 stages. No explicit expiry checks are
970 * needed before each stage because tevent is smart enough to
971 * fire the timeouts even if they are in the past. Initialise
972 * this here so it explicitly covers the stages we're
973 * interested in but, in particular, not the time taken by the
976 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
978 subreq
= release_ip_send(state
, state
->ev
, state
->client
,
979 state
->pnns_connected
, state
->num_connected
,
980 state
->timeout
, state
->all_ips
,
982 if (tevent_req_nomem(subreq
, req
)) {
985 tevent_req_set_callback(subreq
, takeover_release_ip_done
, req
);
988 static void takeover_release_ip_done(struct tevent_req
*subreq
)
990 struct tevent_req
*req
= tevent_req_callback_data(
991 subreq
, struct tevent_req
);
992 struct takeover_state
*state
= tevent_req_data(
993 req
, struct takeover_state
);
997 status
= release_ip_recv(subreq
, &ret
);
1001 takeover_failed(req
, ret
);
1005 /* All released, now for takeovers */
1007 subreq
= take_ip_send(state
, state
->ev
, state
->client
,
1008 state
->timeout
, state
->all_ips
,
1009 state
->ban_credits
);
1010 if (tevent_req_nomem(subreq
, req
)) {
1013 tevent_req_set_callback(subreq
, takeover_take_ip_done
, req
);
1016 static void takeover_take_ip_done(struct tevent_req
*subreq
)
1018 struct tevent_req
*req
= tevent_req_callback_data(
1019 subreq
, struct tevent_req
);
1023 status
= take_ip_recv(subreq
, &ret
);
1024 TALLOC_FREE(subreq
);
1027 takeover_failed(req
, ret
);
1031 takeover_ipreallocated(req
);
1034 static void takeover_ipreallocated(struct tevent_req
*req
)
1036 struct takeover_state
*state
= tevent_req_data(
1037 req
, struct takeover_state
);
1038 struct tevent_req
*subreq
;
1040 subreq
= ipreallocated_send(state
, state
->ev
, state
->client
,
1041 state
->pnns_connected
,
1042 state
->num_connected
,
1044 state
->ban_credits
);
1045 if (tevent_req_nomem(subreq
, req
)) {
1048 tevent_req_set_callback(subreq
, takeover_ipreallocated_done
, req
);
1051 static void takeover_ipreallocated_done(struct tevent_req
*subreq
)
1053 struct tevent_req
*req
= tevent_req_callback_data(
1054 subreq
, struct tevent_req
);
1058 status
= ipreallocated_recv(subreq
, &ret
);
1059 TALLOC_FREE(subreq
);
1062 takeover_failed(req
, ret
);
1066 tevent_req_done(req
);
1069 struct takeover_failed_state
{
1070 struct tevent_req
*req
;
1074 void takeover_failed(struct tevent_req
*req
, int ret
)
1076 struct takeover_state
*state
= tevent_req_data(
1077 req
, struct takeover_state
);
1078 struct tevent_req
*subreq
;
1079 uint32_t max_pnn
= CTDB_UNKNOWN_PNN
;
1080 int max_credits
= 0;
1083 /* Check that bans are enabled */
1084 if (state
->tun_list
->enable_bans
== 0) {
1085 tevent_req_error(req
, ret
);
1089 for (pnn
= 0; pnn
< state
->num_nodes
; pnn
++) {
1090 if (state
->ban_credits
[pnn
] > max_credits
) {
1092 max_credits
= state
->ban_credits
[pnn
];
1096 if (max_credits
> 0) {
1097 struct ctdb_req_message message
;
1098 struct takeover_failed_state
*substate
;
1100 D_WARNING("Assigning banning credits to node %u\n", max_pnn
);
1102 substate
= talloc_zero(state
, struct takeover_failed_state
);
1103 if (tevent_req_nomem(substate
, req
)) {
1106 substate
->req
= req
;
1107 substate
->ret
= ret
;
1109 message
.srvid
= CTDB_SRVID_BANNING
;
1110 message
.data
.pnn
= max_pnn
;
1112 subreq
= ctdb_client_message_send(
1113 state
, state
->ev
, state
->client
,
1114 ctdb_client_pnn(state
->client
),
1116 if (subreq
== NULL
) {
1117 D_ERR("failed to assign banning credits\n");
1118 tevent_req_error(req
, ret
);
1121 tevent_req_set_callback(subreq
, takeover_failed_done
, substate
);
1123 tevent_req_error(req
, ret
);
1127 static void takeover_failed_done(struct tevent_req
*subreq
)
1129 struct takeover_failed_state
*substate
= tevent_req_callback_data(
1130 subreq
, struct takeover_failed_state
);
1131 struct tevent_req
*req
= substate
->req
;
1135 status
= ctdb_client_message_recv(subreq
, &ret
);
1136 TALLOC_FREE(subreq
);
1138 D_ERR("failed to assign banning credits, ret=%d\n", ret
);
1141 ret
= substate
->ret
;
1142 talloc_free(substate
);
1143 tevent_req_error(req
, ret
);
/*
 * Collect the outcome of a request started with takeover_send().
 *
 * req  - the top-level takeover request
 * perr - passed straight through to generic_recv()
 *
 * The boolean result of generic_recv() is deliberately discarded; the
 * only information a caller gets back is what generic_recv() stores
 * through perr.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	(void)generic_recv(req, perr);
}
1151 static uint32_t *parse_node_list(TALLOC_CTX
*mem_ctx
, const char* s
)
1158 ret
= strv_split(mem_ctx
, &strv
, s
, ",");
1160 D_ERR("out of memory\n");
1164 num
= strv_count(strv
);
1166 nodes
= talloc_array(mem_ctx
, uint32_t, num
);
1167 if (nodes
== NULL
) {
1168 D_ERR("out of memory\n");
1173 for (i
= 0; i
< num
; i
++) {
1174 t
= strv_next(strv
, t
);
1181 static void usage(const char *progname
)
1184 "\nUsage: %s <output-fd> <ctdb-socket-path> "
1185 "[<force-rebalance-nodes>]\n",
1190 * Arguments - write fd, socket path, optional force-rebalance node list
1192 int main(int argc
, const char *argv
[])
1195 const char *sockpath
;
1196 TALLOC_CTX
*mem_ctx
;
1197 struct tevent_context
*ev
;
1198 struct ctdb_client_context
*client
;
1200 struct tevent_req
*req
;
1201 uint32_t *force_rebalance_nodes
= NULL
;
1203 if (argc
< 3 || argc
> 4) {
1208 write_fd
= atoi(argv
[1]);
1211 mem_ctx
= talloc_new(NULL
);
1212 if (mem_ctx
== NULL
) {
1213 fprintf(stderr
, "talloc_new() failed\n");
1219 force_rebalance_nodes
= parse_node_list(mem_ctx
, argv
[3]);
1220 if (force_rebalance_nodes
== NULL
) {
1227 ret
= logging_init(mem_ctx
, NULL
, NULL
, "ctdb-takeover");
1230 "ctdb-takeover: Unable to initialize logging\n");
1234 ev
= tevent_context_init(mem_ctx
);
1236 D_ERR("tevent_context_init() failed\n");
1241 ret
= ctdb_client_init(mem_ctx
, ev
, sockpath
, &client
);
1243 D_ERR("ctdb_client_init() failed, ret=%d\n", ret
);
1247 req
= takeover_send(mem_ctx
, ev
, client
, force_rebalance_nodes
);
1249 D_ERR("takeover_send() failed\n");
1254 if (! tevent_req_poll(req
, ev
)) {
1255 D_ERR("tevent_req_poll() failed\n");
1260 takeover_recv(req
, &ret
);
1263 D_ERR("takeover run failed, ret=%d\n", ret
);
1267 sys_write_v(write_fd
, &ret
, sizeof(ret
));
1269 talloc_free(mem_ctx
);