2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/network.h"
30 #include "system/filesys.h"
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "protocol/protocol_util.h"
46 #include "client/client.h"
48 #include "common/logging.h"
50 #include "server/ipalloc.h"
52 static int takeover_timeout
= 9;
54 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
60 static bool generic_recv(struct tevent_req
*req
, int *perr
)
64 if (tevent_req_is_unix_error(req
, &err
)) {
74 static enum ipalloc_algorithm
75 determine_algorithm(const struct ctdb_tunable_list
*tunables
)
77 switch (tunables
->ip_alloc_algorithm
) {
79 return IPALLOC_DETERMINISTIC
;
81 return IPALLOC_NONDETERMINISTIC
;
89 /**********************************************************************/
91 struct get_public_ips_state
{
94 struct ctdb_public_ip_list
*ips
;
95 uint32_t *ban_credits
;
98 static void get_public_ips_done(struct tevent_req
*subreq
);
100 static struct tevent_req
*get_public_ips_send(
102 struct tevent_context
*ev
,
103 struct ctdb_client_context
*client
,
105 int count
, int num_nodes
,
106 uint32_t *ban_credits
,
109 struct tevent_req
*req
, *subreq
;
110 struct get_public_ips_state
*state
;
111 struct ctdb_req_control request
;
113 req
= tevent_req_create(mem_ctx
, &state
, struct get_public_ips_state
);
119 state
->count
= count
;
120 state
->ban_credits
= ban_credits
;
122 state
->ips
= talloc_zero_array(state
,
123 struct ctdb_public_ip_list
,
125 if (tevent_req_nomem(state
->ips
, req
)) {
126 return tevent_req_post(req
, ev
);
129 /* Short circuit if no nodes being asked for IPs */
130 if (state
->count
== 0) {
131 tevent_req_done(req
);
132 return tevent_req_post(req
, ev
);
135 ctdb_req_control_get_public_ips(&request
, available_only
);
136 subreq
= ctdb_client_control_multi_send(mem_ctx
, ev
, client
,
139 TIMEOUT(), &request
);
140 if (tevent_req_nomem(subreq
, req
)) {
141 return tevent_req_post(req
, ev
);
143 tevent_req_set_callback(subreq
, get_public_ips_done
, req
);
148 static void get_public_ips_done(struct tevent_req
*subreq
)
150 struct tevent_req
*req
= tevent_req_callback_data(
151 subreq
, struct tevent_req
);
152 struct get_public_ips_state
*state
= tevent_req_data(
153 req
, struct get_public_ips_state
);
154 struct ctdb_reply_control
**reply
;
157 bool status
, found_errors
;
159 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
, &err_list
,
163 found_errors
= false;
164 for (i
= 0; i
< state
->count
; i
++) {
165 if (err_list
[i
] != 0) {
166 uint32_t pnn
= state
->pnns
[i
];
168 D_ERR("control GET_PUBLIC_IPS failed on "
169 "node %u, ret=%d\n", pnn
, err_list
[i
]);
171 state
->ban_credits
[pnn
]++;
176 tevent_req_error(req
, ret
);
180 found_errors
= false;
181 for (i
= 0; i
< state
->count
; i
++) {
183 struct ctdb_public_ip_list
*ips
;
185 pnn
= state
->pnns
[i
];
186 ret
= ctdb_reply_control_get_public_ips(reply
[i
], state
->ips
,
189 D_ERR("control GET_PUBLIC_IPS failed on "
191 state
->ban_credits
[pnn
]++;
196 D_INFO("Fetched public IPs from node %u\n", pnn
);
197 state
->ips
[pnn
] = *ips
;
201 tevent_req_error(req
, EIO
);
207 tevent_req_done(req
);
210 static bool get_public_ips_recv(struct tevent_req
*req
, int *perr
,
212 struct ctdb_public_ip_list
**ips
)
214 struct get_public_ips_state
*state
= tevent_req_data(
215 req
, struct get_public_ips_state
);
218 if (tevent_req_is_unix_error(req
, &err
)) {
225 *ips
= talloc_steal(mem_ctx
, state
->ips
);
230 /**********************************************************************/
232 struct release_ip_state
{
237 uint32_t *ban_credits
;
240 struct release_ip_one_state
{
241 struct tevent_req
*req
;
247 static void release_ip_done(struct tevent_req
*subreq
);
249 static struct tevent_req
*release_ip_send(TALLOC_CTX
*mem_ctx
,
250 struct tevent_context
*ev
,
251 struct ctdb_client_context
*client
,
254 struct timeval timeout
,
255 struct public_ip_list
*all_ips
,
256 uint32_t *ban_credits
)
258 struct tevent_req
*req
, *subreq
;
259 struct release_ip_state
*state
;
260 struct ctdb_req_control request
;
261 struct public_ip_list
*tmp_ip
;
263 req
= tevent_req_create(mem_ctx
, &state
, struct release_ip_state
);
269 state
->num_replies
= 0;
270 state
->num_fails
= 0;
271 state
->ban_credits
= ban_credits
;
273 /* Send a RELEASE_IP to all nodes that should not be hosting
274 * each IP. For each IP, all but one of these will be
275 * redundant. However, the redundant ones are used to tell
276 * nodes which node should be hosting the IP so that commands
277 * like "ctdb ip" can display a particular node's idea of who
278 * is hosting what. */
279 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
280 struct release_ip_one_state
*substate
;
281 struct ctdb_public_ip ip
;
284 substate
= talloc_zero(state
, struct release_ip_one_state
);
285 if (tevent_req_nomem(substate
, req
)) {
286 return tevent_req_post(req
, ev
);
289 substate
->pnns
= talloc_zero_array(substate
, uint32_t, count
);
290 if (tevent_req_nomem(substate
->pnns
, req
)) {
291 return tevent_req_post(req
, ev
);
297 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
300 if (tevent_req_nomem(substate
->ip_str
, req
)) {
301 return tevent_req_post(req
, ev
);
304 for (i
= 0; i
< count
; i
++) {
305 uint32_t pnn
= pnns
[i
];
307 /* Skip this node if IP is not known */
308 if (! bitmap_query(tmp_ip
->known_on
, pnn
)) {
312 /* If pnn is not the node that should be
313 * hosting the IP then add it to the list of
314 * nodes that need to do a release. */
315 if (tmp_ip
->pnn
!= pnn
) {
316 substate
->pnns
[substate
->count
] = pnn
;
321 if (substate
->count
== 0) {
322 /* No releases to send for this address... */
323 TALLOC_FREE(substate
);
327 ip
.pnn
= tmp_ip
->pnn
;
328 ip
.addr
= tmp_ip
->addr
;
329 ctdb_req_control_release_ip(&request
, &ip
);
330 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
333 timeout
,/* cumulative */
335 if (tevent_req_nomem(subreq
, req
)) {
336 return tevent_req_post(req
, ev
);
338 tevent_req_set_callback(subreq
, release_ip_done
, substate
);
343 /* None sent, finished... */
344 if (state
->num_sent
== 0) {
345 tevent_req_done(req
);
346 return tevent_req_post(req
, ev
);
352 static void release_ip_done(struct tevent_req
*subreq
)
354 struct release_ip_one_state
*substate
= tevent_req_callback_data(
355 subreq
, struct release_ip_one_state
);
356 struct tevent_req
*req
= substate
->req
;
357 struct release_ip_state
*state
= tevent_req_data(
358 req
, struct release_ip_state
);
361 bool status
, found_errors
;
363 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
368 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
369 substate
->ip_str
, substate
->count
);
373 /* Get some clear error messages out of err_list and count
376 found_errors
= false;
377 for (i
= 0; i
< substate
->count
; i
++) {
378 int err
= err_list
[i
];
380 uint32_t pnn
= substate
->pnns
[i
];
382 D_ERR("RELEASE_IP %s failed on node %u, "
383 "ret=%d\n", substate
->ip_str
, pnn
, err
);
385 state
->ban_credits
[pnn
]++;
386 state
->err_any
= err
;
390 if (! found_errors
) {
391 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
392 substate
->ip_str
, ret
);
393 state
->err_any
= EIO
;
399 talloc_free(substate
);
401 state
->num_replies
++;
403 if (state
->num_replies
< state
->num_sent
) {
404 /* Not all replies received, don't go further */
408 if (state
->num_fails
> 0) {
409 tevent_req_error(req
, state
->err_any
);
413 tevent_req_done(req
);
416 static bool release_ip_recv(struct tevent_req
*req
, int *perr
)
418 return generic_recv(req
, perr
);
421 /**********************************************************************/
423 struct take_ip_state
{
428 uint32_t *ban_credits
;
431 struct take_ip_one_state
{
432 struct tevent_req
*req
;
437 static void take_ip_done(struct tevent_req
*subreq
);
439 static struct tevent_req
*take_ip_send(TALLOC_CTX
*mem_ctx
,
440 struct tevent_context
*ev
,
441 struct ctdb_client_context
*client
,
442 struct timeval timeout
,
443 struct public_ip_list
*all_ips
,
444 uint32_t *ban_credits
)
446 struct tevent_req
*req
, *subreq
;
447 struct take_ip_state
*state
;
448 struct ctdb_req_control request
;
449 struct public_ip_list
*tmp_ip
;
451 req
= tevent_req_create(mem_ctx
, &state
, struct take_ip_state
);
457 state
->num_replies
= 0;
458 state
->num_fails
= 0;
459 state
->ban_credits
= ban_credits
;
461 /* For each IP, send a TAKEOVER_IP to the node that should be
462 * hosting it. Many of these will often be redundant (since
463 * the allocation won't have changed) but they can be useful
464 * to recover from inconsistencies. */
465 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
466 struct take_ip_one_state
*substate
;
467 struct ctdb_public_ip ip
;
469 if (tmp_ip
->pnn
== -1) {
470 /* IP will be unassigned */
474 substate
= talloc_zero(state
, struct take_ip_one_state
);
475 if (tevent_req_nomem(substate
, req
)) {
476 return tevent_req_post(req
, ev
);
480 substate
->pnn
= tmp_ip
->pnn
;
482 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
485 if (tevent_req_nomem(substate
->ip_str
, req
)) {
486 return tevent_req_post(req
, ev
);
489 ip
.pnn
= tmp_ip
->pnn
;
490 ip
.addr
= tmp_ip
->addr
;
491 ctdb_req_control_takeover_ip(&request
, &ip
);
492 subreq
= ctdb_client_control_send(
493 state
, ev
, client
, tmp_ip
->pnn
,
494 timeout
, /* cumulative */
496 if (tevent_req_nomem(subreq
, req
)) {
497 return tevent_req_post(req
, ev
);
499 tevent_req_set_callback(subreq
, take_ip_done
, substate
);
504 /* None sent, finished... */
505 if (state
->num_sent
== 0) {
506 tevent_req_done(req
);
507 return tevent_req_post(req
, ev
);
513 static void take_ip_done(struct tevent_req
*subreq
)
515 struct take_ip_one_state
*substate
= tevent_req_callback_data(
516 subreq
, struct take_ip_one_state
);
517 struct tevent_req
*req
= substate
->req
;
518 struct ctdb_reply_control
*reply
;
519 struct take_ip_state
*state
= tevent_req_data(
520 req
, struct take_ip_state
);
524 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
528 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
529 substate
->ip_str
, substate
->pnn
, ret
);
533 ret
= ctdb_reply_control_takeover_ip(reply
);
535 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
536 substate
->ip_str
, substate
->pnn
, ret
);
540 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
541 substate
->ip_str
, substate
->pnn
);
545 state
->ban_credits
[substate
->pnn
]++;
547 state
->err_any
= ret
;
550 talloc_free(substate
);
552 state
->num_replies
++;
554 if (state
->num_replies
< state
->num_sent
) {
555 /* Not all replies received, don't go further */
559 if (state
->num_fails
> 0) {
560 tevent_req_error(req
, state
->err_any
);
564 tevent_req_done(req
);
567 static bool take_ip_recv(struct tevent_req
*req
, int *perr
)
569 return generic_recv(req
, perr
);
572 /**********************************************************************/
574 struct ipreallocated_state
{
577 uint32_t *ban_credits
;
580 static void ipreallocated_done(struct tevent_req
*subreq
);
582 static struct tevent_req
*ipreallocated_send(TALLOC_CTX
*mem_ctx
,
583 struct tevent_context
*ev
,
584 struct ctdb_client_context
*client
,
587 struct timeval timeout
,
588 uint32_t *ban_credits
)
590 struct tevent_req
*req
, *subreq
;
591 struct ipreallocated_state
*state
;
592 struct ctdb_req_control request
;
594 req
= tevent_req_create(mem_ctx
, &state
, struct ipreallocated_state
);
600 state
->count
= count
;
601 state
->ban_credits
= ban_credits
;
603 ctdb_req_control_ipreallocated(&request
);
604 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
606 timeout
, /* cumulative */
608 if (tevent_req_nomem(subreq
, req
)) {
609 return tevent_req_post(req
, ev
);
611 tevent_req_set_callback(subreq
, ipreallocated_done
, req
);
616 static void ipreallocated_done(struct tevent_req
*subreq
)
618 struct tevent_req
*req
= tevent_req_callback_data(
619 subreq
, struct tevent_req
);
620 struct ipreallocated_state
*state
= tevent_req_data(
621 req
, struct ipreallocated_state
);
622 int *err_list
= NULL
;
624 bool status
, found_errors
;
626 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
631 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state
->count
);
632 tevent_req_done(req
);
636 /* Get some clear error messages out of err_list and count
639 found_errors
= false;
640 for (i
= 0; i
< state
->count
; i
++) {
641 int err
= err_list
[i
];
643 uint32_t pnn
= state
->pnns
[i
];
645 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
648 state
->ban_credits
[pnn
]++;
653 if (! found_errors
) {
654 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret
);
657 tevent_req_error(req
, ret
);
660 static bool ipreallocated_recv(struct tevent_req
*req
, int *perr
)
662 return generic_recv(req
, perr
);
665 /**********************************************************************/
668 * Recalculate the allocation of public IPs to nodes and have the
669 * nodes host their allocated addresses.
673 * - Initialise IP allocation state. Pass:
674 * + algorithm to be used;
675 * + various tunables (NoIPTakeover, NoIPFailback)
676 * + list of nodes to force rebalance (internal structure, currently
677 * no way to fetch, only used by LCP2 for nodes that have had new
678 * IP addresses added).
679 * - Set IP flags for IP allocation based on node map
680 * - Retrieve known and available IP addresses (done separately so
681 * values can be faked in unit testing)
682 * - Use ipalloc_set_public_ips() to set known and available IP
683 * addresses for allocation
684 * - If cluster can't host IP addresses then jump to IPREALLOCATED
685 * - Run IP allocation algorithm
686 * - Send RELEASE_IP to all nodes for IPs they should not host
687 * - Send TAKEOVER_IP to each node for the IPs it should host
688 * - Send IPREALLOCATED to all nodes
691 struct takeover_state
{
692 struct tevent_context
*ev
;
693 struct ctdb_client_context
*client
;
694 struct timeval timeout
;
696 uint32_t *pnns_connected
;
698 uint32_t *pnns_active
;
701 uint32_t *force_rebalance_nodes
;
702 struct ctdb_tunable_list
*tun_list
;
703 struct ipalloc_state
*ipalloc_state
;
704 struct ctdb_public_ip_list
*known_ips
;
705 struct public_ip_list
*all_ips
;
706 uint32_t *ban_credits
;
709 static void takeover_tunables_done(struct tevent_req
*subreq
);
710 static void takeover_nodemap_done(struct tevent_req
*subreq
);
711 static void takeover_known_ips_done(struct tevent_req
*subreq
);
712 static void takeover_avail_ips_done(struct tevent_req
*subreq
);
713 static void takeover_release_ip_done(struct tevent_req
*subreq
);
714 static void takeover_take_ip_done(struct tevent_req
*subreq
);
715 static void takeover_ipreallocated(struct tevent_req
*req
);
716 static void takeover_ipreallocated_done(struct tevent_req
*subreq
);
717 static void takeover_failed(struct tevent_req
*subreq
, int ret
);
718 static void takeover_failed_done(struct tevent_req
*subreq
);
720 static struct tevent_req
*takeover_send(TALLOC_CTX
*mem_ctx
,
721 struct tevent_context
*ev
,
722 struct ctdb_client_context
*client
,
723 uint32_t *force_rebalance_nodes
)
725 struct tevent_req
*req
, *subreq
;
726 struct takeover_state
*state
;
727 struct ctdb_req_control request
;
729 req
= tevent_req_create(mem_ctx
, &state
, struct takeover_state
);
735 state
->client
= client
;
736 state
->force_rebalance_nodes
= force_rebalance_nodes
;
737 state
->destnode
= ctdb_client_pnn(client
);
739 ctdb_req_control_get_all_tunables(&request
);
740 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
741 state
->destnode
, TIMEOUT(),
743 if (tevent_req_nomem(subreq
, req
)) {
744 return tevent_req_post(req
, ev
);
746 tevent_req_set_callback(subreq
, takeover_tunables_done
, req
);
751 static void takeover_tunables_done(struct tevent_req
*subreq
)
753 struct tevent_req
*req
= tevent_req_callback_data(
754 subreq
, struct tevent_req
);
755 struct takeover_state
*state
= tevent_req_data(
756 req
, struct takeover_state
);
757 struct ctdb_reply_control
*reply
;
758 struct ctdb_req_control request
;
762 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
765 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
766 tevent_req_error(req
, ret
);
770 ret
= ctdb_reply_control_get_all_tunables(reply
, state
,
773 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
774 tevent_req_error(req
, ret
);
780 takeover_timeout
= state
->tun_list
->takeover_timeout
;
782 ctdb_req_control_get_nodemap(&request
);
783 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
784 state
->destnode
, TIMEOUT(),
786 if (tevent_req_nomem(subreq
, req
)) {
789 tevent_req_set_callback(subreq
, takeover_nodemap_done
, req
);
792 static void takeover_nodemap_done(struct tevent_req
*subreq
)
794 struct tevent_req
*req
= tevent_req_callback_data(
795 subreq
, struct tevent_req
);
796 struct takeover_state
*state
= tevent_req_data(
797 req
, struct takeover_state
);
798 struct ctdb_reply_control
*reply
;
801 struct ctdb_node_map
*nodemap
;
804 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
807 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
808 state
->destnode
, ret
);
809 tevent_req_error(req
, ret
);
813 ret
= ctdb_reply_control_get_nodemap(reply
, state
, &nodemap
);
815 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret
);
816 tevent_req_error(req
, ret
);
820 state
->num_nodes
= nodemap
->num
;
822 state
->num_connected
= list_of_connected_nodes(nodemap
,
823 CTDB_UNKNOWN_PNN
, state
,
824 &state
->pnns_connected
);
825 if (state
->num_connected
<= 0) {
826 tevent_req_error(req
, ENOMEM
);
830 state
->num_active
= list_of_active_nodes(nodemap
,
831 CTDB_UNKNOWN_PNN
, state
,
832 &state
->pnns_active
);
833 if (state
->num_active
<= 0) {
834 tevent_req_error(req
, ENOMEM
);
838 /* Default timeout for early jump to IPREALLOCATED. See below
839 * for explanation of 3 times...
841 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
843 state
->ban_credits
= talloc_zero_array(state
, uint32_t,
845 if (tevent_req_nomem(state
->ban_credits
, req
)) {
849 ptr
= getenv("CTDB_DISABLE_IP_FAILOVER");
851 /* IP failover is completely disabled so just send out
852 * ipreallocated event.
854 takeover_ipreallocated(req
);
858 state
->ipalloc_state
=
860 state
, state
->num_nodes
,
861 determine_algorithm(state
->tun_list
),
862 (state
->tun_list
->no_ip_takeover
!= 0),
863 (state
->tun_list
->no_ip_failback
!= 0),
864 state
->force_rebalance_nodes
);
865 if (tevent_req_nomem(state
->ipalloc_state
, req
)) {
869 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
870 state
->pnns_connected
, state
->num_connected
,
871 state
->num_nodes
, state
->ban_credits
,
873 if (tevent_req_nomem(subreq
, req
)) {
877 tevent_req_set_callback(subreq
, takeover_known_ips_done
, req
);
880 static void takeover_known_ips_done(struct tevent_req
*subreq
)
882 struct tevent_req
*req
= tevent_req_callback_data(
883 subreq
, struct tevent_req
);
884 struct takeover_state
*state
= tevent_req_data(
885 req
, struct takeover_state
);
888 uint32_t *pnns
= NULL
;
891 status
= get_public_ips_recv(subreq
, &ret
, state
, &state
->known_ips
);
895 D_ERR("Failed to fetch known public IPs\n");
896 takeover_failed(req
, ret
);
900 /* Get available IPs from active nodes that actually have known IPs */
902 pnns
= talloc_zero_array(state
, uint32_t, state
->num_active
);
903 if (tevent_req_nomem(pnns
, req
)) {
908 for (i
= 0; i
< state
->num_active
; i
++) {
909 uint32_t pnn
= state
->pnns_active
[i
];
911 /* If pnn has IPs then fetch available IPs from it */
912 if (state
->known_ips
[pnn
].num
> 0) {
918 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
920 state
->num_nodes
, state
->ban_credits
,
922 if (tevent_req_nomem(subreq
, req
)) {
926 tevent_req_set_callback(subreq
, takeover_avail_ips_done
, req
);
929 static void takeover_avail_ips_done(struct tevent_req
*subreq
)
931 struct tevent_req
*req
= tevent_req_callback_data(
932 subreq
, struct tevent_req
);
933 struct takeover_state
*state
= tevent_req_data(
934 req
, struct takeover_state
);
937 struct ctdb_public_ip_list
*available_ips
;
939 status
= get_public_ips_recv(subreq
, &ret
, state
, &available_ips
);
943 D_ERR("Failed to fetch available public IPs\n");
944 takeover_failed(req
, ret
);
948 ipalloc_set_public_ips(state
->ipalloc_state
,
949 state
->known_ips
, available_ips
);
951 if (! ipalloc_can_host_ips(state
->ipalloc_state
)) {
952 D_NOTICE("No nodes available to host public IPs yet\n");
953 takeover_ipreallocated(req
);
957 /* Do the IP reassignment calculations */
958 state
->all_ips
= ipalloc(state
->ipalloc_state
);
959 if (tevent_req_nomem(state
->all_ips
, req
)) {
963 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
964 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
965 * seconds. However, RELEASE_IP can take longer due to TCP
966 * connection killing, so sometimes needs more time.
967 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
968 * seconds across all 3 stages. No explicit expiry checks are
969 * needed before each stage because tevent is smart enough to
970 * fire the timeouts even if they are in the past. Initialise
971 * this here so it explicitly covers the stages we're
972 * interested in but, in particular, not the time taken by the
975 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
977 subreq
= release_ip_send(state
, state
->ev
, state
->client
,
978 state
->pnns_connected
, state
->num_connected
,
979 state
->timeout
, state
->all_ips
,
981 if (tevent_req_nomem(subreq
, req
)) {
984 tevent_req_set_callback(subreq
, takeover_release_ip_done
, req
);
987 static void takeover_release_ip_done(struct tevent_req
*subreq
)
989 struct tevent_req
*req
= tevent_req_callback_data(
990 subreq
, struct tevent_req
);
991 struct takeover_state
*state
= tevent_req_data(
992 req
, struct takeover_state
);
996 status
= release_ip_recv(subreq
, &ret
);
1000 takeover_failed(req
, ret
);
1004 /* All released, now for takeovers */
1006 subreq
= take_ip_send(state
, state
->ev
, state
->client
,
1007 state
->timeout
, state
->all_ips
,
1008 state
->ban_credits
);
1009 if (tevent_req_nomem(subreq
, req
)) {
1012 tevent_req_set_callback(subreq
, takeover_take_ip_done
, req
);
1015 static void takeover_take_ip_done(struct tevent_req
*subreq
)
1017 struct tevent_req
*req
= tevent_req_callback_data(
1018 subreq
, struct tevent_req
);
1022 status
= take_ip_recv(subreq
, &ret
);
1023 TALLOC_FREE(subreq
);
1026 takeover_failed(req
, ret
);
1030 takeover_ipreallocated(req
);
1033 static void takeover_ipreallocated(struct tevent_req
*req
)
1035 struct takeover_state
*state
= tevent_req_data(
1036 req
, struct takeover_state
);
1037 struct tevent_req
*subreq
;
1039 subreq
= ipreallocated_send(state
, state
->ev
, state
->client
,
1040 state
->pnns_connected
,
1041 state
->num_connected
,
1043 state
->ban_credits
);
1044 if (tevent_req_nomem(subreq
, req
)) {
1047 tevent_req_set_callback(subreq
, takeover_ipreallocated_done
, req
);
1050 static void takeover_ipreallocated_done(struct tevent_req
*subreq
)
1052 struct tevent_req
*req
= tevent_req_callback_data(
1053 subreq
, struct tevent_req
);
1057 status
= ipreallocated_recv(subreq
, &ret
);
1058 TALLOC_FREE(subreq
);
1061 takeover_failed(req
, ret
);
1065 tevent_req_done(req
);
1068 struct takeover_failed_state
{
1069 struct tevent_req
*req
;
1073 void takeover_failed(struct tevent_req
*req
, int ret
)
1075 struct takeover_state
*state
= tevent_req_data(
1076 req
, struct takeover_state
);
1077 struct tevent_req
*subreq
;
1078 uint32_t max_pnn
= CTDB_UNKNOWN_PNN
;
1079 int max_credits
= 0;
1082 /* Check that bans are enabled */
1083 if (state
->tun_list
->enable_bans
== 0) {
1084 tevent_req_error(req
, ret
);
1088 for (pnn
= 0; pnn
< state
->num_nodes
; pnn
++) {
1089 if (state
->ban_credits
[pnn
] > max_credits
) {
1091 max_credits
= state
->ban_credits
[pnn
];
1095 if (max_credits
> 0) {
1096 struct ctdb_req_message message
;
1097 struct takeover_failed_state
*substate
;
1099 D_WARNING("Assigning banning credits to node %u\n", max_pnn
);
1101 substate
= talloc_zero(state
, struct takeover_failed_state
);
1102 if (tevent_req_nomem(substate
, req
)) {
1105 substate
->req
= req
;
1106 substate
->ret
= ret
;
1108 message
.srvid
= CTDB_SRVID_BANNING
;
1109 message
.data
.pnn
= max_pnn
;
1111 subreq
= ctdb_client_message_send(
1112 state
, state
->ev
, state
->client
,
1113 ctdb_client_pnn(state
->client
),
1115 if (subreq
== NULL
) {
1116 D_ERR("failed to assign banning credits\n");
1117 tevent_req_error(req
, ret
);
1120 tevent_req_set_callback(subreq
, takeover_failed_done
, substate
);
1122 tevent_req_error(req
, ret
);
1126 static void takeover_failed_done(struct tevent_req
*subreq
)
1128 struct takeover_failed_state
*substate
= tevent_req_callback_data(
1129 subreq
, struct takeover_failed_state
);
1130 struct tevent_req
*req
= substate
->req
;
1134 status
= ctdb_client_message_recv(subreq
, &ret
);
1135 TALLOC_FREE(subreq
);
1137 D_ERR("failed to assign banning credits, ret=%d\n", ret
);
1140 ret
= substate
->ret
;
1141 talloc_free(substate
);
1142 tevent_req_error(req
, ret
);
1145 static void takeover_recv(struct tevent_req
*req
, int *perr
)
1147 generic_recv(req
, perr
);
1150 static uint32_t *parse_node_list(TALLOC_CTX
*mem_ctx
, const char* s
)
1157 ret
= strv_split(mem_ctx
, &strv
, s
, ",");
1159 D_ERR("out of memory\n");
1163 num
= strv_count(strv
);
1165 nodes
= talloc_array(mem_ctx
, uint32_t, num
);
1166 if (nodes
== NULL
) {
1167 D_ERR("out of memory\n");
1172 for (i
= 0; i
< num
; i
++) {
1173 t
= strv_next(strv
, t
);
1180 static void usage(const char *progname
)
1183 "\nUsage: %s <output-fd> <ctdb-socket-path> "
1184 "[<force-rebalance-nodes>]\n",
1189 * Arguments - write fd, socket path, optional force-rebalance node list
1191 int main(int argc
, const char *argv
[])
1194 const char *sockpath
;
1195 TALLOC_CTX
*mem_ctx
;
1196 struct tevent_context
*ev
;
1197 struct ctdb_client_context
*client
;
1199 struct tevent_req
*req
;
1200 uint32_t *force_rebalance_nodes
= NULL
;
1202 if (argc
< 3 || argc
> 4) {
1207 write_fd
= atoi(argv
[1]);
1210 mem_ctx
= talloc_new(NULL
);
1211 if (mem_ctx
== NULL
) {
1212 fprintf(stderr
, "talloc_new() failed\n");
1218 force_rebalance_nodes
= parse_node_list(mem_ctx
, argv
[3]);
1219 if (force_rebalance_nodes
== NULL
) {
1226 ret
= logging_init(mem_ctx
, NULL
, NULL
, "ctdb-takeover");
1229 "ctdb-takeover: Unable to initialize logging\n");
1233 ev
= tevent_context_init(mem_ctx
);
1235 D_ERR("tevent_context_init() failed\n");
1240 ret
= ctdb_client_init(mem_ctx
, ev
, sockpath
, &client
);
1242 D_ERR("ctdb_client_init() failed, ret=%d\n", ret
);
1246 req
= takeover_send(mem_ctx
, ev
, client
, force_rebalance_nodes
);
1248 D_ERR("takeover_send() failed\n");
1253 if (! tevent_req_poll(req
, ev
)) {
1254 D_ERR("tevent_req_poll() failed\n");
1259 takeover_recv(req
, &ret
);
1262 D_ERR("takeover run failed, ret=%d\n", ret
);
1266 sys_write_v(write_fd
, &ret
, sizeof(ret
));
1268 talloc_free(mem_ctx
);