2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/network.h"
30 #include "system/filesys.h"
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "protocol/protocol_util.h"
46 #include "client/client.h"
48 #include "common/logging.h"
50 #include "server/ipalloc.h"
52 static int takeover_timeout
= 9;
54 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
60 static bool generic_recv(struct tevent_req
*req
, int *perr
)
64 if (tevent_req_is_unix_error(req
, &err
)) {
74 static enum ipalloc_algorithm
75 determine_algorithm(const struct ctdb_tunable_list
*tunables
)
77 switch (tunables
->ip_alloc_algorithm
) {
79 return IPALLOC_DETERMINISTIC
;
81 return IPALLOC_NONDETERMINISTIC
;
89 /**********************************************************************/
91 struct get_public_ips_state
{
94 struct ctdb_public_ip_list
*ips
;
95 uint32_t *ban_credits
;
98 static void get_public_ips_done(struct tevent_req
*subreq
);
100 static struct tevent_req
*get_public_ips_send(
102 struct tevent_context
*ev
,
103 struct ctdb_client_context
*client
,
105 int count
, int num_nodes
,
106 uint32_t *ban_credits
,
109 struct tevent_req
*req
, *subreq
;
110 struct get_public_ips_state
*state
;
111 struct ctdb_req_control request
;
113 req
= tevent_req_create(mem_ctx
, &state
, struct get_public_ips_state
);
119 state
->count
= count
;
120 state
->ban_credits
= ban_credits
;
122 state
->ips
= talloc_zero_array(state
,
123 struct ctdb_public_ip_list
,
125 if (tevent_req_nomem(state
->ips
, req
)) {
126 return tevent_req_post(req
, ev
);
129 /* Short circuit if no nodes are being asked for IPs */
130 if (state
->count
== 0) {
131 tevent_req_done(req
);
132 return tevent_req_post(req
, ev
);
135 ctdb_req_control_get_public_ips(&request
, available_only
);
136 subreq
= ctdb_client_control_multi_send(mem_ctx
, ev
, client
,
139 TIMEOUT(), &request
);
140 if (tevent_req_nomem(subreq
, req
)) {
141 return tevent_req_post(req
, ev
);
143 tevent_req_set_callback(subreq
, get_public_ips_done
, req
);
148 static void get_public_ips_done(struct tevent_req
*subreq
)
150 struct tevent_req
*req
= tevent_req_callback_data(
151 subreq
, struct tevent_req
);
152 struct get_public_ips_state
*state
= tevent_req_data(
153 req
, struct get_public_ips_state
);
154 struct ctdb_reply_control
**reply
;
157 bool status
, found_errors
;
159 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
, &err_list
,
163 for (i
= 0; i
< state
->count
; i
++) {
164 if (err_list
[i
] != 0) {
165 uint32_t pnn
= state
->pnns
[i
];
167 D_ERR("control GET_PUBLIC_IPS failed on "
168 "node %u, ret=%d\n", pnn
, err_list
[i
]);
170 state
->ban_credits
[pnn
]++;
174 tevent_req_error(req
, ret
);
178 found_errors
= false;
179 for (i
= 0; i
< state
->count
; i
++) {
181 struct ctdb_public_ip_list
*ips
;
183 pnn
= state
->pnns
[i
];
184 ret
= ctdb_reply_control_get_public_ips(reply
[i
], state
->ips
,
187 D_ERR("control GET_PUBLIC_IPS failed on "
189 state
->ban_credits
[pnn
]++;
194 D_INFO("Fetched public IPs from node %u\n", pnn
);
195 state
->ips
[pnn
] = *ips
;
199 tevent_req_error(req
, EIO
);
205 tevent_req_done(req
);
208 static bool get_public_ips_recv(struct tevent_req
*req
, int *perr
,
210 struct ctdb_public_ip_list
**ips
)
212 struct get_public_ips_state
*state
= tevent_req_data(
213 req
, struct get_public_ips_state
);
216 if (tevent_req_is_unix_error(req
, &err
)) {
223 *ips
= talloc_steal(mem_ctx
, state
->ips
);
228 /**********************************************************************/
230 struct release_ip_state
{
235 uint32_t *ban_credits
;
238 struct release_ip_one_state
{
239 struct tevent_req
*req
;
245 static void release_ip_done(struct tevent_req
*subreq
);
247 static struct tevent_req
*release_ip_send(TALLOC_CTX
*mem_ctx
,
248 struct tevent_context
*ev
,
249 struct ctdb_client_context
*client
,
252 struct timeval timeout
,
253 struct public_ip_list
*all_ips
,
254 uint32_t *ban_credits
)
256 struct tevent_req
*req
, *subreq
;
257 struct release_ip_state
*state
;
258 struct ctdb_req_control request
;
259 struct public_ip_list
*tmp_ip
;
261 req
= tevent_req_create(mem_ctx
, &state
, struct release_ip_state
);
267 state
->num_replies
= 0;
268 state
->num_fails
= 0;
269 state
->ban_credits
= ban_credits
;
271 /* Send a RELEASE_IP to all nodes that should not be hosting
272 * each IP. For each IP, all but one of these will be
273 * redundant. However, the redundant ones are used to tell
274 * nodes which node should be hosting the IP so that commands
275 * like "ctdb ip" can display a particular node's idea of who
276 * is hosting what. */
277 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
278 struct release_ip_one_state
*substate
;
279 struct ctdb_public_ip ip
;
282 substate
= talloc_zero(state
, struct release_ip_one_state
);
283 if (tevent_req_nomem(substate
, req
)) {
284 return tevent_req_post(req
, ev
);
287 substate
->pnns
= talloc_zero_array(substate
, uint32_t, count
);
288 if (tevent_req_nomem(substate
->pnns
, req
)) {
289 return tevent_req_post(req
, ev
);
295 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
298 if (tevent_req_nomem(substate
->ip_str
, req
)) {
299 return tevent_req_post(req
, ev
);
302 for (i
= 0; i
< count
; i
++) {
303 uint32_t pnn
= pnns
[i
];
305 /* Skip this node if IP is not known */
306 if (! bitmap_query(tmp_ip
->known_on
, pnn
)) {
310 /* If pnn is not the node that should be
311 * hosting the IP then add it to the list of
312 * nodes that need to do a release. */
313 if (tmp_ip
->pnn
!= pnn
) {
314 substate
->pnns
[substate
->count
] = pnn
;
319 if (substate
->count
== 0) {
320 /* No releases to send for this address... */
321 TALLOC_FREE(substate
);
325 ip
.pnn
= tmp_ip
->pnn
;
326 ip
.addr
= tmp_ip
->addr
;
327 ctdb_req_control_release_ip(&request
, &ip
);
328 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
331 timeout
,/* cumulative */
333 if (tevent_req_nomem(subreq
, req
)) {
334 return tevent_req_post(req
, ev
);
336 tevent_req_set_callback(subreq
, release_ip_done
, substate
);
341 /* None sent, finished... */
342 if (state
->num_sent
== 0) {
343 tevent_req_done(req
);
344 return tevent_req_post(req
, ev
);
350 static void release_ip_done(struct tevent_req
*subreq
)
352 struct release_ip_one_state
*substate
= tevent_req_callback_data(
353 subreq
, struct release_ip_one_state
);
354 struct tevent_req
*req
= substate
->req
;
355 struct release_ip_state
*state
= tevent_req_data(
356 req
, struct release_ip_state
);
359 bool status
, found_errors
;
361 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
366 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
367 substate
->ip_str
, substate
->count
);
371 /* Get some clear error messages out of err_list and count
374 found_errors
= false;
375 for (i
= 0; i
< substate
->count
; i
++) {
376 int err
= err_list
[i
];
378 uint32_t pnn
= substate
->pnns
[i
];
380 D_ERR("RELEASE_IP %s failed on node %u, "
381 "ret=%d\n", substate
->ip_str
, pnn
, err
);
383 state
->ban_credits
[pnn
]++;
384 state
->err_any
= err
;
388 if (! found_errors
) {
389 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
390 substate
->ip_str
, ret
);
391 state
->err_any
= EIO
;
397 talloc_free(substate
);
399 state
->num_replies
++;
401 if (state
->num_replies
< state
->num_sent
) {
402 /* Not all replies received, don't go further */
406 if (state
->num_fails
> 0) {
407 tevent_req_error(req
, state
->err_any
);
411 tevent_req_done(req
);
414 static bool release_ip_recv(struct tevent_req
*req
, int *perr
)
416 return generic_recv(req
, perr
);
419 /**********************************************************************/
421 struct take_ip_state
{
426 uint32_t *ban_credits
;
429 struct take_ip_one_state
{
430 struct tevent_req
*req
;
435 static void take_ip_done(struct tevent_req
*subreq
);
437 static struct tevent_req
*take_ip_send(TALLOC_CTX
*mem_ctx
,
438 struct tevent_context
*ev
,
439 struct ctdb_client_context
*client
,
440 struct timeval timeout
,
441 struct public_ip_list
*all_ips
,
442 uint32_t *ban_credits
)
444 struct tevent_req
*req
, *subreq
;
445 struct take_ip_state
*state
;
446 struct ctdb_req_control request
;
447 struct public_ip_list
*tmp_ip
;
449 req
= tevent_req_create(mem_ctx
, &state
, struct take_ip_state
);
455 state
->num_replies
= 0;
456 state
->num_fails
= 0;
457 state
->ban_credits
= ban_credits
;
459 /* For each IP, send a TAKEOVER_IP to the node that should be
460 * hosting it. Many of these will often be redundant (since
461 * the allocation won't have changed) but they can be useful
462 * to recover from inconsistencies. */
463 for (tmp_ip
= all_ips
; tmp_ip
!= NULL
; tmp_ip
= tmp_ip
->next
) {
464 struct take_ip_one_state
*substate
;
465 struct ctdb_public_ip ip
;
467 if (tmp_ip
->pnn
== CTDB_UNKNOWN_PNN
) {
468 /* IP will be unassigned */
472 substate
= talloc_zero(state
, struct take_ip_one_state
);
473 if (tevent_req_nomem(substate
, req
)) {
474 return tevent_req_post(req
, ev
);
478 substate
->pnn
= tmp_ip
->pnn
;
480 substate
->ip_str
= ctdb_sock_addr_to_string(substate
,
483 if (tevent_req_nomem(substate
->ip_str
, req
)) {
484 return tevent_req_post(req
, ev
);
487 ip
.pnn
= tmp_ip
->pnn
;
488 ip
.addr
= tmp_ip
->addr
;
489 ctdb_req_control_takeover_ip(&request
, &ip
);
490 subreq
= ctdb_client_control_send(
491 state
, ev
, client
, tmp_ip
->pnn
,
492 timeout
, /* cumulative */
494 if (tevent_req_nomem(subreq
, req
)) {
495 return tevent_req_post(req
, ev
);
497 tevent_req_set_callback(subreq
, take_ip_done
, substate
);
502 /* None sent, finished... */
503 if (state
->num_sent
== 0) {
504 tevent_req_done(req
);
505 return tevent_req_post(req
, ev
);
511 static void take_ip_done(struct tevent_req
*subreq
)
513 struct take_ip_one_state
*substate
= tevent_req_callback_data(
514 subreq
, struct take_ip_one_state
);
515 struct tevent_req
*req
= substate
->req
;
516 struct ctdb_reply_control
*reply
;
517 struct take_ip_state
*state
= tevent_req_data(
518 req
, struct take_ip_state
);
522 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
526 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
527 substate
->ip_str
, substate
->pnn
, ret
);
531 ret
= ctdb_reply_control_takeover_ip(reply
);
533 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
534 substate
->ip_str
, substate
->pnn
, ret
);
538 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
539 substate
->ip_str
, substate
->pnn
);
543 state
->ban_credits
[substate
->pnn
]++;
545 state
->err_any
= ret
;
548 talloc_free(substate
);
550 state
->num_replies
++;
552 if (state
->num_replies
< state
->num_sent
) {
553 /* Not all replies received, don't go further */
557 if (state
->num_fails
> 0) {
558 tevent_req_error(req
, state
->err_any
);
562 tevent_req_done(req
);
565 static bool take_ip_recv(struct tevent_req
*req
, int *perr
)
567 return generic_recv(req
, perr
);
570 /**********************************************************************/
572 struct ipreallocated_state
{
575 uint32_t *ban_credits
;
578 static void ipreallocated_done(struct tevent_req
*subreq
);
580 static struct tevent_req
*ipreallocated_send(TALLOC_CTX
*mem_ctx
,
581 struct tevent_context
*ev
,
582 struct ctdb_client_context
*client
,
585 struct timeval timeout
,
586 uint32_t *ban_credits
)
588 struct tevent_req
*req
, *subreq
;
589 struct ipreallocated_state
*state
;
590 struct ctdb_req_control request
;
592 req
= tevent_req_create(mem_ctx
, &state
, struct ipreallocated_state
);
598 state
->count
= count
;
599 state
->ban_credits
= ban_credits
;
601 ctdb_req_control_ipreallocated(&request
);
602 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
604 timeout
, /* cumulative */
606 if (tevent_req_nomem(subreq
, req
)) {
607 return tevent_req_post(req
, ev
);
609 tevent_req_set_callback(subreq
, ipreallocated_done
, req
);
614 static void ipreallocated_done(struct tevent_req
*subreq
)
616 struct tevent_req
*req
= tevent_req_callback_data(
617 subreq
, struct tevent_req
);
618 struct ipreallocated_state
*state
= tevent_req_data(
619 req
, struct ipreallocated_state
);
620 int *err_list
= NULL
;
622 bool status
, found_errors
;
624 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
629 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state
->count
);
630 tevent_req_done(req
);
634 /* Get some clear error messages out of err_list and count
637 found_errors
= false;
638 for (i
= 0; i
< state
->count
; i
++) {
639 int err
= err_list
[i
];
641 uint32_t pnn
= state
->pnns
[i
];
643 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
646 state
->ban_credits
[pnn
]++;
651 if (! found_errors
) {
652 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret
);
655 tevent_req_error(req
, ret
);
658 static bool ipreallocated_recv(struct tevent_req
*req
, int *perr
)
660 return generic_recv(req
, perr
);
663 /**********************************************************************/
665 struct start_ipreallocate_state
{
668 uint32_t *ban_credits
;
671 static void start_ipreallocate_done(struct tevent_req
*subreq
);
673 static struct tevent_req
*start_ipreallocate_send(
675 struct tevent_context
*ev
,
676 struct ctdb_client_context
*client
,
679 struct timeval timeout
,
680 uint32_t *ban_credits
)
682 struct tevent_req
*req
, *subreq
;
683 struct start_ipreallocate_state
*state
;
684 struct ctdb_req_control request
;
686 req
= tevent_req_create(mem_ctx
, &state
, struct start_ipreallocate_state
);
692 state
->count
= count
;
693 state
->ban_credits
= ban_credits
;
695 ctdb_req_control_start_ipreallocate(&request
);
696 subreq
= ctdb_client_control_multi_send(state
, ev
, client
,
698 timeout
, /* cumulative */
700 if (tevent_req_nomem(subreq
, req
)) {
701 return tevent_req_post(req
, ev
);
703 tevent_req_set_callback(subreq
, start_ipreallocate_done
, req
);
708 static void start_ipreallocate_done(struct tevent_req
*subreq
)
710 struct tevent_req
*req
= tevent_req_callback_data(
711 subreq
, struct tevent_req
);
712 struct start_ipreallocate_state
*state
= tevent_req_data(
713 req
, struct start_ipreallocate_state
);
714 int *err_list
= NULL
;
716 bool status
, found_errors
;
718 status
= ctdb_client_control_multi_recv(subreq
, &ret
, state
,
723 D_INFO("START_IPREALLOCATE succeeded on %d nodes\n", state
->count
);
724 tevent_req_done(req
);
728 /* Get some clear error messages out of err_list and count
731 found_errors
= false;
732 for (i
= 0; i
< state
->count
; i
++) {
733 int err
= err_list
[i
];
735 uint32_t pnn
= state
->pnns
[i
];
737 D_ERR("START_IPREALLOCATE failed on node %u, ret=%d\n",
740 state
->ban_credits
[pnn
]++;
745 if (! found_errors
) {
746 D_ERR("STARTREALLOCATE internal error, ret=%d\n", ret
);
749 tevent_req_error(req
, ret
);
752 static bool start_ipreallocate_recv(struct tevent_req
*req
, int *perr
)
754 return generic_recv(req
, perr
);
757 /**********************************************************************/
760 * Recalculate the allocation of public IPs to nodes and have the
761 * nodes host their allocated addresses.
765 * - Initialise IP allocation state. Pass:
766 * + algorithm to be used;
767 * + various tunables (NoIPTakeover, NoIPFailback)
768 * + list of nodes to force rebalance (internal structure, currently
769 * no way to fetch, only used by LCP2 for nodes that have had new
770 * IP addresses added).
771 * - Set IP flags for IP allocation based on node map
772 * - Retrieve known and available IP addresses (done separately so
773 * values can be faked in unit testing)
774 * - Use ipalloc_set_public_ips() to set known and available IP
775 * addresses for allocation
776 * - If cluster can't host IP addresses then jump to IPREALLOCATED
777 * - Run IP allocation algorithm
778 * - Send START_IPREALLOCATE to all nodes
779 * - Send RELEASE_IP to all nodes for IPs they should not host
780 * - Send TAKEOVER_IP to each node for the IPs it should host
781 * - Send IPREALLOCATED to all nodes
784 struct takeover_state
{
785 struct tevent_context
*ev
;
786 struct ctdb_client_context
*client
;
787 struct timeval timeout
;
788 unsigned int num_nodes
;
789 uint32_t *pnns_connected
;
791 uint32_t *pnns_active
;
794 uint32_t *force_rebalance_nodes
;
795 struct ctdb_tunable_list
*tun_list
;
796 struct ipalloc_state
*ipalloc_state
;
797 struct ctdb_public_ip_list
*known_ips
;
798 struct public_ip_list
*all_ips
;
799 uint32_t *ban_credits
;
802 static void takeover_tunables_done(struct tevent_req
*subreq
);
803 static void takeover_nodemap_done(struct tevent_req
*subreq
);
804 static void takeover_known_ips_done(struct tevent_req
*subreq
);
805 static void takeover_avail_ips_done(struct tevent_req
*subreq
);
806 static void takeover_start_ipreallocate_done(struct tevent_req
*subreq
);
807 static void takeover_release_ip_done(struct tevent_req
*subreq
);
808 static void takeover_take_ip_done(struct tevent_req
*subreq
);
809 static void takeover_ipreallocated(struct tevent_req
*req
);
810 static void takeover_ipreallocated_done(struct tevent_req
*subreq
);
811 static void takeover_failed(struct tevent_req
*subreq
, int ret
);
812 static void takeover_failed_done(struct tevent_req
*subreq
);
814 static struct tevent_req
*takeover_send(TALLOC_CTX
*mem_ctx
,
815 struct tevent_context
*ev
,
816 struct ctdb_client_context
*client
,
817 uint32_t *force_rebalance_nodes
)
819 struct tevent_req
*req
, *subreq
;
820 struct takeover_state
*state
;
821 struct ctdb_req_control request
;
823 req
= tevent_req_create(mem_ctx
, &state
, struct takeover_state
);
829 state
->client
= client
;
830 state
->force_rebalance_nodes
= force_rebalance_nodes
;
831 state
->destnode
= ctdb_client_pnn(client
);
833 ctdb_req_control_get_all_tunables(&request
);
834 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
835 state
->destnode
, TIMEOUT(),
837 if (tevent_req_nomem(subreq
, req
)) {
838 return tevent_req_post(req
, ev
);
840 tevent_req_set_callback(subreq
, takeover_tunables_done
, req
);
845 static void takeover_tunables_done(struct tevent_req
*subreq
)
847 struct tevent_req
*req
= tevent_req_callback_data(
848 subreq
, struct tevent_req
);
849 struct takeover_state
*state
= tevent_req_data(
850 req
, struct takeover_state
);
851 struct ctdb_reply_control
*reply
;
852 struct ctdb_req_control request
;
856 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
859 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
860 tevent_req_error(req
, ret
);
864 ret
= ctdb_reply_control_get_all_tunables(reply
, state
,
867 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret
);
868 tevent_req_error(req
, ret
);
874 takeover_timeout
= state
->tun_list
->takeover_timeout
;
876 ctdb_req_control_get_nodemap(&request
);
877 subreq
= ctdb_client_control_send(state
, state
->ev
, state
->client
,
878 state
->destnode
, TIMEOUT(),
880 if (tevent_req_nomem(subreq
, req
)) {
883 tevent_req_set_callback(subreq
, takeover_nodemap_done
, req
);
886 static void takeover_nodemap_done(struct tevent_req
*subreq
)
888 struct tevent_req
*req
= tevent_req_callback_data(
889 subreq
, struct tevent_req
);
890 struct takeover_state
*state
= tevent_req_data(
891 req
, struct takeover_state
);
892 struct ctdb_reply_control
*reply
;
895 struct ctdb_node_map
*nodemap
;
898 status
= ctdb_client_control_recv(subreq
, &ret
, state
, &reply
);
901 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
902 state
->destnode
, ret
);
903 tevent_req_error(req
, ret
);
907 ret
= ctdb_reply_control_get_nodemap(reply
, state
, &nodemap
);
909 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret
);
910 tevent_req_error(req
, ret
);
914 state
->num_nodes
= nodemap
->num
;
916 state
->num_connected
= list_of_connected_nodes(nodemap
,
917 CTDB_UNKNOWN_PNN
, state
,
918 &state
->pnns_connected
);
919 if (state
->num_connected
<= 0) {
920 tevent_req_error(req
, ENOMEM
);
924 state
->num_active
= list_of_active_nodes(nodemap
,
925 CTDB_UNKNOWN_PNN
, state
,
926 &state
->pnns_active
);
927 if (state
->num_active
<= 0) {
928 tevent_req_error(req
, ENOMEM
);
932 /* Default timeout for early jump to IPREALLOCATED. See below
933 * for explanation of 3 times...
935 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
937 state
->ban_credits
= talloc_zero_array(state
, uint32_t,
939 if (tevent_req_nomem(state
->ban_credits
, req
)) {
943 ptr
= getenv("CTDB_DISABLE_IP_FAILOVER");
945 /* IP failover is completely disabled so just send out
946 * ipreallocated event.
948 takeover_ipreallocated(req
);
952 state
->ipalloc_state
=
954 state
, state
->num_nodes
,
955 determine_algorithm(state
->tun_list
),
956 (state
->tun_list
->no_ip_takeover
!= 0),
957 (state
->tun_list
->no_ip_failback
!= 0),
958 state
->force_rebalance_nodes
);
959 if (tevent_req_nomem(state
->ipalloc_state
, req
)) {
963 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
964 state
->pnns_connected
, state
->num_connected
,
965 state
->num_nodes
, state
->ban_credits
,
967 if (tevent_req_nomem(subreq
, req
)) {
971 tevent_req_set_callback(subreq
, takeover_known_ips_done
, req
);
974 static void takeover_known_ips_done(struct tevent_req
*subreq
)
976 struct tevent_req
*req
= tevent_req_callback_data(
977 subreq
, struct tevent_req
);
978 struct takeover_state
*state
= tevent_req_data(
979 req
, struct takeover_state
);
982 uint32_t *pnns
= NULL
;
985 status
= get_public_ips_recv(subreq
, &ret
, state
, &state
->known_ips
);
989 D_ERR("Failed to fetch known public IPs\n");
990 takeover_failed(req
, ret
);
994 /* Get available IPs from active nodes that actually have known IPs */
996 pnns
= talloc_zero_array(state
, uint32_t, state
->num_active
);
997 if (tevent_req_nomem(pnns
, req
)) {
1002 for (i
= 0; i
< state
->num_active
; i
++) {
1003 uint32_t pnn
= state
->pnns_active
[i
];
1005 /* If pnn has IPs then fetch available IPs from it */
1006 if (state
->known_ips
[pnn
].num
> 0) {
1012 subreq
= get_public_ips_send(state
, state
->ev
, state
->client
,
1014 state
->num_nodes
, state
->ban_credits
,
1016 if (tevent_req_nomem(subreq
, req
)) {
1020 tevent_req_set_callback(subreq
, takeover_avail_ips_done
, req
);
1023 static void takeover_avail_ips_done(struct tevent_req
*subreq
)
1025 struct tevent_req
*req
= tevent_req_callback_data(
1026 subreq
, struct tevent_req
);
1027 struct takeover_state
*state
= tevent_req_data(
1028 req
, struct takeover_state
);
1031 struct ctdb_public_ip_list
*available_ips
;
1033 status
= get_public_ips_recv(subreq
, &ret
, state
, &available_ips
);
1034 TALLOC_FREE(subreq
);
1037 D_ERR("Failed to fetch available public IPs\n");
1038 takeover_failed(req
, ret
);
1042 ipalloc_set_public_ips(state
->ipalloc_state
,
1043 state
->known_ips
, available_ips
);
1045 if (! ipalloc_can_host_ips(state
->ipalloc_state
)) {
1046 D_NOTICE("No nodes available to host public IPs yet\n");
1047 takeover_ipreallocated(req
);
1051 /* Do the IP reassignment calculations */
1052 state
->all_ips
= ipalloc(state
->ipalloc_state
);
1053 if (tevent_req_nomem(state
->all_ips
, req
)) {
1057 /* Each of the following stages (START_IPREALLOCATE, RELEASE_IP, TAKEOVER_IP,
1058 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1059 * seconds. However, RELEASE_IP can take longer due to TCP
1060 * connection killing, so sometimes needs more time.
1061 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1062 * seconds across all 4 stages. Using a longer cumulative timeout (e.g.*4)
1063 * would take the takeover run timeout over 30s, which combined with database
1064 * recovery time takes the timeout too close to acceptable SMB limits.
1065 * No explicit expiry checks are
1066 * needed before each stage because tevent is smart enough to
1067 * fire the timeouts even if they are in the past. Initialise
1068 * this here so it explicitly covers the stages we're
1069 * interested in but, in particular, not the time taken by the
1072 state
->timeout
= timeval_current_ofs(3 * takeover_timeout
, 0);
1074 subreq
= start_ipreallocate_send(state
,
1077 state
->pnns_connected
,
1078 state
->num_connected
,
1080 state
->ban_credits
);
1081 if (tevent_req_nomem(subreq
, req
)) {
1084 tevent_req_set_callback(subreq
, takeover_start_ipreallocate_done
, req
);
1087 static void takeover_start_ipreallocate_done(struct tevent_req
*subreq
)
1089 struct tevent_req
*req
= tevent_req_callback_data(
1090 subreq
, struct tevent_req
);
1091 struct takeover_state
*state
= tevent_req_data(
1092 req
, struct takeover_state
);
1096 status
= start_ipreallocate_recv(subreq
, &ret
);
1097 TALLOC_FREE(subreq
);
1100 takeover_failed(req
, ret
);
1104 subreq
= release_ip_send(state
,
1107 state
->pnns_connected
,
1108 state
->num_connected
,
1111 state
->ban_credits
);
1112 if (tevent_req_nomem(subreq
, req
)) {
1115 tevent_req_set_callback(subreq
, takeover_release_ip_done
, req
);
1118 static void takeover_release_ip_done(struct tevent_req
*subreq
)
1120 struct tevent_req
*req
= tevent_req_callback_data(
1121 subreq
, struct tevent_req
);
1122 struct takeover_state
*state
= tevent_req_data(
1123 req
, struct takeover_state
);
1127 status
= release_ip_recv(subreq
, &ret
);
1128 TALLOC_FREE(subreq
);
1131 takeover_failed(req
, ret
);
1135 /* All released, now for takeovers */
1137 subreq
= take_ip_send(state
, state
->ev
, state
->client
,
1138 state
->timeout
, state
->all_ips
,
1139 state
->ban_credits
);
1140 if (tevent_req_nomem(subreq
, req
)) {
1143 tevent_req_set_callback(subreq
, takeover_take_ip_done
, req
);
1146 static void takeover_take_ip_done(struct tevent_req
*subreq
)
1148 struct tevent_req
*req
= tevent_req_callback_data(
1149 subreq
, struct tevent_req
);
1153 status
= take_ip_recv(subreq
, &ret
);
1154 TALLOC_FREE(subreq
);
1157 takeover_failed(req
, ret
);
1161 takeover_ipreallocated(req
);
1164 static void takeover_ipreallocated(struct tevent_req
*req
)
1166 struct takeover_state
*state
= tevent_req_data(
1167 req
, struct takeover_state
);
1168 struct tevent_req
*subreq
;
1170 subreq
= ipreallocated_send(state
, state
->ev
, state
->client
,
1171 state
->pnns_connected
,
1172 state
->num_connected
,
1174 state
->ban_credits
);
1175 if (tevent_req_nomem(subreq
, req
)) {
1178 tevent_req_set_callback(subreq
, takeover_ipreallocated_done
, req
);
1181 static void takeover_ipreallocated_done(struct tevent_req
*subreq
)
1183 struct tevent_req
*req
= tevent_req_callback_data(
1184 subreq
, struct tevent_req
);
1188 status
= ipreallocated_recv(subreq
, &ret
);
1189 TALLOC_FREE(subreq
);
1192 takeover_failed(req
, ret
);
1196 tevent_req_done(req
);
1199 struct takeover_failed_state
{
1200 struct tevent_req
*req
;
1204 void takeover_failed(struct tevent_req
*req
, int ret
)
1206 struct takeover_state
*state
= tevent_req_data(
1207 req
, struct takeover_state
);
1208 struct tevent_req
*subreq
;
1209 uint32_t max_pnn
= CTDB_UNKNOWN_PNN
;
1210 unsigned int max_credits
= 0;
1213 /* Check that bans are enabled */
1214 if (state
->tun_list
->enable_bans
== 0) {
1215 tevent_req_error(req
, ret
);
1219 for (pnn
= 0; pnn
< state
->num_nodes
; pnn
++) {
1220 if (state
->ban_credits
[pnn
] > max_credits
) {
1222 max_credits
= state
->ban_credits
[pnn
];
1226 if (max_credits
> 0) {
1227 struct ctdb_req_message message
;
1228 struct takeover_failed_state
*substate
;
1230 D_WARNING("Assigning banning credits to node %u\n", max_pnn
);
1232 substate
= talloc_zero(state
, struct takeover_failed_state
);
1233 if (tevent_req_nomem(substate
, req
)) {
1236 substate
->req
= req
;
1237 substate
->ret
= ret
;
1239 message
.srvid
= CTDB_SRVID_BANNING
;
1240 message
.data
.pnn
= max_pnn
;
1242 subreq
= ctdb_client_message_send(
1243 state
, state
->ev
, state
->client
,
1244 ctdb_client_pnn(state
->client
),
1246 if (subreq
== NULL
) {
1247 D_ERR("failed to assign banning credits\n");
1248 tevent_req_error(req
, ret
);
1251 tevent_req_set_callback(subreq
, takeover_failed_done
, substate
);
1253 tevent_req_error(req
, ret
);
1257 static void takeover_failed_done(struct tevent_req
*subreq
)
1259 struct takeover_failed_state
*substate
= tevent_req_callback_data(
1260 subreq
, struct takeover_failed_state
);
1261 struct tevent_req
*req
= substate
->req
;
1265 status
= ctdb_client_message_recv(subreq
, &ret
);
1266 TALLOC_FREE(subreq
);
1268 D_ERR("failed to assign banning credits, ret=%d\n", ret
);
1271 ret
= substate
->ret
;
1272 talloc_free(substate
);
1273 tevent_req_error(req
, ret
);
1276 static void takeover_recv(struct tevent_req
*req
, int *perr
)
1278 generic_recv(req
, perr
);
1281 static uint32_t *parse_node_list(TALLOC_CTX
*mem_ctx
, const char* s
)
1288 ret
= strv_split(mem_ctx
, &strv
, s
, ",");
1290 D_ERR("out of memory\n");
1294 num
= strv_count(strv
);
1296 nodes
= talloc_array(mem_ctx
, uint32_t, num
);
1297 if (nodes
== NULL
) {
1298 D_ERR("out of memory\n");
1303 for (i
= 0; i
< num
; i
++) {
1304 t
= strv_next(strv
, t
);
1311 static void usage(const char *progname
)
1314 "\nUsage: %s <output-fd> <ctdb-socket-path> "
1315 "[<force-rebalance-nodes>]\n",
1320 * Arguments - write fd, socket path
1322 int main(int argc
, const char *argv
[])
1325 const char *sockpath
;
1326 TALLOC_CTX
*mem_ctx
;
1327 struct tevent_context
*ev
;
1328 struct ctdb_client_context
*client
;
1331 struct tevent_req
*req
;
1332 uint32_t *force_rebalance_nodes
= NULL
;
1334 if (argc
< 3 || argc
> 4) {
1339 write_fd
= atoi(argv
[1]);
1342 mem_ctx
= talloc_new(NULL
);
1343 if (mem_ctx
== NULL
) {
1344 fprintf(stderr
, "talloc_new() failed\n");
1350 force_rebalance_nodes
= parse_node_list(mem_ctx
, argv
[3]);
1351 if (force_rebalance_nodes
== NULL
) {
1358 ret
= logging_init(mem_ctx
, NULL
, NULL
, "ctdb-takeover");
1361 "ctdb-takeover: Unable to initialize logging\n");
1365 ev
= tevent_context_init(mem_ctx
);
1367 D_ERR("tevent_context_init() failed\n");
1372 status
= logging_setup_sighup_handler(ev
, mem_ctx
, NULL
, NULL
);
1374 D_ERR("logging_setup_sighup_handler() failed\n");
1379 ret
= ctdb_client_init(mem_ctx
, ev
, sockpath
, &client
);
1381 D_ERR("ctdb_client_init() failed, ret=%d\n", ret
);
1385 req
= takeover_send(mem_ctx
, ev
, client
, force_rebalance_nodes
);
1387 D_ERR("takeover_send() failed\n");
1392 if (! tevent_req_poll(req
, ev
)) {
1393 D_ERR("tevent_req_poll() failed\n");
1398 takeover_recv(req
, &ret
);
1401 D_ERR("takeover run failed, ret=%d\n", ret
);
1405 sys_write_v(write_fd
, &ret
, sizeof(ret
));
1407 talloc_free(mem_ctx
);