ctdb-takeover: Add takeover helper
[Samba.git] / ctdb / server / ctdb_takeover_helper.c
blob847a49d27347f91c36a9f36ad3fe215997e910eb
1 /*
2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
9 and ctdb_takeover.c
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/network.h"
30 #include "system/filesys.h"
32 #include <popt.h>
33 #include <talloc.h>
34 #include <tevent.h>
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "client/client.h"
47 #include "common/logging.h"
49 #include "server/ipalloc.h"
/*
 * Per-control timeout in seconds.  Starts at 9 and is overwritten with
 * the takeover_timeout tunable once the tunables have been fetched.
 */
static int takeover_timeout = 9;

/* Absolute expiry time, takeover_timeout seconds from now */
#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
/*
 * Utility functions
 */
/*
 * Common receive helper for requests that carry no result payload.
 *
 * Returns true if the request completed without a unix error.  On
 * failure returns false and, when perr is not NULL, stores the error
 * code in *perr.  *perr is left untouched on success.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int unix_err;

	if (! tevent_req_is_unix_error(req, &unix_err)) {
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}

	return false;
}
73 static enum ipalloc_algorithm
74 determine_algorithm(const struct ctdb_tunable_list *tunables)
76 switch (tunables->ip_alloc_algorithm) {
77 case 0:
78 return IPALLOC_DETERMINISTIC;
79 case 1:
80 return IPALLOC_NONDETERMINISTIC;
81 case 2:
82 return IPALLOC_LCP2;
83 default:
84 return IPALLOC_LCP2;
88 /**********************************************************************/
/* State for the GET_PUBLIC_IPS multi-node request */
struct get_public_ips_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	/* Nodes being queried and how many of them there are */
	uint32_t *pnns;
	int count;
	/* Result array, filled in by get_public_ips_done() */
	struct ctdb_public_ip_list *ips;
};

static void get_public_ips_done(struct tevent_req *subreq);
100 static struct tevent_req *get_public_ips_send(
101 TALLOC_CTX *mem_ctx,
102 struct tevent_context *ev,
103 struct ctdb_client_context *client,
104 uint32_t *pnns,
105 int count,
106 bool available_only)
108 struct tevent_req *req, *subreq;
109 struct get_public_ips_state *state;
110 struct ctdb_req_control request;
112 req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
113 if (req == NULL) {
114 return tevent_req_post(req, ev);
117 state->pnns = pnns;
118 state->count = count;
119 state->ips = NULL;
121 ctdb_req_control_get_public_ips(&request, available_only);
122 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
123 state->pnns,
124 state->count,
125 TIMEOUT(), &request);
126 if (tevent_req_nomem(subreq, req)) {
127 return tevent_req_post(req, ev);
129 tevent_req_set_callback(subreq, get_public_ips_done, req);
131 return req;
134 static void get_public_ips_done(struct tevent_req *subreq)
136 struct tevent_req *req = tevent_req_callback_data(
137 subreq, struct tevent_req);
138 struct get_public_ips_state *state = tevent_req_data(
139 req, struct get_public_ips_state);
140 struct ctdb_reply_control **reply;
141 int *err_list;
142 int ret, i;
143 bool status;
145 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
146 &reply);
147 TALLOC_FREE(subreq);
148 if (! status) {
149 int ret2;
150 uint32_t pnn;
152 ret2 = ctdb_client_control_multi_error(state->pnns,
153 state->count,
154 err_list, &pnn);
155 if (ret2 != 0) {
156 D_ERR("control GET_PUBLIC_IPS failed on "
157 "node %u, ret=%d\n", pnn, ret2);
158 } else {
159 D_ERR("control GET_PUBLIC_IPS failed, "
160 "ret=%d\n", ret);
162 tevent_req_error(req, ret);
163 return;
166 state->ips = talloc_zero_array(state, struct ctdb_public_ip_list,
167 state->count);
168 if (tevent_req_nomem(state->ips, req)) {
169 return;
172 for (i = 0; i < state->count; i++) {
173 uint32_t pnn;
174 struct ctdb_public_ip_list *ips;
176 pnn = state->pnns[i];
177 ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
178 &ips);
179 if (ret != 0) {
180 D_ERR("control GET_PUBLIC_IPS failed on "
181 "node %u\n", pnn);
182 tevent_req_error(req, EIO);
183 return;
185 state->ips[pnn] = *ips;
188 talloc_free(reply);
190 tevent_req_done(req);
193 static bool get_public_ips_recv(struct tevent_req *req, int *perr,
194 TALLOC_CTX *mem_ctx,
195 struct ctdb_public_ip_list **ips)
197 struct get_public_ips_state *state = tevent_req_data(
198 req, struct get_public_ips_state);
199 int err;
201 if (tevent_req_is_unix_error(req, &err)) {
202 if (perr != NULL) {
203 *perr = err;
205 return false;
208 *ips = talloc_steal(mem_ctx, state->ips);
210 return true;
213 /**********************************************************************/
/* Aggregate state for the RELEASE_IP stage (one multi-control per IP) */
struct release_ip_state {
	/* Number of per-IP multi-controls sent, answered and failed */
	int num_sent;
	int num_replies;
	int num_fails;
	/* One of the error codes seen, reported if anything failed */
	int err_any;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

/* State for the RELEASE_IP multi-control of a single IP address */
struct release_ip_one_state {
	/* The enclosing RELEASE_IP request */
	struct tevent_req *req;
	/* Nodes this release was sent to and how many of them there are */
	uint32_t *pnns;
	int count;
	/* Printable address, used in log messages */
	const char *ip_str;
};

static void release_ip_done(struct tevent_req *subreq);
232 static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
233 struct tevent_context *ev,
234 struct ctdb_client_context *client,
235 uint32_t *pnns,
236 int count,
237 struct timeval timeout,
238 struct public_ip_list *all_ips,
239 uint32_t *ban_credits)
241 struct tevent_req *req, *subreq;
242 struct release_ip_state *state;
243 struct ctdb_req_control request;
244 struct public_ip_list *tmp_ip;
246 req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
247 if (req == NULL) {
248 return NULL;
251 state->num_sent = 0;
252 state->num_replies = 0;
253 state->num_fails = 0;
254 state->ban_credits = ban_credits;
256 /* Send a RELEASE_IP to all nodes that should not be hosting
257 * each IP. For each IP, all but one of these will be
258 * redundant. However, the redundant ones are used to tell
259 * nodes which node should be hosting the IP so that commands
260 * like "ctdb ip" can display a particular nodes idea of who
261 * is hosting what. */
262 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
263 struct release_ip_one_state *substate;
264 struct ctdb_public_ip ip;
265 int i;
267 substate = talloc_zero(state, struct release_ip_one_state);
268 if (tevent_req_nomem(substate, req)) {
269 return tevent_req_post(req, ev);
272 substate->pnns = talloc_zero_array(substate, uint32_t, count);
273 if (tevent_req_nomem(substate->pnns, req)) {
274 return tevent_req_post(req, ev);
277 substate->count = 0;
278 substate->req = req;
280 substate->ip_str = ctdb_sock_addr_to_string(substate,
281 &tmp_ip->addr);
282 if (tevent_req_nomem(substate->ip_str, req)) {
283 return tevent_req_post(req, ev);
286 for (i = 0; i < count; i++) {
287 uint32_t pnn = pnns[i];
288 /* If pnn is not the node that should be
289 * hosting the IP then add it to the list of
290 * nodes that need to do a release. */
291 if (tmp_ip->pnn != pnn) {
292 substate->pnns[substate->count] = pnn;
293 substate->count++;
297 ip.pnn = tmp_ip->pnn;
298 ip.addr = tmp_ip->addr;
299 ctdb_req_control_release_ip(&request, &ip);
300 subreq = ctdb_client_control_multi_send(state, ev, client,
301 substate->pnns,
302 substate->count,
303 timeout,/* cumulative */
304 &request);
305 if (tevent_req_nomem(subreq, req)) {
306 return tevent_req_post(req, ev);
308 tevent_req_set_callback(subreq, release_ip_done, substate);
310 state->num_sent++;
313 return req;
316 static void release_ip_done(struct tevent_req *subreq)
318 struct release_ip_one_state *substate = tevent_req_callback_data(
319 subreq, struct release_ip_one_state);
320 struct tevent_req *req = substate->req;
321 struct release_ip_state *state = tevent_req_data(
322 req, struct release_ip_state);
323 int ret, i;
324 int *err_list;
325 bool status, found_errors;
327 status = ctdb_client_control_multi_recv(subreq, &ret, state,
328 &err_list, NULL);
329 TALLOC_FREE(subreq);
331 if (status) {
332 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
333 substate->ip_str, substate->count);
334 goto done;
337 /* Get some clear error messages out of err_list and count
338 * banning credits
340 found_errors = false;
341 for (i = 0; i < substate->count; i++) {
342 int err = err_list[i];
343 if (err != 0) {
344 uint32_t pnn = substate->pnns[i];
346 D_ERR("RELEASE_IP %s failed on node %u, "
347 "ret=%d\n", substate->ip_str, pnn, err);
349 state->ban_credits[pnn]++;
350 state->err_any = err;
351 found_errors = true;
354 if (! found_errors) {
355 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
356 substate->ip_str, ret);
357 state->err_any = EIO;
360 state->num_fails++;
362 done:
363 talloc_free(substate);
365 state->num_replies++;
367 if (state->num_replies < state->num_sent) {
368 /* Not all replies received, don't go further */
369 return;
372 if (state->num_fails > 0) {
373 tevent_req_error(req, state->err_any);
374 return;
377 tevent_req_done(req);
/*
 * Collect the result of release_ip_send().  Returns true on success;
 * on failure returns false and reports the error code through perr
 * when perr is not NULL.
 */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
385 /**********************************************************************/
/* Aggregate state for the TAKEOVER_IP stage (one control per IP) */
struct take_ip_state {
	/* Number of controls sent, answered and failed */
	int num_sent;
	int num_replies;
	int num_fails;
	/* One of the error codes seen, reported if anything failed */
	int err_any;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

/* State for the TAKEOVER_IP control of a single IP address */
struct take_ip_one_state {
	/* The enclosing TAKEOVER_IP request */
	struct tevent_req *req;
	/* Node the control was sent to */
	uint32_t pnn;
	/* Printable address, used in log messages */
	const char *ip_str;
};

static void take_ip_done(struct tevent_req *subreq);
403 static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
404 struct tevent_context *ev,
405 struct ctdb_client_context *client,
406 struct timeval timeout,
407 struct public_ip_list *all_ips,
408 uint32_t *ban_credits)
410 struct tevent_req *req, *subreq;
411 struct take_ip_state *state;
412 struct ctdb_req_control request;
413 struct public_ip_list *tmp_ip;
415 req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
416 if (req == NULL) {
417 return NULL;
420 state->num_sent = 0;
421 state->num_replies = 0;
422 state->num_fails = 0;
423 state->ban_credits = ban_credits;
425 /* For each IP, send a TAKOVER_IP to the node that should be
426 * hosting it. Many of these will often be redundant (since
427 * the allocation won't have changed) but they can be useful
428 * to recover from inconsistencies. */
429 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
430 struct take_ip_one_state *substate;
431 struct ctdb_public_ip ip;
433 if (tmp_ip->pnn == -1) {
434 /* IP will be unassigned */
435 continue;
438 substate = talloc_zero(state, struct take_ip_one_state);
439 if (tevent_req_nomem(substate, req)) {
440 return tevent_req_post(req, ev);
443 substate->req = req;
444 substate->pnn = tmp_ip->pnn;
446 substate->ip_str = ctdb_sock_addr_to_string(substate,
447 &tmp_ip->addr);
448 if (tevent_req_nomem(substate->ip_str, req)) {
449 return tevent_req_post(req, ev);
452 ip.pnn = tmp_ip->pnn;
453 ip.addr = tmp_ip->addr;
454 ctdb_req_control_takeover_ip(&request, &ip);
455 subreq = ctdb_client_control_send(
456 state, ev, client, tmp_ip->pnn,
457 timeout, /* cumulative */
458 &request);
459 if (tevent_req_nomem(subreq, req)) {
460 return tevent_req_post(req, ev);
462 tevent_req_set_callback(subreq, take_ip_done, substate);
464 state->num_sent++;
467 /* None sent, finished... */
468 if (state->num_sent == 0) {
469 tevent_req_done(req);
470 return tevent_req_post(req, ev);
473 return req;
476 static void take_ip_done(struct tevent_req *subreq)
478 struct take_ip_one_state *substate = tevent_req_callback_data(
479 subreq, struct take_ip_one_state);
480 struct tevent_req *req = substate->req;
481 struct ctdb_reply_control *reply;
482 struct take_ip_state *state = tevent_req_data(
483 req, struct take_ip_state);
484 int ret = 0;
485 bool status;
487 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
488 TALLOC_FREE(subreq);
490 if (! status) {
491 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
492 substate->ip_str, substate->pnn, ret);
493 goto fail;
496 ret = ctdb_reply_control_takeover_ip(reply);
497 if (ret != 0) {
498 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
499 substate->ip_str, substate->pnn, ret);
500 goto fail;
503 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
504 substate->ip_str, substate->pnn);
505 goto done;
507 fail:
508 state->ban_credits[substate->pnn]++;
509 state->num_fails++;
510 state->err_any = ret;
512 done:
513 talloc_free(substate);
515 state->num_replies++;
517 if (state->num_replies < state->num_sent) {
518 /* Not all replies received, don't go further */
519 return;
522 if (state->num_fails > 0) {
523 tevent_req_error(req, state->err_any);
524 return;
527 tevent_req_done(req);
/*
 * Collect the result of take_ip_send().  Returns true on success; on
 * failure returns false and reports the error code through perr when
 * perr is not NULL.
 */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
535 /**********************************************************************/
/* State for the IPREALLOCATED multi-control */
struct ipreallocated_state {
	/* Nodes the control is sent to and how many of them there are */
	uint32_t *pnns;
	int count;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

static void ipreallocated_done(struct tevent_req *subreq);
545 static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
546 struct tevent_context *ev,
547 struct ctdb_client_context *client,
548 uint32_t *pnns,
549 int count,
550 struct timeval timeout,
551 uint32_t *ban_credits)
553 struct tevent_req *req, *subreq;
554 struct ipreallocated_state *state;
555 struct ctdb_req_control request;
557 req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
558 if (req == NULL) {
559 return NULL;
562 state->pnns = pnns;
563 state->count = count;
564 state->ban_credits = ban_credits;
566 ctdb_req_control_ipreallocated(&request);
567 subreq = ctdb_client_control_multi_send(state, ev, client,
568 pnns, count,
569 timeout, /* cumulative */
570 &request);
571 if (tevent_req_nomem(subreq, req)) {
572 return tevent_req_post(req, ev);
574 tevent_req_set_callback(subreq, ipreallocated_done, req);
576 return req;
579 static void ipreallocated_done(struct tevent_req *subreq)
581 struct tevent_req *req = tevent_req_callback_data(
582 subreq, struct tevent_req);
583 struct ipreallocated_state *state = tevent_req_data(
584 req, struct ipreallocated_state);
585 int *err_list = NULL;
586 int ret, i;
587 bool status, found_errors;
589 status = ctdb_client_control_multi_recv(subreq, &ret, state,
590 &err_list, NULL);
591 TALLOC_FREE(subreq);
593 if (status) {
594 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
595 tevent_req_done(req);
596 return;
599 /* Get some clear error messages out of err_list and count
600 * banning credits
602 found_errors = false;
603 for (i = 0; i < state->count; i++) {
604 int err = err_list[i];
605 if (err != 0) {
606 uint32_t pnn = state->pnns[i];
608 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
609 pnn, err);
611 state->ban_credits[pnn]++;
612 found_errors = true;
616 if (! found_errors) {
617 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
620 tevent_req_error(req, ret);
/*
 * Collect the result of ipreallocated_send().  Returns true on
 * success; on failure returns false and reports the error code through
 * perr when perr is not NULL.
 */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
628 /**********************************************************************/
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Get tunables
 * - Get nodemap
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then jump to IPREALLOCATED
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKEOVER_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes
 */
/* State carried through every stage of a takeover run */
struct takeover_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	/* Absolute expiry shared by the RELEASE_IP, TAKEOVER_IP and
	 * IPREALLOCATED stages */
	struct timeval timeout;
	/* Number of nodes in the node map */
	int num_nodes;
	/* Connected nodes, from list_of_connected_nodes() */
	uint32_t *pnns_connected;
	int num_connected;
	/* Active nodes, from list_of_active_nodes() */
	uint32_t *pnns_active;
	int num_active;
	/* Node the tunables and node map are fetched from */
	uint32_t destnode;
	/* Passed through to ipalloc_state_init() */
	uint32_t *force_rebalance_nodes;
	struct ctdb_tunable_list *tun_list;
	struct ipalloc_state *ipalloc_state;
	/* Known public IPs, kept until the available IPs arrive */
	struct ctdb_public_ip_list *known_ips;
	/* Result of ipalloc() */
	struct public_ip_list *all_ips;
	/* Failure counters, one per node in the node map */
	uint32_t *ban_credits;
};

static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
static void takeover_failed(struct tevent_req *subreq, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
683 static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
684 struct tevent_context *ev,
685 struct ctdb_client_context *client,
686 uint32_t *force_rebalance_nodes)
688 struct tevent_req *req, *subreq;
689 struct takeover_state *state;
690 struct ctdb_req_control request;
692 req = tevent_req_create(mem_ctx, &state, struct takeover_state);
693 if (req == NULL) {
694 return NULL;
697 state->ev = ev;
698 state->client = client;
699 state->force_rebalance_nodes = force_rebalance_nodes;
700 state->destnode = ctdb_client_pnn(client);
702 ctdb_req_control_get_all_tunables(&request);
703 subreq = ctdb_client_control_send(state, state->ev, state->client,
704 state->destnode, TIMEOUT(),
705 &request);
706 if (tevent_req_nomem(subreq, req)) {
707 return tevent_req_post(req, ev);
709 tevent_req_set_callback(subreq, takeover_tunables_done, req);
711 return req;
714 static void takeover_tunables_done(struct tevent_req *subreq)
716 struct tevent_req *req = tevent_req_callback_data(
717 subreq, struct tevent_req);
718 struct takeover_state *state = tevent_req_data(
719 req, struct takeover_state);
720 struct ctdb_reply_control *reply;
721 struct ctdb_req_control request;
722 int ret;
723 bool status;
725 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
726 TALLOC_FREE(subreq);
727 if (! status) {
728 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
729 tevent_req_error(req, ret);
730 return;
733 ret = ctdb_reply_control_get_all_tunables(reply, state,
734 &state->tun_list);
735 if (ret != 0) {
736 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
737 tevent_req_error(req, ret);
738 return;
741 talloc_free(reply);
743 takeover_timeout = state->tun_list->takeover_timeout;
745 ctdb_req_control_get_nodemap(&request);
746 subreq = ctdb_client_control_send(state, state->ev, state->client,
747 state->destnode, TIMEOUT(),
748 &request);
749 if (tevent_req_nomem(subreq, req)) {
750 return;
752 tevent_req_set_callback(subreq, takeover_nodemap_done, req);
755 static void takeover_nodemap_done(struct tevent_req *subreq)
757 struct tevent_req *req = tevent_req_callback_data(
758 subreq, struct tevent_req);
759 struct takeover_state *state = tevent_req_data(
760 req, struct takeover_state);
761 struct ctdb_reply_control *reply;
762 bool status;
763 int ret;
764 struct ctdb_node_map *nodemap;
766 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
767 TALLOC_FREE(subreq);
768 if (! status) {
769 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
770 state->destnode, ret);
771 tevent_req_error(req, ret);
772 return;
775 ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
776 if (ret != 0) {
777 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
778 tevent_req_error(req, ret);
779 return;
782 state->num_nodes = nodemap->num;
784 state->num_connected = list_of_connected_nodes(nodemap,
785 CTDB_UNKNOWN_PNN, state,
786 &state->pnns_connected);
787 if (state->num_connected <= 0) {
788 tevent_req_error(req, ENOMEM);
789 return;
792 state->num_active = list_of_active_nodes(nodemap,
793 CTDB_UNKNOWN_PNN, state,
794 &state->pnns_active);
795 if (state->num_active <= 0) {
796 tevent_req_error(req, ENOMEM);
797 return;
800 /* Default timeout for early jump to IPREALLOCATED. See below
801 * for explanation of 3 times...
803 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
805 state->ban_credits = talloc_zero_array(state, uint32_t,
806 state->num_nodes);
807 if (tevent_req_nomem(state->ban_credits, req)) {
808 return;
811 if (state->tun_list->disable_ip_failover != 0) {
812 /* IP failover is completely disabled so just send out
813 * ipreallocated event.
815 takeover_ipreallocated(req);
816 return;
819 state->ipalloc_state =
820 ipalloc_state_init(
821 state, state->num_nodes,
822 determine_algorithm(state->tun_list),
823 (state->tun_list->no_ip_takeover != 0),
824 (state->tun_list->no_ip_failback != 0),
825 (state->tun_list->no_ip_host_on_all_disabled != 0),
826 state->force_rebalance_nodes);
827 if (tevent_req_nomem(state->ipalloc_state, req)) {
828 return;
831 ipalloc_set_node_flags(state->ipalloc_state, nodemap);
833 subreq = get_public_ips_send(state, state->ev, state->client,
834 state->pnns_active, state->num_active,
835 false);
836 if (tevent_req_nomem(subreq, req)) {
837 return;
840 tevent_req_set_callback(subreq, takeover_known_ips_done, req);
843 static void takeover_known_ips_done(struct tevent_req *subreq)
845 struct tevent_req *req = tevent_req_callback_data(
846 subreq, struct tevent_req);
847 struct takeover_state *state = tevent_req_data(
848 req, struct takeover_state);
849 int ret;
850 bool status;
852 status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
853 TALLOC_FREE(subreq);
855 if (! status) {
856 D_ERR("Failed to fetch known public IPs\n");
857 tevent_req_error(req, ret);
858 return;
861 subreq = get_public_ips_send(state, state->ev, state->client,
862 state->pnns_active, state->num_active,
863 true);
864 if (tevent_req_nomem(subreq, req)) {
865 return;
868 tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
871 static void takeover_avail_ips_done(struct tevent_req *subreq)
873 struct tevent_req *req = tevent_req_callback_data(
874 subreq, struct tevent_req);
875 struct takeover_state *state = tevent_req_data(
876 req, struct takeover_state);
877 bool status;
878 int ret;
879 struct ctdb_public_ip_list *available_ips;
881 status = get_public_ips_recv(subreq, &ret, state, &available_ips);
882 TALLOC_FREE(subreq);
884 if (! status) {
885 D_ERR("Failed to fetch available public IPs\n");
886 tevent_req_error(req, ret);
887 return;
890 ipalloc_set_public_ips(state->ipalloc_state,
891 state->known_ips, available_ips);
893 if (! ipalloc_can_host_ips(state->ipalloc_state)) {
894 D_NOTICE("No nodes available to host public IPs yet\n");
895 takeover_ipreallocated(req);
896 return;
899 /* Do the IP reassignment calculations */
900 state->all_ips = ipalloc(state->ipalloc_state);
901 if (tevent_req_nomem(state->all_ips, req)) {
902 return;
905 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
906 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
907 * seconds. However, RELEASE_IP can take longer due to TCP
908 * connection killing, so sometimes needs more time.
909 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
910 * seconds across all 3 stages. No explicit expiry checks are
911 * needed before each stage because tevent is smart enough to
912 * fire the timeouts even if they are in the past. Initialise
913 * this here so it explicitly covers the stages we're
914 * interested in but, in particular, not the time taken by the
915 * ipalloc().
917 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
919 subreq = release_ip_send(state, state->ev, state->client,
920 state->pnns_connected, state->num_connected,
921 state->timeout, state->all_ips,
922 state->ban_credits);
923 if (tevent_req_nomem(subreq, req)) {
924 return;
926 tevent_req_set_callback(subreq, takeover_release_ip_done, req);
929 static void takeover_release_ip_done(struct tevent_req *subreq)
931 struct tevent_req *req = tevent_req_callback_data(
932 subreq, struct tevent_req);
933 struct takeover_state *state = tevent_req_data(
934 req, struct takeover_state);
935 int ret;
936 bool status;
938 status = release_ip_recv(subreq, &ret);
939 TALLOC_FREE(subreq);
941 if (! status) {
942 takeover_failed(req, ret);
943 return;
946 /* All released, now for takeovers */
948 subreq = take_ip_send(state, state->ev, state->client,
949 state->timeout, state->all_ips,
950 state->ban_credits);
951 if (tevent_req_nomem(subreq, req)) {
952 return;
954 tevent_req_set_callback(subreq, takeover_take_ip_done, req);
957 static void takeover_take_ip_done(struct tevent_req *subreq)
959 struct tevent_req *req = tevent_req_callback_data(
960 subreq, struct tevent_req);
961 int ret = 0;
962 bool status;
964 status = take_ip_recv(subreq, &ret);
965 TALLOC_FREE(subreq);
967 if (! status) {
968 takeover_failed(req, ret);
969 return;
972 takeover_ipreallocated(req);
975 static void takeover_ipreallocated(struct tevent_req *req)
977 struct takeover_state *state = tevent_req_data(
978 req, struct takeover_state);
979 struct tevent_req *subreq;
981 subreq = ipreallocated_send(state, state->ev, state->client,
982 state->pnns_connected,
983 state->num_connected,
984 state->timeout,
985 state->ban_credits);
986 if (tevent_req_nomem(subreq, req)) {
987 return;
989 tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
992 static void takeover_ipreallocated_done(struct tevent_req *subreq)
994 struct tevent_req *req = tevent_req_callback_data(
995 subreq, struct tevent_req);
996 int ret;
997 bool status;
999 status = ipreallocated_recv(subreq, &ret);
1000 TALLOC_FREE(subreq);
1002 if (! status) {
1003 takeover_failed(req, ret);
1004 return;
1007 tevent_req_done(req);
/* Context kept while the banning message is in flight */
struct takeover_failed_state {
	/* The takeover request to fail once the message has been sent */
	struct tevent_req *req;
	/* Error code to fail the takeover request with */
	int ret;
};
1015 void takeover_failed(struct tevent_req *req, int ret)
1017 struct takeover_state *state = tevent_req_data(
1018 req, struct takeover_state);
1019 struct tevent_req *subreq;
1020 uint32_t max_pnn = CTDB_UNKNOWN_PNN;
1021 int max_credits = 0;
1022 int pnn;
1024 /* Check that bans are enabled */
1025 if (state->tun_list->enable_bans == 0) {
1026 tevent_req_error(req, ret);
1027 return;
1030 for (pnn = 0; pnn < state->num_nodes; pnn++) {
1031 if (state->ban_credits[pnn] > max_credits) {
1032 max_pnn = pnn;
1033 max_credits = state->ban_credits[pnn];
1037 if (max_credits > 0) {
1038 struct ctdb_req_message message;
1039 struct takeover_failed_state *substate;
1041 D_WARNING("Assigning banning credits to node %u\n", max_pnn);
1043 substate = talloc_zero(state, struct takeover_failed_state);
1044 if (tevent_req_nomem(substate, req)) {
1045 return;
1047 substate->req = req;
1048 substate->ret = ret;
1050 message.srvid = CTDB_SRVID_BANNING;
1051 message.data.pnn = max_pnn;
1053 subreq = ctdb_client_message_send(
1054 state, state->ev, state->client,
1055 ctdb_client_pnn(state->client),
1056 &message);
1057 if (subreq == NULL) {
1058 D_ERR("failed to assign banning credits\n");
1059 tevent_req_error(req, ret);
1060 return;
1062 tevent_req_set_callback(subreq, takeover_failed_done, substate);
1063 } else {
1064 tevent_req_error(req, ret);
1068 static void takeover_failed_done(struct tevent_req *subreq)
1070 struct takeover_failed_state *substate = tevent_req_callback_data(
1071 subreq, struct takeover_failed_state);
1072 struct tevent_req *req = substate->req;
1073 int ret;
1074 bool status;
1076 status = ctdb_client_message_recv(subreq, &ret);
1077 TALLOC_FREE(subreq);
1078 if (! status) {
1079 D_ERR("failed to assign banning credits, ret=%d\n", ret);
1082 ret = substate->ret;
1083 talloc_free(substate);
1084 tevent_req_error(req, ret);
/*
 * Collect the result of takeover_send().  When the request failed and
 * perr is not NULL the error code is stored in *perr; *perr is left
 * untouched on success.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	(void) generic_recv(req, perr);
}
1092 static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
1094 char *strv = NULL;
1095 int num, i, ret;
1096 char *t;
1097 uint32_t *nodes;
1099 ret = strv_split(mem_ctx, &strv, s, ",");
1100 if (ret != 0) {
1101 D_ERR("out of memory\n");
1102 return NULL;
1105 num = strv_count(strv);
1107 nodes = talloc_array(mem_ctx, uint32_t, num);
1108 if (nodes == NULL) {
1109 D_ERR("out of memory\n");
1110 return NULL;
1113 t = NULL;
1114 for (i = 0; i < num; i++) {
1115 t = strv_next(strv, t);
1116 nodes[i] = atoi(t);
1119 return nodes;
/* Print a one-line usage summary for progname to stderr */
static void usage(const char *progname)
{
	static const char fmt[] =
		"\nUsage: %s <output-fd> <ctdb-socket-path> "
		"[<force-rebalance-nodes>]\n";

	fprintf(stderr, fmt, progname);
}
/*
 * Arguments - write fd, socket path and, optionally, a comma-separated
 * list of nodes to force rebalance
 */
1133 int main(int argc, const char *argv[])
1135 int write_fd;
1136 const char *sockpath;
1137 TALLOC_CTX *mem_ctx;
1138 struct tevent_context *ev;
1139 struct ctdb_client_context *client;
1140 int ret;
1141 struct tevent_req *req;
1142 uint32_t *force_rebalance_nodes = NULL;
1144 if (argc < 3 || argc > 4) {
1145 usage(argv[0]);
1146 exit(1);
1149 write_fd = atoi(argv[1]);
1150 sockpath = argv[2];
1152 mem_ctx = talloc_new(NULL);
1153 if (mem_ctx == NULL) {
1154 fprintf(stderr, "talloc_new() failed\n");
1155 ret = ENOMEM;
1156 goto done;
1159 if (argc == 4) {
1160 force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
1161 if (force_rebalance_nodes == NULL) {
1162 usage(argv[0]);
1163 exit(1);
1167 logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
1169 ev = tevent_context_init(mem_ctx);
1170 if (ev == NULL) {
1171 D_ERR("tevent_context_init() failed\n");
1172 ret = ENOMEM;
1173 goto done;
1176 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1177 if (ret != 0) {
1178 D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
1179 goto done;
1182 req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
1183 if (req == NULL) {
1184 D_ERR("takeover_send() failed\n");
1185 ret = 1;
1186 goto done;
1189 if (! tevent_req_poll(req, ev)) {
1190 D_ERR("tevent_req_poll() failed\n");
1191 ret = 1;
1192 goto done;
1195 takeover_recv(req, &ret);
1196 TALLOC_FREE(req);
1197 if (ret != 0) {
1198 D_ERR("takeover run failed, ret=%d\n", ret);
1201 done:
1202 sys_write_v(write_fd, &ret, sizeof(ret));
1204 talloc_free(mem_ctx);
1205 return ret;