ctdb-takeover: Generalise error handling for GET_PUBLIC_IPS
[Samba.git] / ctdb / server / ctdb_takeover_helper.c
blob14361b4ef2459b4f7e6fed2a2660c80ed2f9c1d9
1 /*
2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
9 and ctdb_takeover.c
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/network.h"
30 #include "system/filesys.h"
32 #include <popt.h>
33 #include <talloc.h>
34 #include <tevent.h>
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "client/client.h"
47 #include "common/logging.h"
49 #include "server/ipalloc.h"
/*
 * Timeout, in seconds, applied to individual controls.  Starts at 9 and
 * is overwritten with the takeover_timeout tunable once the tunables
 * have been fetched.
 */
static int takeover_timeout = 9;

/* A struct timeval offset takeover_timeout seconds from the current time */
#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)

/*
 * Utility functions
 */
/*
 * Common receive helper for requests that carry no result data.
 *
 * Returns true if the request finished without a unix error.
 * Otherwise returns false and, when perr is non-NULL, stores the
 * error code in *perr.  *perr is left untouched on success.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int unix_err;
	bool failed;

	failed = tevent_req_is_unix_error(req, &unix_err);
	if (! failed) {
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}

	return false;
}
73 static enum ipalloc_algorithm
74 determine_algorithm(const struct ctdb_tunable_list *tunables)
76 switch (tunables->ip_alloc_algorithm) {
77 case 0:
78 return IPALLOC_DETERMINISTIC;
79 case 1:
80 return IPALLOC_NONDETERMINISTIC;
81 case 2:
82 return IPALLOC_LCP2;
83 default:
84 return IPALLOC_LCP2;
88 /**********************************************************************/
/* State for a GET_PUBLIC_IPS request sent to a set of nodes */
struct get_public_ips_state {
	/* Nodes being queried (caller's array, not copied) */
	uint32_t *pnns;
	/* Number of entries in pnns */
	int count;
	/* Results; each reply is stored at the index of the replying PNN */
	struct ctdb_public_ip_list *ips;
};

static void get_public_ips_done(struct tevent_req *subreq);
98 static struct tevent_req *get_public_ips_send(
99 TALLOC_CTX *mem_ctx,
100 struct tevent_context *ev,
101 struct ctdb_client_context *client,
102 uint32_t *pnns,
103 int count, int num_nodes,
104 bool available_only)
106 struct tevent_req *req, *subreq;
107 struct get_public_ips_state *state;
108 struct ctdb_req_control request;
110 req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
111 if (req == NULL) {
112 return NULL;
115 state->pnns = pnns;
116 state->count = count;
118 state->ips = talloc_zero_array(state,
119 struct ctdb_public_ip_list,
120 num_nodes);
121 if (tevent_req_nomem(state->ips, req)) {
122 return tevent_req_post(req, ev);
125 /* Short circuit if no nodes being asked for IPs */
126 if (state->count == 0) {
127 tevent_req_done(req);
128 return tevent_req_post(req, ev);
131 ctdb_req_control_get_public_ips(&request, available_only);
132 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
133 state->pnns,
134 state->count,
135 TIMEOUT(), &request);
136 if (tevent_req_nomem(subreq, req)) {
137 return tevent_req_post(req, ev);
139 tevent_req_set_callback(subreq, get_public_ips_done, req);
141 return req;
144 static void get_public_ips_done(struct tevent_req *subreq)
146 struct tevent_req *req = tevent_req_callback_data(
147 subreq, struct tevent_req);
148 struct get_public_ips_state *state = tevent_req_data(
149 req, struct get_public_ips_state);
150 struct ctdb_reply_control **reply;
151 int *err_list;
152 int ret, i;
153 bool status, found_errors;
155 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
156 &reply);
157 TALLOC_FREE(subreq);
158 if (! status) {
159 found_errors = false;
160 for (i = 0; i < state->count; i++) {
161 if (err_list[i] != 0) {
162 uint32_t pnn = state->pnns[i];
164 D_ERR("control GET_PUBLIC_IPS failed on "
165 "node %u, ret=%d\n", pnn, err_list[i]);
167 found_errors = true;
171 tevent_req_error(req, ret);
172 return;
175 found_errors = false;
176 for (i = 0; i < state->count; i++) {
177 uint32_t pnn;
178 struct ctdb_public_ip_list *ips;
180 pnn = state->pnns[i];
181 ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
182 &ips);
183 if (ret != 0) {
184 D_ERR("control GET_PUBLIC_IPS failed on "
185 "node %u\n", pnn);
186 found_errors = true;
187 continue;
190 D_INFO("Fetched public IPs from node %u\n", pnn);
191 state->ips[pnn] = *ips;
194 if (found_errors) {
195 tevent_req_error(req, EIO);
196 return;
199 talloc_free(reply);
201 tevent_req_done(req);
204 static bool get_public_ips_recv(struct tevent_req *req, int *perr,
205 TALLOC_CTX *mem_ctx,
206 struct ctdb_public_ip_list **ips)
208 struct get_public_ips_state *state = tevent_req_data(
209 req, struct get_public_ips_state);
210 int err;
212 if (tevent_req_is_unix_error(req, &err)) {
213 if (perr != NULL) {
214 *perr = err;
216 return false;
219 *ips = talloc_steal(mem_ctx, state->ips);
221 return true;
224 /**********************************************************************/
/* Overall state for the RELEASE_IP stage */
struct release_ip_state {
	/* Number of per-address multi-node controls sent */
	int num_sent;
	/* Number of those controls that have completed */
	int num_replies;
	/* Number of those controls that failed */
	int num_fails;
	/* Most recently recorded error, reported if anything failed */
	int err_any;
	/* Per-node failure counters, indexed by PNN (caller's array) */
	uint32_t *ban_credits;
};

/* State for the RELEASE_IP control covering a single address */
struct release_ip_one_state {
	/* The enclosing RELEASE_IP stage request */
	struct tevent_req *req;
	/* Nodes this address's RELEASE_IP was sent to */
	uint32_t *pnns;
	/* Number of entries used in pnns */
	int count;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};

static void release_ip_done(struct tevent_req *subreq);
243 static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
244 struct tevent_context *ev,
245 struct ctdb_client_context *client,
246 uint32_t *pnns,
247 int count,
248 struct timeval timeout,
249 struct public_ip_list *all_ips,
250 uint32_t *ban_credits)
252 struct tevent_req *req, *subreq;
253 struct release_ip_state *state;
254 struct ctdb_req_control request;
255 struct public_ip_list *tmp_ip;
257 req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
258 if (req == NULL) {
259 return NULL;
262 state->num_sent = 0;
263 state->num_replies = 0;
264 state->num_fails = 0;
265 state->ban_credits = ban_credits;
267 /* Send a RELEASE_IP to all nodes that should not be hosting
268 * each IP. For each IP, all but one of these will be
269 * redundant. However, the redundant ones are used to tell
270 * nodes which node should be hosting the IP so that commands
271 * like "ctdb ip" can display a particular nodes idea of who
272 * is hosting what. */
273 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
274 struct release_ip_one_state *substate;
275 struct ctdb_public_ip ip;
276 int i;
278 substate = talloc_zero(state, struct release_ip_one_state);
279 if (tevent_req_nomem(substate, req)) {
280 return tevent_req_post(req, ev);
283 substate->pnns = talloc_zero_array(substate, uint32_t, count);
284 if (tevent_req_nomem(substate->pnns, req)) {
285 return tevent_req_post(req, ev);
288 substate->count = 0;
289 substate->req = req;
291 substate->ip_str = ctdb_sock_addr_to_string(substate,
292 &tmp_ip->addr);
293 if (tevent_req_nomem(substate->ip_str, req)) {
294 return tevent_req_post(req, ev);
297 for (i = 0; i < count; i++) {
298 uint32_t pnn = pnns[i];
299 /* If pnn is not the node that should be
300 * hosting the IP then add it to the list of
301 * nodes that need to do a release. */
302 if (tmp_ip->pnn != pnn) {
303 substate->pnns[substate->count] = pnn;
304 substate->count++;
308 if (substate->count == 0) {
309 /* No releases to send for this address... */
310 TALLOC_FREE(substate);
311 continue;
314 ip.pnn = tmp_ip->pnn;
315 ip.addr = tmp_ip->addr;
316 ctdb_req_control_release_ip(&request, &ip);
317 subreq = ctdb_client_control_multi_send(state, ev, client,
318 substate->pnns,
319 substate->count,
320 timeout,/* cumulative */
321 &request);
322 if (tevent_req_nomem(subreq, req)) {
323 return tevent_req_post(req, ev);
325 tevent_req_set_callback(subreq, release_ip_done, substate);
327 state->num_sent++;
330 /* None sent, finished... */
331 if (state->num_sent == 0) {
332 tevent_req_done(req);
333 return tevent_req_post(req, ev);
336 return req;
339 static void release_ip_done(struct tevent_req *subreq)
341 struct release_ip_one_state *substate = tevent_req_callback_data(
342 subreq, struct release_ip_one_state);
343 struct tevent_req *req = substate->req;
344 struct release_ip_state *state = tevent_req_data(
345 req, struct release_ip_state);
346 int ret, i;
347 int *err_list;
348 bool status, found_errors;
350 status = ctdb_client_control_multi_recv(subreq, &ret, state,
351 &err_list, NULL);
352 TALLOC_FREE(subreq);
354 if (status) {
355 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
356 substate->ip_str, substate->count);
357 goto done;
360 /* Get some clear error messages out of err_list and count
361 * banning credits
363 found_errors = false;
364 for (i = 0; i < substate->count; i++) {
365 int err = err_list[i];
366 if (err != 0) {
367 uint32_t pnn = substate->pnns[i];
369 D_ERR("RELEASE_IP %s failed on node %u, "
370 "ret=%d\n", substate->ip_str, pnn, err);
372 state->ban_credits[pnn]++;
373 state->err_any = err;
374 found_errors = true;
377 if (! found_errors) {
378 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
379 substate->ip_str, ret);
380 state->err_any = EIO;
383 state->num_fails++;
385 done:
386 talloc_free(substate);
388 state->num_replies++;
390 if (state->num_replies < state->num_sent) {
391 /* Not all replies received, don't go further */
392 return;
395 if (state->num_fails > 0) {
396 tevent_req_error(req, state->err_any);
397 return;
400 tevent_req_done(req);
/* Collect the result of release_ip_send(); see generic_recv() */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
408 /**********************************************************************/
/* Overall state for the TAKEOVER_IP stage */
struct take_ip_state {
	/* Number of TAKEOVER_IP controls sent */
	int num_sent;
	/* Number of those controls that have completed */
	int num_replies;
	/* Number of those controls that failed */
	int num_fails;
	/* Most recently recorded error, reported if anything failed */
	int err_any;
	/* Per-node failure counters, indexed by PNN (caller's array) */
	uint32_t *ban_credits;
};

/* State for a single TAKEOVER_IP control */
struct take_ip_one_state {
	/* The enclosing TAKEOVER_IP stage request */
	struct tevent_req *req;
	/* Node the control was sent to */
	uint32_t pnn;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};

static void take_ip_done(struct tevent_req *subreq);
426 static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
427 struct tevent_context *ev,
428 struct ctdb_client_context *client,
429 struct timeval timeout,
430 struct public_ip_list *all_ips,
431 uint32_t *ban_credits)
433 struct tevent_req *req, *subreq;
434 struct take_ip_state *state;
435 struct ctdb_req_control request;
436 struct public_ip_list *tmp_ip;
438 req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
439 if (req == NULL) {
440 return NULL;
443 state->num_sent = 0;
444 state->num_replies = 0;
445 state->num_fails = 0;
446 state->ban_credits = ban_credits;
448 /* For each IP, send a TAKOVER_IP to the node that should be
449 * hosting it. Many of these will often be redundant (since
450 * the allocation won't have changed) but they can be useful
451 * to recover from inconsistencies. */
452 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
453 struct take_ip_one_state *substate;
454 struct ctdb_public_ip ip;
456 if (tmp_ip->pnn == -1) {
457 /* IP will be unassigned */
458 continue;
461 substate = talloc_zero(state, struct take_ip_one_state);
462 if (tevent_req_nomem(substate, req)) {
463 return tevent_req_post(req, ev);
466 substate->req = req;
467 substate->pnn = tmp_ip->pnn;
469 substate->ip_str = ctdb_sock_addr_to_string(substate,
470 &tmp_ip->addr);
471 if (tevent_req_nomem(substate->ip_str, req)) {
472 return tevent_req_post(req, ev);
475 ip.pnn = tmp_ip->pnn;
476 ip.addr = tmp_ip->addr;
477 ctdb_req_control_takeover_ip(&request, &ip);
478 subreq = ctdb_client_control_send(
479 state, ev, client, tmp_ip->pnn,
480 timeout, /* cumulative */
481 &request);
482 if (tevent_req_nomem(subreq, req)) {
483 return tevent_req_post(req, ev);
485 tevent_req_set_callback(subreq, take_ip_done, substate);
487 state->num_sent++;
490 /* None sent, finished... */
491 if (state->num_sent == 0) {
492 tevent_req_done(req);
493 return tevent_req_post(req, ev);
496 return req;
499 static void take_ip_done(struct tevent_req *subreq)
501 struct take_ip_one_state *substate = tevent_req_callback_data(
502 subreq, struct take_ip_one_state);
503 struct tevent_req *req = substate->req;
504 struct ctdb_reply_control *reply;
505 struct take_ip_state *state = tevent_req_data(
506 req, struct take_ip_state);
507 int ret = 0;
508 bool status;
510 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
511 TALLOC_FREE(subreq);
513 if (! status) {
514 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
515 substate->ip_str, substate->pnn, ret);
516 goto fail;
519 ret = ctdb_reply_control_takeover_ip(reply);
520 if (ret != 0) {
521 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
522 substate->ip_str, substate->pnn, ret);
523 goto fail;
526 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
527 substate->ip_str, substate->pnn);
528 goto done;
530 fail:
531 state->ban_credits[substate->pnn]++;
532 state->num_fails++;
533 state->err_any = ret;
535 done:
536 talloc_free(substate);
538 state->num_replies++;
540 if (state->num_replies < state->num_sent) {
541 /* Not all replies received, don't go further */
542 return;
545 if (state->num_fails > 0) {
546 tevent_req_error(req, state->err_any);
547 return;
550 tevent_req_done(req);
/* Collect the result of take_ip_send(); see generic_recv() */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
558 /**********************************************************************/
/* State for the IPREALLOCATED stage */
struct ipreallocated_state {
	/* Nodes the control is sent to (caller's array, not copied) */
	uint32_t *pnns;
	/* Number of entries in pnns */
	int count;
	/* Per-node failure counters, indexed by PNN (caller's array) */
	uint32_t *ban_credits;
};

static void ipreallocated_done(struct tevent_req *subreq);
568 static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
569 struct tevent_context *ev,
570 struct ctdb_client_context *client,
571 uint32_t *pnns,
572 int count,
573 struct timeval timeout,
574 uint32_t *ban_credits)
576 struct tevent_req *req, *subreq;
577 struct ipreallocated_state *state;
578 struct ctdb_req_control request;
580 req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
581 if (req == NULL) {
582 return NULL;
585 state->pnns = pnns;
586 state->count = count;
587 state->ban_credits = ban_credits;
589 ctdb_req_control_ipreallocated(&request);
590 subreq = ctdb_client_control_multi_send(state, ev, client,
591 pnns, count,
592 timeout, /* cumulative */
593 &request);
594 if (tevent_req_nomem(subreq, req)) {
595 return tevent_req_post(req, ev);
597 tevent_req_set_callback(subreq, ipreallocated_done, req);
599 return req;
602 static void ipreallocated_done(struct tevent_req *subreq)
604 struct tevent_req *req = tevent_req_callback_data(
605 subreq, struct tevent_req);
606 struct ipreallocated_state *state = tevent_req_data(
607 req, struct ipreallocated_state);
608 int *err_list = NULL;
609 int ret, i;
610 bool status, found_errors;
612 status = ctdb_client_control_multi_recv(subreq, &ret, state,
613 &err_list, NULL);
614 TALLOC_FREE(subreq);
616 if (status) {
617 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
618 tevent_req_done(req);
619 return;
622 /* Get some clear error messages out of err_list and count
623 * banning credits
625 found_errors = false;
626 for (i = 0; i < state->count; i++) {
627 int err = err_list[i];
628 if (err != 0) {
629 uint32_t pnn = state->pnns[i];
631 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
632 pnn, err);
634 state->ban_credits[pnn]++;
635 found_errors = true;
639 if (! found_errors) {
640 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
643 tevent_req_error(req, ret);
/* Collect the result of ipreallocated_send(); see generic_recv() */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
651 /**********************************************************************/
654 * Recalculate the allocation of public IPs to nodes and have the
655 * nodes host their allocated addresses.
657 * - Get tunables
658 * - Get nodemap
659 * - Initialise IP allocation state. Pass:
660 * + algorithm to be used;
661 * + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
662 * + list of nodes to force rebalance (internal structure, currently
663 * no way to fetch, only used by LCP2 for nodes that have had new
664 * IP addresses added).
665 * - Set IP flags for IP allocation based on node map
666 * - Retrieve known and available IP addresses (done separately so
667 * values can be faked in unit testing)
668 * - Use ipalloc_set_public_ips() to set known and available IP
669 * addresses for allocation
670 * - If cluster can't host IP addresses then jump to IPREALLOCATED
671 * - Run IP allocation algorithm
672 * - Send RELEASE_IP to all nodes for IPs they should not host
673 * - Send TAKE_IP to all nodes for IPs they should host
674 * - Send IPREALLOCATED to all nodes
/* State carried through all stages of a takeover run */
struct takeover_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	/* Cumulative deadline shared by RELEASE_IP, TAKEOVER_IP and
	 * IPREALLOCATED */
	struct timeval timeout;
	/* Number of nodes in the node map */
	int num_nodes;
	/* Connected nodes and how many there are */
	uint32_t *pnns_connected;
	int num_connected;
	/* Active nodes and how many there are */
	uint32_t *pnns_active;
	int num_active;
	/* Node that the single-node controls are sent to */
	uint32_t destnode;
	/* Optional list of nodes to force rebalancing onto */
	uint32_t *force_rebalance_nodes;
	struct ctdb_tunable_list *tun_list;
	struct ipalloc_state *ipalloc_state;
	/* Known public IPs, indexed by PNN */
	struct ctdb_public_ip_list *known_ips;
	/* Result of the IP allocation */
	struct public_ip_list *all_ips;
	/* Per-node failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
static void takeover_failed(struct tevent_req *req, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
706 static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
707 struct tevent_context *ev,
708 struct ctdb_client_context *client,
709 uint32_t *force_rebalance_nodes)
711 struct tevent_req *req, *subreq;
712 struct takeover_state *state;
713 struct ctdb_req_control request;
715 req = tevent_req_create(mem_ctx, &state, struct takeover_state);
716 if (req == NULL) {
717 return NULL;
720 state->ev = ev;
721 state->client = client;
722 state->force_rebalance_nodes = force_rebalance_nodes;
723 state->destnode = ctdb_client_pnn(client);
725 ctdb_req_control_get_all_tunables(&request);
726 subreq = ctdb_client_control_send(state, state->ev, state->client,
727 state->destnode, TIMEOUT(),
728 &request);
729 if (tevent_req_nomem(subreq, req)) {
730 return tevent_req_post(req, ev);
732 tevent_req_set_callback(subreq, takeover_tunables_done, req);
734 return req;
737 static void takeover_tunables_done(struct tevent_req *subreq)
739 struct tevent_req *req = tevent_req_callback_data(
740 subreq, struct tevent_req);
741 struct takeover_state *state = tevent_req_data(
742 req, struct takeover_state);
743 struct ctdb_reply_control *reply;
744 struct ctdb_req_control request;
745 int ret;
746 bool status;
748 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
749 TALLOC_FREE(subreq);
750 if (! status) {
751 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
752 tevent_req_error(req, ret);
753 return;
756 ret = ctdb_reply_control_get_all_tunables(reply, state,
757 &state->tun_list);
758 if (ret != 0) {
759 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
760 tevent_req_error(req, ret);
761 return;
764 talloc_free(reply);
766 takeover_timeout = state->tun_list->takeover_timeout;
768 ctdb_req_control_get_nodemap(&request);
769 subreq = ctdb_client_control_send(state, state->ev, state->client,
770 state->destnode, TIMEOUT(),
771 &request);
772 if (tevent_req_nomem(subreq, req)) {
773 return;
775 tevent_req_set_callback(subreq, takeover_nodemap_done, req);
778 static void takeover_nodemap_done(struct tevent_req *subreq)
780 struct tevent_req *req = tevent_req_callback_data(
781 subreq, struct tevent_req);
782 struct takeover_state *state = tevent_req_data(
783 req, struct takeover_state);
784 struct ctdb_reply_control *reply;
785 bool status;
786 int ret;
787 struct ctdb_node_map *nodemap;
789 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
790 TALLOC_FREE(subreq);
791 if (! status) {
792 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
793 state->destnode, ret);
794 tevent_req_error(req, ret);
795 return;
798 ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
799 if (ret != 0) {
800 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
801 tevent_req_error(req, ret);
802 return;
805 state->num_nodes = nodemap->num;
807 state->num_connected = list_of_connected_nodes(nodemap,
808 CTDB_UNKNOWN_PNN, state,
809 &state->pnns_connected);
810 if (state->num_connected <= 0) {
811 tevent_req_error(req, ENOMEM);
812 return;
815 state->num_active = list_of_active_nodes(nodemap,
816 CTDB_UNKNOWN_PNN, state,
817 &state->pnns_active);
818 if (state->num_active <= 0) {
819 tevent_req_error(req, ENOMEM);
820 return;
823 /* Default timeout for early jump to IPREALLOCATED. See below
824 * for explanation of 3 times...
826 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
828 state->ban_credits = talloc_zero_array(state, uint32_t,
829 state->num_nodes);
830 if (tevent_req_nomem(state->ban_credits, req)) {
831 return;
834 if (state->tun_list->disable_ip_failover != 0) {
835 /* IP failover is completely disabled so just send out
836 * ipreallocated event.
838 takeover_ipreallocated(req);
839 return;
842 state->ipalloc_state =
843 ipalloc_state_init(
844 state, state->num_nodes,
845 determine_algorithm(state->tun_list),
846 (state->tun_list->no_ip_takeover != 0),
847 (state->tun_list->no_ip_failback != 0),
848 (state->tun_list->no_ip_host_on_all_disabled != 0),
849 state->force_rebalance_nodes);
850 if (tevent_req_nomem(state->ipalloc_state, req)) {
851 return;
854 ipalloc_set_node_flags(state->ipalloc_state, nodemap);
856 subreq = get_public_ips_send(state, state->ev, state->client,
857 state->pnns_active, state->num_active,
858 state->num_nodes, false);
859 if (tevent_req_nomem(subreq, req)) {
860 return;
863 tevent_req_set_callback(subreq, takeover_known_ips_done, req);
866 static void takeover_known_ips_done(struct tevent_req *subreq)
868 struct tevent_req *req = tevent_req_callback_data(
869 subreq, struct tevent_req);
870 struct takeover_state *state = tevent_req_data(
871 req, struct takeover_state);
872 int ret;
873 bool status;
874 uint32_t *pnns = NULL;
875 int count, i;
877 status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
878 TALLOC_FREE(subreq);
880 if (! status) {
881 D_ERR("Failed to fetch known public IPs\n");
882 tevent_req_error(req, ret);
883 return;
886 /* Get available IPs from active nodes that actually have known IPs */
888 pnns = talloc_zero_array(state, uint32_t, state->num_active);
889 if (tevent_req_nomem(pnns, req)) {
890 return;
893 count = 0;
894 for (i = 0; i < state->num_active; i++) {
895 uint32_t pnn = state->pnns_active[i];
897 /* If pnn has IPs then fetch available IPs from it */
898 if (state->known_ips[pnn].num > 0) {
899 pnns[count] = pnn;
900 count++;
904 subreq = get_public_ips_send(state, state->ev, state->client,
905 pnns, count,
906 state->num_nodes, true);
907 if (tevent_req_nomem(subreq, req)) {
908 return;
911 tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
914 static void takeover_avail_ips_done(struct tevent_req *subreq)
916 struct tevent_req *req = tevent_req_callback_data(
917 subreq, struct tevent_req);
918 struct takeover_state *state = tevent_req_data(
919 req, struct takeover_state);
920 bool status;
921 int ret;
922 struct ctdb_public_ip_list *available_ips;
924 status = get_public_ips_recv(subreq, &ret, state, &available_ips);
925 TALLOC_FREE(subreq);
927 if (! status) {
928 D_ERR("Failed to fetch available public IPs\n");
929 tevent_req_error(req, ret);
930 return;
933 ipalloc_set_public_ips(state->ipalloc_state,
934 state->known_ips, available_ips);
936 if (! ipalloc_can_host_ips(state->ipalloc_state)) {
937 D_NOTICE("No nodes available to host public IPs yet\n");
938 takeover_ipreallocated(req);
939 return;
942 /* Do the IP reassignment calculations */
943 state->all_ips = ipalloc(state->ipalloc_state);
944 if (tevent_req_nomem(state->all_ips, req)) {
945 return;
948 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
949 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
950 * seconds. However, RELEASE_IP can take longer due to TCP
951 * connection killing, so sometimes needs more time.
952 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
953 * seconds across all 3 stages. No explicit expiry checks are
954 * needed before each stage because tevent is smart enough to
955 * fire the timeouts even if they are in the past. Initialise
956 * this here so it explicitly covers the stages we're
957 * interested in but, in particular, not the time taken by the
958 * ipalloc().
960 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
962 subreq = release_ip_send(state, state->ev, state->client,
963 state->pnns_connected, state->num_connected,
964 state->timeout, state->all_ips,
965 state->ban_credits);
966 if (tevent_req_nomem(subreq, req)) {
967 return;
969 tevent_req_set_callback(subreq, takeover_release_ip_done, req);
972 static void takeover_release_ip_done(struct tevent_req *subreq)
974 struct tevent_req *req = tevent_req_callback_data(
975 subreq, struct tevent_req);
976 struct takeover_state *state = tevent_req_data(
977 req, struct takeover_state);
978 int ret;
979 bool status;
981 status = release_ip_recv(subreq, &ret);
982 TALLOC_FREE(subreq);
984 if (! status) {
985 takeover_failed(req, ret);
986 return;
989 /* All released, now for takeovers */
991 subreq = take_ip_send(state, state->ev, state->client,
992 state->timeout, state->all_ips,
993 state->ban_credits);
994 if (tevent_req_nomem(subreq, req)) {
995 return;
997 tevent_req_set_callback(subreq, takeover_take_ip_done, req);
1000 static void takeover_take_ip_done(struct tevent_req *subreq)
1002 struct tevent_req *req = tevent_req_callback_data(
1003 subreq, struct tevent_req);
1004 int ret = 0;
1005 bool status;
1007 status = take_ip_recv(subreq, &ret);
1008 TALLOC_FREE(subreq);
1010 if (! status) {
1011 takeover_failed(req, ret);
1012 return;
1015 takeover_ipreallocated(req);
1018 static void takeover_ipreallocated(struct tevent_req *req)
1020 struct takeover_state *state = tevent_req_data(
1021 req, struct takeover_state);
1022 struct tevent_req *subreq;
1024 subreq = ipreallocated_send(state, state->ev, state->client,
1025 state->pnns_connected,
1026 state->num_connected,
1027 state->timeout,
1028 state->ban_credits);
1029 if (tevent_req_nomem(subreq, req)) {
1030 return;
1032 tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
1035 static void takeover_ipreallocated_done(struct tevent_req *subreq)
1037 struct tevent_req *req = tevent_req_callback_data(
1038 subreq, struct tevent_req);
1039 int ret;
1040 bool status;
1042 status = ipreallocated_recv(subreq, &ret);
1043 TALLOC_FREE(subreq);
1045 if (! status) {
1046 takeover_failed(req, ret);
1047 return;
1050 tevent_req_done(req);
/* Carried across the banning message sent after a failed takeover run */
struct takeover_failed_state {
	/* The takeover request to fail once the message has been sent */
	struct tevent_req *req;
	/* The error that the takeover request fails with */
	int ret;
};
1058 void takeover_failed(struct tevent_req *req, int ret)
1060 struct takeover_state *state = tevent_req_data(
1061 req, struct takeover_state);
1062 struct tevent_req *subreq;
1063 uint32_t max_pnn = CTDB_UNKNOWN_PNN;
1064 int max_credits = 0;
1065 int pnn;
1067 /* Check that bans are enabled */
1068 if (state->tun_list->enable_bans == 0) {
1069 tevent_req_error(req, ret);
1070 return;
1073 for (pnn = 0; pnn < state->num_nodes; pnn++) {
1074 if (state->ban_credits[pnn] > max_credits) {
1075 max_pnn = pnn;
1076 max_credits = state->ban_credits[pnn];
1080 if (max_credits > 0) {
1081 struct ctdb_req_message message;
1082 struct takeover_failed_state *substate;
1084 D_WARNING("Assigning banning credits to node %u\n", max_pnn);
1086 substate = talloc_zero(state, struct takeover_failed_state);
1087 if (tevent_req_nomem(substate, req)) {
1088 return;
1090 substate->req = req;
1091 substate->ret = ret;
1093 message.srvid = CTDB_SRVID_BANNING;
1094 message.data.pnn = max_pnn;
1096 subreq = ctdb_client_message_send(
1097 state, state->ev, state->client,
1098 ctdb_client_pnn(state->client),
1099 &message);
1100 if (subreq == NULL) {
1101 D_ERR("failed to assign banning credits\n");
1102 tevent_req_error(req, ret);
1103 return;
1105 tevent_req_set_callback(subreq, takeover_failed_done, substate);
1106 } else {
1107 tevent_req_error(req, ret);
1111 static void takeover_failed_done(struct tevent_req *subreq)
1113 struct takeover_failed_state *substate = tevent_req_callback_data(
1114 subreq, struct takeover_failed_state);
1115 struct tevent_req *req = substate->req;
1116 int ret;
1117 bool status;
1119 status = ctdb_client_message_recv(subreq, &ret);
1120 TALLOC_FREE(subreq);
1121 if (! status) {
1122 D_ERR("failed to assign banning credits, ret=%d\n", ret);
1125 ret = substate->ret;
1126 talloc_free(substate);
1127 tevent_req_error(req, ret);
/*
 * Collect the result of takeover_send().  On failure the error is
 * stored in *perr (if non-NULL); on success *perr is left untouched.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	(void) generic_recv(req, perr);
}
1135 static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
1137 char *strv = NULL;
1138 int num, i, ret;
1139 char *t;
1140 uint32_t *nodes;
1142 ret = strv_split(mem_ctx, &strv, s, ",");
1143 if (ret != 0) {
1144 D_ERR("out of memory\n");
1145 return NULL;
1148 num = strv_count(strv);
1150 nodes = talloc_array(mem_ctx, uint32_t, num);
1151 if (nodes == NULL) {
1152 D_ERR("out of memory\n");
1153 return NULL;
1156 t = NULL;
1157 for (i = 0; i < num; i++) {
1158 t = strv_next(strv, t);
1159 nodes[i] = atoi(t);
1162 return nodes;
/* Print a one-line usage summary to stderr */
static void usage(const char *progname)
{
	fprintf(stderr,
		"\nUsage: %s <output-fd> <ctdb-socket-path> "
		"[<force-rebalance-nodes>]\n",
		progname);
}
1174 * Arguments - write fd, socket path
1176 int main(int argc, const char *argv[])
1178 int write_fd;
1179 const char *sockpath;
1180 TALLOC_CTX *mem_ctx;
1181 struct tevent_context *ev;
1182 struct ctdb_client_context *client;
1183 int ret;
1184 struct tevent_req *req;
1185 uint32_t *force_rebalance_nodes = NULL;
1187 if (argc < 3 || argc > 4) {
1188 usage(argv[0]);
1189 exit(1);
1192 write_fd = atoi(argv[1]);
1193 sockpath = argv[2];
1195 mem_ctx = talloc_new(NULL);
1196 if (mem_ctx == NULL) {
1197 fprintf(stderr, "talloc_new() failed\n");
1198 ret = ENOMEM;
1199 goto done;
1202 if (argc == 4) {
1203 force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
1204 if (force_rebalance_nodes == NULL) {
1205 usage(argv[0]);
1206 ret = EINVAL;
1207 goto done;
1211 ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
1212 if (ret != 0) {
1213 fprintf(stderr,
1214 "ctdb-takeover: Unable to initialize logging\n");
1215 goto done;
1218 ev = tevent_context_init(mem_ctx);
1219 if (ev == NULL) {
1220 D_ERR("tevent_context_init() failed\n");
1221 ret = ENOMEM;
1222 goto done;
1225 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1226 if (ret != 0) {
1227 D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
1228 goto done;
1231 req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
1232 if (req == NULL) {
1233 D_ERR("takeover_send() failed\n");
1234 ret = 1;
1235 goto done;
1238 if (! tevent_req_poll(req, ev)) {
1239 D_ERR("tevent_req_poll() failed\n");
1240 ret = 1;
1241 goto done;
1244 takeover_recv(req, &ret);
1245 TALLOC_FREE(req);
1246 if (ret != 0) {
1247 D_ERR("takeover run failed, ret=%d\n", ret);
1250 done:
1251 sys_write_v(write_fd, &ret, sizeof(ret));
1253 talloc_free(mem_ctx);
1254 return ret;