source4 dsdb: Allow duplicate non local objectSIDs
[Samba.git] / ctdb / server / ctdb_takeover_helper.c
blob9461e7149f0c98f503b1c4a1c31aef6b2f32f418
1 /*
2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
9 and ctdb_takeover.c
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/network.h"
30 #include "system/filesys.h"
32 #include <popt.h>
33 #include <talloc.h>
34 #include <tevent.h>
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "protocol/protocol_util.h"
46 #include "client/client.h"
48 #include "common/logging.h"
50 #include "server/ipalloc.h"
/* Timeout (seconds) used for individual controls.  Starts at 9 and is
 * overwritten in takeover_tunables_done() with the takeover_timeout
 * tunable fetched from the daemon. */
52 static int takeover_timeout = 9;
/* Timeout value derived from the current takeover_timeout setting via
 * timeval_current_ofs(). */
54 #define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
/*
 * Utility functions
 */
/*
 * Common receive helper for requests that produce no result data.
 *
 * Returns true when req finished without error.  Otherwise returns
 * false and, if perr is not NULL, stores the request's Unix error
 * code in *perr.  *perr is not written on success.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int unix_err;
	bool failed = tevent_req_is_unix_error(req, &unix_err);

	if (failed && perr != NULL) {
		*perr = unix_err;
	}

	return !failed;
}
74 static enum ipalloc_algorithm
75 determine_algorithm(const struct ctdb_tunable_list *tunables)
77 switch (tunables->ip_alloc_algorithm) {
78 case 0:
79 return IPALLOC_DETERMINISTIC;
80 case 1:
81 return IPALLOC_NONDETERMINISTIC;
82 case 2:
83 return IPALLOC_LCP2;
84 default:
85 return IPALLOC_LCP2;
89 /**********************************************************************/
/* Request state for fetching public IP lists from a set of nodes */
struct get_public_ips_state {
	/* Nodes being queried and how many of them there are */
	uint32_t *pnns;
	int count;
	/* Results, one slot per node, indexed by PNN */
	struct ctdb_public_ip_list *ips;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

static void get_public_ips_done(struct tevent_req *subreq);
100 static struct tevent_req *get_public_ips_send(
101 TALLOC_CTX *mem_ctx,
102 struct tevent_context *ev,
103 struct ctdb_client_context *client,
104 uint32_t *pnns,
105 int count, int num_nodes,
106 uint32_t *ban_credits,
107 bool available_only)
109 struct tevent_req *req, *subreq;
110 struct get_public_ips_state *state;
111 struct ctdb_req_control request;
113 req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
114 if (req == NULL) {
115 return NULL;
118 state->pnns = pnns;
119 state->count = count;
120 state->ban_credits = ban_credits;
122 state->ips = talloc_zero_array(state,
123 struct ctdb_public_ip_list,
124 num_nodes);
125 if (tevent_req_nomem(state->ips, req)) {
126 return tevent_req_post(req, ev);
129 /* Short circuit if no nodes being asked for IPs */
130 if (state->count == 0) {
131 tevent_req_done(req);
132 return tevent_req_post(req, ev);
135 ctdb_req_control_get_public_ips(&request, available_only);
136 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
137 state->pnns,
138 state->count,
139 TIMEOUT(), &request);
140 if (tevent_req_nomem(subreq, req)) {
141 return tevent_req_post(req, ev);
143 tevent_req_set_callback(subreq, get_public_ips_done, req);
145 return req;
148 static void get_public_ips_done(struct tevent_req *subreq)
150 struct tevent_req *req = tevent_req_callback_data(
151 subreq, struct tevent_req);
152 struct get_public_ips_state *state = tevent_req_data(
153 req, struct get_public_ips_state);
154 struct ctdb_reply_control **reply;
155 int *err_list;
156 int ret, i;
157 bool status, found_errors;
159 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
160 &reply);
161 TALLOC_FREE(subreq);
162 if (! status) {
163 found_errors = false;
164 for (i = 0; i < state->count; i++) {
165 if (err_list[i] != 0) {
166 uint32_t pnn = state->pnns[i];
168 D_ERR("control GET_PUBLIC_IPS failed on "
169 "node %u, ret=%d\n", pnn, err_list[i]);
171 state->ban_credits[pnn]++;
172 found_errors = true;
176 tevent_req_error(req, ret);
177 return;
180 found_errors = false;
181 for (i = 0; i < state->count; i++) {
182 uint32_t pnn;
183 struct ctdb_public_ip_list *ips;
185 pnn = state->pnns[i];
186 ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
187 &ips);
188 if (ret != 0) {
189 D_ERR("control GET_PUBLIC_IPS failed on "
190 "node %u\n", pnn);
191 state->ban_credits[pnn]++;
192 found_errors = true;
193 continue;
196 D_INFO("Fetched public IPs from node %u\n", pnn);
197 state->ips[pnn] = *ips;
200 if (found_errors) {
201 tevent_req_error(req, EIO);
202 return;
205 talloc_free(reply);
207 tevent_req_done(req);
210 static bool get_public_ips_recv(struct tevent_req *req, int *perr,
211 TALLOC_CTX *mem_ctx,
212 struct ctdb_public_ip_list **ips)
214 struct get_public_ips_state *state = tevent_req_data(
215 req, struct get_public_ips_state);
216 int err;
218 if (tevent_req_is_unix_error(req, &err)) {
219 if (perr != NULL) {
220 *perr = err;
222 return false;
225 *ips = talloc_steal(mem_ctx, state->ips);
227 return true;
230 /**********************************************************************/
/* Overall state for the RELEASE_IP stage */
struct release_ip_state {
	/* Multi-controls sent (one per IP), answered, and failed */
	int num_sent;
	int num_replies;
	int num_fails;
	/* Error code from one of the failures, reported to the caller */
	int err_any;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
/* Per-IP state attached to each RELEASE_IP multi-control */
struct release_ip_one_state {
	/* The parent release_ip request */
	struct tevent_req *req;
	/* Nodes asked to release this IP and how many there are */
	uint32_t *pnns;
	int count;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};

static void release_ip_done(struct tevent_req *subreq);
249 static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
250 struct tevent_context *ev,
251 struct ctdb_client_context *client,
252 uint32_t *pnns,
253 int count,
254 struct timeval timeout,
255 struct public_ip_list *all_ips,
256 uint32_t *ban_credits)
258 struct tevent_req *req, *subreq;
259 struct release_ip_state *state;
260 struct ctdb_req_control request;
261 struct public_ip_list *tmp_ip;
263 req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
264 if (req == NULL) {
265 return NULL;
268 state->num_sent = 0;
269 state->num_replies = 0;
270 state->num_fails = 0;
271 state->ban_credits = ban_credits;
273 /* Send a RELEASE_IP to all nodes that should not be hosting
274 * each IP. For each IP, all but one of these will be
275 * redundant. However, the redundant ones are used to tell
276 * nodes which node should be hosting the IP so that commands
277 * like "ctdb ip" can display a particular nodes idea of who
278 * is hosting what. */
279 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
280 struct release_ip_one_state *substate;
281 struct ctdb_public_ip ip;
282 int i;
284 substate = talloc_zero(state, struct release_ip_one_state);
285 if (tevent_req_nomem(substate, req)) {
286 return tevent_req_post(req, ev);
289 substate->pnns = talloc_zero_array(substate, uint32_t, count);
290 if (tevent_req_nomem(substate->pnns, req)) {
291 return tevent_req_post(req, ev);
294 substate->count = 0;
295 substate->req = req;
297 substate->ip_str = ctdb_sock_addr_to_string(substate,
298 &tmp_ip->addr,
299 false);
300 if (tevent_req_nomem(substate->ip_str, req)) {
301 return tevent_req_post(req, ev);
304 for (i = 0; i < count; i++) {
305 uint32_t pnn = pnns[i];
307 /* Skip this node if IP is not known */
308 if (! bitmap_query(tmp_ip->known_on, pnn)) {
309 continue;
312 /* If pnn is not the node that should be
313 * hosting the IP then add it to the list of
314 * nodes that need to do a release. */
315 if (tmp_ip->pnn != pnn) {
316 substate->pnns[substate->count] = pnn;
317 substate->count++;
321 if (substate->count == 0) {
322 /* No releases to send for this address... */
323 TALLOC_FREE(substate);
324 continue;
327 ip.pnn = tmp_ip->pnn;
328 ip.addr = tmp_ip->addr;
329 ctdb_req_control_release_ip(&request, &ip);
330 subreq = ctdb_client_control_multi_send(state, ev, client,
331 substate->pnns,
332 substate->count,
333 timeout,/* cumulative */
334 &request);
335 if (tevent_req_nomem(subreq, req)) {
336 return tevent_req_post(req, ev);
338 tevent_req_set_callback(subreq, release_ip_done, substate);
340 state->num_sent++;
343 /* None sent, finished... */
344 if (state->num_sent == 0) {
345 tevent_req_done(req);
346 return tevent_req_post(req, ev);
349 return req;
352 static void release_ip_done(struct tevent_req *subreq)
354 struct release_ip_one_state *substate = tevent_req_callback_data(
355 subreq, struct release_ip_one_state);
356 struct tevent_req *req = substate->req;
357 struct release_ip_state *state = tevent_req_data(
358 req, struct release_ip_state);
359 int ret, i;
360 int *err_list;
361 bool status, found_errors;
363 status = ctdb_client_control_multi_recv(subreq, &ret, state,
364 &err_list, NULL);
365 TALLOC_FREE(subreq);
367 if (status) {
368 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
369 substate->ip_str, substate->count);
370 goto done;
373 /* Get some clear error messages out of err_list and count
374 * banning credits
376 found_errors = false;
377 for (i = 0; i < substate->count; i++) {
378 int err = err_list[i];
379 if (err != 0) {
380 uint32_t pnn = substate->pnns[i];
382 D_ERR("RELEASE_IP %s failed on node %u, "
383 "ret=%d\n", substate->ip_str, pnn, err);
385 state->ban_credits[pnn]++;
386 state->err_any = err;
387 found_errors = true;
390 if (! found_errors) {
391 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
392 substate->ip_str, ret);
393 state->err_any = EIO;
396 state->num_fails++;
398 done:
399 talloc_free(substate);
401 state->num_replies++;
403 if (state->num_replies < state->num_sent) {
404 /* Not all replies received, don't go further */
405 return;
408 if (state->num_fails > 0) {
409 tevent_req_error(req, state->err_any);
410 return;
413 tevent_req_done(req);
/*
 * Collect the result of release_ip_send(): true on success, false
 * with the error code in *perr (if not NULL) on failure.
 */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
421 /**********************************************************************/
/* Overall state for the TAKEOVER_IP stage */
struct take_ip_state {
	/* Controls sent (one per assigned IP), answered, and failed */
	int num_sent;
	int num_replies;
	int num_fails;
	/* Error code from one of the failures, reported to the caller */
	int err_any;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
/* Per-IP state attached to each TAKEOVER_IP control */
struct take_ip_one_state {
	/* The parent take_ip request */
	struct tevent_req *req;
	/* Node the control was sent to */
	uint32_t pnn;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};

static void take_ip_done(struct tevent_req *subreq);
439 static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
440 struct tevent_context *ev,
441 struct ctdb_client_context *client,
442 struct timeval timeout,
443 struct public_ip_list *all_ips,
444 uint32_t *ban_credits)
446 struct tevent_req *req, *subreq;
447 struct take_ip_state *state;
448 struct ctdb_req_control request;
449 struct public_ip_list *tmp_ip;
451 req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
452 if (req == NULL) {
453 return NULL;
456 state->num_sent = 0;
457 state->num_replies = 0;
458 state->num_fails = 0;
459 state->ban_credits = ban_credits;
461 /* For each IP, send a TAKOVER_IP to the node that should be
462 * hosting it. Many of these will often be redundant (since
463 * the allocation won't have changed) but they can be useful
464 * to recover from inconsistencies. */
465 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
466 struct take_ip_one_state *substate;
467 struct ctdb_public_ip ip;
469 if (tmp_ip->pnn == -1) {
470 /* IP will be unassigned */
471 continue;
474 substate = talloc_zero(state, struct take_ip_one_state);
475 if (tevent_req_nomem(substate, req)) {
476 return tevent_req_post(req, ev);
479 substate->req = req;
480 substate->pnn = tmp_ip->pnn;
482 substate->ip_str = ctdb_sock_addr_to_string(substate,
483 &tmp_ip->addr,
484 false);
485 if (tevent_req_nomem(substate->ip_str, req)) {
486 return tevent_req_post(req, ev);
489 ip.pnn = tmp_ip->pnn;
490 ip.addr = tmp_ip->addr;
491 ctdb_req_control_takeover_ip(&request, &ip);
492 subreq = ctdb_client_control_send(
493 state, ev, client, tmp_ip->pnn,
494 timeout, /* cumulative */
495 &request);
496 if (tevent_req_nomem(subreq, req)) {
497 return tevent_req_post(req, ev);
499 tevent_req_set_callback(subreq, take_ip_done, substate);
501 state->num_sent++;
504 /* None sent, finished... */
505 if (state->num_sent == 0) {
506 tevent_req_done(req);
507 return tevent_req_post(req, ev);
510 return req;
513 static void take_ip_done(struct tevent_req *subreq)
515 struct take_ip_one_state *substate = tevent_req_callback_data(
516 subreq, struct take_ip_one_state);
517 struct tevent_req *req = substate->req;
518 struct ctdb_reply_control *reply;
519 struct take_ip_state *state = tevent_req_data(
520 req, struct take_ip_state);
521 int ret = 0;
522 bool status;
524 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
525 TALLOC_FREE(subreq);
527 if (! status) {
528 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
529 substate->ip_str, substate->pnn, ret);
530 goto fail;
533 ret = ctdb_reply_control_takeover_ip(reply);
534 if (ret != 0) {
535 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
536 substate->ip_str, substate->pnn, ret);
537 goto fail;
540 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
541 substate->ip_str, substate->pnn);
542 goto done;
544 fail:
545 state->ban_credits[substate->pnn]++;
546 state->num_fails++;
547 state->err_any = ret;
549 done:
550 talloc_free(substate);
552 state->num_replies++;
554 if (state->num_replies < state->num_sent) {
555 /* Not all replies received, don't go further */
556 return;
559 if (state->num_fails > 0) {
560 tevent_req_error(req, state->err_any);
561 return;
564 tevent_req_done(req);
/*
 * Collect the result of take_ip_send(): true on success, false with
 * the error code in *perr (if not NULL) on failure.
 */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
572 /**********************************************************************/
/* Request state for the IPREALLOCATED stage */
struct ipreallocated_state {
	/* Nodes the control is sent to and how many there are */
	uint32_t *pnns;
	int count;
	/* Caller-owned failure counters, indexed by PNN */
	uint32_t *ban_credits;
};

static void ipreallocated_done(struct tevent_req *subreq);
582 static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
583 struct tevent_context *ev,
584 struct ctdb_client_context *client,
585 uint32_t *pnns,
586 int count,
587 struct timeval timeout,
588 uint32_t *ban_credits)
590 struct tevent_req *req, *subreq;
591 struct ipreallocated_state *state;
592 struct ctdb_req_control request;
594 req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
595 if (req == NULL) {
596 return NULL;
599 state->pnns = pnns;
600 state->count = count;
601 state->ban_credits = ban_credits;
603 ctdb_req_control_ipreallocated(&request);
604 subreq = ctdb_client_control_multi_send(state, ev, client,
605 pnns, count,
606 timeout, /* cumulative */
607 &request);
608 if (tevent_req_nomem(subreq, req)) {
609 return tevent_req_post(req, ev);
611 tevent_req_set_callback(subreq, ipreallocated_done, req);
613 return req;
616 static void ipreallocated_done(struct tevent_req *subreq)
618 struct tevent_req *req = tevent_req_callback_data(
619 subreq, struct tevent_req);
620 struct ipreallocated_state *state = tevent_req_data(
621 req, struct ipreallocated_state);
622 int *err_list = NULL;
623 int ret, i;
624 bool status, found_errors;
626 status = ctdb_client_control_multi_recv(subreq, &ret, state,
627 &err_list, NULL);
628 TALLOC_FREE(subreq);
630 if (status) {
631 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
632 tevent_req_done(req);
633 return;
636 /* Get some clear error messages out of err_list and count
637 * banning credits
639 found_errors = false;
640 for (i = 0; i < state->count; i++) {
641 int err = err_list[i];
642 if (err != 0) {
643 uint32_t pnn = state->pnns[i];
645 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
646 pnn, err);
648 state->ban_credits[pnn]++;
649 found_errors = true;
653 if (! found_errors) {
654 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
657 tevent_req_error(req, ret);
/*
 * Collect the result of ipreallocated_send(): true on success, false
 * with the error code in *perr (if not NULL) on failure.
 */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
665 /**********************************************************************/
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Get tunables
 * - Get nodemap
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then jump to IPREALLOCATED
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKEOVER_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes
 */
/* State carried through all stages of a takeover run */
struct takeover_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	/* Timeout shared by the RELEASE_IP, TAKEOVER_IP and
	 * IPREALLOCATED stages */
	struct timeval timeout;
	/* Number of entries in the node map */
	int num_nodes;
	/* Connected nodes, from list_of_connected_nodes() */
	uint32_t *pnns_connected;
	int num_connected;
	/* Active nodes, from list_of_active_nodes() */
	uint32_t *pnns_active;
	int num_active;
	/* Node the tunables and node map are fetched from */
	uint32_t destnode;
	/* Passed through to ipalloc_state_init() */
	uint32_t *force_rebalance_nodes;
	struct ctdb_tunable_list *tun_list;
	struct ipalloc_state *ipalloc_state;
	/* Known public IPs per node, indexed by PNN */
	struct ctdb_public_ip_list *known_ips;
	/* Result of ipalloc() */
	struct public_ip_list *all_ips;
	/* Failure counters per node, num_nodes entries */
	uint32_t *ban_credits;
};
/* Stage callbacks, in the order the stages run */
static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
/* Failure handling; takeover_failed() takes the parent request, not a
 * subrequest */
static void takeover_failed(struct tevent_req *req, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
720 static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
721 struct tevent_context *ev,
722 struct ctdb_client_context *client,
723 uint32_t *force_rebalance_nodes)
725 struct tevent_req *req, *subreq;
726 struct takeover_state *state;
727 struct ctdb_req_control request;
729 req = tevent_req_create(mem_ctx, &state, struct takeover_state);
730 if (req == NULL) {
731 return NULL;
734 state->ev = ev;
735 state->client = client;
736 state->force_rebalance_nodes = force_rebalance_nodes;
737 state->destnode = ctdb_client_pnn(client);
739 ctdb_req_control_get_all_tunables(&request);
740 subreq = ctdb_client_control_send(state, state->ev, state->client,
741 state->destnode, TIMEOUT(),
742 &request);
743 if (tevent_req_nomem(subreq, req)) {
744 return tevent_req_post(req, ev);
746 tevent_req_set_callback(subreq, takeover_tunables_done, req);
748 return req;
751 static void takeover_tunables_done(struct tevent_req *subreq)
753 struct tevent_req *req = tevent_req_callback_data(
754 subreq, struct tevent_req);
755 struct takeover_state *state = tevent_req_data(
756 req, struct takeover_state);
757 struct ctdb_reply_control *reply;
758 struct ctdb_req_control request;
759 int ret;
760 bool status;
762 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
763 TALLOC_FREE(subreq);
764 if (! status) {
765 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
766 tevent_req_error(req, ret);
767 return;
770 ret = ctdb_reply_control_get_all_tunables(reply, state,
771 &state->tun_list);
772 if (ret != 0) {
773 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
774 tevent_req_error(req, ret);
775 return;
778 talloc_free(reply);
780 takeover_timeout = state->tun_list->takeover_timeout;
782 ctdb_req_control_get_nodemap(&request);
783 subreq = ctdb_client_control_send(state, state->ev, state->client,
784 state->destnode, TIMEOUT(),
785 &request);
786 if (tevent_req_nomem(subreq, req)) {
787 return;
789 tevent_req_set_callback(subreq, takeover_nodemap_done, req);
792 static void takeover_nodemap_done(struct tevent_req *subreq)
794 struct tevent_req *req = tevent_req_callback_data(
795 subreq, struct tevent_req);
796 struct takeover_state *state = tevent_req_data(
797 req, struct takeover_state);
798 struct ctdb_reply_control *reply;
799 bool status;
800 int ret;
801 struct ctdb_node_map *nodemap;
803 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
804 TALLOC_FREE(subreq);
805 if (! status) {
806 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
807 state->destnode, ret);
808 tevent_req_error(req, ret);
809 return;
812 ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
813 if (ret != 0) {
814 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
815 tevent_req_error(req, ret);
816 return;
819 state->num_nodes = nodemap->num;
821 state->num_connected = list_of_connected_nodes(nodemap,
822 CTDB_UNKNOWN_PNN, state,
823 &state->pnns_connected);
824 if (state->num_connected <= 0) {
825 tevent_req_error(req, ENOMEM);
826 return;
829 state->num_active = list_of_active_nodes(nodemap,
830 CTDB_UNKNOWN_PNN, state,
831 &state->pnns_active);
832 if (state->num_active <= 0) {
833 tevent_req_error(req, ENOMEM);
834 return;
837 /* Default timeout for early jump to IPREALLOCATED. See below
838 * for explanation of 3 times...
840 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
842 state->ban_credits = talloc_zero_array(state, uint32_t,
843 state->num_nodes);
844 if (tevent_req_nomem(state->ban_credits, req)) {
845 return;
848 if (state->tun_list->disable_ip_failover != 0) {
849 /* IP failover is completely disabled so just send out
850 * ipreallocated event.
852 takeover_ipreallocated(req);
853 return;
856 state->ipalloc_state =
857 ipalloc_state_init(
858 state, state->num_nodes,
859 determine_algorithm(state->tun_list),
860 (state->tun_list->no_ip_takeover != 0),
861 (state->tun_list->no_ip_failback != 0),
862 (state->tun_list->no_ip_host_on_all_disabled != 0),
863 state->force_rebalance_nodes);
864 if (tevent_req_nomem(state->ipalloc_state, req)) {
865 return;
868 ipalloc_set_node_flags(state->ipalloc_state, nodemap);
870 subreq = get_public_ips_send(state, state->ev, state->client,
871 state->pnns_connected, state->num_connected,
872 state->num_nodes, state->ban_credits,
873 false);
874 if (tevent_req_nomem(subreq, req)) {
875 return;
878 tevent_req_set_callback(subreq, takeover_known_ips_done, req);
881 static void takeover_known_ips_done(struct tevent_req *subreq)
883 struct tevent_req *req = tevent_req_callback_data(
884 subreq, struct tevent_req);
885 struct takeover_state *state = tevent_req_data(
886 req, struct takeover_state);
887 int ret;
888 bool status;
889 uint32_t *pnns = NULL;
890 int count, i;
892 status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
893 TALLOC_FREE(subreq);
895 if (! status) {
896 D_ERR("Failed to fetch known public IPs\n");
897 takeover_failed(req, ret);
898 return;
901 /* Get available IPs from active nodes that actually have known IPs */
903 pnns = talloc_zero_array(state, uint32_t, state->num_active);
904 if (tevent_req_nomem(pnns, req)) {
905 return;
908 count = 0;
909 for (i = 0; i < state->num_active; i++) {
910 uint32_t pnn = state->pnns_active[i];
912 /* If pnn has IPs then fetch available IPs from it */
913 if (state->known_ips[pnn].num > 0) {
914 pnns[count] = pnn;
915 count++;
919 subreq = get_public_ips_send(state, state->ev, state->client,
920 pnns, count,
921 state->num_nodes, state->ban_credits,
922 true);
923 if (tevent_req_nomem(subreq, req)) {
924 return;
927 tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
930 static void takeover_avail_ips_done(struct tevent_req *subreq)
932 struct tevent_req *req = tevent_req_callback_data(
933 subreq, struct tevent_req);
934 struct takeover_state *state = tevent_req_data(
935 req, struct takeover_state);
936 bool status;
937 int ret;
938 struct ctdb_public_ip_list *available_ips;
940 status = get_public_ips_recv(subreq, &ret, state, &available_ips);
941 TALLOC_FREE(subreq);
943 if (! status) {
944 D_ERR("Failed to fetch available public IPs\n");
945 takeover_failed(req, ret);
946 return;
949 ipalloc_set_public_ips(state->ipalloc_state,
950 state->known_ips, available_ips);
952 if (! ipalloc_can_host_ips(state->ipalloc_state)) {
953 D_NOTICE("No nodes available to host public IPs yet\n");
954 takeover_ipreallocated(req);
955 return;
958 /* Do the IP reassignment calculations */
959 state->all_ips = ipalloc(state->ipalloc_state);
960 if (tevent_req_nomem(state->all_ips, req)) {
961 return;
964 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
965 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
966 * seconds. However, RELEASE_IP can take longer due to TCP
967 * connection killing, so sometimes needs more time.
968 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
969 * seconds across all 3 stages. No explicit expiry checks are
970 * needed before each stage because tevent is smart enough to
971 * fire the timeouts even if they are in the past. Initialise
972 * this here so it explicitly covers the stages we're
973 * interested in but, in particular, not the time taken by the
974 * ipalloc().
976 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
978 subreq = release_ip_send(state, state->ev, state->client,
979 state->pnns_connected, state->num_connected,
980 state->timeout, state->all_ips,
981 state->ban_credits);
982 if (tevent_req_nomem(subreq, req)) {
983 return;
985 tevent_req_set_callback(subreq, takeover_release_ip_done, req);
988 static void takeover_release_ip_done(struct tevent_req *subreq)
990 struct tevent_req *req = tevent_req_callback_data(
991 subreq, struct tevent_req);
992 struct takeover_state *state = tevent_req_data(
993 req, struct takeover_state);
994 int ret;
995 bool status;
997 status = release_ip_recv(subreq, &ret);
998 TALLOC_FREE(subreq);
1000 if (! status) {
1001 takeover_failed(req, ret);
1002 return;
1005 /* All released, now for takeovers */
1007 subreq = take_ip_send(state, state->ev, state->client,
1008 state->timeout, state->all_ips,
1009 state->ban_credits);
1010 if (tevent_req_nomem(subreq, req)) {
1011 return;
1013 tevent_req_set_callback(subreq, takeover_take_ip_done, req);
1016 static void takeover_take_ip_done(struct tevent_req *subreq)
1018 struct tevent_req *req = tevent_req_callback_data(
1019 subreq, struct tevent_req);
1020 int ret = 0;
1021 bool status;
1023 status = take_ip_recv(subreq, &ret);
1024 TALLOC_FREE(subreq);
1026 if (! status) {
1027 takeover_failed(req, ret);
1028 return;
1031 takeover_ipreallocated(req);
1034 static void takeover_ipreallocated(struct tevent_req *req)
1036 struct takeover_state *state = tevent_req_data(
1037 req, struct takeover_state);
1038 struct tevent_req *subreq;
1040 subreq = ipreallocated_send(state, state->ev, state->client,
1041 state->pnns_connected,
1042 state->num_connected,
1043 state->timeout,
1044 state->ban_credits);
1045 if (tevent_req_nomem(subreq, req)) {
1046 return;
1048 tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
1051 static void takeover_ipreallocated_done(struct tevent_req *subreq)
1053 struct tevent_req *req = tevent_req_callback_data(
1054 subreq, struct tevent_req);
1055 int ret;
1056 bool status;
1058 status = ipreallocated_recv(subreq, &ret);
1059 TALLOC_FREE(subreq);
1061 if (! status) {
1062 takeover_failed(req, ret);
1063 return;
1066 tevent_req_done(req);
/* State kept while the banning message is in flight */
struct takeover_failed_state {
	/* The takeover request to fail once the message has been sent */
	struct tevent_req *req;
	/* The error code the takeover request will fail with */
	int ret;
};
1074 void takeover_failed(struct tevent_req *req, int ret)
1076 struct takeover_state *state = tevent_req_data(
1077 req, struct takeover_state);
1078 struct tevent_req *subreq;
1079 uint32_t max_pnn = CTDB_UNKNOWN_PNN;
1080 int max_credits = 0;
1081 int pnn;
1083 /* Check that bans are enabled */
1084 if (state->tun_list->enable_bans == 0) {
1085 tevent_req_error(req, ret);
1086 return;
1089 for (pnn = 0; pnn < state->num_nodes; pnn++) {
1090 if (state->ban_credits[pnn] > max_credits) {
1091 max_pnn = pnn;
1092 max_credits = state->ban_credits[pnn];
1096 if (max_credits > 0) {
1097 struct ctdb_req_message message;
1098 struct takeover_failed_state *substate;
1100 D_WARNING("Assigning banning credits to node %u\n", max_pnn);
1102 substate = talloc_zero(state, struct takeover_failed_state);
1103 if (tevent_req_nomem(substate, req)) {
1104 return;
1106 substate->req = req;
1107 substate->ret = ret;
1109 message.srvid = CTDB_SRVID_BANNING;
1110 message.data.pnn = max_pnn;
1112 subreq = ctdb_client_message_send(
1113 state, state->ev, state->client,
1114 ctdb_client_pnn(state->client),
1115 &message);
1116 if (subreq == NULL) {
1117 D_ERR("failed to assign banning credits\n");
1118 tevent_req_error(req, ret);
1119 return;
1121 tevent_req_set_callback(subreq, takeover_failed_done, substate);
1122 } else {
1123 tevent_req_error(req, ret);
1127 static void takeover_failed_done(struct tevent_req *subreq)
1129 struct takeover_failed_state *substate = tevent_req_callback_data(
1130 subreq, struct takeover_failed_state);
1131 struct tevent_req *req = substate->req;
1132 int ret;
1133 bool status;
1135 status = ctdb_client_message_recv(subreq, &ret);
1136 TALLOC_FREE(subreq);
1137 if (! status) {
1138 D_ERR("failed to assign banning credits, ret=%d\n", ret);
1141 ret = substate->ret;
1142 talloc_free(substate);
1143 tevent_req_error(req, ret);
/*
 * Collect the result of takeover_send().  On failure the error code
 * is stored in *perr (if not NULL); on success *perr is not written.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	(void) generic_recv(req, perr);
}
1151 static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
1153 char *strv = NULL;
1154 int num, i, ret;
1155 char *t;
1156 uint32_t *nodes;
1158 ret = strv_split(mem_ctx, &strv, s, ",");
1159 if (ret != 0) {
1160 D_ERR("out of memory\n");
1161 return NULL;
1164 num = strv_count(strv);
1166 nodes = talloc_array(mem_ctx, uint32_t, num);
1167 if (nodes == NULL) {
1168 D_ERR("out of memory\n");
1169 return NULL;
1172 t = NULL;
1173 for (i = 0; i < num; i++) {
1174 t = strv_next(strv, t);
1175 nodes[i] = atoi(t);
1178 return nodes;
/* Print a one-line usage summary for progname to stderr */
static void usage(const char *progname)
{
	static const char fmt[] =
		"\nUsage: %s <output-fd> <ctdb-socket-path> "
		"[<force-rebalance-nodes>]\n";

	fprintf(stderr, fmt, progname);
}
/*
 * Arguments - write fd, socket path, optional list of nodes to force
 * rebalance
 */
1192 int main(int argc, const char *argv[])
1194 int write_fd;
1195 const char *sockpath;
1196 TALLOC_CTX *mem_ctx;
1197 struct tevent_context *ev;
1198 struct ctdb_client_context *client;
1199 int ret;
1200 struct tevent_req *req;
1201 uint32_t *force_rebalance_nodes = NULL;
1203 if (argc < 3 || argc > 4) {
1204 usage(argv[0]);
1205 exit(1);
1208 write_fd = atoi(argv[1]);
1209 sockpath = argv[2];
1211 mem_ctx = talloc_new(NULL);
1212 if (mem_ctx == NULL) {
1213 fprintf(stderr, "talloc_new() failed\n");
1214 ret = ENOMEM;
1215 goto done;
1218 if (argc == 4) {
1219 force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
1220 if (force_rebalance_nodes == NULL) {
1221 usage(argv[0]);
1222 ret = EINVAL;
1223 goto done;
1227 ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
1228 if (ret != 0) {
1229 fprintf(stderr,
1230 "ctdb-takeover: Unable to initialize logging\n");
1231 goto done;
1234 ev = tevent_context_init(mem_ctx);
1235 if (ev == NULL) {
1236 D_ERR("tevent_context_init() failed\n");
1237 ret = ENOMEM;
1238 goto done;
1241 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1242 if (ret != 0) {
1243 D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
1244 goto done;
1247 req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
1248 if (req == NULL) {
1249 D_ERR("takeover_send() failed\n");
1250 ret = 1;
1251 goto done;
1254 if (! tevent_req_poll(req, ev)) {
1255 D_ERR("tevent_req_poll() failed\n");
1256 ret = 1;
1257 goto done;
1260 takeover_recv(req, &ret);
1261 TALLOC_FREE(req);
1262 if (ret != 0) {
1263 D_ERR("takeover run failed, ret=%d\n", ret);
1266 done:
1267 sys_write_v(write_fd, &ret, sizeof(ret));
1269 talloc_free(mem_ctx);
1270 return ret;