notifyd: Use messaging_register for MSG_SMB_NOTIFY_REC_CHANGE
[Samba.git] / ctdb / server / ctdb_takeover_helper.c
blob5efd61983940bcd56e26f64c4c83a71b43b3fd4d
1 /*
2 CTDB IP takeover helper
4 Copyright (C) Martin Schwenke 2016
6 Based on ctdb_recovery_helper.c
7 Copyright (C) Amitay Isaacs 2015
9 and ctdb_takeover.c
10 Copyright (C) Ronnie Sahlberg 2007
11 Copyright (C) Andrew Tridgell 2007
12 Copyright (C) Martin Schwenke 2011
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/network.h"
30 #include "system/filesys.h"
32 #include <popt.h>
33 #include <talloc.h>
34 #include <tevent.h>
36 #include "lib/util/debug.h"
37 #include "lib/util/strv.h"
38 #include "lib/util/strv_util.h"
39 #include "lib/util/sys_rw.h"
40 #include "lib/util/time.h"
41 #include "lib/util/tevent_unix.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
45 #include "client/client.h"
47 #include "common/logging.h"
49 #include "server/ipalloc.h"
/* Timeout in seconds used for individual controls.  Starts out as 9
 * and is replaced by the takeover_timeout tunable once the tunables
 * have been fetched. */
static int takeover_timeout = 9;

/* Timeout value for a control, derived from takeover_timeout */
#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
/*
 * Utility functions
 */
/*
 * Common receive function for requests that return no data.
 *
 * Returns true if the request finished without a unix error.
 * Otherwise returns false and, when perr is not NULL, stores the
 * error code in *perr.  *perr is not touched on success.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int unix_err;

	if (! tevent_req_is_unix_error(req, &unix_err)) {
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}

	return false;
}
73 static enum ipalloc_algorithm
74 determine_algorithm(const struct ctdb_tunable_list *tunables)
76 switch (tunables->ip_alloc_algorithm) {
77 case 0:
78 return IPALLOC_DETERMINISTIC;
79 case 1:
80 return IPALLOC_NONDETERMINISTIC;
81 case 2:
82 return IPALLOC_LCP2;
83 default:
84 return IPALLOC_LCP2;
88 /**********************************************************************/
/* State for fetching public IP lists from a set of nodes */
struct get_public_ips_state {
	/* PNNs of the nodes that are queried */
	uint32_t *pnns;
	/* Number of entries in pnns */
	int count;
	/* Fetched IP lists, one slot per node, indexed by PNN */
	struct ctdb_public_ip_list *ips;
	/* Failure counters, indexed by PNN, bumped when a node fails */
	uint32_t *ban_credits;
};
97 static void get_public_ips_done(struct tevent_req *subreq);
99 static struct tevent_req *get_public_ips_send(
100 TALLOC_CTX *mem_ctx,
101 struct tevent_context *ev,
102 struct ctdb_client_context *client,
103 uint32_t *pnns,
104 int count, int num_nodes,
105 uint32_t *ban_credits,
106 bool available_only)
108 struct tevent_req *req, *subreq;
109 struct get_public_ips_state *state;
110 struct ctdb_req_control request;
112 req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
113 if (req == NULL) {
114 return NULL;
117 state->pnns = pnns;
118 state->count = count;
119 state->ban_credits = ban_credits;
121 state->ips = talloc_zero_array(state,
122 struct ctdb_public_ip_list,
123 num_nodes);
124 if (tevent_req_nomem(state->ips, req)) {
125 return tevent_req_post(req, ev);
128 /* Short circuit if no nodes being asked for IPs */
129 if (state->count == 0) {
130 tevent_req_done(req);
131 return tevent_req_post(req, ev);
134 ctdb_req_control_get_public_ips(&request, available_only);
135 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
136 state->pnns,
137 state->count,
138 TIMEOUT(), &request);
139 if (tevent_req_nomem(subreq, req)) {
140 return tevent_req_post(req, ev);
142 tevent_req_set_callback(subreq, get_public_ips_done, req);
144 return req;
147 static void get_public_ips_done(struct tevent_req *subreq)
149 struct tevent_req *req = tevent_req_callback_data(
150 subreq, struct tevent_req);
151 struct get_public_ips_state *state = tevent_req_data(
152 req, struct get_public_ips_state);
153 struct ctdb_reply_control **reply;
154 int *err_list;
155 int ret, i;
156 bool status, found_errors;
158 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
159 &reply);
160 TALLOC_FREE(subreq);
161 if (! status) {
162 found_errors = false;
163 for (i = 0; i < state->count; i++) {
164 if (err_list[i] != 0) {
165 uint32_t pnn = state->pnns[i];
167 D_ERR("control GET_PUBLIC_IPS failed on "
168 "node %u, ret=%d\n", pnn, err_list[i]);
170 state->ban_credits[pnn]++;
171 found_errors = true;
175 tevent_req_error(req, ret);
176 return;
179 found_errors = false;
180 for (i = 0; i < state->count; i++) {
181 uint32_t pnn;
182 struct ctdb_public_ip_list *ips;
184 pnn = state->pnns[i];
185 ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
186 &ips);
187 if (ret != 0) {
188 D_ERR("control GET_PUBLIC_IPS failed on "
189 "node %u\n", pnn);
190 state->ban_credits[pnn]++;
191 found_errors = true;
192 continue;
195 D_INFO("Fetched public IPs from node %u\n", pnn);
196 state->ips[pnn] = *ips;
199 if (found_errors) {
200 tevent_req_error(req, EIO);
201 return;
204 talloc_free(reply);
206 tevent_req_done(req);
209 static bool get_public_ips_recv(struct tevent_req *req, int *perr,
210 TALLOC_CTX *mem_ctx,
211 struct ctdb_public_ip_list **ips)
213 struct get_public_ips_state *state = tevent_req_data(
214 req, struct get_public_ips_state);
215 int err;
217 if (tevent_req_is_unix_error(req, &err)) {
218 if (perr != NULL) {
219 *perr = err;
221 return false;
224 *ips = talloc_steal(mem_ctx, state->ips);
226 return true;
229 /**********************************************************************/
/* Overall state for sending RELEASE_IP for all public IPs */
struct release_ip_state {
	/* Number of multi-node controls sent, one per IP address */
	int num_sent;
	/* Number of those controls that have completed */
	int num_replies;
	/* Number of those controls that failed */
	int num_fails;
	/* Error code recorded from a failure */
	int err_any;
	/* Failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
/* Per-address state for one RELEASE_IP multi-node control */
struct release_ip_one_state {
	/* The overall release_ip request */
	struct tevent_req *req;
	/* Nodes that are asked to release the address */
	uint32_t *pnns;
	/* Number of entries used in pnns */
	int count;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};
246 static void release_ip_done(struct tevent_req *subreq);
248 static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
249 struct tevent_context *ev,
250 struct ctdb_client_context *client,
251 uint32_t *pnns,
252 int count,
253 struct timeval timeout,
254 struct public_ip_list *all_ips,
255 uint32_t *ban_credits)
257 struct tevent_req *req, *subreq;
258 struct release_ip_state *state;
259 struct ctdb_req_control request;
260 struct public_ip_list *tmp_ip;
262 req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
263 if (req == NULL) {
264 return NULL;
267 state->num_sent = 0;
268 state->num_replies = 0;
269 state->num_fails = 0;
270 state->ban_credits = ban_credits;
272 /* Send a RELEASE_IP to all nodes that should not be hosting
273 * each IP. For each IP, all but one of these will be
274 * redundant. However, the redundant ones are used to tell
275 * nodes which node should be hosting the IP so that commands
276 * like "ctdb ip" can display a particular nodes idea of who
277 * is hosting what. */
278 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
279 struct release_ip_one_state *substate;
280 struct ctdb_public_ip ip;
281 int i;
283 substate = talloc_zero(state, struct release_ip_one_state);
284 if (tevent_req_nomem(substate, req)) {
285 return tevent_req_post(req, ev);
288 substate->pnns = talloc_zero_array(substate, uint32_t, count);
289 if (tevent_req_nomem(substate->pnns, req)) {
290 return tevent_req_post(req, ev);
293 substate->count = 0;
294 substate->req = req;
296 substate->ip_str = ctdb_sock_addr_to_string(substate,
297 &tmp_ip->addr);
298 if (tevent_req_nomem(substate->ip_str, req)) {
299 return tevent_req_post(req, ev);
302 for (i = 0; i < count; i++) {
303 uint32_t pnn = pnns[i];
305 /* Skip this node if IP is not known */
306 if (! bitmap_query(tmp_ip->known_on, pnn)) {
307 continue;
310 /* If pnn is not the node that should be
311 * hosting the IP then add it to the list of
312 * nodes that need to do a release. */
313 if (tmp_ip->pnn != pnn) {
314 substate->pnns[substate->count] = pnn;
315 substate->count++;
319 if (substate->count == 0) {
320 /* No releases to send for this address... */
321 TALLOC_FREE(substate);
322 continue;
325 ip.pnn = tmp_ip->pnn;
326 ip.addr = tmp_ip->addr;
327 ctdb_req_control_release_ip(&request, &ip);
328 subreq = ctdb_client_control_multi_send(state, ev, client,
329 substate->pnns,
330 substate->count,
331 timeout,/* cumulative */
332 &request);
333 if (tevent_req_nomem(subreq, req)) {
334 return tevent_req_post(req, ev);
336 tevent_req_set_callback(subreq, release_ip_done, substate);
338 state->num_sent++;
341 /* None sent, finished... */
342 if (state->num_sent == 0) {
343 tevent_req_done(req);
344 return tevent_req_post(req, ev);
347 return req;
350 static void release_ip_done(struct tevent_req *subreq)
352 struct release_ip_one_state *substate = tevent_req_callback_data(
353 subreq, struct release_ip_one_state);
354 struct tevent_req *req = substate->req;
355 struct release_ip_state *state = tevent_req_data(
356 req, struct release_ip_state);
357 int ret, i;
358 int *err_list;
359 bool status, found_errors;
361 status = ctdb_client_control_multi_recv(subreq, &ret, state,
362 &err_list, NULL);
363 TALLOC_FREE(subreq);
365 if (status) {
366 D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
367 substate->ip_str, substate->count);
368 goto done;
371 /* Get some clear error messages out of err_list and count
372 * banning credits
374 found_errors = false;
375 for (i = 0; i < substate->count; i++) {
376 int err = err_list[i];
377 if (err != 0) {
378 uint32_t pnn = substate->pnns[i];
380 D_ERR("RELEASE_IP %s failed on node %u, "
381 "ret=%d\n", substate->ip_str, pnn, err);
383 state->ban_credits[pnn]++;
384 state->err_any = err;
385 found_errors = true;
388 if (! found_errors) {
389 D_ERR("RELEASE_IP %s internal error, ret=%d\n",
390 substate->ip_str, ret);
391 state->err_any = EIO;
394 state->num_fails++;
396 done:
397 talloc_free(substate);
399 state->num_replies++;
401 if (state->num_replies < state->num_sent) {
402 /* Not all replies received, don't go further */
403 return;
406 if (state->num_fails > 0) {
407 tevent_req_error(req, state->err_any);
408 return;
411 tevent_req_done(req);
/*
 * Receive the result of release_ip_send().  Returns false and fills
 * in *perr (when not NULL) if the request failed.
 */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
419 /**********************************************************************/
/* Overall state for sending TAKEOVER_IP for all assigned public IPs */
struct take_ip_state {
	/* Number of controls sent, one per assigned IP address */
	int num_sent;
	/* Number of controls that have completed */
	int num_replies;
	/* Number of controls that failed */
	int num_fails;
	/* Error code recorded from a failure */
	int err_any;
	/* Failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
/* Per-address state for one TAKEOVER_IP control */
struct take_ip_one_state {
	/* The overall take_ip request */
	struct tevent_req *req;
	/* Node that is asked to take the address */
	uint32_t pnn;
	/* Printable form of the address, for log messages */
	const char *ip_str;
};
435 static void take_ip_done(struct tevent_req *subreq);
437 static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
438 struct tevent_context *ev,
439 struct ctdb_client_context *client,
440 struct timeval timeout,
441 struct public_ip_list *all_ips,
442 uint32_t *ban_credits)
444 struct tevent_req *req, *subreq;
445 struct take_ip_state *state;
446 struct ctdb_req_control request;
447 struct public_ip_list *tmp_ip;
449 req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
450 if (req == NULL) {
451 return NULL;
454 state->num_sent = 0;
455 state->num_replies = 0;
456 state->num_fails = 0;
457 state->ban_credits = ban_credits;
459 /* For each IP, send a TAKOVER_IP to the node that should be
460 * hosting it. Many of these will often be redundant (since
461 * the allocation won't have changed) but they can be useful
462 * to recover from inconsistencies. */
463 for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
464 struct take_ip_one_state *substate;
465 struct ctdb_public_ip ip;
467 if (tmp_ip->pnn == -1) {
468 /* IP will be unassigned */
469 continue;
472 substate = talloc_zero(state, struct take_ip_one_state);
473 if (tevent_req_nomem(substate, req)) {
474 return tevent_req_post(req, ev);
477 substate->req = req;
478 substate->pnn = tmp_ip->pnn;
480 substate->ip_str = ctdb_sock_addr_to_string(substate,
481 &tmp_ip->addr);
482 if (tevent_req_nomem(substate->ip_str, req)) {
483 return tevent_req_post(req, ev);
486 ip.pnn = tmp_ip->pnn;
487 ip.addr = tmp_ip->addr;
488 ctdb_req_control_takeover_ip(&request, &ip);
489 subreq = ctdb_client_control_send(
490 state, ev, client, tmp_ip->pnn,
491 timeout, /* cumulative */
492 &request);
493 if (tevent_req_nomem(subreq, req)) {
494 return tevent_req_post(req, ev);
496 tevent_req_set_callback(subreq, take_ip_done, substate);
498 state->num_sent++;
501 /* None sent, finished... */
502 if (state->num_sent == 0) {
503 tevent_req_done(req);
504 return tevent_req_post(req, ev);
507 return req;
510 static void take_ip_done(struct tevent_req *subreq)
512 struct take_ip_one_state *substate = tevent_req_callback_data(
513 subreq, struct take_ip_one_state);
514 struct tevent_req *req = substate->req;
515 struct ctdb_reply_control *reply;
516 struct take_ip_state *state = tevent_req_data(
517 req, struct take_ip_state);
518 int ret = 0;
519 bool status;
521 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
522 TALLOC_FREE(subreq);
524 if (! status) {
525 D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
526 substate->ip_str, substate->pnn, ret);
527 goto fail;
530 ret = ctdb_reply_control_takeover_ip(reply);
531 if (ret != 0) {
532 D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
533 substate->ip_str, substate->pnn, ret);
534 goto fail;
537 D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
538 substate->ip_str, substate->pnn);
539 goto done;
541 fail:
542 state->ban_credits[substate->pnn]++;
543 state->num_fails++;
544 state->err_any = ret;
546 done:
547 talloc_free(substate);
549 state->num_replies++;
551 if (state->num_replies < state->num_sent) {
552 /* Not all replies received, don't go further */
553 return;
556 if (state->num_fails > 0) {
557 tevent_req_error(req, state->err_any);
558 return;
561 tevent_req_done(req);
/*
 * Receive the result of take_ip_send().  Returns false and fills in
 * *perr (when not NULL) if the request failed.
 */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
569 /**********************************************************************/
/* State for sending IPREALLOCATED to a set of nodes */
struct ipreallocated_state {
	/* PNNs of the nodes the control is sent to */
	uint32_t *pnns;
	/* Number of entries in pnns */
	int count;
	/* Failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
577 static void ipreallocated_done(struct tevent_req *subreq);
579 static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
580 struct tevent_context *ev,
581 struct ctdb_client_context *client,
582 uint32_t *pnns,
583 int count,
584 struct timeval timeout,
585 uint32_t *ban_credits)
587 struct tevent_req *req, *subreq;
588 struct ipreallocated_state *state;
589 struct ctdb_req_control request;
591 req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
592 if (req == NULL) {
593 return NULL;
596 state->pnns = pnns;
597 state->count = count;
598 state->ban_credits = ban_credits;
600 ctdb_req_control_ipreallocated(&request);
601 subreq = ctdb_client_control_multi_send(state, ev, client,
602 pnns, count,
603 timeout, /* cumulative */
604 &request);
605 if (tevent_req_nomem(subreq, req)) {
606 return tevent_req_post(req, ev);
608 tevent_req_set_callback(subreq, ipreallocated_done, req);
610 return req;
613 static void ipreallocated_done(struct tevent_req *subreq)
615 struct tevent_req *req = tevent_req_callback_data(
616 subreq, struct tevent_req);
617 struct ipreallocated_state *state = tevent_req_data(
618 req, struct ipreallocated_state);
619 int *err_list = NULL;
620 int ret, i;
621 bool status, found_errors;
623 status = ctdb_client_control_multi_recv(subreq, &ret, state,
624 &err_list, NULL);
625 TALLOC_FREE(subreq);
627 if (status) {
628 D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
629 tevent_req_done(req);
630 return;
633 /* Get some clear error messages out of err_list and count
634 * banning credits
636 found_errors = false;
637 for (i = 0; i < state->count; i++) {
638 int err = err_list[i];
639 if (err != 0) {
640 uint32_t pnn = state->pnns[i];
642 D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
643 pnn, err);
645 state->ban_credits[pnn]++;
646 found_errors = true;
650 if (! found_errors) {
651 D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
654 tevent_req_error(req, ret);
/*
 * Receive the result of ipreallocated_send().  Returns false and
 * fills in *perr (when not NULL) if the request failed.
 */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	bool ok = generic_recv(req, perr);

	return ok;
}
662 /**********************************************************************/
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Get tunables
 * - Get nodemap
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then jump to IPREALLOCATED
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKEOVER_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes
 */
/* State carried through all stages of a takeover run */
struct takeover_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	/* Timeout shared by the RELEASE_IP, TAKEOVER_IP and
	 * IPREALLOCATED stages */
	struct timeval timeout;
	/* Number of nodes in the node map */
	int num_nodes;
	/* Connected nodes and how many there are */
	uint32_t *pnns_connected;
	int num_connected;
	/* Active nodes and how many there are */
	uint32_t *pnns_active;
	int num_active;
	/* Node that tunables and node map are fetched from */
	uint32_t destnode;
	/* Nodes to force rebalance, handed to the IP allocation state */
	uint32_t *force_rebalance_nodes;
	struct ctdb_tunable_list *tun_list;
	struct ipalloc_state *ipalloc_state;
	/* Known public IPs, one list per node, indexed by PNN */
	struct ctdb_public_ip_list *known_ips;
	/* Result of the IP allocation */
	struct public_ip_list *all_ips;
	/* Failure counters, indexed by PNN */
	uint32_t *ban_credits;
};
/* Stages of a takeover run, in the order they are normally executed.
 * takeover_ipreallocated() and takeover_failed() operate on the
 * takeover request itself, the others are subrequest callbacks. */
static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
static void takeover_failed(struct tevent_req *req, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
717 static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
718 struct tevent_context *ev,
719 struct ctdb_client_context *client,
720 uint32_t *force_rebalance_nodes)
722 struct tevent_req *req, *subreq;
723 struct takeover_state *state;
724 struct ctdb_req_control request;
726 req = tevent_req_create(mem_ctx, &state, struct takeover_state);
727 if (req == NULL) {
728 return NULL;
731 state->ev = ev;
732 state->client = client;
733 state->force_rebalance_nodes = force_rebalance_nodes;
734 state->destnode = ctdb_client_pnn(client);
736 ctdb_req_control_get_all_tunables(&request);
737 subreq = ctdb_client_control_send(state, state->ev, state->client,
738 state->destnode, TIMEOUT(),
739 &request);
740 if (tevent_req_nomem(subreq, req)) {
741 return tevent_req_post(req, ev);
743 tevent_req_set_callback(subreq, takeover_tunables_done, req);
745 return req;
748 static void takeover_tunables_done(struct tevent_req *subreq)
750 struct tevent_req *req = tevent_req_callback_data(
751 subreq, struct tevent_req);
752 struct takeover_state *state = tevent_req_data(
753 req, struct takeover_state);
754 struct ctdb_reply_control *reply;
755 struct ctdb_req_control request;
756 int ret;
757 bool status;
759 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
760 TALLOC_FREE(subreq);
761 if (! status) {
762 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
763 tevent_req_error(req, ret);
764 return;
767 ret = ctdb_reply_control_get_all_tunables(reply, state,
768 &state->tun_list);
769 if (ret != 0) {
770 D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
771 tevent_req_error(req, ret);
772 return;
775 talloc_free(reply);
777 takeover_timeout = state->tun_list->takeover_timeout;
779 ctdb_req_control_get_nodemap(&request);
780 subreq = ctdb_client_control_send(state, state->ev, state->client,
781 state->destnode, TIMEOUT(),
782 &request);
783 if (tevent_req_nomem(subreq, req)) {
784 return;
786 tevent_req_set_callback(subreq, takeover_nodemap_done, req);
789 static void takeover_nodemap_done(struct tevent_req *subreq)
791 struct tevent_req *req = tevent_req_callback_data(
792 subreq, struct tevent_req);
793 struct takeover_state *state = tevent_req_data(
794 req, struct takeover_state);
795 struct ctdb_reply_control *reply;
796 bool status;
797 int ret;
798 struct ctdb_node_map *nodemap;
800 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
801 TALLOC_FREE(subreq);
802 if (! status) {
803 D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
804 state->destnode, ret);
805 tevent_req_error(req, ret);
806 return;
809 ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
810 if (ret != 0) {
811 D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
812 tevent_req_error(req, ret);
813 return;
816 state->num_nodes = nodemap->num;
818 state->num_connected = list_of_connected_nodes(nodemap,
819 CTDB_UNKNOWN_PNN, state,
820 &state->pnns_connected);
821 if (state->num_connected <= 0) {
822 tevent_req_error(req, ENOMEM);
823 return;
826 state->num_active = list_of_active_nodes(nodemap,
827 CTDB_UNKNOWN_PNN, state,
828 &state->pnns_active);
829 if (state->num_active <= 0) {
830 tevent_req_error(req, ENOMEM);
831 return;
834 /* Default timeout for early jump to IPREALLOCATED. See below
835 * for explanation of 3 times...
837 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
839 state->ban_credits = talloc_zero_array(state, uint32_t,
840 state->num_nodes);
841 if (tevent_req_nomem(state->ban_credits, req)) {
842 return;
845 if (state->tun_list->disable_ip_failover != 0) {
846 /* IP failover is completely disabled so just send out
847 * ipreallocated event.
849 takeover_ipreallocated(req);
850 return;
853 state->ipalloc_state =
854 ipalloc_state_init(
855 state, state->num_nodes,
856 determine_algorithm(state->tun_list),
857 (state->tun_list->no_ip_takeover != 0),
858 (state->tun_list->no_ip_failback != 0),
859 (state->tun_list->no_ip_host_on_all_disabled != 0),
860 state->force_rebalance_nodes);
861 if (tevent_req_nomem(state->ipalloc_state, req)) {
862 return;
865 ipalloc_set_node_flags(state->ipalloc_state, nodemap);
867 subreq = get_public_ips_send(state, state->ev, state->client,
868 state->pnns_connected, state->num_connected,
869 state->num_nodes, state->ban_credits,
870 false);
871 if (tevent_req_nomem(subreq, req)) {
872 return;
875 tevent_req_set_callback(subreq, takeover_known_ips_done, req);
878 static void takeover_known_ips_done(struct tevent_req *subreq)
880 struct tevent_req *req = tevent_req_callback_data(
881 subreq, struct tevent_req);
882 struct takeover_state *state = tevent_req_data(
883 req, struct takeover_state);
884 int ret;
885 bool status;
886 uint32_t *pnns = NULL;
887 int count, i;
889 status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
890 TALLOC_FREE(subreq);
892 if (! status) {
893 D_ERR("Failed to fetch known public IPs\n");
894 takeover_failed(req, ret);
895 return;
898 /* Get available IPs from active nodes that actually have known IPs */
900 pnns = talloc_zero_array(state, uint32_t, state->num_active);
901 if (tevent_req_nomem(pnns, req)) {
902 return;
905 count = 0;
906 for (i = 0; i < state->num_active; i++) {
907 uint32_t pnn = state->pnns_active[i];
909 /* If pnn has IPs then fetch available IPs from it */
910 if (state->known_ips[pnn].num > 0) {
911 pnns[count] = pnn;
912 count++;
916 subreq = get_public_ips_send(state, state->ev, state->client,
917 pnns, count,
918 state->num_nodes, state->ban_credits,
919 true);
920 if (tevent_req_nomem(subreq, req)) {
921 return;
924 tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
927 static void takeover_avail_ips_done(struct tevent_req *subreq)
929 struct tevent_req *req = tevent_req_callback_data(
930 subreq, struct tevent_req);
931 struct takeover_state *state = tevent_req_data(
932 req, struct takeover_state);
933 bool status;
934 int ret;
935 struct ctdb_public_ip_list *available_ips;
937 status = get_public_ips_recv(subreq, &ret, state, &available_ips);
938 TALLOC_FREE(subreq);
940 if (! status) {
941 D_ERR("Failed to fetch available public IPs\n");
942 takeover_failed(req, ret);
943 return;
946 ipalloc_set_public_ips(state->ipalloc_state,
947 state->known_ips, available_ips);
949 if (! ipalloc_can_host_ips(state->ipalloc_state)) {
950 D_NOTICE("No nodes available to host public IPs yet\n");
951 takeover_ipreallocated(req);
952 return;
955 /* Do the IP reassignment calculations */
956 state->all_ips = ipalloc(state->ipalloc_state);
957 if (tevent_req_nomem(state->all_ips, req)) {
958 return;
961 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
962 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
963 * seconds. However, RELEASE_IP can take longer due to TCP
964 * connection killing, so sometimes needs more time.
965 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
966 * seconds across all 3 stages. No explicit expiry checks are
967 * needed before each stage because tevent is smart enough to
968 * fire the timeouts even if they are in the past. Initialise
969 * this here so it explicitly covers the stages we're
970 * interested in but, in particular, not the time taken by the
971 * ipalloc().
973 state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
975 subreq = release_ip_send(state, state->ev, state->client,
976 state->pnns_connected, state->num_connected,
977 state->timeout, state->all_ips,
978 state->ban_credits);
979 if (tevent_req_nomem(subreq, req)) {
980 return;
982 tevent_req_set_callback(subreq, takeover_release_ip_done, req);
985 static void takeover_release_ip_done(struct tevent_req *subreq)
987 struct tevent_req *req = tevent_req_callback_data(
988 subreq, struct tevent_req);
989 struct takeover_state *state = tevent_req_data(
990 req, struct takeover_state);
991 int ret;
992 bool status;
994 status = release_ip_recv(subreq, &ret);
995 TALLOC_FREE(subreq);
997 if (! status) {
998 takeover_failed(req, ret);
999 return;
1002 /* All released, now for takeovers */
1004 subreq = take_ip_send(state, state->ev, state->client,
1005 state->timeout, state->all_ips,
1006 state->ban_credits);
1007 if (tevent_req_nomem(subreq, req)) {
1008 return;
1010 tevent_req_set_callback(subreq, takeover_take_ip_done, req);
1013 static void takeover_take_ip_done(struct tevent_req *subreq)
1015 struct tevent_req *req = tevent_req_callback_data(
1016 subreq, struct tevent_req);
1017 int ret = 0;
1018 bool status;
1020 status = take_ip_recv(subreq, &ret);
1021 TALLOC_FREE(subreq);
1023 if (! status) {
1024 takeover_failed(req, ret);
1025 return;
1028 takeover_ipreallocated(req);
1031 static void takeover_ipreallocated(struct tevent_req *req)
1033 struct takeover_state *state = tevent_req_data(
1034 req, struct takeover_state);
1035 struct tevent_req *subreq;
1037 subreq = ipreallocated_send(state, state->ev, state->client,
1038 state->pnns_connected,
1039 state->num_connected,
1040 state->timeout,
1041 state->ban_credits);
1042 if (tevent_req_nomem(subreq, req)) {
1043 return;
1045 tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
1048 static void takeover_ipreallocated_done(struct tevent_req *subreq)
1050 struct tevent_req *req = tevent_req_callback_data(
1051 subreq, struct tevent_req);
1052 int ret;
1053 bool status;
1055 status = ipreallocated_recv(subreq, &ret);
1056 TALLOC_FREE(subreq);
1058 if (! status) {
1059 takeover_failed(req, ret);
1060 return;
1063 tevent_req_done(req);
/* State kept while the banning message is being sent */
struct takeover_failed_state {
	/* The takeover request that failed */
	struct tevent_req *req;
	/* Error to fail the takeover request with afterwards */
	int ret;
};
1071 void takeover_failed(struct tevent_req *req, int ret)
1073 struct takeover_state *state = tevent_req_data(
1074 req, struct takeover_state);
1075 struct tevent_req *subreq;
1076 uint32_t max_pnn = CTDB_UNKNOWN_PNN;
1077 int max_credits = 0;
1078 int pnn;
1080 /* Check that bans are enabled */
1081 if (state->tun_list->enable_bans == 0) {
1082 tevent_req_error(req, ret);
1083 return;
1086 for (pnn = 0; pnn < state->num_nodes; pnn++) {
1087 if (state->ban_credits[pnn] > max_credits) {
1088 max_pnn = pnn;
1089 max_credits = state->ban_credits[pnn];
1093 if (max_credits > 0) {
1094 struct ctdb_req_message message;
1095 struct takeover_failed_state *substate;
1097 D_WARNING("Assigning banning credits to node %u\n", max_pnn);
1099 substate = talloc_zero(state, struct takeover_failed_state);
1100 if (tevent_req_nomem(substate, req)) {
1101 return;
1103 substate->req = req;
1104 substate->ret = ret;
1106 message.srvid = CTDB_SRVID_BANNING;
1107 message.data.pnn = max_pnn;
1109 subreq = ctdb_client_message_send(
1110 state, state->ev, state->client,
1111 ctdb_client_pnn(state->client),
1112 &message);
1113 if (subreq == NULL) {
1114 D_ERR("failed to assign banning credits\n");
1115 tevent_req_error(req, ret);
1116 return;
1118 tevent_req_set_callback(subreq, takeover_failed_done, substate);
1119 } else {
1120 tevent_req_error(req, ret);
1124 static void takeover_failed_done(struct tevent_req *subreq)
1126 struct takeover_failed_state *substate = tevent_req_callback_data(
1127 subreq, struct takeover_failed_state);
1128 struct tevent_req *req = substate->req;
1129 int ret;
1130 bool status;
1132 status = ctdb_client_message_recv(subreq, &ret);
1133 TALLOC_FREE(subreq);
1134 if (! status) {
1135 D_ERR("failed to assign banning credits, ret=%d\n", ret);
1138 ret = substate->ret;
1139 talloc_free(substate);
1140 tevent_req_error(req, ret);
/*
 * Receive the result of takeover_send().
 *
 * When perr is not NULL, *perr is set to the error of a failed
 * request and to 0 for a successful one, so callers do not have to
 * pre-initialise it.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	if (! generic_recv(req, perr)) {
		return;
	}

	/* generic_recv() leaves *perr alone on success */
	if (perr != NULL) {
		*perr = 0;
	}
}
1148 static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
1150 char *strv = NULL;
1151 int num, i, ret;
1152 char *t;
1153 uint32_t *nodes;
1155 ret = strv_split(mem_ctx, &strv, s, ",");
1156 if (ret != 0) {
1157 D_ERR("out of memory\n");
1158 return NULL;
1161 num = strv_count(strv);
1163 nodes = talloc_array(mem_ctx, uint32_t, num);
1164 if (nodes == NULL) {
1165 D_ERR("out of memory\n");
1166 return NULL;
1169 t = NULL;
1170 for (i = 0; i < num; i++) {
1171 t = strv_next(strv, t);
1172 nodes[i] = atoi(t);
1175 return nodes;
/* Print a short usage message for progname to stderr */
static void usage(const char *progname)
{
	static const char fmt[] =
		"\nUsage: %s <output-fd> <ctdb-socket-path> "
		"[<force-rebalance-nodes>]\n";

	fprintf(stderr, fmt, progname);
}
1187 * Arguments - write fd, socket path
1189 int main(int argc, const char *argv[])
1191 int write_fd;
1192 const char *sockpath;
1193 TALLOC_CTX *mem_ctx;
1194 struct tevent_context *ev;
1195 struct ctdb_client_context *client;
1196 int ret;
1197 struct tevent_req *req;
1198 uint32_t *force_rebalance_nodes = NULL;
1200 if (argc < 3 || argc > 4) {
1201 usage(argv[0]);
1202 exit(1);
1205 write_fd = atoi(argv[1]);
1206 sockpath = argv[2];
1208 mem_ctx = talloc_new(NULL);
1209 if (mem_ctx == NULL) {
1210 fprintf(stderr, "talloc_new() failed\n");
1211 ret = ENOMEM;
1212 goto done;
1215 if (argc == 4) {
1216 force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
1217 if (force_rebalance_nodes == NULL) {
1218 usage(argv[0]);
1219 ret = EINVAL;
1220 goto done;
1224 ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
1225 if (ret != 0) {
1226 fprintf(stderr,
1227 "ctdb-takeover: Unable to initialize logging\n");
1228 goto done;
1231 ev = tevent_context_init(mem_ctx);
1232 if (ev == NULL) {
1233 D_ERR("tevent_context_init() failed\n");
1234 ret = ENOMEM;
1235 goto done;
1238 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1239 if (ret != 0) {
1240 D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
1241 goto done;
1244 req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
1245 if (req == NULL) {
1246 D_ERR("takeover_send() failed\n");
1247 ret = 1;
1248 goto done;
1251 if (! tevent_req_poll(req, ev)) {
1252 D_ERR("tevent_req_poll() failed\n");
1253 ret = 1;
1254 goto done;
1257 takeover_recv(req, &ret);
1258 TALLOC_FREE(req);
1259 if (ret != 0) {
1260 D_ERR("takeover run failed, ret=%d\n", ret);
1263 done:
1264 sys_write_v(write_fd, &ret, sizeof(ret));
1266 talloc_free(mem_ctx);
1267 return ret;