CVE-2016-0771: tests/dns: Remove dependencies on env variables
[Samba.git] / ctdb / server / ctdb_recovery_helper.c
bloba8c30bbee934aecf17cd684766f53cdb7dced01d
1 /*
2 ctdb parallel database recovery
4 Copyright (C) Amitay Isaacs 2015
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
24 #include <talloc.h>
25 #include <tevent.h>
26 #include <tdb.h>
27 #include <libgen.h>
29 #include "lib/tdb_wrap/tdb_wrap.h"
30 #include "lib/util/time.h"
31 #include "lib/util/tevent_unix.h"
33 #include "protocol/protocol.h"
34 #include "protocol/protocol_api.h"
35 #include "client/client.h"
/*
 * Timeout value handed to timeval_current_ofs() for every control sent by
 * this helper (presumably seconds -- confirm against lib/util/time.h).
 */
static int recover_timeout = 120;

/* Deadline "recover_timeout from now", evaluated at each use. */
#define TIMEOUT() timeval_current_ofs(recover_timeout, 0)
/*
 * printf-style diagnostic output.
 *
 * Formats the message described by fmt and the following arguments and
 * writes it to stderr.  No newline is appended; callers supply their own.
 */
static void LOG(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
/*
 * Utility functions
 */
/*
 * write(2) wrapper that retries for as long as the call fails with a
 * transient error (EINTR, EAGAIN and, where defined, EWOULDBLOCK).
 *
 * Returns whatever the final write() returned: the number of bytes written
 * (which may be fewer than count) or -1 with errno set.
 */
static ssize_t sys_write(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten != -1) {
			return nwritten;
		}
		if (errno == EINTR || errno == EAGAIN) {
			continue;
		}
#if defined(EWOULDBLOCK)
		if (errno == EWOULDBLOCK) {
			continue;
		}
#endif
		return nwritten;
	}
}
/*
 * Recovery database functions
 */
/* Handle for the temporary tdb in which pulled records are merged. */
struct recdb_context {
	uint32_t db_id;		/* id of the database being recovered */
	const char *db_name;	/* borrowed from the caller, not copied */
	const char *db_path;	/* "<dir>/recdb.<db_name>", owned by this context */
	struct tdb_wrap *db;	/* open handle on db_path */
	bool persistent;	/* selects persistent-db handling in recdb_traverse() */
};
80 static struct recdb_context *recdb_create(TALLOC_CTX *mem_ctx, uint32_t db_id,
81 const char *db_name,
82 const char *db_path,
83 uint32_t hash_size, bool persistent)
85 static char *db_dir_state = NULL;
86 struct recdb_context *recdb;
87 unsigned int tdb_flags;
89 recdb = talloc(mem_ctx, struct recdb_context);
90 if (recdb == NULL) {
91 return NULL;
94 if (db_dir_state == NULL) {
95 db_dir_state = getenv("CTDB_DBDIR_STATE");
98 recdb->db_name = db_name;
99 recdb->db_id = db_id;
100 recdb->db_path = talloc_asprintf(recdb, "%s/recdb.%s",
101 db_dir_state != NULL ?
102 db_dir_state :
103 dirname(discard_const(db_path)),
104 db_name);
105 if (recdb->db_path == NULL) {
106 talloc_free(recdb);
107 return NULL;
109 unlink(recdb->db_path);
111 tdb_flags = TDB_NOLOCK | TDB_INCOMPATIBLE_HASH | TDB_DISALLOW_NESTING;
112 recdb->db = tdb_wrap_open(mem_ctx, recdb->db_path, hash_size,
113 tdb_flags, O_RDWR|O_CREAT|O_EXCL, 0600);
114 if (recdb->db == NULL) {
115 talloc_free(recdb);
116 LOG("failed to create recovery db %s\n", recdb->db_path);
119 recdb->persistent = persistent;
121 return recdb;
124 static const char *recdb_name(struct recdb_context *recdb)
126 return recdb->db_name;
/* Private data handed to recdb_add_traverse() for each record. */
struct recdb_add_traverse_state {
	struct recdb_context *recdb;	/* recovery db receiving the records */
	int mypnn;			/* compared against the stored record's dmaster on RSN ties */
};
134 static int recdb_add_traverse(uint32_t reqid, struct ctdb_ltdb_header *header,
135 TDB_DATA key, TDB_DATA data,
136 void *private_data)
138 struct recdb_add_traverse_state *state =
139 (struct recdb_add_traverse_state *)private_data;
140 struct ctdb_ltdb_header *hdr;
141 TDB_DATA prev_data;
142 int ret;
144 /* header is not marshalled separately in the pulldb control */
145 if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
146 return -1;
149 hdr = (struct ctdb_ltdb_header *)data.dptr;
151 /* fetch the existing record, if any */
152 prev_data = tdb_fetch(state->recdb->db->tdb, key);
154 if (prev_data.dptr != NULL) {
155 struct ctdb_ltdb_header prev_hdr;
157 prev_hdr = *(struct ctdb_ltdb_header *)prev_data.dptr;
158 free(prev_data.dptr);
159 if (hdr->rsn < prev_hdr.rsn ||
160 (hdr->rsn == prev_hdr.rsn &&
161 prev_hdr.dmaster != state->mypnn)) {
162 return 0;
166 ret = tdb_store(state->recdb->db->tdb, key, data, TDB_REPLACE);
167 if (ret != 0) {
168 return -1;
170 return 0;
173 static bool recdb_add(struct recdb_context *recdb, int mypnn,
174 struct ctdb_rec_buffer *recbuf)
176 struct recdb_add_traverse_state state;
177 int ret;
179 state.recdb = recdb;
180 state.mypnn = mypnn;
182 ret = ctdb_rec_buffer_traverse(recbuf, recdb_add_traverse, &state);
183 if (ret != 0) {
184 return false;
187 return true;
/* Private data handed to recdb_traverse() while marshalling the recovery db. */
struct recdb_traverse_state {
	struct ctdb_rec_buffer *recbuf;	/* buffer the records are appended to */
	uint32_t pnn;			/* written into dmaster of non-persistent records */
	uint32_t reqid;			/* passed through to ctdb_rec_buffer_add() */
	bool persistent;		/* true: keep empty records, leave headers alone */
	bool failed;			/* set when appending a record failed */
};
198 static int recdb_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
199 void *private_data)
201 struct recdb_traverse_state *state =
202 (struct recdb_traverse_state *)private_data;
203 struct ctdb_ltdb_header *header;
204 int ret;
207 * skip empty records - but NOT for persistent databases:
209 * The record-by-record mode of recovery deletes empty records.
210 * For persistent databases, this can lead to data corruption
211 * by deleting records that should be there:
213 * - Assume the cluster has been running for a while.
215 * - A record R in a persistent database has been created and
216 * deleted a couple of times, the last operation being deletion,
217 * leaving an empty record with a high RSN, say 10.
219 * - Now a node N is turned off.
221 * - This leaves the local database copy of D on N with the empty
222 * copy of R and RSN 10. On all other nodes, the recovery has deleted
223 * the copy of record R.
225 * - Now the record is created again while node N is turned off.
226 * This creates R with RSN = 1 on all nodes except for N.
228 * - Now node N is turned on again. The following recovery will chose
229 * the older empty copy of R due to RSN 10 > RSN 1.
231 * ==> Hence the record is gone after the recovery.
233 * On databases like Samba's registry, this can damage the higher-level
234 * data structures built from the various tdb-level records.
236 if (!state->persistent &&
237 data.dsize <= sizeof(struct ctdb_ltdb_header)) {
238 return 0;
241 /* update the dmaster field to point to us */
242 header = (struct ctdb_ltdb_header *)data.dptr;
243 if (!state->persistent) {
244 header->dmaster = state->pnn;
245 header->flags |= CTDB_REC_FLAG_MIGRATED_WITH_DATA;
248 ret = ctdb_rec_buffer_add(state->recbuf, state->recbuf, state->reqid,
249 NULL, key, data);
250 if (ret != 0) {
251 state->failed = true;
252 return ret;
255 return 0;
258 static struct ctdb_rec_buffer *recdb_records(struct recdb_context *recdb,
259 TALLOC_CTX *mem_ctx, uint32_t pnn)
261 struct recdb_traverse_state state;
262 int ret;
264 state.recbuf = ctdb_rec_buffer_init(mem_ctx, recdb->db_id);
265 if (state.recbuf == NULL) {
266 return NULL;
268 state.pnn = pnn;
269 state.reqid = 0;
270 state.persistent = recdb->persistent;
271 state.failed = false;
273 ret = tdb_traverse_read(recdb->db->tdb, recdb_traverse, &state);
274 if (ret == -1 || state.failed) {
275 LOG("Failed to marshall recovery records for %s\n",
276 recdb->db_name);
277 TALLOC_FREE(state.recbuf);
278 return NULL;
281 return state.recbuf;
/*
 * Collect databases using highest sequence number
 */
/* Request state for collect_highseqnum_db_send(). */
struct collect_highseqnum_db_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	uint32_t *pnn_list;		/* nodes queried for their sequence number */
	int count;			/* number of entries in pnn_list */
	uint32_t db_id;
	struct recdb_context *recdb;	/* destination for the pulled records */
	uint32_t max_pnn;		/* node that reported the highest sequence number */
};
/* Completion callbacks, in the order they run. */
static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq);
static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq);
301 static struct tevent_req *collect_highseqnum_db_send(
302 TALLOC_CTX *mem_ctx,
303 struct tevent_context *ev,
304 struct ctdb_client_context *client,
305 uint32_t *pnn_list, int count,
306 uint32_t db_id, struct recdb_context *recdb)
308 struct tevent_req *req, *subreq;
309 struct collect_highseqnum_db_state *state;
310 struct ctdb_req_control request;
312 req = tevent_req_create(mem_ctx, &state,
313 struct collect_highseqnum_db_state);
314 if (req == NULL) {
315 return NULL;
318 state->ev = ev;
319 state->client = client;
320 state->pnn_list = pnn_list;
321 state->count = count;
322 state->db_id = db_id;
323 state->recdb = recdb;
325 ctdb_req_control_get_db_seqnum(&request, db_id);
326 subreq = ctdb_client_control_multi_send(mem_ctx, ev, client,
327 state->pnn_list, state->count,
328 TIMEOUT(), &request);
329 if (tevent_req_nomem(subreq, req)) {
330 return tevent_req_post(req, ev);
332 tevent_req_set_callback(subreq, collect_highseqnum_db_seqnum_done,
333 req);
335 return req;
338 static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq)
340 struct tevent_req *req = tevent_req_callback_data(
341 subreq, struct tevent_req);
342 struct collect_highseqnum_db_state *state = tevent_req_data(
343 req, struct collect_highseqnum_db_state);
344 struct ctdb_reply_control **reply;
345 struct ctdb_req_control request;
346 struct ctdb_pulldb pulldb;
347 int *err_list;
348 bool status;
349 int ret, i;
350 uint64_t seqnum, max_seqnum;
352 status = ctdb_client_control_multi_recv(subreq, &ret, state,
353 &err_list, &reply);
354 TALLOC_FREE(subreq);
355 if (! status) {
356 int ret2;
357 uint32_t pnn;
359 ret2 = ctdb_client_control_multi_error(state->pnn_list,
360 state->count, err_list,
361 &pnn);
362 if (ret2 != 0) {
363 LOG("control GET_DB_SEQNUM failed for %s on node %u,"
364 " ret=%d\n", recdb_name(state->recdb), pnn, ret2);
365 } else {
366 LOG("control GET_DB_SEQNUM failed for %s, ret=%d\n",
367 recdb_name(state->recdb), ret);
369 tevent_req_error(req, ret);
370 return;
373 max_seqnum = 0;
374 state->max_pnn = state->pnn_list[0];
375 for (i=0; i<state->count; i++) {
376 ret = ctdb_reply_control_get_db_seqnum(reply[i], &seqnum);
377 if (ret != 0) {
378 tevent_req_error(req, EPROTO);
379 return;
382 if (max_seqnum < seqnum) {
383 max_seqnum = seqnum;
384 state->max_pnn = state->pnn_list[i];
388 talloc_free(reply);
390 LOG("Pull persistent db %s from node %d with seqnum 0x%"PRIx64"\n",
391 recdb_name(state->recdb), state->max_pnn, max_seqnum);
393 pulldb.db_id = state->db_id;
394 pulldb.lmaster = CTDB_LMASTER_ANY;
396 ctdb_req_control_pull_db(&request, &pulldb);
397 subreq = ctdb_client_control_send(state, state->ev, state->client,
398 state->max_pnn, TIMEOUT(), &request);
399 if (tevent_req_nomem(subreq, req)) {
400 return;
402 tevent_req_set_callback(subreq, collect_highseqnum_db_pulldb_done,
403 req);
406 static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq)
408 struct tevent_req *req = tevent_req_callback_data(
409 subreq, struct tevent_req);
410 struct collect_highseqnum_db_state *state = tevent_req_data(
411 req, struct collect_highseqnum_db_state);
412 struct ctdb_reply_control *reply;
413 struct ctdb_rec_buffer *recbuf;
414 int ret;
415 bool status;
417 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
418 TALLOC_FREE(subreq);
419 if (! status) {
420 LOG("control PULL_DB failed for %s on node %u, ret=%d\n",
421 recdb_name(state->recdb), state->max_pnn, ret);
422 tevent_req_error(req, ret);
423 return;
426 ret = ctdb_reply_control_pull_db(reply, state, &recbuf);
427 if (ret != 0) {
428 tevent_req_error(req, EPROTO);
429 return;
432 talloc_free(reply);
434 ret = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf);
435 talloc_free(recbuf);
436 if (! ret) {
437 tevent_req_error(req, EIO);
438 return;
441 tevent_req_done(req);
444 static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr)
446 int err;
448 if (tevent_req_is_unix_error(req, &err)) {
449 if (perr != NULL) {
450 *perr = err;
452 return false;
455 return true;
/*
 * Collect all databases
 */
462 struct collect_all_db_state {
463 struct tevent_context *ev;
464 struct ctdb_client_context *client;
465 uint32_t *pnn_list;
466 int count;
467 uint32_t db_id;
468 struct recdb_context *recdb;
469 struct ctdb_pulldb pulldb;
470 int index;
/* Completion callback for each PULL_DB control. */
static void collect_all_db_pulldb_done(struct tevent_req *subreq);
475 static struct tevent_req *collect_all_db_send(
476 TALLOC_CTX *mem_ctx,
477 struct tevent_context *ev,
478 struct ctdb_client_context *client,
479 uint32_t *pnn_list, int count,
480 uint32_t db_id, struct recdb_context *recdb)
482 struct tevent_req *req, *subreq;
483 struct collect_all_db_state *state;
484 struct ctdb_req_control request;
486 req = tevent_req_create(mem_ctx, &state,
487 struct collect_all_db_state);
488 if (req == NULL) {
489 return NULL;
492 state->ev = ev;
493 state->client = client;
494 state->pnn_list = pnn_list;
495 state->count = count;
496 state->db_id = db_id;
497 state->recdb = recdb;
499 state->pulldb.db_id = db_id;
500 state->pulldb.lmaster = CTDB_LMASTER_ANY;
502 state->index = 0;
504 ctdb_req_control_pull_db(&request, &state->pulldb);
505 subreq = ctdb_client_control_send(state, ev, client,
506 state->pnn_list[state->index],
507 TIMEOUT(), &request);
508 if (tevent_req_nomem(subreq, req)) {
509 return tevent_req_post(req, ev);
511 tevent_req_set_callback(subreq, collect_all_db_pulldb_done, req);
513 return req;
516 static void collect_all_db_pulldb_done(struct tevent_req *subreq)
518 struct tevent_req *req = tevent_req_callback_data(
519 subreq, struct tevent_req);
520 struct collect_all_db_state *state = tevent_req_data(
521 req, struct collect_all_db_state);
522 struct ctdb_reply_control *reply;
523 struct ctdb_req_control request;
524 struct ctdb_rec_buffer *recbuf;
525 int ret;
526 bool status;
528 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
529 TALLOC_FREE(subreq);
530 if (! status) {
531 LOG("control PULL_DB failed for %s from node %u, ret=%d\n",
532 recdb_name(state->recdb), state->pnn_list[state->index],
533 ret);
534 tevent_req_error(req, ret);
535 return;
538 ret = ctdb_reply_control_pull_db(reply, state, &recbuf);
539 if (ret != 0) {
540 LOG("control PULL_DB failed for %s, ret=%d\n",
541 recdb_name(state->recdb), ret);
542 tevent_req_error(req, EPROTO);
543 return;
546 talloc_free(reply);
548 status = recdb_add(state->recdb, ctdb_client_pnn(state->client), recbuf);
549 talloc_free(recbuf);
550 if (! status) {
551 tevent_req_error(req, EIO);
552 return;
555 state->index += 1;
556 if (state->index == state->count) {
557 tevent_req_done(req);
558 return;
561 ctdb_req_control_pull_db(&request, &state->pulldb);
562 subreq = ctdb_client_control_send(state, state->ev, state->client,
563 state->pnn_list[state->index],
564 TIMEOUT(), &request);
565 if (tevent_req_nomem(subreq, req)) {
566 return;
568 tevent_req_set_callback(subreq, collect_all_db_pulldb_done, req);
571 static bool collect_all_db_recv(struct tevent_req *req, int *perr)
573 int err;
575 if (tevent_req_is_unix_error(req, &err)) {
576 if (perr != NULL) {
577 *perr = err;
579 return false;
582 return true;
/*
 * For each database do the following:
 *  - Get DB name
 *  - Get DB path
 *  - Freeze database on all nodes
 *  - Start transaction on all nodes
 *  - Collect database from all nodes
 *  - Wipe database on all nodes
 *  - Push database to all nodes
 *  - Commit transaction on all nodes
 *  - Thaw database on all nodes
 */
599 struct recover_db_state {
600 struct tevent_context *ev;
601 struct ctdb_client_context *client;
602 struct ctdb_tunable_list *tun_list;
603 uint32_t *pnn_list;
604 int count;
605 uint32_t db_id;
606 bool persistent;
608 uint32_t destnode;
609 struct ctdb_transdb transdb;
611 const char *db_name, *db_path;
612 struct recdb_context *recdb;
613 struct ctdb_rec_buffer *recbuf;
/* Completion callbacks of the per-database recovery, in the order they run. */
static void recover_db_name_done(struct tevent_req *subreq);
static void recover_db_path_done(struct tevent_req *subreq);
static void recover_db_freeze_done(struct tevent_req *subreq);
static void recover_db_transaction_started(struct tevent_req *subreq);
static void recover_db_collect_done(struct tevent_req *subreq);
static void recover_db_wipedb_done(struct tevent_req *subreq);
static void recover_db_pushdb_done(struct tevent_req *subreq);
static void recover_db_transaction_committed(struct tevent_req *subreq);
static void recover_db_thaw_done(struct tevent_req *subreq);
627 static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx,
628 struct tevent_context *ev,
629 struct ctdb_client_context *client,
630 struct ctdb_tunable_list *tun_list,
631 uint32_t *pnn_list, int count,
632 uint32_t generation,
633 uint32_t db_id, bool persistent)
635 struct tevent_req *req, *subreq;
636 struct recover_db_state *state;
637 struct ctdb_req_control request;
639 req = tevent_req_create(mem_ctx, &state, struct recover_db_state);
640 if (req == NULL) {
641 return NULL;
644 state->ev = ev;
645 state->client = client;
646 state->tun_list = tun_list;
647 state->pnn_list = pnn_list;
648 state->count = count;
649 state->db_id = db_id;
650 state->persistent = persistent;
652 state->destnode = ctdb_client_pnn(client);
653 state->transdb.db_id = db_id;
654 state->transdb.tid = generation;
656 ctdb_req_control_get_dbname(&request, db_id);
657 subreq = ctdb_client_control_send(state, ev, client, state->destnode,
658 TIMEOUT(), &request);
659 if (tevent_req_nomem(subreq, req)) {
660 return tevent_req_post(req, ev);
662 tevent_req_set_callback(subreq, recover_db_name_done, req);
664 return req;
667 static void recover_db_name_done(struct tevent_req *subreq)
669 struct tevent_req *req = tevent_req_callback_data(
670 subreq, struct tevent_req);
671 struct recover_db_state *state = tevent_req_data(
672 req, struct recover_db_state);
673 struct ctdb_reply_control *reply;
674 struct ctdb_req_control request;
675 int ret;
676 bool status;
678 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
679 TALLOC_FREE(subreq);
680 if (! status) {
681 LOG("control GET_DBNAME failed for db=0x%x, ret=%d\n",
682 state->db_id, ret);
683 tevent_req_error(req, ret);
684 return;
687 ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name);
688 if (ret != 0) {
689 LOG("control GET_DBNAME failed for db=0x%x, ret=%d\n",
690 state->db_id, ret);
691 tevent_req_error(req, EPROTO);
692 return;
695 talloc_free(reply);
697 ctdb_req_control_getdbpath(&request, state->db_id);
698 subreq = ctdb_client_control_send(state, state->ev, state->client,
699 state->destnode, TIMEOUT(),
700 &request);
701 if (tevent_req_nomem(subreq, req)) {
702 return;
704 tevent_req_set_callback(subreq, recover_db_path_done, req);
707 static void recover_db_path_done(struct tevent_req *subreq)
709 struct tevent_req *req = tevent_req_callback_data(
710 subreq, struct tevent_req);
711 struct recover_db_state *state = tevent_req_data(
712 req, struct recover_db_state);
713 struct ctdb_reply_control *reply;
714 struct ctdb_req_control request;
715 int ret;
716 bool status;
718 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
719 TALLOC_FREE(subreq);
720 if (! status) {
721 LOG("control GETDBPATH failed for db %s, ret=%d\n",
722 state->db_name, ret);
723 tevent_req_error(req, ret);
724 return;
727 ret = ctdb_reply_control_getdbpath(reply, state, &state->db_path);
728 if (ret != 0) {
729 LOG("control GETDBPATH failed for db %s, ret=%d\n",
730 state->db_name, ret);
731 tevent_req_error(req, EPROTO);
732 return;
735 talloc_free(reply);
737 ctdb_req_control_db_freeze(&request, state->db_id);
738 subreq = ctdb_client_control_multi_send(state, state->ev,
739 state->client,
740 state->pnn_list, state->count,
741 TIMEOUT(), &request);
742 if (tevent_req_nomem(subreq, req)) {
743 return;
745 tevent_req_set_callback(subreq, recover_db_freeze_done, req);
748 static void recover_db_freeze_done(struct tevent_req *subreq)
750 struct tevent_req *req = tevent_req_callback_data(
751 subreq, struct tevent_req);
752 struct recover_db_state *state = tevent_req_data(
753 req, struct recover_db_state);
754 struct ctdb_req_control request;
755 int *err_list;
756 int ret;
757 bool status;
759 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
760 NULL);
761 TALLOC_FREE(subreq);
762 if (! status) {
763 int ret2;
764 uint32_t pnn;
766 ret2 = ctdb_client_control_multi_error(state->pnn_list,
767 state->count, err_list,
768 &pnn);
769 if (ret2 != 0) {
770 LOG("control FREEZE_DB failed for db %s on node %u,"
771 " ret=%d\n", state->db_name, pnn, ret2);
772 } else {
773 LOG("control FREEZE_DB failed for db %s, ret=%d\n",
774 state->db_name, ret);
776 tevent_req_error(req, ret);
777 return;
780 ctdb_req_control_db_transaction_start(&request, &state->transdb);
781 subreq = ctdb_client_control_multi_send(state, state->ev,
782 state->client,
783 state->pnn_list, state->count,
784 TIMEOUT(), &request);
785 if (tevent_req_nomem(subreq, req)) {
786 return;
788 tevent_req_set_callback(subreq, recover_db_transaction_started, req);
791 static void recover_db_transaction_started(struct tevent_req *subreq)
793 struct tevent_req *req = tevent_req_callback_data(
794 subreq, struct tevent_req);
795 struct recover_db_state *state = tevent_req_data(
796 req, struct recover_db_state);
797 int *err_list;
798 int ret;
799 bool status;
801 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
802 NULL);
803 TALLOC_FREE(subreq);
804 if (! status) {
805 int ret2;
806 uint32_t pnn;
808 ret2 = ctdb_client_control_multi_error(state->pnn_list,
809 state->count,
810 err_list, &pnn);
811 if (ret2 != 0) {
812 LOG("control TRANSACTION_DB failed for db=%s,"
813 " ret=%d\n", state->db_name, pnn, ret2);
814 } else {
815 LOG("control TRANSACTION_DB failed for db=%s,"
816 " ret=%d\n", state->db_name, ret);
818 tevent_req_error(req, ret);
819 return;
822 state->recdb = recdb_create(state, state->db_id, state->db_name,
823 state->db_path,
824 state->tun_list->database_hash_size,
825 state->persistent);
826 if (tevent_req_nomem(state->recdb, req)) {
827 return;
830 if (state->persistent && state->tun_list->recover_pdb_by_seqnum != 0) {
831 subreq = collect_highseqnum_db_send(
832 state, state->ev, state->client,
833 state->pnn_list, state->count,
834 state->db_id, state->recdb);
835 } else {
836 subreq = collect_all_db_send(
837 state, state->ev, state->client,
838 state->pnn_list, state->count,
839 state->db_id, state->recdb);
841 if (tevent_req_nomem(subreq, req)) {
842 return;
844 tevent_req_set_callback(subreq, recover_db_collect_done, req);
847 static void recover_db_collect_done(struct tevent_req *subreq)
849 struct tevent_req *req = tevent_req_callback_data(
850 subreq, struct tevent_req);
851 struct recover_db_state *state = tevent_req_data(
852 req, struct recover_db_state);
853 struct ctdb_req_control request;
854 int ret;
855 bool status;
857 if (state->persistent && state->tun_list->recover_pdb_by_seqnum != 0) {
858 status = collect_highseqnum_db_recv(subreq, &ret);
859 } else {
860 status = collect_all_db_recv(subreq, &ret);
862 TALLOC_FREE(subreq);
863 if (! status) {
864 tevent_req_error(req, ret);
865 return;
868 ctdb_req_control_wipe_database(&request, &state->transdb);
869 subreq = ctdb_client_control_multi_send(state, state->ev,
870 state->client,
871 state->pnn_list, state->count,
872 TIMEOUT(), &request);
873 if (tevent_req_nomem(subreq, req)) {
874 return;
876 tevent_req_set_callback(subreq, recover_db_wipedb_done, req);
879 static void recover_db_wipedb_done(struct tevent_req *subreq)
881 struct tevent_req *req = tevent_req_callback_data(
882 subreq, struct tevent_req);
883 struct recover_db_state *state = tevent_req_data(
884 req, struct recover_db_state);
885 struct ctdb_req_control request;
886 int *err_list;
887 int ret;
888 bool status;
890 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
891 NULL);
892 TALLOC_FREE(subreq);
893 if (! status) {
894 int ret2;
895 uint32_t pnn;
897 ret2 = ctdb_client_control_multi_error(state->pnn_list,
898 state->count,
899 err_list, &pnn);
900 if (ret2 != 0) {
901 LOG("control WIPEDB failed for db %s on node %u,"
902 " ret=%d\n", state->db_name, pnn, ret2);
903 } else {
904 LOG("control WIPEDB failed for db %s, ret=%d\n",
905 state->db_name, pnn, ret);
907 tevent_req_error(req, ret);
908 return;
911 state->recbuf = recdb_records(state->recdb, state, state->destnode);
912 if (tevent_req_nomem(state->recbuf, req)) {
913 return;
916 TALLOC_FREE(state->recdb);
918 ctdb_req_control_push_db(&request, state->recbuf);
919 subreq = ctdb_client_control_multi_send(state, state->ev,
920 state->client,
921 state->pnn_list, state->count,
922 TIMEOUT(), &request);
923 if (tevent_req_nomem(subreq, req)) {
924 return;
926 tevent_req_set_callback(subreq, recover_db_pushdb_done, req);
929 static void recover_db_pushdb_done(struct tevent_req *subreq)
931 struct tevent_req *req = tevent_req_callback_data(
932 subreq, struct tevent_req);
933 struct recover_db_state *state = tevent_req_data(
934 req, struct recover_db_state);
935 struct ctdb_req_control request;
936 int *err_list;
937 int ret;
938 bool status;
940 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
941 NULL);
942 TALLOC_FREE(subreq);
943 if (! status) {
944 int ret2;
945 uint32_t pnn;
947 ret2 = ctdb_client_control_multi_error(state->pnn_list,
948 state->count,
949 err_list, &pnn);
950 if (ret2 != 0) {
951 LOG("control PUSHDB failed for db %s on node %u,"
952 " ret=%d\n", state->db_name, pnn, ret2);
953 } else {
954 LOG("control PUSHDB failed for db %s, ret=%d\n",
955 state->db_name, ret);
957 tevent_req_error(req, ret);
958 return;
961 TALLOC_FREE(state->recbuf);
963 ctdb_req_control_db_transaction_commit(&request, &state->transdb);
964 subreq = ctdb_client_control_multi_send(state, state->ev,
965 state->client,
966 state->pnn_list, state->count,
967 TIMEOUT(), &request);
968 if (tevent_req_nomem(subreq, req)) {
969 return;
971 tevent_req_set_callback(subreq, recover_db_transaction_committed, req);
974 static void recover_db_transaction_committed(struct tevent_req *subreq)
976 struct tevent_req *req = tevent_req_callback_data(
977 subreq, struct tevent_req);
978 struct recover_db_state *state = tevent_req_data(
979 req, struct recover_db_state);
980 struct ctdb_req_control request;
981 int *err_list;
982 int ret;
983 bool status;
985 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
986 NULL);
987 TALLOC_FREE(subreq);
988 if (! status) {
989 int ret2;
990 uint32_t pnn;
992 ret2 = ctdb_client_control_multi_error(state->pnn_list,
993 state->count,
994 err_list, &pnn);
995 if (ret2 != 0) {
996 LOG("control DB_TRANSACTION_COMMIT failed for db %s"
997 " on node %u, ret=%d\n", state->db_name, pnn, ret2);
998 } else {
999 LOG("control DB_TRANSACTION_COMMIT failed for db %s,"
1000 " ret=%d\n", state->db_name, ret);
1002 tevent_req_error(req, ret);
1003 return;
1006 ctdb_req_control_db_thaw(&request, state->db_id);
1007 subreq = ctdb_client_control_multi_send(state, state->ev,
1008 state->client,
1009 state->pnn_list, state->count,
1010 TIMEOUT(), &request);
1011 if (tevent_req_nomem(subreq, req)) {
1012 return;
1014 tevent_req_set_callback(subreq, recover_db_thaw_done, req);
1017 static void recover_db_thaw_done(struct tevent_req *subreq)
1019 struct tevent_req *req = tevent_req_callback_data(
1020 subreq, struct tevent_req);
1021 struct recover_db_state *state = tevent_req_data(
1022 req, struct recover_db_state);
1023 int *err_list;
1024 int ret;
1025 bool status;
1027 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
1028 NULL);
1029 TALLOC_FREE(subreq);
1030 if (! status) {
1031 int ret2;
1032 uint32_t pnn;
1034 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1035 state->count,
1036 err_list, &pnn);
1037 if (ret2 != 0) {
1038 LOG("control DB_THAW failed for db %s on node %u,"
1039 " ret=%d\n", state->db_name, pnn, ret2);
1040 } else {
1041 LOG("control DB_THAW failed for db %s, ret=%d\n",
1042 state->db_name, ret);
1044 tevent_req_error(req, ret);
1045 return;
1048 tevent_req_done(req);
/*
 * Collect the result of recover_db_send().
 *
 * Returns true if the database was recovered, false if any step failed.
 * The error code itself is not reported to the caller.
 */
static bool recover_db_recv(struct tevent_req *req)
{
	int unused_err;

	return !tevent_req_is_unix_error(req, &unused_err);
}
/*
 * Start database recovery for each database
 *
 * Try to recover each database 5 times before failing recovery.
 */
/* Request state for db_recovery_send(): recovery of all databases. */
struct db_recovery_state {
	struct tevent_context *ev;
	struct ctdb_dbid_map *dbmap;	/* databases to recover */
	int num_replies;		/* databases finished, successfully or not */
	int num_failed;			/* databases that could not be recovered */
};
/*
 * Per-database bookkeeping for db_recovery_send(); keeps everything needed
 * to re-issue recover_db_send() when an attempt fails.
 */
struct db_recovery_one_state {
	struct tevent_req *req;		/* the overall db_recovery request */
	struct ctdb_client_context *client;
	struct ctdb_dbid_map *dbmap;
	struct ctdb_tunable_list *tun_list;
	uint32_t *pnn_list;
	int count;			/* number of entries in pnn_list */
	uint32_t generation;
	uint32_t db_id;
	bool persistent;
	int num_fails;			/* failed attempts so far for this database */
};
/* Completion callback for each per-database recovery attempt. */
static void db_recovery_one_done(struct tevent_req *subreq);
1091 static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx,
1092 struct tevent_context *ev,
1093 struct ctdb_client_context *client,
1094 struct ctdb_dbid_map *dbmap,
1095 struct ctdb_tunable_list *tun_list,
1096 uint32_t *pnn_list, int count,
1097 uint32_t generation)
1099 struct tevent_req *req, *subreq;
1100 struct db_recovery_state *state;
1101 int i;
1103 req = tevent_req_create(mem_ctx, &state, struct db_recovery_state);
1104 if (req == NULL) {
1105 return NULL;
1108 state->ev = ev;
1109 state->dbmap = dbmap;
1110 state->num_replies = 0;
1111 state->num_failed = 0;
1113 if (dbmap->num == 0) {
1114 tevent_req_done(req);
1115 return tevent_req_post(req, ev);
1118 for (i=0; i<dbmap->num; i++) {
1119 struct db_recovery_one_state *substate;
1121 substate = talloc_zero(state, struct db_recovery_one_state);
1122 if (tevent_req_nomem(substate, req)) {
1123 return tevent_req_post(req, ev);
1126 substate->req = req;
1127 substate->client = client;
1128 substate->dbmap = dbmap;
1129 substate->tun_list = tun_list;
1130 substate->pnn_list = pnn_list;
1131 substate->count = count;
1132 substate->generation = generation;
1133 substate->db_id = dbmap->dbs[i].db_id;
1134 substate->persistent = dbmap->dbs[i].flags &
1135 CTDB_DB_FLAGS_PERSISTENT;
1137 subreq = recover_db_send(state, ev, client, tun_list,
1138 pnn_list, count, generation,
1139 substate->db_id,
1140 substate->persistent);
1141 if (tevent_req_nomem(subreq, req)) {
1142 return tevent_req_post(req, ev);
1144 tevent_req_set_callback(subreq, db_recovery_one_done,
1145 substate);
1146 LOG("recover database 0x%08x\n", substate->db_id);
1149 return req;
1152 static void db_recovery_one_done(struct tevent_req *subreq)
1154 struct db_recovery_one_state *substate = tevent_req_callback_data(
1155 subreq, struct db_recovery_one_state);
1156 struct tevent_req *req = substate->req;
1157 struct db_recovery_state *state = tevent_req_data(
1158 req, struct db_recovery_state);
1159 bool status;
1161 status = recover_db_recv(subreq);
1162 TALLOC_FREE(subreq);
1164 if (status) {
1165 talloc_free(substate);
1166 goto done;
1169 substate->num_fails += 1;
1170 if (substate->num_fails < 5) {
1171 subreq = recover_db_send(state, state->ev, substate->client,
1172 substate->tun_list,
1173 substate->pnn_list, substate->count,
1174 substate->generation, substate->db_id,
1175 substate->persistent);
1176 if (tevent_req_nomem(subreq, req)) {
1177 goto failed;
1179 tevent_req_set_callback(subreq, db_recovery_one_done, substate);
1180 LOG("recover database 0x%08x, attempt %d\n", substate->db_id,
1181 substate->num_fails+1);
1182 return;
1185 failed:
1186 state->num_failed += 1;
1188 done:
1189 state->num_replies += 1;
1191 if (state->num_replies == state->dbmap->num) {
1192 tevent_req_done(req);
1196 static bool db_recovery_recv(struct tevent_req *req, int *count)
1198 struct db_recovery_state *state = tevent_req_data(
1199 req, struct db_recovery_state);
1200 int err;
1202 if (tevent_req_is_unix_error(req, &err)) {
1203 *count = 0;
1204 return false;
1207 *count = state->num_replies - state->num_failed;
1209 if (state->num_failed > 0) {
1210 return false;
1213 return true;
/*
 * Run the parallel database recovery
 *
 *  - Get tunables
 *  - Get nodemap
 *  - Get vnnmap
 *  - Get capabilities from all nodes
 *  - Get dbmap
 *  - Set RECOVERY_ACTIVE
 *  - Send START_RECOVERY
 *  - Update vnnmap on all nodes
 *  - Run database recovery
 *  - Send END_RECOVERY
 *  - Set RECOVERY_NORMAL
 */
/* State carried through every step of the recovery request chain */
struct recovery_state {
	/* Event context and client connection handed to recovery_send() */
	struct tevent_context *ev;
	struct ctdb_client_context *client;

	/* Generation handed to recovery_send(); stamped on the new VNNMAP */
	uint32_t generation;

	/* Result of list_of_active_nodes(): the pnn list and its length */
	uint32_t *pnn_list;
	int count;

	/* ctdb_client_pnn(client); target of the single-node controls */
	uint32_t destnode;

	/* Parsed GET_NODEMAP reply */
	struct ctdb_node_map *nodemap;

	/*
	 * Capabilities array with nodemap->num entries; filled by pnn,
	 * read by nodemap index
	 */
	uint32_t *caps;

	/* Parsed GET_ALL_TUNABLES reply */
	struct ctdb_tunable_list *tun_list;

	/* Parsed GETVNNMAP reply, later replaced by the newly built map */
	struct ctdb_vnn_map *vnnmap;

	/* Parsed GET_DBMAP reply */
	struct ctdb_dbid_map *dbmap;
};
/*
 * Completion callbacks of the recovery request, declared in the order in
 * which the steps are chained.
 */
static void recovery_tunables_done(struct tevent_req *subreq);
static void recovery_nodemap_done(struct tevent_req *subreq);
static void recovery_vnnmap_done(struct tevent_req *subreq);
static void recovery_capabilities_done(struct tevent_req *subreq);
static void recovery_dbmap_done(struct tevent_req *subreq);
static void recovery_active_done(struct tevent_req *subreq);
static void recovery_start_recovery_done(struct tevent_req *subreq);
static void recovery_vnnmap_update_done(struct tevent_req *subreq);
static void recovery_db_recovery_done(struct tevent_req *subreq);
static void recovery_normal_done(struct tevent_req *subreq);
static void recovery_end_recovery_done(struct tevent_req *subreq);
1259 static struct tevent_req *recovery_send(TALLOC_CTX *mem_ctx,
1260 struct tevent_context *ev,
1261 struct ctdb_client_context *client,
1262 uint32_t generation)
1264 struct tevent_req *req, *subreq;
1265 struct recovery_state *state;
1266 struct ctdb_req_control request;
1268 req = tevent_req_create(mem_ctx, &state, struct recovery_state);
1269 if (req == NULL) {
1270 return NULL;
1273 state->ev = ev;
1274 state->client = client;
1275 state->generation = generation;
1276 state->destnode = ctdb_client_pnn(client);
1278 ctdb_req_control_get_all_tunables(&request);
1279 subreq = ctdb_client_control_send(state, state->ev, state->client,
1280 state->destnode, TIMEOUT(),
1281 &request);
1282 if (tevent_req_nomem(subreq, req)) {
1283 return tevent_req_post(req, ev);
1285 tevent_req_set_callback(subreq, recovery_tunables_done, req);
1287 return req;
1290 static void recovery_tunables_done(struct tevent_req *subreq)
1292 struct tevent_req *req = tevent_req_callback_data(
1293 subreq, struct tevent_req);
1294 struct recovery_state *state = tevent_req_data(
1295 req, struct recovery_state);
1296 struct ctdb_reply_control *reply;
1297 struct ctdb_req_control request;
1298 int ret;
1299 bool status;
1301 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
1302 TALLOC_FREE(subreq);
1303 if (! status) {
1304 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
1305 tevent_req_error(req, ret);
1306 return;
1309 ret = ctdb_reply_control_get_all_tunables(reply, state,
1310 &state->tun_list);
1311 if (ret != 0) {
1312 LOG("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
1313 tevent_req_error(req, EPROTO);
1314 return;
1317 talloc_free(reply);
1319 recover_timeout = state->tun_list->recover_timeout;
1321 ctdb_req_control_get_nodemap(&request);
1322 subreq = ctdb_client_control_send(state, state->ev, state->client,
1323 state->destnode, TIMEOUT(),
1324 &request);
1325 if (tevent_req_nomem(subreq, req)) {
1326 return;
1328 tevent_req_set_callback(subreq, recovery_nodemap_done, req);
1331 static void recovery_nodemap_done(struct tevent_req *subreq)
1333 struct tevent_req *req = tevent_req_callback_data(
1334 subreq, struct tevent_req);
1335 struct recovery_state *state = tevent_req_data(
1336 req, struct recovery_state);
1337 struct ctdb_reply_control *reply;
1338 struct ctdb_req_control request;
1339 bool status;
1340 int ret;
1342 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
1343 TALLOC_FREE(subreq);
1344 if (! status) {
1345 LOG("control GET_NODEMAP failed to node %u, ret=%d\n",
1346 state->destnode, ret);
1347 tevent_req_error(req, ret);
1348 return;
1351 ret = ctdb_reply_control_get_nodemap(reply, state, &state->nodemap);
1352 if (ret != 0) {
1353 LOG("control GET_NODEMAP failed, ret=%d\n", ret);
1354 tevent_req_error(req, ret);
1355 return;
1358 state->count = list_of_active_nodes(state->nodemap, CTDB_UNKNOWN_PNN,
1359 state, &state->pnn_list);
1360 if (state->count <= 0) {
1361 tevent_req_error(req, ENOMEM);
1362 return;
1365 ctdb_req_control_getvnnmap(&request);
1366 subreq = ctdb_client_control_send(state, state->ev, state->client,
1367 state->destnode, TIMEOUT(),
1368 &request);
1369 if (tevent_req_nomem(subreq, req)) {
1370 return;
1372 tevent_req_set_callback(subreq, recovery_vnnmap_done, req);
1375 static void recovery_vnnmap_done(struct tevent_req *subreq)
1377 struct tevent_req *req = tevent_req_callback_data(
1378 subreq, struct tevent_req);
1379 struct recovery_state *state = tevent_req_data(
1380 req, struct recovery_state);
1381 struct ctdb_reply_control *reply;
1382 struct ctdb_req_control request;
1383 bool status;
1384 int ret;
1386 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
1387 TALLOC_FREE(subreq);
1388 if (! status) {
1389 LOG("control GETVNNMAP failed to node %u, ret=%d\n",
1390 state->destnode, ret);
1391 tevent_req_error(req, ret);
1392 return;
1395 ret = ctdb_reply_control_getvnnmap(reply, state, &state->vnnmap);
1396 if (ret != 0) {
1397 LOG("control GETVNNMAP failed, ret=%d\n", ret);
1398 tevent_req_error(req, ret);
1399 return;
1402 ctdb_req_control_get_capabilities(&request);
1403 subreq = ctdb_client_control_multi_send(state, state->ev,
1404 state->client,
1405 state->pnn_list, state->count,
1406 TIMEOUT(), &request);
1407 if (tevent_req_nomem(subreq, req)) {
1408 return;
1410 tevent_req_set_callback(subreq, recovery_capabilities_done, req);
1413 static void recovery_capabilities_done(struct tevent_req *subreq)
1415 struct tevent_req *req = tevent_req_callback_data(
1416 subreq, struct tevent_req);
1417 struct recovery_state *state = tevent_req_data(
1418 req, struct recovery_state);
1419 struct ctdb_reply_control **reply;
1420 struct ctdb_req_control request;
1421 int *err_list;
1422 int ret, i;
1423 bool status;
1425 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
1426 &reply);
1427 TALLOC_FREE(subreq);
1428 if (! status) {
1429 int ret2;
1430 uint32_t pnn;
1432 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1433 state->count,
1434 err_list, &pnn);
1435 if (ret2 != 0) {
1436 LOG("control GET_CAPABILITIES failed on node %u,"
1437 " ret=%d\n", pnn, ret2);
1438 } else {
1439 LOG("control GET_CAPABILITIES failed, ret=%d\n", ret);
1441 tevent_req_error(req, ret);
1442 return;
1445 /* Make the array size same as nodemap */
1446 state->caps = talloc_zero_array(state, uint32_t,
1447 state->nodemap->num);
1448 if (tevent_req_nomem(state->caps, req)) {
1449 return;
1452 for (i=0; i<state->count; i++) {
1453 uint32_t pnn;
1455 pnn = state->pnn_list[i];
1456 ret = ctdb_reply_control_get_capabilities(reply[i],
1457 &state->caps[pnn]);
1458 if (ret != 0) {
1459 LOG("control GET_CAPABILITIES failed on node %u\n", pnn);
1460 tevent_req_error(req, EPROTO);
1461 return;
1465 talloc_free(reply);
1467 ctdb_req_control_get_dbmap(&request);
1468 subreq = ctdb_client_control_send(state, state->ev, state->client,
1469 state->destnode, TIMEOUT(),
1470 &request);
1471 if (tevent_req_nomem(subreq, req)) {
1472 return;
1474 tevent_req_set_callback(subreq, recovery_dbmap_done, req);
1477 static void recovery_dbmap_done(struct tevent_req *subreq)
1479 struct tevent_req *req = tevent_req_callback_data(
1480 subreq, struct tevent_req);
1481 struct recovery_state *state = tevent_req_data(
1482 req, struct recovery_state);
1483 struct ctdb_reply_control *reply;
1484 struct ctdb_req_control request;
1485 int ret;
1486 bool status;
1488 status = ctdb_client_control_recv(subreq, &ret, state, &reply);
1489 TALLOC_FREE(subreq);
1490 if (! status) {
1491 LOG("control GET_DBMAP failed to node %u, ret=%d\n",
1492 state->destnode, ret);
1493 tevent_req_error(req, ret);
1494 return;
1497 ret = ctdb_reply_control_get_dbmap(reply, state, &state->dbmap);
1498 if (ret != 0) {
1499 LOG("control GET_DBMAP failed, ret=%d\n", ret);
1500 tevent_req_error(req, ret);
1501 return;
1504 ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE);
1505 subreq = ctdb_client_control_multi_send(state, state->ev,
1506 state->client,
1507 state->pnn_list, state->count,
1508 TIMEOUT(), &request);
1509 if (tevent_req_nomem(subreq, req)) {
1510 return;
1512 tevent_req_set_callback(subreq, recovery_active_done, req);
1515 static void recovery_active_done(struct tevent_req *subreq)
1517 struct tevent_req *req = tevent_req_callback_data(
1518 subreq, struct tevent_req);
1519 struct recovery_state *state = tevent_req_data(
1520 req, struct recovery_state);
1521 struct ctdb_req_control request;
1522 struct ctdb_vnn_map *vnnmap;
1523 int *err_list;
1524 int ret, count, i;
1525 bool status;
1527 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
1528 NULL);
1529 TALLOC_FREE(subreq);
1530 if (! status) {
1531 int ret2;
1532 uint32_t pnn;
1534 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1535 state->count,
1536 err_list, &pnn);
1537 if (ret2 != 0) {
1538 LOG("failed to set recovery mode to ACTIVE on node %u,"
1539 " ret=%d\n", pnn, ret2);
1540 } else {
1541 LOG("failed to set recovery mode to ACTIVE, ret=%d\n",
1542 ret);
1544 tevent_req_error(req, ret);
1545 return;
1548 LOG("set recovery mode to ACTIVE\n");
1550 /* Calculate new VNNMAP */
1551 count = 0;
1552 for (i=0; i<state->nodemap->num; i++) {
1553 if (state->nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
1554 continue;
1556 if (!(state->caps[i] & CTDB_CAP_LMASTER)) {
1557 continue;
1559 count += 1;
1562 if (count == 0) {
1563 LOG("no active lmasters found. Adding recmaster anyway\n");
1566 vnnmap = talloc_zero(state, struct ctdb_vnn_map);
1567 if (tevent_req_nomem(vnnmap, req)) {
1568 return;
1571 vnnmap->size = (count == 0 ? 1 : count);
1572 vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size);
1573 if (tevent_req_nomem(vnnmap->map, req)) {
1574 return;
1577 if (count == 0) {
1578 vnnmap->map[0] = state->destnode;
1579 } else {
1580 count = 0;
1581 for (i=0; i<state->nodemap->num; i++) {
1582 if (state->nodemap->node[i].flags &
1583 NODE_FLAGS_INACTIVE) {
1584 continue;
1586 if (!(state->caps[i] & CTDB_CAP_LMASTER)) {
1587 continue;
1590 vnnmap->map[count] = state->nodemap->node[i].pnn;
1591 count += 1;
1595 vnnmap->generation = state->generation;
1597 talloc_free(state->vnnmap);
1598 state->vnnmap = vnnmap;
1600 ctdb_req_control_start_recovery(&request);
1601 subreq = ctdb_client_control_multi_send(state, state->ev,
1602 state->client,
1603 state->pnn_list, state->count,
1604 TIMEOUT(), &request);
1605 if (tevent_req_nomem(subreq, req)) {
1606 return;
1608 tevent_req_set_callback(subreq, recovery_start_recovery_done, req);
1611 static void recovery_start_recovery_done(struct tevent_req *subreq)
1613 struct tevent_req *req = tevent_req_callback_data(
1614 subreq, struct tevent_req);
1615 struct recovery_state *state = tevent_req_data(
1616 req, struct recovery_state);
1617 struct ctdb_req_control request;
1618 int *err_list;
1619 int ret;
1620 bool status;
1622 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
1623 NULL);
1624 TALLOC_FREE(subreq);
1625 if (! status) {
1626 int ret2;
1627 uint32_t pnn;
1629 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1630 state->count,
1631 err_list, &pnn);
1632 if (ret2 != 0) {
1633 LOG("failed to run start_recovery event on node %u,"
1634 " ret=%d\n", pnn, ret2);
1635 } else {
1636 LOG("failed to run start_recovery event, ret=%d\n",
1637 ret);
1639 tevent_req_error(req, ret);
1640 return;
1643 LOG("start_recovery event finished\n");
1645 ctdb_req_control_setvnnmap(&request, state->vnnmap);
1646 subreq = ctdb_client_control_multi_send(state, state->ev,
1647 state->client,
1648 state->pnn_list, state->count,
1649 TIMEOUT(), &request);
1650 if (tevent_req_nomem(subreq, req)) {
1651 return;
1653 tevent_req_set_callback(subreq, recovery_vnnmap_update_done, req);
1656 static void recovery_vnnmap_update_done(struct tevent_req *subreq)
1658 struct tevent_req *req = tevent_req_callback_data(
1659 subreq, struct tevent_req);
1660 struct recovery_state *state = tevent_req_data(
1661 req, struct recovery_state);
1662 int *err_list;
1663 int ret;
1664 bool status;
1666 status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list,
1667 NULL);
1668 TALLOC_FREE(subreq);
1669 if (! status) {
1670 int ret2;
1671 uint32_t pnn;
1673 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1674 state->count,
1675 err_list, &pnn);
1676 if (ret2 != 0) {
1677 LOG("failed to update VNNMAP on node %u, ret=%d\n",
1678 pnn, ret2);
1679 } else {
1680 LOG("failed to update VNNMAP, ret=%d\n", ret);
1682 tevent_req_error(req, ret);
1683 return;
1686 LOG("updated VNNMAP\n");
1688 subreq = db_recovery_send(state, state->ev, state->client,
1689 state->dbmap, state->tun_list,
1690 state->pnn_list, state->count,
1691 state->vnnmap->generation);
1692 if (tevent_req_nomem(subreq, req)) {
1693 return;
1695 tevent_req_set_callback(subreq, recovery_db_recovery_done, req);
1698 static void recovery_db_recovery_done(struct tevent_req *subreq)
1700 struct tevent_req *req = tevent_req_callback_data(
1701 subreq, struct tevent_req);
1702 struct recovery_state *state = tevent_req_data(
1703 req, struct recovery_state);
1704 struct ctdb_req_control request;
1705 bool status;
1706 int count;
1708 status = db_recovery_recv(subreq, &count);
1709 TALLOC_FREE(subreq);
1711 LOG("%d databases recovered\n", count);
1713 if (! status) {
1714 tevent_req_error(req, EIO);
1715 return;
1718 ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_NORMAL);
1719 subreq = ctdb_client_control_multi_send(state, state->ev,
1720 state->client,
1721 state->pnn_list, state->count,
1722 TIMEOUT(), &request);
1723 if (tevent_req_nomem(subreq, req)) {
1724 return;
1726 tevent_req_set_callback(subreq, recovery_normal_done, req);
1729 static void recovery_normal_done(struct tevent_req *subreq)
1731 struct tevent_req *req = tevent_req_callback_data(
1732 subreq, struct tevent_req);
1733 struct recovery_state *state = tevent_req_data(
1734 req, struct recovery_state);
1735 struct ctdb_req_control request;
1736 int *err_list;
1737 int ret;
1738 bool status;
1740 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
1741 NULL);
1742 TALLOC_FREE(subreq);
1743 if (! status) {
1744 int ret2;
1745 uint32_t pnn;
1747 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1748 state->count,
1749 err_list, &pnn);
1750 if (ret2 != 0) {
1751 LOG("failed to set recovery mode to NORMAL on node %u,"
1752 " ret=%d\n", pnn, ret2);
1753 } else {
1754 LOG("failed to set recovery mode to NORMAL, ret=%d\n",
1755 ret);
1757 tevent_req_error(req, ret);
1758 return;
1761 LOG("set recovery mode to NORMAL\n");
1763 ctdb_req_control_end_recovery(&request);
1764 subreq = ctdb_client_control_multi_send(state, state->ev,
1765 state->client,
1766 state->pnn_list, state->count,
1767 TIMEOUT(), &request);
1768 if (tevent_req_nomem(subreq, req)) {
1769 return;
1771 tevent_req_set_callback(subreq, recovery_end_recovery_done, req);
1774 static void recovery_end_recovery_done(struct tevent_req *subreq)
1776 struct tevent_req *req = tevent_req_callback_data(
1777 subreq, struct tevent_req);
1778 struct recovery_state *state = tevent_req_data(
1779 req, struct recovery_state);
1780 int *err_list;
1781 int ret;
1782 bool status;
1784 status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
1785 NULL);
1786 TALLOC_FREE(subreq);
1787 if (! status) {
1788 int ret2;
1789 uint32_t pnn;
1791 ret2 = ctdb_client_control_multi_error(state->pnn_list,
1792 state->count,
1793 err_list, &pnn);
1794 if (ret2 != 0) {
1795 LOG("failed to run recovered event on node %u,"
1796 " ret=%d\n", pnn, ret2);
1797 } else {
1798 LOG("failed to run recovered event, ret=%d\n", ret);
1800 tevent_req_error(req, ret);
1801 return;
1804 LOG("recovered event finished\n");
1806 tevent_req_done(req);
/*
 * Collect the result of recovery_send().
 *
 * If perr is not NULL, *perr is always written: the unix error the
 * request failed with, or 0 when it completed successfully.
 */
static void recovery_recv(struct tevent_req *req, int *perr)
{
	int err;

	if (! tevent_req_is_unix_error(req, &err)) {
		/* Success: do not leave the caller's variable stale */
		err = 0;
	}

	if (perr != NULL) {
		*perr = err;
	}
}
/* Print the command line synopsis for progname to stderr */
static void usage(const char *progname)
{
	fprintf(stderr,
		"\nUsage: %s <log-fd> <output-fd> "
		"<ctdb-socket-path> <generation>\n",
		progname);
}
1829 * Arguments - log fd, write fd, socket path, generation
1831 int main(int argc, char *argv[])
1833 int log_fd, write_fd;
1834 const char *sockpath;
1835 TALLOC_CTX *mem_ctx;
1836 struct tevent_context *ev;
1837 struct ctdb_client_context *client;
1838 int ret;
1839 struct tevent_req *req;
1840 uint32_t generation;
1842 if (argc != 5) {
1843 usage(argv[0]);
1844 exit(1);
1847 log_fd = atoi(argv[1]);
1848 if (log_fd != STDOUT_FILENO && log_fd != STDERR_FILENO) {
1849 close(STDOUT_FILENO);
1850 close(STDERR_FILENO);
1851 dup2(log_fd, STDOUT_FILENO);
1852 dup2(log_fd, STDERR_FILENO);
1854 close(log_fd);
1856 write_fd = atoi(argv[2]);
1857 sockpath = argv[3];
1858 generation = (uint32_t)strtoul(argv[4], NULL, 0);
1860 mem_ctx = talloc_new(NULL);
1861 if (mem_ctx == NULL) {
1862 LOG("talloc_new() failed\n");
1863 goto failed;
1866 ev = tevent_context_init(mem_ctx);
1867 if (ev == NULL) {
1868 LOG("tevent_context_init() failed\n");
1869 goto failed;
1872 ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
1873 if (ret != 0) {
1874 LOG("ctdb_client_init() failed, ret=%d\n", ret);
1875 goto failed;
1878 req = recovery_send(mem_ctx, ev, client, generation);
1879 if (req == NULL) {
1880 LOG("database_recover_send() failed\n");
1881 goto failed;
1884 if (! tevent_req_poll(req, ev)) {
1885 LOG("tevent_req_poll() failed\n");
1886 goto failed;
1889 recovery_recv(req, &ret);
1890 TALLOC_FREE(req);
1891 if (ret != 0) {
1892 LOG("database recovery failed, ret=%d\n", ret);
1893 goto failed;
1896 sys_write(write_fd, &ret, sizeof(ret));
1897 return 0;
1899 failed:
1900 talloc_free(mem_ctx);
1901 return 1;