build: Fix tdb.h path to enable building with system TDB library
[Samba/wip.git] / ctdb / server / ctdb_persistent.c
blobdcbb6e0c5c39346f881b4239071ab16620dee267
/* 
   persistent store logic

   Copyright (C) Andrew Tridgell  2007
   Copyright (C) Ronnie Sahlberg  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
21 #include "includes.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "db_wrap.h"
25 #include "tdb.h"
26 #include "../include/ctdb_private.h"
/*
  bookkeeping for one persistent store / transaction commit that is
  being pushed out to the other nodes with CTDB_CONTROL_UPDATE_RECORD
 */
struct ctdb_persistent_state {
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db; /* used by trans3_commit */
	struct ctdb_client *client;      /* used by trans3_commit */
	struct ctdb_req_control *c;      /* control that gets the final reply */
	const char *errormsg;            /* message of the last failed reply */
	uint32_t num_pending;            /* update_record controls not yet answered */
	int32_t status;                  /* status of the last failed reply */
	uint32_t num_failed;             /* replies that came back with status != 0 */
	uint32_t num_sent;               /* update_record controls sent out */
};
/*
  possible outcomes once the update has been sent to the other nodes:

  1) all nodes fail, and all nodes reply
  2) some nodes fail, all nodes reply
  3) some nodes timeout
  4) all nodes succeed
 */
47 called when a node has acknowledged a ctdb_control_update_record call
49 static void ctdb_persistent_callback(struct ctdb_context *ctdb,
50 int32_t status, TDB_DATA data,
51 const char *errormsg,
52 void *private_data)
54 struct ctdb_persistent_state *state = talloc_get_type(private_data,
55 struct ctdb_persistent_state);
56 enum ctdb_trans2_commit_error etype;
58 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
59 DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
60 "during recovery\n"));
61 return;
64 if (status != 0) {
65 DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
66 status, errormsg?errormsg:"no error message given"));
67 state->status = status;
68 state->errormsg = errormsg;
69 state->num_failed++;
72 * If a node failed to complete the update_record control,
73 * then either a recovery is already running or something
74 * bad is going on. So trigger a recovery and let the
75 * recovery finish the transaction, sending back the reply
76 * for the trans3_commit control to the client.
78 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
79 return;
82 state->num_pending--;
84 if (state->num_pending != 0) {
85 return;
88 if (state->num_failed == state->num_sent) {
89 etype = CTDB_TRANS2_COMMIT_ALLFAIL;
90 } else if (state->num_failed != 0) {
91 etype = CTDB_TRANS2_COMMIT_SOMEFAIL;
92 } else {
93 etype = CTDB_TRANS2_COMMIT_SUCCESS;
96 ctdb_request_control_reply(state->ctdb, state->c, NULL, etype, state->errormsg);
97 talloc_free(state);
101 called if persistent store times out
103 static void ctdb_persistent_store_timeout(struct event_context *ev, struct timed_event *te,
104 struct timeval t, void *private_data)
106 struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
108 if (state->ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
109 DEBUG(DEBUG_INFO, ("ctdb_persistent_store_timeout: ignoring "
110 "timeout during recovery\n"));
111 return;
114 ctdb_request_control_reply(state->ctdb, state->c, NULL, CTDB_TRANS2_COMMIT_TIMEOUT,
115 "timeout in ctdb_persistent_state");
117 talloc_free(state);
121 * Finish pending trans3 commit controls, i.e. send
122 * reply to the client. This is called by the end-recovery
123 * control to fix the situation when a recovery interrupts
124 * the usual progress of a transaction.
126 void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb)
128 struct ctdb_db_context *ctdb_db;
130 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
131 DEBUG(DEBUG_INFO, ("ctdb_persistent_finish_trans3_commits: "
132 "skipping execution when recovery is "
133 "active\n"));
134 return;
137 for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
138 struct ctdb_persistent_state *state;
140 if (ctdb_db->persistent_state == NULL) {
141 continue;
144 state = ctdb_db->persistent_state;
146 ctdb_request_control_reply(ctdb, state->c, NULL,
147 CTDB_TRANS2_COMMIT_SOMEFAIL,
148 "trans3 commit ended by recovery");
150 /* The destructor sets ctdb_db->persistent_state to NULL. */
151 talloc_free(state);
156 store a set of persistent records - called from a ctdb client when it has updated
157 some records in a persistent database. The client will have the record
158 locked for the duration of this call. The client is the dmaster when
159 this call is made
161 int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
162 struct ctdb_req_control *c,
163 TDB_DATA recdata, bool *async_reply)
165 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
166 struct ctdb_persistent_state *state;
167 int i;
168 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
169 struct ctdb_db_context *ctdb_db;
171 ctdb_db = find_ctdb_db(ctdb, m->db_id);
172 if (ctdb_db == NULL) {
173 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_commit: "
174 "Unknown database db_id[0x%08x]\n", m->db_id));
175 return -1;
178 if (client == NULL) {
179 DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store to a client. Returning error\n"));
180 return -1;
183 if (ctdb_db->unhealthy_reason) {
184 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_trans2_commit: %s\n",
185 ctdb_db->db_name, ctdb_db->unhealthy_reason));
186 return -1;
189 /* handling num_persistent_updates is a bit strange -
190 there are 3 cases
191 1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
192 They don't expect num_persistent_updates to be used at all
194 2) less old clients, which uses CTDB_CONTROL_START_PERSISTENT_UPDATE, and expected
195 this commit to then decrement it
197 3) new clients which use TRANS2 commit functions, and
198 expect this function to increment the counter, and
199 then have it decremented in ctdb_control_trans2_error
200 or ctdb_control_trans2_finished
202 switch (c->opcode) {
203 case CTDB_CONTROL_PERSISTENT_STORE:
204 if (ctdb_db->transaction_active) {
205 DEBUG(DEBUG_ERR, (__location__ " trans2_commit: a "
206 "transaction is active on database "
207 "db_id[0x%08x] - refusing persistent "
208 " store for client id[0x%08x]\n",
209 ctdb_db->db_id, client->client_id));
210 return -1;
212 if (client->num_persistent_updates > 0) {
213 client->num_persistent_updates--;
215 break;
216 case CTDB_CONTROL_TRANS2_COMMIT:
217 if (ctdb_db->transaction_active) {
218 DEBUG(DEBUG_ERR,(__location__ " trans2_commit: there is"
219 " already a transaction commit "
220 "active on db_id[0x%08x] - forbidding "
221 "client_id[0x%08x] to commit\n",
222 ctdb_db->db_id, client->client_id));
223 return -1;
225 if (client->db_id != 0) {
226 DEBUG(DEBUG_ERR,(__location__ " ERROR: trans2_commit: "
227 "client-db_id[0x%08x] != 0 "
228 "(client_id[0x%08x])\n",
229 client->db_id, client->client_id));
230 return -1;
232 client->num_persistent_updates++;
233 ctdb_db->transaction_active = true;
234 client->db_id = m->db_id;
235 DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] started to"
236 " commit transaction on db id[0x%08x]\n",
237 client->client_id, client->db_id));
238 break;
239 case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
240 /* already updated from the first commit */
241 if (client->db_id != m->db_id) {
242 DEBUG(DEBUG_ERR,(__location__ " ERROR: trans2_commit "
243 "retry: client-db_id[0x%08x] != "
244 "db_id[0x%08x] (client_id[0x%08x])\n",
245 client->db_id,
246 m->db_id, client->client_id));
247 return -1;
249 DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] started "
250 "transaction commit retry on "
251 "db_id[0x%08x]\n",
252 client->client_id, client->db_id));
253 break;
256 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
257 DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans2_commit when recovery active\n"));
258 return -1;
261 state = talloc_zero(ctdb, struct ctdb_persistent_state);
262 CTDB_NO_MEMORY(ctdb, state);
264 state->ctdb = ctdb;
265 state->c = c;
267 for (i=0;i<ctdb->vnn_map->size;i++) {
268 struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
269 int ret;
271 /* only send to active nodes */
272 if (node->flags & NODE_FLAGS_INACTIVE) {
273 continue;
276 /* don't send to ourselves */
277 if (node->pnn == ctdb->pnn) {
278 continue;
281 ret = ctdb_daemon_send_control(ctdb, node->pnn, 0, CTDB_CONTROL_UPDATE_RECORD,
282 c->client_id, 0, recdata,
283 ctdb_persistent_callback, state);
284 if (ret == -1) {
285 DEBUG(DEBUG_ERR,("Unable to send CTDB_CONTROL_UPDATE_RECORD to pnn %u\n", node->pnn));
286 talloc_free(state);
287 return -1;
290 state->num_pending++;
291 state->num_sent++;
294 if (state->num_pending == 0) {
295 talloc_free(state);
296 return 0;
299 /* we need to wait for the replies */
300 *async_reply = true;
302 /* need to keep the control structure around */
303 talloc_steal(state, c);
305 /* but we won't wait forever */
306 event_add_timed(ctdb->ev, state,
307 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
308 ctdb_persistent_store_timeout, state);
310 return 0;
313 static int ctdb_persistent_state_destructor(struct ctdb_persistent_state *state)
315 if (state->client != NULL) {
316 state->client->db_id = 0;
319 if (state->ctdb_db != NULL) {
320 state->ctdb_db->persistent_state = NULL;
323 return 0;
327 * Store a set of persistent records.
328 * This is used to roll out a transaction to all nodes.
330 int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
331 struct ctdb_req_control *c,
332 TDB_DATA recdata, bool *async_reply)
334 struct ctdb_client *client;
335 struct ctdb_persistent_state *state;
336 int i;
337 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
338 struct ctdb_db_context *ctdb_db;
340 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
341 DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
342 return -1;
345 client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
346 if (client == NULL) {
347 DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
348 "to a client. Returning error\n"));
349 return -1;
352 if (client->db_id != 0) {
353 DEBUG(DEBUG_ERR,(__location__ " ERROR: trans3_commit: "
354 "client-db_id[0x%08x] != 0 "
355 "(client_id[0x%08x]): trans3_commit active?\n",
356 client->db_id, client->client_id));
357 return -1;
360 ctdb_db = find_ctdb_db(ctdb, m->db_id);
361 if (ctdb_db == NULL) {
362 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
363 "Unknown database db_id[0x%08x]\n", m->db_id));
364 return -1;
367 if (ctdb_db->persistent_state != NULL) {
368 DEBUG(DEBUG_ERR, (__location__ " Error: "
369 "ctdb_control_trans3_commit "
370 "called while a transaction commit is "
371 "active. db_id[0x%08x]\n", m->db_id));
372 return -1;
375 ctdb_db->persistent_state = talloc_zero(ctdb_db,
376 struct ctdb_persistent_state);
377 CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);
379 client->db_id = m->db_id;
381 state = ctdb_db->persistent_state;
382 state->ctdb = ctdb;
383 state->ctdb_db = ctdb_db;
384 state->c = c;
385 state->client = client;
387 talloc_set_destructor(state, ctdb_persistent_state_destructor);
389 for (i = 0; i < ctdb->vnn_map->size; i++) {
390 struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
391 int ret;
393 /* only send to active nodes */
394 if (node->flags & NODE_FLAGS_INACTIVE) {
395 continue;
398 ret = ctdb_daemon_send_control(ctdb, node->pnn, 0,
399 CTDB_CONTROL_UPDATE_RECORD,
400 c->client_id, 0, recdata,
401 ctdb_persistent_callback,
402 state);
403 if (ret == -1) {
404 DEBUG(DEBUG_ERR,("Unable to send "
405 "CTDB_CONTROL_UPDATE_RECORD "
406 "to pnn %u\n", node->pnn));
407 talloc_free(state);
408 return -1;
411 state->num_pending++;
412 state->num_sent++;
415 if (state->num_pending == 0) {
416 talloc_free(state);
417 return 0;
420 /* we need to wait for the replies */
421 *async_reply = true;
423 /* need to keep the control structure around */
424 talloc_steal(state, c);
426 /* but we won't wait forever */
427 event_add_timed(ctdb->ev, state,
428 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
429 ctdb_persistent_store_timeout, state);
431 return 0;
436 called when a client has finished a local commit in a transaction to
437 a persistent database
439 int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb,
440 struct ctdb_req_control *c)
442 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
443 struct ctdb_db_context *ctdb_db;
445 ctdb_db = find_ctdb_db(ctdb, client->db_id);
446 if (ctdb_db == NULL) {
447 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_finish "
448 "Unknown database 0x%08x\n", client->db_id));
449 return -1;
451 if (!ctdb_db->transaction_active) {
452 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_finish: "
453 "Database 0x%08x has no transaction commit "
454 "started\n", client->db_id));
455 return -1;
458 ctdb_db->transaction_active = false;
459 client->db_id = 0;
461 if (client->num_persistent_updates == 0) {
462 DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n"));
463 DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n"));
464 client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
465 return -1;
467 client->num_persistent_updates--;
469 DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] finished "
470 "transaction commit db_id[0x%08x]\n",
471 client->client_id, ctdb_db->db_id));
473 return 0;
477 called when a client gets an error committing its database
478 during a transaction commit
480 int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
481 struct ctdb_req_control *c)
483 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
484 struct ctdb_db_context *ctdb_db;
486 ctdb_db = find_ctdb_db(ctdb, client->db_id);
487 if (ctdb_db == NULL) {
488 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_error: "
489 "Unknown database 0x%08x\n", client->db_id));
490 return -1;
492 if (!ctdb_db->transaction_active) {
493 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_error: "
494 "Database 0x%08x has no transaction commit "
495 "started\n", client->db_id));
496 return -1;
499 ctdb_db->transaction_active = false;
500 client->db_id = 0;
502 if (client->num_persistent_updates == 0) {
503 DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n"));
504 } else {
505 client->num_persistent_updates--;
508 DEBUG(DEBUG_ERR,(__location__ " An error occurred during transaction on"
509 " db_id[0x%08x] - forcing recovery\n",
510 ctdb_db->db_id));
511 client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
513 return 0;
517 * Tell whether a transaction is active on this node on the give DB.
519 int32_t ctdb_control_trans2_active(struct ctdb_context *ctdb,
520 struct ctdb_req_control *c,
521 uint32_t db_id)
523 struct ctdb_db_context *ctdb_db;
524 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
526 ctdb_db = find_ctdb_db(ctdb, db_id);
527 if (!ctdb_db) {
528 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
529 return -1;
532 if (client->db_id == db_id) {
533 return 0;
536 if (ctdb_db->transaction_active) {
537 return 1;
538 } else {
539 return 0;
544 backwards compatibility:
546 start a persistent store operation. passing both the key, header and
547 data to the daemon. If the client disconnects before it has issued
548 a persistent_update call to the daemon we trigger a full recovery
549 to ensure the databases are brought back in sync.
550 for now we ignore the recdata that the client has passed to us.
552 int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
553 struct ctdb_req_control *c,
554 TDB_DATA recdata)
556 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
558 if (client == NULL) {
559 DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
560 return -1;
563 client->num_persistent_updates++;
565 return 0;
569 backwards compatibility:
571 called to tell ctdbd that it is no longer doing a persistent update
573 int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
574 struct ctdb_req_control *c,
575 TDB_DATA recdata)
577 struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
579 if (client == NULL) {
580 DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
581 return -1;
584 if (client->num_persistent_updates > 0) {
585 client->num_persistent_updates--;
588 return 0;
593 backwards compatibility:
595 single record varient of ctdb_control_trans2_commit for older clients
597 int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
598 struct ctdb_req_control *c,
599 TDB_DATA recdata, bool *async_reply)
601 struct ctdb_marshall_buffer *m;
602 struct ctdb_rec_data *rec = (struct ctdb_rec_data *)recdata.dptr;
603 TDB_DATA key, data;
605 if (recdata.dsize != offsetof(struct ctdb_rec_data, data) +
606 rec->keylen + rec->datalen) {
607 DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata\n"));
608 return -1;
611 key.dptr = &rec->data[0];
612 key.dsize = rec->keylen;
613 data.dptr = &rec->data[rec->keylen];
614 data.dsize = rec->datalen;
616 m = ctdb_marshall_add(c, NULL, rec->reqid, rec->reqid, key, NULL, data);
617 CTDB_NO_MEMORY(ctdb, m);
619 return ctdb_control_trans2_commit(ctdb, c, ctdb_marshall_finish(m), async_reply);
622 static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
623 uint32_t db_id,
624 uint64_t *seqnum)
626 int32_t ret;
627 struct ctdb_db_context *ctdb_db;
628 const char *keyname = CTDB_DB_SEQNUM_KEY;
629 TDB_DATA key;
630 TDB_DATA data;
631 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
632 struct ctdb_ltdb_header header;
634 ctdb_db = find_ctdb_db(ctdb, db_id);
635 if (!ctdb_db) {
636 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
637 ret = -1;
638 goto done;
641 key.dptr = (uint8_t *)discard_const(keyname);
642 key.dsize = strlen(keyname) + 1;
644 ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
645 if (ret != 0) {
646 goto done;
649 if (data.dsize != sizeof(uint64_t)) {
650 *seqnum = 0;
651 goto done;
654 *seqnum = *(uint64_t *)data.dptr;
656 done:
657 talloc_free(mem_ctx);
658 return ret;
662 * Get the sequence number of a persistent database.
664 int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
665 TDB_DATA indata,
666 TDB_DATA *outdata)
668 uint32_t db_id;
669 int32_t ret;
670 uint64_t seqnum;
672 db_id = *(uint32_t *)indata.dptr;
673 ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
674 if (ret != 0) {
675 goto done;
678 outdata->dsize = sizeof(uint64_t);
679 outdata->dptr = (uint8_t *)talloc_zero(outdata, uint64_t);
680 if (outdata->dptr == NULL) {
681 ret = -1;
682 goto done;
685 *(outdata->dptr) = seqnum;
687 done:
688 return ret;