ldb_tdb: Use mem_ctx and so avoid leak onto long-term memory on duplicated add.
[Samba.git] / ctdb / server / ctdb_persistent.c
blobfc2865527474c58119955a9234de810b749cab96
1 /*
2 persistent store logic
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/filesys.h"
23 #include "system/network.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/tdb_wrap/tdb_wrap.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
34 #include "ctdb_private.h"
36 #include "common/reqid.h"
37 #include "common/common.h"
38 #include "common/logging.h"
/*
  Bookkeeping for one update that is being rolled out to the nodes
  while their replies are collected.
 */
struct ctdb_persistent_state {
	struct ctdb_context *ctdb;

	/* database the commit belongs to; used by trans3_commit */
	struct ctdb_db_context *ctdb_db;

	/* client that issued the commit; used by trans3_commit */
	struct ctdb_client *client;

	/* control request that gets answered once the outcome is known */
	struct ctdb_req_control_old *c;

	/* error message taken from the most recent failed reply */
	const char *errormsg;

	/* controls sent for which no successful reply has arrived yet */
	uint32_t num_pending;

	/* status taken from the most recent failed reply */
	int32_t status;

	/* number of failed replies seen so far */
	uint32_t num_failed;

	/* number of controls sent */
	uint32_t num_sent;
};
/*
  1) all nodes fail, and all nodes reply
  2) some nodes fail, all nodes reply
  3) some nodes timeout
  4) all nodes succeed
 */
59 called when a node has acknowledged a ctdb_control_update_record call
61 static void ctdb_persistent_callback(struct ctdb_context *ctdb,
62 int32_t status, TDB_DATA data,
63 const char *errormsg,
64 void *private_data)
66 struct ctdb_persistent_state *state = talloc_get_type(private_data,
67 struct ctdb_persistent_state);
69 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
70 DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
71 "during recovery\n"));
72 return;
75 if (status != 0) {
76 DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
77 status, errormsg?errormsg:"no error message given"));
78 state->status = status;
79 state->errormsg = errormsg;
80 state->num_failed++;
83 * If a node failed to complete the update_record control,
84 * then either a recovery is already running or something
85 * bad is going on. So trigger a recovery and let the
86 * recovery finish the transaction, sending back the reply
87 * for the trans3_commit control to the client.
89 ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
90 return;
93 state->num_pending--;
95 if (state->num_pending != 0) {
96 return;
99 ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, state->errormsg);
100 talloc_free(state);
104 called if persistent store times out
106 static void ctdb_persistent_store_timeout(struct tevent_context *ev,
107 struct tevent_timer *te,
108 struct timeval t, void *private_data)
110 struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
112 if (state->ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
113 DEBUG(DEBUG_INFO, ("ctdb_persistent_store_timeout: ignoring "
114 "timeout during recovery\n"));
115 return;
118 ctdb_request_control_reply(state->ctdb, state->c, NULL, 1,
119 "timeout in ctdb_persistent_state");
121 talloc_free(state);
125 * Finish pending trans3 commit controls, i.e. send
126 * reply to the client. This is called by the end-recovery
127 * control to fix the situation when a recovery interrupts
128 * the usual progress of a transaction.
130 void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb)
132 struct ctdb_db_context *ctdb_db;
134 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
135 DEBUG(DEBUG_INFO, ("ctdb_persistent_finish_trans3_commits: "
136 "skipping execution when recovery is "
137 "active\n"));
138 return;
141 for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
142 struct ctdb_persistent_state *state;
144 if (ctdb_db->persistent_state == NULL) {
145 continue;
148 state = ctdb_db->persistent_state;
150 ctdb_request_control_reply(ctdb, state->c, NULL, 2,
151 "trans3 commit ended by recovery");
153 /* The destructor sets ctdb_db->persistent_state to NULL. */
154 talloc_free(state);
158 static int ctdb_persistent_state_destructor(struct ctdb_persistent_state *state)
160 if (state->client != NULL) {
161 state->client->db_id = 0;
164 if (state->ctdb_db != NULL) {
165 state->ctdb_db->persistent_state = NULL;
168 return 0;
172 * Store a set of persistent records.
173 * This is used to roll out a transaction to all nodes.
175 int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
176 struct ctdb_req_control_old *c,
177 TDB_DATA recdata, bool *async_reply)
179 struct ctdb_client *client;
180 struct ctdb_persistent_state *state;
181 int i;
182 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
183 struct ctdb_db_context *ctdb_db;
185 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
186 DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
187 return -1;
190 client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
191 if (client == NULL) {
192 DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
193 "to a client. Returning error\n"));
194 return -1;
197 if (client->db_id != 0) {
198 DEBUG(DEBUG_ERR,(__location__ " ERROR: trans3_commit: "
199 "client-db_id[0x%08x] != 0 "
200 "(client_id[0x%08x]): trans3_commit active?\n",
201 client->db_id, client->client_id));
202 return -1;
205 ctdb_db = find_ctdb_db(ctdb, m->db_id);
206 if (ctdb_db == NULL) {
207 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
208 "Unknown database db_id[0x%08x]\n", m->db_id));
209 return -1;
212 if (ctdb_db->persistent_state != NULL) {
213 DEBUG(DEBUG_ERR, (__location__ " Error: "
214 "ctdb_control_trans3_commit "
215 "called while a transaction commit is "
216 "active. db_id[0x%08x]\n", m->db_id));
217 return -1;
220 ctdb_db->persistent_state = talloc_zero(ctdb_db,
221 struct ctdb_persistent_state);
222 CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);
224 client->db_id = m->db_id;
226 state = ctdb_db->persistent_state;
227 state->ctdb = ctdb;
228 state->ctdb_db = ctdb_db;
229 state->c = c;
230 state->client = client;
232 talloc_set_destructor(state, ctdb_persistent_state_destructor);
234 for (i = 0; i < ctdb->vnn_map->size; i++) {
235 struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
236 int ret;
238 /* only send to active nodes */
239 if (node->flags & NODE_FLAGS_INACTIVE) {
240 continue;
243 ret = ctdb_daemon_send_control(ctdb, node->pnn, 0,
244 CTDB_CONTROL_UPDATE_RECORD,
245 c->client_id, 0, recdata,
246 ctdb_persistent_callback,
247 state);
248 if (ret == -1) {
249 DEBUG(DEBUG_ERR,("Unable to send "
250 "CTDB_CONTROL_UPDATE_RECORD "
251 "to pnn %u\n", node->pnn));
252 talloc_free(state);
253 return -1;
256 state->num_pending++;
257 state->num_sent++;
260 if (state->num_pending == 0) {
261 talloc_free(state);
262 return 0;
265 /* we need to wait for the replies */
266 *async_reply = true;
268 /* need to keep the control structure around */
269 talloc_steal(state, c);
271 /* but we won't wait forever */
272 tevent_add_timer(ctdb->ev, state,
273 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
274 ctdb_persistent_store_timeout, state);
276 return 0;
281 backwards compatibility:
283 start a persistent store operation. passing both the key, header and
284 data to the daemon. If the client disconnects before it has issued
285 a persistent_update call to the daemon we trigger a full recovery
286 to ensure the databases are brought back in sync.
287 for now we ignore the recdata that the client has passed to us.
289 int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
290 struct ctdb_req_control_old *c,
291 TDB_DATA recdata)
293 struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
295 if (client == NULL) {
296 DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
297 return -1;
300 client->num_persistent_updates++;
302 return 0;
306 backwards compatibility:
308 called to tell ctdbd that it is no longer doing a persistent update
310 int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
311 struct ctdb_req_control_old *c,
312 TDB_DATA recdata)
314 struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
316 if (client == NULL) {
317 DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
318 return -1;
321 if (client->num_persistent_updates > 0) {
322 client->num_persistent_updates--;
325 return 0;
328 static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
329 uint32_t db_id,
330 uint64_t *seqnum)
332 int32_t ret;
333 struct ctdb_db_context *ctdb_db;
334 const char *keyname = CTDB_DB_SEQNUM_KEY;
335 TDB_DATA key;
336 TDB_DATA data;
337 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
338 struct ctdb_ltdb_header header;
340 ctdb_db = find_ctdb_db(ctdb, db_id);
341 if (!ctdb_db) {
342 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
343 ret = -1;
344 goto done;
347 if (! ctdb_db_allow_access(ctdb_db)) {
348 ret = -1;
349 goto done;
352 key.dptr = (uint8_t *)discard_const(keyname);
353 key.dsize = strlen(keyname) + 1;
355 ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
356 if (ret != 0) {
357 goto done;
360 if (data.dsize != sizeof(uint64_t)) {
361 *seqnum = 0;
362 goto done;
365 *seqnum = *(uint64_t *)data.dptr;
367 done:
368 talloc_free(mem_ctx);
369 return ret;
373 * Get the sequence number of a persistent database.
375 int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
376 TDB_DATA indata,
377 TDB_DATA *outdata)
379 uint32_t db_id;
380 int32_t ret;
381 uint64_t seqnum;
383 db_id = *(uint32_t *)indata.dptr;
384 ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
385 if (ret != 0) {
386 goto done;
389 outdata->dsize = sizeof(uint64_t);
390 outdata->dptr = talloc_memdup(outdata, &seqnum, sizeof(uint64_t));
391 if (outdata->dptr == NULL) {
392 ret = -1;
395 done:
396 return ret;