s4-dsdb/reanimate: Use 'show deleted' control in modify operations too
[Samba.git] / lib / ntdb / ntdb.c
blob51fbbcae64d3065d322cba0f12b021cc1645bd47
1 /*
2 Trivial Database 2: fetch, store and misc routines.
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 #include "private.h"
19 #ifndef HAVE_LIBREPLACE
20 #include <stdarg.h>
21 #endif
23 static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
24 ntdb_off_t off,
25 ntdb_len_t keylen,
26 ntdb_len_t datalen,
27 struct ntdb_used_record *rec)
29 uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
30 enum NTDB_ERROR ecode;
32 ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
33 keylen + dataroom);
34 if (ecode == NTDB_SUCCESS) {
35 ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
37 return ecode;
40 static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
41 struct hash_info *h,
42 NTDB_DATA key, NTDB_DATA dbuf,
43 ntdb_off_t old_off, ntdb_len_t old_room,
44 bool growing)
46 ntdb_off_t new_off;
47 enum NTDB_ERROR ecode;
49 /* Allocate a new record. */
50 new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
51 if (NTDB_OFF_IS_ERR(new_off)) {
52 return NTDB_OFF_TO_ERR(new_off);
55 /* We didn't like the existing one: remove it. */
56 if (old_off) {
57 ntdb->stats.frees++;
58 ecode = add_free_record(ntdb, old_off,
59 sizeof(struct ntdb_used_record)
60 + key.dsize + old_room,
61 NTDB_LOCK_WAIT, true);
62 if (ecode == NTDB_SUCCESS)
63 ecode = replace_in_hash(ntdb, h, new_off);
64 } else {
65 ecode = add_to_hash(ntdb, h, new_off);
67 if (ecode != NTDB_SUCCESS) {
68 return ecode;
71 new_off += sizeof(struct ntdb_used_record);
72 ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize);
73 if (ecode != NTDB_SUCCESS) {
74 return ecode;
77 new_off += key.dsize;
78 ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize);
79 if (ecode != NTDB_SUCCESS) {
80 return ecode;
83 if (ntdb->flags & NTDB_SEQNUM)
84 ntdb_inc_seqnum(ntdb);
86 return NTDB_SUCCESS;
89 static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
90 ntdb_off_t off,
91 NTDB_DATA dbuf,
92 ntdb_len_t extra)
94 enum NTDB_ERROR ecode;
96 ecode = ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize);
97 if (ecode == NTDB_SUCCESS && extra) {
98 /* Put a zero in; future versions may append other data. */
99 ecode = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
101 if (ntdb->flags & NTDB_SEQNUM)
102 ntdb_inc_seqnum(ntdb);
104 return ecode;
107 _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
108 NTDB_DATA key, NTDB_DATA dbuf, int flag)
110 struct hash_info h;
111 ntdb_off_t off;
112 ntdb_len_t old_room = 0;
113 struct ntdb_used_record rec;
114 enum NTDB_ERROR ecode;
116 off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
117 if (NTDB_OFF_IS_ERR(off)) {
118 return NTDB_OFF_TO_ERR(off);
121 /* Now we have lock on this hash bucket. */
122 if (flag == NTDB_INSERT) {
123 if (off) {
124 ecode = NTDB_ERR_EXISTS;
125 goto out;
127 } else {
128 if (off) {
129 old_room = rec_data_length(&rec)
130 + rec_extra_padding(&rec);
131 if (old_room >= dbuf.dsize) {
132 /* Can modify in-place. Easy! */
133 ecode = update_rec_hdr(ntdb, off,
134 key.dsize, dbuf.dsize,
135 &rec);
136 if (ecode != NTDB_SUCCESS) {
137 goto out;
139 ecode = update_data(ntdb,
140 off + sizeof(rec)
141 + key.dsize, dbuf,
142 old_room - dbuf.dsize);
143 if (ecode != NTDB_SUCCESS) {
144 goto out;
146 ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
147 return NTDB_SUCCESS;
149 } else {
150 if (flag == NTDB_MODIFY) {
151 /* if the record doesn't exist and we
152 are in NTDB_MODIFY mode then we should fail
153 the store */
154 ecode = NTDB_ERR_NOEXIST;
155 goto out;
160 /* If we didn't use the old record, this implies we're growing. */
161 ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off);
162 out:
163 ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
164 return ecode;
167 _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
168 NTDB_DATA key, NTDB_DATA dbuf)
170 struct hash_info h;
171 ntdb_off_t off;
172 struct ntdb_used_record rec;
173 ntdb_len_t old_room = 0, old_dlen;
174 unsigned char *newdata;
175 NTDB_DATA new_dbuf;
176 enum NTDB_ERROR ecode;
178 off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
179 if (NTDB_OFF_IS_ERR(off)) {
180 return NTDB_OFF_TO_ERR(off);
183 if (off) {
184 old_dlen = rec_data_length(&rec);
185 old_room = old_dlen + rec_extra_padding(&rec);
187 /* Fast path: can append in place. */
188 if (rec_extra_padding(&rec) >= dbuf.dsize) {
189 ecode = update_rec_hdr(ntdb, off, key.dsize,
190 old_dlen + dbuf.dsize, &rec);
191 if (ecode != NTDB_SUCCESS) {
192 goto out;
195 off += sizeof(rec) + key.dsize + old_dlen;
196 ecode = update_data(ntdb, off, dbuf,
197 rec_extra_padding(&rec));
198 goto out;
201 /* Slow path. */
202 newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize,
203 ntdb->alloc_data);
204 if (!newdata) {
205 ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
206 "ntdb_append:"
207 " failed to allocate %zu bytes",
208 (size_t)(key.dsize + old_dlen
209 + dbuf.dsize));
210 goto out;
212 ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
213 newdata, old_dlen);
214 if (ecode != NTDB_SUCCESS) {
215 goto out_free_newdata;
217 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
218 new_dbuf.dptr = newdata;
219 new_dbuf.dsize = old_dlen + dbuf.dsize;
220 } else {
221 newdata = NULL;
222 new_dbuf = dbuf;
225 /* If they're using ntdb_append(), it implies they're growing record. */
226 ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);
228 out_free_newdata:
229 ntdb->free_fn(newdata, ntdb->alloc_data);
230 out:
231 ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
232 return ecode;
235 _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
236 NTDB_DATA *data)
238 ntdb_off_t off;
239 struct ntdb_used_record rec;
240 struct hash_info h;
241 enum NTDB_ERROR ecode;
242 const char *keyp;
244 off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
245 if (NTDB_OFF_IS_ERR(off)) {
246 return NTDB_OFF_TO_ERR(off);
249 if (!off) {
250 ecode = NTDB_ERR_NOEXIST;
251 } else {
252 data->dsize = rec_data_length(&rec);
253 data->dptr = ntdb->alloc_fn(ntdb, data->dsize, ntdb->alloc_data);
254 if (unlikely(!data->dptr)) {
255 ecode = NTDB_ERR_OOM;
256 } else {
257 memcpy(data->dptr, keyp + key.dsize, data->dsize);
258 ecode = NTDB_SUCCESS;
260 ntdb_access_release(ntdb, keyp);
263 ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
264 return ecode;
267 _PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
269 ntdb_off_t off;
270 struct ntdb_used_record rec;
271 struct hash_info h;
273 off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
274 if (NTDB_OFF_IS_ERR(off)) {
275 return false;
277 ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
279 return off ? true : false;
282 _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
284 ntdb_off_t off;
285 struct ntdb_used_record rec;
286 struct hash_info h;
287 enum NTDB_ERROR ecode;
289 off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
290 if (NTDB_OFF_IS_ERR(off)) {
291 return NTDB_OFF_TO_ERR(off);
294 if (!off) {
295 ecode = NTDB_ERR_NOEXIST;
296 goto unlock;
299 ecode = delete_from_hash(ntdb, &h);
300 if (ecode != NTDB_SUCCESS) {
301 goto unlock;
304 /* Free the deleted entry. */
305 ntdb->stats.frees++;
306 ecode = add_free_record(ntdb, off,
307 sizeof(struct ntdb_used_record)
308 + rec_key_length(&rec)
309 + rec_data_length(&rec)
310 + rec_extra_padding(&rec),
311 NTDB_LOCK_WAIT, true);
313 if (ntdb->flags & NTDB_SEQNUM)
314 ntdb_inc_seqnum(ntdb);
316 unlock:
317 ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
318 return ecode;
321 _PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
323 return ntdb->flags;
326 static bool inside_transaction(const struct ntdb_context *ntdb)
328 return ntdb->transaction != NULL;
331 static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
333 if (inside_transaction(ntdb)) {
334 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
335 "%s: can't change"
336 " NTDB_RDONLY inside transaction",
337 caller);
338 return false;
340 return true;
343 _PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
345 if (ntdb->flags & NTDB_INTERNAL) {
346 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
347 "ntdb_add_flag: internal db");
348 return;
350 switch (flag) {
351 case NTDB_NOLOCK:
352 ntdb->flags |= NTDB_NOLOCK;
353 break;
354 case NTDB_NOMMAP:
355 if (ntdb->file->direct_count) {
356 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
357 "ntdb_add_flag: Can't get NTDB_NOMMAP from"
358 " ntdb_parse_record!");
359 return;
361 ntdb->flags |= NTDB_NOMMAP;
362 #ifndef HAVE_INCOHERENT_MMAP
363 ntdb_munmap(ntdb);
364 #endif
365 break;
366 case NTDB_NOSYNC:
367 ntdb->flags |= NTDB_NOSYNC;
368 break;
369 case NTDB_SEQNUM:
370 ntdb->flags |= NTDB_SEQNUM;
371 break;
372 case NTDB_ALLOW_NESTING:
373 ntdb->flags |= NTDB_ALLOW_NESTING;
374 break;
375 case NTDB_RDONLY:
376 if (readonly_changable(ntdb, "ntdb_add_flag"))
377 ntdb->flags |= NTDB_RDONLY;
378 break;
379 default:
380 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
381 "ntdb_add_flag: Unknown flag %u", flag);
385 _PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
387 if (ntdb->flags & NTDB_INTERNAL) {
388 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
389 "ntdb_remove_flag: internal db");
390 return;
392 switch (flag) {
393 case NTDB_NOLOCK:
394 ntdb->flags &= ~NTDB_NOLOCK;
395 break;
396 case NTDB_NOMMAP:
397 ntdb->flags &= ~NTDB_NOMMAP;
398 #ifndef HAVE_INCOHERENT_MMAP
399 /* If mmap incoherent, we were mmaping anyway. */
400 ntdb_mmap(ntdb);
401 #endif
402 break;
403 case NTDB_NOSYNC:
404 ntdb->flags &= ~NTDB_NOSYNC;
405 break;
406 case NTDB_SEQNUM:
407 ntdb->flags &= ~NTDB_SEQNUM;
408 break;
409 case NTDB_ALLOW_NESTING:
410 ntdb->flags &= ~NTDB_ALLOW_NESTING;
411 break;
412 case NTDB_RDONLY:
413 if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) {
414 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
415 "ntdb_remove_flag: can't"
416 " remove NTDB_RDONLY on ntdb"
417 " opened with O_RDONLY");
418 break;
420 if (readonly_changable(ntdb, "ntdb_remove_flag"))
421 ntdb->flags &= ~NTDB_RDONLY;
422 break;
423 default:
424 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
425 "ntdb_remove_flag: Unknown flag %u",
426 flag);
430 _PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
432 /* Gcc warns if you miss a case in the switch, so use that. */
433 switch (NTDB_ERR_TO_OFF(ecode)) {
434 case NTDB_ERR_TO_OFF(NTDB_SUCCESS): return "Success";
435 case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): return "Corrupt database";
436 case NTDB_ERR_TO_OFF(NTDB_ERR_IO): return "IO Error";
437 case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): return "Locking error";
438 case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): return "Out of memory";
439 case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS): return "Record exists";
440 case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): return "Invalid parameter";
441 case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST): return "Record does not exist";
442 case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY): return "write not permitted";
444 return "Invalid error code";
447 enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
448 enum NTDB_ERROR ecode,
449 enum ntdb_log_level level,
450 const char *fmt, ...)
452 char *message;
453 va_list ap;
454 size_t len;
455 /* ntdb_open paths care about errno, so save it. */
456 int saved_errno = errno;
458 if (!ntdb->log_fn)
459 return ecode;
461 va_start(ap, fmt);
462 len = vsnprintf(NULL, 0, fmt, ap);
463 va_end(ap);
465 message = ntdb->alloc_fn(ntdb, len + 1, ntdb->alloc_data);
466 if (!message) {
467 ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
468 "out of memory formatting message:", ntdb->log_data);
469 ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
470 } else {
471 va_start(ap, fmt);
472 vsnprintf(message, len+1, fmt, ap);
473 va_end(ap);
474 ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
475 ntdb->free_fn(message, ntdb->alloc_data);
477 errno = saved_errno;
478 return ecode;
481 _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
482 NTDB_DATA key,
483 enum NTDB_ERROR (*parse)(NTDB_DATA k,
484 NTDB_DATA d,
485 void *data),
486 void *data)
488 ntdb_off_t off;
489 struct ntdb_used_record rec;
490 struct hash_info h;
491 enum NTDB_ERROR ecode;
492 const char *keyp;
494 off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
495 if (NTDB_OFF_IS_ERR(off)) {
496 return NTDB_OFF_TO_ERR(off);
499 if (!off) {
500 ecode = NTDB_ERR_NOEXIST;
501 } else {
502 unsigned int old_flags;
503 NTDB_DATA d = ntdb_mkdata(keyp + key.dsize,
504 rec_data_length(&rec));
507 * Make sure they don't try to write db, since they
508 * have read lock! They can if they've done
509 * ntdb_lockall(): if it was ntdb_lockall_read, that'll
510 * stop them doing a write operation anyway.
512 old_flags = ntdb->flags;
513 if (!ntdb->file->allrecord_lock.count &&
514 !(ntdb->flags & NTDB_NOLOCK)) {
515 ntdb->flags |= NTDB_RDONLY;
517 ecode = parse(key, d, data);
518 ntdb->flags = old_flags;
519 ntdb_access_release(ntdb, keyp);
522 ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
523 return ecode;
526 _PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
528 return ntdb->name;
531 _PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
533 return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
537 _PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
539 return ntdb->file->fd;
542 struct traverse_state {
543 enum NTDB_ERROR error;
544 struct ntdb_context *dest_db;
548 traverse function for repacking
550 static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
551 struct traverse_state *state)
553 state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
554 if (state->error != NTDB_SUCCESS) {
555 return -1;
557 return 0;
560 _PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
562 struct ntdb_context *tmp_db;
563 struct traverse_state state;
565 state.error = ntdb_transaction_start(ntdb);
566 if (state.error != NTDB_SUCCESS) {
567 return state.error;
570 tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
571 if (tmp_db == NULL) {
572 state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
573 __location__
574 " Failed to create tmp_db");
575 ntdb_transaction_cancel(ntdb);
576 return state.error;
579 state.dest_db = tmp_db;
580 if (ntdb_traverse(ntdb, repack_traverse, &state) < 0) {
581 goto fail;
584 state.error = ntdb_wipe_all(ntdb);
585 if (state.error != NTDB_SUCCESS) {
586 goto fail;
589 state.dest_db = ntdb;
590 if (ntdb_traverse(tmp_db, repack_traverse, &state) < 0) {
591 goto fail;
594 ntdb_close(tmp_db);
595 return ntdb_transaction_commit(ntdb);
597 fail:
598 ntdb_transaction_cancel(ntdb);
599 ntdb_close(tmp_db);
600 return state.error;