tdb: Remove "header" from tdb_context
[Samba/gebeck_regimport.git] / lib / tdb / common / tdb.c
bloba2ae187f57720f477877d258739d008cf1d3dc65
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
34 the TDB_SEQNUM flag
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
38 tdb_off_t seqnum=0;
40 if (!(tdb->flags & TDB_SEQNUM)) {
41 return;
44 /* we ignore errors from this, as we have no sane way of
45 dealing with them.
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
48 seqnum++;
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
54 the TDB_SEQNUM flag
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
59 return;
62 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
63 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
64 return;
67 tdb_increment_seqnum_nonblock(tdb);
69 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
72 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
74 return memcmp(data.dptr, key.dptr, data.dsize);
77 /* Returns 0 on fail. On success, return offset of record, and fills
78 in rec */
79 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
80 struct tdb_record *r)
82 tdb_off_t rec_ptr;
84 /* read in the hash top */
85 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
86 return 0;
88 /* keep looking until we find the right record */
89 while (rec_ptr) {
90 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
91 return 0;
93 if (!TDB_DEAD(r) && hash==r->full_hash
94 && key.dsize==r->key_len
95 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
96 r->key_len, tdb_key_compare,
97 NULL) == 0) {
98 return rec_ptr;
100 /* detect tight infinite loop */
101 if (rec_ptr == r->next) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
104 return 0;
106 rec_ptr = r->next;
108 tdb->ecode = TDB_ERR_NOEXIST;
109 return 0;
112 /* As tdb_find, but if you succeed, keep the lock */
113 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
114 struct tdb_record *rec)
116 uint32_t rec_ptr;
118 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
119 return 0;
120 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
121 tdb_unlock(tdb, BUCKET(hash), locktype);
122 return rec_ptr;
125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
127 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
129 TDB_DATA *dbuf = (TDB_DATA *)private_data;
131 if (dbuf->dsize != data.dsize) {
132 return -1;
134 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) {
135 return -1;
137 return 0;
140 /* update an entry in place - this only works if the new data size
141 is <= the old data size and the key exists.
142 on failure return -1.
144 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
146 struct tdb_record rec;
147 tdb_off_t rec_ptr;
149 /* find entry */
150 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
151 return -1;
153 /* it could be an exact duplicate of what is there - this is
154 * surprisingly common (eg. with a ldb re-index). */
155 if (rec.key_len == key.dsize &&
156 rec.data_len == dbuf.dsize &&
157 rec.full_hash == hash &&
158 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) {
159 return 0;
162 /* must be long enough key, data and tailer */
163 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
164 tdb->ecode = TDB_SUCCESS; /* Not really an error */
165 return -1;
168 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
169 dbuf.dptr, dbuf.dsize) == -1)
170 return -1;
172 if (dbuf.dsize != rec.data_len) {
173 /* update size */
174 rec.data_len = dbuf.dsize;
175 return tdb_rec_write(tdb, rec_ptr, &rec);
178 return 0;
181 /* find an entry in the database given a key */
182 /* If an entry doesn't exist tdb_err will be set to
183 * TDB_ERR_NOEXIST. If a key has no data attached
184 * then the TDB_DATA will have zero length but
185 * a non-zero pointer
187 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
189 tdb_off_t rec_ptr;
190 struct tdb_record rec;
191 TDB_DATA ret;
192 uint32_t hash;
194 /* find which hash bucket it is in */
195 hash = tdb->hash_fn(&key);
196 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
197 return tdb_null;
199 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
200 rec.data_len);
201 ret.dsize = rec.data_len;
202 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
203 return ret;
206 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
208 TDB_DATA ret = _tdb_fetch(tdb, key);
210 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
211 return ret;
215 * Find an entry in the database and hand the record's data to a parsing
216 * function. The parsing function is executed under the chain read lock, so it
217 * should be fast and should not block on other syscalls.
219 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
221 * For mmapped tdb's that do not have a transaction open it points the parsing
222 * function directly at the mmap area, it avoids the malloc/memcpy in this
223 * case. If a transaction is open or no mmap is available, it has to do
224 * malloc/read/parse/free.
226 * This is interesting for all readers of potentially large data structures in
227 * the tdb records, ldb indexes being one example.
229 * Return -1 if the record was not found.
232 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
233 int (*parser)(TDB_DATA key, TDB_DATA data,
234 void *private_data),
235 void *private_data)
237 tdb_off_t rec_ptr;
238 struct tdb_record rec;
239 int ret;
240 uint32_t hash;
242 /* find which hash bucket it is in */
243 hash = tdb->hash_fn(&key);
245 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
246 /* record not found */
247 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
248 tdb->ecode = TDB_ERR_NOEXIST;
249 return -1;
251 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
253 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
254 rec.data_len, parser, private_data);
256 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
258 return ret;
261 /* check if an entry in the database exists
263 note that 1 is returned if the key is found and 0 is returned if not found
264 this doesn't match the conventions in the rest of this module, but is
265 compatible with gdbm
267 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
269 struct tdb_record rec;
271 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
272 return 0;
273 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
274 return 1;
277 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
279 uint32_t hash = tdb->hash_fn(&key);
280 int ret;
282 ret = tdb_exists_hash(tdb, key, hash);
283 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
284 return ret;
287 /* actually delete an entry in the database given the offset */
288 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
290 tdb_off_t last_ptr, i;
291 struct tdb_record lastrec;
293 if (tdb->read_only || tdb->traverse_read) return -1;
295 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
296 tdb_write_lock_record(tdb, rec_ptr) == -1) {
297 /* Someone traversing here: mark it as dead */
298 rec->magic = TDB_DEAD_MAGIC;
299 return tdb_rec_write(tdb, rec_ptr, rec);
301 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
302 return -1;
304 /* find previous record in hash chain */
305 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
306 return -1;
307 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
308 if (tdb_rec_read(tdb, i, &lastrec) == -1)
309 return -1;
311 /* unlink it: next ptr is at start of record. */
312 if (last_ptr == 0)
313 last_ptr = TDB_HASH_TOP(rec->full_hash);
314 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
315 return -1;
317 /* recover the space */
318 if (tdb_free(tdb, rec_ptr, rec) == -1)
319 return -1;
320 return 0;
323 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
325 int res = 0;
326 tdb_off_t rec_ptr;
327 struct tdb_record rec;
329 /* read in the hash top */
330 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
331 return 0;
333 while (rec_ptr) {
334 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
335 return 0;
337 if (rec.magic == TDB_DEAD_MAGIC) {
338 res += 1;
340 rec_ptr = rec.next;
342 return res;
346 * Purge all DEAD records from a hash chain
348 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
350 int res = -1;
351 struct tdb_record rec;
352 tdb_off_t rec_ptr;
354 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
355 return -1;
358 /* read in the hash top */
359 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
360 goto fail;
362 while (rec_ptr) {
363 tdb_off_t next;
365 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
366 goto fail;
369 next = rec.next;
371 if (rec.magic == TDB_DEAD_MAGIC
372 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
373 goto fail;
375 rec_ptr = next;
377 res = 0;
378 fail:
379 tdb_unlock(tdb, -1, F_WRLCK);
380 return res;
383 /* delete an entry in the database given a key */
384 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
386 tdb_off_t rec_ptr;
387 struct tdb_record rec;
388 int ret;
390 if (tdb->max_dead_records != 0) {
393 * Allow for some dead records per hash chain, mainly for
394 * tdb's with a very high create/delete rate like locking.tdb.
397 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
398 return -1;
400 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
402 * Don't let the per-chain freelist grow too large,
403 * delete all existing dead records
405 tdb_purge_dead(tdb, hash);
408 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
409 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
410 return -1;
414 * Just mark the record as dead.
416 rec.magic = TDB_DEAD_MAGIC;
417 ret = tdb_rec_write(tdb, rec_ptr, &rec);
419 else {
420 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
421 &rec)))
422 return -1;
424 ret = tdb_do_delete(tdb, rec_ptr, &rec);
427 if (ret == 0) {
428 tdb_increment_seqnum(tdb);
431 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
432 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
433 return ret;
436 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
438 uint32_t hash = tdb->hash_fn(&key);
439 int ret;
441 ret = tdb_delete_hash(tdb, key, hash);
442 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
443 return ret;
447 * See if we have a dead record around with enough space
449 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
450 struct tdb_record *r, tdb_len_t length)
452 tdb_off_t rec_ptr;
454 /* read in the hash top */
455 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
456 return 0;
458 /* keep looking until we find the right record */
459 while (rec_ptr) {
460 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
461 return 0;
463 if (TDB_DEAD(r) && r->rec_len >= length) {
465 * First fit for simple coding, TODO: change to best
466 * fit
468 return rec_ptr;
470 rec_ptr = r->next;
472 return 0;
475 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
476 TDB_DATA dbuf, int flag, uint32_t hash)
478 struct tdb_record rec;
479 tdb_off_t rec_ptr;
480 int ret = -1;
482 /* check for it existing, on insert. */
483 if (flag == TDB_INSERT) {
484 if (tdb_exists_hash(tdb, key, hash)) {
485 tdb->ecode = TDB_ERR_EXISTS;
486 goto fail;
488 } else {
489 /* first try in-place update, on modify or replace. */
490 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
491 goto done;
493 if (tdb->ecode == TDB_ERR_NOEXIST &&
494 flag == TDB_MODIFY) {
495 /* if the record doesn't exist and we are in TDB_MODIFY mode then
496 we should fail the store */
497 goto fail;
500 /* reset the error code potentially set by the tdb_update() */
501 tdb->ecode = TDB_SUCCESS;
503 /* delete any existing record - if it doesn't exist we don't
504 care. Doing this first reduces fragmentation, and avoids
505 coalescing with `allocated' block before it's updated. */
506 if (flag != TDB_INSERT)
507 tdb_delete_hash(tdb, key, hash);
509 if (tdb->max_dead_records != 0) {
511 * Allow for some dead records per hash chain, look if we can
512 * find one that can hold the new record. We need enough space
513 * for key, data and tailer. If we find one, we don't have to
514 * consult the central freelist.
516 rec_ptr = tdb_find_dead(
517 tdb, hash, &rec,
518 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
520 if (rec_ptr != 0) {
521 rec.key_len = key.dsize;
522 rec.data_len = dbuf.dsize;
523 rec.full_hash = hash;
524 rec.magic = TDB_MAGIC;
525 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
526 || tdb->methods->tdb_write(
527 tdb, rec_ptr + sizeof(rec),
528 key.dptr, key.dsize) == -1
529 || tdb->methods->tdb_write(
530 tdb, rec_ptr + sizeof(rec) + key.dsize,
531 dbuf.dptr, dbuf.dsize) == -1) {
532 goto fail;
534 goto done;
539 * We have to allocate some space from the freelist, so this means we
540 * have to lock it. Use the chance to purge all the DEAD records from
541 * the hash chain under the freelist lock.
544 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
545 goto fail;
548 if ((tdb->max_dead_records != 0)
549 && (tdb_purge_dead(tdb, hash) == -1)) {
550 tdb_unlock(tdb, -1, F_WRLCK);
551 goto fail;
554 /* we have to allocate some space */
555 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
557 tdb_unlock(tdb, -1, F_WRLCK);
559 if (rec_ptr == 0) {
560 goto fail;
563 /* Read hash top into next ptr */
564 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
565 goto fail;
567 rec.key_len = key.dsize;
568 rec.data_len = dbuf.dsize;
569 rec.full_hash = hash;
570 rec.magic = TDB_MAGIC;
572 /* write out and point the top of the hash chain at it */
573 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
574 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
575 key.dptr, key.dsize) == -1
576 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
577 dbuf.dptr, dbuf.dsize) == -1
578 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
579 /* Need to tdb_unallocate() here */
580 goto fail;
583 done:
584 ret = 0;
585 fail:
586 if (ret == 0) {
587 tdb_increment_seqnum(tdb);
589 return ret;
592 /* store an element in the database, replacing any existing element
593 with the same key
595 return 0 on success, -1 on failure
597 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
599 uint32_t hash;
600 int ret;
602 if (tdb->read_only || tdb->traverse_read) {
603 tdb->ecode = TDB_ERR_RDONLY;
604 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
605 return -1;
608 /* find which hash bucket it is in */
609 hash = tdb->hash_fn(&key);
610 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
611 return -1;
613 ret = _tdb_store(tdb, key, dbuf, flag, hash);
614 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
615 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
616 return ret;
619 /* Append to an entry. Create if not exist. */
620 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
622 uint32_t hash;
623 TDB_DATA dbuf;
624 int ret = -1;
626 /* find which hash bucket it is in */
627 hash = tdb->hash_fn(&key);
628 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
629 return -1;
631 dbuf = _tdb_fetch(tdb, key);
633 if (dbuf.dptr == NULL) {
634 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
635 } else {
636 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
637 unsigned char *new_dptr;
639 /* realloc '0' is special: don't do that. */
640 if (new_len == 0)
641 new_len = 1;
642 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
643 if (new_dptr == NULL) {
644 free(dbuf.dptr);
646 dbuf.dptr = new_dptr;
649 if (dbuf.dptr == NULL) {
650 tdb->ecode = TDB_ERR_OOM;
651 goto failed;
654 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
655 dbuf.dsize += new_dbuf.dsize;
657 ret = _tdb_store(tdb, key, dbuf, 0, hash);
658 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
660 failed:
661 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
662 SAFE_FREE(dbuf.dptr);
663 return ret;
668 return the name of the current tdb file
669 useful for external logging functions
671 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
673 return tdb->name;
677 return the underlying file descriptor being used by tdb, or -1
678 useful for external routines that want to check the device/inode
679 of the fd
681 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
683 return tdb->fd;
687 return the current logging function
688 useful for external tdb routines that wish to log tdb errors
690 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
692 return tdb->log.log_fn;
697 get the tdb sequence number. Only makes sense if the writers opened
698 with TDB_SEQNUM set. Note that this sequence number will wrap quite
699 quickly, so it should only be used for a 'has something changed'
700 test, not for code that relies on the count of the number of changes
701 made. If you want a counter then use a tdb record.
703 The aim of this sequence number is to allow for a very lightweight
704 test of a possible tdb change.
706 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
708 tdb_off_t seqnum=0;
710 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
711 return seqnum;
714 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
716 return tdb->hash_size;
719 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
721 return tdb->map_size;
724 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
726 return tdb->flags;
729 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
731 if ((flags & TDB_ALLOW_NESTING) &&
732 (flags & TDB_DISALLOW_NESTING)) {
733 tdb->ecode = TDB_ERR_NESTING;
734 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
735 "allow_nesting and disallow_nesting are not allowed together!"));
736 return;
739 if (flags & TDB_ALLOW_NESTING) {
740 tdb->flags &= ~TDB_DISALLOW_NESTING;
742 if (flags & TDB_DISALLOW_NESTING) {
743 tdb->flags &= ~TDB_ALLOW_NESTING;
746 tdb->flags |= flags;
749 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
751 if ((flags & TDB_ALLOW_NESTING) &&
752 (flags & TDB_DISALLOW_NESTING)) {
753 tdb->ecode = TDB_ERR_NESTING;
754 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
755 "allow_nesting and disallow_nesting are not allowed together!"));
756 return;
759 if (flags & TDB_ALLOW_NESTING) {
760 tdb->flags |= TDB_DISALLOW_NESTING;
762 if (flags & TDB_DISALLOW_NESTING) {
763 tdb->flags |= TDB_ALLOW_NESTING;
766 tdb->flags &= ~flags;
771 enable sequence number handling on an open tdb
773 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
775 tdb->flags |= TDB_SEQNUM;
780 add a region of the file to the freelist. Length is the size of the region in bytes,
781 which includes the free list header that needs to be added
783 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
785 struct tdb_record rec;
786 if (length <= sizeof(rec)) {
787 /* the region is not worth adding */
788 return 0;
790 if (length + offset > tdb->map_size) {
791 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
792 return -1;
794 memset(&rec,'\0',sizeof(rec));
795 rec.rec_len = length - sizeof(rec);
796 if (tdb_free(tdb, offset, &rec) == -1) {
797 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
798 return -1;
800 return 0;
804 wipe the entire database, deleting all records. This can be done
805 very fast by using a allrecord lock. The entire data portion of the
806 file becomes a single entry in the freelist.
808 This code carefully steps around the recovery area, leaving it alone
810 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
812 int i;
813 tdb_off_t offset = 0;
814 ssize_t data_len;
815 tdb_off_t recovery_head;
816 tdb_len_t recovery_size = 0;
818 if (tdb_lockall(tdb) != 0) {
819 return -1;
822 tdb_trace(tdb, "tdb_wipe_all");
824 /* see if the tdb has a recovery area, and remember its size
825 if so. We don't want to lose this as otherwise each
826 tdb_wipe_all() in a transaction will increase the size of
827 the tdb by the size of the recovery area */
828 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
829 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
830 goto failed;
833 if (recovery_head != 0) {
834 struct tdb_record rec;
835 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
836 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
837 return -1;
839 recovery_size = rec.rec_len + sizeof(rec);
842 /* wipe the hashes */
843 for (i=0;i<tdb->hash_size;i++) {
844 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
845 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
846 goto failed;
850 /* wipe the freelist */
851 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
852 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
853 goto failed;
856 /* add all the rest of the file to the freelist, possibly leaving a gap
857 for the recovery area */
858 if (recovery_size == 0) {
859 /* the simple case - the whole file can be used as a freelist */
860 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
861 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
862 goto failed;
864 } else {
865 /* we need to add two freelist entries - one on either
866 side of the recovery area
868 Note that we cannot shift the recovery area during
869 this operation. Only the transaction.c code may
870 move the recovery area or we risk subtle data
871 corruption
873 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
874 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
875 goto failed;
877 /* and the 2nd free list entry after the recovery area - if any */
878 data_len = tdb->map_size - (recovery_head+recovery_size);
879 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
880 goto failed;
884 tdb_increment_seqnum_nonblock(tdb);
886 if (tdb_unlockall(tdb) != 0) {
887 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
888 goto failed;
891 return 0;
893 failed:
894 tdb_unlockall(tdb);
895 return -1;
/* State handed to repack_traverse() through its private_data pointer. */
struct traverse_state {
	bool error;			/* set to true when a store into dest_db fails */
	struct tdb_context *dest_db;	/* database the traversed records are copied into */
};
904 traverse function for repacking
906 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
908 struct traverse_state *state = (struct traverse_state *)private_data;
909 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
910 state->error = true;
911 return -1;
913 return 0;
917 repack a tdb
919 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
921 struct tdb_context *tmp_db;
922 struct traverse_state state;
924 tdb_trace(tdb, "tdb_repack");
926 if (tdb_transaction_start(tdb) != 0) {
927 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
928 return -1;
931 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
932 if (tmp_db == NULL) {
933 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
934 tdb_transaction_cancel(tdb);
935 return -1;
938 state.error = false;
939 state.dest_db = tmp_db;
941 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
942 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
943 tdb_transaction_cancel(tdb);
944 tdb_close(tmp_db);
945 return -1;
948 if (state.error) {
949 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
950 tdb_transaction_cancel(tdb);
951 tdb_close(tmp_db);
952 return -1;
955 if (tdb_wipe_all(tdb) != 0) {
956 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
957 tdb_transaction_cancel(tdb);
958 tdb_close(tmp_db);
959 return -1;
962 state.error = false;
963 state.dest_db = tdb;
965 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
966 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
967 tdb_transaction_cancel(tdb);
968 tdb_close(tmp_db);
969 return -1;
972 if (state.error) {
973 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
974 tdb_transaction_cancel(tdb);
975 tdb_close(tmp_db);
976 return -1;
979 tdb_close(tmp_db);
981 if (tdb_transaction_commit(tdb) != 0) {
982 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
983 return -1;
986 return 0;
/* Even on files, we can get partial writes due to signals.
   Keep calling write() until all `count` bytes of `buf` have been
   written to `fd`.  Returns true when everything was written, false as
   soon as write() reports an error. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *cursor = (const char *)buf;
	size_t remaining = count;

	while (remaining != 0) {
		ssize_t written = write(fd, cursor, remaining);

		if (written < 0) {
			return false;
		}
		/* a short write just advances the window */
		cursor += written;
		remaining -= written;
	}
	return true;
}
1003 #ifdef TDB_TRACE
1004 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1006 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1007 close(tdb->tracefd);
1008 tdb->tracefd = -1;
1012 static void tdb_trace_start(struct tdb_context *tdb)
1014 tdb_off_t seqnum=0;
1015 char msg[sizeof(tdb_off_t) * 4 + 1];
1017 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1018 snprintf(msg, sizeof(msg), "%u ", seqnum);
1019 tdb_trace_write(tdb, msg);
/* Finish a trace line that carries no return value. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char tail[sizeof(ret) * 4 + 4];

	snprintf(tail, sizeof(tail), " = %i\n", ret);
	tdb_trace_write(tdb, tail);
}
1034 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1036 char msg[20 + rec.dsize*2], *p;
1037 unsigned int i;
1039 /* We differentiate zero-length records from non-existent ones. */
1040 if (rec.dptr == NULL) {
1041 tdb_trace_write(tdb, " NULL");
1042 return;
1045 /* snprintf here is purely cargo-cult programming. */
1046 p = msg;
1047 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1048 for (i = 0; i < rec.dsize; i++)
1049 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1051 tdb_trace_write(tdb, msg);
/* Trace an operation that has no arguments and no result:
   "<seqnum> <op>\n". */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);		/* sequence-number prefix */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end(tdb);		/* newline */
}
1061 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1063 char msg[sizeof(tdb_off_t) * 4 + 1];
1065 snprintf(msg, sizeof(msg), "%u ", seqnum);
1066 tdb_trace_write(tdb, msg);
1067 tdb_trace_write(tdb, op);
1068 tdb_trace_end(tdb);
/* Trace an open call: "<seqnum> <op> <hash_size> 0x<tdb_flags> 0x<open_flags>\n". */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char line[128];

	/* format the arguments first, then emit prefix, body and newline */
	snprintf(line, sizeof(line),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);

	tdb_trace_start(tdb);
	tdb_trace_write(tdb, line);
	tdb_trace_end(tdb);
}
/* Trace an operation with no arguments and an integer result:
   "<seqnum> <op> = <ret>\n". */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);		/* sequence-number prefix */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end_ret(tdb, ret);	/* " = <ret>" and newline */
}
1090 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1092 tdb_trace_start(tdb);
1093 tdb_trace_write(tdb, op);
1094 tdb_trace_write(tdb, " =");
1095 tdb_trace_record(tdb, ret);
1096 tdb_trace_end(tdb);
1099 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1100 TDB_DATA rec)
1102 tdb_trace_start(tdb);
1103 tdb_trace_write(tdb, op);
1104 tdb_trace_record(tdb, rec);
1105 tdb_trace_end(tdb);
1108 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1109 TDB_DATA rec, int ret)
1111 tdb_trace_start(tdb);
1112 tdb_trace_write(tdb, op);
1113 tdb_trace_record(tdb, rec);
1114 tdb_trace_end_ret(tdb, ret);
1117 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1118 TDB_DATA rec, TDB_DATA ret)
1120 tdb_trace_start(tdb);
1121 tdb_trace_write(tdb, op);
1122 tdb_trace_record(tdb, rec);
1123 tdb_trace_write(tdb, " =");
1124 tdb_trace_record(tdb, ret);
1125 tdb_trace_end(tdb);
1128 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1129 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1130 int ret)
1132 char msg[1 + sizeof(ret) * 4];
1134 snprintf(msg, sizeof(msg), " %#x", flag);
1135 tdb_trace_start(tdb);
1136 tdb_trace_write(tdb, op);
1137 tdb_trace_record(tdb, rec1);
1138 tdb_trace_record(tdb, rec2);
1139 tdb_trace_write(tdb, msg);
1140 tdb_trace_end_ret(tdb, ret);
1143 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1144 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1146 tdb_trace_start(tdb);
1147 tdb_trace_write(tdb, op);
1148 tdb_trace_record(tdb, rec1);
1149 tdb_trace_record(tdb, rec2);
1150 tdb_trace_write(tdb, " =");
1151 tdb_trace_record(tdb, ret);
1152 tdb_trace_end(tdb);
1154 #endif