2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include "tdb_private.h"
#include <errno.h>
30 _PUBLIC_ TDB_DATA tdb_null
;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_
void tdb_increment_seqnum_nonblock(struct tdb_context
*tdb
)
40 if (!(tdb
->flags
& TDB_SEQNUM
)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
49 tdb_ofs_write(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context
*tdb
)
58 if (!(tdb
->flags
& TDB_SEQNUM
)) {
62 if (tdb
->transaction
!= NULL
) {
63 tdb_increment_seqnum_nonblock(tdb
);
67 if (tdb_nest_lock(tdb
, TDB_SEQNUM_OFS
, F_WRLCK
,
68 TDB_LOCK_WAIT
|TDB_LOCK_PROBE
) != 0) {
72 tdb_increment_seqnum_nonblock(tdb
);
74 tdb_nest_unlock(tdb
, TDB_SEQNUM_OFS
, F_WRLCK
, false);
77 static int tdb_key_compare(TDB_DATA key
, TDB_DATA data
, void *private_data
)
79 return memcmp(data
.dptr
, key
.dptr
, data
.dsize
);
82 void tdb_chainwalk_init(struct tdb_chainwalk_ctx
*ctx
, tdb_off_t ptr
)
84 *ctx
= (struct tdb_chainwalk_ctx
) { .slow_ptr
= ptr
};
87 bool tdb_chainwalk_check(struct tdb_context
*tdb
,
88 struct tdb_chainwalk_ctx
*ctx
,
93 if (ctx
->slow_chase
) {
94 ret
= tdb_ofs_read(tdb
, ctx
->slow_ptr
, &ctx
->slow_ptr
);
99 ctx
->slow_chase
= !ctx
->slow_chase
;
101 if (next_ptr
== ctx
->slow_ptr
) {
102 tdb
->ecode
= TDB_ERR_CORRUPT
;
103 TDB_LOG((tdb
, TDB_DEBUG_ERROR
,
104 "tdb_chainwalk_check: circular chain\n"));
111 /* Returns 0 on fail. On success, return offset of record, and fills
113 static tdb_off_t
tdb_find(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
,
114 struct tdb_record
*r
)
117 struct tdb_chainwalk_ctx chainwalk
;
119 /* read in the hash top */
120 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1)
123 tdb_chainwalk_init(&chainwalk
, rec_ptr
);
125 /* keep looking until we find the right record */
129 if (tdb_rec_read(tdb
, rec_ptr
, r
) == -1)
132 if (!TDB_DEAD(r
) && hash
==r
->full_hash
133 && key
.dsize
==r
->key_len
134 && tdb_parse_data(tdb
, key
, rec_ptr
+ sizeof(*r
),
135 r
->key_len
, tdb_key_compare
,
141 ok
= tdb_chainwalk_check(tdb
, &chainwalk
, rec_ptr
);
146 tdb
->ecode
= TDB_ERR_NOEXIST
;
150 /* As tdb_find, but if you succeed, keep the lock */
151 tdb_off_t
tdb_find_lock_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
, int locktype
,
152 struct tdb_record
*rec
)
156 if (tdb_lock(tdb
, BUCKET(hash
), locktype
) == -1)
158 if (!(rec_ptr
= tdb_find(tdb
, key
, hash
, rec
)))
159 tdb_unlock(tdb
, BUCKET(hash
), locktype
);
163 static TDB_DATA
_tdb_fetch(struct tdb_context
*tdb
, TDB_DATA key
);
165 struct tdb_update_hash_state
{
166 const TDB_DATA
*dbufs
;
171 static int tdb_update_hash_cmp(TDB_DATA key
, TDB_DATA data
, void *private_data
)
173 struct tdb_update_hash_state
*state
= private_data
;
174 unsigned char *dptr
= data
.dptr
;
177 if (state
->dbufs_len
!= data
.dsize
) {
181 for (i
=0; i
<state
->num_dbufs
; i
++) {
182 TDB_DATA dbuf
= state
->dbufs
[i
];
184 ret
= memcmp(dptr
, dbuf
.dptr
, dbuf
.dsize
);
194 /* update an entry in place - this only works if the new data size
195 is <= the old data size and the key exists.
196 on failure return -1.
198 static int tdb_update_hash(struct tdb_context
*tdb
, TDB_DATA key
,
200 const TDB_DATA
*dbufs
, int num_dbufs
,
203 struct tdb_record rec
;
204 tdb_off_t rec_ptr
, ofs
;
208 if (!(rec_ptr
= tdb_find(tdb
, key
, hash
, &rec
)))
211 /* it could be an exact duplicate of what is there - this is
212 * surprisingly common (eg. with a ldb re-index). */
213 if (rec
.data_len
== dbufs_len
) {
214 struct tdb_update_hash_state state
= {
215 .dbufs
= dbufs
, .num_dbufs
= num_dbufs
,
216 .dbufs_len
= dbufs_len
220 ret
= tdb_parse_record(tdb
, key
, tdb_update_hash_cmp
, &state
);
226 /* must be long enough key, data and tailer */
227 if (rec
.rec_len
< key
.dsize
+ dbufs_len
+ sizeof(tdb_off_t
)) {
228 tdb
->ecode
= TDB_SUCCESS
; /* Not really an error */
232 ofs
= rec_ptr
+ sizeof(rec
) + rec
.key_len
;
234 for (i
=0; i
<num_dbufs
; i
++) {
235 TDB_DATA dbuf
= dbufs
[i
];
238 ret
= tdb
->methods
->tdb_write(tdb
, ofs
, dbuf
.dptr
, dbuf
.dsize
);
245 if (dbufs_len
!= rec
.data_len
) {
247 rec
.data_len
= dbufs_len
;
248 return tdb_rec_write(tdb
, rec_ptr
, &rec
);
254 /* find an entry in the database given a key */
255 /* If an entry doesn't exist tdb_err will be set to
256 * TDB_ERR_NOEXIST. If a key has no data attached
257 * then the TDB_DATA will have zero length but
260 static TDB_DATA
_tdb_fetch(struct tdb_context
*tdb
, TDB_DATA key
)
263 struct tdb_record rec
;
267 /* find which hash bucket it is in */
268 hash
= tdb
->hash_fn(&key
);
269 if (!(rec_ptr
= tdb_find_lock_hash(tdb
,key
,hash
,F_RDLCK
,&rec
)))
272 ret
.dptr
= tdb_alloc_read(tdb
, rec_ptr
+ sizeof(rec
) + rec
.key_len
,
274 ret
.dsize
= rec
.data_len
;
275 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
279 _PUBLIC_ TDB_DATA
tdb_fetch(struct tdb_context
*tdb
, TDB_DATA key
)
281 TDB_DATA ret
= _tdb_fetch(tdb
, key
);
283 tdb_trace_1rec_retrec(tdb
, "tdb_fetch", key
, ret
);
288 * Find an entry in the database and hand the record's data to a parsing
289 * function. The parsing function is executed under the chain read lock, so it
290 * should be fast and should not block on other syscalls.
292 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
294 * For mmapped tdb's that do not have a transaction open it points the parsing
295 * function directly at the mmap area, it avoids the malloc/memcpy in this
296 * case. If a transaction is open or no mmap is available, it has to do
297 * malloc/read/parse/free.
299 * This is interesting for all readers of potentially large data structures in
300 * the tdb records, ldb indexes being one example.
302 * Return -1 if the record was not found.
305 _PUBLIC_
int tdb_parse_record(struct tdb_context
*tdb
, TDB_DATA key
,
306 int (*parser
)(TDB_DATA key
, TDB_DATA data
,
311 struct tdb_record rec
;
315 /* find which hash bucket it is in */
316 hash
= tdb
->hash_fn(&key
);
318 if (!(rec_ptr
= tdb_find_lock_hash(tdb
,key
,hash
,F_RDLCK
,&rec
))) {
319 /* record not found */
320 tdb_trace_1rec_ret(tdb
, "tdb_parse_record", key
, -1);
321 tdb
->ecode
= TDB_ERR_NOEXIST
;
324 tdb_trace_1rec_ret(tdb
, "tdb_parse_record", key
, 0);
326 ret
= tdb_parse_data(tdb
, key
, rec_ptr
+ sizeof(rec
) + rec
.key_len
,
327 rec
.data_len
, parser
, private_data
);
329 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
334 /* check if an entry in the database exists
336 note that 1 is returned if the key is found and 0 is returned if not found
337 this doesn't match the conventions in the rest of this module, but is
340 static int tdb_exists_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
)
342 struct tdb_record rec
;
344 if (tdb_find_lock_hash(tdb
, key
, hash
, F_RDLCK
, &rec
) == 0)
346 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
350 _PUBLIC_
int tdb_exists(struct tdb_context
*tdb
, TDB_DATA key
)
352 uint32_t hash
= tdb
->hash_fn(&key
);
355 ret
= tdb_exists_hash(tdb
, key
, hash
);
356 tdb_trace_1rec_ret(tdb
, "tdb_exists", key
, ret
);
361 * Move a dead record to the freelist. The hash chain and freelist
364 static int tdb_del_dead(struct tdb_context
*tdb
,
367 struct tdb_record
*rec
,
372 ret
= tdb_write_lock_record(tdb
, rec_ptr
);
374 /* Someone traversing here: Just leave it dead */
377 ret
= tdb_write_unlock_record(tdb
, rec_ptr
);
381 ret
= tdb_ofs_write(tdb
, last_ptr
, &rec
->next
);
388 ret
= tdb_free(tdb
, rec_ptr
, rec
);
393 * Walk the hash chain and leave tdb->max_dead_records around. Move
394 * the rest of dead records to the freelist.
396 int tdb_trim_dead(struct tdb_context
*tdb
, uint32_t hash
)
398 struct tdb_chainwalk_ctx chainwalk
;
399 struct tdb_record rec
;
400 tdb_off_t last_ptr
, rec_ptr
;
401 bool locked_freelist
= false;
405 last_ptr
= TDB_HASH_TOP(hash
);
408 * Init chainwalk with the pointer to the hash top. It might
409 * be that the very first record in the chain is a dead one
410 * that we have to delete.
412 tdb_chainwalk_init(&chainwalk
, last_ptr
);
414 ret
= tdb_ofs_read(tdb
, last_ptr
, &rec_ptr
);
419 while (rec_ptr
!= 0) {
420 bool deleted
= false;
423 ret
= tdb_rec_read(tdb
, rec_ptr
, &rec
);
429 * Make a copy of rec.next: Further down we might
430 * delete and put the record on the freelist. Make
431 * sure that modifications in that code path can't
432 * break the chainwalk here.
436 if (rec
.magic
== TDB_DEAD_MAGIC
) {
439 if (num_dead
> tdb
->max_dead_records
) {
441 if (!locked_freelist
) {
443 * Lock the freelist only if
444 * it's really required.
446 ret
= tdb_lock(tdb
, -1, F_WRLCK
);
450 locked_freelist
= true;
467 * Don't do the chainwalk check if "rec_ptr" was
468 * deleted. We reduced the chain, and the chainwalk
469 * check might catch up early. Imagine a valid chain
470 * with just dead records: We never can bump the
471 * "slow" pointer in chainwalk_check, as there isn't
472 * anything left to jump to and compare.
479 ok
= tdb_chainwalk_check(tdb
, &chainwalk
, next
);
489 if (locked_freelist
) {
490 tdb_unlock(tdb
, -1, F_WRLCK
);
495 /* delete an entry in the database given a key */
496 static int tdb_delete_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
)
499 struct tdb_record rec
;
502 if (tdb
->read_only
|| tdb
->traverse_read
) {
503 tdb
->ecode
= TDB_ERR_RDONLY
;
507 rec_ptr
= tdb_find_lock_hash(tdb
, key
, hash
, F_WRLCK
, &rec
);
513 * Mark the record dead
515 rec
.magic
= TDB_DEAD_MAGIC
;
516 ret
= tdb_rec_write(tdb
, rec_ptr
, &rec
);
521 tdb_increment_seqnum(tdb
);
523 ret
= tdb_trim_dead(tdb
, hash
);
525 if (tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
) != 0)
526 TDB_LOG((tdb
, TDB_DEBUG_WARNING
, "tdb_delete: WARNING tdb_unlock failed!\n"));
530 _PUBLIC_
int tdb_delete(struct tdb_context
*tdb
, TDB_DATA key
)
532 uint32_t hash
= tdb
->hash_fn(&key
);
535 ret
= tdb_delete_hash(tdb
, key
, hash
);
536 tdb_trace_1rec_ret(tdb
, "tdb_delete", key
, ret
);
541 * See if we have a dead record around with enough space
543 tdb_off_t
tdb_find_dead(struct tdb_context
*tdb
, uint32_t hash
,
544 struct tdb_record
*r
, tdb_len_t length
,
545 tdb_off_t
*p_last_ptr
)
547 tdb_off_t rec_ptr
, last_ptr
;
548 struct tdb_chainwalk_ctx chainwalk
;
549 tdb_off_t best_rec_ptr
= 0;
550 tdb_off_t best_last_ptr
= 0;
551 struct tdb_record best
= { .rec_len
= UINT32_MAX
};
553 length
+= sizeof(tdb_off_t
); /* tailer */
555 last_ptr
= TDB_HASH_TOP(hash
);
557 /* read in the hash top */
558 if (tdb_ofs_read(tdb
, last_ptr
, &rec_ptr
) == -1)
561 tdb_chainwalk_init(&chainwalk
, rec_ptr
);
563 /* keep looking until we find the right record */
567 if (tdb_rec_read(tdb
, rec_ptr
, r
) == -1)
570 if (TDB_DEAD(r
) && (r
->rec_len
>= length
) &&
571 (r
->rec_len
< best
.rec_len
)) {
572 best_rec_ptr
= rec_ptr
;
573 best_last_ptr
= last_ptr
;
579 ok
= tdb_chainwalk_check(tdb
, &chainwalk
, rec_ptr
);
585 if (best
.rec_len
== UINT32_MAX
) {
590 *p_last_ptr
= best_last_ptr
;
594 static int _tdb_storev(struct tdb_context
*tdb
, TDB_DATA key
,
595 const TDB_DATA
*dbufs
, int num_dbufs
,
596 int flag
, uint32_t hash
)
598 struct tdb_record rec
;
599 tdb_off_t rec_ptr
, ofs
;
600 tdb_len_t rec_len
, dbufs_len
;
606 for (i
=0; i
<num_dbufs
; i
++) {
607 size_t dsize
= dbufs
[i
].dsize
;
609 if ((dsize
!= 0) && (dbufs
[i
].dptr
== NULL
)) {
610 tdb
->ecode
= TDB_ERR_EINVAL
;
615 if (dbufs_len
< dsize
) {
616 tdb
->ecode
= TDB_ERR_OOM
;
621 rec_len
= key
.dsize
+ dbufs_len
;
622 if ((rec_len
< key
.dsize
) || (rec_len
< dbufs_len
)) {
623 tdb
->ecode
= TDB_ERR_OOM
;
627 /* check for it existing, on insert. */
628 if (flag
== TDB_INSERT
) {
629 if (tdb_exists_hash(tdb
, key
, hash
)) {
630 tdb
->ecode
= TDB_ERR_EXISTS
;
634 /* first try in-place update, on modify or replace. */
635 if (tdb_update_hash(tdb
, key
, hash
, dbufs
, num_dbufs
,
639 if (tdb
->ecode
== TDB_ERR_NOEXIST
&&
640 flag
== TDB_MODIFY
) {
641 /* if the record doesn't exist and we are in TDB_MODIFY mode then
642 we should fail the store */
646 /* reset the error code potentially set by the tdb_update_hash() */
647 tdb
->ecode
= TDB_SUCCESS
;
649 /* delete any existing record - if it doesn't exist we don't
650 care. Doing this first reduces fragmentation, and avoids
651 coalescing with `allocated' block before it's updated. */
652 if (flag
!= TDB_INSERT
)
653 tdb_delete_hash(tdb
, key
, hash
);
655 /* we have to allocate some space */
656 rec_ptr
= tdb_allocate(tdb
, hash
, rec_len
, &rec
);
662 /* Read hash top into next ptr */
663 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec
.next
) == -1)
666 rec
.key_len
= key
.dsize
;
667 rec
.data_len
= dbufs_len
;
668 rec
.full_hash
= hash
;
669 rec
.magic
= TDB_MAGIC
;
673 /* write out and point the top of the hash chain at it */
674 ret
= tdb_rec_write(tdb
, ofs
, &rec
);
680 ret
= tdb
->methods
->tdb_write(tdb
, ofs
, key
.dptr
, key
.dsize
);
686 for (i
=0; i
<num_dbufs
; i
++) {
687 if (dbufs
[i
].dsize
== 0) {
691 ret
= tdb
->methods
->tdb_write(tdb
, ofs
, dbufs
[i
].dptr
,
696 ofs
+= dbufs
[i
].dsize
;
699 ret
= tdb_ofs_write(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
);
701 /* Need to tdb_unallocate() here */
709 tdb_increment_seqnum(tdb
);
714 static int _tdb_store(struct tdb_context
*tdb
, TDB_DATA key
,
715 TDB_DATA dbuf
, int flag
, uint32_t hash
)
717 return _tdb_storev(tdb
, key
, &dbuf
, 1, flag
, hash
);
720 /* store an element in the database, replacing any existing element
723 return 0 on success, -1 on failure
725 _PUBLIC_
int tdb_store(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA dbuf
, int flag
)
730 if (tdb
->read_only
|| tdb
->traverse_read
) {
731 tdb
->ecode
= TDB_ERR_RDONLY
;
732 tdb_trace_2rec_flag_ret(tdb
, "tdb_store", key
, dbuf
, flag
, -1);
736 /* find which hash bucket it is in */
737 hash
= tdb
->hash_fn(&key
);
738 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
741 ret
= _tdb_store(tdb
, key
, dbuf
, flag
, hash
);
742 tdb_trace_2rec_flag_ret(tdb
, "tdb_store", key
, dbuf
, flag
, ret
);
743 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
747 _PUBLIC_
int tdb_storev(struct tdb_context
*tdb
, TDB_DATA key
,
748 const TDB_DATA
*dbufs
, int num_dbufs
, int flag
)
753 if (tdb
->read_only
|| tdb
->traverse_read
) {
754 tdb
->ecode
= TDB_ERR_RDONLY
;
755 tdb_trace_1plusn_rec_flag_ret(tdb
, "tdb_storev", key
,
756 dbufs
, num_dbufs
, flag
, -1);
760 /* find which hash bucket it is in */
761 hash
= tdb
->hash_fn(&key
);
762 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
765 ret
= _tdb_storev(tdb
, key
, dbufs
, num_dbufs
, flag
, hash
);
766 tdb_trace_1plusn_rec_flag_ret(tdb
, "tdb_storev", key
,
767 dbufs
, num_dbufs
, flag
, -1);
768 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
772 /* Append to an entry. Create if not exist. */
773 _PUBLIC_
int tdb_append(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA new_dbuf
)
779 /* find which hash bucket it is in */
780 hash
= tdb
->hash_fn(&key
);
781 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
784 dbufs
[0] = _tdb_fetch(tdb
, key
);
787 ret
= _tdb_storev(tdb
, key
, dbufs
, 2, 0, hash
);
788 tdb_trace_2rec_retrec(tdb
, "tdb_append", key
, dbufs
[0], dbufs
[1]);
790 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
791 SAFE_FREE(dbufs
[0].dptr
);
797 return the name of the current tdb file
798 useful for external logging functions
800 _PUBLIC_
const char *tdb_name(struct tdb_context
*tdb
)
806 return the underlying file descriptor being used by tdb, or -1
807 useful for external routines that want to check the device/inode
810 _PUBLIC_
int tdb_fd(struct tdb_context
*tdb
)
816 return the current logging function
817 useful for external tdb routines that wish to log tdb errors
819 _PUBLIC_ tdb_log_func
tdb_log_fn(struct tdb_context
*tdb
)
821 return tdb
->log
.log_fn
;
826 get the tdb sequence number. Only makes sense if the writers opened
827 with TDB_SEQNUM set. Note that this sequence number will wrap quite
828 quickly, so it should only be used for a 'has something changed'
829 test, not for code that relies on the count of the number of changes
830 made. If you want a counter then use a tdb record.
832 The aim of this sequence number is to allow for a very lightweight
833 test of a possible tdb change.
835 _PUBLIC_
int tdb_get_seqnum(struct tdb_context
*tdb
)
839 tdb_ofs_read(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
843 _PUBLIC_
int tdb_hash_size(struct tdb_context
*tdb
)
845 return tdb
->hash_size
;
848 _PUBLIC_
size_t tdb_map_size(struct tdb_context
*tdb
)
850 return tdb
->map_size
;
853 _PUBLIC_
int tdb_get_flags(struct tdb_context
*tdb
)
858 _PUBLIC_
void tdb_add_flags(struct tdb_context
*tdb
, unsigned flags
)
860 if ((flags
& TDB_ALLOW_NESTING
) &&
861 (flags
& TDB_DISALLOW_NESTING
)) {
862 tdb
->ecode
= TDB_ERR_NESTING
;
863 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_add_flags: "
864 "allow_nesting and disallow_nesting are not allowed together!"));
868 if (flags
& TDB_ALLOW_NESTING
) {
869 tdb
->flags
&= ~TDB_DISALLOW_NESTING
;
871 if (flags
& TDB_DISALLOW_NESTING
) {
872 tdb
->flags
&= ~TDB_ALLOW_NESTING
;
878 _PUBLIC_
void tdb_remove_flags(struct tdb_context
*tdb
, unsigned flags
)
880 if ((flags
& TDB_ALLOW_NESTING
) &&
881 (flags
& TDB_DISALLOW_NESTING
)) {
882 tdb
->ecode
= TDB_ERR_NESTING
;
883 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_remove_flags: "
884 "allow_nesting and disallow_nesting are not allowed together!"));
888 if ((flags
& TDB_NOLOCK
) &&
889 (tdb
->feature_flags
& TDB_FEATURE_FLAG_MUTEX
) &&
890 (tdb
->mutexes
== NULL
)) {
891 tdb
->ecode
= TDB_ERR_LOCK
;
892 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_remove_flags: "
893 "Can not remove NOLOCK flag on mutexed databases"));
897 if (flags
& TDB_ALLOW_NESTING
) {
898 tdb
->flags
|= TDB_DISALLOW_NESTING
;
900 if (flags
& TDB_DISALLOW_NESTING
) {
901 tdb
->flags
|= TDB_ALLOW_NESTING
;
904 tdb
->flags
&= ~flags
;
909 enable sequence number handling on an open tdb
911 _PUBLIC_
void tdb_enable_seqnum(struct tdb_context
*tdb
)
913 tdb
->flags
|= TDB_SEQNUM
;
918 add a region of the file to the freelist. Length is the size of the region in bytes,
919 which includes the free list header that needs to be added
921 static int tdb_free_region(struct tdb_context
*tdb
, tdb_off_t offset
, ssize_t length
)
923 struct tdb_record rec
;
924 if (length
<= sizeof(rec
)) {
925 /* the region is not worth adding */
928 if (length
+ offset
> tdb
->map_size
) {
929 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"tdb_free_region: adding region beyond end of file\n"));
932 memset(&rec
,'\0',sizeof(rec
));
933 rec
.rec_len
= length
- sizeof(rec
);
934 if (tdb_free(tdb
, offset
, &rec
) == -1) {
935 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"tdb_free_region: failed to add free record\n"));
942 wipe the entire database, deleting all records. This can be done
943 very fast by using a allrecord lock. The entire data portion of the
944 file becomes a single entry in the freelist.
946 This code carefully steps around the recovery area, leaving it alone
948 _PUBLIC_
int tdb_wipe_all(struct tdb_context
*tdb
)
951 tdb_off_t offset
= 0;
953 tdb_off_t recovery_head
;
954 tdb_len_t recovery_size
= 0;
956 if (tdb_lockall(tdb
) != 0) {
960 tdb_trace(tdb
, "tdb_wipe_all");
962 /* see if the tdb has a recovery area, and remember its size
963 if so. We don't want to lose this as otherwise each
964 tdb_wipe_all() in a transaction will increase the size of
965 the tdb by the size of the recovery area */
966 if (tdb_ofs_read(tdb
, TDB_RECOVERY_HEAD
, &recovery_head
) == -1) {
967 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_wipe_all: failed to read recovery head\n"));
971 if (recovery_head
!= 0) {
972 struct tdb_record rec
;
973 if (tdb
->methods
->tdb_read(tdb
, recovery_head
, &rec
, sizeof(rec
), DOCONV()) == -1) {
974 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_wipe_all: failed to read recovery record\n"));
977 recovery_size
= rec
.rec_len
+ sizeof(rec
);
980 /* wipe the hashes */
981 for (i
=0;i
<tdb
->hash_size
;i
++) {
982 if (tdb_ofs_write(tdb
, TDB_HASH_TOP(i
), &offset
) == -1) {
983 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"tdb_wipe_all: failed to write hash %d\n", i
));
988 /* wipe the freelist */
989 if (tdb_ofs_write(tdb
, FREELIST_TOP
, &offset
) == -1) {
990 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"tdb_wipe_all: failed to write freelist\n"));
994 /* add all the rest of the file to the freelist, possibly leaving a gap
995 for the recovery area */
996 if (recovery_size
== 0) {
997 /* the simple case - the whole file can be used as a freelist */
998 data_len
= (tdb
->map_size
- TDB_DATA_START(tdb
->hash_size
));
999 if (tdb_free_region(tdb
, TDB_DATA_START(tdb
->hash_size
), data_len
) != 0) {
1003 /* we need to add two freelist entries - one on either
1004 side of the recovery area
1006 Note that we cannot shift the recovery area during
1007 this operation. Only the transaction.c code may
1008 move the recovery area or we risk subtle data
1011 data_len
= (recovery_head
- TDB_DATA_START(tdb
->hash_size
));
1012 if (tdb_free_region(tdb
, TDB_DATA_START(tdb
->hash_size
), data_len
) != 0) {
1015 /* and the 2nd free list entry after the recovery area - if any */
1016 data_len
= tdb
->map_size
- (recovery_head
+recovery_size
);
1017 if (tdb_free_region(tdb
, recovery_head
+recovery_size
, data_len
) != 0) {
1022 tdb_increment_seqnum_nonblock(tdb
);
1024 if (tdb_unlockall(tdb
) != 0) {
1025 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"tdb_wipe_all: failed to unlock\n"));
1036 struct traverse_state
{
1038 struct tdb_context
*dest_db
;
1042 traverse function for repacking
1044 static int repack_traverse(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA data
, void *private_data
)
1046 struct traverse_state
*state
= (struct traverse_state
*)private_data
;
1047 if (tdb_store(state
->dest_db
, key
, data
, TDB_INSERT
) != 0) {
1048 state
->error
= true;
1057 _PUBLIC_
int tdb_repack(struct tdb_context
*tdb
)
1059 struct tdb_context
*tmp_db
;
1060 struct traverse_state state
;
1062 tdb_trace(tdb
, "tdb_repack");
1064 if (tdb_transaction_start(tdb
) != 0) {
1065 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to start transaction\n"));
1069 tmp_db
= tdb_open("tmpdb", tdb_hash_size(tdb
), TDB_INTERNAL
, O_RDWR
|O_CREAT
, 0);
1070 if (tmp_db
== NULL
) {
1071 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to create tmp_db\n"));
1072 tdb_transaction_cancel(tdb
);
1076 state
.error
= false;
1077 state
.dest_db
= tmp_db
;
1079 if (tdb_traverse_read(tdb
, repack_traverse
, &state
) == -1) {
1080 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to traverse copying out\n"));
1081 tdb_transaction_cancel(tdb
);
1087 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Error during traversal\n"));
1088 tdb_transaction_cancel(tdb
);
1093 if (tdb_wipe_all(tdb
) != 0) {
1094 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to wipe database\n"));
1095 tdb_transaction_cancel(tdb
);
1100 state
.error
= false;
1101 state
.dest_db
= tdb
;
1103 if (tdb_traverse_read(tmp_db
, repack_traverse
, &state
) == -1) {
1104 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to traverse copying back\n"));
1105 tdb_transaction_cancel(tdb
);
1111 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Error during second traversal\n"));
1112 tdb_transaction_cancel(tdb
);
1119 if (tdb_transaction_commit(tdb
) != 0) {
1120 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, __location__
" Failed to commit\n"));
/*
 * Write exactly "count" bytes from "buf" to "fd".
 *
 * Even on files, we can get partial writes due to signals, so write() is
 * called again for the remainder until everything is out.  A write() that
 * is interrupted before transferring any data (EINTR) is retried instead
 * of being reported as a failure.
 *
 * Returns true once all bytes are written (trivially so for count == 0),
 * false on any other write() error.
 */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *pos = buf;

	while (count != 0) {
		ssize_t ret = write(fd, pos, count);

		if (ret < 0) {
			if (errno == EINTR) {
				/* nothing was written yet, just try again */
				continue;
			}
			return false;
		}
		pos += ret;
		count -= (size_t)ret;
	}
	return true;
}
1141 bool tdb_add_off_t(tdb_off_t a
, tdb_off_t b
, tdb_off_t
*pret
)
1143 tdb_off_t ret
= a
+ b
;
1145 if ((ret
< a
) || (ret
< b
)) {
1153 static void tdb_trace_write(struct tdb_context
*tdb
, const char *str
)
1155 if (!tdb_write_all(tdb
->tracefd
, str
, strlen(str
))) {
1156 close(tdb
->tracefd
);
1161 static void tdb_trace_start(struct tdb_context
*tdb
)
1164 char msg
[sizeof(tdb_off_t
) * 4 + 1];
1166 tdb_ofs_read(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
1167 snprintf(msg
, sizeof(msg
), "%u ", seqnum
);
1168 tdb_trace_write(tdb
, msg
);
1171 static void tdb_trace_end(struct tdb_context
*tdb
)
1173 tdb_trace_write(tdb
, "\n");
1176 static void tdb_trace_end_ret(struct tdb_context
*tdb
, int ret
)
1178 char msg
[sizeof(ret
) * 4 + 4];
1179 snprintf(msg
, sizeof(msg
), " = %i\n", ret
);
1180 tdb_trace_write(tdb
, msg
);
1183 static void tdb_trace_record(struct tdb_context
*tdb
, TDB_DATA rec
)
1185 char msg
[20 + rec
.dsize
*2], *p
;
1188 /* We differentiate zero-length records from non-existent ones. */
1189 if (rec
.dptr
== NULL
) {
1190 tdb_trace_write(tdb
, " NULL");
1194 /* snprintf here is purely cargo-cult programming. */
1196 p
+= snprintf(p
, sizeof(msg
), " %zu:", rec
.dsize
);
1197 for (i
= 0; i
< rec
.dsize
; i
++)
1198 p
+= snprintf(p
, 2, "%02x", rec
.dptr
[i
]);
1200 tdb_trace_write(tdb
, msg
);
1203 void tdb_trace(struct tdb_context
*tdb
, const char *op
)
1205 tdb_trace_start(tdb
);
1206 tdb_trace_write(tdb
, op
);
1210 void tdb_trace_seqnum(struct tdb_context
*tdb
, uint32_t seqnum
, const char *op
)
1212 char msg
[sizeof(tdb_off_t
) * 4 + 1];
1214 snprintf(msg
, sizeof(msg
), "%u ", seqnum
);
1215 tdb_trace_write(tdb
, msg
);
1216 tdb_trace_write(tdb
, op
);
1220 void tdb_trace_open(struct tdb_context
*tdb
, const char *op
,
1221 unsigned hash_size
, unsigned tdb_flags
, unsigned open_flags
)
1225 snprintf(msg
, sizeof(msg
),
1226 "%s %u 0x%x 0x%x", op
, hash_size
, tdb_flags
, open_flags
);
1227 tdb_trace_start(tdb
);
1228 tdb_trace_write(tdb
, msg
);
1232 void tdb_trace_ret(struct tdb_context
*tdb
, const char *op
, int ret
)
1234 tdb_trace_start(tdb
);
1235 tdb_trace_write(tdb
, op
);
1236 tdb_trace_end_ret(tdb
, ret
);
1239 void tdb_trace_retrec(struct tdb_context
*tdb
, const char *op
, TDB_DATA ret
)
1241 tdb_trace_start(tdb
);
1242 tdb_trace_write(tdb
, op
);
1243 tdb_trace_write(tdb
, " =");
1244 tdb_trace_record(tdb
, ret
);
1248 void tdb_trace_1rec(struct tdb_context
*tdb
, const char *op
,
1251 tdb_trace_start(tdb
);
1252 tdb_trace_write(tdb
, op
);
1253 tdb_trace_record(tdb
, rec
);
1257 void tdb_trace_1rec_ret(struct tdb_context
*tdb
, const char *op
,
1258 TDB_DATA rec
, int ret
)
1260 tdb_trace_start(tdb
);
1261 tdb_trace_write(tdb
, op
);
1262 tdb_trace_record(tdb
, rec
);
1263 tdb_trace_end_ret(tdb
, ret
);
1266 void tdb_trace_1rec_retrec(struct tdb_context
*tdb
, const char *op
,
1267 TDB_DATA rec
, TDB_DATA ret
)
1269 tdb_trace_start(tdb
);
1270 tdb_trace_write(tdb
, op
);
1271 tdb_trace_record(tdb
, rec
);
1272 tdb_trace_write(tdb
, " =");
1273 tdb_trace_record(tdb
, ret
);
1277 void tdb_trace_2rec_flag_ret(struct tdb_context
*tdb
, const char *op
,
1278 TDB_DATA rec1
, TDB_DATA rec2
, unsigned flag
,
1281 char msg
[1 + sizeof(ret
) * 4];
1283 snprintf(msg
, sizeof(msg
), " %#x", flag
);
1284 tdb_trace_start(tdb
);
1285 tdb_trace_write(tdb
, op
);
1286 tdb_trace_record(tdb
, rec1
);
1287 tdb_trace_record(tdb
, rec2
);
1288 tdb_trace_write(tdb
, msg
);
1289 tdb_trace_end_ret(tdb
, ret
);
1292 void tdb_trace_1plusn_rec_flag_ret(struct tdb_context
*tdb
, const char *op
,
1294 const TDB_DATA
*recs
, int num_recs
,
1295 unsigned flag
, int ret
)
1297 char msg
[1 + sizeof(ret
) * 4];
1300 snprintf(msg
, sizeof(msg
), " %#x", flag
);
1301 tdb_trace_start(tdb
);
1302 tdb_trace_write(tdb
, op
);
1303 tdb_trace_record(tdb
, rec
);
1304 for (i
=0; i
<num_recs
; i
++) {
1305 tdb_trace_record(tdb
, recs
[i
]);
1307 tdb_trace_write(tdb
, msg
);
1308 tdb_trace_end_ret(tdb
, ret
);
1311 void tdb_trace_2rec_retrec(struct tdb_context
*tdb
, const char *op
,
1312 TDB_DATA rec1
, TDB_DATA rec2
, TDB_DATA ret
)
1314 tdb_trace_start(tdb
);
1315 tdb_trace_write(tdb
, op
);
1316 tdb_trace_record(tdb
, rec1
);
1317 tdb_trace_record(tdb
, rec2
);
1318 tdb_trace_write(tdb
, " =");
1319 tdb_trace_record(tdb
, ret
);