2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "tdb_private.h"
34 increment the tdb sequence number if the tdb has been opened using
37 static void tdb_increment_seqnum(struct tdb_context
*tdb
)
41 if (!(tdb
->flags
& TDB_SEQNUM
)) {
45 if (tdb_brlock(tdb
, TDB_SEQNUM_OFS
, F_WRLCK
, F_SETLKW
, 1, 1) != 0) {
49 /* we ignore errors from this, as we have no sane way of
52 tdb_ofs_read(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
54 tdb_ofs_write(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
56 tdb_brlock(tdb
, TDB_SEQNUM_OFS
, F_UNLCK
, F_SETLKW
, 1, 1);
59 static int tdb_key_compare(TDB_DATA key
, TDB_DATA data
, void *private_data
)
61 return memcmp(data
.dptr
, key
.dptr
, data
.dsize
);
64 /* Returns 0 on fail. On success, return offset of record, and fills
66 static tdb_off_t
tdb_find(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
,
67 struct list_struct
*r
)
71 /* read in the hash top */
72 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1)
75 /* keep looking until we find the right record */
77 if (tdb_rec_read(tdb
, rec_ptr
, r
) == -1)
80 if (!TDB_DEAD(r
) && hash
==r
->full_hash
81 && key
.dsize
==r
->key_len
82 && tdb_parse_data(tdb
, key
, rec_ptr
+ sizeof(*r
),
83 r
->key_len
, tdb_key_compare
,
89 return TDB_ERRCODE(TDB_ERR_NOEXIST
, 0);
92 /* As tdb_find, but if you succeed, keep the lock */
93 tdb_off_t
tdb_find_lock_hash(struct tdb_context
*tdb
, TDB_DATA key
,
94 uint32_t hash
, int locktype
,
95 struct list_struct
*rec
)
99 if (tdb_lock(tdb
, BUCKET(hash
), locktype
) == -1)
101 if (!(rec_ptr
= tdb_find(tdb
, key
, hash
, rec
)))
102 tdb_unlock(tdb
, BUCKET(hash
), locktype
);
107 /* update an entry in place - this only works if the new data size
108 is <= the old data size and the key exists.
109 on failure return -1.
111 static int tdb_update_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
, TDB_DATA dbuf
)
113 struct list_struct rec
;
117 if (!(rec_ptr
= tdb_find(tdb
, key
, hash
, &rec
)))
120 /* must be long enough key, data and tailer */
121 if (rec
.rec_len
< key
.dsize
+ dbuf
.dsize
+ sizeof(tdb_off_t
)) {
122 tdb
->ecode
= TDB_SUCCESS
; /* Not really an error */
126 if (tdb
->methods
->tdb_write(tdb
, rec_ptr
+ sizeof(rec
) + rec
.key_len
,
127 dbuf
.dptr
, dbuf
.dsize
) == -1)
130 if (dbuf
.dsize
!= rec
.data_len
) {
132 rec
.data_len
= dbuf
.dsize
;
133 return tdb_rec_write(tdb
, rec_ptr
, &rec
);
139 /* find an entry in the database given a key */
140 /* If an entry doesn't exist tdb_err will be set to
141 * TDB_ERR_NOEXIST. If a key has no data attached
142 * then the TDB_DATA will have zero length but
145 TDB_DATA
tdb_fetch(struct tdb_context
*tdb
, TDB_DATA key
)
148 struct list_struct rec
;
152 /* find which hash bucket it is in */
153 hash
= tdb
->hash_fn(&key
);
154 if (!(rec_ptr
= tdb_find_lock_hash(tdb
,key
,hash
,F_RDLCK
,&rec
)))
157 ret
.dptr
= tdb_alloc_read(tdb
, rec_ptr
+ sizeof(rec
) + rec
.key_len
,
159 ret
.dsize
= rec
.data_len
;
160 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
165 * Find an entry in the database and hand the record's data to a parsing
166 * function. The parsing function is executed under the chain read lock, so it
167 * should be fast and should not block on other syscalls.
169 * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
171 * For mmapped tdb's that do not have a transaction open it points the parsing
172 * function directly at the mmap area, it avoids the malloc/memcpy in this
173 * case. If a transaction is open or no mmap is available, it has to do
174 * malloc/read/parse/free.
176 * This is interesting for all readers of potentially large data structures in
177 * the tdb records, ldb indexes being one example.
180 int tdb_parse_record(struct tdb_context
*tdb
, TDB_DATA key
,
181 int (*parser
)(TDB_DATA key
, TDB_DATA data
,
186 struct list_struct rec
;
190 /* find which hash bucket it is in */
191 hash
= tdb
->hash_fn(&key
);
193 if (!(rec_ptr
= tdb_find_lock_hash(tdb
,key
,hash
,F_RDLCK
,&rec
))) {
194 return TDB_ERRCODE(TDB_ERR_NOEXIST
, 0);
197 ret
= tdb_parse_data(tdb
, key
, rec_ptr
+ sizeof(rec
) + rec
.key_len
,
198 rec
.data_len
, parser
, private_data
);
200 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
205 /* check if an entry in the database exists
207 note that 1 is returned if the key is found and 0 is returned if not found
208 this doesn't match the conventions in the rest of this module, but is
211 static int tdb_exists_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
)
213 struct list_struct rec
;
215 if (tdb_find_lock_hash(tdb
, key
, hash
, F_RDLCK
, &rec
) == 0)
217 tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_RDLCK
);
221 int tdb_exists(struct tdb_context
*tdb
, TDB_DATA key
)
223 uint32_t hash
= tdb
->hash_fn(&key
);
224 return tdb_exists_hash(tdb
, key
, hash
);
227 /* actually delete an entry in the database given the offset */
228 int tdb_do_delete(struct tdb_context
*tdb
, tdb_off_t rec_ptr
, struct list_struct
*rec
)
230 tdb_off_t last_ptr
, i
;
231 struct list_struct lastrec
;
233 if (tdb
->read_only
|| tdb
->traverse_read
) return -1;
235 if (tdb_write_lock_record(tdb
, rec_ptr
) == -1) {
236 /* Someone traversing here: mark it as dead */
237 rec
->magic
= TDB_DEAD_MAGIC
;
238 return tdb_rec_write(tdb
, rec_ptr
, rec
);
240 if (tdb_write_unlock_record(tdb
, rec_ptr
) != 0)
243 /* find previous record in hash chain */
244 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(rec
->full_hash
), &i
) == -1)
246 for (last_ptr
= 0; i
!= rec_ptr
; last_ptr
= i
, i
= lastrec
.next
)
247 if (tdb_rec_read(tdb
, i
, &lastrec
) == -1)
250 /* unlink it: next ptr is at start of record. */
252 last_ptr
= TDB_HASH_TOP(rec
->full_hash
);
253 if (tdb_ofs_write(tdb
, last_ptr
, &rec
->next
) == -1)
256 /* recover the space */
257 if (tdb_free(tdb
, rec_ptr
, rec
) == -1)
262 static int tdb_count_dead(struct tdb_context
*tdb
, uint32_t hash
)
266 struct list_struct rec
;
268 /* read in the hash top */
269 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1)
273 if (tdb_rec_read(tdb
, rec_ptr
, &rec
) == -1)
276 if (rec
.magic
== TDB_DEAD_MAGIC
) {
285 * Purge all DEAD records from a hash chain
287 static int tdb_purge_dead(struct tdb_context
*tdb
, uint32_t hash
)
290 struct list_struct rec
;
293 if (tdb_lock(tdb
, -1, F_WRLCK
) == -1) {
297 /* read in the hash top */
298 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1)
304 if (tdb_rec_read(tdb
, rec_ptr
, &rec
) == -1) {
310 if (rec
.magic
== TDB_DEAD_MAGIC
311 && tdb_do_delete(tdb
, rec_ptr
, &rec
) == -1) {
318 tdb_unlock(tdb
, -1, F_WRLCK
);
322 /* delete an entry in the database given a key */
323 static int tdb_delete_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
)
326 struct list_struct rec
;
329 if (tdb
->max_dead_records
!= 0) {
332 * Allow for some dead records per hash chain, mainly for
333 * tdb's with a very high create/delete rate like locking.tdb.
336 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
339 if (tdb_count_dead(tdb
, hash
) >= tdb
->max_dead_records
) {
341 * Don't let the per-chain freelist grow too large,
342 * delete all existing dead records
344 tdb_purge_dead(tdb
, hash
);
347 if (!(rec_ptr
= tdb_find(tdb
, key
, hash
, &rec
))) {
348 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
353 * Just mark the record as dead.
355 rec
.magic
= TDB_DEAD_MAGIC
;
356 ret
= tdb_rec_write(tdb
, rec_ptr
, &rec
);
359 if (!(rec_ptr
= tdb_find_lock_hash(tdb
, key
, hash
, F_WRLCK
,
363 ret
= tdb_do_delete(tdb
, rec_ptr
, &rec
);
367 tdb_increment_seqnum(tdb
);
370 if (tdb_unlock(tdb
, BUCKET(rec
.full_hash
), F_WRLCK
) != 0)
371 TDB_LOG((tdb
, TDB_DEBUG_WARNING
, "tdb_delete: WARNING tdb_unlock failed!\n"));
375 int tdb_delete(struct tdb_context
*tdb
, TDB_DATA key
)
377 uint32_t hash
= tdb
->hash_fn(&key
);
378 return tdb_delete_hash(tdb
, key
, hash
);
382 * See if we have a dead record around with enough space
384 static tdb_off_t
tdb_find_dead(struct tdb_context
*tdb
, uint32_t hash
,
385 struct list_struct
*r
, tdb_len_t length
)
389 /* read in the hash top */
390 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1)
393 /* keep looking until we find the right record */
395 if (tdb_rec_read(tdb
, rec_ptr
, r
) == -1)
398 if (TDB_DEAD(r
) && r
->rec_len
>= length
) {
400 * First fit for simple coding, TODO: change to best
410 /* store an element in the database, replacing any existing element
413 return 0 on success, -1 on failure
415 int tdb_store(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA dbuf
, int flag
)
417 struct list_struct rec
;
423 if (tdb
->read_only
|| tdb
->traverse_read
) {
424 tdb
->ecode
= TDB_ERR_RDONLY
;
428 /* find which hash bucket it is in */
429 hash
= tdb
->hash_fn(&key
);
430 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
433 /* check for it existing, on insert. */
434 if (flag
== TDB_INSERT
) {
435 if (tdb_exists_hash(tdb
, key
, hash
)) {
436 tdb
->ecode
= TDB_ERR_EXISTS
;
440 /* first try in-place update, on modify or replace. */
441 if (tdb_update_hash(tdb
, key
, hash
, dbuf
) == 0) {
444 if (tdb
->ecode
== TDB_ERR_NOEXIST
&&
445 flag
== TDB_MODIFY
) {
446 /* if the record doesn't exist and we are in TDB_MODIFY mode then
447 we should fail the store */
451 /* reset the error code potentially set by the tdb_update() */
452 tdb
->ecode
= TDB_SUCCESS
;
454 /* delete any existing record - if it doesn't exist we don't
455 care. Doing this first reduces fragmentation, and avoids
456 coalescing with `allocated' block before it's updated. */
457 if (flag
!= TDB_INSERT
)
458 tdb_delete_hash(tdb
, key
, hash
);
460 /* Copy key+value *before* allocating free space in case malloc
461 fails and we are left with a dead spot in the tdb. */
463 if (!(p
= (char *)malloc(key
.dsize
+ dbuf
.dsize
))) {
464 tdb
->ecode
= TDB_ERR_OOM
;
468 memcpy(p
, key
.dptr
, key
.dsize
);
470 memcpy(p
+key
.dsize
, dbuf
.dptr
, dbuf
.dsize
);
472 if (tdb
->max_dead_records
!= 0) {
474 * Allow for some dead records per hash chain, look if we can
475 * find one that can hold the new record. We need enough space
476 * for key, data and tailer. If we find one, we don't have to
477 * consult the central freelist.
479 rec_ptr
= tdb_find_dead(
481 key
.dsize
+ dbuf
.dsize
+ sizeof(tdb_off_t
));
484 rec
.key_len
= key
.dsize
;
485 rec
.data_len
= dbuf
.dsize
;
486 rec
.full_hash
= hash
;
487 rec
.magic
= TDB_MAGIC
;
488 if (tdb_rec_write(tdb
, rec_ptr
, &rec
) == -1
489 || tdb
->methods
->tdb_write(
490 tdb
, rec_ptr
+ sizeof(rec
),
491 p
, key
.dsize
+ dbuf
.dsize
) == -1) {
499 * We have to allocate some space from the freelist, so this means we
500 * have to lock it. Use the chance to purge all the DEAD records from
501 * the hash chain under the freelist lock.
504 if (tdb_lock(tdb
, -1, F_WRLCK
) == -1) {
508 if ((tdb
->max_dead_records
!= 0)
509 && (tdb_purge_dead(tdb
, hash
) == -1)) {
510 tdb_unlock(tdb
, -1, F_WRLCK
);
514 /* we have to allocate some space */
515 rec_ptr
= tdb_allocate(tdb
, key
.dsize
+ dbuf
.dsize
, &rec
);
517 tdb_unlock(tdb
, -1, F_WRLCK
);
523 /* Read hash top into next ptr */
524 if (tdb_ofs_read(tdb
, TDB_HASH_TOP(hash
), &rec
.next
) == -1)
527 rec
.key_len
= key
.dsize
;
528 rec
.data_len
= dbuf
.dsize
;
529 rec
.full_hash
= hash
;
530 rec
.magic
= TDB_MAGIC
;
532 /* write out and point the top of the hash chain at it */
533 if (tdb_rec_write(tdb
, rec_ptr
, &rec
) == -1
534 || tdb
->methods
->tdb_write(tdb
, rec_ptr
+sizeof(rec
), p
, key
.dsize
+dbuf
.dsize
)==-1
535 || tdb_ofs_write(tdb
, TDB_HASH_TOP(hash
), &rec_ptr
) == -1) {
536 /* Need to tdb_unallocate() here */
544 tdb_increment_seqnum(tdb
);
548 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
553 /* Append to an entry. Create if not exist. */
554 int tdb_append(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA new_dbuf
)
560 /* find which hash bucket it is in */
561 hash
= tdb
->hash_fn(&key
);
562 if (tdb_lock(tdb
, BUCKET(hash
), F_WRLCK
) == -1)
565 dbuf
= tdb_fetch(tdb
, key
);
567 if (dbuf
.dptr
== NULL
) {
568 dbuf
.dptr
= (unsigned char *)malloc(new_dbuf
.dsize
);
570 unsigned char *new_dptr
= (unsigned char *)realloc(dbuf
.dptr
,
571 dbuf
.dsize
+ new_dbuf
.dsize
);
572 if (new_dptr
== NULL
) {
575 dbuf
.dptr
= new_dptr
;
578 if (dbuf
.dptr
== NULL
) {
579 tdb
->ecode
= TDB_ERR_OOM
;
583 memcpy(dbuf
.dptr
+ dbuf
.dsize
, new_dbuf
.dptr
, new_dbuf
.dsize
);
584 dbuf
.dsize
+= new_dbuf
.dsize
;
586 ret
= tdb_store(tdb
, key
, dbuf
, 0);
589 tdb_unlock(tdb
, BUCKET(hash
), F_WRLCK
);
590 SAFE_FREE(dbuf
.dptr
);
596 return the name of the current tdb file
597 useful for external logging functions
599 const char *tdb_name(struct tdb_context
*tdb
)
605 return the underlying file descriptor being used by tdb, or -1
606 useful for external routines that want to check the device/inode
609 int tdb_fd(struct tdb_context
*tdb
)
615 return the current logging function
616 useful for external tdb routines that wish to log tdb errors
618 tdb_log_func
tdb_log_fn(struct tdb_context
*tdb
)
620 return tdb
->log
.log_fn
;
625 get the tdb sequence number. Only makes sense if the writers opened
626 with TDB_SEQNUM set. Note that this sequence number will wrap quite
627 quickly, so it should only be used for a 'has something changed'
628 test, not for code that relies on the count of the number of changes
629 made. If you want a counter then use a tdb record.
631 The aim of this sequence number is to allow for a very lightweight
632 test of a possible tdb change.
634 int tdb_get_seqnum(struct tdb_context
*tdb
)
638 tdb_ofs_read(tdb
, TDB_SEQNUM_OFS
, &seqnum
);
642 int tdb_hash_size(struct tdb_context
*tdb
)
644 return tdb
->header
.hash_size
;
647 size_t tdb_map_size(struct tdb_context
*tdb
)
649 return tdb
->map_size
;
652 int tdb_get_flags(struct tdb_context
*tdb
)