r25068: Older samba3 DCs will return DCERPC_FAULT_OP_RNG_ERROR for every opcode on the
[Samba.git] / source / lib / tdb / common / tdb.c
blob92811bea49ff81685449a7193ebf8deb1a74bb9d
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "tdb_private.h"
31 TDB_DATA tdb_null;
34 increment the tdb sequence number if the tdb has been opened using
35 the TDB_SEQNUM flag
37 static void tdb_increment_seqnum(struct tdb_context *tdb)
39 tdb_off_t seqnum=0;
41 if (!(tdb->flags & TDB_SEQNUM)) {
42 return;
45 if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
46 return;
49 /* we ignore errors from this, as we have no sane way of
50 dealing with them.
52 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
53 seqnum++;
54 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
56 tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
59 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
61 return memcmp(data.dptr, key.dptr, data.dsize);
64 /* Returns 0 on fail. On success, return offset of record, and fills
65 in rec */
66 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
67 struct list_struct *r)
69 tdb_off_t rec_ptr;
71 /* read in the hash top */
72 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
73 return 0;
75 /* keep looking until we find the right record */
76 while (rec_ptr) {
77 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
78 return 0;
80 if (!TDB_DEAD(r) && hash==r->full_hash
81 && key.dsize==r->key_len
82 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
83 r->key_len, tdb_key_compare,
84 NULL) == 0) {
85 return rec_ptr;
87 rec_ptr = r->next;
89 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
92 /* As tdb_find, but if you succeed, keep the lock */
93 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key,
94 uint32_t hash, int locktype,
95 struct list_struct *rec)
97 uint32_t rec_ptr;
99 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
100 return 0;
101 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
102 tdb_unlock(tdb, BUCKET(hash), locktype);
103 return rec_ptr;
107 /* update an entry in place - this only works if the new data size
108 is <= the old data size and the key exists.
109 on failure return -1.
111 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
113 struct list_struct rec;
114 tdb_off_t rec_ptr;
116 /* find entry */
117 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
118 return -1;
120 /* must be long enough key, data and tailer */
121 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
122 tdb->ecode = TDB_SUCCESS; /* Not really an error */
123 return -1;
126 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
127 dbuf.dptr, dbuf.dsize) == -1)
128 return -1;
130 if (dbuf.dsize != rec.data_len) {
131 /* update size */
132 rec.data_len = dbuf.dsize;
133 return tdb_rec_write(tdb, rec_ptr, &rec);
136 return 0;
139 /* find an entry in the database given a key */
140 /* If an entry doesn't exist tdb_err will be set to
141 * TDB_ERR_NOEXIST. If a key has no data attached
142 * then the TDB_DATA will have zero length but
143 * a non-zero pointer
145 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
147 tdb_off_t rec_ptr;
148 struct list_struct rec;
149 TDB_DATA ret;
150 uint32_t hash;
152 /* find which hash bucket it is in */
153 hash = tdb->hash_fn(&key);
154 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
155 return tdb_null;
157 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
158 rec.data_len);
159 ret.dsize = rec.data_len;
160 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
161 return ret;
165 * Find an entry in the database and hand the record's data to a parsing
166 * function. The parsing function is executed under the chain read lock, so it
167 * should be fast and should not block on other syscalls.
169 * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
171 * For mmapped tdb's that do not have a transaction open it points the parsing
172 * function directly at the mmap area, it avoids the malloc/memcpy in this
173 * case. If a transaction is open or no mmap is available, it has to do
174 * malloc/read/parse/free.
176 * This is interesting for all readers of potentially large data structures in
177 * the tdb records, ldb indexes being one example.
180 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
181 int (*parser)(TDB_DATA key, TDB_DATA data,
182 void *private_data),
183 void *private_data)
185 tdb_off_t rec_ptr;
186 struct list_struct rec;
187 int ret;
188 uint32_t hash;
190 /* find which hash bucket it is in */
191 hash = tdb->hash_fn(&key);
193 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
194 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
197 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
198 rec.data_len, parser, private_data);
200 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
202 return ret;
205 /* check if an entry in the database exists
207 note that 1 is returned if the key is found and 0 is returned if not found
208 this doesn't match the conventions in the rest of this module, but is
209 compatible with gdbm
211 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
213 struct list_struct rec;
215 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
216 return 0;
217 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
218 return 1;
221 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
223 uint32_t hash = tdb->hash_fn(&key);
224 return tdb_exists_hash(tdb, key, hash);
227 /* actually delete an entry in the database given the offset */
228 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
230 tdb_off_t last_ptr, i;
231 struct list_struct lastrec;
233 if (tdb->read_only || tdb->traverse_read) return -1;
235 if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
236 /* Someone traversing here: mark it as dead */
237 rec->magic = TDB_DEAD_MAGIC;
238 return tdb_rec_write(tdb, rec_ptr, rec);
240 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
241 return -1;
243 /* find previous record in hash chain */
244 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
245 return -1;
246 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
247 if (tdb_rec_read(tdb, i, &lastrec) == -1)
248 return -1;
250 /* unlink it: next ptr is at start of record. */
251 if (last_ptr == 0)
252 last_ptr = TDB_HASH_TOP(rec->full_hash);
253 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
254 return -1;
256 /* recover the space */
257 if (tdb_free(tdb, rec_ptr, rec) == -1)
258 return -1;
259 return 0;
262 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
264 int res = 0;
265 tdb_off_t rec_ptr;
266 struct list_struct rec;
268 /* read in the hash top */
269 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
270 return 0;
272 while (rec_ptr) {
273 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
274 return 0;
276 if (rec.magic == TDB_DEAD_MAGIC) {
277 res += 1;
279 rec_ptr = rec.next;
281 return res;
285 * Purge all DEAD records from a hash chain
287 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
289 int res = -1;
290 struct list_struct rec;
291 tdb_off_t rec_ptr;
293 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
294 return -1;
297 /* read in the hash top */
298 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
299 goto fail;
301 while (rec_ptr) {
302 tdb_off_t next;
304 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
305 goto fail;
308 next = rec.next;
310 if (rec.magic == TDB_DEAD_MAGIC
311 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
312 goto fail;
314 rec_ptr = next;
316 res = 0;
317 fail:
318 tdb_unlock(tdb, -1, F_WRLCK);
319 return res;
322 /* delete an entry in the database given a key */
323 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
325 tdb_off_t rec_ptr;
326 struct list_struct rec;
327 int ret;
329 if (tdb->max_dead_records != 0) {
332 * Allow for some dead records per hash chain, mainly for
333 * tdb's with a very high create/delete rate like locking.tdb.
336 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
337 return -1;
339 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
341 * Don't let the per-chain freelist grow too large,
342 * delete all existing dead records
344 tdb_purge_dead(tdb, hash);
347 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
348 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
349 return -1;
353 * Just mark the record as dead.
355 rec.magic = TDB_DEAD_MAGIC;
356 ret = tdb_rec_write(tdb, rec_ptr, &rec);
358 else {
359 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
360 &rec)))
361 return -1;
363 ret = tdb_do_delete(tdb, rec_ptr, &rec);
366 if (ret == 0) {
367 tdb_increment_seqnum(tdb);
370 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
371 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
372 return ret;
375 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
377 uint32_t hash = tdb->hash_fn(&key);
378 return tdb_delete_hash(tdb, key, hash);
382 * See if we have a dead record around with enough space
384 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
385 struct list_struct *r, tdb_len_t length)
387 tdb_off_t rec_ptr;
389 /* read in the hash top */
390 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
391 return 0;
393 /* keep looking until we find the right record */
394 while (rec_ptr) {
395 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
396 return 0;
398 if (TDB_DEAD(r) && r->rec_len >= length) {
400 * First fit for simple coding, TODO: change to best
401 * fit
403 return rec_ptr;
405 rec_ptr = r->next;
407 return 0;
410 /* store an element in the database, replacing any existing element
411 with the same key
413 return 0 on success, -1 on failure
415 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
417 struct list_struct rec;
418 uint32_t hash;
419 tdb_off_t rec_ptr;
420 char *p = NULL;
421 int ret = -1;
423 if (tdb->read_only || tdb->traverse_read) {
424 tdb->ecode = TDB_ERR_RDONLY;
425 return -1;
428 /* find which hash bucket it is in */
429 hash = tdb->hash_fn(&key);
430 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
431 return -1;
433 /* check for it existing, on insert. */
434 if (flag == TDB_INSERT) {
435 if (tdb_exists_hash(tdb, key, hash)) {
436 tdb->ecode = TDB_ERR_EXISTS;
437 goto fail;
439 } else {
440 /* first try in-place update, on modify or replace. */
441 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
442 goto done;
444 if (tdb->ecode == TDB_ERR_NOEXIST &&
445 flag == TDB_MODIFY) {
446 /* if the record doesn't exist and we are in TDB_MODIFY mode then
447 we should fail the store */
448 goto fail;
451 /* reset the error code potentially set by the tdb_update() */
452 tdb->ecode = TDB_SUCCESS;
454 /* delete any existing record - if it doesn't exist we don't
455 care. Doing this first reduces fragmentation, and avoids
456 coalescing with `allocated' block before it's updated. */
457 if (flag != TDB_INSERT)
458 tdb_delete_hash(tdb, key, hash);
460 /* Copy key+value *before* allocating free space in case malloc
461 fails and we are left with a dead spot in the tdb. */
463 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
464 tdb->ecode = TDB_ERR_OOM;
465 goto fail;
468 memcpy(p, key.dptr, key.dsize);
469 if (dbuf.dsize)
470 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
472 if (tdb->max_dead_records != 0) {
474 * Allow for some dead records per hash chain, look if we can
475 * find one that can hold the new record. We need enough space
476 * for key, data and tailer. If we find one, we don't have to
477 * consult the central freelist.
479 rec_ptr = tdb_find_dead(
480 tdb, hash, &rec,
481 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
483 if (rec_ptr != 0) {
484 rec.key_len = key.dsize;
485 rec.data_len = dbuf.dsize;
486 rec.full_hash = hash;
487 rec.magic = TDB_MAGIC;
488 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
489 || tdb->methods->tdb_write(
490 tdb, rec_ptr + sizeof(rec),
491 p, key.dsize + dbuf.dsize) == -1) {
492 goto fail;
494 goto done;
499 * We have to allocate some space from the freelist, so this means we
500 * have to lock it. Use the chance to purge all the DEAD records from
501 * the hash chain under the freelist lock.
504 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
505 goto fail;
508 if ((tdb->max_dead_records != 0)
509 && (tdb_purge_dead(tdb, hash) == -1)) {
510 tdb_unlock(tdb, -1, F_WRLCK);
511 goto fail;
514 /* we have to allocate some space */
515 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
517 tdb_unlock(tdb, -1, F_WRLCK);
519 if (rec_ptr == 0) {
520 goto fail;
523 /* Read hash top into next ptr */
524 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
525 goto fail;
527 rec.key_len = key.dsize;
528 rec.data_len = dbuf.dsize;
529 rec.full_hash = hash;
530 rec.magic = TDB_MAGIC;
532 /* write out and point the top of the hash chain at it */
533 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
534 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
535 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
536 /* Need to tdb_unallocate() here */
537 goto fail;
540 done:
541 ret = 0;
542 fail:
543 if (ret == 0) {
544 tdb_increment_seqnum(tdb);
547 SAFE_FREE(p);
548 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
549 return ret;
553 /* Append to an entry. Create if not exist. */
554 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
556 uint32_t hash;
557 TDB_DATA dbuf;
558 int ret = -1;
560 /* find which hash bucket it is in */
561 hash = tdb->hash_fn(&key);
562 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
563 return -1;
565 dbuf = tdb_fetch(tdb, key);
567 if (dbuf.dptr == NULL) {
568 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
569 } else {
570 unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
571 dbuf.dsize + new_dbuf.dsize);
572 if (new_dptr == NULL) {
573 free(dbuf.dptr);
575 dbuf.dptr = new_dptr;
578 if (dbuf.dptr == NULL) {
579 tdb->ecode = TDB_ERR_OOM;
580 goto failed;
583 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
584 dbuf.dsize += new_dbuf.dsize;
586 ret = tdb_store(tdb, key, dbuf, 0);
588 failed:
589 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
590 SAFE_FREE(dbuf.dptr);
591 return ret;
596 return the name of the current tdb file
597 useful for external logging functions
599 const char *tdb_name(struct tdb_context *tdb)
601 return tdb->name;
605 return the underlying file descriptor being used by tdb, or -1
606 useful for external routines that want to check the device/inode
607 of the fd
609 int tdb_fd(struct tdb_context *tdb)
611 return tdb->fd;
615 return the current logging function
616 useful for external tdb routines that wish to log tdb errors
618 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
620 return tdb->log.log_fn;
625 get the tdb sequence number. Only makes sense if the writers opened
626 with TDB_SEQNUM set. Note that this sequence number will wrap quite
627 quickly, so it should only be used for a 'has something changed'
628 test, not for code that relies on the count of the number of changes
629 made. If you want a counter then use a tdb record.
631 The aim of this sequence number is to allow for a very lightweight
632 test of a possible tdb change.
634 int tdb_get_seqnum(struct tdb_context *tdb)
636 tdb_off_t seqnum=0;
638 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
639 return seqnum;
642 int tdb_hash_size(struct tdb_context *tdb)
644 return tdb->header.hash_size;
647 size_t tdb_map_size(struct tdb_context *tdb)
649 return tdb->map_size;
652 int tdb_get_flags(struct tdb_context *tdb)
654 return tdb->flags;