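/*
 * Provenance (from the repository web view this file was taken from):
 *   commit: "tdb2: unify tdb1_fetch into tdb_fetch"
 *   path:   [Samba/gebeck_regimport.git] / lib / tdb2 / tdb1_tdb.c
 *   blob:   bba16338e97ec937dd66d1b41e1ac0e02eb96003
 */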
 /*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell         2000
   Copyright (C) Jeremy Allison               2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "tdb1_private.h"
#include <assert.h>
#include <errno.h>

31 TDB_DATA tdb1_null;
34 non-blocking increment of the tdb sequence number if the tdb has been opened using
35 the TDB_SEQNUM flag
37 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
39 tdb1_off_t seqnum=0;
41 if (!(tdb->flags & TDB_SEQNUM)) {
42 return;
45 /* we ignore errors from this, as we have no sane way of
46 dealing with them.
48 tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
49 seqnum++;
50 tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
54 increment the tdb sequence number if the tdb has been opened using
55 the TDB_SEQNUM flag
57 static void tdb1_increment_seqnum(struct tdb_context *tdb)
59 if (!(tdb->flags & TDB_SEQNUM)) {
60 return;
63 if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
64 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
65 return;
68 tdb1_increment_seqnum_nonblock(tdb);
70 tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
73 static int tdb1_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
75 return memcmp(data.dptr, key.dptr, data.dsize);
78 /* Returns 0 on fail. On success, return offset of record, and fills
79 in rec */
80 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
81 struct tdb1_record *r)
83 tdb1_off_t rec_ptr;
85 /* read in the hash top */
86 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
87 return 0;
89 /* keep looking until we find the right record */
90 while (rec_ptr) {
91 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
92 return 0;
94 if (!TDB1_DEAD(r) && hash==r->full_hash
95 && key.dsize==r->key_len
96 && tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
97 r->key_len, tdb1_key_compare,
98 NULL) == 0) {
99 return rec_ptr;
101 /* detect tight infinite loop */
102 if (rec_ptr == r->next) {
103 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
104 TDB_LOG_ERROR,
105 "tdb1_find: loop detected.");
106 return 0;
108 rec_ptr = r->next;
110 tdb->last_error = TDB_ERR_NOEXIST;
111 return 0;
114 /* As tdb1_find, but if you succeed, keep the lock */
115 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
116 struct tdb1_record *rec)
118 uint32_t rec_ptr;
120 if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
121 return 0;
122 if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
123 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
124 return rec_ptr;
127 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
129 /* update an entry in place - this only works if the new data size
130 is <= the old data size and the key exists.
131 on failure return -1.
133 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
135 struct tdb1_record rec;
136 tdb1_off_t rec_ptr;
138 /* find entry */
139 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
140 return -1;
142 /* it could be an exact duplicate of what is there - this is
143 * surprisingly common (eg. with a ldb re-index). */
144 if (rec.key_len == key.dsize &&
145 rec.data_len == dbuf.dsize &&
146 rec.full_hash == hash) {
147 TDB_DATA data = _tdb1_fetch(tdb, key);
148 if (data.dsize == dbuf.dsize &&
149 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
150 if (data.dptr) {
151 free(data.dptr);
153 return 0;
155 if (data.dptr) {
156 free(data.dptr);
160 /* must be long enough key, data and tailer */
161 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
162 tdb->last_error = TDB_SUCCESS; /* Not really an error */
163 return -1;
166 if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
167 dbuf.dptr, dbuf.dsize) == -1)
168 return -1;
170 if (dbuf.dsize != rec.data_len) {
171 /* update size */
172 rec.data_len = dbuf.dsize;
173 return tdb1_rec_write(tdb, rec_ptr, &rec);
176 return 0;
179 /* find an entry in the database given a key */
180 /* If an entry doesn't exist tdb1_err will be set to
181 * TDB_ERR_NOEXIST. If a key has no data attached
182 * then the TDB_DATA will have zero length but
183 * a non-zero pointer
185 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
187 tdb1_off_t rec_ptr;
188 struct tdb1_record rec;
189 TDB_DATA ret;
190 uint32_t hash;
192 /* find which hash bucket it is in */
193 hash = tdb_hash(tdb, key.dptr, key.dsize);
194 if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
195 return tdb1_null;
197 ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
198 rec.data_len);
199 ret.dsize = rec.data_len;
200 tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
201 return ret;
204 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
206 *data = _tdb1_fetch(tdb, key);
207 if (data->dptr == NULL)
208 return tdb->last_error;
209 return TDB_SUCCESS;
213 * Find an entry in the database and hand the record's data to a parsing
214 * function. The parsing function is executed under the chain read lock, so it
215 * should be fast and should not block on other syscalls.
217 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
219 * For mmapped tdb's that do not have a transaction open it points the parsing
220 * function directly at the mmap area, it avoids the malloc/memcpy in this
221 * case. If a transaction is open or no mmap is available, it has to do
222 * malloc/read/parse/free.
224 * This is interesting for all readers of potentially large data structures in
225 * the tdb records, ldb indexes being one example.
227 * Return -1 if the record was not found.
230 int tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
231 int (*parser)(TDB_DATA key, TDB_DATA data,
232 void *private_data),
233 void *private_data)
235 tdb1_off_t rec_ptr;
236 struct tdb1_record rec;
237 int ret;
238 uint32_t hash;
240 /* find which hash bucket it is in */
241 hash = tdb_hash(tdb, key.dptr, key.dsize);
243 if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
244 /* record not found */
245 tdb->last_error = TDB_ERR_NOEXIST;
246 return -1;
249 ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
250 rec.data_len, parser, private_data);
252 tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
254 return ret;
257 /* check if an entry in the database exists
259 note that 1 is returned if the key is found and 0 is returned if not found
260 this doesn't match the conventions in the rest of this module, but is
261 compatible with gdbm
263 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
265 struct tdb1_record rec;
267 if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
268 return 0;
269 tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
270 return 1;
273 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
275 uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
276 int ret;
278 ret = tdb1_exists_hash(tdb, key, hash);
279 return ret;
282 /* actually delete an entry in the database given the offset */
283 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
285 tdb1_off_t last_ptr, i;
286 struct tdb1_record lastrec;
288 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
290 if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
291 tdb1_write_lock_record(tdb, rec_ptr) == -1) {
292 /* Someone traversing here: mark it as dead */
293 rec->magic = TDB1_DEAD_MAGIC;
294 return tdb1_rec_write(tdb, rec_ptr, rec);
296 if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
297 return -1;
299 /* find previous record in hash chain */
300 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
301 return -1;
302 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
303 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
304 return -1;
306 /* unlink it: next ptr is at start of record. */
307 if (last_ptr == 0)
308 last_ptr = TDB1_HASH_TOP(rec->full_hash);
309 if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
310 return -1;
312 /* recover the space */
313 if (tdb1_free(tdb, rec_ptr, rec) == -1)
314 return -1;
315 return 0;
318 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
320 int res = 0;
321 tdb1_off_t rec_ptr;
322 struct tdb1_record rec;
324 /* read in the hash top */
325 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
326 return 0;
328 while (rec_ptr) {
329 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
330 return 0;
332 if (rec.magic == TDB1_DEAD_MAGIC) {
333 res += 1;
335 rec_ptr = rec.next;
337 return res;
341 * Purge all DEAD records from a hash chain
343 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
345 int res = -1;
346 struct tdb1_record rec;
347 tdb1_off_t rec_ptr;
349 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
350 return -1;
353 /* read in the hash top */
354 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
355 goto fail;
357 while (rec_ptr) {
358 tdb1_off_t next;
360 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
361 goto fail;
364 next = rec.next;
366 if (rec.magic == TDB1_DEAD_MAGIC
367 && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
368 goto fail;
370 rec_ptr = next;
372 res = 0;
373 fail:
374 tdb1_unlock(tdb, -1, F_WRLCK);
375 return res;
378 /* delete an entry in the database given a key */
379 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
381 tdb1_off_t rec_ptr;
382 struct tdb1_record rec;
383 int ret;
385 if (tdb->tdb1.max_dead_records != 0) {
388 * Allow for some dead records per hash chain, mainly for
389 * tdb's with a very high create/delete rate like locking.tdb.
392 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
393 return -1;
395 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
397 * Don't let the per-chain freelist grow too large,
398 * delete all existing dead records
400 tdb1_purge_dead(tdb, hash);
403 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
404 tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
405 return -1;
409 * Just mark the record as dead.
411 rec.magic = TDB1_DEAD_MAGIC;
412 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
414 else {
415 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
416 &rec)))
417 return -1;
419 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
422 if (ret == 0) {
423 tdb1_increment_seqnum(tdb);
426 if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
427 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
428 "tdb1_delete: WARNING tdb1_unlock failed!");
429 return ret;
432 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
434 uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
435 int ret;
437 ret = tdb1_delete_hash(tdb, key, hash);
438 return ret;
442 * See if we have a dead record around with enough space
444 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
445 struct tdb1_record *r, tdb1_len_t length)
447 tdb1_off_t rec_ptr;
449 /* read in the hash top */
450 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
451 return 0;
453 /* keep looking until we find the right record */
454 while (rec_ptr) {
455 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
456 return 0;
458 if (TDB1_DEAD(r) && r->rec_len >= length) {
460 * First fit for simple coding, TODO: change to best
461 * fit
463 return rec_ptr;
465 rec_ptr = r->next;
467 return 0;
470 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
471 TDB_DATA dbuf, int flag, uint32_t hash)
473 struct tdb1_record rec;
474 tdb1_off_t rec_ptr;
475 char *p = NULL;
476 int ret = -1;
478 /* check for it existing, on insert. */
479 if (flag == TDB_INSERT) {
480 if (tdb1_exists_hash(tdb, key, hash)) {
481 tdb->last_error = TDB_ERR_EXISTS;
482 goto fail;
484 } else {
485 /* first try in-place update, on modify or replace. */
486 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
487 goto done;
489 if (tdb->last_error == TDB_ERR_NOEXIST &&
490 flag == TDB_MODIFY) {
491 /* if the record doesn't exist and we are in TDB1_MODIFY mode then
492 we should fail the store */
493 goto fail;
496 /* reset the error code potentially set by the tdb1_update() */
497 tdb->last_error = TDB_SUCCESS;
499 /* delete any existing record - if it doesn't exist we don't
500 care. Doing this first reduces fragmentation, and avoids
501 coalescing with `allocated' block before it's updated. */
502 if (flag != TDB_INSERT)
503 tdb1_delete_hash(tdb, key, hash);
505 /* Copy key+value *before* allocating free space in case malloc
506 fails and we are left with a dead spot in the tdb. */
508 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
509 tdb->last_error = TDB_ERR_OOM;
510 goto fail;
513 memcpy(p, key.dptr, key.dsize);
514 if (dbuf.dsize)
515 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
517 if (tdb->tdb1.max_dead_records != 0) {
519 * Allow for some dead records per hash chain, look if we can
520 * find one that can hold the new record. We need enough space
521 * for key, data and tailer. If we find one, we don't have to
522 * consult the central freelist.
524 rec_ptr = tdb1_find_dead(
525 tdb, hash, &rec,
526 key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
528 if (rec_ptr != 0) {
529 rec.key_len = key.dsize;
530 rec.data_len = dbuf.dsize;
531 rec.full_hash = hash;
532 rec.magic = TDB1_MAGIC;
533 if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
534 || tdb->tdb1.io->tdb1_write(
535 tdb, rec_ptr + sizeof(rec),
536 p, key.dsize + dbuf.dsize) == -1) {
537 goto fail;
539 goto done;
544 * We have to allocate some space from the freelist, so this means we
545 * have to lock it. Use the chance to purge all the DEAD records from
546 * the hash chain under the freelist lock.
549 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
550 goto fail;
553 if ((tdb->tdb1.max_dead_records != 0)
554 && (tdb1_purge_dead(tdb, hash) == -1)) {
555 tdb1_unlock(tdb, -1, F_WRLCK);
556 goto fail;
559 /* we have to allocate some space */
560 rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
562 tdb1_unlock(tdb, -1, F_WRLCK);
564 if (rec_ptr == 0) {
565 goto fail;
568 /* Read hash top into next ptr */
569 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
570 goto fail;
572 rec.key_len = key.dsize;
573 rec.data_len = dbuf.dsize;
574 rec.full_hash = hash;
575 rec.magic = TDB1_MAGIC;
577 /* write out and point the top of the hash chain at it */
578 if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
579 || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
580 || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
581 /* Need to tdb1_unallocate() here */
582 goto fail;
585 done:
586 ret = 0;
587 fail:
588 if (ret == 0) {
589 tdb1_increment_seqnum(tdb);
592 SAFE_FREE(p);
593 return ret;
596 /* store an element in the database, replacing any existing element
597 with the same key
599 return 0 on success, -1 on failure
601 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
603 uint32_t hash;
604 int ret;
606 assert(tdb->flags & TDB_VERSION1);
608 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
609 tdb->last_error = TDB_ERR_RDONLY;
610 return -1;
613 /* find which hash bucket it is in */
614 hash = tdb_hash(tdb, key.dptr, key.dsize);
615 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
616 return -1;
618 ret = _tdb1_store(tdb, key, dbuf, flag, hash);
619 tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
620 return ret;
623 /* Append to an entry. Create if not exist. */
624 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
626 uint32_t hash;
627 TDB_DATA dbuf;
628 int ret = -1;
630 /* find which hash bucket it is in */
631 hash = tdb_hash(tdb, key.dptr, key.dsize);
632 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
633 return -1;
635 dbuf = _tdb1_fetch(tdb, key);
637 if (dbuf.dptr == NULL) {
638 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
639 } else {
640 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
641 unsigned char *new_dptr;
643 /* realloc '0' is special: don't do that. */
644 if (new_len == 0)
645 new_len = 1;
646 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
647 if (new_dptr == NULL) {
648 free(dbuf.dptr);
650 dbuf.dptr = new_dptr;
653 if (dbuf.dptr == NULL) {
654 tdb->last_error = TDB_ERR_OOM;
655 goto failed;
658 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
659 dbuf.dsize += new_dbuf.dsize;
661 ret = _tdb1_store(tdb, key, dbuf, 0, hash);
663 failed:
664 tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
665 SAFE_FREE(dbuf.dptr);
666 return ret;
671 get the tdb sequence number. Only makes sense if the writers opened
672 with TDB1_SEQNUM set. Note that this sequence number will wrap quite
673 quickly, so it should only be used for a 'has something changed'
674 test, not for code that relies on the count of the number of changes
675 made. If you want a counter then use a tdb record.
677 The aim of this sequence number is to allow for a very lightweight
678 test of a possible tdb change.
680 int tdb1_get_seqnum(struct tdb_context *tdb)
682 tdb1_off_t seqnum=0;
684 tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
685 return seqnum;
690 add a region of the file to the freelist. Length is the size of the region in bytes,
691 which includes the free list header that needs to be added
693 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
695 struct tdb1_record rec;
696 if (length <= sizeof(rec)) {
697 /* the region is not worth adding */
698 return 0;
700 if (length + offset > tdb->file->map_size) {
701 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
702 "tdb1_free_region: adding region beyond"
703 " end of file");
704 return -1;
706 memset(&rec,'\0',sizeof(rec));
707 rec.rec_len = length - sizeof(rec);
708 if (tdb1_free(tdb, offset, &rec) == -1) {
709 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
710 "tdb1_free_region: failed to add free record");
711 return -1;
713 return 0;
717 wipe the entire database, deleting all records. This can be done
718 very fast by using a allrecord lock. The entire data portion of the
719 file becomes a single entry in the freelist.
721 This code carefully steps around the recovery area, leaving it alone
723 int tdb1_wipe_all(struct tdb_context *tdb)
725 int i;
726 tdb1_off_t offset = 0;
727 ssize_t data_len;
728 tdb1_off_t recovery_head;
729 tdb1_len_t recovery_size = 0;
731 if (tdb1_lockall(tdb) != 0) {
732 return -1;
736 /* see if the tdb has a recovery area, and remember its size
737 if so. We don't want to lose this as otherwise each
738 tdb1_wipe_all() in a transaction will increase the size of
739 the tdb by the size of the recovery area */
740 if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
741 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
742 "tdb1_wipe_all: failed to read recovery head");
743 goto failed;
746 if (recovery_head != 0) {
747 struct tdb1_record rec;
748 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
749 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
750 "tdb1_wipe_all: failed to read recovery record");
751 return -1;
753 recovery_size = rec.rec_len + sizeof(rec);
756 /* wipe the hashes */
757 for (i=0;i<tdb->tdb1.header.hash_size;i++) {
758 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
759 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
760 "tdb1_wipe_all: failed to write hash %d", i);
761 goto failed;
765 /* wipe the freelist */
766 if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
767 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
768 "tdb1_wipe_all: failed to write freelist");
769 goto failed;
772 /* add all the rest of the file to the freelist, possibly leaving a gap
773 for the recovery area */
774 if (recovery_size == 0) {
775 /* the simple case - the whole file can be used as a freelist */
776 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
777 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
778 goto failed;
780 } else {
781 /* we need to add two freelist entries - one on either
782 side of the recovery area
784 Note that we cannot shift the recovery area during
785 this operation. Only the transaction.c code may
786 move the recovery area or we risk subtle data
787 corruption
789 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
790 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
791 goto failed;
793 /* and the 2nd free list entry after the recovery area - if any */
794 data_len = tdb->file->map_size - (recovery_head+recovery_size);
795 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
796 goto failed;
800 if (tdb1_unlockall(tdb) != 0) {
801 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
802 "tdb1_wipe_all: failed to unlock");
803 goto failed;
806 return 0;
808 failed:
809 tdb1_unlockall(tdb);
810 return -1;
813 struct traverse_state {
814 enum TDB_ERROR error;
815 struct tdb_context *dest_db;
819 traverse function for repacking
821 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
823 struct traverse_state *state = (struct traverse_state *)private_data;
824 if (tdb1_store(state->dest_db, key, data, TDB_INSERT) != 0) {
825 state->error = state->dest_db->last_error;
826 return -1;
828 return 0;
832 repack a tdb
834 int tdb1_repack(struct tdb_context *tdb)
836 struct tdb_context *tmp_db;
837 struct traverse_state state;
838 union tdb_attribute hsize;
840 hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
841 hsize.base.next = NULL;
842 hsize.tdb1_hashsize.hsize = tdb->tdb1.header.hash_size;
844 if (tdb1_transaction_start(tdb) != 0) {
845 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
846 __location__ " Failed to start transaction");
847 return -1;
850 tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, &hsize);
851 if (tmp_db == NULL) {
852 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
853 __location__ " Failed to create tmp_db");
854 tdb1_transaction_cancel(tdb);
855 return -1;
858 state.error = TDB_SUCCESS;
859 state.dest_db = tmp_db;
861 if (tdb1_traverse_read(tdb, repack_traverse, &state) == -1) {
862 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
863 __location__ " Failed to traverse copying out");
864 tdb1_transaction_cancel(tdb);
865 tdb_close(tmp_db);
866 return -1;
869 if (state.error != TDB_SUCCESS) {
870 tdb->last_error = tdb_logerr(tdb, state.error, TDB_LOG_ERROR,
871 __location__ " Error during traversal");
872 tdb1_transaction_cancel(tdb);
873 tdb_close(tmp_db);
874 return -1;
877 if (tdb1_wipe_all(tdb) != 0) {
878 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
879 __location__ " Failed to wipe database\n");
880 tdb1_transaction_cancel(tdb);
881 tdb_close(tmp_db);
882 return -1;
885 state.error = TDB_SUCCESS;
886 state.dest_db = tdb;
888 if (tdb1_traverse_read(tmp_db, repack_traverse, &state) == -1) {
889 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
890 __location__ " Failed to traverse copying back");
891 tdb1_transaction_cancel(tdb);
892 tdb_close(tmp_db);
893 return -1;
896 if (state.error) {
897 tdb->last_error = tdb_logerr(tdb, state.error, TDB_LOG_ERROR,
898 __location__ " Error during second traversal");
899 tdb1_transaction_cancel(tdb);
900 tdb_close(tmp_db);
901 return -1;
904 tdb_close(tmp_db);
906 if (tdb1_transaction_commit(tdb) != 0) {
907 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
908 __location__ " Failed to commit");
909 return -1;
912 return 0;
/* Write exactly `count' bytes from `buf' to `fd'.
 *
 * Even on files, we can get partial writes due to signals, so keep calling
 * write() until everything is out.  A write() interrupted before any byte
 * was transferred (EINTR) is retried rather than treated as a failure.
 *
 * Returns true once all bytes were written (trivially for count == 0),
 * false on any other write() error; errno is left as write() set it.
 */
bool tdb1_write_all(int fd, const void *buf, size_t count)
{
	const char *cursor = buf;

	while (count != 0) {
		ssize_t written = write(fd, cursor, count);

		if (written < 0) {
			if (errno == EINTR) {
				continue;	/* interrupted, nothing written */
			}
			return false;
		}
		cursor += written;
		count -= (size_t)written;
	}
	return true;
}