lib/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 /*
  32   non-blocking increment of the tdb sequence number if the tdb has been opened using
  33   the TDB_SEQNUM flag
  34 */
  35 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  36 {
  37         tdb1_off_t seqnum=0;
  38
  39         if (!(tdb->flags & TDB_SEQNUM)) {
  40                 return;
  41         }
  42
  43         /* we ignore errors from this, as we have no sane way of
  44            dealing with them.
  45         */
  46         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  47         seqnum++;
  48         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49 }
  50
  51 /*
  52   increment the tdb sequence number if the tdb has been opened using
  53   the TDB_SEQNUM flag
  54 */
  55 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  56 {
  57         if (!(tdb->flags & TDB_SEQNUM)) {
  58                 return;
  59         }
  60
  61         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  62                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  63                 return;
  64         }
  65
  66         tdb1_increment_seqnum_nonblock(tdb);
  67
  68         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  69 }
  70
  71 static enum TDB_ERROR tdb1_key_compare(TDB_DATA key, TDB_DATA data,
  72                                        void *matches_)
  73 {
  74         bool *matches = matches_;
  75         *matches = (memcmp(data.dptr, key.dptr, data.dsize) == 0);
  76         return TDB_SUCCESS;
  77 }
  78
  79 /* Returns 0 on fail; last_error will be TDB_ERR_NOEXIST if it simply
  80  * wasn't there, otherwise a real error.
  81  * On success, return offset of record, and fills in rec */
  82 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  83                         struct tdb1_record *r)
  84 {
  85         tdb1_off_t rec_ptr;
  86
  87         /* read in the hash top */
  88         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  89                 return 0;
  90
  91         /* keep looking until we find the right record */
  92         while (rec_ptr) {
  93                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  94                         return 0;
  95
  96                 tdb->stats.compares++;
  97                 if (TDB1_DEAD(r)) {
  98                         tdb->stats.compare_wrong_bucket++;
  99                 } else if (key.dsize != r->key_len) {
 100                         tdb->stats.compare_wrong_keylen++;
 101                 } else if (hash != r->full_hash) {
 102                         tdb->stats.compare_wrong_rechash++;
 103                 } else {
 104                         enum TDB_ERROR ecode;
 105                         bool matches;
 106                         ecode = tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
 107                                                 r->key_len, tdb1_key_compare,
 108                                                 &matches);
 109
 110                         if (ecode != TDB_SUCCESS) {
 111                                 tdb->last_error = ecode;
 112                                 return 0;
 113                         }
 114
 115                         if (!matches) {
 116                                 tdb->stats.compare_wrong_keycmp++;
 117                         } else {
 118                                 return rec_ptr;
 119                         }
 120                 }
 121                 /* detect tight infinite loop */
 122                 if (rec_ptr == r->next) {
 123                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 124                                                 TDB_LOG_ERROR,
 125                                                 "tdb1_find: loop detected.");
 126                         return 0;
 127                 }
 128                 rec_ptr = r->next;
 129         }
 130         tdb->last_error = TDB_ERR_NOEXIST;
 131         return 0;
 132 }
 133
 134 /* As tdb1_find, but if you succeed, keep the lock */
 135 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 136                            struct tdb1_record *rec)
 137 {
 138         uint32_t rec_ptr;
 139
 140         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 141                 return 0;
 142         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 143                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 144         return rec_ptr;
 145 }
 146
 147 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 148
 149 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
 150 {
 151         TDB_DATA *dbuf = (TDB_DATA *)private_data;
 152
 153         if (dbuf->dsize != data.dsize) {
 154                 return -1;
 155         }
 156         if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) {
 157                 return -1;
 158         }
 159         return 0;
 160 }
 161
 162 /* update an entry in place - this only works if the new data size
 163    is <= the old data size and the key exists.
 164    on failure return -1.
 165 */
 166 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 167 {
 168         struct tdb1_record rec;
 169         tdb1_off_t rec_ptr;
 170
 171         /* find entry */
 172         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 173                 return -1;
 174
 175         /* it could be an exact duplicate of what is there - this is
 176          * surprisingly common (eg. with a ldb re-index). */
 177         if (rec.key_len == key.dsize &&
 178             rec.data_len == dbuf.dsize &&
 179             rec.full_hash == hash &&
 180             tdb1_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) {
 181                         return 0;
 182         }
 183
 184         /* must be long enough key, data and tailer */
 185         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 186                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 187                 return -1;
 188         }
 189
 190         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 191                       dbuf.dptr, dbuf.dsize) == -1)
 192                 return -1;
 193
 194         if (dbuf.dsize != rec.data_len) {
 195                 /* update size */
 196                 rec.data_len = dbuf.dsize;
 197                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 198         }
 199
 200         return 0;
 201 }
 202
 203 /* find an entry in the database given a key */
 204 /* If an entry doesn't exist tdb1_err will be set to
 205  * TDB_ERR_NOEXIST. If a key has no data attached
 206  * then the TDB_DATA will have zero length but
 207  * a non-zero pointer
 208  */
 209 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 210 {
 211         tdb1_off_t rec_ptr;
 212         struct tdb1_record rec;
 213         TDB_DATA ret;
 214         uint32_t hash;
 215
 216         /* find which hash bucket it is in */
 217         hash = tdb_hash(tdb, key.dptr, key.dsize);
 218         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 219                 ret.dptr = NULL;
 220                 ret.dsize = 0;
 221                 return ret;
 222         }
 223
 224         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 225                                   rec.data_len);
 226         ret.dsize = rec.data_len;
 227         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 228         return ret;
 229 }
 230
 231 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
 232 {
 233         *data = _tdb1_fetch(tdb, key);
 234         if (data->dptr == NULL)
 235                 return tdb->last_error;
 236         return TDB_SUCCESS;
 237 }
 238
 239 enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 240                                  enum TDB_ERROR (*parser)(TDB_DATA key,
 241                                                           TDB_DATA data,
 242                                                           void *private_data),
 243                                  void *private_data)
 244 {
 245         tdb1_off_t rec_ptr;
 246         struct tdb1_record rec;
 247         enum TDB_ERROR ret;
 248         uint32_t hash;
 249
 250         /* find which hash bucket it is in */
 251         hash = tdb_hash(tdb, key.dptr, key.dsize);
 252
 253         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 254                 return tdb->last_error;
 255         }
 256
 257         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 258                              rec.data_len, parser, private_data);
 259
 260         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 261
 262         return ret;
 263 }
 264
 265 /* check if an entry in the database exists
 266
 267    note that 1 is returned if the key is found and 0 is returned if not found
 268    this doesn't match the conventions in the rest of this module, but is
 269    compatible with gdbm
 270 */
 271 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 272 {
 273         struct tdb1_record rec;
 274
 275         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 276                 return 0;
 277         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 278         return 1;
 279 }
 280
 281 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 282 {
 283         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 284         int ret;
 285
 286         assert(tdb->flags & TDB_VERSION1);
 287         ret = tdb1_exists_hash(tdb, key, hash);
 288         return ret;
 289 }
 290
 291 /* actually delete an entry in the database given the offset */
 292 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 293 {
 294         tdb1_off_t last_ptr, i;
 295         struct tdb1_record lastrec;
 296
 297         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 298
 299         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 300             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 301                 /* Someone traversing here: mark it as dead */
 302                 rec->magic = TDB1_DEAD_MAGIC;
 303                 return tdb1_rec_write(tdb, rec_ptr, rec);
 304         }
 305         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 306                 return -1;
 307
 308         /* find previous record in hash chain */
 309         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 310                 return -1;
 311         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 312                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 313                         return -1;
 314
 315         /* unlink it: next ptr is at start of record. */
 316         if (last_ptr == 0)
 317                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 318         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 319                 return -1;
 320
 321         /* recover the space */
 322         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 323                 return -1;
 324         return 0;
 325 }
 326
 327 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 328 {
 329         int res = 0;
 330         tdb1_off_t rec_ptr;
 331         struct tdb1_record rec;
 332
 333         /* read in the hash top */
 334         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 335                 return 0;
 336
 337         while (rec_ptr) {
 338                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 339                         return 0;
 340
 341                 if (rec.magic == TDB1_DEAD_MAGIC) {
 342                         res += 1;
 343                 }
 344                 rec_ptr = rec.next;
 345         }
 346         return res;
 347 }
 348
 349 /*
 350  * Purge all DEAD records from a hash chain
 351  */
 352 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 353 {
 354         int res = -1;
 355         struct tdb1_record rec;
 356         tdb1_off_t rec_ptr;
 357
 358         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 359                 return -1;
 360         }
 361
 362         /* read in the hash top */
 363         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 364                 goto fail;
 365
 366         while (rec_ptr) {
 367                 tdb1_off_t next;
 368
 369                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 370                         goto fail;
 371                 }
 372
 373                 next = rec.next;
 374
 375                 if (rec.magic == TDB1_DEAD_MAGIC
 376                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 377                         goto fail;
 378                 }
 379                 rec_ptr = next;
 380         }
 381         res = 0;
 382  fail:
 383         tdb1_unlock(tdb, -1, F_WRLCK);
 384         return res;
 385 }
 386
 387 /* delete an entry in the database given a key */
 388 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 389 {
 390         tdb1_off_t rec_ptr;
 391         struct tdb1_record rec;
 392         int ret;
 393
 394         if (tdb->tdb1.max_dead_records != 0) {
 395
 396                 /*
 397                  * Allow for some dead records per hash chain, mainly for
 398                  * tdb's with a very high create/delete rate like locking.tdb.
 399                  */
 400
 401                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 402                         return -1;
 403
 404                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 405                         /*
 406                          * Don't let the per-chain freelist grow too large,
 407                          * delete all existing dead records
 408                          */
 409                         tdb1_purge_dead(tdb, hash);
 410                 }
 411
 412                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 413                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 414                         return -1;
 415                 }
 416
 417                 /*
 418                  * Just mark the record as dead.
 419                  */
 420                 rec.magic = TDB1_DEAD_MAGIC;
 421                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 422         }
 423         else {
 424                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 425                                                    &rec)))
 426                         return -1;
 427
 428                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 429         }
 430
 431         if (ret == 0) {
 432                 tdb1_increment_seqnum(tdb);
 433         }
 434
 435         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 436                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 437                            "tdb1_delete: WARNING tdb1_unlock failed!");
 438         return ret;
 439 }
 440
 441 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 442 {
 443         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 444         int ret;
 445
 446         assert(tdb->flags & TDB_VERSION1);
 447         ret = tdb1_delete_hash(tdb, key, hash);
 448         return ret;
 449 }
 450
 451 /*
 452  * See if we have a dead record around with enough space
 453  */
 454 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 455                                struct tdb1_record *r, tdb1_len_t length)
 456 {
 457         tdb1_off_t rec_ptr;
 458
 459         /* read in the hash top */
 460         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 461                 return 0;
 462
 463         /* keep looking until we find the right record */
 464         while (rec_ptr) {
 465                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 466                         return 0;
 467
 468                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 469                         /*
 470                          * First fit for simple coding, TODO: change to best
 471                          * fit
 472                          */
 473                         return rec_ptr;
 474                 }
 475                 rec_ptr = r->next;
 476         }
 477         return 0;
 478 }
 479
 480 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 481                        TDB_DATA dbuf, int flag, uint32_t hash)
 482 {
 483         struct tdb1_record rec;
 484         tdb1_off_t rec_ptr;
 485         int ret = -1;
 486
 487         /* check for it existing, on insert. */
 488         if (flag == TDB_INSERT) {
 489                 if (tdb1_exists_hash(tdb, key, hash)) {
 490                         tdb->last_error = TDB_ERR_EXISTS;
 491                         goto fail;
 492                 }
 493                 if (tdb->last_error != TDB_ERR_NOEXIST) {
 494                         goto fail;
 495                 }
 496         } else {
 497                 /* first try in-place update, on modify or replace. */
 498                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 499                         goto done;
 500                 }
 501                 if (tdb->last_error != TDB_SUCCESS) {
 502                         if (tdb->last_error != TDB_ERR_NOEXIST) {
 503                                 goto fail;
 504                         }
 505                         if (flag == TDB_MODIFY) {
 506                                 /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 507                                    we should fail the store */
 508                                 goto fail;
 509                         }
 510                 }
 511         }
 512         /* reset the error code potentially set by the tdb1_update() */
 513         tdb->last_error = TDB_SUCCESS;
 514
 515         /* delete any existing record - if it doesn't exist we don't
 516            care.  Doing this first reduces fragmentation, and avoids
 517            coalescing with `allocated' block before it's updated. */
 518         if (flag != TDB_INSERT)
 519                 tdb1_delete_hash(tdb, key, hash);
 520
 521         if (tdb->tdb1.max_dead_records != 0) {
 522                 /*
 523                  * Allow for some dead records per hash chain, look if we can
 524                  * find one that can hold the new record. We need enough space
 525                  * for key, data and tailer. If we find one, we don't have to
 526                  * consult the central freelist.
 527                  */
 528                 rec_ptr = tdb1_find_dead(
 529                         tdb, hash, &rec,
 530                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 531
 532                 if (rec_ptr != 0) {
 533                         rec.key_len = key.dsize;
 534                         rec.data_len = dbuf.dsize;
 535                         rec.full_hash = hash;
 536                         rec.magic = TDB1_MAGIC;
 537                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 538                             || tdb->tdb1.io->tdb1_write(
 539                                     tdb, rec_ptr + sizeof(rec),
 540                                     key.dptr, key.dsize) == -1
 541                             || tdb->tdb1.io->tdb1_write(
 542                                     tdb, rec_ptr + sizeof(rec) + key.dsize,
 543                                     dbuf.dptr, dbuf.dsize) == -1) {
 544                                 goto fail;
 545                         }
 546                         goto done;
 547                 }
 548         }
 549
 550         /*
 551          * We have to allocate some space from the freelist, so this means we
 552          * have to lock it. Use the chance to purge all the DEAD records from
 553          * the hash chain under the freelist lock.
 554          */
 555
 556         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 557                 goto fail;
 558         }
 559
 560         if ((tdb->tdb1.max_dead_records != 0)
 561             && (tdb1_purge_dead(tdb, hash) == -1)) {
 562                 tdb1_unlock(tdb, -1, F_WRLCK);
 563                 goto fail;
 564         }
 565
 566         /* we have to allocate some space */
 567         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 568
 569         tdb1_unlock(tdb, -1, F_WRLCK);
 570
 571         if (rec_ptr == 0) {
 572                 goto fail;
 573         }
 574
 575         /* Read hash top into next ptr */
 576         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 577                 goto fail;
 578
 579         rec.key_len = key.dsize;
 580         rec.data_len = dbuf.dsize;
 581         rec.full_hash = hash;
 582         rec.magic = TDB1_MAGIC;
 583
 584         /* write out and point the top of the hash chain at it */
 585         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 586             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec),
 587                                         key.dptr, key.dsize) == -1
 588             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + key.dsize,
 589                                         dbuf.dptr, dbuf.dsize) == -1
 590             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 591                 /* Need to tdb1_unallocate() here */
 592                 goto fail;
 593         }
 594
 595  done:
 596         ret = 0;
 597  fail:
 598         if (ret == 0) {
 599                 tdb1_increment_seqnum(tdb);
 600         }
 601         return ret;
 602 }
 603
 604 /* store an element in the database, replacing any existing element
 605    with the same key
 606
 607    return 0 on success, -1 on failure
 608 */
 609 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 610 {
 611         uint32_t hash;
 612         int ret;
 613
 614         assert(tdb->flags & TDB_VERSION1);
 615
 616         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 617                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
 618                                              TDB_LOG_USE_ERROR,
 619                                              "tdb_store: read-only tdb");
 620                 return -1;
 621         }
 622
 623         /* find which hash bucket it is in */
 624         hash = tdb_hash(tdb, key.dptr, key.dsize);
 625         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 626                 return -1;
 627
 628         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 629         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 630         return ret;
 631 }
 632
 633 /* Append to an entry. Create if not exist. */
 634 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 635 {
 636         uint32_t hash;
 637         TDB_DATA dbuf;
 638         int ret = -1;
 639
 640         assert(tdb->flags & TDB_VERSION1);
 641
 642         /* find which hash bucket it is in */
 643         hash = tdb_hash(tdb, key.dptr, key.dsize);
 644         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 645                 return -1;
 646
 647         dbuf = _tdb1_fetch(tdb, key);
 648
 649         if (dbuf.dptr == NULL) {
 650                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 651         } else {
 652                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 653                 unsigned char *new_dptr;
 654
 655                 /* realloc '0' is special: don't do that. */
 656                 if (new_len == 0)
 657                         new_len = 1;
 658                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 659                 if (new_dptr == NULL) {
 660                         free(dbuf.dptr);
 661                 }
 662                 dbuf.dptr = new_dptr;
 663         }
 664
 665         if (dbuf.dptr == NULL) {
 666                 tdb->last_error = TDB_ERR_OOM;
 667                 goto failed;
 668         }
 669
 670         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 671         dbuf.dsize += new_dbuf.dsize;
 672
 673         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 674
 675 failed:
 676         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 677         SAFE_FREE(dbuf.dptr);
 678         return ret;
 679 }
 680
 681
 682 /*
 683   get the tdb sequence number. Only makes sense if the writers opened
 684   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 685   quickly, so it should only be used for a 'has something changed'
 686   test, not for code that relies on the count of the number of changes
 687   made. If you want a counter then use a tdb record.
 688
 689   The aim of this sequence number is to allow for a very lightweight
 690   test of a possible tdb change.
 691 */
 692 int tdb1_get_seqnum(struct tdb_context *tdb)
 693 {
 694         tdb1_off_t seqnum=0;
 695
 696         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 697         return seqnum;
 698 }
 699
 700
 701 /*
 702   add a region of the file to the freelist. Length is the size of the region in bytes,
 703   which includes the free list header that needs to be added
 704  */
 705 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 706 {
 707         struct tdb1_record rec;
 708         if (length <= sizeof(rec)) {
 709                 /* the region is not worth adding */
 710                 return 0;
 711         }
 712         if (length + offset > tdb->file->map_size) {
 713                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 714                                         "tdb1_free_region: adding region beyond"
 715                                         " end of file");
 716                 return -1;
 717         }
 718         memset(&rec,'\0',sizeof(rec));
 719         rec.rec_len = length - sizeof(rec);
 720         if (tdb1_free(tdb, offset, &rec) == -1) {
 721                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 722                            "tdb1_free_region: failed to add free record");
 723                 return -1;
 724         }
 725         return 0;
 726 }
 727
 728 /*
 729   wipe the entire database, deleting all records. This can be done
 730   very fast by using a allrecord lock. The entire data portion of the
 731   file becomes a single entry in the freelist.
 732
 733   This code carefully steps around the recovery area, leaving it alone
 734  */
 735 int tdb1_wipe_all(struct tdb_context *tdb)
 736 {
 737         int i;
 738         tdb1_off_t offset = 0;
 739         ssize_t data_len;
 740         tdb1_off_t recovery_head;
 741         tdb1_len_t recovery_size = 0;
 742
 743         if (tdb_lockall(tdb) != TDB_SUCCESS) {
 744                 return -1;
 745         }
 746
 747
 748         /* see if the tdb has a recovery area, and remember its size
 749            if so. We don't want to lose this as otherwise each
 750            tdb1_wipe_all() in a transaction will increase the size of
 751            the tdb by the size of the recovery area */
 752         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 753                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 754                            "tdb1_wipe_all: failed to read recovery head");
 755                 goto failed;
 756         }
 757
 758         if (recovery_head != 0) {
 759                 struct tdb1_record rec;
 760                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 761                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 762                                    "tdb1_wipe_all: failed to read recovery record");
 763                         return -1;
 764                 }
 765                 recovery_size = rec.rec_len + sizeof(rec);
 766         }
 767
 768         /* wipe the hashes */
 769         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 770                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 771                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 772                                    "tdb1_wipe_all: failed to write hash %d", i);
 773                         goto failed;
 774                 }
 775         }
 776
 777         /* wipe the freelist */
 778         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 779                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 780                            "tdb1_wipe_all: failed to write freelist");
 781                 goto failed;
 782         }
 783
 784         /* add all the rest of the file to the freelist, possibly leaving a gap
 785            for the recovery area */
 786         if (recovery_size == 0) {
 787                 /* the simple case - the whole file can be used as a freelist */
 788                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 789                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 790                         goto failed;
 791                 }
 792         } else {
 793                 /* we need to add two freelist entries - one on either
 794                    side of the recovery area
 795
 796                    Note that we cannot shift the recovery area during
 797                    this operation. Only the transaction.c code may
 798                    move the recovery area or we risk subtle data
 799                    corruption
 800                 */
 801                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 802                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 803                         goto failed;
 804                 }
 805                 /* and the 2nd free list entry after the recovery area - if any */
 806                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 807                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 808                         goto failed;
 809                 }
 810         }
 811
 812         tdb1_increment_seqnum_nonblock(tdb);
 813         tdb_unlockall(tdb);
 814         return 0;
 815
 816 failed:
 817         tdb_unlockall(tdb);
 818         return -1;
 819 }
 820
 821 /* Even on files, we can get partial writes due to signals. */
 822 bool tdb1_write_all(int fd, const void *buf, size_t count)
 823 {
 824         while (count) {
 825                 ssize_t ret;
 826                 ret = write(fd, buf, count);
 827                 if (ret < 0)
 828                         return false;
 829                 buf = (const char *)buf + ret;
 830                 count -= ret;
 831         }
 832         return true;
 833 }