lib/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 /*
  32   non-blocking increment of the tdb sequence number if the tdb has been opened using
  33   the TDB_SEQNUM flag
  34 */
  35 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  36 {
  37         tdb1_off_t seqnum=0;
  38
  39         if (!(tdb->flags & TDB_SEQNUM)) {
  40                 return;
  41         }
  42
  43         /* we ignore errors from this, as we have no sane way of
  44            dealing with them.
  45         */
  46         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  47         seqnum++;
  48         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49 }
  50
  51 /*
  52   increment the tdb sequence number if the tdb has been opened using
  53   the TDB_SEQNUM flag
  54 */
  55 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  56 {
  57         if (!(tdb->flags & TDB_SEQNUM)) {
  58                 return;
  59         }
  60
  61         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  62                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  63                 return;
  64         }
  65
  66         tdb1_increment_seqnum_nonblock(tdb);
  67
  68         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  69 }
  70
  71 static int tdb1_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  72 {
  73         return memcmp(data.dptr, key.dptr, data.dsize);
  74 }
  75
  76 /* Returns 0 on fail.  On success, return offset of record, and fills
  77    in rec */
  78 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  79                         struct tdb1_record *r)
  80 {
  81         tdb1_off_t rec_ptr;
  82
  83         /* read in the hash top */
  84         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  85                 return 0;
  86
  87         /* keep looking until we find the right record */
  88         while (rec_ptr) {
  89                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  90                         return 0;
  91
  92                 if (!TDB1_DEAD(r) && hash==r->full_hash
  93                     && key.dsize==r->key_len
  94                     && tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
  95                                       r->key_len, tdb1_key_compare,
  96                                       NULL) == 0) {
  97                         return rec_ptr;
  98                 }
  99                 /* detect tight infinite loop */
 100                 if (rec_ptr == r->next) {
 101                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 102                                                 TDB_LOG_ERROR,
 103                                                 "tdb1_find: loop detected.");
 104                         return 0;
 105                 }
 106                 rec_ptr = r->next;
 107         }
 108         tdb->last_error = TDB_ERR_NOEXIST;
 109         return 0;
 110 }
 111
 112 /* As tdb1_find, but if you succeed, keep the lock */
 113 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 114                            struct tdb1_record *rec)
 115 {
 116         uint32_t rec_ptr;
 117
 118         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 119                 return 0;
 120         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 121                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 122         return rec_ptr;
 123 }
 124
 125 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 126
 127 /* update an entry in place - this only works if the new data size
 128    is <= the old data size and the key exists.
 129    on failure return -1.
 130 */
 131 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 132 {
 133         struct tdb1_record rec;
 134         tdb1_off_t rec_ptr;
 135
 136         /* find entry */
 137         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 138                 return -1;
 139
 140         /* it could be an exact duplicate of what is there - this is
 141          * surprisingly common (eg. with a ldb re-index). */
 142         if (rec.key_len == key.dsize &&
 143             rec.data_len == dbuf.dsize &&
 144             rec.full_hash == hash) {
 145                 TDB_DATA data = _tdb1_fetch(tdb, key);
 146                 if (data.dsize == dbuf.dsize &&
 147                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 148                         if (data.dptr) {
 149                                 free(data.dptr);
 150                         }
 151                         return 0;
 152                 }
 153                 if (data.dptr) {
 154                         free(data.dptr);
 155                 }
 156         }
 157
 158         /* must be long enough key, data and tailer */
 159         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 160                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 161                 return -1;
 162         }
 163
 164         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 165                       dbuf.dptr, dbuf.dsize) == -1)
 166                 return -1;
 167
 168         if (dbuf.dsize != rec.data_len) {
 169                 /* update size */
 170                 rec.data_len = dbuf.dsize;
 171                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 172         }
 173
 174         return 0;
 175 }
 176
 177 /* find an entry in the database given a key */
 178 /* If an entry doesn't exist tdb1_err will be set to
 179  * TDB_ERR_NOEXIST. If a key has no data attached
 180  * then the TDB_DATA will have zero length but
 181  * a non-zero pointer
 182  */
 183 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 184 {
 185         tdb1_off_t rec_ptr;
 186         struct tdb1_record rec;
 187         TDB_DATA ret;
 188         uint32_t hash;
 189
 190         /* find which hash bucket it is in */
 191         hash = tdb_hash(tdb, key.dptr, key.dsize);
 192         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 193                 ret.dptr = NULL;
 194                 ret.dsize = 0;
 195                 return ret;
 196         }
 197
 198         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 199                                   rec.data_len);
 200         ret.dsize = rec.data_len;
 201         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 202         return ret;
 203 }
 204
 205 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
 206 {
 207         *data = _tdb1_fetch(tdb, key);
 208         if (data->dptr == NULL)
 209                 return tdb->last_error;
 210         return TDB_SUCCESS;
 211 }
 212
 213 enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 214                                  enum TDB_ERROR (*parser)(TDB_DATA key,
 215                                                           TDB_DATA data,
 216                                                           void *private_data),
 217                                  void *private_data)
 218 {
 219         tdb1_off_t rec_ptr;
 220         struct tdb1_record rec;
 221         enum TDB_ERROR ret;
 222         uint32_t hash;
 223
 224         /* find which hash bucket it is in */
 225         hash = tdb_hash(tdb, key.dptr, key.dsize);
 226
 227         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 228                 /* record not found */
 229                 return TDB_ERR_NOEXIST;
 230         }
 231
 232         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 233                              rec.data_len, parser, private_data);
 234
 235         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 236
 237         return ret;
 238 }
 239
 240 /* check if an entry in the database exists
 241
 242    note that 1 is returned if the key is found and 0 is returned if not found
 243    this doesn't match the conventions in the rest of this module, but is
 244    compatible with gdbm
 245 */
 246 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 247 {
 248         struct tdb1_record rec;
 249
 250         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 251                 return 0;
 252         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 253         return 1;
 254 }
 255
 256 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 257 {
 258         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 259         int ret;
 260
 261         assert(tdb->flags & TDB_VERSION1);
 262         ret = tdb1_exists_hash(tdb, key, hash);
 263         return ret;
 264 }
 265
 266 /* actually delete an entry in the database given the offset */
 267 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 268 {
 269         tdb1_off_t last_ptr, i;
 270         struct tdb1_record lastrec;
 271
 272         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 273
 274         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 275             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 276                 /* Someone traversing here: mark it as dead */
 277                 rec->magic = TDB1_DEAD_MAGIC;
 278                 return tdb1_rec_write(tdb, rec_ptr, rec);
 279         }
 280         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 281                 return -1;
 282
 283         /* find previous record in hash chain */
 284         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 285                 return -1;
 286         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 287                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 288                         return -1;
 289
 290         /* unlink it: next ptr is at start of record. */
 291         if (last_ptr == 0)
 292                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 293         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 294                 return -1;
 295
 296         /* recover the space */
 297         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 298                 return -1;
 299         return 0;
 300 }
 301
 302 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 303 {
 304         int res = 0;
 305         tdb1_off_t rec_ptr;
 306         struct tdb1_record rec;
 307
 308         /* read in the hash top */
 309         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 310                 return 0;
 311
 312         while (rec_ptr) {
 313                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 314                         return 0;
 315
 316                 if (rec.magic == TDB1_DEAD_MAGIC) {
 317                         res += 1;
 318                 }
 319                 rec_ptr = rec.next;
 320         }
 321         return res;
 322 }
 323
 324 /*
 325  * Purge all DEAD records from a hash chain
 326  */
 327 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 328 {
 329         int res = -1;
 330         struct tdb1_record rec;
 331         tdb1_off_t rec_ptr;
 332
 333         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 334                 return -1;
 335         }
 336
 337         /* read in the hash top */
 338         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 339                 goto fail;
 340
 341         while (rec_ptr) {
 342                 tdb1_off_t next;
 343
 344                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 345                         goto fail;
 346                 }
 347
 348                 next = rec.next;
 349
 350                 if (rec.magic == TDB1_DEAD_MAGIC
 351                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 352                         goto fail;
 353                 }
 354                 rec_ptr = next;
 355         }
 356         res = 0;
 357  fail:
 358         tdb1_unlock(tdb, -1, F_WRLCK);
 359         return res;
 360 }
 361
 362 /* delete an entry in the database given a key */
 363 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 364 {
 365         tdb1_off_t rec_ptr;
 366         struct tdb1_record rec;
 367         int ret;
 368
 369         if (tdb->tdb1.max_dead_records != 0) {
 370
 371                 /*
 372                  * Allow for some dead records per hash chain, mainly for
 373                  * tdb's with a very high create/delete rate like locking.tdb.
 374                  */
 375
 376                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 377                         return -1;
 378
 379                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 380                         /*
 381                          * Don't let the per-chain freelist grow too large,
 382                          * delete all existing dead records
 383                          */
 384                         tdb1_purge_dead(tdb, hash);
 385                 }
 386
 387                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 388                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 389                         return -1;
 390                 }
 391
 392                 /*
 393                  * Just mark the record as dead.
 394                  */
 395                 rec.magic = TDB1_DEAD_MAGIC;
 396                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 397         }
 398         else {
 399                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 400                                                    &rec)))
 401                         return -1;
 402
 403                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 404         }
 405
 406         if (ret == 0) {
 407                 tdb1_increment_seqnum(tdb);
 408         }
 409
 410         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 411                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 412                            "tdb1_delete: WARNING tdb1_unlock failed!");
 413         return ret;
 414 }
 415
 416 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 417 {
 418         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 419         int ret;
 420
 421         assert(tdb->flags & TDB_VERSION1);
 422         ret = tdb1_delete_hash(tdb, key, hash);
 423         return ret;
 424 }
 425
 426 /*
 427  * See if we have a dead record around with enough space
 428  */
 429 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 430                                struct tdb1_record *r, tdb1_len_t length)
 431 {
 432         tdb1_off_t rec_ptr;
 433
 434         /* read in the hash top */
 435         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 436                 return 0;
 437
 438         /* keep looking until we find the right record */
 439         while (rec_ptr) {
 440                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 441                         return 0;
 442
 443                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 444                         /*
 445                          * First fit for simple coding, TODO: change to best
 446                          * fit
 447                          */
 448                         return rec_ptr;
 449                 }
 450                 rec_ptr = r->next;
 451         }
 452         return 0;
 453 }
 454
 455 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 456                        TDB_DATA dbuf, int flag, uint32_t hash)
 457 {
 458         struct tdb1_record rec;
 459         tdb1_off_t rec_ptr;
 460         char *p = NULL;
 461         int ret = -1;
 462
 463         /* check for it existing, on insert. */
 464         if (flag == TDB_INSERT) {
 465                 if (tdb1_exists_hash(tdb, key, hash)) {
 466                         tdb->last_error = TDB_ERR_EXISTS;
 467                         goto fail;
 468                 }
 469         } else {
 470                 /* first try in-place update, on modify or replace. */
 471                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 472                         goto done;
 473                 }
 474                 if (tdb->last_error == TDB_ERR_NOEXIST &&
 475                     flag == TDB_MODIFY) {
 476                         /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 477                          we should fail the store */
 478                         goto fail;
 479                 }
 480         }
 481         /* reset the error code potentially set by the tdb1_update() */
 482         tdb->last_error = TDB_SUCCESS;
 483
 484         /* delete any existing record - if it doesn't exist we don't
 485            care.  Doing this first reduces fragmentation, and avoids
 486            coalescing with `allocated' block before it's updated. */
 487         if (flag != TDB_INSERT)
 488                 tdb1_delete_hash(tdb, key, hash);
 489
 490         /* Copy key+value *before* allocating free space in case malloc
 491            fails and we are left with a dead spot in the tdb. */
 492
 493         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 494                 tdb->last_error = TDB_ERR_OOM;
 495                 goto fail;
 496         }
 497
 498         memcpy(p, key.dptr, key.dsize);
 499         if (dbuf.dsize)
 500                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 501
 502         if (tdb->tdb1.max_dead_records != 0) {
 503                 /*
 504                  * Allow for some dead records per hash chain, look if we can
 505                  * find one that can hold the new record. We need enough space
 506                  * for key, data and tailer. If we find one, we don't have to
 507                  * consult the central freelist.
 508                  */
 509                 rec_ptr = tdb1_find_dead(
 510                         tdb, hash, &rec,
 511                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 512
 513                 if (rec_ptr != 0) {
 514                         rec.key_len = key.dsize;
 515                         rec.data_len = dbuf.dsize;
 516                         rec.full_hash = hash;
 517                         rec.magic = TDB1_MAGIC;
 518                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 519                             || tdb->tdb1.io->tdb1_write(
 520                                     tdb, rec_ptr + sizeof(rec),
 521                                     p, key.dsize + dbuf.dsize) == -1) {
 522                                 goto fail;
 523                         }
 524                         goto done;
 525                 }
 526         }
 527
 528         /*
 529          * We have to allocate some space from the freelist, so this means we
 530          * have to lock it. Use the chance to purge all the DEAD records from
 531          * the hash chain under the freelist lock.
 532          */
 533
 534         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 535                 goto fail;
 536         }
 537
 538         if ((tdb->tdb1.max_dead_records != 0)
 539             && (tdb1_purge_dead(tdb, hash) == -1)) {
 540                 tdb1_unlock(tdb, -1, F_WRLCK);
 541                 goto fail;
 542         }
 543
 544         /* we have to allocate some space */
 545         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 546
 547         tdb1_unlock(tdb, -1, F_WRLCK);
 548
 549         if (rec_ptr == 0) {
 550                 goto fail;
 551         }
 552
 553         /* Read hash top into next ptr */
 554         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 555                 goto fail;
 556
 557         rec.key_len = key.dsize;
 558         rec.data_len = dbuf.dsize;
 559         rec.full_hash = hash;
 560         rec.magic = TDB1_MAGIC;
 561
 562         /* write out and point the top of the hash chain at it */
 563         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 564             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 565             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 566                 /* Need to tdb1_unallocate() here */
 567                 goto fail;
 568         }
 569
 570  done:
 571         ret = 0;
 572  fail:
 573         if (ret == 0) {
 574                 tdb1_increment_seqnum(tdb);
 575         }
 576
 577         SAFE_FREE(p);
 578         return ret;
 579 }
 580
 581 /* store an element in the database, replacing any existing element
 582    with the same key
 583
 584    return 0 on success, -1 on failure
 585 */
 586 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 587 {
 588         uint32_t hash;
 589         int ret;
 590
 591         assert(tdb->flags & TDB_VERSION1);
 592
 593         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 594                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
 595                                              TDB_LOG_USE_ERROR,
 596                                              "tdb_store: read-only tdb");
 597                 return -1;
 598         }
 599
 600         /* find which hash bucket it is in */
 601         hash = tdb_hash(tdb, key.dptr, key.dsize);
 602         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 603                 return -1;
 604
 605         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 606         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 607         return ret;
 608 }
 609
 610 /* Append to an entry. Create if not exist. */
 611 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 612 {
 613         uint32_t hash;
 614         TDB_DATA dbuf;
 615         int ret = -1;
 616
 617         assert(tdb->flags & TDB_VERSION1);
 618
 619         /* find which hash bucket it is in */
 620         hash = tdb_hash(tdb, key.dptr, key.dsize);
 621         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 622                 return -1;
 623
 624         dbuf = _tdb1_fetch(tdb, key);
 625
 626         if (dbuf.dptr == NULL) {
 627                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 628         } else {
 629                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 630                 unsigned char *new_dptr;
 631
 632                 /* realloc '0' is special: don't do that. */
 633                 if (new_len == 0)
 634                         new_len = 1;
 635                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 636                 if (new_dptr == NULL) {
 637                         free(dbuf.dptr);
 638                 }
 639                 dbuf.dptr = new_dptr;
 640         }
 641
 642         if (dbuf.dptr == NULL) {
 643                 tdb->last_error = TDB_ERR_OOM;
 644                 goto failed;
 645         }
 646
 647         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 648         dbuf.dsize += new_dbuf.dsize;
 649
 650         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 651
 652 failed:
 653         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 654         SAFE_FREE(dbuf.dptr);
 655         return ret;
 656 }
 657
 658
 659 /*
 660   get the tdb sequence number. Only makes sense if the writers opened
 661   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 662   quickly, so it should only be used for a 'has something changed'
 663   test, not for code that relies on the count of the number of changes
 664   made. If you want a counter then use a tdb record.
 665
 666   The aim of this sequence number is to allow for a very lightweight
 667   test of a possible tdb change.
 668 */
 669 int tdb1_get_seqnum(struct tdb_context *tdb)
 670 {
 671         tdb1_off_t seqnum=0;
 672
 673         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 674         return seqnum;
 675 }
 676
 677
 678 /*
 679   add a region of the file to the freelist. Length is the size of the region in bytes,
 680   which includes the free list header that needs to be added
 681  */
 682 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 683 {
 684         struct tdb1_record rec;
 685         if (length <= sizeof(rec)) {
 686                 /* the region is not worth adding */
 687                 return 0;
 688         }
 689         if (length + offset > tdb->file->map_size) {
 690                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 691                                         "tdb1_free_region: adding region beyond"
 692                                         " end of file");
 693                 return -1;
 694         }
 695         memset(&rec,'\0',sizeof(rec));
 696         rec.rec_len = length - sizeof(rec);
 697         if (tdb1_free(tdb, offset, &rec) == -1) {
 698                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 699                            "tdb1_free_region: failed to add free record");
 700                 return -1;
 701         }
 702         return 0;
 703 }
 704
 705 /*
 706   wipe the entire database, deleting all records. This can be done
 707   very fast by using a allrecord lock. The entire data portion of the
 708   file becomes a single entry in the freelist.
 709
 710   This code carefully steps around the recovery area, leaving it alone
 711  */
 712 int tdb1_wipe_all(struct tdb_context *tdb)
 713 {
 714         int i;
 715         tdb1_off_t offset = 0;
 716         ssize_t data_len;
 717         tdb1_off_t recovery_head;
 718         tdb1_len_t recovery_size = 0;
 719
 720         if (tdb_lockall(tdb) != TDB_SUCCESS) {
 721                 return -1;
 722         }
 723
 724
 725         /* see if the tdb has a recovery area, and remember its size
 726            if so. We don't want to lose this as otherwise each
 727            tdb1_wipe_all() in a transaction will increase the size of
 728            the tdb by the size of the recovery area */
 729         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 730                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 731                            "tdb1_wipe_all: failed to read recovery head");
 732                 goto failed;
 733         }
 734
 735         if (recovery_head != 0) {
 736                 struct tdb1_record rec;
 737                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 738                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 739                                    "tdb1_wipe_all: failed to read recovery record");
 740                         return -1;
 741                 }
 742                 recovery_size = rec.rec_len + sizeof(rec);
 743         }
 744
 745         /* wipe the hashes */
 746         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 747                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 748                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 749                                    "tdb1_wipe_all: failed to write hash %d", i);
 750                         goto failed;
 751                 }
 752         }
 753
 754         /* wipe the freelist */
 755         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 756                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 757                            "tdb1_wipe_all: failed to write freelist");
 758                 goto failed;
 759         }
 760
 761         /* add all the rest of the file to the freelist, possibly leaving a gap
 762            for the recovery area */
 763         if (recovery_size == 0) {
 764                 /* the simple case - the whole file can be used as a freelist */
 765                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 766                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 767                         goto failed;
 768                 }
 769         } else {
 770                 /* we need to add two freelist entries - one on either
 771                    side of the recovery area
 772
 773                    Note that we cannot shift the recovery area during
 774                    this operation. Only the transaction.c code may
 775                    move the recovery area or we risk subtle data
 776                    corruption
 777                 */
 778                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 779                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 780                         goto failed;
 781                 }
 782                 /* and the 2nd free list entry after the recovery area - if any */
 783                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 784                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 785                         goto failed;
 786                 }
 787         }
 788
 789         tdb_unlockall(tdb);
 790         return 0;
 791
 792 failed:
 793         tdb_unlockall(tdb);
 794         return -1;
 795 }
 796
 797 /* Even on files, we can get partial writes due to signals. */
 798 bool tdb1_write_all(int fd, const void *buf, size_t count)
 799 {
 800         while (count) {
 801                 ssize_t ret;
 802                 ret = write(fd, buf, count);
 803                 if (ret < 0)
 804                         return false;
 805                 buf = (const char *)buf + ret;
 806                 count -= ret;
 807         }
 808         return true;
 809 }