lib/tdb2/tdb1_traverse.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29
  30 #define TDB1_NEXT_LOCK_ERR ((tdb1_off_t)-1)
  31
  32 static TDB_DATA tdb1_null;
  33
  34 /* Uses traverse lock: 0 = finish, TDB1_NEXT_LOCK_ERR = error,
  35    other = record offset */
  36 static tdb1_off_t tdb1_next_lock(struct tdb_context *tdb, struct tdb1_traverse_lock *tlock,
  37                          struct tdb1_record *rec)
  38 {
  39         int want_next = (tlock->off != 0);
  40
  41         /* Lock each chain from the start one. */
  42         for (; tlock->hash < tdb->tdb1.header.hash_size; tlock->hash++) {
  43                 if (!tlock->off && tlock->hash != 0) {
  44                         /* this is an optimisation for the common case where
  45                            the hash chain is empty, which is particularly
  46                            common for the use of tdb with ldb, where large
  47                            hashes are used. In that case we spend most of our
  48                            time in tdb1_brlock(), locking empty hash chains.
  49
  50                            To avoid this, we do an unlocked pre-check to see
  51                            if the hash chain is empty before starting to look
  52                            inside it. If it is empty then we can avoid that
  53                            hash chain. If it isn't empty then we can't believe
  54                            the value we get back, as we read it without a
  55                            lock, so instead we get the lock and re-fetch the
  56                            value below.
  57
  58                            Notice that not doing this optimisation on the
  59                            first hash chain is critical. We must guarantee
  60                            that we have done at least one fcntl lock at the
  61                            start of a search to guarantee that memory is
  62                            coherent on SMP systems. If records are added by
  63                            others during the search then thats OK, and we
  64                            could possibly miss those with this trick, but we
  65                            could miss them anyway without this trick, so the
  66                            semantics don't change.
  67
  68                            With a non-indexed ldb search this trick gains us a
  69                            factor of around 80 in speed on a linux 2.6.x
  70                            system (testing using ldbtest).
  71                         */
  72                         tdb->tdb1.io->next_hash_chain(tdb, &tlock->hash);
  73                         if (tlock->hash == tdb->tdb1.header.hash_size) {
  74                                 continue;
  75                         }
  76                 }
  77
  78                 if (tdb1_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
  79                         return TDB1_NEXT_LOCK_ERR;
  80
  81                 /* No previous record?  Start at top of chain. */
  82                 if (!tlock->off) {
  83                         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(tlock->hash),
  84                                      &tlock->off) == -1)
  85                                 goto fail;
  86                 } else {
  87                         /* Otherwise unlock the previous record. */
  88                         if (tdb1_unlock_record(tdb, tlock->off) != 0)
  89                                 goto fail;
  90                 }
  91
  92                 if (want_next) {
  93                         /* We have offset of old record: grab next */
  94                         if (tdb1_rec_read(tdb, tlock->off, rec) == -1)
  95                                 goto fail;
  96                         tlock->off = rec->next;
  97                 }
  98
  99                 /* Iterate through chain */
 100                 while( tlock->off) {
 101                         tdb1_off_t current;
 102                         if (tdb1_rec_read(tdb, tlock->off, rec) == -1)
 103                                 goto fail;
 104
 105                         /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
 106                         if (tlock->off == rec->next) {
 107                                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 108                                                         TDB_LOG_ERROR,
 109                                                         "tdb1_next_lock:"
 110                                                         " loop detected.");
 111                                 goto fail;
 112                         }
 113
 114                         if (!TDB1_DEAD(rec)) {
 115                                 /* Woohoo: we found one! */
 116                                 if (tdb1_lock_record(tdb, tlock->off) != 0)
 117                                         goto fail;
 118                                 return tlock->off;
 119                         }
 120
 121                         /* Try to clean dead ones from old traverses */
 122                         current = tlock->off;
 123                         tlock->off = rec->next;
 124                         if (!((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) &&
 125                             tdb1_do_delete(tdb, current, rec) != 0)
 126                                 goto fail;
 127                 }
 128                 tdb1_unlock(tdb, tlock->hash, tlock->lock_rw);
 129                 want_next = 0;
 130         }
 131         /* We finished iteration without finding anything */
 132         tdb->last_error = TDB_SUCCESS;
 133         return 0;
 134
 135  fail:
 136         tlock->off = 0;
 137         if (tdb1_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
 138                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 139                            "tdb1_next_lock: On error unlock failed!");
 140         return TDB1_NEXT_LOCK_ERR;
 141 }
 142
 143 /* traverse the entire database - calling fn(tdb, key, data) on each element.
 144    return -1 on error or the record count traversed
 145    if fn is NULL then it is not called
 146    a non-zero return value from fn() indicates that the traversal should stop
 147   */
 148 static int tdb1_traverse_internal(struct tdb_context *tdb,
 149                                   int (*fn)(struct tdb_context *,
 150                                             TDB_DATA, TDB_DATA, void *),
 151                                   void *private_data,
 152                                   struct tdb1_traverse_lock *tl)
 153 {
 154         TDB_DATA key, dbuf;
 155         struct tdb1_record rec;
 156         int ret = 0, count = 0;
 157         tdb1_off_t off;
 158
 159         /* This was in the initializaton, above, but the IRIX compiler
 160          * did not like it.  crh
 161          */
 162         tl->next = tdb->tdb1.travlocks.next;
 163
 164         /* fcntl locks don't stack: beware traverse inside traverse */
 165         tdb->tdb1.travlocks.next = tl;
 166
 167         /* tdb1_next_lock places locks on the record returned, and its chain */
 168         while ((off = tdb1_next_lock(tdb, tl, &rec)) != 0) {
 169                 if (off == TDB1_NEXT_LOCK_ERR) {
 170                         ret = -1;
 171                         goto out;
 172                 }
 173                 count++;
 174                 /* now read the full record */
 175                 key.dptr = tdb1_alloc_read(tdb, tl->off + sizeof(rec),
 176                                           rec.key_len + rec.data_len);
 177                 if (!key.dptr) {
 178                         ret = -1;
 179                         if (tdb1_unlock(tdb, tl->hash, tl->lock_rw) != 0)
 180                                 goto out;
 181                         if (tdb1_unlock_record(tdb, tl->off) != 0)
 182                                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 183                                            "tdb1_traverse: key.dptr == NULL and"
 184                                            " unlock_record failed!");
 185                         goto out;
 186                 }
 187                 key.dsize = rec.key_len;
 188                 dbuf.dptr = key.dptr + rec.key_len;
 189                 dbuf.dsize = rec.data_len;
 190
 191                 /* Drop chain lock, call out */
 192                 if (tdb1_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
 193                         ret = -1;
 194                         SAFE_FREE(key.dptr);
 195                         goto out;
 196                 }
 197                 if (fn && fn(tdb, key, dbuf, private_data)) {
 198                         /* They want us to terminate traversal */
 199                         if (tdb1_unlock_record(tdb, tl->off) != 0) {
 200                                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 201                                            "tdb1_traverse:"
 202                                            " unlock_record failed!");
 203                                 ret = -1;
 204                         }
 205                         SAFE_FREE(key.dptr);
 206                         goto out;
 207                 }
 208                 SAFE_FREE(key.dptr);
 209         }
 210 out:
 211         tdb->tdb1.travlocks.next = tl->next;
 212         if (ret < 0)
 213                 return -1;
 214         else
 215                 return count;
 216 }
 217
 218
 219 /*
 220   a read style traverse - only if db read only
 221 */
 222 static int tdb1_traverse_read(struct tdb_context *tdb,
 223                               int (*fn)(struct tdb_context *,
 224                                         TDB_DATA, TDB_DATA, void *),
 225                               void *private_data)
 226 {
 227         struct tdb1_traverse_lock tl = { NULL, 0, 0, F_RDLCK };
 228         int ret;
 229
 230         /* we need to get a read lock on the transaction lock here to
 231            cope with the lock ordering semantics of solaris10 */
 232         if (tdb1_transaction_lock(tdb, F_RDLCK, TDB_LOCK_WAIT)) {
 233                 return -1;
 234         }
 235
 236         tdb->tdb1.traverse_read++;
 237         ret = tdb1_traverse_internal(tdb, fn, private_data, &tl);
 238         tdb->tdb1.traverse_read--;
 239
 240         tdb1_transaction_unlock(tdb, F_RDLCK);
 241
 242         return ret;
 243 }
 244
 245 /*
 246   a write style traverse - needs to get the transaction lock to
 247   prevent deadlocks
 248
 249   WARNING: The data buffer given to the callback fn does NOT meet the
 250   alignment restrictions malloc gives you.
 251 */
 252 int tdb1_traverse(struct tdb_context *tdb,
 253                   int (*fn)(struct tdb_context *, TDB_DATA, TDB_DATA, void *),
 254                   void *private_data)
 255 {
 256         struct tdb1_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
 257         int ret;
 258
 259         /* If we're read-only, we don't have to write-lock whole db. */
 260         if (tdb->flags & TDB_RDONLY) {
 261                 return tdb1_traverse_read(tdb, fn, private_data);
 262         }
 263
 264         if (tdb1_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) {
 265                 return -1;
 266         }
 267
 268         tdb->tdb1.traverse_write++;
 269         ret = tdb1_traverse_internal(tdb, fn, private_data, &tl);
 270         tdb->tdb1.traverse_write--;
 271
 272         tdb1_transaction_unlock(tdb, F_WRLCK);
 273
 274         return ret;
 275 }
 276
 277
 278 /* find the first entry in the database and return its key */
 279 TDB_DATA tdb1_firstkey(struct tdb_context *tdb)
 280 {
 281         TDB_DATA key;
 282         struct tdb1_record rec;
 283         tdb1_off_t off;
 284
 285         /* release any old lock */
 286         if (tdb1_unlock_record(tdb, tdb->tdb1.travlocks.off) != 0)
 287                 return tdb1_null;
 288         tdb->tdb1.travlocks.off = tdb->tdb1.travlocks.hash = 0;
 289         tdb->tdb1.travlocks.lock_rw = F_RDLCK;
 290
 291         /* Grab first record: locks chain and returned record. */
 292         off = tdb1_next_lock(tdb, &tdb->tdb1.travlocks, &rec);
 293         if (off == 0 || off == TDB1_NEXT_LOCK_ERR) {
 294                 return tdb1_null;
 295         }
 296         /* now read the key */
 297         key.dsize = rec.key_len;
 298         key.dptr =tdb1_alloc_read(tdb,tdb->tdb1.travlocks.off+sizeof(rec),key.dsize);
 299
 300         /* Unlock the hash chain of the record we just read. */
 301         if (tdb1_unlock(tdb, tdb->tdb1.travlocks.hash, tdb->tdb1.travlocks.lock_rw) != 0)
 302                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 303                            "tdb1_firstkey:"
 304                            " error occurred while tdb1_unlocking!");
 305         return key;
 306 }
 307
 308 /* find the next entry in the database, returning its key */
 309 TDB_DATA tdb1_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
 310 {
 311         uint32_t oldhash;
 312         TDB_DATA key = tdb1_null;
 313         struct tdb1_record rec;
 314         unsigned char *k = NULL;
 315         tdb1_off_t off;
 316
 317         /* Is locked key the old key?  If so, traverse will be reliable. */
 318         if (tdb->tdb1.travlocks.off) {
 319                 if (tdb1_lock(tdb,tdb->tdb1.travlocks.hash,tdb->tdb1.travlocks.lock_rw))
 320                         return tdb1_null;
 321                 if (tdb1_rec_read(tdb, tdb->tdb1.travlocks.off, &rec) == -1
 322                     || !(k = tdb1_alloc_read(tdb,tdb->tdb1.travlocks.off+sizeof(rec),
 323                                             rec.key_len))
 324                     || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
 325                         /* No, it wasn't: unlock it and start from scratch */
 326                         if (tdb1_unlock_record(tdb, tdb->tdb1.travlocks.off) != 0) {
 327                                 SAFE_FREE(k);
 328                                 return tdb1_null;
 329                         }
 330                         if (tdb1_unlock(tdb, tdb->tdb1.travlocks.hash, tdb->tdb1.travlocks.lock_rw) != 0) {
 331                                 SAFE_FREE(k);
 332                                 return tdb1_null;
 333                         }
 334                         tdb->tdb1.travlocks.off = 0;
 335                 }
 336
 337                 SAFE_FREE(k);
 338         }
 339
 340         if (!tdb->tdb1.travlocks.off) {
 341                 /* No previous element: do normal find, and lock record */
 342                 tdb->tdb1.travlocks.off = tdb1_find_lock_hash(tdb, oldkey, tdb_hash(tdb, oldkey.dptr, oldkey.dsize), tdb->tdb1.travlocks.lock_rw, &rec);
 343                 if (!tdb->tdb1.travlocks.off) {
 344                         return tdb1_null;
 345                 }
 346                 tdb->tdb1.travlocks.hash = TDB1_BUCKET(rec.full_hash);
 347                 if (tdb1_lock_record(tdb, tdb->tdb1.travlocks.off) != 0) {
 348                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 349                                    "tdb1_nextkey: lock_record failed (%s)!",
 350                                    strerror(errno));
 351                         return tdb1_null;
 352                 }
 353         }
 354         oldhash = tdb->tdb1.travlocks.hash;
 355
 356         /* Grab next record: locks chain and returned record,
 357            unlocks old record */
 358         off = tdb1_next_lock(tdb, &tdb->tdb1.travlocks, &rec);
 359         if (off != TDB1_NEXT_LOCK_ERR && off != 0) {
 360                 key.dsize = rec.key_len;
 361                 key.dptr = tdb1_alloc_read(tdb, tdb->tdb1.travlocks.off+sizeof(rec),
 362                                           key.dsize);
 363                 /* Unlock the chain of this new record */
 364                 if (tdb1_unlock(tdb, tdb->tdb1.travlocks.hash, tdb->tdb1.travlocks.lock_rw) != 0)
 365                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 366                                    "tdb1_nextkey: WARNING tdb1_unlock failed!");
 367         }
 368         /* Unlock the chain of old record */
 369         if (tdb1_unlock(tdb, TDB1_BUCKET(oldhash), tdb->tdb1.travlocks.lock_rw) != 0)
 370                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 371                            "tdb1_nextkey: WARNING tdb1_unlock failed!");
 372         return key;
 373 }