lib/tdb/common/rescue.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library, rescue attempt code.
   5
   6    Copyright (C) Rusty Russell             2012
   7
   8      ** NOTE! The following LGPL license applies to the tdb
   9      ** library. This does NOT imply that all of Samba is released
  10      ** under the LGPL
  11
  12    This library is free software; you can redistribute it and/or
  13    modify it under the terms of the GNU Lesser General Public
  14    License as published by the Free Software Foundation; either
  15    version 3 of the License, or (at your option) any later version.
  16
  17    This library is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20    Lesser General Public License for more details.
  21
  22    You should have received a copy of the GNU Lesser General Public
  23    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  24 */
  25 #include "tdb_private.h"
  26 #include <assert.h>
  27
  28
  29 struct found {
  30         tdb_off_t head; /* 0 -> invalid. */
  31         struct tdb_record rec;
  32         TDB_DATA key;
  33         bool in_hash;
  34         bool in_free;
  35 };
  36
  37 struct found_table {
  38         /* As an ordered array (by head offset). */
  39         struct found *arr;
  40         unsigned int num, max;
  41 };
  42
  43 static bool looks_like_valid_record(struct tdb_context *tdb,
  44                                     tdb_off_t off,
  45                                     const struct tdb_record *rec,
  46                                     TDB_DATA *key)
  47 {
  48         unsigned int hval;
  49
  50         if (rec->magic != TDB_MAGIC)
  51                 return false;
  52
  53         if (rec->key_len + rec->data_len > rec->rec_len)
  54                 return false;
  55
  56         if (rec->rec_len % TDB_ALIGNMENT)
  57                 return false;
  58
  59         /* Next pointer must make some sense. */
  60         if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size))
  61                 return false;
  62
  63         if (tdb_oob(tdb, rec->next, sizeof(*rec), 1))
  64                 return false;
  65
  66         key->dsize = rec->key_len;
  67         key->dptr = tdb_alloc_read(tdb, off + sizeof(*rec), key->dsize);
  68         if (!key->dptr)
  69                 return false;
  70
  71         hval = tdb->hash_fn(key);
  72         if (hval != rec->full_hash) {
  73                 free(key->dptr);
  74                 return false;
  75         }
  76
  77         /* Caller frees up key->dptr */
  78         return true;
  79 }
  80
  81 static bool add_to_table(struct found_table *found,
  82                          tdb_off_t off,
  83                          struct tdb_record *rec,
  84                          TDB_DATA key)
  85 {
  86         if (found->num + 1 > found->max) {
  87                 struct found *new;
  88                 found->max = (found->max ? found->max * 2 : 128);
  89                 new = realloc(found->arr, found->max * sizeof(found->arr[0]));
  90                 if (!new)
  91                         return false;
  92                 found->arr = new;
  93         }
  94
  95         found->arr[found->num].head = off;
  96         found->arr[found->num].rec = *rec;
  97         found->arr[found->num].key = key;
  98         found->arr[found->num].in_hash = false;
  99         found->arr[found->num].in_free = false;
 100
 101         found->num++;
 102         return true;
 103 }
 104
 105 static bool walk_record(struct tdb_context *tdb,
 106                         const struct found *f,
 107                         void (*walk)(TDB_DATA, TDB_DATA, void *private_data),
 108                         void *private_data)
 109 {
 110         TDB_DATA data;
 111
 112         data.dsize = f->rec.data_len;
 113         data.dptr = tdb_alloc_read(tdb,
 114                                    f->head + sizeof(f->rec) + f->rec.key_len,
 115                                    data.dsize);
 116         if (!data.dptr) {
 117                 if (tdb->ecode == TDB_ERR_OOM)
 118                         return false;
 119                 /* I/O errors are expected. */
 120                 return true;
 121         }
 122
 123         walk(f->key, data, private_data);
 124         free(data.dptr);
 125         return true;
 126 }
 127
 128 /* First entry which has offset >= this one. */
 129 static unsigned int find_entry(struct found_table *found, tdb_off_t off)
 130 {
 131         unsigned int start = 0, end = found->num;
 132
 133         while (start < end) {
 134                 /* We can't overflow here. */
 135                 unsigned int mid = (start + end) / 2;
 136
 137                 if (off < found->arr[mid].head) {
 138                         end = mid;
 139                 } else if (off > found->arr[mid].head) {
 140                         start = mid + 1;
 141                 } else {
 142                         return mid;
 143                 }
 144         }
 145
 146         assert(start == end);
 147         return end;
 148 }
 149
 150 static void found_in_hashchain(struct found_table *found, tdb_off_t head)
 151 {
 152         unsigned int match;
 153
 154         match = find_entry(found, head);
 155         if (match < found->num && found->arr[match].head == head) {
 156                 found->arr[match].in_hash = true;
 157         }
 158 }
 159
 160 static void mark_free_area(struct found_table *found, tdb_off_t head,
 161                            tdb_len_t len)
 162 {
 163         unsigned int match;
 164
 165         match = find_entry(found, head);
 166         /* Mark everything within this free entry. */
 167         while (match < found->num) {
 168                 if (found->arr[match].head >= head + len) {
 169                         break;
 170                 }
 171                 found->arr[match].in_free = true;
 172                 match++;
 173         }
 174 }
 175
 176 static int cmp_key(const void *a, const void *b)
 177 {
 178         const struct found *fa = a, *fb = b;
 179
 180         if (fa->key.dsize < fb->key.dsize) {
 181                 return -1;
 182         } else if (fa->key.dsize > fb->key.dsize) {
 183                 return 1;
 184         }
 185         return memcmp(fa->key.dptr, fb->key.dptr, fa->key.dsize);
 186 }
 187
 188 static bool key_eq(TDB_DATA a, TDB_DATA b)
 189 {
 190         return a.dsize == b.dsize
 191                 && memcmp(a.dptr, b.dptr, a.dsize) == 0;
 192 }
 193
 194 static void free_table(struct found_table *found)
 195 {
 196         unsigned int i;
 197
 198         for (i = 0; i < found->num; i++) {
 199                 free(found->arr[i].key.dptr);
 200         }
 201         free(found->arr);
 202 }
 203
 204 static void logging_suppressed(struct tdb_context *tdb,
 205                                enum tdb_debug_level level, const char *fmt, ...)
 206 {
 207 }
 208
 209 _PUBLIC_ int tdb_rescue(struct tdb_context *tdb,
 210                         void (*walk)(TDB_DATA, TDB_DATA, void *private_data),
 211                         void *private_data)
 212 {
 213         struct found_table found = { NULL, 0, 0 };
 214         tdb_off_t h, off, i;
 215         tdb_log_func oldlog = tdb->log.log_fn;
 216         struct tdb_record rec;
 217         TDB_DATA key;
 218         bool locked;
 219
 220         /* Read-only databases use no locking at all: it's best-effort.
 221          * We may have a write lock already, so skip that case too. */
 222         if (tdb->read_only || tdb->allrecord_lock.count != 0) {
 223                 locked = false;
 224         } else {
 225                 if (tdb_lockall_read(tdb) == -1)
 226                         return -1;
 227                 locked = true;
 228         }
 229
 230         /* Make sure we know true size of the underlying file. */
 231         tdb_oob(tdb, tdb->map_size, 1, 1);
 232
 233         /* Suppress logging, since we anticipate errors. */
 234         tdb->log.log_fn = logging_suppressed;
 235
 236         /* Now walk entire db looking for records. */
 237         for (off = TDB_DATA_START(tdb->hash_size);
 238              off < tdb->map_size;
 239              off += TDB_ALIGNMENT) {
 240                 if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
 241                                            DOCONV()) == -1)
 242                         continue;
 243
 244                 if (looks_like_valid_record(tdb, off, &rec, &key)) {
 245                         if (!add_to_table(&found, off, &rec, key)) {
 246                                 goto oom;
 247                         }
 248                 }
 249         }
 250
 251         /* Walk hash chains to positive vet. */
 252         for (h = 0; h < 1+tdb->hash_size; h++) {
 253                 bool slow_chase = false;
 254                 tdb_off_t slow_off = FREELIST_TOP + h*sizeof(tdb_off_t);
 255
 256                 if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
 257                                  &off) == -1)
 258                         continue;
 259
 260                 while (off && off != slow_off) {
 261                         if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
 262                                                    DOCONV()) != 0) {
 263                                 break;
 264                         }
 265
 266                         /* 0 is the free list, rest are hash chains. */
 267                         if (h == 0) {
 268                                 /* Don't mark garbage as free. */
 269                                 if (rec.magic != TDB_FREE_MAGIC) {
 270                                         break;
 271                                 }
 272                                 mark_free_area(&found, off,
 273                                                sizeof(rec) + rec.rec_len);
 274                         } else {
 275                                 found_in_hashchain(&found, off);
 276                         }
 277
 278                         off = rec.next;
 279
 280                         /* Loop detection using second pointer at half-speed */
 281                         if (slow_chase) {
 282                                 /* First entry happens to be next ptr */
 283                                 tdb_ofs_read(tdb, slow_off, &slow_off);
 284                         }
 285                         slow_chase = !slow_chase;
 286                 }
 287         }
 288
 289         /* Recovery area: must be marked as free, since it often has old
 290          * records in there! */
 291         if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &off) == 0 && off != 0) {
 292                 if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
 293                                            DOCONV()) == 0) {
 294                         mark_free_area(&found, off, sizeof(rec) + rec.rec_len);
 295                 }
 296         }
 297
 298         /* Now sort by key! */
 299         if (found.arr != NULL) {
 300                 qsort(found.arr, found.num, sizeof(found.arr[0]), cmp_key);
 301         }
 302
 303         for (i = 0; (found.arr != NULL) && i < found.num; ) {
 304                 unsigned int num, num_in_hash = 0;
 305
 306                 /* How many are identical? */
 307                 for (num = 0; num < found.num - i; num++) {
 308                         if (!key_eq(found.arr[i].key, found.arr[i+num].key)) {
 309                                 break;
 310                         }
 311                         if (found.arr[i+num].in_hash) {
 312                                 if (!walk_record(tdb, &found.arr[i+num],
 313                                                  walk, private_data))
 314                                         goto oom;
 315                                 num_in_hash++;
 316                         }
 317                 }
 318                 assert(num);
 319
 320                 /* If none were in the hash, we print any not in free list. */
 321                 if (num_in_hash == 0) {
 322                         unsigned int j;
 323
 324                         for (j = i; j < i + num; j++) {
 325                                 if (!found.arr[j].in_free) {
 326                                         if (!walk_record(tdb, &found.arr[j],
 327                                                          walk, private_data))
 328                                                 goto oom;
 329                                 }
 330                         }
 331                 }
 332
 333                 i += num;
 334         }
 335
 336         tdb->log.log_fn = oldlog;
 337         if (locked) {
 338                 tdb_unlockall_read(tdb);
 339         }
 340         return 0;
 341
 342 oom:
 343         tdb->log.log_fn = oldlog;
 344         tdb->ecode = TDB_ERR_OOM;
 345         TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_rescue: failed allocating\n"));
 346         free_table(&found);
 347         if (locked) {
 348                 tdb_unlockall_read(tdb);
 349         }
 350         return -1;
 351 }