tdb: Fix a C++ warning
lib/tdb/common/check.c
/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Rusty Russell              2009

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "tdb_private.h"
/* Since we opened it, these shouldn't fail unless it's recent corruption. */
static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
{
	struct tdb_header hdr;

	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1)
		return false;
	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
		goto corrupt;

	CONVERT(hdr);
	if (hdr.version != TDB_VERSION)
		goto corrupt;

	if (hdr.rwlocks != 0)
		goto corrupt;

	if (hdr.hash_size == 0)
		goto corrupt;

	if (hdr.hash_size != tdb->header.hash_size)
		goto corrupt;

	if (hdr.recovery_start != 0 &&
	    hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
		goto corrupt;

	*recovery = hdr.recovery_start;
	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
	return false;
}
/* Generic record header check. */
static bool tdb_check_record(struct tdb_context *tdb,
			     tdb_off_t off,
			     const struct tdb_record *rec)
{
	tdb_off_t tailer;

	/* Check rec->next: 0 or points to record offset, aligned. */
	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->header.hash_size)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too small next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if (rec->next + sizeof(*rec) < rec->next) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too large next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if ((rec->next % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d misaligned next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
		goto corrupt;

	/* Check rec_len: similar to rec->next, implies next record. */
	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d misaligned length %d\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* Must fit tailer. */
	if (rec->rec_len < sizeof(tailer)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too short length %d\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* OOB allows "right at the end" access, so this works for last rec. */
	if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
		goto corrupt;

	/* Check tailer. */
	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
			 &tailer) == -1)
		goto corrupt;
	if (tailer != sizeof(*rec) + rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d invalid tailer\n", off));
		goto corrupt;
	}

	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	return false;
}
/* Grab some bytes: may copy if can't use mmap.
   Caller has already done bounds check. */
static TDB_DATA get_bytes(struct tdb_context *tdb,
			  tdb_off_t off, tdb_len_t len)
{
	TDB_DATA d;

	d.dsize = len;

	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
		d.dptr = (unsigned char *)tdb->map_ptr + off;
	else
		d.dptr = tdb_alloc_read(tdb, off, d.dsize);
	return d;
}
/* Frees data if we're not able to simply use mmap. */
static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
{
	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
		return;
	free(d.dptr);
}
/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
 * See: http://burtleburtle.net/bob/c/lookup3.c
 */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
{
	uint32_t a,b,c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + *pc;
	c += *pb;
	a += key;
	c ^= b; c -= rot(b,14);
	a ^= c; a -= rot(c,11);
	b ^= a; b -= rot(a,25);
	c ^= b; c -= rot(b,16);
	a ^= c; a -= rot(c,4);
	b ^= a; b -= rot(a,14);
	c ^= b; c -= rot(b,24);
	*pc=c; *pb=b;
}
/*
  We want to check that all free records are in the free list
  (only once), and all free list entries are free records. Similarly
  for each hash chain of used records.

  Doing that naively (without walking hash chains, since we want to be
  linear) means keeping a list of records which have been seen in each
  hash chain, and another of records pointed to (ie. next pointers
  from records and the initial hash chain heads). These two lists
  should be equal. This will take 8 bytes per record, and require
  sorting at the end.

  So instead, we record each offset in a bitmap in such a way that
  recording it twice will cancel out. Since each offset should appear
  exactly twice, the bitmap should be zero at the end.

  The approach was inspired by Bloom Filters (see Wikipedia). For
  each value, we flip K bits in a bitmap of size N. The number of
  distinct arrangements is:

	N! / (K! * (N-K)!)

  Of course, not all arrangements are actually distinct, but testing
  shows this formula to be close enough.

  So, if K == 8 and N == 256, the probability of two things flipping the same
  bits is 1 in 409,663,695,276,000.

  Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
  (320k) seems reasonable.
*/
#define NUM_HASHES 8
#define BITMAP_BITS 256
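/* Illustrative sketch only (not part of the original file): the figure quoted
 * above is simply the binomial coefficient C(N, K) = 256! / (8! * 248!)
 *   = (256*255*254*253*252*251*250*249) / 40320
 *   = 409,663,695,276,000.
 * A hypothetical helper reproducing it, assuming uint64_t is available; the
 * TDB_CHECK_EXAMPLES guard is hypothetical so it is normally compiled out. */
#ifdef TDB_CHECK_EXAMPLES
static uint64_t example_choose(unsigned int n, unsigned int k)
{
	uint64_t num = 1, den = 1;
	unsigned int i;

	/* k! always divides a product of k consecutive integers, so the
	 * final division is exact; no overflow for n = 256, k = 8. */
	for (i = 0; i < k; i++) {
		num *= n - i;
		den *= i + 1;
	}
	return num / den;
}
/* example_choose(BITMAP_BITS, NUM_HASHES) == 409663695276000ULL */
#endif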
static void bit_flip(unsigned char bits[], unsigned int idx)
{
	bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
}
/* We record offsets in a bitmap for the particular chain it should be in. */
static void record_offset(unsigned char bits[], tdb_off_t off)
{
	uint32_t h1 = off, h2 = 0;
	unsigned int i;

	/* We get two good hash values out of jhash2, so we use both. Then
	 * we keep going to produce further hash values. */
	for (i = 0; i < NUM_HASHES / 2; i++) {
		hash(off, &h1, &h2);
		bit_flip(bits, h1 % BITMAP_BITS);
		bit_flip(bits, h2 % BITMAP_BITS);
		h2++;
	}
}
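/* Illustrative sketch only (not part of the original file): record_offset()
 * is deterministic in "off", so recording the same offset an even number of
 * times XORs the same bit positions twice and leaves the bitmap all-zero.
 * This is the invariant tdb_check() relies on below.  Guarded by the same
 * hypothetical TDB_CHECK_EXAMPLES macro. */
#ifdef TDB_CHECK_EXAMPLES
static bool example_offsets_cancel(tdb_off_t off)
{
	unsigned char bits[BITMAP_BITS / CHAR_BIT];
	unsigned int i;

	memset(bits, 0, sizeof(bits));
	record_offset(bits, off);	/* e.g. seen as a hash chain head ... */
	record_offset(bits, off);	/* ... and again as a record's offset */

	for (i = 0; i < sizeof(bits); i++) {
		if (bits[i] != 0)
			return false;
	}
	return true;
}
#endif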
/* Check that an in-use record is valid. */
static bool tdb_check_used_record(struct tdb_context *tdb,
				  tdb_off_t off,
				  const struct tdb_record *rec,
				  unsigned char **hashes,
				  int (*check)(TDB_DATA, TDB_DATA, void *),
				  void *private_data)
{
	TDB_DATA key, data;

	if (!tdb_check_record(tdb, off, rec))
		return false;

	/* key + data + tailer must fit in record */
	if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too short for contents\n", off));
		return false;
	}

	key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
	if (!key.dptr)
		return false;

	if (tdb->hash_fn(&key) != rec->full_hash) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d has incorrect hash\n", off));
		goto fail_put_key;
	}

	/* Mark this offset as a known value for this hash bucket. */
	record_offset(hashes[BUCKET(rec->full_hash)+1], off);
	/* And similarly if the next pointer is valid. */
	if (rec->next)
		record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);

	/* If they supply a check function and this record isn't dead,
	   get data and feed it. */
	if (check && rec->magic != TDB_DEAD_MAGIC) {
		data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
				 rec->data_len);
		if (!data.dptr)
			goto fail_put_key;

		if (check(key, data, private_data) == -1)
			goto fail_put_data;
		put_bytes(tdb, data);
	}

	put_bytes(tdb, key);
	return true;

fail_put_data:
	put_bytes(tdb, data);
fail_put_key:
	put_bytes(tdb, key);
	return false;
}
/* Check that an unused record is valid. */
static bool tdb_check_free_record(struct tdb_context *tdb,
				  tdb_off_t off,
				  const struct tdb_record *rec,
				  unsigned char **hashes)
{
	if (!tdb_check_record(tdb, off, rec))
		return false;

	/* Mark this offset as a known value for the free list. */
	record_offset(hashes[0], off);
	/* And similarly if the next pointer is valid. */
	if (rec->next)
		record_offset(hashes[0], rec->next);
	return true;
}
int tdb_check(struct tdb_context *tdb,
	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
	      void *private_data)
{
	unsigned int h;
	unsigned char **hashes;
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;

	if (tdb_lockall(tdb) == -1)
		return -1;

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
	for (h = 1; h < 1+tdb->header.hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok. */
	for (off = TDB_DATA_START(tdb->header.hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		case TDB_RECOVERY_MAGIC:
		case 0: /* Used for invalid (or in-progress) recovery area. */
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %d\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %d\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected %s recovery area, got %s\n",
			 recovery_start ? "a" : "no",
			 found_recovery ? "one" : "none"));
		goto free;
	}

	free(hashes);
	tdb_unlockall(tdb);
	return 0;

free:
	free(hashes);
unlock:
	tdb_unlockall(tdb);
	return -1;
}
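/* Illustrative usage sketch only (not part of the original file): a caller
 * verifies a database by passing an optional per-record callback; returning
 * -1 from the callback aborts the check.  "example_verify",
 * "example_check_file" and the TDB_CHECK_EXAMPLES guard are hypothetical
 * names, not part of the tdb API. */
#ifdef TDB_CHECK_EXAMPLES
static int example_verify(TDB_DATA key, TDB_DATA data, void *private_data)
{
	(void)data;
	(void)private_data;
	/* Whatever application-level invariant matters; here: non-empty keys. */
	return (key.dsize == 0) ? -1 : 0;
}

static int example_check_file(const char *path)
{
	struct tdb_context *tdb;
	int ret;

	tdb = tdb_open(path, 0, TDB_DEFAULT, O_RDWR, 0);
	if (tdb == NULL)
		return -1;

	ret = tdb_check(tdb, example_verify, NULL); /* 0 on success, -1 on corruption */
	tdb_close(tdb);
	return ret;
}
#endif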