tdb2: make TDB1 code use tdb2's TDB_ERROR and tdb_logerr()
[Samba/gbeck.git] / lib / tdb2 / tdb1_io.c
blobcd6efc34b1b2f4141f799be12150654493ef5434
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
#if !defined(MAX)
/* Larger of two values.  Each argument may be evaluated more than once. */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
34 /* check for an out of bounds access - if it is out of bounds then
35 see if the database has been expanded by someone else and expand
36 if necessary
37 note that "len" is the minimum length needed for the db
39 static int tdb1_oob(struct tdb1_context *tdb, tdb1_off_t len, int probe)
41 struct stat st;
42 if (len <= tdb->map_size)
43 return 0;
44 if (tdb->flags & TDB1_INTERNAL) {
45 if (!probe) {
46 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
47 "tdb1_oob len %d beyond internal malloc size %d",
48 (int)len, (int)tdb->map_size);
50 return -1;
53 if (fstat(tdb->fd, &st) == -1) {
54 tdb->last_error = TDB_ERR_IO;
55 return -1;
58 if (st.st_size < (size_t)len) {
59 if (!probe) {
60 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
61 "tdb1_oob len %d beyond eof at %d",
62 (int)len, (int)st.st_size);
64 return -1;
67 /* Unmap, update size, remap */
68 if (tdb1_munmap(tdb) == -1) {
69 tdb->last_error = TDB_ERR_IO;
70 return -1;
72 tdb->map_size = st.st_size;
73 tdb1_mmap(tdb);
74 return 0;
77 /* write a lump of data at a specified offset */
78 static int tdb1_write(struct tdb1_context *tdb, tdb1_off_t off,
79 const void *buf, tdb1_len_t len)
81 if (len == 0) {
82 return 0;
85 if (tdb->read_only || tdb->traverse_read) {
86 tdb->last_error = TDB_ERR_RDONLY;
87 return -1;
90 if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0)
91 return -1;
93 if (tdb->map_ptr) {
94 memcpy(off + (char *)tdb->map_ptr, buf, len);
95 } else {
96 ssize_t written = pwrite(tdb->fd, buf, len, off);
97 if ((written != (ssize_t)len) && (written != -1)) {
98 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
99 "tdb1_write: wrote only "
100 "%d of %d bytes at %d, trying once more",
101 (int)written, len, off);
102 written = pwrite(tdb->fd, (const char *)buf+written,
103 len-written,
104 off+written);
106 if (written == -1) {
107 /* Ensure ecode is set for log fn. */
108 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
109 "tdb1_write failed at %d "
110 "len=%d (%s)",
111 off, len, strerror(errno));
112 return -1;
113 } else if (written != (ssize_t)len) {
114 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
115 "tdb1_write: failed to "
116 "write %d bytes at %d in two attempts",
117 len, off);
118 return -1;
121 return 0;
/*
 * Endian conversion: we only ever deal with 4 byte quantities.
 *
 * Applies TDB1_BYTEREV in place to each of the size/4 complete 32-bit
 * words in "buf" (any trailing bytes are left alone) and returns "buf".
 */
void *tdb1_convert(void *buf, uint32_t size)
{
	uint32_t *word = (uint32_t *)buf;
	uint32_t remaining = size / 4;

	while (remaining != 0) {
		*word = TDB1_BYTEREV(*word);
		word++;
		remaining--;
	}
	return buf;
}
134 /* read a lump of data at a specified offset, maybe convert */
135 static int tdb1_read(struct tdb1_context *tdb, tdb1_off_t off, void *buf,
136 tdb1_len_t len, int cv)
138 if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0) {
139 return -1;
142 if (tdb->map_ptr) {
143 memcpy(buf, off + (char *)tdb->map_ptr, len);
144 } else {
145 ssize_t ret = pread(tdb->fd, buf, len, off);
146 if (ret != (ssize_t)len) {
147 /* Ensure ecode is set for log fn. */
148 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
149 "tdb1_read failed at %d "
150 "len=%d ret=%d (%s) map_size=%d",
151 (int)off, (int)len, (int)ret,
152 strerror(errno),
153 (int)tdb->map_size);
154 return -1;
157 if (cv) {
158 tdb1_convert(buf, len);
160 return 0;
166 do an unlocked scan of the hash table heads to find the next non-zero head. The value
167 will then be confirmed with the lock held
169 static void tdb1_next_hash_chain(struct tdb1_context *tdb, uint32_t *chain)
171 uint32_t h = *chain;
172 if (tdb->map_ptr) {
173 for (;h < tdb->header.hash_size;h++) {
174 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
175 break;
178 } else {
179 uint32_t off=0;
180 for (;h < tdb->header.hash_size;h++) {
181 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
182 break;
186 (*chain) = h;
190 int tdb1_munmap(struct tdb1_context *tdb)
192 if (tdb->flags & TDB1_INTERNAL)
193 return 0;
195 #if HAVE_MMAP
196 if (tdb->map_ptr) {
197 int ret;
199 ret = munmap(tdb->map_ptr, tdb->map_size);
200 if (ret != 0)
201 return ret;
203 #endif
204 tdb->map_ptr = NULL;
205 return 0;
208 void tdb1_mmap(struct tdb1_context *tdb)
210 if (tdb->flags & TDB1_INTERNAL)
211 return;
213 #if HAVE_MMAP
214 if (!(tdb->flags & TDB1_NOMMAP)) {
215 tdb->map_ptr = mmap(NULL, tdb->map_size,
216 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
217 MAP_SHARED|MAP_FILE, tdb->fd, 0);
220 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
223 if (tdb->map_ptr == MAP_FAILED) {
224 tdb->map_ptr = NULL;
225 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
226 "tdb1_mmap failed for size %d (%s)",
227 tdb->map_size, strerror(errno));
229 } else {
230 tdb->map_ptr = NULL;
232 #else
233 tdb->map_ptr = NULL;
234 #endif
237 /* expand a file. we prefer to use ftruncate, as that is what posix
238 says to use for mmap expansion */
239 static int tdb1_expand_file(struct tdb1_context *tdb, tdb1_off_t size, tdb1_off_t addition)
241 char buf[8192];
243 if (tdb->read_only || tdb->traverse_read) {
244 tdb->last_error = TDB_ERR_RDONLY;
245 return -1;
248 if (ftruncate(tdb->fd, size+addition) == -1) {
249 char b = 0;
250 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
251 if (written == 0) {
252 /* try once more, potentially revealing errno */
253 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
255 if (written == 0) {
256 /* again - give up, guessing errno */
257 errno = ENOSPC;
259 if (written != 1) {
260 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
261 "expand_file to %d failed (%s)",
262 size+addition,
263 strerror(errno));
264 return -1;
268 /* now fill the file with something. This ensures that the
269 file isn't sparse, which would be very bad if we ran out of
270 disk. This must be done with write, not via mmap */
271 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
272 while (addition) {
273 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
274 ssize_t written = pwrite(tdb->fd, buf, n, size);
275 if (written == 0) {
276 /* prevent infinite loops: try _once_ more */
277 written = pwrite(tdb->fd, buf, n, size);
279 if (written == 0) {
280 /* give up, trying to provide a useful errno */
281 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
282 "expand_file write "
283 "returned 0 twice: giving up!");
284 errno = ENOSPC;
285 return -1;
286 } else if (written == -1) {
287 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
288 "expand_file write of "
289 "%d bytes failed (%s)", (int)n,
290 strerror(errno));
291 return -1;
292 } else if (written != n) {
293 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
294 "expand_file: wrote "
295 "only %d of %d bytes - retrying",
296 (int)written, (int)n);
298 addition -= written;
299 size += written;
301 return 0;
305 /* expand the database at least size bytes by expanding the underlying
306 file and doing the mmap again if necessary */
307 int tdb1_expand(struct tdb1_context *tdb, tdb1_off_t size)
309 struct tdb1_record rec;
310 tdb1_off_t offset, new_size, top_size, map_size;
312 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
313 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
314 "lock failed in tdb1_expand");
315 return -1;
318 /* must know about any previous expansions by another process */
319 tdb->methods->tdb1_oob(tdb, tdb->map_size + 1, 1);
321 /* limit size in order to avoid using up huge amounts of memory for
322 * in memory tdbs if an oddball huge record creeps in */
323 if (size > 100 * 1024) {
324 top_size = tdb->map_size + size * 2;
325 } else {
326 top_size = tdb->map_size + size * 100;
329 /* always make room for at least top_size more records, and at
330 least 25% more space. if the DB is smaller than 100MiB,
331 otherwise grow it by 10% only. */
332 if (tdb->map_size > 100 * 1024 * 1024) {
333 map_size = tdb->map_size * 1.10;
334 } else {
335 map_size = tdb->map_size * 1.25;
338 /* Round the database up to a multiple of the page size */
339 new_size = MAX(top_size, map_size);
340 size = TDB1_ALIGN(new_size, tdb->page_size) - tdb->map_size;
342 if (!(tdb->flags & TDB1_INTERNAL))
343 tdb1_munmap(tdb);
346 * We must ensure the file is unmapped before doing this
347 * to ensure consistency with systems like OpenBSD where
348 * writes and mmaps are not consistent.
351 /* expand the file itself */
352 if (!(tdb->flags & TDB1_INTERNAL)) {
353 if (tdb->methods->tdb1_expand_file(tdb, tdb->map_size, size) != 0)
354 goto fail;
357 tdb->map_size += size;
359 if (tdb->flags & TDB1_INTERNAL) {
360 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
361 tdb->map_size);
362 if (!new_map_ptr) {
363 tdb->map_size -= size;
364 goto fail;
366 tdb->map_ptr = new_map_ptr;
367 } else {
369 * We must ensure the file is remapped before adding the space
370 * to ensure consistency with systems like OpenBSD where
371 * writes and mmaps are not consistent.
374 /* We're ok if the mmap fails as we'll fallback to read/write */
375 tdb1_mmap(tdb);
378 /* form a new freelist record */
379 memset(&rec,'\0',sizeof(rec));
380 rec.rec_len = size - sizeof(rec);
382 /* link it into the free list */
383 offset = tdb->map_size - size;
384 if (tdb1_free(tdb, offset, &rec) == -1)
385 goto fail;
387 tdb1_unlock(tdb, -1, F_WRLCK);
388 return 0;
389 fail:
390 tdb1_unlock(tdb, -1, F_WRLCK);
391 return -1;
394 /* read/write a tdb1_off_t */
395 int tdb1_ofs_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
397 return tdb->methods->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
400 int tdb1_ofs_write(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
402 tdb1_off_t off = *d;
403 return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
407 /* read a lump of data, allocating the space for it */
408 unsigned char *tdb1_alloc_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_len_t len)
410 unsigned char *buf;
412 /* some systems don't like zero length malloc */
414 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
415 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
416 "tdb1_alloc_read malloc failed"
417 " len=%d (%s)",
418 len, strerror(errno));
419 return NULL;
421 if (tdb->methods->tdb1_read(tdb, offset, buf, len, 0) == -1) {
422 SAFE_FREE(buf);
423 return NULL;
425 return buf;
428 /* Give a piece of tdb data to a parser */
430 int tdb1_parse_data(struct tdb1_context *tdb, TDB1_DATA key,
431 tdb1_off_t offset, tdb1_len_t len,
432 int (*parser)(TDB1_DATA key, TDB1_DATA data,
433 void *private_data),
434 void *private_data)
436 TDB1_DATA data;
437 int result;
439 data.dsize = len;
441 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
443 * Optimize by avoiding the malloc/memcpy/free, point the
444 * parser directly at the mmap area.
446 if (tdb->methods->tdb1_oob(tdb, offset+len, 0) != 0) {
447 return -1;
449 data.dptr = offset + (unsigned char *)tdb->map_ptr;
450 return parser(key, data, private_data);
453 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
454 return -1;
457 result = parser(key, data, private_data);
458 free(data.dptr);
459 return result;
462 /* read/write a record */
463 int tdb1_rec_read(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
465 if (tdb->methods->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
466 return -1;
467 if (TDB1_BAD_MAGIC(rec)) {
468 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
469 "tdb1_rec_read bad magic 0x%x at offset=%d",
470 rec->magic, offset);
471 return -1;
473 return tdb->methods->tdb1_oob(tdb, rec->next+sizeof(*rec), 0);
476 int tdb1_rec_write(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
478 struct tdb1_record r = *rec;
479 return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
482 static const struct tdb1_methods io1_methods = {
483 tdb1_read,
484 tdb1_write,
485 tdb1_next_hash_chain,
486 tdb1_oob,
487 tdb1_expand_file,
491 initialise the default methods table
493 void tdb1_io_init(struct tdb1_context *tdb)
495 tdb->methods = &io1_methods;