Improve the VFS Makefile so that it is easier for use out of tree but still works...
[Samba/gebeck_regimport.git] / lib / tdb2 / tdb1_io.c
blob4371c236cc66f720b7ae414fe2af32cce2a77a69
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
30 #ifndef MAX
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
32 #endif
34 /* check for an out of bounds access - if it is out of bounds then
35 see if the database has been expanded by someone else and expand
36 if necessary
37 note that "len" is the minimum length needed for the db
39 static int tdb1_oob(struct tdb_context *tdb, tdb1_off_t len, int probe)
41 struct stat st;
42 if (len <= tdb->file->map_size)
43 return 0;
44 if (tdb->flags & TDB_INTERNAL) {
45 if (!probe) {
46 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
47 "tdb1_oob len %d beyond internal malloc size %d",
48 (int)len, (int)tdb->file->map_size);
50 return -1;
53 if (fstat(tdb->file->fd, &st) == -1) {
54 tdb->last_error = TDB_ERR_IO;
55 return -1;
58 if (st.st_size < (size_t)len) {
59 if (!probe) {
60 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
61 "tdb1_oob len %d beyond eof at %d",
62 (int)len, (int)st.st_size);
64 return -1;
67 /* Unmap, update size, remap */
68 if (tdb1_munmap(tdb) == -1) {
69 tdb->last_error = TDB_ERR_IO;
70 return -1;
72 tdb->file->map_size = st.st_size;
73 return tdb1_mmap(tdb);
76 /* write a lump of data at a specified offset */
77 static int tdb1_write(struct tdb_context *tdb, tdb1_off_t off,
78 const void *buf, tdb1_len_t len)
80 if (len == 0) {
81 return 0;
84 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
85 tdb->last_error = TDB_ERR_RDONLY;
86 return -1;
89 if (tdb->tdb1.io->tdb1_oob(tdb, off + len, 0) != 0)
90 return -1;
92 if (tdb->file->map_ptr) {
93 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
94 } else {
95 #ifdef HAVE_INCOHERENT_MMAP
96 tdb->last_error = TDB_ERR_IO;
97 return -1;
98 #else
99 ssize_t written = pwrite(tdb->file->fd, buf, len, off);
100 if ((written != (ssize_t)len) && (written != -1)) {
101 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
102 "tdb1_write: wrote only "
103 "%d of %d bytes at %d, trying once more",
104 (int)written, len, off);
105 written = pwrite(tdb->file->fd,
106 (const char *)buf+written,
107 len-written,
108 off+written);
110 if (written == -1) {
111 /* Ensure ecode is set for log fn. */
112 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
113 "tdb1_write failed at %d "
114 "len=%d (%s)",
115 off, len, strerror(errno));
116 return -1;
117 } else if (written != (ssize_t)len) {
118 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
119 "tdb1_write: failed to "
120 "write %d bytes at %d in two attempts",
121 len, off);
122 return -1;
124 #endif
126 return 0;
/*
 * Endian conversion: byte-reverse, in place, every complete 4-byte word
 * in the first "size" bytes of "buf".  Trailing bytes that do not make up
 * a full word are left untouched.  Returns "buf".
 */
void *tdb1_convert(void *buf, uint32_t size)
{
	uint32_t *word = (uint32_t *)buf;
	uint32_t nwords = size / 4;
	uint32_t idx;

	for (idx = 0; idx < nwords; idx++) {
		word[idx] = TDB1_BYTEREV(word[idx]);
	}

	return buf;
}
139 /* read a lump of data at a specified offset, maybe convert */
140 static int tdb1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
141 tdb1_len_t len, int cv)
143 if (tdb->tdb1.io->tdb1_oob(tdb, off + len, 0) != 0) {
144 return -1;
147 if (tdb->file->map_ptr) {
148 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
149 } else {
150 #ifdef HAVE_INCOHERENT_MMAP
151 tdb->last_error = TDB_ERR_IO;
152 return -1;
153 #else
154 ssize_t ret = pread(tdb->file->fd, buf, len, off);
155 if (ret != (ssize_t)len) {
156 /* Ensure ecode is set for log fn. */
157 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
158 "tdb1_read failed at %d "
159 "len=%d ret=%d (%s) map_size=%d",
160 (int)off, (int)len, (int)ret,
161 strerror(errno),
162 (int)tdb->file->map_size);
163 return -1;
165 #endif
167 if (cv) {
168 tdb1_convert(buf, len);
170 return 0;
176 do an unlocked scan of the hash table heads to find the next non-zero head. The value
177 will then be confirmed with the lock held
179 static void tdb1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
181 uint32_t h = *chain;
182 if (tdb->file->map_ptr) {
183 for (;h < tdb->tdb1.header.hash_size;h++) {
184 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
185 break;
188 } else {
189 uint32_t off=0;
190 for (;h < tdb->tdb1.header.hash_size;h++) {
191 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
192 break;
196 (*chain) = h;
200 int tdb1_munmap(struct tdb_context *tdb)
202 if (tdb->flags & TDB_INTERNAL)
203 return 0;
205 #if HAVE_MMAP
206 if (tdb->file->map_ptr) {
207 int ret;
209 ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
210 if (ret != 0)
211 return ret;
213 #endif
214 tdb->file->map_ptr = NULL;
215 return 0;
218 /* If mmap isn't coherent, *everyone* must always mmap. */
219 static bool should_mmap(const struct tdb_context *tdb)
221 #ifdef HAVE_INCOHERENT_MMAP
222 return true;
223 #else
224 return !(tdb->flags & TDB_NOMMAP);
225 #endif
228 int tdb1_mmap(struct tdb_context *tdb)
230 if (tdb->flags & TDB_INTERNAL)
231 return 0;
233 #if HAVE_MMAP
234 if (should_mmap(tdb)) {
235 int mmap_flags;
236 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
237 mmap_flags = PROT_READ;
238 else
239 mmap_flags = PROT_READ | PROT_WRITE;
241 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
242 mmap_flags,
243 MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
246 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
249 if (tdb->file->map_ptr == MAP_FAILED) {
250 tdb->file->map_ptr = NULL;
251 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
252 "tdb1_mmap failed for size %llu (%s)",
253 (long long)tdb->file->map_size,
254 strerror(errno));
255 #ifdef HAVE_INCOHERENT_MMAP
256 tdb->last_error = TDB_ERR_IO;
257 return -1;
258 #endif
260 } else {
261 tdb->file->map_ptr = NULL;
263 #else
264 tdb->file->map_ptr = NULL;
265 #endif
266 return 0;
269 /* expand a file. we prefer to use ftruncate, as that is what posix
270 says to use for mmap expansion */
271 static int tdb1_expand_file(struct tdb_context *tdb, tdb1_off_t size, tdb1_off_t addition)
273 char buf[8192];
275 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
276 tdb->last_error = TDB_ERR_RDONLY;
277 return -1;
280 if (ftruncate(tdb->file->fd, size+addition) == -1) {
281 char b = 0;
282 ssize_t written = pwrite(tdb->file->fd, &b, 1,
283 (size+addition) - 1);
284 if (written == 0) {
285 /* try once more, potentially revealing errno */
286 written = pwrite(tdb->file->fd, &b, 1,
287 (size+addition) - 1);
289 if (written == 0) {
290 /* again - give up, guessing errno */
291 errno = ENOSPC;
293 if (written != 1) {
294 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
295 "expand_file to %d failed (%s)",
296 size+addition,
297 strerror(errno));
298 return -1;
302 /* now fill the file with something. This ensures that the
303 file isn't sparse, which would be very bad if we ran out of
304 disk. This must be done with write, not via mmap */
305 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
306 while (addition) {
307 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
308 ssize_t written = pwrite(tdb->file->fd, buf, n, size);
309 if (written == 0) {
310 /* prevent infinite loops: try _once_ more */
311 written = pwrite(tdb->file->fd, buf, n, size);
313 if (written == 0) {
314 /* give up, trying to provide a useful errno */
315 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
316 "expand_file write "
317 "returned 0 twice: giving up!");
318 errno = ENOSPC;
319 return -1;
320 } else if (written == -1) {
321 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
322 "expand_file write of "
323 "%d bytes failed (%s)", (int)n,
324 strerror(errno));
325 return -1;
326 } else if (written != n) {
327 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
328 "expand_file: wrote "
329 "only %d of %d bytes - retrying",
330 (int)written, (int)n);
332 addition -= written;
333 size += written;
335 tdb->stats.expands++;
336 return 0;
340 /* expand the database at least size bytes by expanding the underlying
341 file and doing the mmap again if necessary */
342 int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size)
344 struct tdb1_record rec;
345 tdb1_off_t offset, new_size, top_size, map_size;
347 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
348 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
349 "lock failed in tdb1_expand");
350 return -1;
353 /* must know about any previous expansions by another process */
354 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size + 1, 1);
356 /* limit size in order to avoid using up huge amounts of memory for
357 * in memory tdbs if an oddball huge record creeps in */
358 if (size > 100 * 1024) {
359 top_size = tdb->file->map_size + size * 2;
360 } else {
361 top_size = tdb->file->map_size + size * 100;
364 /* always make room for at least top_size more records, and at
365 least 25% more space. if the DB is smaller than 100MiB,
366 otherwise grow it by 10% only. */
367 if (tdb->file->map_size > 100 * 1024 * 1024) {
368 map_size = tdb->file->map_size * 1.10;
369 } else {
370 map_size = tdb->file->map_size * 1.25;
373 /* Round the database up to a multiple of the page size */
374 new_size = MAX(top_size, map_size);
375 size = TDB1_ALIGN(new_size, tdb->tdb1.page_size) - tdb->file->map_size;
377 if (!(tdb->flags & TDB_INTERNAL))
378 tdb1_munmap(tdb);
380 /* expand the file itself */
381 if (!(tdb->flags & TDB_INTERNAL)) {
382 if (tdb->tdb1.io->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
383 goto fail;
386 tdb->file->map_size += size;
388 if (tdb->flags & TDB_INTERNAL) {
389 char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
390 tdb->file->map_size);
391 if (!new_map_ptr) {
392 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
393 TDB_LOG_ERROR,
394 "tdb1_expand: no memory");
395 tdb->file->map_size -= size;
396 goto fail;
398 tdb->file->map_ptr = new_map_ptr;
399 } else {
400 if (tdb1_mmap(tdb) != 0) {
401 goto fail;
405 /* form a new freelist record */
406 memset(&rec,'\0',sizeof(rec));
407 rec.rec_len = size - sizeof(rec);
409 /* link it into the free list */
410 offset = tdb->file->map_size - size;
411 if (tdb1_free(tdb, offset, &rec) == -1)
412 goto fail;
414 tdb1_unlock(tdb, -1, F_WRLCK);
415 return 0;
416 fail:
417 tdb1_unlock(tdb, -1, F_WRLCK);
418 return -1;
421 /* read/write a tdb1_off_t */
422 int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
424 return tdb->tdb1.io->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
427 int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
429 tdb1_off_t off = *d;
430 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
434 /* read a lump of data, allocating the space for it */
435 unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len)
437 unsigned char *buf;
439 /* some systems don't like zero length malloc */
441 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
442 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
443 "tdb1_alloc_read malloc failed"
444 " len=%d (%s)",
445 len, strerror(errno));
446 return NULL;
448 if (tdb->tdb1.io->tdb1_read(tdb, offset, buf, len, 0) == -1) {
449 SAFE_FREE(buf);
450 return NULL;
452 return buf;
455 /* Give a piece of tdb data to a parser */
456 enum TDB_ERROR tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
457 tdb1_off_t offset, tdb1_len_t len,
458 enum TDB_ERROR (*parser)(TDB_DATA key,
459 TDB_DATA data,
460 void *private_data),
461 void *private_data)
463 TDB_DATA data;
464 enum TDB_ERROR result;
466 data.dsize = len;
468 if ((tdb->tdb1.transaction == NULL) && (tdb->file->map_ptr != NULL)) {
470 * Optimize by avoiding the malloc/memcpy/free, point the
471 * parser directly at the mmap area.
473 if (tdb->tdb1.io->tdb1_oob(tdb, offset+len, 0) != 0) {
474 return tdb->last_error;
476 data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
477 return parser(key, data, private_data);
480 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
481 return tdb->last_error;
484 result = parser(key, data, private_data);
485 free(data.dptr);
486 return result;
489 /* read/write a record */
490 int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
492 if (tdb->tdb1.io->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
493 return -1;
494 if (TDB1_BAD_MAGIC(rec)) {
495 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
496 "tdb1_rec_read bad magic 0x%x at offset=%d",
497 rec->magic, offset);
498 return -1;
500 return tdb->tdb1.io->tdb1_oob(tdb, rec->next+sizeof(*rec), 0);
503 int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
505 struct tdb1_record r = *rec;
506 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
509 static const struct tdb1_methods io1_methods = {
510 tdb1_read,
511 tdb1_write,
512 tdb1_next_hash_chain,
513 tdb1_oob,
514 tdb1_expand_file,
518 initialise the default methods table
520 void tdb1_io_init(struct tdb_context *tdb)
522 tdb->tdb1.io = &io1_methods;
525 enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb)
527 tdb->last_error = TDB_SUCCESS;
528 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size + 1, true);
529 return tdb->last_error;