doc: Fixes for the talloc dynamic type system tutorial.
[Samba/gbeck.git] / lib / tdb2 / io.c
blobe27a9fe832fb29acf36e76e43712b42924f3fa76
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
13 ** under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file *file)
34 if (file->fd == -1)
35 return;
37 if (file->map_ptr) {
38 munmap(file->map_ptr, file->map_size);
39 file->map_ptr = NULL;
43 enum TDB_ERROR tdb_mmap(struct tdb_context *tdb)
45 int mmap_flags;
47 if (tdb->flags & TDB_INTERNAL)
48 return TDB_SUCCESS;
50 #ifndef HAVE_INCOHERENT_MMAP
51 if (tdb->flags & TDB_NOMMAP)
52 return TDB_SUCCESS;
53 #endif
55 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
56 mmap_flags = PROT_READ;
57 else
58 mmap_flags = PROT_READ | PROT_WRITE;
60 /* size_t can be smaller than off_t. */
61 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
62 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
63 mmap_flags,
64 MAP_SHARED, tdb->file->fd, 0);
65 } else
66 tdb->file->map_ptr = MAP_FAILED;
69 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
71 if (tdb->file->map_ptr == MAP_FAILED) {
72 tdb->file->map_ptr = NULL;
73 #ifdef HAVE_INCOHERENT_MMAP
74 /* Incoherent mmap means everyone must mmap! */
75 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
76 "tdb_mmap failed for size %lld (%s)",
77 (long long)tdb->file->map_size,
78 strerror(errno));
79 #else
80 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
81 "tdb_mmap failed for size %lld (%s)",
82 (long long)tdb->file->map_size, strerror(errno));
83 #endif
85 return TDB_SUCCESS;
88 /* check for an out of bounds access - if it is out of bounds then
89 see if the database has been expanded by someone else and expand
90 if necessary
91 note that "len" is the minimum length needed for the db.
93 If probe is true, len being too large isn't a failure.
95 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb,
96 tdb_off_t off, tdb_len_t len, bool probe)
98 struct stat st;
99 enum TDB_ERROR ecode;
101 /* We can't hold pointers during this: we could unmap! */
102 assert(!tdb->tdb2.direct_access
103 || (tdb->flags & TDB_NOLOCK)
104 || tdb_has_expansion_lock(tdb));
106 if (len + off < len) {
107 if (probe)
108 return TDB_SUCCESS;
110 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob off %llu len %llu wrap\n",
112 (long long)off, (long long)len);
115 if (len + off <= tdb->file->map_size)
116 return TDB_SUCCESS;
117 if (tdb->flags & TDB_INTERNAL) {
118 if (probe)
119 return TDB_SUCCESS;
121 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
122 "tdb_oob len %lld beyond internal"
123 " malloc size %lld",
124 (long long)(off + len),
125 (long long)tdb->file->map_size);
126 return TDB_ERR_IO;
129 ecode = tdb_lock_expand(tdb, F_RDLCK);
130 if (ecode != TDB_SUCCESS) {
131 return ecode;
134 if (fstat(tdb->file->fd, &st) != 0) {
135 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
136 "Failed to fstat file: %s", strerror(errno));
137 tdb_unlock_expand(tdb, F_RDLCK);
138 return TDB_ERR_IO;
141 tdb_unlock_expand(tdb, F_RDLCK);
143 if (st.st_size < off + len) {
144 if (probe)
145 return TDB_SUCCESS;
147 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
148 "tdb_oob len %llu beyond eof at %llu",
149 (long long)(off + len), (long long)st.st_size);
150 return TDB_ERR_IO;
153 /* Unmap, update size, remap */
154 tdb_munmap(tdb->file);
156 tdb->file->map_size = st.st_size;
157 return tdb_mmap(tdb);
160 /* Endian conversion: we only ever deal with 8 byte quantities */
161 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
163 assert(size % 8 == 0);
164 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
165 uint64_t i, *p = (uint64_t *)buf;
166 for (i = 0; i < size / 8; i++)
167 p[i] = bswap_64(p[i]);
169 return buf;
172 /* Return first non-zero offset in offset array, or end, or -ve error. */
173 /* FIXME: Return the off? */
174 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
175 tdb_off_t base, uint64_t start, uint64_t end)
177 uint64_t i;
178 const uint64_t *val;
180 /* Zero vs non-zero is the same unconverted: minor optimization. */
181 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
182 (end - start) * sizeof(tdb_off_t), false);
183 if (TDB_PTR_IS_ERR(val)) {
184 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
187 for (i = 0; i < (end - start); i++) {
188 if (val[i])
189 break;
191 tdb_access_release(tdb, val);
192 return start + i;
195 /* Return first zero offset in num offset array, or num, or -ve error. */
196 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
197 uint64_t num)
199 uint64_t i;
200 const uint64_t *val;
202 /* Zero vs non-zero is the same unconverted: minor optimization. */
203 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
204 if (TDB_PTR_IS_ERR(val)) {
205 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
208 for (i = 0; i < num; i++) {
209 if (!val[i])
210 break;
212 tdb_access_release(tdb, val);
213 return i;
216 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
218 char buf[8192] = { 0 };
219 void *p = tdb->tdb2.io->direct(tdb, off, len, true);
220 enum TDB_ERROR ecode = TDB_SUCCESS;
222 assert(!(tdb->flags & TDB_RDONLY));
223 if (TDB_PTR_IS_ERR(p)) {
224 return TDB_PTR_ERR(p);
226 if (p) {
227 memset(p, 0, len);
228 return ecode;
230 while (len) {
231 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
232 ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
233 if (ecode != TDB_SUCCESS) {
234 break;
236 len -= todo;
237 off += todo;
239 return ecode;
242 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
244 tdb_off_t ret;
245 enum TDB_ERROR ecode;
247 if (likely(!(tdb->flags & TDB_CONVERT))) {
248 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
249 false);
250 if (TDB_PTR_IS_ERR(p)) {
251 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
253 if (p)
254 return *p;
257 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
258 if (ecode != TDB_SUCCESS) {
259 return TDB_ERR_TO_OFF(ecode);
261 return ret;
264 /* write a lump of data at a specified offset */
265 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
266 const void *buf, tdb_len_t len)
268 enum TDB_ERROR ecode;
270 if (tdb->flags & TDB_RDONLY) {
271 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
272 "Write to read-only database");
275 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
276 if (ecode != TDB_SUCCESS) {
277 return ecode;
280 if (tdb->file->map_ptr) {
281 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
282 } else {
283 #ifdef HAVE_INCOHERENT_MMAP
284 return TDB_ERR_IO;
285 #else
286 ssize_t ret;
287 ret = pwrite(tdb->file->fd, buf, len, off);
288 if (ret != len) {
289 /* This shouldn't happen: we avoid sparse files. */
290 if (ret >= 0)
291 errno = ENOSPC;
293 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
294 "tdb_write: %zi at %zu len=%zu (%s)",
295 ret, (size_t)off, (size_t)len,
296 strerror(errno));
298 #endif
300 return TDB_SUCCESS;
303 /* read a lump of data at a specified offset */
304 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
305 void *buf, tdb_len_t len)
307 enum TDB_ERROR ecode;
309 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
310 if (ecode != TDB_SUCCESS) {
311 return ecode;
314 if (tdb->file->map_ptr) {
315 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
316 } else {
317 #ifdef HAVE_INCOHERENT_MMAP
318 return TDB_ERR_IO;
319 #else
320 ssize_t r = pread(tdb->file->fd, buf, len, off);
321 if (r != len) {
322 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
323 "tdb_read failed with %zi at %zu "
324 "len=%zu (%s) map_size=%zu",
325 r, (size_t)off, (size_t)len,
326 strerror(errno),
327 (size_t)tdb->file->map_size);
329 #endif
331 return TDB_SUCCESS;
334 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
335 const void *rec, size_t len)
337 enum TDB_ERROR ecode;
339 if (unlikely((tdb->flags & TDB_CONVERT))) {
340 void *conv = malloc(len);
341 if (!conv) {
342 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
343 "tdb_write: no memory converting"
344 " %zu bytes", len);
346 memcpy(conv, rec, len);
347 ecode = tdb->tdb2.io->twrite(tdb, off,
348 tdb_convert(tdb, conv, len), len);
349 free(conv);
350 } else {
351 ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
353 return ecode;
356 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
357 void *rec, size_t len)
359 enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
360 tdb_convert(tdb, rec, len);
361 return ecode;
364 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
365 tdb_off_t off, tdb_off_t val)
367 if (tdb->flags & TDB_RDONLY) {
368 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
369 "Write to read-only database");
372 if (likely(!(tdb->flags & TDB_CONVERT))) {
373 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
374 true);
375 if (TDB_PTR_IS_ERR(p)) {
376 return TDB_PTR_ERR(p);
378 if (p) {
379 *p = val;
380 return TDB_SUCCESS;
383 return tdb_write_convert(tdb, off, &val, sizeof(val));
386 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
387 tdb_len_t len, unsigned int prefix)
389 unsigned char *buf;
390 enum TDB_ERROR ecode;
392 /* some systems don't like zero length malloc */
393 buf = malloc(prefix + len ? prefix + len : 1);
394 if (!buf) {
395 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
396 "tdb_alloc_read malloc failed len=%zu",
397 (size_t)(prefix + len));
398 return TDB_ERR_PTR(TDB_ERR_OOM);
399 } else {
400 ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
401 if (unlikely(ecode != TDB_SUCCESS)) {
402 free(buf);
403 return TDB_ERR_PTR(ecode);
406 return buf;
409 /* read a lump of data, allocating the space for it */
410 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
412 return _tdb_alloc_read(tdb, offset, len, 0);
415 static enum TDB_ERROR fill(struct tdb_context *tdb,
416 const void *buf, size_t size,
417 tdb_off_t off, tdb_len_t len)
419 while (len) {
420 size_t n = len > size ? size : len;
421 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
422 if (ret != n) {
423 if (ret >= 0)
424 errno = ENOSPC;
426 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
427 "fill failed:"
428 " %zi at %zu len=%zu (%s)",
429 ret, (size_t)off, (size_t)len,
430 strerror(errno));
432 len -= n;
433 off += n;
435 return TDB_SUCCESS;
438 /* expand a file. we prefer to use ftruncate, as that is what posix
439 says to use for mmap expansion */
440 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
441 tdb_len_t addition)
443 char buf[8192];
444 enum TDB_ERROR ecode;
446 if (tdb->flags & TDB_RDONLY) {
447 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
448 "Expand on read-only database");
451 if (tdb->flags & TDB_INTERNAL) {
452 char *new = realloc(tdb->file->map_ptr,
453 tdb->file->map_size + addition);
454 if (!new) {
455 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
456 "No memory to expand database");
458 tdb->file->map_ptr = new;
459 tdb->file->map_size += addition;
460 return TDB_SUCCESS;
461 } else {
462 /* Unmap before trying to write; old TDB claimed OpenBSD had
463 * problem with this otherwise. */
464 tdb_munmap(tdb->file);
466 /* If this fails, we try to fill anyway. */
467 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
470 /* now fill the file with something. This ensures that the
471 file isn't sparse, which would be very bad if we ran out of
472 disk. This must be done with write, not via mmap */
473 memset(buf, 0x43, sizeof(buf));
474 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
475 addition);
476 if (ecode != TDB_SUCCESS)
477 return ecode;
478 tdb->file->map_size += addition;
479 return tdb_mmap(tdb);
483 const void *tdb_access_read(struct tdb_context *tdb,
484 tdb_off_t off, tdb_len_t len, bool convert)
486 void *ret = NULL;
488 if (likely(!(tdb->flags & TDB_CONVERT))) {
489 ret = tdb->tdb2.io->direct(tdb, off, len, false);
491 if (TDB_PTR_IS_ERR(ret)) {
492 return ret;
495 if (!ret) {
496 struct tdb_access_hdr *hdr;
497 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
498 if (TDB_PTR_IS_ERR(hdr)) {
499 return hdr;
501 hdr->next = tdb->tdb2.access;
502 tdb->tdb2.access = hdr;
503 ret = hdr + 1;
504 if (convert) {
505 tdb_convert(tdb, (void *)ret, len);
507 } else
508 tdb->tdb2.direct_access++;
510 return ret;
513 void *tdb_access_write(struct tdb_context *tdb,
514 tdb_off_t off, tdb_len_t len, bool convert)
516 void *ret = NULL;
518 if (tdb->flags & TDB_RDONLY) {
519 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
520 "Write to read-only database");
521 return TDB_ERR_PTR(TDB_ERR_RDONLY);
524 if (likely(!(tdb->flags & TDB_CONVERT))) {
525 ret = tdb->tdb2.io->direct(tdb, off, len, true);
527 if (TDB_PTR_IS_ERR(ret)) {
528 return ret;
532 if (!ret) {
533 struct tdb_access_hdr *hdr;
534 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
535 if (TDB_PTR_IS_ERR(hdr)) {
536 return hdr;
538 hdr->next = tdb->tdb2.access;
539 tdb->tdb2.access = hdr;
540 hdr->off = off;
541 hdr->len = len;
542 hdr->convert = convert;
543 ret = hdr + 1;
544 if (convert)
545 tdb_convert(tdb, (void *)ret, len);
546 } else
547 tdb->tdb2.direct_access++;
549 return ret;
552 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
554 struct tdb_access_hdr **hp;
556 for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
557 if (*hp + 1 == p)
558 return hp;
560 return NULL;
563 void tdb_access_release(struct tdb_context *tdb, const void *p)
565 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
567 if (hp) {
568 hdr = *hp;
569 *hp = hdr->next;
570 free(hdr);
571 } else
572 tdb->tdb2.direct_access--;
575 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
577 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
578 enum TDB_ERROR ecode;
580 if (hp) {
581 hdr = *hp;
582 if (hdr->convert)
583 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
584 else
585 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
586 *hp = hdr->next;
587 free(hdr);
588 } else {
589 tdb->tdb2.direct_access--;
590 ecode = TDB_SUCCESS;
593 return ecode;
596 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
597 bool write_mode)
599 enum TDB_ERROR ecode;
601 if (unlikely(!tdb->file->map_ptr))
602 return NULL;
604 ecode = tdb_oob(tdb, off, len, false);
605 if (unlikely(ecode != TDB_SUCCESS))
606 return TDB_ERR_PTR(ecode);
607 return (char *)tdb->file->map_ptr + off;
610 void tdb_inc_seqnum(struct tdb_context *tdb)
612 tdb_off_t seq;
614 if (tdb->flags & TDB_VERSION1) {
615 tdb1_increment_seqnum_nonblock(tdb);
616 return;
619 if (likely(!(tdb->flags & TDB_CONVERT))) {
620 int64_t *direct;
622 direct = tdb->tdb2.io->direct(tdb,
623 offsetof(struct tdb_header,
624 seqnum),
625 sizeof(*direct), true);
626 if (likely(direct)) {
627 /* Don't let it go negative, even briefly */
628 if (unlikely((*direct) + 1) < 0)
629 *direct = 0;
630 (*direct)++;
631 return;
635 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
636 if (!TDB_OFF_IS_ERR(seq)) {
637 seq++;
638 if (unlikely((int64_t)seq < 0))
639 seq = 0;
640 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
644 static const struct tdb_methods io_methods = {
645 tdb_read,
646 tdb_write,
647 tdb_oob,
648 tdb_expand_file,
649 tdb_direct,
653 initialise the default methods table
655 void tdb_io_init(struct tdb_context *tdb)
657 tdb->tdb2.io = &io_methods;