s4:torture: cleanup after smb2 setinfo scan
[Samba/gebeck_regimport.git] / lib / tdb2 / io.c
blobb4a6f0beae11160ac37ab7987d957c81e92612ac
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
13 ** under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file *file)
34 if (file->fd == -1)
35 return;
37 if (file->map_ptr) {
38 munmap(file->map_ptr, file->map_size);
39 file->map_ptr = NULL;
43 void tdb_mmap(struct tdb_context *tdb)
45 int mmap_flags;
47 if (tdb->flags & TDB_INTERNAL)
48 return;
50 if (tdb->flags & TDB_NOMMAP)
51 return;
53 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
54 mmap_flags = PROT_READ;
55 else
56 mmap_flags = PROT_READ | PROT_WRITE;
58 /* size_t can be smaller than off_t. */
59 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
60 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
61 mmap_flags,
62 MAP_SHARED, tdb->file->fd, 0);
63 } else
64 tdb->file->map_ptr = MAP_FAILED;
67 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
69 if (tdb->file->map_ptr == MAP_FAILED) {
70 tdb->file->map_ptr = NULL;
71 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
72 "tdb_mmap failed for size %lld (%s)",
73 (long long)tdb->file->map_size, strerror(errno));
77 /* check for an out of bounds access - if it is out of bounds then
78 see if the database has been expanded by someone else and expand
79 if necessary
80 note that "len" is the minimum length needed for the db.
82 If probe is true, len being too large isn't a failure.
84 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb,
85 tdb_off_t off, tdb_len_t len, bool probe)
87 struct stat st;
88 enum TDB_ERROR ecode;
90 /* We can't hold pointers during this: we could unmap! */
91 assert(!tdb->tdb2.direct_access
92 || (tdb->flags & TDB_NOLOCK)
93 || tdb_has_expansion_lock(tdb));
95 if (len + off < len) {
96 if (probe)
97 return TDB_SUCCESS;
99 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
100 "tdb_oob off %llu len %llu wrap\n",
101 (long long)off, (long long)len);
104 if (len + off <= tdb->file->map_size)
105 return TDB_SUCCESS;
106 if (tdb->flags & TDB_INTERNAL) {
107 if (probe)
108 return TDB_SUCCESS;
110 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob len %lld beyond internal"
112 " malloc size %lld",
113 (long long)(off + len),
114 (long long)tdb->file->map_size);
115 return TDB_ERR_IO;
118 ecode = tdb_lock_expand(tdb, F_RDLCK);
119 if (ecode != TDB_SUCCESS) {
120 return ecode;
123 if (fstat(tdb->file->fd, &st) != 0) {
124 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
125 "Failed to fstat file: %s", strerror(errno));
126 tdb_unlock_expand(tdb, F_RDLCK);
127 return TDB_ERR_IO;
130 tdb_unlock_expand(tdb, F_RDLCK);
132 if (st.st_size < off + len) {
133 if (probe)
134 return TDB_SUCCESS;
136 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
137 "tdb_oob len %llu beyond eof at %zu",
138 (long long)(off + len), st.st_size);
139 return TDB_ERR_IO;
142 /* Unmap, update size, remap */
143 tdb_munmap(tdb->file);
145 tdb->file->map_size = st.st_size;
146 tdb_mmap(tdb);
147 return TDB_SUCCESS;
150 /* Endian conversion: we only ever deal with 8 byte quantities */
151 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
153 assert(size % 8 == 0);
154 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
155 uint64_t i, *p = (uint64_t *)buf;
156 for (i = 0; i < size / 8; i++)
157 p[i] = bswap_64(p[i]);
159 return buf;
162 /* Return first non-zero offset in offset array, or end, or -ve error. */
163 /* FIXME: Return the off? */
164 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
165 tdb_off_t base, uint64_t start, uint64_t end)
167 uint64_t i;
168 const uint64_t *val;
170 /* Zero vs non-zero is the same unconverted: minor optimization. */
171 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
172 (end - start) * sizeof(tdb_off_t), false);
173 if (TDB_PTR_IS_ERR(val)) {
174 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
177 for (i = 0; i < (end - start); i++) {
178 if (val[i])
179 break;
181 tdb_access_release(tdb, val);
182 return start + i;
185 /* Return first zero offset in num offset array, or num, or -ve error. */
186 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
187 uint64_t num)
189 uint64_t i;
190 const uint64_t *val;
192 /* Zero vs non-zero is the same unconverted: minor optimization. */
193 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
194 if (TDB_PTR_IS_ERR(val)) {
195 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
198 for (i = 0; i < num; i++) {
199 if (!val[i])
200 break;
202 tdb_access_release(tdb, val);
203 return i;
206 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
208 char buf[8192] = { 0 };
209 void *p = tdb->tdb2.io->direct(tdb, off, len, true);
210 enum TDB_ERROR ecode = TDB_SUCCESS;
212 assert(!(tdb->flags & TDB_RDONLY));
213 if (TDB_PTR_IS_ERR(p)) {
214 return TDB_PTR_ERR(p);
216 if (p) {
217 memset(p, 0, len);
218 return ecode;
220 while (len) {
221 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
222 ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
223 if (ecode != TDB_SUCCESS) {
224 break;
226 len -= todo;
227 off += todo;
229 return ecode;
232 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
234 tdb_off_t ret;
235 enum TDB_ERROR ecode;
237 if (likely(!(tdb->flags & TDB_CONVERT))) {
238 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
239 false);
240 if (TDB_PTR_IS_ERR(p)) {
241 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
243 if (p)
244 return *p;
247 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
248 if (ecode != TDB_SUCCESS) {
249 return TDB_ERR_TO_OFF(ecode);
251 return ret;
254 /* write a lump of data at a specified offset */
255 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
256 const void *buf, tdb_len_t len)
258 enum TDB_ERROR ecode;
260 if (tdb->flags & TDB_RDONLY) {
261 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
262 "Write to read-only database");
265 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
266 if (ecode != TDB_SUCCESS) {
267 return ecode;
270 if (tdb->file->map_ptr) {
271 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
272 } else {
273 ssize_t ret;
274 ret = pwrite(tdb->file->fd, buf, len, off);
275 if (ret != len) {
276 /* This shouldn't happen: we avoid sparse files. */
277 if (ret >= 0)
278 errno = ENOSPC;
280 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
281 "tdb_write: %zi at %zu len=%zu (%s)",
282 ret, (size_t)off, (size_t)len,
283 strerror(errno));
286 return TDB_SUCCESS;
289 /* read a lump of data at a specified offset */
290 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
291 void *buf, tdb_len_t len)
293 enum TDB_ERROR ecode;
295 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
296 if (ecode != TDB_SUCCESS) {
297 return ecode;
300 if (tdb->file->map_ptr) {
301 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
302 } else {
303 ssize_t r = pread(tdb->file->fd, buf, len, off);
304 if (r != len) {
305 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
306 "tdb_read failed with %zi at %zu "
307 "len=%zu (%s) map_size=%zu",
308 r, (size_t)off, (size_t)len,
309 strerror(errno),
310 (size_t)tdb->file->map_size);
313 return TDB_SUCCESS;
316 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
317 const void *rec, size_t len)
319 enum TDB_ERROR ecode;
321 if (unlikely((tdb->flags & TDB_CONVERT))) {
322 void *conv = malloc(len);
323 if (!conv) {
324 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
325 "tdb_write: no memory converting"
326 " %zu bytes", len);
328 memcpy(conv, rec, len);
329 ecode = tdb->tdb2.io->twrite(tdb, off,
330 tdb_convert(tdb, conv, len), len);
331 free(conv);
332 } else {
333 ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
335 return ecode;
338 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
339 void *rec, size_t len)
341 enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
342 tdb_convert(tdb, rec, len);
343 return ecode;
346 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
347 tdb_off_t off, tdb_off_t val)
349 if (tdb->flags & TDB_RDONLY) {
350 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
351 "Write to read-only database");
354 if (likely(!(tdb->flags & TDB_CONVERT))) {
355 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
356 true);
357 if (TDB_PTR_IS_ERR(p)) {
358 return TDB_PTR_ERR(p);
360 if (p) {
361 *p = val;
362 return TDB_SUCCESS;
365 return tdb_write_convert(tdb, off, &val, sizeof(val));
368 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
369 tdb_len_t len, unsigned int prefix)
371 unsigned char *buf;
372 enum TDB_ERROR ecode;
374 /* some systems don't like zero length malloc */
375 buf = malloc(prefix + len ? prefix + len : 1);
376 if (!buf) {
377 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
378 "tdb_alloc_read malloc failed len=%zu",
379 (size_t)(prefix + len));
380 return TDB_ERR_PTR(TDB_ERR_OOM);
381 } else {
382 ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
383 if (unlikely(ecode != TDB_SUCCESS)) {
384 free(buf);
385 return TDB_ERR_PTR(ecode);
388 return buf;
391 /* read a lump of data, allocating the space for it */
392 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
394 return _tdb_alloc_read(tdb, offset, len, 0);
397 static enum TDB_ERROR fill(struct tdb_context *tdb,
398 const void *buf, size_t size,
399 tdb_off_t off, tdb_len_t len)
401 while (len) {
402 size_t n = len > size ? size : len;
403 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
404 if (ret != n) {
405 if (ret >= 0)
406 errno = ENOSPC;
408 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
409 "fill failed:"
410 " %zi at %zu len=%zu (%s)",
411 ret, (size_t)off, (size_t)len,
412 strerror(errno));
414 len -= n;
415 off += n;
417 return TDB_SUCCESS;
420 /* expand a file. we prefer to use ftruncate, as that is what posix
421 says to use for mmap expansion */
422 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
423 tdb_len_t addition)
425 char buf[8192];
426 enum TDB_ERROR ecode;
428 if (tdb->flags & TDB_RDONLY) {
429 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
430 "Expand on read-only database");
433 if (tdb->flags & TDB_INTERNAL) {
434 char *new = realloc(tdb->file->map_ptr,
435 tdb->file->map_size + addition);
436 if (!new) {
437 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
438 "No memory to expand database");
440 tdb->file->map_ptr = new;
441 tdb->file->map_size += addition;
442 } else {
443 /* Unmap before trying to write; old TDB claimed OpenBSD had
444 * problem with this otherwise. */
445 tdb_munmap(tdb->file);
447 /* If this fails, we try to fill anyway. */
448 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
451 /* now fill the file with something. This ensures that the
452 file isn't sparse, which would be very bad if we ran out of
453 disk. This must be done with write, not via mmap */
454 memset(buf, 0x43, sizeof(buf));
455 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
456 addition);
457 if (ecode != TDB_SUCCESS)
458 return ecode;
459 tdb->file->map_size += addition;
460 tdb_mmap(tdb);
462 return TDB_SUCCESS;
465 const void *tdb_access_read(struct tdb_context *tdb,
466 tdb_off_t off, tdb_len_t len, bool convert)
468 void *ret = NULL;
470 if (likely(!(tdb->flags & TDB_CONVERT))) {
471 ret = tdb->tdb2.io->direct(tdb, off, len, false);
473 if (TDB_PTR_IS_ERR(ret)) {
474 return ret;
477 if (!ret) {
478 struct tdb_access_hdr *hdr;
479 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
480 if (TDB_PTR_IS_ERR(hdr)) {
481 return hdr;
483 hdr->next = tdb->tdb2.access;
484 tdb->tdb2.access = hdr;
485 ret = hdr + 1;
486 if (convert) {
487 tdb_convert(tdb, (void *)ret, len);
489 } else
490 tdb->tdb2.direct_access++;
492 return ret;
495 void *tdb_access_write(struct tdb_context *tdb,
496 tdb_off_t off, tdb_len_t len, bool convert)
498 void *ret = NULL;
500 if (tdb->flags & TDB_RDONLY) {
501 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
502 "Write to read-only database");
503 return TDB_ERR_PTR(TDB_ERR_RDONLY);
506 if (likely(!(tdb->flags & TDB_CONVERT))) {
507 ret = tdb->tdb2.io->direct(tdb, off, len, true);
509 if (TDB_PTR_IS_ERR(ret)) {
510 return ret;
514 if (!ret) {
515 struct tdb_access_hdr *hdr;
516 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
517 if (TDB_PTR_IS_ERR(hdr)) {
518 return hdr;
520 hdr->next = tdb->tdb2.access;
521 tdb->tdb2.access = hdr;
522 hdr->off = off;
523 hdr->len = len;
524 hdr->convert = convert;
525 ret = hdr + 1;
526 if (convert)
527 tdb_convert(tdb, (void *)ret, len);
528 } else
529 tdb->tdb2.direct_access++;
531 return ret;
534 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
536 struct tdb_access_hdr **hp;
538 for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
539 if (*hp + 1 == p)
540 return hp;
542 return NULL;
545 void tdb_access_release(struct tdb_context *tdb, const void *p)
547 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
549 if (hp) {
550 hdr = *hp;
551 *hp = hdr->next;
552 free(hdr);
553 } else
554 tdb->tdb2.direct_access--;
557 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
559 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
560 enum TDB_ERROR ecode;
562 if (hp) {
563 hdr = *hp;
564 if (hdr->convert)
565 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
566 else
567 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
568 *hp = hdr->next;
569 free(hdr);
570 } else {
571 tdb->tdb2.direct_access--;
572 ecode = TDB_SUCCESS;
575 return ecode;
578 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
579 bool write_mode)
581 enum TDB_ERROR ecode;
583 if (unlikely(!tdb->file->map_ptr))
584 return NULL;
586 ecode = tdb_oob(tdb, off, len, false);
587 if (unlikely(ecode != TDB_SUCCESS))
588 return TDB_ERR_PTR(ecode);
589 return (char *)tdb->file->map_ptr + off;
592 void tdb_inc_seqnum(struct tdb_context *tdb)
594 tdb_off_t seq;
596 if (tdb->flags & TDB_VERSION1) {
597 tdb1_increment_seqnum_nonblock(tdb);
598 return;
601 if (likely(!(tdb->flags & TDB_CONVERT))) {
602 int64_t *direct;
604 direct = tdb->tdb2.io->direct(tdb,
605 offsetof(struct tdb_header,
606 seqnum),
607 sizeof(*direct), true);
608 if (likely(direct)) {
609 /* Don't let it go negative, even briefly */
610 if (unlikely((*direct) + 1) < 0)
611 *direct = 0;
612 (*direct)++;
613 return;
617 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
618 if (!TDB_OFF_IS_ERR(seq)) {
619 seq++;
620 if (unlikely((int64_t)seq < 0))
621 seq = 0;
622 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
626 static const struct tdb_methods io_methods = {
627 tdb_read,
628 tdb_write,
629 tdb_oob,
630 tdb_expand_file,
631 tdb_direct,
635 initialise the default methods table
637 void tdb_io_init(struct tdb_context *tdb)
639 tdb->tdb2.io = &io_methods;