ntdb: inline oob check
[Samba.git] / lib / ntdb / io.c
blob138a405dda3ff5679dfb70cbc36937c0f53b75f1
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the ntdb
12 ** library. This does NOT imply that all of Samba is released
13 ** under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
32 void ntdb_munmap(struct ntdb_file *file)
34 if (file->fd == -1)
35 return;
37 if (file->map_ptr) {
38 munmap(file->map_ptr, file->map_size);
39 file->map_ptr = NULL;
43 enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb)
45 int mmap_flags;
47 if (ntdb->flags & NTDB_INTERNAL)
48 return NTDB_SUCCESS;
50 #ifndef HAVE_INCOHERENT_MMAP
51 if (ntdb->flags & NTDB_NOMMAP)
52 return NTDB_SUCCESS;
53 #endif
55 if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY)
56 mmap_flags = PROT_READ;
57 else
58 mmap_flags = PROT_READ | PROT_WRITE;
60 /* size_t can be smaller than off_t. */
61 if ((size_t)ntdb->file->map_size == ntdb->file->map_size) {
62 ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size,
63 mmap_flags,
64 MAP_SHARED, ntdb->file->fd, 0);
65 } else
66 ntdb->file->map_ptr = MAP_FAILED;
69 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
71 if (ntdb->file->map_ptr == MAP_FAILED) {
72 ntdb->file->map_ptr = NULL;
73 #ifdef HAVE_INCOHERENT_MMAP
74 /* Incoherent mmap means everyone must mmap! */
75 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
76 "ntdb_mmap failed for size %lld (%s)",
77 (long long)ntdb->file->map_size,
78 strerror(errno));
79 #else
80 ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
81 "ntdb_mmap failed for size %lld (%s)",
82 (long long)ntdb->file->map_size, strerror(errno));
83 #endif
85 return NTDB_SUCCESS;
88 /* check for an out of bounds access - if it is out of bounds then
89 see if the database has been expanded by someone else and expand
90 if necessary
91 note that "len" is the minimum length needed for the db.
93 If probe is true, len being too large isn't a failure.
95 static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb,
96 ntdb_off_t off, ntdb_len_t len,
97 bool probe)
99 struct stat st;
100 enum NTDB_ERROR ecode;
102 /* We can't hold pointers during this: we could unmap! */
103 assert(!ntdb->direct_access
104 || (ntdb->flags & NTDB_NOLOCK)
105 || ntdb_has_expansion_lock(ntdb));
107 if (len + off < len) {
108 if (probe)
109 return NTDB_SUCCESS;
111 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
112 "ntdb_oob off %llu len %llu wrap\n",
113 (long long)off, (long long)len);
116 if (ntdb->flags & NTDB_INTERNAL) {
117 if (probe)
118 return NTDB_SUCCESS;
120 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
121 "ntdb_oob len %lld beyond internal"
122 " alloc size %lld",
123 (long long)(off + len),
124 (long long)ntdb->file->map_size);
125 return NTDB_ERR_IO;
128 ecode = ntdb_lock_expand(ntdb, F_RDLCK);
129 if (ecode != NTDB_SUCCESS) {
130 return ecode;
133 if (fstat(ntdb->file->fd, &st) != 0) {
134 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
135 "Failed to fstat file: %s", strerror(errno));
136 ntdb_unlock_expand(ntdb, F_RDLCK);
137 return NTDB_ERR_IO;
140 ntdb_unlock_expand(ntdb, F_RDLCK);
142 if (st.st_size < off + len) {
143 if (probe)
144 return NTDB_SUCCESS;
146 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
147 "ntdb_oob len %llu beyond eof at %llu",
148 (long long)(off + len), (long long)st.st_size);
149 return NTDB_ERR_IO;
152 /* Unmap, update size, remap */
153 ntdb_munmap(ntdb->file);
155 ntdb->file->map_size = st.st_size;
156 return ntdb_mmap(ntdb);
159 /* Endian conversion: we only ever deal with 8 byte quantities */
160 void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size)
162 assert(size % 8 == 0);
163 if (unlikely((ntdb->flags & NTDB_CONVERT)) && buf) {
164 uint64_t i, *p = (uint64_t *)buf;
165 for (i = 0; i < size / 8; i++)
166 p[i] = bswap_64(p[i]);
168 return buf;
171 /* Return first non-zero offset in offset array, or end, or -ve error. */
172 /* FIXME: Return the off? */
173 uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
174 ntdb_off_t base, uint64_t start, uint64_t end)
176 uint64_t i;
177 const uint64_t *val;
179 /* Zero vs non-zero is the same unconverted: minor optimization. */
180 val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t),
181 (end - start) * sizeof(ntdb_off_t), false);
182 if (NTDB_PTR_IS_ERR(val)) {
183 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val));
186 for (i = 0; i < (end - start); i++) {
187 if (val[i])
188 break;
190 ntdb_access_release(ntdb, val);
191 return start + i;
194 /* Return first zero offset in num offset array, or num, or -ve error. */
195 uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
196 uint64_t num)
198 uint64_t i;
199 const uint64_t *val;
201 /* Zero vs non-zero is the same unconverted: minor optimization. */
202 val = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false);
203 if (NTDB_PTR_IS_ERR(val)) {
204 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val));
207 for (i = 0; i < num; i++) {
208 if (!val[i])
209 break;
211 ntdb_access_release(ntdb, val);
212 return i;
215 enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len)
217 char buf[8192] = { 0 };
218 void *p = ntdb->io->direct(ntdb, off, len, true);
219 enum NTDB_ERROR ecode = NTDB_SUCCESS;
221 assert(!(ntdb->flags & NTDB_RDONLY));
222 if (NTDB_PTR_IS_ERR(p)) {
223 return NTDB_PTR_ERR(p);
225 if (p) {
226 memset(p, 0, len);
227 return ecode;
229 while (len) {
230 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
231 ecode = ntdb->io->twrite(ntdb, off, buf, todo);
232 if (ecode != NTDB_SUCCESS) {
233 break;
235 len -= todo;
236 off += todo;
238 return ecode;
241 ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, ntdb_off_t off)
243 ntdb_off_t ret;
244 enum NTDB_ERROR ecode;
246 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
247 ntdb_off_t *p = ntdb->io->direct(ntdb, off, sizeof(*p), false);
248 if (NTDB_PTR_IS_ERR(p)) {
249 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p));
251 if (p)
252 return *p;
255 ecode = ntdb_read_convert(ntdb, off, &ret, sizeof(ret));
256 if (ecode != NTDB_SUCCESS) {
257 return NTDB_ERR_TO_OFF(ecode);
259 return ret;
262 /* write a lump of data at a specified offset */
263 static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off,
264 const void *buf, ntdb_len_t len)
266 enum NTDB_ERROR ecode;
268 if (ntdb->flags & NTDB_RDONLY) {
269 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
270 "Write to read-only database");
273 ecode = ntdb_oob(ntdb, off, len, false);
274 if (ecode != NTDB_SUCCESS) {
275 return ecode;
278 if (ntdb->file->map_ptr) {
279 memcpy(off + (char *)ntdb->file->map_ptr, buf, len);
280 } else {
281 #ifdef HAVE_INCOHERENT_MMAP
282 return NTDB_ERR_IO;
283 #else
284 ssize_t ret;
285 ret = pwrite(ntdb->file->fd, buf, len, off);
286 if (ret != len) {
287 /* This shouldn't happen: we avoid sparse files. */
288 if (ret >= 0)
289 errno = ENOSPC;
291 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
292 "ntdb_write: %zi at %zu len=%zu (%s)",
293 ret, (size_t)off, (size_t)len,
294 strerror(errno));
296 #endif
298 return NTDB_SUCCESS;
301 /* read a lump of data at a specified offset */
302 static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off,
303 void *buf, ntdb_len_t len)
305 enum NTDB_ERROR ecode;
307 ecode = ntdb_oob(ntdb, off, len, false);
308 if (ecode != NTDB_SUCCESS) {
309 return ecode;
312 if (ntdb->file->map_ptr) {
313 memcpy(buf, off + (char *)ntdb->file->map_ptr, len);
314 } else {
315 #ifdef HAVE_INCOHERENT_MMAP
316 return NTDB_ERR_IO;
317 #else
318 ssize_t r = pread(ntdb->file->fd, buf, len, off);
319 if (r != len) {
320 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
321 "ntdb_read failed with %zi at %zu "
322 "len=%zu (%s) map_size=%zu",
323 r, (size_t)off, (size_t)len,
324 strerror(errno),
325 (size_t)ntdb->file->map_size);
327 #endif
329 return NTDB_SUCCESS;
332 enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
333 const void *rec, size_t len)
335 enum NTDB_ERROR ecode;
337 if (unlikely((ntdb->flags & NTDB_CONVERT))) {
338 void *conv = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
339 if (!conv) {
340 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
341 "ntdb_write: no memory converting"
342 " %zu bytes", len);
344 memcpy(conv, rec, len);
345 ecode = ntdb->io->twrite(ntdb, off,
346 ntdb_convert(ntdb, conv, len), len);
347 ntdb->free_fn(conv, ntdb->alloc_data);
348 } else {
349 ecode = ntdb->io->twrite(ntdb, off, rec, len);
351 return ecode;
354 enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
355 void *rec, size_t len)
357 enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len);
358 ntdb_convert(ntdb, rec, len);
359 return ecode;
362 enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb,
363 ntdb_off_t off, ntdb_off_t val)
365 if (ntdb->flags & NTDB_RDONLY) {
366 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
367 "Write to read-only database");
370 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
371 ntdb_off_t *p = ntdb->io->direct(ntdb, off, sizeof(*p), true);
372 if (NTDB_PTR_IS_ERR(p)) {
373 return NTDB_PTR_ERR(p);
375 if (p) {
376 *p = val;
377 return NTDB_SUCCESS;
380 return ntdb_write_convert(ntdb, off, &val, sizeof(val));
383 static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset,
384 ntdb_len_t len, unsigned int prefix)
386 unsigned char *buf;
387 enum NTDB_ERROR ecode;
389 /* some systems don't like zero length malloc */
390 buf = ntdb->alloc_fn(ntdb, prefix + len ? prefix + len : 1,
391 ntdb->alloc_data);
392 if (!buf) {
393 ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_USE_ERROR,
394 "ntdb_alloc_read alloc failed len=%zu",
395 (size_t)(prefix + len));
396 return NTDB_ERR_PTR(NTDB_ERR_OOM);
397 } else {
398 ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len);
399 if (unlikely(ecode != NTDB_SUCCESS)) {
400 ntdb->free_fn(buf, ntdb->alloc_data);
401 return NTDB_ERR_PTR(ecode);
404 return buf;
407 /* read a lump of data, allocating the space for it */
408 void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len)
410 return _ntdb_alloc_read(ntdb, offset, len, 0);
413 static enum NTDB_ERROR fill(struct ntdb_context *ntdb,
414 const void *buf, size_t size,
415 ntdb_off_t off, ntdb_len_t len)
417 while (len) {
418 size_t n = len > size ? size : len;
419 ssize_t ret = pwrite(ntdb->file->fd, buf, n, off);
420 if (ret != n) {
421 if (ret >= 0)
422 errno = ENOSPC;
424 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
425 "fill failed:"
426 " %zi at %zu len=%zu (%s)",
427 ret, (size_t)off, (size_t)len,
428 strerror(errno));
430 len -= n;
431 off += n;
433 return NTDB_SUCCESS;
436 /* expand a file. we prefer to use ftruncate, as that is what posix
437 says to use for mmap expansion */
438 static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb,
439 ntdb_len_t addition)
441 char buf[8192];
442 enum NTDB_ERROR ecode;
444 assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0);
445 if (ntdb->flags & NTDB_RDONLY) {
446 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
447 "Expand on read-only database");
450 if (ntdb->flags & NTDB_INTERNAL) {
451 char *new = ntdb->expand_fn(ntdb->file->map_ptr,
452 ntdb->file->map_size + addition,
453 ntdb->alloc_data);
454 if (!new) {
455 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
456 "No memory to expand database");
458 ntdb->file->map_ptr = new;
459 ntdb->file->map_size += addition;
460 return NTDB_SUCCESS;
461 } else {
462 /* Unmap before trying to write; old NTDB claimed OpenBSD had
463 * problem with this otherwise. */
464 ntdb_munmap(ntdb->file);
466 /* If this fails, we try to fill anyway. */
467 if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition))
470 /* now fill the file with something. This ensures that the
471 file isn't sparse, which would be very bad if we ran out of
472 disk. This must be done with write, not via mmap */
473 memset(buf, 0x43, sizeof(buf));
474 ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size,
475 addition);
476 if (ecode != NTDB_SUCCESS)
477 return ecode;
478 ntdb->file->map_size += addition;
479 return ntdb_mmap(ntdb);
483 const void *ntdb_access_read(struct ntdb_context *ntdb,
484 ntdb_off_t off, ntdb_len_t len, bool convert)
486 void *ret = NULL;
488 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
489 ret = ntdb->io->direct(ntdb, off, len, false);
491 if (NTDB_PTR_IS_ERR(ret)) {
492 return ret;
495 if (!ret) {
496 struct ntdb_access_hdr *hdr;
497 hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
498 if (NTDB_PTR_IS_ERR(hdr)) {
499 return hdr;
501 hdr->next = ntdb->access;
502 ntdb->access = hdr;
503 ret = hdr + 1;
504 if (convert) {
505 ntdb_convert(ntdb, (void *)ret, len);
507 } else
508 ntdb->direct_access++;
510 return ret;
513 void *ntdb_access_write(struct ntdb_context *ntdb,
514 ntdb_off_t off, ntdb_len_t len, bool convert)
516 void *ret = NULL;
518 if (ntdb->flags & NTDB_RDONLY) {
519 ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
520 "Write to read-only database");
521 return NTDB_ERR_PTR(NTDB_ERR_RDONLY);
524 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
525 ret = ntdb->io->direct(ntdb, off, len, true);
527 if (NTDB_PTR_IS_ERR(ret)) {
528 return ret;
532 if (!ret) {
533 struct ntdb_access_hdr *hdr;
534 hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
535 if (NTDB_PTR_IS_ERR(hdr)) {
536 return hdr;
538 hdr->next = ntdb->access;
539 ntdb->access = hdr;
540 hdr->off = off;
541 hdr->len = len;
542 hdr->convert = convert;
543 ret = hdr + 1;
544 if (convert)
545 ntdb_convert(ntdb, (void *)ret, len);
546 } else
547 ntdb->direct_access++;
549 return ret;
552 static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p)
554 struct ntdb_access_hdr **hp;
556 for (hp = &ntdb->access; *hp; hp = &(*hp)->next) {
557 if (*hp + 1 == p)
558 return hp;
560 return NULL;
563 void ntdb_access_release(struct ntdb_context *ntdb, const void *p)
565 struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p);
567 if (hp) {
568 hdr = *hp;
569 *hp = hdr->next;
570 ntdb->free_fn(hdr, ntdb->alloc_data);
571 } else
572 ntdb->direct_access--;
575 enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p)
577 struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p);
578 enum NTDB_ERROR ecode;
580 if (hp) {
581 hdr = *hp;
582 if (hdr->convert)
583 ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len);
584 else
585 ecode = ntdb_write(ntdb, hdr->off, p, hdr->len);
586 *hp = hdr->next;
587 ntdb->free_fn(hdr, ntdb->alloc_data);
588 } else {
589 ntdb->direct_access--;
590 ecode = NTDB_SUCCESS;
593 return ecode;
596 static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len,
597 bool write_mode)
599 enum NTDB_ERROR ecode;
601 if (unlikely(!ntdb->file->map_ptr))
602 return NULL;
604 ecode = ntdb_oob(ntdb, off, len, false);
605 if (unlikely(ecode != NTDB_SUCCESS))
606 return NTDB_ERR_PTR(ecode);
607 return (char *)ntdb->file->map_ptr + off;
610 void ntdb_inc_seqnum(struct ntdb_context *ntdb)
612 ntdb_off_t seq;
614 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
615 int64_t *direct;
617 direct = ntdb->io->direct(ntdb,
618 offsetof(struct ntdb_header, seqnum),
619 sizeof(*direct), true);
620 if (likely(direct)) {
621 /* Don't let it go negative, even briefly */
622 if (unlikely((*direct) + 1) < 0)
623 *direct = 0;
624 (*direct)++;
625 return;
629 seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
630 if (!NTDB_OFF_IS_ERR(seq)) {
631 seq++;
632 if (unlikely((int64_t)seq < 0))
633 seq = 0;
634 ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq);
638 static const struct ntdb_methods io_methods = {
639 ntdb_read,
640 ntdb_write,
641 ntdb_normal_oob,
642 ntdb_expand_file,
643 ntdb_direct,
647 initialise the default methods table
649 void ntdb_io_init(struct ntdb_context *ntdb)
651 ntdb->io = &io_methods;