samba-tool: Move main command implementation to samba.netcmd.main, so it is accessibl...
[Samba/gebeck_regimport.git] / lib / tdb2 / io.c
blobafab0c1a4cc37879abd9ffcd626dc9b4a49d20e0
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
13 ** under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file *file)
34 if (file->fd == -1)
35 return;
37 if (file->map_ptr) {
38 munmap(file->map_ptr, file->map_size);
39 file->map_ptr = NULL;
43 void tdb_mmap(struct tdb_context *tdb)
45 int mmap_flags;
47 if (tdb->flags & TDB_INTERNAL)
48 return;
50 if (tdb->flags & TDB_NOMMAP)
51 return;
53 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
54 mmap_flags = PROT_READ;
55 else
56 mmap_flags = PROT_READ | PROT_WRITE;
58 /* size_t can be smaller than off_t. */
59 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
60 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
61 mmap_flags,
62 MAP_SHARED, tdb->file->fd, 0);
63 } else
64 tdb->file->map_ptr = MAP_FAILED;
67 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
69 if (tdb->file->map_ptr == MAP_FAILED) {
70 tdb->file->map_ptr = NULL;
71 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
72 "tdb_mmap failed for size %lld (%s)",
73 (long long)tdb->file->map_size, strerror(errno));
77 /* check for an out of bounds access - if it is out of bounds then
78 see if the database has been expanded by someone else and expand
79 if necessary
80 note that "len" is the minimum length needed for the db.
82 If probe is true, len being too large isn't a failure.
84 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
85 bool probe)
87 struct stat st;
88 enum TDB_ERROR ecode;
90 /* We can't hold pointers during this: we could unmap! */
91 assert(!tdb->tdb2.direct_access
92 || (tdb->flags & TDB_NOLOCK)
93 || tdb_has_expansion_lock(tdb));
95 if (len <= tdb->file->map_size)
96 return TDB_SUCCESS;
97 if (tdb->flags & TDB_INTERNAL) {
98 if (probe)
99 return TDB_SUCCESS;
101 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
102 "tdb_oob len %lld beyond internal"
103 " malloc size %lld",
104 (long long)len,
105 (long long)tdb->file->map_size);
106 return TDB_ERR_IO;
109 ecode = tdb_lock_expand(tdb, F_RDLCK);
110 if (ecode != TDB_SUCCESS) {
111 return ecode;
114 if (fstat(tdb->file->fd, &st) != 0) {
115 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116 "Failed to fstat file: %s", strerror(errno));
117 tdb_unlock_expand(tdb, F_RDLCK);
118 return TDB_ERR_IO;
121 tdb_unlock_expand(tdb, F_RDLCK);
123 if (st.st_size < (size_t)len) {
124 if (probe)
125 return TDB_SUCCESS;
127 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
128 "tdb_oob len %zu beyond eof at %zu",
129 (size_t)len, st.st_size);
130 return TDB_ERR_IO;
133 /* Unmap, update size, remap */
134 tdb_munmap(tdb->file);
136 tdb->file->map_size = st.st_size;
137 tdb_mmap(tdb);
138 return TDB_SUCCESS;
141 /* Endian conversion: we only ever deal with 8 byte quantities */
142 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
144 assert(size % 8 == 0);
145 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
146 uint64_t i, *p = (uint64_t *)buf;
147 for (i = 0; i < size / 8; i++)
148 p[i] = bswap_64(p[i]);
150 return buf;
153 /* Return first non-zero offset in offset array, or end, or -ve error. */
154 /* FIXME: Return the off? */
155 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
156 tdb_off_t base, uint64_t start, uint64_t end)
158 uint64_t i;
159 const uint64_t *val;
161 /* Zero vs non-zero is the same unconverted: minor optimization. */
162 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
163 (end - start) * sizeof(tdb_off_t), false);
164 if (TDB_PTR_IS_ERR(val)) {
165 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
168 for (i = 0; i < (end - start); i++) {
169 if (val[i])
170 break;
172 tdb_access_release(tdb, val);
173 return start + i;
176 /* Return first zero offset in num offset array, or num, or -ve error. */
177 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
178 uint64_t num)
180 uint64_t i;
181 const uint64_t *val;
183 /* Zero vs non-zero is the same unconverted: minor optimization. */
184 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
185 if (TDB_PTR_IS_ERR(val)) {
186 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
189 for (i = 0; i < num; i++) {
190 if (!val[i])
191 break;
193 tdb_access_release(tdb, val);
194 return i;
197 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
199 char buf[8192] = { 0 };
200 void *p = tdb->tdb2.io->direct(tdb, off, len, true);
201 enum TDB_ERROR ecode = TDB_SUCCESS;
203 assert(!(tdb->flags & TDB_RDONLY));
204 if (TDB_PTR_IS_ERR(p)) {
205 return TDB_PTR_ERR(p);
207 if (p) {
208 memset(p, 0, len);
209 return ecode;
211 while (len) {
212 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
213 ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
214 if (ecode != TDB_SUCCESS) {
215 break;
217 len -= todo;
218 off += todo;
220 return ecode;
223 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
225 tdb_off_t ret;
226 enum TDB_ERROR ecode;
228 if (likely(!(tdb->flags & TDB_CONVERT))) {
229 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
230 false);
231 if (TDB_PTR_IS_ERR(p)) {
232 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
234 if (p)
235 return *p;
238 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
239 if (ecode != TDB_SUCCESS) {
240 return TDB_ERR_TO_OFF(ecode);
242 return ret;
245 /* write a lump of data at a specified offset */
246 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
247 const void *buf, tdb_len_t len)
249 enum TDB_ERROR ecode;
251 if (tdb->flags & TDB_RDONLY) {
252 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
253 "Write to read-only database");
256 ecode = tdb->tdb2.io->oob(tdb, off + len, false);
257 if (ecode != TDB_SUCCESS) {
258 return ecode;
261 if (tdb->file->map_ptr) {
262 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
263 } else {
264 ssize_t ret;
265 ret = pwrite(tdb->file->fd, buf, len, off);
266 if (ret != len) {
267 /* This shouldn't happen: we avoid sparse files. */
268 if (ret >= 0)
269 errno = ENOSPC;
271 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
272 "tdb_write: %zi at %zu len=%zu (%s)",
273 ret, (size_t)off, (size_t)len,
274 strerror(errno));
277 return TDB_SUCCESS;
280 /* read a lump of data at a specified offset */
281 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
282 void *buf, tdb_len_t len)
284 enum TDB_ERROR ecode;
286 ecode = tdb->tdb2.io->oob(tdb, off + len, false);
287 if (ecode != TDB_SUCCESS) {
288 return ecode;
291 if (tdb->file->map_ptr) {
292 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
293 } else {
294 ssize_t r = pread(tdb->file->fd, buf, len, off);
295 if (r != len) {
296 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
297 "tdb_read failed with %zi at %zu "
298 "len=%zu (%s) map_size=%zu",
299 r, (size_t)off, (size_t)len,
300 strerror(errno),
301 (size_t)tdb->file->map_size);
304 return TDB_SUCCESS;
307 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
308 const void *rec, size_t len)
310 enum TDB_ERROR ecode;
312 if (unlikely((tdb->flags & TDB_CONVERT))) {
313 void *conv = malloc(len);
314 if (!conv) {
315 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
316 "tdb_write: no memory converting"
317 " %zu bytes", len);
319 memcpy(conv, rec, len);
320 ecode = tdb->tdb2.io->twrite(tdb, off,
321 tdb_convert(tdb, conv, len), len);
322 free(conv);
323 } else {
324 ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
326 return ecode;
329 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
330 void *rec, size_t len)
332 enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
333 tdb_convert(tdb, rec, len);
334 return ecode;
337 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
338 tdb_off_t off, tdb_off_t val)
340 if (tdb->flags & TDB_RDONLY) {
341 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
342 "Write to read-only database");
345 if (likely(!(tdb->flags & TDB_CONVERT))) {
346 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
347 true);
348 if (TDB_PTR_IS_ERR(p)) {
349 return TDB_PTR_ERR(p);
351 if (p) {
352 *p = val;
353 return TDB_SUCCESS;
356 return tdb_write_convert(tdb, off, &val, sizeof(val));
359 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
360 tdb_len_t len, unsigned int prefix)
362 unsigned char *buf;
363 enum TDB_ERROR ecode;
365 /* some systems don't like zero length malloc */
366 buf = malloc(prefix + len ? prefix + len : 1);
367 if (!buf) {
368 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
369 "tdb_alloc_read malloc failed len=%zu",
370 (size_t)(prefix + len));
371 return TDB_ERR_PTR(TDB_ERR_OOM);
372 } else {
373 ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
374 if (unlikely(ecode != TDB_SUCCESS)) {
375 free(buf);
376 return TDB_ERR_PTR(ecode);
379 return buf;
382 /* read a lump of data, allocating the space for it */
383 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
385 return _tdb_alloc_read(tdb, offset, len, 0);
388 static enum TDB_ERROR fill(struct tdb_context *tdb,
389 const void *buf, size_t size,
390 tdb_off_t off, tdb_len_t len)
392 while (len) {
393 size_t n = len > size ? size : len;
394 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
395 if (ret != n) {
396 if (ret >= 0)
397 errno = ENOSPC;
399 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
400 "fill failed:"
401 " %zi at %zu len=%zu (%s)",
402 ret, (size_t)off, (size_t)len,
403 strerror(errno));
405 len -= n;
406 off += n;
408 return TDB_SUCCESS;
411 /* expand a file. we prefer to use ftruncate, as that is what posix
412 says to use for mmap expansion */
413 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
414 tdb_len_t addition)
416 char buf[8192];
417 enum TDB_ERROR ecode;
419 if (tdb->flags & TDB_RDONLY) {
420 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
421 "Expand on read-only database");
424 if (tdb->flags & TDB_INTERNAL) {
425 char *new = realloc(tdb->file->map_ptr,
426 tdb->file->map_size + addition);
427 if (!new) {
428 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
429 "No memory to expand database");
431 tdb->file->map_ptr = new;
432 tdb->file->map_size += addition;
433 } else {
434 /* Unmap before trying to write; old TDB claimed OpenBSD had
435 * problem with this otherwise. */
436 tdb_munmap(tdb->file);
438 /* If this fails, we try to fill anyway. */
439 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
442 /* now fill the file with something. This ensures that the
443 file isn't sparse, which would be very bad if we ran out of
444 disk. This must be done with write, not via mmap */
445 memset(buf, 0x43, sizeof(buf));
446 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
447 addition);
448 if (ecode != TDB_SUCCESS)
449 return ecode;
450 tdb->file->map_size += addition;
451 tdb_mmap(tdb);
453 return TDB_SUCCESS;
456 const void *tdb_access_read(struct tdb_context *tdb,
457 tdb_off_t off, tdb_len_t len, bool convert)
459 void *ret = NULL;
461 if (likely(!(tdb->flags & TDB_CONVERT))) {
462 ret = tdb->tdb2.io->direct(tdb, off, len, false);
464 if (TDB_PTR_IS_ERR(ret)) {
465 return ret;
468 if (!ret) {
469 struct tdb_access_hdr *hdr;
470 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
471 if (TDB_PTR_IS_ERR(hdr)) {
472 return hdr;
474 hdr->next = tdb->tdb2.access;
475 tdb->tdb2.access = hdr;
476 ret = hdr + 1;
477 if (convert) {
478 tdb_convert(tdb, (void *)ret, len);
480 } else
481 tdb->tdb2.direct_access++;
483 return ret;
486 void *tdb_access_write(struct tdb_context *tdb,
487 tdb_off_t off, tdb_len_t len, bool convert)
489 void *ret = NULL;
491 if (tdb->flags & TDB_RDONLY) {
492 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
493 "Write to read-only database");
494 return TDB_ERR_PTR(TDB_ERR_RDONLY);
497 if (likely(!(tdb->flags & TDB_CONVERT))) {
498 ret = tdb->tdb2.io->direct(tdb, off, len, true);
500 if (TDB_PTR_IS_ERR(ret)) {
501 return ret;
505 if (!ret) {
506 struct tdb_access_hdr *hdr;
507 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
508 if (TDB_PTR_IS_ERR(hdr)) {
509 return hdr;
511 hdr->next = tdb->tdb2.access;
512 tdb->tdb2.access = hdr;
513 hdr->off = off;
514 hdr->len = len;
515 hdr->convert = convert;
516 ret = hdr + 1;
517 if (convert)
518 tdb_convert(tdb, (void *)ret, len);
519 } else
520 tdb->tdb2.direct_access++;
522 return ret;
525 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
527 struct tdb_access_hdr **hp;
529 for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
530 if (*hp + 1 == p)
531 return hp;
533 return NULL;
536 void tdb_access_release(struct tdb_context *tdb, const void *p)
538 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
540 if (hp) {
541 hdr = *hp;
542 *hp = hdr->next;
543 free(hdr);
544 } else
545 tdb->tdb2.direct_access--;
548 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
550 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
551 enum TDB_ERROR ecode;
553 if (hp) {
554 hdr = *hp;
555 if (hdr->convert)
556 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
557 else
558 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
559 *hp = hdr->next;
560 free(hdr);
561 } else {
562 tdb->tdb2.direct_access--;
563 ecode = TDB_SUCCESS;
566 return ecode;
569 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
570 bool write_mode)
572 enum TDB_ERROR ecode;
574 if (unlikely(!tdb->file->map_ptr))
575 return NULL;
577 ecode = tdb_oob(tdb, off + len, false);
578 if (unlikely(ecode != TDB_SUCCESS))
579 return TDB_ERR_PTR(ecode);
580 return (char *)tdb->file->map_ptr + off;
583 void tdb_inc_seqnum(struct tdb_context *tdb)
585 tdb_off_t seq;
587 if (tdb->flags & TDB_VERSION1) {
588 tdb1_increment_seqnum_nonblock(tdb);
589 return;
592 if (likely(!(tdb->flags & TDB_CONVERT))) {
593 int64_t *direct;
595 direct = tdb->tdb2.io->direct(tdb,
596 offsetof(struct tdb_header,
597 seqnum),
598 sizeof(*direct), true);
599 if (likely(direct)) {
600 /* Don't let it go negative, even briefly */
601 if (unlikely((*direct) + 1) < 0)
602 *direct = 0;
603 (*direct)++;
604 return;
608 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
609 if (!TDB_OFF_IS_ERR(seq)) {
610 seq++;
611 if (unlikely((int64_t)seq < 0))
612 seq = 0;
613 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
617 static const struct tdb_methods io_methods = {
618 tdb_read,
619 tdb_write,
620 tdb_oob,
621 tdb_expand_file,
622 tdb_direct,
626 initialise the default methods table
628 void tdb_io_init(struct tdb_context *tdb)
630 tdb->tdb2.io = &io_methods;