2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file
*file
)
38 munmap(file
->map_ptr
, file
->map_size
);
43 enum TDB_ERROR
tdb_mmap(struct tdb_context
*tdb
)
47 if (tdb
->flags
& TDB_INTERNAL
)
50 #ifndef HAVE_INCOHERENT_MMAP
51 if (tdb
->flags
& TDB_NOMMAP
)
55 if ((tdb
->open_flags
& O_ACCMODE
) == O_RDONLY
)
56 mmap_flags
= PROT_READ
;
58 mmap_flags
= PROT_READ
| PROT_WRITE
;
60 /* size_t can be smaller than off_t. */
61 if ((size_t)tdb
->file
->map_size
== tdb
->file
->map_size
) {
62 tdb
->file
->map_ptr
= mmap(NULL
, tdb
->file
->map_size
,
64 MAP_SHARED
, tdb
->file
->fd
, 0);
66 tdb
->file
->map_ptr
= MAP_FAILED
;
69 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
71 if (tdb
->file
->map_ptr
== MAP_FAILED
) {
72 tdb
->file
->map_ptr
= NULL
;
73 #ifdef HAVE_INCOHERENT_MMAP
74 /* Incoherent mmap means everyone must mmap! */
75 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
76 "tdb_mmap failed for size %lld (%s)",
77 (long long)tdb
->file
->map_size
,
80 tdb_logerr(tdb
, TDB_SUCCESS
, TDB_LOG_WARNING
,
81 "tdb_mmap failed for size %lld (%s)",
82 (long long)tdb
->file
->map_size
, strerror(errno
));
88 /* check for an out of bounds access - if it is out of bounds then
89 see if the database has been expanded by someone else and expand
91 note that "len" is the minimum length needed for the db.
93 If probe is true, len being too large isn't a failure.
95 static enum TDB_ERROR
tdb_oob(struct tdb_context
*tdb
,
96 tdb_off_t off
, tdb_len_t len
, bool probe
)
101 /* We can't hold pointers during this: we could unmap! */
102 assert(!tdb
->tdb2
.direct_access
103 || (tdb
->flags
& TDB_NOLOCK
)
104 || tdb_has_expansion_lock(tdb
));
106 if (len
+ off
< len
) {
110 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
111 "tdb_oob off %llu len %llu wrap\n",
112 (long long)off
, (long long)len
);
115 if (len
+ off
<= tdb
->file
->map_size
)
117 if (tdb
->flags
& TDB_INTERNAL
) {
121 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
122 "tdb_oob len %lld beyond internal"
124 (long long)(off
+ len
),
125 (long long)tdb
->file
->map_size
);
129 ecode
= tdb_lock_expand(tdb
, F_RDLCK
);
130 if (ecode
!= TDB_SUCCESS
) {
134 if (fstat(tdb
->file
->fd
, &st
) != 0) {
135 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
136 "Failed to fstat file: %s", strerror(errno
));
137 tdb_unlock_expand(tdb
, F_RDLCK
);
141 tdb_unlock_expand(tdb
, F_RDLCK
);
143 if (st
.st_size
< off
+ len
) {
147 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
148 "tdb_oob len %llu beyond eof at %llu",
149 (long long)(off
+ len
), (long long)st
.st_size
);
153 /* Unmap, update size, remap */
154 tdb_munmap(tdb
->file
);
156 tdb
->file
->map_size
= st
.st_size
;
157 return tdb_mmap(tdb
);
160 /* Endian conversion: we only ever deal with 8 byte quantities */
161 void *tdb_convert(const struct tdb_context
*tdb
, void *buf
, tdb_len_t size
)
163 assert(size
% 8 == 0);
164 if (unlikely((tdb
->flags
& TDB_CONVERT
)) && buf
) {
165 uint64_t i
, *p
= (uint64_t *)buf
;
166 for (i
= 0; i
< size
/ 8; i
++)
167 p
[i
] = bswap_64(p
[i
]);
172 /* Return first non-zero offset in offset array, or end, or -ve error. */
173 /* FIXME: Return the off? */
174 uint64_t tdb_find_nonzero_off(struct tdb_context
*tdb
,
175 tdb_off_t base
, uint64_t start
, uint64_t end
)
180 /* Zero vs non-zero is the same unconverted: minor optimization. */
181 val
= tdb_access_read(tdb
, base
+ start
* sizeof(tdb_off_t
),
182 (end
- start
) * sizeof(tdb_off_t
), false);
183 if (TDB_PTR_IS_ERR(val
)) {
184 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val
));
187 for (i
= 0; i
< (end
- start
); i
++) {
191 tdb_access_release(tdb
, val
);
195 /* Return first zero offset in num offset array, or num, or -ve error. */
196 uint64_t tdb_find_zero_off(struct tdb_context
*tdb
, tdb_off_t off
,
202 /* Zero vs non-zero is the same unconverted: minor optimization. */
203 val
= tdb_access_read(tdb
, off
, num
* sizeof(tdb_off_t
), false);
204 if (TDB_PTR_IS_ERR(val
)) {
205 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val
));
208 for (i
= 0; i
< num
; i
++) {
212 tdb_access_release(tdb
, val
);
216 enum TDB_ERROR
zero_out(struct tdb_context
*tdb
, tdb_off_t off
, tdb_len_t len
)
218 char buf
[8192] = { 0 };
219 void *p
= tdb
->tdb2
.io
->direct(tdb
, off
, len
, true);
220 enum TDB_ERROR ecode
= TDB_SUCCESS
;
222 assert(!(tdb
->flags
& TDB_RDONLY
));
223 if (TDB_PTR_IS_ERR(p
)) {
224 return TDB_PTR_ERR(p
);
231 unsigned todo
= len
< sizeof(buf
) ? len
: sizeof(buf
);
232 ecode
= tdb
->tdb2
.io
->twrite(tdb
, off
, buf
, todo
);
233 if (ecode
!= TDB_SUCCESS
) {
242 tdb_off_t
tdb_read_off(struct tdb_context
*tdb
, tdb_off_t off
)
245 enum TDB_ERROR ecode
;
247 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
248 tdb_off_t
*p
= tdb
->tdb2
.io
->direct(tdb
, off
, sizeof(*p
),
250 if (TDB_PTR_IS_ERR(p
)) {
251 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p
));
257 ecode
= tdb_read_convert(tdb
, off
, &ret
, sizeof(ret
));
258 if (ecode
!= TDB_SUCCESS
) {
259 return TDB_ERR_TO_OFF(ecode
);
264 /* write a lump of data at a specified offset */
265 static enum TDB_ERROR
tdb_write(struct tdb_context
*tdb
, tdb_off_t off
,
266 const void *buf
, tdb_len_t len
)
268 enum TDB_ERROR ecode
;
270 if (tdb
->flags
& TDB_RDONLY
) {
271 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
272 "Write to read-only database");
275 ecode
= tdb
->tdb2
.io
->oob(tdb
, off
, len
, false);
276 if (ecode
!= TDB_SUCCESS
) {
280 if (tdb
->file
->map_ptr
) {
281 memcpy(off
+ (char *)tdb
->file
->map_ptr
, buf
, len
);
283 #ifdef HAVE_INCOHERENT_MMAP
287 ret
= pwrite(tdb
->file
->fd
, buf
, len
, off
);
289 /* This shouldn't happen: we avoid sparse files. */
293 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
294 "tdb_write: %zi at %zu len=%zu (%s)",
295 ret
, (size_t)off
, (size_t)len
,
303 /* read a lump of data at a specified offset */
304 static enum TDB_ERROR
tdb_read(struct tdb_context
*tdb
, tdb_off_t off
,
305 void *buf
, tdb_len_t len
)
307 enum TDB_ERROR ecode
;
309 ecode
= tdb
->tdb2
.io
->oob(tdb
, off
, len
, false);
310 if (ecode
!= TDB_SUCCESS
) {
314 if (tdb
->file
->map_ptr
) {
315 memcpy(buf
, off
+ (char *)tdb
->file
->map_ptr
, len
);
317 #ifdef HAVE_INCOHERENT_MMAP
320 ssize_t r
= pread(tdb
->file
->fd
, buf
, len
, off
);
322 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
323 "tdb_read failed with %zi at %zu "
324 "len=%zu (%s) map_size=%zu",
325 r
, (size_t)off
, (size_t)len
,
327 (size_t)tdb
->file
->map_size
);
334 enum TDB_ERROR
tdb_write_convert(struct tdb_context
*tdb
, tdb_off_t off
,
335 const void *rec
, size_t len
)
337 enum TDB_ERROR ecode
;
339 if (unlikely((tdb
->flags
& TDB_CONVERT
))) {
340 void *conv
= malloc(len
);
342 return tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
343 "tdb_write: no memory converting"
346 memcpy(conv
, rec
, len
);
347 ecode
= tdb
->tdb2
.io
->twrite(tdb
, off
,
348 tdb_convert(tdb
, conv
, len
), len
);
351 ecode
= tdb
->tdb2
.io
->twrite(tdb
, off
, rec
, len
);
356 enum TDB_ERROR
tdb_read_convert(struct tdb_context
*tdb
, tdb_off_t off
,
357 void *rec
, size_t len
)
359 enum TDB_ERROR ecode
= tdb
->tdb2
.io
->tread(tdb
, off
, rec
, len
);
360 tdb_convert(tdb
, rec
, len
);
364 enum TDB_ERROR
tdb_write_off(struct tdb_context
*tdb
,
365 tdb_off_t off
, tdb_off_t val
)
367 if (tdb
->flags
& TDB_RDONLY
) {
368 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
369 "Write to read-only database");
372 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
373 tdb_off_t
*p
= tdb
->tdb2
.io
->direct(tdb
, off
, sizeof(*p
),
375 if (TDB_PTR_IS_ERR(p
)) {
376 return TDB_PTR_ERR(p
);
383 return tdb_write_convert(tdb
, off
, &val
, sizeof(val
));
386 static void *_tdb_alloc_read(struct tdb_context
*tdb
, tdb_off_t offset
,
387 tdb_len_t len
, unsigned int prefix
)
390 enum TDB_ERROR ecode
;
392 /* some systems don't like zero length malloc */
393 buf
= malloc(prefix
+ len
? prefix
+ len
: 1);
395 tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_USE_ERROR
,
396 "tdb_alloc_read malloc failed len=%zu",
397 (size_t)(prefix
+ len
));
398 return TDB_ERR_PTR(TDB_ERR_OOM
);
400 ecode
= tdb
->tdb2
.io
->tread(tdb
, offset
, buf
+prefix
, len
);
401 if (unlikely(ecode
!= TDB_SUCCESS
)) {
403 return TDB_ERR_PTR(ecode
);
409 /* read a lump of data, allocating the space for it */
410 void *tdb_alloc_read(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_len_t len
)
412 return _tdb_alloc_read(tdb
, offset
, len
, 0);
415 static enum TDB_ERROR
fill(struct tdb_context
*tdb
,
416 const void *buf
, size_t size
,
417 tdb_off_t off
, tdb_len_t len
)
420 size_t n
= len
> size
? size
: len
;
421 ssize_t ret
= pwrite(tdb
->file
->fd
, buf
, n
, off
);
426 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
428 " %zi at %zu len=%zu (%s)",
429 ret
, (size_t)off
, (size_t)len
,
438 /* expand a file. we prefer to use ftruncate, as that is what posix
439 says to use for mmap expansion */
440 static enum TDB_ERROR
tdb_expand_file(struct tdb_context
*tdb
,
444 enum TDB_ERROR ecode
;
446 if (tdb
->flags
& TDB_RDONLY
) {
447 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
448 "Expand on read-only database");
451 if (tdb
->flags
& TDB_INTERNAL
) {
452 char *new = realloc(tdb
->file
->map_ptr
,
453 tdb
->file
->map_size
+ addition
);
455 return tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
456 "No memory to expand database");
458 tdb
->file
->map_ptr
= new;
459 tdb
->file
->map_size
+= addition
;
462 /* Unmap before trying to write; old TDB claimed OpenBSD had
463 * problem with this otherwise. */
464 tdb_munmap(tdb
->file
);
466 /* If this fails, we try to fill anyway. */
467 if (ftruncate(tdb
->file
->fd
, tdb
->file
->map_size
+ addition
))
470 /* now fill the file with something. This ensures that the
471 file isn't sparse, which would be very bad if we ran out of
472 disk. This must be done with write, not via mmap */
473 memset(buf
, 0x43, sizeof(buf
));
474 ecode
= fill(tdb
, buf
, sizeof(buf
), tdb
->file
->map_size
,
476 if (ecode
!= TDB_SUCCESS
)
478 tdb
->file
->map_size
+= addition
;
479 return tdb_mmap(tdb
);
483 const void *tdb_access_read(struct tdb_context
*tdb
,
484 tdb_off_t off
, tdb_len_t len
, bool convert
)
488 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
489 ret
= tdb
->tdb2
.io
->direct(tdb
, off
, len
, false);
491 if (TDB_PTR_IS_ERR(ret
)) {
496 struct tdb_access_hdr
*hdr
;
497 hdr
= _tdb_alloc_read(tdb
, off
, len
, sizeof(*hdr
));
498 if (TDB_PTR_IS_ERR(hdr
)) {
501 hdr
->next
= tdb
->tdb2
.access
;
502 tdb
->tdb2
.access
= hdr
;
505 tdb_convert(tdb
, (void *)ret
, len
);
508 tdb
->tdb2
.direct_access
++;
513 void *tdb_access_write(struct tdb_context
*tdb
,
514 tdb_off_t off
, tdb_len_t len
, bool convert
)
518 if (tdb
->flags
& TDB_RDONLY
) {
519 tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
520 "Write to read-only database");
521 return TDB_ERR_PTR(TDB_ERR_RDONLY
);
524 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
525 ret
= tdb
->tdb2
.io
->direct(tdb
, off
, len
, true);
527 if (TDB_PTR_IS_ERR(ret
)) {
533 struct tdb_access_hdr
*hdr
;
534 hdr
= _tdb_alloc_read(tdb
, off
, len
, sizeof(*hdr
));
535 if (TDB_PTR_IS_ERR(hdr
)) {
538 hdr
->next
= tdb
->tdb2
.access
;
539 tdb
->tdb2
.access
= hdr
;
542 hdr
->convert
= convert
;
545 tdb_convert(tdb
, (void *)ret
, len
);
547 tdb
->tdb2
.direct_access
++;
552 static struct tdb_access_hdr
**find_hdr(struct tdb_context
*tdb
, const void *p
)
554 struct tdb_access_hdr
**hp
;
556 for (hp
= &tdb
->tdb2
.access
; *hp
; hp
= &(*hp
)->next
) {
563 void tdb_access_release(struct tdb_context
*tdb
, const void *p
)
565 struct tdb_access_hdr
*hdr
, **hp
= find_hdr(tdb
, p
);
572 tdb
->tdb2
.direct_access
--;
575 enum TDB_ERROR
tdb_access_commit(struct tdb_context
*tdb
, void *p
)
577 struct tdb_access_hdr
*hdr
, **hp
= find_hdr(tdb
, p
);
578 enum TDB_ERROR ecode
;
583 ecode
= tdb_write_convert(tdb
, hdr
->off
, p
, hdr
->len
);
585 ecode
= tdb_write(tdb
, hdr
->off
, p
, hdr
->len
);
589 tdb
->tdb2
.direct_access
--;
596 static void *tdb_direct(struct tdb_context
*tdb
, tdb_off_t off
, size_t len
,
599 enum TDB_ERROR ecode
;
601 if (unlikely(!tdb
->file
->map_ptr
))
604 ecode
= tdb_oob(tdb
, off
, len
, false);
605 if (unlikely(ecode
!= TDB_SUCCESS
))
606 return TDB_ERR_PTR(ecode
);
607 return (char *)tdb
->file
->map_ptr
+ off
;
610 void tdb_inc_seqnum(struct tdb_context
*tdb
)
614 if (tdb
->flags
& TDB_VERSION1
) {
615 tdb1_increment_seqnum_nonblock(tdb
);
619 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
622 direct
= tdb
->tdb2
.io
->direct(tdb
,
623 offsetof(struct tdb_header
,
625 sizeof(*direct
), true);
626 if (likely(direct
)) {
627 /* Don't let it go negative, even briefly */
628 if (unlikely((*direct
) + 1) < 0)
635 seq
= tdb_read_off(tdb
, offsetof(struct tdb_header
, seqnum
));
636 if (!TDB_OFF_IS_ERR(seq
)) {
638 if (unlikely((int64_t)seq
< 0))
640 tdb_write_off(tdb
, offsetof(struct tdb_header
, seqnum
), seq
);
/* Default I/O method table: tdb_io_init() points tdb->tdb2.io at this. */
644 static const struct tdb_methods io_methods
= {
653 initialise the default methods table
655 void tdb_io_init(struct tdb_context
*tdb
)
657 tdb
->tdb2
.io
= &io_methods
;