2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file
*file
)
38 munmap(file
->map_ptr
, file
->map_size
);
43 void tdb_mmap(struct tdb_context
*tdb
)
45 if (tdb
->flags
& TDB_INTERNAL
)
48 if (tdb
->flags
& TDB_NOMMAP
)
51 /* size_t can be smaller than off_t. */
52 if ((size_t)tdb
->file
->map_size
== tdb
->file
->map_size
) {
53 tdb
->file
->map_ptr
= mmap(NULL
, tdb
->file
->map_size
,
55 MAP_SHARED
, tdb
->file
->fd
, 0);
57 tdb
->file
->map_ptr
= MAP_FAILED
;
60 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
62 if (tdb
->file
->map_ptr
== MAP_FAILED
) {
63 tdb
->file
->map_ptr
= NULL
;
64 tdb_logerr(tdb
, TDB_SUCCESS
, TDB_LOG_WARNING
,
65 "tdb_mmap failed for size %lld (%s)",
66 (long long)tdb
->file
->map_size
, strerror(errno
));
70 /* check for an out of bounds access - if it is out of bounds then
71 see if the database has been expanded by someone else and expand
73 note that "len" is the minimum length needed for the db
75 static enum TDB_ERROR
tdb_oob(struct tdb_context
*tdb
, tdb_off_t len
,
81 /* We can't hold pointers during this: we could unmap! */
82 assert(!tdb
->direct_access
83 || (tdb
->flags
& TDB_NOLOCK
)
84 || tdb_has_expansion_lock(tdb
));
86 if (len
<= tdb
->file
->map_size
)
88 if (tdb
->flags
& TDB_INTERNAL
) {
90 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
91 "tdb_oob len %lld beyond internal"
94 (long long)tdb
->file
->map_size
);
99 ecode
= tdb_lock_expand(tdb
, F_RDLCK
);
100 if (ecode
!= TDB_SUCCESS
) {
104 if (fstat(tdb
->file
->fd
, &st
) != 0) {
105 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
106 "Failed to fstat file: %s", strerror(errno
));
107 tdb_unlock_expand(tdb
, F_RDLCK
);
111 tdb_unlock_expand(tdb
, F_RDLCK
);
113 if (st
.st_size
< (size_t)len
) {
115 tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
116 "tdb_oob len %zu beyond eof at %zu",
117 (size_t)len
, st
.st_size
);
122 /* Unmap, update size, remap */
123 tdb_munmap(tdb
->file
);
125 tdb
->file
->map_size
= st
.st_size
;
130 /* Endian conversion: we only ever deal with 8 byte quantities */
131 void *tdb_convert(const struct tdb_context
*tdb
, void *buf
, tdb_len_t size
)
133 assert(size
% 8 == 0);
134 if (unlikely((tdb
->flags
& TDB_CONVERT
)) && buf
) {
135 uint64_t i
, *p
= (uint64_t *)buf
;
136 for (i
= 0; i
< size
/ 8; i
++)
137 p
[i
] = bswap_64(p
[i
]);
142 /* Return first non-zero offset in offset array, or end, or -ve error. */
143 /* FIXME: Return the off? */
144 uint64_t tdb_find_nonzero_off(struct tdb_context
*tdb
,
145 tdb_off_t base
, uint64_t start
, uint64_t end
)
150 /* Zero vs non-zero is the same unconverted: minor optimization. */
151 val
= tdb_access_read(tdb
, base
+ start
* sizeof(tdb_off_t
),
152 (end
- start
) * sizeof(tdb_off_t
), false);
153 if (TDB_PTR_IS_ERR(val
)) {
154 return TDB_PTR_ERR(val
);
157 for (i
= 0; i
< (end
- start
); i
++) {
161 tdb_access_release(tdb
, val
);
165 /* Return first zero offset in num offset array, or num, or -ve error. */
166 uint64_t tdb_find_zero_off(struct tdb_context
*tdb
, tdb_off_t off
,
172 /* Zero vs non-zero is the same unconverted: minor optimization. */
173 val
= tdb_access_read(tdb
, off
, num
* sizeof(tdb_off_t
), false);
174 if (TDB_PTR_IS_ERR(val
)) {
175 return TDB_PTR_ERR(val
);
178 for (i
= 0; i
< num
; i
++) {
182 tdb_access_release(tdb
, val
);
186 enum TDB_ERROR
zero_out(struct tdb_context
*tdb
, tdb_off_t off
, tdb_len_t len
)
188 char buf
[8192] = { 0 };
189 void *p
= tdb
->methods
->direct(tdb
, off
, len
, true);
190 enum TDB_ERROR ecode
= TDB_SUCCESS
;
192 assert(!tdb
->read_only
);
193 if (TDB_PTR_IS_ERR(p
)) {
194 return TDB_PTR_ERR(p
);
201 unsigned todo
= len
< sizeof(buf
) ? len
: sizeof(buf
);
202 ecode
= tdb
->methods
->twrite(tdb
, off
, buf
, todo
);
203 if (ecode
!= TDB_SUCCESS
) {
212 tdb_off_t
tdb_read_off(struct tdb_context
*tdb
, tdb_off_t off
)
215 enum TDB_ERROR ecode
;
217 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
218 tdb_off_t
*p
= tdb
->methods
->direct(tdb
, off
, sizeof(*p
),
220 if (TDB_PTR_IS_ERR(p
)) {
221 return TDB_PTR_ERR(p
);
227 ecode
= tdb_read_convert(tdb
, off
, &ret
, sizeof(ret
));
228 if (ecode
!= TDB_SUCCESS
) {
234 /* write a lump of data at a specified offset */
235 static enum TDB_ERROR
tdb_write(struct tdb_context
*tdb
, tdb_off_t off
,
236 const void *buf
, tdb_len_t len
)
238 enum TDB_ERROR ecode
;
240 if (tdb
->read_only
) {
241 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
242 "Write to read-only database");
245 ecode
= tdb
->methods
->oob(tdb
, off
+ len
, 0);
246 if (ecode
!= TDB_SUCCESS
) {
250 if (tdb
->file
->map_ptr
) {
251 memcpy(off
+ (char *)tdb
->file
->map_ptr
, buf
, len
);
254 ret
= pwrite(tdb
->file
->fd
, buf
, len
, off
);
256 /* This shouldn't happen: we avoid sparse files. */
260 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
261 "tdb_write: %zi at %zu len=%zu (%s)",
262 ret
, (size_t)off
, (size_t)len
,
269 /* read a lump of data at a specified offset */
270 static enum TDB_ERROR
tdb_read(struct tdb_context
*tdb
, tdb_off_t off
,
271 void *buf
, tdb_len_t len
)
273 enum TDB_ERROR ecode
;
275 ecode
= tdb
->methods
->oob(tdb
, off
+ len
, 0);
276 if (ecode
!= TDB_SUCCESS
) {
280 if (tdb
->file
->map_ptr
) {
281 memcpy(buf
, off
+ (char *)tdb
->file
->map_ptr
, len
);
283 ssize_t r
= pread(tdb
->file
->fd
, buf
, len
, off
);
285 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
286 "tdb_read failed with %zi at %zu "
287 "len=%zu (%s) map_size=%zu",
288 r
, (size_t)off
, (size_t)len
,
290 (size_t)tdb
->file
->map_size
);
296 enum TDB_ERROR
tdb_write_convert(struct tdb_context
*tdb
, tdb_off_t off
,
297 const void *rec
, size_t len
)
299 enum TDB_ERROR ecode
;
301 if (unlikely((tdb
->flags
& TDB_CONVERT
))) {
302 void *conv
= malloc(len
);
304 return tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
305 "tdb_write: no memory converting"
308 memcpy(conv
, rec
, len
);
309 ecode
= tdb
->methods
->twrite(tdb
, off
,
310 tdb_convert(tdb
, conv
, len
), len
);
313 ecode
= tdb
->methods
->twrite(tdb
, off
, rec
, len
);
318 enum TDB_ERROR
tdb_read_convert(struct tdb_context
*tdb
, tdb_off_t off
,
319 void *rec
, size_t len
)
321 enum TDB_ERROR ecode
= tdb
->methods
->tread(tdb
, off
, rec
, len
);
322 tdb_convert(tdb
, rec
, len
);
326 enum TDB_ERROR
tdb_write_off(struct tdb_context
*tdb
,
327 tdb_off_t off
, tdb_off_t val
)
329 if (tdb
->read_only
) {
330 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
331 "Write to read-only database");
334 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
335 tdb_off_t
*p
= tdb
->methods
->direct(tdb
, off
, sizeof(*p
),
337 if (TDB_PTR_IS_ERR(p
)) {
338 return TDB_PTR_ERR(p
);
345 return tdb_write_convert(tdb
, off
, &val
, sizeof(val
));
348 static void *_tdb_alloc_read(struct tdb_context
*tdb
, tdb_off_t offset
,
349 tdb_len_t len
, unsigned int prefix
)
352 enum TDB_ERROR ecode
;
354 /* some systems don't like zero length malloc */
355 buf
= malloc(prefix
+ len
? prefix
+ len
: 1);
357 tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_USE_ERROR
,
358 "tdb_alloc_read malloc failed len=%zu",
359 (size_t)(prefix
+ len
));
360 return TDB_ERR_PTR(TDB_ERR_OOM
);
362 ecode
= tdb
->methods
->tread(tdb
, offset
, buf
+prefix
, len
);
363 if (unlikely(ecode
!= TDB_SUCCESS
)) {
365 return TDB_ERR_PTR(ecode
);
371 /* read a lump of data, allocating the space for it */
372 void *tdb_alloc_read(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_len_t len
)
374 return _tdb_alloc_read(tdb
, offset
, len
, 0);
377 static enum TDB_ERROR
fill(struct tdb_context
*tdb
,
378 const void *buf
, size_t size
,
379 tdb_off_t off
, tdb_len_t len
)
382 size_t n
= len
> size
? size
: len
;
383 ssize_t ret
= pwrite(tdb
->file
->fd
, buf
, n
, off
);
388 return tdb_logerr(tdb
, TDB_ERR_IO
, TDB_LOG_ERROR
,
390 " %zi at %zu len=%zu (%s)",
391 ret
, (size_t)off
, (size_t)len
,
400 /* expand a file. we prefer to use ftruncate, as that is what posix
401 says to use for mmap expansion */
402 static enum TDB_ERROR
tdb_expand_file(struct tdb_context
*tdb
,
406 enum TDB_ERROR ecode
;
408 if (tdb
->read_only
) {
409 return tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
410 "Expand on read-only database");
413 if (tdb
->flags
& TDB_INTERNAL
) {
414 char *new = realloc(tdb
->file
->map_ptr
,
415 tdb
->file
->map_size
+ addition
);
417 return tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
418 "No memory to expand database");
420 tdb
->file
->map_ptr
= new;
421 tdb
->file
->map_size
+= addition
;
423 /* Unmap before trying to write; old TDB claimed OpenBSD had
424 * problem with this otherwise. */
425 tdb_munmap(tdb
->file
);
427 /* If this fails, we try to fill anyway. */
428 if (ftruncate(tdb
->file
->fd
, tdb
->file
->map_size
+ addition
))
431 /* now fill the file with something. This ensures that the
432 file isn't sparse, which would be very bad if we ran out of
433 disk. This must be done with write, not via mmap */
434 memset(buf
, 0x43, sizeof(buf
));
435 ecode
= fill(tdb
, buf
, sizeof(buf
), tdb
->file
->map_size
,
437 if (ecode
!= TDB_SUCCESS
)
439 tdb
->file
->map_size
+= addition
;
445 const void *tdb_access_read(struct tdb_context
*tdb
,
446 tdb_off_t off
, tdb_len_t len
, bool convert
)
450 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
451 ret
= tdb
->methods
->direct(tdb
, off
, len
, false);
453 if (TDB_PTR_IS_ERR(ret
)) {
458 struct tdb_access_hdr
*hdr
;
459 hdr
= _tdb_alloc_read(tdb
, off
, len
, sizeof(*hdr
));
460 if (TDB_PTR_IS_ERR(hdr
)) {
463 hdr
->next
= tdb
->access
;
467 tdb_convert(tdb
, (void *)ret
, len
);
470 tdb
->direct_access
++;
475 void *tdb_access_write(struct tdb_context
*tdb
,
476 tdb_off_t off
, tdb_len_t len
, bool convert
)
480 if (tdb
->read_only
) {
481 tdb_logerr(tdb
, TDB_ERR_RDONLY
, TDB_LOG_USE_ERROR
,
482 "Write to read-only database");
483 return TDB_ERR_PTR(TDB_ERR_RDONLY
);
486 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
487 ret
= tdb
->methods
->direct(tdb
, off
, len
, true);
489 if (TDB_PTR_IS_ERR(ret
)) {
495 struct tdb_access_hdr
*hdr
;
496 hdr
= _tdb_alloc_read(tdb
, off
, len
, sizeof(*hdr
));
497 if (TDB_PTR_IS_ERR(hdr
)) {
500 hdr
->next
= tdb
->access
;
504 hdr
->convert
= convert
;
507 tdb_convert(tdb
, (void *)ret
, len
);
509 tdb
->direct_access
++;
514 static struct tdb_access_hdr
**find_hdr(struct tdb_context
*tdb
, const void *p
)
516 struct tdb_access_hdr
**hp
;
518 for (hp
= &tdb
->access
; *hp
; hp
= &(*hp
)->next
) {
525 void tdb_access_release(struct tdb_context
*tdb
, const void *p
)
527 struct tdb_access_hdr
*hdr
, **hp
= find_hdr(tdb
, p
);
534 tdb
->direct_access
--;
537 enum TDB_ERROR
tdb_access_commit(struct tdb_context
*tdb
, void *p
)
539 struct tdb_access_hdr
*hdr
, **hp
= find_hdr(tdb
, p
);
540 enum TDB_ERROR ecode
;
545 ecode
= tdb_write_convert(tdb
, hdr
->off
, p
, hdr
->len
);
547 ecode
= tdb_write(tdb
, hdr
->off
, p
, hdr
->len
);
551 tdb
->direct_access
--;
558 static void *tdb_direct(struct tdb_context
*tdb
, tdb_off_t off
, size_t len
,
561 enum TDB_ERROR ecode
;
563 if (unlikely(!tdb
->file
->map_ptr
))
566 ecode
= tdb_oob(tdb
, off
+ len
, true);
567 if (unlikely(ecode
!= TDB_SUCCESS
))
568 return TDB_ERR_PTR(ecode
);
569 return (char *)tdb
->file
->map_ptr
+ off
;
572 void tdb_inc_seqnum(struct tdb_context
*tdb
)
576 if (likely(!(tdb
->flags
& TDB_CONVERT
))) {
579 direct
= tdb
->methods
->direct(tdb
,
580 offsetof(struct tdb_header
,
582 sizeof(*direct
), true);
583 if (likely(direct
)) {
584 /* Don't let it go negative, even briefly */
585 if (unlikely((*direct
) + 1) < 0)
592 seq
= tdb_read_off(tdb
, offsetof(struct tdb_header
, seqnum
));
593 if (!TDB_OFF_IS_ERR(seq
)) {
595 if (unlikely((int64_t)seq
< 0))
597 tdb_write_off(tdb
, offsetof(struct tdb_header
, seqnum
), seq
);
601 static const struct tdb_methods io_methods
= {
610 initialise the default methods table
612 void tdb_io_init(struct tdb_context
*tdb
)
614 tdb
->methods
= &io_methods
;