2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the ntdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include <ccan/likely/likely.h>
31 void ntdb_munmap(struct ntdb_file
*file
)
37 munmap(file
->map_ptr
, file
->map_size
);
42 enum NTDB_ERROR
ntdb_mmap(struct ntdb_context
*ntdb
)
46 if (ntdb
->flags
& NTDB_INTERNAL
)
49 #ifndef HAVE_INCOHERENT_MMAP
50 if (ntdb
->flags
& NTDB_NOMMAP
)
54 if ((ntdb
->open_flags
& O_ACCMODE
) == O_RDONLY
)
55 mmap_flags
= PROT_READ
;
57 mmap_flags
= PROT_READ
| PROT_WRITE
;
59 /* size_t can be smaller than off_t. */
60 if ((size_t)ntdb
->file
->map_size
== ntdb
->file
->map_size
) {
61 ntdb
->file
->map_ptr
= mmap(NULL
, ntdb
->file
->map_size
,
63 MAP_SHARED
, ntdb
->file
->fd
, 0);
65 ntdb
->file
->map_ptr
= MAP_FAILED
;
68 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
70 if (ntdb
->file
->map_ptr
== MAP_FAILED
) {
71 ntdb
->file
->map_ptr
= NULL
;
72 #ifdef HAVE_INCOHERENT_MMAP
73 /* Incoherent mmap means everyone must mmap! */
74 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
75 "ntdb_mmap failed for size %lld (%s)",
76 (long long)ntdb
->file
->map_size
,
79 ntdb_logerr(ntdb
, NTDB_SUCCESS
, NTDB_LOG_WARNING
,
80 "ntdb_mmap failed for size %lld (%s)",
81 (long long)ntdb
->file
->map_size
, strerror(errno
));
87 /* check for an out of bounds access - if it is out of bounds then
88 see if the database has been expanded by someone else and expand
90 note that "len" is the minimum length needed for the db.
92 If probe is true, len being too large isn't a failure.
94 static enum NTDB_ERROR
ntdb_normal_oob(struct ntdb_context
*ntdb
,
95 ntdb_off_t off
, ntdb_len_t len
,
99 enum NTDB_ERROR ecode
;
101 /* We can't hold pointers during this: we could unmap! */
102 assert(!ntdb
->direct_access
103 || (ntdb
->flags
& NTDB_NOLOCK
)
104 || ntdb_has_expansion_lock(ntdb
));
106 if (len
+ off
< len
) {
110 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
111 "ntdb_oob off %llu len %llu wrap\n",
112 (long long)off
, (long long)len
);
115 if (ntdb
->flags
& NTDB_INTERNAL
) {
119 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
120 "ntdb_oob len %lld beyond internal"
122 (long long)(off
+ len
),
123 (long long)ntdb
->file
->map_size
);
127 ecode
= ntdb_lock_expand(ntdb
, F_RDLCK
);
128 if (ecode
!= NTDB_SUCCESS
) {
132 if (fstat(ntdb
->file
->fd
, &st
) != 0) {
133 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
134 "Failed to fstat file: %s", strerror(errno
));
135 ntdb_unlock_expand(ntdb
, F_RDLCK
);
139 ntdb_unlock_expand(ntdb
, F_RDLCK
);
141 if (st
.st_size
< off
+ len
) {
145 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
146 "ntdb_oob len %llu beyond eof at %llu",
147 (long long)(off
+ len
), (long long)st
.st_size
);
151 /* Unmap, update size, remap */
152 ntdb_munmap(ntdb
->file
);
154 ntdb
->file
->map_size
= st
.st_size
;
155 return ntdb_mmap(ntdb
);
158 /* Endian conversion: we only ever deal with 8 byte quantities */
159 void *ntdb_convert(const struct ntdb_context
*ntdb
, void *buf
, ntdb_len_t size
)
161 assert(size
% 8 == 0);
162 if (unlikely((ntdb
->flags
& NTDB_CONVERT
)) && buf
) {
163 uint64_t i
, *p
= (uint64_t *)buf
;
164 for (i
= 0; i
< size
/ 8; i
++)
165 p
[i
] = bswap_64(p
[i
]);
170 /* Return first non-zero offset in offset array, or end, or -ve error. */
171 /* FIXME: Return the off? */
172 uint64_t ntdb_find_nonzero_off(struct ntdb_context
*ntdb
,
173 ntdb_off_t base
, uint64_t start
, uint64_t end
)
178 /* Zero vs non-zero is the same unconverted: minor optimization. */
179 val
= ntdb_access_read(ntdb
, base
+ start
* sizeof(ntdb_off_t
),
180 (end
- start
) * sizeof(ntdb_off_t
), false);
181 if (NTDB_PTR_IS_ERR(val
)) {
182 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val
));
185 for (i
= 0; i
< (end
- start
); i
++) {
189 ntdb_access_release(ntdb
, val
);
193 /* Return first zero offset in num offset array, or num, or -ve error. */
194 uint64_t ntdb_find_zero_off(struct ntdb_context
*ntdb
, ntdb_off_t off
,
200 /* Zero vs non-zero is the same unconverted: minor optimization. */
201 val
= ntdb_access_read(ntdb
, off
, num
* sizeof(ntdb_off_t
), false);
202 if (NTDB_PTR_IS_ERR(val
)) {
203 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val
));
206 for (i
= 0; i
< num
; i
++) {
210 ntdb_access_release(ntdb
, val
);
214 enum NTDB_ERROR
zero_out(struct ntdb_context
*ntdb
, ntdb_off_t off
, ntdb_len_t len
)
216 char buf
[8192] = { 0 };
217 void *p
= ntdb
->io
->direct(ntdb
, off
, len
, true);
218 enum NTDB_ERROR ecode
= NTDB_SUCCESS
;
220 assert(!(ntdb
->flags
& NTDB_RDONLY
));
221 if (NTDB_PTR_IS_ERR(p
)) {
222 return NTDB_PTR_ERR(p
);
229 unsigned todo
= len
< sizeof(buf
) ? len
: sizeof(buf
);
230 ecode
= ntdb
->io
->twrite(ntdb
, off
, buf
, todo
);
231 if (ecode
!= NTDB_SUCCESS
) {
240 /* write a lump of data at a specified offset */
241 static enum NTDB_ERROR
ntdb_write(struct ntdb_context
*ntdb
, ntdb_off_t off
,
242 const void *buf
, ntdb_len_t len
)
244 enum NTDB_ERROR ecode
;
246 if (ntdb
->flags
& NTDB_RDONLY
) {
247 return ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
248 "Write to read-only database");
251 ecode
= ntdb_oob(ntdb
, off
, len
, false);
252 if (ecode
!= NTDB_SUCCESS
) {
256 if (ntdb
->file
->map_ptr
) {
257 memcpy(off
+ (char *)ntdb
->file
->map_ptr
, buf
, len
);
259 #ifdef HAVE_INCOHERENT_MMAP
263 ret
= pwrite(ntdb
->file
->fd
, buf
, len
, off
);
265 /* This shouldn't happen: we avoid sparse files. */
269 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
270 "ntdb_write: %zi at %zu len=%zu (%s)",
271 ret
, (size_t)off
, (size_t)len
,
279 /* read a lump of data at a specified offset */
280 static enum NTDB_ERROR
ntdb_read(struct ntdb_context
*ntdb
, ntdb_off_t off
,
281 void *buf
, ntdb_len_t len
)
283 enum NTDB_ERROR ecode
;
285 ecode
= ntdb_oob(ntdb
, off
, len
, false);
286 if (ecode
!= NTDB_SUCCESS
) {
290 if (ntdb
->file
->map_ptr
) {
291 memcpy(buf
, off
+ (char *)ntdb
->file
->map_ptr
, len
);
293 #ifdef HAVE_INCOHERENT_MMAP
296 ssize_t r
= pread(ntdb
->file
->fd
, buf
, len
, off
);
298 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
299 "ntdb_read failed with %zi at %zu "
300 "len=%zu (%s) map_size=%zu",
301 r
, (size_t)off
, (size_t)len
,
303 (size_t)ntdb
->file
->map_size
);
310 enum NTDB_ERROR
ntdb_write_convert(struct ntdb_context
*ntdb
, ntdb_off_t off
,
311 const void *rec
, size_t len
)
313 enum NTDB_ERROR ecode
;
315 if (unlikely((ntdb
->flags
& NTDB_CONVERT
))) {
316 void *conv
= ntdb
->alloc_fn(ntdb
, len
, ntdb
->alloc_data
);
318 return ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
319 "ntdb_write: no memory converting"
322 memcpy(conv
, rec
, len
);
323 ecode
= ntdb
->io
->twrite(ntdb
, off
,
324 ntdb_convert(ntdb
, conv
, len
), len
);
325 ntdb
->free_fn(conv
, ntdb
->alloc_data
);
327 ecode
= ntdb
->io
->twrite(ntdb
, off
, rec
, len
);
332 enum NTDB_ERROR
ntdb_read_convert(struct ntdb_context
*ntdb
, ntdb_off_t off
,
333 void *rec
, size_t len
)
335 enum NTDB_ERROR ecode
= ntdb
->io
->tread(ntdb
, off
, rec
, len
);
336 ntdb_convert(ntdb
, rec
, len
);
340 static void *_ntdb_alloc_read(struct ntdb_context
*ntdb
, ntdb_off_t offset
,
341 ntdb_len_t len
, unsigned int prefix
)
344 enum NTDB_ERROR ecode
;
346 /* some systems don't like zero length malloc */
347 buf
= ntdb
->alloc_fn(ntdb
, prefix
+ len
? prefix
+ len
: 1,
350 ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_USE_ERROR
,
351 "ntdb_alloc_read alloc failed len=%zu",
352 (size_t)(prefix
+ len
));
353 return NTDB_ERR_PTR(NTDB_ERR_OOM
);
355 ecode
= ntdb
->io
->tread(ntdb
, offset
, buf
+prefix
, len
);
356 if (unlikely(ecode
!= NTDB_SUCCESS
)) {
357 ntdb
->free_fn(buf
, ntdb
->alloc_data
);
358 return NTDB_ERR_PTR(ecode
);
364 /* read a lump of data, allocating the space for it */
365 void *ntdb_alloc_read(struct ntdb_context
*ntdb
, ntdb_off_t offset
, ntdb_len_t len
)
367 return _ntdb_alloc_read(ntdb
, offset
, len
, 0);
370 static enum NTDB_ERROR
fill(struct ntdb_context
*ntdb
,
371 const void *buf
, size_t size
,
372 ntdb_off_t off
, ntdb_len_t len
)
375 size_t n
= len
> size
? size
: len
;
376 ssize_t ret
= pwrite(ntdb
->file
->fd
, buf
, n
, off
);
381 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
383 " %zi at %zu len=%zu (%s)",
384 ret
, (size_t)off
, (size_t)len
,
393 /* expand a file. we prefer to use ftruncate, as that is what posix
394 says to use for mmap expansion */
395 static enum NTDB_ERROR
ntdb_expand_file(struct ntdb_context
*ntdb
,
399 enum NTDB_ERROR ecode
;
401 assert((ntdb
->file
->map_size
+ addition
) % NTDB_PGSIZE
== 0);
402 if (ntdb
->flags
& NTDB_RDONLY
) {
403 return ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
404 "Expand on read-only database");
407 if (ntdb
->flags
& NTDB_INTERNAL
) {
408 char *new = ntdb
->expand_fn(ntdb
->file
->map_ptr
,
409 ntdb
->file
->map_size
+ addition
,
412 return ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
413 "No memory to expand database");
415 ntdb
->file
->map_ptr
= new;
416 ntdb
->file
->map_size
+= addition
;
419 /* Unmap before trying to write; old NTDB claimed OpenBSD had
420 * problem with this otherwise. */
421 ntdb_munmap(ntdb
->file
);
423 /* If this fails, we try to fill anyway. */
424 if (ftruncate(ntdb
->file
->fd
, ntdb
->file
->map_size
+ addition
))
427 /* now fill the file with something. This ensures that the
428 file isn't sparse, which would be very bad if we ran out of
429 disk. This must be done with write, not via mmap */
430 memset(buf
, 0x43, sizeof(buf
));
431 ecode
= fill(ntdb
, buf
, sizeof(buf
), ntdb
->file
->map_size
,
433 if (ecode
!= NTDB_SUCCESS
)
435 ntdb
->file
->map_size
+= addition
;
436 return ntdb_mmap(ntdb
);
440 const void *ntdb_access_read(struct ntdb_context
*ntdb
,
441 ntdb_off_t off
, ntdb_len_t len
, bool convert
)
445 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
446 ret
= ntdb
->io
->direct(ntdb
, off
, len
, false);
448 if (NTDB_PTR_IS_ERR(ret
)) {
453 struct ntdb_access_hdr
*hdr
;
454 hdr
= _ntdb_alloc_read(ntdb
, off
, len
, sizeof(*hdr
));
455 if (NTDB_PTR_IS_ERR(hdr
)) {
458 hdr
->next
= ntdb
->access
;
462 ntdb_convert(ntdb
, (void *)ret
, len
);
465 ntdb
->direct_access
++;
470 void *ntdb_access_write(struct ntdb_context
*ntdb
,
471 ntdb_off_t off
, ntdb_len_t len
, bool convert
)
475 if (ntdb
->flags
& NTDB_RDONLY
) {
476 ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
477 "Write to read-only database");
478 return NTDB_ERR_PTR(NTDB_ERR_RDONLY
);
481 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
482 ret
= ntdb
->io
->direct(ntdb
, off
, len
, true);
484 if (NTDB_PTR_IS_ERR(ret
)) {
490 struct ntdb_access_hdr
*hdr
;
491 hdr
= _ntdb_alloc_read(ntdb
, off
, len
, sizeof(*hdr
));
492 if (NTDB_PTR_IS_ERR(hdr
)) {
495 hdr
->next
= ntdb
->access
;
499 hdr
->convert
= convert
;
502 ntdb_convert(ntdb
, (void *)ret
, len
);
504 ntdb
->direct_access
++;
509 static struct ntdb_access_hdr
**find_hdr(struct ntdb_context
*ntdb
, const void *p
)
511 struct ntdb_access_hdr
**hp
;
513 for (hp
= &ntdb
->access
; *hp
; hp
= &(*hp
)->next
) {
520 void ntdb_access_release(struct ntdb_context
*ntdb
, const void *p
)
522 struct ntdb_access_hdr
*hdr
, **hp
= find_hdr(ntdb
, p
);
527 ntdb
->free_fn(hdr
, ntdb
->alloc_data
);
529 ntdb
->direct_access
--;
532 enum NTDB_ERROR
ntdb_access_commit(struct ntdb_context
*ntdb
, void *p
)
534 struct ntdb_access_hdr
*hdr
, **hp
= find_hdr(ntdb
, p
);
535 enum NTDB_ERROR ecode
;
540 ecode
= ntdb_write_convert(ntdb
, hdr
->off
, p
, hdr
->len
);
542 ecode
= ntdb_write(ntdb
, hdr
->off
, p
, hdr
->len
);
544 ntdb
->free_fn(hdr
, ntdb
->alloc_data
);
546 ntdb
->direct_access
--;
547 ecode
= NTDB_SUCCESS
;
553 static void *ntdb_direct(struct ntdb_context
*ntdb
, ntdb_off_t off
, size_t len
,
556 enum NTDB_ERROR ecode
;
558 if (unlikely(!ntdb
->file
->map_ptr
))
561 ecode
= ntdb_oob(ntdb
, off
, len
, false);
562 if (unlikely(ecode
!= NTDB_SUCCESS
))
563 return NTDB_ERR_PTR(ecode
);
564 return (char *)ntdb
->file
->map_ptr
+ off
;
567 static ntdb_off_t
ntdb_read_normal_off(struct ntdb_context
*ntdb
,
571 enum NTDB_ERROR ecode
;
574 p
= ntdb_direct(ntdb
, off
, sizeof(*p
), false);
575 if (NTDB_PTR_IS_ERR(p
)) {
576 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p
));
582 ecode
= ntdb_read(ntdb
, off
, &ret
, sizeof(ret
));
583 if (ecode
!= NTDB_SUCCESS
) {
584 return NTDB_ERR_TO_OFF(ecode
);
589 static ntdb_off_t
ntdb_read_convert_off(struct ntdb_context
*ntdb
,
593 enum NTDB_ERROR ecode
;
595 ecode
= ntdb_read_convert(ntdb
, off
, &ret
, sizeof(ret
));
596 if (ecode
!= NTDB_SUCCESS
) {
597 return NTDB_ERR_TO_OFF(ecode
);
602 static enum NTDB_ERROR
ntdb_write_normal_off(struct ntdb_context
*ntdb
,
603 ntdb_off_t off
, ntdb_off_t val
)
607 p
= ntdb_direct(ntdb
, off
, sizeof(*p
), true);
608 if (NTDB_PTR_IS_ERR(p
)) {
609 return NTDB_PTR_ERR(p
);
615 return ntdb_write(ntdb
, off
, &val
, sizeof(val
));
618 static enum NTDB_ERROR
ntdb_write_convert_off(struct ntdb_context
*ntdb
,
619 ntdb_off_t off
, ntdb_off_t val
)
621 return ntdb_write_convert(ntdb
, off
, &val
, sizeof(val
));
624 void ntdb_inc_seqnum(struct ntdb_context
*ntdb
)
628 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
631 direct
= ntdb
->io
->direct(ntdb
,
632 offsetof(struct ntdb_header
, seqnum
),
633 sizeof(*direct
), true);
634 if (likely(direct
)) {
635 /* Don't let it go negative, even briefly */
636 if (unlikely((*direct
) + 1) < 0)
643 seq
= ntdb_read_off(ntdb
, offsetof(struct ntdb_header
, seqnum
));
644 if (!NTDB_OFF_IS_ERR(seq
)) {
646 if (unlikely((int64_t)seq
< 0))
648 ntdb_write_off(ntdb
, offsetof(struct ntdb_header
, seqnum
), seq
);
652 static const struct ntdb_methods io_methods
= {
658 ntdb_read_normal_off
,
659 ntdb_write_normal_off
,
662 static const struct ntdb_methods io_convert_methods
= {
668 ntdb_read_convert_off
,
669 ntdb_write_convert_off
,
673 initialise the default methods table
675 void ntdb_io_init(struct ntdb_context
*ntdb
)
677 if (ntdb
->flags
& NTDB_CONVERT
)
678 ntdb
->io
= &io_convert_methods
;
680 ntdb
->io
= &io_methods
;