 /*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell              2000
   Copyright (C) Jeremy Allison               2000-2003
   Copyright (C) Rusty Russell                     2010

     ** NOTE! The following LGPL license applies to the ntdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
30 #include <ccan/likely/likely.h>
32 void ntdb_munmap(struct ntdb_file
*file
)
38 munmap(file
->map_ptr
, file
->map_size
);
43 enum NTDB_ERROR
ntdb_mmap(struct ntdb_context
*ntdb
)
47 if (ntdb
->flags
& NTDB_INTERNAL
)
50 #ifndef HAVE_INCOHERENT_MMAP
51 if (ntdb
->flags
& NTDB_NOMMAP
)
55 if ((ntdb
->open_flags
& O_ACCMODE
) == O_RDONLY
)
56 mmap_flags
= PROT_READ
;
58 mmap_flags
= PROT_READ
| PROT_WRITE
;
60 /* size_t can be smaller than off_t. */
61 if ((size_t)ntdb
->file
->map_size
== ntdb
->file
->map_size
) {
62 ntdb
->file
->map_ptr
= mmap(NULL
, ntdb
->file
->map_size
,
64 MAP_SHARED
, ntdb
->file
->fd
, 0);
66 ntdb
->file
->map_ptr
= MAP_FAILED
;
69 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
71 if (ntdb
->file
->map_ptr
== MAP_FAILED
) {
72 ntdb
->file
->map_ptr
= NULL
;
73 #ifdef HAVE_INCOHERENT_MMAP
74 /* Incoherent mmap means everyone must mmap! */
75 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
76 "ntdb_mmap failed for size %lld (%s)",
77 (long long)ntdb
->file
->map_size
,
80 ntdb_logerr(ntdb
, NTDB_SUCCESS
, NTDB_LOG_WARNING
,
81 "ntdb_mmap failed for size %lld (%s)",
82 (long long)ntdb
->file
->map_size
, strerror(errno
));
88 /* check for an out of bounds access - if it is out of bounds then
89 see if the database has been expanded by someone else and expand
91 note that "len" is the minimum length needed for the db.
93 If probe is true, len being too large isn't a failure.
95 static enum NTDB_ERROR
ntdb_normal_oob(struct ntdb_context
*ntdb
,
96 ntdb_off_t off
, ntdb_len_t len
,
100 enum NTDB_ERROR ecode
;
102 /* We can't hold pointers during this: we could unmap! */
103 assert(!ntdb
->direct_access
104 || (ntdb
->flags
& NTDB_NOLOCK
)
105 || ntdb_has_expansion_lock(ntdb
));
107 if (len
+ off
< len
) {
111 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
112 "ntdb_oob off %llu len %llu wrap\n",
113 (long long)off
, (long long)len
);
116 if (ntdb
->flags
& NTDB_INTERNAL
) {
120 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
121 "ntdb_oob len %lld beyond internal"
123 (long long)(off
+ len
),
124 (long long)ntdb
->file
->map_size
);
128 ecode
= ntdb_lock_expand(ntdb
, F_RDLCK
);
129 if (ecode
!= NTDB_SUCCESS
) {
133 if (fstat(ntdb
->file
->fd
, &st
) != 0) {
134 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
135 "Failed to fstat file: %s", strerror(errno
));
136 ntdb_unlock_expand(ntdb
, F_RDLCK
);
140 ntdb_unlock_expand(ntdb
, F_RDLCK
);
142 if (st
.st_size
< off
+ len
) {
146 ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
147 "ntdb_oob len %llu beyond eof at %llu",
148 (long long)(off
+ len
), (long long)st
.st_size
);
152 /* Unmap, update size, remap */
153 ntdb_munmap(ntdb
->file
);
155 ntdb
->file
->map_size
= st
.st_size
;
156 return ntdb_mmap(ntdb
);
159 /* Endian conversion: we only ever deal with 8 byte quantities */
160 void *ntdb_convert(const struct ntdb_context
*ntdb
, void *buf
, ntdb_len_t size
)
162 assert(size
% 8 == 0);
163 if (unlikely((ntdb
->flags
& NTDB_CONVERT
)) && buf
) {
164 uint64_t i
, *p
= (uint64_t *)buf
;
165 for (i
= 0; i
< size
/ 8; i
++)
166 p
[i
] = bswap_64(p
[i
]);
171 /* Return first non-zero offset in offset array, or end, or -ve error. */
172 /* FIXME: Return the off? */
173 uint64_t ntdb_find_nonzero_off(struct ntdb_context
*ntdb
,
174 ntdb_off_t base
, uint64_t start
, uint64_t end
)
179 /* Zero vs non-zero is the same unconverted: minor optimization. */
180 val
= ntdb_access_read(ntdb
, base
+ start
* sizeof(ntdb_off_t
),
181 (end
- start
) * sizeof(ntdb_off_t
), false);
182 if (NTDB_PTR_IS_ERR(val
)) {
183 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val
));
186 for (i
= 0; i
< (end
- start
); i
++) {
190 ntdb_access_release(ntdb
, val
);
194 /* Return first zero offset in num offset array, or num, or -ve error. */
195 uint64_t ntdb_find_zero_off(struct ntdb_context
*ntdb
, ntdb_off_t off
,
201 /* Zero vs non-zero is the same unconverted: minor optimization. */
202 val
= ntdb_access_read(ntdb
, off
, num
* sizeof(ntdb_off_t
), false);
203 if (NTDB_PTR_IS_ERR(val
)) {
204 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val
));
207 for (i
= 0; i
< num
; i
++) {
211 ntdb_access_release(ntdb
, val
);
215 enum NTDB_ERROR
zero_out(struct ntdb_context
*ntdb
, ntdb_off_t off
, ntdb_len_t len
)
217 char buf
[8192] = { 0 };
218 void *p
= ntdb
->io
->direct(ntdb
, off
, len
, true);
219 enum NTDB_ERROR ecode
= NTDB_SUCCESS
;
221 assert(!(ntdb
->flags
& NTDB_RDONLY
));
222 if (NTDB_PTR_IS_ERR(p
)) {
223 return NTDB_PTR_ERR(p
);
230 unsigned todo
= len
< sizeof(buf
) ? len
: sizeof(buf
);
231 ecode
= ntdb
->io
->twrite(ntdb
, off
, buf
, todo
);
232 if (ecode
!= NTDB_SUCCESS
) {
241 ntdb_off_t
ntdb_read_off(struct ntdb_context
*ntdb
, ntdb_off_t off
)
244 enum NTDB_ERROR ecode
;
246 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
247 ntdb_off_t
*p
= ntdb
->io
->direct(ntdb
, off
, sizeof(*p
), false);
248 if (NTDB_PTR_IS_ERR(p
)) {
249 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p
));
255 ecode
= ntdb_read_convert(ntdb
, off
, &ret
, sizeof(ret
));
256 if (ecode
!= NTDB_SUCCESS
) {
257 return NTDB_ERR_TO_OFF(ecode
);
262 /* write a lump of data at a specified offset */
263 static enum NTDB_ERROR
ntdb_write(struct ntdb_context
*ntdb
, ntdb_off_t off
,
264 const void *buf
, ntdb_len_t len
)
266 enum NTDB_ERROR ecode
;
268 if (ntdb
->flags
& NTDB_RDONLY
) {
269 return ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
270 "Write to read-only database");
273 ecode
= ntdb_oob(ntdb
, off
, len
, false);
274 if (ecode
!= NTDB_SUCCESS
) {
278 if (ntdb
->file
->map_ptr
) {
279 memcpy(off
+ (char *)ntdb
->file
->map_ptr
, buf
, len
);
281 #ifdef HAVE_INCOHERENT_MMAP
285 ret
= pwrite(ntdb
->file
->fd
, buf
, len
, off
);
287 /* This shouldn't happen: we avoid sparse files. */
291 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
292 "ntdb_write: %zi at %zu len=%zu (%s)",
293 ret
, (size_t)off
, (size_t)len
,
301 /* read a lump of data at a specified offset */
302 static enum NTDB_ERROR
ntdb_read(struct ntdb_context
*ntdb
, ntdb_off_t off
,
303 void *buf
, ntdb_len_t len
)
305 enum NTDB_ERROR ecode
;
307 ecode
= ntdb_oob(ntdb
, off
, len
, false);
308 if (ecode
!= NTDB_SUCCESS
) {
312 if (ntdb
->file
->map_ptr
) {
313 memcpy(buf
, off
+ (char *)ntdb
->file
->map_ptr
, len
);
315 #ifdef HAVE_INCOHERENT_MMAP
318 ssize_t r
= pread(ntdb
->file
->fd
, buf
, len
, off
);
320 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
321 "ntdb_read failed with %zi at %zu "
322 "len=%zu (%s) map_size=%zu",
323 r
, (size_t)off
, (size_t)len
,
325 (size_t)ntdb
->file
->map_size
);
332 enum NTDB_ERROR
ntdb_write_convert(struct ntdb_context
*ntdb
, ntdb_off_t off
,
333 const void *rec
, size_t len
)
335 enum NTDB_ERROR ecode
;
337 if (unlikely((ntdb
->flags
& NTDB_CONVERT
))) {
338 void *conv
= ntdb
->alloc_fn(ntdb
, len
, ntdb
->alloc_data
);
340 return ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
341 "ntdb_write: no memory converting"
344 memcpy(conv
, rec
, len
);
345 ecode
= ntdb
->io
->twrite(ntdb
, off
,
346 ntdb_convert(ntdb
, conv
, len
), len
);
347 ntdb
->free_fn(conv
, ntdb
->alloc_data
);
349 ecode
= ntdb
->io
->twrite(ntdb
, off
, rec
, len
);
354 enum NTDB_ERROR
ntdb_read_convert(struct ntdb_context
*ntdb
, ntdb_off_t off
,
355 void *rec
, size_t len
)
357 enum NTDB_ERROR ecode
= ntdb
->io
->tread(ntdb
, off
, rec
, len
);
358 ntdb_convert(ntdb
, rec
, len
);
362 enum NTDB_ERROR
ntdb_write_off(struct ntdb_context
*ntdb
,
363 ntdb_off_t off
, ntdb_off_t val
)
365 if (ntdb
->flags
& NTDB_RDONLY
) {
366 return ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
367 "Write to read-only database");
370 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
371 ntdb_off_t
*p
= ntdb
->io
->direct(ntdb
, off
, sizeof(*p
), true);
372 if (NTDB_PTR_IS_ERR(p
)) {
373 return NTDB_PTR_ERR(p
);
380 return ntdb_write_convert(ntdb
, off
, &val
, sizeof(val
));
383 static void *_ntdb_alloc_read(struct ntdb_context
*ntdb
, ntdb_off_t offset
,
384 ntdb_len_t len
, unsigned int prefix
)
387 enum NTDB_ERROR ecode
;
389 /* some systems don't like zero length malloc */
390 buf
= ntdb
->alloc_fn(ntdb
, prefix
+ len
? prefix
+ len
: 1,
393 ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_USE_ERROR
,
394 "ntdb_alloc_read alloc failed len=%zu",
395 (size_t)(prefix
+ len
));
396 return NTDB_ERR_PTR(NTDB_ERR_OOM
);
398 ecode
= ntdb
->io
->tread(ntdb
, offset
, buf
+prefix
, len
);
399 if (unlikely(ecode
!= NTDB_SUCCESS
)) {
400 ntdb
->free_fn(buf
, ntdb
->alloc_data
);
401 return NTDB_ERR_PTR(ecode
);
407 /* read a lump of data, allocating the space for it */
408 void *ntdb_alloc_read(struct ntdb_context
*ntdb
, ntdb_off_t offset
, ntdb_len_t len
)
410 return _ntdb_alloc_read(ntdb
, offset
, len
, 0);
413 static enum NTDB_ERROR
fill(struct ntdb_context
*ntdb
,
414 const void *buf
, size_t size
,
415 ntdb_off_t off
, ntdb_len_t len
)
418 size_t n
= len
> size
? size
: len
;
419 ssize_t ret
= pwrite(ntdb
->file
->fd
, buf
, n
, off
);
424 return ntdb_logerr(ntdb
, NTDB_ERR_IO
, NTDB_LOG_ERROR
,
426 " %zi at %zu len=%zu (%s)",
427 ret
, (size_t)off
, (size_t)len
,
436 /* expand a file. we prefer to use ftruncate, as that is what posix
437 says to use for mmap expansion */
438 static enum NTDB_ERROR
ntdb_expand_file(struct ntdb_context
*ntdb
,
442 enum NTDB_ERROR ecode
;
444 assert((ntdb
->file
->map_size
+ addition
) % NTDB_PGSIZE
== 0);
445 if (ntdb
->flags
& NTDB_RDONLY
) {
446 return ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
447 "Expand on read-only database");
450 if (ntdb
->flags
& NTDB_INTERNAL
) {
451 char *new = ntdb
->expand_fn(ntdb
->file
->map_ptr
,
452 ntdb
->file
->map_size
+ addition
,
455 return ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
456 "No memory to expand database");
458 ntdb
->file
->map_ptr
= new;
459 ntdb
->file
->map_size
+= addition
;
462 /* Unmap before trying to write; old NTDB claimed OpenBSD had
463 * problem with this otherwise. */
464 ntdb_munmap(ntdb
->file
);
466 /* If this fails, we try to fill anyway. */
467 if (ftruncate(ntdb
->file
->fd
, ntdb
->file
->map_size
+ addition
))
470 /* now fill the file with something. This ensures that the
471 file isn't sparse, which would be very bad if we ran out of
472 disk. This must be done with write, not via mmap */
473 memset(buf
, 0x43, sizeof(buf
));
474 ecode
= fill(ntdb
, buf
, sizeof(buf
), ntdb
->file
->map_size
,
476 if (ecode
!= NTDB_SUCCESS
)
478 ntdb
->file
->map_size
+= addition
;
479 return ntdb_mmap(ntdb
);
483 const void *ntdb_access_read(struct ntdb_context
*ntdb
,
484 ntdb_off_t off
, ntdb_len_t len
, bool convert
)
488 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
489 ret
= ntdb
->io
->direct(ntdb
, off
, len
, false);
491 if (NTDB_PTR_IS_ERR(ret
)) {
496 struct ntdb_access_hdr
*hdr
;
497 hdr
= _ntdb_alloc_read(ntdb
, off
, len
, sizeof(*hdr
));
498 if (NTDB_PTR_IS_ERR(hdr
)) {
501 hdr
->next
= ntdb
->access
;
505 ntdb_convert(ntdb
, (void *)ret
, len
);
508 ntdb
->direct_access
++;
513 void *ntdb_access_write(struct ntdb_context
*ntdb
,
514 ntdb_off_t off
, ntdb_len_t len
, bool convert
)
518 if (ntdb
->flags
& NTDB_RDONLY
) {
519 ntdb_logerr(ntdb
, NTDB_ERR_RDONLY
, NTDB_LOG_USE_ERROR
,
520 "Write to read-only database");
521 return NTDB_ERR_PTR(NTDB_ERR_RDONLY
);
524 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
525 ret
= ntdb
->io
->direct(ntdb
, off
, len
, true);
527 if (NTDB_PTR_IS_ERR(ret
)) {
533 struct ntdb_access_hdr
*hdr
;
534 hdr
= _ntdb_alloc_read(ntdb
, off
, len
, sizeof(*hdr
));
535 if (NTDB_PTR_IS_ERR(hdr
)) {
538 hdr
->next
= ntdb
->access
;
542 hdr
->convert
= convert
;
545 ntdb_convert(ntdb
, (void *)ret
, len
);
547 ntdb
->direct_access
++;
552 static struct ntdb_access_hdr
**find_hdr(struct ntdb_context
*ntdb
, const void *p
)
554 struct ntdb_access_hdr
**hp
;
556 for (hp
= &ntdb
->access
; *hp
; hp
= &(*hp
)->next
) {
563 void ntdb_access_release(struct ntdb_context
*ntdb
, const void *p
)
565 struct ntdb_access_hdr
*hdr
, **hp
= find_hdr(ntdb
, p
);
570 ntdb
->free_fn(hdr
, ntdb
->alloc_data
);
572 ntdb
->direct_access
--;
575 enum NTDB_ERROR
ntdb_access_commit(struct ntdb_context
*ntdb
, void *p
)
577 struct ntdb_access_hdr
*hdr
, **hp
= find_hdr(ntdb
, p
);
578 enum NTDB_ERROR ecode
;
583 ecode
= ntdb_write_convert(ntdb
, hdr
->off
, p
, hdr
->len
);
585 ecode
= ntdb_write(ntdb
, hdr
->off
, p
, hdr
->len
);
587 ntdb
->free_fn(hdr
, ntdb
->alloc_data
);
589 ntdb
->direct_access
--;
590 ecode
= NTDB_SUCCESS
;
596 static void *ntdb_direct(struct ntdb_context
*ntdb
, ntdb_off_t off
, size_t len
,
599 enum NTDB_ERROR ecode
;
601 if (unlikely(!ntdb
->file
->map_ptr
))
604 ecode
= ntdb_oob(ntdb
, off
, len
, false);
605 if (unlikely(ecode
!= NTDB_SUCCESS
))
606 return NTDB_ERR_PTR(ecode
);
607 return (char *)ntdb
->file
->map_ptr
+ off
;
610 void ntdb_inc_seqnum(struct ntdb_context
*ntdb
)
614 if (likely(!(ntdb
->flags
& NTDB_CONVERT
))) {
617 direct
= ntdb
->io
->direct(ntdb
,
618 offsetof(struct ntdb_header
, seqnum
),
619 sizeof(*direct
), true);
620 if (likely(direct
)) {
621 /* Don't let it go negative, even briefly */
622 if (unlikely((*direct
) + 1) < 0)
629 seq
= ntdb_read_off(ntdb
, offsetof(struct ntdb_header
, seqnum
));
630 if (!NTDB_OFF_IS_ERR(seq
)) {
632 if (unlikely((int64_t)seq
< 0))
634 ntdb_write_off(ntdb
, offsetof(struct ntdb_header
, seqnum
), seq
);
638 static const struct ntdb_methods io_methods
= {
647 initialise the default methods table
649 void ntdb_io_init(struct ntdb_context
*ntdb
)
651 ntdb
->io
= &io_methods
;