wintest: add option to select the dns backend
[Samba/gebeck_regimport.git] / lib / ntdb / open.c
blob9de9e9b48cc992873933cd1ce248b3520a85dc20
1 /*
2 Trivial Database 2: opening and closing TDBs
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 #include "private.h"
19 #include <ccan/build_assert/build_assert.h>
/* all tdbs, to detect double-opens (fcntl file locks don't nest!) */
22 static struct ntdb_context *tdbs = NULL;
24 static struct ntdb_file *find_file(dev_t device, ino_t ino)
26 struct ntdb_context *i;
28 for (i = tdbs; i; i = i->next) {
29 if (i->file->device == device && i->file->inode == ino) {
30 i->file->refcnt++;
31 return i->file;
34 return NULL;
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once len bytes have been read (trivially true for
 * len == 0).  Returns false on a read error (errno as set by read(2)) or
 * on end-of-file before len bytes arrived (errno set to EWOULDBLOCK).
 * A read interrupted by a signal (EINTR) is retried rather than reported
 * as a failure.
 */
static bool read_all(int fd, void *buf, size_t len)
{
	char *p = buf;

	while (len) {
		ssize_t ret = read(fd, p, len);

		if (ret < 0) {
			/* Interrupted before any data was read: try again. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (ret == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		p += ret;
		len -= ret;
	}
	return true;
}
55 static uint32_t random_number(struct ntdb_context *ntdb)
57 int fd;
58 uint32_t ret = 0;
59 struct timeval now;
61 fd = open("/dev/urandom", O_RDONLY);
62 if (fd >= 0) {
63 if (read_all(fd, &ret, sizeof(ret))) {
64 close(fd);
65 return ret;
67 close(fd);
69 /* FIXME: Untested! Based on Wikipedia protocol description! */
70 fd = open("/dev/egd-pool", O_RDWR);
71 if (fd >= 0) {
72 /* Command is 1, next byte is size we want to read. */
73 char cmd[2] = { 1, sizeof(uint32_t) };
74 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
75 char reply[1 + sizeof(uint32_t)];
76 int r = read(fd, reply, sizeof(reply));
77 if (r > 1) {
78 /* Copy at least some bytes. */
79 memcpy(&ret, reply+1, r - 1);
80 if (reply[0] == sizeof(uint32_t)
81 && r == sizeof(reply)) {
82 close(fd);
83 return ret;
87 close(fd);
90 /* Fallback: pid and time. */
91 gettimeofday(&now, NULL);
92 ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
93 ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
94 "ntdb_open: random from getpid and time");
95 return ret;
98 static void ntdb_context_init(struct ntdb_context *ntdb)
100 /* Initialize the NTDB fields here */
101 ntdb_io_init(ntdb);
102 ntdb->direct_access = 0;
103 ntdb->transaction = NULL;
104 ntdb->access = NULL;
/* initialise a new database:
 *
 *	struct ntdb_header;
 *	struct {
 *		struct ntdb_used_record hash_header;
 *		ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
 *	} hash;
 *	struct ntdb_freetable ftable;
 *	struct {
 *		struct ntdb_free_record free_header;
 *		char forty_three[...];
 *	} remainder;
 */
/*
 * Bytes taken by everything listed above up to and including the free
 * record header, for a hash table of (1 << hbits) buckets.  The argument
 * is parenthesised so that any expression can be passed safely.
 */
#define NEW_DATABASE_HDR_SIZE(hbits)					\
	(sizeof(struct ntdb_header)					\
	 + sizeof(struct ntdb_used_record)				\
	 + (sizeof(ntdb_off_t) << (hbits))				\
	 + sizeof(struct ntdb_freetable)				\
	 + sizeof(struct ntdb_free_record))
126 static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
127 struct ntdb_attribute_seed *seed,
128 struct ntdb_header *rhdr)
130 /* We make it up in memory, then write it out if not internal */
131 struct ntdb_freetable *ftable;
132 struct ntdb_used_record *htable;
133 struct ntdb_header *hdr;
134 struct ntdb_free_record *remainder;
135 char *mem;
136 unsigned int magic_len;
137 ssize_t rlen;
138 size_t dbsize, hashsize, hdrsize, remaindersize;
139 enum NTDB_ERROR ecode;
141 hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
143 /* Always make db a multiple of NTDB_PGSIZE */
144 hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
145 dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
147 mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
148 if (!mem) {
149 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
150 "ntdb_new_database: failed to allocate");
153 hdr = (void *)mem;
154 htable = (void *)(mem + sizeof(*hdr));
155 ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
156 remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
157 + sizeof(*ftable));
159 /* Fill in the header */
160 hdr->version = NTDB_VERSION;
161 if (seed)
162 hdr->hash_seed = seed->seed;
163 else
164 hdr->hash_seed = random_number(ntdb);
165 hdr->hash_test = NTDB_HASH_MAGIC;
166 hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
167 sizeof(hdr->hash_test),
168 hdr->hash_seed,
169 ntdb->hash_data);
170 hdr->hash_bits = ntdb->hash_bits;
171 hdr->recovery = 0;
172 hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
173 hdr->seqnum = 0;
174 hdr->capabilities = 0;
175 memset(hdr->reserved, 0, sizeof(hdr->reserved));
177 /* Hash is all zero after header. */
178 set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize);
179 memset(htable + 1, 0, hashsize);
181 /* Free is empty. */
182 hdr->free_table = (char *)ftable - (char *)hdr;
183 memset(ftable, 0, sizeof(*ftable));
184 ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
185 sizeof(*ftable) - sizeof(ftable->hdr),
186 sizeof(*ftable) - sizeof(ftable->hdr));
187 if (ecode != NTDB_SUCCESS) {
188 goto out;
191 /* Rest of database is a free record, containing junk. */
192 remaindersize = dbsize - hdrsize;
193 remainder->ftable_and_len
194 = (remaindersize + sizeof(*remainder)
195 - sizeof(struct ntdb_used_record));
196 remainder->next = 0;
197 remainder->magic_and_prev
198 = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
199 | ((char *)remainder - (char *)hdr);
200 memset(remainder + 1, 0x43, remaindersize);
202 /* Put in our single free entry. */
203 ftable->buckets[size_to_bucket(remaindersize)] =
204 (char *)remainder - (char *)hdr;
206 /* Magic food */
207 memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
208 strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
210 /* This creates an endian-converted database, as if read from disk */
211 magic_len = sizeof(hdr->magic_food);
212 ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
214 /* Return copy of header. */
215 *rhdr = *hdr;
217 if (ntdb->flags & NTDB_INTERNAL) {
218 ntdb->file->map_size = dbsize;
219 ntdb->file->map_ptr = hdr;
220 return NTDB_SUCCESS;
222 if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
223 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
224 "ntdb_new_database:"
225 " failed to seek: %s", strerror(errno));
226 goto out;
229 if (ftruncate(ntdb->file->fd, 0) == -1) {
230 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
231 "ntdb_new_database:"
232 " failed to truncate: %s", strerror(errno));
233 goto out;
236 rlen = write(ntdb->file->fd, hdr, dbsize);
237 if (rlen != dbsize) {
238 if (rlen >= 0)
239 errno = ENOSPC;
240 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
241 "ntdb_new_database: %zi writing header: %s",
242 rlen, strerror(errno));
243 goto out;
246 out:
247 ntdb->free_fn(hdr, ntdb->alloc_data);
248 return ecode;
251 static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
253 ntdb->file = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data);
254 if (!ntdb->file)
255 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
256 "ntdb_open: cannot alloc ntdb_file structure");
257 ntdb->file->num_lockrecs = 0;
258 ntdb->file->lockrecs = NULL;
259 ntdb->file->allrecord_lock.count = 0;
260 ntdb->file->refcnt = 1;
261 ntdb->file->map_ptr = NULL;
262 return NTDB_SUCCESS;
265 _PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
266 const union ntdb_attribute *attr)
268 switch (attr->base.attr) {
269 case NTDB_ATTRIBUTE_LOG:
270 ntdb->log_fn = attr->log.fn;
271 ntdb->log_data = attr->log.data;
272 break;
273 case NTDB_ATTRIBUTE_HASH:
274 case NTDB_ATTRIBUTE_SEED:
275 case NTDB_ATTRIBUTE_OPENHOOK:
276 case NTDB_ATTRIBUTE_HASHSIZE:
277 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
278 NTDB_LOG_USE_ERROR,
279 "ntdb_set_attribute:"
280 " cannot set %s after opening",
281 attr->base.attr == NTDB_ATTRIBUTE_HASH
282 ? "NTDB_ATTRIBUTE_HASH"
283 : attr->base.attr == NTDB_ATTRIBUTE_SEED
284 ? "NTDB_ATTRIBUTE_SEED"
285 : attr->base.attr == NTDB_ATTRIBUTE_OPENHOOK
286 ? "NTDB_ATTRIBUTE_OPENHOOK"
287 : "NTDB_ATTRIBUTE_HASHSIZE");
288 case NTDB_ATTRIBUTE_STATS:
289 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
290 NTDB_LOG_USE_ERROR,
291 "ntdb_set_attribute:"
292 " cannot set NTDB_ATTRIBUTE_STATS");
293 case NTDB_ATTRIBUTE_FLOCK:
294 ntdb->lock_fn = attr->flock.lock;
295 ntdb->unlock_fn = attr->flock.unlock;
296 ntdb->lock_data = attr->flock.data;
297 break;
298 case NTDB_ATTRIBUTE_ALLOCATOR:
299 ntdb->alloc_fn = attr->alloc.alloc;
300 ntdb->expand_fn = attr->alloc.expand;
301 ntdb->free_fn = attr->alloc.free;
302 ntdb->alloc_data = attr->alloc.priv_data;
303 break;
304 default:
305 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
306 NTDB_LOG_USE_ERROR,
307 "ntdb_set_attribute:"
308 " unknown attribute type %u",
309 attr->base.attr);
311 return NTDB_SUCCESS;
314 _PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
315 union ntdb_attribute *attr)
317 switch (attr->base.attr) {
318 case NTDB_ATTRIBUTE_LOG:
319 if (!ntdb->log_fn)
320 return NTDB_ERR_NOEXIST;
321 attr->log.fn = ntdb->log_fn;
322 attr->log.data = ntdb->log_data;
323 break;
324 case NTDB_ATTRIBUTE_HASH:
325 attr->hash.fn = ntdb->hash_fn;
326 attr->hash.data = ntdb->hash_data;
327 break;
328 case NTDB_ATTRIBUTE_SEED:
329 attr->seed.seed = ntdb->hash_seed;
330 break;
331 case NTDB_ATTRIBUTE_OPENHOOK:
332 if (!ntdb->openhook)
333 return NTDB_ERR_NOEXIST;
334 attr->openhook.fn = ntdb->openhook;
335 attr->openhook.data = ntdb->openhook_data;
336 break;
337 case NTDB_ATTRIBUTE_STATS: {
338 size_t size = attr->stats.size;
339 if (size > ntdb->stats.size)
340 size = ntdb->stats.size;
341 memcpy(&attr->stats, &ntdb->stats, size);
342 break;
344 case NTDB_ATTRIBUTE_FLOCK:
345 attr->flock.lock = ntdb->lock_fn;
346 attr->flock.unlock = ntdb->unlock_fn;
347 attr->flock.data = ntdb->lock_data;
348 break;
349 case NTDB_ATTRIBUTE_ALLOCATOR:
350 attr->alloc.alloc = ntdb->alloc_fn;
351 attr->alloc.expand = ntdb->expand_fn;
352 attr->alloc.free = ntdb->free_fn;
353 attr->alloc.priv_data = ntdb->alloc_data;
354 break;
355 case NTDB_ATTRIBUTE_HASHSIZE:
356 attr->hashsize.size = 1 << ntdb->hash_bits;
357 break;
358 default:
359 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
360 NTDB_LOG_USE_ERROR,
361 "ntdb_get_attribute:"
362 " unknown attribute type %u",
363 attr->base.attr);
365 attr->base.next = NULL;
366 return NTDB_SUCCESS;
369 _PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
370 enum ntdb_attribute_type type)
372 switch (type) {
373 case NTDB_ATTRIBUTE_LOG:
374 ntdb->log_fn = NULL;
375 break;
376 case NTDB_ATTRIBUTE_OPENHOOK:
377 ntdb->openhook = NULL;
378 break;
379 case NTDB_ATTRIBUTE_HASH:
380 case NTDB_ATTRIBUTE_SEED:
381 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
382 "ntdb_unset_attribute: cannot unset %s after opening",
383 type == NTDB_ATTRIBUTE_HASH
384 ? "NTDB_ATTRIBUTE_HASH"
385 : "NTDB_ATTRIBUTE_SEED");
386 break;
387 case NTDB_ATTRIBUTE_STATS:
388 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
389 NTDB_LOG_USE_ERROR,
390 "ntdb_unset_attribute:"
391 "cannot unset NTDB_ATTRIBUTE_STATS");
392 break;
393 case NTDB_ATTRIBUTE_FLOCK:
394 ntdb->lock_fn = ntdb_fcntl_lock;
395 ntdb->unlock_fn = ntdb_fcntl_unlock;
396 break;
397 default:
398 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
399 NTDB_LOG_USE_ERROR,
400 "ntdb_unset_attribute: unknown attribute type %u",
401 type);
405 /* The top three bits of the capability tell us whether it matters. */
406 enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
407 ntdb_off_t type)
409 if (type & NTDB_CAP_NOOPEN) {
410 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
411 "%s: file has unknown capability %llu",
412 caller, type & NTDB_CAP_NOOPEN);
415 if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
416 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
417 "%s: file has unknown capability %llu"
418 " (cannot write to it)",
419 caller, type & NTDB_CAP_NOOPEN);
422 if (type & NTDB_CAP_NOCHECK) {
423 ntdb->flags |= NTDB_CANT_CHECK;
425 return NTDB_SUCCESS;
428 static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
429 ntdb_off_t capabilities)
431 ntdb_off_t off, next;
432 enum NTDB_ERROR ecode = NTDB_SUCCESS;
433 const struct ntdb_capability *cap;
435 /* Check capability list. */
436 for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) {
437 cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
438 if (NTDB_PTR_IS_ERR(cap)) {
439 return NTDB_PTR_ERR(cap);
442 switch (cap->type & NTDB_CAP_TYPE_MASK) {
443 /* We don't understand any capabilities (yet). */
444 default:
445 ecode = unknown_capability(ntdb, "ntdb_open", cap->type);
447 next = cap->next;
448 ntdb_access_release(ntdb, cap);
450 return ecode;
/*
 * Default allocation callback: plain malloc().  The owner and private
 * data arguments are not used.
 */
static void *default_alloc(const void *owner, size_t len, void *priv_data)
{
	void *mem;

	(void)owner;
	(void)priv_data;
	mem = malloc(len);
	return mem;
}
/*
 * Default resize callback: plain realloc().  The private data argument
 * is not used.
 */
static void *default_expand(void *ptr, size_t len, void *priv_data)
{
	void *resized;

	(void)priv_data;
	resized = realloc(ptr, len);
	return resized;
}
/*
 * Default release callback: plain free().  The private data argument is
 * not used.
 */
static void default_free(void *ptr, void *priv_data)
{
	(void)priv_data;
	free(ptr);
}
468 /* First allocation needs manual search of attributes. */
469 static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr,
470 const char *name)
472 size_t len = sizeof(struct ntdb_context) + strlen(name) + 1;
474 while (attr) {
475 if (attr->base.attr == NTDB_ATTRIBUTE_ALLOCATOR) {
476 return attr->alloc.alloc(NULL, len,
477 attr->alloc.priv_data);
479 attr = attr->base.next;
481 return default_alloc(NULL, len, NULL);
/*
 * Return the number of bits b (at least 1) such that (1 << b) is the
 * smallest power of two that is >= size.
 *
 * Sizes above 2^63 cannot be represented by a 64-bit power of two; 64 is
 * returned for them instead of shifting by the full type width.
 */
static unsigned int next_pow2(uint64_t size)
{
	unsigned int bits = 1;

	/* Stop at 64: shifting a 64-bit value by 64 is undefined. */
	while (bits < 64 && (1ULL << bits) < size)
		bits++;
	return bits;
}
493 _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
494 int open_flags, mode_t mode,
495 union ntdb_attribute *attr)
497 struct ntdb_context *ntdb;
498 struct stat st;
499 int saved_errno = 0;
500 uint64_t hash_test;
501 unsigned v;
502 ssize_t rlen;
503 struct ntdb_header hdr;
504 struct ntdb_attribute_seed *seed = NULL;
505 ntdb_bool_err berr;
506 enum NTDB_ERROR ecode;
507 int openlock;
509 ntdb = alloc_ntdb(attr, name);
510 if (!ntdb) {
511 /* Can't log this */
512 errno = ENOMEM;
513 return NULL;
515 /* Set name immediately for logging functions. */
516 ntdb->name = strcpy((char *)(ntdb + 1), name);
517 ntdb->flags = ntdb_flags;
518 ntdb->log_fn = NULL;
519 ntdb->open_flags = open_flags;
520 ntdb->file = NULL;
521 ntdb->openhook = NULL;
522 ntdb->lock_fn = ntdb_fcntl_lock;
523 ntdb->unlock_fn = ntdb_fcntl_unlock;
524 ntdb->hash_fn = ntdb_jenkins_hash;
525 memset(&ntdb->stats, 0, sizeof(ntdb->stats));
526 ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
527 ntdb->stats.size = sizeof(ntdb->stats);
528 ntdb->alloc_fn = default_alloc;
529 ntdb->expand_fn = default_expand;
530 ntdb->free_fn = default_free;
531 ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
533 while (attr) {
534 switch (attr->base.attr) {
535 case NTDB_ATTRIBUTE_HASH:
536 ntdb->hash_fn = attr->hash.fn;
537 ntdb->hash_data = attr->hash.data;
538 break;
539 case NTDB_ATTRIBUTE_SEED:
540 seed = &attr->seed;
541 break;
542 case NTDB_ATTRIBUTE_OPENHOOK:
543 ntdb->openhook = attr->openhook.fn;
544 ntdb->openhook_data = attr->openhook.data;
545 break;
546 case NTDB_ATTRIBUTE_HASHSIZE:
547 ntdb->hash_bits = next_pow2(attr->hashsize.size);
548 if (ntdb->hash_bits > 31) {
549 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
550 NTDB_LOG_USE_ERROR,
551 "ntdb_open: hash_size %u"
552 " too large",
553 attr->hashsize.size);
554 goto fail;
556 break;
557 default:
558 /* These are set as normal. */
559 ecode = ntdb_set_attribute(ntdb, attr);
560 if (ecode != NTDB_SUCCESS)
561 goto fail;
563 attr = attr->base.next;
566 if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
567 | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
568 | NTDB_RDONLY)) {
569 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
570 "ntdb_open: unknown flags %u", ntdb_flags);
571 goto fail;
574 if (seed) {
575 if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
576 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
577 NTDB_LOG_USE_ERROR,
578 "ntdb_open:"
579 " cannot set NTDB_ATTRIBUTE_SEED"
580 " without O_CREAT.");
581 goto fail;
585 if ((open_flags & O_ACCMODE) == O_WRONLY) {
586 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
587 "ntdb_open: can't open ntdb %s write-only",
588 name);
589 goto fail;
592 if ((open_flags & O_ACCMODE) == O_RDONLY) {
593 openlock = F_RDLCK;
594 ntdb->flags |= NTDB_RDONLY;
595 } else {
596 if (ntdb_flags & NTDB_RDONLY) {
597 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
598 NTDB_LOG_USE_ERROR,
599 "ntdb_open: can't use NTDB_RDONLY"
600 " without O_RDONLY");
601 goto fail;
603 openlock = F_WRLCK;
606 /* internal databases don't need any of the rest. */
607 if (ntdb->flags & NTDB_INTERNAL) {
608 ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
609 ecode = ntdb_new_file(ntdb);
610 if (ecode != NTDB_SUCCESS) {
611 goto fail;
613 ntdb->file->fd = -1;
614 ecode = ntdb_new_database(ntdb, seed, &hdr);
615 if (ecode == NTDB_SUCCESS) {
616 ntdb_convert(ntdb, &hdr.hash_seed,
617 sizeof(hdr.hash_seed));
618 ntdb->hash_seed = hdr.hash_seed;
619 ntdb_context_init(ntdb);
620 ntdb_ftable_init(ntdb);
622 if (ecode != NTDB_SUCCESS) {
623 goto fail;
625 return ntdb;
628 if (stat(name, &st) != -1)
629 ntdb->file = find_file(st.st_dev, st.st_ino);
631 if (!ntdb->file) {
632 ecode = ntdb_new_file(ntdb);
633 if (ecode != NTDB_SUCCESS) {
634 goto fail;
637 /* Set this now, as ntdb_nest_lock examines it. */
638 ntdb->file->map_size = 0;
640 if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
641 enum ntdb_log_level lvl;
642 /* errno set by open(2) */
643 saved_errno = errno;
645 /* Probing for files like this is a common pattern. */
646 if (!(open_flags & O_CREAT) && errno == ENOENT) {
647 lvl = NTDB_LOG_WARNING;
648 } else {
649 lvl = NTDB_LOG_ERROR;
651 ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
652 "ntdb_open: could not open file %s: %s",
653 name, strerror(errno));
655 goto fail_errno;
658 /* ensure there is only one process initialising at once:
659 * do it immediately to reduce the create/openlock race. */
660 ecode = ntdb_lock_open(ntdb, openlock,
661 NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
662 if (ecode != NTDB_SUCCESS) {
663 saved_errno = errno;
664 goto fail_errno;
667 /* on exec, don't inherit the fd */
668 v = fcntl(ntdb->file->fd, F_GETFD, 0);
669 fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
671 if (fstat(ntdb->file->fd, &st) == -1) {
672 saved_errno = errno;
673 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
674 "ntdb_open: could not stat open %s: %s",
675 name, strerror(errno));
676 goto fail_errno;
679 ntdb->file->device = st.st_dev;
680 ntdb->file->inode = st.st_ino;
681 } else {
682 /* ensure there is only one process initialising at once */
683 ecode = ntdb_lock_open(ntdb, openlock,
684 NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
685 if (ecode != NTDB_SUCCESS) {
686 saved_errno = errno;
687 goto fail_errno;
691 /* call their open hook if they gave us one. */
692 if (ntdb->openhook) {
693 ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
694 if (ecode != NTDB_SUCCESS) {
695 ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
696 "ntdb_open: open hook failed");
697 goto fail;
699 open_flags |= O_CREAT;
702 /* If they used O_TRUNC, read will return 0. */
703 rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
704 if (rlen == 0 && (open_flags & O_CREAT)) {
705 ecode = ntdb_new_database(ntdb, seed, &hdr);
706 if (ecode != NTDB_SUCCESS) {
707 goto fail;
709 } else if (rlen < 0) {
710 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
711 "ntdb_open: error %s reading %s",
712 strerror(errno), name);
713 goto fail;
714 } else if (rlen < sizeof(hdr)
715 || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
716 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
717 "ntdb_open: %s is not a ntdb file", name);
718 goto fail;
721 if (hdr.version != NTDB_VERSION) {
722 if (hdr.version == bswap_64(NTDB_VERSION))
723 ntdb->flags |= NTDB_CONVERT;
724 else {
725 /* wrong version */
726 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
727 "ntdb_open:"
728 " %s is unknown version 0x%llx",
729 name, (long long)hdr.version);
730 goto fail;
732 } else if (ntdb->flags & NTDB_CONVERT) {
733 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
734 "ntdb_open:"
735 " %s does not need NTDB_CONVERT",
736 name);
737 goto fail;
740 ntdb_context_init(ntdb);
742 ntdb_convert(ntdb, &hdr, sizeof(hdr));
743 ntdb->hash_bits = hdr.hash_bits;
744 ntdb->hash_seed = hdr.hash_seed;
745 hash_test = NTDB_HASH_MAGIC;
746 hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
747 if (hdr.hash_test != hash_test) {
748 /* wrong hash variant */
749 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
750 "ntdb_open:"
751 " %s uses a different hash function",
752 name);
753 goto fail;
756 ecode = capabilities_ok(ntdb, hdr.capabilities);
757 if (ecode != NTDB_SUCCESS) {
758 goto fail;
761 /* Clear any features we don't understand. */
762 if ((open_flags & O_ACCMODE) != O_RDONLY) {
763 hdr.features_used &= NTDB_FEATURE_MASK;
764 ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
765 features_used),
766 &hdr.features_used,
767 sizeof(hdr.features_used));
768 if (ecode != NTDB_SUCCESS)
769 goto fail;
772 ntdb_unlock_open(ntdb, openlock);
774 /* This makes sure we have current map_size and mmap. */
775 ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
776 if (unlikely(ecode != NTDB_SUCCESS))
777 goto fail;
779 if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
780 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
781 "ntdb_open:"
782 " %s size %llu isn't a multiple of %u",
783 name, (long long)ntdb->file->map_size,
784 NTDB_PGSIZE);
785 goto fail;
788 /* Now it's fully formed, recover if necessary. */
789 berr = ntdb_needs_recovery(ntdb);
790 if (unlikely(berr != false)) {
791 if (berr < 0) {
792 ecode = NTDB_OFF_TO_ERR(berr);
793 goto fail;
795 ecode = ntdb_lock_and_recover(ntdb);
796 if (ecode != NTDB_SUCCESS) {
797 goto fail;
801 ecode = ntdb_ftable_init(ntdb);
802 if (ecode != NTDB_SUCCESS) {
803 goto fail;
806 ntdb->next = tdbs;
807 tdbs = ntdb;
808 return ntdb;
810 fail:
811 /* Map ecode to some logical errno. */
812 switch (NTDB_ERR_TO_OFF(ecode)) {
813 case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
814 case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
815 saved_errno = EIO;
816 break;
817 case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
818 saved_errno = EWOULDBLOCK;
819 break;
820 case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
821 saved_errno = ENOMEM;
822 break;
823 case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
824 saved_errno = EINVAL;
825 break;
826 default:
827 saved_errno = EINVAL;
828 break;
831 fail_errno:
832 #ifdef NTDB_TRACE
833 close(ntdb->tracefd);
834 #endif
835 if (ntdb->file) {
836 ntdb_lock_cleanup(ntdb);
837 if (--ntdb->file->refcnt == 0) {
838 assert(ntdb->file->num_lockrecs == 0);
839 if (ntdb->file->map_ptr) {
840 if (ntdb->flags & NTDB_INTERNAL) {
841 ntdb->free_fn(ntdb->file->map_ptr,
842 ntdb->alloc_data);
843 } else
844 ntdb_munmap(ntdb->file);
846 if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
847 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
848 "ntdb_open: failed to close ntdb fd"
849 " on error: %s", strerror(errno));
850 ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
851 ntdb->free_fn(ntdb->file, ntdb->alloc_data);
855 ntdb->free_fn(ntdb, ntdb->alloc_data);
856 errno = saved_errno;
857 return NULL;
860 _PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
862 int ret = 0;
863 struct ntdb_context **i;
865 ntdb_trace(ntdb, "ntdb_close");
867 if (ntdb->transaction) {
868 ntdb_transaction_cancel(ntdb);
871 if (ntdb->file->map_ptr) {
872 if (ntdb->flags & NTDB_INTERNAL)
873 ntdb->free_fn(ntdb->file->map_ptr, ntdb->alloc_data);
874 else
875 ntdb_munmap(ntdb->file);
877 if (ntdb->file) {
878 ntdb_lock_cleanup(ntdb);
879 if (--ntdb->file->refcnt == 0) {
880 ret = close(ntdb->file->fd);
881 ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
882 ntdb->free_fn(ntdb->file, ntdb->alloc_data);
886 /* Remove from tdbs list */
887 for (i = &tdbs; *i; i = &(*i)->next) {
888 if (*i == ntdb) {
889 *i = ntdb->next;
890 break;
894 #ifdef NTDB_TRACE
895 close(ntdb->tracefd);
896 #endif
897 ntdb->free_fn(ntdb, ntdb->alloc_data);
899 return ret;
902 _PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
904 struct ntdb_context *i;
906 for (i = tdbs; i; i = i->next) {
907 if (fn(i, p) != 0)
908 break;