talloc: Put pool-specific data before the chunk
[Samba.git] / lib / ntdb / open.c
blob2a265afe7d585aac6f975ab2f5837ed7645586cf
1 /*
2 Trivial Database 2: opening and closing TDBs
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 #include "private.h"
19 #include <ccan/build_assert/build_assert.h>
21 /* Open ntdb contexts, chained through ->next; searched by find_file() to detect double-opens of the same file (fcntl file locks don't nest!) */
22 static struct ntdb_context *tdbs = NULL;
24 static struct ntdb_file *find_file(dev_t device, ino_t ino)
26 struct ntdb_context *i;
28 for (i = tdbs; i; i = i->next) {
29 if (i->file->device == device && i->file->inode == ino) {
30 i->file->refcnt++;
31 return i->file;
34 return NULL;
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once everything has been read.  Returns false if read(2)
 * fails (errno left as read set it) or if end-of-file is reached first
 * (errno is then set to EWOULDBLOCK).
 */
static bool read_all(int fd, void *buf, size_t len)
{
	char *dst = buf;
	size_t remaining;

	for (remaining = len; remaining != 0; ) {
		ssize_t got = read(fd, dst, remaining);

		if (got < 0)
			return false;
		if (got == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		dst += got;
		remaining -= got;
	}
	return true;
}
55 static uint32_t random_number(struct ntdb_context *ntdb)
57 int fd;
58 uint32_t ret = 0;
59 struct timeval now;
61 fd = open("/dev/urandom", O_RDONLY);
62 if (fd >= 0) {
63 if (read_all(fd, &ret, sizeof(ret))) {
64 close(fd);
65 return ret;
67 close(fd);
69 /* FIXME: Untested! Based on Wikipedia protocol description! */
70 fd = open("/dev/egd-pool", O_RDWR);
71 if (fd >= 0) {
72 /* Command is 1, next byte is size we want to read. */
73 char cmd[2] = { 1, sizeof(uint32_t) };
74 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
75 char reply[1 + sizeof(uint32_t)];
76 int r = read(fd, reply, sizeof(reply));
77 if (r > 1) {
78 /* Copy at least some bytes. */
79 memcpy(&ret, reply+1, r - 1);
80 if (reply[0] == sizeof(uint32_t)
81 && r == sizeof(reply)) {
82 close(fd);
83 return ret;
87 close(fd);
90 /* Fallback: pid and time. */
91 gettimeofday(&now, NULL);
92 ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
93 ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
94 "ntdb_open: random from getpid and time");
95 return ret;
98 static void ntdb_context_init(struct ntdb_context *ntdb)
100 /* Initialize the NTDB fields here */
101 ntdb_io_init(ntdb);
102 ntdb->transaction = NULL;
103 ntdb->access = NULL;
/* initialise a new database:
 *
 *	struct ntdb_header;
 *	struct {
 *		struct ntdb_used_record hash_header;
 *		ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
 *	} hash;
 *	struct ntdb_freetable ftable;
 *	struct {
 *		struct ntdb_free_record free_header;
 *		char forty_three[...];
 *	} remainder;
 *
 * NEW_DATABASE_HDR_SIZE(hbits) is the number of bytes taken by everything
 * above up to and including free_header, for a hash table of 1 << hbits
 * buckets.  The argument is parenthesised so that any expression may be
 * passed safely.
 */
#define NEW_DATABASE_HDR_SIZE(hbits)					\
	(sizeof(struct ntdb_header)					\
	 + sizeof(struct ntdb_used_record)				\
	 + (sizeof(ntdb_off_t) << (hbits))				\
	 + sizeof(struct ntdb_freetable)				\
	 + sizeof(struct ntdb_free_record))
125 static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
126 struct ntdb_attribute_seed *seed,
127 struct ntdb_header *rhdr)
129 /* We make it up in memory, then write it out if not internal */
130 struct ntdb_freetable *ftable;
131 struct ntdb_used_record *htable;
132 struct ntdb_header *hdr;
133 struct ntdb_free_record *remainder;
134 char *mem;
135 unsigned int magic_len;
136 ssize_t rlen;
137 size_t dbsize, hashsize, hdrsize, remaindersize;
138 enum NTDB_ERROR ecode;
140 hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
142 /* Always make db a multiple of NTDB_PGSIZE */
143 hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
144 dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
146 mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
147 if (!mem) {
148 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
149 "ntdb_new_database: failed to allocate");
152 hdr = (void *)mem;
153 htable = (void *)(mem + sizeof(*hdr));
154 ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
155 remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
156 + sizeof(*ftable));
158 /* Fill in the header */
159 hdr->version = NTDB_VERSION;
160 if (seed)
161 hdr->hash_seed = seed->seed;
162 else
163 hdr->hash_seed = random_number(ntdb);
164 hdr->hash_test = NTDB_HASH_MAGIC;
165 hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
166 sizeof(hdr->hash_test),
167 hdr->hash_seed,
168 ntdb->hash_data);
169 hdr->hash_bits = ntdb->hash_bits;
170 hdr->recovery = 0;
171 hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
172 hdr->seqnum = 0;
173 hdr->capabilities = 0;
174 memset(hdr->reserved, 0, sizeof(hdr->reserved));
176 /* Hash is all zero after header. */
177 set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize);
178 memset(htable + 1, 0, hashsize);
180 /* Free is empty. */
181 hdr->free_table = (char *)ftable - (char *)hdr;
182 memset(ftable, 0, sizeof(*ftable));
183 ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
184 sizeof(*ftable) - sizeof(ftable->hdr),
185 sizeof(*ftable) - sizeof(ftable->hdr));
186 if (ecode != NTDB_SUCCESS) {
187 goto out;
190 /* Rest of database is a free record, containing junk. */
191 remaindersize = dbsize - hdrsize;
192 remainder->ftable_and_len
193 = (remaindersize + sizeof(*remainder)
194 - sizeof(struct ntdb_used_record));
195 remainder->next = 0;
196 remainder->magic_and_prev
197 = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
198 | ((char *)remainder - (char *)hdr);
199 memset(remainder + 1, 0x43, remaindersize);
201 /* Put in our single free entry. */
202 ftable->buckets[size_to_bucket(remaindersize)] =
203 (char *)remainder - (char *)hdr;
205 /* Magic food */
206 memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
207 strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
209 /* This creates an endian-converted database, as if read from disk */
210 magic_len = sizeof(hdr->magic_food);
211 ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
213 /* Return copy of header. */
214 *rhdr = *hdr;
216 if (ntdb->flags & NTDB_INTERNAL) {
217 ntdb->file->map_size = dbsize;
218 ntdb->file->map_ptr = hdr;
219 return NTDB_SUCCESS;
221 if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
222 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
223 "ntdb_new_database:"
224 " failed to seek: %s", strerror(errno));
225 goto out;
228 if (ftruncate(ntdb->file->fd, 0) == -1) {
229 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
230 "ntdb_new_database:"
231 " failed to truncate: %s", strerror(errno));
232 goto out;
235 rlen = write(ntdb->file->fd, hdr, dbsize);
236 if (rlen != dbsize) {
237 if (rlen >= 0)
238 errno = ENOSPC;
239 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
240 "ntdb_new_database: %zi writing header: %s",
241 rlen, strerror(errno));
242 goto out;
245 out:
246 ntdb->free_fn(hdr, ntdb->alloc_data);
247 return ecode;
250 static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
252 ntdb->file = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data);
253 if (!ntdb->file)
254 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
255 "ntdb_open: cannot alloc ntdb_file structure");
256 ntdb->file->num_lockrecs = 0;
257 ntdb->file->lockrecs = NULL;
258 ntdb->file->allrecord_lock.count = 0;
259 ntdb->file->refcnt = 1;
260 ntdb->file->map_ptr = NULL;
261 ntdb->file->direct_count = 0;
262 ntdb->file->old_mmaps = NULL;
263 return NTDB_SUCCESS;
266 _PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
267 const union ntdb_attribute *attr)
269 switch (attr->base.attr) {
270 case NTDB_ATTRIBUTE_LOG:
271 ntdb->log_fn = attr->log.fn;
272 ntdb->log_data = attr->log.data;
273 break;
274 case NTDB_ATTRIBUTE_HASH:
275 case NTDB_ATTRIBUTE_SEED:
276 case NTDB_ATTRIBUTE_OPENHOOK:
277 case NTDB_ATTRIBUTE_HASHSIZE:
278 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
279 NTDB_LOG_USE_ERROR,
280 "ntdb_set_attribute:"
281 " cannot set %s after opening",
282 attr->base.attr == NTDB_ATTRIBUTE_HASH
283 ? "NTDB_ATTRIBUTE_HASH"
284 : attr->base.attr == NTDB_ATTRIBUTE_SEED
285 ? "NTDB_ATTRIBUTE_SEED"
286 : attr->base.attr == NTDB_ATTRIBUTE_OPENHOOK
287 ? "NTDB_ATTRIBUTE_OPENHOOK"
288 : "NTDB_ATTRIBUTE_HASHSIZE");
289 case NTDB_ATTRIBUTE_STATS:
290 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
291 NTDB_LOG_USE_ERROR,
292 "ntdb_set_attribute:"
293 " cannot set NTDB_ATTRIBUTE_STATS");
294 case NTDB_ATTRIBUTE_FLOCK:
295 ntdb->lock_fn = attr->flock.lock;
296 ntdb->unlock_fn = attr->flock.unlock;
297 ntdb->lock_data = attr->flock.data;
298 break;
299 case NTDB_ATTRIBUTE_ALLOCATOR:
300 ntdb->alloc_fn = attr->alloc.alloc;
301 ntdb->expand_fn = attr->alloc.expand;
302 ntdb->free_fn = attr->alloc.free;
303 ntdb->alloc_data = attr->alloc.priv_data;
304 break;
305 default:
306 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
307 NTDB_LOG_USE_ERROR,
308 "ntdb_set_attribute:"
309 " unknown attribute type %u",
310 attr->base.attr);
312 return NTDB_SUCCESS;
315 _PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
316 union ntdb_attribute *attr)
318 switch (attr->base.attr) {
319 case NTDB_ATTRIBUTE_LOG:
320 if (!ntdb->log_fn)
321 return NTDB_ERR_NOEXIST;
322 attr->log.fn = ntdb->log_fn;
323 attr->log.data = ntdb->log_data;
324 break;
325 case NTDB_ATTRIBUTE_HASH:
326 attr->hash.fn = ntdb->hash_fn;
327 attr->hash.data = ntdb->hash_data;
328 break;
329 case NTDB_ATTRIBUTE_SEED:
330 attr->seed.seed = ntdb->hash_seed;
331 break;
332 case NTDB_ATTRIBUTE_OPENHOOK:
333 if (!ntdb->openhook)
334 return NTDB_ERR_NOEXIST;
335 attr->openhook.fn = ntdb->openhook;
336 attr->openhook.data = ntdb->openhook_data;
337 break;
338 case NTDB_ATTRIBUTE_STATS: {
339 size_t size = attr->stats.size;
340 if (size > ntdb->stats.size)
341 size = ntdb->stats.size;
342 memcpy(&attr->stats, &ntdb->stats, size);
343 break;
345 case NTDB_ATTRIBUTE_FLOCK:
346 attr->flock.lock = ntdb->lock_fn;
347 attr->flock.unlock = ntdb->unlock_fn;
348 attr->flock.data = ntdb->lock_data;
349 break;
350 case NTDB_ATTRIBUTE_ALLOCATOR:
351 attr->alloc.alloc = ntdb->alloc_fn;
352 attr->alloc.expand = ntdb->expand_fn;
353 attr->alloc.free = ntdb->free_fn;
354 attr->alloc.priv_data = ntdb->alloc_data;
355 break;
356 case NTDB_ATTRIBUTE_HASHSIZE:
357 attr->hashsize.size = 1 << ntdb->hash_bits;
358 break;
359 default:
360 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
361 NTDB_LOG_USE_ERROR,
362 "ntdb_get_attribute:"
363 " unknown attribute type %u",
364 attr->base.attr);
366 attr->base.next = NULL;
367 return NTDB_SUCCESS;
370 _PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
371 enum ntdb_attribute_type type)
373 switch (type) {
374 case NTDB_ATTRIBUTE_LOG:
375 ntdb->log_fn = NULL;
376 break;
377 case NTDB_ATTRIBUTE_OPENHOOK:
378 ntdb->openhook = NULL;
379 break;
380 case NTDB_ATTRIBUTE_HASH:
381 case NTDB_ATTRIBUTE_SEED:
382 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
383 "ntdb_unset_attribute: cannot unset %s after opening",
384 type == NTDB_ATTRIBUTE_HASH
385 ? "NTDB_ATTRIBUTE_HASH"
386 : "NTDB_ATTRIBUTE_SEED");
387 break;
388 case NTDB_ATTRIBUTE_STATS:
389 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
390 NTDB_LOG_USE_ERROR,
391 "ntdb_unset_attribute:"
392 "cannot unset NTDB_ATTRIBUTE_STATS");
393 break;
394 case NTDB_ATTRIBUTE_FLOCK:
395 ntdb->lock_fn = ntdb_fcntl_lock;
396 ntdb->unlock_fn = ntdb_fcntl_unlock;
397 break;
398 default:
399 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
400 NTDB_LOG_USE_ERROR,
401 "ntdb_unset_attribute: unknown attribute type %u",
402 type);
406 /* The top three bits of the capability tell us whether it matters. */
407 enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
408 ntdb_off_t type)
410 if (type & NTDB_CAP_NOOPEN) {
411 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
412 "%s: file has unknown capability %llu",
413 caller, type & NTDB_CAP_NOOPEN);
416 if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
417 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
418 "%s: file has unknown capability %llu"
419 " (cannot write to it)",
420 caller, type & NTDB_CAP_NOOPEN);
423 if (type & NTDB_CAP_NOCHECK) {
424 ntdb->flags |= NTDB_CANT_CHECK;
426 return NTDB_SUCCESS;
429 static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
430 ntdb_off_t capabilities)
432 ntdb_off_t off, next;
433 enum NTDB_ERROR ecode = NTDB_SUCCESS;
434 const struct ntdb_capability *cap;
436 /* Check capability list. */
437 for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) {
438 cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
439 if (NTDB_PTR_IS_ERR(cap)) {
440 return NTDB_PTR_ERR(cap);
443 switch (cap->type & NTDB_CAP_TYPE_MASK) {
444 /* We don't understand any capabilities (yet). */
445 default:
446 ecode = unknown_capability(ntdb, "ntdb_open", cap->type);
448 next = cap->next;
449 ntdb_access_release(ntdb, cap);
451 return ecode;
/* Default allocator: plain malloc(); owner and priv_data are not used. */
static void *default_alloc(const void *owner, size_t len, void *priv_data)
{
	void *mem;

	(void)owner;
	(void)priv_data;

	mem = malloc(len);
	return mem;
}
/* Default resize hook: plain realloc(); priv_data is not used. */
static void *default_expand(void *ptr, size_t len, void *priv_data)
{
	void *resized;

	(void)priv_data;

	resized = realloc(ptr, len);
	return resized;
}
/* Default release hook: plain free(); priv_data is not used. */
static void default_free(void *ptr, void *priv_data)
{
	(void)priv_data;

	free(ptr);
}
469 /* First allocation needs manual search of attributes. */
470 static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr,
471 const char *name)
473 size_t len = sizeof(struct ntdb_context) + strlen(name) + 1;
475 while (attr) {
476 if (attr->base.attr == NTDB_ATTRIBUTE_ALLOCATOR) {
477 return attr->alloc.alloc(NULL, len,
478 attr->alloc.priv_data);
480 attr = attr->base.next;
482 return default_alloc(NULL, len, NULL);
/*
 * Return the smallest number of bits b (with b >= 1) such that
 * (1 << b) >= size.
 *
 * The result is capped at 64: for sizes above 2^63 no 64-bit power of two
 * is large enough, and shifting a 64-bit value by 64 would be undefined.
 */
static unsigned int next_pow2(uint64_t size)
{
	unsigned int bits = 1;

	while (bits < 64 && (1ULL << bits) < size)
		bits++;
	return bits;
}
494 _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
495 int open_flags, mode_t mode,
496 union ntdb_attribute *attr)
498 struct ntdb_context *ntdb;
499 struct stat st;
500 int saved_errno = 0;
501 uint64_t hash_test;
502 unsigned v;
503 ssize_t rlen;
504 struct ntdb_header hdr;
505 struct ntdb_attribute_seed *seed = NULL;
506 ntdb_bool_err berr;
507 enum NTDB_ERROR ecode;
508 int openlock;
510 ntdb = alloc_ntdb(attr, name);
511 if (!ntdb) {
512 /* Can't log this */
513 errno = ENOMEM;
514 return NULL;
516 /* Set name immediately for logging functions. */
517 ntdb->name = strcpy((char *)(ntdb + 1), name);
518 ntdb->flags = ntdb_flags;
519 ntdb->log_fn = NULL;
520 ntdb->open_flags = open_flags;
521 ntdb->file = NULL;
522 ntdb->openhook = NULL;
523 ntdb->lock_fn = ntdb_fcntl_lock;
524 ntdb->unlock_fn = ntdb_fcntl_unlock;
525 ntdb->hash_fn = ntdb_jenkins_hash;
526 memset(&ntdb->stats, 0, sizeof(ntdb->stats));
527 ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
528 ntdb->stats.size = sizeof(ntdb->stats);
529 ntdb->alloc_fn = default_alloc;
530 ntdb->expand_fn = default_expand;
531 ntdb->free_fn = default_free;
532 ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
534 while (attr) {
535 switch (attr->base.attr) {
536 case NTDB_ATTRIBUTE_HASH:
537 ntdb->hash_fn = attr->hash.fn;
538 ntdb->hash_data = attr->hash.data;
539 break;
540 case NTDB_ATTRIBUTE_SEED:
541 seed = &attr->seed;
542 break;
543 case NTDB_ATTRIBUTE_OPENHOOK:
544 ntdb->openhook = attr->openhook.fn;
545 ntdb->openhook_data = attr->openhook.data;
546 break;
547 case NTDB_ATTRIBUTE_HASHSIZE:
548 ntdb->hash_bits = next_pow2(attr->hashsize.size);
549 if (ntdb->hash_bits > 31) {
550 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
551 NTDB_LOG_USE_ERROR,
552 "ntdb_open: hash_size %u"
553 " too large",
554 attr->hashsize.size);
555 goto fail;
557 break;
558 default:
559 /* These are set as normal. */
560 ecode = ntdb_set_attribute(ntdb, attr);
561 if (ecode != NTDB_SUCCESS)
562 goto fail;
564 attr = attr->base.next;
567 if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
568 | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
569 | NTDB_RDONLY)) {
570 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
571 "ntdb_open: unknown flags %u", ntdb_flags);
572 goto fail;
575 if (seed) {
576 if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
577 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
578 NTDB_LOG_USE_ERROR,
579 "ntdb_open:"
580 " cannot set NTDB_ATTRIBUTE_SEED"
581 " without O_CREAT.");
582 goto fail;
586 if ((open_flags & O_ACCMODE) == O_WRONLY) {
587 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
588 "ntdb_open: can't open ntdb %s write-only",
589 name);
590 goto fail;
593 if ((open_flags & O_ACCMODE) == O_RDONLY) {
594 openlock = F_RDLCK;
595 ntdb->flags |= NTDB_RDONLY;
596 } else {
597 if (ntdb_flags & NTDB_RDONLY) {
598 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
599 NTDB_LOG_USE_ERROR,
600 "ntdb_open: can't use NTDB_RDONLY"
601 " without O_RDONLY");
602 goto fail;
604 openlock = F_WRLCK;
607 /* internal databases don't need any of the rest. */
608 if (ntdb->flags & NTDB_INTERNAL) {
609 ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
610 ecode = ntdb_new_file(ntdb);
611 if (ecode != NTDB_SUCCESS) {
612 goto fail;
614 ntdb->file->fd = -1;
615 ecode = ntdb_new_database(ntdb, seed, &hdr);
616 if (ecode == NTDB_SUCCESS) {
617 ntdb_convert(ntdb, &hdr.hash_seed,
618 sizeof(hdr.hash_seed));
619 ntdb->hash_seed = hdr.hash_seed;
620 ntdb_context_init(ntdb);
621 ntdb_ftable_init(ntdb);
623 if (ecode != NTDB_SUCCESS) {
624 goto fail;
626 return ntdb;
629 if (stat(name, &st) != -1)
630 ntdb->file = find_file(st.st_dev, st.st_ino);
632 if (!ntdb->file) {
633 ecode = ntdb_new_file(ntdb);
634 if (ecode != NTDB_SUCCESS) {
635 goto fail;
638 /* Set this now, as ntdb_nest_lock examines it. */
639 ntdb->file->map_size = 0;
641 if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
642 enum ntdb_log_level lvl;
643 /* errno set by open(2) */
644 saved_errno = errno;
646 /* Probing for files like this is a common pattern. */
647 if (!(open_flags & O_CREAT) && errno == ENOENT) {
648 lvl = NTDB_LOG_WARNING;
649 } else {
650 lvl = NTDB_LOG_ERROR;
652 ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
653 "ntdb_open: could not open file %s: %s",
654 name, strerror(errno));
656 goto fail_errno;
659 /* ensure there is only one process initialising at once:
660 * do it immediately to reduce the create/openlock race. */
661 ecode = ntdb_lock_open(ntdb, openlock,
662 NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
663 if (ecode != NTDB_SUCCESS) {
664 saved_errno = errno;
665 goto fail_errno;
668 /* on exec, don't inherit the fd */
669 v = fcntl(ntdb->file->fd, F_GETFD, 0);
670 fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
672 if (fstat(ntdb->file->fd, &st) == -1) {
673 saved_errno = errno;
674 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
675 "ntdb_open: could not stat open %s: %s",
676 name, strerror(errno));
677 goto fail_errno;
680 ntdb->file->device = st.st_dev;
681 ntdb->file->inode = st.st_ino;
683 /* call their open hook if they gave us one. */
684 if (ntdb->openhook) {
685 ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
686 if (ecode != NTDB_SUCCESS) {
687 ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
688 "ntdb_open: open hook failed");
689 goto fail;
691 open_flags |= O_CREAT;
693 } else {
694 /* ensure there is only one process initialising at once */
695 ecode = ntdb_lock_open(ntdb, openlock,
696 NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
697 if (ecode != NTDB_SUCCESS) {
698 saved_errno = errno;
699 goto fail_errno;
703 /* If they used O_TRUNC, read will return 0. */
704 rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
705 if (rlen == 0 && (open_flags & O_CREAT)) {
706 ecode = ntdb_new_database(ntdb, seed, &hdr);
707 if (ecode != NTDB_SUCCESS) {
708 goto fail;
710 } else if (rlen < 0) {
711 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
712 "ntdb_open: error %s reading %s",
713 strerror(errno), name);
714 goto fail;
715 } else if (rlen < sizeof(hdr)
716 || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
717 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
718 "ntdb_open: %s is not a ntdb file", name);
719 goto fail;
722 if (hdr.version != NTDB_VERSION) {
723 if (hdr.version == bswap_64(NTDB_VERSION))
724 ntdb->flags |= NTDB_CONVERT;
725 else {
726 /* wrong version */
727 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
728 "ntdb_open:"
729 " %s is unknown version 0x%llx",
730 name, (long long)hdr.version);
731 goto fail;
733 } else if (ntdb->flags & NTDB_CONVERT) {
734 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
735 "ntdb_open:"
736 " %s does not need NTDB_CONVERT",
737 name);
738 goto fail;
741 ntdb_context_init(ntdb);
743 ntdb_convert(ntdb, &hdr, sizeof(hdr));
744 ntdb->hash_bits = hdr.hash_bits;
745 ntdb->hash_seed = hdr.hash_seed;
746 hash_test = NTDB_HASH_MAGIC;
747 hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
748 if (hdr.hash_test != hash_test) {
749 /* wrong hash variant */
750 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
751 "ntdb_open:"
752 " %s uses a different hash function",
753 name);
754 goto fail;
757 ecode = capabilities_ok(ntdb, hdr.capabilities);
758 if (ecode != NTDB_SUCCESS) {
759 goto fail;
762 /* Clear any features we don't understand. */
763 if ((open_flags & O_ACCMODE) != O_RDONLY) {
764 hdr.features_used &= NTDB_FEATURE_MASK;
765 ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
766 features_used),
767 &hdr.features_used,
768 sizeof(hdr.features_used));
769 if (ecode != NTDB_SUCCESS)
770 goto fail;
773 ntdb_unlock_open(ntdb, openlock);
775 /* This makes sure we have current map_size and mmap. */
776 ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
777 if (unlikely(ecode != NTDB_SUCCESS))
778 goto fail;
780 if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
781 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
782 "ntdb_open:"
783 " %s size %llu isn't a multiple of %u",
784 name, (long long)ntdb->file->map_size,
785 NTDB_PGSIZE);
786 goto fail;
789 /* Now it's fully formed, recover if necessary. */
790 berr = ntdb_needs_recovery(ntdb);
791 if (unlikely(berr != false)) {
792 if (berr < 0) {
793 ecode = NTDB_OFF_TO_ERR(berr);
794 goto fail;
796 ecode = ntdb_lock_and_recover(ntdb);
797 if (ecode != NTDB_SUCCESS) {
798 goto fail;
802 ecode = ntdb_ftable_init(ntdb);
803 if (ecode != NTDB_SUCCESS) {
804 goto fail;
807 ntdb->next = tdbs;
808 tdbs = ntdb;
809 return ntdb;
811 fail:
812 /* Map ecode to some logical errno. */
813 switch (NTDB_ERR_TO_OFF(ecode)) {
814 case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
815 case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
816 saved_errno = EIO;
817 break;
818 case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
819 saved_errno = EWOULDBLOCK;
820 break;
821 case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
822 saved_errno = ENOMEM;
823 break;
824 case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
825 saved_errno = EINVAL;
826 break;
827 default:
828 saved_errno = EINVAL;
829 break;
832 fail_errno:
833 #ifdef NTDB_TRACE
834 close(ntdb->tracefd);
835 #endif
836 if (ntdb->file) {
837 ntdb_lock_cleanup(ntdb);
838 if (--ntdb->file->refcnt == 0) {
839 assert(ntdb->file->num_lockrecs == 0);
840 if (ntdb->file->map_ptr) {
841 if (ntdb->flags & NTDB_INTERNAL) {
842 ntdb->free_fn(ntdb->file->map_ptr,
843 ntdb->alloc_data);
844 } else
845 ntdb_munmap(ntdb);
847 if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
848 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
849 "ntdb_open: failed to close ntdb fd"
850 " on error: %s", strerror(errno));
851 ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
852 ntdb->free_fn(ntdb->file, ntdb->alloc_data);
856 ntdb->free_fn(ntdb, ntdb->alloc_data);
857 errno = saved_errno;
858 return NULL;
861 _PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
863 int ret = 0;
864 struct ntdb_context **i;
866 ntdb_trace(ntdb, "ntdb_close");
868 if (ntdb->transaction) {
869 ntdb_transaction_cancel(ntdb);
872 ntdb_lock_cleanup(ntdb);
873 if (--ntdb->file->refcnt == 0) {
874 if (ntdb->file->map_ptr) {
875 if (ntdb->flags & NTDB_INTERNAL) {
876 ntdb->free_fn(ntdb->file->map_ptr,
877 ntdb->alloc_data);
878 } else {
879 ntdb_munmap(ntdb);
882 ret = close(ntdb->file->fd);
883 ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
884 ntdb->free_fn(ntdb->file, ntdb->alloc_data);
887 /* Remove from tdbs list */
888 for (i = &tdbs; *i; i = &(*i)->next) {
889 if (*i == ntdb) {
890 *i = ntdb->next;
891 break;
895 #ifdef NTDB_TRACE
896 close(ntdb->tracefd);
897 #endif
898 ntdb->free_fn(ntdb, ntdb->alloc_data);
900 return ret;
903 _PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
905 struct ntdb_context *i;
907 for (i = tdbs; i; i = i->next) {
908 if (fn(i, p) != 0)
909 break;