s4-winbind: Use winbindd in the AD DC for fl2003dc and plugin_s4_dc
[Samba/wip.git] / lib / tdb / common / io.c
blobfe47d18a5a4694ed365e11c9329afe14a4fda9e2
/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell         2000
   Copyright (C) Jeremy Allison               2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
29 #include "tdb_private.h"
32 * We prepend the mutex area, so fixup offsets. See mutex.c for details.
33 * tdb->hdr_ofs is 0 or header.mutex_size.
35 * Note: that we only have the 4GB limit of tdb_off_t for
36 * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs!
39 static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
41 off_t tmp = tdb->hdr_ofs + *off;
43 if ((tmp < tdb->hdr_ofs) || (tmp < *off)) {
44 errno = EIO;
45 return false;
48 *off = tmp;
49 return true;
52 static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
53 size_t count, off_t offset)
55 if (!tdb_adjust_offset(tdb, &offset)) {
56 return -1;
58 return pwrite(tdb->fd, buf, count, offset);
61 static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
62 size_t count, off_t offset)
64 if (!tdb_adjust_offset(tdb, &offset)) {
65 return -1;
67 return pread(tdb->fd, buf, count, offset);
70 static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
72 if (!tdb_adjust_offset(tdb, &length)) {
73 return -1;
75 return ftruncate(tdb->fd, length);
78 static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
80 int ret;
82 ret = fstat(tdb->fd, buf);
83 if (ret == -1) {
84 return -1;
87 if (buf->st_size < tdb->hdr_ofs) {
88 errno = EIO;
89 return -1;
91 buf->st_size -= tdb->hdr_ofs;
93 return ret;
96 /* check for an out of bounds access - if it is out of bounds then
97 see if the database has been expanded by someone else and expand
98 if necessary
100 static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
101 int probe)
103 struct stat st;
104 if (len + off < len) {
105 if (!probe) {
106 /* Ensure ecode is set for log fn. */
107 tdb->ecode = TDB_ERR_IO;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %u len %u wrap\n",
109 off, len));
111 return -1;
114 if (off + len <= tdb->map_size)
115 return 0;
116 if (tdb->flags & TDB_INTERNAL) {
117 if (!probe) {
118 /* Ensure ecode is set for log fn. */
119 tdb->ecode = TDB_ERR_IO;
120 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond internal malloc size %u\n",
121 (int)(off + len), (int)tdb->map_size));
123 return -1;
126 if (tdb_fstat(tdb, &st) == -1) {
127 tdb->ecode = TDB_ERR_IO;
128 return -1;
131 /* Beware >4G files! */
132 if ((tdb_off_t)st.st_size != st.st_size) {
133 /* Ensure ecode is set for log fn. */
134 tdb->ecode = TDB_ERR_IO;
135 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_oob len %llu too large!\n",
136 (long long)st.st_size));
137 return -1;
140 /* Unmap, update size, remap. We do this unconditionally, to handle
141 * the unusual case where the db is truncated.
143 * This can happen to a child using tdb_reopen_all(true) on a
144 * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next
145 * opener will truncate the database. */
146 if (tdb_munmap(tdb) == -1) {
147 tdb->ecode = TDB_ERR_IO;
148 return -1;
150 tdb->map_size = st.st_size;
151 if (tdb_mmap(tdb) != 0) {
152 return -1;
155 if (st.st_size < (size_t)off + len) {
156 if (!probe) {
157 /* Ensure ecode is set for log fn. */
158 tdb->ecode = TDB_ERR_IO;
159 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n",
160 (int)(off + len), (int)st.st_size));
162 return -1;
164 return 0;
167 /* write a lump of data at a specified offset */
168 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
169 const void *buf, tdb_len_t len)
171 if (len == 0) {
172 return 0;
175 if (tdb->read_only || tdb->traverse_read) {
176 tdb->ecode = TDB_ERR_RDONLY;
177 return -1;
180 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0)
181 return -1;
183 if (tdb->map_ptr) {
184 memcpy(off + (char *)tdb->map_ptr, buf, len);
185 } else {
186 #ifdef HAVE_INCOHERENT_MMAP
187 tdb->ecode = TDB_ERR_IO;
188 return -1;
189 #else
190 ssize_t written;
192 written = tdb_pwrite(tdb, buf, len, off);
194 if ((written != (ssize_t)len) && (written != -1)) {
195 /* try once more */
196 tdb->ecode = TDB_ERR_IO;
197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
198 "%zi of %u bytes at %u, trying once more\n",
199 written, len, off));
200 written = tdb_pwrite(tdb, (const char *)buf+written,
201 len-written, off+written);
203 if (written == -1) {
204 /* Ensure ecode is set for log fn. */
205 tdb->ecode = TDB_ERR_IO;
206 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %u "
207 "len=%u (%s)\n", off, len, strerror(errno)));
208 return -1;
209 } else if (written != (ssize_t)len) {
210 tdb->ecode = TDB_ERR_IO;
211 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
212 "write %u bytes at %u in two attempts\n",
213 len, off));
214 return -1;
216 #endif
218 return 0;
/*
 * Endian conversion: we only ever deal with 4 byte quantities.
 *
 * Applies TDB_BYTEREV in place to each of the size / 4 complete 32-bit
 * words in 'buf'; any trailing bytes are left alone.  Returns 'buf'.
 */
void *tdb_convert(void *buf, uint32_t size)
{
	uint32_t *word = (uint32_t *)buf;
	uint32_t remaining = size / 4;

	while (remaining > 0) {
		*word = TDB_BYTEREV(*word);
		word++;
		remaining--;
	}

	return buf;
}
231 /* read a lump of data at a specified offset, maybe convert */
232 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
233 tdb_len_t len, int cv)
235 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) {
236 return -1;
239 if (tdb->map_ptr) {
240 memcpy(buf, off + (char *)tdb->map_ptr, len);
241 } else {
242 #ifdef HAVE_INCOHERENT_MMAP
243 tdb->ecode = TDB_ERR_IO;
244 return -1;
245 #else
246 ssize_t ret;
248 ret = tdb_pread(tdb, buf, len, off);
249 if (ret != (ssize_t)len) {
250 /* Ensure ecode is set for log fn. */
251 tdb->ecode = TDB_ERR_IO;
252 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %u "
253 "len=%u ret=%zi (%s) map_size=%u\n",
254 off, len, ret, strerror(errno),
255 tdb->map_size));
256 return -1;
258 #endif
260 if (cv) {
261 tdb_convert(buf, len);
263 return 0;
269 do an unlocked scan of the hash table heads to find the next non-zero head. The value
270 will then be confirmed with the lock held
272 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
274 uint32_t h = *chain;
275 if (tdb->map_ptr) {
276 for (;h < tdb->hash_size;h++) {
277 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
278 break;
281 } else {
282 uint32_t off=0;
283 for (;h < tdb->hash_size;h++) {
284 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
285 break;
289 (*chain) = h;
293 int tdb_munmap(struct tdb_context *tdb)
295 if (tdb->flags & TDB_INTERNAL)
296 return 0;
298 #ifdef HAVE_MMAP
299 if (tdb->map_ptr) {
300 int ret;
302 ret = munmap(tdb->map_ptr, tdb->map_size);
303 if (ret != 0)
304 return ret;
306 #endif
307 tdb->map_ptr = NULL;
308 return 0;
311 /* If mmap isn't coherent, *everyone* must always mmap. */
312 static bool should_mmap(const struct tdb_context *tdb)
314 #ifdef HAVE_INCOHERENT_MMAP
315 return true;
316 #else
317 return !(tdb->flags & TDB_NOMMAP);
318 #endif
321 int tdb_mmap(struct tdb_context *tdb)
323 if (tdb->flags & TDB_INTERNAL)
324 return 0;
326 #ifdef HAVE_MMAP
327 if (should_mmap(tdb)) {
328 tdb->map_ptr = mmap(NULL, tdb->map_size,
329 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
330 MAP_SHARED|MAP_FILE, tdb->fd,
331 tdb->hdr_ofs);
334 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
337 if (tdb->map_ptr == MAP_FAILED) {
338 tdb->map_ptr = NULL;
339 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %u (%s)\n",
340 tdb->map_size, strerror(errno)));
341 #ifdef HAVE_INCOHERENT_MMAP
342 tdb->ecode = TDB_ERR_IO;
343 return -1;
344 #endif
346 } else {
347 tdb->map_ptr = NULL;
349 #else
350 tdb->map_ptr = NULL;
351 #endif
352 return 0;
355 /* expand a file. we prefer to use ftruncate, as that is what posix
356 says to use for mmap expansion */
357 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
359 char buf[8192];
360 tdb_off_t new_size;
362 if (tdb->read_only || tdb->traverse_read) {
363 tdb->ecode = TDB_ERR_RDONLY;
364 return -1;
367 if (!tdb_add_off_t(size, addition, &new_size)) {
368 tdb->ecode = TDB_ERR_OOM;
369 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
370 "overflow detected current size[%u] addition[%u]!\n",
371 (unsigned)size, (unsigned)addition));
372 errno = ENOSPC;
373 return -1;
376 if (tdb_ftruncate(tdb, new_size) == -1) {
377 char b = 0;
378 ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
379 if (written == 0) {
380 /* try once more, potentially revealing errno */
381 written = tdb_pwrite(tdb, &b, 1, new_size - 1);
383 if (written == 0) {
384 /* again - give up, guessing errno */
385 errno = ENOSPC;
387 if (written != 1) {
388 tdb->ecode = TDB_ERR_OOM;
389 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %u failed (%s)\n",
390 (unsigned)new_size, strerror(errno)));
391 return -1;
395 /* now fill the file with something. This ensures that the
396 file isn't sparse, which would be very bad if we ran out of
397 disk. This must be done with write, not via mmap */
398 memset(buf, TDB_PAD_BYTE, sizeof(buf));
399 while (addition) {
400 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
401 ssize_t written = tdb_pwrite(tdb, buf, n, size);
402 if (written == 0) {
403 /* prevent infinite loops: try _once_ more */
404 written = tdb_pwrite(tdb, buf, n, size);
406 if (written == 0) {
407 /* give up, trying to provide a useful errno */
408 tdb->ecode = TDB_ERR_OOM;
409 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
410 "returned 0 twice: giving up!\n"));
411 errno = ENOSPC;
412 return -1;
414 if (written == -1) {
415 tdb->ecode = TDB_ERR_OOM;
416 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
417 "%u bytes failed (%s)\n", (int)n,
418 strerror(errno)));
419 return -1;
421 if (written != n) {
422 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
423 "only %zu of %zi bytes - retrying\n", written,
424 n));
426 addition -= written;
427 size += written;
429 return 0;
433 /* You need 'size', this tells you how much you should expand by. */
434 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size)
436 tdb_off_t new_size, top_size, increment;
437 tdb_off_t max_size = UINT32_MAX - map_size;
439 if (size > max_size) {
441 * We can't round up anymore, just give back
442 * what we're asked for.
444 * The caller has to take care of the ENOSPC handling.
446 return size;
449 /* limit size in order to avoid using up huge amounts of memory for
450 * in memory tdbs if an oddball huge record creeps in */
451 if (size > 100 * 1024) {
452 increment = size * 2;
453 } else {
454 increment = size * 100;
456 if (increment < size) {
457 goto overflow;
460 if (!tdb_add_off_t(map_size, increment, &top_size)) {
461 goto overflow;
464 /* always make room for at least top_size more records, and at
465 least 25% more space. if the DB is smaller than 100MiB,
466 otherwise grow it by 10% only. */
467 if (map_size > 100 * 1024 * 1024) {
468 new_size = map_size * 1.10;
469 } else {
470 new_size = map_size * 1.25;
472 if (new_size < map_size) {
473 goto overflow;
476 /* Round the database up to a multiple of the page size */
477 new_size = MAX(top_size, new_size);
479 if (new_size + page_size < new_size) {
480 /* There's a "+" in TDB_ALIGN that might overflow... */
481 goto overflow;
484 return TDB_ALIGN(new_size, page_size) - map_size;
486 overflow:
488 * Somewhere in between we went over 4GB. Make one big jump to
489 * exactly 4GB database size.
491 return max_size;
494 /* expand the database at least size bytes by expanding the underlying
495 file and doing the mmap again if necessary */
496 int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
498 struct tdb_record rec;
499 tdb_off_t offset;
500 tdb_off_t new_size;
502 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
503 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
504 return -1;
507 /* must know about any previous expansions by another process */
508 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
511 * Note: that we don't care about tdb->hdr_ofs != 0 here
513 * The 4GB limitation is just related to tdb->map_size
514 * and the offset calculation in the records.
516 * The file on disk can be up to 4GB + tdb->hdr_ofs
518 size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
520 if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
521 tdb->ecode = TDB_ERR_OOM;
522 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_expand "
523 "overflow detected current map_size[%u] size[%u]!\n",
524 (unsigned)tdb->map_size, (unsigned)size));
525 goto fail;
528 /* form a new freelist record */
529 offset = tdb->map_size;
530 memset(&rec,'\0',sizeof(rec));
531 rec.rec_len = size - sizeof(rec);
533 if (tdb->flags & TDB_INTERNAL) {
534 char *new_map_ptr;
536 new_map_ptr = (char *)realloc(tdb->map_ptr, new_size);
537 if (!new_map_ptr) {
538 tdb->ecode = TDB_ERR_OOM;
539 goto fail;
541 tdb->map_ptr = new_map_ptr;
542 tdb->map_size = new_size;
543 } else {
544 int ret;
547 * expand the file itself
549 ret = tdb->methods->tdb_expand_file(tdb, tdb->map_size, size);
550 if (ret != 0) {
551 goto fail;
554 /* Explicitly remap: if we're in a transaction, this won't
555 * happen automatically! */
556 tdb_munmap(tdb);
557 tdb->map_size = new_size;
558 if (tdb_mmap(tdb) != 0) {
559 goto fail;
563 /* link it into the free list */
564 if (tdb_free(tdb, offset, &rec) == -1)
565 goto fail;
567 tdb_unlock(tdb, -1, F_WRLCK);
568 return 0;
569 fail:
570 tdb_unlock(tdb, -1, F_WRLCK);
571 return -1;
574 /* read/write a tdb_off_t */
575 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
577 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
580 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
582 tdb_off_t off = *d;
583 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
587 /* read a lump of data, allocating the space for it */
588 unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
590 unsigned char *buf;
592 /* some systems don't like zero length malloc */
594 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
595 /* Ensure ecode is set for log fn. */
596 tdb->ecode = TDB_ERR_OOM;
597 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%u (%s)\n",
598 len, strerror(errno)));
599 return NULL;
601 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
602 SAFE_FREE(buf);
603 return NULL;
605 return buf;
608 /* Give a piece of tdb data to a parser */
610 int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
611 tdb_off_t offset, tdb_len_t len,
612 int (*parser)(TDB_DATA key, TDB_DATA data,
613 void *private_data),
614 void *private_data)
616 TDB_DATA data;
617 int result;
619 data.dsize = len;
621 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
623 * Optimize by avoiding the malloc/memcpy/free, point the
624 * parser directly at the mmap area.
626 if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) {
627 return -1;
629 data.dptr = offset + (unsigned char *)tdb->map_ptr;
630 return parser(key, data, private_data);
633 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
634 return -1;
637 result = parser(key, data, private_data);
638 free(data.dptr);
639 return result;
642 /* read/write a record */
643 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
645 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
646 return -1;
647 if (TDB_BAD_MAGIC(rec)) {
648 /* Ensure ecode is set for log fn. */
649 tdb->ecode = TDB_ERR_CORRUPT;
650 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset));
651 return -1;
653 return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0);
656 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
658 struct tdb_record r = *rec;
659 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
662 static const struct tdb_methods io_methods = {
663 tdb_read,
664 tdb_write,
665 tdb_next_hash_chain,
666 tdb_oob,
667 tdb_expand_file,
671 initialise the default methods table
673 void tdb_io_init(struct tdb_context *tdb)
675 tdb->methods = &io_methods;