s3:net_util: add some const to sockaddr_storage
[Samba/gebeck_regimport.git] / lib / tdb2 / io.c
blob8c5f45f30827c9e722227e3787797288713e056e
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
13 ** under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file *file)
34 if (file->fd == -1)
35 return;
37 if (file->map_ptr) {
38 munmap(file->map_ptr, file->map_size);
39 file->map_ptr = NULL;
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
46 return;
48 if (tdb->flags & TDB_NOMMAP)
49 return;
51 /* size_t can be smaller than off_t. */
52 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
53 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
54 tdb->mmap_flags,
55 MAP_SHARED, tdb->file->fd, 0);
56 } else
57 tdb->file->map_ptr = MAP_FAILED;
60 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
62 if (tdb->file->map_ptr == MAP_FAILED) {
63 tdb->file->map_ptr = NULL;
64 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
65 "tdb_mmap failed for size %lld (%s)",
66 (long long)tdb->file->map_size, strerror(errno));
70 /* check for an out of bounds access - if it is out of bounds then
71 see if the database has been expanded by someone else and expand
72 if necessary
73 note that "len" is the minimum length needed for the db
75 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
76 bool probe)
78 struct stat st;
79 enum TDB_ERROR ecode;
81 /* We can't hold pointers during this: we could unmap! */
82 assert(!tdb->direct_access
83 || (tdb->flags & TDB_NOLOCK)
84 || tdb_has_expansion_lock(tdb));
86 if (len <= tdb->file->map_size)
87 return 0;
88 if (tdb->flags & TDB_INTERNAL) {
89 if (!probe) {
90 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
91 "tdb_oob len %lld beyond internal"
92 " malloc size %lld",
93 (long long)len,
94 (long long)tdb->file->map_size);
96 return TDB_ERR_IO;
99 ecode = tdb_lock_expand(tdb, F_RDLCK);
100 if (ecode != TDB_SUCCESS) {
101 return ecode;
104 if (fstat(tdb->file->fd, &st) != 0) {
105 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
106 "Failed to fstat file: %s", strerror(errno));
107 tdb_unlock_expand(tdb, F_RDLCK);
108 return TDB_ERR_IO;
111 tdb_unlock_expand(tdb, F_RDLCK);
113 if (st.st_size < (size_t)len) {
114 if (!probe) {
115 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116 "tdb_oob len %zu beyond eof at %zu",
117 (size_t)len, st.st_size);
119 return TDB_ERR_IO;
122 /* Unmap, update size, remap */
123 tdb_munmap(tdb->file);
125 tdb->file->map_size = st.st_size;
126 tdb_mmap(tdb);
127 return TDB_SUCCESS;
130 /* Endian conversion: we only ever deal with 8 byte quantities */
131 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
133 assert(size % 8 == 0);
134 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
135 uint64_t i, *p = (uint64_t *)buf;
136 for (i = 0; i < size / 8; i++)
137 p[i] = bswap_64(p[i]);
139 return buf;
142 /* Return first non-zero offset in offset array, or end, or -ve error. */
143 /* FIXME: Return the off? */
144 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
145 tdb_off_t base, uint64_t start, uint64_t end)
147 uint64_t i;
148 const uint64_t *val;
150 /* Zero vs non-zero is the same unconverted: minor optimization. */
151 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
152 (end - start) * sizeof(tdb_off_t), false);
153 if (TDB_PTR_IS_ERR(val)) {
154 return TDB_PTR_ERR(val);
157 for (i = 0; i < (end - start); i++) {
158 if (val[i])
159 break;
161 tdb_access_release(tdb, val);
162 return start + i;
165 /* Return first zero offset in num offset array, or num, or -ve error. */
166 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
167 uint64_t num)
169 uint64_t i;
170 const uint64_t *val;
172 /* Zero vs non-zero is the same unconverted: minor optimization. */
173 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
174 if (TDB_PTR_IS_ERR(val)) {
175 return TDB_PTR_ERR(val);
178 for (i = 0; i < num; i++) {
179 if (!val[i])
180 break;
182 tdb_access_release(tdb, val);
183 return i;
186 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
188 char buf[8192] = { 0 };
189 void *p = tdb->methods->direct(tdb, off, len, true);
190 enum TDB_ERROR ecode = TDB_SUCCESS;
192 assert(!tdb->read_only);
193 if (TDB_PTR_IS_ERR(p)) {
194 return TDB_PTR_ERR(p);
196 if (p) {
197 memset(p, 0, len);
198 return ecode;
200 while (len) {
201 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
202 ecode = tdb->methods->twrite(tdb, off, buf, todo);
203 if (ecode != TDB_SUCCESS) {
204 break;
206 len -= todo;
207 off += todo;
209 return ecode;
212 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
214 tdb_off_t ret;
215 enum TDB_ERROR ecode;
217 if (likely(!(tdb->flags & TDB_CONVERT))) {
218 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
219 false);
220 if (TDB_PTR_IS_ERR(p)) {
221 return TDB_PTR_ERR(p);
223 if (p)
224 return *p;
227 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
228 if (ecode != TDB_SUCCESS) {
229 return ecode;
231 return ret;
234 /* write a lump of data at a specified offset */
235 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
236 const void *buf, tdb_len_t len)
238 enum TDB_ERROR ecode;
240 if (tdb->read_only) {
241 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
242 "Write to read-only database");
245 ecode = tdb->methods->oob(tdb, off + len, 0);
246 if (ecode != TDB_SUCCESS) {
247 return ecode;
250 if (tdb->file->map_ptr) {
251 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
252 } else {
253 ssize_t ret;
254 ret = pwrite(tdb->file->fd, buf, len, off);
255 if (ret != len) {
256 /* This shouldn't happen: we avoid sparse files. */
257 if (ret >= 0)
258 errno = ENOSPC;
260 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
261 "tdb_write: %zi at %zu len=%zu (%s)",
262 ret, (size_t)off, (size_t)len,
263 strerror(errno));
266 return TDB_SUCCESS;
269 /* read a lump of data at a specified offset */
270 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
271 void *buf, tdb_len_t len)
273 enum TDB_ERROR ecode;
275 ecode = tdb->methods->oob(tdb, off + len, 0);
276 if (ecode != TDB_SUCCESS) {
277 return ecode;
280 if (tdb->file->map_ptr) {
281 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
282 } else {
283 ssize_t r = pread(tdb->file->fd, buf, len, off);
284 if (r != len) {
285 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
286 "tdb_read failed with %zi at %zu "
287 "len=%zu (%s) map_size=%zu",
288 r, (size_t)off, (size_t)len,
289 strerror(errno),
290 (size_t)tdb->file->map_size);
293 return TDB_SUCCESS;
296 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
297 const void *rec, size_t len)
299 enum TDB_ERROR ecode;
301 if (unlikely((tdb->flags & TDB_CONVERT))) {
302 void *conv = malloc(len);
303 if (!conv) {
304 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
305 "tdb_write: no memory converting"
306 " %zu bytes", len);
308 memcpy(conv, rec, len);
309 ecode = tdb->methods->twrite(tdb, off,
310 tdb_convert(tdb, conv, len), len);
311 free(conv);
312 } else {
313 ecode = tdb->methods->twrite(tdb, off, rec, len);
315 return ecode;
318 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
319 void *rec, size_t len)
321 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
322 tdb_convert(tdb, rec, len);
323 return ecode;
326 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
327 tdb_off_t off, tdb_off_t val)
329 if (tdb->read_only) {
330 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
331 "Write to read-only database");
334 if (likely(!(tdb->flags & TDB_CONVERT))) {
335 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
336 true);
337 if (TDB_PTR_IS_ERR(p)) {
338 return TDB_PTR_ERR(p);
340 if (p) {
341 *p = val;
342 return TDB_SUCCESS;
345 return tdb_write_convert(tdb, off, &val, sizeof(val));
348 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
349 tdb_len_t len, unsigned int prefix)
351 unsigned char *buf;
352 enum TDB_ERROR ecode;
354 /* some systems don't like zero length malloc */
355 buf = malloc(prefix + len ? prefix + len : 1);
356 if (!buf) {
357 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
358 "tdb_alloc_read malloc failed len=%zu",
359 (size_t)(prefix + len));
360 return TDB_ERR_PTR(TDB_ERR_OOM);
361 } else {
362 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
363 if (unlikely(ecode != TDB_SUCCESS)) {
364 free(buf);
365 return TDB_ERR_PTR(ecode);
368 return buf;
371 /* read a lump of data, allocating the space for it */
372 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
374 return _tdb_alloc_read(tdb, offset, len, 0);
377 static enum TDB_ERROR fill(struct tdb_context *tdb,
378 const void *buf, size_t size,
379 tdb_off_t off, tdb_len_t len)
381 while (len) {
382 size_t n = len > size ? size : len;
383 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
384 if (ret != n) {
385 if (ret >= 0)
386 errno = ENOSPC;
388 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
389 "fill failed:"
390 " %zi at %zu len=%zu (%s)",
391 ret, (size_t)off, (size_t)len,
392 strerror(errno));
394 len -= n;
395 off += n;
397 return TDB_SUCCESS;
400 /* expand a file. we prefer to use ftruncate, as that is what posix
401 says to use for mmap expansion */
402 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
403 tdb_len_t addition)
405 char buf[8192];
406 enum TDB_ERROR ecode;
408 if (tdb->read_only) {
409 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
410 "Expand on read-only database");
413 if (tdb->flags & TDB_INTERNAL) {
414 char *new = realloc(tdb->file->map_ptr,
415 tdb->file->map_size + addition);
416 if (!new) {
417 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
418 "No memory to expand database");
420 tdb->file->map_ptr = new;
421 tdb->file->map_size += addition;
422 } else {
423 /* Unmap before trying to write; old TDB claimed OpenBSD had
424 * problem with this otherwise. */
425 tdb_munmap(tdb->file);
427 /* If this fails, we try to fill anyway. */
428 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
431 /* now fill the file with something. This ensures that the
432 file isn't sparse, which would be very bad if we ran out of
433 disk. This must be done with write, not via mmap */
434 memset(buf, 0x43, sizeof(buf));
435 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
436 addition);
437 if (ecode != TDB_SUCCESS)
438 return ecode;
439 tdb->file->map_size += addition;
440 tdb_mmap(tdb);
442 return TDB_SUCCESS;
445 const void *tdb_access_read(struct tdb_context *tdb,
446 tdb_off_t off, tdb_len_t len, bool convert)
448 void *ret = NULL;
450 if (likely(!(tdb->flags & TDB_CONVERT))) {
451 ret = tdb->methods->direct(tdb, off, len, false);
453 if (TDB_PTR_IS_ERR(ret)) {
454 return ret;
457 if (!ret) {
458 struct tdb_access_hdr *hdr;
459 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
460 if (TDB_PTR_IS_ERR(hdr)) {
461 return hdr;
463 hdr->next = tdb->access;
464 tdb->access = hdr;
465 ret = hdr + 1;
466 if (convert) {
467 tdb_convert(tdb, (void *)ret, len);
469 } else
470 tdb->direct_access++;
472 return ret;
475 void *tdb_access_write(struct tdb_context *tdb,
476 tdb_off_t off, tdb_len_t len, bool convert)
478 void *ret = NULL;
480 if (tdb->read_only) {
481 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
482 "Write to read-only database");
483 return TDB_ERR_PTR(TDB_ERR_RDONLY);
486 if (likely(!(tdb->flags & TDB_CONVERT))) {
487 ret = tdb->methods->direct(tdb, off, len, true);
489 if (TDB_PTR_IS_ERR(ret)) {
490 return ret;
494 if (!ret) {
495 struct tdb_access_hdr *hdr;
496 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
497 if (TDB_PTR_IS_ERR(hdr)) {
498 return hdr;
500 hdr->next = tdb->access;
501 tdb->access = hdr;
502 hdr->off = off;
503 hdr->len = len;
504 hdr->convert = convert;
505 ret = hdr + 1;
506 if (convert)
507 tdb_convert(tdb, (void *)ret, len);
508 } else
509 tdb->direct_access++;
511 return ret;
514 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
516 struct tdb_access_hdr **hp;
518 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
519 if (*hp + 1 == p)
520 return hp;
522 return NULL;
525 void tdb_access_release(struct tdb_context *tdb, const void *p)
527 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
529 if (hp) {
530 hdr = *hp;
531 *hp = hdr->next;
532 free(hdr);
533 } else
534 tdb->direct_access--;
537 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
539 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
540 enum TDB_ERROR ecode;
542 if (hp) {
543 hdr = *hp;
544 if (hdr->convert)
545 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
546 else
547 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
548 *hp = hdr->next;
549 free(hdr);
550 } else {
551 tdb->direct_access--;
552 ecode = TDB_SUCCESS;
555 return ecode;
558 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
559 bool write_mode)
561 enum TDB_ERROR ecode;
563 if (unlikely(!tdb->file->map_ptr))
564 return NULL;
566 ecode = tdb_oob(tdb, off + len, true);
567 if (unlikely(ecode != TDB_SUCCESS))
568 return TDB_ERR_PTR(ecode);
569 return (char *)tdb->file->map_ptr + off;
572 void tdb_inc_seqnum(struct tdb_context *tdb)
574 tdb_off_t seq;
576 if (likely(!(tdb->flags & TDB_CONVERT))) {
577 int64_t *direct;
579 direct = tdb->methods->direct(tdb,
580 offsetof(struct tdb_header,
581 seqnum),
582 sizeof(*direct), true);
583 if (likely(direct)) {
584 /* Don't let it go negative, even briefly */
585 if (unlikely((*direct) + 1) < 0)
586 *direct = 0;
587 (*direct)++;
588 return;
592 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
593 if (!TDB_OFF_IS_ERR(seq)) {
594 seq++;
595 if (unlikely((int64_t)seq < 0))
596 seq = 0;
597 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
601 static const struct tdb_methods io_methods = {
602 tdb_read,
603 tdb_write,
604 tdb_oob,
605 tdb_expand_file,
606 tdb_direct,
610 initialise the default methods table
612 void tdb_io_init(struct tdb_context *tdb)
614 tdb->methods = &io_methods;