/*
   Trivial Database 2: fetch, store and misc routines.
   Copyright (C) Rusty Russell 2010

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
19 #ifndef HAVE_LIBREPLACE
20 #include <ccan/asprintf/asprintf.h>
24 static enum TDB_ERROR
update_rec_hdr(struct tdb_context
*tdb
,
28 struct tdb_used_record
*rec
,
31 uint64_t dataroom
= rec_data_length(rec
) + rec_extra_padding(rec
);
34 ecode
= set_header(tdb
, rec
, TDB_USED_MAGIC
, keylen
, datalen
,
35 keylen
+ dataroom
, h
);
36 if (ecode
== TDB_SUCCESS
) {
37 ecode
= tdb_write_convert(tdb
, off
, rec
, sizeof(*rec
));
42 static enum TDB_ERROR
replace_data(struct tdb_context
*tdb
,
44 struct tdb_data key
, struct tdb_data dbuf
,
45 tdb_off_t old_off
, tdb_len_t old_room
,
51 /* Allocate a new record. */
52 new_off
= alloc(tdb
, key
.dsize
, dbuf
.dsize
, h
->h
, TDB_USED_MAGIC
,
54 if (TDB_OFF_IS_ERR(new_off
)) {
55 return TDB_OFF_TO_ERR(new_off
);
58 /* We didn't like the existing one: remove it. */
61 ecode
= add_free_record(tdb
, old_off
,
62 sizeof(struct tdb_used_record
)
63 + key
.dsize
+ old_room
,
65 if (ecode
== TDB_SUCCESS
)
66 ecode
= replace_in_hash(tdb
, h
, new_off
);
68 ecode
= add_to_hash(tdb
, h
, new_off
);
70 if (ecode
!= TDB_SUCCESS
) {
74 new_off
+= sizeof(struct tdb_used_record
);
75 ecode
= tdb
->tdb2
.io
->twrite(tdb
, new_off
, key
.dptr
, key
.dsize
);
76 if (ecode
!= TDB_SUCCESS
) {
81 ecode
= tdb
->tdb2
.io
->twrite(tdb
, new_off
, dbuf
.dptr
, dbuf
.dsize
);
82 if (ecode
!= TDB_SUCCESS
) {
86 if (tdb
->flags
& TDB_SEQNUM
)
92 static enum TDB_ERROR
update_data(struct tdb_context
*tdb
,
99 ecode
= tdb
->tdb2
.io
->twrite(tdb
, off
, dbuf
.dptr
, dbuf
.dsize
);
100 if (ecode
== TDB_SUCCESS
&& extra
) {
101 /* Put a zero in; future versions may append other data. */
102 ecode
= tdb
->tdb2
.io
->twrite(tdb
, off
+ dbuf
.dsize
, "", 1);
104 if (tdb
->flags
& TDB_SEQNUM
)
110 _PUBLIC_
enum TDB_ERROR
tdb_store(struct tdb_context
*tdb
,
111 struct tdb_data key
, struct tdb_data dbuf
, int flag
)
115 tdb_len_t old_room
= 0;
116 struct tdb_used_record rec
;
117 enum TDB_ERROR ecode
;
119 if (tdb
->flags
& TDB_VERSION1
) {
120 if (tdb1_store(tdb
, key
, dbuf
, flag
) == -1)
121 return tdb
->last_error
;
125 off
= find_and_lock(tdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
126 if (TDB_OFF_IS_ERR(off
)) {
127 return tdb
->last_error
= TDB_OFF_TO_ERR(off
);
130 /* Now we have lock on this hash bucket. */
131 if (flag
== TDB_INSERT
) {
133 ecode
= TDB_ERR_EXISTS
;
138 old_room
= rec_data_length(&rec
)
139 + rec_extra_padding(&rec
);
140 if (old_room
>= dbuf
.dsize
) {
141 /* Can modify in-place. Easy! */
142 ecode
= update_rec_hdr(tdb
, off
,
143 key
.dsize
, dbuf
.dsize
,
145 if (ecode
!= TDB_SUCCESS
) {
148 ecode
= update_data(tdb
,
151 old_room
- dbuf
.dsize
);
152 if (ecode
!= TDB_SUCCESS
) {
155 tdb_unlock_hashes(tdb
, h
.hlock_start
,
156 h
.hlock_range
, F_WRLCK
);
157 return tdb
->last_error
= TDB_SUCCESS
;
160 if (flag
== TDB_MODIFY
) {
161 /* if the record doesn't exist and we
162 are in TDB_MODIFY mode then we should fail
164 ecode
= TDB_ERR_NOEXIST
;
170 /* If we didn't use the old record, this implies we're growing. */
171 ecode
= replace_data(tdb
, &h
, key
, dbuf
, off
, old_room
, off
);
173 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_WRLCK
);
174 return tdb
->last_error
= ecode
;
177 _PUBLIC_
enum TDB_ERROR
tdb_append(struct tdb_context
*tdb
,
178 struct tdb_data key
, struct tdb_data dbuf
)
182 struct tdb_used_record rec
;
183 tdb_len_t old_room
= 0, old_dlen
;
184 unsigned char *newdata
;
185 struct tdb_data new_dbuf
;
186 enum TDB_ERROR ecode
;
188 if (tdb
->flags
& TDB_VERSION1
) {
189 if (tdb1_append(tdb
, key
, dbuf
) == -1)
190 return tdb
->last_error
;
194 off
= find_and_lock(tdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
195 if (TDB_OFF_IS_ERR(off
)) {
196 return tdb
->last_error
= TDB_OFF_TO_ERR(off
);
200 old_dlen
= rec_data_length(&rec
);
201 old_room
= old_dlen
+ rec_extra_padding(&rec
);
203 /* Fast path: can append in place. */
204 if (rec_extra_padding(&rec
) >= dbuf
.dsize
) {
205 ecode
= update_rec_hdr(tdb
, off
, key
.dsize
,
206 old_dlen
+ dbuf
.dsize
, &rec
,
208 if (ecode
!= TDB_SUCCESS
) {
212 off
+= sizeof(rec
) + key
.dsize
+ old_dlen
;
213 ecode
= update_data(tdb
, off
, dbuf
,
214 rec_extra_padding(&rec
));
219 newdata
= malloc(key
.dsize
+ old_dlen
+ dbuf
.dsize
);
221 ecode
= tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
223 " failed to allocate %zu bytes",
224 (size_t)(key
.dsize
+ old_dlen
228 ecode
= tdb
->tdb2
.io
->tread(tdb
, off
+ sizeof(rec
) + key
.dsize
,
230 if (ecode
!= TDB_SUCCESS
) {
231 goto out_free_newdata
;
233 memcpy(newdata
+ old_dlen
, dbuf
.dptr
, dbuf
.dsize
);
234 new_dbuf
.dptr
= newdata
;
235 new_dbuf
.dsize
= old_dlen
+ dbuf
.dsize
;
241 /* If they're using tdb_append(), it implies they're growing record. */
242 ecode
= replace_data(tdb
, &h
, key
, new_dbuf
, off
, old_room
, true);
247 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_WRLCK
);
248 return tdb
->last_error
= ecode
;
251 _PUBLIC_
enum TDB_ERROR
tdb_fetch(struct tdb_context
*tdb
, struct tdb_data key
,
252 struct tdb_data
*data
)
255 struct tdb_used_record rec
;
257 enum TDB_ERROR ecode
;
259 if (tdb
->flags
& TDB_VERSION1
)
260 return tdb1_fetch(tdb
, key
, data
);
262 off
= find_and_lock(tdb
, key
, F_RDLCK
, &h
, &rec
, NULL
);
263 if (TDB_OFF_IS_ERR(off
)) {
264 return tdb
->last_error
= TDB_OFF_TO_ERR(off
);
268 ecode
= TDB_ERR_NOEXIST
;
270 data
->dsize
= rec_data_length(&rec
);
271 data
->dptr
= tdb_alloc_read(tdb
, off
+ sizeof(rec
) + key
.dsize
,
273 if (TDB_PTR_IS_ERR(data
->dptr
)) {
274 ecode
= TDB_PTR_ERR(data
->dptr
);
279 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_RDLCK
);
280 return tdb
->last_error
= ecode
;
283 _PUBLIC_
bool tdb_exists(struct tdb_context
*tdb
, TDB_DATA key
)
286 struct tdb_used_record rec
;
289 if (tdb
->flags
& TDB_VERSION1
) {
290 return tdb1_exists(tdb
, key
);
293 off
= find_and_lock(tdb
, key
, F_RDLCK
, &h
, &rec
, NULL
);
294 if (TDB_OFF_IS_ERR(off
)) {
295 tdb
->last_error
= TDB_OFF_TO_ERR(off
);
298 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_RDLCK
);
300 tdb
->last_error
= TDB_SUCCESS
;
301 return off
? true : false;
304 _PUBLIC_
enum TDB_ERROR
tdb_delete(struct tdb_context
*tdb
, struct tdb_data key
)
307 struct tdb_used_record rec
;
309 enum TDB_ERROR ecode
;
311 if (tdb
->flags
& TDB_VERSION1
) {
312 if (tdb1_delete(tdb
, key
) == -1)
313 return tdb
->last_error
;
317 off
= find_and_lock(tdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
318 if (TDB_OFF_IS_ERR(off
)) {
319 return tdb
->last_error
= TDB_OFF_TO_ERR(off
);
323 ecode
= TDB_ERR_NOEXIST
;
327 ecode
= delete_from_hash(tdb
, &h
);
328 if (ecode
!= TDB_SUCCESS
) {
332 /* Free the deleted entry. */
334 ecode
= add_free_record(tdb
, off
,
335 sizeof(struct tdb_used_record
)
336 + rec_key_length(&rec
)
337 + rec_data_length(&rec
)
338 + rec_extra_padding(&rec
),
339 TDB_LOCK_WAIT
, true);
341 if (tdb
->flags
& TDB_SEQNUM
)
345 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_WRLCK
);
346 return tdb
->last_error
= ecode
;
349 _PUBLIC_
unsigned int tdb_get_flags(struct tdb_context
*tdb
)
354 static bool inside_transaction(const struct tdb_context
*tdb
)
356 if (tdb
->flags
& TDB_VERSION1
)
357 return tdb
->tdb1
.transaction
!= NULL
;
359 return tdb
->tdb2
.transaction
!= NULL
;
362 static bool readonly_changable(struct tdb_context
*tdb
, const char *caller
)
364 if (inside_transaction(tdb
)) {
365 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
368 " TDB_RDONLY inside transaction",
375 _PUBLIC_
void tdb_add_flag(struct tdb_context
*tdb
, unsigned flag
)
377 if (tdb
->flags
& TDB_INTERNAL
) {
378 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
380 "tdb_add_flag: internal db");
385 tdb
->flags
|= TDB_NOLOCK
;
388 tdb
->flags
|= TDB_NOMMAP
;
389 #ifndef HAVE_INCOHERENT_MMAP
390 tdb_munmap(tdb
->file
);
394 tdb
->flags
|= TDB_NOSYNC
;
397 tdb
->flags
|= TDB_SEQNUM
;
399 case TDB_ALLOW_NESTING
:
400 tdb
->flags
|= TDB_ALLOW_NESTING
;
403 if (readonly_changable(tdb
, "tdb_add_flag"))
404 tdb
->flags
|= TDB_RDONLY
;
407 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
409 "tdb_add_flag: Unknown flag %u",
414 _PUBLIC_
void tdb_remove_flag(struct tdb_context
*tdb
, unsigned flag
)
416 if (tdb
->flags
& TDB_INTERNAL
) {
417 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
419 "tdb_remove_flag: internal db");
424 tdb
->flags
&= ~TDB_NOLOCK
;
427 tdb
->flags
&= ~TDB_NOMMAP
;
428 #ifndef HAVE_INCOHERENT_MMAP
429 /* If mmap incoherent, we were mmaping anyway. */
434 tdb
->flags
&= ~TDB_NOSYNC
;
437 tdb
->flags
&= ~TDB_SEQNUM
;
439 case TDB_ALLOW_NESTING
:
440 tdb
->flags
&= ~TDB_ALLOW_NESTING
;
443 if ((tdb
->open_flags
& O_ACCMODE
) == O_RDONLY
) {
444 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
446 "tdb_remove_flag: can't"
447 " remove TDB_RDONLY on tdb"
448 " opened with O_RDONLY");
451 if (readonly_changable(tdb
, "tdb_remove_flag"))
452 tdb
->flags
&= ~TDB_RDONLY
;
455 tdb
->last_error
= tdb_logerr(tdb
, TDB_ERR_EINVAL
,
457 "tdb_remove_flag: Unknown flag %u",
462 _PUBLIC_
const char *tdb_errorstr(enum TDB_ERROR ecode
)
464 /* Gcc warns if you miss a case in the switch, so use that. */
465 switch (TDB_ERR_TO_OFF(ecode
)) {
466 case TDB_ERR_TO_OFF(TDB_SUCCESS
): return "Success";
467 case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT
): return "Corrupt database";
468 case TDB_ERR_TO_OFF(TDB_ERR_IO
): return "IO Error";
469 case TDB_ERR_TO_OFF(TDB_ERR_LOCK
): return "Locking error";
470 case TDB_ERR_TO_OFF(TDB_ERR_OOM
): return "Out of memory";
471 case TDB_ERR_TO_OFF(TDB_ERR_EXISTS
): return "Record exists";
472 case TDB_ERR_TO_OFF(TDB_ERR_EINVAL
): return "Invalid parameter";
473 case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST
): return "Record does not exist";
474 case TDB_ERR_TO_OFF(TDB_ERR_RDONLY
): return "write not permitted";
476 return "Invalid error code";
479 _PUBLIC_
enum TDB_ERROR
tdb_error(struct tdb_context
*tdb
)
481 return tdb
->last_error
;
484 enum TDB_ERROR COLD
tdb_logerr(struct tdb_context
*tdb
,
485 enum TDB_ERROR ecode
,
486 enum tdb_log_level level
,
487 const char *fmt
, ...)
492 /* tdb_open paths care about errno, so save it. */
493 int saved_errno
= errno
;
499 len
= vasprintf(&message
, fmt
, ap
);
503 tdb
->log_fn(tdb
, TDB_LOG_ERROR
, TDB_ERR_OOM
,
504 "out of memory formatting message:", tdb
->log_data
);
505 tdb
->log_fn(tdb
, level
, ecode
, fmt
, tdb
->log_data
);
507 tdb
->log_fn(tdb
, level
, ecode
, message
, tdb
->log_data
);
514 _PUBLIC_
enum TDB_ERROR
tdb_parse_record_(struct tdb_context
*tdb
,
516 enum TDB_ERROR (*parse
)(TDB_DATA k
,
522 struct tdb_used_record rec
;
524 enum TDB_ERROR ecode
;
526 if (tdb
->flags
& TDB_VERSION1
) {
527 return tdb
->last_error
= tdb1_parse_record(tdb
, key
, parse
,
531 off
= find_and_lock(tdb
, key
, F_RDLCK
, &h
, &rec
, NULL
);
532 if (TDB_OFF_IS_ERR(off
)) {
533 return tdb
->last_error
= TDB_OFF_TO_ERR(off
);
537 ecode
= TDB_ERR_NOEXIST
;
540 dptr
= tdb_access_read(tdb
, off
+ sizeof(rec
) + key
.dsize
,
541 rec_data_length(&rec
), false);
542 if (TDB_PTR_IS_ERR(dptr
)) {
543 ecode
= TDB_PTR_ERR(dptr
);
545 TDB_DATA d
= tdb_mkdata(dptr
, rec_data_length(&rec
));
547 ecode
= parse(key
, d
, data
);
548 tdb_access_release(tdb
, dptr
);
552 tdb_unlock_hashes(tdb
, h
.hlock_start
, h
.hlock_range
, F_RDLCK
);
553 return tdb
->last_error
= ecode
;
556 _PUBLIC_
const char *tdb_name(const struct tdb_context
*tdb
)
561 _PUBLIC_
int64_t tdb_get_seqnum(struct tdb_context
*tdb
)
565 if (tdb
->flags
& TDB_VERSION1
) {
567 tdb
->last_error
= TDB_SUCCESS
;
568 val
= tdb1_get_seqnum(tdb
);
570 if (tdb
->last_error
!= TDB_SUCCESS
)
571 return TDB_ERR_TO_OFF(tdb
->last_error
);
576 off
= tdb_read_off(tdb
, offsetof(struct tdb_header
, seqnum
));
577 if (TDB_OFF_IS_ERR(off
))
578 tdb
->last_error
= TDB_OFF_TO_ERR(off
);
580 tdb
->last_error
= TDB_SUCCESS
;
585 _PUBLIC_
int tdb_fd(const struct tdb_context
*tdb
)
587 return tdb
->file
->fd
;
590 struct traverse_state
{
591 enum TDB_ERROR error
;
592 struct tdb_context
*dest_db
;
/* traverse function for repacking */
598 static int repack_traverse(struct tdb_context
*tdb
, TDB_DATA key
, TDB_DATA data
,
599 struct traverse_state
*state
)
601 state
->error
= tdb_store(state
->dest_db
, key
, data
, TDB_INSERT
);
602 if (state
->error
!= TDB_SUCCESS
) {
608 _PUBLIC_
enum TDB_ERROR
tdb_repack(struct tdb_context
*tdb
)
610 struct tdb_context
*tmp_db
;
611 struct traverse_state state
;
613 state
.error
= tdb_transaction_start(tdb
);
614 if (state
.error
!= TDB_SUCCESS
) {
618 tmp_db
= tdb_open("tmpdb", TDB_INTERNAL
, O_RDWR
|O_CREAT
, 0, NULL
);
619 if (tmp_db
== NULL
) {
620 state
.error
= tdb_logerr(tdb
, TDB_ERR_OOM
, TDB_LOG_ERROR
,
622 " Failed to create tmp_db");
623 tdb_transaction_cancel(tdb
);
624 return tdb
->last_error
= state
.error
;
627 state
.dest_db
= tmp_db
;
628 if (tdb_traverse(tdb
, repack_traverse
, &state
) < 0) {
632 state
.error
= tdb_wipe_all(tdb
);
633 if (state
.error
!= TDB_SUCCESS
) {
638 if (tdb_traverse(tmp_db
, repack_traverse
, &state
) < 0) {
643 return tdb_transaction_commit(tdb
);
646 tdb_transaction_cancel(tdb
);