2 Trivial Database 2: fetch, store and misc routines.
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #ifndef HAVE_LIBREPLACE
20 #include <ccan/asprintf/asprintf.h>
/*
 * update_rec_hdr - rewrite the on-disk header of an existing used record.
 *
 * From the visible code: `dataroom` is the record's current data length
 * plus its extra padding.  set_header() refills *rec with NTDB_USED_MAGIC,
 * keylen and datalen, and only when that succeeds is the header written
 * back at `off` through ntdb_write_convert().
 *
 * NOTE(review): the off/keylen/datalen parameters, the final set_header()
 * argument and the return statement are not visible in this view.
 */
24 static enum NTDB_ERROR
update_rec_hdr(struct ntdb_context
*ntdb
,
28 struct ntdb_used_record
*rec
)
30 uint64_t dataroom
= rec_data_length(rec
) + rec_extra_padding(rec
);
31 enum NTDB_ERROR ecode
;
33 ecode
= set_header(ntdb
, rec
, NTDB_USED_MAGIC
, keylen
, datalen
,
35 if (ecode
== NTDB_SUCCESS
) {
36 ecode
= ntdb_write_convert(ntdb
, off
, rec
, sizeof(*rec
));
/*
 * replace_data - place key/dbuf in a freshly allocated record.
 *
 * Visible steps:
 *  - alloc() reserves a record sized for key.dsize and dbuf.dsize; an
 *    error offset is converted with NTDB_OFF_TO_ERR() and returned.
 *  - The old record (header + key + old_room bytes at old_off) is handed
 *    to add_free_record(); on success replace_in_hash() points the hash
 *    entry at the new offset.  add_to_hash() is the alternative path.
 *  - new_off is advanced past the record header, then the key and the
 *    data are written with io->twrite().
 *  - The sequence number is bumped when NTDB_SEQNUM is set in the flags.
 *
 * NOTE(review): the `h` and `growing` parameters, the test that selects
 * between replace_in_hash() and add_to_hash(), any adjustment of new_off
 * between the two writes and all error-exit bodies are not visible here.
 */
41 static enum NTDB_ERROR
replace_data(struct ntdb_context
*ntdb
,
43 NTDB_DATA key
, NTDB_DATA dbuf
,
44 ntdb_off_t old_off
, ntdb_len_t old_room
,
48 enum NTDB_ERROR ecode
;
50 /* Allocate a new record. */
51 new_off
= alloc(ntdb
, key
.dsize
, dbuf
.dsize
, NTDB_USED_MAGIC
, growing
);
52 if (NTDB_OFF_IS_ERR(new_off
)) {
53 return NTDB_OFF_TO_ERR(new_off
);
56 /* We didn't like the existing one: remove it. */
59 ecode
= add_free_record(ntdb
, old_off
,
60 sizeof(struct ntdb_used_record
)
61 + key
.dsize
+ old_room
,
62 NTDB_LOCK_WAIT
, true);
63 if (ecode
== NTDB_SUCCESS
)
64 ecode
= replace_in_hash(ntdb
, h
, new_off
);
66 ecode
= add_to_hash(ntdb
, h
, new_off
);
68 if (ecode
!= NTDB_SUCCESS
) {
/* Skip the record header so the key lands directly after it. */
72 new_off
+= sizeof(struct ntdb_used_record
);
73 ecode
= ntdb
->io
->twrite(ntdb
, new_off
, key
.dptr
, key
.dsize
);
74 if (ecode
!= NTDB_SUCCESS
) {
79 ecode
= ntdb
->io
->twrite(ntdb
, new_off
, dbuf
.dptr
, dbuf
.dsize
);
80 if (ecode
!= NTDB_SUCCESS
) {
84 if (ntdb
->flags
& NTDB_SEQNUM
)
85 ntdb_inc_seqnum(ntdb
);
/*
 * update_data - overwrite record data in place.
 *
 * Visible steps: dbuf is written at `off` with io->twrite(); when that
 * succeeds and `extra` is non-zero, one zero byte is written immediately
 * after the data (off + dbuf.dsize).  The sequence number is bumped when
 * NTDB_SEQNUM is set in the flags.
 *
 * NOTE(review): the off/dbuf/extra parameters and the return statement
 * are not visible in this view.
 */
90 static enum NTDB_ERROR
update_data(struct ntdb_context
*ntdb
,
95 enum NTDB_ERROR ecode
;
97 ecode
= ntdb
->io
->twrite(ntdb
, off
, dbuf
.dptr
, dbuf
.dsize
);
98 if (ecode
== NTDB_SUCCESS
&& extra
) {
99 /* Put a zero in; future versions may append other data. */
100 ecode
= ntdb
->io
->twrite(ntdb
, off
+ dbuf
.dsize
, "", 1);
102 if (ntdb
->flags
& NTDB_SEQNUM
)
103 ntdb_inc_seqnum(ntdb
);
/*
 * ntdb_store - store dbuf under key; `flag` is compared against
 * NTDB_INSERT and NTDB_MODIFY.
 *
 * Visible flow: find_and_lock() looks the key up with F_WRLCK and fills
 * `h` and `rec`; an error offset is returned via NTDB_OFF_TO_ERR().
 * NTDB_INSERT is paired with NTDB_ERR_EXISTS.  When the old record's room
 * (data length + extra padding) is at least dbuf.dsize the record is
 * rewritten in place with update_rec_hdr() and update_data(), the latter
 * being told how much room is left over (old_room - dbuf.dsize).
 * NTDB_MODIFY is paired with NTDB_ERR_NOEXIST.  Otherwise replace_data()
 * is called, with `off` also passed as its final argument.  The hash lock
 * is dropped with ntdb_unlock_hash() on the visible exit paths.
 *
 * NOTE(review): the declarations of `off` and `h`, the tests on whether
 * the key was found, the error-exit code and the return statements are
 * not visible in this view, so the branch nesting above is inferred.
 */
108 _PUBLIC_
enum NTDB_ERROR
ntdb_store(struct ntdb_context
*ntdb
,
109 NTDB_DATA key
, NTDB_DATA dbuf
, int flag
)
113 ntdb_len_t old_room
= 0;
114 struct ntdb_used_record rec
;
115 enum NTDB_ERROR ecode
;
117 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
118 if (NTDB_OFF_IS_ERR(off
)) {
119 return NTDB_OFF_TO_ERR(off
);
122 /* Now we have lock on this hash bucket. */
123 if (flag
== NTDB_INSERT
) {
125 ecode
= NTDB_ERR_EXISTS
;
130 old_room
= rec_data_length(&rec
)
131 + rec_extra_padding(&rec
);
132 if (old_room
>= dbuf
.dsize
) {
133 /* Can modify in-place. Easy! */
134 ecode
= update_rec_hdr(ntdb
, off
,
135 key
.dsize
, dbuf
.dsize
,
137 if (ecode
!= NTDB_SUCCESS
) {
140 ecode
= update_data(ntdb
,
143 old_room
- dbuf
.dsize
);
144 if (ecode
!= NTDB_SUCCESS
) {
147 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
151 if (flag
== NTDB_MODIFY
) {
152 /* if the record doesn't exist and we
153 are in NTDB_MODIFY mode then we should fail
155 ecode
= NTDB_ERR_NOEXIST
;
161 /* If we didn't use the old record, this implies we're growing. */
162 ecode
= replace_data(ntdb
, &h
, key
, dbuf
, off
, old_room
, off
);
164 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
/*
 * ntdb_append - append dbuf to the data stored under key.
 *
 * Visible flow: find_and_lock() looks the key up with F_WRLCK.  old_dlen
 * is the current data length and old_room adds the extra padding.
 *
 * Fast path: when the extra padding alone can hold dbuf.dsize, the header
 * is updated for a data length of old_dlen + dbuf.dsize and update_data()
 * writes dbuf at the end of the existing data (off advanced past the
 * header, the key and old_dlen bytes).
 *
 * Slow path: a buffer is obtained from ntdb->alloc_fn (failure is logged
 * as NTDB_ERR_OOM), the existing data is read with io->tread() from just
 * after the key, dbuf is copied in behind the old data, and the combined
 * buffer is passed to replace_data() with growing == true.  The buffer is
 * released with ntdb->free_fn and the hash lock is dropped.
 *
 * NOTE(review): the declarations of `off`, `h` and `new_dbuf`, the test on
 * whether the key exists, the remaining alloc_fn/tread/ntdb_logerr
 * arguments, the labels and the return statement are not visible here.
 */
168 _PUBLIC_
enum NTDB_ERROR
ntdb_append(struct ntdb_context
*ntdb
,
169 NTDB_DATA key
, NTDB_DATA dbuf
)
173 struct ntdb_used_record rec
;
174 ntdb_len_t old_room
= 0, old_dlen
;
175 unsigned char *newdata
;
177 enum NTDB_ERROR ecode
;
179 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
180 if (NTDB_OFF_IS_ERR(off
)) {
181 return NTDB_OFF_TO_ERR(off
);
185 old_dlen
= rec_data_length(&rec
);
186 old_room
= old_dlen
+ rec_extra_padding(&rec
);
188 /* Fast path: can append in place. */
189 if (rec_extra_padding(&rec
) >= dbuf
.dsize
) {
190 ecode
= update_rec_hdr(ntdb
, off
, key
.dsize
,
191 old_dlen
+ dbuf
.dsize
, &rec
);
192 if (ecode
!= NTDB_SUCCESS
) {
/* Move from the record start to the end of the existing data. */
196 off
+= sizeof(rec
) + key
.dsize
+ old_dlen
;
197 ecode
= update_data(ntdb
, off
, dbuf
,
198 rec_extra_padding(&rec
));
203 newdata
= ntdb
->alloc_fn(ntdb
, key
.dsize
+ old_dlen
+ dbuf
.dsize
,
206 ecode
= ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
208 " failed to allocate %zu bytes",
209 (size_t)(key
.dsize
+ old_dlen
213 ecode
= ntdb
->io
->tread(ntdb
, off
+ sizeof(rec
) + key
.dsize
,
215 if (ecode
!= NTDB_SUCCESS
) {
216 goto out_free_newdata
;
218 memcpy(newdata
+ old_dlen
, dbuf
.dptr
, dbuf
.dsize
);
219 new_dbuf
.dptr
= newdata
;
220 new_dbuf
.dsize
= old_dlen
+ dbuf
.dsize
;
226 /* If they're using ntdb_append(), it implies they're growing record. */
227 ecode
= replace_data(ntdb
, &h
, key
, new_dbuf
, off
, old_room
, true);
230 ntdb
->free_fn(newdata
, ntdb
->alloc_data
);
232 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
/*
 * ntdb_fetch - copy the data stored under key into *data.
 *
 * Visible flow: find_and_lock() looks the key up with F_RDLCK and also
 * returns `keyp`, a pointer to the stored key.  NTDB_ERR_NOEXIST is the
 * result for one branch.  Otherwise data->dsize is set to the record's
 * data length, data->dptr is obtained from ntdb->alloc_fn (a NULL result
 * gives NTDB_ERR_OOM), and the bytes following the key (keyp + key.dsize)
 * are copied in before NTDB_SUCCESS is recorded.  keyp is released with
 * ntdb_access_release() and the hash lock is dropped.
 *
 * NOTE(review): the `data` parameter, the declarations of `off`, `h` and
 * `keyp`, the found/not-found test and the return statement are not
 * visible here.  Presumably the caller owns data->dptr - confirm.
 */
236 _PUBLIC_
enum NTDB_ERROR
ntdb_fetch(struct ntdb_context
*ntdb
, NTDB_DATA key
,
240 struct ntdb_used_record rec
;
242 enum NTDB_ERROR ecode
;
245 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, &keyp
);
246 if (NTDB_OFF_IS_ERR(off
)) {
247 return NTDB_OFF_TO_ERR(off
);
251 ecode
= NTDB_ERR_NOEXIST
;
253 data
->dsize
= rec_data_length(&rec
);
254 data
->dptr
= ntdb
->alloc_fn(ntdb
, data
->dsize
, ntdb
->alloc_data
);
255 if (unlikely(!data
->dptr
)) {
256 ecode
= NTDB_ERR_OOM
;
258 memcpy(data
->dptr
, keyp
+ key
.dsize
, data
->dsize
);
259 ecode
= NTDB_SUCCESS
;
261 ntdb_access_release(ntdb
, keyp
);
264 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
/*
 * ntdb_exists - report whether key is present in the database.
 *
 * Visible flow: find_and_lock() looks the key up with F_RDLCK; after the
 * hash lock is dropped the result is true when the returned offset is
 * non-zero and false otherwise.
 *
 * NOTE(review): the declarations of `off` and `h` and the body of the
 * NTDB_OFF_IS_ERR() branch are not visible in this view.
 */
268 _PUBLIC_
bool ntdb_exists(struct ntdb_context
*ntdb
, NTDB_DATA key
)
271 struct ntdb_used_record rec
;
274 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, NULL
);
275 if (NTDB_OFF_IS_ERR(off
)) {
278 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
280 return off
? true : false;
/*
 * ntdb_delete - remove the record stored under key.
 *
 * Visible flow: find_and_lock() looks the key up with F_WRLCK.
 * NTDB_ERR_NOEXIST is the result for one branch.  Otherwise the entry is
 * removed with delete_from_hash() and the record's whole extent (header +
 * key + data + extra padding) is returned to the free list through
 * add_free_record().  The sequence number is bumped when NTDB_SEQNUM is
 * set, and the hash lock is dropped.
 *
 * NOTE(review): the declarations of `off` and `h`, the found/not-found
 * test, the error-exit bodies and the return statement are not visible.
 */
283 _PUBLIC_
enum NTDB_ERROR
ntdb_delete(struct ntdb_context
*ntdb
, NTDB_DATA key
)
286 struct ntdb_used_record rec
;
288 enum NTDB_ERROR ecode
;
290 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
291 if (NTDB_OFF_IS_ERR(off
)) {
292 return NTDB_OFF_TO_ERR(off
);
296 ecode
= NTDB_ERR_NOEXIST
;
300 ecode
= delete_from_hash(ntdb
, &h
);
301 if (ecode
!= NTDB_SUCCESS
) {
305 /* Free the deleted entry. */
307 ecode
= add_free_record(ntdb
, off
,
308 sizeof(struct ntdb_used_record
)
309 + rec_key_length(&rec
)
310 + rec_data_length(&rec
)
311 + rec_extra_padding(&rec
),
312 NTDB_LOCK_WAIT
, true);
314 if (ntdb
->flags
& NTDB_SEQNUM
)
315 ntdb_inc_seqnum(ntdb
);
318 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
/*
 * ntdb_get_flags - public accessor taking the database context and
 * returning an unsigned int.
 *
 * NOTE(review): only the signature is visible in this view; presumably
 * the body returns the context's flags word - confirm against the full
 * source.
 */
322 _PUBLIC_
unsigned int ntdb_get_flags(struct ntdb_context
*ntdb
)
327 static bool inside_transaction(const struct ntdb_context
*ntdb
)
329 return ntdb
->transaction
!= NULL
;
/*
 * readonly_changable - check whether NTDB_RDONLY may be toggled right now.
 *
 * Visible behaviour: when inside_transaction() reports an open
 * transaction, an NTDB_ERR_EINVAL usage error mentioning "NTDB_RDONLY
 * inside transaction" is logged.  `caller` is the name of the calling
 * API function.
 *
 * NOTE(review): the remaining ntdb_logerr() arguments and both return
 * statements are not visible here; presumably false is returned after
 * logging and true otherwise - confirm.
 */
332 static bool readonly_changable(struct ntdb_context
*ntdb
, const char *caller
)
334 if (inside_transaction(ntdb
)) {
335 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
337 " NTDB_RDONLY inside transaction",
/*
 * ntdb_add_flag - turn on one runtime flag on an open database.
 *
 * Visible behaviour:
 *  - A context with NTDB_INTERNAL set is rejected with a logged
 *    NTDB_ERR_EINVAL usage error.
 *  - NTDB_NOLOCK, NTDB_NOSYNC, NTDB_SEQNUM and NTDB_ALLOW_NESTING are
 *    OR-ed into ntdb->flags.
 *  - NTDB_NOMMAP is refused with a logged error while
 *    ntdb->file->direct_count is non-zero, otherwise it is set.
 *  - NTDB_RDONLY is set only when readonly_changable() allows it.
 *  - Any other value logs "Unknown flag".
 *
 * NOTE(review): the switch statement, most case labels, the code inside
 * the HAVE_INCOHERENT_MMAP conditional and the early returns are not
 * visible here, so the mapping of statements to cases is inferred from
 * the flag each statement sets.
 */
344 _PUBLIC_
void ntdb_add_flag(struct ntdb_context
*ntdb
, unsigned flag
)
346 if (ntdb
->flags
& NTDB_INTERNAL
) {
347 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
348 "ntdb_add_flag: internal db");
353 ntdb
->flags
|= NTDB_NOLOCK
;
356 if (ntdb
->file
->direct_count
) {
357 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
358 "ntdb_add_flag: Can't get NTDB_NOMMAP from"
359 " ntdb_parse_record!");
362 ntdb
->flags
|= NTDB_NOMMAP
;
363 #ifndef HAVE_INCOHERENT_MMAP
368 ntdb
->flags
|= NTDB_NOSYNC
;
371 ntdb
->flags
|= NTDB_SEQNUM
;
373 case NTDB_ALLOW_NESTING
:
374 ntdb
->flags
|= NTDB_ALLOW_NESTING
;
377 if (readonly_changable(ntdb
, "ntdb_add_flag"))
378 ntdb
->flags
|= NTDB_RDONLY
;
381 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
382 "ntdb_add_flag: Unknown flag %u", flag
);
/*
 * ntdb_remove_flag - turn off one runtime flag on an open database.
 *
 * Visible behaviour:
 *  - A context with NTDB_INTERNAL set is rejected with a logged
 *    NTDB_ERR_EINVAL usage error.
 *  - NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC, NTDB_SEQNUM and
 *    NTDB_ALLOW_NESTING are cleared from ntdb->flags.
 *  - NTDB_RDONLY cannot be removed when the database was opened with an
 *    O_RDONLY access mode (an error is logged); otherwise it is cleared
 *    only when readonly_changable() allows it.
 *  - Any other value logs "Unknown flag".
 *
 * NOTE(review): the switch statement, most case labels, the code inside
 * the HAVE_INCOHERENT_MMAP conditional, the early returns and the final
 * ntdb_logerr() argument are not visible here.
 */
386 _PUBLIC_
void ntdb_remove_flag(struct ntdb_context
*ntdb
, unsigned flag
)
388 if (ntdb
->flags
& NTDB_INTERNAL
) {
389 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
390 "ntdb_remove_flag: internal db");
395 ntdb
->flags
&= ~NTDB_NOLOCK
;
398 ntdb
->flags
&= ~NTDB_NOMMAP
;
399 #ifndef HAVE_INCOHERENT_MMAP
400 /* If mmap incoherent, we were mmaping anyway. */
405 ntdb
->flags
&= ~NTDB_NOSYNC
;
408 ntdb
->flags
&= ~NTDB_SEQNUM
;
410 case NTDB_ALLOW_NESTING
:
411 ntdb
->flags
&= ~NTDB_ALLOW_NESTING
;
414 if ((ntdb
->open_flags
& O_ACCMODE
) == O_RDONLY
) {
415 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
416 "ntdb_remove_flag: can't"
417 " remove NTDB_RDONLY on ntdb"
418 " opened with O_RDONLY");
421 if (readonly_changable(ntdb
, "ntdb_remove_flag"))
422 ntdb
->flags
&= ~NTDB_RDONLY
;
425 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
426 "ntdb_remove_flag: Unknown flag %u",
431 _PUBLIC_
const char *ntdb_errorstr(enum NTDB_ERROR ecode
)
433 /* Gcc warns if you miss a case in the switch, so use that. */
434 switch (NTDB_ERR_TO_OFF(ecode
)) {
435 case NTDB_ERR_TO_OFF(NTDB_SUCCESS
): return "Success";
436 case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT
): return "Corrupt database";
437 case NTDB_ERR_TO_OFF(NTDB_ERR_IO
): return "IO Error";
438 case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK
): return "Locking error";
439 case NTDB_ERR_TO_OFF(NTDB_ERR_OOM
): return "Out of memory";
440 case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS
): return "Record exists";
441 case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL
): return "Invalid parameter";
442 case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST
): return "Record does not exist";
443 case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY
): return "write not permitted";
445 return "Invalid error code";
/*
 * ntdb_logerr - format a printf-style message and pass it to the
 * context's log callback.
 *
 * Visible flow: errno is saved on entry.  A first vsnprintf() call with a
 * NULL buffer measures the formatted length, and a buffer of len + 1
 * bytes is requested from ntdb->alloc_fn.  One path reports an
 * out-of-memory condition through log_fn and then logs the unformatted
 * `fmt`; the other formats into the buffer, logs it with the caller's
 * level and ecode, and releases the buffer with ntdb->free_fn.
 *
 * NOTE(review): the declarations of `ap`, `len` and `message`, the
 * va_start()/va_end() calls, the test on the allocation result, the
 * restoration of errno and the return statement are not visible here.
 */
448 enum NTDB_ERROR COLD
ntdb_logerr(struct ntdb_context
*ntdb
,
449 enum NTDB_ERROR ecode
,
450 enum ntdb_log_level level
,
451 const char *fmt
, ...)
456 /* ntdb_open paths care about errno, so save it. */
457 int saved_errno
= errno
;
/* Sizing pass: with a NULL buffer of size 0 only the length is computed. */
463 len
= vsnprintf(NULL
, 0, fmt
, ap
);
466 message
= ntdb
->alloc_fn(ntdb
, len
+ 1, ntdb
->alloc_data
);
468 ntdb
->log_fn(ntdb
, NTDB_LOG_ERROR
, NTDB_ERR_OOM
,
469 "out of memory formatting message:", ntdb
->log_data
);
470 ntdb
->log_fn(ntdb
, level
, ecode
, fmt
, ntdb
->log_data
);
473 vsnprintf(message
, len
+1, fmt
, ap
);
475 ntdb
->log_fn(ntdb
, level
, ecode
, message
, ntdb
->log_data
);
476 ntdb
->free_fn(message
, ntdb
->alloc_data
);
/*
 * ntdb_parse_record_ - hand a record's data to a caller-supplied parse
 * callback without copying it.
 *
 * Visible flow: find_and_lock() looks the key up with F_RDLCK and also
 * returns `keyp`, a pointer to the stored key.  NTDB_ERR_NOEXIST is the
 * result for one branch.  Otherwise `d` is built with ntdb_mkdata() over
 * the bytes following the key.  The current flags are saved and, unless
 * an all-record lock is held or NTDB_NOLOCK is set, NTDB_RDONLY is forced
 * on for the duration of the callback.  parse(key, d, data) supplies the
 * result; afterwards the saved flags are restored, keyp is released with
 * ntdb_access_release() and the hash lock is dropped.
 *
 * NOTE(review): the key/data parameters, the declarations of `off`, `h`
 * and `keyp`, the found/not-found test, the delimiters of the explanatory
 * comment in the body and the return statement are not visible here.
 */
482 _PUBLIC_
enum NTDB_ERROR
ntdb_parse_record_(struct ntdb_context
*ntdb
,
484 enum NTDB_ERROR (*parse
)(NTDB_DATA k
,
490 struct ntdb_used_record rec
;
492 enum NTDB_ERROR ecode
;
495 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, &keyp
);
496 if (NTDB_OFF_IS_ERR(off
)) {
497 return NTDB_OFF_TO_ERR(off
);
501 ecode
= NTDB_ERR_NOEXIST
;
503 unsigned int old_flags
;
504 NTDB_DATA d
= ntdb_mkdata(keyp
+ key
.dsize
,
505 rec_data_length(&rec
));
508 * Make sure they don't try to write db, since they
509 * have read lock! They can if they've done
510 * ntdb_lockall(): if it was ntdb_lockall_read, that'll
511 * stop them doing a write operation anyway.
513 old_flags
= ntdb
->flags
;
514 if (!ntdb
->file
->allrecord_lock
.count
&&
515 !(ntdb
->flags
& NTDB_NOLOCK
)) {
516 ntdb
->flags
|= NTDB_RDONLY
;
518 ecode
= parse(key
, d
, data
);
519 ntdb
->flags
= old_flags
;
520 ntdb_access_release(ntdb
, keyp
);
523 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
/*
 * ntdb_name - public accessor taking the database context and returning
 * a const string.
 *
 * NOTE(review): only the signature is visible in this view; presumably
 * the body returns the name stored in the context - confirm against the
 * full source.
 */
527 _PUBLIC_
const char *ntdb_name(const struct ntdb_context
*ntdb
)
532 _PUBLIC_
int64_t ntdb_get_seqnum(struct ntdb_context
*ntdb
)
534 return ntdb_read_off(ntdb
, offsetof(struct ntdb_header
, seqnum
));
538 _PUBLIC_
int ntdb_fd(const struct ntdb_context
*ntdb
)
540 return ntdb
->file
->fd
;
543 struct traverse_state
{
544 enum NTDB_ERROR error
;
545 struct ntdb_context
*dest_db
;
/*
 * repack_traverse - traversal callback used while repacking.
 *
 * Visible behaviour: each key/data pair is stored into state->dest_db
 * with NTDB_INSERT and the result is recorded in state->error, which is
 * then compared against NTDB_SUCCESS.  The `ntdb` argument is not used in
 * the visible lines.
 *
 * NOTE(review): the delimiters of the original one-line comment below,
 * the body of the failure branch and the return statements are not
 * visible here; presumably a non-zero value is returned to stop the
 * traversal on failure - confirm.
 */
549 traverse function for repacking
551 static int repack_traverse(struct ntdb_context
*ntdb
, NTDB_DATA key
, NTDB_DATA data
,
552 struct traverse_state
*state
)
554 state
->error
= ntdb_store(state
->dest_db
, key
, data
, NTDB_INSERT
);
555 if (state
->error
!= NTDB_SUCCESS
) {
561 _PUBLIC_
enum NTDB_ERROR
ntdb_repack(struct ntdb_context
*ntdb
)
563 struct ntdb_context
*tmp_db
;
564 struct traverse_state state
;
566 state
.error
= ntdb_transaction_start(ntdb
);
567 if (state
.error
!= NTDB_SUCCESS
) {
571 tmp_db
= ntdb_open("tmpdb", NTDB_INTERNAL
, O_RDWR
|O_CREAT
, 0, NULL
);
572 if (tmp_db
== NULL
) {
573 state
.error
= ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
575 " Failed to create tmp_db");
576 ntdb_transaction_cancel(ntdb
);
580 state
.dest_db
= tmp_db
;
581 if (ntdb_traverse(ntdb
, repack_traverse
, &state
) < 0) {
585 state
.error
= ntdb_wipe_all(ntdb
);
586 if (state
.error
!= NTDB_SUCCESS
) {
590 state
.dest_db
= ntdb
;
591 if (ntdb_traverse(tmp_db
, repack_traverse
, &state
) < 0) {
596 return ntdb_transaction_commit(ntdb
);
599 ntdb_transaction_cancel(ntdb
);