/*
   Trivial Database 2: fetch, store and misc routines.
   Copyright (C) Rusty Russell 2010

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
19 #ifndef HAVE_LIBREPLACE
23 static enum NTDB_ERROR
update_rec_hdr(struct ntdb_context
*ntdb
,
27 struct ntdb_used_record
*rec
)
29 uint64_t dataroom
= rec_data_length(rec
) + rec_extra_padding(rec
);
30 enum NTDB_ERROR ecode
;
32 ecode
= set_header(ntdb
, rec
, NTDB_USED_MAGIC
, keylen
, datalen
,
34 if (ecode
== NTDB_SUCCESS
) {
35 ecode
= ntdb_write_convert(ntdb
, off
, rec
, sizeof(*rec
));
40 static enum NTDB_ERROR
replace_data(struct ntdb_context
*ntdb
,
42 NTDB_DATA key
, NTDB_DATA dbuf
,
43 ntdb_off_t old_off
, ntdb_len_t old_room
,
47 enum NTDB_ERROR ecode
;
49 /* Allocate a new record. */
50 new_off
= alloc(ntdb
, key
.dsize
, dbuf
.dsize
, NTDB_USED_MAGIC
, growing
);
51 if (NTDB_OFF_IS_ERR(new_off
)) {
52 return NTDB_OFF_TO_ERR(new_off
);
55 /* We didn't like the existing one: remove it. */
58 ecode
= add_free_record(ntdb
, old_off
,
59 sizeof(struct ntdb_used_record
)
60 + key
.dsize
+ old_room
,
61 NTDB_LOCK_WAIT
, true);
62 if (ecode
== NTDB_SUCCESS
)
63 ecode
= replace_in_hash(ntdb
, h
, new_off
);
65 ecode
= add_to_hash(ntdb
, h
, new_off
);
67 if (ecode
!= NTDB_SUCCESS
) {
71 new_off
+= sizeof(struct ntdb_used_record
);
72 ecode
= ntdb
->io
->twrite(ntdb
, new_off
, key
.dptr
, key
.dsize
);
73 if (ecode
!= NTDB_SUCCESS
) {
78 ecode
= ntdb
->io
->twrite(ntdb
, new_off
, dbuf
.dptr
, dbuf
.dsize
);
79 if (ecode
!= NTDB_SUCCESS
) {
83 if (ntdb
->flags
& NTDB_SEQNUM
)
84 ntdb_inc_seqnum(ntdb
);
89 static enum NTDB_ERROR
update_data(struct ntdb_context
*ntdb
,
94 enum NTDB_ERROR ecode
;
96 ecode
= ntdb
->io
->twrite(ntdb
, off
, dbuf
.dptr
, dbuf
.dsize
);
97 if (ecode
== NTDB_SUCCESS
&& extra
) {
98 /* Put a zero in; future versions may append other data. */
99 ecode
= ntdb
->io
->twrite(ntdb
, off
+ dbuf
.dsize
, "", 1);
101 if (ntdb
->flags
& NTDB_SEQNUM
)
102 ntdb_inc_seqnum(ntdb
);
107 _PUBLIC_
enum NTDB_ERROR
ntdb_store(struct ntdb_context
*ntdb
,
108 NTDB_DATA key
, NTDB_DATA dbuf
, int flag
)
112 ntdb_len_t old_room
= 0;
113 struct ntdb_used_record rec
;
114 enum NTDB_ERROR ecode
;
116 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
117 if (NTDB_OFF_IS_ERR(off
)) {
118 return NTDB_OFF_TO_ERR(off
);
121 /* Now we have lock on this hash bucket. */
122 if (flag
== NTDB_INSERT
) {
124 ecode
= NTDB_ERR_EXISTS
;
129 old_room
= rec_data_length(&rec
)
130 + rec_extra_padding(&rec
);
131 if (old_room
>= dbuf
.dsize
) {
132 /* Can modify in-place. Easy! */
133 ecode
= update_rec_hdr(ntdb
, off
,
134 key
.dsize
, dbuf
.dsize
,
136 if (ecode
!= NTDB_SUCCESS
) {
139 ecode
= update_data(ntdb
,
142 old_room
- dbuf
.dsize
);
143 if (ecode
!= NTDB_SUCCESS
) {
146 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
150 if (flag
== NTDB_MODIFY
) {
151 /* if the record doesn't exist and we
152 are in NTDB_MODIFY mode then we should fail
154 ecode
= NTDB_ERR_NOEXIST
;
160 /* If we didn't use the old record, this implies we're growing. */
161 ecode
= replace_data(ntdb
, &h
, key
, dbuf
, off
, old_room
, off
);
163 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
167 _PUBLIC_
enum NTDB_ERROR
ntdb_append(struct ntdb_context
*ntdb
,
168 NTDB_DATA key
, NTDB_DATA dbuf
)
172 struct ntdb_used_record rec
;
173 ntdb_len_t old_room
= 0, old_dlen
;
174 unsigned char *newdata
;
176 enum NTDB_ERROR ecode
;
178 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
179 if (NTDB_OFF_IS_ERR(off
)) {
180 return NTDB_OFF_TO_ERR(off
);
184 old_dlen
= rec_data_length(&rec
);
185 old_room
= old_dlen
+ rec_extra_padding(&rec
);
187 /* Fast path: can append in place. */
188 if (rec_extra_padding(&rec
) >= dbuf
.dsize
) {
189 ecode
= update_rec_hdr(ntdb
, off
, key
.dsize
,
190 old_dlen
+ dbuf
.dsize
, &rec
);
191 if (ecode
!= NTDB_SUCCESS
) {
195 off
+= sizeof(rec
) + key
.dsize
+ old_dlen
;
196 ecode
= update_data(ntdb
, off
, dbuf
,
197 rec_extra_padding(&rec
));
202 newdata
= ntdb
->alloc_fn(ntdb
, key
.dsize
+ old_dlen
+ dbuf
.dsize
,
205 ecode
= ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
207 " failed to allocate %zu bytes",
208 (size_t)(key
.dsize
+ old_dlen
212 ecode
= ntdb
->io
->tread(ntdb
, off
+ sizeof(rec
) + key
.dsize
,
214 if (ecode
!= NTDB_SUCCESS
) {
215 goto out_free_newdata
;
217 memcpy(newdata
+ old_dlen
, dbuf
.dptr
, dbuf
.dsize
);
218 new_dbuf
.dptr
= newdata
;
219 new_dbuf
.dsize
= old_dlen
+ dbuf
.dsize
;
225 /* If they're using ntdb_append(), it implies they're growing record. */
226 ecode
= replace_data(ntdb
, &h
, key
, new_dbuf
, off
, old_room
, true);
229 ntdb
->free_fn(newdata
, ntdb
->alloc_data
);
231 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
235 _PUBLIC_
enum NTDB_ERROR
ntdb_fetch(struct ntdb_context
*ntdb
, NTDB_DATA key
,
239 struct ntdb_used_record rec
;
241 enum NTDB_ERROR ecode
;
244 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, &keyp
);
245 if (NTDB_OFF_IS_ERR(off
)) {
246 return NTDB_OFF_TO_ERR(off
);
250 ecode
= NTDB_ERR_NOEXIST
;
252 data
->dsize
= rec_data_length(&rec
);
253 data
->dptr
= ntdb
->alloc_fn(ntdb
, data
->dsize
, ntdb
->alloc_data
);
254 if (unlikely(!data
->dptr
)) {
255 ecode
= NTDB_ERR_OOM
;
257 memcpy(data
->dptr
, keyp
+ key
.dsize
, data
->dsize
);
258 ecode
= NTDB_SUCCESS
;
260 ntdb_access_release(ntdb
, keyp
);
263 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
267 _PUBLIC_
bool ntdb_exists(struct ntdb_context
*ntdb
, NTDB_DATA key
)
270 struct ntdb_used_record rec
;
273 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, NULL
);
274 if (NTDB_OFF_IS_ERR(off
)) {
277 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
279 return off
? true : false;
282 _PUBLIC_
enum NTDB_ERROR
ntdb_delete(struct ntdb_context
*ntdb
, NTDB_DATA key
)
285 struct ntdb_used_record rec
;
287 enum NTDB_ERROR ecode
;
289 off
= find_and_lock(ntdb
, key
, F_WRLCK
, &h
, &rec
, NULL
);
290 if (NTDB_OFF_IS_ERR(off
)) {
291 return NTDB_OFF_TO_ERR(off
);
295 ecode
= NTDB_ERR_NOEXIST
;
299 ecode
= delete_from_hash(ntdb
, &h
);
300 if (ecode
!= NTDB_SUCCESS
) {
304 /* Free the deleted entry. */
306 ecode
= add_free_record(ntdb
, off
,
307 sizeof(struct ntdb_used_record
)
308 + rec_key_length(&rec
)
309 + rec_data_length(&rec
)
310 + rec_extra_padding(&rec
),
311 NTDB_LOCK_WAIT
, true);
313 if (ntdb
->flags
& NTDB_SEQNUM
)
314 ntdb_inc_seqnum(ntdb
);
317 ntdb_unlock_hash(ntdb
, h
.h
, F_WRLCK
);
321 _PUBLIC_
unsigned int ntdb_get_flags(struct ntdb_context
*ntdb
)
326 static bool inside_transaction(const struct ntdb_context
*ntdb
)
328 return ntdb
->transaction
!= NULL
;
331 static bool readonly_changable(struct ntdb_context
*ntdb
, const char *caller
)
333 if (inside_transaction(ntdb
)) {
334 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
336 " NTDB_RDONLY inside transaction",
343 _PUBLIC_
void ntdb_add_flag(struct ntdb_context
*ntdb
, unsigned flag
)
345 if (ntdb
->flags
& NTDB_INTERNAL
) {
346 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
347 "ntdb_add_flag: internal db");
352 ntdb
->flags
|= NTDB_NOLOCK
;
355 if (ntdb
->file
->direct_count
) {
356 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
357 "ntdb_add_flag: Can't get NTDB_NOMMAP from"
358 " ntdb_parse_record!");
361 ntdb
->flags
|= NTDB_NOMMAP
;
362 #ifndef HAVE_INCOHERENT_MMAP
367 ntdb
->flags
|= NTDB_NOSYNC
;
370 ntdb
->flags
|= NTDB_SEQNUM
;
372 case NTDB_ALLOW_NESTING
:
373 ntdb
->flags
|= NTDB_ALLOW_NESTING
;
376 if (readonly_changable(ntdb
, "ntdb_add_flag"))
377 ntdb
->flags
|= NTDB_RDONLY
;
380 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
381 "ntdb_add_flag: Unknown flag %u", flag
);
385 _PUBLIC_
void ntdb_remove_flag(struct ntdb_context
*ntdb
, unsigned flag
)
387 if (ntdb
->flags
& NTDB_INTERNAL
) {
388 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
389 "ntdb_remove_flag: internal db");
394 ntdb
->flags
&= ~NTDB_NOLOCK
;
397 ntdb
->flags
&= ~NTDB_NOMMAP
;
398 #ifndef HAVE_INCOHERENT_MMAP
399 /* If mmap incoherent, we were mmaping anyway. */
404 ntdb
->flags
&= ~NTDB_NOSYNC
;
407 ntdb
->flags
&= ~NTDB_SEQNUM
;
409 case NTDB_ALLOW_NESTING
:
410 ntdb
->flags
&= ~NTDB_ALLOW_NESTING
;
413 if ((ntdb
->open_flags
& O_ACCMODE
) == O_RDONLY
) {
414 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
415 "ntdb_remove_flag: can't"
416 " remove NTDB_RDONLY on ntdb"
417 " opened with O_RDONLY");
420 if (readonly_changable(ntdb
, "ntdb_remove_flag"))
421 ntdb
->flags
&= ~NTDB_RDONLY
;
424 ntdb_logerr(ntdb
, NTDB_ERR_EINVAL
, NTDB_LOG_USE_ERROR
,
425 "ntdb_remove_flag: Unknown flag %u",
430 _PUBLIC_
const char *ntdb_errorstr(enum NTDB_ERROR ecode
)
432 /* Gcc warns if you miss a case in the switch, so use that. */
433 switch (NTDB_ERR_TO_OFF(ecode
)) {
434 case NTDB_ERR_TO_OFF(NTDB_SUCCESS
): return "Success";
435 case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT
): return "Corrupt database";
436 case NTDB_ERR_TO_OFF(NTDB_ERR_IO
): return "IO Error";
437 case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK
): return "Locking error";
438 case NTDB_ERR_TO_OFF(NTDB_ERR_OOM
): return "Out of memory";
439 case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS
): return "Record exists";
440 case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL
): return "Invalid parameter";
441 case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST
): return "Record does not exist";
442 case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY
): return "write not permitted";
444 return "Invalid error code";
447 enum NTDB_ERROR COLD
ntdb_logerr(struct ntdb_context
*ntdb
,
448 enum NTDB_ERROR ecode
,
449 enum ntdb_log_level level
,
450 const char *fmt
, ...)
455 /* ntdb_open paths care about errno, so save it. */
456 int saved_errno
= errno
;
462 len
= vsnprintf(NULL
, 0, fmt
, ap
);
465 message
= ntdb
->alloc_fn(ntdb
, len
+ 1, ntdb
->alloc_data
);
467 ntdb
->log_fn(ntdb
, NTDB_LOG_ERROR
, NTDB_ERR_OOM
,
468 "out of memory formatting message:", ntdb
->log_data
);
469 ntdb
->log_fn(ntdb
, level
, ecode
, fmt
, ntdb
->log_data
);
472 vsnprintf(message
, len
+1, fmt
, ap
);
474 ntdb
->log_fn(ntdb
, level
, ecode
, message
, ntdb
->log_data
);
475 ntdb
->free_fn(message
, ntdb
->alloc_data
);
481 _PUBLIC_
enum NTDB_ERROR
ntdb_parse_record_(struct ntdb_context
*ntdb
,
483 enum NTDB_ERROR (*parse
)(NTDB_DATA k
,
489 struct ntdb_used_record rec
;
491 enum NTDB_ERROR ecode
;
494 off
= find_and_lock(ntdb
, key
, F_RDLCK
, &h
, &rec
, &keyp
);
495 if (NTDB_OFF_IS_ERR(off
)) {
496 return NTDB_OFF_TO_ERR(off
);
500 ecode
= NTDB_ERR_NOEXIST
;
502 unsigned int old_flags
;
503 NTDB_DATA d
= ntdb_mkdata(keyp
+ key
.dsize
,
504 rec_data_length(&rec
));
507 * Make sure they don't try to write db, since they
508 * have read lock! They can if they've done
509 * ntdb_lockall(): if it was ntdb_lockall_read, that'll
510 * stop them doing a write operation anyway.
512 old_flags
= ntdb
->flags
;
513 if (!ntdb
->file
->allrecord_lock
.count
&&
514 !(ntdb
->flags
& NTDB_NOLOCK
)) {
515 ntdb
->flags
|= NTDB_RDONLY
;
517 ecode
= parse(key
, d
, data
);
518 ntdb
->flags
= old_flags
;
519 ntdb_access_release(ntdb
, keyp
);
522 ntdb_unlock_hash(ntdb
, h
.h
, F_RDLCK
);
526 _PUBLIC_
const char *ntdb_name(const struct ntdb_context
*ntdb
)
531 _PUBLIC_
int64_t ntdb_get_seqnum(struct ntdb_context
*ntdb
)
533 return ntdb_read_off(ntdb
, offsetof(struct ntdb_header
, seqnum
));
537 _PUBLIC_
int ntdb_fd(const struct ntdb_context
*ntdb
)
539 return ntdb
->file
->fd
;
542 struct traverse_state
{
543 enum NTDB_ERROR error
;
544 struct ntdb_context
*dest_db
;
/* Traverse function for repacking. */
550 static int repack_traverse(struct ntdb_context
*ntdb
, NTDB_DATA key
, NTDB_DATA data
,
551 struct traverse_state
*state
)
553 state
->error
= ntdb_store(state
->dest_db
, key
, data
, NTDB_INSERT
);
554 if (state
->error
!= NTDB_SUCCESS
) {
560 _PUBLIC_
enum NTDB_ERROR
ntdb_repack(struct ntdb_context
*ntdb
)
562 struct ntdb_context
*tmp_db
;
563 struct traverse_state state
;
565 state
.error
= ntdb_transaction_start(ntdb
);
566 if (state
.error
!= NTDB_SUCCESS
) {
570 tmp_db
= ntdb_open("tmpdb", NTDB_INTERNAL
, O_RDWR
|O_CREAT
, 0, NULL
);
571 if (tmp_db
== NULL
) {
572 state
.error
= ntdb_logerr(ntdb
, NTDB_ERR_OOM
, NTDB_LOG_ERROR
,
574 " Failed to create tmp_db");
575 ntdb_transaction_cancel(ntdb
);
579 state
.dest_db
= tmp_db
;
580 if (ntdb_traverse(ntdb
, repack_traverse
, &state
) < 0) {
584 state
.error
= ntdb_wipe_all(ntdb
);
585 if (state
.error
!= NTDB_SUCCESS
) {
589 state
.dest_db
= ntdb
;
590 if (ntdb_traverse(tmp_db
, repack_traverse
, &state
) < 0) {
595 return ntdb_transaction_commit(ntdb
);
598 ntdb_transaction_cancel(ntdb
);