2 Unix SMB/CIFS implementation.
3 Database interface wrapper around red-black trees
4 Copyright (C) Volker Lendecke 2007, 2008
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "dbwrap/dbwrap.h"
22 #include "dbwrap/dbwrap_private.h"
23 #include "dbwrap/dbwrap_rbt.h"
24 #include "../lib/util/rbtree.h"
25 #include "../lib/util/dlinklist.h"
27 #define DBWRAP_RBT_ALIGN(_size_) (((_size_)+15)&~15)
31 struct db_rbt_node
*nodes
;
33 struct db_rbt_node
**traverse_nextp
;
37 struct db_rbt_node
*node
;
40 /* The structure that ends up in the tree */
43 struct rb_node rb_node
;
44 size_t keysize
, valuesize
;
45 struct db_rbt_node
*prev
, *next
;
49 * Hide the ugly pointer calculations in a function
52 static struct db_rbt_node
*db_rbt2node(struct rb_node
*node
)
54 return (struct db_rbt_node
*)
55 ((char *)node
- offsetof(struct db_rbt_node
, rb_node
));
62 static int db_rbt_compare(TDB_DATA a
, TDB_DATA b
)
66 res
= memcmp(a
.dptr
, b
.dptr
, MIN(a
.dsize
, b
.dsize
));
68 if ((res
< 0) || ((res
== 0) && (a
.dsize
< b
.dsize
))) {
71 if ((res
> 0) || ((res
== 0) && (a
.dsize
> b
.dsize
))) {
78 * dissect a db_rbt_node into its implicit key and value parts
81 static void db_rbt_parse_node(struct db_rbt_node
*node
,
82 TDB_DATA
*key
, TDB_DATA
*value
)
84 size_t key_offset
, value_offset
;
86 key_offset
= DBWRAP_RBT_ALIGN(sizeof(struct db_rbt_node
));
87 key
->dptr
= ((uint8_t *)node
) + key_offset
;
88 key
->dsize
= node
->keysize
;
90 value_offset
= DBWRAP_RBT_ALIGN(node
->keysize
);
91 value
->dptr
= key
->dptr
+ value_offset
;
92 value
->dsize
= node
->valuesize
;
95 static ssize_t
db_rbt_reclen(size_t keylen
, size_t valuelen
)
99 len
= DBWRAP_RBT_ALIGN(sizeof(struct db_rbt_node
));
101 tmp
= DBWRAP_RBT_ALIGN(keylen
);
112 if (len
< valuelen
) {
121 static NTSTATUS
db_rbt_storev(struct db_record
*rec
,
122 const TDB_DATA
*dbufs
, int num_dbufs
, int flag
)
124 struct db_rbt_ctx
*db_ctx
= talloc_get_type_abort(
125 rec
->db
->private_data
, struct db_rbt_ctx
);
126 struct db_rbt_rec
*rec_priv
= (struct db_rbt_rec
*)rec
->private_data
;
127 struct db_rbt_node
*node
;
130 struct rb_node
*parent
= NULL
;
131 struct db_rbt_node
*parent_node
= NULL
;
134 TDB_DATA data
, this_key
, this_val
;
135 void *to_free
= NULL
;
137 if (db_ctx
->traverse_read
> 0) {
138 return NT_STATUS_MEDIA_WRITE_PROTECTED
;
141 if (num_dbufs
== 1) {
144 data
= dbwrap_merge_dbufs(rec
, dbufs
, num_dbufs
);
145 if (data
.dptr
== NULL
) {
146 return NT_STATUS_NO_MEMORY
;
151 if (rec_priv
->node
!= NULL
) {
154 * The record was around previously
157 db_rbt_parse_node(rec_priv
->node
, &this_key
, &this_val
);
159 SMB_ASSERT(this_key
.dsize
== rec
->key
.dsize
);
160 SMB_ASSERT(memcmp(this_key
.dptr
, rec
->key
.dptr
,
161 this_key
.dsize
) == 0);
163 if (this_val
.dsize
>= data
.dsize
) {
165 * The new value fits into the old space
167 memcpy(this_val
.dptr
, data
.dptr
, data
.dsize
);
168 rec_priv
->node
->valuesize
= data
.dsize
;
169 TALLOC_FREE(to_free
);
174 reclen
= db_rbt_reclen(rec
->key
.dsize
, data
.dsize
);
176 TALLOC_FREE(to_free
);
177 return NT_STATUS_INSUFFICIENT_RESOURCES
;
180 node
= talloc_zero_size(db_ctx
, reclen
);
182 TALLOC_FREE(to_free
);
183 return NT_STATUS_NO_MEMORY
;
186 if (rec_priv
->node
!= NULL
) {
187 if (db_ctx
->traverse_nextp
!= NULL
) {
188 if (*db_ctx
->traverse_nextp
== rec_priv
->node
) {
189 *db_ctx
->traverse_nextp
= node
;
194 * We need to delete the key from the tree and start fresh,
195 * there's not enough space in the existing record
198 rb_erase(&rec_priv
->node
->rb_node
, &db_ctx
->tree
);
199 DLIST_REMOVE(db_ctx
->nodes
, rec_priv
->node
);
202 * Keep the existing node around for a while: If the record
203 * existed before, we reference the key data in there.
207 node
->keysize
= rec
->key
.dsize
;
208 node
->valuesize
= data
.dsize
;
210 db_rbt_parse_node(node
, &this_key
, &this_val
);
212 memcpy(this_key
.dptr
, rec
->key
.dptr
, node
->keysize
);
213 TALLOC_FREE(rec_priv
->node
);
214 rec_priv
->node
= node
;
216 memcpy(this_val
.dptr
, data
.dptr
, node
->valuesize
);
219 p
= &db_ctx
->tree
.rb_node
;
222 struct db_rbt_node
*r
;
223 TDB_DATA search_key
, search_val
;
231 db_rbt_parse_node(r
, &search_key
, &search_val
);
233 res
= db_rbt_compare(this_key
, search_key
);
242 smb_panic("someone messed with the tree");
246 rb_link_node(&node
->rb_node
, parent
, p
);
247 DLIST_ADD_AFTER(db_ctx
->nodes
, node
, parent_node
);
248 rb_insert_color(&node
->rb_node
, &db_ctx
->tree
);
250 TALLOC_FREE(to_free
);
255 static NTSTATUS
db_rbt_delete(struct db_record
*rec
)
257 struct db_rbt_ctx
*db_ctx
= talloc_get_type_abort(
258 rec
->db
->private_data
, struct db_rbt_ctx
);
259 struct db_rbt_rec
*rec_priv
= (struct db_rbt_rec
*)rec
->private_data
;
261 if (db_ctx
->traverse_read
> 0) {
262 return NT_STATUS_MEDIA_WRITE_PROTECTED
;
265 if (rec_priv
->node
== NULL
) {
269 if (db_ctx
->traverse_nextp
!= NULL
) {
270 if (*db_ctx
->traverse_nextp
== rec_priv
->node
) {
271 *db_ctx
->traverse_nextp
= rec_priv
->node
->next
;
275 rb_erase(&rec_priv
->node
->rb_node
, &db_ctx
->tree
);
276 DLIST_REMOVE(db_ctx
->nodes
, rec_priv
->node
);
277 TALLOC_FREE(rec_priv
->node
);
282 struct db_rbt_search_result
{
285 struct db_rbt_node
* node
;
288 static bool db_rbt_search_internal(struct db_context
*db
, TDB_DATA key
,
289 struct db_rbt_search_result
*result
)
291 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
292 db
->private_data
, struct db_rbt_ctx
);
296 struct db_rbt_node
*r
= NULL
;
297 TDB_DATA search_key
= { 0 };
298 TDB_DATA search_val
= { 0 };
300 n
= ctx
->tree
.rb_node
;
307 db_rbt_parse_node(r
, &search_key
, &search_val
);
309 res
= db_rbt_compare(key
, search_key
);
322 if (result
!= NULL
) {
324 result
->key
= search_key
;
325 result
->val
= search_val
;
328 ZERO_STRUCT(*result
);
334 static struct db_record
*db_rbt_fetch_locked(struct db_context
*db_ctx
,
338 struct db_rbt_rec
*rec_priv
;
339 struct db_record
*result
;
342 struct db_rbt_search_result res
;
344 found
= db_rbt_search_internal(db_ctx
, key
, &res
);
347 * In this low-level routine, play tricks to reduce the number of
348 * tallocs to one. Not recommened for general use, but here it pays
352 size
= DBWRAP_RBT_ALIGN(sizeof(struct db_record
))
353 + sizeof(struct db_rbt_rec
);
357 * We need to keep the key around for later store
362 result
= (struct db_record
*)talloc_size(mem_ctx
, size
);
363 if (result
== NULL
) {
367 rec_priv
= (struct db_rbt_rec
*)
368 ((char *)result
+ DBWRAP_RBT_ALIGN(sizeof(struct db_record
)));
370 result
->storev
= db_rbt_storev
;
371 result
->delete_rec
= db_rbt_delete
;
372 result
->private_data
= rec_priv
;
374 rec_priv
->node
= res
.node
;
375 result
->value
= res
.val
;
378 result
->key
= res
.key
;
381 result
->key
.dptr
= (uint8_t *)
382 ((char *)rec_priv
+ sizeof(*rec_priv
));
383 result
->key
.dsize
= key
.dsize
;
384 memcpy(result
->key
.dptr
, key
.dptr
, key
.dsize
);
390 static int db_rbt_exists(struct db_context
*db
, TDB_DATA key
)
392 return db_rbt_search_internal(db
, key
, NULL
);
395 static int db_rbt_wipe(struct db_context
*db
)
397 struct db_rbt_ctx
*old_ctx
= talloc_get_type_abort(
398 db
->private_data
, struct db_rbt_ctx
);
399 struct db_rbt_ctx
*new_ctx
= talloc_zero(db
, struct db_rbt_ctx
);
400 if (new_ctx
== NULL
) {
403 db
->private_data
= new_ctx
;
404 talloc_free(old_ctx
);
408 static NTSTATUS
db_rbt_parse_record(struct db_context
*db
, TDB_DATA key
,
409 void (*parser
)(TDB_DATA key
, TDB_DATA data
,
413 struct db_rbt_search_result res
;
414 bool found
= db_rbt_search_internal(db
, key
, &res
);
417 return NT_STATUS_NOT_FOUND
;
419 parser(res
.key
, res
.val
, private_data
);
423 static int db_rbt_traverse_internal(struct db_context
*db
,
424 int (*f
)(struct db_record
*db
,
426 void *private_data
, uint32_t* count
,
429 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
430 db
->private_data
, struct db_rbt_ctx
);
431 struct db_rbt_node
*cur
= NULL
;
432 struct db_rbt_node
*next
= NULL
;
435 for (cur
= ctx
->nodes
; cur
!= NULL
; cur
= next
) {
436 struct db_record rec
;
437 struct db_rbt_rec rec_priv
;
440 next
= rec_priv
.node
->next
;
444 rec
.private_data
= &rec_priv
;
445 rec
.storev
= db_rbt_storev
;
446 rec
.delete_rec
= db_rbt_delete
;
447 db_rbt_parse_node(rec_priv
.node
, &rec
.key
, &rec
.value
);
450 ctx
->traverse_nextp
= &next
;
452 ret
= f(&rec
, private_data
);
455 ctx
->traverse_nextp
= NULL
;
460 if (rec_priv
.node
!= NULL
) {
461 next
= rec_priv
.node
->next
;
468 static int db_rbt_traverse_read(struct db_context
*db
,
469 int (*f
)(struct db_record
*db
,
473 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
474 db
->private_data
, struct db_rbt_ctx
);
478 ctx
->traverse_read
++;
479 ret
= db_rbt_traverse_internal(db
,
480 f
, private_data
, &count
,
482 ctx
->traverse_read
--;
486 if (count
> INT_MAX
) {
492 static int db_rbt_traverse(struct db_context
*db
,
493 int (*f
)(struct db_record
*db
,
497 struct db_rbt_ctx
*ctx
= talloc_get_type_abort(
498 db
->private_data
, struct db_rbt_ctx
);
502 if (ctx
->traverse_nextp
!= NULL
) {
506 if (ctx
->traverse_read
> 0) {
507 return db_rbt_traverse_read(db
, f
, private_data
);
510 ret
= db_rbt_traverse_internal(db
,
511 f
, private_data
, &count
,
516 if (count
> INT_MAX
) {
522 static int db_rbt_get_seqnum(struct db_context
*db
)
527 static int db_rbt_trans_dummy(struct db_context
*db
)
530 * Transactions are pretty pointless in-memory, just return success.
535 static size_t db_rbt_id(struct db_context
*db
, uint8_t *id
, size_t idlen
)
537 if (idlen
>= sizeof(struct db_context
*)) {
538 memcpy(id
, &db
, sizeof(struct db_context
*));
540 return sizeof(struct db_context
*);
543 struct db_context
*db_open_rbt(TALLOC_CTX
*mem_ctx
)
545 struct db_context
*result
;
547 result
= talloc_zero(mem_ctx
, struct db_context
);
549 if (result
== NULL
) {
553 result
->private_data
= talloc_zero(result
, struct db_rbt_ctx
);
555 if (result
->private_data
== NULL
) {
560 result
->fetch_locked
= db_rbt_fetch_locked
;
561 result
->traverse
= db_rbt_traverse
;
562 result
->traverse_read
= db_rbt_traverse_read
;
563 result
->get_seqnum
= db_rbt_get_seqnum
;
564 result
->transaction_start
= db_rbt_trans_dummy
;
565 result
->transaction_commit
= db_rbt_trans_dummy
;
566 result
->transaction_cancel
= db_rbt_trans_dummy
;
567 result
->exists
= db_rbt_exists
;
568 result
->wipe
= db_rbt_wipe
;
569 result
->parse_record
= db_rbt_parse_record
;
570 result
->id
= db_rbt_id
;
571 result
->name
= "dbwrap rbt";