2 * linux/fs/hfs/binsert.c
4 * Copyright (C) 1995-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU Public License.
7 * This file contains the code to insert records in a B-tree.
9 * "XXX" in a comment is a note to myself to consider changing something.
11 * In function preconditions the term "valid" applied to a pointer to
12 * a structure means that the pointer is non-NULL and the structure it
13 * points to has all fields initialized to consistent values.
16 #include "hfs_btree.h"
18 /*================ File-local functions ================*/
20 /* btree locking functions */
21 static inline void hfs_btree_lock(struct hfs_btree
*tree
)
24 hfs_sleep_on(&tree
->wait
);
28 static inline void hfs_btree_unlock(struct hfs_btree
*tree
)
31 hfs_wake_up(&tree
->wait
);
38 * Inserts a record in a given bnode known to have sufficient space.
40 * struct hfs_brec* brec: pointer to the brec for the insertion
41 * struct hfs_belem* belem: the element in the search path to insert in
42 * struct hfs_bkey* key: pointer to the key for the record to insert
43 * void* data: pointer to the record to insert
44 * hfs_u16 keysize: size of the key to insert
45 * hfs_u16 datasize: size of the record to insert
51 * 'brec' points to a valid (struct hfs_brec).
52 * 'belem' points to a valid (struct hfs_belem) in 'brec', the node
53 * of which has enough free space to insert 'key' and 'data'.
54 * 'key' is a pointer to a valid (struct hfs_bkey) of length 'keysize'
55 * which, in sorted order, belongs at the location indicated by 'brec'.
56 * 'data' is non-NULL an points to appropriate data of length 'datasize'
58 * The record has been inserted in the position indicated by 'brec'.
60 static void binsert_nonfull(struct hfs_brec
*brec
, struct hfs_belem
*belem
,
61 const struct hfs_bkey
*key
, const void *data
,
62 hfs_u8 keysize
, hfs_u16 datasize
)
64 int i
, rec
, nrecs
, size
, tomove
;
66 struct hfs_bnode
*bnode
= belem
->bnr
.bn
;
68 rec
= ++(belem
->record
);
69 size
= ROUND(keysize
+1) + datasize
;
70 nrecs
= bnode
->ndNRecs
+ 1;
71 tomove
= bnode_offset(bnode
, nrecs
) - bnode_offset(bnode
, rec
);
73 /* adjust the record table */
74 for (i
= nrecs
; i
>= rec
; --i
) {
75 hfs_put_hs(bnode_offset(bnode
,i
) + size
, RECTBL(bnode
,i
+1));
79 start
= bnode_key(bnode
, rec
);
80 memmove(start
+ size
, start
, tomove
);
82 /* copy in the key and the data*/
84 keysize
= ROUND(keysize
+1);
85 memcpy(start
+ 1, (hfs_u8
*)key
+ 1, keysize
-1);
86 memcpy(start
+ keysize
, data
, datasize
);
88 /* update record count */
96 * Adds a new root to a B*-tree, increasing its height.
98 * struct hfs_btree *tree: the tree to add a new root to
99 * struct hfs_bnode *left: the new root's first child or NULL
100 * struct hfs_bnode *right: the new root's second child or NULL
101 * Output Variable(s):
106 * 'tree' points to a valid (struct hfs_btree).
107 * 'left' and 'right' point to valid (struct hfs_bnode)s, which
108 * resulted from splitting the old root node, or are both NULL
109 * if there was no root node before.
111 * Upon success a new root node is added to 'tree' with either
112 * two children ('left' and 'right') or none.
114 static void add_root(struct hfs_btree
*tree
,
115 struct hfs_bnode
*left
,
116 struct hfs_bnode
*right
)
118 struct hfs_bnode_ref bnr
;
119 struct hfs_bnode
*root
;
120 struct hfs_bkey
*key
;
121 int keylen
= tree
->bthKeyLen
;
123 if (left
&& !right
) {
124 hfs_warn("add_root: LEFT but no RIGHT\n");
128 bnr
= hfs_bnode_alloc(tree
);
129 if (!(root
= bnr
.bn
)) {
133 root
->sticky
= HFS_STICKY
;
135 tree
->bthRoot
= root
->node
;
138 root
->ndNHeight
= tree
->bthDepth
;
144 root
->ndType
= ndLeafNode
;
147 tree
->bthFNode
= root
->node
;
148 tree
->bthLNode
= root
->node
;
150 root
->ndType
= ndIndxNode
;
153 hfs_put_hs(sizeof(struct NodeDescriptor
) + ROUND(1+keylen
) +
154 sizeof(hfs_u32
), RECTBL(root
, 2));
155 key
= bnode_key(root
, 1);
156 key
->KeyLen
= keylen
;
158 ((struct hfs_bkey
*)bnode_key(left
, 1))->value
, keylen
);
159 hfs_put_hl(left
->node
, bkey_record(key
));
161 hfs_put_hs(sizeof(struct NodeDescriptor
) + 2*ROUND(1+keylen
) +
162 2*sizeof(hfs_u32
), RECTBL(root
, 3));
163 key
= bnode_key(root
, 2);
164 key
->KeyLen
= keylen
;
166 ((struct hfs_bkey
*)bnode_key(right
, 1))->value
, keylen
);
167 hfs_put_hl(right
->node
, bkey_record(key
));
169 /* the former root (left) is now just a normal node */
170 left
->sticky
= HFS_NOT_STICKY
;
171 if ((left
->next
= bhash(tree
, left
->node
))) {
172 left
->next
->prev
= left
;
174 bhash(tree
, left
->node
) = left
;
176 hfs_bnode_relse(&bnr
);
181 * insert_empty_bnode()
184 * Adds an empty node to the right of 'left'.
186 * struct hfs_btree *tree: the tree to add a node to
187 * struct hfs_bnode *left: the node to add a node after
188 * Output Variable(s):
191 * struct hfs_bnode_ref *: reference to the new bnode.
193 * 'tree' points to a valid (struct hfs_btree) with at least 1 free node.
194 * 'left' points to a valid (struct hfs_bnode) belonging to 'tree'.
196 * If NULL is returned then 'tree' and 'left' are unchanged.
197 * Otherwise a node with 0 records is inserted in the tree to the right
198 * of the node 'left'. The 'ndFLink' of 'left' and the 'ndBLink' of
199 * the former right-neighbor of 'left' (if one existed) point to the
200 * new node. If 'left' had no right neighbor and is a leaf node the
201 * the 'bthLNode' of 'tree' points to the new node. The free-count and
202 * bitmap for 'tree' are kept current by hfs_bnode_alloc() which supplies
205 static struct hfs_bnode_ref
insert_empty_bnode(struct hfs_btree
*tree
,
206 struct hfs_bnode
*left
)
208 struct hfs_bnode_ref retval
;
209 struct hfs_bnode_ref right
;
211 retval
= hfs_bnode_alloc(tree
);
213 hfs_warn("hfs_binsert: out of bnodes?.\n");
216 retval
.bn
->sticky
= HFS_NOT_STICKY
;
217 if ((retval
.bn
->next
= bhash(tree
, retval
.bn
->node
))) {
218 retval
.bn
->next
->prev
= retval
.bn
;
220 bhash(tree
, retval
.bn
->node
) = retval
.bn
;
223 right
= hfs_bnode_find(tree
, left
->ndFLink
, HFS_LOCK_WRITE
);
225 hfs_warn("hfs_binsert: corrupt btree.\n");
226 hfs_bnode_bitop(tree
, retval
.bn
->node
, 0);
227 hfs_bnode_relse(&retval
);
230 right
.bn
->ndBLink
= retval
.bn
->node
;
231 hfs_bnode_relse(&right
);
232 } else if (left
->ndType
== ndLeafNode
) {
233 tree
->bthLNode
= retval
.bn
->node
;
237 retval
.bn
->ndFLink
= left
->ndFLink
;
238 retval
.bn
->ndBLink
= left
->node
;
239 retval
.bn
->ndType
= left
->ndType
;
240 retval
.bn
->ndNHeight
= left
->ndNHeight
;
241 retval
.bn
->ndNRecs
= 0;
243 left
->ndFLink
= retval
.bn
->node
;
253 * Splits an over full node during insertion.
254 * Picks the split point that results in the most-nearly equal
255 * space usage in the new and old nodes.
257 * struct hfs_belem *elem: the over full node.
258 * int size: the number of bytes to be used by the new record and its key.
259 * Output Variable(s):
260 * struct hfs_belem *elem: changed to indicate where the new record
261 * should be inserted.
263 * struct hfs_bnode_ref: reference to the new bnode.
265 * 'elem' points to a valid path element corresponding to the over full node.
266 * 'size' is positive.
268 * The records in the node corresponding to 'elem' are redistributed across
269 * the old and new nodes so that after inserting the new record, the space
270 * usage in these two nodes is as equal as possible.
271 * 'elem' is updated so that a call to binsert_nonfull() will insert the
272 * new record in the correct location.
274 static inline struct hfs_bnode_ref
split(struct hfs_belem
*elem
, int size
)
276 struct hfs_bnode
*bnode
= elem
->bnr
.bn
;
277 int nrecs
, cutoff
, index
, tmp
, used
, in_right
;
278 struct hfs_bnode_ref right
;
280 right
= insert_empty_bnode(bnode
->tree
, bnode
);
282 nrecs
= bnode
->ndNRecs
;
283 cutoff
= (size
+ bnode_end(bnode
) -
284 sizeof(struct NodeDescriptor
) +
285 (nrecs
+1)*sizeof(hfs_u16
))/2;
288 /* note that this only works because records sizes are even */
289 for (index
=1; index
<= elem
->record
; ++index
) {
290 tmp
= (sizeof(hfs_u16
) + bnode_rsize(bnode
, index
))/2;
297 tmp
= (size
+ sizeof(hfs_u16
))/2;
304 for (; index
<= nrecs
; ++index
) {
305 tmp
= (sizeof(hfs_u16
) + bnode_rsize(bnode
, index
))/2;
312 /* couldn't find the split point! */
313 hfs_bnode_relse(&right
);
320 elem
->record
-= index
-1;
322 hfs_bnode_shift_right(bnode
, right
.bn
, index
);
331 * Inserts a record in a tree known to have enough room, even if the
332 * insertion requires the splitting of nodes.
334 * struct hfs_brec *brec: partial path to the node to insert in
335 * const struct hfs_bkey *key: key for the new record
336 * const void *data: data for the new record
337 * hfs_u8 keysize: size of the key
338 * hfs_u16 datasize: size of the data
339 * int reserve: number of nodes reserved in case of splits
340 * Output Variable(s):
343 * int: 0 on success, error code on failure
345 * 'brec' points to a valid (struct hfs_brec) corresponding to a
346 * record in a leaf node, after which a record is to be inserted,
347 * or to "record 0" of the leaf node if the record is to be inserted
348 * before all existing records in the node. The (struct hfs_brec)
349 * includes all ancestors of the leaf node that are needed to
350 * complete the insertion including the parents of any nodes that
352 * 'key' points to a valid (struct hfs_bkey) which is appropriate
353 * to this tree, and which belongs at the insertion point.
354 * 'data' points data appropriate for the indicated node.
355 * 'keysize' gives the size in bytes of the key.
356 * 'datasize' gives the size in bytes of the data.
357 * 'reserve' gives the number of nodes that have been reserved in the
358 * tree to allow for splitting of nodes.
360 * All 'reserve'd nodes have been either used or released.
362 * On success the key and data have been inserted at the indicated
363 * location in the tree, all appropriate fields of the in-core data
364 * structures have been changed and updated versions of the on-disk
365 * data structures have been scheduled for write-back to disk.
366 * On failure the B*-tree is probably invalid both on disk and in-core.
368 * XXX: Some attempt at repair might be made in the event of failure,
369 * or the fs should be remounted read-only so things don't get worse.
371 static int binsert(struct hfs_brec
*brec
, const struct hfs_bkey
*key
,
372 const void *data
, hfs_u8 keysize
, hfs_u16 datasize
,
375 struct hfs_bnode_ref left
, right
, other
;
376 struct hfs_btree
*tree
= brec
->tree
;
377 struct hfs_belem
*belem
= brec
->bottom
;
378 int tmpsize
= 1 + tree
->bthKeyLen
;
379 struct hfs_bkey
*tmpkey
= hfs_malloc(tmpsize
);
382 while ((belem
>= brec
->top
) && (belem
->flags
& HFS_BPATH_OVERFLOW
)) {
384 if (left
.bn
->ndFLink
&&
385 hfs_bnode_in_brec(left
.bn
->ndFLink
, brec
)) {
386 hfs_warn("hfs_binsert: corrupt btree\n");
387 tree
->reserved
-= reserve
;
388 hfs_free(tmpkey
, tmpsize
);
392 right
= split(belem
, ROUND(keysize
+1) + ROUND(datasize
));
396 hfs_warn("hfs_binsert: unable to split node!\n");
397 tree
->reserved
-= reserve
;
398 hfs_free(tmpkey
, tmpsize
);
401 binsert_nonfull(brec
, belem
, key
, data
, keysize
, datasize
);
403 if (belem
->bnr
.bn
== left
.bn
) {
405 if (belem
->record
== 1) {
406 hfs_bnode_update_key(brec
, belem
, left
.bn
, 0);
412 if (left
.bn
->node
== tree
->root
->node
) {
413 add_root(tree
, left
.bn
, right
.bn
);
414 hfs_bnode_relse(&other
);
419 datasize
= sizeof(node
);
420 node
= htonl(right
.bn
->node
);
422 keysize
= tree
->bthKeyLen
;
423 memcpy(tmpkey
, bnode_key(right
.bn
, 1), keysize
+1);
424 hfs_bnode_relse(&other
);
429 if (belem
< brec
->top
) {
430 hfs_warn("hfs_binsert: Missing parent.\n");
431 tree
->reserved
-= reserve
;
432 hfs_free(tmpkey
, tmpsize
);
436 binsert_nonfull(brec
, belem
, key
, data
, keysize
, datasize
);
439 tree
->reserved
-= reserve
;
440 hfs_free(tmpkey
, tmpsize
);
444 /*================ Global functions ================*/
450 * This function inserts a new record into a b-tree.
452 * struct hfs_btree *tree: pointer to the (struct hfs_btree) to insert in
453 * struct hfs_bkey *key: pointer to the (struct hfs_bkey) to insert
454 * void *data: pointer to the data to associate with 'key' in the b-tree
455 * unsigned int datasize: the size of the data
456 * Output Variable(s):
459 * int: 0 on success, error code on failure
461 * 'tree' points to a valid (struct hfs_btree)
462 * 'key' points to a valid (struct hfs_bkey)
463 * 'data' points to valid memory of length 'datasize'
465 * If zero is returned then the record has been inserted in the
466 * indicated location updating all in-core data structures and
467 * scheduling all on-disk data structures for write-back.
469 int hfs_binsert(struct hfs_btree
*tree
, const struct hfs_bkey
*key
,
470 const void *data
, hfs_u16 datasize
)
472 struct hfs_brec brec
;
473 struct hfs_belem
*belem
;
474 int err
, reserve
, retval
;
477 if (!tree
|| (tree
->magic
!= HFS_BTREE_MAGIC
) || !key
|| !data
) {
478 hfs_warn("hfs_binsert: invalid arguments.\n");
482 if (key
->KeyLen
> tree
->bthKeyLen
) {
483 hfs_warn("hfs_binsert: oversized key\n");
488 if (!tree
->bthNRecs
) {
489 /* create the root bnode */
490 add_root(tree
, NULL
, NULL
);
491 if (!hfs_brec_init(&brec
, tree
, HFS_BFIND_INSERT
)) {
492 hfs_warn("hfs_binsert: failed to create root.\n");
496 err
= hfs_bfind(&brec
, tree
, key
, HFS_BFIND_INSERT
);
498 hfs_warn("hfs_binsert: hfs_brec_find failed.\n");
500 } else if (err
== 0) {
501 hfs_brec_relse(&brec
, NULL
);
506 keysize
= key
->KeyLen
;
507 datasize
= ROUND(datasize
);
510 if (bnode_freespace(belem
->bnr
.bn
) <
511 (sizeof(hfs_u16
) + ROUND(keysize
+1) + datasize
)) {
512 belem
->flags
|= HFS_BPATH_OVERFLOW
;
514 if (belem
->record
== 0) {
515 belem
->flags
|= HFS_BPATH_FIRST
;
519 hfs_brec_lock(&brec
, brec
.bottom
);
522 reserve
= brec
.bottom
- brec
.top
;
526 /* make certain we have enough nodes to proceed */
527 if ((tree
->bthFree
- tree
->reserved
) < reserve
) {
528 hfs_brec_relse(&brec
, NULL
);
529 hfs_btree_lock(tree
);
530 if ((tree
->bthFree
- tree
->reserved
) < reserve
) {
531 hfs_btree_extend(tree
);
533 hfs_btree_unlock(tree
);
534 if ((tree
->bthFree
- tree
->reserved
) < reserve
) {
540 tree
->reserved
+= reserve
;
541 hfs_brec_lock(&brec
, NULL
);
544 retval
= binsert(&brec
, key
, data
, keysize
, datasize
, reserve
);
545 hfs_brec_relse(&brec
, NULL
);