3 #include "kerncompat.h"
4 #include "radix-tree.h"
7 #include "print-tree.h"
9 static int split_node(struct ctree_root
*root
, struct ctree_path
*path
,
11 static int split_leaf(struct ctree_root
*root
, struct ctree_path
*path
,
13 static int push_node_left(struct ctree_root
*root
, struct tree_buffer
*dst
,
14 struct tree_buffer
*src
);
15 static int balance_node_right(struct ctree_root
*root
,
16 struct tree_buffer
*dst_buf
,
17 struct tree_buffer
*src_buf
);
18 static int del_ptr(struct ctree_root
*root
, struct ctree_path
*path
, int level
,
21 inline void init_path(struct ctree_path
*p
)
23 memset(p
, 0, sizeof(*p
));
26 void release_path(struct ctree_root
*root
, struct ctree_path
*p
)
29 for (i
= 0; i
< MAX_LEVEL
; i
++) {
32 tree_block_release(root
, p
->nodes
[i
]);
34 memset(p
, 0, sizeof(*p
));
38 * The leaf data grows from end-to-front in the node.
39 * this returns the address of the start of the last item,
40 * which is the stop of the leaf data stack
42 static inline unsigned int leaf_data_end(struct leaf
*leaf
)
44 unsigned int nr
= leaf
->header
.nritems
;
46 return sizeof(leaf
->data
);
47 return leaf
->items
[nr
-1].offset
;
51 * The space between the end of the leaf items and
52 * the start of the leaf data. IOW, how much room
53 * the leaf has left for both items and data
55 int leaf_free_space(struct leaf
*leaf
)
57 int data_end
= leaf_data_end(leaf
);
58 int nritems
= leaf
->header
.nritems
;
59 char *items_end
= (char *)(leaf
->items
+ nritems
+ 1);
60 return (char *)(leaf
->data
+ data_end
) - (char *)items_end
;
64 * compare two keys in a memcmp fashion
66 int comp_keys(struct key
*k1
, struct key
*k2
)
68 if (k1
->objectid
> k2
->objectid
)
70 if (k1
->objectid
< k2
->objectid
)
72 if (k1
->flags
> k2
->flags
)
74 if (k1
->flags
< k2
->flags
)
76 if (k1
->offset
> k2
->offset
)
78 if (k1
->offset
< k2
->offset
)
83 int check_node(struct ctree_path
*path
, int level
)
86 struct node
*parent
= NULL
;
87 struct node
*node
= &path
->nodes
[level
]->node
;
90 if (path
->nodes
[level
+ 1])
91 parent
= &path
->nodes
[level
+ 1]->node
;
92 parent_slot
= path
->slots
[level
+ 1];
93 if (parent
&& node
->header
.nritems
> 0) {
94 struct key
*parent_key
;
95 parent_key
= &parent
->keys
[parent_slot
];
96 BUG_ON(memcmp(parent_key
, node
->keys
, sizeof(struct key
)));
97 BUG_ON(parent
->blockptrs
[parent_slot
] != node
->header
.blocknr
);
99 BUG_ON(node
->header
.nritems
> NODEPTRS_PER_BLOCK
);
100 for (i
= 0; i
< node
->header
.nritems
- 2; i
++) {
101 BUG_ON(comp_keys(&node
->keys
[i
], &node
->keys
[i
+1]) >= 0);
106 int check_leaf(struct ctree_path
*path
, int level
)
109 struct leaf
*leaf
= &path
->nodes
[level
]->leaf
;
110 struct node
*parent
= NULL
;
113 if (path
->nodes
[level
+ 1])
114 parent
= &path
->nodes
[level
+ 1]->node
;
115 parent_slot
= path
->slots
[level
+ 1];
116 if (parent
&& leaf
->header
.nritems
> 0) {
117 struct key
*parent_key
;
118 parent_key
= &parent
->keys
[parent_slot
];
119 BUG_ON(memcmp(parent_key
, &leaf
->items
[0].key
,
120 sizeof(struct key
)));
121 BUG_ON(parent
->blockptrs
[parent_slot
] != leaf
->header
.blocknr
);
123 for (i
= 0; i
< leaf
->header
.nritems
- 2; i
++) {
124 BUG_ON(comp_keys(&leaf
->items
[i
].key
,
125 &leaf
->items
[i
+1].key
) >= 0);
126 BUG_ON(leaf
->items
[i
].offset
!= leaf
->items
[i
+ 1].offset
+
127 leaf
->items
[i
+ 1].size
);
129 BUG_ON(leaf
->items
[i
].offset
+ leaf
->items
[i
].size
!=
133 BUG_ON(leaf_free_space(leaf
) < 0);
137 int check_block(struct ctree_path
*path
, int level
)
140 return check_leaf(path
, level
);
141 return check_node(path
, level
);
145 * search for key in the array p. items p are item_size apart
146 * and there are 'max' items in p
147 * the slot in the array is returned via slot, and it points to
148 * the place where you would insert key if it is not found in
151 * slot may point to max if the key is bigger than all of the keys
153 int generic_bin_search(char *p
, int item_size
, struct key
*key
,
163 mid
= (low
+ high
) / 2;
164 tmp
= (struct key
*)(p
+ mid
* item_size
);
165 ret
= comp_keys(tmp
, key
);
181 * simple bin_search frontend that does the right thing for
184 int bin_search(struct node
*c
, struct key
*key
, int *slot
)
186 if (is_leaf(c
->header
.flags
)) {
187 struct leaf
*l
= (struct leaf
*)c
;
188 return generic_bin_search((void *)l
->items
, sizeof(struct item
),
189 key
, c
->header
.nritems
, slot
);
191 return generic_bin_search((void *)c
->keys
, sizeof(struct key
),
192 key
, c
->header
.nritems
, slot
);
197 struct tree_buffer
*read_node_slot(struct ctree_root
*root
,
198 struct tree_buffer
*parent_buf
,
201 struct node
*node
= &parent_buf
->node
;
204 if (slot
>= node
->header
.nritems
)
206 return read_tree_block(root
, node
->blockptrs
[slot
]);
209 static int balance_level(struct ctree_root
*root
, struct ctree_path
*path
,
212 struct tree_buffer
*right_buf
;
213 struct tree_buffer
*mid_buf
;
214 struct tree_buffer
*left_buf
;
215 struct tree_buffer
*parent_buf
= NULL
;
216 struct node
*right
= NULL
;
218 struct node
*left
= NULL
;
219 struct node
*parent
= NULL
;
223 int orig_slot
= path
->slots
[level
];
229 mid_buf
= path
->nodes
[level
];
230 mid
= &mid_buf
->node
;
231 orig_ptr
= mid
->blockptrs
[orig_slot
];
233 if (level
< MAX_LEVEL
- 1)
234 parent_buf
= path
->nodes
[level
+ 1];
235 pslot
= path
->slots
[level
+ 1];
238 struct tree_buffer
*child
;
239 u64 blocknr
= mid_buf
->blocknr
;
241 if (mid
->header
.nritems
!= 1)
244 /* promote the child to a root */
245 child
= read_node_slot(root
, mid_buf
, 0);
248 path
->nodes
[level
] = NULL
;
249 /* once for the path */
250 tree_block_release(root
, mid_buf
);
251 /* once for the root ptr */
252 tree_block_release(root
, mid_buf
);
253 return free_extent(root
, blocknr
, 1);
255 parent
= &parent_buf
->node
;
257 if (mid
->header
.nritems
> NODEPTRS_PER_BLOCK
/ 4)
260 left_buf
= read_node_slot(root
, parent_buf
, pslot
- 1);
261 right_buf
= read_node_slot(root
, parent_buf
, pslot
+ 1);
263 /* first, try to make some room in the middle buffer */
265 left
= &left_buf
->node
;
266 orig_slot
+= left
->header
.nritems
;
267 wret
= push_node_left(root
, left_buf
, mid_buf
);
273 * then try to empty the right most buffer into the middle
276 right
= &right_buf
->node
;
277 wret
= push_node_left(root
, mid_buf
, right_buf
);
280 if (right
->header
.nritems
== 0) {
281 u64 blocknr
= right_buf
->blocknr
;
282 tree_block_release(root
, right_buf
);
285 wret
= del_ptr(root
, path
, level
+ 1, pslot
+ 1);
288 wret
= free_extent(root
, blocknr
, 1);
292 memcpy(parent
->keys
+ pslot
+ 1, right
->keys
,
294 wret
= write_tree_block(root
, parent_buf
);
299 if (mid
->header
.nritems
== 1) {
301 * we're not allowed to leave a node with one item in the
302 * tree during a delete. A deletion from lower in the tree
303 * could try to delete the only pointer in this node.
304 * So, pull some keys from the left.
305 * There has to be a left pointer at this point because
306 * otherwise we would have pulled some pointers from the
310 wret
= balance_node_right(root
, mid_buf
, left_buf
);
315 if (mid
->header
.nritems
== 0) {
316 /* we've managed to empty the middle node, drop it */
317 u64 blocknr
= mid_buf
->blocknr
;
318 tree_block_release(root
, mid_buf
);
321 wret
= del_ptr(root
, path
, level
+ 1, pslot
);
324 wret
= free_extent(root
, blocknr
, 1);
328 /* update the parent key to reflect our changes */
329 memcpy(parent
->keys
+ pslot
, mid
->keys
, sizeof(struct key
));
330 wret
= write_tree_block(root
, parent_buf
);
335 /* update the path */
337 if (left
->header
.nritems
> orig_slot
) {
338 left_buf
->count
++; // released below
339 path
->nodes
[level
] = left_buf
;
340 path
->slots
[level
+ 1] -= 1;
341 path
->slots
[level
] = orig_slot
;
343 tree_block_release(root
, mid_buf
);
345 orig_slot
-= left
->header
.nritems
;
346 path
->slots
[level
] = orig_slot
;
349 /* double check we haven't messed things up */
350 check_block(path
, level
);
351 if (orig_ptr
!= path
->nodes
[level
]->node
.blockptrs
[path
->slots
[level
]])
355 tree_block_release(root
, right_buf
);
357 tree_block_release(root
, left_buf
);
362 * look for key in the tree. path is filled in with nodes along the way
363 * if key is found, we return zero and you can find the item in the leaf
364 * level of the path (level 0)
366 * If the key isn't found, the path points to the slot where it should
367 * be inserted, and 1 is returned. If there are other errors during the
368 * search a negative error number is returned.
370 * if ins_len > 0, nodes and leaves will be split as we walk down the
371 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
374 int search_slot(struct ctree_root
*root
, struct key
*key
,
375 struct ctree_path
*p
, int ins_len
)
377 struct tree_buffer
*b
;
388 level
= node_level(c
->header
.flags
);
390 ret
= check_block(p
, level
);
393 ret
= bin_search(c
, key
, &slot
);
394 if (!is_leaf(c
->header
.flags
)) {
397 p
->slots
[level
] = slot
;
399 c
->header
.nritems
== NODEPTRS_PER_BLOCK
) {
400 int sret
= split_node(root
, p
, level
);
406 slot
= p
->slots
[level
];
407 } else if (ins_len
< 0) {
408 int sret
= balance_level(root
, p
, level
);
415 slot
= p
->slots
[level
];
416 BUG_ON(c
->header
.nritems
== 1);
418 b
= read_tree_block(root
, c
->blockptrs
[slot
]);
420 struct leaf
*l
= (struct leaf
*)c
;
421 p
->slots
[level
] = slot
;
422 if (ins_len
> 0 && leaf_free_space(l
) <
423 sizeof(struct item
) + ins_len
) {
424 int sret
= split_leaf(root
, p
, ins_len
);
429 BUG_ON(root
->node
->count
== 1);
433 BUG_ON(root
->node
->count
== 1);
438 * adjust the pointers going up the tree, starting at level
439 * making sure the right key of each node is points to 'key'.
440 * This is used after shifting pointers to the left, so it stops
441 * fixing up pointers when a given leaf/node is not in slot 0 of the
444 * If this fails to write a tree block, it returns -1, but continues
445 * fixing up the blocks in ram so the tree is consistent.
447 static int fixup_low_keys(struct ctree_root
*root
,
448 struct ctree_path
*path
, struct key
*key
,
454 for (i
= level
; i
< MAX_LEVEL
; i
++) {
456 int tslot
= path
->slots
[i
];
459 t
= &path
->nodes
[i
]->node
;
460 memcpy(t
->keys
+ tslot
, key
, sizeof(*key
));
461 wret
= write_tree_block(root
, path
->nodes
[i
]);
471 * try to push data from one node into the next node left in the
474 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
475 * error, and > 0 if there was no room in the left hand block.
477 static int push_node_left(struct ctree_root
*root
, struct tree_buffer
*dst_buf
,
478 struct tree_buffer
*src_buf
)
480 struct node
*src
= &src_buf
->node
;
481 struct node
*dst
= &dst_buf
->node
;
488 src_nritems
= src
->header
.nritems
;
489 dst_nritems
= dst
->header
.nritems
;
490 push_items
= NODEPTRS_PER_BLOCK
- dst_nritems
;
491 if (push_items
<= 0) {
495 if (src_nritems
< push_items
)
496 push_items
= src_nritems
;
498 memcpy(dst
->keys
+ dst_nritems
, src
->keys
,
499 push_items
* sizeof(struct key
));
500 memcpy(dst
->blockptrs
+ dst_nritems
, src
->blockptrs
,
501 push_items
* sizeof(u64
));
502 if (push_items
< src_nritems
) {
503 memmove(src
->keys
, src
->keys
+ push_items
,
504 (src_nritems
- push_items
) * sizeof(struct key
));
505 memmove(src
->blockptrs
, src
->blockptrs
+ push_items
,
506 (src_nritems
- push_items
) * sizeof(u64
));
508 src
->header
.nritems
-= push_items
;
509 dst
->header
.nritems
+= push_items
;
511 wret
= write_tree_block(root
, src_buf
);
515 wret
= write_tree_block(root
, dst_buf
);
522 * try to push data from one node into the next node right in the
525 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
526 * error, and > 0 if there was no room in the right hand block.
528 * this will only push up to 1/2 the contents of the left node over
530 static int balance_node_right(struct ctree_root
*root
,
531 struct tree_buffer
*dst_buf
,
532 struct tree_buffer
*src_buf
)
534 struct node
*src
= &src_buf
->node
;
535 struct node
*dst
= &dst_buf
->node
;
543 src_nritems
= src
->header
.nritems
;
544 dst_nritems
= dst
->header
.nritems
;
545 push_items
= NODEPTRS_PER_BLOCK
- dst_nritems
;
546 if (push_items
<= 0) {
550 max_push
= src_nritems
/ 2 + 1;
551 /* don't try to empty the node */
552 if (max_push
> src_nritems
)
554 if (max_push
< push_items
)
555 push_items
= max_push
;
557 memmove(dst
->keys
+ push_items
, dst
->keys
,
558 dst_nritems
* sizeof(struct key
));
559 memmove(dst
->blockptrs
+ push_items
, dst
->blockptrs
,
560 dst_nritems
* sizeof(u64
));
561 memcpy(dst
->keys
, src
->keys
+ src_nritems
- push_items
,
562 push_items
* sizeof(struct key
));
563 memcpy(dst
->blockptrs
, src
->blockptrs
+ src_nritems
- push_items
,
564 push_items
* sizeof(u64
));
566 src
->header
.nritems
-= push_items
;
567 dst
->header
.nritems
+= push_items
;
569 wret
= write_tree_block(root
, src_buf
);
573 wret
= write_tree_block(root
, dst_buf
);
580 * helper function to insert a new root level in the tree.
581 * A new node is allocated, and a single item is inserted to
582 * point to the existing root
584 * returns zero on success or < 0 on failure.
586 static int insert_new_root(struct ctree_root
*root
,
587 struct ctree_path
*path
, int level
)
589 struct tree_buffer
*t
;
592 struct key
*lower_key
;
594 BUG_ON(path
->nodes
[level
]);
595 BUG_ON(path
->nodes
[level
-1] != root
->node
);
597 t
= alloc_free_block(root
);
599 memset(c
, 0, sizeof(c
));
600 c
->header
.nritems
= 1;
601 c
->header
.flags
= node_level(level
);
602 c
->header
.blocknr
= t
->blocknr
;
603 c
->header
.parentid
= root
->node
->node
.header
.parentid
;
604 lower
= &path
->nodes
[level
-1]->node
;
605 if (is_leaf(lower
->header
.flags
))
606 lower_key
= &((struct leaf
*)lower
)->items
[0].key
;
608 lower_key
= lower
->keys
;
609 memcpy(c
->keys
, lower_key
, sizeof(struct key
));
610 c
->blockptrs
[0] = path
->nodes
[level
-1]->blocknr
;
611 /* the super has an extra ref to root->node */
612 tree_block_release(root
, root
->node
);
615 write_tree_block(root
, t
);
616 path
->nodes
[level
] = t
;
617 path
->slots
[level
] = 0;
622 * worker function to insert a single pointer in a node.
623 * the node should have enough room for the pointer already
625 * slot and level indicate where you want the key to go, and
626 * blocknr is the block the key points to.
628 * returns zero on success and < 0 on any error
630 static int insert_ptr(struct ctree_root
*root
,
631 struct ctree_path
*path
, struct key
*key
,
632 u64 blocknr
, int slot
, int level
)
637 BUG_ON(!path
->nodes
[level
]);
638 lower
= &path
->nodes
[level
]->node
;
639 nritems
= lower
->header
.nritems
;
642 if (nritems
== NODEPTRS_PER_BLOCK
)
644 if (slot
!= nritems
) {
645 memmove(lower
->keys
+ slot
+ 1, lower
->keys
+ slot
,
646 (nritems
- slot
) * sizeof(struct key
));
647 memmove(lower
->blockptrs
+ slot
+ 1, lower
->blockptrs
+ slot
,
648 (nritems
- slot
) * sizeof(u64
));
650 memcpy(lower
->keys
+ slot
, key
, sizeof(struct key
));
651 lower
->blockptrs
[slot
] = blocknr
;
652 lower
->header
.nritems
++;
653 if (lower
->keys
[1].objectid
== 0)
655 write_tree_block(root
, path
->nodes
[level
]);
660 * split the node at the specified level in path in two.
661 * The path is corrected to point to the appropriate node after the split
663 * Before splitting this tries to make some room in the node by pushing
664 * left and right, if either one works, it returns right away.
666 * returns 0 on success and < 0 on failure
668 static int split_node(struct ctree_root
*root
, struct ctree_path
*path
,
671 struct tree_buffer
*t
;
673 struct tree_buffer
*split_buffer
;
679 t
= path
->nodes
[level
];
681 if (t
== root
->node
) {
682 /* trying to split the root, lets make a new one */
683 ret
= insert_new_root(root
, path
, level
+ 1);
687 split_buffer
= alloc_free_block(root
);
688 split
= &split_buffer
->node
;
689 split
->header
.flags
= c
->header
.flags
;
690 split
->header
.blocknr
= split_buffer
->blocknr
;
691 split
->header
.parentid
= root
->node
->node
.header
.parentid
;
692 mid
= (c
->header
.nritems
+ 1) / 2;
693 memcpy(split
->keys
, c
->keys
+ mid
,
694 (c
->header
.nritems
- mid
) * sizeof(struct key
));
695 memcpy(split
->blockptrs
, c
->blockptrs
+ mid
,
696 (c
->header
.nritems
- mid
) * sizeof(u64
));
697 split
->header
.nritems
= c
->header
.nritems
- mid
;
698 c
->header
.nritems
= mid
;
701 wret
= write_tree_block(root
, t
);
704 wret
= write_tree_block(root
, split_buffer
);
707 wret
= insert_ptr(root
, path
, split
->keys
, split_buffer
->blocknr
,
708 path
->slots
[level
+ 1] + 1, level
+ 1);
712 if (path
->slots
[level
] >= mid
) {
713 path
->slots
[level
] -= mid
;
714 tree_block_release(root
, t
);
715 path
->nodes
[level
] = split_buffer
;
716 path
->slots
[level
+ 1] += 1;
718 tree_block_release(root
, split_buffer
);
724 * how many bytes are required to store the items in a leaf. start
725 * and nr indicate which items in the leaf to check. This totals up the
726 * space used both by the item structs and the item data
728 static int leaf_space_used(struct leaf
*l
, int start
, int nr
)
731 int end
= start
+ nr
- 1;
735 data_len
= l
->items
[start
].offset
+ l
->items
[start
].size
;
736 data_len
= data_len
- l
->items
[end
].offset
;
737 data_len
+= sizeof(struct item
) * nr
;
742 * push some data in the path leaf to the right, trying to free up at
743 * least data_size bytes. returns zero if the push worked, nonzero otherwise
745 * returns 1 if the push failed because the other node didn't have enough
746 * room, 0 if everything worked out and < 0 if there were major errors.
748 static int push_leaf_right(struct ctree_root
*root
, struct ctree_path
*path
,
751 struct tree_buffer
*left_buf
= path
->nodes
[0];
752 struct leaf
*left
= &left_buf
->leaf
;
754 struct tree_buffer
*right_buf
;
755 struct tree_buffer
*upper
;
763 slot
= path
->slots
[1];
764 if (!path
->nodes
[1]) {
767 upper
= path
->nodes
[1];
768 if (slot
>= upper
->node
.header
.nritems
- 1) {
771 right_buf
= read_tree_block(root
, upper
->node
.blockptrs
[slot
+ 1]);
772 right
= &right_buf
->leaf
;
773 free_space
= leaf_free_space(right
);
774 if (free_space
< data_size
+ sizeof(struct item
)) {
775 tree_block_release(root
, right_buf
);
778 for (i
= left
->header
.nritems
- 1; i
>= 0; i
--) {
779 item
= left
->items
+ i
;
780 if (path
->slots
[0] == i
)
781 push_space
+= data_size
+ sizeof(*item
);
782 if (item
->size
+ sizeof(*item
) + push_space
> free_space
)
785 push_space
+= item
->size
+ sizeof(*item
);
787 if (push_items
== 0) {
788 tree_block_release(root
, right_buf
);
791 /* push left to right */
792 push_space
= left
->items
[left
->header
.nritems
- push_items
].offset
+
793 left
->items
[left
->header
.nritems
- push_items
].size
;
794 push_space
-= leaf_data_end(left
);
795 /* make room in the right data area */
796 memmove(right
->data
+ leaf_data_end(right
) - push_space
,
797 right
->data
+ leaf_data_end(right
),
798 LEAF_DATA_SIZE
- leaf_data_end(right
));
799 /* copy from the left data area */
800 memcpy(right
->data
+ LEAF_DATA_SIZE
- push_space
,
801 left
->data
+ leaf_data_end(left
),
803 memmove(right
->items
+ push_items
, right
->items
,
804 right
->header
.nritems
* sizeof(struct item
));
805 /* copy the items from left to right */
806 memcpy(right
->items
, left
->items
+ left
->header
.nritems
- push_items
,
807 push_items
* sizeof(struct item
));
809 /* update the item pointers */
810 right
->header
.nritems
+= push_items
;
811 push_space
= LEAF_DATA_SIZE
;
812 for (i
= 0; i
< right
->header
.nritems
; i
++) {
813 right
->items
[i
].offset
= push_space
- right
->items
[i
].size
;
814 push_space
= right
->items
[i
].offset
;
816 left
->header
.nritems
-= push_items
;
818 write_tree_block(root
, left_buf
);
819 write_tree_block(root
, right_buf
);
820 memcpy(upper
->node
.keys
+ slot
+ 1,
821 &right
->items
[0].key
, sizeof(struct key
));
822 write_tree_block(root
, upper
);
823 /* then fixup the leaf pointer in the path */
824 if (path
->slots
[0] >= left
->header
.nritems
) {
825 path
->slots
[0] -= left
->header
.nritems
;
826 tree_block_release(root
, path
->nodes
[0]);
827 path
->nodes
[0] = right_buf
;
830 tree_block_release(root
, right_buf
);
835 * push some data in the path leaf to the left, trying to free up at
836 * least data_size bytes. returns zero if the push worked, nonzero otherwise
838 static int push_leaf_left(struct ctree_root
*root
, struct ctree_path
*path
,
841 struct tree_buffer
*right_buf
= path
->nodes
[0];
842 struct leaf
*right
= &right_buf
->leaf
;
843 struct tree_buffer
*t
;
851 int old_left_nritems
;
855 slot
= path
->slots
[1];
859 if (!path
->nodes
[1]) {
862 t
= read_tree_block(root
, path
->nodes
[1]->node
.blockptrs
[slot
- 1]);
864 free_space
= leaf_free_space(left
);
865 if (free_space
< data_size
+ sizeof(struct item
)) {
866 tree_block_release(root
, t
);
869 for (i
= 0; i
< right
->header
.nritems
; i
++) {
870 item
= right
->items
+ i
;
871 if (path
->slots
[0] == i
)
872 push_space
+= data_size
+ sizeof(*item
);
873 if (item
->size
+ sizeof(*item
) + push_space
> free_space
)
876 push_space
+= item
->size
+ sizeof(*item
);
878 if (push_items
== 0) {
879 tree_block_release(root
, t
);
882 /* push data from right to left */
883 memcpy(left
->items
+ left
->header
.nritems
,
884 right
->items
, push_items
* sizeof(struct item
));
885 push_space
= LEAF_DATA_SIZE
- right
->items
[push_items
-1].offset
;
886 memcpy(left
->data
+ leaf_data_end(left
) - push_space
,
887 right
->data
+ right
->items
[push_items
- 1].offset
,
889 old_left_nritems
= left
->header
.nritems
;
890 BUG_ON(old_left_nritems
< 0);
892 for(i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
893 left
->items
[i
].offset
-= LEAF_DATA_SIZE
-
894 left
->items
[old_left_nritems
-1].offset
;
896 left
->header
.nritems
+= push_items
;
898 /* fixup right node */
899 push_space
= right
->items
[push_items
-1].offset
- leaf_data_end(right
);
900 memmove(right
->data
+ LEAF_DATA_SIZE
- push_space
, right
->data
+
901 leaf_data_end(right
), push_space
);
902 memmove(right
->items
, right
->items
+ push_items
,
903 (right
->header
.nritems
- push_items
) * sizeof(struct item
));
904 right
->header
.nritems
-= push_items
;
905 push_space
= LEAF_DATA_SIZE
;
907 for (i
= 0; i
< right
->header
.nritems
; i
++) {
908 right
->items
[i
].offset
= push_space
- right
->items
[i
].size
;
909 push_space
= right
->items
[i
].offset
;
912 wret
= write_tree_block(root
, t
);
915 wret
= write_tree_block(root
, right_buf
);
919 wret
= fixup_low_keys(root
, path
, &right
->items
[0].key
, 1);
923 /* then fixup the leaf pointer in the path */
924 if (path
->slots
[0] < push_items
) {
925 path
->slots
[0] += old_left_nritems
;
926 tree_block_release(root
, path
->nodes
[0]);
930 tree_block_release(root
, t
);
931 path
->slots
[0] -= push_items
;
933 BUG_ON(path
->slots
[0] < 0);
938 * split the path's leaf in two, making sure there is at least data_size
939 * available for the resulting leaf level of the path.
941 * returns 0 if all went well and < 0 on failure.
943 static int split_leaf(struct ctree_root
*root
, struct ctree_path
*path
,
946 struct tree_buffer
*l_buf
;
952 struct tree_buffer
*right_buffer
;
953 int space_needed
= data_size
+ sizeof(struct item
);
960 wret
= push_leaf_left(root
, path
, data_size
);
964 wret
= push_leaf_right(root
, path
, data_size
);
968 l_buf
= path
->nodes
[0];
971 /* did the pushes work? */
972 if (leaf_free_space(l
) >= sizeof(struct item
) + data_size
)
975 if (!path
->nodes
[1]) {
976 ret
= insert_new_root(root
, path
, 1);
980 slot
= path
->slots
[0];
981 nritems
= l
->header
.nritems
;
982 mid
= (nritems
+ 1)/ 2;
984 right_buffer
= alloc_free_block(root
);
985 BUG_ON(!right_buffer
);
986 BUG_ON(mid
== nritems
);
987 right
= &right_buffer
->leaf
;
988 memset(right
, 0, sizeof(*right
));
990 /* FIXME, just alloc a new leaf here */
991 if (leaf_space_used(l
, mid
, nritems
- mid
) + space_needed
>
995 /* FIXME, just alloc a new leaf here */
996 if (leaf_space_used(l
, 0, mid
+ 1) + space_needed
>
1000 right
->header
.nritems
= nritems
- mid
;
1001 right
->header
.blocknr
= right_buffer
->blocknr
;
1002 right
->header
.flags
= node_level(0);
1003 right
->header
.parentid
= root
->node
->node
.header
.parentid
;
1004 data_copy_size
= l
->items
[mid
].offset
+ l
->items
[mid
].size
-
1006 memcpy(right
->items
, l
->items
+ mid
,
1007 (nritems
- mid
) * sizeof(struct item
));
1008 memcpy(right
->data
+ LEAF_DATA_SIZE
- data_copy_size
,
1009 l
->data
+ leaf_data_end(l
), data_copy_size
);
1010 rt_data_off
= LEAF_DATA_SIZE
-
1011 (l
->items
[mid
].offset
+ l
->items
[mid
].size
);
1013 for (i
= 0; i
< right
->header
.nritems
; i
++)
1014 right
->items
[i
].offset
+= rt_data_off
;
1016 l
->header
.nritems
= mid
;
1018 wret
= insert_ptr(root
, path
, &right
->items
[0].key
,
1019 right_buffer
->blocknr
, path
->slots
[1] + 1, 1);
1022 wret
= write_tree_block(root
, right_buffer
);
1025 wret
= write_tree_block(root
, l_buf
);
1029 BUG_ON(path
->slots
[0] != slot
);
1031 tree_block_release(root
, path
->nodes
[0]);
1032 path
->nodes
[0] = right_buffer
;
1033 path
->slots
[0] -= mid
;
1034 path
->slots
[1] += 1;
1036 tree_block_release(root
, right_buffer
);
1037 BUG_ON(path
->slots
[0] < 0);
1042 * Given a key and some data, insert an item into the tree.
1043 * This does all the path init required, making room in the tree if needed.
1045 int insert_item(struct ctree_root
*root
, struct key
*key
,
1046 void *data
, int data_size
)
1053 struct tree_buffer
*leaf_buf
;
1054 unsigned int nritems
;
1055 unsigned int data_end
;
1056 struct ctree_path path
;
1058 /* create a root if there isn't one */
1062 ret
= search_slot(root
, key
, &path
, data_size
);
1064 release_path(root
, &path
);
1068 release_path(root
, &path
);
1072 slot_orig
= path
.slots
[0];
1073 leaf_buf
= path
.nodes
[0];
1074 leaf
= &leaf_buf
->leaf
;
1076 nritems
= leaf
->header
.nritems
;
1077 data_end
= leaf_data_end(leaf
);
1079 if (leaf_free_space(leaf
) < sizeof(struct item
) + data_size
)
1082 slot
= path
.slots
[0];
1084 if (slot
!= nritems
) {
1086 unsigned int old_data
= leaf
->items
[slot
].offset
+
1087 leaf
->items
[slot
].size
;
1090 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1092 /* first correct the data pointers */
1093 for (i
= slot
; i
< nritems
; i
++)
1094 leaf
->items
[i
].offset
-= data_size
;
1096 /* shift the items */
1097 memmove(leaf
->items
+ slot
+ 1, leaf
->items
+ slot
,
1098 (nritems
- slot
) * sizeof(struct item
));
1100 /* shift the data */
1101 memmove(leaf
->data
+ data_end
- data_size
, leaf
->data
+
1102 data_end
, old_data
- data_end
);
1103 data_end
= old_data
;
1105 /* copy the new data in */
1106 memcpy(&leaf
->items
[slot
].key
, key
, sizeof(struct key
));
1107 leaf
->items
[slot
].offset
= data_end
- data_size
;
1108 leaf
->items
[slot
].size
= data_size
;
1109 memcpy(leaf
->data
+ data_end
- data_size
, data
, data_size
);
1110 leaf
->header
.nritems
+= 1;
1114 ret
= fixup_low_keys(root
, &path
, key
, 1);
1116 wret
= write_tree_block(root
, leaf_buf
);
1120 if (leaf_free_space(leaf
) < 0)
1122 check_leaf(&path
, 0);
1123 release_path(root
, &path
);
1128 * delete the pointer from a given node.
1130 * If the delete empties a node, the node is removed from the tree,
1131 * continuing all the way the root if required. The root is converted into
1132 * a leaf if all the nodes are emptied.
1134 static int del_ptr(struct ctree_root
*root
, struct ctree_path
*path
, int level
,
1138 struct tree_buffer
*parent
= path
->nodes
[level
];
1143 node
= &parent
->node
;
1144 nritems
= node
->header
.nritems
;
1146 if (slot
!= nritems
-1) {
1147 memmove(node
->keys
+ slot
, node
->keys
+ slot
+ 1,
1148 sizeof(struct key
) * (nritems
- slot
- 1));
1149 memmove(node
->blockptrs
+ slot
,
1150 node
->blockptrs
+ slot
+ 1,
1151 sizeof(u64
) * (nritems
- slot
- 1));
1153 node
->header
.nritems
--;
1154 if (node
->header
.nritems
== 0 && parent
== root
->node
) {
1155 BUG_ON(node_level(root
->node
->node
.header
.flags
) != 1);
1156 /* just turn the root into a leaf and break */
1157 root
->node
->node
.header
.flags
= node_level(0);
1158 } else if (slot
== 0) {
1159 wret
= fixup_low_keys(root
, path
, node
->keys
, level
+ 1);
1163 wret
= write_tree_block(root
, parent
);
1170 * delete the item at the leaf level in path. If that empties
1171 * the leaf, remove it from the tree
1173 int del_item(struct ctree_root
*root
, struct ctree_path
*path
)
1177 struct tree_buffer
*leaf_buf
;
1183 leaf_buf
= path
->nodes
[0];
1184 leaf
= &leaf_buf
->leaf
;
1185 slot
= path
->slots
[0];
1186 doff
= leaf
->items
[slot
].offset
;
1187 dsize
= leaf
->items
[slot
].size
;
1189 if (slot
!= leaf
->header
.nritems
- 1) {
1191 int data_end
= leaf_data_end(leaf
);
1192 memmove(leaf
->data
+ data_end
+ dsize
,
1193 leaf
->data
+ data_end
,
1195 for (i
= slot
+ 1; i
< leaf
->header
.nritems
; i
++)
1196 leaf
->items
[i
].offset
+= dsize
;
1197 memmove(leaf
->items
+ slot
, leaf
->items
+ slot
+ 1,
1198 sizeof(struct item
) *
1199 (leaf
->header
.nritems
- slot
- 1));
1201 leaf
->header
.nritems
-= 1;
1202 /* delete the leaf if we've emptied it */
1203 if (leaf
->header
.nritems
== 0) {
1204 if (leaf_buf
== root
->node
) {
1205 leaf
->header
.flags
= node_level(0);
1206 write_tree_block(root
, leaf_buf
);
1208 wret
= del_ptr(root
, path
, 1, path
->slots
[1]);
1211 wret
= free_extent(root
, leaf_buf
->blocknr
, 1);
1216 int used
= leaf_space_used(leaf
, 0, leaf
->header
.nritems
);
1218 wret
= fixup_low_keys(root
, path
,
1219 &leaf
->items
[0].key
, 1);
1223 wret
= write_tree_block(root
, leaf_buf
);
1227 /* delete the leaf if it is mostly empty */
1228 if (used
< LEAF_DATA_SIZE
/ 3) {
1229 /* push_leaf_left fixes the path.
1230 * make sure the path still points to our leaf
1231 * for possible call to del_ptr below
1233 slot
= path
->slots
[1];
1235 wret
= push_leaf_left(root
, path
, 1);
1238 if (leaf
->header
.nritems
) {
1239 wret
= push_leaf_right(root
, path
, 1);
1243 if (leaf
->header
.nritems
== 0) {
1244 u64 blocknr
= leaf_buf
->blocknr
;
1245 wret
= del_ptr(root
, path
, 1, slot
);
1248 tree_block_release(root
, leaf_buf
);
1249 wret
= free_extent(root
, blocknr
, 1);
1253 tree_block_release(root
, leaf_buf
);
1261 * walk up the tree as far as required to find the next leaf.
1262 * returns 0 if it found something or 1 if there are no greater leaves.
1263 * returns < 0 on io errors.
1265 int next_leaf(struct ctree_root
*root
, struct ctree_path
*path
)
1270 struct tree_buffer
*c
;
1271 struct tree_buffer
*next
= NULL
;
1273 while(level
< MAX_LEVEL
) {
1274 if (!path
->nodes
[level
])
1276 slot
= path
->slots
[level
] + 1;
1277 c
= path
->nodes
[level
];
1278 if (slot
>= c
->node
.header
.nritems
) {
1282 blocknr
= c
->node
.blockptrs
[slot
];
1284 tree_block_release(root
, next
);
1285 next
= read_tree_block(root
, blocknr
);
1288 path
->slots
[level
] = slot
;
1291 c
= path
->nodes
[level
];
1292 tree_block_release(root
, c
);
1293 path
->nodes
[level
] = next
;
1294 path
->slots
[level
] = 0;
1297 next
= read_tree_block(root
, next
->node
.blockptrs
[0]);