Add generation numbers to block pointers
[btrfs-progs-unstable/devel.git] / ctree.c
blob71796067cbbff6fc631819db75f725bc6050dd6c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25 #include "print-tree.h"
27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_path *path, int level);
29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
30 *root, struct btrfs_key *ins_key,
31 struct btrfs_path *path, int data_size, int extend);
32 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
33 *root, struct btrfs_buffer *dst, struct btrfs_buffer
34 *src);
35 static int balance_node_right(struct btrfs_trans_handle *trans, struct
36 btrfs_root *root, struct btrfs_buffer *dst_buf,
37 struct btrfs_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot);
41 inline void btrfs_init_path(struct btrfs_path *p)
43 memset(p, 0, sizeof(*p));
46 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
48 int i;
49 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
50 if (!p->nodes[i])
51 break;
52 btrfs_block_release(root, p->nodes[i]);
54 memset(p, 0, sizeof(*p));
56 int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
57 *root, struct btrfs_buffer *buf, struct btrfs_buffer
58 *parent, int parent_slot, struct btrfs_buffer
59 **cow_ret)
61 struct btrfs_buffer *cow;
63 if (!list_empty(&buf->dirty)) {
64 *cow_ret = buf;
65 return 0;
67 cow = btrfs_alloc_free_block(trans, root, buf->size);
68 memcpy(&cow->node, &buf->node, buf->size);
69 btrfs_set_header_bytenr(&cow->node.header, cow->bytenr);
70 btrfs_set_header_generation(&cow->node.header, trans->transid);
71 btrfs_set_header_owner(&cow->node.header, root->root_key.objectid);
72 *cow_ret = cow;
73 btrfs_inc_ref(trans, root, buf);
74 if (buf == root->node) {
75 root->node = cow;
76 cow->count++;
77 if (buf != root->commit_root)
78 btrfs_free_extent(trans, root, buf->bytenr,
79 buf->size, 1);
80 btrfs_block_release(root, buf);
81 } else {
82 btrfs_set_node_blockptr(&parent->node, parent_slot,
83 cow->bytenr);
84 btrfs_set_node_ptr_generation(&parent->node, parent_slot,
85 trans->transid);
86 BUG_ON(list_empty(&parent->dirty));
87 btrfs_free_extent(trans, root, buf->bytenr, buf->size, 1);
89 btrfs_block_release(root, buf);
90 return 0;
94 * The leaf data grows from end-to-front in the node.
95 * this returns the address of the start of the last item,
96 * which is the stop of the leaf data stack
98 static inline unsigned int leaf_data_end(struct btrfs_root *root,
99 struct btrfs_leaf *leaf)
101 u32 nr = btrfs_header_nritems(&leaf->header);
102 if (nr == 0)
103 return BTRFS_LEAF_DATA_SIZE(root);
104 return btrfs_item_offset(leaf->items + nr - 1);
108 * how many bytes are required to store the items in a leaf. start
109 * and nr indicate which items in the leaf to check. This totals up the
110 * space used both by the item structs and the item data
112 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
114 int data_len;
115 int nritems = btrfs_header_nritems(&l->header);
116 int end;
118 if (nritems < start + nr)
119 end = nritems - 1;
120 else
121 end = start + nr - 1;
123 if (!nr)
124 return 0;
125 data_len = btrfs_item_end(l->items + start);
126 data_len = data_len - btrfs_item_offset(l->items + end);
127 data_len += sizeof(struct btrfs_item) * nr;
128 return data_len;
132 * The space between the end of the leaf items and
133 * the start of the leaf data. IOW, how much room
134 * the leaf has left for both items and data
136 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
138 int nritems = btrfs_header_nritems(&leaf->header);
139 return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
143 * compare two keys in a memcmp fashion
145 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
147 struct btrfs_key k1;
149 btrfs_disk_key_to_cpu(&k1, disk);
151 if (k1.objectid > k2->objectid)
152 return 1;
153 if (k1.objectid < k2->objectid)
154 return -1;
155 if (k1.type > k2->type)
156 return 1;
157 if (k1.type < k2->type)
158 return -1;
159 if (k1.offset > k2->offset)
160 return 1;
161 if (k1.offset < k2->offset)
162 return -1;
163 return 0;
166 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
167 int level)
169 int i;
170 struct btrfs_node *parent = NULL;
171 struct btrfs_node *node = &path->nodes[level]->node;
172 int parent_slot;
173 u32 nritems = btrfs_header_nritems(&node->header);
175 if (path->nodes[level + 1])
176 parent = &path->nodes[level + 1]->node;
177 parent_slot = path->slots[level + 1];
178 BUG_ON(nritems == 0);
179 if (parent) {
180 struct btrfs_disk_key *parent_key;
181 parent_key = &parent->ptrs[parent_slot].key;
182 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
183 sizeof(struct btrfs_disk_key)));
184 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
185 btrfs_header_bytenr(&node->header));
187 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
188 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
189 struct btrfs_key cpukey;
190 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
191 BUG_ON(btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
193 return 0;
196 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
197 int level)
199 int i;
200 struct btrfs_leaf *leaf = &path->nodes[level]->leaf;
201 struct btrfs_node *parent = NULL;
202 int parent_slot;
203 u32 nritems = btrfs_header_nritems(&leaf->header);
205 if (path->nodes[level + 1])
206 parent = &path->nodes[level + 1]->node;
207 parent_slot = path->slots[level + 1];
208 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
210 if (nritems == 0)
211 return 0;
213 if (parent) {
214 struct btrfs_disk_key *parent_key;
215 parent_key = &parent->ptrs[parent_slot].key;
216 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
217 sizeof(struct btrfs_disk_key)));
218 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
219 btrfs_header_bytenr(&leaf->header));
221 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
222 struct btrfs_key cpukey;
223 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
224 BUG_ON(btrfs_comp_keys(&leaf->items[i].key,
225 &cpukey) >= 0);
226 BUG_ON(btrfs_item_offset(leaf->items + i) !=
227 btrfs_item_end(leaf->items + i + 1));
228 if (i == 0) {
229 BUG_ON(btrfs_item_offset(leaf->items + i) +
230 btrfs_item_size(leaf->items + i) !=
231 BTRFS_LEAF_DATA_SIZE(root));
234 return 0;
237 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
238 int level)
240 if (level == 0)
241 return check_leaf(root, path, level);
242 return check_node(root, path, level);
246 * search for key in the array p. items p are item_size apart
247 * and there are 'max' items in p
248 * the slot in the array is returned via slot, and it points to
249 * the place where you would insert key if it is not found in
250 * the array.
252 * slot may point to max if the key is bigger than all of the keys
254 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
255 int max, int *slot)
257 int low = 0;
258 int high = max;
259 int mid;
260 int ret;
261 struct btrfs_disk_key *tmp;
263 while(low < high) {
264 mid = (low + high) / 2;
265 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
266 ret = btrfs_comp_keys(tmp, key);
268 if (ret < 0)
269 low = mid + 1;
270 else if (ret > 0)
271 high = mid;
272 else {
273 *slot = mid;
274 return 0;
277 *slot = low;
278 return 1;
282 * simple bin_search frontend that does the right thing for
283 * leaves vs nodes
285 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
287 if (btrfs_is_leaf(c)) {
288 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
289 return generic_bin_search((void *)l->items,
290 sizeof(struct btrfs_item),
291 key, btrfs_header_nritems(&c->header),
292 slot);
293 } else {
294 return generic_bin_search((void *)c->ptrs,
295 sizeof(struct btrfs_key_ptr),
296 key, btrfs_header_nritems(&c->header),
297 slot);
299 return -1;
302 static struct btrfs_buffer *read_node_slot(struct btrfs_root *root,
303 struct btrfs_buffer *parent_buf,
304 int slot)
306 struct btrfs_node *node = &parent_buf->node;
307 int level = btrfs_header_level(&node->header);
308 if (slot < 0)
309 return NULL;
310 if (slot >= btrfs_header_nritems(&node->header))
311 return NULL;
312 return read_tree_block(root, btrfs_node_blockptr(node, slot),
313 btrfs_level_size(root, level - 1));
316 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
317 *root, struct btrfs_path *path, int level)
319 struct btrfs_buffer *right_buf;
320 struct btrfs_buffer *mid_buf;
321 struct btrfs_buffer *left_buf;
322 struct btrfs_buffer *parent_buf = NULL;
323 struct btrfs_node *right = NULL;
324 struct btrfs_node *mid;
325 struct btrfs_node *left = NULL;
326 struct btrfs_node *parent = NULL;
327 int ret = 0;
328 int wret;
329 int pslot;
330 int orig_slot = path->slots[level];
331 u64 orig_ptr;
333 if (level == 0)
334 return 0;
336 mid_buf = path->nodes[level];
337 mid = &mid_buf->node;
338 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
340 if (level < BTRFS_MAX_LEVEL - 1)
341 parent_buf = path->nodes[level + 1];
342 pslot = path->slots[level + 1];
345 * deal with the case where there is only one pointer in the root
346 * by promoting the node below to a root
348 if (!parent_buf) {
349 struct btrfs_buffer *child;
350 u64 bytenr = mid_buf->bytenr;
352 if (btrfs_header_nritems(&mid->header) != 1)
353 return 0;
355 /* promote the child to a root */
356 child = read_node_slot(root, mid_buf, 0);
357 BUG_ON(!child);
358 root->node = child;
359 path->nodes[level] = NULL;
360 /* once for the path */
361 btrfs_block_release(root, mid_buf);
362 /* once for the root ptr */
363 btrfs_block_release(root, mid_buf);
364 clean_tree_block(trans, root, mid_buf);
365 return btrfs_free_extent(trans, root, bytenr,
366 root->nodesize, 1);
368 parent = &parent_buf->node;
370 if (btrfs_header_nritems(&mid->header) >
371 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
372 return 0;
374 left_buf = read_node_slot(root, parent_buf, pslot - 1);
375 right_buf = read_node_slot(root, parent_buf, pslot + 1);
377 /* first, try to make some room in the middle buffer */
378 if (left_buf) {
379 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
380 &left_buf);
381 left = &left_buf->node;
382 orig_slot += btrfs_header_nritems(&left->header);
383 wret = push_node_left(trans, root, left_buf, mid_buf);
384 if (wret < 0)
385 ret = wret;
389 * then try to empty the right most buffer into the middle
391 if (right_buf) {
392 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
393 &right_buf);
394 right = &right_buf->node;
395 wret = push_node_left(trans, root, mid_buf, right_buf);
396 if (wret < 0)
397 ret = wret;
398 if (btrfs_header_nritems(&right->header) == 0) {
399 u64 bytenr = right_buf->bytenr;
400 btrfs_block_release(root, right_buf);
401 clean_tree_block(trans, root, right_buf);
402 right_buf = NULL;
403 right = NULL;
404 wret = del_ptr(trans, root, path, level + 1, pslot +
406 if (wret)
407 ret = wret;
408 wret = btrfs_free_extent(trans, root, bytenr,
409 root->nodesize, 1);
410 if (wret)
411 ret = wret;
412 } else {
413 memcpy(&parent->ptrs[pslot + 1].key,
414 &right->ptrs[0].key,
415 sizeof(struct btrfs_disk_key));
416 BUG_ON(list_empty(&parent_buf->dirty));
419 if (btrfs_header_nritems(&mid->header) == 1) {
421 * we're not allowed to leave a node with one item in the
422 * tree during a delete. A deletion from lower in the tree
423 * could try to delete the only pointer in this node.
424 * So, pull some keys from the left.
425 * There has to be a left pointer at this point because
426 * otherwise we would have pulled some pointers from the
427 * right
429 BUG_ON(!left_buf);
430 wret = balance_node_right(trans, root, mid_buf, left_buf);
431 if (wret < 0)
432 ret = wret;
433 BUG_ON(wret == 1);
435 if (btrfs_header_nritems(&mid->header) == 0) {
436 /* we've managed to empty the middle node, drop it */
437 u64 bytenr = mid_buf->bytenr;
438 btrfs_block_release(root, mid_buf);
439 clean_tree_block(trans, root, mid_buf);
440 mid_buf = NULL;
441 mid = NULL;
442 wret = del_ptr(trans, root, path, level + 1, pslot);
443 if (wret)
444 ret = wret;
445 wret = btrfs_free_extent(trans, root, bytenr,
446 root->nodesize, 1);
447 if (wret)
448 ret = wret;
449 } else {
450 /* update the parent key to reflect our changes */
451 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
452 sizeof(struct btrfs_disk_key));
453 BUG_ON(list_empty(&parent_buf->dirty));
456 /* update the path */
457 if (left_buf) {
458 if (btrfs_header_nritems(&left->header) > orig_slot) {
459 left_buf->count++; // released below
460 path->nodes[level] = left_buf;
461 path->slots[level + 1] -= 1;
462 path->slots[level] = orig_slot;
463 if (mid_buf)
464 btrfs_block_release(root, mid_buf);
465 } else {
466 orig_slot -= btrfs_header_nritems(&left->header);
467 path->slots[level] = orig_slot;
470 /* double check we haven't messed things up */
471 check_block(root, path, level);
472 if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node,
473 path->slots[level]))
474 BUG();
476 if (right_buf)
477 btrfs_block_release(root, right_buf);
478 if (left_buf)
479 btrfs_block_release(root, left_buf);
480 return ret;
482 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
483 struct btrfs_root *root,
484 struct btrfs_path *path, int level)
486 struct btrfs_node *right;
487 struct btrfs_node *mid;
488 struct btrfs_node *left;
489 struct btrfs_node *parent;
490 struct btrfs_buffer *right_buf;
491 struct btrfs_buffer *mid_buf;
492 struct btrfs_buffer *left_buf;
493 struct btrfs_buffer *parent_buf = NULL;
494 int ret = 0;
495 int wret;
496 int pslot;
497 int orig_slot = path->slots[level];
498 u64 orig_ptr;
500 if (level == 0)
501 return 1;
503 mid_buf = path->nodes[level];
504 mid = &mid_buf->node;
505 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
507 if (level < BTRFS_MAX_LEVEL - 1)
508 parent_buf = path->nodes[level + 1];
509 pslot = path->slots[level + 1];
511 if (!parent_buf)
512 return 1;
513 parent = &parent_buf->node;
515 left_buf = read_node_slot(root, parent_buf, pslot - 1);
516 left = &left_buf->node;
518 /* first, try to make some room in the middle buffer */
519 if (left_buf) {
520 u32 left_nr;
521 left_nr = btrfs_header_nritems(&left->header);
522 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
523 wret = 1;
524 } else {
525 ret = btrfs_cow_block(trans, root, left_buf,
526 parent_buf, pslot - 1,
527 &left_buf);
528 left = &left_buf->node;
529 if (ret)
530 wret = 1;
531 else {
532 wret = push_node_left(trans, root,
533 left_buf, mid_buf);
536 if (wret < 0)
537 ret = wret;
538 if (wret == 0) {
539 orig_slot += left_nr;
540 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
541 sizeof(struct btrfs_disk_key));
542 BUG_ON(list_empty(&parent_buf->dirty));
543 if (btrfs_header_nritems(&left->header) > orig_slot) {
544 path->nodes[level] = left_buf;
545 path->slots[level + 1] -= 1;
546 path->slots[level] = orig_slot;
547 btrfs_block_release(root, mid_buf);
548 } else {
549 orig_slot -=
550 btrfs_header_nritems(&left->header);
551 path->slots[level] = orig_slot;
552 btrfs_block_release(root, left_buf);
554 return 0;
556 btrfs_block_release(root, left_buf);
559 right_buf = read_node_slot(root, parent_buf, pslot + 1);
560 right = &right_buf->node;
563 * then try to empty the right most buffer into the middle
565 if (right_buf) {
566 u32 right_nr;
567 right_nr = btrfs_header_nritems(&right->header);
568 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
569 wret = 1;
570 } else {
571 ret = btrfs_cow_block(trans, root, right_buf,
572 parent_buf, pslot + 1,
573 &right_buf);
574 right = &right_buf->node;
575 if (ret)
576 wret = 1;
577 else {
578 wret = balance_node_right(trans, root,
579 right_buf, mid_buf);
582 if (wret < 0)
583 ret = wret;
584 if (wret == 0) {
585 memcpy(&parent->ptrs[pslot + 1].key,
586 &right->ptrs[0].key,
587 sizeof(struct btrfs_disk_key));
588 BUG_ON(list_empty(&parent_buf->dirty));
589 if (btrfs_header_nritems(&mid->header) <= orig_slot) {
590 path->nodes[level] = right_buf;
591 path->slots[level + 1] += 1;
592 path->slots[level] = orig_slot -
593 btrfs_header_nritems(&mid->header);
594 btrfs_block_release(root, mid_buf);
595 } else {
596 btrfs_block_release(root, right_buf);
598 return 0;
600 btrfs_block_release(root, right_buf);
602 return 1;
606 * look for key in the tree. path is filled in with nodes along the way
607 * if key is found, we return zero and you can find the item in the leaf
608 * level of the path (level 0)
610 * If the key isn't found, the path points to the slot where it should
611 * be inserted, and 1 is returned. If there are other errors during the
612 * search a negative error number is returned.
614 * if ins_len > 0, nodes and leaves will be split as we walk down the
615 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
616 * possible)
618 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
619 *root, struct btrfs_key *key, struct btrfs_path *p, int
620 ins_len, int cow)
622 struct btrfs_buffer *b;
623 struct btrfs_node *c;
624 int slot;
625 int ret;
626 int level;
628 again:
629 b = root->node;
630 b->count++;
631 while (b) {
632 level = btrfs_header_level(&b->node.header);
633 if (cow) {
634 int wret;
635 wret = btrfs_cow_block(trans, root, b,
636 p->nodes[level + 1],
637 p->slots[level + 1],
638 &b);
639 if (wret) {
640 btrfs_block_release(root, b);
641 return wret;
644 BUG_ON(!cow && ins_len);
645 c = &b->node;
646 p->nodes[level] = b;
647 ret = check_block(root, p, level);
648 if (ret)
649 return -1;
650 ret = bin_search(c, key, &slot);
651 if (!btrfs_is_leaf(c)) {
652 if (ret && slot > 0)
653 slot -= 1;
654 p->slots[level] = slot;
655 if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
656 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
657 int sret = split_node(trans, root, p, level);
658 BUG_ON(sret > 0);
659 if (sret)
660 return sret;
661 b = p->nodes[level];
662 c = &b->node;
663 slot = p->slots[level];
664 } else if (ins_len < 0) {
665 int sret = balance_level(trans, root, p,
666 level);
667 if (sret)
668 return sret;
669 b = p->nodes[level];
670 if (!b) {
671 btrfs_release_path(NULL, p);
672 goto again;
674 c = &b->node;
675 slot = p->slots[level];
676 BUG_ON(btrfs_header_nritems(&c->header) == 1);
678 b = read_tree_block(root,
679 btrfs_node_blockptr(c, slot),
680 btrfs_level_size(root, level - 1));
681 } else {
682 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
683 p->slots[level] = slot;
684 if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
685 sizeof(struct btrfs_item) + ins_len) {
686 int sret = split_leaf(trans, root, key,
687 p, ins_len, ret == 0);
688 BUG_ON(sret > 0);
689 if (sret)
690 return sret;
692 BUG_ON(root->node->count == 1);
693 return ret;
696 BUG_ON(root->node->count == 1);
697 return 1;
701 * adjust the pointers going up the tree, starting at level
702 * making sure the right key of each node is points to 'key'.
703 * This is used after shifting pointers to the left, so it stops
704 * fixing up pointers when a given leaf/node is not in slot 0 of the
705 * higher levels
707 * If this fails to write a tree block, it returns -1, but continues
708 * fixing up the blocks in ram so the tree is consistent.
710 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
711 *root, struct btrfs_path *path, struct btrfs_disk_key
712 *key, int level)
714 int i;
715 int ret = 0;
716 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
717 struct btrfs_node *t;
718 int tslot = path->slots[i];
719 if (!path->nodes[i])
720 break;
721 t = &path->nodes[i]->node;
722 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
723 BUG_ON(list_empty(&path->nodes[i]->dirty));
724 if (tslot != 0)
725 break;
727 return ret;
731 * try to push data from one node into the next node left in the
732 * tree.
734 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
735 * error, and > 0 if there was no room in the left hand block.
737 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
738 *root, struct btrfs_buffer *dst_buf, struct
739 btrfs_buffer *src_buf)
741 struct btrfs_node *src = &src_buf->node;
742 struct btrfs_node *dst = &dst_buf->node;
743 int push_items = 0;
744 int src_nritems;
745 int dst_nritems;
746 int ret = 0;
748 src_nritems = btrfs_header_nritems(&src->header);
749 dst_nritems = btrfs_header_nritems(&dst->header);
750 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
751 if (push_items <= 0) {
752 return 1;
755 if (src_nritems < push_items)
756 push_items = src_nritems;
758 memcpy(dst->ptrs + dst_nritems, src->ptrs,
759 push_items * sizeof(struct btrfs_key_ptr));
760 if (push_items < src_nritems) {
761 memmove(src->ptrs, src->ptrs + push_items,
762 (src_nritems - push_items) *
763 sizeof(struct btrfs_key_ptr));
765 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
766 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
767 BUG_ON(list_empty(&src_buf->dirty));
768 BUG_ON(list_empty(&dst_buf->dirty));
769 return ret;
773 * try to push data from one node into the next node right in the
774 * tree.
776 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
777 * error, and > 0 if there was no room in the right hand block.
779 * this will only push up to 1/2 the contents of the left node over
781 static int balance_node_right(struct btrfs_trans_handle *trans, struct
782 btrfs_root *root, struct btrfs_buffer *dst_buf,
783 struct btrfs_buffer *src_buf)
785 struct btrfs_node *src = &src_buf->node;
786 struct btrfs_node *dst = &dst_buf->node;
787 int push_items = 0;
788 int max_push;
789 int src_nritems;
790 int dst_nritems;
791 int ret = 0;
793 src_nritems = btrfs_header_nritems(&src->header);
794 dst_nritems = btrfs_header_nritems(&dst->header);
795 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
796 if (push_items <= 0) {
797 return 1;
799 max_push = src_nritems / 2 + 1;
800 /* don't try to empty the node */
801 if (max_push >= src_nritems)
802 return 1;
803 if (max_push < push_items)
804 push_items = max_push;
806 memmove(dst->ptrs + push_items, dst->ptrs,
807 dst_nritems * sizeof(struct btrfs_key_ptr));
808 memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
809 push_items * sizeof(struct btrfs_key_ptr));
811 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
812 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
814 BUG_ON(list_empty(&src_buf->dirty));
815 BUG_ON(list_empty(&dst_buf->dirty));
816 return ret;
820 * helper function to insert a new root level in the tree.
821 * A new node is allocated, and a single item is inserted to
822 * point to the existing root
824 * returns zero on success or < 0 on failure.
826 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
827 *root, struct btrfs_path *path, int level)
829 struct btrfs_buffer *t;
830 struct btrfs_node *lower;
831 struct btrfs_node *c;
832 struct btrfs_disk_key *lower_key;
834 BUG_ON(path->nodes[level]);
835 BUG_ON(path->nodes[level-1] != root->node);
836 t = btrfs_alloc_free_block(trans, root, root->nodesize);
837 c = &t->node;
838 memset(&c->header, 0, sizeof(c->header));
839 btrfs_set_header_nritems(&c->header, 1);
840 btrfs_set_header_level(&c->header, level);
841 btrfs_set_header_bytenr(&c->header, t->bytenr);
842 btrfs_set_header_generation(&c->header, trans->transid);
843 btrfs_set_header_owner(&c->header, root->root_key.objectid);
844 memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
845 sizeof(c->header.fsid));
846 lower = &path->nodes[level-1]->node;
848 if (btrfs_is_leaf(lower))
849 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
850 else
851 lower_key = &lower->ptrs[0].key;
852 memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
853 btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->bytenr);
854 BUG_ON(list_empty(&t->dirty));
855 btrfs_set_node_ptr_generation(c, 0,
856 btrfs_header_generation(&path->nodes[level - 1]->node.header));
857 /* the super has an extra ref to root->node */
858 btrfs_block_release(root, root->node);
859 root->node = t;
860 t->count++;
861 path->nodes[level] = t;
862 path->slots[level] = 0;
863 return 0;
867 * worker function to insert a single pointer in a node.
868 * the node should have enough room for the pointer already
870 * slot and level indicate where you want the key to go, and
871 * bytenr is the block the key points to.
873 * returns zero on success and < 0 on any error
875 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
876 *root, struct btrfs_path *path, struct btrfs_disk_key
877 *key, u64 bytenr, int slot, int level)
879 struct btrfs_node *lower;
880 int nritems;
882 BUG_ON(!path->nodes[level]);
883 lower = &path->nodes[level]->node;
884 nritems = btrfs_header_nritems(&lower->header);
885 if (slot > nritems)
886 BUG();
887 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
888 BUG();
889 if (slot != nritems) {
890 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
891 (nritems - slot) * sizeof(struct btrfs_key_ptr));
893 memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
894 btrfs_set_node_blockptr(lower, slot, bytenr);
895 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
896 btrfs_set_header_nritems(&lower->header, nritems + 1);
897 BUG_ON(list_empty(&path->nodes[level]->dirty));
898 return 0;
902 * split the node at the specified level in path in two.
903 * The path is corrected to point to the appropriate node after the split
905 * Before splitting this tries to make some room in the node by pushing
906 * left and right, if either one works, it returns right away.
908 * returns 0 on success and < 0 on failure
910 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
911 *root, struct btrfs_path *path, int level)
913 struct btrfs_buffer *t;
914 struct btrfs_node *c;
915 struct btrfs_buffer *split_buffer;
916 struct btrfs_node *split;
917 int mid;
918 int ret;
919 int wret;
920 u32 c_nritems;
922 t = path->nodes[level];
923 c = &t->node;
924 if (t == root->node) {
925 /* trying to split the root, lets make a new one */
926 ret = insert_new_root(trans, root, path, level + 1);
927 if (ret)
928 return ret;
929 } else {
930 ret = push_nodes_for_insert(trans, root, path, level);
931 t = path->nodes[level];
932 c = &t->node;
933 if (!ret && btrfs_header_nritems(&c->header) <
934 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
935 return 0;
936 if (ret < 0)
937 return ret;
939 c_nritems = btrfs_header_nritems(&c->header);
940 split_buffer = btrfs_alloc_free_block(trans, root, root->nodesize);
941 split = &split_buffer->node;
942 btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
943 btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
944 btrfs_set_header_bytenr(&split->header, split_buffer->bytenr);
945 btrfs_set_header_generation(&split->header, trans->transid);
946 btrfs_set_header_owner(&split->header, root->root_key.objectid);
947 memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
948 sizeof(split->header.fsid));
949 mid = (c_nritems + 1) / 2;
950 memcpy(split->ptrs, c->ptrs + mid,
951 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
952 btrfs_set_header_nritems(&split->header, c_nritems - mid);
953 btrfs_set_header_nritems(&c->header, mid);
954 ret = 0;
956 BUG_ON(list_empty(&t->dirty));
957 wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
958 split_buffer->bytenr, path->slots[level + 1] + 1,
959 level + 1);
960 if (wret)
961 ret = wret;
963 if (path->slots[level] >= mid) {
964 path->slots[level] -= mid;
965 btrfs_block_release(root, t);
966 path->nodes[level] = split_buffer;
967 path->slots[level + 1] += 1;
968 } else {
969 btrfs_block_release(root, split_buffer);
971 return ret;
975 * push some data in the path leaf to the right, trying to free up at
976 * least data_size bytes. returns zero if the push worked, nonzero otherwise
978 * returns 1 if the push failed because the other node didn't have enough
979 * room, 0 if everything worked out and < 0 if there were major errors.
981 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
982 *root, struct btrfs_path *path, int data_size,
983 int empty)
985 struct btrfs_buffer *left_buf = path->nodes[0];
986 struct btrfs_leaf *left = &left_buf->leaf;
987 struct btrfs_leaf *right;
988 struct btrfs_buffer *right_buf;
989 struct btrfs_buffer *upper;
990 int slot;
991 u32 i;
992 int free_space;
993 int push_space = 0;
994 int push_items = 0;
995 struct btrfs_item *item;
996 u32 left_nritems;
997 u32 nr;
998 u32 right_nritems;
999 slot = path->slots[1];
1000 if (!path->nodes[1]) {
1001 return 1;
1003 upper = path->nodes[1];
1004 if (slot >= btrfs_header_nritems(&upper->node.header) - 1) {
1005 return 1;
1007 right_buf = read_tree_block(root,
1008 btrfs_node_blockptr(&upper->node, slot + 1),
1009 root->leafsize);
1010 right = &right_buf->leaf;
1011 free_space = btrfs_leaf_free_space(root, right);
1012 if (free_space < data_size + sizeof(struct btrfs_item)) {
1013 btrfs_block_release(root, right_buf);
1014 return 1;
1016 /* cow and double check */
1017 btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1018 right = &right_buf->leaf;
1019 free_space = btrfs_leaf_free_space(root, right);
1020 if (free_space < data_size + sizeof(struct btrfs_item)) {
1021 btrfs_block_release(root, right_buf);
1022 return 1;
1024 left_nritems = btrfs_header_nritems(&left->header);
1025 if (left_nritems == 0) {
1026 btrfs_block_release(root, right_buf);
1027 return 1;
1030 if (empty)
1031 nr = 0;
1032 else
1033 nr = 1;
1035 i = left_nritems - 1;
1036 while (i >= nr) {
1037 item = left->items + i;
1038 if (path->slots[0] == i)
1039 push_space += data_size + sizeof(*item);
1040 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1041 free_space)
1042 break;
1043 push_items++;
1044 push_space += btrfs_item_size(item) + sizeof(*item);
1045 if (i == 0)
1046 break;
1047 i--;
1049 if (push_items == 0) {
1050 btrfs_block_release(root, right_buf);
1051 return 1;
1053 right_nritems = btrfs_header_nritems(&right->header);
1054 /* push left to right */
1055 push_space = btrfs_item_end(left->items + left_nritems - push_items);
1056 push_space -= leaf_data_end(root, left);
1057 /* make room in the right data area */
1058 memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
1059 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
1060 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
1061 /* copy from the left data area */
1062 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
1063 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
1064 memmove(right->items + push_items, right->items,
1065 right_nritems * sizeof(struct btrfs_item));
1066 /* copy the items from left to right */
1067 memcpy(right->items, left->items + left_nritems - push_items,
1068 push_items * sizeof(struct btrfs_item));
1070 /* update the item pointers */
1071 right_nritems += push_items;
1072 btrfs_set_header_nritems(&right->header, right_nritems);
1073 push_space = BTRFS_LEAF_DATA_SIZE(root);
1074 for (i = 0; i < right_nritems; i++) {
1075 btrfs_set_item_offset(right->items + i, push_space -
1076 btrfs_item_size(right->items + i));
1077 push_space = btrfs_item_offset(right->items + i);
1079 left_nritems -= push_items;
1080 btrfs_set_header_nritems(&left->header, left_nritems);
1082 BUG_ON(list_empty(&left_buf->dirty));
1083 BUG_ON(list_empty(&right_buf->dirty));
1084 memcpy(&upper->node.ptrs[slot + 1].key,
1085 &right->items[0].key, sizeof(struct btrfs_disk_key));
1086 BUG_ON(list_empty(&upper->dirty));
1088 /* then fixup the leaf pointer in the path */
1089 if (path->slots[0] >= left_nritems) {
1090 path->slots[0] -= left_nritems;
1091 btrfs_block_release(root, path->nodes[0]);
1092 path->nodes[0] = right_buf;
1093 path->slots[1] += 1;
1094 } else {
1095 btrfs_block_release(root, right_buf);
1097 return 0;
1100 * push some data in the path leaf to the left, trying to free up at
1101 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1103 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1104 *root, struct btrfs_path *path, int data_size,
1105 int empty)
1107 struct btrfs_buffer *right_buf = path->nodes[0];
1108 struct btrfs_leaf *right = &right_buf->leaf;
1109 struct btrfs_buffer *t;
1110 struct btrfs_leaf *left;
1111 int slot;
1112 int i;
1113 int free_space;
1114 int push_space = 0;
1115 int push_items = 0;
1116 struct btrfs_item *item;
1117 u32 old_left_nritems;
1118 u32 right_nritems;
1119 u32 nr;
1120 int ret = 0;
1121 int wret;
1122 slot = path->slots[1];
1123 if (slot == 0) {
1124 return 1;
1126 if (!path->nodes[1]) {
1127 return 1;
1129 right_nritems = btrfs_header_nritems(&right->header);
1130 if (right_nritems == 0) {
1131 return 1;
1134 t = read_tree_block(root,
1135 btrfs_node_blockptr(&path->nodes[1]->node, slot - 1),
1136 root->leafsize);
1137 left = &t->leaf;
1138 free_space = btrfs_leaf_free_space(root, left);
1139 if (free_space < data_size + sizeof(struct btrfs_item)) {
1140 btrfs_block_release(root, t);
1141 return 1;
1144 /* cow and double check */
1145 btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1146 left = &t->leaf;
1147 free_space = btrfs_leaf_free_space(root, left);
1148 if (free_space < data_size + sizeof(struct btrfs_item)) {
1149 btrfs_block_release(root, t);
1150 return 1;
1152 if (empty)
1153 nr = right_nritems;
1154 else
1155 nr = right_nritems - 1;
1157 for (i = 0; i < nr; i++) {
1158 item = right->items + i;
1159 if (path->slots[0] == i)
1160 push_space += data_size + sizeof(*item);
1161 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1162 free_space)
1163 break;
1164 push_items++;
1165 push_space += btrfs_item_size(item) + sizeof(*item);
1167 if (push_items == 0) {
1168 btrfs_block_release(root, t);
1169 return 1;
1171 /* push data from right to left */
1172 memcpy(left->items + btrfs_header_nritems(&left->header),
1173 right->items, push_items * sizeof(struct btrfs_item));
1174 push_space = BTRFS_LEAF_DATA_SIZE(root) -
1175 btrfs_item_offset(right->items + push_items -1);
1176 memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
1177 btrfs_leaf_data(right) +
1178 btrfs_item_offset(right->items + push_items - 1),
1179 push_space);
1180 old_left_nritems = btrfs_header_nritems(&left->header);
1181 BUG_ON(old_left_nritems < 0);
1183 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1184 u32 ioff = btrfs_item_offset(left->items + i);
1185 btrfs_set_item_offset(left->items + i, ioff -
1186 (BTRFS_LEAF_DATA_SIZE(root) -
1187 btrfs_item_offset(left->items +
1188 old_left_nritems - 1)));
1190 btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1191 /* fixup right node */
1192 if (push_items < right_nritems) {
1193 push_space = btrfs_item_offset(right->items + push_items - 1) -
1194 leaf_data_end(root, right);
1195 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1196 push_space, btrfs_leaf_data(right) +
1197 leaf_data_end(root, right), push_space);
1198 memmove(right->items, right->items + push_items,
1199 (right_nritems - push_items) *
1200 sizeof(struct btrfs_item));
1202 right_nritems -= push_items;
1203 btrfs_set_header_nritems(&right->header, right_nritems);
1204 push_space = BTRFS_LEAF_DATA_SIZE(root);
1205 for (i = 0; i < right_nritems; i++) {
1206 btrfs_set_item_offset(right->items + i, push_space -
1207 btrfs_item_size(right->items + i));
1208 push_space = btrfs_item_offset(right->items + i);
1211 BUG_ON(list_empty(&t->dirty));
1212 BUG_ON(list_empty(&right_buf->dirty));
1214 wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1215 if (wret)
1216 ret = wret;
1218 /* then fixup the leaf pointer in the path */
1219 if (path->slots[0] < push_items) {
1220 path->slots[0] += old_left_nritems;
1221 btrfs_block_release(root, path->nodes[0]);
1222 path->nodes[0] = t;
1223 path->slots[1] -= 1;
1224 } else {
1225 btrfs_block_release(root, t);
1226 path->slots[0] -= push_items;
1228 BUG_ON(path->slots[0] < 0);
1229 return ret;
1233 * split the path's leaf in two, making sure there is at least data_size
1234 * available for the resulting leaf level of the path.
1236 * returns 0 if all went well and < 0 on failure.
1238 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1239 *root, struct btrfs_key *ins_key,
1240 struct btrfs_path *path, int data_size, int extend)
1242 struct btrfs_buffer *l_buf;
1243 struct btrfs_leaf *l;
1244 u32 nritems;
1245 int mid;
1246 int slot;
1247 struct btrfs_leaf *right;
1248 struct btrfs_buffer *right_buffer;
1249 int space_needed = data_size + sizeof(struct btrfs_item);
1250 int data_copy_size;
1251 int rt_data_off;
1252 int i;
1253 int ret = 0;
1254 int wret;
1255 int double_split;
1256 int num_doubles = 0;
1257 struct btrfs_disk_key disk_key;
1259 if (extend)
1260 space_needed = data_size;
1261 /* first try to make some room by pushing left and right */
1262 if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1263 wret = push_leaf_right(trans, root, path, data_size, 0);
1264 if (wret < 0) {
1265 return wret;
1267 if (wret) {
1268 wret = push_leaf_left(trans, root, path, data_size, 0);
1269 if (wret < 0)
1270 return wret;
1272 l_buf = path->nodes[0];
1273 l = &l_buf->leaf;
1275 /* did the pushes work? */
1276 if (btrfs_leaf_free_space(root, l) >= space_needed)
1277 return 0;
1279 if (!path->nodes[1]) {
1280 ret = insert_new_root(trans, root, path, 1);
1281 if (ret)
1282 return ret;
1284 again:
1285 double_split = 0;
1286 l_buf = path->nodes[0];
1287 l = &l_buf->leaf;
1288 slot = path->slots[0];
1289 nritems = btrfs_header_nritems(&l->header);
1290 mid = (nritems + 1)/ 2;
1292 right_buffer = btrfs_alloc_free_block(trans, root, root->leafsize);
1293 right = &right_buffer->leaf;
1294 memset(&right->header, 0, sizeof(right->header));
1295 btrfs_set_header_bytenr(&right->header, right_buffer->bytenr);
1296 btrfs_set_header_level(&right->header, 0);
1297 btrfs_set_header_owner(&right->header, root->root_key.objectid);
1298 btrfs_set_header_generation(&right->header, trans->transid);
1299 memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1300 sizeof(right->header.fsid));
1301 if (mid <= slot) {
1302 if (nritems == 1 ||
1303 leaf_space_used(l, mid, nritems - mid) + space_needed >
1304 BTRFS_LEAF_DATA_SIZE(root)) {
1305 if (slot >= nritems) {
1306 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1307 btrfs_set_header_nritems(&right->header, 0);
1308 wret = insert_ptr(trans, root, path,
1309 &disk_key, right_buffer->bytenr,
1310 path->slots[1] + 1, 1);
1311 if (wret)
1312 ret = wret;
1313 btrfs_block_release(root, path->nodes[0]);
1314 path->nodes[0] = right_buffer;
1315 path->slots[0] = 0;
1316 path->slots[1] += 1;
1317 return ret;
1319 mid = slot;
1320 if (mid != nritems &&
1321 leaf_space_used(l, mid, nritems - mid) +
1322 space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1323 double_split = 1;
1326 } else {
1327 if (leaf_space_used(l, 0, mid) + space_needed >
1328 BTRFS_LEAF_DATA_SIZE(root)) {
1329 if (!extend && slot == 0) {
1330 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1331 btrfs_set_header_nritems(&right->header, 0);
1332 wret = insert_ptr(trans, root, path,
1333 &disk_key,
1334 right_buffer->bytenr,
1335 path->slots[1], 1);
1336 if (wret)
1337 ret = wret;
1338 btrfs_block_release(root, path->nodes[0]);
1339 path->nodes[0] = right_buffer;
1340 path->slots[0] = 0;
1341 if (path->slots[1] == 0) {
1342 wret = fixup_low_keys(trans, root,
1343 path, &disk_key, 1);
1344 if (wret)
1345 ret = wret;
1347 return ret;
1348 } else if (extend && slot == 0) {
1349 mid = 1;
1350 } else {
1351 mid = slot;
1352 if (mid != nritems &&
1353 leaf_space_used(l, mid, nritems - mid) +
1354 space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1355 double_split = 1;
1360 nritems = nritems - mid;
1361 btrfs_set_header_nritems(&right->header, nritems);
1362 data_copy_size = btrfs_item_end(l->items + mid) -
1363 leaf_data_end(root, l);
1364 memcpy(right->items, l->items + mid,
1365 nritems * sizeof(struct btrfs_item));
1366 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1367 data_copy_size, btrfs_leaf_data(l) +
1368 leaf_data_end(root, l), data_copy_size);
1369 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1370 btrfs_item_end(l->items + mid);
1371 for (i = 0; i < nritems; i++) {
1372 u32 ioff = btrfs_item_offset(right->items + i);
1373 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1376 btrfs_set_header_nritems(&l->header, mid);
1377 ret = 0;
1378 wret = insert_ptr(trans, root, path, &right->items[0].key,
1379 right_buffer->bytenr, path->slots[1] + 1, 1);
1380 if (wret)
1381 ret = wret;
1383 BUG_ON(list_empty(&right_buffer->dirty));
1384 BUG_ON(list_empty(&l_buf->dirty));
1385 BUG_ON(path->slots[0] != slot);
1386 if (mid <= slot) {
1387 btrfs_block_release(root, path->nodes[0]);
1388 path->nodes[0] = right_buffer;
1389 path->slots[0] -= mid;
1390 path->slots[1] += 1;
1391 } else
1392 btrfs_block_release(root, right_buffer);
1394 BUG_ON(path->slots[0] < 0);
1395 if (double_split) {
1396 BUG_ON(num_doubles != 0);
1397 num_doubles++;
1398 goto again;
1400 return ret;
1403 * Given a key and some data, insert an item into the tree.
1404 * This does all the path init required, making room in the tree if needed.
1406 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1407 *root, struct btrfs_path *path, struct btrfs_key
1408 *cpu_key, u32 data_size)
1410 int ret = 0;
1411 int slot;
1412 int slot_orig;
1413 struct btrfs_leaf *leaf;
1414 struct btrfs_buffer *leaf_buf;
1415 u32 nritems;
1416 unsigned int data_end;
1417 struct btrfs_disk_key disk_key;
1419 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1421 /* create a root if there isn't one */
1422 if (!root->node)
1423 BUG();
1424 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1425 if (ret == 0) {
1426 return -EEXIST;
1428 if (ret < 0)
1429 goto out;
1431 slot_orig = path->slots[0];
1432 leaf_buf = path->nodes[0];
1433 leaf = &leaf_buf->leaf;
1435 nritems = btrfs_header_nritems(&leaf->header);
1436 data_end = leaf_data_end(root, leaf);
1438 if (btrfs_leaf_free_space(root, leaf) <
1439 sizeof(struct btrfs_item) + data_size)
1440 BUG();
1442 slot = path->slots[0];
1443 BUG_ON(slot < 0);
1444 if (slot != nritems) {
1445 int i;
1446 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1449 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1451 /* first correct the data pointers */
1452 for (i = slot; i < nritems; i++) {
1453 u32 ioff = btrfs_item_offset(leaf->items + i);
1454 btrfs_set_item_offset(leaf->items + i,
1455 ioff - data_size);
1458 /* shift the items */
1459 memmove(leaf->items + slot + 1, leaf->items + slot,
1460 (nritems - slot) * sizeof(struct btrfs_item));
1462 /* shift the data */
1463 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1464 btrfs_leaf_data(leaf) +
1465 data_end, old_data - data_end);
1466 data_end = old_data;
1468 /* setup the item for the new data */
1469 memcpy(&leaf->items[slot].key, &disk_key,
1470 sizeof(struct btrfs_disk_key));
1471 btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1472 btrfs_set_item_size(leaf->items + slot, data_size);
1473 btrfs_set_header_nritems(&leaf->header, nritems + 1);
1475 ret = 0;
1476 if (slot == 0)
1477 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1479 BUG_ON(list_empty(&leaf_buf->dirty));
1480 if (btrfs_leaf_free_space(root, leaf) < 0)
1481 BUG();
1482 check_leaf(root, path, 0);
1483 out:
1484 return ret;
1488 * Given a key and some data, insert an item into the tree.
1489 * This does all the path init required, making room in the tree if needed.
1491 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1492 *root, struct btrfs_key *cpu_key, void *data, u32
1493 data_size)
1495 int ret = 0;
1496 struct btrfs_path path;
1497 u8 *ptr;
1499 btrfs_init_path(&path);
1500 ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
1501 if (!ret) {
1502 ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8);
1503 memcpy(ptr, data, data_size);
1505 btrfs_release_path(root, &path);
1506 return ret;
1510 * delete the pointer from a given node.
1512 * If the delete empties a node, the node is removed from the tree,
1513 * continuing all the way the root if required. The root is converted into
1514 * a leaf if all the nodes are emptied.
1516 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1517 struct btrfs_path *path, int level, int slot)
1519 struct btrfs_node *node;
1520 struct btrfs_buffer *parent = path->nodes[level];
1521 u32 nritems;
1522 int ret = 0;
1523 int wret;
1525 node = &parent->node;
1526 nritems = btrfs_header_nritems(&node->header);
1527 if (slot != nritems -1) {
1528 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1529 sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
1531 nritems--;
1532 btrfs_set_header_nritems(&node->header, nritems);
1533 if (nritems == 0 && parent == root->node) {
1534 BUG_ON(btrfs_header_level(&root->node->node.header) != 1);
1535 /* just turn the root into a leaf and break */
1536 btrfs_set_header_level(&root->node->node.header, 0);
1537 } else if (slot == 0) {
1538 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1539 level + 1);
1540 if (wret)
1541 ret = wret;
1543 BUG_ON(list_empty(&parent->dirty));
1544 return ret;
1548 * delete the item at the leaf level in path. If that empties
1549 * the leaf, remove it from the tree
1551 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1552 struct btrfs_path *path)
1554 int slot;
1555 struct btrfs_leaf *leaf;
1556 struct btrfs_buffer *leaf_buf;
1557 int doff;
1558 int dsize;
1559 int ret = 0;
1560 int wret;
1561 u32 nritems;
1563 leaf_buf = path->nodes[0];
1564 leaf = &leaf_buf->leaf;
1565 slot = path->slots[0];
1566 doff = btrfs_item_offset(leaf->items + slot);
1567 dsize = btrfs_item_size(leaf->items + slot);
1568 nritems = btrfs_header_nritems(&leaf->header);
1570 if (slot != nritems - 1) {
1571 int i;
1572 int data_end = leaf_data_end(root, leaf);
1573 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1574 btrfs_leaf_data(leaf) + data_end,
1575 doff - data_end);
1576 for (i = slot + 1; i < nritems; i++) {
1577 u32 ioff = btrfs_item_offset(leaf->items + i);
1578 btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1580 memmove(leaf->items + slot, leaf->items + slot + 1,
1581 sizeof(struct btrfs_item) *
1582 (nritems - slot - 1));
1584 btrfs_set_header_nritems(&leaf->header, nritems - 1);
1585 nritems--;
1586 /* delete the leaf if we've emptied it */
1587 if (nritems == 0) {
1588 if (leaf_buf == root->node) {
1589 btrfs_set_header_level(&leaf->header, 0);
1590 BUG_ON(list_empty(&leaf_buf->dirty));
1591 } else {
1592 clean_tree_block(trans, root, leaf_buf);
1593 wret = del_ptr(trans, root, path, 1, path->slots[1]);
1594 if (wret)
1595 ret = wret;
1596 wret = btrfs_free_extent(trans, root,
1597 leaf_buf->bytenr,
1598 leaf_buf->size, 1);
1599 if (wret)
1600 ret = wret;
1602 } else {
1603 int used = leaf_space_used(leaf, 0, nritems);
1604 if (slot == 0) {
1605 wret = fixup_low_keys(trans, root, path,
1606 &leaf->items[0].key, 1);
1607 if (wret)
1608 ret = wret;
1610 BUG_ON(list_empty(&leaf_buf->dirty));
1612 /* delete the leaf if it is mostly empty */
1613 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1614 /* push_leaf_left fixes the path.
1615 * make sure the path still points to our leaf
1616 * for possible call to del_ptr below
1618 slot = path->slots[1];
1619 leaf_buf->count++;
1620 wret = push_leaf_right(trans, root, path, 1, 1);
1621 if (wret < 0)
1622 ret = wret;
1623 if (path->nodes[0] == leaf_buf &&
1624 btrfs_header_nritems(&leaf->header)) {
1625 wret = push_leaf_left(trans, root, path, 1, 1);
1626 if (wret < 0)
1627 ret = wret;
1629 if (btrfs_header_nritems(&leaf->header) == 0) {
1630 u64 bytenr = leaf_buf->bytenr;
1631 clean_tree_block(trans, root, leaf_buf);
1632 wret = del_ptr(trans, root, path, 1, slot);
1633 if (wret)
1634 ret = wret;
1635 wret = btrfs_free_extent(trans, root, bytenr,
1636 leaf_buf->size, 1);
1637 btrfs_block_release(root, leaf_buf);
1638 if (wret)
1639 ret = wret;
1640 } else {
1641 btrfs_block_release(root, leaf_buf);
1645 return ret;
1647 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1648 struct btrfs_root *root,
1649 struct btrfs_path *path,
1650 u32 new_size, int from_end)
1652 int ret = 0;
1653 int slot;
1654 int slot_orig;
1655 struct btrfs_leaf *leaf;
1656 struct btrfs_item *item;
1657 u32 nritems;
1658 unsigned int data_end;
1659 unsigned int old_data_start;
1660 unsigned int old_size;
1661 unsigned int size_diff;
1662 int i;
1664 slot_orig = path->slots[0];
1665 leaf = &path->nodes[0]->leaf;
1666 slot = path->slots[0];
1668 old_size = btrfs_item_size(leaf->items + slot);
1669 if (old_size == new_size)
1670 return 0;
1672 nritems = btrfs_header_nritems(&leaf->header);
1673 data_end = leaf_data_end(root, leaf);
1675 old_data_start = btrfs_item_offset(leaf->items + slot);
1677 size_diff = old_size - new_size;
1679 BUG_ON(slot < 0);
1680 BUG_ON(slot >= nritems);
1683 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1685 /* first correct the data pointers */
1686 for (i = slot; i < nritems; i++) {
1687 u32 ioff;
1688 item = leaf->items + i;
1689 ioff = btrfs_item_offset(item);
1690 btrfs_set_item_offset(item, ioff + size_diff);
1693 /* shift the data */
1694 if (from_end) {
1695 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1696 btrfs_leaf_data(leaf) + data_end,
1697 old_data_start + new_size - data_end);
1698 } else {
1699 struct btrfs_disk_key *disk_key;
1700 u64 offset;
1702 disk_key = &leaf->items[slot].key;
1703 if (btrfs_disk_key_type(disk_key) == BTRFS_EXTENT_DATA_KEY) {
1704 char *ptr;
1705 struct btrfs_file_extent_item *fi;
1707 fi = btrfs_item_ptr(leaf, slot,
1708 struct btrfs_file_extent_item);
1709 fi = (struct btrfs_file_extent_item *)(
1710 (unsigned long)fi - size_diff);
1712 if (btrfs_file_extent_type(fi) ==
1713 BTRFS_FILE_EXTENT_INLINE) {
1714 ptr = btrfs_item_ptr(leaf, slot, char);
1715 memmove(ptr, (char *)fi,
1716 offsetof(struct btrfs_file_extent_item,
1717 disk_bytenr));
1721 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1722 btrfs_leaf_data(leaf) + data_end,
1723 old_data_start - data_end);
1725 offset = btrfs_disk_key_offset(disk_key);
1726 btrfs_set_disk_key_offset(disk_key, offset + size_diff);
1727 if (slot == 0)
1728 fixup_low_keys(trans, root, path, disk_key, 1);
1731 item = leaf->items + slot;
1732 btrfs_set_item_size(item, new_size);
1733 BUG_ON(list_empty(&path->nodes[0]->dirty));
1735 ret = 0;
1736 if (btrfs_leaf_free_space(root, leaf) < 0) {
1737 btrfs_print_leaf(root, leaf);
1738 BUG();
1740 return ret;
1743 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1744 *root, struct btrfs_path *path, u32 data_size)
1746 int ret = 0;
1747 int slot;
1748 int slot_orig;
1749 struct btrfs_leaf *leaf;
1750 struct btrfs_buffer *leaf_buf;
1751 u32 nritems;
1752 unsigned int data_end;
1753 unsigned int old_data;
1754 unsigned int old_size;
1755 int i;
1757 slot_orig = path->slots[0];
1758 leaf_buf = path->nodes[0];
1759 leaf = &leaf_buf->leaf;
1761 nritems = btrfs_header_nritems(&leaf->header);
1762 data_end = leaf_data_end(root, leaf);
1764 if (btrfs_leaf_free_space(root, leaf) < data_size)
1765 BUG();
1766 slot = path->slots[0];
1767 old_data = btrfs_item_end(leaf->items + slot);
1769 BUG_ON(slot < 0);
1770 BUG_ON(slot >= nritems);
1773 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1775 /* first correct the data pointers */
1776 for (i = slot; i < nritems; i++) {
1777 u32 ioff = btrfs_item_offset(leaf->items + i);
1778 btrfs_set_item_offset(leaf->items + i,
1779 ioff - data_size);
1781 /* shift the data */
1782 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1783 btrfs_leaf_data(leaf) + data_end, old_data - data_end);
1784 data_end = old_data;
1785 old_size = btrfs_item_size(leaf->items + slot);
1786 btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1788 ret = 0;
1789 if (btrfs_leaf_free_space(root, leaf) < 0)
1790 BUG();
1791 check_leaf(root, path, 0);
1792 return ret;
1796 * walk up the tree as far as required to find the next leaf.
1797 * returns 0 if it found something or 1 if there are no greater leaves.
1798 * returns < 0 on io errors.
1800 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1802 int slot;
1803 int level = 1;
1804 u64 bytenr;
1805 struct btrfs_buffer *c;
1806 struct btrfs_buffer *next = NULL;
1808 while(level < BTRFS_MAX_LEVEL) {
1809 if (!path->nodes[level])
1810 return 1;
1811 slot = path->slots[level] + 1;
1812 c = path->nodes[level];
1813 if (slot >= btrfs_header_nritems(&c->node.header)) {
1814 level++;
1815 continue;
1817 bytenr = btrfs_node_blockptr(&c->node, slot);
1818 if (next)
1819 btrfs_block_release(root, next);
1820 next = read_tree_block(root, bytenr,
1821 btrfs_level_size(root, level - 1));
1822 break;
1824 path->slots[level] = slot;
1825 while(1) {
1826 level--;
1827 c = path->nodes[level];
1828 btrfs_block_release(root, c);
1829 path->nodes[level] = next;
1830 path->slots[level] = 0;
1831 if (!level)
1832 break;
1833 next = read_tree_block(root,
1834 btrfs_node_blockptr(&next->node, 0),
1835 btrfs_level_size(root, level - 1));
1837 check_leaf(root, path, 0);
1838 return 0;