2 * Copyright (C) 2007,2008 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/sched.h>
22 #include "transaction.h"
23 #include "print-tree.h"
26 static int split_node(struct btrfs_trans_handle
*trans
, struct btrfs_root
27 *root
, struct btrfs_path
*path
, int level
);
28 static int split_leaf(struct btrfs_trans_handle
*trans
, struct btrfs_root
29 *root
, struct btrfs_key
*ins_key
,
30 struct btrfs_path
*path
, int data_size
, int extend
);
31 static int push_node_left(struct btrfs_trans_handle
*trans
,
32 struct btrfs_root
*root
, struct extent_buffer
*dst
,
33 struct extent_buffer
*src
, int empty
);
34 static int balance_node_right(struct btrfs_trans_handle
*trans
,
35 struct btrfs_root
*root
,
36 struct extent_buffer
*dst_buf
,
37 struct extent_buffer
*src_buf
);
38 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
39 struct btrfs_path
*path
, int level
, int slot
);
41 struct btrfs_path
*btrfs_alloc_path(void)
43 struct btrfs_path
*path
;
44 path
= kmem_cache_zalloc(btrfs_path_cachep
, GFP_NOFS
);
51 * set all locked nodes in the path to blocking locks. This should
52 * be done before scheduling
54 noinline
void btrfs_set_path_blocking(struct btrfs_path
*p
)
57 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
58 if (p
->nodes
[i
] && p
->locks
[i
])
59 btrfs_set_lock_blocking(p
->nodes
[i
]);
64 * reset all the locked nodes in the patch to spinning locks.
66 noinline
void btrfs_clear_path_blocking(struct btrfs_path
*p
)
69 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
70 if (p
->nodes
[i
] && p
->locks
[i
])
71 btrfs_clear_lock_blocking(p
->nodes
[i
]);
75 /* this also releases the path */
76 void btrfs_free_path(struct btrfs_path
*p
)
78 btrfs_release_path(NULL
, p
);
79 kmem_cache_free(btrfs_path_cachep
, p
);
83 * path release drops references on the extent buffers in the path
84 * and it drops any locks held by this path
86 * It is safe to call this on paths that no locks or extent buffers held.
88 noinline
void btrfs_release_path(struct btrfs_root
*root
, struct btrfs_path
*p
)
92 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
97 btrfs_tree_unlock(p
->nodes
[i
]);
100 free_extent_buffer(p
->nodes
[i
]);
106 * safely gets a reference on the root node of a tree. A lock
107 * is not taken, so a concurrent writer may put a different node
108 * at the root of the tree. See btrfs_lock_root_node for the
111 * The extent buffer returned by this has a reference taken, so
112 * it won't disappear. It may stop being the root of the tree
113 * at any time because there are no locks held.
115 struct extent_buffer
*btrfs_root_node(struct btrfs_root
*root
)
117 struct extent_buffer
*eb
;
118 spin_lock(&root
->node_lock
);
120 extent_buffer_get(eb
);
121 spin_unlock(&root
->node_lock
);
125 /* loop around taking references on and locking the root node of the
126 * tree until you end up with a lock on the root. A locked buffer
127 * is returned, with a reference held.
129 struct extent_buffer
*btrfs_lock_root_node(struct btrfs_root
*root
)
131 struct extent_buffer
*eb
;
134 eb
= btrfs_root_node(root
);
137 spin_lock(&root
->node_lock
);
138 if (eb
== root
->node
) {
139 spin_unlock(&root
->node_lock
);
142 spin_unlock(&root
->node_lock
);
144 btrfs_tree_unlock(eb
);
145 free_extent_buffer(eb
);
150 /* cowonly root (everything not a reference counted cow subvolume), just get
151 * put onto a simple dirty list. transaction.c walks this to make sure they
152 * get properly updated on disk.
154 static void add_root_to_dirty_list(struct btrfs_root
*root
)
156 if (root
->track_dirty
&& list_empty(&root
->dirty_list
)) {
157 list_add(&root
->dirty_list
,
158 &root
->fs_info
->dirty_cowonly_roots
);
163 * used by snapshot creation to make a copy of a root for a tree with
164 * a given objectid. The buffer with the new root node is returned in
165 * cow_ret, and this func returns zero on success or a negative error code.
167 int btrfs_copy_root(struct btrfs_trans_handle
*trans
,
168 struct btrfs_root
*root
,
169 struct extent_buffer
*buf
,
170 struct extent_buffer
**cow_ret
, u64 new_root_objectid
)
172 struct extent_buffer
*cow
;
176 struct btrfs_root
*new_root
;
178 new_root
= kmalloc(sizeof(*new_root
), GFP_NOFS
);
182 memcpy(new_root
, root
, sizeof(*new_root
));
183 new_root
->root_key
.objectid
= new_root_objectid
;
185 WARN_ON(root
->ref_cows
&& trans
->transid
!=
186 root
->fs_info
->running_transaction
->transid
);
187 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
189 level
= btrfs_header_level(buf
);
190 nritems
= btrfs_header_nritems(buf
);
192 cow
= btrfs_alloc_free_block(trans
, new_root
, buf
->len
, 0,
193 new_root_objectid
, trans
->transid
,
194 level
, buf
->start
, 0);
200 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
201 btrfs_set_header_bytenr(cow
, cow
->start
);
202 btrfs_set_header_generation(cow
, trans
->transid
);
203 btrfs_set_header_owner(cow
, new_root_objectid
);
204 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
);
206 write_extent_buffer(cow
, root
->fs_info
->fsid
,
207 (unsigned long)btrfs_header_fsid(cow
),
210 WARN_ON(btrfs_header_generation(buf
) > trans
->transid
);
211 ret
= btrfs_inc_ref(trans
, new_root
, buf
, cow
, NULL
);
217 btrfs_mark_buffer_dirty(cow
);
223 * does the dirty work in cow of a single block. The parent block (if
224 * supplied) is updated to point to the new cow copy. The new buffer is marked
225 * dirty and returned locked. If you modify the block it needs to be marked
228 * search_start -- an allocation hint for the new block
230 * empty_size -- a hint that you plan on doing more cow. This is the size in
231 * bytes the allocator should try to find free next to the block it returns.
232 * This is just a hint and may be ignored by the allocator.
234 * prealloc_dest -- if you have already reserved a destination for the cow,
235 * this uses that block instead of allocating a new one.
236 * btrfs_alloc_reserved_extent is used to finish the allocation.
238 static noinline
int __btrfs_cow_block(struct btrfs_trans_handle
*trans
,
239 struct btrfs_root
*root
,
240 struct extent_buffer
*buf
,
241 struct extent_buffer
*parent
, int parent_slot
,
242 struct extent_buffer
**cow_ret
,
243 u64 search_start
, u64 empty_size
,
247 struct extent_buffer
*cow
;
256 WARN_ON(!btrfs_tree_locked(buf
));
259 parent_start
= parent
->start
;
263 WARN_ON(root
->ref_cows
&& trans
->transid
!=
264 root
->fs_info
->running_transaction
->transid
);
265 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
267 level
= btrfs_header_level(buf
);
268 nritems
= btrfs_header_nritems(buf
);
271 struct btrfs_key ins
;
273 ins
.objectid
= prealloc_dest
;
274 ins
.offset
= buf
->len
;
275 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
277 ret
= btrfs_alloc_reserved_extent(trans
, root
, parent_start
,
278 root
->root_key
.objectid
,
279 trans
->transid
, level
, &ins
);
281 cow
= btrfs_init_new_buffer(trans
, root
, prealloc_dest
,
284 cow
= btrfs_alloc_free_block(trans
, root
, buf
->len
,
286 root
->root_key
.objectid
,
287 trans
->transid
, level
,
288 search_start
, empty_size
);
293 /* cow is set to blocking by btrfs_init_new_buffer */
295 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
296 btrfs_set_header_bytenr(cow
, cow
->start
);
297 btrfs_set_header_generation(cow
, trans
->transid
);
298 btrfs_set_header_owner(cow
, root
->root_key
.objectid
);
299 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
);
301 write_extent_buffer(cow
, root
->fs_info
->fsid
,
302 (unsigned long)btrfs_header_fsid(cow
),
305 WARN_ON(btrfs_header_generation(buf
) > trans
->transid
);
306 if (btrfs_header_generation(buf
) != trans
->transid
) {
308 ret
= btrfs_inc_ref(trans
, root
, buf
, cow
, &nr_extents
);
312 ret
= btrfs_cache_ref(trans
, root
, buf
, nr_extents
);
314 } else if (btrfs_header_owner(buf
) == BTRFS_TREE_RELOC_OBJECTID
) {
316 * There are only two places that can drop reference to
317 * tree blocks owned by living reloc trees, one is here,
318 * the other place is btrfs_drop_subtree. In both places,
319 * we check reference count while tree block is locked.
320 * Furthermore, if reference count is one, it won't get
321 * increased by someone else.
324 ret
= btrfs_lookup_extent_ref(trans
, root
, buf
->start
,
328 ret
= btrfs_update_ref(trans
, root
, buf
, cow
,
330 clean_tree_block(trans
, root
, buf
);
332 ret
= btrfs_inc_ref(trans
, root
, buf
, cow
, NULL
);
336 ret
= btrfs_update_ref(trans
, root
, buf
, cow
, 0, nritems
);
339 clean_tree_block(trans
, root
, buf
);
342 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
343 ret
= btrfs_reloc_tree_cache_ref(trans
, root
, cow
, buf
->start
);
347 if (buf
== root
->node
) {
348 WARN_ON(parent
&& parent
!= buf
);
350 spin_lock(&root
->node_lock
);
352 extent_buffer_get(cow
);
353 spin_unlock(&root
->node_lock
);
355 if (buf
!= root
->commit_root
) {
356 btrfs_free_extent(trans
, root
, buf
->start
,
357 buf
->len
, buf
->start
,
358 root
->root_key
.objectid
,
359 btrfs_header_generation(buf
),
362 free_extent_buffer(buf
);
363 add_root_to_dirty_list(root
);
365 btrfs_set_node_blockptr(parent
, parent_slot
,
367 WARN_ON(trans
->transid
== 0);
368 btrfs_set_node_ptr_generation(parent
, parent_slot
,
370 btrfs_mark_buffer_dirty(parent
);
371 WARN_ON(btrfs_header_generation(parent
) != trans
->transid
);
372 btrfs_free_extent(trans
, root
, buf
->start
, buf
->len
,
373 parent_start
, btrfs_header_owner(parent
),
374 btrfs_header_generation(parent
), level
, 1);
377 btrfs_tree_unlock(buf
);
378 free_extent_buffer(buf
);
379 btrfs_mark_buffer_dirty(cow
);
385 * cows a single block, see __btrfs_cow_block for the real work.
386 * This version of it has extra checks so that a block isn't cow'd more than
387 * once per transaction, as long as it hasn't been written yet
389 noinline
int btrfs_cow_block(struct btrfs_trans_handle
*trans
,
390 struct btrfs_root
*root
, struct extent_buffer
*buf
,
391 struct extent_buffer
*parent
, int parent_slot
,
392 struct extent_buffer
**cow_ret
, u64 prealloc_dest
)
397 if (trans
->transaction
!= root
->fs_info
->running_transaction
) {
398 printk(KERN_CRIT
"trans %llu running %llu\n",
399 (unsigned long long)trans
->transid
,
401 root
->fs_info
->running_transaction
->transid
);
404 if (trans
->transid
!= root
->fs_info
->generation
) {
405 printk(KERN_CRIT
"trans %llu running %llu\n",
406 (unsigned long long)trans
->transid
,
407 (unsigned long long)root
->fs_info
->generation
);
411 if (btrfs_header_generation(buf
) == trans
->transid
&&
412 btrfs_header_owner(buf
) == root
->root_key
.objectid
&&
413 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
415 WARN_ON(prealloc_dest
);
419 search_start
= buf
->start
& ~((u64
)(1024 * 1024 * 1024) - 1);
422 btrfs_set_lock_blocking(parent
);
423 btrfs_set_lock_blocking(buf
);
425 ret
= __btrfs_cow_block(trans
, root
, buf
, parent
,
426 parent_slot
, cow_ret
, search_start
, 0,
432 * helper function for defrag to decide if two blocks pointed to by a
433 * node are actually close by
435 static int close_blocks(u64 blocknr
, u64 other
, u32 blocksize
)
437 if (blocknr
< other
&& other
- (blocknr
+ blocksize
) < 32768)
439 if (blocknr
> other
&& blocknr
- (other
+ blocksize
) < 32768)
445 * compare two keys in a memcmp fashion
447 static int comp_keys(struct btrfs_disk_key
*disk
, struct btrfs_key
*k2
)
451 btrfs_disk_key_to_cpu(&k1
, disk
);
453 if (k1
.objectid
> k2
->objectid
)
455 if (k1
.objectid
< k2
->objectid
)
457 if (k1
.type
> k2
->type
)
459 if (k1
.type
< k2
->type
)
461 if (k1
.offset
> k2
->offset
)
463 if (k1
.offset
< k2
->offset
)
469 * same as comp_keys only with two btrfs_key's
471 static int comp_cpu_keys(struct btrfs_key
*k1
, struct btrfs_key
*k2
)
473 if (k1
->objectid
> k2
->objectid
)
475 if (k1
->objectid
< k2
->objectid
)
477 if (k1
->type
> k2
->type
)
479 if (k1
->type
< k2
->type
)
481 if (k1
->offset
> k2
->offset
)
483 if (k1
->offset
< k2
->offset
)
489 * this is used by the defrag code to go through all the
490 * leaves pointed to by a node and reallocate them so that
491 * disk order is close to key order
493 int btrfs_realloc_node(struct btrfs_trans_handle
*trans
,
494 struct btrfs_root
*root
, struct extent_buffer
*parent
,
495 int start_slot
, int cache_only
, u64
*last_ret
,
496 struct btrfs_key
*progress
)
498 struct extent_buffer
*cur
;
501 u64 search_start
= *last_ret
;
511 int progress_passed
= 0;
512 struct btrfs_disk_key disk_key
;
514 parent_level
= btrfs_header_level(parent
);
515 if (cache_only
&& parent_level
!= 1)
518 if (trans
->transaction
!= root
->fs_info
->running_transaction
)
520 if (trans
->transid
!= root
->fs_info
->generation
)
523 parent_nritems
= btrfs_header_nritems(parent
);
524 blocksize
= btrfs_level_size(root
, parent_level
- 1);
525 end_slot
= parent_nritems
;
527 if (parent_nritems
== 1)
530 btrfs_set_lock_blocking(parent
);
532 for (i
= start_slot
; i
< end_slot
; i
++) {
535 if (!parent
->map_token
) {
536 map_extent_buffer(parent
,
537 btrfs_node_key_ptr_offset(i
),
538 sizeof(struct btrfs_key_ptr
),
539 &parent
->map_token
, &parent
->kaddr
,
540 &parent
->map_start
, &parent
->map_len
,
543 btrfs_node_key(parent
, &disk_key
, i
);
544 if (!progress_passed
&& comp_keys(&disk_key
, progress
) < 0)
548 blocknr
= btrfs_node_blockptr(parent
, i
);
549 gen
= btrfs_node_ptr_generation(parent
, i
);
551 last_block
= blocknr
;
554 other
= btrfs_node_blockptr(parent
, i
- 1);
555 close
= close_blocks(blocknr
, other
, blocksize
);
557 if (!close
&& i
< end_slot
- 2) {
558 other
= btrfs_node_blockptr(parent
, i
+ 1);
559 close
= close_blocks(blocknr
, other
, blocksize
);
562 last_block
= blocknr
;
565 if (parent
->map_token
) {
566 unmap_extent_buffer(parent
, parent
->map_token
,
568 parent
->map_token
= NULL
;
571 cur
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
573 uptodate
= btrfs_buffer_uptodate(cur
, gen
);
576 if (!cur
|| !uptodate
) {
578 free_extent_buffer(cur
);
582 cur
= read_tree_block(root
, blocknr
,
584 } else if (!uptodate
) {
585 btrfs_read_buffer(cur
, gen
);
588 if (search_start
== 0)
589 search_start
= last_block
;
591 btrfs_tree_lock(cur
);
592 btrfs_set_lock_blocking(cur
);
593 err
= __btrfs_cow_block(trans
, root
, cur
, parent
, i
,
596 (end_slot
- i
) * blocksize
), 0);
598 btrfs_tree_unlock(cur
);
599 free_extent_buffer(cur
);
602 search_start
= cur
->start
;
603 last_block
= cur
->start
;
604 *last_ret
= search_start
;
605 btrfs_tree_unlock(cur
);
606 free_extent_buffer(cur
);
608 if (parent
->map_token
) {
609 unmap_extent_buffer(parent
, parent
->map_token
,
611 parent
->map_token
= NULL
;
617 * The leaf data grows from end-to-front in the node.
618 * this returns the address of the start of the last item,
619 * which is the stop of the leaf data stack
621 static inline unsigned int leaf_data_end(struct btrfs_root
*root
,
622 struct extent_buffer
*leaf
)
624 u32 nr
= btrfs_header_nritems(leaf
);
626 return BTRFS_LEAF_DATA_SIZE(root
);
627 return btrfs_item_offset_nr(leaf
, nr
- 1);
631 * extra debugging checks to make sure all the items in a key are
632 * well formed and in the proper order
634 static int check_node(struct btrfs_root
*root
, struct btrfs_path
*path
,
637 struct extent_buffer
*parent
= NULL
;
638 struct extent_buffer
*node
= path
->nodes
[level
];
639 struct btrfs_disk_key parent_key
;
640 struct btrfs_disk_key node_key
;
643 struct btrfs_key cpukey
;
644 u32 nritems
= btrfs_header_nritems(node
);
646 if (path
->nodes
[level
+ 1])
647 parent
= path
->nodes
[level
+ 1];
649 slot
= path
->slots
[level
];
650 BUG_ON(nritems
== 0);
652 parent_slot
= path
->slots
[level
+ 1];
653 btrfs_node_key(parent
, &parent_key
, parent_slot
);
654 btrfs_node_key(node
, &node_key
, 0);
655 BUG_ON(memcmp(&parent_key
, &node_key
,
656 sizeof(struct btrfs_disk_key
)));
657 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
658 btrfs_header_bytenr(node
));
660 BUG_ON(nritems
> BTRFS_NODEPTRS_PER_BLOCK(root
));
662 btrfs_node_key_to_cpu(node
, &cpukey
, slot
- 1);
663 btrfs_node_key(node
, &node_key
, slot
);
664 BUG_ON(comp_keys(&node_key
, &cpukey
) <= 0);
666 if (slot
< nritems
- 1) {
667 btrfs_node_key_to_cpu(node
, &cpukey
, slot
+ 1);
668 btrfs_node_key(node
, &node_key
, slot
);
669 BUG_ON(comp_keys(&node_key
, &cpukey
) >= 0);
675 * extra checking to make sure all the items in a leaf are
676 * well formed and in the proper order
678 static int check_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
,
681 struct extent_buffer
*leaf
= path
->nodes
[level
];
682 struct extent_buffer
*parent
= NULL
;
684 struct btrfs_key cpukey
;
685 struct btrfs_disk_key parent_key
;
686 struct btrfs_disk_key leaf_key
;
687 int slot
= path
->slots
[0];
689 u32 nritems
= btrfs_header_nritems(leaf
);
691 if (path
->nodes
[level
+ 1])
692 parent
= path
->nodes
[level
+ 1];
698 parent_slot
= path
->slots
[level
+ 1];
699 btrfs_node_key(parent
, &parent_key
, parent_slot
);
700 btrfs_item_key(leaf
, &leaf_key
, 0);
702 BUG_ON(memcmp(&parent_key
, &leaf_key
,
703 sizeof(struct btrfs_disk_key
)));
704 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
705 btrfs_header_bytenr(leaf
));
707 if (slot
!= 0 && slot
< nritems
- 1) {
708 btrfs_item_key(leaf
, &leaf_key
, slot
);
709 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
- 1);
710 if (comp_keys(&leaf_key
, &cpukey
) <= 0) {
711 btrfs_print_leaf(root
, leaf
);
712 printk(KERN_CRIT
"slot %d offset bad key\n", slot
);
715 if (btrfs_item_offset_nr(leaf
, slot
- 1) !=
716 btrfs_item_end_nr(leaf
, slot
)) {
717 btrfs_print_leaf(root
, leaf
);
718 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
722 if (slot
< nritems
- 1) {
723 btrfs_item_key(leaf
, &leaf_key
, slot
);
724 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
+ 1);
725 BUG_ON(comp_keys(&leaf_key
, &cpukey
) >= 0);
726 if (btrfs_item_offset_nr(leaf
, slot
) !=
727 btrfs_item_end_nr(leaf
, slot
+ 1)) {
728 btrfs_print_leaf(root
, leaf
);
729 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
733 BUG_ON(btrfs_item_offset_nr(leaf
, 0) +
734 btrfs_item_size_nr(leaf
, 0) != BTRFS_LEAF_DATA_SIZE(root
));
738 static noinline
int check_block(struct btrfs_root
*root
,
739 struct btrfs_path
*path
, int level
)
743 return check_leaf(root
, path
, level
);
744 return check_node(root
, path
, level
);
748 * search for key in the extent_buffer. The items start at offset p,
749 * and they are item_size apart. There are 'max' items in p.
751 * the slot in the array is returned via slot, and it points to
752 * the place where you would insert key if it is not found in
755 * slot may point to max if the key is bigger than all of the keys
757 static noinline
int generic_bin_search(struct extent_buffer
*eb
,
759 int item_size
, struct btrfs_key
*key
,
766 struct btrfs_disk_key
*tmp
= NULL
;
767 struct btrfs_disk_key unaligned
;
768 unsigned long offset
;
769 char *map_token
= NULL
;
771 unsigned long map_start
= 0;
772 unsigned long map_len
= 0;
776 mid
= (low
+ high
) / 2;
777 offset
= p
+ mid
* item_size
;
779 if (!map_token
|| offset
< map_start
||
780 (offset
+ sizeof(struct btrfs_disk_key
)) >
781 map_start
+ map_len
) {
783 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
787 err
= map_private_extent_buffer(eb
, offset
,
788 sizeof(struct btrfs_disk_key
),
790 &map_start
, &map_len
, KM_USER0
);
793 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
796 read_extent_buffer(eb
, &unaligned
,
797 offset
, sizeof(unaligned
));
802 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
805 ret
= comp_keys(tmp
, key
);
814 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
820 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
825 * simple bin_search frontend that does the right thing for
828 static int bin_search(struct extent_buffer
*eb
, struct btrfs_key
*key
,
829 int level
, int *slot
)
832 return generic_bin_search(eb
,
833 offsetof(struct btrfs_leaf
, items
),
834 sizeof(struct btrfs_item
),
835 key
, btrfs_header_nritems(eb
),
838 return generic_bin_search(eb
,
839 offsetof(struct btrfs_node
, ptrs
),
840 sizeof(struct btrfs_key_ptr
),
841 key
, btrfs_header_nritems(eb
),
847 /* given a node and slot number, this reads the blocks it points to. The
848 * extent buffer is returned with a reference taken (but unlocked).
849 * NULL is returned on error.
851 static noinline
struct extent_buffer
*read_node_slot(struct btrfs_root
*root
,
852 struct extent_buffer
*parent
, int slot
)
854 int level
= btrfs_header_level(parent
);
857 if (slot
>= btrfs_header_nritems(parent
))
862 return read_tree_block(root
, btrfs_node_blockptr(parent
, slot
),
863 btrfs_level_size(root
, level
- 1),
864 btrfs_node_ptr_generation(parent
, slot
));
868 * node level balancing, used to make sure nodes are in proper order for
869 * item deletion. We balance from the top down, so we have to make sure
870 * that a deletion won't leave an node completely empty later on.
872 static noinline
int balance_level(struct btrfs_trans_handle
*trans
,
873 struct btrfs_root
*root
,
874 struct btrfs_path
*path
, int level
)
876 struct extent_buffer
*right
= NULL
;
877 struct extent_buffer
*mid
;
878 struct extent_buffer
*left
= NULL
;
879 struct extent_buffer
*parent
= NULL
;
883 int orig_slot
= path
->slots
[level
];
884 int err_on_enospc
= 0;
890 mid
= path
->nodes
[level
];
892 WARN_ON(!path
->locks
[level
]);
893 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
895 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
897 if (level
< BTRFS_MAX_LEVEL
- 1)
898 parent
= path
->nodes
[level
+ 1];
899 pslot
= path
->slots
[level
+ 1];
902 * deal with the case where there is only one pointer in the root
903 * by promoting the node below to a root
906 struct extent_buffer
*child
;
908 if (btrfs_header_nritems(mid
) != 1)
911 /* promote the child to a root */
912 child
= read_node_slot(root
, mid
, 0);
914 btrfs_tree_lock(child
);
915 btrfs_set_lock_blocking(child
);
916 ret
= btrfs_cow_block(trans
, root
, child
, mid
, 0, &child
, 0);
919 spin_lock(&root
->node_lock
);
921 spin_unlock(&root
->node_lock
);
923 ret
= btrfs_update_extent_ref(trans
, root
, child
->start
,
924 mid
->start
, child
->start
,
925 root
->root_key
.objectid
,
926 trans
->transid
, level
- 1);
929 add_root_to_dirty_list(root
);
930 btrfs_tree_unlock(child
);
932 path
->locks
[level
] = 0;
933 path
->nodes
[level
] = NULL
;
934 clean_tree_block(trans
, root
, mid
);
935 btrfs_tree_unlock(mid
);
936 /* once for the path */
937 free_extent_buffer(mid
);
938 ret
= btrfs_free_extent(trans
, root
, mid
->start
, mid
->len
,
939 mid
->start
, root
->root_key
.objectid
,
940 btrfs_header_generation(mid
),
942 /* once for the root ptr */
943 free_extent_buffer(mid
);
946 if (btrfs_header_nritems(mid
) >
947 BTRFS_NODEPTRS_PER_BLOCK(root
) / 4)
950 if (btrfs_header_nritems(mid
) < 2)
953 left
= read_node_slot(root
, parent
, pslot
- 1);
955 btrfs_tree_lock(left
);
956 btrfs_set_lock_blocking(left
);
957 wret
= btrfs_cow_block(trans
, root
, left
,
958 parent
, pslot
- 1, &left
, 0);
964 right
= read_node_slot(root
, parent
, pslot
+ 1);
966 btrfs_tree_lock(right
);
967 btrfs_set_lock_blocking(right
);
968 wret
= btrfs_cow_block(trans
, root
, right
,
969 parent
, pslot
+ 1, &right
, 0);
976 /* first, try to make some room in the middle buffer */
978 orig_slot
+= btrfs_header_nritems(left
);
979 wret
= push_node_left(trans
, root
, left
, mid
, 1);
982 if (btrfs_header_nritems(mid
) < 2)
987 * then try to empty the right most buffer into the middle
990 wret
= push_node_left(trans
, root
, mid
, right
, 1);
991 if (wret
< 0 && wret
!= -ENOSPC
)
993 if (btrfs_header_nritems(right
) == 0) {
994 u64 bytenr
= right
->start
;
995 u64 generation
= btrfs_header_generation(parent
);
996 u32 blocksize
= right
->len
;
998 clean_tree_block(trans
, root
, right
);
999 btrfs_tree_unlock(right
);
1000 free_extent_buffer(right
);
1002 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
+
1006 wret
= btrfs_free_extent(trans
, root
, bytenr
,
1007 blocksize
, parent
->start
,
1008 btrfs_header_owner(parent
),
1009 generation
, level
, 1);
1013 struct btrfs_disk_key right_key
;
1014 btrfs_node_key(right
, &right_key
, 0);
1015 btrfs_set_node_key(parent
, &right_key
, pslot
+ 1);
1016 btrfs_mark_buffer_dirty(parent
);
1019 if (btrfs_header_nritems(mid
) == 1) {
1021 * we're not allowed to leave a node with one item in the
1022 * tree during a delete. A deletion from lower in the tree
1023 * could try to delete the only pointer in this node.
1024 * So, pull some keys from the left.
1025 * There has to be a left pointer at this point because
1026 * otherwise we would have pulled some pointers from the
1030 wret
= balance_node_right(trans
, root
, mid
, left
);
1036 wret
= push_node_left(trans
, root
, left
, mid
, 1);
1042 if (btrfs_header_nritems(mid
) == 0) {
1043 /* we've managed to empty the middle node, drop it */
1044 u64 root_gen
= btrfs_header_generation(parent
);
1045 u64 bytenr
= mid
->start
;
1046 u32 blocksize
= mid
->len
;
1048 clean_tree_block(trans
, root
, mid
);
1049 btrfs_tree_unlock(mid
);
1050 free_extent_buffer(mid
);
1052 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
);
1055 wret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
1057 btrfs_header_owner(parent
),
1058 root_gen
, level
, 1);
1062 /* update the parent key to reflect our changes */
1063 struct btrfs_disk_key mid_key
;
1064 btrfs_node_key(mid
, &mid_key
, 0);
1065 btrfs_set_node_key(parent
, &mid_key
, pslot
);
1066 btrfs_mark_buffer_dirty(parent
);
1069 /* update the path */
1071 if (btrfs_header_nritems(left
) > orig_slot
) {
1072 extent_buffer_get(left
);
1073 /* left was locked after cow */
1074 path
->nodes
[level
] = left
;
1075 path
->slots
[level
+ 1] -= 1;
1076 path
->slots
[level
] = orig_slot
;
1078 btrfs_tree_unlock(mid
);
1079 free_extent_buffer(mid
);
1082 orig_slot
-= btrfs_header_nritems(left
);
1083 path
->slots
[level
] = orig_slot
;
1086 /* double check we haven't messed things up */
1087 check_block(root
, path
, level
);
1089 btrfs_node_blockptr(path
->nodes
[level
], path
->slots
[level
]))
1093 btrfs_tree_unlock(right
);
1094 free_extent_buffer(right
);
1097 if (path
->nodes
[level
] != left
)
1098 btrfs_tree_unlock(left
);
1099 free_extent_buffer(left
);
1104 /* Node balancing for insertion. Here we only split or push nodes around
1105 * when they are completely full. This is also done top down, so we
1106 * have to be pessimistic.
1108 static noinline
int push_nodes_for_insert(struct btrfs_trans_handle
*trans
,
1109 struct btrfs_root
*root
,
1110 struct btrfs_path
*path
, int level
)
1112 struct extent_buffer
*right
= NULL
;
1113 struct extent_buffer
*mid
;
1114 struct extent_buffer
*left
= NULL
;
1115 struct extent_buffer
*parent
= NULL
;
1119 int orig_slot
= path
->slots
[level
];
1125 mid
= path
->nodes
[level
];
1126 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
1127 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
1129 if (level
< BTRFS_MAX_LEVEL
- 1)
1130 parent
= path
->nodes
[level
+ 1];
1131 pslot
= path
->slots
[level
+ 1];
1136 left
= read_node_slot(root
, parent
, pslot
- 1);
1138 /* first, try to make some room in the middle buffer */
1142 btrfs_tree_lock(left
);
1143 btrfs_set_lock_blocking(left
);
1145 left_nr
= btrfs_header_nritems(left
);
1146 if (left_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1149 ret
= btrfs_cow_block(trans
, root
, left
, parent
,
1150 pslot
- 1, &left
, 0);
1154 wret
= push_node_left(trans
, root
,
1161 struct btrfs_disk_key disk_key
;
1162 orig_slot
+= left_nr
;
1163 btrfs_node_key(mid
, &disk_key
, 0);
1164 btrfs_set_node_key(parent
, &disk_key
, pslot
);
1165 btrfs_mark_buffer_dirty(parent
);
1166 if (btrfs_header_nritems(left
) > orig_slot
) {
1167 path
->nodes
[level
] = left
;
1168 path
->slots
[level
+ 1] -= 1;
1169 path
->slots
[level
] = orig_slot
;
1170 btrfs_tree_unlock(mid
);
1171 free_extent_buffer(mid
);
1174 btrfs_header_nritems(left
);
1175 path
->slots
[level
] = orig_slot
;
1176 btrfs_tree_unlock(left
);
1177 free_extent_buffer(left
);
1181 btrfs_tree_unlock(left
);
1182 free_extent_buffer(left
);
1184 right
= read_node_slot(root
, parent
, pslot
+ 1);
1187 * then try to empty the right most buffer into the middle
1192 btrfs_tree_lock(right
);
1193 btrfs_set_lock_blocking(right
);
1195 right_nr
= btrfs_header_nritems(right
);
1196 if (right_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1199 ret
= btrfs_cow_block(trans
, root
, right
,
1205 wret
= balance_node_right(trans
, root
,
1212 struct btrfs_disk_key disk_key
;
1214 btrfs_node_key(right
, &disk_key
, 0);
1215 btrfs_set_node_key(parent
, &disk_key
, pslot
+ 1);
1216 btrfs_mark_buffer_dirty(parent
);
1218 if (btrfs_header_nritems(mid
) <= orig_slot
) {
1219 path
->nodes
[level
] = right
;
1220 path
->slots
[level
+ 1] += 1;
1221 path
->slots
[level
] = orig_slot
-
1222 btrfs_header_nritems(mid
);
1223 btrfs_tree_unlock(mid
);
1224 free_extent_buffer(mid
);
1226 btrfs_tree_unlock(right
);
1227 free_extent_buffer(right
);
1231 btrfs_tree_unlock(right
);
1232 free_extent_buffer(right
);
1238 * readahead one full node of leaves, finding things that are close
1239 * to the block in 'slot', and triggering ra on them.
1241 static noinline
void reada_for_search(struct btrfs_root
*root
,
1242 struct btrfs_path
*path
,
1243 int level
, int slot
, u64 objectid
)
1245 struct extent_buffer
*node
;
1246 struct btrfs_disk_key disk_key
;
1251 int direction
= path
->reada
;
1252 struct extent_buffer
*eb
;
1260 if (!path
->nodes
[level
])
1263 node
= path
->nodes
[level
];
1265 search
= btrfs_node_blockptr(node
, slot
);
1266 blocksize
= btrfs_level_size(root
, level
- 1);
1267 eb
= btrfs_find_tree_block(root
, search
, blocksize
);
1269 free_extent_buffer(eb
);
1275 nritems
= btrfs_header_nritems(node
);
1278 if (direction
< 0) {
1282 } else if (direction
> 0) {
1287 if (path
->reada
< 0 && objectid
) {
1288 btrfs_node_key(node
, &disk_key
, nr
);
1289 if (btrfs_disk_key_objectid(&disk_key
) != objectid
)
1292 search
= btrfs_node_blockptr(node
, nr
);
1293 if ((search
<= target
&& target
- search
<= 65536) ||
1294 (search
> target
&& search
- target
<= 65536)) {
1295 readahead_tree_block(root
, search
, blocksize
,
1296 btrfs_node_ptr_generation(node
, nr
));
1300 if ((nread
> 65536 || nscan
> 32))
1306 * returns -EAGAIN if it had to drop the path, or zero if everything was in
1309 static noinline
int reada_for_balance(struct btrfs_root
*root
,
1310 struct btrfs_path
*path
, int level
)
1314 struct extent_buffer
*parent
;
1315 struct extent_buffer
*eb
;
1322 parent
= path
->nodes
[level
- 1];
1326 nritems
= btrfs_header_nritems(parent
);
1327 slot
= path
->slots
[level
];
1328 blocksize
= btrfs_level_size(root
, level
);
1331 block1
= btrfs_node_blockptr(parent
, slot
- 1);
1332 gen
= btrfs_node_ptr_generation(parent
, slot
- 1);
1333 eb
= btrfs_find_tree_block(root
, block1
, blocksize
);
1334 if (eb
&& btrfs_buffer_uptodate(eb
, gen
))
1336 free_extent_buffer(eb
);
1338 if (slot
< nritems
) {
1339 block2
= btrfs_node_blockptr(parent
, slot
+ 1);
1340 gen
= btrfs_node_ptr_generation(parent
, slot
+ 1);
1341 eb
= btrfs_find_tree_block(root
, block2
, blocksize
);
1342 if (eb
&& btrfs_buffer_uptodate(eb
, gen
))
1344 free_extent_buffer(eb
);
1346 if (block1
|| block2
) {
1348 btrfs_release_path(root
, path
);
1350 readahead_tree_block(root
, block1
, blocksize
, 0);
1352 readahead_tree_block(root
, block2
, blocksize
, 0);
1355 eb
= read_tree_block(root
, block1
, blocksize
, 0);
1356 free_extent_buffer(eb
);
1359 eb
= read_tree_block(root
, block2
, blocksize
, 0);
1360 free_extent_buffer(eb
);
1368 * when we walk down the tree, it is usually safe to unlock the higher layers
1369 * in the tree. The exceptions are when our path goes through slot 0, because
1370 * operations on the tree might require changing key pointers higher up in the
1373 * callers might also have set path->keep_locks, which tells this code to keep
1374 * the lock if the path points to the last slot in the block. This is part of
1375 * walking through the tree, and selecting the next slot in the higher block.
1377 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1378 * if lowest_unlock is 1, level 0 won't be unlocked
1380 static noinline
void unlock_up(struct btrfs_path
*path
, int level
,
1384 int skip_level
= level
;
1386 struct extent_buffer
*t
;
1388 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1389 if (!path
->nodes
[i
])
1391 if (!path
->locks
[i
])
1393 if (!no_skips
&& path
->slots
[i
] == 0) {
1397 if (!no_skips
&& path
->keep_locks
) {
1400 nritems
= btrfs_header_nritems(t
);
1401 if (nritems
< 1 || path
->slots
[i
] >= nritems
- 1) {
1406 if (skip_level
< i
&& i
>= lowest_unlock
)
1410 if (i
>= lowest_unlock
&& i
> skip_level
&& path
->locks
[i
]) {
1411 btrfs_tree_unlock(t
);
1418 * This releases any locks held in the path starting at level and
1419 * going all the way up to the root.
1421 * btrfs_search_slot will keep the lock held on higher nodes in a few
1422 * corner cases, such as COW of the block at slot zero in the node. This
1423 * ignores those rules, and it should only be called when there are no
1424 * more updates to be done higher up in the tree.
1426 noinline
void btrfs_unlock_up_safe(struct btrfs_path
*path
, int level
)
1430 if (path
->keep_locks
|| path
->lowest_level
)
1433 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1434 if (!path
->nodes
[i
])
1436 if (!path
->locks
[i
])
1438 btrfs_tree_unlock(path
->nodes
[i
]);
1444 * look for key in the tree. path is filled in with nodes along the way
1445 * if key is found, we return zero and you can find the item in the leaf
1446 * level of the path (level 0)
1448 * If the key isn't found, the path points to the slot where it should
1449 * be inserted, and 1 is returned. If there are other errors during the
1450 * search a negative error number is returned.
1452 * if ins_len > 0, nodes and leaves will be split as we walk down the
1453 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1456 int btrfs_search_slot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1457 *root
, struct btrfs_key
*key
, struct btrfs_path
*p
, int
1460 struct extent_buffer
*b
;
1461 struct extent_buffer
*tmp
;
1465 int should_reada
= p
->reada
;
1466 int lowest_unlock
= 1;
1468 u8 lowest_level
= 0;
1471 struct btrfs_key prealloc_block
;
1473 lowest_level
= p
->lowest_level
;
1474 WARN_ON(lowest_level
&& ins_len
> 0);
1475 WARN_ON(p
->nodes
[0] != NULL
);
1480 prealloc_block
.objectid
= 0;
1483 if (p
->skip_locking
)
1484 b
= btrfs_root_node(root
);
1486 b
= btrfs_lock_root_node(root
);
1489 level
= btrfs_header_level(b
);
1492 * setup the path here so we can release it under lock
1493 * contention with the cow code
1495 p
->nodes
[level
] = b
;
1496 if (!p
->skip_locking
)
1497 p
->locks
[level
] = 1;
1502 /* is a cow on this block not required */
1503 if (btrfs_header_generation(b
) == trans
->transid
&&
1504 btrfs_header_owner(b
) == root
->root_key
.objectid
&&
1505 !btrfs_header_flag(b
, BTRFS_HEADER_FLAG_WRITTEN
)) {
1509 /* ok, we have to cow, is our old prealloc the right
1512 if (prealloc_block
.objectid
&&
1513 prealloc_block
.offset
!= b
->len
) {
1514 btrfs_release_path(root
, p
);
1515 btrfs_free_reserved_extent(root
,
1516 prealloc_block
.objectid
,
1517 prealloc_block
.offset
);
1518 prealloc_block
.objectid
= 0;
1523 * for higher level blocks, try not to allocate blocks
1524 * with the block and the parent locks held.
1526 if (level
> 0 && !prealloc_block
.objectid
) {
1528 u64 hint
= b
->start
;
1530 btrfs_release_path(root
, p
);
1531 ret
= btrfs_reserve_extent(trans
, root
,
1534 &prealloc_block
, 0);
1539 btrfs_set_path_blocking(p
);
1541 wret
= btrfs_cow_block(trans
, root
, b
,
1542 p
->nodes
[level
+ 1],
1543 p
->slots
[level
+ 1],
1544 &b
, prealloc_block
.objectid
);
1545 prealloc_block
.objectid
= 0;
1547 free_extent_buffer(b
);
1553 BUG_ON(!cow
&& ins_len
);
1554 if (level
!= btrfs_header_level(b
))
1556 level
= btrfs_header_level(b
);
1558 p
->nodes
[level
] = b
;
1559 if (!p
->skip_locking
)
1560 p
->locks
[level
] = 1;
1562 btrfs_clear_path_blocking(p
);
1565 * we have a lock on b and as long as we aren't changing
1566 * the tree, there is no way for the items in b to change.
1567 * It is safe to drop the lock on our parent before we
1568 * go through the expensive btree search on b.
1570 * If cow is true, then we might be changing slot zero,
1571 * which may require changing the parent. So, we can't
1572 * drop the lock until after we know which slot we're
1576 btrfs_unlock_up_safe(p
, level
+ 1);
1578 ret
= check_block(root
, p
, level
);
1584 ret
= bin_search(b
, key
, level
, &slot
);
1587 if (ret
&& slot
> 0)
1589 p
->slots
[level
] = slot
;
1590 if ((p
->search_for_split
|| ins_len
> 0) &&
1591 btrfs_header_nritems(b
) >=
1592 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3) {
1595 sret
= reada_for_balance(root
, p
, level
);
1599 btrfs_set_path_blocking(p
);
1600 sret
= split_node(trans
, root
, p
, level
);
1601 btrfs_clear_path_blocking(p
);
1608 b
= p
->nodes
[level
];
1609 slot
= p
->slots
[level
];
1610 } else if (ins_len
< 0 &&
1611 btrfs_header_nritems(b
) <
1612 BTRFS_NODEPTRS_PER_BLOCK(root
) / 4) {
1615 sret
= reada_for_balance(root
, p
, level
);
1619 btrfs_set_path_blocking(p
);
1620 sret
= balance_level(trans
, root
, p
, level
);
1621 btrfs_clear_path_blocking(p
);
1627 b
= p
->nodes
[level
];
1629 btrfs_release_path(NULL
, p
);
1632 slot
= p
->slots
[level
];
1633 BUG_ON(btrfs_header_nritems(b
) == 1);
1635 unlock_up(p
, level
, lowest_unlock
);
1637 /* this is only true while dropping a snapshot */
1638 if (level
== lowest_level
) {
1643 blocknr
= btrfs_node_blockptr(b
, slot
);
1644 gen
= btrfs_node_ptr_generation(b
, slot
);
1645 blocksize
= btrfs_level_size(root
, level
- 1);
1647 tmp
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
1648 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
1652 * reduce lock contention at high levels
1653 * of the btree by dropping locks before
1657 btrfs_release_path(NULL
, p
);
1659 free_extent_buffer(tmp
);
1661 reada_for_search(root
, p
,
1665 tmp
= read_tree_block(root
, blocknr
,
1668 free_extent_buffer(tmp
);
1671 btrfs_set_path_blocking(p
);
1673 free_extent_buffer(tmp
);
1675 reada_for_search(root
, p
,
1678 b
= read_node_slot(root
, b
, slot
);
1681 if (!p
->skip_locking
) {
1684 btrfs_clear_path_blocking(p
);
1685 lret
= btrfs_try_spin_lock(b
);
1688 btrfs_set_path_blocking(p
);
1690 btrfs_clear_path_blocking(p
);
1694 p
->slots
[level
] = slot
;
1696 btrfs_leaf_free_space(root
, b
) < ins_len
) {
1699 btrfs_set_path_blocking(p
);
1700 sret
= split_leaf(trans
, root
, key
,
1701 p
, ins_len
, ret
== 0);
1702 btrfs_clear_path_blocking(p
);
1710 if (!p
->search_for_split
)
1711 unlock_up(p
, level
, lowest_unlock
);
1718 * we don't really know what they plan on doing with the path
1719 * from here on, so for now just mark it as blocking
1721 btrfs_set_path_blocking(p
);
1722 if (prealloc_block
.objectid
) {
1723 btrfs_free_reserved_extent(root
,
1724 prealloc_block
.objectid
,
1725 prealloc_block
.offset
);
1730 int btrfs_merge_path(struct btrfs_trans_handle
*trans
,
1731 struct btrfs_root
*root
,
1732 struct btrfs_key
*node_keys
,
1733 u64
*nodes
, int lowest_level
)
1735 struct extent_buffer
*eb
;
1736 struct extent_buffer
*parent
;
1737 struct btrfs_key key
;
1746 eb
= btrfs_lock_root_node(root
);
1747 ret
= btrfs_cow_block(trans
, root
, eb
, NULL
, 0, &eb
, 0);
1750 btrfs_set_lock_blocking(eb
);
1754 level
= btrfs_header_level(parent
);
1755 if (level
== 0 || level
<= lowest_level
)
1758 ret
= bin_search(parent
, &node_keys
[lowest_level
], level
,
1760 if (ret
&& slot
> 0)
1763 bytenr
= btrfs_node_blockptr(parent
, slot
);
1764 if (nodes
[level
- 1] == bytenr
)
1767 blocksize
= btrfs_level_size(root
, level
- 1);
1768 generation
= btrfs_node_ptr_generation(parent
, slot
);
1769 btrfs_node_key_to_cpu(eb
, &key
, slot
);
1770 key_match
= !memcmp(&key
, &node_keys
[level
- 1], sizeof(key
));
1772 if (generation
== trans
->transid
) {
1773 eb
= read_tree_block(root
, bytenr
, blocksize
,
1775 btrfs_tree_lock(eb
);
1776 btrfs_set_lock_blocking(eb
);
1780 * if node keys match and node pointer hasn't been modified
1781 * in the running transaction, we can merge the path. for
1782 * blocks owned by reloc trees, the node pointer check is
1783 * skipped, this is because these blocks are fully controlled
1784 * by the space balance code, no one else can modify them.
1786 if (!nodes
[level
- 1] || !key_match
||
1787 (generation
== trans
->transid
&&
1788 btrfs_header_owner(eb
) != BTRFS_TREE_RELOC_OBJECTID
)) {
1789 if (level
== 1 || level
== lowest_level
+ 1) {
1790 if (generation
== trans
->transid
) {
1791 btrfs_tree_unlock(eb
);
1792 free_extent_buffer(eb
);
1797 if (generation
!= trans
->transid
) {
1798 eb
= read_tree_block(root
, bytenr
, blocksize
,
1800 btrfs_tree_lock(eb
);
1801 btrfs_set_lock_blocking(eb
);
1804 ret
= btrfs_cow_block(trans
, root
, eb
, parent
, slot
,
1808 if (root
->root_key
.objectid
==
1809 BTRFS_TREE_RELOC_OBJECTID
) {
1810 if (!nodes
[level
- 1]) {
1811 nodes
[level
- 1] = eb
->start
;
1812 memcpy(&node_keys
[level
- 1], &key
,
1813 sizeof(node_keys
[0]));
1819 btrfs_tree_unlock(parent
);
1820 free_extent_buffer(parent
);
1825 btrfs_set_node_blockptr(parent
, slot
, nodes
[level
- 1]);
1826 btrfs_set_node_ptr_generation(parent
, slot
, trans
->transid
);
1827 btrfs_mark_buffer_dirty(parent
);
1829 ret
= btrfs_inc_extent_ref(trans
, root
,
1831 blocksize
, parent
->start
,
1832 btrfs_header_owner(parent
),
1833 btrfs_header_generation(parent
),
1838 * If the block was created in the running transaction,
1839 * it's possible this is the last reference to it, so we
1840 * should drop the subtree.
1842 if (generation
== trans
->transid
) {
1843 ret
= btrfs_drop_subtree(trans
, root
, eb
, parent
);
1845 btrfs_tree_unlock(eb
);
1846 free_extent_buffer(eb
);
1848 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1849 blocksize
, parent
->start
,
1850 btrfs_header_owner(parent
),
1851 btrfs_header_generation(parent
),
1857 btrfs_tree_unlock(parent
);
1858 free_extent_buffer(parent
);
1863 * adjust the pointers going up the tree, starting at level
1864 * making sure the right key of each node is points to 'key'.
1865 * This is used after shifting pointers to the left, so it stops
1866 * fixing up pointers when a given leaf/node is not in slot 0 of the
1869 * If this fails to write a tree block, it returns -1, but continues
1870 * fixing up the blocks in ram so the tree is consistent.
1872 static int fixup_low_keys(struct btrfs_trans_handle
*trans
,
1873 struct btrfs_root
*root
, struct btrfs_path
*path
,
1874 struct btrfs_disk_key
*key
, int level
)
1878 struct extent_buffer
*t
;
1880 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1881 int tslot
= path
->slots
[i
];
1882 if (!path
->nodes
[i
])
1885 btrfs_set_node_key(t
, key
, tslot
);
1886 btrfs_mark_buffer_dirty(path
->nodes
[i
]);
1896 * This function isn't completely safe. It's the caller's responsibility
1897 * that the new key won't break the order
1899 int btrfs_set_item_key_safe(struct btrfs_trans_handle
*trans
,
1900 struct btrfs_root
*root
, struct btrfs_path
*path
,
1901 struct btrfs_key
*new_key
)
1903 struct btrfs_disk_key disk_key
;
1904 struct extent_buffer
*eb
;
1907 eb
= path
->nodes
[0];
1908 slot
= path
->slots
[0];
1910 btrfs_item_key(eb
, &disk_key
, slot
- 1);
1911 if (comp_keys(&disk_key
, new_key
) >= 0)
1914 if (slot
< btrfs_header_nritems(eb
) - 1) {
1915 btrfs_item_key(eb
, &disk_key
, slot
+ 1);
1916 if (comp_keys(&disk_key
, new_key
) <= 0)
1920 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
1921 btrfs_set_item_key(eb
, &disk_key
, slot
);
1922 btrfs_mark_buffer_dirty(eb
);
1924 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
1929 * try to push data from one node into the next node left in the
1932 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1933 * error, and > 0 if there was no room in the left hand block.
1935 static int push_node_left(struct btrfs_trans_handle
*trans
,
1936 struct btrfs_root
*root
, struct extent_buffer
*dst
,
1937 struct extent_buffer
*src
, int empty
)
1944 src_nritems
= btrfs_header_nritems(src
);
1945 dst_nritems
= btrfs_header_nritems(dst
);
1946 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
1947 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
1948 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
1950 if (!empty
&& src_nritems
<= 8)
1953 if (push_items
<= 0)
1957 push_items
= min(src_nritems
, push_items
);
1958 if (push_items
< src_nritems
) {
1959 /* leave at least 8 pointers in the node if
1960 * we aren't going to empty it
1962 if (src_nritems
- push_items
< 8) {
1963 if (push_items
<= 8)
1969 push_items
= min(src_nritems
- 8, push_items
);
1971 copy_extent_buffer(dst
, src
,
1972 btrfs_node_key_ptr_offset(dst_nritems
),
1973 btrfs_node_key_ptr_offset(0),
1974 push_items
* sizeof(struct btrfs_key_ptr
));
1976 if (push_items
< src_nritems
) {
1977 memmove_extent_buffer(src
, btrfs_node_key_ptr_offset(0),
1978 btrfs_node_key_ptr_offset(push_items
),
1979 (src_nritems
- push_items
) *
1980 sizeof(struct btrfs_key_ptr
));
1982 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
1983 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
1984 btrfs_mark_buffer_dirty(src
);
1985 btrfs_mark_buffer_dirty(dst
);
1987 ret
= btrfs_update_ref(trans
, root
, src
, dst
, dst_nritems
, push_items
);
1994 * try to push data from one node into the next node right in the
1997 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1998 * error, and > 0 if there was no room in the right hand block.
2000 * this will only push up to 1/2 the contents of the left node over
2002 static int balance_node_right(struct btrfs_trans_handle
*trans
,
2003 struct btrfs_root
*root
,
2004 struct extent_buffer
*dst
,
2005 struct extent_buffer
*src
)
2013 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
2014 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
2016 src_nritems
= btrfs_header_nritems(src
);
2017 dst_nritems
= btrfs_header_nritems(dst
);
2018 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
2019 if (push_items
<= 0)
2022 if (src_nritems
< 4)
2025 max_push
= src_nritems
/ 2 + 1;
2026 /* don't try to empty the node */
2027 if (max_push
>= src_nritems
)
2030 if (max_push
< push_items
)
2031 push_items
= max_push
;
2033 memmove_extent_buffer(dst
, btrfs_node_key_ptr_offset(push_items
),
2034 btrfs_node_key_ptr_offset(0),
2036 sizeof(struct btrfs_key_ptr
));
2038 copy_extent_buffer(dst
, src
,
2039 btrfs_node_key_ptr_offset(0),
2040 btrfs_node_key_ptr_offset(src_nritems
- push_items
),
2041 push_items
* sizeof(struct btrfs_key_ptr
));
2043 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
2044 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
2046 btrfs_mark_buffer_dirty(src
);
2047 btrfs_mark_buffer_dirty(dst
);
2049 ret
= btrfs_update_ref(trans
, root
, src
, dst
, 0, push_items
);
2056 * helper function to insert a new root level in the tree.
2057 * A new node is allocated, and a single item is inserted to
2058 * point to the existing root
2060 * returns zero on success or < 0 on failure.
2062 static noinline
int insert_new_root(struct btrfs_trans_handle
*trans
,
2063 struct btrfs_root
*root
,
2064 struct btrfs_path
*path
, int level
)
2067 struct extent_buffer
*lower
;
2068 struct extent_buffer
*c
;
2069 struct extent_buffer
*old
;
2070 struct btrfs_disk_key lower_key
;
2073 BUG_ON(path
->nodes
[level
]);
2074 BUG_ON(path
->nodes
[level
-1] != root
->node
);
2076 lower
= path
->nodes
[level
-1];
2078 btrfs_item_key(lower
, &lower_key
, 0);
2080 btrfs_node_key(lower
, &lower_key
, 0);
2082 c
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
, 0,
2083 root
->root_key
.objectid
, trans
->transid
,
2084 level
, root
->node
->start
, 0);
2088 memset_extent_buffer(c
, 0, 0, root
->nodesize
);
2089 btrfs_set_header_nritems(c
, 1);
2090 btrfs_set_header_level(c
, level
);
2091 btrfs_set_header_bytenr(c
, c
->start
);
2092 btrfs_set_header_generation(c
, trans
->transid
);
2093 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
2095 write_extent_buffer(c
, root
->fs_info
->fsid
,
2096 (unsigned long)btrfs_header_fsid(c
),
2099 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
2100 (unsigned long)btrfs_header_chunk_tree_uuid(c
),
2103 btrfs_set_node_key(c
, &lower_key
, 0);
2104 btrfs_set_node_blockptr(c
, 0, lower
->start
);
2105 lower_gen
= btrfs_header_generation(lower
);
2106 WARN_ON(lower_gen
!= trans
->transid
);
2108 btrfs_set_node_ptr_generation(c
, 0, lower_gen
);
2110 btrfs_mark_buffer_dirty(c
);
2112 spin_lock(&root
->node_lock
);
2115 spin_unlock(&root
->node_lock
);
2117 ret
= btrfs_update_extent_ref(trans
, root
, lower
->start
,
2118 lower
->start
, c
->start
,
2119 root
->root_key
.objectid
,
2120 trans
->transid
, level
- 1);
2123 /* the super has an extra ref to root->node */
2124 free_extent_buffer(old
);
2126 add_root_to_dirty_list(root
);
2127 extent_buffer_get(c
);
2128 path
->nodes
[level
] = c
;
2129 path
->locks
[level
] = 1;
2130 path
->slots
[level
] = 0;
2135 * worker function to insert a single pointer in a node.
2136 * the node should have enough room for the pointer already
2138 * slot and level indicate where you want the key to go, and
2139 * blocknr is the block the key points to.
2141 * returns zero on success and < 0 on any error
2143 static int insert_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
2144 *root
, struct btrfs_path
*path
, struct btrfs_disk_key
2145 *key
, u64 bytenr
, int slot
, int level
)
2147 struct extent_buffer
*lower
;
2150 BUG_ON(!path
->nodes
[level
]);
2151 lower
= path
->nodes
[level
];
2152 nritems
= btrfs_header_nritems(lower
);
2155 if (nritems
== BTRFS_NODEPTRS_PER_BLOCK(root
))
2157 if (slot
!= nritems
) {
2158 memmove_extent_buffer(lower
,
2159 btrfs_node_key_ptr_offset(slot
+ 1),
2160 btrfs_node_key_ptr_offset(slot
),
2161 (nritems
- slot
) * sizeof(struct btrfs_key_ptr
));
2163 btrfs_set_node_key(lower
, key
, slot
);
2164 btrfs_set_node_blockptr(lower
, slot
, bytenr
);
2165 WARN_ON(trans
->transid
== 0);
2166 btrfs_set_node_ptr_generation(lower
, slot
, trans
->transid
);
2167 btrfs_set_header_nritems(lower
, nritems
+ 1);
2168 btrfs_mark_buffer_dirty(lower
);
2173 * split the node at the specified level in path in two.
2174 * The path is corrected to point to the appropriate node after the split
2176 * Before splitting this tries to make some room in the node by pushing
2177 * left and right, if either one works, it returns right away.
2179 * returns 0 on success and < 0 on failure
2181 static noinline
int split_node(struct btrfs_trans_handle
*trans
,
2182 struct btrfs_root
*root
,
2183 struct btrfs_path
*path
, int level
)
2185 struct extent_buffer
*c
;
2186 struct extent_buffer
*split
;
2187 struct btrfs_disk_key disk_key
;
2193 c
= path
->nodes
[level
];
2194 WARN_ON(btrfs_header_generation(c
) != trans
->transid
);
2195 if (c
== root
->node
) {
2196 /* trying to split the root, lets make a new one */
2197 ret
= insert_new_root(trans
, root
, path
, level
+ 1);
2201 ret
= push_nodes_for_insert(trans
, root
, path
, level
);
2202 c
= path
->nodes
[level
];
2203 if (!ret
&& btrfs_header_nritems(c
) <
2204 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3)
2210 c_nritems
= btrfs_header_nritems(c
);
2212 split
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
,
2213 path
->nodes
[level
+ 1]->start
,
2214 root
->root_key
.objectid
,
2215 trans
->transid
, level
, c
->start
, 0);
2217 return PTR_ERR(split
);
2219 btrfs_set_header_flags(split
, btrfs_header_flags(c
));
2220 btrfs_set_header_level(split
, btrfs_header_level(c
));
2221 btrfs_set_header_bytenr(split
, split
->start
);
2222 btrfs_set_header_generation(split
, trans
->transid
);
2223 btrfs_set_header_owner(split
, root
->root_key
.objectid
);
2224 btrfs_set_header_flags(split
, 0);
2225 write_extent_buffer(split
, root
->fs_info
->fsid
,
2226 (unsigned long)btrfs_header_fsid(split
),
2228 write_extent_buffer(split
, root
->fs_info
->chunk_tree_uuid
,
2229 (unsigned long)btrfs_header_chunk_tree_uuid(split
),
2232 mid
= (c_nritems
+ 1) / 2;
2234 copy_extent_buffer(split
, c
,
2235 btrfs_node_key_ptr_offset(0),
2236 btrfs_node_key_ptr_offset(mid
),
2237 (c_nritems
- mid
) * sizeof(struct btrfs_key_ptr
));
2238 btrfs_set_header_nritems(split
, c_nritems
- mid
);
2239 btrfs_set_header_nritems(c
, mid
);
2242 btrfs_mark_buffer_dirty(c
);
2243 btrfs_mark_buffer_dirty(split
);
2245 btrfs_node_key(split
, &disk_key
, 0);
2246 wret
= insert_ptr(trans
, root
, path
, &disk_key
, split
->start
,
2247 path
->slots
[level
+ 1] + 1,
2252 ret
= btrfs_update_ref(trans
, root
, c
, split
, 0, c_nritems
- mid
);
2255 if (path
->slots
[level
] >= mid
) {
2256 path
->slots
[level
] -= mid
;
2257 btrfs_tree_unlock(c
);
2258 free_extent_buffer(c
);
2259 path
->nodes
[level
] = split
;
2260 path
->slots
[level
+ 1] += 1;
2262 btrfs_tree_unlock(split
);
2263 free_extent_buffer(split
);
2269 * how many bytes are required to store the items in a leaf. start
2270 * and nr indicate which items in the leaf to check. This totals up the
2271 * space used both by the item structs and the item data
2273 static int leaf_space_used(struct extent_buffer
*l
, int start
, int nr
)
2276 int nritems
= btrfs_header_nritems(l
);
2277 int end
= min(nritems
, start
+ nr
) - 1;
2281 data_len
= btrfs_item_end_nr(l
, start
);
2282 data_len
= data_len
- btrfs_item_offset_nr(l
, end
);
2283 data_len
+= sizeof(struct btrfs_item
) * nr
;
2284 WARN_ON(data_len
< 0);
2289 * The space between the end of the leaf items and
2290 * the start of the leaf data. IOW, how much room
2291 * the leaf has left for both items and data
2293 noinline
int btrfs_leaf_free_space(struct btrfs_root
*root
,
2294 struct extent_buffer
*leaf
)
2296 int nritems
= btrfs_header_nritems(leaf
);
2298 ret
= BTRFS_LEAF_DATA_SIZE(root
) - leaf_space_used(leaf
, 0, nritems
);
2300 printk(KERN_CRIT
"leaf free space ret %d, leaf data size %lu, "
2301 "used %d nritems %d\n",
2302 ret
, (unsigned long) BTRFS_LEAF_DATA_SIZE(root
),
2303 leaf_space_used(leaf
, 0, nritems
), nritems
);
2309 * push some data in the path leaf to the right, trying to free up at
2310 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2312 * returns 1 if the push failed because the other node didn't have enough
2313 * room, 0 if everything worked out and < 0 if there were major errors.
2315 static int push_leaf_right(struct btrfs_trans_handle
*trans
, struct btrfs_root
2316 *root
, struct btrfs_path
*path
, int data_size
,
2319 struct extent_buffer
*left
= path
->nodes
[0];
2320 struct extent_buffer
*right
;
2321 struct extent_buffer
*upper
;
2322 struct btrfs_disk_key disk_key
;
2328 struct btrfs_item
*item
;
2336 slot
= path
->slots
[1];
2337 if (!path
->nodes
[1])
2340 upper
= path
->nodes
[1];
2341 if (slot
>= btrfs_header_nritems(upper
) - 1)
2344 WARN_ON(!btrfs_tree_locked(path
->nodes
[1]));
2346 right
= read_node_slot(root
, upper
, slot
+ 1);
2347 btrfs_tree_lock(right
);
2348 btrfs_set_lock_blocking(right
);
2350 free_space
= btrfs_leaf_free_space(root
, right
);
2351 if (free_space
< data_size
)
2354 /* cow and double check */
2355 ret
= btrfs_cow_block(trans
, root
, right
, upper
,
2356 slot
+ 1, &right
, 0);
2360 free_space
= btrfs_leaf_free_space(root
, right
);
2361 if (free_space
< data_size
)
2364 left_nritems
= btrfs_header_nritems(left
);
2365 if (left_nritems
== 0)
2373 if (path
->slots
[0] >= left_nritems
)
2374 push_space
+= data_size
;
2376 i
= left_nritems
- 1;
2378 item
= btrfs_item_nr(left
, i
);
2380 if (!empty
&& push_items
> 0) {
2381 if (path
->slots
[0] > i
)
2383 if (path
->slots
[0] == i
) {
2384 int space
= btrfs_leaf_free_space(root
, left
);
2385 if (space
+ push_space
* 2 > free_space
)
2390 if (path
->slots
[0] == i
)
2391 push_space
+= data_size
;
2393 if (!left
->map_token
) {
2394 map_extent_buffer(left
, (unsigned long)item
,
2395 sizeof(struct btrfs_item
),
2396 &left
->map_token
, &left
->kaddr
,
2397 &left
->map_start
, &left
->map_len
,
2401 this_item_size
= btrfs_item_size(left
, item
);
2402 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2406 push_space
+= this_item_size
+ sizeof(*item
);
2411 if (left
->map_token
) {
2412 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2413 left
->map_token
= NULL
;
2416 if (push_items
== 0)
2419 if (!empty
&& push_items
== left_nritems
)
2422 /* push left to right */
2423 right_nritems
= btrfs_header_nritems(right
);
2425 push_space
= btrfs_item_end_nr(left
, left_nritems
- push_items
);
2426 push_space
-= leaf_data_end(root
, left
);
2428 /* make room in the right data area */
2429 data_end
= leaf_data_end(root
, right
);
2430 memmove_extent_buffer(right
,
2431 btrfs_leaf_data(right
) + data_end
- push_space
,
2432 btrfs_leaf_data(right
) + data_end
,
2433 BTRFS_LEAF_DATA_SIZE(root
) - data_end
);
2435 /* copy from the left data area */
2436 copy_extent_buffer(right
, left
, btrfs_leaf_data(right
) +
2437 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2438 btrfs_leaf_data(left
) + leaf_data_end(root
, left
),
2441 memmove_extent_buffer(right
, btrfs_item_nr_offset(push_items
),
2442 btrfs_item_nr_offset(0),
2443 right_nritems
* sizeof(struct btrfs_item
));
2445 /* copy the items from left to right */
2446 copy_extent_buffer(right
, left
, btrfs_item_nr_offset(0),
2447 btrfs_item_nr_offset(left_nritems
- push_items
),
2448 push_items
* sizeof(struct btrfs_item
));
2450 /* update the item pointers */
2451 right_nritems
+= push_items
;
2452 btrfs_set_header_nritems(right
, right_nritems
);
2453 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2454 for (i
= 0; i
< right_nritems
; i
++) {
2455 item
= btrfs_item_nr(right
, i
);
2456 if (!right
->map_token
) {
2457 map_extent_buffer(right
, (unsigned long)item
,
2458 sizeof(struct btrfs_item
),
2459 &right
->map_token
, &right
->kaddr
,
2460 &right
->map_start
, &right
->map_len
,
2463 push_space
-= btrfs_item_size(right
, item
);
2464 btrfs_set_item_offset(right
, item
, push_space
);
2467 if (right
->map_token
) {
2468 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2469 right
->map_token
= NULL
;
2471 left_nritems
-= push_items
;
2472 btrfs_set_header_nritems(left
, left_nritems
);
2475 btrfs_mark_buffer_dirty(left
);
2476 btrfs_mark_buffer_dirty(right
);
2478 ret
= btrfs_update_ref(trans
, root
, left
, right
, 0, push_items
);
2481 btrfs_item_key(right
, &disk_key
, 0);
2482 btrfs_set_node_key(upper
, &disk_key
, slot
+ 1);
2483 btrfs_mark_buffer_dirty(upper
);
2485 /* then fixup the leaf pointer in the path */
2486 if (path
->slots
[0] >= left_nritems
) {
2487 path
->slots
[0] -= left_nritems
;
2488 if (btrfs_header_nritems(path
->nodes
[0]) == 0)
2489 clean_tree_block(trans
, root
, path
->nodes
[0]);
2490 btrfs_tree_unlock(path
->nodes
[0]);
2491 free_extent_buffer(path
->nodes
[0]);
2492 path
->nodes
[0] = right
;
2493 path
->slots
[1] += 1;
2495 btrfs_tree_unlock(right
);
2496 free_extent_buffer(right
);
2501 btrfs_tree_unlock(right
);
2502 free_extent_buffer(right
);
2507 * push some data in the path leaf to the left, trying to free up at
2508 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2510 static int push_leaf_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
2511 *root
, struct btrfs_path
*path
, int data_size
,
2514 struct btrfs_disk_key disk_key
;
2515 struct extent_buffer
*right
= path
->nodes
[0];
2516 struct extent_buffer
*left
;
2522 struct btrfs_item
*item
;
2523 u32 old_left_nritems
;
2529 u32 old_left_item_size
;
2531 slot
= path
->slots
[1];
2534 if (!path
->nodes
[1])
2537 right_nritems
= btrfs_header_nritems(right
);
2538 if (right_nritems
== 0)
2541 WARN_ON(!btrfs_tree_locked(path
->nodes
[1]));
2543 left
= read_node_slot(root
, path
->nodes
[1], slot
- 1);
2544 btrfs_tree_lock(left
);
2545 btrfs_set_lock_blocking(left
);
2547 free_space
= btrfs_leaf_free_space(root
, left
);
2548 if (free_space
< data_size
) {
2553 /* cow and double check */
2554 ret
= btrfs_cow_block(trans
, root
, left
,
2555 path
->nodes
[1], slot
- 1, &left
, 0);
2557 /* we hit -ENOSPC, but it isn't fatal here */
2562 free_space
= btrfs_leaf_free_space(root
, left
);
2563 if (free_space
< data_size
) {
2571 nr
= right_nritems
- 1;
2573 for (i
= 0; i
< nr
; i
++) {
2574 item
= btrfs_item_nr(right
, i
);
2575 if (!right
->map_token
) {
2576 map_extent_buffer(right
, (unsigned long)item
,
2577 sizeof(struct btrfs_item
),
2578 &right
->map_token
, &right
->kaddr
,
2579 &right
->map_start
, &right
->map_len
,
2583 if (!empty
&& push_items
> 0) {
2584 if (path
->slots
[0] < i
)
2586 if (path
->slots
[0] == i
) {
2587 int space
= btrfs_leaf_free_space(root
, right
);
2588 if (space
+ push_space
* 2 > free_space
)
2593 if (path
->slots
[0] == i
)
2594 push_space
+= data_size
;
2596 this_item_size
= btrfs_item_size(right
, item
);
2597 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2601 push_space
+= this_item_size
+ sizeof(*item
);
2604 if (right
->map_token
) {
2605 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2606 right
->map_token
= NULL
;
2609 if (push_items
== 0) {
2613 if (!empty
&& push_items
== btrfs_header_nritems(right
))
2616 /* push data from right to left */
2617 copy_extent_buffer(left
, right
,
2618 btrfs_item_nr_offset(btrfs_header_nritems(left
)),
2619 btrfs_item_nr_offset(0),
2620 push_items
* sizeof(struct btrfs_item
));
2622 push_space
= BTRFS_LEAF_DATA_SIZE(root
) -
2623 btrfs_item_offset_nr(right
, push_items
- 1);
2625 copy_extent_buffer(left
, right
, btrfs_leaf_data(left
) +
2626 leaf_data_end(root
, left
) - push_space
,
2627 btrfs_leaf_data(right
) +
2628 btrfs_item_offset_nr(right
, push_items
- 1),
2630 old_left_nritems
= btrfs_header_nritems(left
);
2631 BUG_ON(old_left_nritems
<= 0);
2633 old_left_item_size
= btrfs_item_offset_nr(left
, old_left_nritems
- 1);
2634 for (i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
2637 item
= btrfs_item_nr(left
, i
);
2638 if (!left
->map_token
) {
2639 map_extent_buffer(left
, (unsigned long)item
,
2640 sizeof(struct btrfs_item
),
2641 &left
->map_token
, &left
->kaddr
,
2642 &left
->map_start
, &left
->map_len
,
2646 ioff
= btrfs_item_offset(left
, item
);
2647 btrfs_set_item_offset(left
, item
,
2648 ioff
- (BTRFS_LEAF_DATA_SIZE(root
) - old_left_item_size
));
2650 btrfs_set_header_nritems(left
, old_left_nritems
+ push_items
);
2651 if (left
->map_token
) {
2652 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2653 left
->map_token
= NULL
;
2656 /* fixup right node */
2657 if (push_items
> right_nritems
) {
2658 printk(KERN_CRIT
"push items %d nr %u\n", push_items
,
2663 if (push_items
< right_nritems
) {
2664 push_space
= btrfs_item_offset_nr(right
, push_items
- 1) -
2665 leaf_data_end(root
, right
);
2666 memmove_extent_buffer(right
, btrfs_leaf_data(right
) +
2667 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2668 btrfs_leaf_data(right
) +
2669 leaf_data_end(root
, right
), push_space
);
2671 memmove_extent_buffer(right
, btrfs_item_nr_offset(0),
2672 btrfs_item_nr_offset(push_items
),
2673 (btrfs_header_nritems(right
) - push_items
) *
2674 sizeof(struct btrfs_item
));
2676 right_nritems
-= push_items
;
2677 btrfs_set_header_nritems(right
, right_nritems
);
2678 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2679 for (i
= 0; i
< right_nritems
; i
++) {
2680 item
= btrfs_item_nr(right
, i
);
2682 if (!right
->map_token
) {
2683 map_extent_buffer(right
, (unsigned long)item
,
2684 sizeof(struct btrfs_item
),
2685 &right
->map_token
, &right
->kaddr
,
2686 &right
->map_start
, &right
->map_len
,
2690 push_space
= push_space
- btrfs_item_size(right
, item
);
2691 btrfs_set_item_offset(right
, item
, push_space
);
2693 if (right
->map_token
) {
2694 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2695 right
->map_token
= NULL
;
2698 btrfs_mark_buffer_dirty(left
);
2700 btrfs_mark_buffer_dirty(right
);
2702 ret
= btrfs_update_ref(trans
, root
, right
, left
,
2703 old_left_nritems
, push_items
);
2706 btrfs_item_key(right
, &disk_key
, 0);
2707 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
2711 /* then fixup the leaf pointer in the path */
2712 if (path
->slots
[0] < push_items
) {
2713 path
->slots
[0] += old_left_nritems
;
2714 if (btrfs_header_nritems(path
->nodes
[0]) == 0)
2715 clean_tree_block(trans
, root
, path
->nodes
[0]);
2716 btrfs_tree_unlock(path
->nodes
[0]);
2717 free_extent_buffer(path
->nodes
[0]);
2718 path
->nodes
[0] = left
;
2719 path
->slots
[1] -= 1;
2721 btrfs_tree_unlock(left
);
2722 free_extent_buffer(left
);
2723 path
->slots
[0] -= push_items
;
2725 BUG_ON(path
->slots
[0] < 0);
2728 btrfs_tree_unlock(left
);
2729 free_extent_buffer(left
);
2734 * split the path's leaf in two, making sure there is at least data_size
2735 * available for the resulting leaf level of the path.
2737 * returns 0 if all went well and < 0 on failure.
2739 static noinline
int split_leaf(struct btrfs_trans_handle
*trans
,
2740 struct btrfs_root
*root
,
2741 struct btrfs_key
*ins_key
,
2742 struct btrfs_path
*path
, int data_size
,
2745 struct extent_buffer
*l
;
2749 struct extent_buffer
*right
;
2756 int num_doubles
= 0;
2757 struct btrfs_disk_key disk_key
;
2759 /* first try to make some room by pushing left and right */
2760 if (data_size
&& ins_key
->type
!= BTRFS_DIR_ITEM_KEY
) {
2761 wret
= push_leaf_right(trans
, root
, path
, data_size
, 0);
2765 wret
= push_leaf_left(trans
, root
, path
, data_size
, 0);
2771 /* did the pushes work? */
2772 if (btrfs_leaf_free_space(root
, l
) >= data_size
)
2776 if (!path
->nodes
[1]) {
2777 ret
= insert_new_root(trans
, root
, path
, 1);
2784 slot
= path
->slots
[0];
2785 nritems
= btrfs_header_nritems(l
);
2786 mid
= (nritems
+ 1) / 2;
2788 right
= btrfs_alloc_free_block(trans
, root
, root
->leafsize
,
2789 path
->nodes
[1]->start
,
2790 root
->root_key
.objectid
,
2791 trans
->transid
, 0, l
->start
, 0);
2792 if (IS_ERR(right
)) {
2794 return PTR_ERR(right
);
2797 memset_extent_buffer(right
, 0, 0, sizeof(struct btrfs_header
));
2798 btrfs_set_header_bytenr(right
, right
->start
);
2799 btrfs_set_header_generation(right
, trans
->transid
);
2800 btrfs_set_header_owner(right
, root
->root_key
.objectid
);
2801 btrfs_set_header_level(right
, 0);
2802 write_extent_buffer(right
, root
->fs_info
->fsid
,
2803 (unsigned long)btrfs_header_fsid(right
),
2806 write_extent_buffer(right
, root
->fs_info
->chunk_tree_uuid
,
2807 (unsigned long)btrfs_header_chunk_tree_uuid(right
),
2811 leaf_space_used(l
, mid
, nritems
- mid
) + data_size
>
2812 BTRFS_LEAF_DATA_SIZE(root
)) {
2813 if (slot
>= nritems
) {
2814 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
2815 btrfs_set_header_nritems(right
, 0);
2816 wret
= insert_ptr(trans
, root
, path
,
2817 &disk_key
, right
->start
,
2818 path
->slots
[1] + 1, 1);
2822 btrfs_tree_unlock(path
->nodes
[0]);
2823 free_extent_buffer(path
->nodes
[0]);
2824 path
->nodes
[0] = right
;
2826 path
->slots
[1] += 1;
2827 btrfs_mark_buffer_dirty(right
);
2831 if (mid
!= nritems
&&
2832 leaf_space_used(l
, mid
, nritems
- mid
) +
2833 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
2838 if (leaf_space_used(l
, 0, mid
) + data_size
>
2839 BTRFS_LEAF_DATA_SIZE(root
)) {
2840 if (!extend
&& data_size
&& slot
== 0) {
2841 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
2842 btrfs_set_header_nritems(right
, 0);
2843 wret
= insert_ptr(trans
, root
, path
,
2849 btrfs_tree_unlock(path
->nodes
[0]);
2850 free_extent_buffer(path
->nodes
[0]);
2851 path
->nodes
[0] = right
;
2853 if (path
->slots
[1] == 0) {
2854 wret
= fixup_low_keys(trans
, root
,
2855 path
, &disk_key
, 1);
2859 btrfs_mark_buffer_dirty(right
);
2861 } else if ((extend
|| !data_size
) && slot
== 0) {
2865 if (mid
!= nritems
&&
2866 leaf_space_used(l
, mid
, nritems
- mid
) +
2867 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
2873 nritems
= nritems
- mid
;
2874 btrfs_set_header_nritems(right
, nritems
);
2875 data_copy_size
= btrfs_item_end_nr(l
, mid
) - leaf_data_end(root
, l
);
2877 copy_extent_buffer(right
, l
, btrfs_item_nr_offset(0),
2878 btrfs_item_nr_offset(mid
),
2879 nritems
* sizeof(struct btrfs_item
));
2881 copy_extent_buffer(right
, l
,
2882 btrfs_leaf_data(right
) + BTRFS_LEAF_DATA_SIZE(root
) -
2883 data_copy_size
, btrfs_leaf_data(l
) +
2884 leaf_data_end(root
, l
), data_copy_size
);
2886 rt_data_off
= BTRFS_LEAF_DATA_SIZE(root
) -
2887 btrfs_item_end_nr(l
, mid
);
2889 for (i
= 0; i
< nritems
; i
++) {
2890 struct btrfs_item
*item
= btrfs_item_nr(right
, i
);
2893 if (!right
->map_token
) {
2894 map_extent_buffer(right
, (unsigned long)item
,
2895 sizeof(struct btrfs_item
),
2896 &right
->map_token
, &right
->kaddr
,
2897 &right
->map_start
, &right
->map_len
,
2901 ioff
= btrfs_item_offset(right
, item
);
2902 btrfs_set_item_offset(right
, item
, ioff
+ rt_data_off
);
2905 if (right
->map_token
) {
2906 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2907 right
->map_token
= NULL
;
2910 btrfs_set_header_nritems(l
, mid
);
2912 btrfs_item_key(right
, &disk_key
, 0);
2913 wret
= insert_ptr(trans
, root
, path
, &disk_key
, right
->start
,
2914 path
->slots
[1] + 1, 1);
2918 btrfs_mark_buffer_dirty(right
);
2919 btrfs_mark_buffer_dirty(l
);
2920 BUG_ON(path
->slots
[0] != slot
);
2922 ret
= btrfs_update_ref(trans
, root
, l
, right
, 0, nritems
);
2926 btrfs_tree_unlock(path
->nodes
[0]);
2927 free_extent_buffer(path
->nodes
[0]);
2928 path
->nodes
[0] = right
;
2929 path
->slots
[0] -= mid
;
2930 path
->slots
[1] += 1;
2932 btrfs_tree_unlock(right
);
2933 free_extent_buffer(right
);
2936 BUG_ON(path
->slots
[0] < 0);
2939 BUG_ON(num_doubles
!= 0);
2947 * This function splits a single item into two items,
2948 * giving 'new_key' to the new item and splitting the
2949 * old one at split_offset (from the start of the item).
2951 * The path may be released by this operation. After
2952 * the split, the path is pointing to the old item. The
2953 * new item is going to be in the same node as the old one.
2955 * Note, the item being split must be smaller enough to live alone on
2956 * a tree block with room for one extra struct btrfs_item
2958 * This allows us to split the item in place, keeping a lock on the
2959 * leaf the entire time.
2961 int btrfs_split_item(struct btrfs_trans_handle
*trans
,
2962 struct btrfs_root
*root
,
2963 struct btrfs_path
*path
,
2964 struct btrfs_key
*new_key
,
2965 unsigned long split_offset
)
2968 struct extent_buffer
*leaf
;
2969 struct btrfs_key orig_key
;
2970 struct btrfs_item
*item
;
2971 struct btrfs_item
*new_item
;
2976 struct btrfs_disk_key disk_key
;
2979 leaf
= path
->nodes
[0];
2980 btrfs_item_key_to_cpu(leaf
, &orig_key
, path
->slots
[0]);
2981 if (btrfs_leaf_free_space(root
, leaf
) >= sizeof(struct btrfs_item
))
2984 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
2985 btrfs_release_path(root
, path
);
2987 path
->search_for_split
= 1;
2988 path
->keep_locks
= 1;
2990 ret
= btrfs_search_slot(trans
, root
, &orig_key
, path
, 0, 1);
2991 path
->search_for_split
= 0;
2993 /* if our item isn't there or got smaller, return now */
2994 if (ret
!= 0 || item_size
!= btrfs_item_size_nr(path
->nodes
[0],
2996 path
->keep_locks
= 0;
3000 ret
= split_leaf(trans
, root
, &orig_key
, path
,
3001 sizeof(struct btrfs_item
), 1);
3002 path
->keep_locks
= 0;
3006 * make sure any changes to the path from split_leaf leave it
3007 * in a blocking state
3009 btrfs_set_path_blocking(path
);
3011 leaf
= path
->nodes
[0];
3012 BUG_ON(btrfs_leaf_free_space(root
, leaf
) < sizeof(struct btrfs_item
));
3015 item
= btrfs_item_nr(leaf
, path
->slots
[0]);
3016 orig_offset
= btrfs_item_offset(leaf
, item
);
3017 item_size
= btrfs_item_size(leaf
, item
);
3020 buf
= kmalloc(item_size
, GFP_NOFS
);
3021 read_extent_buffer(leaf
, buf
, btrfs_item_ptr_offset(leaf
,
3022 path
->slots
[0]), item_size
);
3023 slot
= path
->slots
[0] + 1;
3024 leaf
= path
->nodes
[0];
3026 nritems
= btrfs_header_nritems(leaf
);
3028 if (slot
!= nritems
) {
3029 /* shift the items */
3030 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ 1),
3031 btrfs_item_nr_offset(slot
),
3032 (nritems
- slot
) * sizeof(struct btrfs_item
));
3036 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
3037 btrfs_set_item_key(leaf
, &disk_key
, slot
);
3039 new_item
= btrfs_item_nr(leaf
, slot
);
3041 btrfs_set_item_offset(leaf
, new_item
, orig_offset
);
3042 btrfs_set_item_size(leaf
, new_item
, item_size
- split_offset
);
3044 btrfs_set_item_offset(leaf
, item
,
3045 orig_offset
+ item_size
- split_offset
);
3046 btrfs_set_item_size(leaf
, item
, split_offset
);
3048 btrfs_set_header_nritems(leaf
, nritems
+ 1);
3050 /* write the data for the start of the original item */
3051 write_extent_buffer(leaf
, buf
,
3052 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
3055 /* write the data for the new item */
3056 write_extent_buffer(leaf
, buf
+ split_offset
,
3057 btrfs_item_ptr_offset(leaf
, slot
),
3058 item_size
- split_offset
);
3059 btrfs_mark_buffer_dirty(leaf
);
3062 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3063 btrfs_print_leaf(root
, leaf
);
3071 * make the item pointed to by the path smaller. new_size indicates
3072 * how small to make it, and from_end tells us if we just chop bytes
3073 * off the end of the item or if we shift the item to chop bytes off
3076 int btrfs_truncate_item(struct btrfs_trans_handle
*trans
,
3077 struct btrfs_root
*root
,
3078 struct btrfs_path
*path
,
3079 u32 new_size
, int from_end
)
3084 struct extent_buffer
*leaf
;
3085 struct btrfs_item
*item
;
3087 unsigned int data_end
;
3088 unsigned int old_data_start
;
3089 unsigned int old_size
;
3090 unsigned int size_diff
;
3093 slot_orig
= path
->slots
[0];
3094 leaf
= path
->nodes
[0];
3095 slot
= path
->slots
[0];
3097 old_size
= btrfs_item_size_nr(leaf
, slot
);
3098 if (old_size
== new_size
)
3101 nritems
= btrfs_header_nritems(leaf
);
3102 data_end
= leaf_data_end(root
, leaf
);
3104 old_data_start
= btrfs_item_offset_nr(leaf
, slot
);
3106 size_diff
= old_size
- new_size
;
3109 BUG_ON(slot
>= nritems
);
3112 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3114 /* first correct the data pointers */
3115 for (i
= slot
; i
< nritems
; i
++) {
3117 item
= btrfs_item_nr(leaf
, i
);
3119 if (!leaf
->map_token
) {
3120 map_extent_buffer(leaf
, (unsigned long)item
,
3121 sizeof(struct btrfs_item
),
3122 &leaf
->map_token
, &leaf
->kaddr
,
3123 &leaf
->map_start
, &leaf
->map_len
,
3127 ioff
= btrfs_item_offset(leaf
, item
);
3128 btrfs_set_item_offset(leaf
, item
, ioff
+ size_diff
);
3131 if (leaf
->map_token
) {
3132 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3133 leaf
->map_token
= NULL
;
3136 /* shift the data */
3138 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3139 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
3140 data_end
, old_data_start
+ new_size
- data_end
);
3142 struct btrfs_disk_key disk_key
;
3145 btrfs_item_key(leaf
, &disk_key
, slot
);
3147 if (btrfs_disk_key_type(&disk_key
) == BTRFS_EXTENT_DATA_KEY
) {
3149 struct btrfs_file_extent_item
*fi
;
3151 fi
= btrfs_item_ptr(leaf
, slot
,
3152 struct btrfs_file_extent_item
);
3153 fi
= (struct btrfs_file_extent_item
*)(
3154 (unsigned long)fi
- size_diff
);
3156 if (btrfs_file_extent_type(leaf
, fi
) ==
3157 BTRFS_FILE_EXTENT_INLINE
) {
3158 ptr
= btrfs_item_ptr_offset(leaf
, slot
);
3159 memmove_extent_buffer(leaf
, ptr
,
3161 offsetof(struct btrfs_file_extent_item
,
3166 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3167 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
3168 data_end
, old_data_start
- data_end
);
3170 offset
= btrfs_disk_key_offset(&disk_key
);
3171 btrfs_set_disk_key_offset(&disk_key
, offset
+ size_diff
);
3172 btrfs_set_item_key(leaf
, &disk_key
, slot
);
3174 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3177 item
= btrfs_item_nr(leaf
, slot
);
3178 btrfs_set_item_size(leaf
, item
, new_size
);
3179 btrfs_mark_buffer_dirty(leaf
);
3182 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3183 btrfs_print_leaf(root
, leaf
);
3190 * make the item pointed to by the path bigger, data_size is the new size.
3192 int btrfs_extend_item(struct btrfs_trans_handle
*trans
,
3193 struct btrfs_root
*root
, struct btrfs_path
*path
,
3199 struct extent_buffer
*leaf
;
3200 struct btrfs_item
*item
;
3202 unsigned int data_end
;
3203 unsigned int old_data
;
3204 unsigned int old_size
;
3207 slot_orig
= path
->slots
[0];
3208 leaf
= path
->nodes
[0];
3210 nritems
= btrfs_header_nritems(leaf
);
3211 data_end
= leaf_data_end(root
, leaf
);
3213 if (btrfs_leaf_free_space(root
, leaf
) < data_size
) {
3214 btrfs_print_leaf(root
, leaf
);
3217 slot
= path
->slots
[0];
3218 old_data
= btrfs_item_end_nr(leaf
, slot
);
3221 if (slot
>= nritems
) {
3222 btrfs_print_leaf(root
, leaf
);
3223 printk(KERN_CRIT
"slot %d too large, nritems %d\n",
3229 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3231 /* first correct the data pointers */
3232 for (i
= slot
; i
< nritems
; i
++) {
3234 item
= btrfs_item_nr(leaf
, i
);
3236 if (!leaf
->map_token
) {
3237 map_extent_buffer(leaf
, (unsigned long)item
,
3238 sizeof(struct btrfs_item
),
3239 &leaf
->map_token
, &leaf
->kaddr
,
3240 &leaf
->map_start
, &leaf
->map_len
,
3243 ioff
= btrfs_item_offset(leaf
, item
);
3244 btrfs_set_item_offset(leaf
, item
, ioff
- data_size
);
3247 if (leaf
->map_token
) {
3248 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3249 leaf
->map_token
= NULL
;
3252 /* shift the data */
3253 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3254 data_end
- data_size
, btrfs_leaf_data(leaf
) +
3255 data_end
, old_data
- data_end
);
3257 data_end
= old_data
;
3258 old_size
= btrfs_item_size_nr(leaf
, slot
);
3259 item
= btrfs_item_nr(leaf
, slot
);
3260 btrfs_set_item_size(leaf
, item
, old_size
+ data_size
);
3261 btrfs_mark_buffer_dirty(leaf
);
3264 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3265 btrfs_print_leaf(root
, leaf
);
3272 * Given a key and some data, insert items into the tree.
3273 * This does all the path init required, making room in the tree if needed.
3274 * Returns the number of keys that were inserted.
3276 int btrfs_insert_some_items(struct btrfs_trans_handle
*trans
,
3277 struct btrfs_root
*root
,
3278 struct btrfs_path
*path
,
3279 struct btrfs_key
*cpu_key
, u32
*data_size
,
3282 struct extent_buffer
*leaf
;
3283 struct btrfs_item
*item
;
3290 unsigned int data_end
;
3291 struct btrfs_disk_key disk_key
;
3292 struct btrfs_key found_key
;
3294 for (i
= 0; i
< nr
; i
++) {
3295 if (total_size
+ data_size
[i
] + sizeof(struct btrfs_item
) >
3296 BTRFS_LEAF_DATA_SIZE(root
)) {
3300 total_data
+= data_size
[i
];
3301 total_size
+= data_size
[i
] + sizeof(struct btrfs_item
);
3305 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3311 leaf
= path
->nodes
[0];
3313 nritems
= btrfs_header_nritems(leaf
);
3314 data_end
= leaf_data_end(root
, leaf
);
3316 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3317 for (i
= nr
; i
>= 0; i
--) {
3318 total_data
-= data_size
[i
];
3319 total_size
-= data_size
[i
] + sizeof(struct btrfs_item
);
3320 if (total_size
< btrfs_leaf_free_space(root
, leaf
))
3326 slot
= path
->slots
[0];
3329 if (slot
!= nritems
) {
3330 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3332 item
= btrfs_item_nr(leaf
, slot
);
3333 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
3335 /* figure out how many keys we can insert in here */
3336 total_data
= data_size
[0];
3337 for (i
= 1; i
< nr
; i
++) {
3338 if (comp_cpu_keys(&found_key
, cpu_key
+ i
) <= 0)
3340 total_data
+= data_size
[i
];
3344 if (old_data
< data_end
) {
3345 btrfs_print_leaf(root
, leaf
);
3346 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3347 slot
, old_data
, data_end
);
3351 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3353 /* first correct the data pointers */
3354 WARN_ON(leaf
->map_token
);
3355 for (i
= slot
; i
< nritems
; i
++) {
3358 item
= btrfs_item_nr(leaf
, i
);
3359 if (!leaf
->map_token
) {
3360 map_extent_buffer(leaf
, (unsigned long)item
,
3361 sizeof(struct btrfs_item
),
3362 &leaf
->map_token
, &leaf
->kaddr
,
3363 &leaf
->map_start
, &leaf
->map_len
,
3367 ioff
= btrfs_item_offset(leaf
, item
);
3368 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3370 if (leaf
->map_token
) {
3371 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3372 leaf
->map_token
= NULL
;
3375 /* shift the items */
3376 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3377 btrfs_item_nr_offset(slot
),
3378 (nritems
- slot
) * sizeof(struct btrfs_item
));
3380 /* shift the data */
3381 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3382 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3383 data_end
, old_data
- data_end
);
3384 data_end
= old_data
;
3387 * this sucks but it has to be done, if we are inserting at
3388 * the end of the leaf only insert 1 of the items, since we
3389 * have no way of knowing whats on the next leaf and we'd have
3390 * to drop our current locks to figure it out
3395 /* setup the item for the new data */
3396 for (i
= 0; i
< nr
; i
++) {
3397 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3398 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3399 item
= btrfs_item_nr(leaf
, slot
+ i
);
3400 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3401 data_end
-= data_size
[i
];
3402 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3404 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3405 btrfs_mark_buffer_dirty(leaf
);
3409 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3410 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3413 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3414 btrfs_print_leaf(root
, leaf
);
3424 * Given a key and some data, insert items into the tree.
3425 * This does all the path init required, making room in the tree if needed.
3427 int btrfs_insert_empty_items(struct btrfs_trans_handle
*trans
,
3428 struct btrfs_root
*root
,
3429 struct btrfs_path
*path
,
3430 struct btrfs_key
*cpu_key
, u32
*data_size
,
3433 struct extent_buffer
*leaf
;
3434 struct btrfs_item
*item
;
3442 unsigned int data_end
;
3443 struct btrfs_disk_key disk_key
;
3445 for (i
= 0; i
< nr
; i
++)
3446 total_data
+= data_size
[i
];
3448 total_size
= total_data
+ (nr
* sizeof(struct btrfs_item
));
3449 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3455 slot_orig
= path
->slots
[0];
3456 leaf
= path
->nodes
[0];
3458 nritems
= btrfs_header_nritems(leaf
);
3459 data_end
= leaf_data_end(root
, leaf
);
3461 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3462 btrfs_print_leaf(root
, leaf
);
3463 printk(KERN_CRIT
"not enough freespace need %u have %d\n",
3464 total_size
, btrfs_leaf_free_space(root
, leaf
));
3468 slot
= path
->slots
[0];
3471 if (slot
!= nritems
) {
3472 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3474 if (old_data
< data_end
) {
3475 btrfs_print_leaf(root
, leaf
);
3476 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3477 slot
, old_data
, data_end
);
3481 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3483 /* first correct the data pointers */
3484 WARN_ON(leaf
->map_token
);
3485 for (i
= slot
; i
< nritems
; i
++) {
3488 item
= btrfs_item_nr(leaf
, i
);
3489 if (!leaf
->map_token
) {
3490 map_extent_buffer(leaf
, (unsigned long)item
,
3491 sizeof(struct btrfs_item
),
3492 &leaf
->map_token
, &leaf
->kaddr
,
3493 &leaf
->map_start
, &leaf
->map_len
,
3497 ioff
= btrfs_item_offset(leaf
, item
);
3498 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3500 if (leaf
->map_token
) {
3501 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3502 leaf
->map_token
= NULL
;
3505 /* shift the items */
3506 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3507 btrfs_item_nr_offset(slot
),
3508 (nritems
- slot
) * sizeof(struct btrfs_item
));
3510 /* shift the data */
3511 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3512 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3513 data_end
, old_data
- data_end
);
3514 data_end
= old_data
;
3517 /* setup the item for the new data */
3518 for (i
= 0; i
< nr
; i
++) {
3519 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3520 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3521 item
= btrfs_item_nr(leaf
, slot
+ i
);
3522 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3523 data_end
-= data_size
[i
];
3524 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3526 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3527 btrfs_mark_buffer_dirty(leaf
);
3531 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3532 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3535 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3536 btrfs_print_leaf(root
, leaf
);
3540 btrfs_unlock_up_safe(path
, 1);
3545 * Given a key and some data, insert an item into the tree.
3546 * This does all the path init required, making room in the tree if needed.
3548 int btrfs_insert_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
3549 *root
, struct btrfs_key
*cpu_key
, void *data
, u32
3553 struct btrfs_path
*path
;
3554 struct extent_buffer
*leaf
;
3557 path
= btrfs_alloc_path();
3559 ret
= btrfs_insert_empty_item(trans
, root
, path
, cpu_key
, data_size
);
3561 leaf
= path
->nodes
[0];
3562 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
3563 write_extent_buffer(leaf
, data
, ptr
, data_size
);
3564 btrfs_mark_buffer_dirty(leaf
);
3566 btrfs_free_path(path
);
3571 * delete the pointer from a given node.
3573 * the tree should have been previously balanced so the deletion does not
3576 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3577 struct btrfs_path
*path
, int level
, int slot
)
3579 struct extent_buffer
*parent
= path
->nodes
[level
];
3584 nritems
= btrfs_header_nritems(parent
);
3585 if (slot
!= nritems
- 1) {
3586 memmove_extent_buffer(parent
,
3587 btrfs_node_key_ptr_offset(slot
),
3588 btrfs_node_key_ptr_offset(slot
+ 1),
3589 sizeof(struct btrfs_key_ptr
) *
3590 (nritems
- slot
- 1));
3593 btrfs_set_header_nritems(parent
, nritems
);
3594 if (nritems
== 0 && parent
== root
->node
) {
3595 BUG_ON(btrfs_header_level(root
->node
) != 1);
3596 /* just turn the root into a leaf and break */
3597 btrfs_set_header_level(root
->node
, 0);
3598 } else if (slot
== 0) {
3599 struct btrfs_disk_key disk_key
;
3601 btrfs_node_key(parent
, &disk_key
, 0);
3602 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, level
+ 1);
3606 btrfs_mark_buffer_dirty(parent
);
3611 * a helper function to delete the leaf pointed to by path->slots[1] and
3612 * path->nodes[1]. bytenr is the node block pointer, but since the callers
3613 * already know it, it is faster to have them pass it down than to
3614 * read it out of the node again.
3616 * This deletes the pointer in path->nodes[1] and frees the leaf
3617 * block extent. zero is returned if it all worked out, < 0 otherwise.
3619 * The path must have already been setup for deleting the leaf, including
3620 * all the proper balancing. path->nodes[1] must be locked.
3622 noinline
int btrfs_del_leaf(struct btrfs_trans_handle
*trans
,
3623 struct btrfs_root
*root
,
3624 struct btrfs_path
*path
, u64 bytenr
)
3627 u64 root_gen
= btrfs_header_generation(path
->nodes
[1]);
3628 u64 parent_start
= path
->nodes
[1]->start
;
3629 u64 parent_owner
= btrfs_header_owner(path
->nodes
[1]);
3631 ret
= del_ptr(trans
, root
, path
, 1, path
->slots
[1]);
3636 * btrfs_free_extent is expensive, we want to make sure we
3637 * aren't holding any locks when we call it
3639 btrfs_unlock_up_safe(path
, 0);
3641 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3642 btrfs_level_size(root
, 0),
3643 parent_start
, parent_owner
,
3648 * delete the item at the leaf level in path. If that empties
3649 * the leaf, remove it from the tree
3651 int btrfs_del_items(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3652 struct btrfs_path
*path
, int slot
, int nr
)
3654 struct extent_buffer
*leaf
;
3655 struct btrfs_item
*item
;
3663 leaf
= path
->nodes
[0];
3664 last_off
= btrfs_item_offset_nr(leaf
, slot
+ nr
- 1);
3666 for (i
= 0; i
< nr
; i
++)
3667 dsize
+= btrfs_item_size_nr(leaf
, slot
+ i
);
3669 nritems
= btrfs_header_nritems(leaf
);
3671 if (slot
+ nr
!= nritems
) {
3672 int data_end
= leaf_data_end(root
, leaf
);
3674 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3676 btrfs_leaf_data(leaf
) + data_end
,
3677 last_off
- data_end
);
3679 for (i
= slot
+ nr
; i
< nritems
; i
++) {
3682 item
= btrfs_item_nr(leaf
, i
);
3683 if (!leaf
->map_token
) {
3684 map_extent_buffer(leaf
, (unsigned long)item
,
3685 sizeof(struct btrfs_item
),
3686 &leaf
->map_token
, &leaf
->kaddr
,
3687 &leaf
->map_start
, &leaf
->map_len
,
3690 ioff
= btrfs_item_offset(leaf
, item
);
3691 btrfs_set_item_offset(leaf
, item
, ioff
+ dsize
);
3694 if (leaf
->map_token
) {
3695 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3696 leaf
->map_token
= NULL
;
3699 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
),
3700 btrfs_item_nr_offset(slot
+ nr
),
3701 sizeof(struct btrfs_item
) *
3702 (nritems
- slot
- nr
));
3704 btrfs_set_header_nritems(leaf
, nritems
- nr
);
3707 /* delete the leaf if we've emptied it */
3709 if (leaf
== root
->node
) {
3710 btrfs_set_header_level(leaf
, 0);
3712 ret
= btrfs_del_leaf(trans
, root
, path
, leaf
->start
);
3716 int used
= leaf_space_used(leaf
, 0, nritems
);
3718 struct btrfs_disk_key disk_key
;
3720 btrfs_item_key(leaf
, &disk_key
, 0);
3721 wret
= fixup_low_keys(trans
, root
, path
,
3727 /* delete the leaf if it is mostly empty */
3728 if (used
< BTRFS_LEAF_DATA_SIZE(root
) / 4) {
3729 /* push_leaf_left fixes the path.
3730 * make sure the path still points to our leaf
3731 * for possible call to del_ptr below
3733 slot
= path
->slots
[1];
3734 extent_buffer_get(leaf
);
3736 wret
= push_leaf_left(trans
, root
, path
, 1, 1);
3737 if (wret
< 0 && wret
!= -ENOSPC
)
3740 if (path
->nodes
[0] == leaf
&&
3741 btrfs_header_nritems(leaf
)) {
3742 wret
= push_leaf_right(trans
, root
, path
, 1, 1);
3743 if (wret
< 0 && wret
!= -ENOSPC
)
3747 if (btrfs_header_nritems(leaf
) == 0) {
3748 path
->slots
[1] = slot
;
3749 ret
= btrfs_del_leaf(trans
, root
, path
,
3752 free_extent_buffer(leaf
);
3754 /* if we're still in the path, make sure
3755 * we're dirty. Otherwise, one of the
3756 * push_leaf functions must have already
3757 * dirtied this buffer
3759 if (path
->nodes
[0] == leaf
)
3760 btrfs_mark_buffer_dirty(leaf
);
3761 free_extent_buffer(leaf
);
3764 btrfs_mark_buffer_dirty(leaf
);
3771 * search the tree again to find a leaf with lesser keys
3772 * returns 0 if it found something or 1 if there are no lesser leaves.
3773 * returns < 0 on io errors.
3775 * This may release the path, and so you may lose any locks held at the
3778 int btrfs_prev_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
3780 struct btrfs_key key
;
3781 struct btrfs_disk_key found_key
;
3784 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, 0);
3788 else if (key
.type
> 0)
3790 else if (key
.objectid
> 0)
3795 btrfs_release_path(root
, path
);
3796 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3799 btrfs_item_key(path
->nodes
[0], &found_key
, 0);
3800 ret
= comp_keys(&found_key
, &key
);
3807 * A helper function to walk down the tree starting at min_key, and looking
3808 * for nodes or leaves that are either in cache or have a minimum
3809 * transaction id. This is used by the btree defrag code, and tree logging
3811 * This does not cow, but it does stuff the starting key it finds back
3812 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3813 * key and get a writable path.
3815 * This does lock as it descends, and path->keep_locks should be set
3816 * to 1 by the caller.
3818 * This honors path->lowest_level to prevent descent past a given level
3821 * min_trans indicates the oldest transaction that you are interested
3822 * in walking through. Any nodes or leaves older than min_trans are
3823 * skipped over (without reading them).
3825 * returns zero if something useful was found, < 0 on error and 1 if there
3826 * was nothing in the tree that matched the search criteria.
3828 int btrfs_search_forward(struct btrfs_root
*root
, struct btrfs_key
*min_key
,
3829 struct btrfs_key
*max_key
,
3830 struct btrfs_path
*path
, int cache_only
,
3833 struct extent_buffer
*cur
;
3834 struct btrfs_key found_key
;
3841 WARN_ON(!path
->keep_locks
);
3843 cur
= btrfs_lock_root_node(root
);
3844 level
= btrfs_header_level(cur
);
3845 WARN_ON(path
->nodes
[level
]);
3846 path
->nodes
[level
] = cur
;
3847 path
->locks
[level
] = 1;
3849 if (btrfs_header_generation(cur
) < min_trans
) {
3854 nritems
= btrfs_header_nritems(cur
);
3855 level
= btrfs_header_level(cur
);
3856 sret
= bin_search(cur
, min_key
, level
, &slot
);
3858 /* at the lowest level, we're done, setup the path and exit */
3859 if (level
== path
->lowest_level
) {
3860 if (slot
>= nritems
)
3863 path
->slots
[level
] = slot
;
3864 btrfs_item_key_to_cpu(cur
, &found_key
, slot
);
3867 if (sret
&& slot
> 0)
3870 * check this node pointer against the cache_only and
3871 * min_trans parameters. If it isn't in cache or is too
3872 * old, skip to the next one.
3874 while (slot
< nritems
) {
3877 struct extent_buffer
*tmp
;
3878 struct btrfs_disk_key disk_key
;
3880 blockptr
= btrfs_node_blockptr(cur
, slot
);
3881 gen
= btrfs_node_ptr_generation(cur
, slot
);
3882 if (gen
< min_trans
) {
3890 btrfs_node_key(cur
, &disk_key
, slot
);
3891 if (comp_keys(&disk_key
, max_key
) >= 0) {
3897 tmp
= btrfs_find_tree_block(root
, blockptr
,
3898 btrfs_level_size(root
, level
- 1));
3900 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
3901 free_extent_buffer(tmp
);
3905 free_extent_buffer(tmp
);
3910 * we didn't find a candidate key in this node, walk forward
3911 * and find another one
3913 if (slot
>= nritems
) {
3914 path
->slots
[level
] = slot
;
3915 btrfs_set_path_blocking(path
);
3916 sret
= btrfs_find_next_key(root
, path
, min_key
, level
,
3917 cache_only
, min_trans
);
3919 btrfs_release_path(root
, path
);
3922 btrfs_clear_path_blocking(path
);
3926 /* save our key for returning back */
3927 btrfs_node_key_to_cpu(cur
, &found_key
, slot
);
3928 path
->slots
[level
] = slot
;
3929 if (level
== path
->lowest_level
) {
3931 unlock_up(path
, level
, 1);
3934 btrfs_set_path_blocking(path
);
3935 cur
= read_node_slot(root
, cur
, slot
);
3937 btrfs_tree_lock(cur
);
3939 path
->locks
[level
- 1] = 1;
3940 path
->nodes
[level
- 1] = cur
;
3941 unlock_up(path
, level
, 1);
3942 btrfs_clear_path_blocking(path
);
3946 memcpy(min_key
, &found_key
, sizeof(found_key
));
3947 btrfs_set_path_blocking(path
);
3952 * this is similar to btrfs_next_leaf, but does not try to preserve
3953 * and fixup the path. It looks for and returns the next key in the
3954 * tree based on the current path and the cache_only and min_trans
3957 * 0 is returned if another key is found, < 0 if there are any errors
3958 * and 1 is returned if there are no higher keys in the tree
3960 * path->keep_locks should be set to 1 on the search made before
3961 * calling this function.
3963 int btrfs_find_next_key(struct btrfs_root
*root
, struct btrfs_path
*path
,
3964 struct btrfs_key
*key
, int lowest_level
,
3965 int cache_only
, u64 min_trans
)
3967 int level
= lowest_level
;
3969 struct extent_buffer
*c
;
3971 WARN_ON(!path
->keep_locks
);
3972 while (level
< BTRFS_MAX_LEVEL
) {
3973 if (!path
->nodes
[level
])
3976 slot
= path
->slots
[level
] + 1;
3977 c
= path
->nodes
[level
];
3979 if (slot
>= btrfs_header_nritems(c
)) {
3981 if (level
== BTRFS_MAX_LEVEL
)
3986 btrfs_item_key_to_cpu(c
, key
, slot
);
3988 u64 blockptr
= btrfs_node_blockptr(c
, slot
);
3989 u64 gen
= btrfs_node_ptr_generation(c
, slot
);
3992 struct extent_buffer
*cur
;
3993 cur
= btrfs_find_tree_block(root
, blockptr
,
3994 btrfs_level_size(root
, level
- 1));
3995 if (!cur
|| !btrfs_buffer_uptodate(cur
, gen
)) {
3998 free_extent_buffer(cur
);
4001 free_extent_buffer(cur
);
4003 if (gen
< min_trans
) {
4007 btrfs_node_key_to_cpu(c
, key
, slot
);
4015 * search the tree again to find a leaf with greater keys
4016 * returns 0 if it found something or 1 if there are no greater leaves.
4017 * returns < 0 on io errors.
4019 int btrfs_next_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
4023 struct extent_buffer
*c
;
4024 struct extent_buffer
*next
= NULL
;
4025 struct btrfs_key key
;
4029 nritems
= btrfs_header_nritems(path
->nodes
[0]);
4033 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, nritems
- 1);
4035 btrfs_release_path(root
, path
);
4036 path
->keep_locks
= 1;
4037 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4038 path
->keep_locks
= 0;
4043 btrfs_set_path_blocking(path
);
4044 nritems
= btrfs_header_nritems(path
->nodes
[0]);
4046 * by releasing the path above we dropped all our locks. A balance
4047 * could have added more items next to the key that used to be
4048 * at the very end of the block. So, check again here and
4049 * advance the path if there are now more items available.
4051 if (nritems
> 0 && path
->slots
[0] < nritems
- 1) {
4056 while (level
< BTRFS_MAX_LEVEL
) {
4057 if (!path
->nodes
[level
])
4060 slot
= path
->slots
[level
] + 1;
4061 c
= path
->nodes
[level
];
4062 if (slot
>= btrfs_header_nritems(c
)) {
4064 if (level
== BTRFS_MAX_LEVEL
)
4070 btrfs_tree_unlock(next
);
4071 free_extent_buffer(next
);
4074 /* the path was set to blocking above */
4075 if (level
== 1 && (path
->locks
[1] || path
->skip_locking
) &&
4077 reada_for_search(root
, path
, level
, slot
, 0);
4079 next
= read_node_slot(root
, c
, slot
);
4080 if (!path
->skip_locking
) {
4081 WARN_ON(!btrfs_tree_locked(c
));
4082 btrfs_tree_lock(next
);
4083 btrfs_set_lock_blocking(next
);
4087 path
->slots
[level
] = slot
;
4090 c
= path
->nodes
[level
];
4091 if (path
->locks
[level
])
4092 btrfs_tree_unlock(c
);
4093 free_extent_buffer(c
);
4094 path
->nodes
[level
] = next
;
4095 path
->slots
[level
] = 0;
4096 if (!path
->skip_locking
)
4097 path
->locks
[level
] = 1;
4101 btrfs_set_path_blocking(path
);
4102 if (level
== 1 && path
->locks
[1] && path
->reada
)
4103 reada_for_search(root
, path
, level
, slot
, 0);
4104 next
= read_node_slot(root
, next
, 0);
4105 if (!path
->skip_locking
) {
4106 WARN_ON(!btrfs_tree_locked(path
->nodes
[level
]));
4107 btrfs_tree_lock(next
);
4108 btrfs_set_lock_blocking(next
);
4112 unlock_up(path
, 0, 1);
4117 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4118 * searching until it gets past min_objectid or finds an item of 'type'
4120 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4122 int btrfs_previous_item(struct btrfs_root
*root
,
4123 struct btrfs_path
*path
, u64 min_objectid
,
4126 struct btrfs_key found_key
;
4127 struct extent_buffer
*leaf
;
4132 if (path
->slots
[0] == 0) {
4133 btrfs_set_path_blocking(path
);
4134 ret
= btrfs_prev_leaf(root
, path
);
4140 leaf
= path
->nodes
[0];
4141 nritems
= btrfs_header_nritems(leaf
);
4144 if (path
->slots
[0] == nritems
)
4147 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4148 if (found_key
.type
== type
)
4150 if (found_key
.objectid
< min_objectid
)
4152 if (found_key
.objectid
== min_objectid
&&
4153 found_key
.type
< type
)