2 * Copyright (C) 2007,2008 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/sched.h>
20 #include <linux/slab.h>
23 #include "transaction.h"
24 #include "print-tree.h"
27 static int split_node(struct btrfs_trans_handle
*trans
, struct btrfs_root
28 *root
, struct btrfs_path
*path
, int level
);
29 static int split_leaf(struct btrfs_trans_handle
*trans
, struct btrfs_root
30 *root
, struct btrfs_key
*ins_key
,
31 struct btrfs_path
*path
, int data_size
, int extend
);
32 static int push_node_left(struct btrfs_trans_handle
*trans
,
33 struct btrfs_root
*root
, struct extent_buffer
*dst
,
34 struct extent_buffer
*src
, int empty
);
35 static int balance_node_right(struct btrfs_trans_handle
*trans
,
36 struct btrfs_root
*root
,
37 struct extent_buffer
*dst_buf
,
38 struct extent_buffer
*src_buf
);
39 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
40 struct btrfs_path
*path
, int level
, int slot
);
41 static int setup_items_for_insert(struct btrfs_trans_handle
*trans
,
42 struct btrfs_root
*root
, struct btrfs_path
*path
,
43 struct btrfs_key
*cpu_key
, u32
*data_size
,
44 u32 total_data
, u32 total_size
, int nr
);
47 struct btrfs_path
*btrfs_alloc_path(void)
49 struct btrfs_path
*path
;
50 path
= kmem_cache_zalloc(btrfs_path_cachep
, GFP_NOFS
);
57 * set all locked nodes in the path to blocking locks. This should
58 * be done before scheduling
60 noinline
void btrfs_set_path_blocking(struct btrfs_path
*p
)
63 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
64 if (p
->nodes
[i
] && p
->locks
[i
])
65 btrfs_set_lock_blocking(p
->nodes
[i
]);
70 * reset all the locked nodes in the path to spinning locks.
72 * held is used to keep lockdep happy, when lockdep is enabled
73 * we set held to a blocking lock before we go around and
74 * retake all the spinlocks in the path. You can safely use NULL
77 noinline
void btrfs_clear_path_blocking(struct btrfs_path
*p
,
78 struct extent_buffer
*held
)
82 #ifdef CONFIG_DEBUG_LOCK_ALLOC
83 /* lockdep really cares that we take all of these spinlocks
84 * in the right order. If any of the locks in the path are not
85 * currently blocking, it is going to complain. So, make really
86 * really sure by forcing the path to blocking before we clear
90 btrfs_set_lock_blocking(held
);
91 btrfs_set_path_blocking(p
);
94 for (i
= BTRFS_MAX_LEVEL
- 1; i
>= 0; i
--) {
95 if (p
->nodes
[i
] && p
->locks
[i
])
96 btrfs_clear_lock_blocking(p
->nodes
[i
]);
99 #ifdef CONFIG_DEBUG_LOCK_ALLOC
101 btrfs_clear_lock_blocking(held
);
105 /* this also releases the path */
106 void btrfs_free_path(struct btrfs_path
*p
)
108 btrfs_release_path(NULL
, p
);
109 kmem_cache_free(btrfs_path_cachep
, p
);
113 * path release drops references on the extent buffers in the path
114 * and it drops any locks held by this path
116 * It is safe to call this on paths that have no locks or extent buffers held.
118 noinline
void btrfs_release_path(struct btrfs_root
*root
, struct btrfs_path
*p
)
122 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
127 btrfs_tree_unlock(p
->nodes
[i
]);
130 free_extent_buffer(p
->nodes
[i
]);
136 * safely gets a reference on the root node of a tree. A lock
137 * is not taken, so a concurrent writer may put a different node
138 * at the root of the tree. See btrfs_lock_root_node for the
141 * The extent buffer returned by this has a reference taken, so
142 * it won't disappear. It may stop being the root of the tree
143 * at any time because there are no locks held.
145 struct extent_buffer
*btrfs_root_node(struct btrfs_root
*root
)
147 struct extent_buffer
*eb
;
148 spin_lock(&root
->node_lock
);
150 extent_buffer_get(eb
);
151 spin_unlock(&root
->node_lock
);
155 /* loop around taking references on and locking the root node of the
156 * tree until you end up with a lock on the root. A locked buffer
157 * is returned, with a reference held.
159 struct extent_buffer
*btrfs_lock_root_node(struct btrfs_root
*root
)
161 struct extent_buffer
*eb
;
164 eb
= btrfs_root_node(root
);
167 spin_lock(&root
->node_lock
);
168 if (eb
== root
->node
) {
169 spin_unlock(&root
->node_lock
);
172 spin_unlock(&root
->node_lock
);
174 btrfs_tree_unlock(eb
);
175 free_extent_buffer(eb
);
180 /* cowonly root (everything not a reference counted cow subvolume), just get
181 * put onto a simple dirty list. transaction.c walks this to make sure they
182 * get properly updated on disk.
184 static void add_root_to_dirty_list(struct btrfs_root
*root
)
186 if (root
->track_dirty
&& list_empty(&root
->dirty_list
)) {
187 list_add(&root
->dirty_list
,
188 &root
->fs_info
->dirty_cowonly_roots
);
193 * used by snapshot creation to make a copy of a root for a tree with
194 * a given objectid. The buffer with the new root node is returned in
195 * cow_ret, and this func returns zero on success or a negative error code.
197 int btrfs_copy_root(struct btrfs_trans_handle
*trans
,
198 struct btrfs_root
*root
,
199 struct extent_buffer
*buf
,
200 struct extent_buffer
**cow_ret
, u64 new_root_objectid
)
202 struct extent_buffer
*cow
;
205 struct btrfs_disk_key disk_key
;
207 WARN_ON(root
->ref_cows
&& trans
->transid
!=
208 root
->fs_info
->running_transaction
->transid
);
209 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
211 level
= btrfs_header_level(buf
);
213 btrfs_item_key(buf
, &disk_key
, 0);
215 btrfs_node_key(buf
, &disk_key
, 0);
217 cow
= btrfs_alloc_free_block(trans
, root
, buf
->len
, 0,
218 new_root_objectid
, &disk_key
, level
,
223 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
224 btrfs_set_header_bytenr(cow
, cow
->start
);
225 btrfs_set_header_generation(cow
, trans
->transid
);
226 btrfs_set_header_backref_rev(cow
, BTRFS_MIXED_BACKREF_REV
);
227 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
|
228 BTRFS_HEADER_FLAG_RELOC
);
229 if (new_root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
230 btrfs_set_header_flag(cow
, BTRFS_HEADER_FLAG_RELOC
);
232 btrfs_set_header_owner(cow
, new_root_objectid
);
234 write_extent_buffer(cow
, root
->fs_info
->fsid
,
235 (unsigned long)btrfs_header_fsid(cow
),
238 WARN_ON(btrfs_header_generation(buf
) > trans
->transid
);
239 if (new_root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
240 ret
= btrfs_inc_ref(trans
, root
, cow
, 1);
242 ret
= btrfs_inc_ref(trans
, root
, cow
, 0);
247 btrfs_mark_buffer_dirty(cow
);
253 * check if the tree block can be shared by multiple trees
255 int btrfs_block_can_be_shared(struct btrfs_root
*root
,
256 struct extent_buffer
*buf
)
259 * Tree blocks not in reference counted trees and tree roots
260 * are never shared. If a block was allocated after the last
261 * snapshot and the block was not allocated by tree relocation,
262 * we know the block is not shared.
264 if (root
->ref_cows
&&
265 buf
!= root
->node
&& buf
!= root
->commit_root
&&
266 (btrfs_header_generation(buf
) <=
267 btrfs_root_last_snapshot(&root
->root_item
) ||
268 btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
)))
270 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
271 if (root
->ref_cows
&&
272 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
278 static noinline
int update_ref_for_cow(struct btrfs_trans_handle
*trans
,
279 struct btrfs_root
*root
,
280 struct extent_buffer
*buf
,
281 struct extent_buffer
*cow
,
291 * Backrefs update rules:
293 * Always use full backrefs for extent pointers in tree block
294 * allocated by tree relocation.
296 * If a shared tree block is no longer referenced by its owner
297 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
298 * use full backrefs for extent pointers in tree block.
300 * If a tree block is being relocated
301 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
302 * use full backrefs for extent pointers in tree block.
303 * The reason for this is some operations (such as drop tree)
304 * are only allowed for blocks use full backrefs.
307 if (btrfs_block_can_be_shared(root
, buf
)) {
308 ret
= btrfs_lookup_extent_info(trans
, root
, buf
->start
,
309 buf
->len
, &refs
, &flags
);
314 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
||
315 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
316 flags
= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
321 owner
= btrfs_header_owner(buf
);
322 BUG_ON(owner
== BTRFS_TREE_RELOC_OBJECTID
&&
323 !(flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
));
326 if ((owner
== root
->root_key
.objectid
||
327 root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) &&
328 !(flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
)) {
329 ret
= btrfs_inc_ref(trans
, root
, buf
, 1);
332 if (root
->root_key
.objectid
==
333 BTRFS_TREE_RELOC_OBJECTID
) {
334 ret
= btrfs_dec_ref(trans
, root
, buf
, 0);
336 ret
= btrfs_inc_ref(trans
, root
, cow
, 1);
339 new_flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
342 if (root
->root_key
.objectid
==
343 BTRFS_TREE_RELOC_OBJECTID
)
344 ret
= btrfs_inc_ref(trans
, root
, cow
, 1);
346 ret
= btrfs_inc_ref(trans
, root
, cow
, 0);
349 if (new_flags
!= 0) {
350 ret
= btrfs_set_disk_extent_flags(trans
, root
,
357 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
358 if (root
->root_key
.objectid
==
359 BTRFS_TREE_RELOC_OBJECTID
)
360 ret
= btrfs_inc_ref(trans
, root
, cow
, 1);
362 ret
= btrfs_inc_ref(trans
, root
, cow
, 0);
364 ret
= btrfs_dec_ref(trans
, root
, buf
, 1);
367 clean_tree_block(trans
, root
, buf
);
374 * does the dirty work in cow of a single block. The parent block (if
375 * supplied) is updated to point to the new cow copy. The new buffer is marked
376 * dirty and returned locked. If you modify the block it needs to be marked
379 * search_start -- an allocation hint for the new block
381 * empty_size -- a hint that you plan on doing more cow. This is the size in
382 * bytes the allocator should try to find free next to the block it returns.
383 * This is just a hint and may be ignored by the allocator.
385 static noinline
int __btrfs_cow_block(struct btrfs_trans_handle
*trans
,
386 struct btrfs_root
*root
,
387 struct extent_buffer
*buf
,
388 struct extent_buffer
*parent
, int parent_slot
,
389 struct extent_buffer
**cow_ret
,
390 u64 search_start
, u64 empty_size
)
392 struct btrfs_disk_key disk_key
;
393 struct extent_buffer
*cow
;
402 btrfs_assert_tree_locked(buf
);
404 WARN_ON(root
->ref_cows
&& trans
->transid
!=
405 root
->fs_info
->running_transaction
->transid
);
406 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
408 level
= btrfs_header_level(buf
);
411 btrfs_item_key(buf
, &disk_key
, 0);
413 btrfs_node_key(buf
, &disk_key
, 0);
415 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
417 parent_start
= parent
->start
;
423 cow
= btrfs_alloc_free_block(trans
, root
, buf
->len
, parent_start
,
424 root
->root_key
.objectid
, &disk_key
,
425 level
, search_start
, empty_size
);
429 /* cow is set to blocking by btrfs_init_new_buffer */
431 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
432 btrfs_set_header_bytenr(cow
, cow
->start
);
433 btrfs_set_header_generation(cow
, trans
->transid
);
434 btrfs_set_header_backref_rev(cow
, BTRFS_MIXED_BACKREF_REV
);
435 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
|
436 BTRFS_HEADER_FLAG_RELOC
);
437 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
438 btrfs_set_header_flag(cow
, BTRFS_HEADER_FLAG_RELOC
);
440 btrfs_set_header_owner(cow
, root
->root_key
.objectid
);
442 write_extent_buffer(cow
, root
->fs_info
->fsid
,
443 (unsigned long)btrfs_header_fsid(cow
),
446 update_ref_for_cow(trans
, root
, buf
, cow
, &last_ref
);
449 btrfs_reloc_cow_block(trans
, root
, buf
, cow
);
451 if (buf
== root
->node
) {
452 WARN_ON(parent
&& parent
!= buf
);
453 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
||
454 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
455 parent_start
= buf
->start
;
459 spin_lock(&root
->node_lock
);
461 extent_buffer_get(cow
);
462 spin_unlock(&root
->node_lock
);
464 btrfs_free_tree_block(trans
, root
, buf
, parent_start
,
466 free_extent_buffer(buf
);
467 add_root_to_dirty_list(root
);
469 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
470 parent_start
= parent
->start
;
474 WARN_ON(trans
->transid
!= btrfs_header_generation(parent
));
475 btrfs_set_node_blockptr(parent
, parent_slot
,
477 btrfs_set_node_ptr_generation(parent
, parent_slot
,
479 btrfs_mark_buffer_dirty(parent
);
480 btrfs_free_tree_block(trans
, root
, buf
, parent_start
,
484 btrfs_tree_unlock(buf
);
485 free_extent_buffer(buf
);
486 btrfs_mark_buffer_dirty(cow
);
491 static inline int should_cow_block(struct btrfs_trans_handle
*trans
,
492 struct btrfs_root
*root
,
493 struct extent_buffer
*buf
)
495 if (btrfs_header_generation(buf
) == trans
->transid
&&
496 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
) &&
497 !(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
&&
498 btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
)))
504 * cows a single block, see __btrfs_cow_block for the real work.
505 * This version of it has extra checks so that a block isn't cow'd more than
506 * once per transaction, as long as it hasn't been written yet
508 noinline
int btrfs_cow_block(struct btrfs_trans_handle
*trans
,
509 struct btrfs_root
*root
, struct extent_buffer
*buf
,
510 struct extent_buffer
*parent
, int parent_slot
,
511 struct extent_buffer
**cow_ret
)
516 if (trans
->transaction
!= root
->fs_info
->running_transaction
) {
517 printk(KERN_CRIT
"trans %llu running %llu\n",
518 (unsigned long long)trans
->transid
,
520 root
->fs_info
->running_transaction
->transid
);
523 if (trans
->transid
!= root
->fs_info
->generation
) {
524 printk(KERN_CRIT
"trans %llu running %llu\n",
525 (unsigned long long)trans
->transid
,
526 (unsigned long long)root
->fs_info
->generation
);
530 if (!should_cow_block(trans
, root
, buf
)) {
535 search_start
= buf
->start
& ~((u64
)(1024 * 1024 * 1024) - 1);
538 btrfs_set_lock_blocking(parent
);
539 btrfs_set_lock_blocking(buf
);
541 ret
= __btrfs_cow_block(trans
, root
, buf
, parent
,
542 parent_slot
, cow_ret
, search_start
, 0);
547 * helper function for defrag to decide if two blocks pointed to by a
548 * node are actually close by
550 static int close_blocks(u64 blocknr
, u64 other
, u32 blocksize
)
552 if (blocknr
< other
&& other
- (blocknr
+ blocksize
) < 32768)
554 if (blocknr
> other
&& blocknr
- (other
+ blocksize
) < 32768)
560 * compare two keys in a memcmp fashion
562 static int comp_keys(struct btrfs_disk_key
*disk
, struct btrfs_key
*k2
)
566 btrfs_disk_key_to_cpu(&k1
, disk
);
568 return btrfs_comp_cpu_keys(&k1
, k2
);
572 * same as comp_keys only with two btrfs_key's
574 int btrfs_comp_cpu_keys(struct btrfs_key
*k1
, struct btrfs_key
*k2
)
576 if (k1
->objectid
> k2
->objectid
)
578 if (k1
->objectid
< k2
->objectid
)
580 if (k1
->type
> k2
->type
)
582 if (k1
->type
< k2
->type
)
584 if (k1
->offset
> k2
->offset
)
586 if (k1
->offset
< k2
->offset
)
592 * this is used by the defrag code to go through all the
593 * leaves pointed to by a node and reallocate them so that
594 * disk order is close to key order
596 int btrfs_realloc_node(struct btrfs_trans_handle
*trans
,
597 struct btrfs_root
*root
, struct extent_buffer
*parent
,
598 int start_slot
, int cache_only
, u64
*last_ret
,
599 struct btrfs_key
*progress
)
601 struct extent_buffer
*cur
;
604 u64 search_start
= *last_ret
;
614 int progress_passed
= 0;
615 struct btrfs_disk_key disk_key
;
617 parent_level
= btrfs_header_level(parent
);
618 if (cache_only
&& parent_level
!= 1)
621 if (trans
->transaction
!= root
->fs_info
->running_transaction
)
623 if (trans
->transid
!= root
->fs_info
->generation
)
626 parent_nritems
= btrfs_header_nritems(parent
);
627 blocksize
= btrfs_level_size(root
, parent_level
- 1);
628 end_slot
= parent_nritems
;
630 if (parent_nritems
== 1)
633 btrfs_set_lock_blocking(parent
);
635 for (i
= start_slot
; i
< end_slot
; i
++) {
638 if (!parent
->map_token
) {
639 map_extent_buffer(parent
,
640 btrfs_node_key_ptr_offset(i
),
641 sizeof(struct btrfs_key_ptr
),
642 &parent
->map_token
, &parent
->kaddr
,
643 &parent
->map_start
, &parent
->map_len
,
646 btrfs_node_key(parent
, &disk_key
, i
);
647 if (!progress_passed
&& comp_keys(&disk_key
, progress
) < 0)
651 blocknr
= btrfs_node_blockptr(parent
, i
);
652 gen
= btrfs_node_ptr_generation(parent
, i
);
654 last_block
= blocknr
;
657 other
= btrfs_node_blockptr(parent
, i
- 1);
658 close
= close_blocks(blocknr
, other
, blocksize
);
660 if (!close
&& i
< end_slot
- 2) {
661 other
= btrfs_node_blockptr(parent
, i
+ 1);
662 close
= close_blocks(blocknr
, other
, blocksize
);
665 last_block
= blocknr
;
668 if (parent
->map_token
) {
669 unmap_extent_buffer(parent
, parent
->map_token
,
671 parent
->map_token
= NULL
;
674 cur
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
676 uptodate
= btrfs_buffer_uptodate(cur
, gen
);
679 if (!cur
|| !uptodate
) {
681 free_extent_buffer(cur
);
685 cur
= read_tree_block(root
, blocknr
,
687 } else if (!uptodate
) {
688 btrfs_read_buffer(cur
, gen
);
691 if (search_start
== 0)
692 search_start
= last_block
;
694 btrfs_tree_lock(cur
);
695 btrfs_set_lock_blocking(cur
);
696 err
= __btrfs_cow_block(trans
, root
, cur
, parent
, i
,
699 (end_slot
- i
) * blocksize
));
701 btrfs_tree_unlock(cur
);
702 free_extent_buffer(cur
);
705 search_start
= cur
->start
;
706 last_block
= cur
->start
;
707 *last_ret
= search_start
;
708 btrfs_tree_unlock(cur
);
709 free_extent_buffer(cur
);
711 if (parent
->map_token
) {
712 unmap_extent_buffer(parent
, parent
->map_token
,
714 parent
->map_token
= NULL
;
720 * The leaf data grows from end-to-front in the node.
721 * this returns the address of the start of the last item,
722 * which is the stop of the leaf data stack
724 static inline unsigned int leaf_data_end(struct btrfs_root
*root
,
725 struct extent_buffer
*leaf
)
727 u32 nr
= btrfs_header_nritems(leaf
);
729 return BTRFS_LEAF_DATA_SIZE(root
);
730 return btrfs_item_offset_nr(leaf
, nr
- 1);
734 * extra debugging checks to make sure all the items in a node are
735 * well formed and in the proper order
737 static int check_node(struct btrfs_root
*root
, struct btrfs_path
*path
,
740 struct extent_buffer
*parent
= NULL
;
741 struct extent_buffer
*node
= path
->nodes
[level
];
742 struct btrfs_disk_key parent_key
;
743 struct btrfs_disk_key node_key
;
746 struct btrfs_key cpukey
;
747 u32 nritems
= btrfs_header_nritems(node
);
749 if (path
->nodes
[level
+ 1])
750 parent
= path
->nodes
[level
+ 1];
752 slot
= path
->slots
[level
];
753 BUG_ON(nritems
== 0);
755 parent_slot
= path
->slots
[level
+ 1];
756 btrfs_node_key(parent
, &parent_key
, parent_slot
);
757 btrfs_node_key(node
, &node_key
, 0);
758 BUG_ON(memcmp(&parent_key
, &node_key
,
759 sizeof(struct btrfs_disk_key
)));
760 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
761 btrfs_header_bytenr(node
));
763 BUG_ON(nritems
> BTRFS_NODEPTRS_PER_BLOCK(root
));
765 btrfs_node_key_to_cpu(node
, &cpukey
, slot
- 1);
766 btrfs_node_key(node
, &node_key
, slot
);
767 BUG_ON(comp_keys(&node_key
, &cpukey
) <= 0);
769 if (slot
< nritems
- 1) {
770 btrfs_node_key_to_cpu(node
, &cpukey
, slot
+ 1);
771 btrfs_node_key(node
, &node_key
, slot
);
772 BUG_ON(comp_keys(&node_key
, &cpukey
) >= 0);
778 * extra checking to make sure all the items in a leaf are
779 * well formed and in the proper order
781 static int check_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
,
784 struct extent_buffer
*leaf
= path
->nodes
[level
];
785 struct extent_buffer
*parent
= NULL
;
787 struct btrfs_key cpukey
;
788 struct btrfs_disk_key parent_key
;
789 struct btrfs_disk_key leaf_key
;
790 int slot
= path
->slots
[0];
792 u32 nritems
= btrfs_header_nritems(leaf
);
794 if (path
->nodes
[level
+ 1])
795 parent
= path
->nodes
[level
+ 1];
801 parent_slot
= path
->slots
[level
+ 1];
802 btrfs_node_key(parent
, &parent_key
, parent_slot
);
803 btrfs_item_key(leaf
, &leaf_key
, 0);
805 BUG_ON(memcmp(&parent_key
, &leaf_key
,
806 sizeof(struct btrfs_disk_key
)));
807 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
808 btrfs_header_bytenr(leaf
));
810 if (slot
!= 0 && slot
< nritems
- 1) {
811 btrfs_item_key(leaf
, &leaf_key
, slot
);
812 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
- 1);
813 if (comp_keys(&leaf_key
, &cpukey
) <= 0) {
814 btrfs_print_leaf(root
, leaf
);
815 printk(KERN_CRIT
"slot %d offset bad key\n", slot
);
818 if (btrfs_item_offset_nr(leaf
, slot
- 1) !=
819 btrfs_item_end_nr(leaf
, slot
)) {
820 btrfs_print_leaf(root
, leaf
);
821 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
825 if (slot
< nritems
- 1) {
826 btrfs_item_key(leaf
, &leaf_key
, slot
);
827 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
+ 1);
828 BUG_ON(comp_keys(&leaf_key
, &cpukey
) >= 0);
829 if (btrfs_item_offset_nr(leaf
, slot
) !=
830 btrfs_item_end_nr(leaf
, slot
+ 1)) {
831 btrfs_print_leaf(root
, leaf
);
832 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
836 BUG_ON(btrfs_item_offset_nr(leaf
, 0) +
837 btrfs_item_size_nr(leaf
, 0) != BTRFS_LEAF_DATA_SIZE(root
));
841 static noinline
int check_block(struct btrfs_root
*root
,
842 struct btrfs_path
*path
, int level
)
846 return check_leaf(root
, path
, level
);
847 return check_node(root
, path
, level
);
851 * search for key in the extent_buffer. The items start at offset p,
852 * and they are item_size apart. There are 'max' items in p.
854 * the slot in the array is returned via slot, and it points to
855 * the place where you would insert key if it is not found in
858 * slot may point to max if the key is bigger than all of the keys
860 static noinline
int generic_bin_search(struct extent_buffer
*eb
,
862 int item_size
, struct btrfs_key
*key
,
869 struct btrfs_disk_key
*tmp
= NULL
;
870 struct btrfs_disk_key unaligned
;
871 unsigned long offset
;
872 char *map_token
= NULL
;
874 unsigned long map_start
= 0;
875 unsigned long map_len
= 0;
879 mid
= (low
+ high
) / 2;
880 offset
= p
+ mid
* item_size
;
882 if (!map_token
|| offset
< map_start
||
883 (offset
+ sizeof(struct btrfs_disk_key
)) >
884 map_start
+ map_len
) {
886 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
890 err
= map_private_extent_buffer(eb
, offset
,
891 sizeof(struct btrfs_disk_key
),
893 &map_start
, &map_len
, KM_USER0
);
896 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
899 read_extent_buffer(eb
, &unaligned
,
900 offset
, sizeof(unaligned
));
905 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
908 ret
= comp_keys(tmp
, key
);
917 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
923 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
928 * simple bin_search frontend that does the right thing for
931 static int bin_search(struct extent_buffer
*eb
, struct btrfs_key
*key
,
932 int level
, int *slot
)
935 return generic_bin_search(eb
,
936 offsetof(struct btrfs_leaf
, items
),
937 sizeof(struct btrfs_item
),
938 key
, btrfs_header_nritems(eb
),
941 return generic_bin_search(eb
,
942 offsetof(struct btrfs_node
, ptrs
),
943 sizeof(struct btrfs_key_ptr
),
944 key
, btrfs_header_nritems(eb
),
950 int btrfs_bin_search(struct extent_buffer
*eb
, struct btrfs_key
*key
,
951 int level
, int *slot
)
953 return bin_search(eb
, key
, level
, slot
);
956 static void root_add_used(struct btrfs_root
*root
, u32 size
)
958 spin_lock(&root
->accounting_lock
);
959 btrfs_set_root_used(&root
->root_item
,
960 btrfs_root_used(&root
->root_item
) + size
);
961 spin_unlock(&root
->accounting_lock
);
964 static void root_sub_used(struct btrfs_root
*root
, u32 size
)
966 spin_lock(&root
->accounting_lock
);
967 btrfs_set_root_used(&root
->root_item
,
968 btrfs_root_used(&root
->root_item
) - size
);
969 spin_unlock(&root
->accounting_lock
);
972 /* given a node and slot number, this reads the blocks it points to. The
973 * extent buffer is returned with a reference taken (but unlocked).
974 * NULL is returned on error.
976 static noinline
struct extent_buffer
*read_node_slot(struct btrfs_root
*root
,
977 struct extent_buffer
*parent
, int slot
)
979 int level
= btrfs_header_level(parent
);
982 if (slot
>= btrfs_header_nritems(parent
))
987 return read_tree_block(root
, btrfs_node_blockptr(parent
, slot
),
988 btrfs_level_size(root
, level
- 1),
989 btrfs_node_ptr_generation(parent
, slot
));
993 * node level balancing, used to make sure nodes are in proper order for
994 * item deletion. We balance from the top down, so we have to make sure
995 * that a deletion won't leave an node completely empty later on.
997 static noinline
int balance_level(struct btrfs_trans_handle
*trans
,
998 struct btrfs_root
*root
,
999 struct btrfs_path
*path
, int level
)
1001 struct extent_buffer
*right
= NULL
;
1002 struct extent_buffer
*mid
;
1003 struct extent_buffer
*left
= NULL
;
1004 struct extent_buffer
*parent
= NULL
;
1008 int orig_slot
= path
->slots
[level
];
1014 mid
= path
->nodes
[level
];
1016 WARN_ON(!path
->locks
[level
]);
1017 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
1019 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
1021 if (level
< BTRFS_MAX_LEVEL
- 1)
1022 parent
= path
->nodes
[level
+ 1];
1023 pslot
= path
->slots
[level
+ 1];
1026 * deal with the case where there is only one pointer in the root
1027 * by promoting the node below to a root
1030 struct extent_buffer
*child
;
1032 if (btrfs_header_nritems(mid
) != 1)
1035 /* promote the child to a root */
1036 child
= read_node_slot(root
, mid
, 0);
1038 btrfs_tree_lock(child
);
1039 btrfs_set_lock_blocking(child
);
1040 ret
= btrfs_cow_block(trans
, root
, child
, mid
, 0, &child
);
1042 btrfs_tree_unlock(child
);
1043 free_extent_buffer(child
);
1047 spin_lock(&root
->node_lock
);
1049 spin_unlock(&root
->node_lock
);
1051 add_root_to_dirty_list(root
);
1052 btrfs_tree_unlock(child
);
1054 path
->locks
[level
] = 0;
1055 path
->nodes
[level
] = NULL
;
1056 clean_tree_block(trans
, root
, mid
);
1057 btrfs_tree_unlock(mid
);
1058 /* once for the path */
1059 free_extent_buffer(mid
);
1061 root_sub_used(root
, mid
->len
);
1062 btrfs_free_tree_block(trans
, root
, mid
, 0, 1);
1063 /* once for the root ptr */
1064 free_extent_buffer(mid
);
1067 if (btrfs_header_nritems(mid
) >
1068 BTRFS_NODEPTRS_PER_BLOCK(root
) / 4)
1071 btrfs_header_nritems(mid
);
1073 left
= read_node_slot(root
, parent
, pslot
- 1);
1075 btrfs_tree_lock(left
);
1076 btrfs_set_lock_blocking(left
);
1077 wret
= btrfs_cow_block(trans
, root
, left
,
1078 parent
, pslot
- 1, &left
);
1084 right
= read_node_slot(root
, parent
, pslot
+ 1);
1086 btrfs_tree_lock(right
);
1087 btrfs_set_lock_blocking(right
);
1088 wret
= btrfs_cow_block(trans
, root
, right
,
1089 parent
, pslot
+ 1, &right
);
1096 /* first, try to make some room in the middle buffer */
1098 orig_slot
+= btrfs_header_nritems(left
);
1099 wret
= push_node_left(trans
, root
, left
, mid
, 1);
1102 btrfs_header_nritems(mid
);
1106 * then try to empty the right most buffer into the middle
1109 wret
= push_node_left(trans
, root
, mid
, right
, 1);
1110 if (wret
< 0 && wret
!= -ENOSPC
)
1112 if (btrfs_header_nritems(right
) == 0) {
1113 clean_tree_block(trans
, root
, right
);
1114 btrfs_tree_unlock(right
);
1115 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
+
1119 root_sub_used(root
, right
->len
);
1120 btrfs_free_tree_block(trans
, root
, right
, 0, 1);
1121 free_extent_buffer(right
);
1124 struct btrfs_disk_key right_key
;
1125 btrfs_node_key(right
, &right_key
, 0);
1126 btrfs_set_node_key(parent
, &right_key
, pslot
+ 1);
1127 btrfs_mark_buffer_dirty(parent
);
1130 if (btrfs_header_nritems(mid
) == 1) {
1132 * we're not allowed to leave a node with one item in the
1133 * tree during a delete. A deletion from lower in the tree
1134 * could try to delete the only pointer in this node.
1135 * So, pull some keys from the left.
1136 * There has to be a left pointer at this point because
1137 * otherwise we would have pulled some pointers from the
1141 wret
= balance_node_right(trans
, root
, mid
, left
);
1147 wret
= push_node_left(trans
, root
, left
, mid
, 1);
1153 if (btrfs_header_nritems(mid
) == 0) {
1154 clean_tree_block(trans
, root
, mid
);
1155 btrfs_tree_unlock(mid
);
1156 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
);
1159 root_sub_used(root
, mid
->len
);
1160 btrfs_free_tree_block(trans
, root
, mid
, 0, 1);
1161 free_extent_buffer(mid
);
1164 /* update the parent key to reflect our changes */
1165 struct btrfs_disk_key mid_key
;
1166 btrfs_node_key(mid
, &mid_key
, 0);
1167 btrfs_set_node_key(parent
, &mid_key
, pslot
);
1168 btrfs_mark_buffer_dirty(parent
);
1171 /* update the path */
1173 if (btrfs_header_nritems(left
) > orig_slot
) {
1174 extent_buffer_get(left
);
1175 /* left was locked after cow */
1176 path
->nodes
[level
] = left
;
1177 path
->slots
[level
+ 1] -= 1;
1178 path
->slots
[level
] = orig_slot
;
1180 btrfs_tree_unlock(mid
);
1181 free_extent_buffer(mid
);
1184 orig_slot
-= btrfs_header_nritems(left
);
1185 path
->slots
[level
] = orig_slot
;
1188 /* double check we haven't messed things up */
1189 check_block(root
, path
, level
);
1191 btrfs_node_blockptr(path
->nodes
[level
], path
->slots
[level
]))
1195 btrfs_tree_unlock(right
);
1196 free_extent_buffer(right
);
1199 if (path
->nodes
[level
] != left
)
1200 btrfs_tree_unlock(left
);
1201 free_extent_buffer(left
);
1206 /* Node balancing for insertion. Here we only split or push nodes around
1207 * when they are completely full. This is also done top down, so we
1208 * have to be pessimistic.
1210 static noinline
int push_nodes_for_insert(struct btrfs_trans_handle
*trans
,
1211 struct btrfs_root
*root
,
1212 struct btrfs_path
*path
, int level
)
1214 struct extent_buffer
*right
= NULL
;
1215 struct extent_buffer
*mid
;
1216 struct extent_buffer
*left
= NULL
;
1217 struct extent_buffer
*parent
= NULL
;
1221 int orig_slot
= path
->slots
[level
];
1226 mid
= path
->nodes
[level
];
1227 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
1229 if (level
< BTRFS_MAX_LEVEL
- 1)
1230 parent
= path
->nodes
[level
+ 1];
1231 pslot
= path
->slots
[level
+ 1];
1236 left
= read_node_slot(root
, parent
, pslot
- 1);
1238 /* first, try to make some room in the middle buffer */
1242 btrfs_tree_lock(left
);
1243 btrfs_set_lock_blocking(left
);
1245 left_nr
= btrfs_header_nritems(left
);
1246 if (left_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1249 ret
= btrfs_cow_block(trans
, root
, left
, parent
,
1254 wret
= push_node_left(trans
, root
,
1261 struct btrfs_disk_key disk_key
;
1262 orig_slot
+= left_nr
;
1263 btrfs_node_key(mid
, &disk_key
, 0);
1264 btrfs_set_node_key(parent
, &disk_key
, pslot
);
1265 btrfs_mark_buffer_dirty(parent
);
1266 if (btrfs_header_nritems(left
) > orig_slot
) {
1267 path
->nodes
[level
] = left
;
1268 path
->slots
[level
+ 1] -= 1;
1269 path
->slots
[level
] = orig_slot
;
1270 btrfs_tree_unlock(mid
);
1271 free_extent_buffer(mid
);
1274 btrfs_header_nritems(left
);
1275 path
->slots
[level
] = orig_slot
;
1276 btrfs_tree_unlock(left
);
1277 free_extent_buffer(left
);
1281 btrfs_tree_unlock(left
);
1282 free_extent_buffer(left
);
1284 right
= read_node_slot(root
, parent
, pslot
+ 1);
1287 * then try to empty the right most buffer into the middle
1292 btrfs_tree_lock(right
);
1293 btrfs_set_lock_blocking(right
);
1295 right_nr
= btrfs_header_nritems(right
);
1296 if (right_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1299 ret
= btrfs_cow_block(trans
, root
, right
,
1305 wret
= balance_node_right(trans
, root
,
1312 struct btrfs_disk_key disk_key
;
1314 btrfs_node_key(right
, &disk_key
, 0);
1315 btrfs_set_node_key(parent
, &disk_key
, pslot
+ 1);
1316 btrfs_mark_buffer_dirty(parent
);
1318 if (btrfs_header_nritems(mid
) <= orig_slot
) {
1319 path
->nodes
[level
] = right
;
1320 path
->slots
[level
+ 1] += 1;
1321 path
->slots
[level
] = orig_slot
-
1322 btrfs_header_nritems(mid
);
1323 btrfs_tree_unlock(mid
);
1324 free_extent_buffer(mid
);
1326 btrfs_tree_unlock(right
);
1327 free_extent_buffer(right
);
1331 btrfs_tree_unlock(right
);
1332 free_extent_buffer(right
);
1338 * readahead one full node of leaves, finding things that are close
1339 * to the block in 'slot', and triggering ra on them.
1341 static void reada_for_search(struct btrfs_root
*root
,
1342 struct btrfs_path
*path
,
1343 int level
, int slot
, u64 objectid
)
1345 struct extent_buffer
*node
;
1346 struct btrfs_disk_key disk_key
;
1351 int direction
= path
->reada
;
1352 struct extent_buffer
*eb
;
1360 if (!path
->nodes
[level
])
1363 node
= path
->nodes
[level
];
1365 search
= btrfs_node_blockptr(node
, slot
);
1366 blocksize
= btrfs_level_size(root
, level
- 1);
1367 eb
= btrfs_find_tree_block(root
, search
, blocksize
);
1369 free_extent_buffer(eb
);
1375 nritems
= btrfs_header_nritems(node
);
1378 if (direction
< 0) {
1382 } else if (direction
> 0) {
1387 if (path
->reada
< 0 && objectid
) {
1388 btrfs_node_key(node
, &disk_key
, nr
);
1389 if (btrfs_disk_key_objectid(&disk_key
) != objectid
)
1392 search
= btrfs_node_blockptr(node
, nr
);
1393 if ((search
<= target
&& target
- search
<= 65536) ||
1394 (search
> target
&& search
- target
<= 65536)) {
1395 readahead_tree_block(root
, search
, blocksize
,
1396 btrfs_node_ptr_generation(node
, nr
));
1400 if ((nread
> 65536 || nscan
> 32))
1406 * returns -EAGAIN if it had to drop the path, or zero if everything was in
1409 static noinline
int reada_for_balance(struct btrfs_root
*root
,
1410 struct btrfs_path
*path
, int level
)
1414 struct extent_buffer
*parent
;
1415 struct extent_buffer
*eb
;
1422 parent
= path
->nodes
[level
+ 1];
1426 nritems
= btrfs_header_nritems(parent
);
1427 slot
= path
->slots
[level
+ 1];
1428 blocksize
= btrfs_level_size(root
, level
);
1431 block1
= btrfs_node_blockptr(parent
, slot
- 1);
1432 gen
= btrfs_node_ptr_generation(parent
, slot
- 1);
1433 eb
= btrfs_find_tree_block(root
, block1
, blocksize
);
1434 if (eb
&& btrfs_buffer_uptodate(eb
, gen
))
1436 free_extent_buffer(eb
);
1438 if (slot
+ 1 < nritems
) {
1439 block2
= btrfs_node_blockptr(parent
, slot
+ 1);
1440 gen
= btrfs_node_ptr_generation(parent
, slot
+ 1);
1441 eb
= btrfs_find_tree_block(root
, block2
, blocksize
);
1442 if (eb
&& btrfs_buffer_uptodate(eb
, gen
))
1444 free_extent_buffer(eb
);
1446 if (block1
|| block2
) {
1449 /* release the whole path */
1450 btrfs_release_path(root
, path
);
1452 /* read the blocks */
1454 readahead_tree_block(root
, block1
, blocksize
, 0);
1456 readahead_tree_block(root
, block2
, blocksize
, 0);
1459 eb
= read_tree_block(root
, block1
, blocksize
, 0);
1460 free_extent_buffer(eb
);
1463 eb
= read_tree_block(root
, block2
, blocksize
, 0);
1464 free_extent_buffer(eb
);
1472 * when we walk down the tree, it is usually safe to unlock the higher layers
1473 * in the tree. The exceptions are when our path goes through slot 0, because
1474 * operations on the tree might require changing key pointers higher up in the
1477 * callers might also have set path->keep_locks, which tells this code to keep
1478 * the lock if the path points to the last slot in the block. This is part of
1479 * walking through the tree, and selecting the next slot in the higher block.
1481 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1482 * if lowest_unlock is 1, level 0 won't be unlocked
1484 static noinline
void unlock_up(struct btrfs_path
*path
, int level
,
1488 int skip_level
= level
;
1490 struct extent_buffer
*t
;
1492 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1493 if (!path
->nodes
[i
])
1495 if (!path
->locks
[i
])
1497 if (!no_skips
&& path
->slots
[i
] == 0) {
1501 if (!no_skips
&& path
->keep_locks
) {
1504 nritems
= btrfs_header_nritems(t
);
1505 if (nritems
< 1 || path
->slots
[i
] >= nritems
- 1) {
1510 if (skip_level
< i
&& i
>= lowest_unlock
)
1514 if (i
>= lowest_unlock
&& i
> skip_level
&& path
->locks
[i
]) {
1515 btrfs_tree_unlock(t
);
1522 * This releases any locks held in the path starting at level and
1523 * going all the way up to the root.
1525 * btrfs_search_slot will keep the lock held on higher nodes in a few
1526 * corner cases, such as COW of the block at slot zero in the node. This
1527 * ignores those rules, and it should only be called when there are no
1528 * more updates to be done higher up in the tree.
1530 noinline
void btrfs_unlock_up_safe(struct btrfs_path
*path
, int level
)
1534 if (path
->keep_locks
)
1537 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1538 if (!path
->nodes
[i
])
1540 if (!path
->locks
[i
])
1542 btrfs_tree_unlock(path
->nodes
[i
]);
1548 * helper function for btrfs_search_slot. The goal is to find a block
1549 * in cache without setting the path to blocking. If we find the block
1550 * we return zero and the path is unchanged.
1552 * If we can't find the block, we set the path blocking and do some
1553 * reada. -EAGAIN is returned and the search must be repeated.
1556 read_block_for_search(struct btrfs_trans_handle
*trans
,
1557 struct btrfs_root
*root
, struct btrfs_path
*p
,
1558 struct extent_buffer
**eb_ret
, int level
, int slot
,
1559 struct btrfs_key
*key
)
1564 struct extent_buffer
*b
= *eb_ret
;
1565 struct extent_buffer
*tmp
;
1568 blocknr
= btrfs_node_blockptr(b
, slot
);
1569 gen
= btrfs_node_ptr_generation(b
, slot
);
1570 blocksize
= btrfs_level_size(root
, level
- 1);
1572 tmp
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
1574 if (btrfs_buffer_uptodate(tmp
, 0)) {
1575 if (btrfs_buffer_uptodate(tmp
, gen
)) {
1577 * we found an up to date block without
1584 /* the pages were up to date, but we failed
1585 * the generation number check. Do a full
1586 * read for the generation number that is correct.
1587 * We must do this without dropping locks so
1588 * we can trust our generation number
1590 free_extent_buffer(tmp
);
1591 tmp
= read_tree_block(root
, blocknr
, blocksize
, gen
);
1592 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
1596 free_extent_buffer(tmp
);
1597 btrfs_release_path(NULL
, p
);
1603 * reduce lock contention at high levels
1604 * of the btree by dropping locks before
1605 * we read. Don't release the lock on the current
1606 * level because we need to walk this node to figure
1607 * out which blocks to read.
1609 btrfs_unlock_up_safe(p
, level
+ 1);
1610 btrfs_set_path_blocking(p
);
1612 free_extent_buffer(tmp
);
1614 reada_for_search(root
, p
, level
, slot
, key
->objectid
);
1616 btrfs_release_path(NULL
, p
);
1619 tmp
= read_tree_block(root
, blocknr
, blocksize
, 0);
1622 * If the read above didn't mark this buffer up to date,
1623 * it will never end up being up to date. Set ret to EIO now
1624 * and give up so that our caller doesn't loop forever
1627 if (!btrfs_buffer_uptodate(tmp
, 0))
1629 free_extent_buffer(tmp
);
1635 * helper function for btrfs_search_slot. This does all of the checks
1636 * for node-level blocks and does any balancing required based on
1639 * If no extra work was required, zero is returned. If we had to
1640 * drop the path, -EAGAIN is returned and btrfs_search_slot must
1644 setup_nodes_for_search(struct btrfs_trans_handle
*trans
,
1645 struct btrfs_root
*root
, struct btrfs_path
*p
,
1646 struct extent_buffer
*b
, int level
, int ins_len
)
1649 if ((p
->search_for_split
|| ins_len
> 0) && btrfs_header_nritems(b
) >=
1650 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3) {
1653 sret
= reada_for_balance(root
, p
, level
);
1657 btrfs_set_path_blocking(p
);
1658 sret
= split_node(trans
, root
, p
, level
);
1659 btrfs_clear_path_blocking(p
, NULL
);
1666 b
= p
->nodes
[level
];
1667 } else if (ins_len
< 0 && btrfs_header_nritems(b
) <
1668 BTRFS_NODEPTRS_PER_BLOCK(root
) / 2) {
1671 sret
= reada_for_balance(root
, p
, level
);
1675 btrfs_set_path_blocking(p
);
1676 sret
= balance_level(trans
, root
, p
, level
);
1677 btrfs_clear_path_blocking(p
, NULL
);
1683 b
= p
->nodes
[level
];
1685 btrfs_release_path(NULL
, p
);
1688 BUG_ON(btrfs_header_nritems(b
) == 1);
1699 * look for key in the tree. path is filled in with nodes along the way
1700 * if key is found, we return zero and you can find the item in the leaf
1701 * level of the path (level 0)
1703 * If the key isn't found, the path points to the slot where it should
1704 * be inserted, and 1 is returned. If there are other errors during the
1705 * search a negative error number is returned.
1707 * if ins_len > 0, nodes and leaves will be split as we walk down the
1708 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1711 int btrfs_search_slot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1712 *root
, struct btrfs_key
*key
, struct btrfs_path
*p
, int
1715 struct extent_buffer
*b
;
1720 int lowest_unlock
= 1;
1721 u8 lowest_level
= 0;
1723 lowest_level
= p
->lowest_level
;
1724 WARN_ON(lowest_level
&& ins_len
> 0);
1725 WARN_ON(p
->nodes
[0] != NULL
);
1731 if (p
->search_commit_root
) {
1732 b
= root
->commit_root
;
1733 extent_buffer_get(b
);
1734 if (!p
->skip_locking
)
1737 if (p
->skip_locking
)
1738 b
= btrfs_root_node(root
);
1740 b
= btrfs_lock_root_node(root
);
1744 level
= btrfs_header_level(b
);
1747 * setup the path here so we can release it under lock
1748 * contention with the cow code
1750 p
->nodes
[level
] = b
;
1751 if (!p
->skip_locking
)
1752 p
->locks
[level
] = 1;
1756 * if we don't really need to cow this block
1757 * then we don't want to set the path blocking,
1758 * so we test it here
1760 if (!should_cow_block(trans
, root
, b
))
1763 btrfs_set_path_blocking(p
);
1765 err
= btrfs_cow_block(trans
, root
, b
,
1766 p
->nodes
[level
+ 1],
1767 p
->slots
[level
+ 1], &b
);
1774 BUG_ON(!cow
&& ins_len
);
1775 if (level
!= btrfs_header_level(b
))
1777 level
= btrfs_header_level(b
);
1779 p
->nodes
[level
] = b
;
1780 if (!p
->skip_locking
)
1781 p
->locks
[level
] = 1;
1783 btrfs_clear_path_blocking(p
, NULL
);
1786 * we have a lock on b and as long as we aren't changing
1787 * the tree, there is no way to for the items in b to change.
1788 * It is safe to drop the lock on our parent before we
1789 * go through the expensive btree search on b.
1791 * If cow is true, then we might be changing slot zero,
1792 * which may require changing the parent. So, we can't
1793 * drop the lock until after we know which slot we're
1797 btrfs_unlock_up_safe(p
, level
+ 1);
1799 ret
= check_block(root
, p
, level
);
1805 ret
= bin_search(b
, key
, level
, &slot
);
1809 if (ret
&& slot
> 0) {
1813 p
->slots
[level
] = slot
;
1814 err
= setup_nodes_for_search(trans
, root
, p
, b
, level
,
1822 b
= p
->nodes
[level
];
1823 slot
= p
->slots
[level
];
1825 unlock_up(p
, level
, lowest_unlock
);
1827 if (level
== lowest_level
) {
1833 err
= read_block_for_search(trans
, root
, p
,
1834 &b
, level
, slot
, key
);
1842 if (!p
->skip_locking
) {
1843 btrfs_clear_path_blocking(p
, NULL
);
1844 err
= btrfs_try_spin_lock(b
);
1847 btrfs_set_path_blocking(p
);
1849 btrfs_clear_path_blocking(p
, b
);
1853 p
->slots
[level
] = slot
;
1855 btrfs_leaf_free_space(root
, b
) < ins_len
) {
1856 btrfs_set_path_blocking(p
);
1857 err
= split_leaf(trans
, root
, key
,
1858 p
, ins_len
, ret
== 0);
1859 btrfs_clear_path_blocking(p
, NULL
);
1867 if (!p
->search_for_split
)
1868 unlock_up(p
, level
, lowest_unlock
);
1875 * we don't really know what they plan on doing with the path
1876 * from here on, so for now just mark it as blocking
1878 if (!p
->leave_spinning
)
1879 btrfs_set_path_blocking(p
);
1881 btrfs_release_path(root
, p
);
1886 * adjust the pointers going up the tree, starting at level
1887 * making sure the right key of each node is points to 'key'.
1888 * This is used after shifting pointers to the left, so it stops
1889 * fixing up pointers when a given leaf/node is not in slot 0 of the
1892 * If this fails to write a tree block, it returns -1, but continues
1893 * fixing up the blocks in ram so the tree is consistent.
1895 static int fixup_low_keys(struct btrfs_trans_handle
*trans
,
1896 struct btrfs_root
*root
, struct btrfs_path
*path
,
1897 struct btrfs_disk_key
*key
, int level
)
1901 struct extent_buffer
*t
;
1903 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1904 int tslot
= path
->slots
[i
];
1905 if (!path
->nodes
[i
])
1908 btrfs_set_node_key(t
, key
, tslot
);
1909 btrfs_mark_buffer_dirty(path
->nodes
[i
]);
1919 * This function isn't completely safe. It's the caller's responsibility
1920 * that the new key won't break the order
1922 int btrfs_set_item_key_safe(struct btrfs_trans_handle
*trans
,
1923 struct btrfs_root
*root
, struct btrfs_path
*path
,
1924 struct btrfs_key
*new_key
)
1926 struct btrfs_disk_key disk_key
;
1927 struct extent_buffer
*eb
;
1930 eb
= path
->nodes
[0];
1931 slot
= path
->slots
[0];
1933 btrfs_item_key(eb
, &disk_key
, slot
- 1);
1934 if (comp_keys(&disk_key
, new_key
) >= 0)
1937 if (slot
< btrfs_header_nritems(eb
) - 1) {
1938 btrfs_item_key(eb
, &disk_key
, slot
+ 1);
1939 if (comp_keys(&disk_key
, new_key
) <= 0)
1943 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
1944 btrfs_set_item_key(eb
, &disk_key
, slot
);
1945 btrfs_mark_buffer_dirty(eb
);
1947 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
1952 * try to push data from one node into the next node left in the
1955 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1956 * error, and > 0 if there was no room in the left hand block.
1958 static int push_node_left(struct btrfs_trans_handle
*trans
,
1959 struct btrfs_root
*root
, struct extent_buffer
*dst
,
1960 struct extent_buffer
*src
, int empty
)
1967 src_nritems
= btrfs_header_nritems(src
);
1968 dst_nritems
= btrfs_header_nritems(dst
);
1969 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
1970 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
1971 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
1973 if (!empty
&& src_nritems
<= 8)
1976 if (push_items
<= 0)
1980 push_items
= min(src_nritems
, push_items
);
1981 if (push_items
< src_nritems
) {
1982 /* leave at least 8 pointers in the node if
1983 * we aren't going to empty it
1985 if (src_nritems
- push_items
< 8) {
1986 if (push_items
<= 8)
1992 push_items
= min(src_nritems
- 8, push_items
);
1994 copy_extent_buffer(dst
, src
,
1995 btrfs_node_key_ptr_offset(dst_nritems
),
1996 btrfs_node_key_ptr_offset(0),
1997 push_items
* sizeof(struct btrfs_key_ptr
));
1999 if (push_items
< src_nritems
) {
2000 memmove_extent_buffer(src
, btrfs_node_key_ptr_offset(0),
2001 btrfs_node_key_ptr_offset(push_items
),
2002 (src_nritems
- push_items
) *
2003 sizeof(struct btrfs_key_ptr
));
2005 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
2006 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
2007 btrfs_mark_buffer_dirty(src
);
2008 btrfs_mark_buffer_dirty(dst
);
2014 * try to push data from one node into the next node right in the
2017 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2018 * error, and > 0 if there was no room in the right hand block.
2020 * this will only push up to 1/2 the contents of the left node over
2022 static int balance_node_right(struct btrfs_trans_handle
*trans
,
2023 struct btrfs_root
*root
,
2024 struct extent_buffer
*dst
,
2025 struct extent_buffer
*src
)
2033 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
2034 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
2036 src_nritems
= btrfs_header_nritems(src
);
2037 dst_nritems
= btrfs_header_nritems(dst
);
2038 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
2039 if (push_items
<= 0)
2042 if (src_nritems
< 4)
2045 max_push
= src_nritems
/ 2 + 1;
2046 /* don't try to empty the node */
2047 if (max_push
>= src_nritems
)
2050 if (max_push
< push_items
)
2051 push_items
= max_push
;
2053 memmove_extent_buffer(dst
, btrfs_node_key_ptr_offset(push_items
),
2054 btrfs_node_key_ptr_offset(0),
2056 sizeof(struct btrfs_key_ptr
));
2058 copy_extent_buffer(dst
, src
,
2059 btrfs_node_key_ptr_offset(0),
2060 btrfs_node_key_ptr_offset(src_nritems
- push_items
),
2061 push_items
* sizeof(struct btrfs_key_ptr
));
2063 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
2064 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
2066 btrfs_mark_buffer_dirty(src
);
2067 btrfs_mark_buffer_dirty(dst
);
2073 * helper function to insert a new root level in the tree.
2074 * A new node is allocated, and a single item is inserted to
2075 * point to the existing root
2077 * returns zero on success or < 0 on failure.
2079 static noinline
int insert_new_root(struct btrfs_trans_handle
*trans
,
2080 struct btrfs_root
*root
,
2081 struct btrfs_path
*path
, int level
)
2084 struct extent_buffer
*lower
;
2085 struct extent_buffer
*c
;
2086 struct extent_buffer
*old
;
2087 struct btrfs_disk_key lower_key
;
2089 BUG_ON(path
->nodes
[level
]);
2090 BUG_ON(path
->nodes
[level
-1] != root
->node
);
2092 lower
= path
->nodes
[level
-1];
2094 btrfs_item_key(lower
, &lower_key
, 0);
2096 btrfs_node_key(lower
, &lower_key
, 0);
2098 c
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
, 0,
2099 root
->root_key
.objectid
, &lower_key
,
2100 level
, root
->node
->start
, 0);
2104 root_add_used(root
, root
->nodesize
);
2106 memset_extent_buffer(c
, 0, 0, sizeof(struct btrfs_header
));
2107 btrfs_set_header_nritems(c
, 1);
2108 btrfs_set_header_level(c
, level
);
2109 btrfs_set_header_bytenr(c
, c
->start
);
2110 btrfs_set_header_generation(c
, trans
->transid
);
2111 btrfs_set_header_backref_rev(c
, BTRFS_MIXED_BACKREF_REV
);
2112 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
2114 write_extent_buffer(c
, root
->fs_info
->fsid
,
2115 (unsigned long)btrfs_header_fsid(c
),
2118 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
2119 (unsigned long)btrfs_header_chunk_tree_uuid(c
),
2122 btrfs_set_node_key(c
, &lower_key
, 0);
2123 btrfs_set_node_blockptr(c
, 0, lower
->start
);
2124 lower_gen
= btrfs_header_generation(lower
);
2125 WARN_ON(lower_gen
!= trans
->transid
);
2127 btrfs_set_node_ptr_generation(c
, 0, lower_gen
);
2129 btrfs_mark_buffer_dirty(c
);
2131 spin_lock(&root
->node_lock
);
2134 spin_unlock(&root
->node_lock
);
2136 /* the super has an extra ref to root->node */
2137 free_extent_buffer(old
);
2139 add_root_to_dirty_list(root
);
2140 extent_buffer_get(c
);
2141 path
->nodes
[level
] = c
;
2142 path
->locks
[level
] = 1;
2143 path
->slots
[level
] = 0;
2148 * worker function to insert a single pointer in a node.
2149 * the node should have enough room for the pointer already
2151 * slot and level indicate where you want the key to go, and
2152 * blocknr is the block the key points to.
2154 * returns zero on success and < 0 on any error
2156 static int insert_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
2157 *root
, struct btrfs_path
*path
, struct btrfs_disk_key
2158 *key
, u64 bytenr
, int slot
, int level
)
2160 struct extent_buffer
*lower
;
2163 BUG_ON(!path
->nodes
[level
]);
2164 btrfs_assert_tree_locked(path
->nodes
[level
]);
2165 lower
= path
->nodes
[level
];
2166 nritems
= btrfs_header_nritems(lower
);
2167 BUG_ON(slot
> nritems
);
2168 if (nritems
== BTRFS_NODEPTRS_PER_BLOCK(root
))
2170 if (slot
!= nritems
) {
2171 memmove_extent_buffer(lower
,
2172 btrfs_node_key_ptr_offset(slot
+ 1),
2173 btrfs_node_key_ptr_offset(slot
),
2174 (nritems
- slot
) * sizeof(struct btrfs_key_ptr
));
2176 btrfs_set_node_key(lower
, key
, slot
);
2177 btrfs_set_node_blockptr(lower
, slot
, bytenr
);
2178 WARN_ON(trans
->transid
== 0);
2179 btrfs_set_node_ptr_generation(lower
, slot
, trans
->transid
);
2180 btrfs_set_header_nritems(lower
, nritems
+ 1);
2181 btrfs_mark_buffer_dirty(lower
);
2186 * split the node at the specified level in path in two.
2187 * The path is corrected to point to the appropriate node after the split
2189 * Before splitting this tries to make some room in the node by pushing
2190 * left and right, if either one works, it returns right away.
2192 * returns 0 on success and < 0 on failure
2194 static noinline
int split_node(struct btrfs_trans_handle
*trans
,
2195 struct btrfs_root
*root
,
2196 struct btrfs_path
*path
, int level
)
2198 struct extent_buffer
*c
;
2199 struct extent_buffer
*split
;
2200 struct btrfs_disk_key disk_key
;
2206 c
= path
->nodes
[level
];
2207 WARN_ON(btrfs_header_generation(c
) != trans
->transid
);
2208 if (c
== root
->node
) {
2209 /* trying to split the root, lets make a new one */
2210 ret
= insert_new_root(trans
, root
, path
, level
+ 1);
2214 ret
= push_nodes_for_insert(trans
, root
, path
, level
);
2215 c
= path
->nodes
[level
];
2216 if (!ret
&& btrfs_header_nritems(c
) <
2217 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3)
2223 c_nritems
= btrfs_header_nritems(c
);
2224 mid
= (c_nritems
+ 1) / 2;
2225 btrfs_node_key(c
, &disk_key
, mid
);
2227 split
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
, 0,
2228 root
->root_key
.objectid
,
2229 &disk_key
, level
, c
->start
, 0);
2231 return PTR_ERR(split
);
2233 root_add_used(root
, root
->nodesize
);
2235 memset_extent_buffer(split
, 0, 0, sizeof(struct btrfs_header
));
2236 btrfs_set_header_level(split
, btrfs_header_level(c
));
2237 btrfs_set_header_bytenr(split
, split
->start
);
2238 btrfs_set_header_generation(split
, trans
->transid
);
2239 btrfs_set_header_backref_rev(split
, BTRFS_MIXED_BACKREF_REV
);
2240 btrfs_set_header_owner(split
, root
->root_key
.objectid
);
2241 write_extent_buffer(split
, root
->fs_info
->fsid
,
2242 (unsigned long)btrfs_header_fsid(split
),
2244 write_extent_buffer(split
, root
->fs_info
->chunk_tree_uuid
,
2245 (unsigned long)btrfs_header_chunk_tree_uuid(split
),
2249 copy_extent_buffer(split
, c
,
2250 btrfs_node_key_ptr_offset(0),
2251 btrfs_node_key_ptr_offset(mid
),
2252 (c_nritems
- mid
) * sizeof(struct btrfs_key_ptr
));
2253 btrfs_set_header_nritems(split
, c_nritems
- mid
);
2254 btrfs_set_header_nritems(c
, mid
);
2257 btrfs_mark_buffer_dirty(c
);
2258 btrfs_mark_buffer_dirty(split
);
2260 wret
= insert_ptr(trans
, root
, path
, &disk_key
, split
->start
,
2261 path
->slots
[level
+ 1] + 1,
2266 if (path
->slots
[level
] >= mid
) {
2267 path
->slots
[level
] -= mid
;
2268 btrfs_tree_unlock(c
);
2269 free_extent_buffer(c
);
2270 path
->nodes
[level
] = split
;
2271 path
->slots
[level
+ 1] += 1;
2273 btrfs_tree_unlock(split
);
2274 free_extent_buffer(split
);
2280 * how many bytes are required to store the items in a leaf. start
2281 * and nr indicate which items in the leaf to check. This totals up the
2282 * space used both by the item structs and the item data
2284 static int leaf_space_used(struct extent_buffer
*l
, int start
, int nr
)
2287 int nritems
= btrfs_header_nritems(l
);
2288 int end
= min(nritems
, start
+ nr
) - 1;
2292 data_len
= btrfs_item_end_nr(l
, start
);
2293 data_len
= data_len
- btrfs_item_offset_nr(l
, end
);
2294 data_len
+= sizeof(struct btrfs_item
) * nr
;
2295 WARN_ON(data_len
< 0);
2300 * The space between the end of the leaf items and
2301 * the start of the leaf data. IOW, how much room
2302 * the leaf has left for both items and data
2304 noinline
int btrfs_leaf_free_space(struct btrfs_root
*root
,
2305 struct extent_buffer
*leaf
)
2307 int nritems
= btrfs_header_nritems(leaf
);
2309 ret
= BTRFS_LEAF_DATA_SIZE(root
) - leaf_space_used(leaf
, 0, nritems
);
2311 printk(KERN_CRIT
"leaf free space ret %d, leaf data size %lu, "
2312 "used %d nritems %d\n",
2313 ret
, (unsigned long) BTRFS_LEAF_DATA_SIZE(root
),
2314 leaf_space_used(leaf
, 0, nritems
), nritems
);
2320 * min slot controls the lowest index we're willing to push to the
2321 * right. We'll push up to and including min_slot, but no lower
2323 static noinline
int __push_leaf_right(struct btrfs_trans_handle
*trans
,
2324 struct btrfs_root
*root
,
2325 struct btrfs_path
*path
,
2326 int data_size
, int empty
,
2327 struct extent_buffer
*right
,
2328 int free_space
, u32 left_nritems
,
2331 struct extent_buffer
*left
= path
->nodes
[0];
2332 struct extent_buffer
*upper
= path
->nodes
[1];
2333 struct btrfs_disk_key disk_key
;
2338 struct btrfs_item
*item
;
2347 nr
= max_t(u32
, 1, min_slot
);
2349 if (path
->slots
[0] >= left_nritems
)
2350 push_space
+= data_size
;
2352 slot
= path
->slots
[1];
2353 i
= left_nritems
- 1;
2355 item
= btrfs_item_nr(left
, i
);
2357 if (!empty
&& push_items
> 0) {
2358 if (path
->slots
[0] > i
)
2360 if (path
->slots
[0] == i
) {
2361 int space
= btrfs_leaf_free_space(root
, left
);
2362 if (space
+ push_space
* 2 > free_space
)
2367 if (path
->slots
[0] == i
)
2368 push_space
+= data_size
;
2370 if (!left
->map_token
) {
2371 map_extent_buffer(left
, (unsigned long)item
,
2372 sizeof(struct btrfs_item
),
2373 &left
->map_token
, &left
->kaddr
,
2374 &left
->map_start
, &left
->map_len
,
2378 this_item_size
= btrfs_item_size(left
, item
);
2379 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2383 push_space
+= this_item_size
+ sizeof(*item
);
2388 if (left
->map_token
) {
2389 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2390 left
->map_token
= NULL
;
2393 if (push_items
== 0)
2396 if (!empty
&& push_items
== left_nritems
)
2399 /* push left to right */
2400 right_nritems
= btrfs_header_nritems(right
);
2402 push_space
= btrfs_item_end_nr(left
, left_nritems
- push_items
);
2403 push_space
-= leaf_data_end(root
, left
);
2405 /* make room in the right data area */
2406 data_end
= leaf_data_end(root
, right
);
2407 memmove_extent_buffer(right
,
2408 btrfs_leaf_data(right
) + data_end
- push_space
,
2409 btrfs_leaf_data(right
) + data_end
,
2410 BTRFS_LEAF_DATA_SIZE(root
) - data_end
);
2412 /* copy from the left data area */
2413 copy_extent_buffer(right
, left
, btrfs_leaf_data(right
) +
2414 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2415 btrfs_leaf_data(left
) + leaf_data_end(root
, left
),
2418 memmove_extent_buffer(right
, btrfs_item_nr_offset(push_items
),
2419 btrfs_item_nr_offset(0),
2420 right_nritems
* sizeof(struct btrfs_item
));
2422 /* copy the items from left to right */
2423 copy_extent_buffer(right
, left
, btrfs_item_nr_offset(0),
2424 btrfs_item_nr_offset(left_nritems
- push_items
),
2425 push_items
* sizeof(struct btrfs_item
));
2427 /* update the item pointers */
2428 right_nritems
+= push_items
;
2429 btrfs_set_header_nritems(right
, right_nritems
);
2430 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2431 for (i
= 0; i
< right_nritems
; i
++) {
2432 item
= btrfs_item_nr(right
, i
);
2433 if (!right
->map_token
) {
2434 map_extent_buffer(right
, (unsigned long)item
,
2435 sizeof(struct btrfs_item
),
2436 &right
->map_token
, &right
->kaddr
,
2437 &right
->map_start
, &right
->map_len
,
2440 push_space
-= btrfs_item_size(right
, item
);
2441 btrfs_set_item_offset(right
, item
, push_space
);
2444 if (right
->map_token
) {
2445 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2446 right
->map_token
= NULL
;
2448 left_nritems
-= push_items
;
2449 btrfs_set_header_nritems(left
, left_nritems
);
2452 btrfs_mark_buffer_dirty(left
);
2454 clean_tree_block(trans
, root
, left
);
2456 btrfs_mark_buffer_dirty(right
);
2458 btrfs_item_key(right
, &disk_key
, 0);
2459 btrfs_set_node_key(upper
, &disk_key
, slot
+ 1);
2460 btrfs_mark_buffer_dirty(upper
);
2462 /* then fixup the leaf pointer in the path */
2463 if (path
->slots
[0] >= left_nritems
) {
2464 path
->slots
[0] -= left_nritems
;
2465 if (btrfs_header_nritems(path
->nodes
[0]) == 0)
2466 clean_tree_block(trans
, root
, path
->nodes
[0]);
2467 btrfs_tree_unlock(path
->nodes
[0]);
2468 free_extent_buffer(path
->nodes
[0]);
2469 path
->nodes
[0] = right
;
2470 path
->slots
[1] += 1;
2472 btrfs_tree_unlock(right
);
2473 free_extent_buffer(right
);
2478 btrfs_tree_unlock(right
);
2479 free_extent_buffer(right
);
2484 * push some data in the path leaf to the right, trying to free up at
2485 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2487 * returns 1 if the push failed because the other node didn't have enough
2488 * room, 0 if everything worked out and < 0 if there were major errors.
2490 * this will push starting from min_slot to the end of the leaf. It won't
2491 * push any slot lower than min_slot
2493 static int push_leaf_right(struct btrfs_trans_handle
*trans
, struct btrfs_root
2494 *root
, struct btrfs_path
*path
,
2495 int min_data_size
, int data_size
,
2496 int empty
, u32 min_slot
)
2498 struct extent_buffer
*left
= path
->nodes
[0];
2499 struct extent_buffer
*right
;
2500 struct extent_buffer
*upper
;
2506 if (!path
->nodes
[1])
2509 slot
= path
->slots
[1];
2510 upper
= path
->nodes
[1];
2511 if (slot
>= btrfs_header_nritems(upper
) - 1)
2514 btrfs_assert_tree_locked(path
->nodes
[1]);
2516 right
= read_node_slot(root
, upper
, slot
+ 1);
2517 btrfs_tree_lock(right
);
2518 btrfs_set_lock_blocking(right
);
2520 free_space
= btrfs_leaf_free_space(root
, right
);
2521 if (free_space
< data_size
)
2524 /* cow and double check */
2525 ret
= btrfs_cow_block(trans
, root
, right
, upper
,
2530 free_space
= btrfs_leaf_free_space(root
, right
);
2531 if (free_space
< data_size
)
2534 left_nritems
= btrfs_header_nritems(left
);
2535 if (left_nritems
== 0)
2538 return __push_leaf_right(trans
, root
, path
, min_data_size
, empty
,
2539 right
, free_space
, left_nritems
, min_slot
);
2541 btrfs_tree_unlock(right
);
2542 free_extent_buffer(right
);
2547 * push some data in the path leaf to the left, trying to free up at
2548 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2550 * max_slot can put a limit on how far into the leaf we'll push items. The
2551 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
2554 static noinline
int __push_leaf_left(struct btrfs_trans_handle
*trans
,
2555 struct btrfs_root
*root
,
2556 struct btrfs_path
*path
, int data_size
,
2557 int empty
, struct extent_buffer
*left
,
2558 int free_space
, u32 right_nritems
,
2561 struct btrfs_disk_key disk_key
;
2562 struct extent_buffer
*right
= path
->nodes
[0];
2566 struct btrfs_item
*item
;
2567 u32 old_left_nritems
;
2572 u32 old_left_item_size
;
2575 nr
= min(right_nritems
, max_slot
);
2577 nr
= min(right_nritems
- 1, max_slot
);
2579 for (i
= 0; i
< nr
; i
++) {
2580 item
= btrfs_item_nr(right
, i
);
2581 if (!right
->map_token
) {
2582 map_extent_buffer(right
, (unsigned long)item
,
2583 sizeof(struct btrfs_item
),
2584 &right
->map_token
, &right
->kaddr
,
2585 &right
->map_start
, &right
->map_len
,
2589 if (!empty
&& push_items
> 0) {
2590 if (path
->slots
[0] < i
)
2592 if (path
->slots
[0] == i
) {
2593 int space
= btrfs_leaf_free_space(root
, right
);
2594 if (space
+ push_space
* 2 > free_space
)
2599 if (path
->slots
[0] == i
)
2600 push_space
+= data_size
;
2602 this_item_size
= btrfs_item_size(right
, item
);
2603 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2607 push_space
+= this_item_size
+ sizeof(*item
);
2610 if (right
->map_token
) {
2611 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2612 right
->map_token
= NULL
;
2615 if (push_items
== 0) {
2619 if (!empty
&& push_items
== btrfs_header_nritems(right
))
2622 /* push data from right to left */
2623 copy_extent_buffer(left
, right
,
2624 btrfs_item_nr_offset(btrfs_header_nritems(left
)),
2625 btrfs_item_nr_offset(0),
2626 push_items
* sizeof(struct btrfs_item
));
2628 push_space
= BTRFS_LEAF_DATA_SIZE(root
) -
2629 btrfs_item_offset_nr(right
, push_items
- 1);
2631 copy_extent_buffer(left
, right
, btrfs_leaf_data(left
) +
2632 leaf_data_end(root
, left
) - push_space
,
2633 btrfs_leaf_data(right
) +
2634 btrfs_item_offset_nr(right
, push_items
- 1),
2636 old_left_nritems
= btrfs_header_nritems(left
);
2637 BUG_ON(old_left_nritems
<= 0);
2639 old_left_item_size
= btrfs_item_offset_nr(left
, old_left_nritems
- 1);
2640 for (i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
2643 item
= btrfs_item_nr(left
, i
);
2644 if (!left
->map_token
) {
2645 map_extent_buffer(left
, (unsigned long)item
,
2646 sizeof(struct btrfs_item
),
2647 &left
->map_token
, &left
->kaddr
,
2648 &left
->map_start
, &left
->map_len
,
2652 ioff
= btrfs_item_offset(left
, item
);
2653 btrfs_set_item_offset(left
, item
,
2654 ioff
- (BTRFS_LEAF_DATA_SIZE(root
) - old_left_item_size
));
2656 btrfs_set_header_nritems(left
, old_left_nritems
+ push_items
);
2657 if (left
->map_token
) {
2658 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2659 left
->map_token
= NULL
;
2662 /* fixup right node */
2663 if (push_items
> right_nritems
) {
2664 printk(KERN_CRIT
"push items %d nr %u\n", push_items
,
2669 if (push_items
< right_nritems
) {
2670 push_space
= btrfs_item_offset_nr(right
, push_items
- 1) -
2671 leaf_data_end(root
, right
);
2672 memmove_extent_buffer(right
, btrfs_leaf_data(right
) +
2673 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2674 btrfs_leaf_data(right
) +
2675 leaf_data_end(root
, right
), push_space
);
2677 memmove_extent_buffer(right
, btrfs_item_nr_offset(0),
2678 btrfs_item_nr_offset(push_items
),
2679 (btrfs_header_nritems(right
) - push_items
) *
2680 sizeof(struct btrfs_item
));
2682 right_nritems
-= push_items
;
2683 btrfs_set_header_nritems(right
, right_nritems
);
2684 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2685 for (i
= 0; i
< right_nritems
; i
++) {
2686 item
= btrfs_item_nr(right
, i
);
2688 if (!right
->map_token
) {
2689 map_extent_buffer(right
, (unsigned long)item
,
2690 sizeof(struct btrfs_item
),
2691 &right
->map_token
, &right
->kaddr
,
2692 &right
->map_start
, &right
->map_len
,
2696 push_space
= push_space
- btrfs_item_size(right
, item
);
2697 btrfs_set_item_offset(right
, item
, push_space
);
2699 if (right
->map_token
) {
2700 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2701 right
->map_token
= NULL
;
2704 btrfs_mark_buffer_dirty(left
);
2706 btrfs_mark_buffer_dirty(right
);
2708 clean_tree_block(trans
, root
, right
);
2710 btrfs_item_key(right
, &disk_key
, 0);
2711 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
2715 /* then fixup the leaf pointer in the path */
2716 if (path
->slots
[0] < push_items
) {
2717 path
->slots
[0] += old_left_nritems
;
2718 btrfs_tree_unlock(path
->nodes
[0]);
2719 free_extent_buffer(path
->nodes
[0]);
2720 path
->nodes
[0] = left
;
2721 path
->slots
[1] -= 1;
2723 btrfs_tree_unlock(left
);
2724 free_extent_buffer(left
);
2725 path
->slots
[0] -= push_items
;
2727 BUG_ON(path
->slots
[0] < 0);
2730 btrfs_tree_unlock(left
);
2731 free_extent_buffer(left
);
2736 * push some data in the path leaf to the left, trying to free up at
2737 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2739 * max_slot can put a limit on how far into the leaf we'll push items. The
2740 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
2743 static int push_leaf_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
2744 *root
, struct btrfs_path
*path
, int min_data_size
,
2745 int data_size
, int empty
, u32 max_slot
)
2747 struct extent_buffer
*right
= path
->nodes
[0];
2748 struct extent_buffer
*left
;
2754 slot
= path
->slots
[1];
2757 if (!path
->nodes
[1])
2760 right_nritems
= btrfs_header_nritems(right
);
2761 if (right_nritems
== 0)
2764 btrfs_assert_tree_locked(path
->nodes
[1]);
2766 left
= read_node_slot(root
, path
->nodes
[1], slot
- 1);
2767 btrfs_tree_lock(left
);
2768 btrfs_set_lock_blocking(left
);
2770 free_space
= btrfs_leaf_free_space(root
, left
);
2771 if (free_space
< data_size
) {
2776 /* cow and double check */
2777 ret
= btrfs_cow_block(trans
, root
, left
,
2778 path
->nodes
[1], slot
- 1, &left
);
2780 /* we hit -ENOSPC, but it isn't fatal here */
2785 free_space
= btrfs_leaf_free_space(root
, left
);
2786 if (free_space
< data_size
) {
2791 return __push_leaf_left(trans
, root
, path
, min_data_size
,
2792 empty
, left
, free_space
, right_nritems
,
2795 btrfs_tree_unlock(left
);
2796 free_extent_buffer(left
);
2801 * split the path's leaf in two, making sure there is at least data_size
2802 * available for the resulting leaf level of the path.
2804 * returns 0 if all went well and < 0 on failure.
2806 static noinline
int copy_for_split(struct btrfs_trans_handle
*trans
,
2807 struct btrfs_root
*root
,
2808 struct btrfs_path
*path
,
2809 struct extent_buffer
*l
,
2810 struct extent_buffer
*right
,
2811 int slot
, int mid
, int nritems
)
2818 struct btrfs_disk_key disk_key
;
2820 nritems
= nritems
- mid
;
2821 btrfs_set_header_nritems(right
, nritems
);
2822 data_copy_size
= btrfs_item_end_nr(l
, mid
) - leaf_data_end(root
, l
);
2824 copy_extent_buffer(right
, l
, btrfs_item_nr_offset(0),
2825 btrfs_item_nr_offset(mid
),
2826 nritems
* sizeof(struct btrfs_item
));
2828 copy_extent_buffer(right
, l
,
2829 btrfs_leaf_data(right
) + BTRFS_LEAF_DATA_SIZE(root
) -
2830 data_copy_size
, btrfs_leaf_data(l
) +
2831 leaf_data_end(root
, l
), data_copy_size
);
2833 rt_data_off
= BTRFS_LEAF_DATA_SIZE(root
) -
2834 btrfs_item_end_nr(l
, mid
);
2836 for (i
= 0; i
< nritems
; i
++) {
2837 struct btrfs_item
*item
= btrfs_item_nr(right
, i
);
2840 if (!right
->map_token
) {
2841 map_extent_buffer(right
, (unsigned long)item
,
2842 sizeof(struct btrfs_item
),
2843 &right
->map_token
, &right
->kaddr
,
2844 &right
->map_start
, &right
->map_len
,
2848 ioff
= btrfs_item_offset(right
, item
);
2849 btrfs_set_item_offset(right
, item
, ioff
+ rt_data_off
);
2852 if (right
->map_token
) {
2853 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2854 right
->map_token
= NULL
;
2857 btrfs_set_header_nritems(l
, mid
);
2859 btrfs_item_key(right
, &disk_key
, 0);
2860 wret
= insert_ptr(trans
, root
, path
, &disk_key
, right
->start
,
2861 path
->slots
[1] + 1, 1);
2865 btrfs_mark_buffer_dirty(right
);
2866 btrfs_mark_buffer_dirty(l
);
2867 BUG_ON(path
->slots
[0] != slot
);
2870 btrfs_tree_unlock(path
->nodes
[0]);
2871 free_extent_buffer(path
->nodes
[0]);
2872 path
->nodes
[0] = right
;
2873 path
->slots
[0] -= mid
;
2874 path
->slots
[1] += 1;
2876 btrfs_tree_unlock(right
);
2877 free_extent_buffer(right
);
2880 BUG_ON(path
->slots
[0] < 0);
2886 * double splits happen when we need to insert a big item in the middle
2887 * of a leaf. A double split can leave us with 3 mostly empty leaves:
2888 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
2891 * We avoid this by trying to push the items on either side of our target
2892 * into the adjacent leaves. If all goes well we can avoid the double split
2895 static noinline
int push_for_double_split(struct btrfs_trans_handle
*trans
,
2896 struct btrfs_root
*root
,
2897 struct btrfs_path
*path
,
2905 slot
= path
->slots
[0];
2908 * try to push all the items after our slot into the
2911 ret
= push_leaf_right(trans
, root
, path
, 1, data_size
, 0, slot
);
2918 nritems
= btrfs_header_nritems(path
->nodes
[0]);
2920 * our goal is to get our slot at the start or end of a leaf. If
2921 * we've done so we're done
2923 if (path
->slots
[0] == 0 || path
->slots
[0] == nritems
)
2926 if (btrfs_leaf_free_space(root
, path
->nodes
[0]) >= data_size
)
2929 /* try to push all the items before our slot into the next leaf */
2930 slot
= path
->slots
[0];
2931 ret
= push_leaf_left(trans
, root
, path
, 1, data_size
, 0, slot
);
2944 * split the path's leaf in two, making sure there is at least data_size
2945 * available for the resulting leaf level of the path.
2947 * returns 0 if all went well and < 0 on failure.
2949 static noinline
int split_leaf(struct btrfs_trans_handle
*trans
,
2950 struct btrfs_root
*root
,
2951 struct btrfs_key
*ins_key
,
2952 struct btrfs_path
*path
, int data_size
,
2955 struct btrfs_disk_key disk_key
;
2956 struct extent_buffer
*l
;
2960 struct extent_buffer
*right
;
2964 int num_doubles
= 0;
2965 int tried_avoid_double
= 0;
2968 slot
= path
->slots
[0];
2969 if (extend
&& data_size
+ btrfs_item_size_nr(l
, slot
) +
2970 sizeof(struct btrfs_item
) > BTRFS_LEAF_DATA_SIZE(root
))
2973 /* first try to make some room by pushing left and right */
2975 wret
= push_leaf_right(trans
, root
, path
, data_size
,
2980 wret
= push_leaf_left(trans
, root
, path
, data_size
,
2981 data_size
, 0, (u32
)-1);
2987 /* did the pushes work? */
2988 if (btrfs_leaf_free_space(root
, l
) >= data_size
)
2992 if (!path
->nodes
[1]) {
2993 ret
= insert_new_root(trans
, root
, path
, 1);
3000 slot
= path
->slots
[0];
3001 nritems
= btrfs_header_nritems(l
);
3002 mid
= (nritems
+ 1) / 2;
3006 leaf_space_used(l
, mid
, nritems
- mid
) + data_size
>
3007 BTRFS_LEAF_DATA_SIZE(root
)) {
3008 if (slot
>= nritems
) {
3012 if (mid
!= nritems
&&
3013 leaf_space_used(l
, mid
, nritems
- mid
) +
3014 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
3015 if (data_size
&& !tried_avoid_double
)
3016 goto push_for_double
;
3022 if (leaf_space_used(l
, 0, mid
) + data_size
>
3023 BTRFS_LEAF_DATA_SIZE(root
)) {
3024 if (!extend
&& data_size
&& slot
== 0) {
3026 } else if ((extend
|| !data_size
) && slot
== 0) {
3030 if (mid
!= nritems
&&
3031 leaf_space_used(l
, mid
, nritems
- mid
) +
3032 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
3033 if (data_size
&& !tried_avoid_double
)
3034 goto push_for_double
;
3042 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
3044 btrfs_item_key(l
, &disk_key
, mid
);
3046 right
= btrfs_alloc_free_block(trans
, root
, root
->leafsize
, 0,
3047 root
->root_key
.objectid
,
3048 &disk_key
, 0, l
->start
, 0);
3050 return PTR_ERR(right
);
3052 root_add_used(root
, root
->leafsize
);
3054 memset_extent_buffer(right
, 0, 0, sizeof(struct btrfs_header
));
3055 btrfs_set_header_bytenr(right
, right
->start
);
3056 btrfs_set_header_generation(right
, trans
->transid
);
3057 btrfs_set_header_backref_rev(right
, BTRFS_MIXED_BACKREF_REV
);
3058 btrfs_set_header_owner(right
, root
->root_key
.objectid
);
3059 btrfs_set_header_level(right
, 0);
3060 write_extent_buffer(right
, root
->fs_info
->fsid
,
3061 (unsigned long)btrfs_header_fsid(right
),
3064 write_extent_buffer(right
, root
->fs_info
->chunk_tree_uuid
,
3065 (unsigned long)btrfs_header_chunk_tree_uuid(right
),
3070 btrfs_set_header_nritems(right
, 0);
3071 wret
= insert_ptr(trans
, root
, path
,
3072 &disk_key
, right
->start
,
3073 path
->slots
[1] + 1, 1);
3077 btrfs_tree_unlock(path
->nodes
[0]);
3078 free_extent_buffer(path
->nodes
[0]);
3079 path
->nodes
[0] = right
;
3081 path
->slots
[1] += 1;
3083 btrfs_set_header_nritems(right
, 0);
3084 wret
= insert_ptr(trans
, root
, path
,
3090 btrfs_tree_unlock(path
->nodes
[0]);
3091 free_extent_buffer(path
->nodes
[0]);
3092 path
->nodes
[0] = right
;
3094 if (path
->slots
[1] == 0) {
3095 wret
= fixup_low_keys(trans
, root
,
3096 path
, &disk_key
, 1);
3101 btrfs_mark_buffer_dirty(right
);
3105 ret
= copy_for_split(trans
, root
, path
, l
, right
, slot
, mid
, nritems
);
3109 BUG_ON(num_doubles
!= 0);
3117 push_for_double_split(trans
, root
, path
, data_size
);
3118 tried_avoid_double
= 1;
3119 if (btrfs_leaf_free_space(root
, path
->nodes
[0]) >= data_size
)
3124 static noinline
int setup_leaf_for_split(struct btrfs_trans_handle
*trans
,
3125 struct btrfs_root
*root
,
3126 struct btrfs_path
*path
, int ins_len
)
3128 struct btrfs_key key
;
3129 struct extent_buffer
*leaf
;
3130 struct btrfs_file_extent_item
*fi
;
3135 leaf
= path
->nodes
[0];
3136 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
3138 BUG_ON(key
.type
!= BTRFS_EXTENT_DATA_KEY
&&
3139 key
.type
!= BTRFS_EXTENT_CSUM_KEY
);
3141 if (btrfs_leaf_free_space(root
, leaf
) >= ins_len
)
3144 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
3145 if (key
.type
== BTRFS_EXTENT_DATA_KEY
) {
3146 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
3147 struct btrfs_file_extent_item
);
3148 extent_len
= btrfs_file_extent_num_bytes(leaf
, fi
);
3150 btrfs_release_path(root
, path
);
3152 path
->keep_locks
= 1;
3153 path
->search_for_split
= 1;
3154 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
3155 path
->search_for_split
= 0;
3160 leaf
= path
->nodes
[0];
3161 /* if our item isn't there or got smaller, return now */
3162 if (ret
> 0 || item_size
!= btrfs_item_size_nr(leaf
, path
->slots
[0]))
3165 /* the leaf has changed, it now has room. return now */
3166 if (btrfs_leaf_free_space(root
, path
->nodes
[0]) >= ins_len
)
3169 if (key
.type
== BTRFS_EXTENT_DATA_KEY
) {
3170 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
3171 struct btrfs_file_extent_item
);
3172 if (extent_len
!= btrfs_file_extent_num_bytes(leaf
, fi
))
3176 btrfs_set_path_blocking(path
);
3177 ret
= split_leaf(trans
, root
, &key
, path
, ins_len
, 1);
3181 path
->keep_locks
= 0;
3182 btrfs_unlock_up_safe(path
, 1);
3185 path
->keep_locks
= 0;
3189 static noinline
int split_item(struct btrfs_trans_handle
*trans
,
3190 struct btrfs_root
*root
,
3191 struct btrfs_path
*path
,
3192 struct btrfs_key
*new_key
,
3193 unsigned long split_offset
)
3195 struct extent_buffer
*leaf
;
3196 struct btrfs_item
*item
;
3197 struct btrfs_item
*new_item
;
3203 struct btrfs_disk_key disk_key
;
3205 leaf
= path
->nodes
[0];
3206 BUG_ON(btrfs_leaf_free_space(root
, leaf
) < sizeof(struct btrfs_item
));
3208 btrfs_set_path_blocking(path
);
3210 item
= btrfs_item_nr(leaf
, path
->slots
[0]);
3211 orig_offset
= btrfs_item_offset(leaf
, item
);
3212 item_size
= btrfs_item_size(leaf
, item
);
3214 buf
= kmalloc(item_size
, GFP_NOFS
);
3218 read_extent_buffer(leaf
, buf
, btrfs_item_ptr_offset(leaf
,
3219 path
->slots
[0]), item_size
);
3221 slot
= path
->slots
[0] + 1;
3222 nritems
= btrfs_header_nritems(leaf
);
3223 if (slot
!= nritems
) {
3224 /* shift the items */
3225 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ 1),
3226 btrfs_item_nr_offset(slot
),
3227 (nritems
- slot
) * sizeof(struct btrfs_item
));
3230 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
3231 btrfs_set_item_key(leaf
, &disk_key
, slot
);
3233 new_item
= btrfs_item_nr(leaf
, slot
);
3235 btrfs_set_item_offset(leaf
, new_item
, orig_offset
);
3236 btrfs_set_item_size(leaf
, new_item
, item_size
- split_offset
);
3238 btrfs_set_item_offset(leaf
, item
,
3239 orig_offset
+ item_size
- split_offset
);
3240 btrfs_set_item_size(leaf
, item
, split_offset
);
3242 btrfs_set_header_nritems(leaf
, nritems
+ 1);
3244 /* write the data for the start of the original item */
3245 write_extent_buffer(leaf
, buf
,
3246 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
3249 /* write the data for the new item */
3250 write_extent_buffer(leaf
, buf
+ split_offset
,
3251 btrfs_item_ptr_offset(leaf
, slot
),
3252 item_size
- split_offset
);
3253 btrfs_mark_buffer_dirty(leaf
);
3255 BUG_ON(btrfs_leaf_free_space(root
, leaf
) < 0);
3261 * This function splits a single item into two items,
3262 * giving 'new_key' to the new item and splitting the
3263 * old one at split_offset (from the start of the item).
3265 * The path may be released by this operation. After
3266 * the split, the path is pointing to the old item. The
3267 * new item is going to be in the same node as the old one.
3269 * Note, the item being split must be smaller enough to live alone on
3270 * a tree block with room for one extra struct btrfs_item
3272 * This allows us to split the item in place, keeping a lock on the
3273 * leaf the entire time.
3275 int btrfs_split_item(struct btrfs_trans_handle
*trans
,
3276 struct btrfs_root
*root
,
3277 struct btrfs_path
*path
,
3278 struct btrfs_key
*new_key
,
3279 unsigned long split_offset
)
3282 ret
= setup_leaf_for_split(trans
, root
, path
,
3283 sizeof(struct btrfs_item
));
3287 ret
= split_item(trans
, root
, path
, new_key
, split_offset
);
3292 * This function duplicate a item, giving 'new_key' to the new item.
3293 * It guarantees both items live in the same tree leaf and the new item
3294 * is contiguous with the original item.
3296 * This allows us to split file extent in place, keeping a lock on the
3297 * leaf the entire time.
3299 int btrfs_duplicate_item(struct btrfs_trans_handle
*trans
,
3300 struct btrfs_root
*root
,
3301 struct btrfs_path
*path
,
3302 struct btrfs_key
*new_key
)
3304 struct extent_buffer
*leaf
;
3308 leaf
= path
->nodes
[0];
3309 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
3310 ret
= setup_leaf_for_split(trans
, root
, path
,
3311 item_size
+ sizeof(struct btrfs_item
));
3316 ret
= setup_items_for_insert(trans
, root
, path
, new_key
, &item_size
,
3317 item_size
, item_size
+
3318 sizeof(struct btrfs_item
), 1);
3321 leaf
= path
->nodes
[0];
3322 memcpy_extent_buffer(leaf
,
3323 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
3324 btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1),
3330 * make the item pointed to by the path smaller. new_size indicates
3331 * how small to make it, and from_end tells us if we just chop bytes
3332 * off the end of the item or if we shift the item to chop bytes off
3335 int btrfs_truncate_item(struct btrfs_trans_handle
*trans
,
3336 struct btrfs_root
*root
,
3337 struct btrfs_path
*path
,
3338 u32 new_size
, int from_end
)
3342 struct extent_buffer
*leaf
;
3343 struct btrfs_item
*item
;
3345 unsigned int data_end
;
3346 unsigned int old_data_start
;
3347 unsigned int old_size
;
3348 unsigned int size_diff
;
3351 leaf
= path
->nodes
[0];
3352 slot
= path
->slots
[0];
3354 old_size
= btrfs_item_size_nr(leaf
, slot
);
3355 if (old_size
== new_size
)
3358 nritems
= btrfs_header_nritems(leaf
);
3359 data_end
= leaf_data_end(root
, leaf
);
3361 old_data_start
= btrfs_item_offset_nr(leaf
, slot
);
3363 size_diff
= old_size
- new_size
;
3366 BUG_ON(slot
>= nritems
);
3369 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3371 /* first correct the data pointers */
3372 for (i
= slot
; i
< nritems
; i
++) {
3374 item
= btrfs_item_nr(leaf
, i
);
3376 if (!leaf
->map_token
) {
3377 map_extent_buffer(leaf
, (unsigned long)item
,
3378 sizeof(struct btrfs_item
),
3379 &leaf
->map_token
, &leaf
->kaddr
,
3380 &leaf
->map_start
, &leaf
->map_len
,
3384 ioff
= btrfs_item_offset(leaf
, item
);
3385 btrfs_set_item_offset(leaf
, item
, ioff
+ size_diff
);
3388 if (leaf
->map_token
) {
3389 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3390 leaf
->map_token
= NULL
;
3393 /* shift the data */
3395 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3396 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
3397 data_end
, old_data_start
+ new_size
- data_end
);
3399 struct btrfs_disk_key disk_key
;
3402 btrfs_item_key(leaf
, &disk_key
, slot
);
3404 if (btrfs_disk_key_type(&disk_key
) == BTRFS_EXTENT_DATA_KEY
) {
3406 struct btrfs_file_extent_item
*fi
;
3408 fi
= btrfs_item_ptr(leaf
, slot
,
3409 struct btrfs_file_extent_item
);
3410 fi
= (struct btrfs_file_extent_item
*)(
3411 (unsigned long)fi
- size_diff
);
3413 if (btrfs_file_extent_type(leaf
, fi
) ==
3414 BTRFS_FILE_EXTENT_INLINE
) {
3415 ptr
= btrfs_item_ptr_offset(leaf
, slot
);
3416 memmove_extent_buffer(leaf
, ptr
,
3418 offsetof(struct btrfs_file_extent_item
,
3423 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3424 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
3425 data_end
, old_data_start
- data_end
);
3427 offset
= btrfs_disk_key_offset(&disk_key
);
3428 btrfs_set_disk_key_offset(&disk_key
, offset
+ size_diff
);
3429 btrfs_set_item_key(leaf
, &disk_key
, slot
);
3431 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3434 item
= btrfs_item_nr(leaf
, slot
);
3435 btrfs_set_item_size(leaf
, item
, new_size
);
3436 btrfs_mark_buffer_dirty(leaf
);
3439 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3440 btrfs_print_leaf(root
, leaf
);
3447 * make the item pointed to by the path bigger, data_size is the new size.
3449 int btrfs_extend_item(struct btrfs_trans_handle
*trans
,
3450 struct btrfs_root
*root
, struct btrfs_path
*path
,
3455 struct extent_buffer
*leaf
;
3456 struct btrfs_item
*item
;
3458 unsigned int data_end
;
3459 unsigned int old_data
;
3460 unsigned int old_size
;
3463 leaf
= path
->nodes
[0];
3465 nritems
= btrfs_header_nritems(leaf
);
3466 data_end
= leaf_data_end(root
, leaf
);
3468 if (btrfs_leaf_free_space(root
, leaf
) < data_size
) {
3469 btrfs_print_leaf(root
, leaf
);
3472 slot
= path
->slots
[0];
3473 old_data
= btrfs_item_end_nr(leaf
, slot
);
3476 if (slot
>= nritems
) {
3477 btrfs_print_leaf(root
, leaf
);
3478 printk(KERN_CRIT
"slot %d too large, nritems %d\n",
3484 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3486 /* first correct the data pointers */
3487 for (i
= slot
; i
< nritems
; i
++) {
3489 item
= btrfs_item_nr(leaf
, i
);
3491 if (!leaf
->map_token
) {
3492 map_extent_buffer(leaf
, (unsigned long)item
,
3493 sizeof(struct btrfs_item
),
3494 &leaf
->map_token
, &leaf
->kaddr
,
3495 &leaf
->map_start
, &leaf
->map_len
,
3498 ioff
= btrfs_item_offset(leaf
, item
);
3499 btrfs_set_item_offset(leaf
, item
, ioff
- data_size
);
3502 if (leaf
->map_token
) {
3503 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3504 leaf
->map_token
= NULL
;
3507 /* shift the data */
3508 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3509 data_end
- data_size
, btrfs_leaf_data(leaf
) +
3510 data_end
, old_data
- data_end
);
3512 data_end
= old_data
;
3513 old_size
= btrfs_item_size_nr(leaf
, slot
);
3514 item
= btrfs_item_nr(leaf
, slot
);
3515 btrfs_set_item_size(leaf
, item
, old_size
+ data_size
);
3516 btrfs_mark_buffer_dirty(leaf
);
3519 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3520 btrfs_print_leaf(root
, leaf
);
3527 * Given a key and some data, insert items into the tree.
3528 * This does all the path init required, making room in the tree if needed.
3529 * Returns the number of keys that were inserted.
3531 int btrfs_insert_some_items(struct btrfs_trans_handle
*trans
,
3532 struct btrfs_root
*root
,
3533 struct btrfs_path
*path
,
3534 struct btrfs_key
*cpu_key
, u32
*data_size
,
3537 struct extent_buffer
*leaf
;
3538 struct btrfs_item
*item
;
3545 unsigned int data_end
;
3546 struct btrfs_disk_key disk_key
;
3547 struct btrfs_key found_key
;
3549 for (i
= 0; i
< nr
; i
++) {
3550 if (total_size
+ data_size
[i
] + sizeof(struct btrfs_item
) >
3551 BTRFS_LEAF_DATA_SIZE(root
)) {
3555 total_data
+= data_size
[i
];
3556 total_size
+= data_size
[i
] + sizeof(struct btrfs_item
);
3560 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3566 leaf
= path
->nodes
[0];
3568 nritems
= btrfs_header_nritems(leaf
);
3569 data_end
= leaf_data_end(root
, leaf
);
3571 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3572 for (i
= nr
; i
>= 0; i
--) {
3573 total_data
-= data_size
[i
];
3574 total_size
-= data_size
[i
] + sizeof(struct btrfs_item
);
3575 if (total_size
< btrfs_leaf_free_space(root
, leaf
))
3581 slot
= path
->slots
[0];
3584 if (slot
!= nritems
) {
3585 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3587 item
= btrfs_item_nr(leaf
, slot
);
3588 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
3590 /* figure out how many keys we can insert in here */
3591 total_data
= data_size
[0];
3592 for (i
= 1; i
< nr
; i
++) {
3593 if (btrfs_comp_cpu_keys(&found_key
, cpu_key
+ i
) <= 0)
3595 total_data
+= data_size
[i
];
3599 if (old_data
< data_end
) {
3600 btrfs_print_leaf(root
, leaf
);
3601 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3602 slot
, old_data
, data_end
);
3606 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3608 /* first correct the data pointers */
3609 WARN_ON(leaf
->map_token
);
3610 for (i
= slot
; i
< nritems
; i
++) {
3613 item
= btrfs_item_nr(leaf
, i
);
3614 if (!leaf
->map_token
) {
3615 map_extent_buffer(leaf
, (unsigned long)item
,
3616 sizeof(struct btrfs_item
),
3617 &leaf
->map_token
, &leaf
->kaddr
,
3618 &leaf
->map_start
, &leaf
->map_len
,
3622 ioff
= btrfs_item_offset(leaf
, item
);
3623 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3625 if (leaf
->map_token
) {
3626 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3627 leaf
->map_token
= NULL
;
3630 /* shift the items */
3631 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3632 btrfs_item_nr_offset(slot
),
3633 (nritems
- slot
) * sizeof(struct btrfs_item
));
3635 /* shift the data */
3636 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3637 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3638 data_end
, old_data
- data_end
);
3639 data_end
= old_data
;
3642 * this sucks but it has to be done, if we are inserting at
3643 * the end of the leaf only insert 1 of the items, since we
3644 * have no way of knowing whats on the next leaf and we'd have
3645 * to drop our current locks to figure it out
3650 /* setup the item for the new data */
3651 for (i
= 0; i
< nr
; i
++) {
3652 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3653 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3654 item
= btrfs_item_nr(leaf
, slot
+ i
);
3655 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3656 data_end
-= data_size
[i
];
3657 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3659 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3660 btrfs_mark_buffer_dirty(leaf
);
3664 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3665 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3668 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3669 btrfs_print_leaf(root
, leaf
);
3679 * this is a helper for btrfs_insert_empty_items, the main goal here is
3680 * to save stack depth by doing the bulk of the work in a function
3681 * that doesn't call btrfs_search_slot
3683 static noinline_for_stack
int
3684 setup_items_for_insert(struct btrfs_trans_handle
*trans
,
3685 struct btrfs_root
*root
, struct btrfs_path
*path
,
3686 struct btrfs_key
*cpu_key
, u32
*data_size
,
3687 u32 total_data
, u32 total_size
, int nr
)
3689 struct btrfs_item
*item
;
3692 unsigned int data_end
;
3693 struct btrfs_disk_key disk_key
;
3695 struct extent_buffer
*leaf
;
3698 leaf
= path
->nodes
[0];
3699 slot
= path
->slots
[0];
3701 nritems
= btrfs_header_nritems(leaf
);
3702 data_end
= leaf_data_end(root
, leaf
);
3704 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3705 btrfs_print_leaf(root
, leaf
);
3706 printk(KERN_CRIT
"not enough freespace need %u have %d\n",
3707 total_size
, btrfs_leaf_free_space(root
, leaf
));
3711 if (slot
!= nritems
) {
3712 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3714 if (old_data
< data_end
) {
3715 btrfs_print_leaf(root
, leaf
);
3716 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3717 slot
, old_data
, data_end
);
3721 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3723 /* first correct the data pointers */
3724 WARN_ON(leaf
->map_token
);
3725 for (i
= slot
; i
< nritems
; i
++) {
3728 item
= btrfs_item_nr(leaf
, i
);
3729 if (!leaf
->map_token
) {
3730 map_extent_buffer(leaf
, (unsigned long)item
,
3731 sizeof(struct btrfs_item
),
3732 &leaf
->map_token
, &leaf
->kaddr
,
3733 &leaf
->map_start
, &leaf
->map_len
,
3737 ioff
= btrfs_item_offset(leaf
, item
);
3738 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3740 if (leaf
->map_token
) {
3741 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3742 leaf
->map_token
= NULL
;
3745 /* shift the items */
3746 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3747 btrfs_item_nr_offset(slot
),
3748 (nritems
- slot
) * sizeof(struct btrfs_item
));
3750 /* shift the data */
3751 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3752 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3753 data_end
, old_data
- data_end
);
3754 data_end
= old_data
;
3757 /* setup the item for the new data */
3758 for (i
= 0; i
< nr
; i
++) {
3759 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3760 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3761 item
= btrfs_item_nr(leaf
, slot
+ i
);
3762 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3763 data_end
-= data_size
[i
];
3764 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3767 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3771 struct btrfs_disk_key disk_key
;
3772 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3773 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3775 btrfs_unlock_up_safe(path
, 1);
3776 btrfs_mark_buffer_dirty(leaf
);
3778 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3779 btrfs_print_leaf(root
, leaf
);
3786 * Given a key and some data, insert items into the tree.
3787 * This does all the path init required, making room in the tree if needed.
3789 int btrfs_insert_empty_items(struct btrfs_trans_handle
*trans
,
3790 struct btrfs_root
*root
,
3791 struct btrfs_path
*path
,
3792 struct btrfs_key
*cpu_key
, u32
*data_size
,
3801 for (i
= 0; i
< nr
; i
++)
3802 total_data
+= data_size
[i
];
3804 total_size
= total_data
+ (nr
* sizeof(struct btrfs_item
));
3805 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3811 slot
= path
->slots
[0];
3814 ret
= setup_items_for_insert(trans
, root
, path
, cpu_key
, data_size
,
3815 total_data
, total_size
, nr
);
3822 * Given a key and some data, insert an item into the tree.
3823 * This does all the path init required, making room in the tree if needed.
3825 int btrfs_insert_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
3826 *root
, struct btrfs_key
*cpu_key
, void *data
, u32
3830 struct btrfs_path
*path
;
3831 struct extent_buffer
*leaf
;
3834 path
= btrfs_alloc_path();
3836 ret
= btrfs_insert_empty_item(trans
, root
, path
, cpu_key
, data_size
);
3838 leaf
= path
->nodes
[0];
3839 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
3840 write_extent_buffer(leaf
, data
, ptr
, data_size
);
3841 btrfs_mark_buffer_dirty(leaf
);
3843 btrfs_free_path(path
);
3848 * delete the pointer from a given node.
3850 * the tree should have been previously balanced so the deletion does not
3853 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3854 struct btrfs_path
*path
, int level
, int slot
)
3856 struct extent_buffer
*parent
= path
->nodes
[level
];
3861 nritems
= btrfs_header_nritems(parent
);
3862 if (slot
!= nritems
- 1) {
3863 memmove_extent_buffer(parent
,
3864 btrfs_node_key_ptr_offset(slot
),
3865 btrfs_node_key_ptr_offset(slot
+ 1),
3866 sizeof(struct btrfs_key_ptr
) *
3867 (nritems
- slot
- 1));
3870 btrfs_set_header_nritems(parent
, nritems
);
3871 if (nritems
== 0 && parent
== root
->node
) {
3872 BUG_ON(btrfs_header_level(root
->node
) != 1);
3873 /* just turn the root into a leaf and break */
3874 btrfs_set_header_level(root
->node
, 0);
3875 } else if (slot
== 0) {
3876 struct btrfs_disk_key disk_key
;
3878 btrfs_node_key(parent
, &disk_key
, 0);
3879 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, level
+ 1);
3883 btrfs_mark_buffer_dirty(parent
);
3888 * a helper function to delete the leaf pointed to by path->slots[1] and
3891 * This deletes the pointer in path->nodes[1] and frees the leaf
3892 * block extent. zero is returned if it all worked out, < 0 otherwise.
3894 * The path must have already been setup for deleting the leaf, including
3895 * all the proper balancing. path->nodes[1] must be locked.
3897 static noinline
int btrfs_del_leaf(struct btrfs_trans_handle
*trans
,
3898 struct btrfs_root
*root
,
3899 struct btrfs_path
*path
,
3900 struct extent_buffer
*leaf
)
3904 WARN_ON(btrfs_header_generation(leaf
) != trans
->transid
);
3905 ret
= del_ptr(trans
, root
, path
, 1, path
->slots
[1]);
3910 * btrfs_free_extent is expensive, we want to make sure we
3911 * aren't holding any locks when we call it
3913 btrfs_unlock_up_safe(path
, 0);
3915 root_sub_used(root
, leaf
->len
);
3917 btrfs_free_tree_block(trans
, root
, leaf
, 0, 1);
3921 * delete the item at the leaf level in path. If that empties
3922 * the leaf, remove it from the tree
3924 int btrfs_del_items(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3925 struct btrfs_path
*path
, int slot
, int nr
)
3927 struct extent_buffer
*leaf
;
3928 struct btrfs_item
*item
;
3936 leaf
= path
->nodes
[0];
3937 last_off
= btrfs_item_offset_nr(leaf
, slot
+ nr
- 1);
3939 for (i
= 0; i
< nr
; i
++)
3940 dsize
+= btrfs_item_size_nr(leaf
, slot
+ i
);
3942 nritems
= btrfs_header_nritems(leaf
);
3944 if (slot
+ nr
!= nritems
) {
3945 int data_end
= leaf_data_end(root
, leaf
);
3947 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3949 btrfs_leaf_data(leaf
) + data_end
,
3950 last_off
- data_end
);
3952 for (i
= slot
+ nr
; i
< nritems
; i
++) {
3955 item
= btrfs_item_nr(leaf
, i
);
3956 if (!leaf
->map_token
) {
3957 map_extent_buffer(leaf
, (unsigned long)item
,
3958 sizeof(struct btrfs_item
),
3959 &leaf
->map_token
, &leaf
->kaddr
,
3960 &leaf
->map_start
, &leaf
->map_len
,
3963 ioff
= btrfs_item_offset(leaf
, item
);
3964 btrfs_set_item_offset(leaf
, item
, ioff
+ dsize
);
3967 if (leaf
->map_token
) {
3968 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3969 leaf
->map_token
= NULL
;
3972 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
),
3973 btrfs_item_nr_offset(slot
+ nr
),
3974 sizeof(struct btrfs_item
) *
3975 (nritems
- slot
- nr
));
3977 btrfs_set_header_nritems(leaf
, nritems
- nr
);
3980 /* delete the leaf if we've emptied it */
3982 if (leaf
== root
->node
) {
3983 btrfs_set_header_level(leaf
, 0);
3985 btrfs_set_path_blocking(path
);
3986 clean_tree_block(trans
, root
, leaf
);
3987 ret
= btrfs_del_leaf(trans
, root
, path
, leaf
);
3991 int used
= leaf_space_used(leaf
, 0, nritems
);
3993 struct btrfs_disk_key disk_key
;
3995 btrfs_item_key(leaf
, &disk_key
, 0);
3996 wret
= fixup_low_keys(trans
, root
, path
,
4002 /* delete the leaf if it is mostly empty */
4003 if (used
< BTRFS_LEAF_DATA_SIZE(root
) / 3) {
4004 /* push_leaf_left fixes the path.
4005 * make sure the path still points to our leaf
4006 * for possible call to del_ptr below
4008 slot
= path
->slots
[1];
4009 extent_buffer_get(leaf
);
4011 btrfs_set_path_blocking(path
);
4012 wret
= push_leaf_left(trans
, root
, path
, 1, 1,
4014 if (wret
< 0 && wret
!= -ENOSPC
)
4017 if (path
->nodes
[0] == leaf
&&
4018 btrfs_header_nritems(leaf
)) {
4019 wret
= push_leaf_right(trans
, root
, path
, 1,
4021 if (wret
< 0 && wret
!= -ENOSPC
)
4025 if (btrfs_header_nritems(leaf
) == 0) {
4026 path
->slots
[1] = slot
;
4027 ret
= btrfs_del_leaf(trans
, root
, path
, leaf
);
4029 free_extent_buffer(leaf
);
4031 /* if we're still in the path, make sure
4032 * we're dirty. Otherwise, one of the
4033 * push_leaf functions must have already
4034 * dirtied this buffer
4036 if (path
->nodes
[0] == leaf
)
4037 btrfs_mark_buffer_dirty(leaf
);
4038 free_extent_buffer(leaf
);
4041 btrfs_mark_buffer_dirty(leaf
);
4048 * search the tree again to find a leaf with lesser keys
4049 * returns 0 if it found something or 1 if there are no lesser leaves.
4050 * returns < 0 on io errors.
4052 * This may release the path, and so you may lose any locks held at the
4055 int btrfs_prev_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
4057 struct btrfs_key key
;
4058 struct btrfs_disk_key found_key
;
4061 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, 0);
4065 else if (key
.type
> 0)
4067 else if (key
.objectid
> 0)
4072 btrfs_release_path(root
, path
);
4073 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4076 btrfs_item_key(path
->nodes
[0], &found_key
, 0);
4077 ret
= comp_keys(&found_key
, &key
);
4084 * A helper function to walk down the tree starting at min_key, and looking
4085 * for nodes or leaves that are either in cache or have a minimum
4086 * transaction id. This is used by the btree defrag code, and tree logging
4088 * This does not cow, but it does stuff the starting key it finds back
4089 * into min_key, so you can call btrfs_search_slot with cow=1 on the
4090 * key and get a writable path.
4092 * This does lock as it descends, and path->keep_locks should be set
4093 * to 1 by the caller.
4095 * This honors path->lowest_level to prevent descent past a given level
4098 * min_trans indicates the oldest transaction that you are interested
4099 * in walking through. Any nodes or leaves older than min_trans are
4100 * skipped over (without reading them).
4102 * returns zero if something useful was found, < 0 on error and 1 if there
4103 * was nothing in the tree that matched the search criteria.
4105 int btrfs_search_forward(struct btrfs_root
*root
, struct btrfs_key
*min_key
,
4106 struct btrfs_key
*max_key
,
4107 struct btrfs_path
*path
, int cache_only
,
4110 struct extent_buffer
*cur
;
4111 struct btrfs_key found_key
;
4118 WARN_ON(!path
->keep_locks
);
4120 cur
= btrfs_lock_root_node(root
);
4121 level
= btrfs_header_level(cur
);
4122 WARN_ON(path
->nodes
[level
]);
4123 path
->nodes
[level
] = cur
;
4124 path
->locks
[level
] = 1;
4126 if (btrfs_header_generation(cur
) < min_trans
) {
4131 nritems
= btrfs_header_nritems(cur
);
4132 level
= btrfs_header_level(cur
);
4133 sret
= bin_search(cur
, min_key
, level
, &slot
);
4135 /* at the lowest level, we're done, setup the path and exit */
4136 if (level
== path
->lowest_level
) {
4137 if (slot
>= nritems
)
4140 path
->slots
[level
] = slot
;
4141 btrfs_item_key_to_cpu(cur
, &found_key
, slot
);
4144 if (sret
&& slot
> 0)
4147 * check this node pointer against the cache_only and
4148 * min_trans parameters. If it isn't in cache or is too
4149 * old, skip to the next one.
4151 while (slot
< nritems
) {
4154 struct extent_buffer
*tmp
;
4155 struct btrfs_disk_key disk_key
;
4157 blockptr
= btrfs_node_blockptr(cur
, slot
);
4158 gen
= btrfs_node_ptr_generation(cur
, slot
);
4159 if (gen
< min_trans
) {
4167 btrfs_node_key(cur
, &disk_key
, slot
);
4168 if (comp_keys(&disk_key
, max_key
) >= 0) {
4174 tmp
= btrfs_find_tree_block(root
, blockptr
,
4175 btrfs_level_size(root
, level
- 1));
4177 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
4178 free_extent_buffer(tmp
);
4182 free_extent_buffer(tmp
);
4187 * we didn't find a candidate key in this node, walk forward
4188 * and find another one
4190 if (slot
>= nritems
) {
4191 path
->slots
[level
] = slot
;
4192 btrfs_set_path_blocking(path
);
4193 sret
= btrfs_find_next_key(root
, path
, min_key
, level
,
4194 cache_only
, min_trans
);
4196 btrfs_release_path(root
, path
);
4202 /* save our key for returning back */
4203 btrfs_node_key_to_cpu(cur
, &found_key
, slot
);
4204 path
->slots
[level
] = slot
;
4205 if (level
== path
->lowest_level
) {
4207 unlock_up(path
, level
, 1);
4210 btrfs_set_path_blocking(path
);
4211 cur
= read_node_slot(root
, cur
, slot
);
4213 btrfs_tree_lock(cur
);
4215 path
->locks
[level
- 1] = 1;
4216 path
->nodes
[level
- 1] = cur
;
4217 unlock_up(path
, level
, 1);
4218 btrfs_clear_path_blocking(path
, NULL
);
4222 memcpy(min_key
, &found_key
, sizeof(found_key
));
4223 btrfs_set_path_blocking(path
);
4228 * this is similar to btrfs_next_leaf, but does not try to preserve
4229 * and fixup the path. It looks for and returns the next key in the
4230 * tree based on the current path and the cache_only and min_trans
4233 * 0 is returned if another key is found, < 0 if there are any errors
4234 * and 1 is returned if there are no higher keys in the tree
4236 * path->keep_locks should be set to 1 on the search made before
4237 * calling this function.
4239 int btrfs_find_next_key(struct btrfs_root
*root
, struct btrfs_path
*path
,
4240 struct btrfs_key
*key
, int level
,
4241 int cache_only
, u64 min_trans
)
4244 struct extent_buffer
*c
;
4246 WARN_ON(!path
->keep_locks
);
4247 while (level
< BTRFS_MAX_LEVEL
) {
4248 if (!path
->nodes
[level
])
4251 slot
= path
->slots
[level
] + 1;
4252 c
= path
->nodes
[level
];
4254 if (slot
>= btrfs_header_nritems(c
)) {
4257 struct btrfs_key cur_key
;
4258 if (level
+ 1 >= BTRFS_MAX_LEVEL
||
4259 !path
->nodes
[level
+ 1])
4262 if (path
->locks
[level
+ 1]) {
4267 slot
= btrfs_header_nritems(c
) - 1;
4269 btrfs_item_key_to_cpu(c
, &cur_key
, slot
);
4271 btrfs_node_key_to_cpu(c
, &cur_key
, slot
);
4273 orig_lowest
= path
->lowest_level
;
4274 btrfs_release_path(root
, path
);
4275 path
->lowest_level
= level
;
4276 ret
= btrfs_search_slot(NULL
, root
, &cur_key
, path
,
4278 path
->lowest_level
= orig_lowest
;
4282 c
= path
->nodes
[level
];
4283 slot
= path
->slots
[level
];
4290 btrfs_item_key_to_cpu(c
, key
, slot
);
4292 u64 blockptr
= btrfs_node_blockptr(c
, slot
);
4293 u64 gen
= btrfs_node_ptr_generation(c
, slot
);
4296 struct extent_buffer
*cur
;
4297 cur
= btrfs_find_tree_block(root
, blockptr
,
4298 btrfs_level_size(root
, level
- 1));
4299 if (!cur
|| !btrfs_buffer_uptodate(cur
, gen
)) {
4302 free_extent_buffer(cur
);
4305 free_extent_buffer(cur
);
4307 if (gen
< min_trans
) {
4311 btrfs_node_key_to_cpu(c
, key
, slot
);
4319 * search the tree again to find a leaf with greater keys
4320 * returns 0 if it found something or 1 if there are no greater leaves.
4321 * returns < 0 on io errors.
4323 int btrfs_next_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
4327 struct extent_buffer
*c
;
4328 struct extent_buffer
*next
;
4329 struct btrfs_key key
;
4332 int old_spinning
= path
->leave_spinning
;
4333 int force_blocking
= 0;
4335 nritems
= btrfs_header_nritems(path
->nodes
[0]);
4340 * we take the blocks in an order that upsets lockdep. Using
4341 * blocking mode is the only way around it.
4343 #ifdef CONFIG_DEBUG_LOCK_ALLOC
4347 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, nritems
- 1);
4351 btrfs_release_path(root
, path
);
4353 path
->keep_locks
= 1;
4355 if (!force_blocking
)
4356 path
->leave_spinning
= 1;
4358 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4359 path
->keep_locks
= 0;
4364 nritems
= btrfs_header_nritems(path
->nodes
[0]);
4366 * by releasing the path above we dropped all our locks. A balance
4367 * could have added more items next to the key that used to be
4368 * at the very end of the block. So, check again here and
4369 * advance the path if there are now more items available.
4371 if (nritems
> 0 && path
->slots
[0] < nritems
- 1) {
4378 while (level
< BTRFS_MAX_LEVEL
) {
4379 if (!path
->nodes
[level
]) {
4384 slot
= path
->slots
[level
] + 1;
4385 c
= path
->nodes
[level
];
4386 if (slot
>= btrfs_header_nritems(c
)) {
4388 if (level
== BTRFS_MAX_LEVEL
) {
4396 btrfs_tree_unlock(next
);
4397 free_extent_buffer(next
);
4401 ret
= read_block_for_search(NULL
, root
, path
, &next
, level
,
4407 btrfs_release_path(root
, path
);
4411 if (!path
->skip_locking
) {
4412 ret
= btrfs_try_spin_lock(next
);
4414 btrfs_set_path_blocking(path
);
4415 btrfs_tree_lock(next
);
4416 if (!force_blocking
)
4417 btrfs_clear_path_blocking(path
, next
);
4420 btrfs_set_lock_blocking(next
);
4424 path
->slots
[level
] = slot
;
4427 c
= path
->nodes
[level
];
4428 if (path
->locks
[level
])
4429 btrfs_tree_unlock(c
);
4431 free_extent_buffer(c
);
4432 path
->nodes
[level
] = next
;
4433 path
->slots
[level
] = 0;
4434 if (!path
->skip_locking
)
4435 path
->locks
[level
] = 1;
4440 ret
= read_block_for_search(NULL
, root
, path
, &next
, level
,
4446 btrfs_release_path(root
, path
);
4450 if (!path
->skip_locking
) {
4451 btrfs_assert_tree_locked(path
->nodes
[level
]);
4452 ret
= btrfs_try_spin_lock(next
);
4454 btrfs_set_path_blocking(path
);
4455 btrfs_tree_lock(next
);
4456 if (!force_blocking
)
4457 btrfs_clear_path_blocking(path
, next
);
4460 btrfs_set_lock_blocking(next
);
4465 unlock_up(path
, 0, 1);
4466 path
->leave_spinning
= old_spinning
;
4468 btrfs_set_path_blocking(path
);
4474 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4475 * searching until it gets past min_objectid or finds an item of 'type'
4477 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4479 int btrfs_previous_item(struct btrfs_root
*root
,
4480 struct btrfs_path
*path
, u64 min_objectid
,
4483 struct btrfs_key found_key
;
4484 struct extent_buffer
*leaf
;
4489 if (path
->slots
[0] == 0) {
4490 btrfs_set_path_blocking(path
);
4491 ret
= btrfs_prev_leaf(root
, path
);
4497 leaf
= path
->nodes
[0];
4498 nritems
= btrfs_header_nritems(leaf
);
4501 if (path
->slots
[0] == nritems
)
4504 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4505 if (found_key
.objectid
< min_objectid
)
4507 if (found_key
.type
== type
)
4509 if (found_key
.objectid
== min_objectid
&&
4510 found_key
.type
< type
)