2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/sort.h>
23 #include <linux/rcupdate.h>
24 #include <linux/kthread.h>
29 #include "print-tree.h"
30 #include "transaction.h"
33 #include "free-space-cache.h"
35 static int update_reserved_extents(struct btrfs_root
*root
,
36 u64 bytenr
, u64 num
, int reserve
);
37 static int update_block_group(struct btrfs_trans_handle
*trans
,
38 struct btrfs_root
*root
,
39 u64 bytenr
, u64 num_bytes
, int alloc
,
41 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
42 struct btrfs_root
*root
,
43 u64 bytenr
, u64 num_bytes
, u64 parent
,
44 u64 root_objectid
, u64 owner_objectid
,
45 u64 owner_offset
, int refs_to_drop
,
46 struct btrfs_delayed_extent_op
*extra_op
);
47 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op
*extent_op
,
48 struct extent_buffer
*leaf
,
49 struct btrfs_extent_item
*ei
);
50 static int alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
51 struct btrfs_root
*root
,
52 u64 parent
, u64 root_objectid
,
53 u64 flags
, u64 owner
, u64 offset
,
54 struct btrfs_key
*ins
, int ref_mod
);
55 static int alloc_reserved_tree_block(struct btrfs_trans_handle
*trans
,
56 struct btrfs_root
*root
,
57 u64 parent
, u64 root_objectid
,
58 u64 flags
, struct btrfs_disk_key
*key
,
59 int level
, struct btrfs_key
*ins
);
61 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
62 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
63 u64 flags
, int force
);
66 block_group_cache_done(struct btrfs_block_group_cache
*cache
)
69 return cache
->cached
== BTRFS_CACHE_FINISHED
;
72 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
74 return (cache
->flags
& bits
) == bits
;
78 * this adds the block group to the fs_info rb tree for the block group
81 static int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
82 struct btrfs_block_group_cache
*block_group
)
85 struct rb_node
*parent
= NULL
;
86 struct btrfs_block_group_cache
*cache
;
88 spin_lock(&info
->block_group_cache_lock
);
89 p
= &info
->block_group_cache_tree
.rb_node
;
93 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
95 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
97 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
100 spin_unlock(&info
->block_group_cache_lock
);
105 rb_link_node(&block_group
->cache_node
, parent
, p
);
106 rb_insert_color(&block_group
->cache_node
,
107 &info
->block_group_cache_tree
);
108 spin_unlock(&info
->block_group_cache_lock
);
114 * This will return the block group at or after bytenr if contains is 0, else
115 * it will return the block group that contains the bytenr
117 static struct btrfs_block_group_cache
*
118 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
121 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
125 spin_lock(&info
->block_group_cache_lock
);
126 n
= info
->block_group_cache_tree
.rb_node
;
129 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
131 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
132 start
= cache
->key
.objectid
;
134 if (bytenr
< start
) {
135 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
138 } else if (bytenr
> start
) {
139 if (contains
&& bytenr
<= end
) {
150 atomic_inc(&ret
->count
);
151 spin_unlock(&info
->block_group_cache_lock
);
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
162 void btrfs_free_pinned_extents(struct btrfs_fs_info
*info
)
164 u64 start
, end
, last
= 0;
168 ret
= find_first_extent_bit(&info
->pinned_extents
, last
,
170 EXTENT_LOCKED
|EXTENT_DIRTY
);
174 clear_extent_bits(&info
->pinned_extents
, start
, end
,
175 EXTENT_LOCKED
|EXTENT_DIRTY
, GFP_NOFS
);
180 static int remove_sb_from_cache(struct btrfs_root
*root
,
181 struct btrfs_block_group_cache
*cache
)
183 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
189 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
190 bytenr
= btrfs_sb_offset(i
);
191 ret
= btrfs_rmap_block(&root
->fs_info
->mapping_tree
,
192 cache
->key
.objectid
, bytenr
,
193 0, &logical
, &nr
, &stripe_len
);
196 try_lock_extent(&fs_info
->pinned_extents
,
198 logical
[nr
] + stripe_len
- 1, GFP_NOFS
);
207 * this is only called by cache_block_group, since we could have freed extents
208 * we need to check the pinned_extents for any extents that can't be used yet
209 * since their free space will be released as soon as the transaction commits.
211 static u64
add_new_free_space(struct btrfs_block_group_cache
*block_group
,
212 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
214 u64 extent_start
, extent_end
, size
, total_added
= 0;
217 while (start
< end
) {
218 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
219 &extent_start
, &extent_end
,
220 EXTENT_DIRTY
|EXTENT_LOCKED
);
224 if (extent_start
== start
) {
225 start
= extent_end
+ 1;
226 } else if (extent_start
> start
&& extent_start
< end
) {
227 size
= extent_start
- start
;
229 ret
= btrfs_add_free_space(block_group
, start
,
232 start
= extent_end
+ 1;
241 ret
= btrfs_add_free_space(block_group
, start
, size
);
248 static int caching_kthread(void *data
)
250 struct btrfs_block_group_cache
*block_group
= data
;
251 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
253 struct btrfs_path
*path
;
255 struct btrfs_key key
;
256 struct extent_buffer
*leaf
;
262 path
= btrfs_alloc_path();
266 atomic_inc(&block_group
->space_info
->caching_threads
);
267 last
= max_t(u64
, block_group
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
269 /* need to make sure the commit_root doesn't disappear */
270 down_read(&fs_info
->extent_root
->commit_root_sem
);
273 * We don't want to deadlock with somebody trying to allocate a new
274 * extent for the extent root while also trying to search the extent
275 * root to add free space. So we skip locking and search the commit
276 * root, since its read-only
278 path
->skip_locking
= 1;
279 path
->search_commit_root
= 1;
284 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
285 ret
= btrfs_search_slot(NULL
, fs_info
->extent_root
, &key
, path
, 0, 0);
291 if (block_group
->fs_info
->closing
)
294 leaf
= path
->nodes
[0];
295 slot
= path
->slots
[0];
296 if (slot
>= btrfs_header_nritems(leaf
)) {
297 ret
= btrfs_next_leaf(fs_info
->extent_root
, path
);
303 if (need_resched()) {
304 btrfs_release_path(fs_info
->extent_root
, path
);
305 up_read(&fs_info
->extent_root
->commit_root_sem
);
312 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
313 if (key
.objectid
< block_group
->key
.objectid
)
316 if (key
.objectid
>= block_group
->key
.objectid
+
317 block_group
->key
.offset
)
320 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
321 total_found
+= add_new_free_space(block_group
,
324 last
= key
.objectid
+ key
.offset
;
327 if (total_found
> (1024 * 1024 * 2)) {
329 wake_up(&block_group
->caching_q
);
336 total_found
+= add_new_free_space(block_group
, fs_info
, last
,
337 block_group
->key
.objectid
+
338 block_group
->key
.offset
);
340 spin_lock(&block_group
->lock
);
341 block_group
->cached
= BTRFS_CACHE_FINISHED
;
342 spin_unlock(&block_group
->lock
);
345 btrfs_free_path(path
);
346 up_read(&fs_info
->extent_root
->commit_root_sem
);
347 atomic_dec(&block_group
->space_info
->caching_threads
);
348 wake_up(&block_group
->caching_q
);
353 static int cache_block_group(struct btrfs_block_group_cache
*cache
)
355 struct task_struct
*tsk
;
358 spin_lock(&cache
->lock
);
359 if (cache
->cached
!= BTRFS_CACHE_NO
) {
360 spin_unlock(&cache
->lock
);
363 cache
->cached
= BTRFS_CACHE_STARTED
;
364 spin_unlock(&cache
->lock
);
366 tsk
= kthread_run(caching_kthread
, cache
, "btrfs-cache-%llu\n",
367 cache
->key
.objectid
);
370 printk(KERN_ERR
"error running thread %d\n", ret
);
378 * return the block group that starts at or after bytenr
380 static struct btrfs_block_group_cache
*
381 btrfs_lookup_first_block_group(struct btrfs_fs_info
*info
, u64 bytenr
)
383 struct btrfs_block_group_cache
*cache
;
385 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
391 * return the block group that contains the given bytenr
393 struct btrfs_block_group_cache
*btrfs_lookup_block_group(
394 struct btrfs_fs_info
*info
,
397 struct btrfs_block_group_cache
*cache
;
399 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
404 void btrfs_put_block_group(struct btrfs_block_group_cache
*cache
)
406 if (atomic_dec_and_test(&cache
->count
))
410 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
413 struct list_head
*head
= &info
->space_info
;
414 struct btrfs_space_info
*found
;
417 list_for_each_entry_rcu(found
, head
, list
) {
418 if (found
->flags
== flags
) {
428 * after adding space to the filesystem, we need to clear the full flags
429 * on all the space infos.
431 void btrfs_clear_space_info_full(struct btrfs_fs_info
*info
)
433 struct list_head
*head
= &info
->space_info
;
434 struct btrfs_space_info
*found
;
437 list_for_each_entry_rcu(found
, head
, list
)
442 static u64
div_factor(u64 num
, int factor
)
451 u64
btrfs_find_block_group(struct btrfs_root
*root
,
452 u64 search_start
, u64 search_hint
, int owner
)
454 struct btrfs_block_group_cache
*cache
;
456 u64 last
= max(search_hint
, search_start
);
463 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
467 spin_lock(&cache
->lock
);
468 last
= cache
->key
.objectid
+ cache
->key
.offset
;
469 used
= btrfs_block_group_used(&cache
->item
);
471 if ((full_search
|| !cache
->ro
) &&
472 block_group_bits(cache
, BTRFS_BLOCK_GROUP_METADATA
)) {
473 if (used
+ cache
->pinned
+ cache
->reserved
<
474 div_factor(cache
->key
.offset
, factor
)) {
475 group_start
= cache
->key
.objectid
;
476 spin_unlock(&cache
->lock
);
477 btrfs_put_block_group(cache
);
481 spin_unlock(&cache
->lock
);
482 btrfs_put_block_group(cache
);
490 if (!full_search
&& factor
< 10) {
500 /* simple helper to search for an existing extent at a given offset */
501 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
504 struct btrfs_key key
;
505 struct btrfs_path
*path
;
507 path
= btrfs_alloc_path();
509 key
.objectid
= start
;
511 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
512 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
514 btrfs_free_path(path
);
519 * Back reference rules. Back refs have three main goals:
521 * 1) differentiate between all holders of references to an extent so that
522 * when a reference is dropped we can make sure it was a valid reference
523 * before freeing the extent.
525 * 2) Provide enough information to quickly find the holders of an extent
526 * if we notice a given block is corrupted or bad.
528 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
529 * maintenance. This is actually the same as #2, but with a slightly
530 * different use case.
532 * There are two kinds of back refs. The implicit back refs is optimized
533 * for pointers in non-shared tree blocks. For a given pointer in a block,
534 * back refs of this kind provide information about the block's owner tree
535 * and the pointer's key. These information allow us to find the block by
536 * b-tree searching. The full back refs is for pointers in tree blocks not
537 * referenced by their owner trees. The location of tree block is recorded
538 * in the back refs. Actually the full back refs is generic, and can be
539 * used in all cases the implicit back refs is used. The major shortcoming
540 * of the full back refs is its overhead. Every time a tree block gets
541 * COWed, we have to update back refs entry for all pointers in it.
543 * For a newly allocated tree block, we use implicit back refs for
544 * pointers in it. This means most tree related operations only involve
545 * implicit back refs. For a tree block created in old transaction, the
546 * only way to drop a reference to it is COW it. So we can detect the
547 * event that tree block loses its owner tree's reference and do the
548 * back refs conversion.
550 * When a tree block is COW'd through a tree, there are four cases:
552 * The reference count of the block is one and the tree is the block's
553 * owner tree. Nothing to do in this case.
555 * The reference count of the block is one and the tree is not the
556 * block's owner tree. In this case, full back refs is used for pointers
557 * in the block. Remove these full back refs, add implicit back refs for
558 * every pointers in the new block.
560 * The reference count of the block is greater than one and the tree is
561 * the block's owner tree. In this case, implicit back refs is used for
562 * pointers in the block. Add full back refs for every pointers in the
563 * block, increase lower level extents' reference counts. The original
564 * implicit back refs are entailed to the new block.
566 * The reference count of the block is greater than one and the tree is
567 * not the block's owner tree. Add implicit back refs for every pointer in
568 * the new block, increase lower level extents' reference count.
570 * Back Reference Key composing:
572 * The key objectid corresponds to the first byte in the extent,
573 * The key type is used to differentiate between types of back refs.
574 * There are different meanings of the key offset for different types
577 * File extents can be referenced by:
579 * - multiple snapshots, subvolumes, or different generations in one subvol
580 * - different files inside a single subvolume
581 * - different offsets inside a file (bookend extents in file.c)
583 * The extent ref structure for the implicit back refs has fields for:
585 * - Objectid of the subvolume root
586 * - objectid of the file holding the reference
587 * - original offset in the file
588 * - how many bookend extents
590 * The key offset for the implicit back refs is hash of the first
593 * The extent ref structure for the full back refs has field for:
595 * - number of pointers in the tree leaf
597 * The key offset for the implicit back refs is the first byte of
600 * When a file extent is allocated, The implicit back refs is used.
601 * the fields are filled in:
603 * (root_key.objectid, inode objectid, offset in file, 1)
605 * When a file extent is removed file truncation, we find the
606 * corresponding implicit back refs and check the following fields:
608 * (btrfs_header_owner(leaf), inode objectid, offset in file)
610 * Btree extents can be referenced by:
612 * - Different subvolumes
614 * Both the implicit back refs and the full back refs for tree blocks
615 * only consist of key. The key offset for the implicit back refs is
616 * objectid of block's owner tree. The key offset for the full back refs
617 * is the first byte of parent block.
619 * When implicit back refs is used, information about the lowest key and
620 * level of the tree block are required. These information are stored in
621 * tree block info structure.
624 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
625 static int convert_extent_item_v0(struct btrfs_trans_handle
*trans
,
626 struct btrfs_root
*root
,
627 struct btrfs_path
*path
,
628 u64 owner
, u32 extra_size
)
630 struct btrfs_extent_item
*item
;
631 struct btrfs_extent_item_v0
*ei0
;
632 struct btrfs_extent_ref_v0
*ref0
;
633 struct btrfs_tree_block_info
*bi
;
634 struct extent_buffer
*leaf
;
635 struct btrfs_key key
;
636 struct btrfs_key found_key
;
637 u32 new_size
= sizeof(*item
);
641 leaf
= path
->nodes
[0];
642 BUG_ON(btrfs_item_size_nr(leaf
, path
->slots
[0]) != sizeof(*ei0
));
644 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
645 ei0
= btrfs_item_ptr(leaf
, path
->slots
[0],
646 struct btrfs_extent_item_v0
);
647 refs
= btrfs_extent_refs_v0(leaf
, ei0
);
649 if (owner
== (u64
)-1) {
651 if (path
->slots
[0] >= btrfs_header_nritems(leaf
)) {
652 ret
= btrfs_next_leaf(root
, path
);
656 leaf
= path
->nodes
[0];
658 btrfs_item_key_to_cpu(leaf
, &found_key
,
660 BUG_ON(key
.objectid
!= found_key
.objectid
);
661 if (found_key
.type
!= BTRFS_EXTENT_REF_V0_KEY
) {
665 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
666 struct btrfs_extent_ref_v0
);
667 owner
= btrfs_ref_objectid_v0(leaf
, ref0
);
671 btrfs_release_path(root
, path
);
673 if (owner
< BTRFS_FIRST_FREE_OBJECTID
)
674 new_size
+= sizeof(*bi
);
676 new_size
-= sizeof(*ei0
);
677 ret
= btrfs_search_slot(trans
, root
, &key
, path
,
678 new_size
+ extra_size
, 1);
683 ret
= btrfs_extend_item(trans
, root
, path
, new_size
);
686 leaf
= path
->nodes
[0];
687 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
688 btrfs_set_extent_refs(leaf
, item
, refs
);
689 /* FIXME: get real generation */
690 btrfs_set_extent_generation(leaf
, item
, 0);
691 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
692 btrfs_set_extent_flags(leaf
, item
,
693 BTRFS_EXTENT_FLAG_TREE_BLOCK
|
694 BTRFS_BLOCK_FLAG_FULL_BACKREF
);
695 bi
= (struct btrfs_tree_block_info
*)(item
+ 1);
696 /* FIXME: get first key of the block */
697 memset_extent_buffer(leaf
, 0, (unsigned long)bi
, sizeof(*bi
));
698 btrfs_set_tree_block_level(leaf
, bi
, (int)owner
);
700 btrfs_set_extent_flags(leaf
, item
, BTRFS_EXTENT_FLAG_DATA
);
702 btrfs_mark_buffer_dirty(leaf
);
707 static u64
hash_extent_data_ref(u64 root_objectid
, u64 owner
, u64 offset
)
709 u32 high_crc
= ~(u32
)0;
710 u32 low_crc
= ~(u32
)0;
713 lenum
= cpu_to_le64(root_objectid
);
714 high_crc
= crc32c(high_crc
, &lenum
, sizeof(lenum
));
715 lenum
= cpu_to_le64(owner
);
716 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
717 lenum
= cpu_to_le64(offset
);
718 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
720 return ((u64
)high_crc
<< 31) ^ (u64
)low_crc
;
723 static u64
hash_extent_data_ref_item(struct extent_buffer
*leaf
,
724 struct btrfs_extent_data_ref
*ref
)
726 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf
, ref
),
727 btrfs_extent_data_ref_objectid(leaf
, ref
),
728 btrfs_extent_data_ref_offset(leaf
, ref
));
731 static int match_extent_data_ref(struct extent_buffer
*leaf
,
732 struct btrfs_extent_data_ref
*ref
,
733 u64 root_objectid
, u64 owner
, u64 offset
)
735 if (btrfs_extent_data_ref_root(leaf
, ref
) != root_objectid
||
736 btrfs_extent_data_ref_objectid(leaf
, ref
) != owner
||
737 btrfs_extent_data_ref_offset(leaf
, ref
) != offset
)
742 static noinline
int lookup_extent_data_ref(struct btrfs_trans_handle
*trans
,
743 struct btrfs_root
*root
,
744 struct btrfs_path
*path
,
745 u64 bytenr
, u64 parent
,
747 u64 owner
, u64 offset
)
749 struct btrfs_key key
;
750 struct btrfs_extent_data_ref
*ref
;
751 struct extent_buffer
*leaf
;
757 key
.objectid
= bytenr
;
759 key
.type
= BTRFS_SHARED_DATA_REF_KEY
;
762 key
.type
= BTRFS_EXTENT_DATA_REF_KEY
;
763 key
.offset
= hash_extent_data_ref(root_objectid
,
768 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
777 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
778 key
.type
= BTRFS_EXTENT_REF_V0_KEY
;
779 btrfs_release_path(root
, path
);
780 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
791 leaf
= path
->nodes
[0];
792 nritems
= btrfs_header_nritems(leaf
);
794 if (path
->slots
[0] >= nritems
) {
795 ret
= btrfs_next_leaf(root
, path
);
801 leaf
= path
->nodes
[0];
802 nritems
= btrfs_header_nritems(leaf
);
806 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
807 if (key
.objectid
!= bytenr
||
808 key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
)
811 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
812 struct btrfs_extent_data_ref
);
814 if (match_extent_data_ref(leaf
, ref
, root_objectid
,
817 btrfs_release_path(root
, path
);
829 static noinline
int insert_extent_data_ref(struct btrfs_trans_handle
*trans
,
830 struct btrfs_root
*root
,
831 struct btrfs_path
*path
,
832 u64 bytenr
, u64 parent
,
833 u64 root_objectid
, u64 owner
,
834 u64 offset
, int refs_to_add
)
836 struct btrfs_key key
;
837 struct extent_buffer
*leaf
;
842 key
.objectid
= bytenr
;
844 key
.type
= BTRFS_SHARED_DATA_REF_KEY
;
846 size
= sizeof(struct btrfs_shared_data_ref
);
848 key
.type
= BTRFS_EXTENT_DATA_REF_KEY
;
849 key
.offset
= hash_extent_data_ref(root_objectid
,
851 size
= sizeof(struct btrfs_extent_data_ref
);
854 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, size
);
855 if (ret
&& ret
!= -EEXIST
)
858 leaf
= path
->nodes
[0];
860 struct btrfs_shared_data_ref
*ref
;
861 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
862 struct btrfs_shared_data_ref
);
864 btrfs_set_shared_data_ref_count(leaf
, ref
, refs_to_add
);
866 num_refs
= btrfs_shared_data_ref_count(leaf
, ref
);
867 num_refs
+= refs_to_add
;
868 btrfs_set_shared_data_ref_count(leaf
, ref
, num_refs
);
871 struct btrfs_extent_data_ref
*ref
;
872 while (ret
== -EEXIST
) {
873 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
874 struct btrfs_extent_data_ref
);
875 if (match_extent_data_ref(leaf
, ref
, root_objectid
,
878 btrfs_release_path(root
, path
);
880 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
882 if (ret
&& ret
!= -EEXIST
)
885 leaf
= path
->nodes
[0];
887 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
888 struct btrfs_extent_data_ref
);
890 btrfs_set_extent_data_ref_root(leaf
, ref
,
892 btrfs_set_extent_data_ref_objectid(leaf
, ref
, owner
);
893 btrfs_set_extent_data_ref_offset(leaf
, ref
, offset
);
894 btrfs_set_extent_data_ref_count(leaf
, ref
, refs_to_add
);
896 num_refs
= btrfs_extent_data_ref_count(leaf
, ref
);
897 num_refs
+= refs_to_add
;
898 btrfs_set_extent_data_ref_count(leaf
, ref
, num_refs
);
901 btrfs_mark_buffer_dirty(leaf
);
904 btrfs_release_path(root
, path
);
908 static noinline
int remove_extent_data_ref(struct btrfs_trans_handle
*trans
,
909 struct btrfs_root
*root
,
910 struct btrfs_path
*path
,
913 struct btrfs_key key
;
914 struct btrfs_extent_data_ref
*ref1
= NULL
;
915 struct btrfs_shared_data_ref
*ref2
= NULL
;
916 struct extent_buffer
*leaf
;
920 leaf
= path
->nodes
[0];
921 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
923 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
924 ref1
= btrfs_item_ptr(leaf
, path
->slots
[0],
925 struct btrfs_extent_data_ref
);
926 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
927 } else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
928 ref2
= btrfs_item_ptr(leaf
, path
->slots
[0],
929 struct btrfs_shared_data_ref
);
930 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
931 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
932 } else if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
933 struct btrfs_extent_ref_v0
*ref0
;
934 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
935 struct btrfs_extent_ref_v0
);
936 num_refs
= btrfs_ref_count_v0(leaf
, ref0
);
942 BUG_ON(num_refs
< refs_to_drop
);
943 num_refs
-= refs_to_drop
;
946 ret
= btrfs_del_item(trans
, root
, path
);
948 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
)
949 btrfs_set_extent_data_ref_count(leaf
, ref1
, num_refs
);
950 else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
)
951 btrfs_set_shared_data_ref_count(leaf
, ref2
, num_refs
);
952 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
954 struct btrfs_extent_ref_v0
*ref0
;
955 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
956 struct btrfs_extent_ref_v0
);
957 btrfs_set_ref_count_v0(leaf
, ref0
, num_refs
);
960 btrfs_mark_buffer_dirty(leaf
);
965 static noinline u32
extent_data_ref_count(struct btrfs_root
*root
,
966 struct btrfs_path
*path
,
967 struct btrfs_extent_inline_ref
*iref
)
969 struct btrfs_key key
;
970 struct extent_buffer
*leaf
;
971 struct btrfs_extent_data_ref
*ref1
;
972 struct btrfs_shared_data_ref
*ref2
;
975 leaf
= path
->nodes
[0];
976 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
978 if (btrfs_extent_inline_ref_type(leaf
, iref
) ==
979 BTRFS_EXTENT_DATA_REF_KEY
) {
980 ref1
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
981 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
983 ref2
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
984 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
986 } else if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
987 ref1
= btrfs_item_ptr(leaf
, path
->slots
[0],
988 struct btrfs_extent_data_ref
);
989 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
990 } else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
991 ref2
= btrfs_item_ptr(leaf
, path
->slots
[0],
992 struct btrfs_shared_data_ref
);
993 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
994 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
995 } else if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
996 struct btrfs_extent_ref_v0
*ref0
;
997 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
998 struct btrfs_extent_ref_v0
);
999 num_refs
= btrfs_ref_count_v0(leaf
, ref0
);
1007 static noinline
int lookup_tree_block_ref(struct btrfs_trans_handle
*trans
,
1008 struct btrfs_root
*root
,
1009 struct btrfs_path
*path
,
1010 u64 bytenr
, u64 parent
,
1013 struct btrfs_key key
;
1016 key
.objectid
= bytenr
;
1018 key
.type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1019 key
.offset
= parent
;
1021 key
.type
= BTRFS_TREE_BLOCK_REF_KEY
;
1022 key
.offset
= root_objectid
;
1025 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1028 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1029 if (ret
== -ENOENT
&& parent
) {
1030 btrfs_release_path(root
, path
);
1031 key
.type
= BTRFS_EXTENT_REF_V0_KEY
;
1032 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1040 static noinline
int insert_tree_block_ref(struct btrfs_trans_handle
*trans
,
1041 struct btrfs_root
*root
,
1042 struct btrfs_path
*path
,
1043 u64 bytenr
, u64 parent
,
1046 struct btrfs_key key
;
1049 key
.objectid
= bytenr
;
1051 key
.type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1052 key
.offset
= parent
;
1054 key
.type
= BTRFS_TREE_BLOCK_REF_KEY
;
1055 key
.offset
= root_objectid
;
1058 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
1059 btrfs_release_path(root
, path
);
1063 static inline int extent_ref_type(u64 parent
, u64 owner
)
1066 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1068 type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1070 type
= BTRFS_TREE_BLOCK_REF_KEY
;
1073 type
= BTRFS_SHARED_DATA_REF_KEY
;
1075 type
= BTRFS_EXTENT_DATA_REF_KEY
;
1080 static int find_next_key(struct btrfs_path
*path
, int level
,
1081 struct btrfs_key
*key
)
1084 for (; level
< BTRFS_MAX_LEVEL
; level
++) {
1085 if (!path
->nodes
[level
])
1087 if (path
->slots
[level
] + 1 >=
1088 btrfs_header_nritems(path
->nodes
[level
]))
1091 btrfs_item_key_to_cpu(path
->nodes
[level
], key
,
1092 path
->slots
[level
] + 1);
1094 btrfs_node_key_to_cpu(path
->nodes
[level
], key
,
1095 path
->slots
[level
] + 1);
1102 * look for inline back ref. if back ref is found, *ref_ret is set
1103 * to the address of inline back ref, and 0 is returned.
1105 * if back ref isn't found, *ref_ret is set to the address where it
1106 * should be inserted, and -ENOENT is returned.
1108 * if insert is true and there are too many inline back refs, the path
1109 * points to the extent item, and -EAGAIN is returned.
1111 * NOTE: inline back refs are ordered in the same way that back ref
1112 * items in the tree are ordered.
1114 static noinline_for_stack
1115 int lookup_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1116 struct btrfs_root
*root
,
1117 struct btrfs_path
*path
,
1118 struct btrfs_extent_inline_ref
**ref_ret
,
1119 u64 bytenr
, u64 num_bytes
,
1120 u64 parent
, u64 root_objectid
,
1121 u64 owner
, u64 offset
, int insert
)
1123 struct btrfs_key key
;
1124 struct extent_buffer
*leaf
;
1125 struct btrfs_extent_item
*ei
;
1126 struct btrfs_extent_inline_ref
*iref
;
1137 key
.objectid
= bytenr
;
1138 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1139 key
.offset
= num_bytes
;
1141 want
= extent_ref_type(parent
, owner
);
1143 extra_size
= btrfs_extent_inline_ref_size(want
);
1144 path
->keep_locks
= 1;
1147 ret
= btrfs_search_slot(trans
, root
, &key
, path
, extra_size
, 1);
1154 leaf
= path
->nodes
[0];
1155 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1156 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1157 if (item_size
< sizeof(*ei
)) {
1162 ret
= convert_extent_item_v0(trans
, root
, path
, owner
,
1168 leaf
= path
->nodes
[0];
1169 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1172 BUG_ON(item_size
< sizeof(*ei
));
1174 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1175 flags
= btrfs_extent_flags(leaf
, ei
);
1177 ptr
= (unsigned long)(ei
+ 1);
1178 end
= (unsigned long)ei
+ item_size
;
1180 if (flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
) {
1181 ptr
+= sizeof(struct btrfs_tree_block_info
);
1184 BUG_ON(!(flags
& BTRFS_EXTENT_FLAG_DATA
));
1193 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
1194 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
1198 ptr
+= btrfs_extent_inline_ref_size(type
);
1202 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1203 struct btrfs_extent_data_ref
*dref
;
1204 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1205 if (match_extent_data_ref(leaf
, dref
, root_objectid
,
1210 if (hash_extent_data_ref_item(leaf
, dref
) <
1211 hash_extent_data_ref(root_objectid
, owner
, offset
))
1215 ref_offset
= btrfs_extent_inline_ref_offset(leaf
, iref
);
1217 if (parent
== ref_offset
) {
1221 if (ref_offset
< parent
)
1224 if (root_objectid
== ref_offset
) {
1228 if (ref_offset
< root_objectid
)
1232 ptr
+= btrfs_extent_inline_ref_size(type
);
1234 if (err
== -ENOENT
&& insert
) {
1235 if (item_size
+ extra_size
>=
1236 BTRFS_MAX_EXTENT_ITEM_SIZE(root
)) {
1241 * To add new inline back ref, we have to make sure
1242 * there is no corresponding back ref item.
1243 * For simplicity, we just do not add new inline back
1244 * ref if there is any kind of item for this block
1246 if (find_next_key(path
, 0, &key
) == 0 &&
1247 key
.objectid
== bytenr
&&
1248 key
.type
< BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1253 *ref_ret
= (struct btrfs_extent_inline_ref
*)ptr
;
1256 path
->keep_locks
= 0;
1257 btrfs_unlock_up_safe(path
, 1);
1263 * helper to add new inline back ref
1265 static noinline_for_stack
1266 int setup_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1267 struct btrfs_root
*root
,
1268 struct btrfs_path
*path
,
1269 struct btrfs_extent_inline_ref
*iref
,
1270 u64 parent
, u64 root_objectid
,
1271 u64 owner
, u64 offset
, int refs_to_add
,
1272 struct btrfs_delayed_extent_op
*extent_op
)
1274 struct extent_buffer
*leaf
;
1275 struct btrfs_extent_item
*ei
;
1278 unsigned long item_offset
;
1284 leaf
= path
->nodes
[0];
1285 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1286 item_offset
= (unsigned long)iref
- (unsigned long)ei
;
1288 type
= extent_ref_type(parent
, owner
);
1289 size
= btrfs_extent_inline_ref_size(type
);
1291 ret
= btrfs_extend_item(trans
, root
, path
, size
);
1294 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1295 refs
= btrfs_extent_refs(leaf
, ei
);
1296 refs
+= refs_to_add
;
1297 btrfs_set_extent_refs(leaf
, ei
, refs
);
1299 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1301 ptr
= (unsigned long)ei
+ item_offset
;
1302 end
= (unsigned long)ei
+ btrfs_item_size_nr(leaf
, path
->slots
[0]);
1303 if (ptr
< end
- size
)
1304 memmove_extent_buffer(leaf
, ptr
+ size
, ptr
,
1307 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
1308 btrfs_set_extent_inline_ref_type(leaf
, iref
, type
);
1309 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1310 struct btrfs_extent_data_ref
*dref
;
1311 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1312 btrfs_set_extent_data_ref_root(leaf
, dref
, root_objectid
);
1313 btrfs_set_extent_data_ref_objectid(leaf
, dref
, owner
);
1314 btrfs_set_extent_data_ref_offset(leaf
, dref
, offset
);
1315 btrfs_set_extent_data_ref_count(leaf
, dref
, refs_to_add
);
1316 } else if (type
== BTRFS_SHARED_DATA_REF_KEY
) {
1317 struct btrfs_shared_data_ref
*sref
;
1318 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
1319 btrfs_set_shared_data_ref_count(leaf
, sref
, refs_to_add
);
1320 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
1321 } else if (type
== BTRFS_SHARED_BLOCK_REF_KEY
) {
1322 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
1324 btrfs_set_extent_inline_ref_offset(leaf
, iref
, root_objectid
);
1326 btrfs_mark_buffer_dirty(leaf
);
1330 static int lookup_extent_backref(struct btrfs_trans_handle
*trans
,
1331 struct btrfs_root
*root
,
1332 struct btrfs_path
*path
,
1333 struct btrfs_extent_inline_ref
**ref_ret
,
1334 u64 bytenr
, u64 num_bytes
, u64 parent
,
1335 u64 root_objectid
, u64 owner
, u64 offset
)
1339 ret
= lookup_inline_extent_backref(trans
, root
, path
, ref_ret
,
1340 bytenr
, num_bytes
, parent
,
1341 root_objectid
, owner
, offset
, 0);
1345 btrfs_release_path(root
, path
);
1348 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1349 ret
= lookup_tree_block_ref(trans
, root
, path
, bytenr
, parent
,
1352 ret
= lookup_extent_data_ref(trans
, root
, path
, bytenr
, parent
,
1353 root_objectid
, owner
, offset
);
1359 * helper to update/remove inline back ref
1361 static noinline_for_stack
1362 int update_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1363 struct btrfs_root
*root
,
1364 struct btrfs_path
*path
,
1365 struct btrfs_extent_inline_ref
*iref
,
1367 struct btrfs_delayed_extent_op
*extent_op
)
1369 struct extent_buffer
*leaf
;
1370 struct btrfs_extent_item
*ei
;
1371 struct btrfs_extent_data_ref
*dref
= NULL
;
1372 struct btrfs_shared_data_ref
*sref
= NULL
;
1381 leaf
= path
->nodes
[0];
1382 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1383 refs
= btrfs_extent_refs(leaf
, ei
);
1384 WARN_ON(refs_to_mod
< 0 && refs
+ refs_to_mod
<= 0);
1385 refs
+= refs_to_mod
;
1386 btrfs_set_extent_refs(leaf
, ei
, refs
);
1388 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1390 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
1392 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1393 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1394 refs
= btrfs_extent_data_ref_count(leaf
, dref
);
1395 } else if (type
== BTRFS_SHARED_DATA_REF_KEY
) {
1396 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
1397 refs
= btrfs_shared_data_ref_count(leaf
, sref
);
1400 BUG_ON(refs_to_mod
!= -1);
1403 BUG_ON(refs_to_mod
< 0 && refs
< -refs_to_mod
);
1404 refs
+= refs_to_mod
;
1407 if (type
== BTRFS_EXTENT_DATA_REF_KEY
)
1408 btrfs_set_extent_data_ref_count(leaf
, dref
, refs
);
1410 btrfs_set_shared_data_ref_count(leaf
, sref
, refs
);
1412 size
= btrfs_extent_inline_ref_size(type
);
1413 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1414 ptr
= (unsigned long)iref
;
1415 end
= (unsigned long)ei
+ item_size
;
1416 if (ptr
+ size
< end
)
1417 memmove_extent_buffer(leaf
, ptr
, ptr
+ size
,
1420 ret
= btrfs_truncate_item(trans
, root
, path
, item_size
, 1);
1423 btrfs_mark_buffer_dirty(leaf
);
1427 static noinline_for_stack
1428 int insert_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1429 struct btrfs_root
*root
,
1430 struct btrfs_path
*path
,
1431 u64 bytenr
, u64 num_bytes
, u64 parent
,
1432 u64 root_objectid
, u64 owner
,
1433 u64 offset
, int refs_to_add
,
1434 struct btrfs_delayed_extent_op
*extent_op
)
1436 struct btrfs_extent_inline_ref
*iref
;
1439 ret
= lookup_inline_extent_backref(trans
, root
, path
, &iref
,
1440 bytenr
, num_bytes
, parent
,
1441 root_objectid
, owner
, offset
, 1);
1443 BUG_ON(owner
< BTRFS_FIRST_FREE_OBJECTID
);
1444 ret
= update_inline_extent_backref(trans
, root
, path
, iref
,
1445 refs_to_add
, extent_op
);
1446 } else if (ret
== -ENOENT
) {
1447 ret
= setup_inline_extent_backref(trans
, root
, path
, iref
,
1448 parent
, root_objectid
,
1449 owner
, offset
, refs_to_add
,
1455 static int insert_extent_backref(struct btrfs_trans_handle
*trans
,
1456 struct btrfs_root
*root
,
1457 struct btrfs_path
*path
,
1458 u64 bytenr
, u64 parent
, u64 root_objectid
,
1459 u64 owner
, u64 offset
, int refs_to_add
)
1462 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1463 BUG_ON(refs_to_add
!= 1);
1464 ret
= insert_tree_block_ref(trans
, root
, path
, bytenr
,
1465 parent
, root_objectid
);
1467 ret
= insert_extent_data_ref(trans
, root
, path
, bytenr
,
1468 parent
, root_objectid
,
1469 owner
, offset
, refs_to_add
);
1474 static int remove_extent_backref(struct btrfs_trans_handle
*trans
,
1475 struct btrfs_root
*root
,
1476 struct btrfs_path
*path
,
1477 struct btrfs_extent_inline_ref
*iref
,
1478 int refs_to_drop
, int is_data
)
1482 BUG_ON(!is_data
&& refs_to_drop
!= 1);
1484 ret
= update_inline_extent_backref(trans
, root
, path
, iref
,
1485 -refs_to_drop
, NULL
);
1486 } else if (is_data
) {
1487 ret
= remove_extent_data_ref(trans
, root
, path
, refs_to_drop
);
1489 ret
= btrfs_del_item(trans
, root
, path
);
#ifdef BIO_RW_DISCARD
/* Tell the block device the byte range [start, start+len) is unused;
 * byte offsets are converted to 512-byte sectors (>> 9). */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif
1502 static int btrfs_discard_extent(struct btrfs_root
*root
, u64 bytenr
,
1505 #ifdef BIO_RW_DISCARD
1507 u64 map_length
= num_bytes
;
1508 struct btrfs_multi_bio
*multi
= NULL
;
1510 /* Tell the block device(s) that the sectors can be discarded */
1511 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
1512 bytenr
, &map_length
, &multi
, 0);
1514 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
1517 if (map_length
> num_bytes
)
1518 map_length
= num_bytes
;
1520 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
1521 btrfs_issue_discard(stripe
->dev
->bdev
,
1534 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1535 struct btrfs_root
*root
,
1536 u64 bytenr
, u64 num_bytes
, u64 parent
,
1537 u64 root_objectid
, u64 owner
, u64 offset
)
1540 BUG_ON(owner
< BTRFS_FIRST_FREE_OBJECTID
&&
1541 root_objectid
== BTRFS_TREE_LOG_OBJECTID
);
1543 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1544 ret
= btrfs_add_delayed_tree_ref(trans
, bytenr
, num_bytes
,
1545 parent
, root_objectid
, (int)owner
,
1546 BTRFS_ADD_DELAYED_REF
, NULL
);
1548 ret
= btrfs_add_delayed_data_ref(trans
, bytenr
, num_bytes
,
1549 parent
, root_objectid
, owner
, offset
,
1550 BTRFS_ADD_DELAYED_REF
, NULL
);
1555 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1556 struct btrfs_root
*root
,
1557 u64 bytenr
, u64 num_bytes
,
1558 u64 parent
, u64 root_objectid
,
1559 u64 owner
, u64 offset
, int refs_to_add
,
1560 struct btrfs_delayed_extent_op
*extent_op
)
1562 struct btrfs_path
*path
;
1563 struct extent_buffer
*leaf
;
1564 struct btrfs_extent_item
*item
;
1569 path
= btrfs_alloc_path();
1574 path
->leave_spinning
= 1;
1575 /* this will setup the path even if it fails to insert the back ref */
1576 ret
= insert_inline_extent_backref(trans
, root
->fs_info
->extent_root
,
1577 path
, bytenr
, num_bytes
, parent
,
1578 root_objectid
, owner
, offset
,
1579 refs_to_add
, extent_op
);
1583 if (ret
!= -EAGAIN
) {
1588 leaf
= path
->nodes
[0];
1589 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1590 refs
= btrfs_extent_refs(leaf
, item
);
1591 btrfs_set_extent_refs(leaf
, item
, refs
+ refs_to_add
);
1593 __run_delayed_extent_op(extent_op
, leaf
, item
);
1595 btrfs_mark_buffer_dirty(leaf
);
1596 btrfs_release_path(root
->fs_info
->extent_root
, path
);
1599 path
->leave_spinning
= 1;
1601 /* now insert the actual backref */
1602 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
1603 path
, bytenr
, parent
, root_objectid
,
1604 owner
, offset
, refs_to_add
);
1607 btrfs_free_path(path
);
1611 static int run_delayed_data_ref(struct btrfs_trans_handle
*trans
,
1612 struct btrfs_root
*root
,
1613 struct btrfs_delayed_ref_node
*node
,
1614 struct btrfs_delayed_extent_op
*extent_op
,
1615 int insert_reserved
)
1618 struct btrfs_delayed_data_ref
*ref
;
1619 struct btrfs_key ins
;
1624 ins
.objectid
= node
->bytenr
;
1625 ins
.offset
= node
->num_bytes
;
1626 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
1628 ref
= btrfs_delayed_node_to_data_ref(node
);
1629 if (node
->type
== BTRFS_SHARED_DATA_REF_KEY
)
1630 parent
= ref
->parent
;
1632 ref_root
= ref
->root
;
1634 if (node
->action
== BTRFS_ADD_DELAYED_REF
&& insert_reserved
) {
1636 BUG_ON(extent_op
->update_key
);
1637 flags
|= extent_op
->flags_to_set
;
1639 ret
= alloc_reserved_file_extent(trans
, root
,
1640 parent
, ref_root
, flags
,
1641 ref
->objectid
, ref
->offset
,
1642 &ins
, node
->ref_mod
);
1643 update_reserved_extents(root
, ins
.objectid
, ins
.offset
, 0);
1644 } else if (node
->action
== BTRFS_ADD_DELAYED_REF
) {
1645 ret
= __btrfs_inc_extent_ref(trans
, root
, node
->bytenr
,
1646 node
->num_bytes
, parent
,
1647 ref_root
, ref
->objectid
,
1648 ref
->offset
, node
->ref_mod
,
1650 } else if (node
->action
== BTRFS_DROP_DELAYED_REF
) {
1651 ret
= __btrfs_free_extent(trans
, root
, node
->bytenr
,
1652 node
->num_bytes
, parent
,
1653 ref_root
, ref
->objectid
,
1654 ref
->offset
, node
->ref_mod
,
1662 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op
*extent_op
,
1663 struct extent_buffer
*leaf
,
1664 struct btrfs_extent_item
*ei
)
1666 u64 flags
= btrfs_extent_flags(leaf
, ei
);
1667 if (extent_op
->update_flags
) {
1668 flags
|= extent_op
->flags_to_set
;
1669 btrfs_set_extent_flags(leaf
, ei
, flags
);
1672 if (extent_op
->update_key
) {
1673 struct btrfs_tree_block_info
*bi
;
1674 BUG_ON(!(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
));
1675 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
1676 btrfs_set_tree_block_key(leaf
, bi
, &extent_op
->key
);
1680 static int run_delayed_extent_op(struct btrfs_trans_handle
*trans
,
1681 struct btrfs_root
*root
,
1682 struct btrfs_delayed_ref_node
*node
,
1683 struct btrfs_delayed_extent_op
*extent_op
)
1685 struct btrfs_key key
;
1686 struct btrfs_path
*path
;
1687 struct btrfs_extent_item
*ei
;
1688 struct extent_buffer
*leaf
;
1693 path
= btrfs_alloc_path();
1697 key
.objectid
= node
->bytenr
;
1698 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1699 key
.offset
= node
->num_bytes
;
1702 path
->leave_spinning
= 1;
1703 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
,
1714 leaf
= path
->nodes
[0];
1715 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1716 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1717 if (item_size
< sizeof(*ei
)) {
1718 ret
= convert_extent_item_v0(trans
, root
->fs_info
->extent_root
,
1724 leaf
= path
->nodes
[0];
1725 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1728 BUG_ON(item_size
< sizeof(*ei
));
1729 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1730 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1732 btrfs_mark_buffer_dirty(leaf
);
1734 btrfs_free_path(path
);
1738 static int run_delayed_tree_ref(struct btrfs_trans_handle
*trans
,
1739 struct btrfs_root
*root
,
1740 struct btrfs_delayed_ref_node
*node
,
1741 struct btrfs_delayed_extent_op
*extent_op
,
1742 int insert_reserved
)
1745 struct btrfs_delayed_tree_ref
*ref
;
1746 struct btrfs_key ins
;
1750 ins
.objectid
= node
->bytenr
;
1751 ins
.offset
= node
->num_bytes
;
1752 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
1754 ref
= btrfs_delayed_node_to_tree_ref(node
);
1755 if (node
->type
== BTRFS_SHARED_BLOCK_REF_KEY
)
1756 parent
= ref
->parent
;
1758 ref_root
= ref
->root
;
1760 BUG_ON(node
->ref_mod
!= 1);
1761 if (node
->action
== BTRFS_ADD_DELAYED_REF
&& insert_reserved
) {
1762 BUG_ON(!extent_op
|| !extent_op
->update_flags
||
1763 !extent_op
->update_key
);
1764 ret
= alloc_reserved_tree_block(trans
, root
,
1766 extent_op
->flags_to_set
,
1769 update_reserved_extents(root
, ins
.objectid
, ins
.offset
, 0);
1770 } else if (node
->action
== BTRFS_ADD_DELAYED_REF
) {
1771 ret
= __btrfs_inc_extent_ref(trans
, root
, node
->bytenr
,
1772 node
->num_bytes
, parent
, ref_root
,
1773 ref
->level
, 0, 1, extent_op
);
1774 } else if (node
->action
== BTRFS_DROP_DELAYED_REF
) {
1775 ret
= __btrfs_free_extent(trans
, root
, node
->bytenr
,
1776 node
->num_bytes
, parent
, ref_root
,
1777 ref
->level
, 0, 1, extent_op
);
1785 /* helper function to actually process a single delayed ref entry */
1786 static int run_one_delayed_ref(struct btrfs_trans_handle
*trans
,
1787 struct btrfs_root
*root
,
1788 struct btrfs_delayed_ref_node
*node
,
1789 struct btrfs_delayed_extent_op
*extent_op
,
1790 int insert_reserved
)
1793 if (btrfs_delayed_ref_is_head(node
)) {
1794 struct btrfs_delayed_ref_head
*head
;
1796 * we've hit the end of the chain and we were supposed
1797 * to insert this extent into the tree. But, it got
1798 * deleted before we ever needed to insert it, so all
1799 * we have to do is clean up the accounting
1802 head
= btrfs_delayed_node_to_head(node
);
1803 if (insert_reserved
) {
1804 if (head
->is_data
) {
1805 ret
= btrfs_del_csums(trans
, root
,
1810 btrfs_update_pinned_extents(root
, node
->bytenr
,
1811 node
->num_bytes
, 1);
1812 update_reserved_extents(root
, node
->bytenr
,
1813 node
->num_bytes
, 0);
1815 mutex_unlock(&head
->mutex
);
1819 if (node
->type
== BTRFS_TREE_BLOCK_REF_KEY
||
1820 node
->type
== BTRFS_SHARED_BLOCK_REF_KEY
)
1821 ret
= run_delayed_tree_ref(trans
, root
, node
, extent_op
,
1823 else if (node
->type
== BTRFS_EXTENT_DATA_REF_KEY
||
1824 node
->type
== BTRFS_SHARED_DATA_REF_KEY
)
1825 ret
= run_delayed_data_ref(trans
, root
, node
, extent_op
,
1832 static noinline
struct btrfs_delayed_ref_node
*
1833 select_delayed_ref(struct btrfs_delayed_ref_head
*head
)
1835 struct rb_node
*node
;
1836 struct btrfs_delayed_ref_node
*ref
;
1837 int action
= BTRFS_ADD_DELAYED_REF
;
1840 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
1841 * this prevents ref count from going down to zero when
1842 * there still are pending delayed ref.
1844 node
= rb_prev(&head
->node
.rb_node
);
1848 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
1850 if (ref
->bytenr
!= head
->node
.bytenr
)
1852 if (ref
->action
== action
)
1854 node
= rb_prev(node
);
1856 if (action
== BTRFS_ADD_DELAYED_REF
) {
1857 action
= BTRFS_DROP_DELAYED_REF
;
1863 static noinline
int run_clustered_refs(struct btrfs_trans_handle
*trans
,
1864 struct btrfs_root
*root
,
1865 struct list_head
*cluster
)
1867 struct btrfs_delayed_ref_root
*delayed_refs
;
1868 struct btrfs_delayed_ref_node
*ref
;
1869 struct btrfs_delayed_ref_head
*locked_ref
= NULL
;
1870 struct btrfs_delayed_extent_op
*extent_op
;
1873 int must_insert_reserved
= 0;
1875 delayed_refs
= &trans
->transaction
->delayed_refs
;
1878 /* pick a new head ref from the cluster list */
1879 if (list_empty(cluster
))
1882 locked_ref
= list_entry(cluster
->next
,
1883 struct btrfs_delayed_ref_head
, cluster
);
1885 /* grab the lock that says we are going to process
1886 * all the refs for this head */
1887 ret
= btrfs_delayed_ref_lock(trans
, locked_ref
);
1890 * we may have dropped the spin lock to get the head
1891 * mutex lock, and that might have given someone else
1892 * time to free the head. If that's true, it has been
1893 * removed from our list and we can move on.
1895 if (ret
== -EAGAIN
) {
1903 * record the must insert reserved flag before we
1904 * drop the spin lock.
1906 must_insert_reserved
= locked_ref
->must_insert_reserved
;
1907 locked_ref
->must_insert_reserved
= 0;
1909 extent_op
= locked_ref
->extent_op
;
1910 locked_ref
->extent_op
= NULL
;
1913 * locked_ref is the head node, so we have to go one
1914 * node back for any delayed ref updates
1916 ref
= select_delayed_ref(locked_ref
);
1918 /* All delayed refs have been processed, Go ahead
1919 * and send the head node to run_one_delayed_ref,
1920 * so that any accounting fixes can happen
1922 ref
= &locked_ref
->node
;
1924 if (extent_op
&& must_insert_reserved
) {
1930 spin_unlock(&delayed_refs
->lock
);
1932 ret
= run_delayed_extent_op(trans
, root
,
1938 spin_lock(&delayed_refs
->lock
);
1942 list_del_init(&locked_ref
->cluster
);
1947 rb_erase(&ref
->rb_node
, &delayed_refs
->root
);
1948 delayed_refs
->num_entries
--;
1950 spin_unlock(&delayed_refs
->lock
);
1952 ret
= run_one_delayed_ref(trans
, root
, ref
, extent_op
,
1953 must_insert_reserved
);
1956 btrfs_put_delayed_ref(ref
);
1961 spin_lock(&delayed_refs
->lock
);
1967 * this starts processing the delayed reference count updates and
1968 * extent insertions we have queued up so far. count can be
1969 * 0, which means to process everything in the tree at the start
1970 * of the run (but not newly added entries), or it can be some target
1971 * number you'd like to process.
1973 int btrfs_run_delayed_refs(struct btrfs_trans_handle
*trans
,
1974 struct btrfs_root
*root
, unsigned long count
)
1976 struct rb_node
*node
;
1977 struct btrfs_delayed_ref_root
*delayed_refs
;
1978 struct btrfs_delayed_ref_node
*ref
;
1979 struct list_head cluster
;
1981 int run_all
= count
== (unsigned long)-1;
1984 if (root
== root
->fs_info
->extent_root
)
1985 root
= root
->fs_info
->tree_root
;
1987 delayed_refs
= &trans
->transaction
->delayed_refs
;
1988 INIT_LIST_HEAD(&cluster
);
1990 spin_lock(&delayed_refs
->lock
);
1992 count
= delayed_refs
->num_entries
* 2;
1996 if (!(run_all
|| run_most
) &&
1997 delayed_refs
->num_heads_ready
< 64)
2001 * go find something we can process in the rbtree. We start at
2002 * the beginning of the tree, and then build a cluster
2003 * of refs to process starting at the first one we are able to
2006 ret
= btrfs_find_ref_cluster(trans
, &cluster
,
2007 delayed_refs
->run_delayed_start
);
2011 ret
= run_clustered_refs(trans
, root
, &cluster
);
2014 count
-= min_t(unsigned long, ret
, count
);
2021 node
= rb_first(&delayed_refs
->root
);
2024 count
= (unsigned long)-1;
2027 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
2029 if (btrfs_delayed_ref_is_head(ref
)) {
2030 struct btrfs_delayed_ref_head
*head
;
2032 head
= btrfs_delayed_node_to_head(ref
);
2033 atomic_inc(&ref
->refs
);
2035 spin_unlock(&delayed_refs
->lock
);
2036 mutex_lock(&head
->mutex
);
2037 mutex_unlock(&head
->mutex
);
2039 btrfs_put_delayed_ref(ref
);
2043 node
= rb_next(node
);
2045 spin_unlock(&delayed_refs
->lock
);
2046 schedule_timeout(1);
2050 spin_unlock(&delayed_refs
->lock
);
2054 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle
*trans
,
2055 struct btrfs_root
*root
,
2056 u64 bytenr
, u64 num_bytes
, u64 flags
,
2059 struct btrfs_delayed_extent_op
*extent_op
;
2062 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2066 extent_op
->flags_to_set
= flags
;
2067 extent_op
->update_flags
= 1;
2068 extent_op
->update_key
= 0;
2069 extent_op
->is_data
= is_data
? 1 : 0;
2071 ret
= btrfs_add_delayed_extent_op(trans
, bytenr
, num_bytes
, extent_op
);
2077 static noinline
int check_delayed_ref(struct btrfs_trans_handle
*trans
,
2078 struct btrfs_root
*root
,
2079 struct btrfs_path
*path
,
2080 u64 objectid
, u64 offset
, u64 bytenr
)
2082 struct btrfs_delayed_ref_head
*head
;
2083 struct btrfs_delayed_ref_node
*ref
;
2084 struct btrfs_delayed_data_ref
*data_ref
;
2085 struct btrfs_delayed_ref_root
*delayed_refs
;
2086 struct rb_node
*node
;
2090 delayed_refs
= &trans
->transaction
->delayed_refs
;
2091 spin_lock(&delayed_refs
->lock
);
2092 head
= btrfs_find_delayed_ref_head(trans
, bytenr
);
2096 if (!mutex_trylock(&head
->mutex
)) {
2097 atomic_inc(&head
->node
.refs
);
2098 spin_unlock(&delayed_refs
->lock
);
2100 btrfs_release_path(root
->fs_info
->extent_root
, path
);
2102 mutex_lock(&head
->mutex
);
2103 mutex_unlock(&head
->mutex
);
2104 btrfs_put_delayed_ref(&head
->node
);
2108 node
= rb_prev(&head
->node
.rb_node
);
2112 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
2114 if (ref
->bytenr
!= bytenr
)
2118 if (ref
->type
!= BTRFS_EXTENT_DATA_REF_KEY
)
2121 data_ref
= btrfs_delayed_node_to_data_ref(ref
);
2123 node
= rb_prev(node
);
2125 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
2126 if (ref
->bytenr
== bytenr
)
2130 if (data_ref
->root
!= root
->root_key
.objectid
||
2131 data_ref
->objectid
!= objectid
|| data_ref
->offset
!= offset
)
2136 mutex_unlock(&head
->mutex
);
2138 spin_unlock(&delayed_refs
->lock
);
2142 static noinline
int check_committed_ref(struct btrfs_trans_handle
*trans
,
2143 struct btrfs_root
*root
,
2144 struct btrfs_path
*path
,
2145 u64 objectid
, u64 offset
, u64 bytenr
)
2147 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2148 struct extent_buffer
*leaf
;
2149 struct btrfs_extent_data_ref
*ref
;
2150 struct btrfs_extent_inline_ref
*iref
;
2151 struct btrfs_extent_item
*ei
;
2152 struct btrfs_key key
;
2156 key
.objectid
= bytenr
;
2157 key
.offset
= (u64
)-1;
2158 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
2160 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
2166 if (path
->slots
[0] == 0)
2170 leaf
= path
->nodes
[0];
2171 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
2173 if (key
.objectid
!= bytenr
|| key
.type
!= BTRFS_EXTENT_ITEM_KEY
)
2177 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
2178 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2179 if (item_size
< sizeof(*ei
)) {
2180 WARN_ON(item_size
!= sizeof(struct btrfs_extent_item_v0
));
2184 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
2186 if (item_size
!= sizeof(*ei
) +
2187 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY
))
2190 if (btrfs_extent_generation(leaf
, ei
) <=
2191 btrfs_root_last_snapshot(&root
->root_item
))
2194 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
2195 if (btrfs_extent_inline_ref_type(leaf
, iref
) !=
2196 BTRFS_EXTENT_DATA_REF_KEY
)
2199 ref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
2200 if (btrfs_extent_refs(leaf
, ei
) !=
2201 btrfs_extent_data_ref_count(leaf
, ref
) ||
2202 btrfs_extent_data_ref_root(leaf
, ref
) !=
2203 root
->root_key
.objectid
||
2204 btrfs_extent_data_ref_objectid(leaf
, ref
) != objectid
||
2205 btrfs_extent_data_ref_offset(leaf
, ref
) != offset
)
2213 int btrfs_cross_ref_exist(struct btrfs_trans_handle
*trans
,
2214 struct btrfs_root
*root
,
2215 u64 objectid
, u64 offset
, u64 bytenr
)
2217 struct btrfs_path
*path
;
2221 path
= btrfs_alloc_path();
2226 ret
= check_committed_ref(trans
, root
, path
, objectid
,
2228 if (ret
&& ret
!= -ENOENT
)
2231 ret2
= check_delayed_ref(trans
, root
, path
, objectid
,
2233 } while (ret2
== -EAGAIN
);
2235 if (ret2
&& ret2
!= -ENOENT
) {
2240 if (ret
!= -ENOENT
|| ret2
!= -ENOENT
)
2243 btrfs_free_path(path
);
2248 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2249 struct extent_buffer
*buf
, u32 nr_extents
)
2251 struct btrfs_key key
;
2252 struct btrfs_file_extent_item
*fi
;
2260 if (!root
->ref_cows
)
2263 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
2265 root_gen
= root
->root_key
.offset
;
2268 root_gen
= trans
->transid
- 1;
2271 level
= btrfs_header_level(buf
);
2272 nritems
= btrfs_header_nritems(buf
);
2275 struct btrfs_leaf_ref
*ref
;
2276 struct btrfs_extent_info
*info
;
2278 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
2284 ref
->root_gen
= root_gen
;
2285 ref
->bytenr
= buf
->start
;
2286 ref
->owner
= btrfs_header_owner(buf
);
2287 ref
->generation
= btrfs_header_generation(buf
);
2288 ref
->nritems
= nr_extents
;
2289 info
= ref
->extents
;
2291 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
2293 btrfs_item_key_to_cpu(buf
, &key
, i
);
2294 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
2296 fi
= btrfs_item_ptr(buf
, i
,
2297 struct btrfs_file_extent_item
);
2298 if (btrfs_file_extent_type(buf
, fi
) ==
2299 BTRFS_FILE_EXTENT_INLINE
)
2301 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
2302 if (disk_bytenr
== 0)
2305 info
->bytenr
= disk_bytenr
;
2307 btrfs_file_extent_disk_num_bytes(buf
, fi
);
2308 info
->objectid
= key
.objectid
;
2309 info
->offset
= key
.offset
;
2313 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
2314 if (ret
== -EEXIST
&& shared
) {
2315 struct btrfs_leaf_ref
*old
;
2316 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
2318 btrfs_remove_leaf_ref(root
, old
);
2319 btrfs_free_leaf_ref(root
, old
);
2320 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
2323 btrfs_free_leaf_ref(root
, ref
);
2329 /* when a block goes through cow, we update the reference counts of
2330 * everything that block points to. The internal pointers of the block
2331 * can be in just about any order, and it is likely to have clusters of
2332 * things that are close together and clusters of things that are not.
2334 * To help reduce the seeks that come with updating all of these reference
2335 * counts, sort them by byte number before actual updates are done.
2337 * struct refsort is used to match byte number to slot in the btree block.
2338 * we sort based on the byte number and then use the slot to actually
2341 * struct refsort is smaller than strcut btrfs_item and smaller than
2342 * struct btrfs_key_ptr. Since we're currently limited to the page size
2343 * for a btree block, there's no way for a kmalloc of refsorts for a
2344 * single node to be bigger than a page.
2352 * for passing into sort()
2354 static int refsort_cmp(const void *a_void
, const void *b_void
)
2356 const struct refsort
*a
= a_void
;
2357 const struct refsort
*b
= b_void
;
2359 if (a
->bytenr
< b
->bytenr
)
2361 if (a
->bytenr
> b
->bytenr
)
2367 static int __btrfs_mod_ref(struct btrfs_trans_handle
*trans
,
2368 struct btrfs_root
*root
,
2369 struct extent_buffer
*buf
,
2370 int full_backref
, int inc
)
2377 struct btrfs_key key
;
2378 struct btrfs_file_extent_item
*fi
;
2382 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
2383 u64
, u64
, u64
, u64
, u64
, u64
);
2385 ref_root
= btrfs_header_owner(buf
);
2386 nritems
= btrfs_header_nritems(buf
);
2387 level
= btrfs_header_level(buf
);
2389 if (!root
->ref_cows
&& level
== 0)
2393 process_func
= btrfs_inc_extent_ref
;
2395 process_func
= btrfs_free_extent
;
2398 parent
= buf
->start
;
2402 for (i
= 0; i
< nritems
; i
++) {
2404 btrfs_item_key_to_cpu(buf
, &key
, i
);
2405 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
2407 fi
= btrfs_item_ptr(buf
, i
,
2408 struct btrfs_file_extent_item
);
2409 if (btrfs_file_extent_type(buf
, fi
) ==
2410 BTRFS_FILE_EXTENT_INLINE
)
2412 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
2416 num_bytes
= btrfs_file_extent_disk_num_bytes(buf
, fi
);
2417 key
.offset
-= btrfs_file_extent_offset(buf
, fi
);
2418 ret
= process_func(trans
, root
, bytenr
, num_bytes
,
2419 parent
, ref_root
, key
.objectid
,
2424 bytenr
= btrfs_node_blockptr(buf
, i
);
2425 num_bytes
= btrfs_level_size(root
, level
- 1);
2426 ret
= process_func(trans
, root
, bytenr
, num_bytes
,
2427 parent
, ref_root
, level
- 1, 0);
2438 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2439 struct extent_buffer
*buf
, int full_backref
)
2441 return __btrfs_mod_ref(trans
, root
, buf
, full_backref
, 1);
2444 int btrfs_dec_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2445 struct extent_buffer
*buf
, int full_backref
)
2447 return __btrfs_mod_ref(trans
, root
, buf
, full_backref
, 0);
2450 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
2451 struct btrfs_root
*root
,
2452 struct btrfs_path
*path
,
2453 struct btrfs_block_group_cache
*cache
)
2456 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2458 struct extent_buffer
*leaf
;
2460 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
2465 leaf
= path
->nodes
[0];
2466 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
2467 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
2468 btrfs_mark_buffer_dirty(leaf
);
2469 btrfs_release_path(extent_root
, path
);
2477 static struct btrfs_block_group_cache
*
2478 next_block_group(struct btrfs_root
*root
,
2479 struct btrfs_block_group_cache
*cache
)
2481 struct rb_node
*node
;
2482 spin_lock(&root
->fs_info
->block_group_cache_lock
);
2483 node
= rb_next(&cache
->cache_node
);
2484 btrfs_put_block_group(cache
);
2486 cache
= rb_entry(node
, struct btrfs_block_group_cache
,
2488 atomic_inc(&cache
->count
);
2491 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
2495 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
2496 struct btrfs_root
*root
)
2498 struct btrfs_block_group_cache
*cache
;
2500 struct btrfs_path
*path
;
2503 path
= btrfs_alloc_path();
2509 err
= btrfs_run_delayed_refs(trans
, root
,
2514 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
2518 cache
= next_block_group(root
, cache
);
2528 last
= cache
->key
.objectid
+ cache
->key
.offset
;
2530 err
= write_one_cache_group(trans
, root
, path
, cache
);
2532 btrfs_put_block_group(cache
);
2535 btrfs_free_path(path
);
2539 int btrfs_extent_readonly(struct btrfs_root
*root
, u64 bytenr
)
2541 struct btrfs_block_group_cache
*block_group
;
2544 block_group
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
2545 if (!block_group
|| block_group
->ro
)
2548 btrfs_put_block_group(block_group
);
2552 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
2553 u64 total_bytes
, u64 bytes_used
,
2554 struct btrfs_space_info
**space_info
)
2556 struct btrfs_space_info
*found
;
2558 found
= __find_space_info(info
, flags
);
2560 spin_lock(&found
->lock
);
2561 found
->total_bytes
+= total_bytes
;
2562 found
->bytes_used
+= bytes_used
;
2564 spin_unlock(&found
->lock
);
2565 *space_info
= found
;
2568 found
= kzalloc(sizeof(*found
), GFP_NOFS
);
2572 INIT_LIST_HEAD(&found
->block_groups
);
2573 init_rwsem(&found
->groups_sem
);
2574 spin_lock_init(&found
->lock
);
2575 found
->flags
= flags
;
2576 found
->total_bytes
= total_bytes
;
2577 found
->bytes_used
= bytes_used
;
2578 found
->bytes_pinned
= 0;
2579 found
->bytes_reserved
= 0;
2580 found
->bytes_readonly
= 0;
2581 found
->bytes_delalloc
= 0;
2583 found
->force_alloc
= 0;
2584 *space_info
= found
;
2585 list_add_rcu(&found
->list
, &info
->space_info
);
2586 atomic_set(&found
->caching_threads
, 0);
2590 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
2592 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
2593 BTRFS_BLOCK_GROUP_RAID1
|
2594 BTRFS_BLOCK_GROUP_RAID10
|
2595 BTRFS_BLOCK_GROUP_DUP
);
2597 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
2598 fs_info
->avail_data_alloc_bits
|= extra_flags
;
2599 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
2600 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
2601 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
2602 fs_info
->avail_system_alloc_bits
|= extra_flags
;
2606 static void set_block_group_readonly(struct btrfs_block_group_cache
*cache
)
2608 spin_lock(&cache
->space_info
->lock
);
2609 spin_lock(&cache
->lock
);
2611 cache
->space_info
->bytes_readonly
+= cache
->key
.offset
-
2612 btrfs_block_group_used(&cache
->item
);
2615 spin_unlock(&cache
->lock
);
2616 spin_unlock(&cache
->space_info
->lock
);
2619 u64
btrfs_reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
2621 u64 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
2623 if (num_devices
== 1)
2624 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
2625 if (num_devices
< 4)
2626 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
2628 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
2629 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
2630 BTRFS_BLOCK_GROUP_RAID10
))) {
2631 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
2634 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
2635 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
2636 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
2639 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
2640 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
2641 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
2642 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
2643 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
2647 static u64
btrfs_get_alloc_profile(struct btrfs_root
*root
, u64 data
)
2649 struct btrfs_fs_info
*info
= root
->fs_info
;
2653 alloc_profile
= info
->avail_data_alloc_bits
&
2654 info
->data_alloc_profile
;
2655 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
2656 } else if (root
== root
->fs_info
->chunk_root
) {
2657 alloc_profile
= info
->avail_system_alloc_bits
&
2658 info
->system_alloc_profile
;
2659 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
2661 alloc_profile
= info
->avail_metadata_alloc_bits
&
2662 info
->metadata_alloc_profile
;
2663 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
2666 return btrfs_reduce_alloc_profile(root
, data
);
2669 void btrfs_set_inode_space_info(struct btrfs_root
*root
, struct inode
*inode
)
2673 alloc_target
= btrfs_get_alloc_profile(root
, 1);
2674 BTRFS_I(inode
)->space_info
= __find_space_info(root
->fs_info
,
2679 * for now this just makes sure we have at least 5% of our metadata space free
2682 int btrfs_check_metadata_free_space(struct btrfs_root
*root
)
2684 struct btrfs_fs_info
*info
= root
->fs_info
;
2685 struct btrfs_space_info
*meta_sinfo
;
2686 u64 alloc_target
, thresh
;
2687 int committed
= 0, ret
;
2689 /* get the space info for where the metadata will live */
2690 alloc_target
= btrfs_get_alloc_profile(root
, 0);
2691 meta_sinfo
= __find_space_info(info
, alloc_target
);
2694 spin_lock(&meta_sinfo
->lock
);
2695 if (!meta_sinfo
->full
)
2696 thresh
= meta_sinfo
->total_bytes
* 80;
2698 thresh
= meta_sinfo
->total_bytes
* 95;
2700 do_div(thresh
, 100);
2702 if (meta_sinfo
->bytes_used
+ meta_sinfo
->bytes_reserved
+
2703 meta_sinfo
->bytes_pinned
+ meta_sinfo
->bytes_readonly
> thresh
) {
2704 struct btrfs_trans_handle
*trans
;
2705 if (!meta_sinfo
->full
) {
2706 meta_sinfo
->force_alloc
= 1;
2707 spin_unlock(&meta_sinfo
->lock
);
2709 trans
= btrfs_start_transaction(root
, 1);
2713 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2714 2 * 1024 * 1024, alloc_target
, 0);
2715 btrfs_end_transaction(trans
, root
);
2718 spin_unlock(&meta_sinfo
->lock
);
2722 trans
= btrfs_join_transaction(root
, 1);
2725 ret
= btrfs_commit_transaction(trans
, root
);
2732 spin_unlock(&meta_sinfo
->lock
);
2738 * This will check the space that the inode allocates from to make sure we have
2739 * enough space for bytes.
2741 int btrfs_check_data_free_space(struct btrfs_root
*root
, struct inode
*inode
,
2744 struct btrfs_space_info
*data_sinfo
;
2745 int ret
= 0, committed
= 0;
2747 /* make sure bytes are sectorsize aligned */
2748 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
2750 data_sinfo
= BTRFS_I(inode
)->space_info
;
2752 /* make sure we have enough space to handle the data first */
2753 spin_lock(&data_sinfo
->lock
);
2754 if (data_sinfo
->total_bytes
- data_sinfo
->bytes_used
-
2755 data_sinfo
->bytes_delalloc
- data_sinfo
->bytes_reserved
-
2756 data_sinfo
->bytes_pinned
- data_sinfo
->bytes_readonly
-
2757 data_sinfo
->bytes_may_use
< bytes
) {
2758 struct btrfs_trans_handle
*trans
;
2761 * if we don't have enough free bytes in this space then we need
2762 * to alloc a new chunk.
2764 if (!data_sinfo
->full
) {
2767 data_sinfo
->force_alloc
= 1;
2768 spin_unlock(&data_sinfo
->lock
);
2770 alloc_target
= btrfs_get_alloc_profile(root
, 1);
2771 trans
= btrfs_start_transaction(root
, 1);
2775 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2776 bytes
+ 2 * 1024 * 1024,
2778 btrfs_end_transaction(trans
, root
);
2783 spin_unlock(&data_sinfo
->lock
);
2785 /* commit the current transaction and try again */
2788 trans
= btrfs_join_transaction(root
, 1);
2791 ret
= btrfs_commit_transaction(trans
, root
);
2797 printk(KERN_ERR
"no space left, need %llu, %llu delalloc bytes"
2798 ", %llu bytes_used, %llu bytes_reserved, "
2799 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2800 "%llu total\n", (unsigned long long)bytes
,
2801 (unsigned long long)data_sinfo
->bytes_delalloc
,
2802 (unsigned long long)data_sinfo
->bytes_used
,
2803 (unsigned long long)data_sinfo
->bytes_reserved
,
2804 (unsigned long long)data_sinfo
->bytes_pinned
,
2805 (unsigned long long)data_sinfo
->bytes_readonly
,
2806 (unsigned long long)data_sinfo
->bytes_may_use
,
2807 (unsigned long long)data_sinfo
->total_bytes
);
2810 data_sinfo
->bytes_may_use
+= bytes
;
2811 BTRFS_I(inode
)->reserved_bytes
+= bytes
;
2812 spin_unlock(&data_sinfo
->lock
);
2814 return btrfs_check_metadata_free_space(root
);
2818 * if there was an error for whatever reason after calling
2819 * btrfs_check_data_free_space, call this so we can cleanup the counters.
2821 void btrfs_free_reserved_data_space(struct btrfs_root
*root
,
2822 struct inode
*inode
, u64 bytes
)
2824 struct btrfs_space_info
*data_sinfo
;
2826 /* make sure bytes are sectorsize aligned */
2827 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
2829 data_sinfo
= BTRFS_I(inode
)->space_info
;
2830 spin_lock(&data_sinfo
->lock
);
2831 data_sinfo
->bytes_may_use
-= bytes
;
2832 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
2833 spin_unlock(&data_sinfo
->lock
);
2836 /* called when we are adding a delalloc extent to the inode's io_tree */
2837 void btrfs_delalloc_reserve_space(struct btrfs_root
*root
, struct inode
*inode
,
2840 struct btrfs_space_info
*data_sinfo
;
2842 /* get the space info for where this inode will be storing its data */
2843 data_sinfo
= BTRFS_I(inode
)->space_info
;
2845 /* make sure we have enough space to handle the data first */
2846 spin_lock(&data_sinfo
->lock
);
2847 data_sinfo
->bytes_delalloc
+= bytes
;
2850 * we are adding a delalloc extent without calling
2851 * btrfs_check_data_free_space first. This happens on a weird
2852 * writepage condition, but shouldn't hurt our accounting
2854 if (unlikely(bytes
> BTRFS_I(inode
)->reserved_bytes
)) {
2855 data_sinfo
->bytes_may_use
-= BTRFS_I(inode
)->reserved_bytes
;
2856 BTRFS_I(inode
)->reserved_bytes
= 0;
2858 data_sinfo
->bytes_may_use
-= bytes
;
2859 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
2862 spin_unlock(&data_sinfo
->lock
);
2865 /* called when we are clearing an delalloc extent from the inode's io_tree */
2866 void btrfs_delalloc_free_space(struct btrfs_root
*root
, struct inode
*inode
,
2869 struct btrfs_space_info
*info
;
2871 info
= BTRFS_I(inode
)->space_info
;
2873 spin_lock(&info
->lock
);
2874 info
->bytes_delalloc
-= bytes
;
2875 spin_unlock(&info
->lock
);
2878 static void force_metadata_allocation(struct btrfs_fs_info
*info
)
2880 struct list_head
*head
= &info
->space_info
;
2881 struct btrfs_space_info
*found
;
2884 list_for_each_entry_rcu(found
, head
, list
) {
2885 if (found
->flags
& BTRFS_BLOCK_GROUP_METADATA
)
2886 found
->force_alloc
= 1;
2891 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
2892 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
2893 u64 flags
, int force
)
2895 struct btrfs_space_info
*space_info
;
2896 struct btrfs_fs_info
*fs_info
= extent_root
->fs_info
;
2900 mutex_lock(&fs_info
->chunk_mutex
);
2902 flags
= btrfs_reduce_alloc_profile(extent_root
, flags
);
2904 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
2906 ret
= update_space_info(extent_root
->fs_info
, flags
,
2910 BUG_ON(!space_info
);
2912 spin_lock(&space_info
->lock
);
2913 if (space_info
->force_alloc
) {
2915 space_info
->force_alloc
= 0;
2917 if (space_info
->full
) {
2918 spin_unlock(&space_info
->lock
);
2922 thresh
= space_info
->total_bytes
- space_info
->bytes_readonly
;
2923 thresh
= div_factor(thresh
, 6);
2925 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
2926 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
) {
2927 spin_unlock(&space_info
->lock
);
2930 spin_unlock(&space_info
->lock
);
2933 * if we're doing a data chunk, go ahead and make sure that
2934 * we keep a reasonable number of metadata chunks allocated in the
2937 if (flags
& BTRFS_BLOCK_GROUP_DATA
) {
2938 fs_info
->data_chunk_allocations
++;
2939 if (!(fs_info
->data_chunk_allocations
%
2940 fs_info
->metadata_ratio
))
2941 force_metadata_allocation(fs_info
);
2944 ret
= btrfs_alloc_chunk(trans
, extent_root
, flags
);
2946 space_info
->full
= 1;
2948 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
2952 static int update_block_group(struct btrfs_trans_handle
*trans
,
2953 struct btrfs_root
*root
,
2954 u64 bytenr
, u64 num_bytes
, int alloc
,
2957 struct btrfs_block_group_cache
*cache
;
2958 struct btrfs_fs_info
*info
= root
->fs_info
;
2959 u64 total
= num_bytes
;
2963 /* block accounting for super block */
2964 spin_lock(&info
->delalloc_lock
);
2965 old_val
= btrfs_super_bytes_used(&info
->super_copy
);
2967 old_val
+= num_bytes
;
2969 old_val
-= num_bytes
;
2970 btrfs_set_super_bytes_used(&info
->super_copy
, old_val
);
2972 /* block accounting for root item */
2973 old_val
= btrfs_root_used(&root
->root_item
);
2975 old_val
+= num_bytes
;
2977 old_val
-= num_bytes
;
2978 btrfs_set_root_used(&root
->root_item
, old_val
);
2979 spin_unlock(&info
->delalloc_lock
);
2982 cache
= btrfs_lookup_block_group(info
, bytenr
);
2985 byte_in_group
= bytenr
- cache
->key
.objectid
;
2986 WARN_ON(byte_in_group
> cache
->key
.offset
);
2988 spin_lock(&cache
->space_info
->lock
);
2989 spin_lock(&cache
->lock
);
2991 old_val
= btrfs_block_group_used(&cache
->item
);
2992 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
2994 old_val
+= num_bytes
;
2995 cache
->space_info
->bytes_used
+= num_bytes
;
2997 cache
->space_info
->bytes_readonly
-= num_bytes
;
2998 btrfs_set_block_group_used(&cache
->item
, old_val
);
2999 spin_unlock(&cache
->lock
);
3000 spin_unlock(&cache
->space_info
->lock
);
3002 old_val
-= num_bytes
;
3003 cache
->space_info
->bytes_used
-= num_bytes
;
3005 cache
->space_info
->bytes_readonly
+= num_bytes
;
3006 btrfs_set_block_group_used(&cache
->item
, old_val
);
3007 spin_unlock(&cache
->lock
);
3008 spin_unlock(&cache
->space_info
->lock
);
3012 ret
= btrfs_discard_extent(root
, bytenr
,
3016 ret
= btrfs_add_free_space(cache
, bytenr
,
3021 btrfs_put_block_group(cache
);
3023 bytenr
+= num_bytes
;
3028 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
3030 struct btrfs_block_group_cache
*cache
;
3033 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
3037 bytenr
= cache
->key
.objectid
;
3038 btrfs_put_block_group(cache
);
3043 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
3044 u64 bytenr
, u64 num
, int pin
)
3047 struct btrfs_block_group_cache
*cache
;
3048 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
3051 set_extent_dirty(&fs_info
->pinned_extents
,
3052 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
3055 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
3057 len
= min(num
, cache
->key
.offset
-
3058 (bytenr
- cache
->key
.objectid
));
3060 spin_lock(&cache
->space_info
->lock
);
3061 spin_lock(&cache
->lock
);
3062 cache
->pinned
+= len
;
3063 cache
->space_info
->bytes_pinned
+= len
;
3064 spin_unlock(&cache
->lock
);
3065 spin_unlock(&cache
->space_info
->lock
);
3066 fs_info
->total_pinned
+= len
;
3071 * in order to not race with the block group caching, we
3072 * only want to unpin the extent if we are cached. If
3073 * we aren't cached, we want to start async caching this
3074 * block group so we can free the extent the next time
3077 spin_lock(&cache
->space_info
->lock
);
3078 spin_lock(&cache
->lock
);
3079 unpin
= (cache
->cached
== BTRFS_CACHE_FINISHED
);
3080 if (likely(unpin
)) {
3081 cache
->pinned
-= len
;
3082 cache
->space_info
->bytes_pinned
-= len
;
3083 fs_info
->total_pinned
-= len
;
3085 spin_unlock(&cache
->lock
);
3086 spin_unlock(&cache
->space_info
->lock
);
3089 clear_extent_dirty(&fs_info
->pinned_extents
,
3090 bytenr
, bytenr
+ len
-1,
3093 cache_block_group(cache
);
3096 btrfs_add_free_space(cache
, bytenr
, len
);
3098 btrfs_put_block_group(cache
);
3105 static int update_reserved_extents(struct btrfs_root
*root
,
3106 u64 bytenr
, u64 num
, int reserve
)
3109 struct btrfs_block_group_cache
*cache
;
3110 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
3113 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
3115 len
= min(num
, cache
->key
.offset
-
3116 (bytenr
- cache
->key
.objectid
));
3118 spin_lock(&cache
->space_info
->lock
);
3119 spin_lock(&cache
->lock
);
3121 cache
->reserved
+= len
;
3122 cache
->space_info
->bytes_reserved
+= len
;
3124 cache
->reserved
-= len
;
3125 cache
->space_info
->bytes_reserved
-= len
;
3127 spin_unlock(&cache
->lock
);
3128 spin_unlock(&cache
->space_info
->lock
);
3129 btrfs_put_block_group(cache
);
3136 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
3141 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
3145 ret
= find_first_extent_bit(pinned_extents
, last
,
3146 &start
, &end
, EXTENT_DIRTY
);
3150 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
3156 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
3157 struct btrfs_root
*root
,
3158 struct extent_io_tree
*unpin
)
3165 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
3170 ret
= btrfs_discard_extent(root
, start
, end
+ 1 - start
);
3172 /* unlocks the pinned mutex */
3173 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
3174 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
3182 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
3183 struct btrfs_root
*root
,
3184 struct btrfs_path
*path
,
3185 u64 bytenr
, u64 num_bytes
, int is_data
,
3186 struct extent_buffer
**must_clean
)
3189 struct extent_buffer
*buf
;
3194 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
3198 /* we can reuse a block if it hasn't been written
3199 * and it is from this transaction. We can't
3200 * reuse anything from the tree log root because
3201 * it has tiny sub-transactions.
3203 if (btrfs_buffer_uptodate(buf
, 0) &&
3204 btrfs_try_tree_lock(buf
)) {
3205 u64 header_owner
= btrfs_header_owner(buf
);
3206 u64 header_transid
= btrfs_header_generation(buf
);
3207 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
3208 header_transid
== trans
->transid
&&
3209 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
3213 btrfs_tree_unlock(buf
);
3215 free_extent_buffer(buf
);
3217 btrfs_set_path_blocking(path
);
3218 /* unlocks the pinned mutex */
3219 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
3226 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
3227 struct btrfs_root
*root
,
3228 u64 bytenr
, u64 num_bytes
, u64 parent
,
3229 u64 root_objectid
, u64 owner_objectid
,
3230 u64 owner_offset
, int refs_to_drop
,
3231 struct btrfs_delayed_extent_op
*extent_op
)
3233 struct btrfs_key key
;
3234 struct btrfs_path
*path
;
3235 struct btrfs_fs_info
*info
= root
->fs_info
;
3236 struct btrfs_root
*extent_root
= info
->extent_root
;
3237 struct extent_buffer
*leaf
;
3238 struct btrfs_extent_item
*ei
;
3239 struct btrfs_extent_inline_ref
*iref
;
3242 int extent_slot
= 0;
3243 int found_extent
= 0;
3248 path
= btrfs_alloc_path();
3253 path
->leave_spinning
= 1;
3255 is_data
= owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
;
3256 BUG_ON(!is_data
&& refs_to_drop
!= 1);
3258 ret
= lookup_extent_backref(trans
, extent_root
, path
, &iref
,
3259 bytenr
, num_bytes
, parent
,
3260 root_objectid
, owner_objectid
,
3263 extent_slot
= path
->slots
[0];
3264 while (extent_slot
>= 0) {
3265 btrfs_item_key_to_cpu(path
->nodes
[0], &key
,
3267 if (key
.objectid
!= bytenr
)
3269 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
3270 key
.offset
== num_bytes
) {
3274 if (path
->slots
[0] - extent_slot
> 5)
3278 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3279 item_size
= btrfs_item_size_nr(path
->nodes
[0], extent_slot
);
3280 if (found_extent
&& item_size
< sizeof(*ei
))
3283 if (!found_extent
) {
3285 ret
= remove_extent_backref(trans
, extent_root
, path
,
3289 btrfs_release_path(extent_root
, path
);
3290 path
->leave_spinning
= 1;
3292 key
.objectid
= bytenr
;
3293 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
3294 key
.offset
= num_bytes
;
3296 ret
= btrfs_search_slot(trans
, extent_root
,
3299 printk(KERN_ERR
"umm, got %d back from search"
3300 ", was looking for %llu\n", ret
,
3301 (unsigned long long)bytenr
);
3302 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3305 extent_slot
= path
->slots
[0];
3308 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3310 printk(KERN_ERR
"btrfs unable to find ref byte nr %llu "
3311 "parent %llu root %llu owner %llu offset %llu\n",
3312 (unsigned long long)bytenr
,
3313 (unsigned long long)parent
,
3314 (unsigned long long)root_objectid
,
3315 (unsigned long long)owner_objectid
,
3316 (unsigned long long)owner_offset
);
3319 leaf
= path
->nodes
[0];
3320 item_size
= btrfs_item_size_nr(leaf
, extent_slot
);
3321 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3322 if (item_size
< sizeof(*ei
)) {
3323 BUG_ON(found_extent
|| extent_slot
!= path
->slots
[0]);
3324 ret
= convert_extent_item_v0(trans
, extent_root
, path
,
3328 btrfs_release_path(extent_root
, path
);
3329 path
->leave_spinning
= 1;
3331 key
.objectid
= bytenr
;
3332 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
3333 key
.offset
= num_bytes
;
3335 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
3338 printk(KERN_ERR
"umm, got %d back from search"
3339 ", was looking for %llu\n", ret
,
3340 (unsigned long long)bytenr
);
3341 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3344 extent_slot
= path
->slots
[0];
3345 leaf
= path
->nodes
[0];
3346 item_size
= btrfs_item_size_nr(leaf
, extent_slot
);
3349 BUG_ON(item_size
< sizeof(*ei
));
3350 ei
= btrfs_item_ptr(leaf
, extent_slot
,
3351 struct btrfs_extent_item
);
3352 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
3353 struct btrfs_tree_block_info
*bi
;
3354 BUG_ON(item_size
< sizeof(*ei
) + sizeof(*bi
));
3355 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
3356 WARN_ON(owner_objectid
!= btrfs_tree_block_level(leaf
, bi
));
3359 refs
= btrfs_extent_refs(leaf
, ei
);
3360 BUG_ON(refs
< refs_to_drop
);
3361 refs
-= refs_to_drop
;
3365 __run_delayed_extent_op(extent_op
, leaf
, ei
);
3367 * In the case of inline back ref, reference count will
3368 * be updated by remove_extent_backref
3371 BUG_ON(!found_extent
);
3373 btrfs_set_extent_refs(leaf
, ei
, refs
);
3374 btrfs_mark_buffer_dirty(leaf
);
3377 ret
= remove_extent_backref(trans
, extent_root
, path
,
3384 struct extent_buffer
*must_clean
= NULL
;
3387 BUG_ON(is_data
&& refs_to_drop
!=
3388 extent_data_ref_count(root
, path
, iref
));
3390 BUG_ON(path
->slots
[0] != extent_slot
);
3392 BUG_ON(path
->slots
[0] != extent_slot
+ 1);
3393 path
->slots
[0] = extent_slot
;
3398 ret
= pin_down_bytes(trans
, root
, path
, bytenr
,
3399 num_bytes
, is_data
, &must_clean
);
3404 * it is going to be very rare for someone to be waiting
3405 * on the block we're freeing. del_items might need to
3406 * schedule, so rather than get fancy, just force it
3410 btrfs_set_lock_blocking(must_clean
);
3412 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
3415 btrfs_release_path(extent_root
, path
);
3418 clean_tree_block(NULL
, root
, must_clean
);
3419 btrfs_tree_unlock(must_clean
);
3420 free_extent_buffer(must_clean
);
3424 ret
= btrfs_del_csums(trans
, root
, bytenr
, num_bytes
);
3427 invalidate_mapping_pages(info
->btree_inode
->i_mapping
,
3428 bytenr
>> PAGE_CACHE_SHIFT
,
3429 (bytenr
+ num_bytes
- 1) >> PAGE_CACHE_SHIFT
);
3432 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
3436 btrfs_free_path(path
);
3441 * when we free an extent, it is possible (and likely) that we free the last
3442 * delayed ref for that extent as well. This searches the delayed ref tree for
3443 * a given extent, and if there are no other delayed refs to be processed, it
3444 * removes it from the tree.
3446 static noinline
int check_ref_cleanup(struct btrfs_trans_handle
*trans
,
3447 struct btrfs_root
*root
, u64 bytenr
)
3449 struct btrfs_delayed_ref_head
*head
;
3450 struct btrfs_delayed_ref_root
*delayed_refs
;
3451 struct btrfs_delayed_ref_node
*ref
;
3452 struct rb_node
*node
;
3455 delayed_refs
= &trans
->transaction
->delayed_refs
;
3456 spin_lock(&delayed_refs
->lock
);
3457 head
= btrfs_find_delayed_ref_head(trans
, bytenr
);
3461 node
= rb_prev(&head
->node
.rb_node
);
3465 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
3467 /* there are still entries for this ref, we can't drop it */
3468 if (ref
->bytenr
== bytenr
)
3471 if (head
->extent_op
) {
3472 if (!head
->must_insert_reserved
)
3474 kfree(head
->extent_op
);
3475 head
->extent_op
= NULL
;
3479 * waiting for the lock here would deadlock. If someone else has it
3480 * locked they are already in the process of dropping it anyway
3482 if (!mutex_trylock(&head
->mutex
))
3486 * at this point we have a head with no other entries. Go
3487 * ahead and process it.
3489 head
->node
.in_tree
= 0;
3490 rb_erase(&head
->node
.rb_node
, &delayed_refs
->root
);
3492 delayed_refs
->num_entries
--;
3495 * we don't take a ref on the node because we're removing it from the
3496 * tree, so we just steal the ref the tree was holding.
3498 delayed_refs
->num_heads
--;
3499 if (list_empty(&head
->cluster
))
3500 delayed_refs
->num_heads_ready
--;
3502 list_del_init(&head
->cluster
);
3503 spin_unlock(&delayed_refs
->lock
);
3505 ret
= run_one_delayed_ref(trans
, root
->fs_info
->tree_root
,
3506 &head
->node
, head
->extent_op
,
3507 head
->must_insert_reserved
);
3509 btrfs_put_delayed_ref(&head
->node
);
3512 spin_unlock(&delayed_refs
->lock
);
3516 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
3517 struct btrfs_root
*root
,
3518 u64 bytenr
, u64 num_bytes
, u64 parent
,
3519 u64 root_objectid
, u64 owner
, u64 offset
)
3524 * tree log blocks never actually go into the extent allocation
3525 * tree, just update pinning info and exit early.
3527 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
) {
3528 WARN_ON(owner
>= BTRFS_FIRST_FREE_OBJECTID
);
3529 /* unlocks the pinned mutex */
3530 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
3531 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
3533 } else if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
3534 ret
= btrfs_add_delayed_tree_ref(trans
, bytenr
, num_bytes
,
3535 parent
, root_objectid
, (int)owner
,
3536 BTRFS_DROP_DELAYED_REF
, NULL
);
3538 ret
= check_ref_cleanup(trans
, root
, bytenr
);
3541 ret
= btrfs_add_delayed_data_ref(trans
, bytenr
, num_bytes
,
3542 parent
, root_objectid
, owner
,
3543 offset
, BTRFS_DROP_DELAYED_REF
, NULL
);
3549 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
3551 u64 mask
= ((u64
)root
->stripesize
- 1);
3552 u64 ret
= (val
+ mask
) & ~mask
;
3557 * when we wait for progress in the block group caching, its because
3558 * our allocation attempt failed at least once. So, we must sleep
3559 * and let some progress happen before we try again.
3561 * This function will sleep at least once waiting for new free space to
3562 * show up, and then it will check the block group free space numbers
3563 * for our min num_bytes. Another option is to have it go ahead
3564 * and look in the rbtree for a free extent of a given size, but this
3568 wait_block_group_cache_progress(struct btrfs_block_group_cache
*cache
,
3573 prepare_to_wait(&cache
->caching_q
, &wait
, TASK_UNINTERRUPTIBLE
);
3575 if (block_group_cache_done(cache
)) {
3576 finish_wait(&cache
->caching_q
, &wait
);
3580 finish_wait(&cache
->caching_q
, &wait
);
3582 wait_event(cache
->caching_q
, block_group_cache_done(cache
) ||
3583 (cache
->free_space
>= num_bytes
));
/* passes of find_free_extent, from cheapest to most aggressive */
enum btrfs_loop_type {
	LOOP_CACHED_ONLY = 0,
	LOOP_CACHING_NOWAIT = 1,
	LOOP_CACHING_WAIT = 2,
	LOOP_ALLOC_CHUNK = 3,
	LOOP_NO_EMPTY_SIZE = 4,
};
3596 * walks the btree of allocated extents and find a hole of a given size.
3597 * The key ins is changed to record the hole:
3598 * ins->objectid == block start
3599 * ins->flags = BTRFS_EXTENT_ITEM_KEY
3600 * ins->offset == number of blocks
3601 * Any available blocks before search_start are skipped.
3603 static noinline
int find_free_extent(struct btrfs_trans_handle
*trans
,
3604 struct btrfs_root
*orig_root
,
3605 u64 num_bytes
, u64 empty_size
,
3606 u64 search_start
, u64 search_end
,
3607 u64 hint_byte
, struct btrfs_key
*ins
,
3608 u64 exclude_start
, u64 exclude_nr
,
3612 struct btrfs_root
*root
= orig_root
->fs_info
->extent_root
;
3613 struct btrfs_free_cluster
*last_ptr
= NULL
;
3614 struct btrfs_block_group_cache
*block_group
= NULL
;
3615 int empty_cluster
= 2 * 1024 * 1024;
3616 int allowed_chunk_alloc
= 0;
3617 struct btrfs_space_info
*space_info
;
3618 int last_ptr_loop
= 0;
3620 bool found_uncached_bg
= false;
3622 WARN_ON(num_bytes
< root
->sectorsize
);
3623 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
3627 space_info
= __find_space_info(root
->fs_info
, data
);
3629 if (orig_root
->ref_cows
|| empty_size
)
3630 allowed_chunk_alloc
= 1;
3632 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
3633 last_ptr
= &root
->fs_info
->meta_alloc_cluster
;
3634 if (!btrfs_test_opt(root
, SSD
))
3635 empty_cluster
= 64 * 1024;
3638 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
)) {
3639 last_ptr
= &root
->fs_info
->data_alloc_cluster
;
3643 spin_lock(&last_ptr
->lock
);
3644 if (last_ptr
->block_group
)
3645 hint_byte
= last_ptr
->window_start
;
3646 spin_unlock(&last_ptr
->lock
);
3649 search_start
= max(search_start
, first_logical_byte(root
, 0));
3650 search_start
= max(search_start
, hint_byte
);
3655 if (search_start
== hint_byte
) {
3656 block_group
= btrfs_lookup_block_group(root
->fs_info
,
3659 * we don't want to use the block group if it doesn't match our
3660 * allocation bits, or if its not cached.
3662 if (block_group
&& block_group_bits(block_group
, data
) &&
3663 block_group_cache_done(block_group
)) {
3664 down_read(&space_info
->groups_sem
);
3665 if (list_empty(&block_group
->list
) ||
3668 * someone is removing this block group,
3669 * we can't jump into the have_block_group
3670 * target because our list pointers are not
3673 btrfs_put_block_group(block_group
);
3674 up_read(&space_info
->groups_sem
);
3676 goto have_block_group
;
3677 } else if (block_group
) {
3678 btrfs_put_block_group(block_group
);
3683 down_read(&space_info
->groups_sem
);
3684 list_for_each_entry(block_group
, &space_info
->block_groups
, list
) {
3688 atomic_inc(&block_group
->count
);
3689 search_start
= block_group
->key
.objectid
;
3692 if (unlikely(block_group
->cached
== BTRFS_CACHE_NO
)) {
3694 * we want to start caching kthreads, but not too many
3695 * right off the bat so we don't overwhelm the system,
3696 * so only start them if there are less than 2 and we're
3697 * in the initial allocation phase.
3699 if (loop
> LOOP_CACHING_NOWAIT
||
3700 atomic_read(&space_info
->caching_threads
) < 2) {
3701 ret
= cache_block_group(block_group
);
3706 cached
= block_group_cache_done(block_group
);
3707 if (unlikely(!cached
)) {
3708 found_uncached_bg
= true;
3710 /* if we only want cached bgs, loop */
3711 if (loop
== LOOP_CACHED_ONLY
)
3715 if (unlikely(block_group
->ro
))
3720 * the refill lock keeps out other
3721 * people trying to start a new cluster
3723 spin_lock(&last_ptr
->refill_lock
);
3724 if (last_ptr
->block_group
&&
3725 (last_ptr
->block_group
->ro
||
3726 !block_group_bits(last_ptr
->block_group
, data
))) {
3728 goto refill_cluster
;
3731 offset
= btrfs_alloc_from_cluster(block_group
, last_ptr
,
3732 num_bytes
, search_start
);
3734 /* we have a block, we're done */
3735 spin_unlock(&last_ptr
->refill_lock
);
3739 spin_lock(&last_ptr
->lock
);
3741 * whoops, this cluster doesn't actually point to
3742 * this block group. Get a ref on the block
3743 * group is does point to and try again
3745 if (!last_ptr_loop
&& last_ptr
->block_group
&&
3746 last_ptr
->block_group
!= block_group
) {
3748 btrfs_put_block_group(block_group
);
3749 block_group
= last_ptr
->block_group
;
3750 atomic_inc(&block_group
->count
);
3751 spin_unlock(&last_ptr
->lock
);
3752 spin_unlock(&last_ptr
->refill_lock
);
3755 search_start
= block_group
->key
.objectid
;
3757 * we know this block group is properly
3758 * in the list because
3759 * btrfs_remove_block_group, drops the
3760 * cluster before it removes the block
3761 * group from the list
3763 goto have_block_group
;
3765 spin_unlock(&last_ptr
->lock
);
3768 * this cluster didn't work out, free it and
3771 btrfs_return_cluster_to_free_space(NULL
, last_ptr
);
3775 /* allocate a cluster in this block group */
3776 ret
= btrfs_find_space_cluster(trans
, root
,
3777 block_group
, last_ptr
,
3779 empty_cluster
+ empty_size
);
3782 * now pull our allocation out of this
3785 offset
= btrfs_alloc_from_cluster(block_group
,
3786 last_ptr
, num_bytes
,
3789 /* we found one, proceed */
3790 spin_unlock(&last_ptr
->refill_lock
);
3793 } else if (!cached
&& loop
> LOOP_CACHING_NOWAIT
) {
3794 spin_unlock(&last_ptr
->refill_lock
);
3796 wait_block_group_cache_progress(block_group
,
3797 num_bytes
+ empty_cluster
+ empty_size
);
3798 goto have_block_group
;
3802 * at this point we either didn't find a cluster
3803 * or we weren't able to allocate a block from our
3804 * cluster. Free the cluster we've been trying
3805 * to use, and go to the next block group
3807 if (loop
< LOOP_NO_EMPTY_SIZE
) {
3808 btrfs_return_cluster_to_free_space(NULL
,
3810 spin_unlock(&last_ptr
->refill_lock
);
3813 spin_unlock(&last_ptr
->refill_lock
);
3816 offset
= btrfs_find_space_for_alloc(block_group
, search_start
,
3817 num_bytes
, empty_size
);
3818 if (!offset
&& (cached
|| (!cached
&&
3819 loop
== LOOP_CACHING_NOWAIT
))) {
3821 } else if (!offset
&& (!cached
&&
3822 loop
> LOOP_CACHING_NOWAIT
)) {
3823 wait_block_group_cache_progress(block_group
,
3824 num_bytes
+ empty_size
);
3825 goto have_block_group
;
3828 search_start
= stripe_align(root
, offset
);
3829 /* move on to the next group */
3830 if (search_start
+ num_bytes
>= search_end
) {
3831 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3835 /* move on to the next group */
3836 if (search_start
+ num_bytes
>
3837 block_group
->key
.objectid
+ block_group
->key
.offset
) {
3838 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3842 if (exclude_nr
> 0 &&
3843 (search_start
+ num_bytes
> exclude_start
&&
3844 search_start
< exclude_start
+ exclude_nr
)) {
3845 search_start
= exclude_start
+ exclude_nr
;
3847 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3849 * if search_start is still in this block group
3850 * then we just re-search this block group
3852 if (search_start
>= block_group
->key
.objectid
&&
3853 search_start
< (block_group
->key
.objectid
+
3854 block_group
->key
.offset
))
3855 goto have_block_group
;
3859 ins
->objectid
= search_start
;
3860 ins
->offset
= num_bytes
;
3862 if (offset
< search_start
)
3863 btrfs_add_free_space(block_group
, offset
,
3864 search_start
- offset
);
3865 BUG_ON(offset
> search_start
);
3867 /* we are all good, lets return */
3870 btrfs_put_block_group(block_group
);
3872 up_read(&space_info
->groups_sem
);
3874 /* LOOP_CACHED_ONLY, only search fully cached block groups
3875 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
3876 * dont wait foR them to finish caching
3877 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
3878 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
3879 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
3882 if (!ins
->objectid
&& loop
< LOOP_NO_EMPTY_SIZE
&&
3883 (found_uncached_bg
|| empty_size
|| empty_cluster
||
3884 allowed_chunk_alloc
)) {
3885 if (found_uncached_bg
) {
3886 found_uncached_bg
= false;
3887 if (loop
< LOOP_CACHING_WAIT
) {
3893 if (loop
== LOOP_ALLOC_CHUNK
) {
3898 if (allowed_chunk_alloc
) {
3899 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
3900 2 * 1024 * 1024, data
, 1);
3901 allowed_chunk_alloc
= 0;
3903 space_info
->force_alloc
= 1;
3906 if (loop
< LOOP_NO_EMPTY_SIZE
) {
3911 } else if (!ins
->objectid
) {
3915 /* we found what we needed */
3916 if (ins
->objectid
) {
3917 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
3918 trans
->block_group
= block_group
->key
.objectid
;
3920 btrfs_put_block_group(block_group
);
3927 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
3929 struct btrfs_block_group_cache
*cache
;
3931 printk(KERN_INFO
"space_info has %llu free, is %sfull\n",
3932 (unsigned long long)(info
->total_bytes
- info
->bytes_used
-
3933 info
->bytes_pinned
- info
->bytes_reserved
),
3934 (info
->full
) ? "" : "not ");
3935 printk(KERN_INFO
"space_info total=%llu, pinned=%llu, delalloc=%llu,"
3936 " may_use=%llu, used=%llu\n",
3937 (unsigned long long)info
->total_bytes
,
3938 (unsigned long long)info
->bytes_pinned
,
3939 (unsigned long long)info
->bytes_delalloc
,
3940 (unsigned long long)info
->bytes_may_use
,
3941 (unsigned long long)info
->bytes_used
);
3943 down_read(&info
->groups_sem
);
3944 list_for_each_entry(cache
, &info
->block_groups
, list
) {
3945 spin_lock(&cache
->lock
);
3946 printk(KERN_INFO
"block group %llu has %llu bytes, %llu used "
3947 "%llu pinned %llu reserved\n",
3948 (unsigned long long)cache
->key
.objectid
,
3949 (unsigned long long)cache
->key
.offset
,
3950 (unsigned long long)btrfs_block_group_used(&cache
->item
),
3951 (unsigned long long)cache
->pinned
,
3952 (unsigned long long)cache
->reserved
);
3953 btrfs_dump_free_space(cache
, bytes
);
3954 spin_unlock(&cache
->lock
);
3956 up_read(&info
->groups_sem
);
3959 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
3960 struct btrfs_root
*root
,
3961 u64 num_bytes
, u64 min_alloc_size
,
3962 u64 empty_size
, u64 hint_byte
,
3963 u64 search_end
, struct btrfs_key
*ins
,
3967 u64 search_start
= 0;
3968 struct btrfs_fs_info
*info
= root
->fs_info
;
3970 data
= btrfs_get_alloc_profile(root
, data
);
3973 * the only place that sets empty_size is btrfs_realloc_node, which
3974 * is not called recursively on allocations
3976 if (empty_size
|| root
->ref_cows
) {
3977 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
3978 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3980 BTRFS_BLOCK_GROUP_METADATA
|
3981 (info
->metadata_alloc_profile
&
3982 info
->avail_metadata_alloc_bits
), 0);
3984 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3985 num_bytes
+ 2 * 1024 * 1024, data
, 0);
3988 WARN_ON(num_bytes
< root
->sectorsize
);
3989 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
3990 search_start
, search_end
, hint_byte
, ins
,
3991 trans
->alloc_exclude_start
,
3992 trans
->alloc_exclude_nr
, data
);
3994 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
3995 num_bytes
= num_bytes
>> 1;
3996 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
3997 num_bytes
= max(num_bytes
, min_alloc_size
);
3998 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3999 num_bytes
, data
, 1);
4002 if (ret
== -ENOSPC
) {
4003 struct btrfs_space_info
*sinfo
;
4005 sinfo
= __find_space_info(root
->fs_info
, data
);
4006 printk(KERN_ERR
"btrfs allocation failed flags %llu, "
4007 "wanted %llu\n", (unsigned long long)data
,
4008 (unsigned long long)num_bytes
);
4009 dump_space_info(sinfo
, num_bytes
);
4015 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
4017 struct btrfs_block_group_cache
*cache
;
4020 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
4022 printk(KERN_ERR
"Unable to find block group for %llu\n",
4023 (unsigned long long)start
);
4027 ret
= btrfs_discard_extent(root
, start
, len
);
4029 btrfs_add_free_space(cache
, start
, len
);
4030 btrfs_put_block_group(cache
);
4031 update_reserved_extents(root
, start
, len
, 0);
4036 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
4037 struct btrfs_root
*root
,
4038 u64 num_bytes
, u64 min_alloc_size
,
4039 u64 empty_size
, u64 hint_byte
,
4040 u64 search_end
, struct btrfs_key
*ins
,
4044 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
4045 empty_size
, hint_byte
, search_end
, ins
,
4048 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
4053 static int alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
4054 struct btrfs_root
*root
,
4055 u64 parent
, u64 root_objectid
,
4056 u64 flags
, u64 owner
, u64 offset
,
4057 struct btrfs_key
*ins
, int ref_mod
)
4060 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
4061 struct btrfs_extent_item
*extent_item
;
4062 struct btrfs_extent_inline_ref
*iref
;
4063 struct btrfs_path
*path
;
4064 struct extent_buffer
*leaf
;
4069 type
= BTRFS_SHARED_DATA_REF_KEY
;
4071 type
= BTRFS_EXTENT_DATA_REF_KEY
;
4073 size
= sizeof(*extent_item
) + btrfs_extent_inline_ref_size(type
);
4075 path
= btrfs_alloc_path();
4078 path
->leave_spinning
= 1;
4079 ret
= btrfs_insert_empty_item(trans
, fs_info
->extent_root
, path
,
4083 leaf
= path
->nodes
[0];
4084 extent_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
4085 struct btrfs_extent_item
);
4086 btrfs_set_extent_refs(leaf
, extent_item
, ref_mod
);
4087 btrfs_set_extent_generation(leaf
, extent_item
, trans
->transid
);
4088 btrfs_set_extent_flags(leaf
, extent_item
,
4089 flags
| BTRFS_EXTENT_FLAG_DATA
);
4091 iref
= (struct btrfs_extent_inline_ref
*)(extent_item
+ 1);
4092 btrfs_set_extent_inline_ref_type(leaf
, iref
, type
);
4094 struct btrfs_shared_data_ref
*ref
;
4095 ref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
4096 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
4097 btrfs_set_shared_data_ref_count(leaf
, ref
, ref_mod
);
4099 struct btrfs_extent_data_ref
*ref
;
4100 ref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
4101 btrfs_set_extent_data_ref_root(leaf
, ref
, root_objectid
);
4102 btrfs_set_extent_data_ref_objectid(leaf
, ref
, owner
);
4103 btrfs_set_extent_data_ref_offset(leaf
, ref
, offset
);
4104 btrfs_set_extent_data_ref_count(leaf
, ref
, ref_mod
);
4107 btrfs_mark_buffer_dirty(path
->nodes
[0]);
4108 btrfs_free_path(path
);
4110 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
,
4113 printk(KERN_ERR
"btrfs update block group failed for %llu "
4114 "%llu\n", (unsigned long long)ins
->objectid
,
4115 (unsigned long long)ins
->offset
);
4121 static int alloc_reserved_tree_block(struct btrfs_trans_handle
*trans
,
4122 struct btrfs_root
*root
,
4123 u64 parent
, u64 root_objectid
,
4124 u64 flags
, struct btrfs_disk_key
*key
,
4125 int level
, struct btrfs_key
*ins
)
4128 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
4129 struct btrfs_extent_item
*extent_item
;
4130 struct btrfs_tree_block_info
*block_info
;
4131 struct btrfs_extent_inline_ref
*iref
;
4132 struct btrfs_path
*path
;
4133 struct extent_buffer
*leaf
;
4134 u32 size
= sizeof(*extent_item
) + sizeof(*block_info
) + sizeof(*iref
);
4136 path
= btrfs_alloc_path();
4139 path
->leave_spinning
= 1;
4140 ret
= btrfs_insert_empty_item(trans
, fs_info
->extent_root
, path
,
4144 leaf
= path
->nodes
[0];
4145 extent_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
4146 struct btrfs_extent_item
);
4147 btrfs_set_extent_refs(leaf
, extent_item
, 1);
4148 btrfs_set_extent_generation(leaf
, extent_item
, trans
->transid
);
4149 btrfs_set_extent_flags(leaf
, extent_item
,
4150 flags
| BTRFS_EXTENT_FLAG_TREE_BLOCK
);
4151 block_info
= (struct btrfs_tree_block_info
*)(extent_item
+ 1);
4153 btrfs_set_tree_block_key(leaf
, block_info
, key
);
4154 btrfs_set_tree_block_level(leaf
, block_info
, level
);
4156 iref
= (struct btrfs_extent_inline_ref
*)(block_info
+ 1);
4158 BUG_ON(!(flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
));
4159 btrfs_set_extent_inline_ref_type(leaf
, iref
,
4160 BTRFS_SHARED_BLOCK_REF_KEY
);
4161 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
4163 btrfs_set_extent_inline_ref_type(leaf
, iref
,
4164 BTRFS_TREE_BLOCK_REF_KEY
);
4165 btrfs_set_extent_inline_ref_offset(leaf
, iref
, root_objectid
);
4168 btrfs_mark_buffer_dirty(leaf
);
4169 btrfs_free_path(path
);
4171 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
,
4174 printk(KERN_ERR
"btrfs update block group failed for %llu "
4175 "%llu\n", (unsigned long long)ins
->objectid
,
4176 (unsigned long long)ins
->offset
);
4182 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
4183 struct btrfs_root
*root
,
4184 u64 root_objectid
, u64 owner
,
4185 u64 offset
, struct btrfs_key
*ins
)
4189 BUG_ON(root_objectid
== BTRFS_TREE_LOG_OBJECTID
);
4191 ret
= btrfs_add_delayed_data_ref(trans
, ins
->objectid
, ins
->offset
,
4192 0, root_objectid
, owner
, offset
,
4193 BTRFS_ADD_DELAYED_EXTENT
, NULL
);
4198 * this is used by the tree logging recovery code. It records that
4199 * an extent has been allocated and makes sure to clear the free
4200 * space cache bits as well
4202 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle
*trans
,
4203 struct btrfs_root
*root
,
4204 u64 root_objectid
, u64 owner
, u64 offset
,
4205 struct btrfs_key
*ins
)
4208 struct btrfs_block_group_cache
*block_group
;
4210 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
4211 cache_block_group(block_group
);
4212 wait_event(block_group
->caching_q
,
4213 block_group_cache_done(block_group
));
4215 ret
= btrfs_remove_free_space(block_group
, ins
->objectid
,
4218 btrfs_put_block_group(block_group
);
4219 ret
= alloc_reserved_file_extent(trans
, root
, 0, root_objectid
,
4220 0, owner
, offset
, ins
, 1);
4225 * finds a free extent and does all the dirty work required for allocation
4226 * returns the key for the extent through ins, and a tree buffer for
4227 * the first block of the extent through buf.
4229 * returns 0 if everything worked, non-zero otherwise.
4231 static int alloc_tree_block(struct btrfs_trans_handle
*trans
,
4232 struct btrfs_root
*root
,
4233 u64 num_bytes
, u64 parent
, u64 root_objectid
,
4234 struct btrfs_disk_key
*key
, int level
,
4235 u64 empty_size
, u64 hint_byte
, u64 search_end
,
4236 struct btrfs_key
*ins
)
4241 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, num_bytes
,
4242 empty_size
, hint_byte
, search_end
,
4247 if (root_objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
4249 parent
= ins
->objectid
;
4250 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
4254 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
4255 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
4256 struct btrfs_delayed_extent_op
*extent_op
;
4257 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
4260 memcpy(&extent_op
->key
, key
, sizeof(extent_op
->key
));
4262 memset(&extent_op
->key
, 0, sizeof(extent_op
->key
));
4263 extent_op
->flags_to_set
= flags
;
4264 extent_op
->update_key
= 1;
4265 extent_op
->update_flags
= 1;
4266 extent_op
->is_data
= 0;
4268 ret
= btrfs_add_delayed_tree_ref(trans
, ins
->objectid
,
4269 ins
->offset
, parent
, root_objectid
,
4270 level
, BTRFS_ADD_DELAYED_EXTENT
,
4277 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
4278 struct btrfs_root
*root
,
4279 u64 bytenr
, u32 blocksize
,
4282 struct extent_buffer
*buf
;
4284 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
4286 return ERR_PTR(-ENOMEM
);
4287 btrfs_set_header_generation(buf
, trans
->transid
);
4288 btrfs_set_buffer_lockdep_class(buf
, level
);
4289 btrfs_tree_lock(buf
);
4290 clean_tree_block(trans
, root
, buf
);
4292 btrfs_set_lock_blocking(buf
);
4293 btrfs_set_buffer_uptodate(buf
);
4295 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
4296 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
4297 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
4299 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
4300 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
4302 trans
->blocks_used
++;
4303 /* this returns a buffer locked for blocking */
4308 * helper function to allocate a block for a given tree
4309 * returns the tree buffer or NULL.
4311 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
4312 struct btrfs_root
*root
, u32 blocksize
,
4313 u64 parent
, u64 root_objectid
,
4314 struct btrfs_disk_key
*key
, int level
,
4315 u64 hint
, u64 empty_size
)
4317 struct btrfs_key ins
;
4319 struct extent_buffer
*buf
;
4321 ret
= alloc_tree_block(trans
, root
, blocksize
, parent
, root_objectid
,
4322 key
, level
, empty_size
, hint
, (u64
)-1, &ins
);
4325 return ERR_PTR(ret
);
4328 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
,
4334 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
4335 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
4339 struct btrfs_key key
;
4340 struct btrfs_file_extent_item
*fi
;
4345 BUG_ON(!btrfs_is_leaf(leaf
));
4346 nritems
= btrfs_header_nritems(leaf
);
4348 for (i
= 0; i
< nritems
; i
++) {
4350 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4352 /* only extents have references, skip everything else */
4353 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4356 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4358 /* inline extents live in the btree, they don't have refs */
4359 if (btrfs_file_extent_type(leaf
, fi
) ==
4360 BTRFS_FILE_EXTENT_INLINE
)
4363 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4365 /* holes don't have refs */
4366 if (disk_bytenr
== 0)
4369 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4370 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
, num_bytes
,
4371 leaf
->start
, 0, key
.objectid
, 0);
4377 static noinline
int cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
4378 struct btrfs_root
*root
,
4379 struct btrfs_leaf_ref
*ref
)
4383 struct btrfs_extent_info
*info
;
4384 struct refsort
*sorted
;
4386 if (ref
->nritems
== 0)
4389 sorted
= kmalloc(sizeof(*sorted
) * ref
->nritems
, GFP_NOFS
);
4390 for (i
= 0; i
< ref
->nritems
; i
++) {
4391 sorted
[i
].bytenr
= ref
->extents
[i
].bytenr
;
4394 sort(sorted
, ref
->nritems
, sizeof(struct refsort
), refsort_cmp
, NULL
);
4397 * the items in the ref were sorted when the ref was inserted
4398 * into the ref cache, so this is already in order
4400 for (i
= 0; i
< ref
->nritems
; i
++) {
4401 info
= ref
->extents
+ sorted
[i
].slot
;
4402 ret
= btrfs_free_extent(trans
, root
, info
->bytenr
,
4403 info
->num_bytes
, ref
->bytenr
,
4404 ref
->owner
, ref
->generation
,
4407 atomic_inc(&root
->fs_info
->throttle_gen
);
4408 wake_up(&root
->fs_info
->transaction_throttle
);
4420 static int drop_snap_lookup_refcount(struct btrfs_trans_handle
*trans
,
4421 struct btrfs_root
*root
, u64 start
,
4426 ret
= btrfs_lookup_extent_refs(trans
, root
, start
, len
, refs
);
4429 #if 0 /* some debugging code in case we see problems here */
4430 /* if the refs count is one, it won't get increased again. But
4431 * if the ref count is > 1, someone may be decreasing it at
4432 * the same time we are.
4435 struct extent_buffer
*eb
= NULL
;
4436 eb
= btrfs_find_create_tree_block(root
, start
, len
);
4438 btrfs_tree_lock(eb
);
4440 mutex_lock(&root
->fs_info
->alloc_mutex
);
4441 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
4443 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4446 btrfs_tree_unlock(eb
);
4447 free_extent_buffer(eb
);
4450 printk(KERN_ERR
"btrfs block %llu went down to one "
4451 "during drop_snap\n", (unsigned long long)start
);
4463 * this is used while deleting old snapshots, and it drops the refs
4464 * on a whole subtree starting from a level 1 node.
4466 * The idea is to sort all the leaf pointers, and then drop the
4467 * ref on all the leaves in order. Most of the time the leaves
4468 * will have ref cache entries, so no leaf IOs will be required to
4469 * find the extents they have references on.
4471 * For each leaf, any references it has are also dropped in order
4473 * This ends up dropping the references in something close to optimal
4474 * order for reading and modifying the extent allocation tree.
4476 static noinline
int drop_level_one_refs(struct btrfs_trans_handle
*trans
,
4477 struct btrfs_root
*root
,
4478 struct btrfs_path
*path
)
4483 struct extent_buffer
*eb
= path
->nodes
[1];
4484 struct extent_buffer
*leaf
;
4485 struct btrfs_leaf_ref
*ref
;
4486 struct refsort
*sorted
= NULL
;
4487 int nritems
= btrfs_header_nritems(eb
);
4491 int slot
= path
->slots
[1];
4492 u32 blocksize
= btrfs_level_size(root
, 0);
4498 root_owner
= btrfs_header_owner(eb
);
4499 root_gen
= btrfs_header_generation(eb
);
4500 sorted
= kmalloc(sizeof(*sorted
) * nritems
, GFP_NOFS
);
4503 * step one, sort all the leaf pointers so we don't scribble
4504 * randomly into the extent allocation tree
4506 for (i
= slot
; i
< nritems
; i
++) {
4507 sorted
[refi
].bytenr
= btrfs_node_blockptr(eb
, i
);
4508 sorted
[refi
].slot
= i
;
4513 * nritems won't be zero, but if we're picking up drop_snapshot
4514 * after a crash, slot might be > 0, so double check things
4520 sort(sorted
, refi
, sizeof(struct refsort
), refsort_cmp
, NULL
);
4523 * the first loop frees everything the leaves point to
4525 for (i
= 0; i
< refi
; i
++) {
4528 bytenr
= sorted
[i
].bytenr
;
4531 * check the reference count on this leaf. If it is > 1
4532 * we just decrement it below and don't update any
4533 * of the refs the leaf points to.
4535 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
4541 ptr_gen
= btrfs_node_ptr_generation(eb
, sorted
[i
].slot
);
4544 * the leaf only had one reference, which means the
4545 * only thing pointing to this leaf is the snapshot
4546 * we're deleting. It isn't possible for the reference
4547 * count to increase again later
4549 * The reference cache is checked for the leaf,
4550 * and if found we'll be able to drop any refs held by
4551 * the leaf without needing to read it in.
4553 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
4554 if (ref
&& ref
->generation
!= ptr_gen
) {
4555 btrfs_free_leaf_ref(root
, ref
);
4559 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
4561 btrfs_remove_leaf_ref(root
, ref
);
4562 btrfs_free_leaf_ref(root
, ref
);
4565 * the leaf wasn't in the reference cache, so
4566 * we have to read it.
4568 leaf
= read_tree_block(root
, bytenr
, blocksize
,
4570 ret
= btrfs_drop_leaf_ref(trans
, root
, leaf
);
4572 free_extent_buffer(leaf
);
4574 atomic_inc(&root
->fs_info
->throttle_gen
);
4575 wake_up(&root
->fs_info
->transaction_throttle
);
4580 * run through the loop again to free the refs on the leaves.
4581 * This is faster than doing it in the loop above because
4582 * the leaves are likely to be clustered together. We end up
4583 * working in nice chunks on the extent allocation tree.
4585 for (i
= 0; i
< refi
; i
++) {
4586 bytenr
= sorted
[i
].bytenr
;
4587 ret
= btrfs_free_extent(trans
, root
, bytenr
,
4588 blocksize
, eb
->start
,
4589 root_owner
, root_gen
, 0, 1);
4592 atomic_inc(&root
->fs_info
->throttle_gen
);
4593 wake_up(&root
->fs_info
->transaction_throttle
);
4600 * update the path to show we've processed the entire level 1
4601 * node. This will get saved into the root's drop_snapshot_progress
4602 * field so these drops are not repeated again if this transaction
4605 path
->slots
[1] = nritems
;
4610 * helper function for drop_snapshot, this walks down the tree dropping ref
4611 * counts as it goes.
4613 static noinline
int walk_down_tree(struct btrfs_trans_handle
*trans
,
4614 struct btrfs_root
*root
,
4615 struct btrfs_path
*path
, int *level
)
4621 struct extent_buffer
*next
;
4622 struct extent_buffer
*cur
;
4623 struct extent_buffer
*parent
;
4628 WARN_ON(*level
< 0);
4629 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4630 ret
= drop_snap_lookup_refcount(trans
, root
, path
->nodes
[*level
]->start
,
4631 path
->nodes
[*level
]->len
, &refs
);
4637 * walk down to the last node level and free all the leaves
4639 while (*level
>= 0) {
4640 WARN_ON(*level
< 0);
4641 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4642 cur
= path
->nodes
[*level
];
4644 if (btrfs_header_level(cur
) != *level
)
4647 if (path
->slots
[*level
] >=
4648 btrfs_header_nritems(cur
))
4651 /* the new code goes down to level 1 and does all the
4652 * leaves pointed to that node in bulk. So, this check
4653 * for level 0 will always be false.
4655 * But, the disk format allows the drop_snapshot_progress
4656 * field in the root to leave things in a state where
4657 * a leaf will need cleaning up here. If someone crashes
4658 * with the old code and then boots with the new code,
4659 * we might find a leaf here.
4662 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
4668 * once we get to level one, process the whole node
4669 * at once, including everything below it.
4672 ret
= drop_level_one_refs(trans
, root
, path
);
4677 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
4678 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
4679 blocksize
= btrfs_level_size(root
, *level
- 1);
4681 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
4686 * if there is more than one reference, we don't need
4687 * to read that node to drop any references it has. We
4688 * just drop the ref we hold on that node and move on to the
4689 * next slot in this level.
4692 parent
= path
->nodes
[*level
];
4693 root_owner
= btrfs_header_owner(parent
);
4694 root_gen
= btrfs_header_generation(parent
);
4695 path
->slots
[*level
]++;
4697 ret
= btrfs_free_extent(trans
, root
, bytenr
,
4698 blocksize
, parent
->start
,
4699 root_owner
, root_gen
,
4703 atomic_inc(&root
->fs_info
->throttle_gen
);
4704 wake_up(&root
->fs_info
->transaction_throttle
);
4711 * we need to keep freeing things in the next level down.
4712 * read the block and loop around to process it
4714 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
4715 WARN_ON(*level
<= 0);
4716 if (path
->nodes
[*level
-1])
4717 free_extent_buffer(path
->nodes
[*level
-1]);
4718 path
->nodes
[*level
-1] = next
;
4719 *level
= btrfs_header_level(next
);
4720 path
->slots
[*level
] = 0;
4724 WARN_ON(*level
< 0);
4725 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4727 if (path
->nodes
[*level
] == root
->node
) {
4728 parent
= path
->nodes
[*level
];
4729 bytenr
= path
->nodes
[*level
]->start
;
4731 parent
= path
->nodes
[*level
+ 1];
4732 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
4735 blocksize
= btrfs_level_size(root
, *level
);
4736 root_owner
= btrfs_header_owner(parent
);
4737 root_gen
= btrfs_header_generation(parent
);
4740 * cleanup and free the reference on the last node
4743 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
4744 parent
->start
, root_owner
, root_gen
,
4746 free_extent_buffer(path
->nodes
[*level
]);
4747 path
->nodes
[*level
] = NULL
;
4757 struct walk_control
{
4758 u64 refs
[BTRFS_MAX_LEVEL
];
4759 u64 flags
[BTRFS_MAX_LEVEL
];
4760 struct btrfs_key update_progress
;
4768 #define DROP_REFERENCE 1
4769 #define UPDATE_BACKREF 2
4772 * hepler to process tree block while walking down the tree.
4774 * when wc->stage == DROP_REFERENCE, this function checks
4775 * reference count of the block. if the block is shared and
4776 * we need update back refs for the subtree rooted at the
4777 * block, this function changes wc->stage to UPDATE_BACKREF
4779 * when wc->stage == UPDATE_BACKREF, this function updates
4780 * back refs for pointers in the block.
4782 * NOTE: return value 1 means we should stop walking down.
4784 static noinline
int walk_down_proc(struct btrfs_trans_handle
*trans
,
4785 struct btrfs_root
*root
,
4786 struct btrfs_path
*path
,
4787 struct walk_control
*wc
)
4789 int level
= wc
->level
;
4790 struct extent_buffer
*eb
= path
->nodes
[level
];
4791 struct btrfs_key key
;
4792 u64 flag
= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
4795 if (wc
->stage
== UPDATE_BACKREF
&&
4796 btrfs_header_owner(eb
) != root
->root_key
.objectid
)
4800 * when reference count of tree block is 1, it won't increase
4801 * again. once full backref flag is set, we never clear it.
4803 if ((wc
->stage
== DROP_REFERENCE
&& wc
->refs
[level
] != 1) ||
4804 (wc
->stage
== UPDATE_BACKREF
&& !(wc
->flags
[level
] & flag
))) {
4805 BUG_ON(!path
->locks
[level
]);
4806 ret
= btrfs_lookup_extent_info(trans
, root
,
4811 BUG_ON(wc
->refs
[level
] == 0);
4814 if (wc
->stage
== DROP_REFERENCE
&&
4815 wc
->update_ref
&& wc
->refs
[level
] > 1) {
4816 BUG_ON(eb
== root
->node
);
4817 BUG_ON(path
->slots
[level
] > 0);
4819 btrfs_item_key_to_cpu(eb
, &key
, path
->slots
[level
]);
4821 btrfs_node_key_to_cpu(eb
, &key
, path
->slots
[level
]);
4822 if (btrfs_header_owner(eb
) == root
->root_key
.objectid
&&
4823 btrfs_comp_cpu_keys(&key
, &wc
->update_progress
) >= 0) {
4824 wc
->stage
= UPDATE_BACKREF
;
4825 wc
->shared_level
= level
;
4829 if (wc
->stage
== DROP_REFERENCE
) {
4830 if (wc
->refs
[level
] > 1)
4833 if (path
->locks
[level
] && !wc
->keep_locks
) {
4834 btrfs_tree_unlock(eb
);
4835 path
->locks
[level
] = 0;
4840 /* wc->stage == UPDATE_BACKREF */
4841 if (!(wc
->flags
[level
] & flag
)) {
4842 BUG_ON(!path
->locks
[level
]);
4843 ret
= btrfs_inc_ref(trans
, root
, eb
, 1);
4845 ret
= btrfs_dec_ref(trans
, root
, eb
, 0);
4847 ret
= btrfs_set_disk_extent_flags(trans
, root
, eb
->start
,
4850 wc
->flags
[level
] |= flag
;
4854 * the block is shared by multiple trees, so it's not good to
4855 * keep the tree lock
4857 if (path
->locks
[level
] && level
> 0) {
4858 btrfs_tree_unlock(eb
);
4859 path
->locks
[level
] = 0;
4865 * hepler to process tree block while walking up the tree.
4867 * when wc->stage == DROP_REFERENCE, this function drops
4868 * reference count on the block.
4870 * when wc->stage == UPDATE_BACKREF, this function changes
4871 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4872 * to UPDATE_BACKREF previously while processing the block.
4874 * NOTE: return value 1 means we should stop walking up.
4876 static noinline
int walk_up_proc(struct btrfs_trans_handle
*trans
,
4877 struct btrfs_root
*root
,
4878 struct btrfs_path
*path
,
4879 struct walk_control
*wc
)
4882 int level
= wc
->level
;
4883 struct extent_buffer
*eb
= path
->nodes
[level
];
4886 if (wc
->stage
== UPDATE_BACKREF
) {
4887 BUG_ON(wc
->shared_level
< level
);
4888 if (level
< wc
->shared_level
)
4891 BUG_ON(wc
->refs
[level
] <= 1);
4892 ret
= find_next_key(path
, level
+ 1, &wc
->update_progress
);
4896 wc
->stage
= DROP_REFERENCE
;
4897 wc
->shared_level
= -1;
4898 path
->slots
[level
] = 0;
4901 * check reference count again if the block isn't locked.
4902 * we should start walking down the tree again if reference
4905 if (!path
->locks
[level
]) {
4907 btrfs_tree_lock(eb
);
4908 btrfs_set_lock_blocking(eb
);
4909 path
->locks
[level
] = 1;
4911 ret
= btrfs_lookup_extent_info(trans
, root
,
4916 BUG_ON(wc
->refs
[level
] == 0);
4917 if (wc
->refs
[level
] == 1) {
4918 btrfs_tree_unlock(eb
);
4919 path
->locks
[level
] = 0;
4927 /* wc->stage == DROP_REFERENCE */
4928 BUG_ON(wc
->refs
[level
] > 1 && !path
->locks
[level
]);
4930 if (wc
->refs
[level
] == 1) {
4932 if (wc
->flags
[level
] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4933 ret
= btrfs_dec_ref(trans
, root
, eb
, 1);
4935 ret
= btrfs_dec_ref(trans
, root
, eb
, 0);
4938 /* make block locked assertion in clean_tree_block happy */
4939 if (!path
->locks
[level
] &&
4940 btrfs_header_generation(eb
) == trans
->transid
) {
4941 btrfs_tree_lock(eb
);
4942 btrfs_set_lock_blocking(eb
);
4943 path
->locks
[level
] = 1;
4945 clean_tree_block(trans
, root
, eb
);
4948 if (eb
== root
->node
) {
4949 if (wc
->flags
[level
] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4952 BUG_ON(root
->root_key
.objectid
!=
4953 btrfs_header_owner(eb
));
4955 if (wc
->flags
[level
+ 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4956 parent
= path
->nodes
[level
+ 1]->start
;
4958 BUG_ON(root
->root_key
.objectid
!=
4959 btrfs_header_owner(path
->nodes
[level
+ 1]));
4962 ret
= btrfs_free_extent(trans
, root
, eb
->start
, eb
->len
, parent
,
4963 root
->root_key
.objectid
, level
, 0);
4966 wc
->refs
[level
] = 0;
4967 wc
->flags
[level
] = 0;
4971 static noinline
int walk_down_tree(struct btrfs_trans_handle
*trans
,
4972 struct btrfs_root
*root
,
4973 struct btrfs_path
*path
,
4974 struct walk_control
*wc
)
4976 struct extent_buffer
*next
;
4977 struct extent_buffer
*cur
;
4981 int level
= wc
->level
;
4984 while (level
>= 0) {
4985 cur
= path
->nodes
[level
];
4986 BUG_ON(path
->slots
[level
] >= btrfs_header_nritems(cur
));
4988 ret
= walk_down_proc(trans
, root
, path
, wc
);
4995 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[level
]);
4996 blocksize
= btrfs_level_size(root
, level
- 1);
4997 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[level
]);
4999 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
5000 btrfs_tree_lock(next
);
5001 btrfs_set_lock_blocking(next
);
5004 BUG_ON(level
!= btrfs_header_level(next
));
5005 path
->nodes
[level
] = next
;
5006 path
->slots
[level
] = 0;
5007 path
->locks
[level
] = 1;
5013 static noinline
int walk_up_tree(struct btrfs_trans_handle
*trans
,
5014 struct btrfs_root
*root
,
5015 struct btrfs_path
*path
,
5016 struct walk_control
*wc
, int max_level
)
5018 int level
= wc
->level
;
5021 path
->slots
[level
] = btrfs_header_nritems(path
->nodes
[level
]);
5022 while (level
< max_level
&& path
->nodes
[level
]) {
5024 if (path
->slots
[level
] + 1 <
5025 btrfs_header_nritems(path
->nodes
[level
])) {
5026 path
->slots
[level
]++;
5029 ret
= walk_up_proc(trans
, root
, path
, wc
);
5033 if (path
->locks
[level
]) {
5034 btrfs_tree_unlock(path
->nodes
[level
]);
5035 path
->locks
[level
] = 0;
5037 free_extent_buffer(path
->nodes
[level
]);
5038 path
->nodes
[level
] = NULL
;
5046 * drop a subvolume tree.
5048 * this function traverses the tree freeing any blocks that only
5049 * referenced by the tree.
5051 * when a shared tree block is found. this function decreases its
5052 * reference count by one. if update_ref is true, this function
5053 * also make sure backrefs for the shared block and all lower level
5054 * blocks are properly updated.
5056 int btrfs_drop_snapshot(struct btrfs_root
*root
, int update_ref
)
5058 struct btrfs_path
*path
;
5059 struct btrfs_trans_handle
*trans
;
5060 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
5061 struct btrfs_root_item
*root_item
= &root
->root_item
;
5062 struct walk_control
*wc
;
5063 struct btrfs_key key
;
5068 path
= btrfs_alloc_path();
5071 wc
= kzalloc(sizeof(*wc
), GFP_NOFS
);
5074 trans
= btrfs_start_transaction(tree_root
, 1);
5076 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
5077 level
= btrfs_header_level(root
->node
);
5078 path
->nodes
[level
] = btrfs_lock_root_node(root
);
5079 btrfs_set_lock_blocking(path
->nodes
[level
]);
5080 path
->slots
[level
] = 0;
5081 path
->locks
[level
] = 1;
5082 memset(&wc
->update_progress
, 0,
5083 sizeof(wc
->update_progress
));
5085 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
5086 memcpy(&wc
->update_progress
, &key
,
5087 sizeof(wc
->update_progress
));
5089 level
= root_item
->drop_level
;
5091 path
->lowest_level
= level
;
5092 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5093 path
->lowest_level
= 0;
5098 btrfs_node_key_to_cpu(path
->nodes
[level
], &key
,
5099 path
->slots
[level
]);
5100 WARN_ON(memcmp(&key
, &wc
->update_progress
, sizeof(key
)));
5103 * unlock our path, this is safe because only this
5104 * function is allowed to delete this snapshot
5106 btrfs_unlock_up_safe(path
, 0);
5108 level
= btrfs_header_level(root
->node
);
5110 btrfs_tree_lock(path
->nodes
[level
]);
5111 btrfs_set_lock_blocking(path
->nodes
[level
]);
5113 ret
= btrfs_lookup_extent_info(trans
, root
,
5114 path
->nodes
[level
]->start
,
5115 path
->nodes
[level
]->len
,
5119 BUG_ON(wc
->refs
[level
] == 0);
5121 if (level
== root_item
->drop_level
)
5124 btrfs_tree_unlock(path
->nodes
[level
]);
5125 WARN_ON(wc
->refs
[level
] != 1);
5131 wc
->shared_level
= -1;
5132 wc
->stage
= DROP_REFERENCE
;
5133 wc
->update_ref
= update_ref
;
5137 ret
= walk_down_tree(trans
, root
, path
, wc
);
5143 ret
= walk_up_tree(trans
, root
, path
, wc
, BTRFS_MAX_LEVEL
);
5150 BUG_ON(wc
->stage
!= DROP_REFERENCE
);
5154 if (wc
->stage
== DROP_REFERENCE
) {
5156 btrfs_node_key(path
->nodes
[level
],
5157 &root_item
->drop_progress
,
5158 path
->slots
[level
]);
5159 root_item
->drop_level
= level
;
5162 BUG_ON(wc
->level
== 0);
5163 if (trans
->transaction
->in_commit
||
5164 trans
->transaction
->delayed_refs
.flushing
) {
5165 ret
= btrfs_update_root(trans
, tree_root
,
5170 btrfs_end_transaction(trans
, tree_root
);
5171 trans
= btrfs_start_transaction(tree_root
, 1);
5173 unsigned long update
;
5174 update
= trans
->delayed_ref_updates
;
5175 trans
->delayed_ref_updates
= 0;
5177 btrfs_run_delayed_refs(trans
, tree_root
,
5181 btrfs_release_path(root
, path
);
5184 ret
= btrfs_del_root(trans
, tree_root
, &root
->root_key
);
5187 free_extent_buffer(root
->node
);
5188 free_extent_buffer(root
->commit_root
);
5191 btrfs_end_transaction(trans
, tree_root
);
5193 btrfs_free_path(path
);
5198 * drop subtree rooted at tree block 'node'.
5200 * NOTE: this function will unlock and release tree block 'node'
5202 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
5203 struct btrfs_root
*root
,
5204 struct extent_buffer
*node
,
5205 struct extent_buffer
*parent
)
5207 struct btrfs_path
*path
;
5208 struct walk_control
*wc
;
5214 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
5216 path
= btrfs_alloc_path();
5219 wc
= kzalloc(sizeof(*wc
), GFP_NOFS
);
5222 btrfs_assert_tree_locked(parent
);
5223 parent_level
= btrfs_header_level(parent
);
5224 extent_buffer_get(parent
);
5225 path
->nodes
[parent_level
] = parent
;
5226 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
5228 btrfs_assert_tree_locked(node
);
5229 level
= btrfs_header_level(node
);
5230 path
->nodes
[level
] = node
;
5231 path
->slots
[level
] = 0;
5232 path
->locks
[level
] = 1;
5234 wc
->refs
[parent_level
] = 1;
5235 wc
->flags
[parent_level
] = BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5237 wc
->shared_level
= -1;
5238 wc
->stage
= DROP_REFERENCE
;
5243 wret
= walk_down_tree(trans
, root
, path
, wc
);
5249 wret
= walk_up_tree(trans
, root
, path
, wc
, parent_level
);
5257 btrfs_free_path(path
);
5262 static unsigned long calc_ra(unsigned long start
, unsigned long last
,
5265 return min(last
, start
+ nr
- 1);
5268 static noinline
int relocate_inode_pages(struct inode
*inode
, u64 start
,
5273 unsigned long first_index
;
5274 unsigned long last_index
;
5277 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
5278 struct file_ra_state
*ra
;
5279 struct btrfs_ordered_extent
*ordered
;
5280 unsigned int total_read
= 0;
5281 unsigned int total_dirty
= 0;
5284 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
5286 mutex_lock(&inode
->i_mutex
);
5287 first_index
= start
>> PAGE_CACHE_SHIFT
;
5288 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
5290 /* make sure the dirty trick played by the caller work */
5291 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
5292 first_index
, last_index
);
5296 file_ra_state_init(ra
, inode
->i_mapping
);
5298 for (i
= first_index
; i
<= last_index
; i
++) {
5299 if (total_read
% ra
->ra_pages
== 0) {
5300 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
5301 calc_ra(i
, last_index
, ra
->ra_pages
));
5305 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
5307 page
= grab_cache_page(inode
->i_mapping
, i
);
5312 if (!PageUptodate(page
)) {
5313 btrfs_readpage(NULL
, page
);
5315 if (!PageUptodate(page
)) {
5317 page_cache_release(page
);
5322 wait_on_page_writeback(page
);
5324 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
5325 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
5326 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5328 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
5330 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5332 page_cache_release(page
);
5333 btrfs_start_ordered_extent(inode
, ordered
, 1);
5334 btrfs_put_ordered_extent(ordered
);
5337 set_page_extent_mapped(page
);
5339 if (i
== first_index
)
5340 set_extent_bits(io_tree
, page_start
, page_end
,
5341 EXTENT_BOUNDARY
, GFP_NOFS
);
5342 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
5344 set_page_dirty(page
);
5347 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5349 page_cache_release(page
);
5354 mutex_unlock(&inode
->i_mutex
);
5355 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
5359 static noinline
int relocate_data_extent(struct inode
*reloc_inode
,
5360 struct btrfs_key
*extent_key
,
5363 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
5364 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
5365 struct extent_map
*em
;
5366 u64 start
= extent_key
->objectid
- offset
;
5367 u64 end
= start
+ extent_key
->offset
- 1;
5369 em
= alloc_extent_map(GFP_NOFS
);
5370 BUG_ON(!em
|| IS_ERR(em
));
5373 em
->len
= extent_key
->offset
;
5374 em
->block_len
= extent_key
->offset
;
5375 em
->block_start
= extent_key
->objectid
;
5376 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
5377 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
5379 /* setup extent map to cheat btrfs_readpage */
5380 lock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
5383 spin_lock(&em_tree
->lock
);
5384 ret
= add_extent_mapping(em_tree
, em
);
5385 spin_unlock(&em_tree
->lock
);
5386 if (ret
!= -EEXIST
) {
5387 free_extent_map(em
);
5390 btrfs_drop_extent_cache(reloc_inode
, start
, end
, 0);
5392 unlock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
5394 return relocate_inode_pages(reloc_inode
, start
, extent_key
->offset
);
5397 struct btrfs_ref_path
{
5399 u64 nodes
[BTRFS_MAX_LEVEL
];
5401 u64 root_generation
;
5408 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
5409 u64 new_nodes
[BTRFS_MAX_LEVEL
];
5412 struct disk_extent
{
5423 static int is_cowonly_root(u64 root_objectid
)
5425 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
5426 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
5427 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
5428 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
5429 root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
5430 root_objectid
== BTRFS_CSUM_TREE_OBJECTID
)
5435 static noinline
int __next_ref_path(struct btrfs_trans_handle
*trans
,
5436 struct btrfs_root
*extent_root
,
5437 struct btrfs_ref_path
*ref_path
,
5440 struct extent_buffer
*leaf
;
5441 struct btrfs_path
*path
;
5442 struct btrfs_extent_ref
*ref
;
5443 struct btrfs_key key
;
5444 struct btrfs_key found_key
;
5450 path
= btrfs_alloc_path();
5455 ref_path
->lowest_level
= -1;
5456 ref_path
->current_level
= -1;
5457 ref_path
->shared_level
= -1;
5461 level
= ref_path
->current_level
- 1;
5462 while (level
>= -1) {
5464 if (level
< ref_path
->lowest_level
)
5468 bytenr
= ref_path
->nodes
[level
];
5470 bytenr
= ref_path
->extent_start
;
5471 BUG_ON(bytenr
== 0);
5473 parent
= ref_path
->nodes
[level
+ 1];
5474 ref_path
->nodes
[level
+ 1] = 0;
5475 ref_path
->current_level
= level
;
5476 BUG_ON(parent
== 0);
5478 key
.objectid
= bytenr
;
5479 key
.offset
= parent
+ 1;
5480 key
.type
= BTRFS_EXTENT_REF_KEY
;
5482 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
5487 leaf
= path
->nodes
[0];
5488 nritems
= btrfs_header_nritems(leaf
);
5489 if (path
->slots
[0] >= nritems
) {
5490 ret
= btrfs_next_leaf(extent_root
, path
);
5495 leaf
= path
->nodes
[0];
5498 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5499 if (found_key
.objectid
== bytenr
&&
5500 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
5501 if (level
< ref_path
->shared_level
)
5502 ref_path
->shared_level
= level
;
5507 btrfs_release_path(extent_root
, path
);
5510 /* reached lowest level */
5514 level
= ref_path
->current_level
;
5515 while (level
< BTRFS_MAX_LEVEL
- 1) {
5519 bytenr
= ref_path
->nodes
[level
];
5521 bytenr
= ref_path
->extent_start
;
5523 BUG_ON(bytenr
== 0);
5525 key
.objectid
= bytenr
;
5527 key
.type
= BTRFS_EXTENT_REF_KEY
;
5529 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
5533 leaf
= path
->nodes
[0];
5534 nritems
= btrfs_header_nritems(leaf
);
5535 if (path
->slots
[0] >= nritems
) {
5536 ret
= btrfs_next_leaf(extent_root
, path
);
5540 /* the extent was freed by someone */
5541 if (ref_path
->lowest_level
== level
)
5543 btrfs_release_path(extent_root
, path
);
5546 leaf
= path
->nodes
[0];
5549 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5550 if (found_key
.objectid
!= bytenr
||
5551 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
5552 /* the extent was freed by someone */
5553 if (ref_path
->lowest_level
== level
) {
5557 btrfs_release_path(extent_root
, path
);
5561 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
5562 struct btrfs_extent_ref
);
5563 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
5564 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
5566 level
= (int)ref_objectid
;
5567 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
5568 ref_path
->lowest_level
= level
;
5569 ref_path
->current_level
= level
;
5570 ref_path
->nodes
[level
] = bytenr
;
5572 WARN_ON(ref_objectid
!= level
);
5575 WARN_ON(level
!= -1);
5579 if (ref_path
->lowest_level
== level
) {
5580 ref_path
->owner_objectid
= ref_objectid
;
5581 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
5585 * the block is tree root or the block isn't in reference
5588 if (found_key
.objectid
== found_key
.offset
||
5589 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
5590 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
5591 ref_path
->root_generation
=
5592 btrfs_ref_generation(leaf
, ref
);
5594 /* special reference from the tree log */
5595 ref_path
->nodes
[0] = found_key
.offset
;
5596 ref_path
->current_level
= 0;
5603 BUG_ON(ref_path
->nodes
[level
] != 0);
5604 ref_path
->nodes
[level
] = found_key
.offset
;
5605 ref_path
->current_level
= level
;
5608 * the reference was created in the running transaction,
5609 * no need to continue walking up.
5611 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
5612 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
5613 ref_path
->root_generation
=
5614 btrfs_ref_generation(leaf
, ref
);
5619 btrfs_release_path(extent_root
, path
);
5622 /* reached max tree level, but no tree root found. */
5625 btrfs_free_path(path
);
5629 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
5630 struct btrfs_root
*extent_root
,
5631 struct btrfs_ref_path
*ref_path
,
5634 memset(ref_path
, 0, sizeof(*ref_path
));
5635 ref_path
->extent_start
= extent_start
;
5637 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
5640 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
5641 struct btrfs_root
*extent_root
,
5642 struct btrfs_ref_path
*ref_path
)
5644 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
5647 static noinline
int get_new_locations(struct inode
*reloc_inode
,
5648 struct btrfs_key
*extent_key
,
5649 u64 offset
, int no_fragment
,
5650 struct disk_extent
**extents
,
5653 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
5654 struct btrfs_path
*path
;
5655 struct btrfs_file_extent_item
*fi
;
5656 struct extent_buffer
*leaf
;
5657 struct disk_extent
*exts
= *extents
;
5658 struct btrfs_key found_key
;
5663 int max
= *nr_extents
;
5666 WARN_ON(!no_fragment
&& *extents
);
5669 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
5674 path
= btrfs_alloc_path();
5677 cur_pos
= extent_key
->objectid
- offset
;
5678 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
5679 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
5689 leaf
= path
->nodes
[0];
5690 nritems
= btrfs_header_nritems(leaf
);
5691 if (path
->slots
[0] >= nritems
) {
5692 ret
= btrfs_next_leaf(root
, path
);
5697 leaf
= path
->nodes
[0];
5700 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5701 if (found_key
.offset
!= cur_pos
||
5702 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
5703 found_key
.objectid
!= reloc_inode
->i_ino
)
5706 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5707 struct btrfs_file_extent_item
);
5708 if (btrfs_file_extent_type(leaf
, fi
) !=
5709 BTRFS_FILE_EXTENT_REG
||
5710 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
5714 struct disk_extent
*old
= exts
;
5716 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
5717 memcpy(exts
, old
, sizeof(*exts
) * nr
);
5718 if (old
!= *extents
)
5722 exts
[nr
].disk_bytenr
=
5723 btrfs_file_extent_disk_bytenr(leaf
, fi
);
5724 exts
[nr
].disk_num_bytes
=
5725 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
5726 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
5727 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
5728 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
5729 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
5730 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
5731 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
5733 BUG_ON(exts
[nr
].offset
> 0);
5734 BUG_ON(exts
[nr
].compression
|| exts
[nr
].encryption
);
5735 BUG_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
5737 cur_pos
+= exts
[nr
].num_bytes
;
5740 if (cur_pos
+ offset
>= last_byte
)
5750 BUG_ON(cur_pos
+ offset
> last_byte
);
5751 if (cur_pos
+ offset
< last_byte
) {
5757 btrfs_free_path(path
);
5759 if (exts
!= *extents
)
5768 static noinline
int replace_one_extent(struct btrfs_trans_handle
*trans
,
5769 struct btrfs_root
*root
,
5770 struct btrfs_path
*path
,
5771 struct btrfs_key
*extent_key
,
5772 struct btrfs_key
*leaf_key
,
5773 struct btrfs_ref_path
*ref_path
,
5774 struct disk_extent
*new_extents
,
5777 struct extent_buffer
*leaf
;
5778 struct btrfs_file_extent_item
*fi
;
5779 struct inode
*inode
= NULL
;
5780 struct btrfs_key key
;
5785 u64 search_end
= (u64
)-1;
5788 int extent_locked
= 0;
5792 memcpy(&key
, leaf_key
, sizeof(key
));
5793 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
5794 if (key
.objectid
< ref_path
->owner_objectid
||
5795 (key
.objectid
== ref_path
->owner_objectid
&&
5796 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
5797 key
.objectid
= ref_path
->owner_objectid
;
5798 key
.type
= BTRFS_EXTENT_DATA_KEY
;
5804 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
5808 leaf
= path
->nodes
[0];
5809 nritems
= btrfs_header_nritems(leaf
);
5811 if (extent_locked
&& ret
> 0) {
5813 * the file extent item was modified by someone
5814 * before the extent got locked.
5816 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
5817 lock_end
, GFP_NOFS
);
5821 if (path
->slots
[0] >= nritems
) {
5822 if (++nr_scaned
> 2)
5825 BUG_ON(extent_locked
);
5826 ret
= btrfs_next_leaf(root
, path
);
5831 leaf
= path
->nodes
[0];
5832 nritems
= btrfs_header_nritems(leaf
);
5835 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5837 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
5838 if ((key
.objectid
> ref_path
->owner_objectid
) ||
5839 (key
.objectid
== ref_path
->owner_objectid
&&
5840 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
5841 key
.offset
>= search_end
)
5845 if (inode
&& key
.objectid
!= inode
->i_ino
) {
5846 BUG_ON(extent_locked
);
5847 btrfs_release_path(root
, path
);
5848 mutex_unlock(&inode
->i_mutex
);
5854 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
5859 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5860 struct btrfs_file_extent_item
);
5861 extent_type
= btrfs_file_extent_type(leaf
, fi
);
5862 if ((extent_type
!= BTRFS_FILE_EXTENT_REG
&&
5863 extent_type
!= BTRFS_FILE_EXTENT_PREALLOC
) ||
5864 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
5865 extent_key
->objectid
)) {
5871 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
5872 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
5874 if (search_end
== (u64
)-1) {
5875 search_end
= key
.offset
- ext_offset
+
5876 btrfs_file_extent_ram_bytes(leaf
, fi
);
5879 if (!extent_locked
) {
5880 lock_start
= key
.offset
;
5881 lock_end
= lock_start
+ num_bytes
- 1;
5883 if (lock_start
> key
.offset
||
5884 lock_end
+ 1 < key
.offset
+ num_bytes
) {
5885 unlock_extent(&BTRFS_I(inode
)->io_tree
,
5886 lock_start
, lock_end
, GFP_NOFS
);
5892 btrfs_release_path(root
, path
);
5894 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
5895 key
.objectid
, root
);
5896 if (inode
->i_state
& I_NEW
) {
5897 BTRFS_I(inode
)->root
= root
;
5898 BTRFS_I(inode
)->location
.objectid
=
5900 BTRFS_I(inode
)->location
.type
=
5901 BTRFS_INODE_ITEM_KEY
;
5902 BTRFS_I(inode
)->location
.offset
= 0;
5903 btrfs_read_locked_inode(inode
);
5904 unlock_new_inode(inode
);
5907 * some code call btrfs_commit_transaction while
5908 * holding the i_mutex, so we can't use mutex_lock
5911 if (is_bad_inode(inode
) ||
5912 !mutex_trylock(&inode
->i_mutex
)) {
5915 key
.offset
= (u64
)-1;
5920 if (!extent_locked
) {
5921 struct btrfs_ordered_extent
*ordered
;
5923 btrfs_release_path(root
, path
);
5925 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
5926 lock_end
, GFP_NOFS
);
5927 ordered
= btrfs_lookup_first_ordered_extent(inode
,
5930 ordered
->file_offset
<= lock_end
&&
5931 ordered
->file_offset
+ ordered
->len
> lock_start
) {
5932 unlock_extent(&BTRFS_I(inode
)->io_tree
,
5933 lock_start
, lock_end
, GFP_NOFS
);
5934 btrfs_start_ordered_extent(inode
, ordered
, 1);
5935 btrfs_put_ordered_extent(ordered
);
5936 key
.offset
+= num_bytes
;
5940 btrfs_put_ordered_extent(ordered
);
5946 if (nr_extents
== 1) {
5947 /* update extent pointer in place */
5948 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
5949 new_extents
[0].disk_bytenr
);
5950 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
5951 new_extents
[0].disk_num_bytes
);
5952 btrfs_mark_buffer_dirty(leaf
);
5954 btrfs_drop_extent_cache(inode
, key
.offset
,
5955 key
.offset
+ num_bytes
- 1, 0);
5957 ret
= btrfs_inc_extent_ref(trans
, root
,
5958 new_extents
[0].disk_bytenr
,
5959 new_extents
[0].disk_num_bytes
,
5961 root
->root_key
.objectid
,
5966 ret
= btrfs_free_extent(trans
, root
,
5967 extent_key
->objectid
,
5970 btrfs_header_owner(leaf
),
5971 btrfs_header_generation(leaf
),
5975 btrfs_release_path(root
, path
);
5976 key
.offset
+= num_bytes
;
5984 * drop old extent pointer at first, then insert the
5985 * new pointers one bye one
5987 btrfs_release_path(root
, path
);
5988 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
5989 key
.offset
+ num_bytes
,
5990 key
.offset
, &alloc_hint
);
5993 for (i
= 0; i
< nr_extents
; i
++) {
5994 if (ext_offset
>= new_extents
[i
].num_bytes
) {
5995 ext_offset
-= new_extents
[i
].num_bytes
;
5998 extent_len
= min(new_extents
[i
].num_bytes
-
5999 ext_offset
, num_bytes
);
6001 ret
= btrfs_insert_empty_item(trans
, root
,
6006 leaf
= path
->nodes
[0];
6007 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6008 struct btrfs_file_extent_item
);
6009 btrfs_set_file_extent_generation(leaf
, fi
,
6011 btrfs_set_file_extent_type(leaf
, fi
,
6012 BTRFS_FILE_EXTENT_REG
);
6013 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
6014 new_extents
[i
].disk_bytenr
);
6015 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
6016 new_extents
[i
].disk_num_bytes
);
6017 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
6018 new_extents
[i
].ram_bytes
);
6020 btrfs_set_file_extent_compression(leaf
, fi
,
6021 new_extents
[i
].compression
);
6022 btrfs_set_file_extent_encryption(leaf
, fi
,
6023 new_extents
[i
].encryption
);
6024 btrfs_set_file_extent_other_encoding(leaf
, fi
,
6025 new_extents
[i
].other_encoding
);
6027 btrfs_set_file_extent_num_bytes(leaf
, fi
,
6029 ext_offset
+= new_extents
[i
].offset
;
6030 btrfs_set_file_extent_offset(leaf
, fi
,
6032 btrfs_mark_buffer_dirty(leaf
);
6034 btrfs_drop_extent_cache(inode
, key
.offset
,
6035 key
.offset
+ extent_len
- 1, 0);
6037 ret
= btrfs_inc_extent_ref(trans
, root
,
6038 new_extents
[i
].disk_bytenr
,
6039 new_extents
[i
].disk_num_bytes
,
6041 root
->root_key
.objectid
,
6042 trans
->transid
, key
.objectid
);
6044 btrfs_release_path(root
, path
);
6046 inode_add_bytes(inode
, extent_len
);
6049 num_bytes
-= extent_len
;
6050 key
.offset
+= extent_len
;
6055 BUG_ON(i
>= nr_extents
);
6059 if (extent_locked
) {
6060 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
6061 lock_end
, GFP_NOFS
);
6065 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
6066 key
.offset
>= search_end
)
6073 btrfs_release_path(root
, path
);
6075 mutex_unlock(&inode
->i_mutex
);
6076 if (extent_locked
) {
6077 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
6078 lock_end
, GFP_NOFS
);
6085 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
6086 struct btrfs_root
*root
,
6087 struct extent_buffer
*buf
, u64 orig_start
)
6092 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
6093 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
6095 level
= btrfs_header_level(buf
);
6097 struct btrfs_leaf_ref
*ref
;
6098 struct btrfs_leaf_ref
*orig_ref
;
6100 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
6104 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
6106 btrfs_free_leaf_ref(root
, orig_ref
);
6110 ref
->nritems
= orig_ref
->nritems
;
6111 memcpy(ref
->extents
, orig_ref
->extents
,
6112 sizeof(ref
->extents
[0]) * ref
->nritems
);
6114 btrfs_free_leaf_ref(root
, orig_ref
);
6116 ref
->root_gen
= trans
->transid
;
6117 ref
->bytenr
= buf
->start
;
6118 ref
->owner
= btrfs_header_owner(buf
);
6119 ref
->generation
= btrfs_header_generation(buf
);
6121 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
6123 btrfs_free_leaf_ref(root
, ref
);
6128 static noinline
int invalidate_extent_cache(struct btrfs_root
*root
,
6129 struct extent_buffer
*leaf
,
6130 struct btrfs_block_group_cache
*group
,
6131 struct btrfs_root
*target_root
)
6133 struct btrfs_key key
;
6134 struct inode
*inode
= NULL
;
6135 struct btrfs_file_extent_item
*fi
;
6137 u64 skip_objectid
= 0;
6141 nritems
= btrfs_header_nritems(leaf
);
6142 for (i
= 0; i
< nritems
; i
++) {
6143 btrfs_item_key_to_cpu(leaf
, &key
, i
);
6144 if (key
.objectid
== skip_objectid
||
6145 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
6147 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
6148 if (btrfs_file_extent_type(leaf
, fi
) ==
6149 BTRFS_FILE_EXTENT_INLINE
)
6151 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
6153 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
6155 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
6156 key
.objectid
, target_root
, 1);
6159 skip_objectid
= key
.objectid
;
6162 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
6164 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
6165 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
6166 btrfs_drop_extent_cache(inode
, key
.offset
,
6167 key
.offset
+ num_bytes
- 1, 1);
6168 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
6169 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
6176 static noinline
int replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
6177 struct btrfs_root
*root
,
6178 struct extent_buffer
*leaf
,
6179 struct btrfs_block_group_cache
*group
,
6180 struct inode
*reloc_inode
)
6182 struct btrfs_key key
;
6183 struct btrfs_key extent_key
;
6184 struct btrfs_file_extent_item
*fi
;
6185 struct btrfs_leaf_ref
*ref
;
6186 struct disk_extent
*new_extent
;
6195 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
6196 BUG_ON(!new_extent
);
6198 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
6202 nritems
= btrfs_header_nritems(leaf
);
6203 for (i
= 0; i
< nritems
; i
++) {
6204 btrfs_item_key_to_cpu(leaf
, &key
, i
);
6205 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
6207 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
6208 if (btrfs_file_extent_type(leaf
, fi
) ==
6209 BTRFS_FILE_EXTENT_INLINE
)
6211 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
6212 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
6217 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
6218 bytenr
+ num_bytes
<= group
->key
.objectid
)
6221 extent_key
.objectid
= bytenr
;
6222 extent_key
.offset
= num_bytes
;
6223 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
6225 ret
= get_new_locations(reloc_inode
, &extent_key
,
6226 group
->key
.objectid
, 1,
6227 &new_extent
, &nr_extent
);
6232 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
6233 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
6234 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
6235 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
6237 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
6238 new_extent
->disk_bytenr
);
6239 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
6240 new_extent
->disk_num_bytes
);
6241 btrfs_mark_buffer_dirty(leaf
);
6243 ret
= btrfs_inc_extent_ref(trans
, root
,
6244 new_extent
->disk_bytenr
,
6245 new_extent
->disk_num_bytes
,
6247 root
->root_key
.objectid
,
6248 trans
->transid
, key
.objectid
);
6251 ret
= btrfs_free_extent(trans
, root
,
6252 bytenr
, num_bytes
, leaf
->start
,
6253 btrfs_header_owner(leaf
),
6254 btrfs_header_generation(leaf
),
6260 BUG_ON(ext_index
+ 1 != ref
->nritems
);
6261 btrfs_free_leaf_ref(root
, ref
);
6265 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
6266 struct btrfs_root
*root
)
6268 struct btrfs_root
*reloc_root
;
6271 if (root
->reloc_root
) {
6272 reloc_root
= root
->reloc_root
;
6273 root
->reloc_root
= NULL
;
6274 list_add(&reloc_root
->dead_list
,
6275 &root
->fs_info
->dead_reloc_roots
);
6277 btrfs_set_root_bytenr(&reloc_root
->root_item
,
6278 reloc_root
->node
->start
);
6279 btrfs_set_root_level(&root
->root_item
,
6280 btrfs_header_level(reloc_root
->node
));
6281 memset(&reloc_root
->root_item
.drop_progress
, 0,
6282 sizeof(struct btrfs_disk_key
));
6283 reloc_root
->root_item
.drop_level
= 0;
6285 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
6286 &reloc_root
->root_key
,
6287 &reloc_root
->root_item
);
6293 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
6295 struct btrfs_trans_handle
*trans
;
6296 struct btrfs_root
*reloc_root
;
6297 struct btrfs_root
*prev_root
= NULL
;
6298 struct list_head dead_roots
;
6302 INIT_LIST_HEAD(&dead_roots
);
6303 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
6305 while (!list_empty(&dead_roots
)) {
6306 reloc_root
= list_entry(dead_roots
.prev
,
6307 struct btrfs_root
, dead_list
);
6308 list_del_init(&reloc_root
->dead_list
);
6310 BUG_ON(reloc_root
->commit_root
!= NULL
);
6312 trans
= btrfs_join_transaction(root
, 1);
6315 mutex_lock(&root
->fs_info
->drop_mutex
);
6316 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
6319 mutex_unlock(&root
->fs_info
->drop_mutex
);
6321 nr
= trans
->blocks_used
;
6322 ret
= btrfs_end_transaction(trans
, root
);
6324 btrfs_btree_balance_dirty(root
, nr
);
6327 free_extent_buffer(reloc_root
->node
);
6329 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
6330 &reloc_root
->root_key
);
6332 mutex_unlock(&root
->fs_info
->drop_mutex
);
6334 nr
= trans
->blocks_used
;
6335 ret
= btrfs_end_transaction(trans
, root
);
6337 btrfs_btree_balance_dirty(root
, nr
);
6340 prev_root
= reloc_root
;
6343 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
6349 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
6351 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
6355 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
6357 struct btrfs_root
*reloc_root
;
6358 struct btrfs_trans_handle
*trans
;
6359 struct btrfs_key location
;
6363 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
6364 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
6366 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
6367 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
6370 trans
= btrfs_start_transaction(root
, 1);
6372 ret
= btrfs_commit_transaction(trans
, root
);
6376 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
6377 location
.offset
= (u64
)-1;
6378 location
.type
= BTRFS_ROOT_ITEM_KEY
;
6380 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
6381 BUG_ON(!reloc_root
);
6382 btrfs_orphan_cleanup(reloc_root
);
6386 static noinline
int init_reloc_tree(struct btrfs_trans_handle
*trans
,
6387 struct btrfs_root
*root
)
6389 struct btrfs_root
*reloc_root
;
6390 struct extent_buffer
*eb
;
6391 struct btrfs_root_item
*root_item
;
6392 struct btrfs_key root_key
;
6395 BUG_ON(!root
->ref_cows
);
6396 if (root
->reloc_root
)
6399 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
6402 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
6403 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
6406 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
6407 root_key
.offset
= root
->root_key
.objectid
;
6408 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6410 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
6411 btrfs_set_root_refs(root_item
, 0);
6412 btrfs_set_root_bytenr(root_item
, eb
->start
);
6413 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
6414 btrfs_set_root_generation(root_item
, trans
->transid
);
6416 btrfs_tree_unlock(eb
);
6417 free_extent_buffer(eb
);
6419 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
6420 &root_key
, root_item
);
6424 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
6426 BUG_ON(!reloc_root
);
6427 reloc_root
->last_trans
= trans
->transid
;
6428 reloc_root
->commit_root
= NULL
;
6429 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
6431 root
->reloc_root
= reloc_root
;
6436 * Core function of space balance.
6438 * The idea is using reloc trees to relocate tree blocks in reference
6439 * counted roots. There is one reloc tree for each subvol, and all
6440 * reloc trees share same root key objectid. Reloc trees are snapshots
6441 * of the latest committed roots of subvols (root->commit_root).
6443 * To relocate a tree block referenced by a subvol, there are two steps.
6444 * COW the block through subvol's reloc tree, then update block pointer
6445 * in the subvol to point to the new block. Since all reloc trees share
6446 * same root key objectid, doing special handing for tree blocks owned
6447 * by them is easy. Once a tree block has been COWed in one reloc tree,
6448 * we can use the resulting new block directly when the same block is
6449 * required to COW again through other reloc trees. By this way, relocated
6450 * tree blocks are shared between reloc trees, so they are also shared
6453 static noinline
int relocate_one_path(struct btrfs_trans_handle
*trans
,
6454 struct btrfs_root
*root
,
6455 struct btrfs_path
*path
,
6456 struct btrfs_key
*first_key
,
6457 struct btrfs_ref_path
*ref_path
,
6458 struct btrfs_block_group_cache
*group
,
6459 struct inode
*reloc_inode
)
6461 struct btrfs_root
*reloc_root
;
6462 struct extent_buffer
*eb
= NULL
;
6463 struct btrfs_key
*keys
;
6467 int lowest_level
= 0;
6470 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
6471 lowest_level
= ref_path
->owner_objectid
;
6473 if (!root
->ref_cows
) {
6474 path
->lowest_level
= lowest_level
;
6475 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
6477 path
->lowest_level
= 0;
6478 btrfs_release_path(root
, path
);
6482 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
6483 ret
= init_reloc_tree(trans
, root
);
6485 reloc_root
= root
->reloc_root
;
6487 shared_level
= ref_path
->shared_level
;
6488 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
6490 keys
= ref_path
->node_keys
;
6491 nodes
= ref_path
->new_nodes
;
6492 memset(&keys
[shared_level
+ 1], 0,
6493 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
6494 memset(&nodes
[shared_level
+ 1], 0,
6495 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
6497 if (nodes
[lowest_level
] == 0) {
6498 path
->lowest_level
= lowest_level
;
6499 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
6502 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
6503 eb
= path
->nodes
[level
];
6504 if (!eb
|| eb
== reloc_root
->node
)
6506 nodes
[level
] = eb
->start
;
6508 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
6510 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
6513 ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6514 eb
= path
->nodes
[0];
6515 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
6516 group
, reloc_inode
);
6519 btrfs_release_path(reloc_root
, path
);
6521 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
6527 * replace tree blocks in the fs tree with tree blocks in
6530 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
6533 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6534 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
6537 extent_buffer_get(path
->nodes
[0]);
6538 eb
= path
->nodes
[0];
6539 btrfs_release_path(reloc_root
, path
);
6540 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
6542 free_extent_buffer(eb
);
6545 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
6546 path
->lowest_level
= 0;
6550 static noinline
int relocate_tree_block(struct btrfs_trans_handle
*trans
,
6551 struct btrfs_root
*root
,
6552 struct btrfs_path
*path
,
6553 struct btrfs_key
*first_key
,
6554 struct btrfs_ref_path
*ref_path
)
6558 ret
= relocate_one_path(trans
, root
, path
, first_key
,
6559 ref_path
, NULL
, NULL
);
6565 static noinline
int del_extent_zero(struct btrfs_trans_handle
*trans
,
6566 struct btrfs_root
*extent_root
,
6567 struct btrfs_path
*path
,
6568 struct btrfs_key
*extent_key
)
6572 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
6575 ret
= btrfs_del_item(trans
, extent_root
, path
);
6577 btrfs_release_path(extent_root
, path
);
6581 static noinline
struct btrfs_root
*read_ref_root(struct btrfs_fs_info
*fs_info
,
6582 struct btrfs_ref_path
*ref_path
)
6584 struct btrfs_key root_key
;
6586 root_key
.objectid
= ref_path
->root_objectid
;
6587 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6588 if (is_cowonly_root(ref_path
->root_objectid
))
6589 root_key
.offset
= 0;
6591 root_key
.offset
= (u64
)-1;
6593 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
6596 static noinline
int relocate_one_extent(struct btrfs_root
*extent_root
,
6597 struct btrfs_path
*path
,
6598 struct btrfs_key
*extent_key
,
6599 struct btrfs_block_group_cache
*group
,
6600 struct inode
*reloc_inode
, int pass
)
6602 struct btrfs_trans_handle
*trans
;
6603 struct btrfs_root
*found_root
;
6604 struct btrfs_ref_path
*ref_path
= NULL
;
6605 struct disk_extent
*new_extents
= NULL
;
6610 struct btrfs_key first_key
;
6614 trans
= btrfs_start_transaction(extent_root
, 1);
6617 if (extent_key
->objectid
== 0) {
6618 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
6622 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
6628 for (loops
= 0; ; loops
++) {
6630 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
6631 extent_key
->objectid
);
6633 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
6640 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
6641 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
6644 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
6645 BUG_ON(!found_root
);
6647 * for reference counted tree, only process reference paths
6648 * rooted at the latest committed root.
6650 if (found_root
->ref_cows
&&
6651 ref_path
->root_generation
!= found_root
->root_key
.offset
)
6654 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6657 * copy data extents to new locations
6659 u64 group_start
= group
->key
.objectid
;
6660 ret
= relocate_data_extent(reloc_inode
,
6669 level
= ref_path
->owner_objectid
;
6672 if (prev_block
!= ref_path
->nodes
[level
]) {
6673 struct extent_buffer
*eb
;
6674 u64 block_start
= ref_path
->nodes
[level
];
6675 u64 block_size
= btrfs_level_size(found_root
, level
);
6677 eb
= read_tree_block(found_root
, block_start
,
6679 btrfs_tree_lock(eb
);
6680 BUG_ON(level
!= btrfs_header_level(eb
));
6683 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
6685 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
6687 btrfs_tree_unlock(eb
);
6688 free_extent_buffer(eb
);
6689 prev_block
= block_start
;
6692 mutex_lock(&extent_root
->fs_info
->trans_mutex
);
6693 btrfs_record_root_in_trans(found_root
);
6694 mutex_unlock(&extent_root
->fs_info
->trans_mutex
);
6695 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6697 * try to update data extent references while
6698 * keeping metadata shared between snapshots.
6701 ret
= relocate_one_path(trans
, found_root
,
6702 path
, &first_key
, ref_path
,
6703 group
, reloc_inode
);
6709 * use fallback method to process the remaining
6713 u64 group_start
= group
->key
.objectid
;
6714 new_extents
= kmalloc(sizeof(*new_extents
),
6717 ret
= get_new_locations(reloc_inode
,
6725 ret
= replace_one_extent(trans
, found_root
,
6727 &first_key
, ref_path
,
6728 new_extents
, nr_extents
);
6730 ret
= relocate_tree_block(trans
, found_root
, path
,
6731 &first_key
, ref_path
);
6738 btrfs_end_transaction(trans
, extent_root
);
6745 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
6748 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
6749 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
6751 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
6752 if (num_devices
== 1) {
6753 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
6754 stripped
= flags
& ~stripped
;
6756 /* turn raid0 into single device chunks */
6757 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
6760 /* turn mirroring into duplication */
6761 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
6762 BTRFS_BLOCK_GROUP_RAID10
))
6763 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
6766 /* they already had raid on here, just return */
6767 if (flags
& stripped
)
6770 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
6771 stripped
= flags
& ~stripped
;
6773 /* switch duplicated blocks with raid1 */
6774 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
6775 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
6777 /* turn single device chunks into raid0 */
6778 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
6783 static int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
6784 struct btrfs_block_group_cache
*shrink_block_group
,
6787 struct btrfs_trans_handle
*trans
;
6788 u64 new_alloc_flags
;
6791 spin_lock(&shrink_block_group
->lock
);
6792 if (btrfs_block_group_used(&shrink_block_group
->item
) +
6793 shrink_block_group
->reserved
> 0) {
6794 spin_unlock(&shrink_block_group
->lock
);
6796 trans
= btrfs_start_transaction(root
, 1);
6797 spin_lock(&shrink_block_group
->lock
);
6799 new_alloc_flags
= update_block_group_flags(root
,
6800 shrink_block_group
->flags
);
6801 if (new_alloc_flags
!= shrink_block_group
->flags
) {
6803 btrfs_block_group_used(&shrink_block_group
->item
);
6805 calc
= shrink_block_group
->key
.offset
;
6807 spin_unlock(&shrink_block_group
->lock
);
6809 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
6810 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
6812 btrfs_end_transaction(trans
, root
);
6814 spin_unlock(&shrink_block_group
->lock
);
6819 int btrfs_prepare_block_group_relocation(struct btrfs_root
*root
,
6820 struct btrfs_block_group_cache
*group
)
6823 __alloc_chunk_for_shrink(root
, group
, 1);
6824 set_block_group_readonly(group
);
6829 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
6830 struct btrfs_root
*root
,
6831 u64 objectid
, u64 size
)
6833 struct btrfs_path
*path
;
6834 struct btrfs_inode_item
*item
;
6835 struct extent_buffer
*leaf
;
6838 path
= btrfs_alloc_path();
6842 path
->leave_spinning
= 1;
6843 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
6847 leaf
= path
->nodes
[0];
6848 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
6849 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
6850 btrfs_set_inode_generation(leaf
, item
, 1);
6851 btrfs_set_inode_size(leaf
, item
, size
);
6852 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
6853 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NOCOMPRESS
);
6854 btrfs_mark_buffer_dirty(leaf
);
6855 btrfs_release_path(root
, path
);
6857 btrfs_free_path(path
);
6861 static noinline
struct inode
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
6862 struct btrfs_block_group_cache
*group
)
6864 struct inode
*inode
= NULL
;
6865 struct btrfs_trans_handle
*trans
;
6866 struct btrfs_root
*root
;
6867 struct btrfs_key root_key
;
6868 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
6871 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
6872 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6873 root_key
.offset
= (u64
)-1;
6874 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
6876 return ERR_CAST(root
);
6878 trans
= btrfs_start_transaction(root
, 1);
6881 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
6885 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
6888 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
6889 group
->key
.offset
, 0, group
->key
.offset
,
6893 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
6894 if (inode
->i_state
& I_NEW
) {
6895 BTRFS_I(inode
)->root
= root
;
6896 BTRFS_I(inode
)->location
.objectid
= objectid
;
6897 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
6898 BTRFS_I(inode
)->location
.offset
= 0;
6899 btrfs_read_locked_inode(inode
);
6900 unlock_new_inode(inode
);
6901 BUG_ON(is_bad_inode(inode
));
6905 BTRFS_I(inode
)->index_cnt
= group
->key
.objectid
;
6907 err
= btrfs_orphan_add(trans
, inode
);
6909 btrfs_end_transaction(trans
, root
);
6913 inode
= ERR_PTR(err
);
6918 int btrfs_reloc_clone_csums(struct inode
*inode
, u64 file_pos
, u64 len
)
6921 struct btrfs_ordered_sum
*sums
;
6922 struct btrfs_sector_sum
*sector_sum
;
6923 struct btrfs_ordered_extent
*ordered
;
6924 struct btrfs_root
*root
= BTRFS_I(inode
)->root
;
6925 struct list_head list
;
6930 INIT_LIST_HEAD(&list
);
6932 ordered
= btrfs_lookup_ordered_extent(inode
, file_pos
);
6933 BUG_ON(ordered
->file_offset
!= file_pos
|| ordered
->len
!= len
);
6935 disk_bytenr
= file_pos
+ BTRFS_I(inode
)->index_cnt
;
6936 ret
= btrfs_lookup_csums_range(root
->fs_info
->csum_root
, disk_bytenr
,
6937 disk_bytenr
+ len
- 1, &list
);
6939 while (!list_empty(&list
)) {
6940 sums
= list_entry(list
.next
, struct btrfs_ordered_sum
, list
);
6941 list_del_init(&sums
->list
);
6943 sector_sum
= sums
->sums
;
6944 sums
->bytenr
= ordered
->start
;
6947 while (offset
< sums
->len
) {
6948 sector_sum
->bytenr
+= ordered
->start
- disk_bytenr
;
6950 offset
+= root
->sectorsize
;
6953 btrfs_add_ordered_sum(inode
, ordered
, sums
);
6955 btrfs_put_ordered_extent(ordered
);
6959 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
6961 struct btrfs_trans_handle
*trans
;
6962 struct btrfs_path
*path
;
6963 struct btrfs_fs_info
*info
= root
->fs_info
;
6964 struct extent_buffer
*leaf
;
6965 struct inode
*reloc_inode
;
6966 struct btrfs_block_group_cache
*block_group
;
6967 struct btrfs_key key
;
6976 root
= root
->fs_info
->extent_root
;
6978 block_group
= btrfs_lookup_block_group(info
, group_start
);
6979 BUG_ON(!block_group
);
6981 printk(KERN_INFO
"btrfs relocating block group %llu flags %llu\n",
6982 (unsigned long long)block_group
->key
.objectid
,
6983 (unsigned long long)block_group
->flags
);
6985 path
= btrfs_alloc_path();
6988 reloc_inode
= create_reloc_inode(info
, block_group
);
6989 BUG_ON(IS_ERR(reloc_inode
));
6991 __alloc_chunk_for_shrink(root
, block_group
, 1);
6992 set_block_group_readonly(block_group
);
6994 btrfs_start_delalloc_inodes(info
->tree_root
);
6995 btrfs_wait_ordered_extents(info
->tree_root
, 0);
7000 key
.objectid
= block_group
->key
.objectid
;
7003 cur_byte
= key
.objectid
;
7005 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7006 btrfs_commit_transaction(trans
, info
->tree_root
);
7008 mutex_lock(&root
->fs_info
->cleaner_mutex
);
7009 btrfs_clean_old_snapshots(info
->tree_root
);
7010 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
7011 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
7013 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7014 btrfs_commit_transaction(trans
, info
->tree_root
);
7017 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
7021 leaf
= path
->nodes
[0];
7022 nritems
= btrfs_header_nritems(leaf
);
7023 if (path
->slots
[0] >= nritems
) {
7024 ret
= btrfs_next_leaf(root
, path
);
7031 leaf
= path
->nodes
[0];
7032 nritems
= btrfs_header_nritems(leaf
);
7035 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
7037 if (key
.objectid
>= block_group
->key
.objectid
+
7038 block_group
->key
.offset
)
7041 if (progress
&& need_resched()) {
7042 btrfs_release_path(root
, path
);
7049 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
7050 key
.objectid
+ key
.offset
<= cur_byte
) {
7056 cur_byte
= key
.objectid
+ key
.offset
;
7057 btrfs_release_path(root
, path
);
7059 __alloc_chunk_for_shrink(root
, block_group
, 0);
7060 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
7066 key
.objectid
= cur_byte
;
7071 btrfs_release_path(root
, path
);
7074 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
7075 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
7078 if (total_found
> 0) {
7079 printk(KERN_INFO
"btrfs found %llu extents in pass %d\n",
7080 (unsigned long long)total_found
, pass
);
7082 if (total_found
== skipped
&& pass
> 2) {
7084 reloc_inode
= create_reloc_inode(info
, block_group
);
7090 /* delete reloc_inode */
7093 /* unpin extents in this range */
7094 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7095 btrfs_commit_transaction(trans
, info
->tree_root
);
7097 spin_lock(&block_group
->lock
);
7098 WARN_ON(block_group
->pinned
> 0);
7099 WARN_ON(block_group
->reserved
> 0);
7100 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
7101 spin_unlock(&block_group
->lock
);
7102 btrfs_put_block_group(block_group
);
7105 btrfs_free_path(path
);
7110 static int find_first_block_group(struct btrfs_root
*root
,
7111 struct btrfs_path
*path
, struct btrfs_key
*key
)
7114 struct btrfs_key found_key
;
7115 struct extent_buffer
*leaf
;
7118 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
7123 slot
= path
->slots
[0];
7124 leaf
= path
->nodes
[0];
7125 if (slot
>= btrfs_header_nritems(leaf
)) {
7126 ret
= btrfs_next_leaf(root
, path
);
7133 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
7135 if (found_key
.objectid
>= key
->objectid
&&
7136 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
7147 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
7149 struct btrfs_block_group_cache
*block_group
;
7150 struct btrfs_space_info
*space_info
;
7153 spin_lock(&info
->block_group_cache_lock
);
7154 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
7155 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
7157 rb_erase(&block_group
->cache_node
,
7158 &info
->block_group_cache_tree
);
7159 spin_unlock(&info
->block_group_cache_lock
);
7161 down_write(&block_group
->space_info
->groups_sem
);
7162 list_del(&block_group
->list
);
7163 up_write(&block_group
->space_info
->groups_sem
);
7165 if (block_group
->cached
== BTRFS_CACHE_STARTED
)
7166 wait_event(block_group
->caching_q
,
7167 block_group_cache_done(block_group
));
7169 btrfs_remove_free_space_cache(block_group
);
7171 WARN_ON(atomic_read(&block_group
->count
) != 1);
7174 spin_lock(&info
->block_group_cache_lock
);
7176 spin_unlock(&info
->block_group_cache_lock
);
7178 /* now that all the block groups are freed, go through and
7179 * free all the space_info structs. This is only called during
7180 * the final stages of unmount, and so we know nobody is
7181 * using them. We call synchronize_rcu() once before we start,
7182 * just to be on the safe side.
7186 while(!list_empty(&info
->space_info
)) {
7187 space_info
= list_entry(info
->space_info
.next
,
7188 struct btrfs_space_info
,
7191 list_del(&space_info
->list
);
7197 int btrfs_read_block_groups(struct btrfs_root
*root
)
7199 struct btrfs_path
*path
;
7201 struct btrfs_block_group_cache
*cache
;
7202 struct btrfs_fs_info
*info
= root
->fs_info
;
7203 struct btrfs_space_info
*space_info
;
7204 struct btrfs_key key
;
7205 struct btrfs_key found_key
;
7206 struct extent_buffer
*leaf
;
7208 root
= info
->extent_root
;
7211 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
7212 path
= btrfs_alloc_path();
7217 ret
= find_first_block_group(root
, path
, &key
);
7225 leaf
= path
->nodes
[0];
7226 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
7227 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
7233 atomic_set(&cache
->count
, 1);
7234 spin_lock_init(&cache
->lock
);
7235 spin_lock_init(&cache
->tree_lock
);
7236 cache
->fs_info
= info
;
7237 init_waitqueue_head(&cache
->caching_q
);
7238 INIT_LIST_HEAD(&cache
->list
);
7239 INIT_LIST_HEAD(&cache
->cluster_list
);
7242 * we only want to have 32k of ram per block group for keeping
7243 * track of free space, and if we pass 1/2 of that we want to
7244 * start converting things over to using bitmaps
7246 cache
->extents_thresh
= ((1024 * 32) / 2) /
7247 sizeof(struct btrfs_free_space
);
7249 read_extent_buffer(leaf
, &cache
->item
,
7250 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
7251 sizeof(cache
->item
));
7252 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
7254 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
7255 btrfs_release_path(root
, path
);
7256 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
7257 cache
->sectorsize
= root
->sectorsize
;
7259 remove_sb_from_cache(root
, cache
);
7262 * check for two cases, either we are full, and therefore
7263 * don't need to bother with the caching work since we won't
7264 * find any space, or we are empty, and we can just add all
7265 * the space in and be done with it. This saves us _alot_ of
7266 * time, particularly in the full case.
7268 if (found_key
.offset
== btrfs_block_group_used(&cache
->item
)) {
7269 cache
->cached
= BTRFS_CACHE_FINISHED
;
7270 } else if (btrfs_block_group_used(&cache
->item
) == 0) {
7271 cache
->cached
= BTRFS_CACHE_FINISHED
;
7272 add_new_free_space(cache
, root
->fs_info
,
7274 found_key
.objectid
+
7278 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
7279 btrfs_block_group_used(&cache
->item
),
7282 cache
->space_info
= space_info
;
7283 down_write(&space_info
->groups_sem
);
7284 list_add_tail(&cache
->list
, &space_info
->block_groups
);
7285 up_write(&space_info
->groups_sem
);
7287 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
7290 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
7291 if (btrfs_chunk_readonly(root
, cache
->key
.objectid
))
7292 set_block_group_readonly(cache
);
7296 btrfs_free_path(path
);
7300 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
7301 struct btrfs_root
*root
, u64 bytes_used
,
7302 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
7306 struct btrfs_root
*extent_root
;
7307 struct btrfs_block_group_cache
*cache
;
7309 extent_root
= root
->fs_info
->extent_root
;
7311 root
->fs_info
->last_trans_log_full_commit
= trans
->transid
;
7313 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
7317 cache
->key
.objectid
= chunk_offset
;
7318 cache
->key
.offset
= size
;
7319 cache
->key
.type
= BTRFS_BLOCK_GROUP_ITEM_KEY
;
7320 cache
->sectorsize
= root
->sectorsize
;
7323 * we only want to have 32k of ram per block group for keeping track
7324 * of free space, and if we pass 1/2 of that we want to start
7325 * converting things over to using bitmaps
7327 cache
->extents_thresh
= ((1024 * 32) / 2) /
7328 sizeof(struct btrfs_free_space
);
7329 atomic_set(&cache
->count
, 1);
7330 spin_lock_init(&cache
->lock
);
7331 spin_lock_init(&cache
->tree_lock
);
7332 init_waitqueue_head(&cache
->caching_q
);
7333 INIT_LIST_HEAD(&cache
->list
);
7334 INIT_LIST_HEAD(&cache
->cluster_list
);
7336 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
7337 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
7338 cache
->flags
= type
;
7339 btrfs_set_block_group_flags(&cache
->item
, type
);
7341 cache
->cached
= BTRFS_CACHE_FINISHED
;
7342 remove_sb_from_cache(root
, cache
);
7344 add_new_free_space(cache
, root
->fs_info
, chunk_offset
,
7345 chunk_offset
+ size
);
7347 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
7348 &cache
->space_info
);
7350 down_write(&cache
->space_info
->groups_sem
);
7351 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
7352 up_write(&cache
->space_info
->groups_sem
);
7354 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
7357 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
7358 sizeof(cache
->item
));
7361 set_avail_alloc_bits(extent_root
->fs_info
, type
);
7366 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
7367 struct btrfs_root
*root
, u64 group_start
)
7369 struct btrfs_path
*path
;
7370 struct btrfs_block_group_cache
*block_group
;
7371 struct btrfs_free_cluster
*cluster
;
7372 struct btrfs_key key
;
7375 root
= root
->fs_info
->extent_root
;
7377 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
7378 BUG_ON(!block_group
);
7379 BUG_ON(!block_group
->ro
);
7381 memcpy(&key
, &block_group
->key
, sizeof(key
));
7383 /* make sure this block group isn't part of an allocation cluster */
7384 cluster
= &root
->fs_info
->data_alloc_cluster
;
7385 spin_lock(&cluster
->refill_lock
);
7386 btrfs_return_cluster_to_free_space(block_group
, cluster
);
7387 spin_unlock(&cluster
->refill_lock
);
7390 * make sure this block group isn't part of a metadata
7391 * allocation cluster
7393 cluster
= &root
->fs_info
->meta_alloc_cluster
;
7394 spin_lock(&cluster
->refill_lock
);
7395 btrfs_return_cluster_to_free_space(block_group
, cluster
);
7396 spin_unlock(&cluster
->refill_lock
);
7398 path
= btrfs_alloc_path();
7401 spin_lock(&root
->fs_info
->block_group_cache_lock
);
7402 rb_erase(&block_group
->cache_node
,
7403 &root
->fs_info
->block_group_cache_tree
);
7404 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
7406 down_write(&block_group
->space_info
->groups_sem
);
7408 * we must use list_del_init so people can check to see if they
7409 * are still on the list after taking the semaphore
7411 list_del_init(&block_group
->list
);
7412 up_write(&block_group
->space_info
->groups_sem
);
7414 if (block_group
->cached
== BTRFS_CACHE_STARTED
)
7415 wait_event(block_group
->caching_q
,
7416 block_group_cache_done(block_group
));
7418 btrfs_remove_free_space_cache(block_group
);
7420 spin_lock(&block_group
->space_info
->lock
);
7421 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
7422 block_group
->space_info
->bytes_readonly
-= block_group
->key
.offset
;
7423 spin_unlock(&block_group
->space_info
->lock
);
7425 btrfs_clear_space_info_full(root
->fs_info
);
7427 btrfs_put_block_group(block_group
);
7428 btrfs_put_block_group(block_group
);
7430 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
7436 ret
= btrfs_del_item(trans
, root
, path
);
7438 btrfs_free_path(path
);