2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/sort.h>
23 #include <linux/rcupdate.h>
24 #include <linux/kthread.h>
29 #include "print-tree.h"
30 #include "transaction.h"
33 #include "free-space-cache.h"
35 static int update_reserved_extents(struct btrfs_root
*root
,
36 u64 bytenr
, u64 num
, int reserve
);
37 static int update_block_group(struct btrfs_trans_handle
*trans
,
38 struct btrfs_root
*root
,
39 u64 bytenr
, u64 num_bytes
, int alloc
,
41 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
42 struct btrfs_root
*root
,
43 u64 bytenr
, u64 num_bytes
, u64 parent
,
44 u64 root_objectid
, u64 owner_objectid
,
45 u64 owner_offset
, int refs_to_drop
,
46 struct btrfs_delayed_extent_op
*extra_op
);
47 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op
*extent_op
,
48 struct extent_buffer
*leaf
,
49 struct btrfs_extent_item
*ei
);
50 static int alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
51 struct btrfs_root
*root
,
52 u64 parent
, u64 root_objectid
,
53 u64 flags
, u64 owner
, u64 offset
,
54 struct btrfs_key
*ins
, int ref_mod
);
55 static int alloc_reserved_tree_block(struct btrfs_trans_handle
*trans
,
56 struct btrfs_root
*root
,
57 u64 parent
, u64 root_objectid
,
58 u64 flags
, struct btrfs_disk_key
*key
,
59 int level
, struct btrfs_key
*ins
);
61 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
62 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
63 u64 flags
, int force
);
66 block_group_cache_done(struct btrfs_block_group_cache
*cache
)
69 return cache
->cached
== BTRFS_CACHE_FINISHED
;
72 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
74 return (cache
->flags
& bits
) == bits
;
78 * this adds the block group to the fs_info rb tree for the block group
81 static int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
82 struct btrfs_block_group_cache
*block_group
)
85 struct rb_node
*parent
= NULL
;
86 struct btrfs_block_group_cache
*cache
;
88 spin_lock(&info
->block_group_cache_lock
);
89 p
= &info
->block_group_cache_tree
.rb_node
;
93 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
95 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
97 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
100 spin_unlock(&info
->block_group_cache_lock
);
105 rb_link_node(&block_group
->cache_node
, parent
, p
);
106 rb_insert_color(&block_group
->cache_node
,
107 &info
->block_group_cache_tree
);
108 spin_unlock(&info
->block_group_cache_lock
);
114 * This will return the block group at or after bytenr if contains is 0, else
115 * it will return the block group that contains the bytenr
117 static struct btrfs_block_group_cache
*
118 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
121 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
125 spin_lock(&info
->block_group_cache_lock
);
126 n
= info
->block_group_cache_tree
.rb_node
;
129 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
131 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
132 start
= cache
->key
.objectid
;
134 if (bytenr
< start
) {
135 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
138 } else if (bytenr
> start
) {
139 if (contains
&& bytenr
<= end
) {
150 atomic_inc(&ret
->count
);
151 spin_unlock(&info
->block_group_cache_lock
);
156 void btrfs_free_super_mirror_extents(struct btrfs_fs_info
*info
)
158 u64 start
, end
, last
= 0;
162 ret
= find_first_extent_bit(&info
->pinned_extents
, last
,
163 &start
, &end
, EXTENT_LOCKED
);
167 unlock_extent(&info
->pinned_extents
, start
, end
, GFP_NOFS
);
172 static int remove_sb_from_cache(struct btrfs_root
*root
,
173 struct btrfs_block_group_cache
*cache
)
175 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
181 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
182 bytenr
= btrfs_sb_offset(i
);
183 ret
= btrfs_rmap_block(&root
->fs_info
->mapping_tree
,
184 cache
->key
.objectid
, bytenr
,
185 0, &logical
, &nr
, &stripe_len
);
188 try_lock_extent(&fs_info
->pinned_extents
,
190 logical
[nr
] + stripe_len
- 1, GFP_NOFS
);
199 * this is only called by cache_block_group, since we could have freed extents
200 * we need to check the pinned_extents for any extents that can't be used yet
201 * since their free space will be released as soon as the transaction commits.
203 static u64
add_new_free_space(struct btrfs_block_group_cache
*block_group
,
204 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
206 u64 extent_start
, extent_end
, size
, total_added
= 0;
209 while (start
< end
) {
210 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
211 &extent_start
, &extent_end
,
212 EXTENT_DIRTY
|EXTENT_LOCKED
|
217 if (extent_start
== start
) {
218 start
= extent_end
+ 1;
219 } else if (extent_start
> start
&& extent_start
< end
) {
220 size
= extent_start
- start
;
222 ret
= btrfs_add_free_space(block_group
, start
,
225 start
= extent_end
+ 1;
234 ret
= btrfs_add_free_space(block_group
, start
, size
);
241 DEFINE_MUTEX(discard_mutex
);
244 * if async kthreads are running when we cross transactions, we mark any pinned
245 * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those
246 * extents when they are done. Also we run this from btrfs_finish_extent_commit
247 * in case there were some pinned extents that were missed because we had
248 * already cached that block group.
250 static void btrfs_discard_pinned_extents(struct btrfs_fs_info
*fs_info
,
251 struct btrfs_block_group_cache
*cache
)
253 u64 start
, end
, last
;
259 last
= cache
->key
.objectid
;
261 mutex_lock(&discard_mutex
);
263 ret
= find_first_extent_bit(&fs_info
->pinned_extents
, last
,
264 &start
, &end
, EXTENT_DELALLOC
);
268 if (cache
&& start
>= cache
->key
.objectid
+ cache
->key
.offset
)
273 cache
= btrfs_lookup_block_group(fs_info
, start
);
276 start
= max(start
, cache
->key
.objectid
);
277 end
= min(end
, cache
->key
.objectid
+ cache
->key
.offset
- 1);
279 if (block_group_cache_done(cache
))
280 btrfs_add_free_space(cache
, start
,
284 start
= max(start
, cache
->key
.objectid
);
285 end
= min(end
, cache
->key
.objectid
+ cache
->key
.offset
- 1);
286 btrfs_add_free_space(cache
, start
, end
- start
+ 1);
289 clear_extent_bits(&fs_info
->pinned_extents
, start
, end
,
290 EXTENT_DELALLOC
, GFP_NOFS
);
293 if (need_resched()) {
294 mutex_unlock(&discard_mutex
);
296 mutex_lock(&discard_mutex
);
299 mutex_unlock(&discard_mutex
);
302 static int caching_kthread(void *data
)
304 struct btrfs_block_group_cache
*block_group
= data
;
305 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
307 struct btrfs_path
*path
;
309 struct btrfs_key key
;
310 struct extent_buffer
*leaf
;
316 path
= btrfs_alloc_path();
320 atomic_inc(&fs_info
->async_caching_threads
);
321 atomic_inc(&block_group
->space_info
->caching_threads
);
322 last
= max_t(u64
, block_group
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
324 /* need to make sure the commit_root doesn't disappear */
325 down_read(&fs_info
->extent_root
->commit_root_sem
);
328 * We don't want to deadlock with somebody trying to allocate a new
329 * extent for the extent root while also trying to search the extent
330 * root to add free space. So we skip locking and search the commit
331 * root, since its read-only
333 path
->skip_locking
= 1;
334 path
->search_commit_root
= 1;
339 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
340 ret
= btrfs_search_slot(NULL
, fs_info
->extent_root
, &key
, path
, 0, 0);
346 if (block_group
->fs_info
->closing
)
349 leaf
= path
->nodes
[0];
350 slot
= path
->slots
[0];
351 if (slot
>= btrfs_header_nritems(leaf
)) {
352 ret
= btrfs_next_leaf(fs_info
->extent_root
, path
);
358 if (need_resched()) {
359 btrfs_release_path(fs_info
->extent_root
, path
);
360 up_read(&fs_info
->extent_root
->commit_root_sem
);
367 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
368 if (key
.objectid
< block_group
->key
.objectid
)
371 if (key
.objectid
>= block_group
->key
.objectid
+
372 block_group
->key
.offset
)
375 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
376 total_found
+= add_new_free_space(block_group
,
379 last
= key
.objectid
+ key
.offset
;
382 if (total_found
> (1024 * 1024 * 2)) {
384 wake_up(&block_group
->caching_q
);
391 total_found
+= add_new_free_space(block_group
, fs_info
, last
,
392 block_group
->key
.objectid
+
393 block_group
->key
.offset
);
395 spin_lock(&block_group
->lock
);
396 block_group
->cached
= BTRFS_CACHE_FINISHED
;
397 spin_unlock(&block_group
->lock
);
400 btrfs_free_path(path
);
401 up_read(&fs_info
->extent_root
->commit_root_sem
);
402 atomic_dec(&fs_info
->async_caching_threads
);
403 atomic_dec(&block_group
->space_info
->caching_threads
);
404 wake_up(&block_group
->caching_q
);
407 btrfs_discard_pinned_extents(fs_info
, block_group
);
412 static int cache_block_group(struct btrfs_block_group_cache
*cache
)
414 struct task_struct
*tsk
;
417 spin_lock(&cache
->lock
);
418 if (cache
->cached
!= BTRFS_CACHE_NO
) {
419 spin_unlock(&cache
->lock
);
422 cache
->cached
= BTRFS_CACHE_STARTED
;
423 spin_unlock(&cache
->lock
);
425 tsk
= kthread_run(caching_kthread
, cache
, "btrfs-cache-%llu\n",
426 cache
->key
.objectid
);
429 printk(KERN_ERR
"error running thread %d\n", ret
);
437 * return the block group that starts at or after bytenr
439 static struct btrfs_block_group_cache
*
440 btrfs_lookup_first_block_group(struct btrfs_fs_info
*info
, u64 bytenr
)
442 struct btrfs_block_group_cache
*cache
;
444 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
450 * return the block group that contains the given bytenr
452 struct btrfs_block_group_cache
*btrfs_lookup_block_group(
453 struct btrfs_fs_info
*info
,
456 struct btrfs_block_group_cache
*cache
;
458 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
463 void btrfs_put_block_group(struct btrfs_block_group_cache
*cache
)
465 if (atomic_dec_and_test(&cache
->count
))
469 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
472 struct list_head
*head
= &info
->space_info
;
473 struct btrfs_space_info
*found
;
476 list_for_each_entry_rcu(found
, head
, list
) {
477 if (found
->flags
== flags
) {
487 * after adding space to the filesystem, we need to clear the full flags
488 * on all the space infos.
490 void btrfs_clear_space_info_full(struct btrfs_fs_info
*info
)
492 struct list_head
*head
= &info
->space_info
;
493 struct btrfs_space_info
*found
;
496 list_for_each_entry_rcu(found
, head
, list
)
501 static u64
div_factor(u64 num
, int factor
)
510 u64
btrfs_find_block_group(struct btrfs_root
*root
,
511 u64 search_start
, u64 search_hint
, int owner
)
513 struct btrfs_block_group_cache
*cache
;
515 u64 last
= max(search_hint
, search_start
);
522 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
526 spin_lock(&cache
->lock
);
527 last
= cache
->key
.objectid
+ cache
->key
.offset
;
528 used
= btrfs_block_group_used(&cache
->item
);
530 if ((full_search
|| !cache
->ro
) &&
531 block_group_bits(cache
, BTRFS_BLOCK_GROUP_METADATA
)) {
532 if (used
+ cache
->pinned
+ cache
->reserved
<
533 div_factor(cache
->key
.offset
, factor
)) {
534 group_start
= cache
->key
.objectid
;
535 spin_unlock(&cache
->lock
);
536 btrfs_put_block_group(cache
);
540 spin_unlock(&cache
->lock
);
541 btrfs_put_block_group(cache
);
549 if (!full_search
&& factor
< 10) {
559 /* simple helper to search for an existing extent at a given offset */
560 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
563 struct btrfs_key key
;
564 struct btrfs_path
*path
;
566 path
= btrfs_alloc_path();
568 key
.objectid
= start
;
570 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
571 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
573 btrfs_free_path(path
);
578 * Back reference rules. Back refs have three main goals:
580 * 1) differentiate between all holders of references to an extent so that
581 * when a reference is dropped we can make sure it was a valid reference
582 * before freeing the extent.
584 * 2) Provide enough information to quickly find the holders of an extent
585 * if we notice a given block is corrupted or bad.
587 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
588 * maintenance. This is actually the same as #2, but with a slightly
589 * different use case.
591 * There are two kinds of back refs. The implicit back refs is optimized
592 * for pointers in non-shared tree blocks. For a given pointer in a block,
593 * back refs of this kind provide information about the block's owner tree
594 * and the pointer's key. These information allow us to find the block by
595 * b-tree searching. The full back refs is for pointers in tree blocks not
596 * referenced by their owner trees. The location of tree block is recorded
597 * in the back refs. Actually the full back refs is generic, and can be
598 * used in all cases the implicit back refs is used. The major shortcoming
599 * of the full back refs is its overhead. Every time a tree block gets
600 * COWed, we have to update back refs entry for all pointers in it.
602 * For a newly allocated tree block, we use implicit back refs for
603 * pointers in it. This means most tree related operations only involve
604 * implicit back refs. For a tree block created in old transaction, the
605 * only way to drop a reference to it is COW it. So we can detect the
606 * event that tree block loses its owner tree's reference and do the
607 * back refs conversion.
609 * When a tree block is COW'd through a tree, there are four cases:
611 * The reference count of the block is one and the tree is the block's
612 * owner tree. Nothing to do in this case.
614 * The reference count of the block is one and the tree is not the
615 * block's owner tree. In this case, full back refs is used for pointers
616 * in the block. Remove these full back refs, add implicit back refs for
617 * every pointers in the new block.
619 * The reference count of the block is greater than one and the tree is
620 * the block's owner tree. In this case, implicit back refs is used for
621 * pointers in the block. Add full back refs for every pointers in the
622 * block, increase lower level extents' reference counts. The original
623 * implicit back refs are entailed to the new block.
625 * The reference count of the block is greater than one and the tree is
626 * not the block's owner tree. Add implicit back refs for every pointer in
627 * the new block, increase lower level extents' reference count.
629 * Back Reference Key composing:
631 * The key objectid corresponds to the first byte in the extent,
632 * The key type is used to differentiate between types of back refs.
633 * There are different meanings of the key offset for different types
636 * File extents can be referenced by:
638 * - multiple snapshots, subvolumes, or different generations in one subvol
639 * - different files inside a single subvolume
640 * - different offsets inside a file (bookend extents in file.c)
642 * The extent ref structure for the implicit back refs has fields for:
644 * - Objectid of the subvolume root
645 * - objectid of the file holding the reference
646 * - original offset in the file
647 * - how many bookend extents
649 * The key offset for the implicit back refs is hash of the first
652 * The extent ref structure for the full back refs has field for:
654 * - number of pointers in the tree leaf
656 * The key offset for the implicit back refs is the first byte of
659 * When a file extent is allocated, The implicit back refs is used.
660 * the fields are filled in:
662 * (root_key.objectid, inode objectid, offset in file, 1)
664 * When a file extent is removed file truncation, we find the
665 * corresponding implicit back refs and check the following fields:
667 * (btrfs_header_owner(leaf), inode objectid, offset in file)
669 * Btree extents can be referenced by:
671 * - Different subvolumes
673 * Both the implicit back refs and the full back refs for tree blocks
674 * only consist of key. The key offset for the implicit back refs is
675 * objectid of block's owner tree. The key offset for the full back refs
676 * is the first byte of parent block.
678 * When implicit back refs is used, information about the lowest key and
679 * level of the tree block are required. These information are stored in
680 * tree block info structure.
683 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
684 static int convert_extent_item_v0(struct btrfs_trans_handle
*trans
,
685 struct btrfs_root
*root
,
686 struct btrfs_path
*path
,
687 u64 owner
, u32 extra_size
)
689 struct btrfs_extent_item
*item
;
690 struct btrfs_extent_item_v0
*ei0
;
691 struct btrfs_extent_ref_v0
*ref0
;
692 struct btrfs_tree_block_info
*bi
;
693 struct extent_buffer
*leaf
;
694 struct btrfs_key key
;
695 struct btrfs_key found_key
;
696 u32 new_size
= sizeof(*item
);
700 leaf
= path
->nodes
[0];
701 BUG_ON(btrfs_item_size_nr(leaf
, path
->slots
[0]) != sizeof(*ei0
));
703 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
704 ei0
= btrfs_item_ptr(leaf
, path
->slots
[0],
705 struct btrfs_extent_item_v0
);
706 refs
= btrfs_extent_refs_v0(leaf
, ei0
);
708 if (owner
== (u64
)-1) {
710 if (path
->slots
[0] >= btrfs_header_nritems(leaf
)) {
711 ret
= btrfs_next_leaf(root
, path
);
715 leaf
= path
->nodes
[0];
717 btrfs_item_key_to_cpu(leaf
, &found_key
,
719 BUG_ON(key
.objectid
!= found_key
.objectid
);
720 if (found_key
.type
!= BTRFS_EXTENT_REF_V0_KEY
) {
724 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
725 struct btrfs_extent_ref_v0
);
726 owner
= btrfs_ref_objectid_v0(leaf
, ref0
);
730 btrfs_release_path(root
, path
);
732 if (owner
< BTRFS_FIRST_FREE_OBJECTID
)
733 new_size
+= sizeof(*bi
);
735 new_size
-= sizeof(*ei0
);
736 ret
= btrfs_search_slot(trans
, root
, &key
, path
,
737 new_size
+ extra_size
, 1);
742 ret
= btrfs_extend_item(trans
, root
, path
, new_size
);
745 leaf
= path
->nodes
[0];
746 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
747 btrfs_set_extent_refs(leaf
, item
, refs
);
748 /* FIXME: get real generation */
749 btrfs_set_extent_generation(leaf
, item
, 0);
750 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
751 btrfs_set_extent_flags(leaf
, item
,
752 BTRFS_EXTENT_FLAG_TREE_BLOCK
|
753 BTRFS_BLOCK_FLAG_FULL_BACKREF
);
754 bi
= (struct btrfs_tree_block_info
*)(item
+ 1);
755 /* FIXME: get first key of the block */
756 memset_extent_buffer(leaf
, 0, (unsigned long)bi
, sizeof(*bi
));
757 btrfs_set_tree_block_level(leaf
, bi
, (int)owner
);
759 btrfs_set_extent_flags(leaf
, item
, BTRFS_EXTENT_FLAG_DATA
);
761 btrfs_mark_buffer_dirty(leaf
);
766 static u64
hash_extent_data_ref(u64 root_objectid
, u64 owner
, u64 offset
)
768 u32 high_crc
= ~(u32
)0;
769 u32 low_crc
= ~(u32
)0;
772 lenum
= cpu_to_le64(root_objectid
);
773 high_crc
= crc32c(high_crc
, &lenum
, sizeof(lenum
));
774 lenum
= cpu_to_le64(owner
);
775 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
776 lenum
= cpu_to_le64(offset
);
777 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
779 return ((u64
)high_crc
<< 31) ^ (u64
)low_crc
;
782 static u64
hash_extent_data_ref_item(struct extent_buffer
*leaf
,
783 struct btrfs_extent_data_ref
*ref
)
785 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf
, ref
),
786 btrfs_extent_data_ref_objectid(leaf
, ref
),
787 btrfs_extent_data_ref_offset(leaf
, ref
));
790 static int match_extent_data_ref(struct extent_buffer
*leaf
,
791 struct btrfs_extent_data_ref
*ref
,
792 u64 root_objectid
, u64 owner
, u64 offset
)
794 if (btrfs_extent_data_ref_root(leaf
, ref
) != root_objectid
||
795 btrfs_extent_data_ref_objectid(leaf
, ref
) != owner
||
796 btrfs_extent_data_ref_offset(leaf
, ref
) != offset
)
801 static noinline
int lookup_extent_data_ref(struct btrfs_trans_handle
*trans
,
802 struct btrfs_root
*root
,
803 struct btrfs_path
*path
,
804 u64 bytenr
, u64 parent
,
806 u64 owner
, u64 offset
)
808 struct btrfs_key key
;
809 struct btrfs_extent_data_ref
*ref
;
810 struct extent_buffer
*leaf
;
816 key
.objectid
= bytenr
;
818 key
.type
= BTRFS_SHARED_DATA_REF_KEY
;
821 key
.type
= BTRFS_EXTENT_DATA_REF_KEY
;
822 key
.offset
= hash_extent_data_ref(root_objectid
,
827 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
836 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
837 key
.type
= BTRFS_EXTENT_REF_V0_KEY
;
838 btrfs_release_path(root
, path
);
839 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
850 leaf
= path
->nodes
[0];
851 nritems
= btrfs_header_nritems(leaf
);
853 if (path
->slots
[0] >= nritems
) {
854 ret
= btrfs_next_leaf(root
, path
);
860 leaf
= path
->nodes
[0];
861 nritems
= btrfs_header_nritems(leaf
);
865 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
866 if (key
.objectid
!= bytenr
||
867 key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
)
870 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
871 struct btrfs_extent_data_ref
);
873 if (match_extent_data_ref(leaf
, ref
, root_objectid
,
876 btrfs_release_path(root
, path
);
888 static noinline
int insert_extent_data_ref(struct btrfs_trans_handle
*trans
,
889 struct btrfs_root
*root
,
890 struct btrfs_path
*path
,
891 u64 bytenr
, u64 parent
,
892 u64 root_objectid
, u64 owner
,
893 u64 offset
, int refs_to_add
)
895 struct btrfs_key key
;
896 struct extent_buffer
*leaf
;
901 key
.objectid
= bytenr
;
903 key
.type
= BTRFS_SHARED_DATA_REF_KEY
;
905 size
= sizeof(struct btrfs_shared_data_ref
);
907 key
.type
= BTRFS_EXTENT_DATA_REF_KEY
;
908 key
.offset
= hash_extent_data_ref(root_objectid
,
910 size
= sizeof(struct btrfs_extent_data_ref
);
913 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, size
);
914 if (ret
&& ret
!= -EEXIST
)
917 leaf
= path
->nodes
[0];
919 struct btrfs_shared_data_ref
*ref
;
920 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
921 struct btrfs_shared_data_ref
);
923 btrfs_set_shared_data_ref_count(leaf
, ref
, refs_to_add
);
925 num_refs
= btrfs_shared_data_ref_count(leaf
, ref
);
926 num_refs
+= refs_to_add
;
927 btrfs_set_shared_data_ref_count(leaf
, ref
, num_refs
);
930 struct btrfs_extent_data_ref
*ref
;
931 while (ret
== -EEXIST
) {
932 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
933 struct btrfs_extent_data_ref
);
934 if (match_extent_data_ref(leaf
, ref
, root_objectid
,
937 btrfs_release_path(root
, path
);
939 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
941 if (ret
&& ret
!= -EEXIST
)
944 leaf
= path
->nodes
[0];
946 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
947 struct btrfs_extent_data_ref
);
949 btrfs_set_extent_data_ref_root(leaf
, ref
,
951 btrfs_set_extent_data_ref_objectid(leaf
, ref
, owner
);
952 btrfs_set_extent_data_ref_offset(leaf
, ref
, offset
);
953 btrfs_set_extent_data_ref_count(leaf
, ref
, refs_to_add
);
955 num_refs
= btrfs_extent_data_ref_count(leaf
, ref
);
956 num_refs
+= refs_to_add
;
957 btrfs_set_extent_data_ref_count(leaf
, ref
, num_refs
);
960 btrfs_mark_buffer_dirty(leaf
);
963 btrfs_release_path(root
, path
);
967 static noinline
int remove_extent_data_ref(struct btrfs_trans_handle
*trans
,
968 struct btrfs_root
*root
,
969 struct btrfs_path
*path
,
972 struct btrfs_key key
;
973 struct btrfs_extent_data_ref
*ref1
= NULL
;
974 struct btrfs_shared_data_ref
*ref2
= NULL
;
975 struct extent_buffer
*leaf
;
979 leaf
= path
->nodes
[0];
980 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
982 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
983 ref1
= btrfs_item_ptr(leaf
, path
->slots
[0],
984 struct btrfs_extent_data_ref
);
985 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
986 } else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
987 ref2
= btrfs_item_ptr(leaf
, path
->slots
[0],
988 struct btrfs_shared_data_ref
);
989 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
991 } else if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
992 struct btrfs_extent_ref_v0
*ref0
;
993 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
994 struct btrfs_extent_ref_v0
);
995 num_refs
= btrfs_ref_count_v0(leaf
, ref0
);
1001 BUG_ON(num_refs
< refs_to_drop
);
1002 num_refs
-= refs_to_drop
;
1004 if (num_refs
== 0) {
1005 ret
= btrfs_del_item(trans
, root
, path
);
1007 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
)
1008 btrfs_set_extent_data_ref_count(leaf
, ref1
, num_refs
);
1009 else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
)
1010 btrfs_set_shared_data_ref_count(leaf
, ref2
, num_refs
);
1011 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1013 struct btrfs_extent_ref_v0
*ref0
;
1014 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
1015 struct btrfs_extent_ref_v0
);
1016 btrfs_set_ref_count_v0(leaf
, ref0
, num_refs
);
1019 btrfs_mark_buffer_dirty(leaf
);
1024 static noinline u32
extent_data_ref_count(struct btrfs_root
*root
,
1025 struct btrfs_path
*path
,
1026 struct btrfs_extent_inline_ref
*iref
)
1028 struct btrfs_key key
;
1029 struct extent_buffer
*leaf
;
1030 struct btrfs_extent_data_ref
*ref1
;
1031 struct btrfs_shared_data_ref
*ref2
;
1034 leaf
= path
->nodes
[0];
1035 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
1037 if (btrfs_extent_inline_ref_type(leaf
, iref
) ==
1038 BTRFS_EXTENT_DATA_REF_KEY
) {
1039 ref1
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1040 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
1042 ref2
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
1043 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
1045 } else if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1046 ref1
= btrfs_item_ptr(leaf
, path
->slots
[0],
1047 struct btrfs_extent_data_ref
);
1048 num_refs
= btrfs_extent_data_ref_count(leaf
, ref1
);
1049 } else if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
1050 ref2
= btrfs_item_ptr(leaf
, path
->slots
[0],
1051 struct btrfs_shared_data_ref
);
1052 num_refs
= btrfs_shared_data_ref_count(leaf
, ref2
);
1053 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1054 } else if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
1055 struct btrfs_extent_ref_v0
*ref0
;
1056 ref0
= btrfs_item_ptr(leaf
, path
->slots
[0],
1057 struct btrfs_extent_ref_v0
);
1058 num_refs
= btrfs_ref_count_v0(leaf
, ref0
);
1066 static noinline
int lookup_tree_block_ref(struct btrfs_trans_handle
*trans
,
1067 struct btrfs_root
*root
,
1068 struct btrfs_path
*path
,
1069 u64 bytenr
, u64 parent
,
1072 struct btrfs_key key
;
1075 key
.objectid
= bytenr
;
1077 key
.type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1078 key
.offset
= parent
;
1080 key
.type
= BTRFS_TREE_BLOCK_REF_KEY
;
1081 key
.offset
= root_objectid
;
1084 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1087 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1088 if (ret
== -ENOENT
&& parent
) {
1089 btrfs_release_path(root
, path
);
1090 key
.type
= BTRFS_EXTENT_REF_V0_KEY
;
1091 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1099 static noinline
int insert_tree_block_ref(struct btrfs_trans_handle
*trans
,
1100 struct btrfs_root
*root
,
1101 struct btrfs_path
*path
,
1102 u64 bytenr
, u64 parent
,
1105 struct btrfs_key key
;
1108 key
.objectid
= bytenr
;
1110 key
.type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1111 key
.offset
= parent
;
1113 key
.type
= BTRFS_TREE_BLOCK_REF_KEY
;
1114 key
.offset
= root_objectid
;
1117 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
1118 btrfs_release_path(root
, path
);
1122 static inline int extent_ref_type(u64 parent
, u64 owner
)
1125 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1127 type
= BTRFS_SHARED_BLOCK_REF_KEY
;
1129 type
= BTRFS_TREE_BLOCK_REF_KEY
;
1132 type
= BTRFS_SHARED_DATA_REF_KEY
;
1134 type
= BTRFS_EXTENT_DATA_REF_KEY
;
1139 static int find_next_key(struct btrfs_path
*path
, int level
,
1140 struct btrfs_key
*key
)
1143 for (; level
< BTRFS_MAX_LEVEL
; level
++) {
1144 if (!path
->nodes
[level
])
1146 if (path
->slots
[level
] + 1 >=
1147 btrfs_header_nritems(path
->nodes
[level
]))
1150 btrfs_item_key_to_cpu(path
->nodes
[level
], key
,
1151 path
->slots
[level
] + 1);
1153 btrfs_node_key_to_cpu(path
->nodes
[level
], key
,
1154 path
->slots
[level
] + 1);
1161 * look for inline back ref. if back ref is found, *ref_ret is set
1162 * to the address of inline back ref, and 0 is returned.
1164 * if back ref isn't found, *ref_ret is set to the address where it
1165 * should be inserted, and -ENOENT is returned.
1167 * if insert is true and there are too many inline back refs, the path
1168 * points to the extent item, and -EAGAIN is returned.
1170 * NOTE: inline back refs are ordered in the same way that back ref
1171 * items in the tree are ordered.
1173 static noinline_for_stack
1174 int lookup_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1175 struct btrfs_root
*root
,
1176 struct btrfs_path
*path
,
1177 struct btrfs_extent_inline_ref
**ref_ret
,
1178 u64 bytenr
, u64 num_bytes
,
1179 u64 parent
, u64 root_objectid
,
1180 u64 owner
, u64 offset
, int insert
)
1182 struct btrfs_key key
;
1183 struct extent_buffer
*leaf
;
1184 struct btrfs_extent_item
*ei
;
1185 struct btrfs_extent_inline_ref
*iref
;
1196 key
.objectid
= bytenr
;
1197 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1198 key
.offset
= num_bytes
;
1200 want
= extent_ref_type(parent
, owner
);
1202 extra_size
= btrfs_extent_inline_ref_size(want
);
1203 path
->keep_locks
= 1;
1206 ret
= btrfs_search_slot(trans
, root
, &key
, path
, extra_size
, 1);
1213 leaf
= path
->nodes
[0];
1214 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1215 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1216 if (item_size
< sizeof(*ei
)) {
1221 ret
= convert_extent_item_v0(trans
, root
, path
, owner
,
1227 leaf
= path
->nodes
[0];
1228 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1231 BUG_ON(item_size
< sizeof(*ei
));
1233 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1234 flags
= btrfs_extent_flags(leaf
, ei
);
1236 ptr
= (unsigned long)(ei
+ 1);
1237 end
= (unsigned long)ei
+ item_size
;
1239 if (flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
) {
1240 ptr
+= sizeof(struct btrfs_tree_block_info
);
1243 BUG_ON(!(flags
& BTRFS_EXTENT_FLAG_DATA
));
1252 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
1253 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
1257 ptr
+= btrfs_extent_inline_ref_size(type
);
1261 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1262 struct btrfs_extent_data_ref
*dref
;
1263 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1264 if (match_extent_data_ref(leaf
, dref
, root_objectid
,
1269 if (hash_extent_data_ref_item(leaf
, dref
) <
1270 hash_extent_data_ref(root_objectid
, owner
, offset
))
1274 ref_offset
= btrfs_extent_inline_ref_offset(leaf
, iref
);
1276 if (parent
== ref_offset
) {
1280 if (ref_offset
< parent
)
1283 if (root_objectid
== ref_offset
) {
1287 if (ref_offset
< root_objectid
)
1291 ptr
+= btrfs_extent_inline_ref_size(type
);
1293 if (err
== -ENOENT
&& insert
) {
1294 if (item_size
+ extra_size
>=
1295 BTRFS_MAX_EXTENT_ITEM_SIZE(root
)) {
1300 * To add new inline back ref, we have to make sure
1301 * there is no corresponding back ref item.
1302 * For simplicity, we just do not add new inline back
1303 * ref if there is any kind of item for this block
1305 if (find_next_key(path
, 0, &key
) == 0 &&
1306 key
.objectid
== bytenr
&&
1307 key
.type
< BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1312 *ref_ret
= (struct btrfs_extent_inline_ref
*)ptr
;
1315 path
->keep_locks
= 0;
1316 btrfs_unlock_up_safe(path
, 1);
1322 * helper to add new inline back ref
1324 static noinline_for_stack
1325 int setup_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1326 struct btrfs_root
*root
,
1327 struct btrfs_path
*path
,
1328 struct btrfs_extent_inline_ref
*iref
,
1329 u64 parent
, u64 root_objectid
,
1330 u64 owner
, u64 offset
, int refs_to_add
,
1331 struct btrfs_delayed_extent_op
*extent_op
)
1333 struct extent_buffer
*leaf
;
1334 struct btrfs_extent_item
*ei
;
1337 unsigned long item_offset
;
1343 leaf
= path
->nodes
[0];
1344 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1345 item_offset
= (unsigned long)iref
- (unsigned long)ei
;
1347 type
= extent_ref_type(parent
, owner
);
1348 size
= btrfs_extent_inline_ref_size(type
);
1350 ret
= btrfs_extend_item(trans
, root
, path
, size
);
1353 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1354 refs
= btrfs_extent_refs(leaf
, ei
);
1355 refs
+= refs_to_add
;
1356 btrfs_set_extent_refs(leaf
, ei
, refs
);
1358 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1360 ptr
= (unsigned long)ei
+ item_offset
;
1361 end
= (unsigned long)ei
+ btrfs_item_size_nr(leaf
, path
->slots
[0]);
1362 if (ptr
< end
- size
)
1363 memmove_extent_buffer(leaf
, ptr
+ size
, ptr
,
1366 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
1367 btrfs_set_extent_inline_ref_type(leaf
, iref
, type
);
1368 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1369 struct btrfs_extent_data_ref
*dref
;
1370 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1371 btrfs_set_extent_data_ref_root(leaf
, dref
, root_objectid
);
1372 btrfs_set_extent_data_ref_objectid(leaf
, dref
, owner
);
1373 btrfs_set_extent_data_ref_offset(leaf
, dref
, offset
);
1374 btrfs_set_extent_data_ref_count(leaf
, dref
, refs_to_add
);
1375 } else if (type
== BTRFS_SHARED_DATA_REF_KEY
) {
1376 struct btrfs_shared_data_ref
*sref
;
1377 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
1378 btrfs_set_shared_data_ref_count(leaf
, sref
, refs_to_add
);
1379 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
1380 } else if (type
== BTRFS_SHARED_BLOCK_REF_KEY
) {
1381 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
1383 btrfs_set_extent_inline_ref_offset(leaf
, iref
, root_objectid
);
1385 btrfs_mark_buffer_dirty(leaf
);
1389 static int lookup_extent_backref(struct btrfs_trans_handle
*trans
,
1390 struct btrfs_root
*root
,
1391 struct btrfs_path
*path
,
1392 struct btrfs_extent_inline_ref
**ref_ret
,
1393 u64 bytenr
, u64 num_bytes
, u64 parent
,
1394 u64 root_objectid
, u64 owner
, u64 offset
)
1398 ret
= lookup_inline_extent_backref(trans
, root
, path
, ref_ret
,
1399 bytenr
, num_bytes
, parent
,
1400 root_objectid
, owner
, offset
, 0);
1404 btrfs_release_path(root
, path
);
1407 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1408 ret
= lookup_tree_block_ref(trans
, root
, path
, bytenr
, parent
,
1411 ret
= lookup_extent_data_ref(trans
, root
, path
, bytenr
, parent
,
1412 root_objectid
, owner
, offset
);
1418 * helper to update/remove inline back ref
1420 static noinline_for_stack
1421 int update_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1422 struct btrfs_root
*root
,
1423 struct btrfs_path
*path
,
1424 struct btrfs_extent_inline_ref
*iref
,
1426 struct btrfs_delayed_extent_op
*extent_op
)
1428 struct extent_buffer
*leaf
;
1429 struct btrfs_extent_item
*ei
;
1430 struct btrfs_extent_data_ref
*dref
= NULL
;
1431 struct btrfs_shared_data_ref
*sref
= NULL
;
1440 leaf
= path
->nodes
[0];
1441 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1442 refs
= btrfs_extent_refs(leaf
, ei
);
1443 WARN_ON(refs_to_mod
< 0 && refs
+ refs_to_mod
<= 0);
1444 refs
+= refs_to_mod
;
1445 btrfs_set_extent_refs(leaf
, ei
, refs
);
1447 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1449 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
1451 if (type
== BTRFS_EXTENT_DATA_REF_KEY
) {
1452 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1453 refs
= btrfs_extent_data_ref_count(leaf
, dref
);
1454 } else if (type
== BTRFS_SHARED_DATA_REF_KEY
) {
1455 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
1456 refs
= btrfs_shared_data_ref_count(leaf
, sref
);
1459 BUG_ON(refs_to_mod
!= -1);
1462 BUG_ON(refs_to_mod
< 0 && refs
< -refs_to_mod
);
1463 refs
+= refs_to_mod
;
1466 if (type
== BTRFS_EXTENT_DATA_REF_KEY
)
1467 btrfs_set_extent_data_ref_count(leaf
, dref
, refs
);
1469 btrfs_set_shared_data_ref_count(leaf
, sref
, refs
);
1471 size
= btrfs_extent_inline_ref_size(type
);
1472 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1473 ptr
= (unsigned long)iref
;
1474 end
= (unsigned long)ei
+ item_size
;
1475 if (ptr
+ size
< end
)
1476 memmove_extent_buffer(leaf
, ptr
, ptr
+ size
,
1479 ret
= btrfs_truncate_item(trans
, root
, path
, item_size
, 1);
1482 btrfs_mark_buffer_dirty(leaf
);
1486 static noinline_for_stack
1487 int insert_inline_extent_backref(struct btrfs_trans_handle
*trans
,
1488 struct btrfs_root
*root
,
1489 struct btrfs_path
*path
,
1490 u64 bytenr
, u64 num_bytes
, u64 parent
,
1491 u64 root_objectid
, u64 owner
,
1492 u64 offset
, int refs_to_add
,
1493 struct btrfs_delayed_extent_op
*extent_op
)
1495 struct btrfs_extent_inline_ref
*iref
;
1498 ret
= lookup_inline_extent_backref(trans
, root
, path
, &iref
,
1499 bytenr
, num_bytes
, parent
,
1500 root_objectid
, owner
, offset
, 1);
1502 BUG_ON(owner
< BTRFS_FIRST_FREE_OBJECTID
);
1503 ret
= update_inline_extent_backref(trans
, root
, path
, iref
,
1504 refs_to_add
, extent_op
);
1505 } else if (ret
== -ENOENT
) {
1506 ret
= setup_inline_extent_backref(trans
, root
, path
, iref
,
1507 parent
, root_objectid
,
1508 owner
, offset
, refs_to_add
,
1514 static int insert_extent_backref(struct btrfs_trans_handle
*trans
,
1515 struct btrfs_root
*root
,
1516 struct btrfs_path
*path
,
1517 u64 bytenr
, u64 parent
, u64 root_objectid
,
1518 u64 owner
, u64 offset
, int refs_to_add
)
1521 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1522 BUG_ON(refs_to_add
!= 1);
1523 ret
= insert_tree_block_ref(trans
, root
, path
, bytenr
,
1524 parent
, root_objectid
);
1526 ret
= insert_extent_data_ref(trans
, root
, path
, bytenr
,
1527 parent
, root_objectid
,
1528 owner
, offset
, refs_to_add
);
1533 static int remove_extent_backref(struct btrfs_trans_handle
*trans
,
1534 struct btrfs_root
*root
,
1535 struct btrfs_path
*path
,
1536 struct btrfs_extent_inline_ref
*iref
,
1537 int refs_to_drop
, int is_data
)
1541 BUG_ON(!is_data
&& refs_to_drop
!= 1);
1543 ret
= update_inline_extent_backref(trans
, root
, path
, iref
,
1544 -refs_to_drop
, NULL
);
1545 } else if (is_data
) {
1546 ret
= remove_extent_data_ref(trans
, root
, path
, refs_to_drop
);
1548 ret
= btrfs_del_item(trans
, root
, path
);
1553 #ifdef BIO_RW_DISCARD
1554 static void btrfs_issue_discard(struct block_device
*bdev
,
1557 blkdev_issue_discard(bdev
, start
>> 9, len
>> 9, GFP_KERNEL
);
1561 static int btrfs_discard_extent(struct btrfs_root
*root
, u64 bytenr
,
1564 #ifdef BIO_RW_DISCARD
1566 u64 map_length
= num_bytes
;
1567 struct btrfs_multi_bio
*multi
= NULL
;
1569 /* Tell the block device(s) that the sectors can be discarded */
1570 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
1571 bytenr
, &map_length
, &multi
, 0);
1573 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
1576 if (map_length
> num_bytes
)
1577 map_length
= num_bytes
;
1579 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
1580 btrfs_issue_discard(stripe
->dev
->bdev
,
1593 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1594 struct btrfs_root
*root
,
1595 u64 bytenr
, u64 num_bytes
, u64 parent
,
1596 u64 root_objectid
, u64 owner
, u64 offset
)
1599 BUG_ON(owner
< BTRFS_FIRST_FREE_OBJECTID
&&
1600 root_objectid
== BTRFS_TREE_LOG_OBJECTID
);
1602 if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
1603 ret
= btrfs_add_delayed_tree_ref(trans
, bytenr
, num_bytes
,
1604 parent
, root_objectid
, (int)owner
,
1605 BTRFS_ADD_DELAYED_REF
, NULL
);
1607 ret
= btrfs_add_delayed_data_ref(trans
, bytenr
, num_bytes
,
1608 parent
, root_objectid
, owner
, offset
,
1609 BTRFS_ADD_DELAYED_REF
, NULL
);
1614 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1615 struct btrfs_root
*root
,
1616 u64 bytenr
, u64 num_bytes
,
1617 u64 parent
, u64 root_objectid
,
1618 u64 owner
, u64 offset
, int refs_to_add
,
1619 struct btrfs_delayed_extent_op
*extent_op
)
1621 struct btrfs_path
*path
;
1622 struct extent_buffer
*leaf
;
1623 struct btrfs_extent_item
*item
;
1628 path
= btrfs_alloc_path();
1633 path
->leave_spinning
= 1;
1634 /* this will setup the path even if it fails to insert the back ref */
1635 ret
= insert_inline_extent_backref(trans
, root
->fs_info
->extent_root
,
1636 path
, bytenr
, num_bytes
, parent
,
1637 root_objectid
, owner
, offset
,
1638 refs_to_add
, extent_op
);
1642 if (ret
!= -EAGAIN
) {
1647 leaf
= path
->nodes
[0];
1648 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1649 refs
= btrfs_extent_refs(leaf
, item
);
1650 btrfs_set_extent_refs(leaf
, item
, refs
+ refs_to_add
);
1652 __run_delayed_extent_op(extent_op
, leaf
, item
);
1654 btrfs_mark_buffer_dirty(leaf
);
1655 btrfs_release_path(root
->fs_info
->extent_root
, path
);
1658 path
->leave_spinning
= 1;
1660 /* now insert the actual backref */
1661 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
1662 path
, bytenr
, parent
, root_objectid
,
1663 owner
, offset
, refs_to_add
);
1666 btrfs_free_path(path
);
1670 static int run_delayed_data_ref(struct btrfs_trans_handle
*trans
,
1671 struct btrfs_root
*root
,
1672 struct btrfs_delayed_ref_node
*node
,
1673 struct btrfs_delayed_extent_op
*extent_op
,
1674 int insert_reserved
)
1677 struct btrfs_delayed_data_ref
*ref
;
1678 struct btrfs_key ins
;
1683 ins
.objectid
= node
->bytenr
;
1684 ins
.offset
= node
->num_bytes
;
1685 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
1687 ref
= btrfs_delayed_node_to_data_ref(node
);
1688 if (node
->type
== BTRFS_SHARED_DATA_REF_KEY
)
1689 parent
= ref
->parent
;
1691 ref_root
= ref
->root
;
1693 if (node
->action
== BTRFS_ADD_DELAYED_REF
&& insert_reserved
) {
1695 BUG_ON(extent_op
->update_key
);
1696 flags
|= extent_op
->flags_to_set
;
1698 ret
= alloc_reserved_file_extent(trans
, root
,
1699 parent
, ref_root
, flags
,
1700 ref
->objectid
, ref
->offset
,
1701 &ins
, node
->ref_mod
);
1702 update_reserved_extents(root
, ins
.objectid
, ins
.offset
, 0);
1703 } else if (node
->action
== BTRFS_ADD_DELAYED_REF
) {
1704 ret
= __btrfs_inc_extent_ref(trans
, root
, node
->bytenr
,
1705 node
->num_bytes
, parent
,
1706 ref_root
, ref
->objectid
,
1707 ref
->offset
, node
->ref_mod
,
1709 } else if (node
->action
== BTRFS_DROP_DELAYED_REF
) {
1710 ret
= __btrfs_free_extent(trans
, root
, node
->bytenr
,
1711 node
->num_bytes
, parent
,
1712 ref_root
, ref
->objectid
,
1713 ref
->offset
, node
->ref_mod
,
1721 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op
*extent_op
,
1722 struct extent_buffer
*leaf
,
1723 struct btrfs_extent_item
*ei
)
1725 u64 flags
= btrfs_extent_flags(leaf
, ei
);
1726 if (extent_op
->update_flags
) {
1727 flags
|= extent_op
->flags_to_set
;
1728 btrfs_set_extent_flags(leaf
, ei
, flags
);
1731 if (extent_op
->update_key
) {
1732 struct btrfs_tree_block_info
*bi
;
1733 BUG_ON(!(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
));
1734 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
1735 btrfs_set_tree_block_key(leaf
, bi
, &extent_op
->key
);
1739 static int run_delayed_extent_op(struct btrfs_trans_handle
*trans
,
1740 struct btrfs_root
*root
,
1741 struct btrfs_delayed_ref_node
*node
,
1742 struct btrfs_delayed_extent_op
*extent_op
)
1744 struct btrfs_key key
;
1745 struct btrfs_path
*path
;
1746 struct btrfs_extent_item
*ei
;
1747 struct extent_buffer
*leaf
;
1752 path
= btrfs_alloc_path();
1756 key
.objectid
= node
->bytenr
;
1757 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1758 key
.offset
= node
->num_bytes
;
1761 path
->leave_spinning
= 1;
1762 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
,
1773 leaf
= path
->nodes
[0];
1774 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1775 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1776 if (item_size
< sizeof(*ei
)) {
1777 ret
= convert_extent_item_v0(trans
, root
->fs_info
->extent_root
,
1783 leaf
= path
->nodes
[0];
1784 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
1787 BUG_ON(item_size
< sizeof(*ei
));
1788 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
1789 __run_delayed_extent_op(extent_op
, leaf
, ei
);
1791 btrfs_mark_buffer_dirty(leaf
);
1793 btrfs_free_path(path
);
1797 static int run_delayed_tree_ref(struct btrfs_trans_handle
*trans
,
1798 struct btrfs_root
*root
,
1799 struct btrfs_delayed_ref_node
*node
,
1800 struct btrfs_delayed_extent_op
*extent_op
,
1801 int insert_reserved
)
1804 struct btrfs_delayed_tree_ref
*ref
;
1805 struct btrfs_key ins
;
1809 ins
.objectid
= node
->bytenr
;
1810 ins
.offset
= node
->num_bytes
;
1811 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
1813 ref
= btrfs_delayed_node_to_tree_ref(node
);
1814 if (node
->type
== BTRFS_SHARED_BLOCK_REF_KEY
)
1815 parent
= ref
->parent
;
1817 ref_root
= ref
->root
;
1819 BUG_ON(node
->ref_mod
!= 1);
1820 if (node
->action
== BTRFS_ADD_DELAYED_REF
&& insert_reserved
) {
1821 BUG_ON(!extent_op
|| !extent_op
->update_flags
||
1822 !extent_op
->update_key
);
1823 ret
= alloc_reserved_tree_block(trans
, root
,
1825 extent_op
->flags_to_set
,
1828 update_reserved_extents(root
, ins
.objectid
, ins
.offset
, 0);
1829 } else if (node
->action
== BTRFS_ADD_DELAYED_REF
) {
1830 ret
= __btrfs_inc_extent_ref(trans
, root
, node
->bytenr
,
1831 node
->num_bytes
, parent
, ref_root
,
1832 ref
->level
, 0, 1, extent_op
);
1833 } else if (node
->action
== BTRFS_DROP_DELAYED_REF
) {
1834 ret
= __btrfs_free_extent(trans
, root
, node
->bytenr
,
1835 node
->num_bytes
, parent
, ref_root
,
1836 ref
->level
, 0, 1, extent_op
);
1844 /* helper function to actually process a single delayed ref entry */
1845 static int run_one_delayed_ref(struct btrfs_trans_handle
*trans
,
1846 struct btrfs_root
*root
,
1847 struct btrfs_delayed_ref_node
*node
,
1848 struct btrfs_delayed_extent_op
*extent_op
,
1849 int insert_reserved
)
1852 if (btrfs_delayed_ref_is_head(node
)) {
1853 struct btrfs_delayed_ref_head
*head
;
1855 * we've hit the end of the chain and we were supposed
1856 * to insert this extent into the tree. But, it got
1857 * deleted before we ever needed to insert it, so all
1858 * we have to do is clean up the accounting
1861 head
= btrfs_delayed_node_to_head(node
);
1862 if (insert_reserved
) {
1863 if (head
->is_data
) {
1864 ret
= btrfs_del_csums(trans
, root
,
1869 btrfs_update_pinned_extents(root
, node
->bytenr
,
1870 node
->num_bytes
, 1, 0);
1871 update_reserved_extents(root
, node
->bytenr
,
1872 node
->num_bytes
, 0);
1874 mutex_unlock(&head
->mutex
);
1878 if (node
->type
== BTRFS_TREE_BLOCK_REF_KEY
||
1879 node
->type
== BTRFS_SHARED_BLOCK_REF_KEY
)
1880 ret
= run_delayed_tree_ref(trans
, root
, node
, extent_op
,
1882 else if (node
->type
== BTRFS_EXTENT_DATA_REF_KEY
||
1883 node
->type
== BTRFS_SHARED_DATA_REF_KEY
)
1884 ret
= run_delayed_data_ref(trans
, root
, node
, extent_op
,
1891 static noinline
struct btrfs_delayed_ref_node
*
1892 select_delayed_ref(struct btrfs_delayed_ref_head
*head
)
1894 struct rb_node
*node
;
1895 struct btrfs_delayed_ref_node
*ref
;
1896 int action
= BTRFS_ADD_DELAYED_REF
;
1899 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
1900 * this prevents ref count from going down to zero when
1901 * there still are pending delayed ref.
1903 node
= rb_prev(&head
->node
.rb_node
);
1907 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
1909 if (ref
->bytenr
!= head
->node
.bytenr
)
1911 if (ref
->action
== action
)
1913 node
= rb_prev(node
);
1915 if (action
== BTRFS_ADD_DELAYED_REF
) {
1916 action
= BTRFS_DROP_DELAYED_REF
;
1922 static noinline
int run_clustered_refs(struct btrfs_trans_handle
*trans
,
1923 struct btrfs_root
*root
,
1924 struct list_head
*cluster
)
1926 struct btrfs_delayed_ref_root
*delayed_refs
;
1927 struct btrfs_delayed_ref_node
*ref
;
1928 struct btrfs_delayed_ref_head
*locked_ref
= NULL
;
1929 struct btrfs_delayed_extent_op
*extent_op
;
1932 int must_insert_reserved
= 0;
1934 delayed_refs
= &trans
->transaction
->delayed_refs
;
1937 /* pick a new head ref from the cluster list */
1938 if (list_empty(cluster
))
1941 locked_ref
= list_entry(cluster
->next
,
1942 struct btrfs_delayed_ref_head
, cluster
);
1944 /* grab the lock that says we are going to process
1945 * all the refs for this head */
1946 ret
= btrfs_delayed_ref_lock(trans
, locked_ref
);
1949 * we may have dropped the spin lock to get the head
1950 * mutex lock, and that might have given someone else
1951 * time to free the head. If that's true, it has been
1952 * removed from our list and we can move on.
1954 if (ret
== -EAGAIN
) {
1962 * record the must insert reserved flag before we
1963 * drop the spin lock.
1965 must_insert_reserved
= locked_ref
->must_insert_reserved
;
1966 locked_ref
->must_insert_reserved
= 0;
1968 extent_op
= locked_ref
->extent_op
;
1969 locked_ref
->extent_op
= NULL
;
1972 * locked_ref is the head node, so we have to go one
1973 * node back for any delayed ref updates
1975 ref
= select_delayed_ref(locked_ref
);
1977 /* All delayed refs have been processed, Go ahead
1978 * and send the head node to run_one_delayed_ref,
1979 * so that any accounting fixes can happen
1981 ref
= &locked_ref
->node
;
1983 if (extent_op
&& must_insert_reserved
) {
1989 spin_unlock(&delayed_refs
->lock
);
1991 ret
= run_delayed_extent_op(trans
, root
,
1997 spin_lock(&delayed_refs
->lock
);
2001 list_del_init(&locked_ref
->cluster
);
2006 rb_erase(&ref
->rb_node
, &delayed_refs
->root
);
2007 delayed_refs
->num_entries
--;
2009 spin_unlock(&delayed_refs
->lock
);
2011 ret
= run_one_delayed_ref(trans
, root
, ref
, extent_op
,
2012 must_insert_reserved
);
2015 btrfs_put_delayed_ref(ref
);
2020 spin_lock(&delayed_refs
->lock
);
2026 * this starts processing the delayed reference count updates and
2027 * extent insertions we have queued up so far. count can be
2028 * 0, which means to process everything in the tree at the start
2029 * of the run (but not newly added entries), or it can be some target
2030 * number you'd like to process.
2032 int btrfs_run_delayed_refs(struct btrfs_trans_handle
*trans
,
2033 struct btrfs_root
*root
, unsigned long count
)
2035 struct rb_node
*node
;
2036 struct btrfs_delayed_ref_root
*delayed_refs
;
2037 struct btrfs_delayed_ref_node
*ref
;
2038 struct list_head cluster
;
2040 int run_all
= count
== (unsigned long)-1;
2043 if (root
== root
->fs_info
->extent_root
)
2044 root
= root
->fs_info
->tree_root
;
2046 delayed_refs
= &trans
->transaction
->delayed_refs
;
2047 INIT_LIST_HEAD(&cluster
);
2049 spin_lock(&delayed_refs
->lock
);
2051 count
= delayed_refs
->num_entries
* 2;
2055 if (!(run_all
|| run_most
) &&
2056 delayed_refs
->num_heads_ready
< 64)
2060 * go find something we can process in the rbtree. We start at
2061 * the beginning of the tree, and then build a cluster
2062 * of refs to process starting at the first one we are able to
2065 ret
= btrfs_find_ref_cluster(trans
, &cluster
,
2066 delayed_refs
->run_delayed_start
);
2070 ret
= run_clustered_refs(trans
, root
, &cluster
);
2073 count
-= min_t(unsigned long, ret
, count
);
2080 node
= rb_first(&delayed_refs
->root
);
2083 count
= (unsigned long)-1;
2086 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
2088 if (btrfs_delayed_ref_is_head(ref
)) {
2089 struct btrfs_delayed_ref_head
*head
;
2091 head
= btrfs_delayed_node_to_head(ref
);
2092 atomic_inc(&ref
->refs
);
2094 spin_unlock(&delayed_refs
->lock
);
2095 mutex_lock(&head
->mutex
);
2096 mutex_unlock(&head
->mutex
);
2098 btrfs_put_delayed_ref(ref
);
2102 node
= rb_next(node
);
2104 spin_unlock(&delayed_refs
->lock
);
2105 schedule_timeout(1);
2109 spin_unlock(&delayed_refs
->lock
);
2113 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle
*trans
,
2114 struct btrfs_root
*root
,
2115 u64 bytenr
, u64 num_bytes
, u64 flags
,
2118 struct btrfs_delayed_extent_op
*extent_op
;
2121 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2125 extent_op
->flags_to_set
= flags
;
2126 extent_op
->update_flags
= 1;
2127 extent_op
->update_key
= 0;
2128 extent_op
->is_data
= is_data
? 1 : 0;
2130 ret
= btrfs_add_delayed_extent_op(trans
, bytenr
, num_bytes
, extent_op
);
2136 static noinline
int check_delayed_ref(struct btrfs_trans_handle
*trans
,
2137 struct btrfs_root
*root
,
2138 struct btrfs_path
*path
,
2139 u64 objectid
, u64 offset
, u64 bytenr
)
2141 struct btrfs_delayed_ref_head
*head
;
2142 struct btrfs_delayed_ref_node
*ref
;
2143 struct btrfs_delayed_data_ref
*data_ref
;
2144 struct btrfs_delayed_ref_root
*delayed_refs
;
2145 struct rb_node
*node
;
2149 delayed_refs
= &trans
->transaction
->delayed_refs
;
2150 spin_lock(&delayed_refs
->lock
);
2151 head
= btrfs_find_delayed_ref_head(trans
, bytenr
);
2155 if (!mutex_trylock(&head
->mutex
)) {
2156 atomic_inc(&head
->node
.refs
);
2157 spin_unlock(&delayed_refs
->lock
);
2159 btrfs_release_path(root
->fs_info
->extent_root
, path
);
2161 mutex_lock(&head
->mutex
);
2162 mutex_unlock(&head
->mutex
);
2163 btrfs_put_delayed_ref(&head
->node
);
2167 node
= rb_prev(&head
->node
.rb_node
);
2171 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
2173 if (ref
->bytenr
!= bytenr
)
2177 if (ref
->type
!= BTRFS_EXTENT_DATA_REF_KEY
)
2180 data_ref
= btrfs_delayed_node_to_data_ref(ref
);
2182 node
= rb_prev(node
);
2184 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
2185 if (ref
->bytenr
== bytenr
)
2189 if (data_ref
->root
!= root
->root_key
.objectid
||
2190 data_ref
->objectid
!= objectid
|| data_ref
->offset
!= offset
)
2195 mutex_unlock(&head
->mutex
);
2197 spin_unlock(&delayed_refs
->lock
);
2201 static noinline
int check_committed_ref(struct btrfs_trans_handle
*trans
,
2202 struct btrfs_root
*root
,
2203 struct btrfs_path
*path
,
2204 u64 objectid
, u64 offset
, u64 bytenr
)
2206 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2207 struct extent_buffer
*leaf
;
2208 struct btrfs_extent_data_ref
*ref
;
2209 struct btrfs_extent_inline_ref
*iref
;
2210 struct btrfs_extent_item
*ei
;
2211 struct btrfs_key key
;
2215 key
.objectid
= bytenr
;
2216 key
.offset
= (u64
)-1;
2217 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
2219 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
2225 if (path
->slots
[0] == 0)
2229 leaf
= path
->nodes
[0];
2230 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
2232 if (key
.objectid
!= bytenr
|| key
.type
!= BTRFS_EXTENT_ITEM_KEY
)
2236 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
2237 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2238 if (item_size
< sizeof(*ei
)) {
2239 WARN_ON(item_size
!= sizeof(struct btrfs_extent_item_v0
));
2243 ei
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_item
);
2245 if (item_size
!= sizeof(*ei
) +
2246 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY
))
2249 if (btrfs_extent_generation(leaf
, ei
) <=
2250 btrfs_root_last_snapshot(&root
->root_item
))
2253 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
2254 if (btrfs_extent_inline_ref_type(leaf
, iref
) !=
2255 BTRFS_EXTENT_DATA_REF_KEY
)
2258 ref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
2259 if (btrfs_extent_refs(leaf
, ei
) !=
2260 btrfs_extent_data_ref_count(leaf
, ref
) ||
2261 btrfs_extent_data_ref_root(leaf
, ref
) !=
2262 root
->root_key
.objectid
||
2263 btrfs_extent_data_ref_objectid(leaf
, ref
) != objectid
||
2264 btrfs_extent_data_ref_offset(leaf
, ref
) != offset
)
2272 int btrfs_cross_ref_exist(struct btrfs_trans_handle
*trans
,
2273 struct btrfs_root
*root
,
2274 u64 objectid
, u64 offset
, u64 bytenr
)
2276 struct btrfs_path
*path
;
2280 path
= btrfs_alloc_path();
2285 ret
= check_committed_ref(trans
, root
, path
, objectid
,
2287 if (ret
&& ret
!= -ENOENT
)
2290 ret2
= check_delayed_ref(trans
, root
, path
, objectid
,
2292 } while (ret2
== -EAGAIN
);
2294 if (ret2
&& ret2
!= -ENOENT
) {
2299 if (ret
!= -ENOENT
|| ret2
!= -ENOENT
)
2302 btrfs_free_path(path
);
2307 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2308 struct extent_buffer
*buf
, u32 nr_extents
)
2310 struct btrfs_key key
;
2311 struct btrfs_file_extent_item
*fi
;
2319 if (!root
->ref_cows
)
2322 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
2324 root_gen
= root
->root_key
.offset
;
2327 root_gen
= trans
->transid
- 1;
2330 level
= btrfs_header_level(buf
);
2331 nritems
= btrfs_header_nritems(buf
);
2334 struct btrfs_leaf_ref
*ref
;
2335 struct btrfs_extent_info
*info
;
2337 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
2343 ref
->root_gen
= root_gen
;
2344 ref
->bytenr
= buf
->start
;
2345 ref
->owner
= btrfs_header_owner(buf
);
2346 ref
->generation
= btrfs_header_generation(buf
);
2347 ref
->nritems
= nr_extents
;
2348 info
= ref
->extents
;
2350 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
2352 btrfs_item_key_to_cpu(buf
, &key
, i
);
2353 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
2355 fi
= btrfs_item_ptr(buf
, i
,
2356 struct btrfs_file_extent_item
);
2357 if (btrfs_file_extent_type(buf
, fi
) ==
2358 BTRFS_FILE_EXTENT_INLINE
)
2360 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
2361 if (disk_bytenr
== 0)
2364 info
->bytenr
= disk_bytenr
;
2366 btrfs_file_extent_disk_num_bytes(buf
, fi
);
2367 info
->objectid
= key
.objectid
;
2368 info
->offset
= key
.offset
;
2372 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
2373 if (ret
== -EEXIST
&& shared
) {
2374 struct btrfs_leaf_ref
*old
;
2375 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
2377 btrfs_remove_leaf_ref(root
, old
);
2378 btrfs_free_leaf_ref(root
, old
);
2379 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
2382 btrfs_free_leaf_ref(root
, ref
);
2388 /* when a block goes through cow, we update the reference counts of
2389 * everything that block points to. The internal pointers of the block
2390 * can be in just about any order, and it is likely to have clusters of
2391 * things that are close together and clusters of things that are not.
2393 * To help reduce the seeks that come with updating all of these reference
2394 * counts, sort them by byte number before actual updates are done.
2396 * struct refsort is used to match byte number to slot in the btree block.
2397 * we sort based on the byte number and then use the slot to actually
2400 * struct refsort is smaller than strcut btrfs_item and smaller than
2401 * struct btrfs_key_ptr. Since we're currently limited to the page size
2402 * for a btree block, there's no way for a kmalloc of refsorts for a
2403 * single node to be bigger than a page.
2411 * for passing into sort()
2413 static int refsort_cmp(const void *a_void
, const void *b_void
)
2415 const struct refsort
*a
= a_void
;
2416 const struct refsort
*b
= b_void
;
2418 if (a
->bytenr
< b
->bytenr
)
2420 if (a
->bytenr
> b
->bytenr
)
2426 static int __btrfs_mod_ref(struct btrfs_trans_handle
*trans
,
2427 struct btrfs_root
*root
,
2428 struct extent_buffer
*buf
,
2429 int full_backref
, int inc
)
2436 struct btrfs_key key
;
2437 struct btrfs_file_extent_item
*fi
;
2441 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
2442 u64
, u64
, u64
, u64
, u64
, u64
);
2444 ref_root
= btrfs_header_owner(buf
);
2445 nritems
= btrfs_header_nritems(buf
);
2446 level
= btrfs_header_level(buf
);
2448 if (!root
->ref_cows
&& level
== 0)
2452 process_func
= btrfs_inc_extent_ref
;
2454 process_func
= btrfs_free_extent
;
2457 parent
= buf
->start
;
2461 for (i
= 0; i
< nritems
; i
++) {
2463 btrfs_item_key_to_cpu(buf
, &key
, i
);
2464 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
2466 fi
= btrfs_item_ptr(buf
, i
,
2467 struct btrfs_file_extent_item
);
2468 if (btrfs_file_extent_type(buf
, fi
) ==
2469 BTRFS_FILE_EXTENT_INLINE
)
2471 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
2475 num_bytes
= btrfs_file_extent_disk_num_bytes(buf
, fi
);
2476 key
.offset
-= btrfs_file_extent_offset(buf
, fi
);
2477 ret
= process_func(trans
, root
, bytenr
, num_bytes
,
2478 parent
, ref_root
, key
.objectid
,
2483 bytenr
= btrfs_node_blockptr(buf
, i
);
2484 num_bytes
= btrfs_level_size(root
, level
- 1);
2485 ret
= process_func(trans
, root
, bytenr
, num_bytes
,
2486 parent
, ref_root
, level
- 1, 0);
2497 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2498 struct extent_buffer
*buf
, int full_backref
)
2500 return __btrfs_mod_ref(trans
, root
, buf
, full_backref
, 1);
2503 int btrfs_dec_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
2504 struct extent_buffer
*buf
, int full_backref
)
2506 return __btrfs_mod_ref(trans
, root
, buf
, full_backref
, 0);
2509 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
2510 struct btrfs_root
*root
,
2511 struct btrfs_path
*path
,
2512 struct btrfs_block_group_cache
*cache
)
2515 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2517 struct extent_buffer
*leaf
;
2519 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
2524 leaf
= path
->nodes
[0];
2525 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
2526 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
2527 btrfs_mark_buffer_dirty(leaf
);
2528 btrfs_release_path(extent_root
, path
);
2536 static struct btrfs_block_group_cache
*
2537 next_block_group(struct btrfs_root
*root
,
2538 struct btrfs_block_group_cache
*cache
)
2540 struct rb_node
*node
;
2541 spin_lock(&root
->fs_info
->block_group_cache_lock
);
2542 node
= rb_next(&cache
->cache_node
);
2543 btrfs_put_block_group(cache
);
2545 cache
= rb_entry(node
, struct btrfs_block_group_cache
,
2547 atomic_inc(&cache
->count
);
2550 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
2554 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
2555 struct btrfs_root
*root
)
2557 struct btrfs_block_group_cache
*cache
;
2559 struct btrfs_path
*path
;
2562 path
= btrfs_alloc_path();
2568 err
= btrfs_run_delayed_refs(trans
, root
,
2573 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
2577 cache
= next_block_group(root
, cache
);
2587 last
= cache
->key
.objectid
+ cache
->key
.offset
;
2589 err
= write_one_cache_group(trans
, root
, path
, cache
);
2591 btrfs_put_block_group(cache
);
2594 btrfs_free_path(path
);
2598 int btrfs_extent_readonly(struct btrfs_root
*root
, u64 bytenr
)
2600 struct btrfs_block_group_cache
*block_group
;
2603 block_group
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
2604 if (!block_group
|| block_group
->ro
)
2607 btrfs_put_block_group(block_group
);
2611 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
2612 u64 total_bytes
, u64 bytes_used
,
2613 struct btrfs_space_info
**space_info
)
2615 struct btrfs_space_info
*found
;
2617 found
= __find_space_info(info
, flags
);
2619 spin_lock(&found
->lock
);
2620 found
->total_bytes
+= total_bytes
;
2621 found
->bytes_used
+= bytes_used
;
2623 spin_unlock(&found
->lock
);
2624 *space_info
= found
;
2627 found
= kzalloc(sizeof(*found
), GFP_NOFS
);
2631 INIT_LIST_HEAD(&found
->block_groups
);
2632 init_rwsem(&found
->groups_sem
);
2633 spin_lock_init(&found
->lock
);
2634 found
->flags
= flags
;
2635 found
->total_bytes
= total_bytes
;
2636 found
->bytes_used
= bytes_used
;
2637 found
->bytes_pinned
= 0;
2638 found
->bytes_reserved
= 0;
2639 found
->bytes_readonly
= 0;
2640 found
->bytes_delalloc
= 0;
2642 found
->force_alloc
= 0;
2643 *space_info
= found
;
2644 list_add_rcu(&found
->list
, &info
->space_info
);
2645 atomic_set(&found
->caching_threads
, 0);
2649 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
2651 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
2652 BTRFS_BLOCK_GROUP_RAID1
|
2653 BTRFS_BLOCK_GROUP_RAID10
|
2654 BTRFS_BLOCK_GROUP_DUP
);
2656 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
2657 fs_info
->avail_data_alloc_bits
|= extra_flags
;
2658 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
2659 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
2660 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
2661 fs_info
->avail_system_alloc_bits
|= extra_flags
;
2665 static void set_block_group_readonly(struct btrfs_block_group_cache
*cache
)
2667 spin_lock(&cache
->space_info
->lock
);
2668 spin_lock(&cache
->lock
);
2670 cache
->space_info
->bytes_readonly
+= cache
->key
.offset
-
2671 btrfs_block_group_used(&cache
->item
);
2674 spin_unlock(&cache
->lock
);
2675 spin_unlock(&cache
->space_info
->lock
);
2678 u64
btrfs_reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
2680 u64 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
2682 if (num_devices
== 1)
2683 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
2684 if (num_devices
< 4)
2685 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
2687 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
2688 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
2689 BTRFS_BLOCK_GROUP_RAID10
))) {
2690 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
2693 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
2694 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
2695 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
2698 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
2699 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
2700 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
2701 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
2702 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
2706 static u64
btrfs_get_alloc_profile(struct btrfs_root
*root
, u64 data
)
2708 struct btrfs_fs_info
*info
= root
->fs_info
;
2712 alloc_profile
= info
->avail_data_alloc_bits
&
2713 info
->data_alloc_profile
;
2714 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
2715 } else if (root
== root
->fs_info
->chunk_root
) {
2716 alloc_profile
= info
->avail_system_alloc_bits
&
2717 info
->system_alloc_profile
;
2718 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
2720 alloc_profile
= info
->avail_metadata_alloc_bits
&
2721 info
->metadata_alloc_profile
;
2722 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
2725 return btrfs_reduce_alloc_profile(root
, data
);
2728 void btrfs_set_inode_space_info(struct btrfs_root
*root
, struct inode
*inode
)
2732 alloc_target
= btrfs_get_alloc_profile(root
, 1);
2733 BTRFS_I(inode
)->space_info
= __find_space_info(root
->fs_info
,
2738 * for now this just makes sure we have at least 5% of our metadata space free
2741 int btrfs_check_metadata_free_space(struct btrfs_root
*root
)
2743 struct btrfs_fs_info
*info
= root
->fs_info
;
2744 struct btrfs_space_info
*meta_sinfo
;
2745 u64 alloc_target
, thresh
;
2746 int committed
= 0, ret
;
2748 /* get the space info for where the metadata will live */
2749 alloc_target
= btrfs_get_alloc_profile(root
, 0);
2750 meta_sinfo
= __find_space_info(info
, alloc_target
);
2753 spin_lock(&meta_sinfo
->lock
);
2754 if (!meta_sinfo
->full
)
2755 thresh
= meta_sinfo
->total_bytes
* 80;
2757 thresh
= meta_sinfo
->total_bytes
* 95;
2759 do_div(thresh
, 100);
2761 if (meta_sinfo
->bytes_used
+ meta_sinfo
->bytes_reserved
+
2762 meta_sinfo
->bytes_pinned
+ meta_sinfo
->bytes_readonly
> thresh
) {
2763 struct btrfs_trans_handle
*trans
;
2764 if (!meta_sinfo
->full
) {
2765 meta_sinfo
->force_alloc
= 1;
2766 spin_unlock(&meta_sinfo
->lock
);
2768 trans
= btrfs_start_transaction(root
, 1);
2772 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2773 2 * 1024 * 1024, alloc_target
, 0);
2774 btrfs_end_transaction(trans
, root
);
2777 spin_unlock(&meta_sinfo
->lock
);
2781 trans
= btrfs_join_transaction(root
, 1);
2784 ret
= btrfs_commit_transaction(trans
, root
);
2791 spin_unlock(&meta_sinfo
->lock
);
2797 * This will check the space that the inode allocates from to make sure we have
2798 * enough space for bytes.
2800 int btrfs_check_data_free_space(struct btrfs_root
*root
, struct inode
*inode
,
2803 struct btrfs_space_info
*data_sinfo
;
2804 int ret
= 0, committed
= 0;
2806 /* make sure bytes are sectorsize aligned */
2807 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
2809 data_sinfo
= BTRFS_I(inode
)->space_info
;
2811 /* make sure we have enough space to handle the data first */
2812 spin_lock(&data_sinfo
->lock
);
2813 if (data_sinfo
->total_bytes
- data_sinfo
->bytes_used
-
2814 data_sinfo
->bytes_delalloc
- data_sinfo
->bytes_reserved
-
2815 data_sinfo
->bytes_pinned
- data_sinfo
->bytes_readonly
-
2816 data_sinfo
->bytes_may_use
< bytes
) {
2817 struct btrfs_trans_handle
*trans
;
2820 * if we don't have enough free bytes in this space then we need
2821 * to alloc a new chunk.
2823 if (!data_sinfo
->full
) {
2826 data_sinfo
->force_alloc
= 1;
2827 spin_unlock(&data_sinfo
->lock
);
2829 alloc_target
= btrfs_get_alloc_profile(root
, 1);
2830 trans
= btrfs_start_transaction(root
, 1);
2834 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2835 bytes
+ 2 * 1024 * 1024,
2837 btrfs_end_transaction(trans
, root
);
2842 spin_unlock(&data_sinfo
->lock
);
2844 /* commit the current transaction and try again */
2847 trans
= btrfs_join_transaction(root
, 1);
2850 ret
= btrfs_commit_transaction(trans
, root
);
2856 printk(KERN_ERR
"no space left, need %llu, %llu delalloc bytes"
2857 ", %llu bytes_used, %llu bytes_reserved, "
2858 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2859 "%llu total\n", (unsigned long long)bytes
,
2860 (unsigned long long)data_sinfo
->bytes_delalloc
,
2861 (unsigned long long)data_sinfo
->bytes_used
,
2862 (unsigned long long)data_sinfo
->bytes_reserved
,
2863 (unsigned long long)data_sinfo
->bytes_pinned
,
2864 (unsigned long long)data_sinfo
->bytes_readonly
,
2865 (unsigned long long)data_sinfo
->bytes_may_use
,
2866 (unsigned long long)data_sinfo
->total_bytes
);
2869 data_sinfo
->bytes_may_use
+= bytes
;
2870 BTRFS_I(inode
)->reserved_bytes
+= bytes
;
2871 spin_unlock(&data_sinfo
->lock
);
2873 return btrfs_check_metadata_free_space(root
);
2877 * if there was an error for whatever reason after calling
2878 * btrfs_check_data_free_space, call this so we can cleanup the counters.
2880 void btrfs_free_reserved_data_space(struct btrfs_root
*root
,
2881 struct inode
*inode
, u64 bytes
)
2883 struct btrfs_space_info
*data_sinfo
;
2885 /* make sure bytes are sectorsize aligned */
2886 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
2888 data_sinfo
= BTRFS_I(inode
)->space_info
;
2889 spin_lock(&data_sinfo
->lock
);
2890 data_sinfo
->bytes_may_use
-= bytes
;
2891 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
2892 spin_unlock(&data_sinfo
->lock
);
2895 /* called when we are adding a delalloc extent to the inode's io_tree */
2896 void btrfs_delalloc_reserve_space(struct btrfs_root
*root
, struct inode
*inode
,
2899 struct btrfs_space_info
*data_sinfo
;
2901 /* get the space info for where this inode will be storing its data */
2902 data_sinfo
= BTRFS_I(inode
)->space_info
;
2904 /* make sure we have enough space to handle the data first */
2905 spin_lock(&data_sinfo
->lock
);
2906 data_sinfo
->bytes_delalloc
+= bytes
;
2909 * we are adding a delalloc extent without calling
2910 * btrfs_check_data_free_space first. This happens on a weird
2911 * writepage condition, but shouldn't hurt our accounting
2913 if (unlikely(bytes
> BTRFS_I(inode
)->reserved_bytes
)) {
2914 data_sinfo
->bytes_may_use
-= BTRFS_I(inode
)->reserved_bytes
;
2915 BTRFS_I(inode
)->reserved_bytes
= 0;
2917 data_sinfo
->bytes_may_use
-= bytes
;
2918 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
2921 spin_unlock(&data_sinfo
->lock
);
2924 /* called when we are clearing an delalloc extent from the inode's io_tree */
2925 void btrfs_delalloc_free_space(struct btrfs_root
*root
, struct inode
*inode
,
2928 struct btrfs_space_info
*info
;
2930 info
= BTRFS_I(inode
)->space_info
;
2932 spin_lock(&info
->lock
);
2933 info
->bytes_delalloc
-= bytes
;
2934 spin_unlock(&info
->lock
);
2937 static void force_metadata_allocation(struct btrfs_fs_info
*info
)
2939 struct list_head
*head
= &info
->space_info
;
2940 struct btrfs_space_info
*found
;
2943 list_for_each_entry_rcu(found
, head
, list
) {
2944 if (found
->flags
& BTRFS_BLOCK_GROUP_METADATA
)
2945 found
->force_alloc
= 1;
2950 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
2951 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
2952 u64 flags
, int force
)
2954 struct btrfs_space_info
*space_info
;
2955 struct btrfs_fs_info
*fs_info
= extent_root
->fs_info
;
2959 mutex_lock(&fs_info
->chunk_mutex
);
2961 flags
= btrfs_reduce_alloc_profile(extent_root
, flags
);
2963 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
2965 ret
= update_space_info(extent_root
->fs_info
, flags
,
2969 BUG_ON(!space_info
);
2971 spin_lock(&space_info
->lock
);
2972 if (space_info
->force_alloc
) {
2974 space_info
->force_alloc
= 0;
2976 if (space_info
->full
) {
2977 spin_unlock(&space_info
->lock
);
2981 thresh
= space_info
->total_bytes
- space_info
->bytes_readonly
;
2982 thresh
= div_factor(thresh
, 6);
2984 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
2985 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
) {
2986 spin_unlock(&space_info
->lock
);
2989 spin_unlock(&space_info
->lock
);
2992 * if we're doing a data chunk, go ahead and make sure that
2993 * we keep a reasonable number of metadata chunks allocated in the
2996 if (flags
& BTRFS_BLOCK_GROUP_DATA
) {
2997 fs_info
->data_chunk_allocations
++;
2998 if (!(fs_info
->data_chunk_allocations
%
2999 fs_info
->metadata_ratio
))
3000 force_metadata_allocation(fs_info
);
3003 ret
= btrfs_alloc_chunk(trans
, extent_root
, flags
);
3005 space_info
->full
= 1;
3007 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
3011 static int update_block_group(struct btrfs_trans_handle
*trans
,
3012 struct btrfs_root
*root
,
3013 u64 bytenr
, u64 num_bytes
, int alloc
,
3016 struct btrfs_block_group_cache
*cache
;
3017 struct btrfs_fs_info
*info
= root
->fs_info
;
3018 u64 total
= num_bytes
;
3022 /* block accounting for super block */
3023 spin_lock(&info
->delalloc_lock
);
3024 old_val
= btrfs_super_bytes_used(&info
->super_copy
);
3026 old_val
+= num_bytes
;
3028 old_val
-= num_bytes
;
3029 btrfs_set_super_bytes_used(&info
->super_copy
, old_val
);
3031 /* block accounting for root item */
3032 old_val
= btrfs_root_used(&root
->root_item
);
3034 old_val
+= num_bytes
;
3036 old_val
-= num_bytes
;
3037 btrfs_set_root_used(&root
->root_item
, old_val
);
3038 spin_unlock(&info
->delalloc_lock
);
3041 cache
= btrfs_lookup_block_group(info
, bytenr
);
3044 byte_in_group
= bytenr
- cache
->key
.objectid
;
3045 WARN_ON(byte_in_group
> cache
->key
.offset
);
3047 spin_lock(&cache
->space_info
->lock
);
3048 spin_lock(&cache
->lock
);
3050 old_val
= btrfs_block_group_used(&cache
->item
);
3051 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
3053 old_val
+= num_bytes
;
3054 cache
->space_info
->bytes_used
+= num_bytes
;
3056 cache
->space_info
->bytes_readonly
-= num_bytes
;
3057 btrfs_set_block_group_used(&cache
->item
, old_val
);
3058 spin_unlock(&cache
->lock
);
3059 spin_unlock(&cache
->space_info
->lock
);
3061 old_val
-= num_bytes
;
3062 cache
->space_info
->bytes_used
-= num_bytes
;
3064 cache
->space_info
->bytes_readonly
+= num_bytes
;
3065 btrfs_set_block_group_used(&cache
->item
, old_val
);
3066 spin_unlock(&cache
->lock
);
3067 spin_unlock(&cache
->space_info
->lock
);
3071 ret
= btrfs_discard_extent(root
, bytenr
,
3075 ret
= btrfs_add_free_space(cache
, bytenr
,
3080 btrfs_put_block_group(cache
);
3082 bytenr
+= num_bytes
;
3087 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
3089 struct btrfs_block_group_cache
*cache
;
3092 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
3096 bytenr
= cache
->key
.objectid
;
3097 btrfs_put_block_group(cache
);
3102 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
3103 u64 bytenr
, u64 num
, int pin
, int mark_free
)
3106 struct btrfs_block_group_cache
*cache
;
3107 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
3110 set_extent_dirty(&fs_info
->pinned_extents
,
3111 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
3113 clear_extent_dirty(&fs_info
->pinned_extents
,
3114 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
3118 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
3120 len
= min(num
, cache
->key
.offset
-
3121 (bytenr
- cache
->key
.objectid
));
3123 spin_lock(&cache
->space_info
->lock
);
3124 spin_lock(&cache
->lock
);
3125 cache
->pinned
+= len
;
3126 cache
->space_info
->bytes_pinned
+= len
;
3127 spin_unlock(&cache
->lock
);
3128 spin_unlock(&cache
->space_info
->lock
);
3129 fs_info
->total_pinned
+= len
;
3131 spin_lock(&cache
->space_info
->lock
);
3132 spin_lock(&cache
->lock
);
3133 cache
->pinned
-= len
;
3134 cache
->space_info
->bytes_pinned
-= len
;
3135 spin_unlock(&cache
->lock
);
3136 spin_unlock(&cache
->space_info
->lock
);
3137 fs_info
->total_pinned
-= len
;
3138 if (block_group_cache_done(cache
) && mark_free
)
3139 btrfs_add_free_space(cache
, bytenr
, len
);
3141 btrfs_put_block_group(cache
);
3148 static int update_reserved_extents(struct btrfs_root
*root
,
3149 u64 bytenr
, u64 num
, int reserve
)
3152 struct btrfs_block_group_cache
*cache
;
3153 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
3156 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
3158 len
= min(num
, cache
->key
.offset
-
3159 (bytenr
- cache
->key
.objectid
));
3161 spin_lock(&cache
->space_info
->lock
);
3162 spin_lock(&cache
->lock
);
3164 cache
->reserved
+= len
;
3165 cache
->space_info
->bytes_reserved
+= len
;
3167 cache
->reserved
-= len
;
3168 cache
->space_info
->bytes_reserved
-= len
;
3170 spin_unlock(&cache
->lock
);
3171 spin_unlock(&cache
->space_info
->lock
);
3172 btrfs_put_block_group(cache
);
3179 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
3184 bool caching_kthreads
= false;
3185 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
3188 if (atomic_read(&root
->fs_info
->async_caching_threads
))
3189 caching_kthreads
= true;
3192 ret
= find_first_extent_bit(pinned_extents
, last
,
3193 &start
, &end
, EXTENT_DIRTY
);
3198 * we need to make sure that the pinned extents don't go away
3199 * while we are caching block groups
3201 if (unlikely(caching_kthreads
))
3202 set_extent_delalloc(pinned_extents
, start
, end
,
3205 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
3211 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
3212 struct btrfs_root
*root
,
3213 struct extent_io_tree
*unpin
)
3220 ret
= find_first_extent_bit(&root
->fs_info
->pinned_extents
, 0,
3221 &start
, &end
, EXTENT_DELALLOC
);
3226 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
3231 ret
= btrfs_discard_extent(root
, start
, end
+ 1 - start
);
3233 /* unlocks the pinned mutex */
3234 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0,
3236 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
3241 if (unlikely(!mark_free
))
3242 btrfs_discard_pinned_extents(root
->fs_info
, NULL
);
3247 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
3248 struct btrfs_root
*root
,
3249 struct btrfs_path
*path
,
3250 u64 bytenr
, u64 num_bytes
, int is_data
,
3251 struct extent_buffer
**must_clean
)
3254 struct extent_buffer
*buf
;
3259 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
3263 /* we can reuse a block if it hasn't been written
3264 * and it is from this transaction. We can't
3265 * reuse anything from the tree log root because
3266 * it has tiny sub-transactions.
3268 if (btrfs_buffer_uptodate(buf
, 0) &&
3269 btrfs_try_tree_lock(buf
)) {
3270 u64 header_owner
= btrfs_header_owner(buf
);
3271 u64 header_transid
= btrfs_header_generation(buf
);
3272 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
3273 header_transid
== trans
->transid
&&
3274 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
3278 btrfs_tree_unlock(buf
);
3280 free_extent_buffer(buf
);
3282 btrfs_set_path_blocking(path
);
3283 /* unlocks the pinned mutex */
3284 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1, 0);
3291 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
3292 struct btrfs_root
*root
,
3293 u64 bytenr
, u64 num_bytes
, u64 parent
,
3294 u64 root_objectid
, u64 owner_objectid
,
3295 u64 owner_offset
, int refs_to_drop
,
3296 struct btrfs_delayed_extent_op
*extent_op
)
3298 struct btrfs_key key
;
3299 struct btrfs_path
*path
;
3300 struct btrfs_fs_info
*info
= root
->fs_info
;
3301 struct btrfs_root
*extent_root
= info
->extent_root
;
3302 struct extent_buffer
*leaf
;
3303 struct btrfs_extent_item
*ei
;
3304 struct btrfs_extent_inline_ref
*iref
;
3307 int extent_slot
= 0;
3308 int found_extent
= 0;
3313 path
= btrfs_alloc_path();
3318 path
->leave_spinning
= 1;
3320 is_data
= owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
;
3321 BUG_ON(!is_data
&& refs_to_drop
!= 1);
3323 ret
= lookup_extent_backref(trans
, extent_root
, path
, &iref
,
3324 bytenr
, num_bytes
, parent
,
3325 root_objectid
, owner_objectid
,
3328 extent_slot
= path
->slots
[0];
3329 while (extent_slot
>= 0) {
3330 btrfs_item_key_to_cpu(path
->nodes
[0], &key
,
3332 if (key
.objectid
!= bytenr
)
3334 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
3335 key
.offset
== num_bytes
) {
3339 if (path
->slots
[0] - extent_slot
> 5)
3343 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3344 item_size
= btrfs_item_size_nr(path
->nodes
[0], extent_slot
);
3345 if (found_extent
&& item_size
< sizeof(*ei
))
3348 if (!found_extent
) {
3350 ret
= remove_extent_backref(trans
, extent_root
, path
,
3354 btrfs_release_path(extent_root
, path
);
3355 path
->leave_spinning
= 1;
3357 key
.objectid
= bytenr
;
3358 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
3359 key
.offset
= num_bytes
;
3361 ret
= btrfs_search_slot(trans
, extent_root
,
3364 printk(KERN_ERR
"umm, got %d back from search"
3365 ", was looking for %llu\n", ret
,
3366 (unsigned long long)bytenr
);
3367 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3370 extent_slot
= path
->slots
[0];
3373 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3375 printk(KERN_ERR
"btrfs unable to find ref byte nr %llu "
3376 "parent %llu root %llu owner %llu offset %llu\n",
3377 (unsigned long long)bytenr
,
3378 (unsigned long long)parent
,
3379 (unsigned long long)root_objectid
,
3380 (unsigned long long)owner_objectid
,
3381 (unsigned long long)owner_offset
);
3384 leaf
= path
->nodes
[0];
3385 item_size
= btrfs_item_size_nr(leaf
, extent_slot
);
3386 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3387 if (item_size
< sizeof(*ei
)) {
3388 BUG_ON(found_extent
|| extent_slot
!= path
->slots
[0]);
3389 ret
= convert_extent_item_v0(trans
, extent_root
, path
,
3393 btrfs_release_path(extent_root
, path
);
3394 path
->leave_spinning
= 1;
3396 key
.objectid
= bytenr
;
3397 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
3398 key
.offset
= num_bytes
;
3400 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
3403 printk(KERN_ERR
"umm, got %d back from search"
3404 ", was looking for %llu\n", ret
,
3405 (unsigned long long)bytenr
);
3406 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
3409 extent_slot
= path
->slots
[0];
3410 leaf
= path
->nodes
[0];
3411 item_size
= btrfs_item_size_nr(leaf
, extent_slot
);
3414 BUG_ON(item_size
< sizeof(*ei
));
3415 ei
= btrfs_item_ptr(leaf
, extent_slot
,
3416 struct btrfs_extent_item
);
3417 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
3418 struct btrfs_tree_block_info
*bi
;
3419 BUG_ON(item_size
< sizeof(*ei
) + sizeof(*bi
));
3420 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
3421 WARN_ON(owner_objectid
!= btrfs_tree_block_level(leaf
, bi
));
3424 refs
= btrfs_extent_refs(leaf
, ei
);
3425 BUG_ON(refs
< refs_to_drop
);
3426 refs
-= refs_to_drop
;
3430 __run_delayed_extent_op(extent_op
, leaf
, ei
);
3432 * In the case of inline back ref, reference count will
3433 * be updated by remove_extent_backref
3436 BUG_ON(!found_extent
);
3438 btrfs_set_extent_refs(leaf
, ei
, refs
);
3439 btrfs_mark_buffer_dirty(leaf
);
3442 ret
= remove_extent_backref(trans
, extent_root
, path
,
3449 struct extent_buffer
*must_clean
= NULL
;
3452 BUG_ON(is_data
&& refs_to_drop
!=
3453 extent_data_ref_count(root
, path
, iref
));
3455 BUG_ON(path
->slots
[0] != extent_slot
);
3457 BUG_ON(path
->slots
[0] != extent_slot
+ 1);
3458 path
->slots
[0] = extent_slot
;
3463 ret
= pin_down_bytes(trans
, root
, path
, bytenr
,
3464 num_bytes
, is_data
, &must_clean
);
3469 * it is going to be very rare for someone to be waiting
3470 * on the block we're freeing. del_items might need to
3471 * schedule, so rather than get fancy, just force it
3475 btrfs_set_lock_blocking(must_clean
);
3477 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
3480 btrfs_release_path(extent_root
, path
);
3483 clean_tree_block(NULL
, root
, must_clean
);
3484 btrfs_tree_unlock(must_clean
);
3485 free_extent_buffer(must_clean
);
3489 ret
= btrfs_del_csums(trans
, root
, bytenr
, num_bytes
);
3492 invalidate_mapping_pages(info
->btree_inode
->i_mapping
,
3493 bytenr
>> PAGE_CACHE_SHIFT
,
3494 (bytenr
+ num_bytes
- 1) >> PAGE_CACHE_SHIFT
);
3497 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
3501 btrfs_free_path(path
);
3506 * when we free an extent, it is possible (and likely) that we free the last
3507 * delayed ref for that extent as well. This searches the delayed ref tree for
3508 * a given extent, and if there are no other delayed refs to be processed, it
3509 * removes it from the tree.
3511 static noinline
int check_ref_cleanup(struct btrfs_trans_handle
*trans
,
3512 struct btrfs_root
*root
, u64 bytenr
)
3514 struct btrfs_delayed_ref_head
*head
;
3515 struct btrfs_delayed_ref_root
*delayed_refs
;
3516 struct btrfs_delayed_ref_node
*ref
;
3517 struct rb_node
*node
;
3520 delayed_refs
= &trans
->transaction
->delayed_refs
;
3521 spin_lock(&delayed_refs
->lock
);
3522 head
= btrfs_find_delayed_ref_head(trans
, bytenr
);
3526 node
= rb_prev(&head
->node
.rb_node
);
3530 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
3532 /* there are still entries for this ref, we can't drop it */
3533 if (ref
->bytenr
== bytenr
)
3536 if (head
->extent_op
) {
3537 if (!head
->must_insert_reserved
)
3539 kfree(head
->extent_op
);
3540 head
->extent_op
= NULL
;
3544 * waiting for the lock here would deadlock. If someone else has it
3545 * locked they are already in the process of dropping it anyway
3547 if (!mutex_trylock(&head
->mutex
))
3551 * at this point we have a head with no other entries. Go
3552 * ahead and process it.
3554 head
->node
.in_tree
= 0;
3555 rb_erase(&head
->node
.rb_node
, &delayed_refs
->root
);
3557 delayed_refs
->num_entries
--;
3560 * we don't take a ref on the node because we're removing it from the
3561 * tree, so we just steal the ref the tree was holding.
3563 delayed_refs
->num_heads
--;
3564 if (list_empty(&head
->cluster
))
3565 delayed_refs
->num_heads_ready
--;
3567 list_del_init(&head
->cluster
);
3568 spin_unlock(&delayed_refs
->lock
);
3570 ret
= run_one_delayed_ref(trans
, root
->fs_info
->tree_root
,
3571 &head
->node
, head
->extent_op
,
3572 head
->must_insert_reserved
);
3574 btrfs_put_delayed_ref(&head
->node
);
3577 spin_unlock(&delayed_refs
->lock
);
3581 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
3582 struct btrfs_root
*root
,
3583 u64 bytenr
, u64 num_bytes
, u64 parent
,
3584 u64 root_objectid
, u64 owner
, u64 offset
)
3589 * tree log blocks never actually go into the extent allocation
3590 * tree, just update pinning info and exit early.
3592 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
) {
3593 WARN_ON(owner
>= BTRFS_FIRST_FREE_OBJECTID
);
3594 /* unlocks the pinned mutex */
3595 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1, 0);
3596 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
3598 } else if (owner
< BTRFS_FIRST_FREE_OBJECTID
) {
3599 ret
= btrfs_add_delayed_tree_ref(trans
, bytenr
, num_bytes
,
3600 parent
, root_objectid
, (int)owner
,
3601 BTRFS_DROP_DELAYED_REF
, NULL
);
3603 ret
= check_ref_cleanup(trans
, root
, bytenr
);
3606 ret
= btrfs_add_delayed_data_ref(trans
, bytenr
, num_bytes
,
3607 parent
, root_objectid
, owner
,
3608 offset
, BTRFS_DROP_DELAYED_REF
, NULL
);
3614 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
3616 u64 mask
= ((u64
)root
->stripesize
- 1);
3617 u64 ret
= (val
+ mask
) & ~mask
;
3622 * when we wait for progress in the block group caching, its because
3623 * our allocation attempt failed at least once. So, we must sleep
3624 * and let some progress happen before we try again.
3626 * This function will sleep at least once waiting for new free space to
3627 * show up, and then it will check the block group free space numbers
3628 * for our min num_bytes. Another option is to have it go ahead
3629 * and look in the rbtree for a free extent of a given size, but this
3633 wait_block_group_cache_progress(struct btrfs_block_group_cache
*cache
,
3638 prepare_to_wait(&cache
->caching_q
, &wait
, TASK_UNINTERRUPTIBLE
);
3640 if (block_group_cache_done(cache
)) {
3641 finish_wait(&cache
->caching_q
, &wait
);
3645 finish_wait(&cache
->caching_q
, &wait
);
3647 wait_event(cache
->caching_q
, block_group_cache_done(cache
) ||
3648 (cache
->free_space
>= num_bytes
));
3652 enum btrfs_loop_type
{
3653 LOOP_CACHED_ONLY
= 0,
3654 LOOP_CACHING_NOWAIT
= 1,
3655 LOOP_CACHING_WAIT
= 2,
3656 LOOP_ALLOC_CHUNK
= 3,
3657 LOOP_NO_EMPTY_SIZE
= 4,
3661 * walks the btree of allocated extents and find a hole of a given size.
3662 * The key ins is changed to record the hole:
3663 * ins->objectid == block start
3664 * ins->flags = BTRFS_EXTENT_ITEM_KEY
3665 * ins->offset == number of blocks
3666 * Any available blocks before search_start are skipped.
3668 static noinline
int find_free_extent(struct btrfs_trans_handle
*trans
,
3669 struct btrfs_root
*orig_root
,
3670 u64 num_bytes
, u64 empty_size
,
3671 u64 search_start
, u64 search_end
,
3672 u64 hint_byte
, struct btrfs_key
*ins
,
3673 u64 exclude_start
, u64 exclude_nr
,
3677 struct btrfs_root
*root
= orig_root
->fs_info
->extent_root
;
3678 struct btrfs_free_cluster
*last_ptr
= NULL
;
3679 struct btrfs_block_group_cache
*block_group
= NULL
;
3680 int empty_cluster
= 2 * 1024 * 1024;
3681 int allowed_chunk_alloc
= 0;
3682 struct btrfs_space_info
*space_info
;
3683 int last_ptr_loop
= 0;
3685 bool found_uncached_bg
= false;
3687 WARN_ON(num_bytes
< root
->sectorsize
);
3688 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
3692 space_info
= __find_space_info(root
->fs_info
, data
);
3694 if (orig_root
->ref_cows
|| empty_size
)
3695 allowed_chunk_alloc
= 1;
3697 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
3698 last_ptr
= &root
->fs_info
->meta_alloc_cluster
;
3699 if (!btrfs_test_opt(root
, SSD
))
3700 empty_cluster
= 64 * 1024;
3703 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
)) {
3704 last_ptr
= &root
->fs_info
->data_alloc_cluster
;
3708 spin_lock(&last_ptr
->lock
);
3709 if (last_ptr
->block_group
)
3710 hint_byte
= last_ptr
->window_start
;
3711 spin_unlock(&last_ptr
->lock
);
3714 search_start
= max(search_start
, first_logical_byte(root
, 0));
3715 search_start
= max(search_start
, hint_byte
);
3720 if (search_start
== hint_byte
) {
3721 block_group
= btrfs_lookup_block_group(root
->fs_info
,
3724 * we don't want to use the block group if it doesn't match our
3725 * allocation bits, or if its not cached.
3727 if (block_group
&& block_group_bits(block_group
, data
) &&
3728 block_group_cache_done(block_group
)) {
3729 down_read(&space_info
->groups_sem
);
3730 if (list_empty(&block_group
->list
) ||
3733 * someone is removing this block group,
3734 * we can't jump into the have_block_group
3735 * target because our list pointers are not
3738 btrfs_put_block_group(block_group
);
3739 up_read(&space_info
->groups_sem
);
3741 goto have_block_group
;
3742 } else if (block_group
) {
3743 btrfs_put_block_group(block_group
);
3748 down_read(&space_info
->groups_sem
);
3749 list_for_each_entry(block_group
, &space_info
->block_groups
, list
) {
3753 atomic_inc(&block_group
->count
);
3754 search_start
= block_group
->key
.objectid
;
3757 if (unlikely(block_group
->cached
== BTRFS_CACHE_NO
)) {
3759 * we want to start caching kthreads, but not too many
3760 * right off the bat so we don't overwhelm the system,
3761 * so only start them if there are less than 2 and we're
3762 * in the initial allocation phase.
3764 if (loop
> LOOP_CACHING_NOWAIT
||
3765 atomic_read(&space_info
->caching_threads
) < 2) {
3766 ret
= cache_block_group(block_group
);
3771 cached
= block_group_cache_done(block_group
);
3772 if (unlikely(!cached
)) {
3773 found_uncached_bg
= true;
3775 /* if we only want cached bgs, loop */
3776 if (loop
== LOOP_CACHED_ONLY
)
3780 if (unlikely(block_group
->ro
))
3785 * the refill lock keeps out other
3786 * people trying to start a new cluster
3788 spin_lock(&last_ptr
->refill_lock
);
3789 if (last_ptr
->block_group
&&
3790 (last_ptr
->block_group
->ro
||
3791 !block_group_bits(last_ptr
->block_group
, data
))) {
3793 goto refill_cluster
;
3796 offset
= btrfs_alloc_from_cluster(block_group
, last_ptr
,
3797 num_bytes
, search_start
);
3799 /* we have a block, we're done */
3800 spin_unlock(&last_ptr
->refill_lock
);
3804 spin_lock(&last_ptr
->lock
);
3806 * whoops, this cluster doesn't actually point to
3807 * this block group. Get a ref on the block
3808 * group is does point to and try again
3810 if (!last_ptr_loop
&& last_ptr
->block_group
&&
3811 last_ptr
->block_group
!= block_group
) {
3813 btrfs_put_block_group(block_group
);
3814 block_group
= last_ptr
->block_group
;
3815 atomic_inc(&block_group
->count
);
3816 spin_unlock(&last_ptr
->lock
);
3817 spin_unlock(&last_ptr
->refill_lock
);
3820 search_start
= block_group
->key
.objectid
;
3822 * we know this block group is properly
3823 * in the list because
3824 * btrfs_remove_block_group, drops the
3825 * cluster before it removes the block
3826 * group from the list
3828 goto have_block_group
;
3830 spin_unlock(&last_ptr
->lock
);
3833 * this cluster didn't work out, free it and
3836 btrfs_return_cluster_to_free_space(NULL
, last_ptr
);
3840 /* allocate a cluster in this block group */
3841 ret
= btrfs_find_space_cluster(trans
, root
,
3842 block_group
, last_ptr
,
3844 empty_cluster
+ empty_size
);
3847 * now pull our allocation out of this
3850 offset
= btrfs_alloc_from_cluster(block_group
,
3851 last_ptr
, num_bytes
,
3854 /* we found one, proceed */
3855 spin_unlock(&last_ptr
->refill_lock
);
3858 } else if (!cached
&& loop
> LOOP_CACHING_NOWAIT
) {
3859 spin_unlock(&last_ptr
->refill_lock
);
3861 wait_block_group_cache_progress(block_group
,
3862 num_bytes
+ empty_cluster
+ empty_size
);
3863 goto have_block_group
;
3867 * at this point we either didn't find a cluster
3868 * or we weren't able to allocate a block from our
3869 * cluster. Free the cluster we've been trying
3870 * to use, and go to the next block group
3872 if (loop
< LOOP_NO_EMPTY_SIZE
) {
3873 btrfs_return_cluster_to_free_space(NULL
,
3875 spin_unlock(&last_ptr
->refill_lock
);
3878 spin_unlock(&last_ptr
->refill_lock
);
3881 offset
= btrfs_find_space_for_alloc(block_group
, search_start
,
3882 num_bytes
, empty_size
);
3883 if (!offset
&& (cached
|| (!cached
&&
3884 loop
== LOOP_CACHING_NOWAIT
))) {
3886 } else if (!offset
&& (!cached
&&
3887 loop
> LOOP_CACHING_NOWAIT
)) {
3888 wait_block_group_cache_progress(block_group
,
3889 num_bytes
+ empty_size
);
3890 goto have_block_group
;
3893 search_start
= stripe_align(root
, offset
);
3894 /* move on to the next group */
3895 if (search_start
+ num_bytes
>= search_end
) {
3896 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3900 /* move on to the next group */
3901 if (search_start
+ num_bytes
>
3902 block_group
->key
.objectid
+ block_group
->key
.offset
) {
3903 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3907 if (exclude_nr
> 0 &&
3908 (search_start
+ num_bytes
> exclude_start
&&
3909 search_start
< exclude_start
+ exclude_nr
)) {
3910 search_start
= exclude_start
+ exclude_nr
;
3912 btrfs_add_free_space(block_group
, offset
, num_bytes
);
3914 * if search_start is still in this block group
3915 * then we just re-search this block group
3917 if (search_start
>= block_group
->key
.objectid
&&
3918 search_start
< (block_group
->key
.objectid
+
3919 block_group
->key
.offset
))
3920 goto have_block_group
;
3924 ins
->objectid
= search_start
;
3925 ins
->offset
= num_bytes
;
3927 if (offset
< search_start
)
3928 btrfs_add_free_space(block_group
, offset
,
3929 search_start
- offset
);
3930 BUG_ON(offset
> search_start
);
3932 /* we are all good, lets return */
3935 btrfs_put_block_group(block_group
);
3937 up_read(&space_info
->groups_sem
);
3939 /* LOOP_CACHED_ONLY, only search fully cached block groups
3940 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
3941 * dont wait foR them to finish caching
3942 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
3943 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
3944 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
3947 if (!ins
->objectid
&& loop
< LOOP_NO_EMPTY_SIZE
&&
3948 (found_uncached_bg
|| empty_size
|| empty_cluster
||
3949 allowed_chunk_alloc
)) {
3950 if (found_uncached_bg
) {
3951 found_uncached_bg
= false;
3952 if (loop
< LOOP_CACHING_WAIT
) {
3958 if (loop
== LOOP_ALLOC_CHUNK
) {
3963 if (allowed_chunk_alloc
) {
3964 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
3965 2 * 1024 * 1024, data
, 1);
3966 allowed_chunk_alloc
= 0;
3968 space_info
->force_alloc
= 1;
3971 if (loop
< LOOP_NO_EMPTY_SIZE
) {
3976 } else if (!ins
->objectid
) {
3980 /* we found what we needed */
3981 if (ins
->objectid
) {
3982 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
3983 trans
->block_group
= block_group
->key
.objectid
;
3985 btrfs_put_block_group(block_group
);
3992 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
3994 struct btrfs_block_group_cache
*cache
;
3996 printk(KERN_INFO
"space_info has %llu free, is %sfull\n",
3997 (unsigned long long)(info
->total_bytes
- info
->bytes_used
-
3998 info
->bytes_pinned
- info
->bytes_reserved
),
3999 (info
->full
) ? "" : "not ");
4000 printk(KERN_INFO
"space_info total=%llu, pinned=%llu, delalloc=%llu,"
4001 " may_use=%llu, used=%llu\n",
4002 (unsigned long long)info
->total_bytes
,
4003 (unsigned long long)info
->bytes_pinned
,
4004 (unsigned long long)info
->bytes_delalloc
,
4005 (unsigned long long)info
->bytes_may_use
,
4006 (unsigned long long)info
->bytes_used
);
4008 down_read(&info
->groups_sem
);
4009 list_for_each_entry(cache
, &info
->block_groups
, list
) {
4010 spin_lock(&cache
->lock
);
4011 printk(KERN_INFO
"block group %llu has %llu bytes, %llu used "
4012 "%llu pinned %llu reserved\n",
4013 (unsigned long long)cache
->key
.objectid
,
4014 (unsigned long long)cache
->key
.offset
,
4015 (unsigned long long)btrfs_block_group_used(&cache
->item
),
4016 (unsigned long long)cache
->pinned
,
4017 (unsigned long long)cache
->reserved
);
4018 btrfs_dump_free_space(cache
, bytes
);
4019 spin_unlock(&cache
->lock
);
4021 up_read(&info
->groups_sem
);
4024 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
4025 struct btrfs_root
*root
,
4026 u64 num_bytes
, u64 min_alloc_size
,
4027 u64 empty_size
, u64 hint_byte
,
4028 u64 search_end
, struct btrfs_key
*ins
,
4032 u64 search_start
= 0;
4033 struct btrfs_fs_info
*info
= root
->fs_info
;
4035 data
= btrfs_get_alloc_profile(root
, data
);
4038 * the only place that sets empty_size is btrfs_realloc_node, which
4039 * is not called recursively on allocations
4041 if (empty_size
|| root
->ref_cows
) {
4042 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
4043 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
4045 BTRFS_BLOCK_GROUP_METADATA
|
4046 (info
->metadata_alloc_profile
&
4047 info
->avail_metadata_alloc_bits
), 0);
4049 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
4050 num_bytes
+ 2 * 1024 * 1024, data
, 0);
4053 WARN_ON(num_bytes
< root
->sectorsize
);
4054 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
4055 search_start
, search_end
, hint_byte
, ins
,
4056 trans
->alloc_exclude_start
,
4057 trans
->alloc_exclude_nr
, data
);
4059 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
4060 num_bytes
= num_bytes
>> 1;
4061 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
4062 num_bytes
= max(num_bytes
, min_alloc_size
);
4063 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
4064 num_bytes
, data
, 1);
4067 if (ret
== -ENOSPC
) {
4068 struct btrfs_space_info
*sinfo
;
4070 sinfo
= __find_space_info(root
->fs_info
, data
);
4071 printk(KERN_ERR
"btrfs allocation failed flags %llu, "
4072 "wanted %llu\n", (unsigned long long)data
,
4073 (unsigned long long)num_bytes
);
4074 dump_space_info(sinfo
, num_bytes
);
4080 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
4082 struct btrfs_block_group_cache
*cache
;
4085 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
4087 printk(KERN_ERR
"Unable to find block group for %llu\n",
4088 (unsigned long long)start
);
4092 ret
= btrfs_discard_extent(root
, start
, len
);
4094 btrfs_add_free_space(cache
, start
, len
);
4095 btrfs_put_block_group(cache
);
4096 update_reserved_extents(root
, start
, len
, 0);
4101 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
4102 struct btrfs_root
*root
,
4103 u64 num_bytes
, u64 min_alloc_size
,
4104 u64 empty_size
, u64 hint_byte
,
4105 u64 search_end
, struct btrfs_key
*ins
,
4109 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
4110 empty_size
, hint_byte
, search_end
, ins
,
4113 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
4118 static int alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
4119 struct btrfs_root
*root
,
4120 u64 parent
, u64 root_objectid
,
4121 u64 flags
, u64 owner
, u64 offset
,
4122 struct btrfs_key
*ins
, int ref_mod
)
4125 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
4126 struct btrfs_extent_item
*extent_item
;
4127 struct btrfs_extent_inline_ref
*iref
;
4128 struct btrfs_path
*path
;
4129 struct extent_buffer
*leaf
;
4134 type
= BTRFS_SHARED_DATA_REF_KEY
;
4136 type
= BTRFS_EXTENT_DATA_REF_KEY
;
4138 size
= sizeof(*extent_item
) + btrfs_extent_inline_ref_size(type
);
4140 path
= btrfs_alloc_path();
4143 path
->leave_spinning
= 1;
4144 ret
= btrfs_insert_empty_item(trans
, fs_info
->extent_root
, path
,
4148 leaf
= path
->nodes
[0];
4149 extent_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
4150 struct btrfs_extent_item
);
4151 btrfs_set_extent_refs(leaf
, extent_item
, ref_mod
);
4152 btrfs_set_extent_generation(leaf
, extent_item
, trans
->transid
);
4153 btrfs_set_extent_flags(leaf
, extent_item
,
4154 flags
| BTRFS_EXTENT_FLAG_DATA
);
4156 iref
= (struct btrfs_extent_inline_ref
*)(extent_item
+ 1);
4157 btrfs_set_extent_inline_ref_type(leaf
, iref
, type
);
4159 struct btrfs_shared_data_ref
*ref
;
4160 ref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
4161 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
4162 btrfs_set_shared_data_ref_count(leaf
, ref
, ref_mod
);
4164 struct btrfs_extent_data_ref
*ref
;
4165 ref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
4166 btrfs_set_extent_data_ref_root(leaf
, ref
, root_objectid
);
4167 btrfs_set_extent_data_ref_objectid(leaf
, ref
, owner
);
4168 btrfs_set_extent_data_ref_offset(leaf
, ref
, offset
);
4169 btrfs_set_extent_data_ref_count(leaf
, ref
, ref_mod
);
4172 btrfs_mark_buffer_dirty(path
->nodes
[0]);
4173 btrfs_free_path(path
);
4175 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
,
4178 printk(KERN_ERR
"btrfs update block group failed for %llu "
4179 "%llu\n", (unsigned long long)ins
->objectid
,
4180 (unsigned long long)ins
->offset
);
4186 static int alloc_reserved_tree_block(struct btrfs_trans_handle
*trans
,
4187 struct btrfs_root
*root
,
4188 u64 parent
, u64 root_objectid
,
4189 u64 flags
, struct btrfs_disk_key
*key
,
4190 int level
, struct btrfs_key
*ins
)
4193 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
4194 struct btrfs_extent_item
*extent_item
;
4195 struct btrfs_tree_block_info
*block_info
;
4196 struct btrfs_extent_inline_ref
*iref
;
4197 struct btrfs_path
*path
;
4198 struct extent_buffer
*leaf
;
4199 u32 size
= sizeof(*extent_item
) + sizeof(*block_info
) + sizeof(*iref
);
4201 path
= btrfs_alloc_path();
4204 path
->leave_spinning
= 1;
4205 ret
= btrfs_insert_empty_item(trans
, fs_info
->extent_root
, path
,
4209 leaf
= path
->nodes
[0];
4210 extent_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
4211 struct btrfs_extent_item
);
4212 btrfs_set_extent_refs(leaf
, extent_item
, 1);
4213 btrfs_set_extent_generation(leaf
, extent_item
, trans
->transid
);
4214 btrfs_set_extent_flags(leaf
, extent_item
,
4215 flags
| BTRFS_EXTENT_FLAG_TREE_BLOCK
);
4216 block_info
= (struct btrfs_tree_block_info
*)(extent_item
+ 1);
4218 btrfs_set_tree_block_key(leaf
, block_info
, key
);
4219 btrfs_set_tree_block_level(leaf
, block_info
, level
);
4221 iref
= (struct btrfs_extent_inline_ref
*)(block_info
+ 1);
4223 BUG_ON(!(flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
));
4224 btrfs_set_extent_inline_ref_type(leaf
, iref
,
4225 BTRFS_SHARED_BLOCK_REF_KEY
);
4226 btrfs_set_extent_inline_ref_offset(leaf
, iref
, parent
);
4228 btrfs_set_extent_inline_ref_type(leaf
, iref
,
4229 BTRFS_TREE_BLOCK_REF_KEY
);
4230 btrfs_set_extent_inline_ref_offset(leaf
, iref
, root_objectid
);
4233 btrfs_mark_buffer_dirty(leaf
);
4234 btrfs_free_path(path
);
4236 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
,
4239 printk(KERN_ERR
"btrfs update block group failed for %llu "
4240 "%llu\n", (unsigned long long)ins
->objectid
,
4241 (unsigned long long)ins
->offset
);
4247 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle
*trans
,
4248 struct btrfs_root
*root
,
4249 u64 root_objectid
, u64 owner
,
4250 u64 offset
, struct btrfs_key
*ins
)
4254 BUG_ON(root_objectid
== BTRFS_TREE_LOG_OBJECTID
);
4256 ret
= btrfs_add_delayed_data_ref(trans
, ins
->objectid
, ins
->offset
,
4257 0, root_objectid
, owner
, offset
,
4258 BTRFS_ADD_DELAYED_EXTENT
, NULL
);
4263 * this is used by the tree logging recovery code. It records that
4264 * an extent has been allocated and makes sure to clear the free
4265 * space cache bits as well
4267 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle
*trans
,
4268 struct btrfs_root
*root
,
4269 u64 root_objectid
, u64 owner
, u64 offset
,
4270 struct btrfs_key
*ins
)
4273 struct btrfs_block_group_cache
*block_group
;
4275 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
4276 cache_block_group(block_group
);
4277 wait_event(block_group
->caching_q
,
4278 block_group_cache_done(block_group
));
4280 ret
= btrfs_remove_free_space(block_group
, ins
->objectid
,
4283 btrfs_put_block_group(block_group
);
4284 ret
= alloc_reserved_file_extent(trans
, root
, 0, root_objectid
,
4285 0, owner
, offset
, ins
, 1);
4290 * finds a free extent and does all the dirty work required for allocation
4291 * returns the key for the extent through ins, and a tree buffer for
4292 * the first block of the extent through buf.
4294 * returns 0 if everything worked, non-zero otherwise.
4296 static int alloc_tree_block(struct btrfs_trans_handle
*trans
,
4297 struct btrfs_root
*root
,
4298 u64 num_bytes
, u64 parent
, u64 root_objectid
,
4299 struct btrfs_disk_key
*key
, int level
,
4300 u64 empty_size
, u64 hint_byte
, u64 search_end
,
4301 struct btrfs_key
*ins
)
4306 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, num_bytes
,
4307 empty_size
, hint_byte
, search_end
,
4312 if (root_objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
4314 parent
= ins
->objectid
;
4315 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
4319 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
4320 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
4321 struct btrfs_delayed_extent_op
*extent_op
;
4322 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
4325 memcpy(&extent_op
->key
, key
, sizeof(extent_op
->key
));
4327 memset(&extent_op
->key
, 0, sizeof(extent_op
->key
));
4328 extent_op
->flags_to_set
= flags
;
4329 extent_op
->update_key
= 1;
4330 extent_op
->update_flags
= 1;
4331 extent_op
->is_data
= 0;
4333 ret
= btrfs_add_delayed_tree_ref(trans
, ins
->objectid
,
4334 ins
->offset
, parent
, root_objectid
,
4335 level
, BTRFS_ADD_DELAYED_EXTENT
,
4342 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
4343 struct btrfs_root
*root
,
4344 u64 bytenr
, u32 blocksize
,
4347 struct extent_buffer
*buf
;
4349 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
4351 return ERR_PTR(-ENOMEM
);
4352 btrfs_set_header_generation(buf
, trans
->transid
);
4353 btrfs_set_buffer_lockdep_class(buf
, level
);
4354 btrfs_tree_lock(buf
);
4355 clean_tree_block(trans
, root
, buf
);
4357 btrfs_set_lock_blocking(buf
);
4358 btrfs_set_buffer_uptodate(buf
);
4360 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
4361 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
4362 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
4364 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
4365 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
4367 trans
->blocks_used
++;
4368 /* this returns a buffer locked for blocking */
4373 * helper function to allocate a block for a given tree
4374 * returns the tree buffer or NULL.
4376 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
4377 struct btrfs_root
*root
, u32 blocksize
,
4378 u64 parent
, u64 root_objectid
,
4379 struct btrfs_disk_key
*key
, int level
,
4380 u64 hint
, u64 empty_size
)
4382 struct btrfs_key ins
;
4384 struct extent_buffer
*buf
;
4386 ret
= alloc_tree_block(trans
, root
, blocksize
, parent
, root_objectid
,
4387 key
, level
, empty_size
, hint
, (u64
)-1, &ins
);
4390 return ERR_PTR(ret
);
4393 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
,
4399 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
4400 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
4404 struct btrfs_key key
;
4405 struct btrfs_file_extent_item
*fi
;
4410 BUG_ON(!btrfs_is_leaf(leaf
));
4411 nritems
= btrfs_header_nritems(leaf
);
4413 for (i
= 0; i
< nritems
; i
++) {
4415 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4417 /* only extents have references, skip everything else */
4418 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4421 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4423 /* inline extents live in the btree, they don't have refs */
4424 if (btrfs_file_extent_type(leaf
, fi
) ==
4425 BTRFS_FILE_EXTENT_INLINE
)
4428 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4430 /* holes don't have refs */
4431 if (disk_bytenr
== 0)
4434 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4435 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
, num_bytes
,
4436 leaf
->start
, 0, key
.objectid
, 0);
4442 static noinline
int cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
4443 struct btrfs_root
*root
,
4444 struct btrfs_leaf_ref
*ref
)
4448 struct btrfs_extent_info
*info
;
4449 struct refsort
*sorted
;
4451 if (ref
->nritems
== 0)
4454 sorted
= kmalloc(sizeof(*sorted
) * ref
->nritems
, GFP_NOFS
);
4455 for (i
= 0; i
< ref
->nritems
; i
++) {
4456 sorted
[i
].bytenr
= ref
->extents
[i
].bytenr
;
4459 sort(sorted
, ref
->nritems
, sizeof(struct refsort
), refsort_cmp
, NULL
);
4462 * the items in the ref were sorted when the ref was inserted
4463 * into the ref cache, so this is already in order
4465 for (i
= 0; i
< ref
->nritems
; i
++) {
4466 info
= ref
->extents
+ sorted
[i
].slot
;
4467 ret
= btrfs_free_extent(trans
, root
, info
->bytenr
,
4468 info
->num_bytes
, ref
->bytenr
,
4469 ref
->owner
, ref
->generation
,
4472 atomic_inc(&root
->fs_info
->throttle_gen
);
4473 wake_up(&root
->fs_info
->transaction_throttle
);
4485 static int drop_snap_lookup_refcount(struct btrfs_trans_handle
*trans
,
4486 struct btrfs_root
*root
, u64 start
,
4491 ret
= btrfs_lookup_extent_refs(trans
, root
, start
, len
, refs
);
4494 #if 0 /* some debugging code in case we see problems here */
4495 /* if the refs count is one, it won't get increased again. But
4496 * if the ref count is > 1, someone may be decreasing it at
4497 * the same time we are.
4500 struct extent_buffer
*eb
= NULL
;
4501 eb
= btrfs_find_create_tree_block(root
, start
, len
);
4503 btrfs_tree_lock(eb
);
4505 mutex_lock(&root
->fs_info
->alloc_mutex
);
4506 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
4508 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4511 btrfs_tree_unlock(eb
);
4512 free_extent_buffer(eb
);
4515 printk(KERN_ERR
"btrfs block %llu went down to one "
4516 "during drop_snap\n", (unsigned long long)start
);
4528 * this is used while deleting old snapshots, and it drops the refs
4529 * on a whole subtree starting from a level 1 node.
4531 * The idea is to sort all the leaf pointers, and then drop the
4532 * ref on all the leaves in order. Most of the time the leaves
4533 * will have ref cache entries, so no leaf IOs will be required to
4534 * find the extents they have references on.
4536 * For each leaf, any references it has are also dropped in order
4538 * This ends up dropping the references in something close to optimal
4539 * order for reading and modifying the extent allocation tree.
4541 static noinline
int drop_level_one_refs(struct btrfs_trans_handle
*trans
,
4542 struct btrfs_root
*root
,
4543 struct btrfs_path
*path
)
4548 struct extent_buffer
*eb
= path
->nodes
[1];
4549 struct extent_buffer
*leaf
;
4550 struct btrfs_leaf_ref
*ref
;
4551 struct refsort
*sorted
= NULL
;
4552 int nritems
= btrfs_header_nritems(eb
);
4556 int slot
= path
->slots
[1];
4557 u32 blocksize
= btrfs_level_size(root
, 0);
4563 root_owner
= btrfs_header_owner(eb
);
4564 root_gen
= btrfs_header_generation(eb
);
4565 sorted
= kmalloc(sizeof(*sorted
) * nritems
, GFP_NOFS
);
4568 * step one, sort all the leaf pointers so we don't scribble
4569 * randomly into the extent allocation tree
4571 for (i
= slot
; i
< nritems
; i
++) {
4572 sorted
[refi
].bytenr
= btrfs_node_blockptr(eb
, i
);
4573 sorted
[refi
].slot
= i
;
4578 * nritems won't be zero, but if we're picking up drop_snapshot
4579 * after a crash, slot might be > 0, so double check things
4585 sort(sorted
, refi
, sizeof(struct refsort
), refsort_cmp
, NULL
);
4588 * the first loop frees everything the leaves point to
4590 for (i
= 0; i
< refi
; i
++) {
4593 bytenr
= sorted
[i
].bytenr
;
4596 * check the reference count on this leaf. If it is > 1
4597 * we just decrement it below and don't update any
4598 * of the refs the leaf points to.
4600 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
4606 ptr_gen
= btrfs_node_ptr_generation(eb
, sorted
[i
].slot
);
4609 * the leaf only had one reference, which means the
4610 * only thing pointing to this leaf is the snapshot
4611 * we're deleting. It isn't possible for the reference
4612 * count to increase again later
4614 * The reference cache is checked for the leaf,
4615 * and if found we'll be able to drop any refs held by
4616 * the leaf without needing to read it in.
4618 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
4619 if (ref
&& ref
->generation
!= ptr_gen
) {
4620 btrfs_free_leaf_ref(root
, ref
);
4624 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
4626 btrfs_remove_leaf_ref(root
, ref
);
4627 btrfs_free_leaf_ref(root
, ref
);
4630 * the leaf wasn't in the reference cache, so
4631 * we have to read it.
4633 leaf
= read_tree_block(root
, bytenr
, blocksize
,
4635 ret
= btrfs_drop_leaf_ref(trans
, root
, leaf
);
4637 free_extent_buffer(leaf
);
4639 atomic_inc(&root
->fs_info
->throttle_gen
);
4640 wake_up(&root
->fs_info
->transaction_throttle
);
4645 * run through the loop again to free the refs on the leaves.
4646 * This is faster than doing it in the loop above because
4647 * the leaves are likely to be clustered together. We end up
4648 * working in nice chunks on the extent allocation tree.
4650 for (i
= 0; i
< refi
; i
++) {
4651 bytenr
= sorted
[i
].bytenr
;
4652 ret
= btrfs_free_extent(trans
, root
, bytenr
,
4653 blocksize
, eb
->start
,
4654 root_owner
, root_gen
, 0, 1);
4657 atomic_inc(&root
->fs_info
->throttle_gen
);
4658 wake_up(&root
->fs_info
->transaction_throttle
);
4665 * update the path to show we've processed the entire level 1
4666 * node. This will get saved into the root's drop_snapshot_progress
4667 * field so these drops are not repeated again if this transaction
4670 path
->slots
[1] = nritems
;
4675 * helper function for drop_snapshot, this walks down the tree dropping ref
4676 * counts as it goes.
4678 static noinline
int walk_down_tree(struct btrfs_trans_handle
*trans
,
4679 struct btrfs_root
*root
,
4680 struct btrfs_path
*path
, int *level
)
4686 struct extent_buffer
*next
;
4687 struct extent_buffer
*cur
;
4688 struct extent_buffer
*parent
;
4693 WARN_ON(*level
< 0);
4694 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4695 ret
= drop_snap_lookup_refcount(trans
, root
, path
->nodes
[*level
]->start
,
4696 path
->nodes
[*level
]->len
, &refs
);
4702 * walk down to the last node level and free all the leaves
4704 while (*level
>= 0) {
4705 WARN_ON(*level
< 0);
4706 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4707 cur
= path
->nodes
[*level
];
4709 if (btrfs_header_level(cur
) != *level
)
4712 if (path
->slots
[*level
] >=
4713 btrfs_header_nritems(cur
))
4716 /* the new code goes down to level 1 and does all the
4717 * leaves pointed to that node in bulk. So, this check
4718 * for level 0 will always be false.
4720 * But, the disk format allows the drop_snapshot_progress
4721 * field in the root to leave things in a state where
4722 * a leaf will need cleaning up here. If someone crashes
4723 * with the old code and then boots with the new code,
4724 * we might find a leaf here.
4727 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
4733 * once we get to level one, process the whole node
4734 * at once, including everything below it.
4737 ret
= drop_level_one_refs(trans
, root
, path
);
4742 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
4743 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
4744 blocksize
= btrfs_level_size(root
, *level
- 1);
4746 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
4751 * if there is more than one reference, we don't need
4752 * to read that node to drop any references it has. We
4753 * just drop the ref we hold on that node and move on to the
4754 * next slot in this level.
4757 parent
= path
->nodes
[*level
];
4758 root_owner
= btrfs_header_owner(parent
);
4759 root_gen
= btrfs_header_generation(parent
);
4760 path
->slots
[*level
]++;
4762 ret
= btrfs_free_extent(trans
, root
, bytenr
,
4763 blocksize
, parent
->start
,
4764 root_owner
, root_gen
,
4768 atomic_inc(&root
->fs_info
->throttle_gen
);
4769 wake_up(&root
->fs_info
->transaction_throttle
);
4776 * we need to keep freeing things in the next level down.
4777 * read the block and loop around to process it
4779 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
4780 WARN_ON(*level
<= 0);
4781 if (path
->nodes
[*level
-1])
4782 free_extent_buffer(path
->nodes
[*level
-1]);
4783 path
->nodes
[*level
-1] = next
;
4784 *level
= btrfs_header_level(next
);
4785 path
->slots
[*level
] = 0;
4789 WARN_ON(*level
< 0);
4790 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
4792 if (path
->nodes
[*level
] == root
->node
) {
4793 parent
= path
->nodes
[*level
];
4794 bytenr
= path
->nodes
[*level
]->start
;
4796 parent
= path
->nodes
[*level
+ 1];
4797 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
4800 blocksize
= btrfs_level_size(root
, *level
);
4801 root_owner
= btrfs_header_owner(parent
);
4802 root_gen
= btrfs_header_generation(parent
);
4805 * cleanup and free the reference on the last node
4808 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
4809 parent
->start
, root_owner
, root_gen
,
4811 free_extent_buffer(path
->nodes
[*level
]);
4812 path
->nodes
[*level
] = NULL
;
4822 struct walk_control
{
4823 u64 refs
[BTRFS_MAX_LEVEL
];
4824 u64 flags
[BTRFS_MAX_LEVEL
];
4825 struct btrfs_key update_progress
;
4833 #define DROP_REFERENCE 1
4834 #define UPDATE_BACKREF 2
4837 * hepler to process tree block while walking down the tree.
4839 * when wc->stage == DROP_REFERENCE, this function checks
4840 * reference count of the block. if the block is shared and
4841 * we need update back refs for the subtree rooted at the
4842 * block, this function changes wc->stage to UPDATE_BACKREF
4844 * when wc->stage == UPDATE_BACKREF, this function updates
4845 * back refs for pointers in the block.
4847 * NOTE: return value 1 means we should stop walking down.
4849 static noinline
int walk_down_proc(struct btrfs_trans_handle
*trans
,
4850 struct btrfs_root
*root
,
4851 struct btrfs_path
*path
,
4852 struct walk_control
*wc
)
4854 int level
= wc
->level
;
4855 struct extent_buffer
*eb
= path
->nodes
[level
];
4856 struct btrfs_key key
;
4857 u64 flag
= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
4860 if (wc
->stage
== UPDATE_BACKREF
&&
4861 btrfs_header_owner(eb
) != root
->root_key
.objectid
)
4865 * when reference count of tree block is 1, it won't increase
4866 * again. once full backref flag is set, we never clear it.
4868 if ((wc
->stage
== DROP_REFERENCE
&& wc
->refs
[level
] != 1) ||
4869 (wc
->stage
== UPDATE_BACKREF
&& !(wc
->flags
[level
] & flag
))) {
4870 BUG_ON(!path
->locks
[level
]);
4871 ret
= btrfs_lookup_extent_info(trans
, root
,
4876 BUG_ON(wc
->refs
[level
] == 0);
4879 if (wc
->stage
== DROP_REFERENCE
&&
4880 wc
->update_ref
&& wc
->refs
[level
] > 1) {
4881 BUG_ON(eb
== root
->node
);
4882 BUG_ON(path
->slots
[level
] > 0);
4884 btrfs_item_key_to_cpu(eb
, &key
, path
->slots
[level
]);
4886 btrfs_node_key_to_cpu(eb
, &key
, path
->slots
[level
]);
4887 if (btrfs_header_owner(eb
) == root
->root_key
.objectid
&&
4888 btrfs_comp_cpu_keys(&key
, &wc
->update_progress
) >= 0) {
4889 wc
->stage
= UPDATE_BACKREF
;
4890 wc
->shared_level
= level
;
4894 if (wc
->stage
== DROP_REFERENCE
) {
4895 if (wc
->refs
[level
] > 1)
4898 if (path
->locks
[level
] && !wc
->keep_locks
) {
4899 btrfs_tree_unlock(eb
);
4900 path
->locks
[level
] = 0;
4905 /* wc->stage == UPDATE_BACKREF */
4906 if (!(wc
->flags
[level
] & flag
)) {
4907 BUG_ON(!path
->locks
[level
]);
4908 ret
= btrfs_inc_ref(trans
, root
, eb
, 1);
4910 ret
= btrfs_dec_ref(trans
, root
, eb
, 0);
4912 ret
= btrfs_set_disk_extent_flags(trans
, root
, eb
->start
,
4915 wc
->flags
[level
] |= flag
;
4919 * the block is shared by multiple trees, so it's not good to
4920 * keep the tree lock
4922 if (path
->locks
[level
] && level
> 0) {
4923 btrfs_tree_unlock(eb
);
4924 path
->locks
[level
] = 0;
4930 * hepler to process tree block while walking up the tree.
4932 * when wc->stage == DROP_REFERENCE, this function drops
4933 * reference count on the block.
4935 * when wc->stage == UPDATE_BACKREF, this function changes
4936 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4937 * to UPDATE_BACKREF previously while processing the block.
4939 * NOTE: return value 1 means we should stop walking up.
4941 static noinline
int walk_up_proc(struct btrfs_trans_handle
*trans
,
4942 struct btrfs_root
*root
,
4943 struct btrfs_path
*path
,
4944 struct walk_control
*wc
)
4947 int level
= wc
->level
;
4948 struct extent_buffer
*eb
= path
->nodes
[level
];
4951 if (wc
->stage
== UPDATE_BACKREF
) {
4952 BUG_ON(wc
->shared_level
< level
);
4953 if (level
< wc
->shared_level
)
4956 BUG_ON(wc
->refs
[level
] <= 1);
4957 ret
= find_next_key(path
, level
+ 1, &wc
->update_progress
);
4961 wc
->stage
= DROP_REFERENCE
;
4962 wc
->shared_level
= -1;
4963 path
->slots
[level
] = 0;
4966 * check reference count again if the block isn't locked.
4967 * we should start walking down the tree again if reference
4970 if (!path
->locks
[level
]) {
4972 btrfs_tree_lock(eb
);
4973 btrfs_set_lock_blocking(eb
);
4974 path
->locks
[level
] = 1;
4976 ret
= btrfs_lookup_extent_info(trans
, root
,
4981 BUG_ON(wc
->refs
[level
] == 0);
4982 if (wc
->refs
[level
] == 1) {
4983 btrfs_tree_unlock(eb
);
4984 path
->locks
[level
] = 0;
4992 /* wc->stage == DROP_REFERENCE */
4993 BUG_ON(wc
->refs
[level
] > 1 && !path
->locks
[level
]);
4995 if (wc
->refs
[level
] == 1) {
4997 if (wc
->flags
[level
] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4998 ret
= btrfs_dec_ref(trans
, root
, eb
, 1);
5000 ret
= btrfs_dec_ref(trans
, root
, eb
, 0);
5003 /* make block locked assertion in clean_tree_block happy */
5004 if (!path
->locks
[level
] &&
5005 btrfs_header_generation(eb
) == trans
->transid
) {
5006 btrfs_tree_lock(eb
);
5007 btrfs_set_lock_blocking(eb
);
5008 path
->locks
[level
] = 1;
5010 clean_tree_block(trans
, root
, eb
);
5013 if (eb
== root
->node
) {
5014 if (wc
->flags
[level
] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
5017 BUG_ON(root
->root_key
.objectid
!=
5018 btrfs_header_owner(eb
));
5020 if (wc
->flags
[level
+ 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF
)
5021 parent
= path
->nodes
[level
+ 1]->start
;
5023 BUG_ON(root
->root_key
.objectid
!=
5024 btrfs_header_owner(path
->nodes
[level
+ 1]));
5027 ret
= btrfs_free_extent(trans
, root
, eb
->start
, eb
->len
, parent
,
5028 root
->root_key
.objectid
, level
, 0);
5031 wc
->refs
[level
] = 0;
5032 wc
->flags
[level
] = 0;
5036 static noinline
int walk_down_tree(struct btrfs_trans_handle
*trans
,
5037 struct btrfs_root
*root
,
5038 struct btrfs_path
*path
,
5039 struct walk_control
*wc
)
5041 struct extent_buffer
*next
;
5042 struct extent_buffer
*cur
;
5046 int level
= wc
->level
;
5049 while (level
>= 0) {
5050 cur
= path
->nodes
[level
];
5051 BUG_ON(path
->slots
[level
] >= btrfs_header_nritems(cur
));
5053 ret
= walk_down_proc(trans
, root
, path
, wc
);
5060 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[level
]);
5061 blocksize
= btrfs_level_size(root
, level
- 1);
5062 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[level
]);
5064 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
5065 btrfs_tree_lock(next
);
5066 btrfs_set_lock_blocking(next
);
5069 BUG_ON(level
!= btrfs_header_level(next
));
5070 path
->nodes
[level
] = next
;
5071 path
->slots
[level
] = 0;
5072 path
->locks
[level
] = 1;
5078 static noinline
int walk_up_tree(struct btrfs_trans_handle
*trans
,
5079 struct btrfs_root
*root
,
5080 struct btrfs_path
*path
,
5081 struct walk_control
*wc
, int max_level
)
5083 int level
= wc
->level
;
5086 path
->slots
[level
] = btrfs_header_nritems(path
->nodes
[level
]);
5087 while (level
< max_level
&& path
->nodes
[level
]) {
5089 if (path
->slots
[level
] + 1 <
5090 btrfs_header_nritems(path
->nodes
[level
])) {
5091 path
->slots
[level
]++;
5094 ret
= walk_up_proc(trans
, root
, path
, wc
);
5098 if (path
->locks
[level
]) {
5099 btrfs_tree_unlock(path
->nodes
[level
]);
5100 path
->locks
[level
] = 0;
5102 free_extent_buffer(path
->nodes
[level
]);
5103 path
->nodes
[level
] = NULL
;
5111 * drop a subvolume tree.
5113 * this function traverses the tree freeing any blocks that only
5114 * referenced by the tree.
5116 * when a shared tree block is found. this function decreases its
5117 * reference count by one. if update_ref is true, this function
5118 * also make sure backrefs for the shared block and all lower level
5119 * blocks are properly updated.
5121 int btrfs_drop_snapshot(struct btrfs_root
*root
, int update_ref
)
5123 struct btrfs_path
*path
;
5124 struct btrfs_trans_handle
*trans
;
5125 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
5126 struct btrfs_root_item
*root_item
= &root
->root_item
;
5127 struct walk_control
*wc
;
5128 struct btrfs_key key
;
5133 path
= btrfs_alloc_path();
5136 wc
= kzalloc(sizeof(*wc
), GFP_NOFS
);
5139 trans
= btrfs_start_transaction(tree_root
, 1);
5141 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
5142 level
= btrfs_header_level(root
->node
);
5143 path
->nodes
[level
] = btrfs_lock_root_node(root
);
5144 btrfs_set_lock_blocking(path
->nodes
[level
]);
5145 path
->slots
[level
] = 0;
5146 path
->locks
[level
] = 1;
5147 memset(&wc
->update_progress
, 0,
5148 sizeof(wc
->update_progress
));
5150 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
5151 memcpy(&wc
->update_progress
, &key
,
5152 sizeof(wc
->update_progress
));
5154 level
= root_item
->drop_level
;
5156 path
->lowest_level
= level
;
5157 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5158 path
->lowest_level
= 0;
5163 btrfs_node_key_to_cpu(path
->nodes
[level
], &key
,
5164 path
->slots
[level
]);
5165 WARN_ON(memcmp(&key
, &wc
->update_progress
, sizeof(key
)));
5168 * unlock our path, this is safe because only this
5169 * function is allowed to delete this snapshot
5171 btrfs_unlock_up_safe(path
, 0);
5173 level
= btrfs_header_level(root
->node
);
5175 btrfs_tree_lock(path
->nodes
[level
]);
5176 btrfs_set_lock_blocking(path
->nodes
[level
]);
5178 ret
= btrfs_lookup_extent_info(trans
, root
,
5179 path
->nodes
[level
]->start
,
5180 path
->nodes
[level
]->len
,
5184 BUG_ON(wc
->refs
[level
] == 0);
5186 if (level
== root_item
->drop_level
)
5189 btrfs_tree_unlock(path
->nodes
[level
]);
5190 WARN_ON(wc
->refs
[level
] != 1);
5196 wc
->shared_level
= -1;
5197 wc
->stage
= DROP_REFERENCE
;
5198 wc
->update_ref
= update_ref
;
5202 ret
= walk_down_tree(trans
, root
, path
, wc
);
5208 ret
= walk_up_tree(trans
, root
, path
, wc
, BTRFS_MAX_LEVEL
);
5215 BUG_ON(wc
->stage
!= DROP_REFERENCE
);
5219 if (wc
->stage
== DROP_REFERENCE
) {
5221 btrfs_node_key(path
->nodes
[level
],
5222 &root_item
->drop_progress
,
5223 path
->slots
[level
]);
5224 root_item
->drop_level
= level
;
5227 BUG_ON(wc
->level
== 0);
5228 if (trans
->transaction
->in_commit
||
5229 trans
->transaction
->delayed_refs
.flushing
) {
5230 ret
= btrfs_update_root(trans
, tree_root
,
5235 btrfs_end_transaction(trans
, tree_root
);
5236 trans
= btrfs_start_transaction(tree_root
, 1);
5238 unsigned long update
;
5239 update
= trans
->delayed_ref_updates
;
5240 trans
->delayed_ref_updates
= 0;
5242 btrfs_run_delayed_refs(trans
, tree_root
,
5246 btrfs_release_path(root
, path
);
5249 ret
= btrfs_del_root(trans
, tree_root
, &root
->root_key
);
5252 free_extent_buffer(root
->node
);
5253 free_extent_buffer(root
->commit_root
);
5256 btrfs_end_transaction(trans
, tree_root
);
5258 btrfs_free_path(path
);
5263 * drop subtree rooted at tree block 'node'.
5265 * NOTE: this function will unlock and release tree block 'node'
5267 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
5268 struct btrfs_root
*root
,
5269 struct extent_buffer
*node
,
5270 struct extent_buffer
*parent
)
5272 struct btrfs_path
*path
;
5273 struct walk_control
*wc
;
5279 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
5281 path
= btrfs_alloc_path();
5284 wc
= kzalloc(sizeof(*wc
), GFP_NOFS
);
5287 btrfs_assert_tree_locked(parent
);
5288 parent_level
= btrfs_header_level(parent
);
5289 extent_buffer_get(parent
);
5290 path
->nodes
[parent_level
] = parent
;
5291 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
5293 btrfs_assert_tree_locked(node
);
5294 level
= btrfs_header_level(node
);
5295 path
->nodes
[level
] = node
;
5296 path
->slots
[level
] = 0;
5297 path
->locks
[level
] = 1;
5299 wc
->refs
[parent_level
] = 1;
5300 wc
->flags
[parent_level
] = BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5302 wc
->shared_level
= -1;
5303 wc
->stage
= DROP_REFERENCE
;
5308 wret
= walk_down_tree(trans
, root
, path
, wc
);
5314 wret
= walk_up_tree(trans
, root
, path
, wc
, parent_level
);
5322 btrfs_free_path(path
);
5327 static unsigned long calc_ra(unsigned long start
, unsigned long last
,
5330 return min(last
, start
+ nr
- 1);
5333 static noinline
int relocate_inode_pages(struct inode
*inode
, u64 start
,
5338 unsigned long first_index
;
5339 unsigned long last_index
;
5342 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
5343 struct file_ra_state
*ra
;
5344 struct btrfs_ordered_extent
*ordered
;
5345 unsigned int total_read
= 0;
5346 unsigned int total_dirty
= 0;
5349 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
5351 mutex_lock(&inode
->i_mutex
);
5352 first_index
= start
>> PAGE_CACHE_SHIFT
;
5353 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
5355 /* make sure the dirty trick played by the caller work */
5356 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
5357 first_index
, last_index
);
5361 file_ra_state_init(ra
, inode
->i_mapping
);
5363 for (i
= first_index
; i
<= last_index
; i
++) {
5364 if (total_read
% ra
->ra_pages
== 0) {
5365 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
5366 calc_ra(i
, last_index
, ra
->ra_pages
));
5370 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
5372 page
= grab_cache_page(inode
->i_mapping
, i
);
5377 if (!PageUptodate(page
)) {
5378 btrfs_readpage(NULL
, page
);
5380 if (!PageUptodate(page
)) {
5382 page_cache_release(page
);
5387 wait_on_page_writeback(page
);
5389 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
5390 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
5391 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5393 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
5395 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5397 page_cache_release(page
);
5398 btrfs_start_ordered_extent(inode
, ordered
, 1);
5399 btrfs_put_ordered_extent(ordered
);
5402 set_page_extent_mapped(page
);
5404 if (i
== first_index
)
5405 set_extent_bits(io_tree
, page_start
, page_end
,
5406 EXTENT_BOUNDARY
, GFP_NOFS
);
5407 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
5409 set_page_dirty(page
);
5412 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
5414 page_cache_release(page
);
5419 mutex_unlock(&inode
->i_mutex
);
5420 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
5424 static noinline
int relocate_data_extent(struct inode
*reloc_inode
,
5425 struct btrfs_key
*extent_key
,
5428 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
5429 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
5430 struct extent_map
*em
;
5431 u64 start
= extent_key
->objectid
- offset
;
5432 u64 end
= start
+ extent_key
->offset
- 1;
5434 em
= alloc_extent_map(GFP_NOFS
);
5435 BUG_ON(!em
|| IS_ERR(em
));
5438 em
->len
= extent_key
->offset
;
5439 em
->block_len
= extent_key
->offset
;
5440 em
->block_start
= extent_key
->objectid
;
5441 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
5442 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
5444 /* setup extent map to cheat btrfs_readpage */
5445 lock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
5448 spin_lock(&em_tree
->lock
);
5449 ret
= add_extent_mapping(em_tree
, em
);
5450 spin_unlock(&em_tree
->lock
);
5451 if (ret
!= -EEXIST
) {
5452 free_extent_map(em
);
5455 btrfs_drop_extent_cache(reloc_inode
, start
, end
, 0);
5457 unlock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
5459 return relocate_inode_pages(reloc_inode
, start
, extent_key
->offset
);
5462 struct btrfs_ref_path
{
5464 u64 nodes
[BTRFS_MAX_LEVEL
];
5466 u64 root_generation
;
5473 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
5474 u64 new_nodes
[BTRFS_MAX_LEVEL
];
5477 struct disk_extent
{
5488 static int is_cowonly_root(u64 root_objectid
)
5490 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
5491 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
5492 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
5493 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
5494 root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
5495 root_objectid
== BTRFS_CSUM_TREE_OBJECTID
)
5500 static noinline
int __next_ref_path(struct btrfs_trans_handle
*trans
,
5501 struct btrfs_root
*extent_root
,
5502 struct btrfs_ref_path
*ref_path
,
5505 struct extent_buffer
*leaf
;
5506 struct btrfs_path
*path
;
5507 struct btrfs_extent_ref
*ref
;
5508 struct btrfs_key key
;
5509 struct btrfs_key found_key
;
5515 path
= btrfs_alloc_path();
5520 ref_path
->lowest_level
= -1;
5521 ref_path
->current_level
= -1;
5522 ref_path
->shared_level
= -1;
5526 level
= ref_path
->current_level
- 1;
5527 while (level
>= -1) {
5529 if (level
< ref_path
->lowest_level
)
5533 bytenr
= ref_path
->nodes
[level
];
5535 bytenr
= ref_path
->extent_start
;
5536 BUG_ON(bytenr
== 0);
5538 parent
= ref_path
->nodes
[level
+ 1];
5539 ref_path
->nodes
[level
+ 1] = 0;
5540 ref_path
->current_level
= level
;
5541 BUG_ON(parent
== 0);
5543 key
.objectid
= bytenr
;
5544 key
.offset
= parent
+ 1;
5545 key
.type
= BTRFS_EXTENT_REF_KEY
;
5547 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
5552 leaf
= path
->nodes
[0];
5553 nritems
= btrfs_header_nritems(leaf
);
5554 if (path
->slots
[0] >= nritems
) {
5555 ret
= btrfs_next_leaf(extent_root
, path
);
5560 leaf
= path
->nodes
[0];
5563 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5564 if (found_key
.objectid
== bytenr
&&
5565 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
5566 if (level
< ref_path
->shared_level
)
5567 ref_path
->shared_level
= level
;
5572 btrfs_release_path(extent_root
, path
);
5575 /* reached lowest level */
5579 level
= ref_path
->current_level
;
5580 while (level
< BTRFS_MAX_LEVEL
- 1) {
5584 bytenr
= ref_path
->nodes
[level
];
5586 bytenr
= ref_path
->extent_start
;
5588 BUG_ON(bytenr
== 0);
5590 key
.objectid
= bytenr
;
5592 key
.type
= BTRFS_EXTENT_REF_KEY
;
5594 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
5598 leaf
= path
->nodes
[0];
5599 nritems
= btrfs_header_nritems(leaf
);
5600 if (path
->slots
[0] >= nritems
) {
5601 ret
= btrfs_next_leaf(extent_root
, path
);
5605 /* the extent was freed by someone */
5606 if (ref_path
->lowest_level
== level
)
5608 btrfs_release_path(extent_root
, path
);
5611 leaf
= path
->nodes
[0];
5614 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5615 if (found_key
.objectid
!= bytenr
||
5616 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
5617 /* the extent was freed by someone */
5618 if (ref_path
->lowest_level
== level
) {
5622 btrfs_release_path(extent_root
, path
);
5626 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
5627 struct btrfs_extent_ref
);
5628 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
5629 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
5631 level
= (int)ref_objectid
;
5632 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
5633 ref_path
->lowest_level
= level
;
5634 ref_path
->current_level
= level
;
5635 ref_path
->nodes
[level
] = bytenr
;
5637 WARN_ON(ref_objectid
!= level
);
5640 WARN_ON(level
!= -1);
5644 if (ref_path
->lowest_level
== level
) {
5645 ref_path
->owner_objectid
= ref_objectid
;
5646 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
5650 * the block is tree root or the block isn't in reference
5653 if (found_key
.objectid
== found_key
.offset
||
5654 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
5655 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
5656 ref_path
->root_generation
=
5657 btrfs_ref_generation(leaf
, ref
);
5659 /* special reference from the tree log */
5660 ref_path
->nodes
[0] = found_key
.offset
;
5661 ref_path
->current_level
= 0;
5668 BUG_ON(ref_path
->nodes
[level
] != 0);
5669 ref_path
->nodes
[level
] = found_key
.offset
;
5670 ref_path
->current_level
= level
;
5673 * the reference was created in the running transaction,
5674 * no need to continue walking up.
5676 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
5677 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
5678 ref_path
->root_generation
=
5679 btrfs_ref_generation(leaf
, ref
);
5684 btrfs_release_path(extent_root
, path
);
5687 /* reached max tree level, but no tree root found. */
5690 btrfs_free_path(path
);
5694 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
5695 struct btrfs_root
*extent_root
,
5696 struct btrfs_ref_path
*ref_path
,
5699 memset(ref_path
, 0, sizeof(*ref_path
));
5700 ref_path
->extent_start
= extent_start
;
5702 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
5705 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
5706 struct btrfs_root
*extent_root
,
5707 struct btrfs_ref_path
*ref_path
)
5709 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
5712 static noinline
int get_new_locations(struct inode
*reloc_inode
,
5713 struct btrfs_key
*extent_key
,
5714 u64 offset
, int no_fragment
,
5715 struct disk_extent
**extents
,
5718 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
5719 struct btrfs_path
*path
;
5720 struct btrfs_file_extent_item
*fi
;
5721 struct extent_buffer
*leaf
;
5722 struct disk_extent
*exts
= *extents
;
5723 struct btrfs_key found_key
;
5728 int max
= *nr_extents
;
5731 WARN_ON(!no_fragment
&& *extents
);
5734 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
5739 path
= btrfs_alloc_path();
5742 cur_pos
= extent_key
->objectid
- offset
;
5743 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
5744 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
5754 leaf
= path
->nodes
[0];
5755 nritems
= btrfs_header_nritems(leaf
);
5756 if (path
->slots
[0] >= nritems
) {
5757 ret
= btrfs_next_leaf(root
, path
);
5762 leaf
= path
->nodes
[0];
5765 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5766 if (found_key
.offset
!= cur_pos
||
5767 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
5768 found_key
.objectid
!= reloc_inode
->i_ino
)
5771 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5772 struct btrfs_file_extent_item
);
5773 if (btrfs_file_extent_type(leaf
, fi
) !=
5774 BTRFS_FILE_EXTENT_REG
||
5775 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
5779 struct disk_extent
*old
= exts
;
5781 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
5782 memcpy(exts
, old
, sizeof(*exts
) * nr
);
5783 if (old
!= *extents
)
5787 exts
[nr
].disk_bytenr
=
5788 btrfs_file_extent_disk_bytenr(leaf
, fi
);
5789 exts
[nr
].disk_num_bytes
=
5790 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
5791 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
5792 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
5793 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
5794 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
5795 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
5796 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
5798 BUG_ON(exts
[nr
].offset
> 0);
5799 BUG_ON(exts
[nr
].compression
|| exts
[nr
].encryption
);
5800 BUG_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
5802 cur_pos
+= exts
[nr
].num_bytes
;
5805 if (cur_pos
+ offset
>= last_byte
)
5815 BUG_ON(cur_pos
+ offset
> last_byte
);
5816 if (cur_pos
+ offset
< last_byte
) {
5822 btrfs_free_path(path
);
5824 if (exts
!= *extents
)
5833 static noinline
int replace_one_extent(struct btrfs_trans_handle
*trans
,
5834 struct btrfs_root
*root
,
5835 struct btrfs_path
*path
,
5836 struct btrfs_key
*extent_key
,
5837 struct btrfs_key
*leaf_key
,
5838 struct btrfs_ref_path
*ref_path
,
5839 struct disk_extent
*new_extents
,
5842 struct extent_buffer
*leaf
;
5843 struct btrfs_file_extent_item
*fi
;
5844 struct inode
*inode
= NULL
;
5845 struct btrfs_key key
;
5850 u64 search_end
= (u64
)-1;
5853 int extent_locked
= 0;
5857 memcpy(&key
, leaf_key
, sizeof(key
));
5858 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
5859 if (key
.objectid
< ref_path
->owner_objectid
||
5860 (key
.objectid
== ref_path
->owner_objectid
&&
5861 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
5862 key
.objectid
= ref_path
->owner_objectid
;
5863 key
.type
= BTRFS_EXTENT_DATA_KEY
;
5869 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
5873 leaf
= path
->nodes
[0];
5874 nritems
= btrfs_header_nritems(leaf
);
5876 if (extent_locked
&& ret
> 0) {
5878 * the file extent item was modified by someone
5879 * before the extent got locked.
5881 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
5882 lock_end
, GFP_NOFS
);
5886 if (path
->slots
[0] >= nritems
) {
5887 if (++nr_scaned
> 2)
5890 BUG_ON(extent_locked
);
5891 ret
= btrfs_next_leaf(root
, path
);
5896 leaf
= path
->nodes
[0];
5897 nritems
= btrfs_header_nritems(leaf
);
5900 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5902 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
5903 if ((key
.objectid
> ref_path
->owner_objectid
) ||
5904 (key
.objectid
== ref_path
->owner_objectid
&&
5905 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
5906 key
.offset
>= search_end
)
5910 if (inode
&& key
.objectid
!= inode
->i_ino
) {
5911 BUG_ON(extent_locked
);
5912 btrfs_release_path(root
, path
);
5913 mutex_unlock(&inode
->i_mutex
);
5919 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
5924 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5925 struct btrfs_file_extent_item
);
5926 extent_type
= btrfs_file_extent_type(leaf
, fi
);
5927 if ((extent_type
!= BTRFS_FILE_EXTENT_REG
&&
5928 extent_type
!= BTRFS_FILE_EXTENT_PREALLOC
) ||
5929 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
5930 extent_key
->objectid
)) {
5936 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
5937 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
5939 if (search_end
== (u64
)-1) {
5940 search_end
= key
.offset
- ext_offset
+
5941 btrfs_file_extent_ram_bytes(leaf
, fi
);
5944 if (!extent_locked
) {
5945 lock_start
= key
.offset
;
5946 lock_end
= lock_start
+ num_bytes
- 1;
5948 if (lock_start
> key
.offset
||
5949 lock_end
+ 1 < key
.offset
+ num_bytes
) {
5950 unlock_extent(&BTRFS_I(inode
)->io_tree
,
5951 lock_start
, lock_end
, GFP_NOFS
);
5957 btrfs_release_path(root
, path
);
5959 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
5960 key
.objectid
, root
);
5961 if (inode
->i_state
& I_NEW
) {
5962 BTRFS_I(inode
)->root
= root
;
5963 BTRFS_I(inode
)->location
.objectid
=
5965 BTRFS_I(inode
)->location
.type
=
5966 BTRFS_INODE_ITEM_KEY
;
5967 BTRFS_I(inode
)->location
.offset
= 0;
5968 btrfs_read_locked_inode(inode
);
5969 unlock_new_inode(inode
);
5972 * some code call btrfs_commit_transaction while
5973 * holding the i_mutex, so we can't use mutex_lock
5976 if (is_bad_inode(inode
) ||
5977 !mutex_trylock(&inode
->i_mutex
)) {
5980 key
.offset
= (u64
)-1;
5985 if (!extent_locked
) {
5986 struct btrfs_ordered_extent
*ordered
;
5988 btrfs_release_path(root
, path
);
5990 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
5991 lock_end
, GFP_NOFS
);
5992 ordered
= btrfs_lookup_first_ordered_extent(inode
,
5995 ordered
->file_offset
<= lock_end
&&
5996 ordered
->file_offset
+ ordered
->len
> lock_start
) {
5997 unlock_extent(&BTRFS_I(inode
)->io_tree
,
5998 lock_start
, lock_end
, GFP_NOFS
);
5999 btrfs_start_ordered_extent(inode
, ordered
, 1);
6000 btrfs_put_ordered_extent(ordered
);
6001 key
.offset
+= num_bytes
;
6005 btrfs_put_ordered_extent(ordered
);
6011 if (nr_extents
== 1) {
6012 /* update extent pointer in place */
6013 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
6014 new_extents
[0].disk_bytenr
);
6015 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
6016 new_extents
[0].disk_num_bytes
);
6017 btrfs_mark_buffer_dirty(leaf
);
6019 btrfs_drop_extent_cache(inode
, key
.offset
,
6020 key
.offset
+ num_bytes
- 1, 0);
6022 ret
= btrfs_inc_extent_ref(trans
, root
,
6023 new_extents
[0].disk_bytenr
,
6024 new_extents
[0].disk_num_bytes
,
6026 root
->root_key
.objectid
,
6031 ret
= btrfs_free_extent(trans
, root
,
6032 extent_key
->objectid
,
6035 btrfs_header_owner(leaf
),
6036 btrfs_header_generation(leaf
),
6040 btrfs_release_path(root
, path
);
6041 key
.offset
+= num_bytes
;
6049 * drop old extent pointer at first, then insert the
6050 * new pointers one bye one
6052 btrfs_release_path(root
, path
);
6053 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
6054 key
.offset
+ num_bytes
,
6055 key
.offset
, &alloc_hint
);
6058 for (i
= 0; i
< nr_extents
; i
++) {
6059 if (ext_offset
>= new_extents
[i
].num_bytes
) {
6060 ext_offset
-= new_extents
[i
].num_bytes
;
6063 extent_len
= min(new_extents
[i
].num_bytes
-
6064 ext_offset
, num_bytes
);
6066 ret
= btrfs_insert_empty_item(trans
, root
,
6071 leaf
= path
->nodes
[0];
6072 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6073 struct btrfs_file_extent_item
);
6074 btrfs_set_file_extent_generation(leaf
, fi
,
6076 btrfs_set_file_extent_type(leaf
, fi
,
6077 BTRFS_FILE_EXTENT_REG
);
6078 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
6079 new_extents
[i
].disk_bytenr
);
6080 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
6081 new_extents
[i
].disk_num_bytes
);
6082 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
6083 new_extents
[i
].ram_bytes
);
6085 btrfs_set_file_extent_compression(leaf
, fi
,
6086 new_extents
[i
].compression
);
6087 btrfs_set_file_extent_encryption(leaf
, fi
,
6088 new_extents
[i
].encryption
);
6089 btrfs_set_file_extent_other_encoding(leaf
, fi
,
6090 new_extents
[i
].other_encoding
);
6092 btrfs_set_file_extent_num_bytes(leaf
, fi
,
6094 ext_offset
+= new_extents
[i
].offset
;
6095 btrfs_set_file_extent_offset(leaf
, fi
,
6097 btrfs_mark_buffer_dirty(leaf
);
6099 btrfs_drop_extent_cache(inode
, key
.offset
,
6100 key
.offset
+ extent_len
- 1, 0);
6102 ret
= btrfs_inc_extent_ref(trans
, root
,
6103 new_extents
[i
].disk_bytenr
,
6104 new_extents
[i
].disk_num_bytes
,
6106 root
->root_key
.objectid
,
6107 trans
->transid
, key
.objectid
);
6109 btrfs_release_path(root
, path
);
6111 inode_add_bytes(inode
, extent_len
);
6114 num_bytes
-= extent_len
;
6115 key
.offset
+= extent_len
;
6120 BUG_ON(i
>= nr_extents
);
6124 if (extent_locked
) {
6125 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
6126 lock_end
, GFP_NOFS
);
6130 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
6131 key
.offset
>= search_end
)
6138 btrfs_release_path(root
, path
);
6140 mutex_unlock(&inode
->i_mutex
);
6141 if (extent_locked
) {
6142 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
6143 lock_end
, GFP_NOFS
);
6150 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
6151 struct btrfs_root
*root
,
6152 struct extent_buffer
*buf
, u64 orig_start
)
6157 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
6158 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
6160 level
= btrfs_header_level(buf
);
6162 struct btrfs_leaf_ref
*ref
;
6163 struct btrfs_leaf_ref
*orig_ref
;
6165 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
6169 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
6171 btrfs_free_leaf_ref(root
, orig_ref
);
6175 ref
->nritems
= orig_ref
->nritems
;
6176 memcpy(ref
->extents
, orig_ref
->extents
,
6177 sizeof(ref
->extents
[0]) * ref
->nritems
);
6179 btrfs_free_leaf_ref(root
, orig_ref
);
6181 ref
->root_gen
= trans
->transid
;
6182 ref
->bytenr
= buf
->start
;
6183 ref
->owner
= btrfs_header_owner(buf
);
6184 ref
->generation
= btrfs_header_generation(buf
);
6186 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
6188 btrfs_free_leaf_ref(root
, ref
);
6193 static noinline
int invalidate_extent_cache(struct btrfs_root
*root
,
6194 struct extent_buffer
*leaf
,
6195 struct btrfs_block_group_cache
*group
,
6196 struct btrfs_root
*target_root
)
6198 struct btrfs_key key
;
6199 struct inode
*inode
= NULL
;
6200 struct btrfs_file_extent_item
*fi
;
6202 u64 skip_objectid
= 0;
6206 nritems
= btrfs_header_nritems(leaf
);
6207 for (i
= 0; i
< nritems
; i
++) {
6208 btrfs_item_key_to_cpu(leaf
, &key
, i
);
6209 if (key
.objectid
== skip_objectid
||
6210 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
6212 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
6213 if (btrfs_file_extent_type(leaf
, fi
) ==
6214 BTRFS_FILE_EXTENT_INLINE
)
6216 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
6218 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
6220 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
6221 key
.objectid
, target_root
, 1);
6224 skip_objectid
= key
.objectid
;
6227 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
6229 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
6230 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
6231 btrfs_drop_extent_cache(inode
, key
.offset
,
6232 key
.offset
+ num_bytes
- 1, 1);
6233 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
6234 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
6241 static noinline
int replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
6242 struct btrfs_root
*root
,
6243 struct extent_buffer
*leaf
,
6244 struct btrfs_block_group_cache
*group
,
6245 struct inode
*reloc_inode
)
6247 struct btrfs_key key
;
6248 struct btrfs_key extent_key
;
6249 struct btrfs_file_extent_item
*fi
;
6250 struct btrfs_leaf_ref
*ref
;
6251 struct disk_extent
*new_extent
;
6260 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
6261 BUG_ON(!new_extent
);
6263 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
6267 nritems
= btrfs_header_nritems(leaf
);
6268 for (i
= 0; i
< nritems
; i
++) {
6269 btrfs_item_key_to_cpu(leaf
, &key
, i
);
6270 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
6272 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
6273 if (btrfs_file_extent_type(leaf
, fi
) ==
6274 BTRFS_FILE_EXTENT_INLINE
)
6276 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
6277 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
6282 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
6283 bytenr
+ num_bytes
<= group
->key
.objectid
)
6286 extent_key
.objectid
= bytenr
;
6287 extent_key
.offset
= num_bytes
;
6288 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
6290 ret
= get_new_locations(reloc_inode
, &extent_key
,
6291 group
->key
.objectid
, 1,
6292 &new_extent
, &nr_extent
);
6297 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
6298 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
6299 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
6300 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
6302 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
6303 new_extent
->disk_bytenr
);
6304 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
6305 new_extent
->disk_num_bytes
);
6306 btrfs_mark_buffer_dirty(leaf
);
6308 ret
= btrfs_inc_extent_ref(trans
, root
,
6309 new_extent
->disk_bytenr
,
6310 new_extent
->disk_num_bytes
,
6312 root
->root_key
.objectid
,
6313 trans
->transid
, key
.objectid
);
6316 ret
= btrfs_free_extent(trans
, root
,
6317 bytenr
, num_bytes
, leaf
->start
,
6318 btrfs_header_owner(leaf
),
6319 btrfs_header_generation(leaf
),
6325 BUG_ON(ext_index
+ 1 != ref
->nritems
);
6326 btrfs_free_leaf_ref(root
, ref
);
6330 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
6331 struct btrfs_root
*root
)
6333 struct btrfs_root
*reloc_root
;
6336 if (root
->reloc_root
) {
6337 reloc_root
= root
->reloc_root
;
6338 root
->reloc_root
= NULL
;
6339 list_add(&reloc_root
->dead_list
,
6340 &root
->fs_info
->dead_reloc_roots
);
6342 btrfs_set_root_bytenr(&reloc_root
->root_item
,
6343 reloc_root
->node
->start
);
6344 btrfs_set_root_level(&root
->root_item
,
6345 btrfs_header_level(reloc_root
->node
));
6346 memset(&reloc_root
->root_item
.drop_progress
, 0,
6347 sizeof(struct btrfs_disk_key
));
6348 reloc_root
->root_item
.drop_level
= 0;
6350 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
6351 &reloc_root
->root_key
,
6352 &reloc_root
->root_item
);
6358 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
6360 struct btrfs_trans_handle
*trans
;
6361 struct btrfs_root
*reloc_root
;
6362 struct btrfs_root
*prev_root
= NULL
;
6363 struct list_head dead_roots
;
6367 INIT_LIST_HEAD(&dead_roots
);
6368 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
6370 while (!list_empty(&dead_roots
)) {
6371 reloc_root
= list_entry(dead_roots
.prev
,
6372 struct btrfs_root
, dead_list
);
6373 list_del_init(&reloc_root
->dead_list
);
6375 BUG_ON(reloc_root
->commit_root
!= NULL
);
6377 trans
= btrfs_join_transaction(root
, 1);
6380 mutex_lock(&root
->fs_info
->drop_mutex
);
6381 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
6384 mutex_unlock(&root
->fs_info
->drop_mutex
);
6386 nr
= trans
->blocks_used
;
6387 ret
= btrfs_end_transaction(trans
, root
);
6389 btrfs_btree_balance_dirty(root
, nr
);
6392 free_extent_buffer(reloc_root
->node
);
6394 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
6395 &reloc_root
->root_key
);
6397 mutex_unlock(&root
->fs_info
->drop_mutex
);
6399 nr
= trans
->blocks_used
;
6400 ret
= btrfs_end_transaction(trans
, root
);
6402 btrfs_btree_balance_dirty(root
, nr
);
6405 prev_root
= reloc_root
;
6408 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
6414 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
6416 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
6420 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
6422 struct btrfs_root
*reloc_root
;
6423 struct btrfs_trans_handle
*trans
;
6424 struct btrfs_key location
;
6428 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
6429 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
6431 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
6432 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
6435 trans
= btrfs_start_transaction(root
, 1);
6437 ret
= btrfs_commit_transaction(trans
, root
);
6441 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
6442 location
.offset
= (u64
)-1;
6443 location
.type
= BTRFS_ROOT_ITEM_KEY
;
6445 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
6446 BUG_ON(!reloc_root
);
6447 btrfs_orphan_cleanup(reloc_root
);
6451 static noinline
int init_reloc_tree(struct btrfs_trans_handle
*trans
,
6452 struct btrfs_root
*root
)
6454 struct btrfs_root
*reloc_root
;
6455 struct extent_buffer
*eb
;
6456 struct btrfs_root_item
*root_item
;
6457 struct btrfs_key root_key
;
6460 BUG_ON(!root
->ref_cows
);
6461 if (root
->reloc_root
)
6464 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
6467 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
6468 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
6471 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
6472 root_key
.offset
= root
->root_key
.objectid
;
6473 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6475 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
6476 btrfs_set_root_refs(root_item
, 0);
6477 btrfs_set_root_bytenr(root_item
, eb
->start
);
6478 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
6479 btrfs_set_root_generation(root_item
, trans
->transid
);
6481 btrfs_tree_unlock(eb
);
6482 free_extent_buffer(eb
);
6484 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
6485 &root_key
, root_item
);
6489 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
6491 BUG_ON(!reloc_root
);
6492 reloc_root
->last_trans
= trans
->transid
;
6493 reloc_root
->commit_root
= NULL
;
6494 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
6496 root
->reloc_root
= reloc_root
;
6501 * Core function of space balance.
6503 * The idea is using reloc trees to relocate tree blocks in reference
6504 * counted roots. There is one reloc tree for each subvol, and all
6505 * reloc trees share same root key objectid. Reloc trees are snapshots
6506 * of the latest committed roots of subvols (root->commit_root).
6508 * To relocate a tree block referenced by a subvol, there are two steps.
6509 * COW the block through subvol's reloc tree, then update block pointer
6510 * in the subvol to point to the new block. Since all reloc trees share
6511 * same root key objectid, doing special handing for tree blocks owned
6512 * by them is easy. Once a tree block has been COWed in one reloc tree,
6513 * we can use the resulting new block directly when the same block is
6514 * required to COW again through other reloc trees. By this way, relocated
6515 * tree blocks are shared between reloc trees, so they are also shared
6518 static noinline
int relocate_one_path(struct btrfs_trans_handle
*trans
,
6519 struct btrfs_root
*root
,
6520 struct btrfs_path
*path
,
6521 struct btrfs_key
*first_key
,
6522 struct btrfs_ref_path
*ref_path
,
6523 struct btrfs_block_group_cache
*group
,
6524 struct inode
*reloc_inode
)
6526 struct btrfs_root
*reloc_root
;
6527 struct extent_buffer
*eb
= NULL
;
6528 struct btrfs_key
*keys
;
6532 int lowest_level
= 0;
6535 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
6536 lowest_level
= ref_path
->owner_objectid
;
6538 if (!root
->ref_cows
) {
6539 path
->lowest_level
= lowest_level
;
6540 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
6542 path
->lowest_level
= 0;
6543 btrfs_release_path(root
, path
);
6547 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
6548 ret
= init_reloc_tree(trans
, root
);
6550 reloc_root
= root
->reloc_root
;
6552 shared_level
= ref_path
->shared_level
;
6553 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
6555 keys
= ref_path
->node_keys
;
6556 nodes
= ref_path
->new_nodes
;
6557 memset(&keys
[shared_level
+ 1], 0,
6558 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
6559 memset(&nodes
[shared_level
+ 1], 0,
6560 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
6562 if (nodes
[lowest_level
] == 0) {
6563 path
->lowest_level
= lowest_level
;
6564 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
6567 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
6568 eb
= path
->nodes
[level
];
6569 if (!eb
|| eb
== reloc_root
->node
)
6571 nodes
[level
] = eb
->start
;
6573 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
6575 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
6578 ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6579 eb
= path
->nodes
[0];
6580 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
6581 group
, reloc_inode
);
6584 btrfs_release_path(reloc_root
, path
);
6586 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
6592 * replace tree blocks in the fs tree with tree blocks in
6595 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
6598 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6599 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
6602 extent_buffer_get(path
->nodes
[0]);
6603 eb
= path
->nodes
[0];
6604 btrfs_release_path(reloc_root
, path
);
6605 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
6607 free_extent_buffer(eb
);
6610 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
6611 path
->lowest_level
= 0;
6615 static noinline
int relocate_tree_block(struct btrfs_trans_handle
*trans
,
6616 struct btrfs_root
*root
,
6617 struct btrfs_path
*path
,
6618 struct btrfs_key
*first_key
,
6619 struct btrfs_ref_path
*ref_path
)
6623 ret
= relocate_one_path(trans
, root
, path
, first_key
,
6624 ref_path
, NULL
, NULL
);
6630 static noinline
int del_extent_zero(struct btrfs_trans_handle
*trans
,
6631 struct btrfs_root
*extent_root
,
6632 struct btrfs_path
*path
,
6633 struct btrfs_key
*extent_key
)
6637 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
6640 ret
= btrfs_del_item(trans
, extent_root
, path
);
6642 btrfs_release_path(extent_root
, path
);
6646 static noinline
struct btrfs_root
*read_ref_root(struct btrfs_fs_info
*fs_info
,
6647 struct btrfs_ref_path
*ref_path
)
6649 struct btrfs_key root_key
;
6651 root_key
.objectid
= ref_path
->root_objectid
;
6652 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6653 if (is_cowonly_root(ref_path
->root_objectid
))
6654 root_key
.offset
= 0;
6656 root_key
.offset
= (u64
)-1;
6658 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
6661 static noinline
int relocate_one_extent(struct btrfs_root
*extent_root
,
6662 struct btrfs_path
*path
,
6663 struct btrfs_key
*extent_key
,
6664 struct btrfs_block_group_cache
*group
,
6665 struct inode
*reloc_inode
, int pass
)
6667 struct btrfs_trans_handle
*trans
;
6668 struct btrfs_root
*found_root
;
6669 struct btrfs_ref_path
*ref_path
= NULL
;
6670 struct disk_extent
*new_extents
= NULL
;
6675 struct btrfs_key first_key
;
6679 trans
= btrfs_start_transaction(extent_root
, 1);
6682 if (extent_key
->objectid
== 0) {
6683 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
6687 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
6693 for (loops
= 0; ; loops
++) {
6695 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
6696 extent_key
->objectid
);
6698 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
6705 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
6706 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
6709 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
6710 BUG_ON(!found_root
);
6712 * for reference counted tree, only process reference paths
6713 * rooted at the latest committed root.
6715 if (found_root
->ref_cows
&&
6716 ref_path
->root_generation
!= found_root
->root_key
.offset
)
6719 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6722 * copy data extents to new locations
6724 u64 group_start
= group
->key
.objectid
;
6725 ret
= relocate_data_extent(reloc_inode
,
6734 level
= ref_path
->owner_objectid
;
6737 if (prev_block
!= ref_path
->nodes
[level
]) {
6738 struct extent_buffer
*eb
;
6739 u64 block_start
= ref_path
->nodes
[level
];
6740 u64 block_size
= btrfs_level_size(found_root
, level
);
6742 eb
= read_tree_block(found_root
, block_start
,
6744 btrfs_tree_lock(eb
);
6745 BUG_ON(level
!= btrfs_header_level(eb
));
6748 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
6750 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
6752 btrfs_tree_unlock(eb
);
6753 free_extent_buffer(eb
);
6754 prev_block
= block_start
;
6757 mutex_lock(&extent_root
->fs_info
->trans_mutex
);
6758 btrfs_record_root_in_trans(found_root
);
6759 mutex_unlock(&extent_root
->fs_info
->trans_mutex
);
6760 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
6762 * try to update data extent references while
6763 * keeping metadata shared between snapshots.
6766 ret
= relocate_one_path(trans
, found_root
,
6767 path
, &first_key
, ref_path
,
6768 group
, reloc_inode
);
6774 * use fallback method to process the remaining
6778 u64 group_start
= group
->key
.objectid
;
6779 new_extents
= kmalloc(sizeof(*new_extents
),
6782 ret
= get_new_locations(reloc_inode
,
6790 ret
= replace_one_extent(trans
, found_root
,
6792 &first_key
, ref_path
,
6793 new_extents
, nr_extents
);
6795 ret
= relocate_tree_block(trans
, found_root
, path
,
6796 &first_key
, ref_path
);
6803 btrfs_end_transaction(trans
, extent_root
);
6810 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
6813 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
6814 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
6816 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
6817 if (num_devices
== 1) {
6818 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
6819 stripped
= flags
& ~stripped
;
6821 /* turn raid0 into single device chunks */
6822 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
6825 /* turn mirroring into duplication */
6826 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
6827 BTRFS_BLOCK_GROUP_RAID10
))
6828 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
6831 /* they already had raid on here, just return */
6832 if (flags
& stripped
)
6835 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
6836 stripped
= flags
& ~stripped
;
6838 /* switch duplicated blocks with raid1 */
6839 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
6840 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
6842 /* turn single device chunks into raid0 */
6843 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
6848 static int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
6849 struct btrfs_block_group_cache
*shrink_block_group
,
6852 struct btrfs_trans_handle
*trans
;
6853 u64 new_alloc_flags
;
6856 spin_lock(&shrink_block_group
->lock
);
6857 if (btrfs_block_group_used(&shrink_block_group
->item
) +
6858 shrink_block_group
->reserved
> 0) {
6859 spin_unlock(&shrink_block_group
->lock
);
6861 trans
= btrfs_start_transaction(root
, 1);
6862 spin_lock(&shrink_block_group
->lock
);
6864 new_alloc_flags
= update_block_group_flags(root
,
6865 shrink_block_group
->flags
);
6866 if (new_alloc_flags
!= shrink_block_group
->flags
) {
6868 btrfs_block_group_used(&shrink_block_group
->item
);
6870 calc
= shrink_block_group
->key
.offset
;
6872 spin_unlock(&shrink_block_group
->lock
);
6874 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
6875 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
6877 btrfs_end_transaction(trans
, root
);
6879 spin_unlock(&shrink_block_group
->lock
);
6884 int btrfs_prepare_block_group_relocation(struct btrfs_root
*root
,
6885 struct btrfs_block_group_cache
*group
)
6888 __alloc_chunk_for_shrink(root
, group
, 1);
6889 set_block_group_readonly(group
);
6894 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
6895 struct btrfs_root
*root
,
6896 u64 objectid
, u64 size
)
6898 struct btrfs_path
*path
;
6899 struct btrfs_inode_item
*item
;
6900 struct extent_buffer
*leaf
;
6903 path
= btrfs_alloc_path();
6907 path
->leave_spinning
= 1;
6908 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
6912 leaf
= path
->nodes
[0];
6913 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
6914 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
6915 btrfs_set_inode_generation(leaf
, item
, 1);
6916 btrfs_set_inode_size(leaf
, item
, size
);
6917 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
6918 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NOCOMPRESS
);
6919 btrfs_mark_buffer_dirty(leaf
);
6920 btrfs_release_path(root
, path
);
6922 btrfs_free_path(path
);
6926 static noinline
struct inode
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
6927 struct btrfs_block_group_cache
*group
)
6929 struct inode
*inode
= NULL
;
6930 struct btrfs_trans_handle
*trans
;
6931 struct btrfs_root
*root
;
6932 struct btrfs_key root_key
;
6933 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
6936 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
6937 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
6938 root_key
.offset
= (u64
)-1;
6939 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
6941 return ERR_CAST(root
);
6943 trans
= btrfs_start_transaction(root
, 1);
6946 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
6950 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
6953 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
6954 group
->key
.offset
, 0, group
->key
.offset
,
6958 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
6959 if (inode
->i_state
& I_NEW
) {
6960 BTRFS_I(inode
)->root
= root
;
6961 BTRFS_I(inode
)->location
.objectid
= objectid
;
6962 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
6963 BTRFS_I(inode
)->location
.offset
= 0;
6964 btrfs_read_locked_inode(inode
);
6965 unlock_new_inode(inode
);
6966 BUG_ON(is_bad_inode(inode
));
6970 BTRFS_I(inode
)->index_cnt
= group
->key
.objectid
;
6972 err
= btrfs_orphan_add(trans
, inode
);
6974 btrfs_end_transaction(trans
, root
);
6978 inode
= ERR_PTR(err
);
6983 int btrfs_reloc_clone_csums(struct inode
*inode
, u64 file_pos
, u64 len
)
6986 struct btrfs_ordered_sum
*sums
;
6987 struct btrfs_sector_sum
*sector_sum
;
6988 struct btrfs_ordered_extent
*ordered
;
6989 struct btrfs_root
*root
= BTRFS_I(inode
)->root
;
6990 struct list_head list
;
6995 INIT_LIST_HEAD(&list
);
6997 ordered
= btrfs_lookup_ordered_extent(inode
, file_pos
);
6998 BUG_ON(ordered
->file_offset
!= file_pos
|| ordered
->len
!= len
);
7000 disk_bytenr
= file_pos
+ BTRFS_I(inode
)->index_cnt
;
7001 ret
= btrfs_lookup_csums_range(root
->fs_info
->csum_root
, disk_bytenr
,
7002 disk_bytenr
+ len
- 1, &list
);
7004 while (!list_empty(&list
)) {
7005 sums
= list_entry(list
.next
, struct btrfs_ordered_sum
, list
);
7006 list_del_init(&sums
->list
);
7008 sector_sum
= sums
->sums
;
7009 sums
->bytenr
= ordered
->start
;
7012 while (offset
< sums
->len
) {
7013 sector_sum
->bytenr
+= ordered
->start
- disk_bytenr
;
7015 offset
+= root
->sectorsize
;
7018 btrfs_add_ordered_sum(inode
, ordered
, sums
);
7020 btrfs_put_ordered_extent(ordered
);
7024 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
7026 struct btrfs_trans_handle
*trans
;
7027 struct btrfs_path
*path
;
7028 struct btrfs_fs_info
*info
= root
->fs_info
;
7029 struct extent_buffer
*leaf
;
7030 struct inode
*reloc_inode
;
7031 struct btrfs_block_group_cache
*block_group
;
7032 struct btrfs_key key
;
7041 root
= root
->fs_info
->extent_root
;
7043 block_group
= btrfs_lookup_block_group(info
, group_start
);
7044 BUG_ON(!block_group
);
7046 printk(KERN_INFO
"btrfs relocating block group %llu flags %llu\n",
7047 (unsigned long long)block_group
->key
.objectid
,
7048 (unsigned long long)block_group
->flags
);
7050 path
= btrfs_alloc_path();
7053 reloc_inode
= create_reloc_inode(info
, block_group
);
7054 BUG_ON(IS_ERR(reloc_inode
));
7056 __alloc_chunk_for_shrink(root
, block_group
, 1);
7057 set_block_group_readonly(block_group
);
7059 btrfs_start_delalloc_inodes(info
->tree_root
);
7060 btrfs_wait_ordered_extents(info
->tree_root
, 0);
7065 key
.objectid
= block_group
->key
.objectid
;
7068 cur_byte
= key
.objectid
;
7070 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7071 btrfs_commit_transaction(trans
, info
->tree_root
);
7073 mutex_lock(&root
->fs_info
->cleaner_mutex
);
7074 btrfs_clean_old_snapshots(info
->tree_root
);
7075 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
7076 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
7078 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7079 btrfs_commit_transaction(trans
, info
->tree_root
);
7082 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
7086 leaf
= path
->nodes
[0];
7087 nritems
= btrfs_header_nritems(leaf
);
7088 if (path
->slots
[0] >= nritems
) {
7089 ret
= btrfs_next_leaf(root
, path
);
7096 leaf
= path
->nodes
[0];
7097 nritems
= btrfs_header_nritems(leaf
);
7100 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
7102 if (key
.objectid
>= block_group
->key
.objectid
+
7103 block_group
->key
.offset
)
7106 if (progress
&& need_resched()) {
7107 btrfs_release_path(root
, path
);
7114 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
7115 key
.objectid
+ key
.offset
<= cur_byte
) {
7121 cur_byte
= key
.objectid
+ key
.offset
;
7122 btrfs_release_path(root
, path
);
7124 __alloc_chunk_for_shrink(root
, block_group
, 0);
7125 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
7131 key
.objectid
= cur_byte
;
7136 btrfs_release_path(root
, path
);
7139 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
7140 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
7143 if (total_found
> 0) {
7144 printk(KERN_INFO
"btrfs found %llu extents in pass %d\n",
7145 (unsigned long long)total_found
, pass
);
7147 if (total_found
== skipped
&& pass
> 2) {
7149 reloc_inode
= create_reloc_inode(info
, block_group
);
7155 /* delete reloc_inode */
7158 /* unpin extents in this range */
7159 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7160 btrfs_commit_transaction(trans
, info
->tree_root
);
7162 spin_lock(&block_group
->lock
);
7163 WARN_ON(block_group
->pinned
> 0);
7164 WARN_ON(block_group
->reserved
> 0);
7165 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
7166 spin_unlock(&block_group
->lock
);
7167 btrfs_put_block_group(block_group
);
7170 btrfs_free_path(path
);
7175 static int find_first_block_group(struct btrfs_root
*root
,
7176 struct btrfs_path
*path
, struct btrfs_key
*key
)
7179 struct btrfs_key found_key
;
7180 struct extent_buffer
*leaf
;
7183 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
7188 slot
= path
->slots
[0];
7189 leaf
= path
->nodes
[0];
7190 if (slot
>= btrfs_header_nritems(leaf
)) {
7191 ret
= btrfs_next_leaf(root
, path
);
7198 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
7200 if (found_key
.objectid
>= key
->objectid
&&
7201 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
7212 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
7214 struct btrfs_block_group_cache
*block_group
;
7215 struct btrfs_space_info
*space_info
;
7218 spin_lock(&info
->block_group_cache_lock
);
7219 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
7220 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
7222 rb_erase(&block_group
->cache_node
,
7223 &info
->block_group_cache_tree
);
7224 spin_unlock(&info
->block_group_cache_lock
);
7226 down_write(&block_group
->space_info
->groups_sem
);
7227 list_del(&block_group
->list
);
7228 up_write(&block_group
->space_info
->groups_sem
);
7230 if (block_group
->cached
== BTRFS_CACHE_STARTED
)
7231 wait_event(block_group
->caching_q
,
7232 block_group_cache_done(block_group
));
7234 btrfs_remove_free_space_cache(block_group
);
7236 WARN_ON(atomic_read(&block_group
->count
) != 1);
7239 spin_lock(&info
->block_group_cache_lock
);
7241 spin_unlock(&info
->block_group_cache_lock
);
7243 /* now that all the block groups are freed, go through and
7244 * free all the space_info structs. This is only called during
7245 * the final stages of unmount, and so we know nobody is
7246 * using them. We call synchronize_rcu() once before we start,
7247 * just to be on the safe side.
7251 while(!list_empty(&info
->space_info
)) {
7252 space_info
= list_entry(info
->space_info
.next
,
7253 struct btrfs_space_info
,
7256 list_del(&space_info
->list
);
7262 int btrfs_read_block_groups(struct btrfs_root
*root
)
7264 struct btrfs_path
*path
;
7266 struct btrfs_block_group_cache
*cache
;
7267 struct btrfs_fs_info
*info
= root
->fs_info
;
7268 struct btrfs_space_info
*space_info
;
7269 struct btrfs_key key
;
7270 struct btrfs_key found_key
;
7271 struct extent_buffer
*leaf
;
7273 root
= info
->extent_root
;
7276 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
7277 path
= btrfs_alloc_path();
7282 ret
= find_first_block_group(root
, path
, &key
);
7290 leaf
= path
->nodes
[0];
7291 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
7292 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
7298 atomic_set(&cache
->count
, 1);
7299 spin_lock_init(&cache
->lock
);
7300 spin_lock_init(&cache
->tree_lock
);
7301 cache
->fs_info
= info
;
7302 init_waitqueue_head(&cache
->caching_q
);
7303 INIT_LIST_HEAD(&cache
->list
);
7304 INIT_LIST_HEAD(&cache
->cluster_list
);
7307 * we only want to have 32k of ram per block group for keeping
7308 * track of free space, and if we pass 1/2 of that we want to
7309 * start converting things over to using bitmaps
7311 cache
->extents_thresh
= ((1024 * 32) / 2) /
7312 sizeof(struct btrfs_free_space
);
7314 read_extent_buffer(leaf
, &cache
->item
,
7315 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
7316 sizeof(cache
->item
));
7317 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
7319 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
7320 btrfs_release_path(root
, path
);
7321 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
7322 cache
->sectorsize
= root
->sectorsize
;
7324 remove_sb_from_cache(root
, cache
);
7327 * check for two cases, either we are full, and therefore
7328 * don't need to bother with the caching work since we won't
7329 * find any space, or we are empty, and we can just add all
7330 * the space in and be done with it. This saves us _alot_ of
7331 * time, particularly in the full case.
7333 if (found_key
.offset
== btrfs_block_group_used(&cache
->item
)) {
7334 cache
->cached
= BTRFS_CACHE_FINISHED
;
7335 } else if (btrfs_block_group_used(&cache
->item
) == 0) {
7336 cache
->cached
= BTRFS_CACHE_FINISHED
;
7337 add_new_free_space(cache
, root
->fs_info
,
7339 found_key
.objectid
+
7343 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
7344 btrfs_block_group_used(&cache
->item
),
7347 cache
->space_info
= space_info
;
7348 down_write(&space_info
->groups_sem
);
7349 list_add_tail(&cache
->list
, &space_info
->block_groups
);
7350 up_write(&space_info
->groups_sem
);
7352 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
7355 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
7356 if (btrfs_chunk_readonly(root
, cache
->key
.objectid
))
7357 set_block_group_readonly(cache
);
7361 btrfs_free_path(path
);
7365 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
7366 struct btrfs_root
*root
, u64 bytes_used
,
7367 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
7371 struct btrfs_root
*extent_root
;
7372 struct btrfs_block_group_cache
*cache
;
7374 extent_root
= root
->fs_info
->extent_root
;
7376 root
->fs_info
->last_trans_log_full_commit
= trans
->transid
;
7378 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
7382 cache
->key
.objectid
= chunk_offset
;
7383 cache
->key
.offset
= size
;
7384 cache
->key
.type
= BTRFS_BLOCK_GROUP_ITEM_KEY
;
7385 cache
->sectorsize
= root
->sectorsize
;
7388 * we only want to have 32k of ram per block group for keeping track
7389 * of free space, and if we pass 1/2 of that we want to start
7390 * converting things over to using bitmaps
7392 cache
->extents_thresh
= ((1024 * 32) / 2) /
7393 sizeof(struct btrfs_free_space
);
7394 atomic_set(&cache
->count
, 1);
7395 spin_lock_init(&cache
->lock
);
7396 spin_lock_init(&cache
->tree_lock
);
7397 init_waitqueue_head(&cache
->caching_q
);
7398 INIT_LIST_HEAD(&cache
->list
);
7399 INIT_LIST_HEAD(&cache
->cluster_list
);
7401 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
7402 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
7403 cache
->flags
= type
;
7404 btrfs_set_block_group_flags(&cache
->item
, type
);
7406 cache
->cached
= BTRFS_CACHE_FINISHED
;
7407 remove_sb_from_cache(root
, cache
);
7409 add_new_free_space(cache
, root
->fs_info
, chunk_offset
,
7410 chunk_offset
+ size
);
7412 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
7413 &cache
->space_info
);
7415 down_write(&cache
->space_info
->groups_sem
);
7416 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
7417 up_write(&cache
->space_info
->groups_sem
);
7419 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
7422 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
7423 sizeof(cache
->item
));
7426 set_avail_alloc_bits(extent_root
->fs_info
, type
);
7431 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
7432 struct btrfs_root
*root
, u64 group_start
)
7434 struct btrfs_path
*path
;
7435 struct btrfs_block_group_cache
*block_group
;
7436 struct btrfs_free_cluster
*cluster
;
7437 struct btrfs_key key
;
7440 root
= root
->fs_info
->extent_root
;
7442 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
7443 BUG_ON(!block_group
);
7444 BUG_ON(!block_group
->ro
);
7446 memcpy(&key
, &block_group
->key
, sizeof(key
));
7448 /* make sure this block group isn't part of an allocation cluster */
7449 cluster
= &root
->fs_info
->data_alloc_cluster
;
7450 spin_lock(&cluster
->refill_lock
);
7451 btrfs_return_cluster_to_free_space(block_group
, cluster
);
7452 spin_unlock(&cluster
->refill_lock
);
7455 * make sure this block group isn't part of a metadata
7456 * allocation cluster
7458 cluster
= &root
->fs_info
->meta_alloc_cluster
;
7459 spin_lock(&cluster
->refill_lock
);
7460 btrfs_return_cluster_to_free_space(block_group
, cluster
);
7461 spin_unlock(&cluster
->refill_lock
);
7463 path
= btrfs_alloc_path();
7466 spin_lock(&root
->fs_info
->block_group_cache_lock
);
7467 rb_erase(&block_group
->cache_node
,
7468 &root
->fs_info
->block_group_cache_tree
);
7469 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
7471 down_write(&block_group
->space_info
->groups_sem
);
7473 * we must use list_del_init so people can check to see if they
7474 * are still on the list after taking the semaphore
7476 list_del_init(&block_group
->list
);
7477 up_write(&block_group
->space_info
->groups_sem
);
7479 if (block_group
->cached
== BTRFS_CACHE_STARTED
)
7480 wait_event(block_group
->caching_q
,
7481 block_group_cache_done(block_group
));
7483 btrfs_remove_free_space_cache(block_group
);
7485 spin_lock(&block_group
->space_info
->lock
);
7486 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
7487 block_group
->space_info
->bytes_readonly
-= block_group
->key
.offset
;
7488 spin_unlock(&block_group
->space_info
->lock
);
7489 block_group
->space_info
->full
= 0;
7491 btrfs_put_block_group(block_group
);
7492 btrfs_put_block_group(block_group
);
7494 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
7500 ret
= btrfs_del_item(trans
, root
, path
);
7502 btrfs_free_path(path
);