2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
26 #include "print-tree.h"
27 #include "transaction.h"
30 #include "ref-cache.h"
32 #define PENDING_EXTENT_INSERT 0
33 #define PENDING_EXTENT_DELETE 1
34 #define PENDING_BACKREF_UPDATE 2
36 struct pending_extent_op
{
47 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
48 btrfs_root
*extent_root
);
49 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
50 btrfs_root
*extent_root
);
51 static struct btrfs_block_group_cache
*
52 __btrfs_find_block_group(struct btrfs_root
*root
,
53 struct btrfs_block_group_cache
*hint
,
54 u64 search_start
, int data
, int owner
);
56 void maybe_lock_mutex(struct btrfs_root
*root
)
58 if (root
!= root
->fs_info
->extent_root
&&
59 root
!= root
->fs_info
->chunk_root
&&
60 root
!= root
->fs_info
->dev_root
) {
61 mutex_lock(&root
->fs_info
->alloc_mutex
);
65 void maybe_unlock_mutex(struct btrfs_root
*root
)
67 if (root
!= root
->fs_info
->extent_root
&&
68 root
!= root
->fs_info
->chunk_root
&&
69 root
!= root
->fs_info
->dev_root
) {
70 mutex_unlock(&root
->fs_info
->alloc_mutex
);
74 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
76 return (cache
->flags
& bits
) == bits
;
80 * this adds the block group to the fs_info rb tree for the block group
83 int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
84 struct btrfs_block_group_cache
*block_group
)
87 struct rb_node
*parent
= NULL
;
88 struct btrfs_block_group_cache
*cache
;
90 spin_lock(&info
->block_group_cache_lock
);
91 p
= &info
->block_group_cache_tree
.rb_node
;
95 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
97 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
99 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
102 spin_unlock(&info
->block_group_cache_lock
);
107 rb_link_node(&block_group
->cache_node
, parent
, p
);
108 rb_insert_color(&block_group
->cache_node
,
109 &info
->block_group_cache_tree
);
110 spin_unlock(&info
->block_group_cache_lock
);
116 * This will return the block group at or after bytenr if contains is 0, else
117 * it will return the block group that contains the bytenr
119 static struct btrfs_block_group_cache
*
120 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
123 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
127 spin_lock(&info
->block_group_cache_lock
);
128 n
= info
->block_group_cache_tree
.rb_node
;
131 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
133 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
134 start
= cache
->key
.objectid
;
136 if (bytenr
< start
) {
137 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
140 } else if (bytenr
> start
) {
141 if (contains
&& bytenr
<= end
) {
151 spin_unlock(&info
->block_group_cache_lock
);
157 * this is only called by cache_block_group, since we could have freed extents
158 * we need to check the pinned_extents for any extents that can't be used yet
159 * since their free space will be released as soon as the transaction commits.
161 static int add_new_free_space(struct btrfs_block_group_cache
*block_group
,
162 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
164 u64 extent_start
, extent_end
, size
;
167 while (start
< end
) {
168 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
169 &extent_start
, &extent_end
,
174 if (extent_start
== start
) {
175 start
= extent_end
+ 1;
176 } else if (extent_start
> start
&& extent_start
< end
) {
177 size
= extent_start
- start
;
178 ret
= btrfs_add_free_space(block_group
, start
, size
);
180 start
= extent_end
+ 1;
188 ret
= btrfs_add_free_space(block_group
, start
, size
);
195 static int cache_block_group(struct btrfs_root
*root
,
196 struct btrfs_block_group_cache
*block_group
)
198 struct btrfs_path
*path
;
200 struct btrfs_key key
;
201 struct extent_buffer
*leaf
;
210 root
= root
->fs_info
->extent_root
;
212 if (block_group
->cached
)
215 path
= btrfs_alloc_path();
221 * we get into deadlocks with paths held by callers of this function.
222 * since the alloc_mutex is protecting things right now, just
223 * skip the locking here
225 path
->skip_locking
= 1;
226 first_free
= max_t(u64
, block_group
->key
.objectid
,
227 BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
);
228 key
.objectid
= block_group
->key
.objectid
;
230 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
231 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
234 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_EXTENT_ITEM_KEY
);
238 leaf
= path
->nodes
[0];
239 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
240 if (key
.objectid
+ key
.offset
> first_free
)
241 first_free
= key
.objectid
+ key
.offset
;
244 leaf
= path
->nodes
[0];
245 slot
= path
->slots
[0];
246 if (slot
>= btrfs_header_nritems(leaf
)) {
247 ret
= btrfs_next_leaf(root
, path
);
255 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
256 if (key
.objectid
< block_group
->key
.objectid
)
259 if (key
.objectid
>= block_group
->key
.objectid
+
260 block_group
->key
.offset
)
263 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
269 add_new_free_space(block_group
, root
->fs_info
, last
,
272 last
= key
.objectid
+ key
.offset
;
281 add_new_free_space(block_group
, root
->fs_info
, last
,
282 block_group
->key
.objectid
+
283 block_group
->key
.offset
);
285 block_group
->cached
= 1;
288 btrfs_free_path(path
);
293 * return the block group that starts at or after bytenr
295 struct btrfs_block_group_cache
*btrfs_lookup_first_block_group(struct
299 struct btrfs_block_group_cache
*cache
;
301 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
307 * return the block group that contains teh given bytenr
309 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
313 struct btrfs_block_group_cache
*cache
;
315 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
320 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
323 struct list_head
*head
= &info
->space_info
;
324 struct list_head
*cur
;
325 struct btrfs_space_info
*found
;
326 list_for_each(cur
, head
) {
327 found
= list_entry(cur
, struct btrfs_space_info
, list
);
328 if (found
->flags
== flags
)
334 static u64
div_factor(u64 num
, int factor
)
343 static struct btrfs_block_group_cache
*
344 __btrfs_find_block_group(struct btrfs_root
*root
,
345 struct btrfs_block_group_cache
*hint
,
346 u64 search_start
, int data
, int owner
)
348 struct btrfs_block_group_cache
*cache
;
349 struct btrfs_block_group_cache
*found_group
= NULL
;
350 struct btrfs_fs_info
*info
= root
->fs_info
;
358 if (data
& BTRFS_BLOCK_GROUP_METADATA
)
362 struct btrfs_block_group_cache
*shint
;
363 shint
= btrfs_lookup_first_block_group(info
, search_start
);
364 if (shint
&& block_group_bits(shint
, data
) && !shint
->ro
) {
365 spin_lock(&shint
->lock
);
366 used
= btrfs_block_group_used(&shint
->item
);
367 if (used
+ shint
->pinned
+ shint
->reserved
<
368 div_factor(shint
->key
.offset
, factor
)) {
369 spin_unlock(&shint
->lock
);
372 spin_unlock(&shint
->lock
);
375 if (hint
&& !hint
->ro
&& block_group_bits(hint
, data
)) {
376 spin_lock(&hint
->lock
);
377 used
= btrfs_block_group_used(&hint
->item
);
378 if (used
+ hint
->pinned
+ hint
->reserved
<
379 div_factor(hint
->key
.offset
, factor
)) {
380 spin_unlock(&hint
->lock
);
383 spin_unlock(&hint
->lock
);
384 last
= hint
->key
.objectid
+ hint
->key
.offset
;
387 last
= max(hint
->key
.objectid
, search_start
);
393 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
397 spin_lock(&cache
->lock
);
398 last
= cache
->key
.objectid
+ cache
->key
.offset
;
399 used
= btrfs_block_group_used(&cache
->item
);
401 if (!cache
->ro
&& block_group_bits(cache
, data
)) {
402 free_check
= div_factor(cache
->key
.offset
, factor
);
403 if (used
+ cache
->pinned
+ cache
->reserved
<
406 spin_unlock(&cache
->lock
);
410 spin_unlock(&cache
->lock
);
418 if (!full_search
&& factor
< 10) {
428 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
429 struct btrfs_block_group_cache
430 *hint
, u64 search_start
,
434 struct btrfs_block_group_cache
*ret
;
435 ret
= __btrfs_find_block_group(root
, hint
, search_start
, data
, owner
);
439 /* simple helper to search for an existing extent at a given offset */
440 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
443 struct btrfs_key key
;
444 struct btrfs_path
*path
;
446 path
= btrfs_alloc_path();
448 maybe_lock_mutex(root
);
449 key
.objectid
= start
;
451 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
452 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
454 maybe_unlock_mutex(root
);
455 btrfs_free_path(path
);
460 * Back reference rules. Back refs have three main goals:
462 * 1) differentiate between all holders of references to an extent so that
463 * when a reference is dropped we can make sure it was a valid reference
464 * before freeing the extent.
466 * 2) Provide enough information to quickly find the holders of an extent
467 * if we notice a given block is corrupted or bad.
469 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
470 * maintenance. This is actually the same as #2, but with a slightly
471 * different use case.
473 * File extents can be referenced by:
475 * - multiple snapshots, subvolumes, or different generations in one subvol
476 * - different files inside a single subvolume
477 * - different offsets inside a file (bookend extents in file.c)
479 * The extent ref structure has fields for:
481 * - Objectid of the subvolume root
482 * - Generation number of the tree holding the reference
483 * - objectid of the file holding the reference
484 * - number of references holding by parent node (alway 1 for tree blocks)
486 * Btree leaf may hold multiple references to a file extent. In most cases,
487 * these references are from same file and the corresponding offsets inside
488 * the file are close together.
490 * When a file extent is allocated the fields are filled in:
491 * (root_key.objectid, trans->transid, inode objectid, 1)
493 * When a leaf is cow'd new references are added for every file extent found
494 * in the leaf. It looks similar to the create case, but trans->transid will
495 * be different when the block is cow'd.
497 * (root_key.objectid, trans->transid, inode objectid,
498 * number of references in the leaf)
500 * When a file extent is removed either during snapshot deletion or
501 * file truncation, we find the corresponding back reference and check
502 * the following fields:
504 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
507 * Btree extents can be referenced by:
509 * - Different subvolumes
510 * - Different generations of the same subvolume
512 * When a tree block is created, back references are inserted:
514 * (root->root_key.objectid, trans->transid, level, 1)
516 * When a tree block is cow'd, new back references are added for all the
517 * blocks it points to. If the tree block isn't in reference counted root,
518 * the old back references are removed. These new back references are of
519 * the form (trans->transid will have increased since creation):
521 * (root->root_key.objectid, trans->transid, level, 1)
523 * When a backref is in deleting, the following fields are checked:
525 * if backref was for a tree root:
526 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
528 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
530 * Back Reference Key composing:
532 * The key objectid corresponds to the first byte in the extent, the key
533 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
534 * byte of parent extent. If a extent is tree root, the key offset is set
535 * to the key objectid.
538 static int noinline
lookup_extent_backref(struct btrfs_trans_handle
*trans
,
539 struct btrfs_root
*root
,
540 struct btrfs_path
*path
,
541 u64 bytenr
, u64 parent
,
542 u64 ref_root
, u64 ref_generation
,
543 u64 owner_objectid
, int del
)
545 struct btrfs_key key
;
546 struct btrfs_extent_ref
*ref
;
547 struct extent_buffer
*leaf
;
551 key
.objectid
= bytenr
;
552 key
.type
= BTRFS_EXTENT_REF_KEY
;
555 ret
= btrfs_search_slot(trans
, root
, &key
, path
, del
? -1 : 0, 1);
563 leaf
= path
->nodes
[0];
564 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
565 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
566 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
567 btrfs_ref_generation(leaf
, ref
) != ref_generation
||
568 (ref_objectid
!= owner_objectid
&&
569 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
579 static int noinline
insert_extent_backref(struct btrfs_trans_handle
*trans
,
580 struct btrfs_root
*root
,
581 struct btrfs_path
*path
,
582 u64 bytenr
, u64 parent
,
583 u64 ref_root
, u64 ref_generation
,
586 struct btrfs_key key
;
587 struct extent_buffer
*leaf
;
588 struct btrfs_extent_ref
*ref
;
592 key
.objectid
= bytenr
;
593 key
.type
= BTRFS_EXTENT_REF_KEY
;
596 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*ref
));
598 leaf
= path
->nodes
[0];
599 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
600 struct btrfs_extent_ref
);
601 btrfs_set_ref_root(leaf
, ref
, ref_root
);
602 btrfs_set_ref_generation(leaf
, ref
, ref_generation
);
603 btrfs_set_ref_objectid(leaf
, ref
, owner_objectid
);
604 btrfs_set_ref_num_refs(leaf
, ref
, 1);
605 } else if (ret
== -EEXIST
) {
607 BUG_ON(owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
);
608 leaf
= path
->nodes
[0];
609 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
610 struct btrfs_extent_ref
);
611 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
612 btrfs_ref_generation(leaf
, ref
) != ref_generation
) {
618 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
619 BUG_ON(num_refs
== 0);
620 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
+ 1);
622 existing_owner
= btrfs_ref_objectid(leaf
, ref
);
623 if (existing_owner
!= owner_objectid
&&
624 existing_owner
!= BTRFS_MULTIPLE_OBJECTIDS
) {
625 btrfs_set_ref_objectid(leaf
, ref
,
626 BTRFS_MULTIPLE_OBJECTIDS
);
632 btrfs_mark_buffer_dirty(path
->nodes
[0]);
634 btrfs_release_path(root
, path
);
638 static int noinline
remove_extent_backref(struct btrfs_trans_handle
*trans
,
639 struct btrfs_root
*root
,
640 struct btrfs_path
*path
)
642 struct extent_buffer
*leaf
;
643 struct btrfs_extent_ref
*ref
;
647 leaf
= path
->nodes
[0];
648 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
649 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
650 BUG_ON(num_refs
== 0);
653 ret
= btrfs_del_item(trans
, root
, path
);
655 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
);
656 btrfs_mark_buffer_dirty(leaf
);
658 btrfs_release_path(root
, path
);
662 static int __btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
663 struct btrfs_root
*root
, u64 bytenr
,
664 u64 orig_parent
, u64 parent
,
665 u64 orig_root
, u64 ref_root
,
666 u64 orig_generation
, u64 ref_generation
,
670 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
671 struct btrfs_path
*path
;
673 if (root
== root
->fs_info
->extent_root
) {
674 struct pending_extent_op
*extent_op
;
677 BUG_ON(owner_objectid
>= BTRFS_MAX_LEVEL
);
678 num_bytes
= btrfs_level_size(root
, (int)owner_objectid
);
679 if (test_range_bit(&root
->fs_info
->extent_ins
, bytenr
,
680 bytenr
+ num_bytes
- 1, EXTENT_LOCKED
, 0)) {
682 ret
= get_state_private(&root
->fs_info
->extent_ins
,
685 extent_op
= (struct pending_extent_op
*)
687 BUG_ON(extent_op
->parent
!= orig_parent
);
688 BUG_ON(extent_op
->generation
!= orig_generation
);
689 extent_op
->parent
= parent
;
690 extent_op
->generation
= ref_generation
;
692 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
695 extent_op
->type
= PENDING_BACKREF_UPDATE
;
696 extent_op
->bytenr
= bytenr
;
697 extent_op
->num_bytes
= num_bytes
;
698 extent_op
->parent
= parent
;
699 extent_op
->orig_parent
= orig_parent
;
700 extent_op
->generation
= ref_generation
;
701 extent_op
->orig_generation
= orig_generation
;
702 extent_op
->level
= (int)owner_objectid
;
704 set_extent_bits(&root
->fs_info
->extent_ins
,
705 bytenr
, bytenr
+ num_bytes
- 1,
706 EXTENT_LOCKED
, GFP_NOFS
);
707 set_state_private(&root
->fs_info
->extent_ins
,
708 bytenr
, (unsigned long)extent_op
);
713 path
= btrfs_alloc_path();
716 ret
= lookup_extent_backref(trans
, extent_root
, path
,
717 bytenr
, orig_parent
, orig_root
,
718 orig_generation
, owner_objectid
, 1);
721 ret
= remove_extent_backref(trans
, extent_root
, path
);
724 ret
= insert_extent_backref(trans
, extent_root
, path
, bytenr
,
725 parent
, ref_root
, ref_generation
,
728 finish_current_insert(trans
, extent_root
);
729 del_pending_extents(trans
, extent_root
);
731 btrfs_free_path(path
);
735 int btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
736 struct btrfs_root
*root
, u64 bytenr
,
737 u64 orig_parent
, u64 parent
,
738 u64 ref_root
, u64 ref_generation
,
742 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
743 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
745 maybe_lock_mutex(root
);
746 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
, orig_parent
,
747 parent
, ref_root
, ref_root
,
748 ref_generation
, ref_generation
,
750 maybe_unlock_mutex(root
);
754 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
755 struct btrfs_root
*root
, u64 bytenr
,
756 u64 orig_parent
, u64 parent
,
757 u64 orig_root
, u64 ref_root
,
758 u64 orig_generation
, u64 ref_generation
,
761 struct btrfs_path
*path
;
763 struct btrfs_key key
;
764 struct extent_buffer
*l
;
765 struct btrfs_extent_item
*item
;
768 path
= btrfs_alloc_path();
773 key
.objectid
= bytenr
;
774 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
775 key
.offset
= (u64
)-1;
777 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
781 BUG_ON(ret
== 0 || path
->slots
[0] == 0);
786 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
787 BUG_ON(key
.objectid
!= bytenr
);
788 BUG_ON(key
.type
!= BTRFS_EXTENT_ITEM_KEY
);
790 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
791 refs
= btrfs_extent_refs(l
, item
);
792 btrfs_set_extent_refs(l
, item
, refs
+ 1);
793 btrfs_mark_buffer_dirty(path
->nodes
[0]);
795 btrfs_release_path(root
->fs_info
->extent_root
, path
);
798 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
799 path
, bytenr
, parent
,
800 ref_root
, ref_generation
,
803 finish_current_insert(trans
, root
->fs_info
->extent_root
);
804 del_pending_extents(trans
, root
->fs_info
->extent_root
);
806 btrfs_free_path(path
);
810 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
811 struct btrfs_root
*root
,
812 u64 bytenr
, u64 num_bytes
, u64 parent
,
813 u64 ref_root
, u64 ref_generation
,
817 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
818 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
820 maybe_lock_mutex(root
);
821 ret
= __btrfs_inc_extent_ref(trans
, root
, bytenr
, 0, parent
,
822 0, ref_root
, 0, ref_generation
,
824 maybe_unlock_mutex(root
);
828 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
829 struct btrfs_root
*root
)
831 finish_current_insert(trans
, root
->fs_info
->extent_root
);
832 del_pending_extents(trans
, root
->fs_info
->extent_root
);
836 int btrfs_lookup_extent_ref(struct btrfs_trans_handle
*trans
,
837 struct btrfs_root
*root
, u64 bytenr
,
838 u64 num_bytes
, u32
*refs
)
840 struct btrfs_path
*path
;
842 struct btrfs_key key
;
843 struct extent_buffer
*l
;
844 struct btrfs_extent_item
*item
;
846 WARN_ON(num_bytes
< root
->sectorsize
);
847 path
= btrfs_alloc_path();
849 key
.objectid
= bytenr
;
850 key
.offset
= num_bytes
;
851 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
852 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
857 btrfs_print_leaf(root
, path
->nodes
[0]);
858 printk("failed to find block number %Lu\n", bytenr
);
862 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
863 *refs
= btrfs_extent_refs(l
, item
);
865 btrfs_free_path(path
);
869 static int get_reference_status(struct btrfs_root
*root
, u64 bytenr
,
870 u64 parent_gen
, u64 ref_objectid
,
871 u64
*min_generation
, u32
*ref_count
)
873 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
874 struct btrfs_path
*path
;
875 struct extent_buffer
*leaf
;
876 struct btrfs_extent_ref
*ref_item
;
877 struct btrfs_key key
;
878 struct btrfs_key found_key
;
879 u64 root_objectid
= root
->root_key
.objectid
;
884 key
.objectid
= bytenr
;
885 key
.offset
= (u64
)-1;
886 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
888 path
= btrfs_alloc_path();
889 mutex_lock(&root
->fs_info
->alloc_mutex
);
890 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
894 if (ret
< 0 || path
->slots
[0] == 0)
898 leaf
= path
->nodes
[0];
899 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
901 if (found_key
.objectid
!= bytenr
||
902 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
908 *min_generation
= (u64
)-1;
911 leaf
= path
->nodes
[0];
912 nritems
= btrfs_header_nritems(leaf
);
913 if (path
->slots
[0] >= nritems
) {
914 ret
= btrfs_next_leaf(extent_root
, path
);
921 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
922 if (found_key
.objectid
!= bytenr
)
925 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
930 ref_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
931 struct btrfs_extent_ref
);
932 ref_generation
= btrfs_ref_generation(leaf
, ref_item
);
934 * For (parent_gen > 0 && parent_gen > ref_generation):
936 * we reach here through the oldest root, therefore
937 * all other reference from same snapshot should have
938 * a larger generation.
940 if ((root_objectid
!= btrfs_ref_root(leaf
, ref_item
)) ||
941 (parent_gen
> 0 && parent_gen
> ref_generation
) ||
942 (ref_objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
943 ref_objectid
!= btrfs_ref_objectid(leaf
, ref_item
))) {
949 if (*min_generation
> ref_generation
)
950 *min_generation
= ref_generation
;
956 mutex_unlock(&root
->fs_info
->alloc_mutex
);
957 btrfs_free_path(path
);
961 int btrfs_cross_ref_exists(struct btrfs_trans_handle
*trans
,
962 struct btrfs_root
*root
,
963 struct btrfs_key
*key
, u64 bytenr
)
965 struct btrfs_root
*old_root
;
966 struct btrfs_path
*path
= NULL
;
967 struct extent_buffer
*eb
;
968 struct btrfs_file_extent_item
*item
;
976 BUG_ON(trans
== NULL
);
977 BUG_ON(key
->type
!= BTRFS_EXTENT_DATA_KEY
);
978 ret
= get_reference_status(root
, bytenr
, 0, key
->objectid
,
979 &min_generation
, &ref_count
);
986 old_root
= root
->dirty_root
->root
;
987 ref_generation
= old_root
->root_key
.offset
;
989 /* all references are created in running transaction */
990 if (min_generation
> ref_generation
) {
995 path
= btrfs_alloc_path();
1001 path
->skip_locking
= 1;
1002 /* if no item found, the extent is referenced by other snapshot */
1003 ret
= btrfs_search_slot(NULL
, old_root
, key
, path
, 0, 0);
1007 eb
= path
->nodes
[0];
1008 item
= btrfs_item_ptr(eb
, path
->slots
[0],
1009 struct btrfs_file_extent_item
);
1010 if (btrfs_file_extent_type(eb
, item
) != BTRFS_FILE_EXTENT_REG
||
1011 btrfs_file_extent_disk_bytenr(eb
, item
) != bytenr
) {
1016 for (level
= BTRFS_MAX_LEVEL
- 1; level
>= -1; level
--) {
1018 eb
= path
->nodes
[level
];
1021 extent_start
= eb
->start
;
1023 extent_start
= bytenr
;
1025 ret
= get_reference_status(root
, extent_start
, ref_generation
,
1026 0, &min_generation
, &ref_count
);
1030 if (ref_count
!= 1) {
1035 ref_generation
= btrfs_header_generation(eb
);
1040 btrfs_free_path(path
);
1044 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1045 struct extent_buffer
*buf
, u32 nr_extents
)
1047 struct btrfs_key key
;
1048 struct btrfs_file_extent_item
*fi
;
1056 if (!root
->ref_cows
)
1059 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
1061 root_gen
= root
->root_key
.offset
;
1064 root_gen
= trans
->transid
- 1;
1067 level
= btrfs_header_level(buf
);
1068 nritems
= btrfs_header_nritems(buf
);
1071 struct btrfs_leaf_ref
*ref
;
1072 struct btrfs_extent_info
*info
;
1074 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
1080 ref
->root_gen
= root_gen
;
1081 ref
->bytenr
= buf
->start
;
1082 ref
->owner
= btrfs_header_owner(buf
);
1083 ref
->generation
= btrfs_header_generation(buf
);
1084 ref
->nritems
= nr_extents
;
1085 info
= ref
->extents
;
1087 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
1089 btrfs_item_key_to_cpu(buf
, &key
, i
);
1090 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1092 fi
= btrfs_item_ptr(buf
, i
,
1093 struct btrfs_file_extent_item
);
1094 if (btrfs_file_extent_type(buf
, fi
) ==
1095 BTRFS_FILE_EXTENT_INLINE
)
1097 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1098 if (disk_bytenr
== 0)
1101 info
->bytenr
= disk_bytenr
;
1103 btrfs_file_extent_disk_num_bytes(buf
, fi
);
1104 info
->objectid
= key
.objectid
;
1105 info
->offset
= key
.offset
;
1109 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1110 if (ret
== -EEXIST
&& shared
) {
1111 struct btrfs_leaf_ref
*old
;
1112 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
1114 btrfs_remove_leaf_ref(root
, old
);
1115 btrfs_free_leaf_ref(root
, old
);
1116 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1119 btrfs_free_leaf_ref(root
, ref
);
1125 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1126 struct extent_buffer
*orig_buf
, struct extent_buffer
*buf
,
1133 u64 orig_generation
;
1135 u32 nr_file_extents
= 0;
1136 struct btrfs_key key
;
1137 struct btrfs_file_extent_item
*fi
;
1142 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
1143 u64
, u64
, u64
, u64
, u64
, u64
, u64
, u64
);
1145 ref_root
= btrfs_header_owner(buf
);
1146 ref_generation
= btrfs_header_generation(buf
);
1147 orig_root
= btrfs_header_owner(orig_buf
);
1148 orig_generation
= btrfs_header_generation(orig_buf
);
1150 nritems
= btrfs_header_nritems(buf
);
1151 level
= btrfs_header_level(buf
);
1153 if (root
->ref_cows
) {
1154 process_func
= __btrfs_inc_extent_ref
;
1157 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1160 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1162 process_func
= __btrfs_update_extent_ref
;
1165 for (i
= 0; i
< nritems
; i
++) {
1168 btrfs_item_key_to_cpu(buf
, &key
, i
);
1169 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1171 fi
= btrfs_item_ptr(buf
, i
,
1172 struct btrfs_file_extent_item
);
1173 if (btrfs_file_extent_type(buf
, fi
) ==
1174 BTRFS_FILE_EXTENT_INLINE
)
1176 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1182 maybe_lock_mutex(root
);
1183 ret
= process_func(trans
, root
, bytenr
,
1184 orig_buf
->start
, buf
->start
,
1185 orig_root
, ref_root
,
1186 orig_generation
, ref_generation
,
1188 maybe_unlock_mutex(root
);
1196 bytenr
= btrfs_node_blockptr(buf
, i
);
1197 maybe_lock_mutex(root
);
1198 ret
= process_func(trans
, root
, bytenr
,
1199 orig_buf
->start
, buf
->start
,
1200 orig_root
, ref_root
,
1201 orig_generation
, ref_generation
,
1203 maybe_unlock_mutex(root
);
1214 *nr_extents
= nr_file_extents
;
1216 *nr_extents
= nritems
;
1224 int btrfs_update_ref(struct btrfs_trans_handle
*trans
,
1225 struct btrfs_root
*root
, struct extent_buffer
*orig_buf
,
1226 struct extent_buffer
*buf
, int start_slot
, int nr
)
1233 u64 orig_generation
;
1234 struct btrfs_key key
;
1235 struct btrfs_file_extent_item
*fi
;
1241 BUG_ON(start_slot
< 0);
1242 BUG_ON(start_slot
+ nr
> btrfs_header_nritems(buf
));
1244 ref_root
= btrfs_header_owner(buf
);
1245 ref_generation
= btrfs_header_generation(buf
);
1246 orig_root
= btrfs_header_owner(orig_buf
);
1247 orig_generation
= btrfs_header_generation(orig_buf
);
1248 level
= btrfs_header_level(buf
);
1250 if (!root
->ref_cows
) {
1252 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1255 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1259 for (i
= 0, slot
= start_slot
; i
< nr
; i
++, slot
++) {
1262 btrfs_item_key_to_cpu(buf
, &key
, slot
);
1263 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1265 fi
= btrfs_item_ptr(buf
, slot
,
1266 struct btrfs_file_extent_item
);
1267 if (btrfs_file_extent_type(buf
, fi
) ==
1268 BTRFS_FILE_EXTENT_INLINE
)
1270 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1273 maybe_lock_mutex(root
);
1274 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1275 orig_buf
->start
, buf
->start
,
1276 orig_root
, ref_root
,
1277 orig_generation
, ref_generation
,
1279 maybe_unlock_mutex(root
);
1283 bytenr
= btrfs_node_blockptr(buf
, slot
);
1284 maybe_lock_mutex(root
);
1285 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1286 orig_buf
->start
, buf
->start
,
1287 orig_root
, ref_root
,
1288 orig_generation
, ref_generation
,
1290 maybe_unlock_mutex(root
);
1301 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
1302 struct btrfs_root
*root
,
1303 struct btrfs_path
*path
,
1304 struct btrfs_block_group_cache
*cache
)
1308 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1310 struct extent_buffer
*leaf
;
1312 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
1317 leaf
= path
->nodes
[0];
1318 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
1319 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
1320 btrfs_mark_buffer_dirty(leaf
);
1321 btrfs_release_path(extent_root
, path
);
1323 finish_current_insert(trans
, extent_root
);
1324 pending_ret
= del_pending_extents(trans
, extent_root
);
1333 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
1334 struct btrfs_root
*root
)
1336 struct btrfs_block_group_cache
*cache
, *entry
;
1340 struct btrfs_path
*path
;
1343 path
= btrfs_alloc_path();
1347 mutex_lock(&root
->fs_info
->alloc_mutex
);
1350 spin_lock(&root
->fs_info
->block_group_cache_lock
);
1351 for (n
= rb_first(&root
->fs_info
->block_group_cache_tree
);
1352 n
; n
= rb_next(n
)) {
1353 entry
= rb_entry(n
, struct btrfs_block_group_cache
,
1360 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
1366 last
+= cache
->key
.offset
;
1368 err
= write_one_cache_group(trans
, root
,
1371 * if we fail to write the cache group, we want
1372 * to keep it marked dirty in hopes that a later
1380 btrfs_free_path(path
);
1381 mutex_unlock(&root
->fs_info
->alloc_mutex
);
1385 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
1386 u64 total_bytes
, u64 bytes_used
,
1387 struct btrfs_space_info
**space_info
)
1389 struct btrfs_space_info
*found
;
1391 found
= __find_space_info(info
, flags
);
1393 found
->total_bytes
+= total_bytes
;
1394 found
->bytes_used
+= bytes_used
;
1396 *space_info
= found
;
1399 found
= kmalloc(sizeof(*found
), GFP_NOFS
);
1403 list_add(&found
->list
, &info
->space_info
);
1404 INIT_LIST_HEAD(&found
->block_groups
);
1405 init_rwsem(&found
->groups_sem
);
1406 spin_lock_init(&found
->lock
);
1407 found
->flags
= flags
;
1408 found
->total_bytes
= total_bytes
;
1409 found
->bytes_used
= bytes_used
;
1410 found
->bytes_pinned
= 0;
1411 found
->bytes_reserved
= 0;
1413 found
->force_alloc
= 0;
1414 *space_info
= found
;
1418 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
1420 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
1421 BTRFS_BLOCK_GROUP_RAID1
|
1422 BTRFS_BLOCK_GROUP_RAID10
|
1423 BTRFS_BLOCK_GROUP_DUP
);
1425 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
1426 fs_info
->avail_data_alloc_bits
|= extra_flags
;
1427 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
1428 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
1429 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
1430 fs_info
->avail_system_alloc_bits
|= extra_flags
;
1434 static u64
reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
1436 u64 num_devices
= root
->fs_info
->fs_devices
->num_devices
;
1438 if (num_devices
== 1)
1439 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
1440 if (num_devices
< 4)
1441 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
1443 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
1444 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
1445 BTRFS_BLOCK_GROUP_RAID10
))) {
1446 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
1449 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
1450 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
1451 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
1454 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
1455 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
1456 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
1457 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
1458 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
1462 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
1463 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
1464 u64 flags
, int force
)
1466 struct btrfs_space_info
*space_info
;
1470 int ret
= 0, waited
= 0;
1472 flags
= reduce_alloc_profile(extent_root
, flags
);
1474 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
1476 ret
= update_space_info(extent_root
->fs_info
, flags
,
1480 BUG_ON(!space_info
);
1482 if (space_info
->force_alloc
) {
1484 space_info
->force_alloc
= 0;
1486 if (space_info
->full
)
1489 thresh
= div_factor(space_info
->total_bytes
, 6);
1491 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
1492 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
)
1495 while (!mutex_trylock(&extent_root
->fs_info
->chunk_mutex
)) {
1498 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
1500 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
1504 if (waited
&& space_info
->full
)
1507 ret
= btrfs_alloc_chunk(trans
, extent_root
, &start
, &num_bytes
, flags
);
1508 if (ret
== -ENOSPC
) {
1509 printk("space info full %Lu\n", flags
);
1510 space_info
->full
= 1;
1515 ret
= btrfs_make_block_group(trans
, extent_root
, 0, flags
,
1516 BTRFS_FIRST_CHUNK_TREE_OBJECTID
, start
, num_bytes
);
1520 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
1525 static int update_block_group(struct btrfs_trans_handle
*trans
,
1526 struct btrfs_root
*root
,
1527 u64 bytenr
, u64 num_bytes
, int alloc
,
1530 struct btrfs_block_group_cache
*cache
;
1531 struct btrfs_fs_info
*info
= root
->fs_info
;
1532 u64 total
= num_bytes
;
1536 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
1538 cache
= btrfs_lookup_block_group(info
, bytenr
);
1542 byte_in_group
= bytenr
- cache
->key
.objectid
;
1543 WARN_ON(byte_in_group
> cache
->key
.offset
);
1545 spin_lock(&cache
->lock
);
1547 old_val
= btrfs_block_group_used(&cache
->item
);
1548 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1550 old_val
+= num_bytes
;
1551 cache
->space_info
->bytes_used
+= num_bytes
;
1552 btrfs_set_block_group_used(&cache
->item
, old_val
);
1553 spin_unlock(&cache
->lock
);
1555 old_val
-= num_bytes
;
1556 cache
->space_info
->bytes_used
-= num_bytes
;
1557 btrfs_set_block_group_used(&cache
->item
, old_val
);
1558 spin_unlock(&cache
->lock
);
1561 ret
= btrfs_add_free_space(cache
, bytenr
,
1568 bytenr
+= num_bytes
;
1573 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
1575 struct btrfs_block_group_cache
*cache
;
1577 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
1581 return cache
->key
.objectid
;
1584 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
1585 u64 bytenr
, u64 num
, int pin
)
1588 struct btrfs_block_group_cache
*cache
;
1589 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1591 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
1593 set_extent_dirty(&fs_info
->pinned_extents
,
1594 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1596 clear_extent_dirty(&fs_info
->pinned_extents
,
1597 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1600 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
1602 len
= min(num
, cache
->key
.offset
-
1603 (bytenr
- cache
->key
.objectid
));
1605 spin_lock(&cache
->lock
);
1606 cache
->pinned
+= len
;
1607 cache
->space_info
->bytes_pinned
+= len
;
1608 spin_unlock(&cache
->lock
);
1609 fs_info
->total_pinned
+= len
;
1611 spin_lock(&cache
->lock
);
1612 cache
->pinned
-= len
;
1613 cache
->space_info
->bytes_pinned
-= len
;
1614 spin_unlock(&cache
->lock
);
1615 fs_info
->total_pinned
-= len
;
1623 static int update_reserved_extents(struct btrfs_root
*root
,
1624 u64 bytenr
, u64 num
, int reserve
)
1627 struct btrfs_block_group_cache
*cache
;
1628 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1630 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
1632 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
1634 len
= min(num
, cache
->key
.offset
-
1635 (bytenr
- cache
->key
.objectid
));
1637 spin_lock(&cache
->lock
);
1638 cache
->reserved
+= len
;
1639 cache
->space_info
->bytes_reserved
+= len
;
1640 spin_unlock(&cache
->lock
);
1642 spin_lock(&cache
->lock
);
1643 cache
->reserved
-= len
;
1644 cache
->space_info
->bytes_reserved
-= len
;
1645 spin_unlock(&cache
->lock
);
1653 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
1658 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
1662 ret
= find_first_extent_bit(pinned_extents
, last
,
1663 &start
, &end
, EXTENT_DIRTY
);
1666 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
1672 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
1673 struct btrfs_root
*root
,
1674 struct extent_io_tree
*unpin
)
1679 struct btrfs_block_group_cache
*cache
;
1681 mutex_lock(&root
->fs_info
->alloc_mutex
);
1683 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
1687 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
1688 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
1689 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
1691 btrfs_add_free_space(cache
, start
, end
- start
+ 1);
1692 if (need_resched()) {
1693 mutex_unlock(&root
->fs_info
->alloc_mutex
);
1695 mutex_lock(&root
->fs_info
->alloc_mutex
);
1698 mutex_unlock(&root
->fs_info
->alloc_mutex
);
1702 static int finish_current_insert(struct btrfs_trans_handle
*trans
,
1703 struct btrfs_root
*extent_root
)
1708 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
1709 struct btrfs_path
*path
;
1710 struct btrfs_extent_ref
*ref
;
1711 struct pending_extent_op
*extent_op
;
1712 struct btrfs_key key
;
1713 struct btrfs_extent_item extent_item
;
1717 WARN_ON(!mutex_is_locked(&extent_root
->fs_info
->alloc_mutex
));
1718 btrfs_set_stack_extent_refs(&extent_item
, 1);
1719 path
= btrfs_alloc_path();
1722 ret
= find_first_extent_bit(&info
->extent_ins
, 0, &start
,
1723 &end
, EXTENT_LOCKED
);
1727 ret
= get_state_private(&info
->extent_ins
, start
, &priv
);
1729 extent_op
= (struct pending_extent_op
*)(unsigned long)priv
;
1731 if (extent_op
->type
== PENDING_EXTENT_INSERT
) {
1732 key
.objectid
= start
;
1733 key
.offset
= end
+ 1 - start
;
1734 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1735 err
= btrfs_insert_item(trans
, extent_root
, &key
,
1736 &extent_item
, sizeof(extent_item
));
1739 clear_extent_bits(&info
->extent_ins
, start
, end
,
1740 EXTENT_LOCKED
, GFP_NOFS
);
1742 err
= insert_extent_backref(trans
, extent_root
, path
,
1743 start
, extent_op
->parent
,
1744 extent_root
->root_key
.objectid
,
1745 extent_op
->generation
,
1748 } else if (extent_op
->type
== PENDING_BACKREF_UPDATE
) {
1749 err
= lookup_extent_backref(trans
, extent_root
, path
,
1750 start
, extent_op
->orig_parent
,
1751 extent_root
->root_key
.objectid
,
1752 extent_op
->orig_generation
,
1753 extent_op
->level
, 0);
1756 clear_extent_bits(&info
->extent_ins
, start
, end
,
1757 EXTENT_LOCKED
, GFP_NOFS
);
1759 key
.objectid
= start
;
1760 key
.offset
= extent_op
->parent
;
1761 key
.type
= BTRFS_EXTENT_REF_KEY
;
1762 err
= btrfs_set_item_key_safe(trans
, extent_root
, path
,
1765 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
1766 struct btrfs_extent_ref
);
1767 btrfs_set_ref_generation(path
->nodes
[0], ref
,
1768 extent_op
->generation
);
1769 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1770 btrfs_release_path(extent_root
, path
);
1776 if (need_resched()) {
1777 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
1779 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
1782 btrfs_free_path(path
);
1786 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
1787 struct btrfs_root
*root
,
1788 u64 bytenr
, u64 num_bytes
, int is_data
)
1791 struct extent_buffer
*buf
;
1793 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
1797 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
1801 /* we can reuse a block if it hasn't been written
1802 * and it is from this transaction. We can't
1803 * reuse anything from the tree log root because
1804 * it has tiny sub-transactions.
1806 if (btrfs_buffer_uptodate(buf
, 0) &&
1807 btrfs_try_tree_lock(buf
)) {
1808 u64 header_owner
= btrfs_header_owner(buf
);
1809 u64 header_transid
= btrfs_header_generation(buf
);
1810 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
1811 header_owner
!= BTRFS_TREE_RELOC_OBJECTID
&&
1812 header_transid
== trans
->transid
&&
1813 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
1814 clean_tree_block(NULL
, root
, buf
);
1815 btrfs_tree_unlock(buf
);
1816 free_extent_buffer(buf
);
1819 btrfs_tree_unlock(buf
);
1821 free_extent_buffer(buf
);
1823 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
1830 * remove an extent from the root, returns 0 on success
1832 static int __free_extent(struct btrfs_trans_handle
*trans
,
1833 struct btrfs_root
*root
,
1834 u64 bytenr
, u64 num_bytes
, u64 parent
,
1835 u64 root_objectid
, u64 ref_generation
,
1836 u64 owner_objectid
, int pin
, int mark_free
)
1838 struct btrfs_path
*path
;
1839 struct btrfs_key key
;
1840 struct btrfs_fs_info
*info
= root
->fs_info
;
1841 struct btrfs_root
*extent_root
= info
->extent_root
;
1842 struct extent_buffer
*leaf
;
1844 int extent_slot
= 0;
1845 int found_extent
= 0;
1847 struct btrfs_extent_item
*ei
;
1850 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
1851 key
.objectid
= bytenr
;
1852 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
1853 key
.offset
= num_bytes
;
1854 path
= btrfs_alloc_path();
1859 ret
= lookup_extent_backref(trans
, extent_root
, path
,
1860 bytenr
, parent
, root_objectid
,
1861 ref_generation
, owner_objectid
, 1);
1863 struct btrfs_key found_key
;
1864 extent_slot
= path
->slots
[0];
1865 while(extent_slot
> 0) {
1867 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
1869 if (found_key
.objectid
!= bytenr
)
1871 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
1872 found_key
.offset
== num_bytes
) {
1876 if (path
->slots
[0] - extent_slot
> 5)
1879 if (!found_extent
) {
1880 ret
= remove_extent_backref(trans
, extent_root
, path
);
1882 btrfs_release_path(extent_root
, path
);
1883 ret
= btrfs_search_slot(trans
, extent_root
,
1886 extent_slot
= path
->slots
[0];
1889 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
1891 printk("Unable to find ref byte nr %Lu root %Lu "
1892 "gen %Lu owner %Lu\n", bytenr
,
1893 root_objectid
, ref_generation
, owner_objectid
);
1896 leaf
= path
->nodes
[0];
1897 ei
= btrfs_item_ptr(leaf
, extent_slot
,
1898 struct btrfs_extent_item
);
1899 refs
= btrfs_extent_refs(leaf
, ei
);
1902 btrfs_set_extent_refs(leaf
, ei
, refs
);
1904 btrfs_mark_buffer_dirty(leaf
);
1906 if (refs
== 0 && found_extent
&& path
->slots
[0] == extent_slot
+ 1) {
1907 struct btrfs_extent_ref
*ref
;
1908 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
1909 struct btrfs_extent_ref
);
1910 BUG_ON(btrfs_ref_num_refs(leaf
, ref
) != 1);
1911 /* if the back ref and the extent are next to each other
1912 * they get deleted below in one shot
1914 path
->slots
[0] = extent_slot
;
1916 } else if (found_extent
) {
1917 /* otherwise delete the extent back ref */
1918 ret
= remove_extent_backref(trans
, extent_root
, path
);
1920 /* if refs are 0, we need to setup the path for deletion */
1922 btrfs_release_path(extent_root
, path
);
1923 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
1932 #ifdef BIO_RW_DISCARD
1933 u64 map_length
= num_bytes
;
1934 struct btrfs_multi_bio
*multi
= NULL
;
1938 ret
= pin_down_bytes(trans
, root
, bytenr
, num_bytes
,
1939 owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
);
1945 /* block accounting for super block */
1946 spin_lock_irq(&info
->delalloc_lock
);
1947 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1948 btrfs_set_super_bytes_used(&info
->super_copy
,
1949 super_used
- num_bytes
);
1950 spin_unlock_irq(&info
->delalloc_lock
);
1952 /* block accounting for root item */
1953 root_used
= btrfs_root_used(&root
->root_item
);
1954 btrfs_set_root_used(&root
->root_item
,
1955 root_used
- num_bytes
);
1956 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
1959 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
1963 #ifdef BIO_RW_DISCARD
1964 /* Tell the block device(s) that the sectors can be discarded */
1965 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
1966 bytenr
, &map_length
, &multi
, 0);
1968 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
1971 if (map_length
> num_bytes
)
1972 map_length
= num_bytes
;
1974 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
1975 blkdev_issue_discard(stripe
->dev
->bdev
,
1976 stripe
->physical
>> 9,
1983 btrfs_free_path(path
);
1984 finish_current_insert(trans
, extent_root
);
1989 * find all the blocks marked as pending in the radix tree and remove
1990 * them from the extent map
1992 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
1993 btrfs_root
*extent_root
)
2001 struct extent_io_tree
*pending_del
;
2002 struct extent_io_tree
*extent_ins
;
2003 struct pending_extent_op
*extent_op
;
2005 WARN_ON(!mutex_is_locked(&extent_root
->fs_info
->alloc_mutex
));
2006 extent_ins
= &extent_root
->fs_info
->extent_ins
;
2007 pending_del
= &extent_root
->fs_info
->pending_del
;
2010 ret
= find_first_extent_bit(pending_del
, 0, &start
, &end
,
2015 ret
= get_state_private(pending_del
, start
, &priv
);
2017 extent_op
= (struct pending_extent_op
*)(unsigned long)priv
;
2019 clear_extent_bits(pending_del
, start
, end
, EXTENT_LOCKED
,
2022 ret
= pin_down_bytes(trans
, extent_root
, start
,
2023 end
+ 1 - start
, 0);
2024 mark_free
= ret
> 0;
2025 if (!test_range_bit(extent_ins
, start
, end
,
2026 EXTENT_LOCKED
, 0)) {
2028 ret
= __free_extent(trans
, extent_root
,
2029 start
, end
+ 1 - start
,
2030 extent_op
->orig_parent
,
2031 extent_root
->root_key
.objectid
,
2032 extent_op
->orig_generation
,
2033 extent_op
->level
, 0, mark_free
);
2037 ret
= get_state_private(extent_ins
, start
, &priv
);
2039 extent_op
= (struct pending_extent_op
*)
2040 (unsigned long)priv
;
2042 clear_extent_bits(extent_ins
, start
, end
,
2043 EXTENT_LOCKED
, GFP_NOFS
);
2045 if (extent_op
->type
== PENDING_BACKREF_UPDATE
)
2048 ret
= update_block_group(trans
, extent_root
, start
,
2049 end
+ 1 - start
, 0, mark_free
);
2056 if (need_resched()) {
2057 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
2059 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
2066 * remove an extent from the root, returns 0 on success
2068 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2069 struct btrfs_root
*root
,
2070 u64 bytenr
, u64 num_bytes
, u64 parent
,
2071 u64 root_objectid
, u64 ref_generation
,
2072 u64 owner_objectid
, int pin
)
2074 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2078 WARN_ON(num_bytes
< root
->sectorsize
);
2079 if (root
== extent_root
) {
2080 struct pending_extent_op
*extent_op
;
2082 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2085 extent_op
->type
= PENDING_EXTENT_DELETE
;
2086 extent_op
->bytenr
= bytenr
;
2087 extent_op
->num_bytes
= num_bytes
;
2088 extent_op
->parent
= parent
;
2089 extent_op
->orig_parent
= parent
;
2090 extent_op
->generation
= ref_generation
;
2091 extent_op
->orig_generation
= ref_generation
;
2092 extent_op
->level
= (int)owner_objectid
;
2094 set_extent_bits(&root
->fs_info
->pending_del
,
2095 bytenr
, bytenr
+ num_bytes
- 1,
2096 EXTENT_LOCKED
, GFP_NOFS
);
2097 set_state_private(&root
->fs_info
->pending_del
,
2098 bytenr
, (unsigned long)extent_op
);
2101 /* if metadata always pin */
2102 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
2103 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
2104 struct btrfs_block_group_cache
*cache
;
2106 /* btrfs_free_reserved_extent */
2107 cache
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
2109 btrfs_add_free_space(cache
, bytenr
, num_bytes
);
2110 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
2116 /* if data pin when any transaction has committed this */
2117 if (ref_generation
!= trans
->transid
)
2120 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2121 root_objectid
, ref_generation
,
2122 owner_objectid
, pin
, pin
== 0);
2124 finish_current_insert(trans
, root
->fs_info
->extent_root
);
2125 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
2126 return ret
? ret
: pending_ret
;
2129 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2130 struct btrfs_root
*root
,
2131 u64 bytenr
, u64 num_bytes
, u64 parent
,
2132 u64 root_objectid
, u64 ref_generation
,
2133 u64 owner_objectid
, int pin
)
2137 maybe_lock_mutex(root
);
2138 ret
= __btrfs_free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2139 root_objectid
, ref_generation
,
2140 owner_objectid
, pin
);
2141 maybe_unlock_mutex(root
);
2145 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
2147 u64 mask
= ((u64
)root
->stripesize
- 1);
2148 u64 ret
= (val
+ mask
) & ~mask
;
2153 * walks the btree of allocated extents and find a hole of a given size.
2154 * The key ins is changed to record the hole:
2155 * ins->objectid == block start
2156 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2157 * ins->offset == number of blocks
2158 * Any available blocks before search_start are skipped.
2160 static int noinline
find_free_extent(struct btrfs_trans_handle
*trans
,
2161 struct btrfs_root
*orig_root
,
2162 u64 num_bytes
, u64 empty_size
,
2163 u64 search_start
, u64 search_end
,
2164 u64 hint_byte
, struct btrfs_key
*ins
,
2165 u64 exclude_start
, u64 exclude_nr
,
2169 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
2170 u64 total_needed
= num_bytes
;
2171 u64
*last_ptr
= NULL
;
2172 struct btrfs_block_group_cache
*block_group
= NULL
;
2173 int chunk_alloc_done
= 0;
2174 int empty_cluster
= 2 * 1024 * 1024;
2175 int allowed_chunk_alloc
= 0;
2176 struct list_head
*head
= NULL
, *cur
= NULL
;
2178 struct btrfs_space_info
*space_info
;
2180 WARN_ON(num_bytes
< root
->sectorsize
);
2181 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
2185 if (orig_root
->ref_cows
|| empty_size
)
2186 allowed_chunk_alloc
= 1;
2188 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
2189 last_ptr
= &root
->fs_info
->last_alloc
;
2190 empty_cluster
= 256 * 1024;
2193 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
))
2194 last_ptr
= &root
->fs_info
->last_data_alloc
;
2198 hint_byte
= *last_ptr
;
2200 empty_size
+= empty_cluster
;
2202 search_start
= max(search_start
, first_logical_byte(root
, 0));
2203 search_start
= max(search_start
, hint_byte
);
2204 total_needed
+= empty_size
;
2206 block_group
= btrfs_lookup_block_group(root
->fs_info
, search_start
);
2207 space_info
= __find_space_info(root
->fs_info
, data
);
2209 down_read(&space_info
->groups_sem
);
2211 struct btrfs_free_space
*free_space
;
2213 * the only way this happens if our hint points to a block
2214 * group thats not of the proper type, while looping this
2215 * should never happen
2217 if (unlikely(!block_group_bits(block_group
, data
)))
2220 ret
= cache_block_group(root
, block_group
);
2224 if (block_group
->ro
)
2227 free_space
= btrfs_find_free_space(block_group
, search_start
,
2230 u64 start
= block_group
->key
.objectid
;
2231 u64 end
= block_group
->key
.objectid
+
2232 block_group
->key
.offset
;
2234 search_start
= stripe_align(root
, free_space
->offset
);
2236 /* move on to the next group */
2237 if (search_start
+ num_bytes
>= search_end
)
2240 /* move on to the next group */
2241 if (search_start
+ num_bytes
> end
)
2244 if (exclude_nr
> 0 &&
2245 (search_start
+ num_bytes
> exclude_start
&&
2246 search_start
< exclude_start
+ exclude_nr
)) {
2247 search_start
= exclude_start
+ exclude_nr
;
2249 * if search_start is still in this block group
2250 * then we just re-search this block group
2252 if (search_start
>= start
&&
2256 /* else we go to the next block group */
2260 ins
->objectid
= search_start
;
2261 ins
->offset
= num_bytes
;
2262 /* we are all good, lets return */
2267 * Here's how this works.
2268 * loop == 0: we were searching a block group via a hint
2269 * and didn't find anything, so we start at
2270 * the head of the block groups and keep searching
2271 * loop == 1: we're searching through all of the block groups
2272 * if we hit the head again we have searched
2273 * all of the block groups for this space and we
2274 * need to try and allocate, if we cant error out.
2275 * loop == 2: we allocated more space and are looping through
2276 * all of the block groups again.
2279 head
= &space_info
->block_groups
;
2282 if (last_ptr
&& *last_ptr
) {
2283 total_needed
+= empty_cluster
;
2287 } else if (loop
== 1 && cur
== head
) {
2288 if (allowed_chunk_alloc
&& !chunk_alloc_done
) {
2289 up_read(&space_info
->groups_sem
);
2290 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
2291 2 * 1024 * 1024, data
, 1);
2294 down_read(&space_info
->groups_sem
);
2296 head
= &space_info
->block_groups
;
2298 chunk_alloc_done
= 1;
2299 } else if (!allowed_chunk_alloc
) {
2300 space_info
->force_alloc
= 1;
2305 } else if (cur
== head
) {
2309 block_group
= list_entry(cur
, struct btrfs_block_group_cache
,
2311 search_start
= block_group
->key
.objectid
;
2315 /* we found what we needed */
2316 if (ins
->objectid
) {
2317 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
2318 trans
->block_group
= block_group
;
2321 *last_ptr
= ins
->objectid
+ ins
->offset
;
2327 up_read(&space_info
->groups_sem
);
2331 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
2333 struct btrfs_block_group_cache
*cache
;
2334 struct list_head
*l
;
2336 printk(KERN_INFO
"space_info has %Lu free, is %sfull\n",
2337 info
->total_bytes
- info
->bytes_used
- info
->bytes_pinned
-
2338 info
->bytes_reserved
, (info
->full
) ? "" : "not ");
2340 down_read(&info
->groups_sem
);
2341 list_for_each(l
, &info
->block_groups
) {
2342 cache
= list_entry(l
, struct btrfs_block_group_cache
, list
);
2343 spin_lock(&cache
->lock
);
2344 printk(KERN_INFO
"block group %Lu has %Lu bytes, %Lu used "
2345 "%Lu pinned %Lu reserved\n",
2346 cache
->key
.objectid
, cache
->key
.offset
,
2347 btrfs_block_group_used(&cache
->item
),
2348 cache
->pinned
, cache
->reserved
);
2349 btrfs_dump_free_space(cache
, bytes
);
2350 spin_unlock(&cache
->lock
);
2352 up_read(&info
->groups_sem
);
2355 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
2356 struct btrfs_root
*root
,
2357 u64 num_bytes
, u64 min_alloc_size
,
2358 u64 empty_size
, u64 hint_byte
,
2359 u64 search_end
, struct btrfs_key
*ins
,
2363 u64 search_start
= 0;
2365 struct btrfs_fs_info
*info
= root
->fs_info
;
2366 struct btrfs_block_group_cache
*cache
;
2369 alloc_profile
= info
->avail_data_alloc_bits
&
2370 info
->data_alloc_profile
;
2371 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
2372 } else if (root
== root
->fs_info
->chunk_root
) {
2373 alloc_profile
= info
->avail_system_alloc_bits
&
2374 info
->system_alloc_profile
;
2375 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
2377 alloc_profile
= info
->avail_metadata_alloc_bits
&
2378 info
->metadata_alloc_profile
;
2379 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
2382 data
= reduce_alloc_profile(root
, data
);
2384 * the only place that sets empty_size is btrfs_realloc_node, which
2385 * is not called recursively on allocations
2387 if (empty_size
|| root
->ref_cows
) {
2388 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
2389 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2391 BTRFS_BLOCK_GROUP_METADATA
|
2392 (info
->metadata_alloc_profile
&
2393 info
->avail_metadata_alloc_bits
), 0);
2395 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2396 num_bytes
+ 2 * 1024 * 1024, data
, 0);
2399 WARN_ON(num_bytes
< root
->sectorsize
);
2400 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
2401 search_start
, search_end
, hint_byte
, ins
,
2402 trans
->alloc_exclude_start
,
2403 trans
->alloc_exclude_nr
, data
);
2405 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
2406 num_bytes
= num_bytes
>> 1;
2407 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
2408 num_bytes
= max(num_bytes
, min_alloc_size
);
2409 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2410 num_bytes
, data
, 1);
2414 struct btrfs_space_info
*sinfo
;
2416 sinfo
= __find_space_info(root
->fs_info
, data
);
2417 printk("allocation failed flags %Lu, wanted %Lu\n",
2419 dump_space_info(sinfo
, num_bytes
);
2422 cache
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
2424 printk(KERN_ERR
"Unable to find block group for %Lu\n", ins
->objectid
);
2428 ret
= btrfs_remove_free_space(cache
, ins
->objectid
, ins
->offset
);
2433 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
2435 struct btrfs_block_group_cache
*cache
;
2437 maybe_lock_mutex(root
);
2438 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
2440 printk(KERN_ERR
"Unable to find block group for %Lu\n", start
);
2441 maybe_unlock_mutex(root
);
2444 btrfs_add_free_space(cache
, start
, len
);
2445 update_reserved_extents(root
, start
, len
, 0);
2446 maybe_unlock_mutex(root
);
2450 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
2451 struct btrfs_root
*root
,
2452 u64 num_bytes
, u64 min_alloc_size
,
2453 u64 empty_size
, u64 hint_byte
,
2454 u64 search_end
, struct btrfs_key
*ins
,
2458 maybe_lock_mutex(root
);
2459 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
2460 empty_size
, hint_byte
, search_end
, ins
,
2462 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
2463 maybe_unlock_mutex(root
);
2467 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
2468 struct btrfs_root
*root
, u64 parent
,
2469 u64 root_objectid
, u64 ref_generation
,
2470 u64 owner
, struct btrfs_key
*ins
)
2476 u64 num_bytes
= ins
->offset
;
2478 struct btrfs_fs_info
*info
= root
->fs_info
;
2479 struct btrfs_root
*extent_root
= info
->extent_root
;
2480 struct btrfs_extent_item
*extent_item
;
2481 struct btrfs_extent_ref
*ref
;
2482 struct btrfs_path
*path
;
2483 struct btrfs_key keys
[2];
2486 parent
= ins
->objectid
;
2488 /* block accounting for super block */
2489 spin_lock_irq(&info
->delalloc_lock
);
2490 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
2491 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
2492 spin_unlock_irq(&info
->delalloc_lock
);
2494 /* block accounting for root item */
2495 root_used
= btrfs_root_used(&root
->root_item
);
2496 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
2498 if (root
== extent_root
) {
2499 struct pending_extent_op
*extent_op
;
2501 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2504 extent_op
->type
= PENDING_EXTENT_INSERT
;
2505 extent_op
->bytenr
= ins
->objectid
;
2506 extent_op
->num_bytes
= ins
->offset
;
2507 extent_op
->parent
= parent
;
2508 extent_op
->orig_parent
= 0;
2509 extent_op
->generation
= ref_generation
;
2510 extent_op
->orig_generation
= 0;
2511 extent_op
->level
= (int)owner
;
2513 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
2514 ins
->objectid
+ ins
->offset
- 1,
2515 EXTENT_LOCKED
, GFP_NOFS
);
2516 set_state_private(&root
->fs_info
->extent_ins
,
2517 ins
->objectid
, (unsigned long)extent_op
);
2521 memcpy(&keys
[0], ins
, sizeof(*ins
));
2522 keys
[1].objectid
= ins
->objectid
;
2523 keys
[1].type
= BTRFS_EXTENT_REF_KEY
;
2524 keys
[1].offset
= parent
;
2525 sizes
[0] = sizeof(*extent_item
);
2526 sizes
[1] = sizeof(*ref
);
2528 path
= btrfs_alloc_path();
2531 ret
= btrfs_insert_empty_items(trans
, extent_root
, path
, keys
,
2535 extent_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2536 struct btrfs_extent_item
);
2537 btrfs_set_extent_refs(path
->nodes
[0], extent_item
, 1);
2538 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0] + 1,
2539 struct btrfs_extent_ref
);
2541 btrfs_set_ref_root(path
->nodes
[0], ref
, root_objectid
);
2542 btrfs_set_ref_generation(path
->nodes
[0], ref
, ref_generation
);
2543 btrfs_set_ref_objectid(path
->nodes
[0], ref
, owner
);
2544 btrfs_set_ref_num_refs(path
->nodes
[0], ref
, 1);
2546 btrfs_mark_buffer_dirty(path
->nodes
[0]);
2548 trans
->alloc_exclude_start
= 0;
2549 trans
->alloc_exclude_nr
= 0;
2550 btrfs_free_path(path
);
2551 finish_current_insert(trans
, extent_root
);
2552 pending_ret
= del_pending_extents(trans
, extent_root
);
2562 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0);
2564 printk("update block group failed for %Lu %Lu\n",
2565 ins
->objectid
, ins
->offset
);
2572 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
2573 struct btrfs_root
*root
, u64 parent
,
2574 u64 root_objectid
, u64 ref_generation
,
2575 u64 owner
, struct btrfs_key
*ins
)
2579 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
2581 maybe_lock_mutex(root
);
2582 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
2583 ref_generation
, owner
, ins
);
2584 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 0);
2585 maybe_unlock_mutex(root
);
2590 * this is used by the tree logging recovery code. It records that
2591 * an extent has been allocated and makes sure to clear the free
2592 * space cache bits as well
2594 int btrfs_alloc_logged_extent(struct btrfs_trans_handle
*trans
,
2595 struct btrfs_root
*root
, u64 parent
,
2596 u64 root_objectid
, u64 ref_generation
,
2597 u64 owner
, struct btrfs_key
*ins
)
2600 struct btrfs_block_group_cache
*block_group
;
2602 maybe_lock_mutex(root
);
2603 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
2604 cache_block_group(root
, block_group
);
2606 ret
= btrfs_remove_free_space(block_group
, ins
->objectid
, ins
->offset
);
2608 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
2609 ref_generation
, owner
, ins
);
2610 maybe_unlock_mutex(root
);
2615 * finds a free extent and does all the dirty work required for allocation
2616 * returns the key for the extent through ins, and a tree buffer for
2617 * the first block of the extent through buf.
2619 * returns 0 if everything worked, non-zero otherwise.
2621 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
2622 struct btrfs_root
*root
,
2623 u64 num_bytes
, u64 parent
, u64 min_alloc_size
,
2624 u64 root_objectid
, u64 ref_generation
,
2625 u64 owner_objectid
, u64 empty_size
, u64 hint_byte
,
2626 u64 search_end
, struct btrfs_key
*ins
, u64 data
)
2630 maybe_lock_mutex(root
);
2632 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
,
2633 min_alloc_size
, empty_size
, hint_byte
,
2634 search_end
, ins
, data
);
2636 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
2637 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
,
2638 root_objectid
, ref_generation
,
2639 owner_objectid
, ins
);
2643 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
2645 maybe_unlock_mutex(root
);
2649 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
2650 struct btrfs_root
*root
,
2651 u64 bytenr
, u32 blocksize
)
2653 struct extent_buffer
*buf
;
2655 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
2657 return ERR_PTR(-ENOMEM
);
2658 btrfs_set_header_generation(buf
, trans
->transid
);
2659 btrfs_tree_lock(buf
);
2660 clean_tree_block(trans
, root
, buf
);
2661 btrfs_set_buffer_uptodate(buf
);
2662 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
2663 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
2664 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
2666 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
2667 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
2669 trans
->blocks_used
++;
2674 * helper function to allocate a block for a given tree
2675 * returns the tree buffer or NULL.
2677 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
2678 struct btrfs_root
*root
,
2679 u32 blocksize
, u64 parent
,
2686 struct btrfs_key ins
;
2688 struct extent_buffer
*buf
;
2690 ret
= btrfs_alloc_extent(trans
, root
, blocksize
, parent
, blocksize
,
2691 root_objectid
, ref_generation
, level
,
2692 empty_size
, hint
, (u64
)-1, &ins
, 0);
2695 return ERR_PTR(ret
);
2698 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
, blocksize
);
2702 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
2703 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
2706 u64 leaf_generation
;
2707 struct btrfs_key key
;
2708 struct btrfs_file_extent_item
*fi
;
2713 BUG_ON(!btrfs_is_leaf(leaf
));
2714 nritems
= btrfs_header_nritems(leaf
);
2715 leaf_owner
= btrfs_header_owner(leaf
);
2716 leaf_generation
= btrfs_header_generation(leaf
);
2718 for (i
= 0; i
< nritems
; i
++) {
2722 btrfs_item_key_to_cpu(leaf
, &key
, i
);
2723 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
2725 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
2726 if (btrfs_file_extent_type(leaf
, fi
) ==
2727 BTRFS_FILE_EXTENT_INLINE
)
2730 * FIXME make sure to insert a trans record that
2731 * repeats the snapshot del on crash
2733 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
2734 if (disk_bytenr
== 0)
2737 mutex_lock(&root
->fs_info
->alloc_mutex
);
2738 ret
= __btrfs_free_extent(trans
, root
, disk_bytenr
,
2739 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
2740 leaf
->start
, leaf_owner
, leaf_generation
,
2742 mutex_unlock(&root
->fs_info
->alloc_mutex
);
2745 atomic_inc(&root
->fs_info
->throttle_gen
);
2746 wake_up(&root
->fs_info
->transaction_throttle
);
2752 static int noinline
cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
2753 struct btrfs_root
*root
,
2754 struct btrfs_leaf_ref
*ref
)
2758 struct btrfs_extent_info
*info
= ref
->extents
;
2760 for (i
= 0; i
< ref
->nritems
; i
++) {
2761 mutex_lock(&root
->fs_info
->alloc_mutex
);
2762 ret
= __btrfs_free_extent(trans
, root
, info
->bytenr
,
2763 info
->num_bytes
, ref
->bytenr
,
2764 ref
->owner
, ref
->generation
,
2766 mutex_unlock(&root
->fs_info
->alloc_mutex
);
2768 atomic_inc(&root
->fs_info
->throttle_gen
);
2769 wake_up(&root
->fs_info
->transaction_throttle
);
2779 int drop_snap_lookup_refcount(struct btrfs_root
*root
, u64 start
, u64 len
,
2784 ret
= btrfs_lookup_extent_ref(NULL
, root
, start
, len
, refs
);
2787 #if 0 // some debugging code in case we see problems here
2788 /* if the refs count is one, it won't get increased again. But
2789 * if the ref count is > 1, someone may be decreasing it at
2790 * the same time we are.
2793 struct extent_buffer
*eb
= NULL
;
2794 eb
= btrfs_find_create_tree_block(root
, start
, len
);
2796 btrfs_tree_lock(eb
);
2798 mutex_lock(&root
->fs_info
->alloc_mutex
);
2799 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
2801 mutex_unlock(&root
->fs_info
->alloc_mutex
);
2804 btrfs_tree_unlock(eb
);
2805 free_extent_buffer(eb
);
2808 printk("block %llu went down to one during drop_snap\n",
2809 (unsigned long long)start
);
2820 * helper function for drop_snapshot, this walks down the tree dropping ref
2821 * counts as it goes.
2823 static int noinline
walk_down_tree(struct btrfs_trans_handle
*trans
,
2824 struct btrfs_root
*root
,
2825 struct btrfs_path
*path
, int *level
)
2831 struct extent_buffer
*next
;
2832 struct extent_buffer
*cur
;
2833 struct extent_buffer
*parent
;
2834 struct btrfs_leaf_ref
*ref
;
2839 WARN_ON(*level
< 0);
2840 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
2841 ret
= drop_snap_lookup_refcount(root
, path
->nodes
[*level
]->start
,
2842 path
->nodes
[*level
]->len
, &refs
);
2848 * walk down to the last node level and free all the leaves
2850 while(*level
>= 0) {
2851 WARN_ON(*level
< 0);
2852 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
2853 cur
= path
->nodes
[*level
];
2855 if (btrfs_header_level(cur
) != *level
)
2858 if (path
->slots
[*level
] >=
2859 btrfs_header_nritems(cur
))
2862 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
2866 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
2867 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
2868 blocksize
= btrfs_level_size(root
, *level
- 1);
2870 ret
= drop_snap_lookup_refcount(root
, bytenr
, blocksize
, &refs
);
2873 parent
= path
->nodes
[*level
];
2874 root_owner
= btrfs_header_owner(parent
);
2875 root_gen
= btrfs_header_generation(parent
);
2876 path
->slots
[*level
]++;
2878 mutex_lock(&root
->fs_info
->alloc_mutex
);
2879 ret
= __btrfs_free_extent(trans
, root
, bytenr
,
2880 blocksize
, parent
->start
,
2881 root_owner
, root_gen
,
2884 mutex_unlock(&root
->fs_info
->alloc_mutex
);
2886 atomic_inc(&root
->fs_info
->throttle_gen
);
2887 wake_up(&root
->fs_info
->transaction_throttle
);
2893 * at this point, we have a single ref, and since the
2894 * only place referencing this extent is a dead root
2895 * the reference count should never go higher.
2896 * So, we don't need to check it again
2899 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
2900 if (ref
&& ref
->generation
!= ptr_gen
) {
2901 btrfs_free_leaf_ref(root
, ref
);
2905 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
2907 btrfs_remove_leaf_ref(root
, ref
);
2908 btrfs_free_leaf_ref(root
, ref
);
2912 if (printk_ratelimit()) {
2913 printk("leaf ref miss for bytenr %llu\n",
2914 (unsigned long long)bytenr
);
2917 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
2918 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
2919 free_extent_buffer(next
);
2921 next
= read_tree_block(root
, bytenr
, blocksize
,
2926 * this is a debugging check and can go away
2927 * the ref should never go all the way down to 1
2930 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
,
2936 WARN_ON(*level
<= 0);
2937 if (path
->nodes
[*level
-1])
2938 free_extent_buffer(path
->nodes
[*level
-1]);
2939 path
->nodes
[*level
-1] = next
;
2940 *level
= btrfs_header_level(next
);
2941 path
->slots
[*level
] = 0;
2945 WARN_ON(*level
< 0);
2946 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
2948 if (path
->nodes
[*level
] == root
->node
) {
2949 parent
= path
->nodes
[*level
];
2950 bytenr
= path
->nodes
[*level
]->start
;
2952 parent
= path
->nodes
[*level
+ 1];
2953 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
2956 blocksize
= btrfs_level_size(root
, *level
);
2957 root_owner
= btrfs_header_owner(parent
);
2958 root_gen
= btrfs_header_generation(parent
);
2960 mutex_lock(&root
->fs_info
->alloc_mutex
);
2961 ret
= __btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
2962 parent
->start
, root_owner
, root_gen
,
2964 mutex_unlock(&root
->fs_info
->alloc_mutex
);
2965 free_extent_buffer(path
->nodes
[*level
]);
2966 path
->nodes
[*level
] = NULL
;
2975 * helper function for drop_subtree, this function is similar to
2976 * walk_down_tree. The main difference is that it checks reference
2977 * counts while tree blocks are locked.
2979 static int noinline
walk_down_subtree(struct btrfs_trans_handle
*trans
,
2980 struct btrfs_root
*root
,
2981 struct btrfs_path
*path
, int *level
)
2983 struct extent_buffer
*next
;
2984 struct extent_buffer
*cur
;
2985 struct extent_buffer
*parent
;
2992 cur
= path
->nodes
[*level
];
2993 ret
= btrfs_lookup_extent_ref(trans
, root
, cur
->start
, cur
->len
,
2999 while (*level
>= 0) {
3000 cur
= path
->nodes
[*level
];
3002 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3004 clean_tree_block(trans
, root
, cur
);
3007 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
)) {
3008 clean_tree_block(trans
, root
, cur
);
3012 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3013 blocksize
= btrfs_level_size(root
, *level
- 1);
3014 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3016 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
3017 btrfs_tree_lock(next
);
3019 ret
= btrfs_lookup_extent_ref(trans
, root
, bytenr
, blocksize
,
3023 parent
= path
->nodes
[*level
];
3024 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3025 blocksize
, parent
->start
,
3026 btrfs_header_owner(parent
),
3027 btrfs_header_generation(parent
),
3030 path
->slots
[*level
]++;
3031 btrfs_tree_unlock(next
);
3032 free_extent_buffer(next
);
3036 *level
= btrfs_header_level(next
);
3037 path
->nodes
[*level
] = next
;
3038 path
->slots
[*level
] = 0;
3039 path
->locks
[*level
] = 1;
3043 parent
= path
->nodes
[*level
+ 1];
3044 bytenr
= path
->nodes
[*level
]->start
;
3045 blocksize
= path
->nodes
[*level
]->len
;
3047 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3048 parent
->start
, btrfs_header_owner(parent
),
3049 btrfs_header_generation(parent
), *level
, 1);
3052 if (path
->locks
[*level
]) {
3053 btrfs_tree_unlock(path
->nodes
[*level
]);
3054 path
->locks
[*level
] = 0;
3056 free_extent_buffer(path
->nodes
[*level
]);
3057 path
->nodes
[*level
] = NULL
;
3064 * helper for dropping snapshots. This walks back up the tree in the path
3065 * to find the first node higher up where we haven't yet gone through
3068 static int noinline
walk_up_tree(struct btrfs_trans_handle
*trans
,
3069 struct btrfs_root
*root
,
3070 struct btrfs_path
*path
,
3071 int *level
, int max_level
)
3075 struct btrfs_root_item
*root_item
= &root
->root_item
;
3080 for (i
= *level
; i
< max_level
&& path
->nodes
[i
]; i
++) {
3081 slot
= path
->slots
[i
];
3082 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
3083 struct extent_buffer
*node
;
3084 struct btrfs_disk_key disk_key
;
3085 node
= path
->nodes
[i
];
3088 WARN_ON(*level
== 0);
3089 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
3090 memcpy(&root_item
->drop_progress
,
3091 &disk_key
, sizeof(disk_key
));
3092 root_item
->drop_level
= i
;
3095 struct extent_buffer
*parent
;
3096 if (path
->nodes
[*level
] == root
->node
)
3097 parent
= path
->nodes
[*level
];
3099 parent
= path
->nodes
[*level
+ 1];
3101 root_owner
= btrfs_header_owner(parent
);
3102 root_gen
= btrfs_header_generation(parent
);
3104 clean_tree_block(trans
, root
, path
->nodes
[*level
]);
3105 ret
= btrfs_free_extent(trans
, root
,
3106 path
->nodes
[*level
]->start
,
3107 path
->nodes
[*level
]->len
,
3108 parent
->start
, root_owner
,
3109 root_gen
, *level
, 1);
3111 if (path
->locks
[*level
]) {
3112 btrfs_tree_unlock(path
->nodes
[*level
]);
3113 path
->locks
[*level
] = 0;
3115 free_extent_buffer(path
->nodes
[*level
]);
3116 path
->nodes
[*level
] = NULL
;
3124 * drop the reference count on the tree rooted at 'snap'. This traverses
3125 * the tree freeing any blocks that have a ref count of zero after being
3128 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
3134 struct btrfs_path
*path
;
3137 struct btrfs_root_item
*root_item
= &root
->root_item
;
3139 WARN_ON(!mutex_is_locked(&root
->fs_info
->drop_mutex
));
3140 path
= btrfs_alloc_path();
3143 level
= btrfs_header_level(root
->node
);
3145 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3146 path
->nodes
[level
] = root
->node
;
3147 extent_buffer_get(root
->node
);
3148 path
->slots
[level
] = 0;
3150 struct btrfs_key key
;
3151 struct btrfs_disk_key found_key
;
3152 struct extent_buffer
*node
;
3154 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3155 level
= root_item
->drop_level
;
3156 path
->lowest_level
= level
;
3157 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3162 node
= path
->nodes
[level
];
3163 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
3164 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3165 sizeof(found_key
)));
3167 * unlock our path, this is safe because only this
3168 * function is allowed to delete this snapshot
3170 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
3171 if (path
->nodes
[i
] && path
->locks
[i
]) {
3173 btrfs_tree_unlock(path
->nodes
[i
]);
3178 wret
= walk_down_tree(trans
, root
, path
, &level
);
3184 wret
= walk_up_tree(trans
, root
, path
, &level
,
3190 if (trans
->transaction
->in_commit
) {
3194 atomic_inc(&root
->fs_info
->throttle_gen
);
3195 wake_up(&root
->fs_info
->transaction_throttle
);
3197 for (i
= 0; i
<= orig_level
; i
++) {
3198 if (path
->nodes
[i
]) {
3199 free_extent_buffer(path
->nodes
[i
]);
3200 path
->nodes
[i
] = NULL
;
3204 btrfs_free_path(path
);
3208 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
3209 struct btrfs_root
*root
,
3210 struct extent_buffer
*node
,
3211 struct extent_buffer
*parent
)
3213 struct btrfs_path
*path
;
3219 path
= btrfs_alloc_path();
3222 BUG_ON(!btrfs_tree_locked(parent
));
3223 parent_level
= btrfs_header_level(parent
);
3224 extent_buffer_get(parent
);
3225 path
->nodes
[parent_level
] = parent
;
3226 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
3228 BUG_ON(!btrfs_tree_locked(node
));
3229 level
= btrfs_header_level(node
);
3230 extent_buffer_get(node
);
3231 path
->nodes
[level
] = node
;
3232 path
->slots
[level
] = 0;
3235 wret
= walk_down_subtree(trans
, root
, path
, &level
);
3241 wret
= walk_up_tree(trans
, root
, path
, &level
, parent_level
);
3248 btrfs_free_path(path
);
3252 static unsigned long calc_ra(unsigned long start
, unsigned long last
,
3255 return min(last
, start
+ nr
- 1);
3258 static int noinline
relocate_inode_pages(struct inode
*inode
, u64 start
,
3263 unsigned long first_index
;
3264 unsigned long last_index
;
3267 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
3268 struct file_ra_state
*ra
;
3269 struct btrfs_ordered_extent
*ordered
;
3270 unsigned int total_read
= 0;
3271 unsigned int total_dirty
= 0;
3274 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
3276 mutex_lock(&inode
->i_mutex
);
3277 first_index
= start
>> PAGE_CACHE_SHIFT
;
3278 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
3280 /* make sure the dirty trick played by the caller work */
3281 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
3282 first_index
, last_index
);
3286 file_ra_state_init(ra
, inode
->i_mapping
);
3288 for (i
= first_index
; i
<= last_index
; i
++) {
3289 if (total_read
% ra
->ra_pages
== 0) {
3290 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
3291 calc_ra(i
, last_index
, ra
->ra_pages
));
3295 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
3297 page
= grab_cache_page(inode
->i_mapping
, i
);
3302 if (!PageUptodate(page
)) {
3303 btrfs_readpage(NULL
, page
);
3305 if (!PageUptodate(page
)) {
3307 page_cache_release(page
);
3312 wait_on_page_writeback(page
);
3314 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
3315 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
3316 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3318 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
3320 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3322 page_cache_release(page
);
3323 btrfs_start_ordered_extent(inode
, ordered
, 1);
3324 btrfs_put_ordered_extent(ordered
);
3327 set_page_extent_mapped(page
);
3329 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
3330 if (i
== first_index
)
3331 set_extent_bits(io_tree
, page_start
, page_end
,
3332 EXTENT_BOUNDARY
, GFP_NOFS
);
3334 set_page_dirty(page
);
3337 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3339 page_cache_release(page
);
3344 mutex_unlock(&inode
->i_mutex
);
3345 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
3349 static int noinline
relocate_data_extent(struct inode
*reloc_inode
,
3350 struct btrfs_key
*extent_key
,
3353 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
3354 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
3355 struct extent_map
*em
;
3357 em
= alloc_extent_map(GFP_NOFS
);
3358 BUG_ON(!em
|| IS_ERR(em
));
3360 em
->start
= extent_key
->objectid
- offset
;
3361 em
->len
= extent_key
->offset
;
3362 em
->block_len
= extent_key
->offset
;
3363 em
->block_start
= extent_key
->objectid
;
3364 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
3365 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
3367 /* setup extent map to cheat btrfs_readpage */
3368 mutex_lock(&BTRFS_I(reloc_inode
)->extent_mutex
);
3371 spin_lock(&em_tree
->lock
);
3372 ret
= add_extent_mapping(em_tree
, em
);
3373 spin_unlock(&em_tree
->lock
);
3374 if (ret
!= -EEXIST
) {
3375 free_extent_map(em
);
3378 btrfs_drop_extent_cache(reloc_inode
, em
->start
,
3379 em
->start
+ em
->len
- 1, 0);
3381 mutex_unlock(&BTRFS_I(reloc_inode
)->extent_mutex
);
3383 return relocate_inode_pages(reloc_inode
, extent_key
->objectid
- offset
,
3384 extent_key
->offset
);
3387 struct btrfs_ref_path
{
3389 u64 nodes
[BTRFS_MAX_LEVEL
];
3391 u64 root_generation
;
3398 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
3399 u64 new_nodes
[BTRFS_MAX_LEVEL
];
3402 struct disk_extent
{
3413 static int is_cowonly_root(u64 root_objectid
)
3415 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
3416 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
3417 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
3418 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
3419 root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
3424 static int noinline
__next_ref_path(struct btrfs_trans_handle
*trans
,
3425 struct btrfs_root
*extent_root
,
3426 struct btrfs_ref_path
*ref_path
,
3429 struct extent_buffer
*leaf
;
3430 struct btrfs_path
*path
;
3431 struct btrfs_extent_ref
*ref
;
3432 struct btrfs_key key
;
3433 struct btrfs_key found_key
;
3439 path
= btrfs_alloc_path();
3443 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
3446 ref_path
->lowest_level
= -1;
3447 ref_path
->current_level
= -1;
3448 ref_path
->shared_level
= -1;
3452 level
= ref_path
->current_level
- 1;
3453 while (level
>= -1) {
3455 if (level
< ref_path
->lowest_level
)
3459 bytenr
= ref_path
->nodes
[level
];
3461 bytenr
= ref_path
->extent_start
;
3463 BUG_ON(bytenr
== 0);
3465 parent
= ref_path
->nodes
[level
+ 1];
3466 ref_path
->nodes
[level
+ 1] = 0;
3467 ref_path
->current_level
= level
;
3468 BUG_ON(parent
== 0);
3470 key
.objectid
= bytenr
;
3471 key
.offset
= parent
+ 1;
3472 key
.type
= BTRFS_EXTENT_REF_KEY
;
3474 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
3479 leaf
= path
->nodes
[0];
3480 nritems
= btrfs_header_nritems(leaf
);
3481 if (path
->slots
[0] >= nritems
) {
3482 ret
= btrfs_next_leaf(extent_root
, path
);
3487 leaf
= path
->nodes
[0];
3490 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
3491 if (found_key
.objectid
== bytenr
&&
3492 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
3493 if (level
< ref_path
->shared_level
)
3494 ref_path
->shared_level
= level
;
3499 btrfs_release_path(extent_root
, path
);
3500 if (need_resched()) {
3501 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
3503 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
3506 /* reached lowest level */
3510 level
= ref_path
->current_level
;
3511 while (level
< BTRFS_MAX_LEVEL
- 1) {
3514 bytenr
= ref_path
->nodes
[level
];
3516 bytenr
= ref_path
->extent_start
;
3518 BUG_ON(bytenr
== 0);
3520 key
.objectid
= bytenr
;
3522 key
.type
= BTRFS_EXTENT_REF_KEY
;
3524 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
3528 leaf
= path
->nodes
[0];
3529 nritems
= btrfs_header_nritems(leaf
);
3530 if (path
->slots
[0] >= nritems
) {
3531 ret
= btrfs_next_leaf(extent_root
, path
);
3535 /* the extent was freed by someone */
3536 if (ref_path
->lowest_level
== level
)
3538 btrfs_release_path(extent_root
, path
);
3541 leaf
= path
->nodes
[0];
3544 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
3545 if (found_key
.objectid
!= bytenr
||
3546 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
3547 /* the extent was freed by someone */
3548 if (ref_path
->lowest_level
== level
) {
3552 btrfs_release_path(extent_root
, path
);
3556 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
3557 struct btrfs_extent_ref
);
3558 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
3559 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
3561 level
= (int)ref_objectid
;
3562 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
3563 ref_path
->lowest_level
= level
;
3564 ref_path
->current_level
= level
;
3565 ref_path
->nodes
[level
] = bytenr
;
3567 WARN_ON(ref_objectid
!= level
);
3570 WARN_ON(level
!= -1);
3574 if (ref_path
->lowest_level
== level
) {
3575 ref_path
->owner_objectid
= ref_objectid
;
3576 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
3580 * the block is tree root or the block isn't in reference
3583 if (found_key
.objectid
== found_key
.offset
||
3584 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
3585 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
3586 ref_path
->root_generation
=
3587 btrfs_ref_generation(leaf
, ref
);
3589 /* special reference from the tree log */
3590 ref_path
->nodes
[0] = found_key
.offset
;
3591 ref_path
->current_level
= 0;
3598 BUG_ON(ref_path
->nodes
[level
] != 0);
3599 ref_path
->nodes
[level
] = found_key
.offset
;
3600 ref_path
->current_level
= level
;
3603 * the reference was created in the running transaction,
3604 * no need to continue walking up.
3606 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
3607 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
3608 ref_path
->root_generation
=
3609 btrfs_ref_generation(leaf
, ref
);
3614 btrfs_release_path(extent_root
, path
);
3615 if (need_resched()) {
3616 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
3618 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
3621 /* reached max tree level, but no tree root found. */
3624 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
3625 btrfs_free_path(path
);
3629 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
3630 struct btrfs_root
*extent_root
,
3631 struct btrfs_ref_path
*ref_path
,
3634 memset(ref_path
, 0, sizeof(*ref_path
));
3635 ref_path
->extent_start
= extent_start
;
3637 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
3640 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
3641 struct btrfs_root
*extent_root
,
3642 struct btrfs_ref_path
*ref_path
)
3644 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
3647 static int noinline
get_new_locations(struct inode
*reloc_inode
,
3648 struct btrfs_key
*extent_key
,
3649 u64 offset
, int no_fragment
,
3650 struct disk_extent
**extents
,
3653 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
3654 struct btrfs_path
*path
;
3655 struct btrfs_file_extent_item
*fi
;
3656 struct extent_buffer
*leaf
;
3657 struct disk_extent
*exts
= *extents
;
3658 struct btrfs_key found_key
;
3663 int max
= *nr_extents
;
3666 WARN_ON(!no_fragment
&& *extents
);
3669 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
3674 path
= btrfs_alloc_path();
3677 cur_pos
= extent_key
->objectid
- offset
;
3678 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
3679 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
3689 leaf
= path
->nodes
[0];
3690 nritems
= btrfs_header_nritems(leaf
);
3691 if (path
->slots
[0] >= nritems
) {
3692 ret
= btrfs_next_leaf(root
, path
);
3697 leaf
= path
->nodes
[0];
3700 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
3701 if (found_key
.offset
!= cur_pos
||
3702 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
3703 found_key
.objectid
!= reloc_inode
->i_ino
)
3706 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
3707 struct btrfs_file_extent_item
);
3708 if (btrfs_file_extent_type(leaf
, fi
) !=
3709 BTRFS_FILE_EXTENT_REG
||
3710 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
3714 struct disk_extent
*old
= exts
;
3716 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
3717 memcpy(exts
, old
, sizeof(*exts
) * nr
);
3718 if (old
!= *extents
)
3722 exts
[nr
].disk_bytenr
=
3723 btrfs_file_extent_disk_bytenr(leaf
, fi
);
3724 exts
[nr
].disk_num_bytes
=
3725 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
3726 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
3727 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
3728 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
3729 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
3730 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
3731 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
3733 WARN_ON(exts
[nr
].offset
> 0);
3734 WARN_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
3736 cur_pos
+= exts
[nr
].num_bytes
;
3739 if (cur_pos
+ offset
>= last_byte
)
3749 WARN_ON(cur_pos
+ offset
> last_byte
);
3750 if (cur_pos
+ offset
< last_byte
) {
3756 btrfs_free_path(path
);
3758 if (exts
!= *extents
)
3767 static int noinline
replace_one_extent(struct btrfs_trans_handle
*trans
,
3768 struct btrfs_root
*root
,
3769 struct btrfs_path
*path
,
3770 struct btrfs_key
*extent_key
,
3771 struct btrfs_key
*leaf_key
,
3772 struct btrfs_ref_path
*ref_path
,
3773 struct disk_extent
*new_extents
,
3776 struct extent_buffer
*leaf
;
3777 struct btrfs_file_extent_item
*fi
;
3778 struct inode
*inode
= NULL
;
3779 struct btrfs_key key
;
3787 int extent_locked
= 0;
3790 memcpy(&key
, leaf_key
, sizeof(key
));
3791 first_pos
= INT_LIMIT(loff_t
) - extent_key
->offset
;
3792 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
3793 if (key
.objectid
< ref_path
->owner_objectid
||
3794 (key
.objectid
== ref_path
->owner_objectid
&&
3795 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
3796 key
.objectid
= ref_path
->owner_objectid
;
3797 key
.type
= BTRFS_EXTENT_DATA_KEY
;
3803 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
3807 leaf
= path
->nodes
[0];
3808 nritems
= btrfs_header_nritems(leaf
);
3810 if (extent_locked
&& ret
> 0) {
3812 * the file extent item was modified by someone
3813 * before the extent got locked.
3815 mutex_unlock(&BTRFS_I(inode
)->extent_mutex
);
3816 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
3817 lock_end
, GFP_NOFS
);
3821 if (path
->slots
[0] >= nritems
) {
3822 if (++nr_scaned
> 2)
3825 BUG_ON(extent_locked
);
3826 ret
= btrfs_next_leaf(root
, path
);
3831 leaf
= path
->nodes
[0];
3832 nritems
= btrfs_header_nritems(leaf
);
3835 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
3837 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
3838 if ((key
.objectid
> ref_path
->owner_objectid
) ||
3839 (key
.objectid
== ref_path
->owner_objectid
&&
3840 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
3841 (key
.offset
>= first_pos
+ extent_key
->offset
))
3845 if (inode
&& key
.objectid
!= inode
->i_ino
) {
3846 BUG_ON(extent_locked
);
3847 btrfs_release_path(root
, path
);
3848 mutex_unlock(&inode
->i_mutex
);
3854 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
3859 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
3860 struct btrfs_file_extent_item
);
3861 if ((btrfs_file_extent_type(leaf
, fi
) !=
3862 BTRFS_FILE_EXTENT_REG
) ||
3863 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
3864 extent_key
->objectid
)) {
3870 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
3871 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
3873 if (first_pos
> key
.offset
- ext_offset
)
3874 first_pos
= key
.offset
- ext_offset
;
3876 if (!extent_locked
) {
3877 lock_start
= key
.offset
;
3878 lock_end
= lock_start
+ num_bytes
- 1;
3880 BUG_ON(lock_start
!= key
.offset
);
3881 BUG_ON(lock_end
- lock_start
+ 1 < num_bytes
);
3885 btrfs_release_path(root
, path
);
3887 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
3888 key
.objectid
, root
);
3889 if (inode
->i_state
& I_NEW
) {
3890 BTRFS_I(inode
)->root
= root
;
3891 BTRFS_I(inode
)->location
.objectid
=
3893 BTRFS_I(inode
)->location
.type
=
3894 BTRFS_INODE_ITEM_KEY
;
3895 BTRFS_I(inode
)->location
.offset
= 0;
3896 btrfs_read_locked_inode(inode
);
3897 unlock_new_inode(inode
);
3900 * some code call btrfs_commit_transaction while
3901 * holding the i_mutex, so we can't use mutex_lock
3904 if (is_bad_inode(inode
) ||
3905 !mutex_trylock(&inode
->i_mutex
)) {
3908 key
.offset
= (u64
)-1;
3913 if (!extent_locked
) {
3914 struct btrfs_ordered_extent
*ordered
;
3916 btrfs_release_path(root
, path
);
3918 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
3919 lock_end
, GFP_NOFS
);
3920 ordered
= btrfs_lookup_first_ordered_extent(inode
,
3923 ordered
->file_offset
<= lock_end
&&
3924 ordered
->file_offset
+ ordered
->len
> lock_start
) {
3925 unlock_extent(&BTRFS_I(inode
)->io_tree
,
3926 lock_start
, lock_end
, GFP_NOFS
);
3927 btrfs_start_ordered_extent(inode
, ordered
, 1);
3928 btrfs_put_ordered_extent(ordered
);
3929 key
.offset
+= num_bytes
;
3933 btrfs_put_ordered_extent(ordered
);
3935 mutex_lock(&BTRFS_I(inode
)->extent_mutex
);
3940 if (nr_extents
== 1) {
3941 /* update extent pointer in place */
3942 btrfs_set_file_extent_generation(leaf
, fi
,
3944 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
3945 new_extents
[0].disk_bytenr
);
3946 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
3947 new_extents
[0].disk_num_bytes
);
3948 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
3949 new_extents
[0].ram_bytes
);
3950 ext_offset
+= new_extents
[0].offset
;
3951 btrfs_set_file_extent_offset(leaf
, fi
, ext_offset
);
3952 btrfs_mark_buffer_dirty(leaf
);
3954 btrfs_drop_extent_cache(inode
, key
.offset
,
3955 key
.offset
+ num_bytes
- 1, 0);
3957 ret
= btrfs_inc_extent_ref(trans
, root
,
3958 new_extents
[0].disk_bytenr
,
3959 new_extents
[0].disk_num_bytes
,
3961 root
->root_key
.objectid
,
3966 ret
= btrfs_free_extent(trans
, root
,
3967 extent_key
->objectid
,
3970 btrfs_header_owner(leaf
),
3971 btrfs_header_generation(leaf
),
3975 btrfs_release_path(root
, path
);
3976 key
.offset
+= num_bytes
;
3982 * drop old extent pointer at first, then insert the
3983 * new pointers one bye one
3985 btrfs_release_path(root
, path
);
3986 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
3987 key
.offset
+ num_bytes
,
3988 key
.offset
, &alloc_hint
);
3991 for (i
= 0; i
< nr_extents
; i
++) {
3992 if (ext_offset
>= new_extents
[i
].num_bytes
) {
3993 ext_offset
-= new_extents
[i
].num_bytes
;
3996 extent_len
= min(new_extents
[i
].num_bytes
-
3997 ext_offset
, num_bytes
);
3999 ret
= btrfs_insert_empty_item(trans
, root
,
4004 leaf
= path
->nodes
[0];
4005 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4006 struct btrfs_file_extent_item
);
4007 btrfs_set_file_extent_generation(leaf
, fi
,
4009 btrfs_set_file_extent_type(leaf
, fi
,
4010 BTRFS_FILE_EXTENT_REG
);
4011 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4012 new_extents
[i
].disk_bytenr
);
4013 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4014 new_extents
[i
].disk_num_bytes
);
4015 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
4016 new_extents
[i
].ram_bytes
);
4018 btrfs_set_file_extent_compression(leaf
, fi
,
4019 new_extents
[i
].compression
);
4020 btrfs_set_file_extent_encryption(leaf
, fi
,
4021 new_extents
[i
].encryption
);
4022 btrfs_set_file_extent_other_encoding(leaf
, fi
,
4023 new_extents
[i
].other_encoding
);
4025 btrfs_set_file_extent_num_bytes(leaf
, fi
,
4027 ext_offset
+= new_extents
[i
].offset
;
4028 btrfs_set_file_extent_offset(leaf
, fi
,
4030 btrfs_mark_buffer_dirty(leaf
);
4032 btrfs_drop_extent_cache(inode
, key
.offset
,
4033 key
.offset
+ extent_len
- 1, 0);
4035 ret
= btrfs_inc_extent_ref(trans
, root
,
4036 new_extents
[i
].disk_bytenr
,
4037 new_extents
[i
].disk_num_bytes
,
4039 root
->root_key
.objectid
,
4040 trans
->transid
, key
.objectid
);
4042 btrfs_release_path(root
, path
);
4044 inode_add_bytes(inode
, extent_len
);
4047 num_bytes
-= extent_len
;
4048 key
.offset
+= extent_len
;
4053 BUG_ON(i
>= nr_extents
);
4056 if (extent_locked
) {
4057 mutex_unlock(&BTRFS_I(inode
)->extent_mutex
);
4058 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4059 lock_end
, GFP_NOFS
);
4063 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
4064 key
.offset
>= first_pos
+ extent_key
->offset
)
4071 btrfs_release_path(root
, path
);
4073 mutex_unlock(&inode
->i_mutex
);
4074 if (extent_locked
) {
4075 mutex_unlock(&BTRFS_I(inode
)->extent_mutex
);
4076 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4077 lock_end
, GFP_NOFS
);
4084 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
4085 struct btrfs_root
*root
,
4086 struct extent_buffer
*buf
, u64 orig_start
)
4091 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
4092 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
4094 level
= btrfs_header_level(buf
);
4096 struct btrfs_leaf_ref
*ref
;
4097 struct btrfs_leaf_ref
*orig_ref
;
4099 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
4103 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
4105 btrfs_free_leaf_ref(root
, orig_ref
);
4109 ref
->nritems
= orig_ref
->nritems
;
4110 memcpy(ref
->extents
, orig_ref
->extents
,
4111 sizeof(ref
->extents
[0]) * ref
->nritems
);
4113 btrfs_free_leaf_ref(root
, orig_ref
);
4115 ref
->root_gen
= trans
->transid
;
4116 ref
->bytenr
= buf
->start
;
4117 ref
->owner
= btrfs_header_owner(buf
);
4118 ref
->generation
= btrfs_header_generation(buf
);
4119 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
4121 btrfs_free_leaf_ref(root
, ref
);
4126 static int noinline
invalidate_extent_cache(struct btrfs_root
*root
,
4127 struct extent_buffer
*leaf
,
4128 struct btrfs_block_group_cache
*group
,
4129 struct btrfs_root
*target_root
)
4131 struct btrfs_key key
;
4132 struct inode
*inode
= NULL
;
4133 struct btrfs_file_extent_item
*fi
;
4135 u64 skip_objectid
= 0;
4139 nritems
= btrfs_header_nritems(leaf
);
4140 for (i
= 0; i
< nritems
; i
++) {
4141 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4142 if (key
.objectid
== skip_objectid
||
4143 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
4145 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4146 if (btrfs_file_extent_type(leaf
, fi
) ==
4147 BTRFS_FILE_EXTENT_INLINE
)
4149 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4151 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
4153 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
4154 key
.objectid
, target_root
, 1);
4157 skip_objectid
= key
.objectid
;
4160 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4162 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4163 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4164 mutex_lock(&BTRFS_I(inode
)->extent_mutex
);
4165 btrfs_drop_extent_cache(inode
, key
.offset
,
4166 key
.offset
+ num_bytes
- 1, 1);
4167 mutex_unlock(&BTRFS_I(inode
)->extent_mutex
);
4168 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4169 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4176 static int noinline
replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
4177 struct btrfs_root
*root
,
4178 struct extent_buffer
*leaf
,
4179 struct btrfs_block_group_cache
*group
,
4180 struct inode
*reloc_inode
)
4182 struct btrfs_key key
;
4183 struct btrfs_key extent_key
;
4184 struct btrfs_file_extent_item
*fi
;
4185 struct btrfs_leaf_ref
*ref
;
4186 struct disk_extent
*new_extent
;
4195 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
4196 BUG_ON(!new_extent
);
4198 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
4202 nritems
= btrfs_header_nritems(leaf
);
4203 for (i
= 0; i
< nritems
; i
++) {
4204 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4205 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4207 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4208 if (btrfs_file_extent_type(leaf
, fi
) ==
4209 BTRFS_FILE_EXTENT_INLINE
)
4211 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4212 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4217 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
4218 bytenr
+ num_bytes
<= group
->key
.objectid
)
4221 extent_key
.objectid
= bytenr
;
4222 extent_key
.offset
= num_bytes
;
4223 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4225 ret
= get_new_locations(reloc_inode
, &extent_key
,
4226 group
->key
.objectid
, 1,
4227 &new_extent
, &nr_extent
);
4232 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
4233 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
4234 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
4235 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
4237 btrfs_set_file_extent_generation(leaf
, fi
, trans
->transid
);
4238 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
4239 new_extent
->ram_bytes
);
4240 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4241 new_extent
->disk_bytenr
);
4242 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4243 new_extent
->disk_num_bytes
);
4244 new_extent
->offset
+= btrfs_file_extent_offset(leaf
, fi
);
4245 btrfs_set_file_extent_offset(leaf
, fi
, new_extent
->offset
);
4246 btrfs_mark_buffer_dirty(leaf
);
4248 ret
= btrfs_inc_extent_ref(trans
, root
,
4249 new_extent
->disk_bytenr
,
4250 new_extent
->disk_num_bytes
,
4252 root
->root_key
.objectid
,
4253 trans
->transid
, key
.objectid
);
4255 ret
= btrfs_free_extent(trans
, root
,
4256 bytenr
, num_bytes
, leaf
->start
,
4257 btrfs_header_owner(leaf
),
4258 btrfs_header_generation(leaf
),
4264 BUG_ON(ext_index
+ 1 != ref
->nritems
);
4265 btrfs_free_leaf_ref(root
, ref
);
4269 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
4270 struct btrfs_root
*root
)
4272 struct btrfs_root
*reloc_root
;
4275 if (root
->reloc_root
) {
4276 reloc_root
= root
->reloc_root
;
4277 root
->reloc_root
= NULL
;
4278 list_add(&reloc_root
->dead_list
,
4279 &root
->fs_info
->dead_reloc_roots
);
4281 btrfs_set_root_bytenr(&reloc_root
->root_item
,
4282 reloc_root
->node
->start
);
4283 btrfs_set_root_level(&root
->root_item
,
4284 btrfs_header_level(reloc_root
->node
));
4285 memset(&reloc_root
->root_item
.drop_progress
, 0,
4286 sizeof(struct btrfs_disk_key
));
4287 reloc_root
->root_item
.drop_level
= 0;
4289 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
4290 &reloc_root
->root_key
,
4291 &reloc_root
->root_item
);
4297 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
4299 struct btrfs_trans_handle
*trans
;
4300 struct btrfs_root
*reloc_root
;
4301 struct btrfs_root
*prev_root
= NULL
;
4302 struct list_head dead_roots
;
4306 INIT_LIST_HEAD(&dead_roots
);
4307 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
4309 while (!list_empty(&dead_roots
)) {
4310 reloc_root
= list_entry(dead_roots
.prev
,
4311 struct btrfs_root
, dead_list
);
4312 list_del_init(&reloc_root
->dead_list
);
4314 BUG_ON(reloc_root
->commit_root
!= NULL
);
4316 trans
= btrfs_join_transaction(root
, 1);
4319 mutex_lock(&root
->fs_info
->drop_mutex
);
4320 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
4323 mutex_unlock(&root
->fs_info
->drop_mutex
);
4325 nr
= trans
->blocks_used
;
4326 ret
= btrfs_end_transaction(trans
, root
);
4328 btrfs_btree_balance_dirty(root
, nr
);
4331 free_extent_buffer(reloc_root
->node
);
4333 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
4334 &reloc_root
->root_key
);
4336 mutex_unlock(&root
->fs_info
->drop_mutex
);
4338 nr
= trans
->blocks_used
;
4339 ret
= btrfs_end_transaction(trans
, root
);
4341 btrfs_btree_balance_dirty(root
, nr
);
4344 prev_root
= reloc_root
;
4347 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
4353 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
4355 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
4359 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
4361 struct btrfs_root
*reloc_root
;
4362 struct btrfs_trans_handle
*trans
;
4363 struct btrfs_key location
;
4367 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
4368 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
4370 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
4371 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
4374 trans
= btrfs_start_transaction(root
, 1);
4376 ret
= btrfs_commit_transaction(trans
, root
);
4380 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
4381 location
.offset
= (u64
)-1;
4382 location
.type
= BTRFS_ROOT_ITEM_KEY
;
4384 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
4385 BUG_ON(!reloc_root
);
4386 btrfs_orphan_cleanup(reloc_root
);
4390 static int noinline
init_reloc_tree(struct btrfs_trans_handle
*trans
,
4391 struct btrfs_root
*root
)
4393 struct btrfs_root
*reloc_root
;
4394 struct extent_buffer
*eb
;
4395 struct btrfs_root_item
*root_item
;
4396 struct btrfs_key root_key
;
4399 BUG_ON(!root
->ref_cows
);
4400 if (root
->reloc_root
)
4403 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
4406 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
4407 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
4410 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
4411 root_key
.offset
= root
->root_key
.objectid
;
4412 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
4414 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
4415 btrfs_set_root_refs(root_item
, 0);
4416 btrfs_set_root_bytenr(root_item
, eb
->start
);
4417 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
4419 btrfs_tree_unlock(eb
);
4420 free_extent_buffer(eb
);
4422 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
4423 &root_key
, root_item
);
4427 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
4429 BUG_ON(!reloc_root
);
4430 reloc_root
->last_trans
= trans
->transid
;
4431 reloc_root
->commit_root
= NULL
;
4432 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
4434 root
->reloc_root
= reloc_root
;
4439 * Core function of space balance.
4441 * The idea is using reloc trees to relocate tree blocks in reference
4442 * counted roots. There is one reloc tree for each subvol, and all
4443 * reloc trees share same root key objectid. Reloc trees are snapshots
4444 * of the latest committed roots of subvols (root->commit_root).
4446 * To relocate a tree block referenced by a subvol, there are two steps.
4447 * COW the block through subvol's reloc tree, then update block pointer
4448 * in the subvol to point to the new block. Since all reloc trees share
4449 * same root key objectid, doing special handing for tree blocks owned
4450 * by them is easy. Once a tree block has been COWed in one reloc tree,
4451 * we can use the resulting new block directly when the same block is
4452 * required to COW again through other reloc trees. By this way, relocated
4453 * tree blocks are shared between reloc trees, so they are also shared
4456 static int noinline
relocate_one_path(struct btrfs_trans_handle
*trans
,
4457 struct btrfs_root
*root
,
4458 struct btrfs_path
*path
,
4459 struct btrfs_key
*first_key
,
4460 struct btrfs_ref_path
*ref_path
,
4461 struct btrfs_block_group_cache
*group
,
4462 struct inode
*reloc_inode
)
4464 struct btrfs_root
*reloc_root
;
4465 struct extent_buffer
*eb
= NULL
;
4466 struct btrfs_key
*keys
;
4470 int lowest_level
= 0;
4473 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
4474 lowest_level
= ref_path
->owner_objectid
;
4476 if (!root
->ref_cows
) {
4477 path
->lowest_level
= lowest_level
;
4478 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
4480 path
->lowest_level
= 0;
4481 btrfs_release_path(root
, path
);
4485 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
4486 ret
= init_reloc_tree(trans
, root
);
4488 reloc_root
= root
->reloc_root
;
4490 shared_level
= ref_path
->shared_level
;
4491 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
4493 keys
= ref_path
->node_keys
;
4494 nodes
= ref_path
->new_nodes
;
4495 memset(&keys
[shared_level
+ 1], 0,
4496 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
4497 memset(&nodes
[shared_level
+ 1], 0,
4498 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
4500 if (nodes
[lowest_level
] == 0) {
4501 path
->lowest_level
= lowest_level
;
4502 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
4505 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
4506 eb
= path
->nodes
[level
];
4507 if (!eb
|| eb
== reloc_root
->node
)
4509 nodes
[level
] = eb
->start
;
4511 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
4513 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
4515 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
4516 eb
= path
->nodes
[0];
4517 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
4518 group
, reloc_inode
);
4521 btrfs_release_path(reloc_root
, path
);
4523 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
4529 * replace tree blocks in the fs tree with tree blocks in
4532 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
4535 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
4536 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
4539 extent_buffer_get(path
->nodes
[0]);
4540 eb
= path
->nodes
[0];
4541 btrfs_release_path(reloc_root
, path
);
4542 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
4544 free_extent_buffer(eb
);
4547 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
4548 path
->lowest_level
= 0;
4552 static int noinline
relocate_tree_block(struct btrfs_trans_handle
*trans
,
4553 struct btrfs_root
*root
,
4554 struct btrfs_path
*path
,
4555 struct btrfs_key
*first_key
,
4556 struct btrfs_ref_path
*ref_path
)
4561 if (root
== root
->fs_info
->extent_root
||
4562 root
== root
->fs_info
->chunk_root
||
4563 root
== root
->fs_info
->dev_root
) {
4565 mutex_lock(&root
->fs_info
->alloc_mutex
);
4568 ret
= relocate_one_path(trans
, root
, path
, first_key
,
4569 ref_path
, NULL
, NULL
);
4572 if (root
== root
->fs_info
->extent_root
)
4573 btrfs_extent_post_op(trans
, root
);
4575 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4580 static int noinline
del_extent_zero(struct btrfs_trans_handle
*trans
,
4581 struct btrfs_root
*extent_root
,
4582 struct btrfs_path
*path
,
4583 struct btrfs_key
*extent_key
)
4587 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
4588 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
4591 ret
= btrfs_del_item(trans
, extent_root
, path
);
4593 btrfs_release_path(extent_root
, path
);
4594 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
4598 static struct btrfs_root noinline
*read_ref_root(struct btrfs_fs_info
*fs_info
,
4599 struct btrfs_ref_path
*ref_path
)
4601 struct btrfs_key root_key
;
4603 root_key
.objectid
= ref_path
->root_objectid
;
4604 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
4605 if (is_cowonly_root(ref_path
->root_objectid
))
4606 root_key
.offset
= 0;
4608 root_key
.offset
= (u64
)-1;
4610 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
4613 static int noinline
relocate_one_extent(struct btrfs_root
*extent_root
,
4614 struct btrfs_path
*path
,
4615 struct btrfs_key
*extent_key
,
4616 struct btrfs_block_group_cache
*group
,
4617 struct inode
*reloc_inode
, int pass
)
4619 struct btrfs_trans_handle
*trans
;
4620 struct btrfs_root
*found_root
;
4621 struct btrfs_ref_path
*ref_path
= NULL
;
4622 struct disk_extent
*new_extents
= NULL
;
4627 struct btrfs_key first_key
;
4630 mutex_unlock(&extent_root
->fs_info
->alloc_mutex
);
4632 trans
= btrfs_start_transaction(extent_root
, 1);
4635 if (extent_key
->objectid
== 0) {
4636 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
4640 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
4646 for (loops
= 0; ; loops
++) {
4648 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
4649 extent_key
->objectid
);
4651 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
4658 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
4659 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
4662 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
4663 BUG_ON(!found_root
);
4665 * for reference counted tree, only process reference paths
4666 * rooted at the latest committed root.
4668 if (found_root
->ref_cows
&&
4669 ref_path
->root_generation
!= found_root
->root_key
.offset
)
4672 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
4675 * copy data extents to new locations
4677 u64 group_start
= group
->key
.objectid
;
4678 ret
= relocate_data_extent(reloc_inode
,
4687 level
= ref_path
->owner_objectid
;
4690 if (prev_block
!= ref_path
->nodes
[level
]) {
4691 struct extent_buffer
*eb
;
4692 u64 block_start
= ref_path
->nodes
[level
];
4693 u64 block_size
= btrfs_level_size(found_root
, level
);
4695 eb
= read_tree_block(found_root
, block_start
,
4697 btrfs_tree_lock(eb
);
4698 BUG_ON(level
!= btrfs_header_level(eb
));
4701 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
4703 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
4705 btrfs_tree_unlock(eb
);
4706 free_extent_buffer(eb
);
4707 prev_block
= block_start
;
4710 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
4713 * use fallback method to process the remaining
4717 u64 group_start
= group
->key
.objectid
;
4718 ret
= get_new_locations(reloc_inode
,
4726 btrfs_record_root_in_trans(found_root
);
4727 ret
= replace_one_extent(trans
, found_root
,
4729 &first_key
, ref_path
,
4730 new_extents
, nr_extents
);
4736 btrfs_record_root_in_trans(found_root
);
4737 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
4738 ret
= relocate_tree_block(trans
, found_root
, path
,
4739 &first_key
, ref_path
);
4742 * try to update data extent references while
4743 * keeping metadata shared between snapshots.
4745 ret
= relocate_one_path(trans
, found_root
, path
,
4746 &first_key
, ref_path
,
4747 group
, reloc_inode
);
4754 btrfs_end_transaction(trans
, extent_root
);
4757 mutex_lock(&extent_root
->fs_info
->alloc_mutex
);
4761 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
4764 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
4765 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
4767 num_devices
= root
->fs_info
->fs_devices
->num_devices
;
4768 if (num_devices
== 1) {
4769 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
4770 stripped
= flags
& ~stripped
;
4772 /* turn raid0 into single device chunks */
4773 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
4776 /* turn mirroring into duplication */
4777 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
4778 BTRFS_BLOCK_GROUP_RAID10
))
4779 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
4782 /* they already had raid on here, just return */
4783 if (flags
& stripped
)
4786 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
4787 stripped
= flags
& ~stripped
;
4789 /* switch duplicated blocks with raid1 */
4790 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
4791 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
4793 /* turn single device chunks into raid0 */
4794 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
4799 int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
4800 struct btrfs_block_group_cache
*shrink_block_group
,
4803 struct btrfs_trans_handle
*trans
;
4804 u64 new_alloc_flags
;
4807 spin_lock(&shrink_block_group
->lock
);
4808 if (btrfs_block_group_used(&shrink_block_group
->item
) > 0) {
4809 spin_unlock(&shrink_block_group
->lock
);
4810 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4812 trans
= btrfs_start_transaction(root
, 1);
4813 mutex_lock(&root
->fs_info
->alloc_mutex
);
4814 spin_lock(&shrink_block_group
->lock
);
4816 new_alloc_flags
= update_block_group_flags(root
,
4817 shrink_block_group
->flags
);
4818 if (new_alloc_flags
!= shrink_block_group
->flags
) {
4820 btrfs_block_group_used(&shrink_block_group
->item
);
4822 calc
= shrink_block_group
->key
.offset
;
4824 spin_unlock(&shrink_block_group
->lock
);
4826 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
4827 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
4829 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4830 btrfs_end_transaction(trans
, root
);
4831 mutex_lock(&root
->fs_info
->alloc_mutex
);
4833 spin_unlock(&shrink_block_group
->lock
);
4837 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
4838 struct btrfs_root
*root
,
4839 u64 objectid
, u64 size
)
4841 struct btrfs_path
*path
;
4842 struct btrfs_inode_item
*item
;
4843 struct extent_buffer
*leaf
;
4846 path
= btrfs_alloc_path();
4850 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
4854 leaf
= path
->nodes
[0];
4855 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
4856 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
4857 btrfs_set_inode_generation(leaf
, item
, 1);
4858 btrfs_set_inode_size(leaf
, item
, size
);
4859 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
4860 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NODATASUM
);
4861 btrfs_mark_buffer_dirty(leaf
);
4862 btrfs_release_path(root
, path
);
4864 btrfs_free_path(path
);
4868 static struct inode noinline
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
4869 struct btrfs_block_group_cache
*group
)
4871 struct inode
*inode
= NULL
;
4872 struct btrfs_trans_handle
*trans
;
4873 struct btrfs_root
*root
;
4874 struct btrfs_key root_key
;
4875 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
4878 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
4879 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
4880 root_key
.offset
= (u64
)-1;
4881 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
4883 return ERR_CAST(root
);
4885 trans
= btrfs_start_transaction(root
, 1);
4888 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
4892 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
4895 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
4896 group
->key
.offset
, 0, group
->key
.offset
,
4900 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
4901 if (inode
->i_state
& I_NEW
) {
4902 BTRFS_I(inode
)->root
= root
;
4903 BTRFS_I(inode
)->location
.objectid
= objectid
;
4904 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
4905 BTRFS_I(inode
)->location
.offset
= 0;
4906 btrfs_read_locked_inode(inode
);
4907 unlock_new_inode(inode
);
4908 BUG_ON(is_bad_inode(inode
));
4913 err
= btrfs_orphan_add(trans
, inode
);
4915 btrfs_end_transaction(trans
, root
);
4919 inode
= ERR_PTR(err
);
4924 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
4926 struct btrfs_trans_handle
*trans
;
4927 struct btrfs_path
*path
;
4928 struct btrfs_fs_info
*info
= root
->fs_info
;
4929 struct extent_buffer
*leaf
;
4930 struct inode
*reloc_inode
;
4931 struct btrfs_block_group_cache
*block_group
;
4932 struct btrfs_key key
;
4940 root
= root
->fs_info
->extent_root
;
4942 block_group
= btrfs_lookup_block_group(info
, group_start
);
4943 BUG_ON(!block_group
);
4945 printk("btrfs relocating block group %llu flags %llu\n",
4946 (unsigned long long)block_group
->key
.objectid
,
4947 (unsigned long long)block_group
->flags
);
4949 path
= btrfs_alloc_path();
4952 reloc_inode
= create_reloc_inode(info
, block_group
);
4953 BUG_ON(IS_ERR(reloc_inode
));
4955 mutex_lock(&root
->fs_info
->alloc_mutex
);
4957 __alloc_chunk_for_shrink(root
, block_group
, 1);
4958 block_group
->ro
= 1;
4959 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
4961 mutex_unlock(&root
->fs_info
->alloc_mutex
);
4963 btrfs_start_delalloc_inodes(info
->tree_root
);
4964 btrfs_wait_ordered_extents(info
->tree_root
, 0);
4968 key
.objectid
= block_group
->key
.objectid
;
4971 cur_byte
= key
.objectid
;
4973 trans
= btrfs_start_transaction(info
->tree_root
, 1);
4974 btrfs_commit_transaction(trans
, info
->tree_root
);
4976 mutex_lock(&root
->fs_info
->cleaner_mutex
);
4977 btrfs_clean_old_snapshots(info
->tree_root
);
4978 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
4979 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
4981 mutex_lock(&root
->fs_info
->alloc_mutex
);
4984 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4988 leaf
= path
->nodes
[0];
4989 nritems
= btrfs_header_nritems(leaf
);
4990 if (path
->slots
[0] >= nritems
) {
4991 ret
= btrfs_next_leaf(root
, path
);
4998 leaf
= path
->nodes
[0];
4999 nritems
= btrfs_header_nritems(leaf
);
5002 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5004 if (key
.objectid
>= block_group
->key
.objectid
+
5005 block_group
->key
.offset
)
5008 if (progress
&& need_resched()) {
5009 btrfs_release_path(root
, path
);
5010 mutex_unlock(&root
->fs_info
->alloc_mutex
);
5012 mutex_lock(&root
->fs_info
->alloc_mutex
);
5018 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
5019 key
.objectid
+ key
.offset
<= cur_byte
) {
5025 cur_byte
= key
.objectid
+ key
.offset
;
5026 btrfs_release_path(root
, path
);
5028 __alloc_chunk_for_shrink(root
, block_group
, 0);
5029 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
5033 key
.objectid
= cur_byte
;
5038 btrfs_release_path(root
, path
);
5039 mutex_unlock(&root
->fs_info
->alloc_mutex
);
5042 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
5043 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
5044 WARN_ON(reloc_inode
->i_mapping
->nrpages
);
5047 if (total_found
> 0) {
5048 printk("btrfs found %llu extents in pass %d\n",
5049 (unsigned long long)total_found
, pass
);
5054 /* delete reloc_inode */
5057 /* unpin extents in this range */
5058 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5059 btrfs_commit_transaction(trans
, info
->tree_root
);
5061 mutex_lock(&root
->fs_info
->alloc_mutex
);
5063 spin_lock(&block_group
->lock
);
5064 WARN_ON(block_group
->pinned
> 0);
5065 WARN_ON(block_group
->reserved
> 0);
5066 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
5067 spin_unlock(&block_group
->lock
);
5070 mutex_unlock(&root
->fs_info
->alloc_mutex
);
5071 btrfs_free_path(path
);
5075 int find_first_block_group(struct btrfs_root
*root
, struct btrfs_path
*path
,
5076 struct btrfs_key
*key
)
5079 struct btrfs_key found_key
;
5080 struct extent_buffer
*leaf
;
5083 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
5088 slot
= path
->slots
[0];
5089 leaf
= path
->nodes
[0];
5090 if (slot
>= btrfs_header_nritems(leaf
)) {
5091 ret
= btrfs_next_leaf(root
, path
);
5098 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
5100 if (found_key
.objectid
>= key
->objectid
&&
5101 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
5112 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
5114 struct btrfs_block_group_cache
*block_group
;
5117 mutex_lock(&info
->alloc_mutex
);
5118 spin_lock(&info
->block_group_cache_lock
);
5119 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
5120 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
5123 spin_unlock(&info
->block_group_cache_lock
);
5124 btrfs_remove_free_space_cache(block_group
);
5125 spin_lock(&info
->block_group_cache_lock
);
5127 rb_erase(&block_group
->cache_node
,
5128 &info
->block_group_cache_tree
);
5129 down_write(&block_group
->space_info
->groups_sem
);
5130 list_del(&block_group
->list
);
5131 up_write(&block_group
->space_info
->groups_sem
);
5134 spin_unlock(&info
->block_group_cache_lock
);
5135 mutex_unlock(&info
->alloc_mutex
);
5139 int btrfs_read_block_groups(struct btrfs_root
*root
)
5141 struct btrfs_path
*path
;
5143 struct btrfs_block_group_cache
*cache
;
5144 struct btrfs_fs_info
*info
= root
->fs_info
;
5145 struct btrfs_space_info
*space_info
;
5146 struct btrfs_key key
;
5147 struct btrfs_key found_key
;
5148 struct extent_buffer
*leaf
;
5150 root
= info
->extent_root
;
5153 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5154 path
= btrfs_alloc_path();
5158 mutex_lock(&root
->fs_info
->alloc_mutex
);
5160 ret
= find_first_block_group(root
, path
, &key
);
5168 leaf
= path
->nodes
[0];
5169 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5170 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5176 spin_lock_init(&cache
->lock
);
5177 INIT_LIST_HEAD(&cache
->list
);
5178 read_extent_buffer(leaf
, &cache
->item
,
5179 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
5180 sizeof(cache
->item
));
5181 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
5183 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
5184 btrfs_release_path(root
, path
);
5185 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
5187 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
5188 btrfs_block_group_used(&cache
->item
),
5191 cache
->space_info
= space_info
;
5192 down_write(&space_info
->groups_sem
);
5193 list_add_tail(&cache
->list
, &space_info
->block_groups
);
5194 up_write(&space_info
->groups_sem
);
5196 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5199 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
5203 btrfs_free_path(path
);
5204 mutex_unlock(&root
->fs_info
->alloc_mutex
);
5208 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
5209 struct btrfs_root
*root
, u64 bytes_used
,
5210 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
5214 struct btrfs_root
*extent_root
;
5215 struct btrfs_block_group_cache
*cache
;
5217 WARN_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
5218 extent_root
= root
->fs_info
->extent_root
;
5220 root
->fs_info
->last_trans_new_blockgroup
= trans
->transid
;
5222 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5226 cache
->key
.objectid
= chunk_offset
;
5227 cache
->key
.offset
= size
;
5228 spin_lock_init(&cache
->lock
);
5229 INIT_LIST_HEAD(&cache
->list
);
5230 btrfs_set_key_type(&cache
->key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5232 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
5233 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
5234 cache
->flags
= type
;
5235 btrfs_set_block_group_flags(&cache
->item
, type
);
5237 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
5238 &cache
->space_info
);
5240 down_write(&cache
->space_info
->groups_sem
);
5241 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
5242 up_write(&cache
->space_info
->groups_sem
);
5244 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5247 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
5248 sizeof(cache
->item
));
5251 finish_current_insert(trans
, extent_root
);
5252 ret
= del_pending_extents(trans
, extent_root
);
5254 set_avail_alloc_bits(extent_root
->fs_info
, type
);
5259 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
5260 struct btrfs_root
*root
, u64 group_start
)
5262 struct btrfs_path
*path
;
5263 struct btrfs_block_group_cache
*block_group
;
5264 struct btrfs_key key
;
5267 BUG_ON(!mutex_is_locked(&root
->fs_info
->alloc_mutex
));
5268 root
= root
->fs_info
->extent_root
;
5270 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
5271 BUG_ON(!block_group
);
5273 memcpy(&key
, &block_group
->key
, sizeof(key
));
5275 path
= btrfs_alloc_path();
5278 btrfs_remove_free_space_cache(block_group
);
5279 rb_erase(&block_group
->cache_node
,
5280 &root
->fs_info
->block_group_cache_tree
);
5281 down_write(&block_group
->space_info
->groups_sem
);
5282 list_del(&block_group
->list
);
5283 up_write(&block_group
->space_info
->groups_sem
);
5286 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5287 kfree(shrink_block_group);
5290 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
5296 ret
= btrfs_del_item(trans
, root
, path
);
5298 btrfs_free_path(path
);