2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
21 #include "kerncompat.h"
22 #include "radix-tree.h"
25 #include "print-tree.h"
26 #include "transaction.h"
29 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
30 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
33 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
34 btrfs_root
*extent_root
);
35 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
36 btrfs_root
*extent_root
);
37 static int find_previous_extent(struct btrfs_root
*root
,
38 struct btrfs_path
*path
)
40 struct btrfs_key found_key
;
41 struct extent_buffer
*leaf
;
45 if (path
->slots
[0] == 0) {
46 ret
= btrfs_prev_leaf(root
, path
);
52 leaf
= path
->nodes
[0];
53 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
54 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
)
59 static int cache_block_group(struct btrfs_root
*root
,
60 struct btrfs_block_group_cache
*block_group
)
62 struct btrfs_path
*path
;
65 struct extent_buffer
*leaf
;
66 struct extent_map_tree
*free_space_cache
;
76 root
= root
->fs_info
->extent_root
;
77 free_space_cache
= &root
->fs_info
->free_space_cache
;
79 if (block_group
->cached
)
82 path
= btrfs_alloc_path();
87 first_free
= block_group
->key
.objectid
;
88 key
.objectid
= block_group
->key
.objectid
;
90 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
91 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
94 ret
= find_previous_extent(root
, path
);
98 leaf
= path
->nodes
[0];
99 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
100 if (key
.objectid
+ key
.offset
> first_free
)
101 first_free
= key
.objectid
+ key
.offset
;
104 leaf
= path
->nodes
[0];
105 slot
= path
->slots
[0];
106 if (slot
>= btrfs_header_nritems(leaf
)) {
107 ret
= btrfs_next_leaf(root
, path
);
116 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
117 if (key
.objectid
< block_group
->key
.objectid
) {
120 if (key
.objectid
>= block_group
->key
.objectid
+
121 block_group
->key
.offset
) {
125 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
130 if (key
.objectid
> last
) {
131 hole_size
= key
.objectid
- last
;
132 set_extent_dirty(free_space_cache
, last
,
133 last
+ hole_size
- 1,
136 last
= key
.objectid
+ key
.offset
;
144 if (block_group
->key
.objectid
+
145 block_group
->key
.offset
> last
) {
146 hole_size
= block_group
->key
.objectid
+
147 block_group
->key
.offset
- last
;
148 set_extent_dirty(free_space_cache
, last
,
149 last
+ hole_size
- 1, GFP_NOFS
);
151 block_group
->cached
= 1;
153 btrfs_free_path(path
);
157 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
161 struct extent_map_tree
*block_group_cache
;
162 struct btrfs_block_group_cache
*block_group
= NULL
;
168 block_group_cache
= &info
->block_group_cache
;
169 ret
= find_first_extent_bit(block_group_cache
,
170 bytenr
, &start
, &end
,
171 BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
);
175 ret
= get_state_private(block_group_cache
, start
, &ptr
);
179 block_group
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
180 if (block_group
->key
.objectid
<= bytenr
&& bytenr
<
181 block_group
->key
.objectid
+ block_group
->key
.offset
)
185 static u64 noinline
find_search_start(struct btrfs_root
*root
,
186 struct btrfs_block_group_cache
**cache_ret
,
187 u64 search_start
, int num
, int data
)
190 struct btrfs_block_group_cache
*cache
= *cache_ret
;
201 ret
= cache_block_group(root
, cache
);
205 last
= max(search_start
, cache
->key
.objectid
);
208 ret
= find_first_extent_bit(&root
->fs_info
->free_space_cache
,
209 last
, &start
, &end
, EXTENT_DIRTY
);
216 start
= max(last
, start
);
218 if (last
- start
< num
) {
219 if (last
== cache
->key
.objectid
+ cache
->key
.offset
)
223 if (data
!= BTRFS_BLOCK_GROUP_MIXED
&&
224 start
+ num
> cache
->key
.objectid
+ cache
->key
.offset
)
229 cache
= btrfs_lookup_block_group(root
->fs_info
, search_start
);
231 printk("Unable to find block group for %Lu\n",
239 last
= cache
->key
.objectid
+ cache
->key
.offset
;
241 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
247 data
= BTRFS_BLOCK_GROUP_MIXED
;
252 if (cache_miss
&& !cache
->cached
) {
253 cache_block_group(root
, cache
);
255 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
257 cache
= btrfs_find_block_group(root
, cache
, last
, data
, 0);
265 static u64
div_factor(u64 num
, int factor
)
274 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
275 struct btrfs_block_group_cache
276 *hint
, u64 search_start
,
279 struct btrfs_block_group_cache
*cache
;
280 struct extent_map_tree
*block_group_cache
;
281 struct btrfs_block_group_cache
*found_group
= NULL
;
282 struct btrfs_fs_info
*info
= root
->fs_info
;
296 block_group_cache
= &info
->block_group_cache
;
301 if (data
== BTRFS_BLOCK_GROUP_MIXED
) {
302 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
305 bit
= BLOCK_GROUP_DATA
;
307 bit
= BLOCK_GROUP_METADATA
;
310 struct btrfs_block_group_cache
*shint
;
311 shint
= btrfs_lookup_block_group(info
, search_start
);
312 if (shint
&& (shint
->data
== data
||
313 shint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
314 used
= btrfs_block_group_used(&shint
->item
);
315 if (used
+ shint
->pinned
<
316 div_factor(shint
->key
.offset
, factor
)) {
321 if (hint
&& (hint
->data
== data
||
322 hint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
323 used
= btrfs_block_group_used(&hint
->item
);
324 if (used
+ hint
->pinned
<
325 div_factor(hint
->key
.offset
, factor
)) {
328 last
= hint
->key
.objectid
+ hint
->key
.offset
;
332 hint_last
= max(hint
->key
.objectid
, search_start
);
334 hint_last
= search_start
;
340 ret
= find_first_extent_bit(block_group_cache
, last
,
345 ret
= get_state_private(block_group_cache
, start
, &ptr
);
349 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
350 last
= cache
->key
.objectid
+ cache
->key
.offset
;
351 used
= btrfs_block_group_used(&cache
->item
);
354 free_check
= cache
->key
.offset
;
356 free_check
= div_factor(cache
->key
.offset
, factor
);
357 if (used
+ cache
->pinned
< free_check
) {
370 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
378 static u64
hash_extent_ref(u64 root_objectid
, u64 ref_generation
,
379 u64 owner
, u64 owner_offset
)
381 u32 high_crc
= ~(u32
)0;
382 u32 low_crc
= ~(u32
)0;
385 lenum
= cpu_to_le64(root_objectid
);
386 high_crc
= crc32c(high_crc
, &lenum
, sizeof(lenum
));
387 lenum
= cpu_to_le64(ref_generation
);
388 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
389 if (owner
>= BTRFS_FIRST_FREE_OBJECTID
) {
390 lenum
= cpu_to_le64(owner
);
391 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
392 lenum
= cpu_to_le64(owner_offset
);
393 low_crc
= crc32c(low_crc
, &lenum
, sizeof(lenum
));
395 return ((u64
)high_crc
<< 32) | (u64
)low_crc
;
398 static int match_extent_ref(struct extent_buffer
*leaf
,
399 struct btrfs_extent_ref
*disk_ref
,
400 struct btrfs_extent_ref
*cpu_ref
)
405 if (cpu_ref
->objectid
)
406 len
= sizeof(*cpu_ref
);
408 len
= 2 * sizeof(u64
);
409 ret
= memcmp_extent_buffer(leaf
, cpu_ref
, (unsigned long)disk_ref
,
414 static int noinline
lookup_extent_backref(struct btrfs_trans_handle
*trans
,
415 struct btrfs_root
*root
,
416 struct btrfs_path
*path
, u64 bytenr
,
418 u64 ref_generation
, u64 owner
,
419 u64 owner_offset
, int del
)
422 struct btrfs_key key
;
423 struct btrfs_key found_key
;
424 struct btrfs_extent_ref ref
;
425 struct extent_buffer
*leaf
;
426 struct btrfs_extent_ref
*disk_ref
;
430 btrfs_set_stack_ref_root(&ref
, root_objectid
);
431 btrfs_set_stack_ref_generation(&ref
, ref_generation
);
432 btrfs_set_stack_ref_objectid(&ref
, owner
);
433 btrfs_set_stack_ref_offset(&ref
, owner_offset
);
435 hash
= hash_extent_ref(root_objectid
, ref_generation
, owner
,
438 key
.objectid
= bytenr
;
439 key
.type
= BTRFS_EXTENT_REF_KEY
;
442 ret
= btrfs_search_slot(trans
, root
, &key
, path
,
446 leaf
= path
->nodes
[0];
448 u32 nritems
= btrfs_header_nritems(leaf
);
449 if (path
->slots
[0] >= nritems
) {
450 ret2
= btrfs_next_leaf(root
, path
);
453 leaf
= path
->nodes
[0];
455 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
456 if (found_key
.objectid
!= bytenr
||
457 found_key
.type
!= BTRFS_EXTENT_REF_KEY
)
459 key
.offset
= found_key
.offset
;
461 btrfs_release_path(root
, path
);
465 disk_ref
= btrfs_item_ptr(path
->nodes
[0],
467 struct btrfs_extent_ref
);
468 if (match_extent_ref(path
->nodes
[0], disk_ref
, &ref
)) {
472 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
473 key
.offset
= found_key
.offset
+ 1;
474 btrfs_release_path(root
, path
);
481 * Back reference rules. Back refs have three main goals:
483 * 1) differentiate between all holders of references to an extent so that
484 * when a reference is dropped we can make sure it was a valid reference
485 * before freeing the extent.
487 * 2) Provide enough information to quickly find the holders of an extent
488 * if we notice a given block is corrupted or bad.
490 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
491 * maintenance. This is actually the same as #2, but with a slightly
492 * different use case.
494 * File extents can be referenced by:
496 * - multiple snapshots, subvolumes, or different generations in one subvol
497 * - different files inside a single subvolume (in theory, not implemented yet)
498 * - different offsets inside a file (bookend extents in file.c)
500 * The extent ref structure has fields for:
502 * - Objectid of the subvolume root
503 * - Generation number of the tree holding the reference
504 * - objectid of the file holding the reference
505 * - offset in the file corresponding to the key holding the reference
507 * When a file extent is allocated the fields are filled in:
508 * (root_key.objectid, trans->transid, inode objectid, offset in file)
510 * When a leaf is cow'd new references are added for every file extent found
511 * in the leaf. It looks the same as the create case, but trans->transid
512 * will be different when the block is cow'd.
514 * (root_key.objectid, trans->transid, inode objectid, offset in file)
516 * When a file extent is removed either during snapshot deletion or file
517 * truncation, the corresponding back reference is found
520 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
521 * inode objectid, offset in file)
523 * Btree extents can be referenced by:
525 * - Different subvolumes
526 * - Different generations of the same subvolume
528 * Storing sufficient information for a full reverse mapping of a btree
529 * block would require storing the lowest key of the block in the backref,
530 * and it would require updating that lowest key either before write out or
531 * every time it changed. Instead, the objectid of the lowest key is stored
532 * along with the level of the tree block. This provides a hint
533 * about where in the btree the block can be found. Searches through the
534 * btree only need to look for a pointer to that block, so they stop one
535 * level higher than the level recorded in the backref.
537 * Some btrees do not do reference counting on their extents. These
538 * include the extent tree and the tree of tree roots. Backrefs for these
539 * trees always have a generation of zero.
541 * When a tree block is created, back references are inserted:
543 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
545 * When a tree block is cow'd in a reference counted root,
546 * new back references are added for all the blocks it points to.
547 * These are of the form (trans->transid will have increased since creation):
549 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
551 * Because the lowest_key_objectid and the level are just hints
552 * they are not used when backrefs are deleted. When a backref is deleted:
554 * if backref was for a tree root:
555 * root_objectid = root->root_key.objectid
557 * root_objectid = btrfs_header_owner(parent)
559 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
561 * Back Reference Key hashing:
563 * Back references have four fields, each 64 bits long. Unfortunately,
564 * This is hashed into a single 64 bit number and placed into the key offset.
565 * The key objectid corresponds to the first byte in the extent, and the
566 * key type is set to BTRFS_EXTENT_REF_KEY
568 int btrfs_insert_extent_backref(struct btrfs_trans_handle
*trans
,
569 struct btrfs_root
*root
,
570 struct btrfs_path
*path
, u64 bytenr
,
571 u64 root_objectid
, u64 ref_generation
,
572 u64 owner
, u64 owner_offset
)
575 struct btrfs_key key
;
576 struct btrfs_extent_ref ref
;
577 struct btrfs_extent_ref
*disk_ref
;
580 btrfs_set_stack_ref_root(&ref
, root_objectid
);
581 btrfs_set_stack_ref_generation(&ref
, ref_generation
);
582 btrfs_set_stack_ref_objectid(&ref
, owner
);
583 btrfs_set_stack_ref_offset(&ref
, owner_offset
);
585 hash
= hash_extent_ref(root_objectid
, ref_generation
, owner
,
588 key
.objectid
= bytenr
;
589 key
.type
= BTRFS_EXTENT_REF_KEY
;
591 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(ref
));
592 while (ret
== -EEXIST
) {
593 disk_ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
594 struct btrfs_extent_ref
);
595 if (match_extent_ref(path
->nodes
[0], disk_ref
, &ref
))
598 btrfs_release_path(root
, path
);
599 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
604 disk_ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
605 struct btrfs_extent_ref
);
606 write_extent_buffer(path
->nodes
[0], &ref
, (unsigned long)disk_ref
,
608 btrfs_mark_buffer_dirty(path
->nodes
[0]);
610 btrfs_release_path(root
, path
);
614 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
615 struct btrfs_root
*root
,
616 u64 bytenr
, u64 num_bytes
,
617 u64 root_objectid
, u64 ref_generation
,
618 u64 owner
, u64 owner_offset
)
620 struct btrfs_path
*path
;
622 struct btrfs_key key
;
623 struct extent_buffer
*l
;
624 struct btrfs_extent_item
*item
;
627 WARN_ON(num_bytes
< root
->sectorsize
);
628 path
= btrfs_alloc_path();
632 key
.objectid
= bytenr
;
633 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
634 key
.offset
= num_bytes
;
635 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
644 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
645 refs
= btrfs_extent_refs(l
, item
);
646 btrfs_set_extent_refs(l
, item
, refs
+ 1);
647 btrfs_mark_buffer_dirty(path
->nodes
[0]);
649 btrfs_release_path(root
->fs_info
->extent_root
, path
);
651 ret
= btrfs_insert_extent_backref(trans
, root
->fs_info
->extent_root
,
652 path
, bytenr
, root_objectid
,
653 ref_generation
, owner
, owner_offset
);
655 finish_current_insert(trans
, root
->fs_info
->extent_root
);
656 del_pending_extents(trans
, root
->fs_info
->extent_root
);
658 btrfs_free_path(path
);
662 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
663 struct btrfs_root
*root
)
665 finish_current_insert(trans
, root
->fs_info
->extent_root
);
666 del_pending_extents(trans
, root
->fs_info
->extent_root
);
670 static int lookup_extent_ref(struct btrfs_trans_handle
*trans
,
671 struct btrfs_root
*root
, u64 bytenr
,
672 u64 num_bytes
, u32
*refs
)
674 struct btrfs_path
*path
;
676 struct btrfs_key key
;
677 struct extent_buffer
*l
;
678 struct btrfs_extent_item
*item
;
680 WARN_ON(num_bytes
< root
->sectorsize
);
681 path
= btrfs_alloc_path();
682 key
.objectid
= bytenr
;
683 key
.offset
= num_bytes
;
684 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
685 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
690 btrfs_print_leaf(root
, path
->nodes
[0]);
691 printk("failed to find block number %Lu\n", bytenr
);
695 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
696 *refs
= btrfs_extent_refs(l
, item
);
698 btrfs_free_path(path
);
702 u32
btrfs_count_snapshots_in_path(struct btrfs_root
*root
,
703 struct btrfs_path
*count_path
,
706 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
707 struct btrfs_path
*path
;
710 u64 root_objectid
= root
->root_key
.objectid
;
716 struct btrfs_key key
;
717 struct btrfs_key found_key
;
718 struct extent_buffer
*l
;
719 struct btrfs_extent_item
*item
;
720 struct btrfs_extent_ref
*ref_item
;
723 path
= btrfs_alloc_path();
726 bytenr
= first_extent
;
728 bytenr
= count_path
->nodes
[level
]->start
;
731 key
.objectid
= bytenr
;
734 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
735 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
741 btrfs_item_key_to_cpu(l
, &found_key
, path
->slots
[0]);
743 if (found_key
.objectid
!= bytenr
||
744 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
748 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
749 refs
= btrfs_extent_refs(l
, item
);
751 nritems
= btrfs_header_nritems(l
);
752 if (path
->slots
[0] >= nritems
) {
753 ret
= btrfs_next_leaf(extent_root
, path
);
758 btrfs_item_key_to_cpu(l
, &found_key
, path
->slots
[0]);
759 if (found_key
.objectid
!= bytenr
)
761 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
767 ref_item
= btrfs_item_ptr(l
, path
->slots
[0],
768 struct btrfs_extent_ref
);
769 found_objectid
= btrfs_ref_root(l
, ref_item
);
771 if (found_objectid
!= root_objectid
) {
778 if (cur_count
== 0) {
782 if (level
>= 0 && root
->node
== count_path
->nodes
[level
])
785 btrfs_release_path(root
, path
);
789 btrfs_free_path(path
);
792 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
793 struct btrfs_root
*root
, u64 owner_objectid
)
799 struct btrfs_disk_key disk_key
;
801 level
= btrfs_header_level(root
->node
);
802 generation
= trans
->transid
;
803 nritems
= btrfs_header_nritems(root
->node
);
806 btrfs_item_key(root
->node
, &disk_key
, 0);
808 btrfs_node_key(root
->node
, &disk_key
, 0);
809 key_objectid
= btrfs_disk_key_objectid(&disk_key
);
813 return btrfs_inc_extent_ref(trans
, root
, root
->node
->start
,
814 root
->node
->len
, owner_objectid
,
815 generation
, level
, key_objectid
);
818 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
819 struct extent_buffer
*buf
)
823 struct btrfs_key key
;
824 struct btrfs_file_extent_item
*fi
;
833 level
= btrfs_header_level(buf
);
834 nritems
= btrfs_header_nritems(buf
);
835 for (i
= 0; i
< nritems
; i
++) {
838 btrfs_item_key_to_cpu(buf
, &key
, i
);
839 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
841 fi
= btrfs_item_ptr(buf
, i
,
842 struct btrfs_file_extent_item
);
843 if (btrfs_file_extent_type(buf
, fi
) ==
844 BTRFS_FILE_EXTENT_INLINE
)
846 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
847 if (disk_bytenr
== 0)
849 ret
= btrfs_inc_extent_ref(trans
, root
, disk_bytenr
,
850 btrfs_file_extent_disk_num_bytes(buf
, fi
),
851 root
->root_key
.objectid
, trans
->transid
,
852 key
.objectid
, key
.offset
);
858 bytenr
= btrfs_node_blockptr(buf
, i
);
859 btrfs_node_key_to_cpu(buf
, &key
, i
);
860 ret
= btrfs_inc_extent_ref(trans
, root
, bytenr
,
861 btrfs_level_size(root
, level
- 1),
862 root
->root_key
.objectid
,
864 level
- 1, key
.objectid
);
875 for (i
=0; i
< faili
; i
++) {
878 btrfs_item_key_to_cpu(buf
, &key
, i
);
879 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
881 fi
= btrfs_item_ptr(buf
, i
,
882 struct btrfs_file_extent_item
);
883 if (btrfs_file_extent_type(buf
, fi
) ==
884 BTRFS_FILE_EXTENT_INLINE
)
886 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
887 if (disk_bytenr
== 0)
889 err
= btrfs_free_extent(trans
, root
, disk_bytenr
,
890 btrfs_file_extent_disk_num_bytes(buf
,
894 bytenr
= btrfs_node_blockptr(buf
, i
);
895 err
= btrfs_free_extent(trans
, root
, bytenr
,
896 btrfs_level_size(root
, level
- 1), 0);
904 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
905 struct btrfs_root
*root
,
906 struct btrfs_path
*path
,
907 struct btrfs_block_group_cache
*cache
)
911 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
913 struct extent_buffer
*leaf
;
915 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
920 leaf
= path
->nodes
[0];
921 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
922 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
923 btrfs_mark_buffer_dirty(leaf
);
924 btrfs_release_path(extent_root
, path
);
926 finish_current_insert(trans
, extent_root
);
927 pending_ret
= del_pending_extents(trans
, extent_root
);
936 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
937 struct btrfs_root
*root
)
939 struct extent_map_tree
*block_group_cache
;
940 struct btrfs_block_group_cache
*cache
;
944 struct btrfs_path
*path
;
950 block_group_cache
= &root
->fs_info
->block_group_cache
;
951 path
= btrfs_alloc_path();
956 ret
= find_first_extent_bit(block_group_cache
, last
,
957 &start
, &end
, BLOCK_GROUP_DIRTY
);
962 ret
= get_state_private(block_group_cache
, start
, &ptr
);
966 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
967 err
= write_one_cache_group(trans
, root
,
970 * if we fail to write the cache group, we want
971 * to keep it marked dirty in hopes that a later
978 clear_extent_bits(block_group_cache
, start
, end
,
979 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
981 btrfs_free_path(path
);
985 static int update_block_group(struct btrfs_trans_handle
*trans
,
986 struct btrfs_root
*root
,
987 u64 bytenr
, u64 num_bytes
, int alloc
,
988 int mark_free
, int data
)
990 struct btrfs_block_group_cache
*cache
;
991 struct btrfs_fs_info
*info
= root
->fs_info
;
992 u64 total
= num_bytes
;
999 cache
= btrfs_lookup_block_group(info
, bytenr
);
1003 byte_in_group
= bytenr
- cache
->key
.objectid
;
1004 WARN_ON(byte_in_group
> cache
->key
.offset
);
1005 start
= cache
->key
.objectid
;
1006 end
= start
+ cache
->key
.offset
- 1;
1007 set_extent_bits(&info
->block_group_cache
, start
, end
,
1008 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
1010 old_val
= btrfs_block_group_used(&cache
->item
);
1011 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1013 if (cache
->data
!= data
&&
1014 old_val
< (cache
->key
.offset
>> 1)) {
1019 bit_to_clear
= BLOCK_GROUP_METADATA
;
1020 bit_to_set
= BLOCK_GROUP_DATA
;
1021 cache
->item
.flags
&=
1022 ~BTRFS_BLOCK_GROUP_MIXED
;
1023 cache
->item
.flags
|=
1024 BTRFS_BLOCK_GROUP_DATA
;
1026 bit_to_clear
= BLOCK_GROUP_DATA
;
1027 bit_to_set
= BLOCK_GROUP_METADATA
;
1028 cache
->item
.flags
&=
1029 ~BTRFS_BLOCK_GROUP_MIXED
;
1030 cache
->item
.flags
&=
1031 ~BTRFS_BLOCK_GROUP_DATA
;
1033 clear_extent_bits(&info
->block_group_cache
,
1034 start
, end
, bit_to_clear
,
1036 set_extent_bits(&info
->block_group_cache
,
1037 start
, end
, bit_to_set
,
1039 } else if (cache
->data
!= data
&&
1040 cache
->data
!= BTRFS_BLOCK_GROUP_MIXED
) {
1041 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
1042 set_extent_bits(&info
->block_group_cache
,
1045 BLOCK_GROUP_METADATA
,
1048 old_val
+= num_bytes
;
1050 old_val
-= num_bytes
;
1052 set_extent_dirty(&info
->free_space_cache
,
1053 bytenr
, bytenr
+ num_bytes
- 1,
1057 btrfs_set_block_group_used(&cache
->item
, old_val
);
1059 bytenr
+= num_bytes
;
1063 static int update_pinned_extents(struct btrfs_root
*root
,
1064 u64 bytenr
, u64 num
, int pin
)
1067 struct btrfs_block_group_cache
*cache
;
1068 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1071 set_extent_dirty(&fs_info
->pinned_extents
,
1072 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1074 clear_extent_dirty(&fs_info
->pinned_extents
,
1075 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1078 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
1080 len
= min(num
, cache
->key
.offset
-
1081 (bytenr
- cache
->key
.objectid
));
1083 cache
->pinned
+= len
;
1084 fs_info
->total_pinned
+= len
;
1086 cache
->pinned
-= len
;
1087 fs_info
->total_pinned
-= len
;
1095 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_map_tree
*copy
)
1100 struct extent_map_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
1104 ret
= find_first_extent_bit(pinned_extents
, last
,
1105 &start
, &end
, EXTENT_DIRTY
);
1108 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
1114 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
1115 struct btrfs_root
*root
,
1116 struct extent_map_tree
*unpin
)
1121 struct extent_map_tree
*free_space_cache
;
1122 free_space_cache
= &root
->fs_info
->free_space_cache
;
1125 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
1129 update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
1130 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
1131 set_extent_dirty(free_space_cache
, start
, end
, GFP_NOFS
);
1136 static int finish_current_insert(struct btrfs_trans_handle
*trans
,
1137 struct btrfs_root
*extent_root
)
1141 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
1142 struct extent_buffer
*eb
;
1143 struct btrfs_path
*path
;
1144 struct btrfs_key ins
;
1145 struct btrfs_disk_key first
;
1146 struct btrfs_extent_item extent_item
;
1151 btrfs_set_stack_extent_refs(&extent_item
, 1);
1152 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
1153 path
= btrfs_alloc_path();
1156 ret
= find_first_extent_bit(&info
->extent_ins
, 0, &start
,
1157 &end
, EXTENT_LOCKED
);
1161 ins
.objectid
= start
;
1162 ins
.offset
= end
+ 1 - start
;
1163 err
= btrfs_insert_item(trans
, extent_root
, &ins
,
1164 &extent_item
, sizeof(extent_item
));
1165 clear_extent_bits(&info
->extent_ins
, start
, end
, EXTENT_LOCKED
,
1167 eb
= read_tree_block(extent_root
, ins
.objectid
, ins
.offset
);
1168 level
= btrfs_header_level(eb
);
1170 btrfs_item_key(eb
, &first
, 0);
1172 btrfs_node_key(eb
, &first
, 0);
1174 err
= btrfs_insert_extent_backref(trans
, extent_root
, path
,
1175 start
, extent_root
->root_key
.objectid
,
1177 btrfs_disk_key_objectid(&first
));
1179 free_extent_buffer(eb
);
1181 btrfs_free_path(path
);
1185 static int pin_down_bytes(struct btrfs_root
*root
, u64 bytenr
, u32 num_bytes
,
1189 struct extent_buffer
*buf
;
1192 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
1194 if (btrfs_buffer_uptodate(buf
)) {
1196 root
->fs_info
->running_transaction
->transid
;
1197 if (btrfs_header_generation(buf
) == transid
) {
1198 free_extent_buffer(buf
);
1202 free_extent_buffer(buf
);
1204 update_pinned_extents(root
, bytenr
, num_bytes
, 1);
1206 set_extent_bits(&root
->fs_info
->pending_del
,
1207 bytenr
, bytenr
+ num_bytes
- 1,
1208 EXTENT_LOCKED
, GFP_NOFS
);
1215 * remove an extent from the root, returns 0 on success
1217 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
1218 *root
, u64 bytenr
, u64 num_bytes
,
1219 u64 root_objectid
, u64 ref_generation
,
1220 u64 owner_objectid
, u64 owner_offset
, int pin
,
1223 struct btrfs_path
*path
;
1224 struct btrfs_key key
;
1225 struct btrfs_fs_info
*info
= root
->fs_info
;
1226 struct btrfs_extent_ops
*ops
= info
->extent_ops
;
1227 struct btrfs_root
*extent_root
= info
->extent_root
;
1228 struct extent_buffer
*leaf
;
1230 struct btrfs_extent_item
*ei
;
1233 key
.objectid
= bytenr
;
1234 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
1235 key
.offset
= num_bytes
;
1237 path
= btrfs_alloc_path();
1241 ret
= lookup_extent_backref(trans
, extent_root
, path
,
1242 bytenr
, root_objectid
,
1244 owner_objectid
, owner_offset
, 1);
1246 ret
= btrfs_del_item(trans
, extent_root
, path
);
1248 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
1250 printk("Unable to find ref byte nr %Lu root %Lu "
1251 " gen %Lu owner %Lu offset %Lu\n", bytenr
,
1252 root_objectid
, ref_generation
, owner_objectid
,
1255 btrfs_release_path(extent_root
, path
);
1256 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
1261 leaf
= path
->nodes
[0];
1262 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
1263 struct btrfs_extent_item
);
1264 refs
= btrfs_extent_refs(leaf
, ei
);
1267 btrfs_set_extent_refs(leaf
, ei
, refs
);
1268 btrfs_mark_buffer_dirty(leaf
);
1275 ret
= pin_down_bytes(root
, bytenr
, num_bytes
, 0);
1281 /* block accounting for super block */
1282 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1283 btrfs_set_super_bytes_used(&info
->super_copy
,
1284 super_used
- num_bytes
);
1286 /* block accounting for root item */
1287 root_used
= btrfs_root_used(&root
->root_item
);
1288 btrfs_set_root_used(&root
->root_item
,
1289 root_used
- num_bytes
);
1290 ret
= btrfs_del_item(trans
, extent_root
, path
);
1294 if (ops
&& ops
->free_extent
)
1295 ops
->free_extent(root
, bytenr
, num_bytes
);
1297 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
1301 btrfs_free_path(path
);
1302 finish_current_insert(trans
, extent_root
);
1307 * find all the blocks marked as pending in the radix tree and remove
1308 * them from the extent map
1310 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
1311 btrfs_root
*extent_root
)
1317 struct extent_map_tree
*pending_del
;
1318 struct extent_map_tree
*pinned_extents
;
1320 pending_del
= &extent_root
->fs_info
->pending_del
;
1321 pinned_extents
= &extent_root
->fs_info
->pinned_extents
;
1324 ret
= find_first_extent_bit(pending_del
, 0, &start
, &end
,
1328 update_pinned_extents(extent_root
, start
, end
+ 1 - start
, 1);
1329 clear_extent_bits(pending_del
, start
, end
, EXTENT_LOCKED
,
1331 ret
= __free_extent(trans
, extent_root
,
1332 start
, end
+ 1 - start
,
1333 extent_root
->root_key
.objectid
,
1342 * remove an extent from the root, returns 0 on success
1344 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
1345 *root
, u64 bytenr
, u64 num_bytes
,
1346 u64 root_objectid
, u64 ref_generation
,
1347 u64 owner_objectid
, u64 owner_offset
, int pin
)
1349 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1353 WARN_ON(num_bytes
< root
->sectorsize
);
1354 if (!root
->ref_cows
)
1357 if (root
== extent_root
) {
1358 pin_down_bytes(root
, bytenr
, num_bytes
, 1);
1361 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, root_objectid
,
1362 ref_generation
, owner_objectid
, owner_offset
,
1364 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
1365 return ret
? ret
: pending_ret
;
1368 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
1370 u64 mask
= ((u64
)root
->stripesize
- 1);
1371 u64 ret
= (val
+ mask
) & ~mask
;
1376 * walks the btree of allocated extents and find a hole of a given size.
1377 * The key ins is changed to record the hole:
1378 * ins->objectid == block start
1379 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1380 * ins->offset == number of blocks
1381 * Any available blocks before search_start are skipped.
1383 static int noinline
find_free_extent(struct btrfs_trans_handle
*trans
,
1384 struct btrfs_root
*orig_root
,
1385 u64 num_bytes
, u64 empty_size
,
1386 u64 search_start
, u64 search_end
,
1387 u64 hint_byte
, struct btrfs_key
*ins
,
1388 u64 exclude_start
, u64 exclude_nr
,
1391 struct btrfs_path
*path
;
1392 struct btrfs_key key
;
1398 u64 orig_search_start
= search_start
;
1400 struct extent_buffer
*l
;
1401 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
1402 struct btrfs_fs_info
*info
= root
->fs_info
;
1403 u64 total_needed
= num_bytes
;
1405 struct btrfs_block_group_cache
*block_group
;
1410 WARN_ON(num_bytes
< root
->sectorsize
);
1411 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
1413 level
= btrfs_header_level(root
->node
);
1415 if (num_bytes
>= 32 * 1024 * 1024 && hint_byte
) {
1416 data
= BTRFS_BLOCK_GROUP_MIXED
;
1419 if (search_end
== (u64
)-1)
1420 search_end
= btrfs_super_total_bytes(&info
->super_copy
);
1422 block_group
= btrfs_lookup_block_group(info
, hint_byte
);
1424 hint_byte
= search_start
;
1425 block_group
= btrfs_find_block_group(root
, block_group
,
1426 hint_byte
, data
, 1);
1428 block_group
= btrfs_find_block_group(root
,
1430 search_start
, data
, 1);
1433 total_needed
+= empty_size
;
1434 path
= btrfs_alloc_path();
1437 block_group
= btrfs_lookup_block_group(info
, search_start
);
1439 block_group
= btrfs_lookup_block_group(info
,
1442 search_start
= find_search_start(root
, &block_group
, search_start
,
1443 total_needed
, data
);
1444 search_start
= stripe_align(root
, search_start
);
1445 cached_start
= search_start
;
1446 btrfs_init_path(path
);
1447 ins
->objectid
= search_start
;
1452 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1455 ret
= find_previous_extent(root
, path
);
1459 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1462 slot
= path
->slots
[0];
1463 if (slot
>= btrfs_header_nritems(l
)) {
1464 ret
= btrfs_next_leaf(root
, path
);
1470 search_start
= max(search_start
,
1471 block_group
->key
.objectid
);
1473 aligned
= stripe_align(root
, search_start
);
1474 ins
->objectid
= aligned
;
1475 if (aligned
>= search_end
) {
1479 ins
->offset
= search_end
- aligned
;
1483 ins
->objectid
= stripe_align(root
,
1484 last_byte
> search_start
?
1485 last_byte
: search_start
);
1486 if (search_end
<= ins
->objectid
) {
1490 ins
->offset
= search_end
- ins
->objectid
;
1491 BUG_ON(ins
->objectid
>= search_end
);
1494 btrfs_item_key_to_cpu(l
, &key
, slot
);
1496 if (key
.objectid
>= search_start
&& key
.objectid
> last_byte
&&
1498 if (last_byte
< search_start
)
1499 last_byte
= search_start
;
1500 aligned
= stripe_align(root
, last_byte
);
1501 hole_size
= key
.objectid
- aligned
;
1502 if (key
.objectid
> aligned
&& hole_size
>= num_bytes
) {
1503 ins
->objectid
= aligned
;
1504 ins
->offset
= hole_size
;
1508 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
) {
1509 if (!start_found
&& btrfs_key_type(&key
) ==
1510 BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1511 last_byte
= key
.objectid
;
1519 last_byte
= key
.objectid
+ key
.offset
;
1521 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1522 last_byte
>= block_group
->key
.objectid
+
1523 block_group
->key
.offset
) {
1524 btrfs_release_path(root
, path
);
1525 search_start
= block_group
->key
.objectid
+
1526 block_group
->key
.offset
;
1534 /* we have to make sure we didn't find an extent that has already
1535 * been allocated by the map tree or the original allocation
1537 btrfs_release_path(root
, path
);
1538 BUG_ON(ins
->objectid
< search_start
);
1540 if (ins
->objectid
+ num_bytes
>= search_end
)
1542 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1543 ins
->objectid
+ num_bytes
> block_group
->
1544 key
.objectid
+ block_group
->key
.offset
) {
1545 search_start
= block_group
->key
.objectid
+
1546 block_group
->key
.offset
;
1549 if (test_range_bit(&info
->extent_ins
, ins
->objectid
,
1550 ins
->objectid
+ num_bytes
-1, EXTENT_LOCKED
, 0)) {
1551 search_start
= ins
->objectid
+ num_bytes
;
1554 if (test_range_bit(&info
->pinned_extents
, ins
->objectid
,
1555 ins
->objectid
+ num_bytes
-1, EXTENT_DIRTY
, 0)) {
1556 search_start
= ins
->objectid
+ num_bytes
;
1559 if (exclude_nr
> 0 && (ins
->objectid
+ num_bytes
> exclude_start
&&
1560 ins
->objectid
< exclude_start
+ exclude_nr
)) {
1561 search_start
= exclude_start
+ exclude_nr
;
1565 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1567 trans
->block_group
= block_group
;
1569 ins
->offset
= num_bytes
;
1570 btrfs_free_path(path
);
1574 if (search_start
+ num_bytes
>= search_end
) {
1576 search_start
= orig_search_start
;
1583 total_needed
-= empty_size
;
1585 data
= BTRFS_BLOCK_GROUP_MIXED
;
1589 block_group
= btrfs_lookup_block_group(info
, search_start
);
1591 block_group
= btrfs_find_block_group(root
, block_group
,
1592 search_start
, data
, 0);
1596 btrfs_release_path(root
, path
);
1597 btrfs_free_path(path
);
1601 * finds a free extent and does all the dirty work required for allocation
1602 * returns the key for the extent through ins, and a tree buffer for
1603 * the first block of the extent through buf.
1605 * returns 0 if everything worked, non-zero otherwise.
1607 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1608 struct btrfs_root
*root
,
1609 u64 num_bytes
, u64 root_objectid
, u64 ref_generation
,
1610 u64 owner
, u64 owner_offset
,
1611 u64 empty_size
, u64 hint_byte
,
1612 u64 search_end
, struct btrfs_key
*ins
, int data
)
1616 u64 super_used
, root_used
;
1617 u64 search_start
= 0;
1621 struct btrfs_fs_info
*info
= root
->fs_info
;
1622 struct btrfs_extent_ops
*ops
= info
->extent_ops
;
1623 struct btrfs_root
*extent_root
= info
->extent_root
;
1624 struct btrfs_extent_item extent_item
;
1625 struct btrfs_path
*path
;
1627 btrfs_set_stack_extent_refs(&extent_item
, 1);
1630 new_hint = max(hint_byte, root->fs_info->alloc_start);
1631 if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1632 hint_byte = new_hint;
1634 WARN_ON(num_bytes
< root
->sectorsize
);
1635 if (ops
&& ops
->alloc_extent
) {
1636 ret
= ops
->alloc_extent(root
, num_bytes
, hint_byte
, ins
);
1638 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
1639 search_start
, search_end
, hint_byte
,
1640 ins
, trans
->alloc_exclude_start
,
1641 trans
->alloc_exclude_nr
, data
);
1647 /* block accounting for super block */
1648 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1649 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
1651 /* block accounting for root item */
1652 root_used
= btrfs_root_used(&root
->root_item
);
1653 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
1655 clear_extent_dirty(&root
->fs_info
->free_space_cache
,
1656 ins
->objectid
, ins
->objectid
+ ins
->offset
- 1,
1659 if (root
== extent_root
) {
1660 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
1661 ins
->objectid
+ ins
->offset
- 1,
1662 EXTENT_LOCKED
, GFP_NOFS
);
1667 WARN_ON(trans
->alloc_exclude_nr
);
1668 trans
->alloc_exclude_start
= ins
->objectid
;
1669 trans
->alloc_exclude_nr
= ins
->offset
;
1670 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1671 sizeof(extent_item
));
1673 trans
->alloc_exclude_start
= 0;
1674 trans
->alloc_exclude_nr
= 0;
1677 path
= btrfs_alloc_path();
1679 ret
= btrfs_insert_extent_backref(trans
, extent_root
, path
,
1680 ins
->objectid
, root_objectid
,
1681 ref_generation
, owner
, owner_offset
);
1684 btrfs_free_path(path
);
1685 finish_current_insert(trans
, extent_root
);
1686 pending_ret
= del_pending_extents(trans
, extent_root
);
1696 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
1703 * helper function to allocate a block for a given tree
1704 * returns the tree buffer or NULL.
1706 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1707 struct btrfs_root
*root
,
1709 u64 root_objectid
, u64 hint
,
1715 ref_generation
= trans
->transid
;
1720 return __btrfs_alloc_free_block(trans
, root
, blocksize
, root_objectid
,
1721 ref_generation
, 0, 0, hint
, empty_size
);
1725 * helper function to allocate a block for a given tree
1726 * returns the tree buffer or NULL.
1728 struct extent_buffer
*__btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1729 struct btrfs_root
*root
,
1738 struct btrfs_key ins
;
1740 struct extent_buffer
*buf
;
1742 ret
= btrfs_alloc_extent(trans
, root
, blocksize
,
1743 root_objectid
, ref_generation
,
1744 level
, first_objectid
, empty_size
, hint
,
1748 return ERR_PTR(ret
);
1750 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
, blocksize
);
1752 btrfs_free_extent(trans
, root
, ins
.objectid
, blocksize
,
1753 root
->root_key
.objectid
, ref_generation
,
1755 return ERR_PTR(-ENOMEM
);
1757 btrfs_set_buffer_uptodate(buf
);
1759 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1760 buf->start + buf->len - 1, GFP_NOFS);
1761 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1762 buf->start, buf->start + buf->len - 1,
1763 EXTENT_CSUM, GFP_NOFS);
1764 buf->flags |= EXTENT_CSUM;
1765 btrfs_set_buffer_defrag(buf);
1767 trans
->blocks_used
++;
1771 static int noinline
drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1772 struct btrfs_root
*root
,
1773 struct extent_buffer
*leaf
)
1776 u64 leaf_generation
;
1777 struct btrfs_key key
;
1778 struct btrfs_file_extent_item
*fi
;
1783 BUG_ON(!btrfs_is_leaf(leaf
));
1784 nritems
= btrfs_header_nritems(leaf
);
1785 leaf_owner
= btrfs_header_owner(leaf
);
1786 leaf_generation
= btrfs_header_generation(leaf
);
1788 for (i
= 0; i
< nritems
; i
++) {
1791 btrfs_item_key_to_cpu(leaf
, &key
, i
);
1792 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1794 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1795 if (btrfs_file_extent_type(leaf
, fi
) ==
1796 BTRFS_FILE_EXTENT_INLINE
)
1799 * FIXME make sure to insert a trans record that
1800 * repeats the snapshot del on crash
1802 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
1803 if (disk_bytenr
== 0)
1805 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
,
1806 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
1807 leaf_owner
, leaf_generation
,
1808 key
.objectid
, key
.offset
, 0);
1814 static void noinline
reada_walk_down(struct btrfs_root
*root
,
1815 struct extent_buffer
*node
)
1825 nritems
= btrfs_header_nritems(node
);
1826 level
= btrfs_header_level(node
);
1827 for (i
= 0; i
< nritems
; i
++) {
1828 bytenr
= btrfs_node_blockptr(node
, i
);
1829 blocksize
= btrfs_level_size(root
, level
- 1);
1830 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
, &refs
);
1834 mutex_unlock(&root
->fs_info
->fs_mutex
);
1835 ret
= readahead_tree_block(root
, bytenr
, blocksize
);
1837 mutex_lock(&root
->fs_info
->fs_mutex
);
1844 * helper function for drop_snapshot, this walks down the tree dropping ref
1845 * counts as it goes.
1847 static int noinline
walk_down_tree(struct btrfs_trans_handle
*trans
,
1848 struct btrfs_root
*root
,
1849 struct btrfs_path
*path
, int *level
)
1854 struct extent_buffer
*next
;
1855 struct extent_buffer
*cur
;
1856 struct extent_buffer
*parent
;
1861 WARN_ON(*level
< 0);
1862 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1863 ret
= lookup_extent_ref(trans
, root
,
1864 path
->nodes
[*level
]->start
,
1865 path
->nodes
[*level
]->len
, &refs
);
1871 * walk down to the last node level and free all the leaves
1873 while(*level
>= 0) {
1874 WARN_ON(*level
< 0);
1875 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1876 cur
= path
->nodes
[*level
];
1878 if (*level
> 0 && path
->slots
[*level
] == 0)
1879 reada_walk_down(root
, cur
);
1881 if (btrfs_header_level(cur
) != *level
)
1884 if (path
->slots
[*level
] >=
1885 btrfs_header_nritems(cur
))
1888 ret
= drop_leaf_ref(trans
, root
, cur
);
1892 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1893 blocksize
= btrfs_level_size(root
, *level
- 1);
1894 ret
= lookup_extent_ref(trans
, root
, bytenr
, blocksize
, &refs
);
1897 parent
= path
->nodes
[*level
];
1898 root_owner
= btrfs_header_owner(parent
);
1899 root_gen
= btrfs_header_generation(parent
);
1900 path
->slots
[*level
]++;
1901 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1902 blocksize
, root_owner
,
1907 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1908 if (!next
|| !btrfs_buffer_uptodate(next
)) {
1909 free_extent_buffer(next
);
1910 mutex_unlock(&root
->fs_info
->fs_mutex
);
1911 next
= read_tree_block(root
, bytenr
, blocksize
);
1912 mutex_lock(&root
->fs_info
->fs_mutex
);
1914 /* we dropped the lock, check one more time */
1915 ret
= lookup_extent_ref(trans
, root
, bytenr
,
1919 parent
= path
->nodes
[*level
];
1920 root_owner
= btrfs_header_owner(parent
);
1921 root_gen
= btrfs_header_generation(parent
);
1923 path
->slots
[*level
]++;
1924 free_extent_buffer(next
);
1925 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1933 WARN_ON(*level
<= 0);
1934 if (path
->nodes
[*level
-1])
1935 free_extent_buffer(path
->nodes
[*level
-1]);
1936 path
->nodes
[*level
-1] = next
;
1937 *level
= btrfs_header_level(next
);
1938 path
->slots
[*level
] = 0;
1941 WARN_ON(*level
< 0);
1942 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1944 if (path
->nodes
[*level
] == root
->node
) {
1945 root_owner
= root
->root_key
.objectid
;
1946 parent
= path
->nodes
[*level
];
1948 parent
= path
->nodes
[*level
+ 1];
1949 root_owner
= btrfs_header_owner(parent
);
1952 root_gen
= btrfs_header_generation(parent
);
1953 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->start
,
1954 path
->nodes
[*level
]->len
,
1955 root_owner
, root_gen
, 0, 0, 1);
1956 free_extent_buffer(path
->nodes
[*level
]);
1957 path
->nodes
[*level
] = NULL
;
1964 * helper for dropping snapshots. This walks back up the tree in the path
1965 * to find the first node higher up where we haven't yet gone through
1968 static int noinline
walk_up_tree(struct btrfs_trans_handle
*trans
,
1969 struct btrfs_root
*root
,
1970 struct btrfs_path
*path
, int *level
)
1974 struct btrfs_root_item
*root_item
= &root
->root_item
;
1979 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1980 slot
= path
->slots
[i
];
1981 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
1982 struct extent_buffer
*node
;
1983 struct btrfs_disk_key disk_key
;
1984 node
= path
->nodes
[i
];
1987 WARN_ON(*level
== 0);
1988 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
1989 memcpy(&root_item
->drop_progress
,
1990 &disk_key
, sizeof(disk_key
));
1991 root_item
->drop_level
= i
;
1994 if (path
->nodes
[*level
] == root
->node
) {
1995 root_owner
= root
->root_key
.objectid
;
1997 btrfs_header_generation(path
->nodes
[*level
]);
1999 struct extent_buffer
*node
;
2000 node
= path
->nodes
[*level
+ 1];
2001 root_owner
= btrfs_header_owner(node
);
2002 root_gen
= btrfs_header_generation(node
);
2004 ret
= btrfs_free_extent(trans
, root
,
2005 path
->nodes
[*level
]->start
,
2006 path
->nodes
[*level
]->len
,
2007 root_owner
, root_gen
, 0, 0, 1);
2009 free_extent_buffer(path
->nodes
[*level
]);
2010 path
->nodes
[*level
] = NULL
;
2018 * drop the reference count on the tree rooted at 'snap'. This traverses
2019 * the tree freeing any blocks that have a ref count of zero after being
2022 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
2028 struct btrfs_path
*path
;
2031 struct btrfs_root_item
*root_item
= &root
->root_item
;
2033 path
= btrfs_alloc_path();
2036 level
= btrfs_header_level(root
->node
);
2038 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
2039 path
->nodes
[level
] = root
->node
;
2040 extent_buffer_get(root
->node
);
2041 path
->slots
[level
] = 0;
2043 struct btrfs_key key
;
2044 struct btrfs_disk_key found_key
;
2045 struct extent_buffer
*node
;
2047 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
2048 level
= root_item
->drop_level
;
2049 path
->lowest_level
= level
;
2050 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
2055 node
= path
->nodes
[level
];
2056 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
2057 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
2058 sizeof(found_key
)));
2061 wret
= walk_down_tree(trans
, root
, path
, &level
);
2067 wret
= walk_up_tree(trans
, root
, path
, &level
);
2077 for (i
= 0; i
<= orig_level
; i
++) {
2078 if (path
->nodes
[i
]) {
2079 free_extent_buffer(path
->nodes
[i
]);
2080 path
->nodes
[i
] = NULL
;
2084 btrfs_free_path(path
);
2088 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
2095 ret
= find_first_extent_bit(&info
->block_group_cache
, 0,
2096 &start
, &end
, (unsigned int)-1);
2099 ret
= get_state_private(&info
->block_group_cache
, start
, &ptr
);
2101 kfree((void *)(unsigned long)ptr
);
2102 clear_extent_bits(&info
->block_group_cache
, start
,
2103 end
, (unsigned int)-1, GFP_NOFS
);
2106 ret
= find_first_extent_bit(&info
->free_space_cache
, 0,
2107 &start
, &end
, EXTENT_DIRTY
);
2110 clear_extent_dirty(&info
->free_space_cache
, start
,
2116 int btrfs_read_block_groups(struct btrfs_root
*root
)
2118 struct btrfs_path
*path
;
2122 struct btrfs_block_group_cache
*cache
;
2123 struct btrfs_fs_info
*info
= root
->fs_info
;
2124 struct extent_map_tree
*block_group_cache
;
2125 struct btrfs_key key
;
2126 struct btrfs_key found_key
;
2127 struct extent_buffer
*leaf
;
2129 block_group_cache
= &info
->block_group_cache
;
2131 root
= info
->extent_root
;
2133 key
.offset
= BTRFS_BLOCK_GROUP_SIZE
;
2134 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
2136 path
= btrfs_alloc_path();
2141 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
2147 leaf
= path
->nodes
[0];
2148 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
2149 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
2155 read_extent_buffer(leaf
, &cache
->item
,
2156 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
2157 sizeof(cache
->item
));
2158 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
2161 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
2162 btrfs_release_path(root
, path
);
2164 if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_MIXED
) {
2165 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
2166 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
2167 } else if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_DATA
) {
2168 bit
= BLOCK_GROUP_DATA
;
2169 cache
->data
= BTRFS_BLOCK_GROUP_DATA
;
2171 bit
= BLOCK_GROUP_METADATA
;
2175 /* use EXTENT_LOCKED to prevent merging */
2176 set_extent_bits(block_group_cache
, found_key
.objectid
,
2177 found_key
.objectid
+ found_key
.offset
- 1,
2178 bit
| EXTENT_LOCKED
, GFP_NOFS
);
2179 set_state_private(block_group_cache
, found_key
.objectid
,
2180 (unsigned long)cache
);
2183 btrfs_super_total_bytes(&info
->super_copy
))
2187 btrfs_free_path(path
);
2191 static int btrfs_insert_block_group(struct btrfs_trans_handle
*trans
,
2192 struct btrfs_root
*root
,
2193 struct btrfs_key
*key
,
2194 struct btrfs_block_group_item
*bi
)
2198 struct btrfs_root
*extent_root
;
2200 extent_root
= root
->fs_info
->extent_root
;
2201 ret
= btrfs_insert_item(trans
, extent_root
, key
, bi
, sizeof(*bi
));
2202 finish_current_insert(trans
, extent_root
);
2203 pending_ret
= del_pending_extents(trans
, extent_root
);
2211 int btrfs_make_block_groups(struct btrfs_trans_handle
*trans
,
2212 struct btrfs_root
*root
)
2221 struct btrfs_root
*extent_root
;
2222 struct btrfs_block_group_cache
*cache
;
2223 struct extent_map_tree
*block_group_cache
;
2225 extent_root
= root
->fs_info
->extent_root
;
2226 block_group_cache
= &root
->fs_info
->block_group_cache
;
2227 group_size
= BTRFS_BLOCK_GROUP_SIZE
;
2228 bytes_used
= btrfs_super_bytes_used(&root
->fs_info
->super_copy
);
2229 total_bytes
= btrfs_super_total_bytes(&root
->fs_info
->super_copy
);
2232 while (cur_start
< total_bytes
) {
2233 cache
= malloc(sizeof(*cache
));
2235 cache
->key
.objectid
= cur_start
;
2236 cache
->key
.offset
= group_size
;
2237 btrfs_set_key_type(&cache
->key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
2238 memset(&cache
->item
, 0, sizeof(cache
->item
));
2240 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
2242 bit
= BLOCK_GROUP_DATA
;
2244 cache
->item
.flags
|= BTRFS_BLOCK_GROUP_DATA
;
2246 bit
= BLOCK_GROUP_METADATA
;
2250 set_extent_bits(block_group_cache
, cur_start
,
2251 cur_start
+ group_size
- 1,
2252 bit
| EXTENT_LOCKED
, GFP_NOFS
);
2253 set_state_private(block_group_cache
, cur_start
,
2254 (unsigned long)cache
);
2255 cur_start
+= group_size
;
2257 /* then insert all the items */
2259 while(cur_start
< total_bytes
) {
2260 cache
= btrfs_lookup_block_group(root
->fs_info
, cur_start
);
2262 ret
= btrfs_insert_block_group(trans
, root
, &cache
->key
,
2265 cur_start
+= group_size
;
2270 u64
btrfs_hash_extent_ref(u64 root_objectid
, u64 ref_generation
,
2271 u64 owner
, u64 owner_offset
)
2273 return hash_extent_ref(root_objectid
, ref_generation
,
2274 owner
, owner_offset
);
2277 int btrfs_update_block_group(struct btrfs_trans_handle
*trans
,
2278 struct btrfs_root
*root
,
2279 u64 bytenr
, u64 num_bytes
, int alloc
,
2280 int mark_free
, int data
)
2282 return update_block_group(trans
, root
, bytenr
, num_bytes
,
2283 alloc
, mark_free
, data
);