/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include "kerncompat.h"
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"

#define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_DIRTY    EXTENT_DIRTY
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
static int find_previous_extent(struct btrfs_root *root,
				struct btrfs_path *path)
	struct btrfs_key found_key;
	struct extent_buffer *leaf;

		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct extent_map_tree *free_space_cache;

	root = root->fs_info->extent_root;
	free_space_cache = &root->fs_info->free_space_cache;

	if (block_group->cached)
	path = btrfs_alloc_path();
	first_free = block_group->key.objectid;
	key.objectid = block_group->key.objectid;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	ret = find_previous_extent(root, path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid) {
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
			last = key.objectid + key.offset;
	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			    block_group->key.offset - last;
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
	block_group->cached = 1;
	btrfs_free_path(path);
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;

	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
				    bytenr, &start, &end,
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
	ret = get_state_private(block_group_cache, start, &ptr);
	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
	if (block_group->key.objectid <= bytenr && bytenr <
	    block_group->key.objectid + block_group->key.offset)
static u64 noinline find_search_start(struct btrfs_root *root,
				      struct btrfs_block_group_cache **cache_ret,
				      u64 search_start, int num, int data)
	struct btrfs_block_group_cache *cache = *cache_ret;

	ret = cache_block_group(root, cache);
	last = max(search_start, cache->key.objectid);
	ret = find_first_extent_bit(&root->fs_info->free_space_cache,
				    last, &start, &end, EXTENT_DIRTY);
	start = max(last, start);
	if (last - start < num) {
		if (last == cache->key.objectid + cache->key.offset)
	if (data != BTRFS_BLOCK_GROUP_MIXED &&
	    start + num > cache->key.objectid + cache->key.offset)
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
		printk("Unable to find block group for %Lu\n",
	last = cache->key.objectid + cache->key.offset;
	cache = btrfs_lookup_block_group(root->fs_info, last);
	data = BTRFS_BLOCK_GROUP_MIXED;
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
	cache = btrfs_lookup_block_group(root->fs_info, last);
	cache = btrfs_find_block_group(root, cache, last, data, 0);
static u64 div_factor(u64 num, int factor)
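/*
 * The body of div_factor() is not present in this copy.  A minimal sketch of
 * what the callers below appear to rely on (an assumption, not the original
 * code): scale a byte count by factor/10 so it can be compared against the
 * used + pinned totals as a "mostly full" threshold.
 */
#if 0
static u64 div_factor(u64 num, int factor)
{
	num *= factor;
	num /= 10;
	return num;
}
#endif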
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, u64 search_start,
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;

	block_group_cache = &info->block_group_cache;
	if (data == BTRFS_BLOCK_GROUP_MIXED) {
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		bit = BLOCK_GROUP_DATA;
		bit = BLOCK_GROUP_METADATA;
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_block_group(info, search_start);
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
	if (hint && (hint->data == data ||
		     hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
		last = hint->key.objectid + hint->key.offset;
		hint_last = max(hint->key.objectid, search_start);
		hint_last = search_start;
		ret = find_first_extent_bit(block_group_cache, last,
		ret = get_state_private(block_group_cache, start, &ptr);
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);
			free_check = cache->key.offset;
			free_check = div_factor(cache->key.offset, factor);
		if (used + cache->pinned < free_check) {
	bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
			   u64 owner, u64 owner_offset)
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	return ((u64)high_crc << 32) | (u64)low_crc;
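/*
 * Illustration only (not part of the original file): how the return value
 * above is laid out.  The subvolume root objectid alone determines the high
 * 32 bits, while the generation, owner and owner offset are all folded into
 * the low 32 bits.
 */
#if 0
	u64 hash = hash_extent_ref(root_objectid, ref_generation,
				   owner, owner_offset);
	u32 hi = (u32)(hash >> 32);	/* crc32c of root_objectid only */
	u32 lo = (u32)hash;		/* crc32c of generation, owner, offset */
#endif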
static int match_extent_ref(struct extent_buffer *leaf,
			    struct btrfs_extent_ref *disk_ref,
			    struct btrfs_extent_ref *cpu_ref)
	if (cpu_ref->objectid)
		len = sizeof(*cpu_ref);
		len = 2 * sizeof(u64);
	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
		ret = btrfs_search_slot(trans, root, &key, path,
		leaf = path->nodes[0];
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
			key.offset = found_key.offset;
			btrfs_release_path(root, path);
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume (in theory, not implemented yet)
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks the same as the create case, but trans->transid
 * will be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a file extent is removed either during snapshot deletion or file
 * truncation, the corresponding back reference is found by searching for:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * Storing sufficient information for a full reverse mapping of a btree
 * block would require storing the lowest key of the block in the backref,
 * and it would require updating that lowest key either before write out or
 * every time it changed.  Instead, the objectid of the lowest key is stored
 * along with the level of the tree block.  This provides a hint
 * about where in the btree the block can be found.  Searches through the
 * btree only need to look for a pointer to that block, so they stop one
 * level higher than the level recorded in the backref.
 *
 * Some btrees do not do reference counting on their extents.  These
 * include the extent tree and the tree of tree roots.  Backrefs for these
 * trees always have a generation of zero.
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
 *
 * When a tree block is cow'd in a reference counted root,
 * new back references are added for all the blocks it points to.
 * These are of the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
 *
 * Because the lowest_key_objectid and the level are just hints
 * they are not used when backrefs are deleted.  When a backref is deleted:
 *
 * if backref was for a tree root:
 *     root_objectid = root->root_key.objectid
 * else
 *     root_objectid = btrfs_header_owner(parent)
 *
 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
 *
 * Back Reference Key hashing:
 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * this is hashed into a single 64 bit number and placed into the key offset.
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is set to BTRFS_EXTENT_REF_KEY.
 */
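/*
 * Illustrative sketch (not part of the original file): how the rules above
 * translate into an actual backref search key.  This only restates what
 * lookup_extent_backref() and btrfs_insert_extent_backref() below already
 * do; the local variable names are assumed for the example.
 */
#if 0
	struct btrfs_key key;

	key.objectid = bytenr;			/* first byte of the extent */
	key.type = BTRFS_EXTENT_REF_KEY;	/* backref item type */
	/* the four backref fields collapse into the 64 bit key offset */
	key.offset = hash_extent_ref(root_objectid, ref_generation,
				     owner, owner_offset);
#endif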
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path, u64 bytenr,
				u64 root_objectid, u64 ref_generation,
				u64 owner, u64 owner_offset)
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(root, path);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner, u64 owner_offset)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;

	WARN_ON(num_bytes < root->sectorsize);
	path = btrfs_alloc_path();
	key.objectid = bytenr;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	key.offset = num_bytes;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(root->fs_info->extent_root, path);

	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
					  path, bytenr, root_objectid,
					  ref_generation, owner, owner_offset);
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	btrfs_free_path(path);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
static int lookup_extent_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u32 *refs)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;

	WARN_ON(num_bytes < root->sectorsize);
	path = btrfs_alloc_path();
	key.objectid = bytenr;
	key.offset = num_bytes;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
		btrfs_print_leaf(root, path->nodes[0]);
		printk("failed to find block number %Lu\n", bytenr);
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	*refs = btrfs_extent_refs(l, item);
	btrfs_free_path(path);
703 u32
btrfs_count_snapshots_in_path(struct btrfs_root
*root
,
704 struct btrfs_path
*count_path
,
707 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
708 struct btrfs_path
*path
;
711 u64 root_objectid
= root
->root_key
.objectid
;
717 struct btrfs_key key
;
718 struct btrfs_key found_key
;
719 struct extent_buffer
*l
;
720 struct btrfs_extent_item
*item
;
721 struct btrfs_extent_ref
*ref_item
;
724 path
= btrfs_alloc_path();
727 bytenr
= first_extent
;
729 bytenr
= count_path
->nodes
[level
]->start
;
732 key
.objectid
= bytenr
;
735 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
736 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
742 btrfs_item_key_to_cpu(l
, &found_key
, path
->slots
[0]);
744 if (found_key
.objectid
!= bytenr
||
745 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
749 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
750 refs
= btrfs_extent_refs(l
, item
);
752 nritems
= btrfs_header_nritems(l
);
753 if (path
->slots
[0] >= nritems
) {
754 ret
= btrfs_next_leaf(extent_root
, path
);
759 btrfs_item_key_to_cpu(l
, &found_key
, path
->slots
[0]);
760 if (found_key
.objectid
!= bytenr
)
762 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
768 ref_item
= btrfs_item_ptr(l
, path
->slots
[0],
769 struct btrfs_extent_ref
);
770 found_objectid
= btrfs_ref_root(l
, ref_item
);
772 if (found_objectid
!= root_objectid
) {
779 if (cur_count
== 0) {
783 if (level
>= 0 && root
->node
== count_path
->nodes
[level
])
786 btrfs_release_path(root
, path
);
790 btrfs_free_path(path
);
793 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
794 struct btrfs_root
*root
, u64 owner_objectid
)
800 struct btrfs_disk_key disk_key
;
802 level
= btrfs_header_level(root
->node
);
803 generation
= trans
->transid
;
804 nritems
= btrfs_header_nritems(root
->node
);
807 btrfs_item_key(root
->node
, &disk_key
, 0);
809 btrfs_node_key(root
->node
, &disk_key
, 0);
810 key_objectid
= btrfs_disk_key_objectid(&disk_key
);
814 return btrfs_inc_extent_ref(trans
, root
, root
->node
->start
,
815 root
->node
->len
, owner_objectid
,
816 generation
, level
, key_objectid
);
819 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
820 struct extent_buffer
*buf
)
824 struct btrfs_key key
;
825 struct btrfs_file_extent_item
*fi
;
834 level
= btrfs_header_level(buf
);
835 nritems
= btrfs_header_nritems(buf
);
836 for (i
= 0; i
< nritems
; i
++) {
839 btrfs_item_key_to_cpu(buf
, &key
, i
);
840 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
842 fi
= btrfs_item_ptr(buf
, i
,
843 struct btrfs_file_extent_item
);
844 if (btrfs_file_extent_type(buf
, fi
) ==
845 BTRFS_FILE_EXTENT_INLINE
)
847 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
848 if (disk_bytenr
== 0)
850 ret
= btrfs_inc_extent_ref(trans
, root
, disk_bytenr
,
851 btrfs_file_extent_disk_num_bytes(buf
, fi
),
852 root
->root_key
.objectid
, trans
->transid
,
853 key
.objectid
, key
.offset
);
859 bytenr
= btrfs_node_blockptr(buf
, i
);
860 btrfs_node_key_to_cpu(buf
, &key
, i
);
861 ret
= btrfs_inc_extent_ref(trans
, root
, bytenr
,
862 btrfs_level_size(root
, level
- 1),
863 root
->root_key
.objectid
,
865 level
- 1, key
.objectid
);
876 for (i
=0; i
< faili
; i
++) {
879 btrfs_item_key_to_cpu(buf
, &key
, i
);
880 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
882 fi
= btrfs_item_ptr(buf
, i
,
883 struct btrfs_file_extent_item
);
884 if (btrfs_file_extent_type(buf
, fi
) ==
885 BTRFS_FILE_EXTENT_INLINE
)
887 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
888 if (disk_bytenr
== 0)
890 err
= btrfs_free_extent(trans
, root
, disk_bytenr
,
891 btrfs_file_extent_disk_num_bytes(buf
,
895 bytenr
= btrfs_node_blockptr(buf
, i
);
896 err
= btrfs_free_extent(trans
, root
, bytenr
,
897 btrfs_level_size(root
, level
- 1), 0);
905 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
906 struct btrfs_root
*root
,
907 struct btrfs_path
*path
,
908 struct btrfs_block_group_cache
*cache
)
912 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
914 struct extent_buffer
*leaf
;
916 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
921 leaf
= path
->nodes
[0];
922 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
923 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
924 btrfs_mark_buffer_dirty(leaf
);
925 btrfs_release_path(extent_root
, path
);
927 finish_current_insert(trans
, extent_root
);
928 pending_ret
= del_pending_extents(trans
, extent_root
);
937 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
938 struct btrfs_root
*root
)
940 struct extent_map_tree
*block_group_cache
;
941 struct btrfs_block_group_cache
*cache
;
945 struct btrfs_path
*path
;
951 block_group_cache
= &root
->fs_info
->block_group_cache
;
952 path
= btrfs_alloc_path();
957 ret
= find_first_extent_bit(block_group_cache
, last
,
958 &start
, &end
, BLOCK_GROUP_DIRTY
);
963 ret
= get_state_private(block_group_cache
, start
, &ptr
);
967 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
968 err
= write_one_cache_group(trans
, root
,
			 * if we fail to write the cache group, we want
			 * to keep it marked dirty in hopes that a later
979 clear_extent_bits(block_group_cache
, start
, end
,
980 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
982 btrfs_free_path(path
);
986 static int update_block_group(struct btrfs_trans_handle
*trans
,
987 struct btrfs_root
*root
,
988 u64 bytenr
, u64 num_bytes
, int alloc
,
989 int mark_free
, int data
)
991 struct btrfs_block_group_cache
*cache
;
992 struct btrfs_fs_info
*info
= root
->fs_info
;
993 u64 total
= num_bytes
;
1000 cache
= btrfs_lookup_block_group(info
, bytenr
);
1004 byte_in_group
= bytenr
- cache
->key
.objectid
;
1005 WARN_ON(byte_in_group
> cache
->key
.offset
);
1006 start
= cache
->key
.objectid
;
1007 end
= start
+ cache
->key
.offset
- 1;
1008 set_extent_bits(&info
->block_group_cache
, start
, end
,
1009 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
1011 old_val
= btrfs_block_group_used(&cache
->item
);
1012 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1014 if (cache
->data
!= data
&&
1015 old_val
< (cache
->key
.offset
>> 1)) {
1020 bit_to_clear
= BLOCK_GROUP_METADATA
;
1021 bit_to_set
= BLOCK_GROUP_DATA
;
1022 cache
->item
.flags
&=
1023 ~BTRFS_BLOCK_GROUP_MIXED
;
1024 cache
->item
.flags
|=
1025 BTRFS_BLOCK_GROUP_DATA
;
1027 bit_to_clear
= BLOCK_GROUP_DATA
;
1028 bit_to_set
= BLOCK_GROUP_METADATA
;
1029 cache
->item
.flags
&=
1030 ~BTRFS_BLOCK_GROUP_MIXED
;
1031 cache
->item
.flags
&=
1032 ~BTRFS_BLOCK_GROUP_DATA
;
1034 clear_extent_bits(&info
->block_group_cache
,
1035 start
, end
, bit_to_clear
,
1037 set_extent_bits(&info
->block_group_cache
,
1038 start
, end
, bit_to_set
,
1040 } else if (cache
->data
!= data
&&
1041 cache
->data
!= BTRFS_BLOCK_GROUP_MIXED
) {
1042 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
1043 set_extent_bits(&info
->block_group_cache
,
1046 BLOCK_GROUP_METADATA
,
1049 old_val
+= num_bytes
;
1051 old_val
-= num_bytes
;
1053 set_extent_dirty(&info
->free_space_cache
,
1054 bytenr
, bytenr
+ num_bytes
- 1,
1058 btrfs_set_block_group_used(&cache
->item
, old_val
);
1060 bytenr
+= num_bytes
;
1064 static int update_pinned_extents(struct btrfs_root
*root
,
1065 u64 bytenr
, u64 num
, int pin
)
1068 struct btrfs_block_group_cache
*cache
;
1069 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1072 set_extent_dirty(&fs_info
->pinned_extents
,
1073 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1075 clear_extent_dirty(&fs_info
->pinned_extents
,
1076 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
1079 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
1081 len
= min(num
, cache
->key
.offset
-
1082 (bytenr
- cache
->key
.objectid
));
1084 cache
->pinned
+= len
;
1085 fs_info
->total_pinned
+= len
;
1087 cache
->pinned
-= len
;
1088 fs_info
->total_pinned
-= len
;
1096 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_map_tree
*copy
)
1101 struct extent_map_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
1105 ret
= find_first_extent_bit(pinned_extents
, last
,
1106 &start
, &end
, EXTENT_DIRTY
);
1109 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
1115 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
1116 struct btrfs_root
*root
,
1117 struct extent_map_tree
*unpin
)
1122 struct extent_map_tree
*free_space_cache
;
1123 free_space_cache
= &root
->fs_info
->free_space_cache
;
1126 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
1130 update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
1131 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
1132 set_extent_dirty(free_space_cache
, start
, end
, GFP_NOFS
);
1137 static int finish_current_insert(struct btrfs_trans_handle
*trans
,
1138 struct btrfs_root
*extent_root
)
1142 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
1143 struct extent_buffer
*eb
;
1144 struct btrfs_path
*path
;
1145 struct btrfs_key ins
;
1146 struct btrfs_disk_key first
;
1147 struct btrfs_extent_item extent_item
;
1152 btrfs_set_stack_extent_refs(&extent_item
, 1);
1153 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
1154 path
= btrfs_alloc_path();
1157 ret
= find_first_extent_bit(&info
->extent_ins
, 0, &start
,
1158 &end
, EXTENT_LOCKED
);
1162 ins
.objectid
= start
;
1163 ins
.offset
= end
+ 1 - start
;
1164 err
= btrfs_insert_item(trans
, extent_root
, &ins
,
1165 &extent_item
, sizeof(extent_item
));
1166 clear_extent_bits(&info
->extent_ins
, start
, end
, EXTENT_LOCKED
,
1168 eb
= read_tree_block(extent_root
, ins
.objectid
, ins
.offset
);
1169 level
= btrfs_header_level(eb
);
1171 btrfs_item_key(eb
, &first
, 0);
1173 btrfs_node_key(eb
, &first
, 0);
1175 err
= btrfs_insert_extent_backref(trans
, extent_root
, path
,
1176 start
, extent_root
->root_key
.objectid
,
1178 btrfs_disk_key_objectid(&first
));
1180 free_extent_buffer(eb
);
1182 btrfs_free_path(path
);
1186 static int pin_down_bytes(struct btrfs_root
*root
, u64 bytenr
, u32 num_bytes
,
1190 struct extent_buffer
*buf
;
1193 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
1195 if (btrfs_buffer_uptodate(buf
)) {
1197 root
->fs_info
->running_transaction
->transid
;
1198 if (btrfs_header_generation(buf
) == transid
) {
1199 free_extent_buffer(buf
);
1203 free_extent_buffer(buf
);
1205 update_pinned_extents(root
, bytenr
, num_bytes
, 1);
1207 set_extent_bits(&root
->fs_info
->pending_del
,
1208 bytenr
, bytenr
+ num_bytes
- 1,
1209 EXTENT_LOCKED
, GFP_NOFS
);
 * remove an extent from the root, returns 0 on success
1218 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
1219 *root
, u64 bytenr
, u64 num_bytes
,
1220 u64 root_objectid
, u64 ref_generation
,
1221 u64 owner_objectid
, u64 owner_offset
, int pin
,
1224 struct btrfs_path
*path
;
1225 struct btrfs_key key
;
1226 struct btrfs_fs_info
*info
= root
->fs_info
;
1227 struct btrfs_extent_ops
*ops
= info
->extent_ops
;
1228 struct btrfs_root
*extent_root
= info
->extent_root
;
1229 struct extent_buffer
*leaf
;
1231 struct btrfs_extent_item
*ei
;
1234 key
.objectid
= bytenr
;
1235 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
1236 key
.offset
= num_bytes
;
1238 path
= btrfs_alloc_path();
1242 ret
= lookup_extent_backref(trans
, extent_root
, path
,
1243 bytenr
, root_objectid
,
1245 owner_objectid
, owner_offset
, 1);
1247 ret
= btrfs_del_item(trans
, extent_root
, path
);
1249 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
1251 printk("Unable to find ref byte nr %Lu root %Lu "
1252 " gen %Lu owner %Lu offset %Lu\n", bytenr
,
1253 root_objectid
, ref_generation
, owner_objectid
,
1256 btrfs_release_path(extent_root
, path
);
1257 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
1262 leaf
= path
->nodes
[0];
1263 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
1264 struct btrfs_extent_item
);
1265 refs
= btrfs_extent_refs(leaf
, ei
);
1268 btrfs_set_extent_refs(leaf
, ei
, refs
);
1269 btrfs_mark_buffer_dirty(leaf
);
1276 ret
= pin_down_bytes(root
, bytenr
, num_bytes
, 0);
	/* block accounting for super block */
1283 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1284 btrfs_set_super_bytes_used(&info
->super_copy
,
1285 super_used
- num_bytes
);
	/* block accounting for root item */
1288 root_used
= btrfs_root_used(&root
->root_item
);
1289 btrfs_set_root_used(&root
->root_item
,
1290 root_used
- num_bytes
);
1291 ret
= btrfs_del_item(trans
, extent_root
, path
);
1295 if (ops
&& ops
->free_extent
)
1296 ops
->free_extent(root
, bytenr
, num_bytes
);
1298 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
1302 btrfs_free_path(path
);
1303 finish_current_insert(trans
, extent_root
);
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
1311 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
1312 btrfs_root
*extent_root
)
1318 struct extent_map_tree
*pending_del
;
1319 struct extent_map_tree
*pinned_extents
;
1321 pending_del
= &extent_root
->fs_info
->pending_del
;
1322 pinned_extents
= &extent_root
->fs_info
->pinned_extents
;
1325 ret
= find_first_extent_bit(pending_del
, 0, &start
, &end
,
1329 update_pinned_extents(extent_root
, start
, end
+ 1 - start
, 1);
1330 clear_extent_bits(pending_del
, start
, end
, EXTENT_LOCKED
,
1332 ret
= __free_extent(trans
, extent_root
,
1333 start
, end
+ 1 - start
,
1334 extent_root
->root_key
.objectid
,
 * remove an extent from the root, returns 0 on success
1345 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
1346 *root
, u64 bytenr
, u64 num_bytes
,
1347 u64 root_objectid
, u64 ref_generation
,
1348 u64 owner_objectid
, u64 owner_offset
, int pin
)
1350 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1354 WARN_ON(num_bytes
< root
->sectorsize
);
1355 if (!root
->ref_cows
)
1358 if (root
== extent_root
) {
1359 pin_down_bytes(root
, bytenr
, num_bytes
, 1);
1362 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, root_objectid
,
1363 ref_generation
, owner_objectid
, owner_offset
,
1365 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
1366 return ret
? ret
: pending_ret
;
static u64 stripe_align(struct btrfs_root *root, u64 val)
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
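/*
 * Worked example (illustration only, values assumed): with a stripesize of
 * 4096 the mask is 4095, so stripe_align(root, 10000) rounds 10000 up to the
 * next stripe boundary: (10000 + 4095) & ~4095 = 12288.
 */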
/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
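/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to read the key filled in by find_free_extent().  Argument values
 * and variable names are assumed for the example; the call shape mirrors the
 * one used by btrfs_alloc_extent() further below.
 */
#if 0
	struct btrfs_key ins;
	int ret;

	ret = find_free_extent(trans, root, num_bytes, 0, search_start,
			       (u64)-1, hint_byte, &ins, 0, 0, data);
	if (ret == 0) {
		u64 found_start = ins.objectid;	/* start of the free hole */
		u64 found_len = ins.offset;	/* length of the hole in bytes */
	}
#endif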
1384 static int noinline
find_free_extent(struct btrfs_trans_handle
*trans
,
1385 struct btrfs_root
*orig_root
,
1386 u64 num_bytes
, u64 empty_size
,
1387 u64 search_start
, u64 search_end
,
1388 u64 hint_byte
, struct btrfs_key
*ins
,
1389 u64 exclude_start
, u64 exclude_nr
,
1392 struct btrfs_path
*path
;
1393 struct btrfs_key key
;
1399 u64 orig_search_start
= search_start
;
1401 struct extent_buffer
*l
;
1402 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
1403 struct btrfs_fs_info
*info
= root
->fs_info
;
1404 u64 total_needed
= num_bytes
;
1406 struct btrfs_block_group_cache
*block_group
;
1411 WARN_ON(num_bytes
< root
->sectorsize
);
1412 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
1414 level
= btrfs_header_level(root
->node
);
1416 if (num_bytes
>= 32 * 1024 * 1024 && hint_byte
) {
1417 data
= BTRFS_BLOCK_GROUP_MIXED
;
1420 if (search_end
== (u64
)-1)
1421 search_end
= btrfs_super_total_bytes(&info
->super_copy
);
1423 block_group
= btrfs_lookup_block_group(info
, hint_byte
);
1425 hint_byte
= search_start
;
1426 block_group
= btrfs_find_block_group(root
, block_group
,
1427 hint_byte
, data
, 1);
1429 block_group
= btrfs_find_block_group(root
,
1431 search_start
, data
, 1);
1434 total_needed
+= empty_size
;
1435 path
= btrfs_alloc_path();
1438 block_group
= btrfs_lookup_block_group(info
, search_start
);
1440 block_group
= btrfs_lookup_block_group(info
,
1443 search_start
= find_search_start(root
, &block_group
, search_start
,
1444 total_needed
, data
);
1445 search_start
= stripe_align(root
, search_start
);
1446 cached_start
= search_start
;
1447 btrfs_init_path(path
);
1448 ins
->objectid
= search_start
;
1453 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1456 ret
= find_previous_extent(root
, path
);
1460 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1463 slot
= path
->slots
[0];
1464 if (slot
>= btrfs_header_nritems(l
)) {
1465 ret
= btrfs_next_leaf(root
, path
);
1471 search_start
= max(search_start
,
1472 block_group
->key
.objectid
);
1474 aligned
= stripe_align(root
, search_start
);
1475 ins
->objectid
= aligned
;
1476 if (aligned
>= search_end
) {
1480 ins
->offset
= search_end
- aligned
;
1484 ins
->objectid
= stripe_align(root
,
1485 last_byte
> search_start
?
1486 last_byte
: search_start
);
1487 if (search_end
<= ins
->objectid
) {
1491 ins
->offset
= search_end
- ins
->objectid
;
1492 BUG_ON(ins
->objectid
>= search_end
);
1495 btrfs_item_key_to_cpu(l
, &key
, slot
);
1497 if (key
.objectid
>= search_start
&& key
.objectid
> last_byte
&&
1499 if (last_byte
< search_start
)
1500 last_byte
= search_start
;
1501 aligned
= stripe_align(root
, last_byte
);
1502 hole_size
= key
.objectid
- aligned
;
1503 if (key
.objectid
> aligned
&& hole_size
>= num_bytes
) {
1504 ins
->objectid
= aligned
;
1505 ins
->offset
= hole_size
;
1509 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
) {
1510 if (!start_found
&& btrfs_key_type(&key
) ==
1511 BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1512 last_byte
= key
.objectid
;
1520 last_byte
= key
.objectid
+ key
.offset
;
1522 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1523 last_byte
>= block_group
->key
.objectid
+
1524 block_group
->key
.offset
) {
1525 btrfs_release_path(root
, path
);
1526 search_start
= block_group
->key
.objectid
+
1527 block_group
->key
.offset
;
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
1538 btrfs_release_path(root
, path
);
1539 BUG_ON(ins
->objectid
< search_start
);
1541 if (ins
->objectid
+ num_bytes
>= search_end
)
1543 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1544 ins
->objectid
+ num_bytes
> block_group
->
1545 key
.objectid
+ block_group
->key
.offset
) {
1546 search_start
= block_group
->key
.objectid
+
1547 block_group
->key
.offset
;
1550 if (test_range_bit(&info
->extent_ins
, ins
->objectid
,
1551 ins
->objectid
+ num_bytes
-1, EXTENT_LOCKED
, 0)) {
1552 search_start
= ins
->objectid
+ num_bytes
;
1555 if (test_range_bit(&info
->pinned_extents
, ins
->objectid
,
1556 ins
->objectid
+ num_bytes
-1, EXTENT_DIRTY
, 0)) {
1557 search_start
= ins
->objectid
+ num_bytes
;
1560 if (exclude_nr
> 0 && (ins
->objectid
+ num_bytes
> exclude_start
&&
1561 ins
->objectid
< exclude_start
+ exclude_nr
)) {
1562 search_start
= exclude_start
+ exclude_nr
;
1566 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1568 trans
->block_group
= block_group
;
1570 ins
->offset
= num_bytes
;
1571 btrfs_free_path(path
);
1575 if (search_start
+ num_bytes
>= search_end
) {
1577 search_start
= orig_search_start
;
1584 total_needed
-= empty_size
;
1586 data
= BTRFS_BLOCK_GROUP_MIXED
;
1590 block_group
= btrfs_lookup_block_group(info
, search_start
);
1592 block_group
= btrfs_find_block_group(root
, block_group
,
1593 search_start
, data
, 0);
1597 btrfs_release_path(root
, path
);
1598 btrfs_free_path(path
);
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
1608 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1609 struct btrfs_root
*root
,
1610 u64 num_bytes
, u64 root_objectid
, u64 ref_generation
,
1611 u64 owner
, u64 owner_offset
,
1612 u64 empty_size
, u64 hint_byte
,
1613 u64 search_end
, struct btrfs_key
*ins
, int data
)
1617 u64 super_used
, root_used
;
1618 u64 search_start
= 0;
1622 struct btrfs_fs_info
*info
= root
->fs_info
;
1623 struct btrfs_extent_ops
*ops
= info
->extent_ops
;
1624 struct btrfs_root
*extent_root
= info
->extent_root
;
1625 struct btrfs_extent_item extent_item
;
1626 struct btrfs_path
*path
;
1628 btrfs_set_stack_extent_refs(&extent_item
, 1);
	new_hint = max(hint_byte, root->fs_info->alloc_start);
	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
		hint_byte = new_hint;
1635 WARN_ON(num_bytes
< root
->sectorsize
);
1636 if (ops
&& ops
->alloc_extent
) {
1637 ret
= ops
->alloc_extent(root
, num_bytes
, hint_byte
, ins
);
1639 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
1640 search_start
, search_end
, hint_byte
,
1641 ins
, trans
->alloc_exclude_start
,
1642 trans
->alloc_exclude_nr
, data
);
	/* block accounting for super block */
1649 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1650 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
	/* block accounting for root item */
1653 root_used
= btrfs_root_used(&root
->root_item
);
1654 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
1656 clear_extent_dirty(&root
->fs_info
->free_space_cache
,
1657 ins
->objectid
, ins
->objectid
+ ins
->offset
- 1,
1660 if (root
== extent_root
) {
1661 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
1662 ins
->objectid
+ ins
->offset
- 1,
1663 EXTENT_LOCKED
, GFP_NOFS
);
1668 WARN_ON(trans
->alloc_exclude_nr
);
1669 trans
->alloc_exclude_start
= ins
->objectid
;
1670 trans
->alloc_exclude_nr
= ins
->offset
;
1671 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1672 sizeof(extent_item
));
1674 trans
->alloc_exclude_start
= 0;
1675 trans
->alloc_exclude_nr
= 0;
1678 path
= btrfs_alloc_path();
1680 ret
= btrfs_insert_extent_backref(trans
, extent_root
, path
,
1681 ins
->objectid
, root_objectid
,
1682 ref_generation
, owner
, owner_offset
);
1685 btrfs_free_path(path
);
1686 finish_current_insert(trans
, extent_root
);
1687 pending_ret
= del_pending_extents(trans
, extent_root
);
1697 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
1707 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1708 struct btrfs_root
*root
,
1710 u64 root_objectid
, u64 hint
,
1716 ref_generation
= trans
->transid
;
1721 return __btrfs_alloc_free_block(trans
, root
, blocksize
, root_objectid
,
1722 ref_generation
, 0, 0, hint
, empty_size
);
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
1729 struct extent_buffer
*__btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1730 struct btrfs_root
*root
,
1739 struct btrfs_key ins
;
1741 struct extent_buffer
*buf
;
1743 ret
= btrfs_alloc_extent(trans
, root
, blocksize
,
1744 root_objectid
, ref_generation
,
1745 level
, first_objectid
, empty_size
, hint
,
1749 return ERR_PTR(ret
);
1751 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
, blocksize
);
1753 btrfs_free_extent(trans
, root
, ins
.objectid
, blocksize
,
1754 root
->root_key
.objectid
, ref_generation
,
1756 return ERR_PTR(-ENOMEM
);
1758 btrfs_set_buffer_uptodate(buf
);
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
	btrfs_set_buffer_defrag(buf);
1768 trans
->blocks_used
++;
1772 static int noinline
drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1773 struct btrfs_root
*root
,
1774 struct extent_buffer
*leaf
)
1777 u64 leaf_generation
;
1778 struct btrfs_key key
;
1779 struct btrfs_file_extent_item
*fi
;
1784 BUG_ON(!btrfs_is_leaf(leaf
));
1785 nritems
= btrfs_header_nritems(leaf
);
1786 leaf_owner
= btrfs_header_owner(leaf
);
1787 leaf_generation
= btrfs_header_generation(leaf
);
1789 for (i
= 0; i
< nritems
; i
++) {
1792 btrfs_item_key_to_cpu(leaf
, &key
, i
);
1793 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1795 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1796 if (btrfs_file_extent_type(leaf
, fi
) ==
1797 BTRFS_FILE_EXTENT_INLINE
)
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
1803 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
1804 if (disk_bytenr
== 0)
1806 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
,
1807 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
1808 leaf_owner
, leaf_generation
,
1809 key
.objectid
, key
.offset
, 0);
1815 static void noinline
reada_walk_down(struct btrfs_root
*root
,
1816 struct extent_buffer
*node
)
1826 nritems
= btrfs_header_nritems(node
);
1827 level
= btrfs_header_level(node
);
1828 for (i
= 0; i
< nritems
; i
++) {
1829 bytenr
= btrfs_node_blockptr(node
, i
);
1830 blocksize
= btrfs_level_size(root
, level
- 1);
1831 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
, &refs
);
1835 mutex_unlock(&root
->fs_info
->fs_mutex
);
1836 ret
= readahead_tree_block(root
, bytenr
, blocksize
);
1838 mutex_lock(&root
->fs_info
->fs_mutex
);
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
1848 static int noinline
walk_down_tree(struct btrfs_trans_handle
*trans
,
1849 struct btrfs_root
*root
,
1850 struct btrfs_path
*path
, int *level
)
1855 struct extent_buffer
*next
;
1856 struct extent_buffer
*cur
;
1857 struct extent_buffer
*parent
;
1862 WARN_ON(*level
< 0);
1863 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1864 ret
= lookup_extent_ref(trans
, root
,
1865 path
->nodes
[*level
]->start
,
1866 path
->nodes
[*level
]->len
, &refs
);
	 * walk down to the last node level and free all the leaves
1874 while(*level
>= 0) {
1875 WARN_ON(*level
< 0);
1876 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1877 cur
= path
->nodes
[*level
];
1879 if (*level
> 0 && path
->slots
[*level
] == 0)
1880 reada_walk_down(root
, cur
);
1882 if (btrfs_header_level(cur
) != *level
)
1885 if (path
->slots
[*level
] >=
1886 btrfs_header_nritems(cur
))
1889 ret
= drop_leaf_ref(trans
, root
, cur
);
1893 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1894 blocksize
= btrfs_level_size(root
, *level
- 1);
1895 ret
= lookup_extent_ref(trans
, root
, bytenr
, blocksize
, &refs
);
1898 parent
= path
->nodes
[*level
];
1899 root_owner
= btrfs_header_owner(parent
);
1900 root_gen
= btrfs_header_generation(parent
);
1901 path
->slots
[*level
]++;
1902 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1903 blocksize
, root_owner
,
1908 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1909 if (!next
|| !btrfs_buffer_uptodate(next
)) {
1910 free_extent_buffer(next
);
1911 mutex_unlock(&root
->fs_info
->fs_mutex
);
1912 next
= read_tree_block(root
, bytenr
, blocksize
);
1913 mutex_lock(&root
->fs_info
->fs_mutex
);
			/* we dropped the lock, check one more time */
1916 ret
= lookup_extent_ref(trans
, root
, bytenr
,
1920 parent
= path
->nodes
[*level
];
1921 root_owner
= btrfs_header_owner(parent
);
1922 root_gen
= btrfs_header_generation(parent
);
1924 path
->slots
[*level
]++;
1925 free_extent_buffer(next
);
1926 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1934 WARN_ON(*level
<= 0);
1935 if (path
->nodes
[*level
-1])
1936 free_extent_buffer(path
->nodes
[*level
-1]);
1937 path
->nodes
[*level
-1] = next
;
1938 *level
= btrfs_header_level(next
);
1939 path
->slots
[*level
] = 0;
1942 WARN_ON(*level
< 0);
1943 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1945 if (path
->nodes
[*level
] == root
->node
) {
1946 root_owner
= root
->root_key
.objectid
;
1947 parent
= path
->nodes
[*level
];
1949 parent
= path
->nodes
[*level
+ 1];
1950 root_owner
= btrfs_header_owner(parent
);
1953 root_gen
= btrfs_header_generation(parent
);
1954 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->start
,
1955 path
->nodes
[*level
]->len
,
1956 root_owner
, root_gen
, 0, 0, 1);
1957 free_extent_buffer(path
->nodes
[*level
]);
1958 path
->nodes
[*level
] = NULL
;
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
1969 static int noinline
walk_up_tree(struct btrfs_trans_handle
*trans
,
1970 struct btrfs_root
*root
,
1971 struct btrfs_path
*path
, int *level
)
1975 struct btrfs_root_item
*root_item
= &root
->root_item
;
1980 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1981 slot
= path
->slots
[i
];
1982 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
1983 struct extent_buffer
*node
;
1984 struct btrfs_disk_key disk_key
;
1985 node
= path
->nodes
[i
];
1988 WARN_ON(*level
== 0);
1989 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
1990 memcpy(&root_item
->drop_progress
,
1991 &disk_key
, sizeof(disk_key
));
1992 root_item
->drop_level
= i
;
1995 if (path
->nodes
[*level
] == root
->node
) {
1996 root_owner
= root
->root_key
.objectid
;
1998 btrfs_header_generation(path
->nodes
[*level
]);
2000 struct extent_buffer
*node
;
2001 node
= path
->nodes
[*level
+ 1];
2002 root_owner
= btrfs_header_owner(node
);
2003 root_gen
= btrfs_header_generation(node
);
2005 ret
= btrfs_free_extent(trans
, root
,
2006 path
->nodes
[*level
]->start
,
2007 path
->nodes
[*level
]->len
,
2008 root_owner
, root_gen
, 0, 0, 1);
2010 free_extent_buffer(path
->nodes
[*level
]);
2011 path
->nodes
[*level
] = NULL
;
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
2023 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
2029 struct btrfs_path
*path
;
2032 struct btrfs_root_item
*root_item
= &root
->root_item
;
2034 path
= btrfs_alloc_path();
2037 level
= btrfs_header_level(root
->node
);
2039 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
2040 path
->nodes
[level
] = root
->node
;
2041 extent_buffer_get(root
->node
);
2042 path
->slots
[level
] = 0;
2044 struct btrfs_key key
;
2045 struct btrfs_disk_key found_key
;
2046 struct extent_buffer
*node
;
2048 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
2049 level
= root_item
->drop_level
;
2050 path
->lowest_level
= level
;
2051 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
2056 node
= path
->nodes
[level
];
2057 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
2058 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
2059 sizeof(found_key
)));
2062 wret
= walk_down_tree(trans
, root
, path
, &level
);
2068 wret
= walk_up_tree(trans
, root
, path
, &level
);
2078 for (i
= 0; i
<= orig_level
; i
++) {
2079 if (path
->nodes
[i
]) {
2080 free_extent_buffer(path
->nodes
[i
]);
2081 path
->nodes
[i
] = NULL
;
2085 btrfs_free_path(path
);
2089 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
2096 ret
= find_first_extent_bit(&info
->block_group_cache
, 0,
2097 &start
, &end
, (unsigned int)-1);
2100 ret
= get_state_private(&info
->block_group_cache
, start
, &ptr
);
2102 kfree((void *)(unsigned long)ptr
);
2103 clear_extent_bits(&info
->block_group_cache
, start
,
2104 end
, (unsigned int)-1, GFP_NOFS
);
2107 ret
= find_first_extent_bit(&info
->free_space_cache
, 0,
2108 &start
, &end
, EXTENT_DIRTY
);
2111 clear_extent_dirty(&info
->free_space_cache
, start
,
2117 int btrfs_read_block_groups(struct btrfs_root
*root
)
2119 struct btrfs_path
*path
;
2123 struct btrfs_block_group_cache
*cache
;
2124 struct btrfs_fs_info
*info
= root
->fs_info
;
2125 struct extent_map_tree
*block_group_cache
;
2126 struct btrfs_key key
;
2127 struct btrfs_key found_key
;
2128 struct extent_buffer
*leaf
;
2130 block_group_cache
= &info
->block_group_cache
;
2132 root
= info
->extent_root
;
2134 key
.offset
= BTRFS_BLOCK_GROUP_SIZE
;
2135 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
2137 path
= btrfs_alloc_path();
2142 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
2148 leaf
= path
->nodes
[0];
2149 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
2150 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
2156 read_extent_buffer(leaf
, &cache
->item
,
2157 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
2158 sizeof(cache
->item
));
2159 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
2162 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
2163 btrfs_release_path(root
, path
);
2165 if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_MIXED
) {
2166 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
2167 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
2168 } else if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_DATA
) {
2169 bit
= BLOCK_GROUP_DATA
;
2170 cache
->data
= BTRFS_BLOCK_GROUP_DATA
;
2172 bit
= BLOCK_GROUP_METADATA
;
		/* use EXTENT_LOCKED to prevent merging */
2177 set_extent_bits(block_group_cache
, found_key
.objectid
,
2178 found_key
.objectid
+ found_key
.offset
- 1,
2179 bit
| EXTENT_LOCKED
, GFP_NOFS
);
2180 set_state_private(block_group_cache
, found_key
.objectid
,
2181 (unsigned long)cache
);
2184 btrfs_super_total_bytes(&info
->super_copy
))
2188 btrfs_free_path(path
);
2192 static int btrfs_insert_block_group(struct btrfs_trans_handle
*trans
,
2193 struct btrfs_root
*root
,
2194 struct btrfs_key
*key
,
2195 struct btrfs_block_group_item
*bi
)
2199 struct btrfs_root
*extent_root
;
2201 extent_root
= root
->fs_info
->extent_root
;
2202 ret
= btrfs_insert_item(trans
, extent_root
, key
, bi
, sizeof(*bi
));
2203 finish_current_insert(trans
, extent_root
);
2204 pending_ret
= del_pending_extents(trans
, extent_root
);
2212 int btrfs_make_block_groups(struct btrfs_trans_handle
*trans
,
2213 struct btrfs_root
*root
)
2222 struct btrfs_root
*extent_root
;
2223 struct btrfs_block_group_cache
*cache
;
2224 struct extent_map_tree
*block_group_cache
;
2226 extent_root
= root
->fs_info
->extent_root
;
2227 block_group_cache
= &root
->fs_info
->block_group_cache
;
2228 group_size
= BTRFS_BLOCK_GROUP_SIZE
;
2229 bytes_used
= btrfs_super_bytes_used(&root
->fs_info
->super_copy
);
2230 total_bytes
= btrfs_super_total_bytes(&root
->fs_info
->super_copy
);
2233 while (cur_start
< total_bytes
) {
2234 cache
= malloc(sizeof(*cache
));
2236 cache
->key
.objectid
= cur_start
;
2237 cache
->key
.offset
= group_size
;
2238 btrfs_set_key_type(&cache
->key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
2239 memset(&cache
->item
, 0, sizeof(cache
->item
));
2241 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
2243 bit
= BLOCK_GROUP_DATA
;
2245 cache
->item
.flags
|= BTRFS_BLOCK_GROUP_DATA
;
2247 bit
= BLOCK_GROUP_METADATA
;
2251 set_extent_bits(block_group_cache
, cur_start
,
2252 cur_start
+ group_size
- 1,
2253 bit
| EXTENT_LOCKED
, GFP_NOFS
);
2254 set_state_private(block_group_cache
, cur_start
,
2255 (unsigned long)cache
);
2256 cur_start
+= group_size
;
	/* then insert all the items */
2260 while(cur_start
< total_bytes
) {
2261 cache
= btrfs_lookup_block_group(root
->fs_info
, cur_start
);
2263 ret
= btrfs_insert_block_group(trans
, root
, &cache
->key
,
2266 cur_start
+= group_size
;
2271 u64
btrfs_hash_extent_ref(u64 root_objectid
, u64 ref_generation
,
2272 u64 owner
, u64 owner_offset
)
2274 return hash_extent_ref(root_objectid
, ref_generation
,
2275 owner
, owner_offset
);
2278 int btrfs_update_block_group(struct btrfs_trans_handle
*trans
,
2279 struct btrfs_root
*root
,
2280 u64 bytenr
, u64 num_bytes
, int alloc
,
2281 int mark_free
, int data
)
2283 return update_block_group(trans
, root
, bytenr
, num_bytes
,
2284 alloc
, mark_free
, data
);