/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "kerncompat.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "crc32c.h"
#include "bitops.h"

/*
 * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
 * anything like that in userspace and have to get the value from the
 * superblock.
 */
#define BITS_PER_BITMAP(sectorsize)	((sectorsize) * 8)
#define MAX_CACHE_BYTES_PER_GIG		(32 * 1024)

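/*
 * With ctl->unit set to the sector size (see btrfs_init_free_space_ctl()
 * below), one bitmap entry covers BITS_PER_BITMAP(sectorsize) * unit bytes
 * of a block group, e.g. 4096 * 8 * 4096 = 128MiB with 4KiB sectors.
 */
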
static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void merge_space_tree(struct btrfs_free_space_ctl *ctl);

struct io_ctl {
	void *cur, *orig;
	void *buffer;
	struct btrfs_root *root;
	unsigned long size;
	u64 total_size;
	int index;
	int num_pages;
	unsigned check_crcs:1;
};

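/*
 * struct io_ctl above is a purely in-memory cursor over the free space
 * cache file: the whole file is read into ->buffer and then consumed one
 * sectorsize-sized "page" at a time through io_ctl_map_page() and
 * io_ctl_unmap_page().
 */
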
static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize;
	io_ctl->buffer = kzalloc(size, GFP_NOFS);
	if (!io_ctl->buffer)
		return -ENOMEM;
	io_ctl->total_size = size;
	io_ctl->root = root;
	if (ino != BTRFS_FREE_INO_OBJECTID)
		io_ctl->check_crcs = 1;
	return 0;
}

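/*
 * Note: a "page" here is a sectorsize-sized chunk of the cache file, not a
 * kernel page; num_pages above is simply the file size rounded up to a
 * whole number of sectors.
 */
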
static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->buffer);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	/* nothing is really mapped in userspace; just drop the cursor */
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * io_ctl->root->sectorsize);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = io_ctl->root->sectorsize;
	if (clear)
		memset(io_ctl->cur, 0, io_ctl->root->sectorsize);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	io_ctl_unmap_page(io_ctl);
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
				struct btrfs_path *path, u64 ino)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, len;
	u64 total_read = 0;
	int ret = 0;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		printf("Couldn't find file extent item for free space inode"
		       " %Lu\n", ino);
		btrfs_release_path(path);
		return -EINVAL;
	}

	while (total_read < io_ctl->total_size) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				ret = -EINVAL;
				break;
			}
		}
		leaf = path->nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != ino) {
			ret = -EINVAL;
			break;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			ret = -EINVAL;
			break;
		}

		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(path->nodes[0], fi) !=
		    BTRFS_FILE_EXTENT_REG) {
			printf("Not the file extent type we wanted\n");
			ret = -EINVAL;
			break;
		}

		/* read the extent contents into the in-memory buffer */
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) +
			btrfs_file_extent_offset(leaf, fi);
		len = btrfs_file_extent_num_bytes(leaf, fi);
		ret = read_data_from_disk(root->fs_info,
					  io_ctl->buffer + key.offset, bytenr,
					  len, 0);
		if (ret)
			break;
		total_read += len;
		path->slots[0]++;
	}

	btrfs_release_path(path);
	return ret;
}

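/*
 * Layout of the cache file as consumed below: with crcs enabled, the first
 * page starts with an array of one u32 crc per page followed by the u64
 * generation; the free space entries and bitmaps follow on later pages.
 */
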
static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	__le64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk("btrfs: space cache generation "
		       "(%Lu) does not match inode (%Lu)\n", *gen,
		       generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

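/*
 * Each page's crc32c is computed over the page contents (for page 0 the
 * stored crc array itself is skipped via 'offset') and finalized with
 * btrfs_csum_final() before being compared against the stored value.
 */
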
static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = io_ctl->buffer;
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk("btrfs: csum mismatch on free space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

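/*
 * A sketch of the read path implemented by __load_free_space_cache() below:
 *
 *	io_ctl_init(&io_ctl, inode_size, ino, root);
 *	io_ctl_prepare_pages(&io_ctl, root, path, ino);
 *	io_ctl_check_crc(&io_ctl, 0);
 *	io_ctl_check_generation(&io_ctl, generation);
 *	while (num_entries--)
 *		io_ctl_read_entry(&io_ctl, e, &type);
 *	io_ctl_read_bitmap(&io_ctl, e);		(for each bitmap entry)
 *	io_ctl_drop_pages(&io_ctl);
 *	io_ctl_free(&io_ctl);
 */
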
static int __load_free_space_cache(struct btrfs_root *root,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_key inode_location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u64 inode_size;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		return 0;
	} else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&inode_location, &disk_key);
	btrfs_release_path(path);

	ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
	if (ret) {
		printf("Couldn't find free space inode %d\n", ret);
		return 0;
	}

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);

	inode_size = btrfs_inode_size(leaf, inode_item);
	if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
		btrfs_release_path(path);
		return 0;
	}

	if (btrfs_inode_generation(leaf, inode_item) != generation) {
		printf("free space inode generation (%llu) did not match "
		       "free space cache generation (%llu)\n",
		       (unsigned long long)btrfs_inode_generation(leaf,
								   inode_item),
		       (unsigned long long)generation);
		btrfs_release_path(path);
		return 0;
	}

	btrfs_release_path(path);

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root);
	if (ret)
		return ret;

	ret = io_ctl_prepare_pages(&io_ctl, root, path,
				   inode_location.objectid);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = calloc(1, sizeof(*e));
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			free(e);
			goto free_cache;
		}

		if (!e->bytes) {
			free(e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			ret = link_free_space(ctl, e);
			if (ret) {
				printf("Duplicate entries in free space cache, dumping");
				free(e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
			if (!e->bitmap) {
				free(e);
				goto free_cache;
			}
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			if (ret) {
				printf("Duplicate entries in free space cache, dumping");
				free(e->bitmap);
				free(e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * Read the bitmap contents last, after every bitmap entry has
	 * already been linked into the cache above.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	ret = __load_free_space_cache(fs_info->tree_root, ctl, path,
				      block_group->key.objectid);
	btrfs_free_path(path);

	if (ret < 0) {
		ret = 0;
		printf("failed to load free space cache for block group %llu\n",
		       (unsigned long long)block_group->key.objectid);
	}

	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(offset / unit);
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(bytes / unit);
}

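/*
 * offset_to_bitmap() below rounds an offset down to the start of the bitmap
 * that covers it, relative to ctl->start.  For example, with 128MiB per
 * bitmap and ctl->start at 1GiB, an offset of 1GiB + 200MiB maps to the
 * bitmap starting at 1GiB + 128MiB.
 */
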
static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;
	u32 sectorsize = ctl->sectorsize;

	bytes_per_bitmap = BITS_PER_BITMAP(sectorsize) * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = bitmap_start / bytes_per_bitmap;
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

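/*
 * Free space entries (extents and bitmaps) share a single rb-tree keyed by
 * offset; tree_insert_offset() resolves the one allowed collision, an extent
 * entry and a bitmap entry starting at the same offset.
 */
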
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before this
			 * entry logically.
			 */
			if (bitmap) {
				if (info->bitmap)
					return -EEXIST;
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap)
					return -EEXIST;
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;
	u32 sectorsize = ctl->sectorsize;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = rb_prev(&entry->offset_index);
			if (n) {
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap &&
				    prev->offset + prev->bytes > offset)
					entry = prev;
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap &&
			    prev->offset + prev->bytes > offset)
				return prev;
		}
		if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP(sectorsize) *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

void unlink_free_space(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	return ret;
}

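/*
 * search_bitmap() scans bitmap_info for a run of set bits at least *bytes
 * long, starting at or after *offset; on success the run's start and length
 * (both in bytes) are returned through *offset and *bytes.
 */
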
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	u32 sectorsize = ctl->sectorsize;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP(sectorsize), i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

struct btrfs_free_space *
btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes)
{
	return tree_search_offset(ctl, offset, 0, 0);
}

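/*
 * Example of the coalescing done below: with existing extent entries
 * [4M, 8M) and [12M, 16M), adding [8M, 12M) first absorbs the right
 * neighbour and then the left one, leaving a single [4M, 16M) entry.
 */
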
static void try_merge_free_space(struct btrfs_free_space_ctl *ctl,
				 struct btrfs_free_space *info)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		free(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		free(left_info);
	}
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes && !block_group->ro)
			count++;
		printk("entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk("%d blocks of free space at or bigger than %llu bytes\n",
	       count, (unsigned long long)bytes);
}

int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
			      int sectorsize)
{
	struct btrfs_free_space_ctl *ctl;

	ctl = calloc(1, sizeof(*ctl));
	if (!ctl)
		return -ENOMEM;

	ctl->sectorsize = sectorsize;
	ctl->unit = sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	block_group->free_space_ctl = ctl;

	return 0;
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(ctl, info);
		free(info->bitmap);
		free(info);
	}
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}

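/*
 * btrfs_add_free_space() below is how merge_space_tree() re-inserts the
 * ranges it pulls out of bitmaps: the new range is coalesced with its
 * neighbours via try_merge_free_space() and then linked into the tree.
 */
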
static int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
				u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = calloc(1, sizeof(*info));
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	try_merge_free_space(ctl, info);

	ret = link_free_space(ctl, info);
	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

/*
 * Merges all the free space cache and kills the bitmap entries since we just
 * want to use the free space cache to verify it's correct, no reason to keep
 * the bitmaps around to confuse things.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *e, *prev = NULL;
	struct rb_node *n;
	int ret;
	u32 sectorsize = ctl->sectorsize;

again:
	prev = NULL;
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		e = rb_entry(n, struct btrfs_free_space, offset_index);
		if (e->bitmap) {
			u64 offset = e->offset, bytes = ctl->unit;
			u64 end;

			end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit);

			unlink_free_space(ctl, e);
			while (!(search_bitmap(ctl, e, &offset, &bytes))) {
				ret = btrfs_add_free_space(ctl, offset,
							   bytes);
				BUG_ON(ret);
				offset += bytes;
				if (offset >= end)
					break;
				bytes = ctl->unit;
			}
			free(e->bitmap);
			free(e);
			goto again;
		}
		if (!prev)
			goto next;
		if (e->bitmap || prev->bitmap)
			goto next;
		if (prev->offset + prev->bytes == e->offset) {
			unlink_free_space(ctl, prev);
			unlink_free_space(ctl, e);
			prev->bytes += e->bytes;
			free(e);
			link_free_space(ctl, prev);
			goto again;
		}
next:
		prev = e;
	}
}