/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
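
/*
 * Worked example (assuming 4K pages and a 4K sectorsize, the common x86
 * configuration; both values are per-system, not guaranteed): one bitmap
 * occupies a single page, so BITS_PER_BITMAP = 4096 * 8 = 32768 bits, and
 * with one bit per sector a single bitmap covers 32768 * 4096 bytes =
 * 128MB of the block group.  MAX_CACHE_BYTES_PER_GIG then caps the cache
 * memory at 32K per 1GB of disk space tracked.
 */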

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
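
/*
 * Worked example (hypothetical numbers, assuming the 128MB-per-bitmap
 * figure from above): for a block group whose key.objectid starts at 1GB,
 * offset_to_bitmap(bg, 1GB + 200MB) computes 200MB / 128MB = 1, then
 * 1 * 128MB + 1GB = 1GB + 128MB, i.e. the start of the second bitmap
 * window, which is the window the passed offset falls into.
 */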

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
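
/*
 * Usage sketch (hypothetical tree contents): given a 4K extent entry at
 * offset 0, tree_search_offset(block_group, 2048, 0, 0) returns that
 * extent because [0, 4096) still covers the requested offset.  With
 * bitmap_only set, only an entry whose offset matches exactly is
 * considered, and an extent sharing that offset is skipped in favor of
 * the bitmap sorted directly after it.
 */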

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
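
/*
 * Worked example (assuming a 1GB block group, 4K pages, and no bitmaps
 * allocated yet): max_bytes = 32K * 1 = 32768; bitmap_bytes accounts for
 * one extra bitmap, (0 + 1) * 4096 = 4096; extent_bytes = min(32768 - 4096,
 * 32768 / 2) = 16384, so extents_thresh ends up at 16384 /
 * sizeof(struct btrfs_free_space) extent entries before new free space
 * starts being folded into bitmaps.
 */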

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
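
/*
 * Worked example (assuming a 4K sectorsize): setting 8K of free space at
 * 12K past the start of a bitmap translates to start = 12K / 4K = bit 3
 * and end = 3 + (8K / 4K) = bit 5, so bits 3 and 4 are set and the byte
 * counters are bumped by 8192.
 */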

static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
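
/*
 * Search sketch (hypothetical bitmap contents, 4K sectors): asking for
 * *bytes = 16K means bits = 4, so the loop hops from one run of set bits
 * to the next until it finds a run of at least 4 consecutive bits; if
 * that run starts at bit 100, *offset becomes bitmap start + 100 * 4096
 * and *bytes is widened to the full run length in bytes.
 */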

static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
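
/*
 * Worked example (assuming 128MB covered per bitmap, as above): a 1GB
 * block group allows max_bitmaps = (1GB + 128MB - 1) / 128MB = 8, the
 * round-up making sure a block group that isn't a multiple of the bitmap
 * granularity still gets a final, partially-used bitmap.
 */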

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
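
/*
 * Removal sketch (hypothetical range): freeing a range that starts inside
 * one bitmap but runs past its last byte takes the first branch above,
 * clearing only the tail of that bitmap (end - *offset + 1 bytes),
 * advancing *offset to end + 1, and looping via the next bitmap entry
 * until *bytes reaches zero or a non-bitmap entry is hit, in which case
 * -EAGAIN hands control back to the extent removal path.
 */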

static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
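
/*
 * Merge example (hypothetical offsets): if [99MB, 100MB) is already free
 * and [100MB, 101MB) is added, the left neighbor is found via
 * tree_search_offset(block_group, offset - 1, 0, 0), unlinked, and the
 * new info grows to cover [99MB, 101MB) before being linked back in, so
 * adjacent frees collapse into a single extent entry.
 */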

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}
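
/*
 * Caller's-eye sketch: the search asks for bytes + empty_size so the
 * allocation has the requested slack around it, but only 'bytes' is
 * actually consumed from the entry that comes back; a hit in a bitmap
 * clears just those bits, while a hit in an extent entry trims the
 * extent's front and relinks whatever remains.
 */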

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start, &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);
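
	/*
	 * Worked example (hypothetical sizes): with bytes = 1MB and
	 * empty_size = 3MB, ssd_spread demands the full 4MB window;
	 * metadata while delayed refs are flushing asks for
	 * max(1MB, 4MB >> 1) = 2MB, metadata otherwise only
	 * max(1MB, 4MB >> 4) = 1MB, and data asks for
	 * max(1MB, 4MB >> 2) = 1MB.
	 */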

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, lets fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}