/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

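/*
 * Worked example of the constants above (an illustration only, since
 * PAGE_CACHE_SIZE and the sectorsize both vary by config): with 4K pages,
 * one bitmap holds PAGE_CACHE_SIZE * 8 = 32768 bits.  At a 4K sectorsize
 * that is 32768 * 4K = 128MB of disk tracked by a single 4K page, versus
 * one struct btrfs_free_space per free range for the extent entries.
 */
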
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

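/*
 * Example of the rounding above, assuming 4K pages and a 4K sectorsize
 * (so 128MB covered per bitmap): in a block group whose key.objectid is
 * 1GB, an offset of 1GB + 200MB gives bitmap_start = 200MB / 128MB = 1,
 * then 1 * 128MB + 1GB, i.e. the bitmap covering [1GB + 128MB, 1GB + 256MB).
 */
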
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

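/*
 * The net effect of the tiebreak above: a given offset can appear in the
 * tree at most twice, once as an extent entry and once as a bitmap entry,
 * with the extent entry always sorting first (logically to the left).
 */
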
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

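/*
 * To summarize the search above: with bitmap_only set, only a bitmap entry
 * at exactly 'offset' is returned.  Otherwise we prefer an entry (extent
 * first, then bitmap) whose range covers 'offset'; with fuzzy set we fall
 * back to the first entry at or after 'offset', and we return NULL only
 * when nothing qualifies.
 */
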
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

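/*
 * Worked example, assuming 4K pages: a 1GB block group gets max_bytes = 32K.
 * With no bitmaps yet, bitmap_bytes = (0 + 1) * 4K = 4K, so extent_bytes =
 * min(32K - 4K, 32K / 2) = 16K, allowing roughly
 * 16K / sizeof(struct btrfs_free_space) extent entries before new free
 * ranges start being folded into bitmaps.
 */
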
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

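/*
 * search_bitmap() below does a first-fit scan: starting at *offset (clamped
 * to the start of the bitmap), it walks runs of set bits until it finds one
 * at least *bytes long.  On success it rewrites *offset and *bytes to the
 * start and full length of the run it found and returns 0, otherwise -1.
 */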
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

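/*
 * find_free_space() below starts from the bitmap-aligned offset so the scan
 * can pick up a bitmap that might cover *offset, then walks entries in
 * offset order, skipping anything smaller than *bytes; bitmap entries are
 * probed via search_bitmap().  On success *offset and *bytes describe the
 * space that was found.
 */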
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
		*block_group, u64 *offset,
		u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

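/*
 * The round-up above, worked through with 4K pages and a 4K sectorsize
 * (128MB covered per bitmap): a 256MB block group allows
 * (256MB + 128MB - 1) / 128MB = 2 bitmaps, enough to cover the whole group.
 */
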
static noinline int
remove_from_bitmap(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_space *bitmap_info,
		   u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

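/*
 * Return convention for insert_into_bitmap() below: 0 means the caller
 * should go on to add 'info' as a regular extent entry, 1 means the range
 * was absorbed into one or more bitmaps (and 'info' was consumed), and a
 * negative value is an allocation error.
 */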
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is, remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

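/*
 * Coalescing example for the code above: with existing extent entries
 * [4K, 8K) and [12K, 16K), freeing [8K, 12K) finds both neighbours, unlinks
 * them, and links a single [4K, 16K) entry in their place.
 */
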
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is\n",
	       count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

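/*
 * Note on the restart heuristic above: runs shorter than min_bytes worth of
 * bits are skipped, and if the window of accepted runs grows wider than
 * twice the number of bits we ultimately need, the accumulated window is
 * thrown away and the scan restarts from the current position.
 */
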
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node && !found_bitmap) {
			ret = -ENOSPC;
			goto out;
		} else if (!node) {
			break;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (next->bytes > max_extent)
				max_extent = next->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;

got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}