/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "free-space-cache.h"
#include "transaction.h"
#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
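/*
 * Worked example of the two constants above, assuming PAGE_CACHE_SIZE and
 * the block group sectorsize are both 4K (typical, but not guaranteed):
 * one bitmap holds 4096 * 8 = 32768 bits, so a single bitmap page can
 * describe 32768 * 4K = 128M of block group space, while
 * MAX_CACHE_BYTES_PER_GIG caps the cache at roughly 32K of memory per 1GB
 * of disk space tracked.
 */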
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}
static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
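/*
 * Example of the rounding above, again assuming a 4K sectorsize so that one
 * bitmap covers 128M: an offset 200M past key.objectid maps to the bitmap
 * whose range begins at key.objectid + 128M, i.e. offsets are always rounded
 * down to the start of the bitmap that contains them.
 */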
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before it
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we
 * just want a section that is at least 'bytes' in size and comes at or after
 * the given offset.
 */
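/*
 * bitmap_only - judging from the lookup below, when this flag is set only a
 * bitmap entry found at exactly 'offset' is returned; an extent entry that
 * happens to share that offset is skipped over.
 */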
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
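/*
 * Typical uses of the helper above, as seen in the callers later in this
 * file: an exact extent lookup is tree_search_offset(block_group, offset,
 * 0, 0), while find_free_space() starts a fuzzy scan from the enclosing
 * bitmap with tree_search_offset(block_group,
 * offset_to_bitmap(block_group, offset), 0, 1).
 */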
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}
static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes, possible_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1GB of space
	 * at or below 32K, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
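	/*
	 * Rough numbers, assuming a 1GB block group and 4K pages: max_bytes
	 * works out to 32K; with one bitmap already allocated that leaves
	 * 32K - 4K = 28K for extent entries, i.e. an extents_thresh of
	 * 28K / sizeof(struct btrfs_free_space) entries before new free space
	 * starts being pushed into bitmaps instead.
	 */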
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
		(sizeof(struct btrfs_free_space) *
		 block_group->extents_thresh);

	if (possible_bytes > max_bytes) {
		int extent_bytes = max_bytes -
			(block_group->total_bitmaps * PAGE_CACHE_SIZE);

		if (extent_bytes <= 0) {
			block_group->extents_thresh = 0;
			return;
		}

		block_group->extents_thresh = extent_bytes /
			(sizeof(struct btrfs_free_space));
	}
}
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
static noinline int
remove_from_bitmap(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_space *bitmap_info,
		   u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap,
	 * so don't even bother to create a bitmap for this
	 */
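	/*
	 * With a 4K sectorsize, for instance, a single bitmap spans
	 * 32768 * 4K = 128M, so block groups smaller than that never get
	 * bitmaps and always fall back to plain extent entries.
	 */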
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
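	/*
	 * For example (illustrative offsets only): if [0, 4M) and [8M, 12M)
	 * are already free and we add [4M, 8M), the two neighbouring extent
	 * entries are unlinked below and a single [0, 12M) entry is linked
	 * back in.
	 */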
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;

		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}
/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it
	 * may be a good idea to point to the actual start of the free area in
	 * the bitmap, so do the offset_to_bitmap trick anyway, and set
	 * bitmap_only to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start, &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);
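	/*
	 * Worked example with made-up sizes: asking for bytes = 256K with
	 * empty_size = 768K in a data block group gives
	 * min_bytes = max(256K, 1M >> 2) = 256K, so individual extents
	 * smaller than 256K are skipped while the cluster is built below.
	 */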
	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}