/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#define _XOPEN_SOURCE 600
#include <sys/types.h>
#include "kerncompat.h"
#include "extent_io.h"
static u64 cache_soft_max = 1024 * 1024 * 256;
static u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
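/*
 * Eviction thresholds for the extent buffer cache: free_some_buffers()
 * below does nothing while tree->cache_size stays under cache_soft_max
 * (256MiB); once eviction starts, a scan stops again as soon as the
 * cache drops back under cache_hard_max (1GiB), or after examining
 * roughly 64 buffers.
 */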
void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}
static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->cache_node.objectid = 0;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}
static void btrfs_free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}
static void free_extent_state_func(struct cache_extent *cache)
{
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
}
void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs != 1) {
			fprintf(stderr, "extent buffer leak: "
				"start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			eb->refs = 1;
		}
		free_extent_buffer(eb);
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}
/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS in their state field
 * are not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			btrfs_free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}
/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}
/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}
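/*
 * Example (sketch): splitting [0, 8191] at offset 4096 leaves 'prealloc'
 * covering [0, 4095] and 'orig' shrunk to [4096, 8191]; both halves keep
 * the original state bits.
 */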
/*
 * clear some bits in a single extent state struct.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}
/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit
	 * on the first half.
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}
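/*
 * Example (sketch): if the tree holds a single EXTENT_DIRTY state for
 * [0, 16383], then clear_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, 0)
 * splits it so that [0, 4095] and [8192, 16383] stay dirty while the
 * middle state is removed; the return value has EXTENT_DIRTY set because
 * the bit was actually cleared somewhere in the range.
 */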
/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_start;
	u64 last_end;
	int err = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half.
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	/* after the split, 'prealloc' is the first half [start, end] */
	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}
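/*
 * Example (sketch): starting from an empty tree,
 * set_extent_bits(tree, 0, 4095, EXTENT_DIRTY, 0) inserts a fresh
 * [0, 4095] state; a second call for [4096, 8191] inserts the neighbor
 * and merge_state() collapses the two into one [0, 8191] dirty state.
 */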
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			goto out;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}
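/*
 * Example (sketch): callers typically walk every dirty range with
 *
 *	u64 start = 0, found_start, found_end;
 *	while (!find_first_extent_bit(tree, start, &found_start,
 *				      &found_end, EXTENT_DIRTY)) {
 *		... process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 */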
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled && bitset && state->end < end)
				bitset = 0;
			break;
		}
	}
	return bitset;
}
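/*
 * Example (sketch): with only [0, 4095] dirty,
 * test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 1) returns 0 because the
 * range is not fully covered, while the same call with filled == 0
 * returns 1 since at least one state in the range carries the bit.
 */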
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}
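/*
 * Example (sketch): the private field is an opaque u64 keyed by the exact
 * start of a state; a caller could stash a per-block value with
 * set_state_private(tree, bytenr, val) and read it back later with
 * get_state_private(tree, bytenr, &val). Both return -ENOENT when no
 * state starts exactly at 'bytenr'.
 */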
static int free_some_buffers(struct extent_io_tree *tree)
{
	u32 nrscan = 0;
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_soft_max)
		return 0;

	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		if (eb->refs == 1) {
			free_extent_buffer(eb);
			if (tree->cache_size < cache_hard_max)
				break;
		} else {
			list_move_tail(&eb->lru, &tree->lru);
		}
		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
			break;
	}
	return 0;
}
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	int ret;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	if (!eb)
		return NULL;
	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 2;	/* one for the caller, one for the LRU cache */
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	INIT_LIST_HEAD(&eb->recow);

	free_some_buffers(tree);
	ret = insert_cache_extent(&tree->cache, &eb->cache_node);
	if (ret) {
		free(eb);
		return NULL;
	}
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	return eb;
}
void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb)
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		list_del_init(&eb->recow);
		remove_cache_extent(&tree->cache, &eb->cache_node);
		BUG_ON(tree->cache_size < eb->len);
		tree->cache_size -= eb->len;
		free(eb);
	}
}
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
	}
	return eb;
}
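/*
 * Example (sketch): alloc_extent_buffer() is the usual entry point; it
 * returns the cached buffer (taking an extra reference) when
 * [bytenr, bytenr + blocksize) is already present, releases a reference
 * to a partially overlapping stale buffer, and otherwise allocates a
 * fresh one via __alloc_extent_buffer().
 */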
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
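/*
 * Example (sketch): a metadata block is read and rewritten through its
 * buffer, assuming eb->fd and eb->dev_bytenr were set up by the caller:
 *
 *	if (read_extent_from_disk(eb, 0, eb->len) == 0) {
 *		... modify eb->data ...
 *		ret = write_extent_to_disk(eb);
 *	}
 */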
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
				      &read_len, &multi, mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd == 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return ret;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}

	return 0;
}
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->leafsize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			BUG_ON(!eb);
			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					kfree(multi);
					return -errno;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		kfree(multi);
		multi = NULL;

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;
	}

	return 0;
}
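/*
 * Note: read_data_from_disk() always reads stripe 0 of the mapping, while
 * write_data_to_disk() writes every stripe (or defers to
 * write_raid56_with_parity() when btrfs_map_block() hands back a
 * raid_map), which is why only the write side loops over
 * multi->num_stripes.
 */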
int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	eb->flags |= EXTENT_UPTODATE;
	return 0;
}

int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
				 struct extent_buffer *eb)
{
	eb->flags &= ~EXTENT_UPTODATE;
	return 0;
}

int extent_buffer_uptodate(struct extent_buffer *eb)
{
	if (!eb)
		return 0;

	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return 0;
}
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
	}
	return 0;
}
int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
	}
	return 0;
}
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}
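/*
 * Note: memmove_extent_buffer() copies within a single buffer, so it uses
 * memmove() to stay safe when the source and destination ranges overlap;
 * copy_extent_buffer() copies between two different buffers and can use
 * plain memcpy().
 */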