3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 #include <sys/types.h>
25 #include "kerncompat.h"
26 #include "extent_io.h"
31 void extent_io_tree_init(struct extent_io_tree
*tree
)
33 cache_tree_init(&tree
->state
);
34 cache_tree_init(&tree
->cache
);
35 INIT_LIST_HEAD(&tree
->lru
);
39 static struct extent_state
*alloc_extent_state(void)
41 struct extent_state
*state
;
43 state
= malloc(sizeof(*state
));
46 state
->cache_node
.objectid
= 0;
53 static void btrfs_free_extent_state(struct extent_state
*state
)
56 BUG_ON(state
->refs
< 0);
61 static void free_extent_state_func(struct cache_extent
*cache
)
63 struct extent_state
*es
;
65 es
= container_of(cache
, struct extent_state
, cache_node
);
66 btrfs_free_extent_state(es
);
69 void extent_io_tree_cleanup(struct extent_io_tree
*tree
)
71 struct extent_buffer
*eb
;
73 while(!list_empty(&tree
->lru
)) {
74 eb
= list_entry(tree
->lru
.next
, struct extent_buffer
, lru
);
75 fprintf(stderr
, "extent buffer leak: "
76 "start %llu len %u\n",
77 (unsigned long long)eb
->start
, eb
->len
);
78 free_extent_buffer(eb
);
81 cache_tree_free_extents(&tree
->state
, free_extent_state_func
);
84 static inline void update_extent_state(struct extent_state
*state
)
86 state
->cache_node
.start
= state
->start
;
87 state
->cache_node
.size
= state
->end
+ 1 - state
->start
;
91 * Utility function to look for merge candidates inside a given range.
92 * Any extents with matching state are merged together into a single
93 * extent in the tree. Extents with EXTENT_IO in their state field are
96 static int merge_state(struct extent_io_tree
*tree
,
97 struct extent_state
*state
)
99 struct extent_state
*other
;
100 struct cache_extent
*other_node
;
102 if (state
->state
& EXTENT_IOBITS
)
105 other_node
= prev_cache_extent(&state
->cache_node
);
107 other
= container_of(other_node
, struct extent_state
,
109 if (other
->end
== state
->start
- 1 &&
110 other
->state
== state
->state
) {
111 state
->start
= other
->start
;
112 update_extent_state(state
);
113 remove_cache_extent(&tree
->state
, &other
->cache_node
);
114 btrfs_free_extent_state(other
);
117 other_node
= next_cache_extent(&state
->cache_node
);
119 other
= container_of(other_node
, struct extent_state
,
121 if (other
->start
== state
->end
+ 1 &&
122 other
->state
== state
->state
) {
123 other
->start
= state
->start
;
124 update_extent_state(other
);
125 remove_cache_extent(&tree
->state
, &state
->cache_node
);
126 btrfs_free_extent_state(state
);
133 * insert an extent_state struct into the tree. 'bits' are set on the
134 * struct before it is inserted.
136 static int insert_state(struct extent_io_tree
*tree
,
137 struct extent_state
*state
, u64 start
, u64 end
,
143 state
->state
|= bits
;
144 state
->start
= start
;
146 update_extent_state(state
);
147 ret
= insert_cache_extent(&tree
->state
, &state
->cache_node
);
149 merge_state(tree
, state
);
154 * split a given extent state struct in two, inserting the preallocated
155 * struct 'prealloc' as the newly created second half. 'split' indicates an
156 * offset inside 'orig' where it should be split.
158 static int split_state(struct extent_io_tree
*tree
, struct extent_state
*orig
,
159 struct extent_state
*prealloc
, u64 split
)
162 prealloc
->start
= orig
->start
;
163 prealloc
->end
= split
- 1;
164 prealloc
->state
= orig
->state
;
165 update_extent_state(prealloc
);
167 update_extent_state(orig
);
168 ret
= insert_cache_extent(&tree
->state
, &prealloc
->cache_node
);
174 * clear the given bits on a single extent state in the tree.
176 static int clear_state_bit(struct extent_io_tree
*tree
,
177 struct extent_state
*state
, int bits
)
179 int ret
= state
->state
& bits
;
181 state
->state
&= ~bits
;
182 if (state
->state
== 0) {
183 remove_cache_extent(&tree
->state
, &state
->cache_node
);
184 btrfs_free_extent_state(state
);
186 merge_state(tree
, state
);
192 * clear some bits on a range in the tree.
194 int clear_extent_bits(struct extent_io_tree
*tree
, u64 start
,
195 u64 end
, int bits
, gfp_t mask
)
197 struct extent_state
*state
;
198 struct extent_state
*prealloc
= NULL
;
199 struct cache_extent
*node
;
206 prealloc
= alloc_extent_state();
212 * this search will find the extents that end after
215 node
= search_cache_extent(&tree
->state
, start
);
218 state
= container_of(node
, struct extent_state
, cache_node
);
219 if (state
->start
> end
)
221 last_end
= state
->end
;
224 * | ---- desired range ---- |
226 * | ------------- state -------------- |
228 * We need to split the extent we found, and may flip
229 * bits on second half.
231 * If the extent we found extends past our range, we
232 * just split and search again. It'll get split again
233 * the next time though.
235 * If the extent we found is inside our range, we clear
236 * the desired bit on it.
238 if (state
->start
< start
) {
239 err
= split_state(tree
, state
, prealloc
, start
);
240 BUG_ON(err
== -EEXIST
);
244 if (state
->end
<= end
) {
245 set
|= clear_state_bit(tree
, state
, bits
);
246 if (last_end
== (u64
)-1)
248 start
= last_end
+ 1;
250 start
= state
->start
;
255 * | ---- desired range ---- |
257 * We need to split the extent, and clear the bit
260 if (state
->start
<= end
&& state
->end
> end
) {
261 err
= split_state(tree
, state
, prealloc
, end
+ 1);
262 BUG_ON(err
== -EEXIST
);
264 set
|= clear_state_bit(tree
, prealloc
, bits
);
269 start
= state
->end
+ 1;
270 set
|= clear_state_bit(tree
, state
, bits
);
271 if (last_end
== (u64
)-1)
273 start
= last_end
+ 1;
277 btrfs_free_extent_state(prealloc
);
287 * set some bits on a range in the tree.
289 int set_extent_bits(struct extent_io_tree
*tree
, u64 start
,
290 u64 end
, int bits
, gfp_t mask
)
292 struct extent_state
*state
;
293 struct extent_state
*prealloc
= NULL
;
294 struct cache_extent
*node
;
300 prealloc
= alloc_extent_state();
306 * this search will find the extents that end after
309 node
= search_cache_extent(&tree
->state
, start
);
311 err
= insert_state(tree
, prealloc
, start
, end
, bits
);
312 BUG_ON(err
== -EEXIST
);
317 state
= container_of(node
, struct extent_state
, cache_node
);
318 last_start
= state
->start
;
319 last_end
= state
->end
;
322 * | ---- desired range ---- |
325 * Just lock what we found and keep going
327 if (state
->start
== start
&& state
->end
<= end
) {
328 state
->state
|= bits
;
329 merge_state(tree
, state
);
330 if (last_end
== (u64
)-1)
332 start
= last_end
+ 1;
336 * | ---- desired range ---- |
339 * | ------------- state -------------- |
341 * We need to split the extent we found, and may flip bits on
344 * If the extent we found extends past our
345 * range, we just split and search again. It'll get split
346 * again the next time though.
348 * If the extent we found is inside our range, we set the
351 if (state
->start
< start
) {
352 err
= split_state(tree
, state
, prealloc
, start
);
353 BUG_ON(err
== -EEXIST
);
357 if (state
->end
<= end
) {
358 state
->state
|= bits
;
359 start
= state
->end
+ 1;
360 merge_state(tree
, state
);
361 if (last_end
== (u64
)-1)
363 start
= last_end
+ 1;
365 start
= state
->start
;
370 * | ---- desired range ---- |
371 * | state | or | state |
373 * There's a hole, we need to insert something in it and
374 * ignore the extent we found.
376 if (state
->start
> start
) {
378 if (end
< last_start
)
381 this_end
= last_start
-1;
382 err
= insert_state(tree
, prealloc
, start
, this_end
,
384 BUG_ON(err
== -EEXIST
);
388 start
= this_end
+ 1;
392 * | ---- desired range ---- |
393 * | ---------- state ---------- |
394 * We need to split the extent, and set the bit
397 err
= split_state(tree
, state
, prealloc
, end
+ 1);
398 BUG_ON(err
== -EEXIST
);
400 state
->state
|= bits
;
401 merge_state(tree
, prealloc
);
405 btrfs_free_extent_state(prealloc
);
413 int set_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
416 return set_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
419 int clear_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
422 return clear_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
425 int find_first_extent_bit(struct extent_io_tree
*tree
, u64 start
,
426 u64
*start_ret
, u64
*end_ret
, int bits
)
428 struct cache_extent
*node
;
429 struct extent_state
*state
;
433 * this search will find all the extents that end after
436 node
= search_cache_extent(&tree
->state
, start
);
441 state
= container_of(node
, struct extent_state
, cache_node
);
442 if (state
->end
>= start
&& (state
->state
& bits
)) {
443 *start_ret
= state
->start
;
444 *end_ret
= state
->end
;
448 node
= next_cache_extent(node
);
456 int test_range_bit(struct extent_io_tree
*tree
, u64 start
, u64 end
,
457 int bits
, int filled
)
459 struct extent_state
*state
= NULL
;
460 struct cache_extent
*node
;
463 node
= search_cache_extent(&tree
->state
, start
);
464 while (node
&& start
<= end
) {
465 state
= container_of(node
, struct extent_state
, cache_node
);
467 if (filled
&& state
->start
> start
) {
471 if (state
->start
> end
)
473 if (state
->state
& bits
) {
481 start
= state
->end
+ 1;
484 node
= next_cache_extent(node
);
494 int set_state_private(struct extent_io_tree
*tree
, u64 start
, u64
private)
496 struct cache_extent
*node
;
497 struct extent_state
*state
;
500 node
= search_cache_extent(&tree
->state
, start
);
505 state
= container_of(node
, struct extent_state
, cache_node
);
506 if (state
->start
!= start
) {
510 state
->xprivate
= private;
515 int get_state_private(struct extent_io_tree
*tree
, u64 start
, u64
*private)
517 struct cache_extent
*node
;
518 struct extent_state
*state
;
521 node
= search_cache_extent(&tree
->state
, start
);
526 state
= container_of(node
, struct extent_state
, cache_node
);
527 if (state
->start
!= start
) {
531 *private = state
->xprivate
;
536 static struct extent_buffer
*__alloc_extent_buffer(struct extent_io_tree
*tree
,
537 u64 bytenr
, u32 blocksize
)
539 struct extent_buffer
*eb
;
541 eb
= calloc(1, sizeof(struct extent_buffer
) + blocksize
);
553 eb
->dev_bytenr
= (u64
)-1;
554 eb
->cache_node
.start
= bytenr
;
555 eb
->cache_node
.size
= blocksize
;
556 INIT_LIST_HEAD(&eb
->recow
);
561 struct extent_buffer
*btrfs_clone_extent_buffer(struct extent_buffer
*src
)
563 struct extent_buffer
*new;
565 new = __alloc_extent_buffer(NULL
, src
->start
, src
->len
);
569 copy_extent_buffer(new, src
, 0, 0, src
->len
);
570 new->flags
|= EXTENT_BUFFER_DUMMY
;
575 void free_extent_buffer(struct extent_buffer
*eb
)
577 if (!eb
|| IS_ERR(eb
))
581 BUG_ON(eb
->refs
< 0);
583 struct extent_io_tree
*tree
= eb
->tree
;
584 BUG_ON(eb
->flags
& EXTENT_DIRTY
);
585 list_del_init(&eb
->lru
);
586 list_del_init(&eb
->recow
);
587 if (!(eb
->flags
& EXTENT_BUFFER_DUMMY
)) {
588 BUG_ON(tree
->cache_size
< eb
->len
);
589 remove_cache_extent(&tree
->cache
, &eb
->cache_node
);
590 tree
->cache_size
-= eb
->len
;
596 struct extent_buffer
*find_extent_buffer(struct extent_io_tree
*tree
,
597 u64 bytenr
, u32 blocksize
)
599 struct extent_buffer
*eb
= NULL
;
600 struct cache_extent
*cache
;
602 cache
= lookup_cache_extent(&tree
->cache
, bytenr
, blocksize
);
603 if (cache
&& cache
->start
== bytenr
&&
604 cache
->size
== blocksize
) {
605 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
606 list_move_tail(&eb
->lru
, &tree
->lru
);
612 struct extent_buffer
*find_first_extent_buffer(struct extent_io_tree
*tree
,
615 struct extent_buffer
*eb
= NULL
;
616 struct cache_extent
*cache
;
618 cache
= search_cache_extent(&tree
->cache
, start
);
620 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
621 list_move_tail(&eb
->lru
, &tree
->lru
);
627 struct extent_buffer
*alloc_extent_buffer(struct extent_io_tree
*tree
,
628 u64 bytenr
, u32 blocksize
)
630 struct extent_buffer
*eb
;
631 struct cache_extent
*cache
;
633 cache
= lookup_cache_extent(&tree
->cache
, bytenr
, blocksize
);
634 if (cache
&& cache
->start
== bytenr
&&
635 cache
->size
== blocksize
) {
636 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
637 list_move_tail(&eb
->lru
, &tree
->lru
);
643 eb
= container_of(cache
, struct extent_buffer
,
645 free_extent_buffer(eb
);
647 eb
= __alloc_extent_buffer(tree
, bytenr
, blocksize
);
650 ret
= insert_cache_extent(&tree
->cache
, &eb
->cache_node
);
655 list_add_tail(&eb
->lru
, &tree
->lru
);
656 tree
->cache_size
+= blocksize
;
661 int read_extent_from_disk(struct extent_buffer
*eb
,
662 unsigned long offset
, unsigned long len
)
665 ret
= pread(eb
->fd
, eb
->data
+ offset
, len
, eb
->dev_bytenr
);
679 int write_extent_to_disk(struct extent_buffer
*eb
)
682 ret
= pwrite(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
685 if (ret
!= eb
->len
) {
694 int read_data_from_disk(struct btrfs_fs_info
*info
, void *buf
, u64 offset
,
695 u64 bytes
, int mirror
)
697 struct btrfs_multi_bio
*multi
= NULL
;
698 struct btrfs_device
*device
;
699 u64 bytes_left
= bytes
;
705 read_len
= bytes_left
;
706 ret
= btrfs_map_block(&info
->mapping_tree
, READ
, offset
,
707 &read_len
, &multi
, mirror
, NULL
);
709 fprintf(stderr
, "Couldn't map the block %Lu\n",
713 device
= multi
->stripes
[0].dev
;
715 read_len
= min(bytes_left
, read_len
);
716 if (device
->fd
<= 0) {
721 ret
= pread(device
->fd
, buf
+ total_read
, read_len
,
722 multi
->stripes
[0].physical
);
725 fprintf(stderr
, "Error reading %Lu, %d\n", offset
,
729 if (ret
!= read_len
) {
730 fprintf(stderr
, "Short read for %Lu, read %d, "
731 "read_len %Lu\n", offset
, ret
, read_len
);
735 bytes_left
-= read_len
;
737 total_read
+= read_len
;
743 int write_data_to_disk(struct btrfs_fs_info
*info
, void *buf
, u64 offset
,
744 u64 bytes
, int mirror
)
746 struct btrfs_multi_bio
*multi
= NULL
;
747 struct btrfs_device
*device
;
748 u64 bytes_left
= bytes
;
751 u64
*raid_map
= NULL
;
756 while (bytes_left
> 0) {
757 this_len
= bytes_left
;
760 ret
= btrfs_map_block(&info
->mapping_tree
, WRITE
, offset
,
761 &this_len
, &multi
, mirror
, &raid_map
);
763 fprintf(stderr
, "Couldn't map the block %Lu\n",
769 struct extent_buffer
*eb
;
770 u64 stripe_len
= this_len
;
772 this_len
= min(this_len
, bytes_left
);
773 this_len
= min(this_len
, (u64
)info
->tree_root
->leafsize
);
775 eb
= malloc(sizeof(struct extent_buffer
) + this_len
);
778 memset(eb
, 0, sizeof(struct extent_buffer
) + this_len
);
782 memcpy(eb
->data
, buf
+ total_write
, this_len
);
783 ret
= write_raid56_with_parity(info
, eb
, multi
,
784 stripe_len
, raid_map
);
790 } else while (dev_nr
< multi
->num_stripes
) {
791 device
= multi
->stripes
[dev_nr
].dev
;
792 if (device
->fd
<= 0) {
797 dev_bytenr
= multi
->stripes
[dev_nr
].physical
;
798 this_len
= min(this_len
, bytes_left
);
801 ret
= pwrite(device
->fd
, buf
+ total_write
, this_len
, dev_bytenr
);
802 if (ret
!= this_len
) {
804 fprintf(stderr
, "Error writing to "
805 "device %d\n", errno
);
810 fprintf(stderr
, "Short write\n");
817 BUG_ON(bytes_left
< this_len
);
819 bytes_left
-= this_len
;
821 total_write
+= this_len
;
829 int set_extent_buffer_dirty(struct extent_buffer
*eb
)
831 struct extent_io_tree
*tree
= eb
->tree
;
832 if (!(eb
->flags
& EXTENT_DIRTY
)) {
833 eb
->flags
|= EXTENT_DIRTY
;
834 set_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
835 extent_buffer_get(eb
);
840 int clear_extent_buffer_dirty(struct extent_buffer
*eb
)
842 struct extent_io_tree
*tree
= eb
->tree
;
843 if (eb
->flags
& EXTENT_DIRTY
) {
844 eb
->flags
&= ~EXTENT_DIRTY
;
845 clear_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
846 free_extent_buffer(eb
);
851 int memcmp_extent_buffer(struct extent_buffer
*eb
, const void *ptrv
,
852 unsigned long start
, unsigned long len
)
854 return memcmp(eb
->data
+ start
, ptrv
, len
);
857 void read_extent_buffer(struct extent_buffer
*eb
, void *dst
,
858 unsigned long start
, unsigned long len
)
860 memcpy(dst
, eb
->data
+ start
, len
);
863 void write_extent_buffer(struct extent_buffer
*eb
, const void *src
,
864 unsigned long start
, unsigned long len
)
866 memcpy(eb
->data
+ start
, src
, len
);
869 void copy_extent_buffer(struct extent_buffer
*dst
, struct extent_buffer
*src
,
870 unsigned long dst_offset
, unsigned long src_offset
,
873 memcpy(dst
->data
+ dst_offset
, src
->data
+ src_offset
, len
);
876 void memmove_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
877 unsigned long src_offset
, unsigned long len
)
879 memmove(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
882 void memset_extent_buffer(struct extent_buffer
*eb
, char c
,
883 unsigned long start
, unsigned long len
)
885 memset(eb
->data
+ start
, c
, len
);
888 int extent_buffer_test_bit(struct extent_buffer
*eb
, unsigned long start
,
891 return test_bit(nr
, (unsigned long *)(eb
->data
+ start
));