/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
31 u64 cache_soft_max
= 1024 * 1024 * 256;
32 u64 cache_hard_max
= 1 * 1024 * 1024 * 1024;
34 void extent_io_tree_init(struct extent_io_tree
*tree
)
36 cache_tree_init(&tree
->state
);
37 cache_tree_init(&tree
->cache
);
38 INIT_LIST_HEAD(&tree
->lru
);
/*
 * Allocate storage for one extent_state with malloc().
 *
 * NOTE(review): this listing still carries source line-number prefixes and
 * jumps from line 46 straight to line 55, so everything that followed the
 * malloc() call (presumably the NULL check, field setup and the return) is
 * not visible here -- restore it from the pristine file before relying on
 * this definition.
 */
42 static struct extent_state
*alloc_extent_state(void)
44 struct extent_state
*state
;
46 state
= malloc(sizeof(*state
));
/*
 * Release an extent_state.  The only surviving statement asserts, through
 * BUG_ON(), that state->refs has not gone negative.
 *
 * NOTE(review): lines 56-57 and 59-61 of the original are absent from this
 * listing; presumably they adjust the reference count and free the object
 * -- confirm against the pristine file.
 */
55 static void free_extent_state(struct extent_state
*state
)
58 BUG_ON(state
->refs
< 0);
/*
 * Tear down a tree: release every extent_buffer still on tree->lru, then
 * remove and free every extent_state stored in tree->state.
 *
 * NOTE(review): the listing has lost lines 71, 75-76, 78-79, 81-82 and
 * 86-88 (at least one condition around the leak message and the second
 * loop's framing), so the exact control flow is not visible here.
 */
63 void extent_io_tree_cleanup(struct extent_io_tree
*tree
)
65 struct extent_state
*es
;
66 struct extent_buffer
*eb
;
67 struct cache_extent
*cache
;
/* Drain the LRU list from its head until it is empty. */
69 while(!list_empty(&tree
->lru
)) {
70 eb
= list_entry(tree
->lru
.next
, struct extent_buffer
, lru
);
/* Report the buffer's start and length on stderr as a leak. */
72 fprintf(stderr
, "extent buffer leak: "
73 "start %llu len %u\n",
74 (unsigned long long)eb
->start
, eb
->len
);
77 free_extent_buffer(eb
);
/* Look up the first remaining state entry at or after offset 0. */
80 cache
= find_first_cache_extent(&tree
->state
, 0);
83 es
= container_of(cache
, struct extent_state
, cache_node
);
84 remove_cache_extent(&tree
->state
, &es
->cache_node
);
85 free_extent_state(es
);
89 static inline void update_extent_state(struct extent_state
*state
)
91 state
->cache_node
.start
= state
->start
;
92 state
->cache_node
.size
= state
->end
+ 1 - state
->start
;
/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set in their state
 * field are not merged.
 */
/*
 * Try to fold 'state' into its neighbours in tree->state.
 *
 * Previous neighbour: when it ends exactly one byte before state->start and
 * carries identical bits, 'state' takes over its start, the cache node is
 * refreshed, and the neighbour is removed from the tree and freed.
 * Next neighbour: when it starts exactly one byte after state->end and
 * carries identical bits, the neighbour takes over state->start and 'state'
 * itself is removed and freed.
 *
 * NOTE(review): lines 108, 111, 113, 120-121, 123, 125 and 132-135 are
 * missing from this listing, including whatever follows the EXTENT_IOBITS
 * test, the NULL checks on the neighbours and the return value.
 */
101 static int merge_state(struct extent_io_tree
*tree
,
102 struct extent_state
*state
)
104 struct extent_state
*other
;
105 struct cache_extent
*other_node
;
/* States with any EXTENT_IOBITS bit set are special-cased first. */
107 if (state
->state
& EXTENT_IOBITS
)
/* Candidate 1: the entry immediately before 'state'. */
110 other_node
= prev_cache_extent(&state
->cache_node
);
112 other
= container_of(other_node
, struct extent_state
,
114 if (other
->end
== state
->start
- 1 &&
115 other
->state
== state
->state
) {
116 state
->start
= other
->start
;
117 update_extent_state(state
);
118 remove_cache_extent(&tree
->state
, &other
->cache_node
);
119 free_extent_state(other
);
/* Candidate 2: the entry immediately after 'state'. */
122 other_node
= next_cache_extent(&state
->cache_node
);
124 other
= container_of(other_node
, struct extent_state
,
126 if (other
->start
== state
->end
+ 1 &&
127 other
->state
== state
->state
) {
128 other
->start
= state
->start
;
129 update_extent_state(other
);
130 remove_cache_extent(&tree
->state
, &state
->cache_node
);
131 free_extent_state(state
);
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 */
/*
 * Fill in 'state' and add it to tree->state: 'bits' are OR-ed into
 * state->state, the start is recorded, the embedded cache node is refreshed,
 * the node is inserted with insert_existing_cache_extent() and finally the
 * new entry is offered to merge_state().
 *
 * NOTE(review): lines 143-147, 150, 153 and 155 are missing from this
 * listing (the rest of the parameter list, the 'ret' declaration, the
 * assignment of the end offset and the return), so the handling of the
 * insert result is not visible here.
 */
141 static int insert_state(struct extent_io_tree
*tree
,
142 struct extent_state
*state
, u64 start
, u64 end
,
148 state
->state
|= bits
;
149 state
->start
= start
;
151 update_extent_state(state
);
152 ret
= insert_existing_cache_extent(&tree
->state
, &state
->cache_node
);
154 merge_state(tree
, state
);
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half (it receives the range
 * from the original start up to 'split' - 1).  'split' indicates an offset
 * inside 'orig' where it should be split.
 */
/*
 * Carve the front part off 'orig': 'prealloc' receives the range
 * [orig->start, split - 1] together with a copy of orig's bits, both cache
 * nodes are refreshed, and 'prealloc' is inserted into tree->state.
 *
 * NOTE(review): lines 165-166, 171 and 175-177 are missing from this
 * listing; presumably line 171 moves orig->start up to 'split' before orig's
 * cache node is refreshed, and the tail checks/returns 'ret' -- confirm
 * against the pristine file.
 */
163 static int split_state(struct extent_io_tree
*tree
, struct extent_state
*orig
,
164 struct extent_state
*prealloc
, u64 split
)
167 prealloc
->start
= orig
->start
;
168 prealloc
->end
= split
- 1;
169 prealloc
->state
= orig
->state
;
170 update_extent_state(prealloc
);
172 update_extent_state(orig
);
173 ret
= insert_existing_cache_extent(&tree
->state
,
174 &prealloc
->cache_node
);
/*
 * Clear some bits on a single extent state in the tree.
 */
182 static int clear_state_bit(struct extent_io_tree
*tree
,
183 struct extent_state
*state
, int bits
)
185 int ret
= state
->state
& bits
;
187 state
->state
&= ~bits
;
188 if (state
->state
== 0) {
189 remove_cache_extent(&tree
->state
, &state
->cache_node
);
190 free_extent_state(state
);
192 merge_state(tree
, state
);
/*
 * Clear some bits on a range in the tree.
 */
/*
 * Clear 'bits' on every extent_state overlapping [start, end].
 *
 * Visible steps: a spare extent_state is preallocated; the first entry found
 * by find_first_cache_extent() at 'start' is examined; an entry that begins
 * before 'start' is split at 'start'; an entry that reaches past 'end' is
 * split at end + 1 and the bits are cleared on the front half (which lives
 * in 'prealloc'); an entry fully inside the range has the bits cleared
 * directly and 'start' is advanced past it.  Each clear_state_bit() result
 * is OR-ed into 'set'.  'mask' is not referenced by any surviving line.
 *
 * NOTE(review): the listing has lost the declarations of 'last_end', 'err'
 * and 'set', every label/goto/return, and the NULL checks (gaps at 206-210,
 * 212-215, 220-221, 224, 245-247, 251, 253, 255-258, 267, 269-272, 276 and
 * 278-280), so the loop structure and return value are not visible here.
 */
200 int clear_extent_bits(struct extent_io_tree
*tree
, u64 start
,
201 u64 end
, int bits
, gfp_t mask
)
203 struct extent_state
*state
;
204 struct extent_state
*prealloc
= NULL
;
205 struct cache_extent
*node
;
211 prealloc
= alloc_extent_state();
216 * this search will find the extents that end after
219 node
= find_first_cache_extent(&tree
->state
, start
);
222 state
= container_of(node
, struct extent_state
, cache_node
);
223 if (state
->start
> end
)
225 last_end
= state
->end
;
228 * | ---- desired range ---- |
230 * | ------------- state -------------- |
232 * We need to split the extent we found, and may flip
233 * bits on second half.
235 * If the extent we found extends past our range, we
236 * just split and search again. It'll get split again
237 * the next time though.
239 * If the extent we found is inside our range, we clear
240 * the desired bit on it.
242 if (state
->start
< start
) {
243 err
= split_state(tree
, state
, prealloc
, start
);
244 BUG_ON(err
== -EEXIST
);
248 if (state
->end
<= end
) {
249 set
|= clear_state_bit(tree
, state
, bits
);
/* last_end == (u64)-1 means last_end + 1 below would wrap to 0. */
250 if (last_end
== (u64
)-1)
252 start
= last_end
+ 1;
254 start
= state
->start
;
259 * | ---- desired range ---- |
261 * We need to split the extent, and clear the bit
264 if (state
->start
<= end
&& state
->end
> end
) {
265 err
= split_state(tree
, state
, prealloc
, end
+ 1);
266 BUG_ON(err
== -EEXIST
);
268 set
|= clear_state_bit(tree
, prealloc
, bits
);
273 start
= state
->end
+ 1;
274 set
|= clear_state_bit(tree
, state
, bits
);
275 if (last_end
== (u64
)-1)
277 start
= last_end
+ 1;
281 free_extent_state(prealloc
);
/*
 * Set some bits on a range in the tree.
 */
/*
 * Set 'bits' on the whole range [start, end], creating, splitting and
 * merging extent_state entries as needed.
 *
 * Visible cases, in order:
 *  - the new range is inserted from 'prealloc' with insert_state()
 *    (NOTE(review): the condition guarding this insert is on a missing line);
 *  - an entry that starts exactly at 'start' and ends within the range just
 *    gets the bits OR-ed in and is offered to merge_state();
 *  - an entry that starts before 'start' is split at 'start';
 *  - an entry that starts after 'start' leaves a hole, which is filled by
 *    inserting 'prealloc' up to last_start - 1 (or a bound chosen on a
 *    missing line when end < last_start);
 *  - otherwise the entry reaches past 'end' and is split at end + 1.
 * 'mask' is not referenced by any surviving line.
 *
 * NOTE(review): the listing has lost the declarations of 'err',
 * 'last_start', 'last_end' and 'this_end', every label/goto/return and
 * several conditions, so the control flow and return value are not visible
 * here.  Also note that line 402 sets the bits on 'state' while line 403
 * merges 'prealloc'; after split_state() it is 'prealloc' that holds the
 * front half -- verify against the pristine file which object is intended.
 */
293 int set_extent_bits(struct extent_io_tree
*tree
, u64 start
,
294 u64 end
, int bits
, gfp_t mask
)
296 struct extent_state
*state
;
297 struct extent_state
*prealloc
= NULL
;
298 struct cache_extent
*node
;
303 prealloc
= alloc_extent_state();
308 * this search will find the extents that end after
311 node
= find_first_cache_extent(&tree
->state
, start
);
313 err
= insert_state(tree
, prealloc
, start
, end
, bits
);
314 BUG_ON(err
== -EEXIST
);
319 state
= container_of(node
, struct extent_state
, cache_node
);
320 last_start
= state
->start
;
321 last_end
= state
->end
;
324 * | ---- desired range ---- |
327 * Just lock what we found and keep going
329 if (state
->start
== start
&& state
->end
<= end
) {
330 state
->state
|= bits
;
331 merge_state(tree
, state
);
/* last_end == (u64)-1 means last_end + 1 below would wrap to 0. */
332 if (last_end
== (u64
)-1)
334 start
= last_end
+ 1;
338 * | ---- desired range ---- |
341 * | ------------- state -------------- |
343 * We need to split the extent we found, and may flip bits on
346 * If the extent we found extends past our
347 * range, we just split and search again. It'll get split
348 * again the next time though.
350 * If the extent we found is inside our range, we set the
353 if (state
->start
< start
) {
354 err
= split_state(tree
, state
, prealloc
, start
);
355 BUG_ON(err
== -EEXIST
);
359 if (state
->end
<= end
) {
360 state
->state
|= bits
;
361 start
= state
->end
+ 1;
362 merge_state(tree
, state
);
363 if (last_end
== (u64
)-1)
365 start
= last_end
+ 1;
367 start
= state
->start
;
372 * | ---- desired range ---- |
373 * | state | or | state |
375 * There's a hole, we need to insert something in it and
376 * ignore the extent we found.
378 if (state
->start
> start
) {
380 if (end
< last_start
)
383 this_end
= last_start
-1;
384 err
= insert_state(tree
, prealloc
, start
, this_end
,
386 BUG_ON(err
== -EEXIST
);
390 start
= this_end
+ 1;
394 * | ---- desired range ---- |
395 * | ---------- state ---------- |
396 * We need to split the extent, and set the bit
399 err
= split_state(tree
, state
, prealloc
, end
+ 1);
400 BUG_ON(err
== -EEXIST
);
402 state
->state
|= bits
;
403 merge_state(tree
, prealloc
);
407 free_extent_state(prealloc
);
415 int set_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
418 return set_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
421 int clear_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
424 return clear_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
/*
 * Search tree->state, starting at offset 'start', for an extent_state that
 * ends at or after 'start' and has at least one of 'bits' set.  On a match
 * its bounds are stored through 'start_ret' and 'end_ret'; otherwise the
 * search moves on with next_cache_extent().
 *
 * NOTE(review): lines 432-434, 439-442, 447-449 and 451 onwards are missing
 * from this listing (loop framing, NULL checks and the return value), so the
 * found/not-found return codes are not visible here.
 */
427 int find_first_extent_bit(struct extent_io_tree
*tree
, u64 start
,
428 u64
*start_ret
, u64
*end_ret
, int bits
)
430 struct cache_extent
*node
;
431 struct extent_state
*state
;
435 * this search will find all the extents that end after
438 node
= find_first_cache_extent(&tree
->state
, start
);
443 state
= container_of(node
, struct extent_state
, cache_node
);
444 if (state
->end
>= start
&& (state
->state
& bits
)) {
445 *start_ret
= state
->start
;
446 *end_ret
= state
->end
;
450 node
= next_cache_extent(node
);
/*
 * Walk the extent_state entries covering [start, end] and test them against
 * 'bits'.  When 'filled' is non-zero a gap before the current entry
 * (state->start > start) is treated specially; the walk ends once an entry
 * starts beyond 'end' or the entries run out.
 *
 * NOTE(review): lines 463-464, 470-472, 474, 476-482, 484-485 and 487
 * onwards are missing from this listing -- i.e. every assignment to the
 * result and every break/return -- so the exact meaning of the return value
 * for filled / non-filled queries is not visible here.
 */
458 int test_range_bit(struct extent_io_tree
*tree
, u64 start
, u64 end
,
459 int bits
, int filled
)
461 struct extent_state
*state
= NULL
;
462 struct cache_extent
*node
;
465 node
= find_first_cache_extent(&tree
->state
, start
);
466 while (node
&& start
<= end
) {
467 state
= container_of(node
, struct extent_state
, cache_node
);
/* A hole in front of this entry matters only for "filled" queries. */
469 if (filled
&& state
->start
> start
) {
473 if (state
->start
> end
)
475 if (state
->state
& bits
) {
/* Continue the scan right after the entry just examined. */
483 start
= state
->end
+ 1;
486 node
= next_cache_extent(node
);
/*
 * Store 'private' in the extent_state found for offset 'start'.  The entry
 * returned by find_first_cache_extent() is checked for beginning exactly at
 * 'start' before the value is written.
 *
 * NOTE(review): lines 500-501, 503-506, 509-511 and 513-515 are missing from
 * this listing (result variable, NULL check, the body of the mismatch branch
 * and the return), so the error codes are not visible here.
 */
496 int set_state_private(struct extent_io_tree
*tree
, u64 start
, u64
private)
498 struct cache_extent
*node
;
499 struct extent_state
*state
;
502 node
= find_first_cache_extent(&tree
->state
, start
);
507 state
= container_of(node
, struct extent_state
, cache_node
);
508 if (state
->start
!= start
) {
512 state
->private = private;
/*
 * Read back the private value of the extent_state found for offset 'start'
 * and store it through 'private'.  The entry returned by
 * find_first_cache_extent() is checked for beginning exactly at 'start'
 * before the value is copied out.
 *
 * NOTE(review): lines 521-522, 524-527, 530-532 and 534-536 are missing from
 * this listing (result variable, NULL check, the body of the mismatch branch
 * and the return), so the error codes are not visible here.
 */
517 int get_state_private(struct extent_io_tree
*tree
, u64 start
, u64
*private)
519 struct cache_extent
*node
;
520 struct extent_state
*state
;
523 node
= find_first_cache_extent(&tree
->state
, start
);
528 state
= container_of(node
, struct extent_state
, cache_node
);
529 if (state
->start
!= start
) {
533 *private = state
->private;
/*
 * Trim the extent_buffer cache.  Nothing is scanned while tree->cache_size
 * is below cache_soft_max; otherwise the LRU list is walked with
 * list_for_each_safe(), buffers are handed to free_extent_buffer() or moved
 * to the list tail, and the walk is bounded by a scan counter together with
 * the cache_hard_max limit.
 *
 * NOTE(review): lines 539-540, 543, 545-546, 549, 552-553, 555 and 557
 * onwards are missing from this listing (the 'nrscan' declaration, the
 * condition selecting which buffers are freed, and all break/return
 * statements), so the eviction criteria are not fully visible here.
 */
538 static int free_some_buffers(struct extent_io_tree
*tree
)
541 struct extent_buffer
*eb
;
542 struct list_head
*node
, *next
;
544 if (tree
->cache_size
< cache_soft_max
)
547 list_for_each_safe(node
, next
, &tree
->lru
) {
548 eb
= list_entry(node
, struct extent_buffer
, lru
);
550 free_extent_buffer(eb
);
551 if (tree
->cache_size
< cache_hard_max
)
554 list_move_tail(&eb
->lru
, &tree
->lru
);
/* After more than 64 entries, stop as soon as we are under the hard cap. */
556 if (nrscan
++ > 64 && tree
->cache_size
< cache_hard_max
)
/*
 * Allocate a zero-filled extent_buffer with 'blocksize' bytes of storage
 * appended to the struct, key its cache node by (bytenr, blocksize), trim the
 * cache via free_some_buffers(), insert the buffer into tree->cache, append
 * it to the LRU list and account 'blocksize' in tree->cache_size.
 *
 * NOTE(review): lines 566-567, 569-572, 574-580, 584, 587-590 and 593-594 are
 * missing from this listing (the 'ret' declaration, the allocation-failure
 * path, most field initialisation, the insert-failure path and the return),
 * so ownership on failure is not visible here.
 */
562 static struct extent_buffer
*__alloc_extent_buffer(struct extent_io_tree
*tree
,
563 u64 bytenr
, u32 blocksize
)
565 struct extent_buffer
*eb
;
/* One allocation holds both the header and the data area. */
568 eb
= malloc(sizeof(struct extent_buffer
) + blocksize
);
573 memset(eb
, 0, sizeof(struct extent_buffer
) + blocksize
);
/* (u64)-1 marks the device offset as not assigned yet. */
581 eb
->dev_bytenr
= (u64
)-1;
582 eb
->cache_node
.start
= bytenr
;
583 eb
->cache_node
.size
= blocksize
;
585 free_some_buffers(tree
);
586 ret
= insert_existing_cache_extent(&tree
->cache
, &eb
->cache_node
);
591 list_add_tail(&eb
->lru
, &tree
->lru
);
592 tree
->cache_size
+= blocksize
;
/*
 * Release an extent_buffer.  The surviving teardown path asserts that the
 * buffer is not dirty, unlinks it from the LRU list, removes its node from
 * tree->cache and subtracts eb->len from tree->cache_size.
 *
 * NOTE(review): lines 597-601, 603 and 610-612 are missing from this listing
 * (presumably a NULL check, the reference-count decrement, the condition
 * guarding the teardown and the final free) -- confirm against the pristine
 * file before relying on when the teardown runs.
 */
596 void free_extent_buffer(struct extent_buffer
*eb
)
602 BUG_ON(eb
->refs
< 0);
604 struct extent_io_tree
*tree
= eb
->tree
;
/* A buffer that is still dirty must never be torn down. */
605 BUG_ON(eb
->flags
& EXTENT_DIRTY
);
606 list_del_init(&eb
->lru
);
607 remove_cache_extent(&tree
->cache
, &eb
->cache_node
);
/* Guard the unsigned subtraction below against underflow. */
608 BUG_ON(tree
->cache_size
< eb
->len
);
609 tree
->cache_size
-= eb
->len
;
/*
 * Look up a cached extent_buffer that matches (bytenr, blocksize) exactly.
 * On a hit the buffer is moved to the tail of the LRU list.  'eb' starts out
 * as NULL, so a miss leaves it NULL.
 *
 * NOTE(review): lines 624-627 are missing from this listing (whatever is
 * done to the buffer after the LRU update, and the return statement).
 */
614 struct extent_buffer
*find_extent_buffer(struct extent_io_tree
*tree
,
615 u64 bytenr
, u32 blocksize
)
617 struct extent_buffer
*eb
= NULL
;
618 struct cache_extent
*cache
;
620 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
/* Only an entry with identical start and size counts as a hit. */
621 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
622 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
623 list_move_tail(&eb
->lru
, &tree
->lru
);
/*
 * Look up the first cached extent_buffer found by find_first_cache_extent()
 * for offset 'start' and move it to the tail of the LRU list.
 *
 * NOTE(review): lines 630-631, 636 and 639-642 are missing from this listing
 * (the rest of the parameter list that declares 'start', the check on the
 * lookup result, and the return statement).
 */
629 struct extent_buffer
*find_first_extent_buffer(struct extent_io_tree
*tree
,
632 struct extent_buffer
*eb
= NULL
;
633 struct cache_extent
*cache
;
635 cache
= find_first_cache_extent(&tree
->cache
, start
);
637 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
638 list_move_tail(&eb
->lru
, &tree
->lru
);
/*
 * Return an extent_buffer for (bytenr, blocksize).  An exact match in
 * tree->cache is reused and moved to the tail of the LRU list; the surviving
 * lines also show a path that releases an existing buffer with
 * free_extent_buffer() and a path that creates a fresh one through
 * __alloc_extent_buffer().
 *
 * NOTE(review): lines 654-656, 658, 660 and 662-664 are missing from this
 * listing (the branch structure between the three paths, any reference-count
 * update and the return), so the condition under which a stale overlapping
 * buffer is dropped is not visible here.
 */
644 struct extent_buffer
*alloc_extent_buffer(struct extent_io_tree
*tree
,
645 u64 bytenr
, u32 blocksize
)
647 struct extent_buffer
*eb
;
648 struct cache_extent
*cache
;
650 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
/* Only an entry with identical start and size counts as a hit. */
651 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
652 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
653 list_move_tail(&eb
->lru
, &tree
->lru
);
657 eb
= container_of(cache
, struct extent_buffer
,
659 free_extent_buffer(eb
);
661 eb
= __alloc_extent_buffer(tree
, bytenr
, blocksize
);
/*
 * Fill eb->data by reading eb->len bytes from eb->fd at offset
 * eb->dev_bytenr with pread(), then compare the byte count returned with
 * eb->len to detect a short read.
 *
 * NOTE(review): lines 667-668, 670-671, 673 onwards are missing from this
 * listing (the 'ret' declaration, the handling of a negative result, the
 * body of the short-read branch and the return value).
 */
666 int read_extent_from_disk(struct extent_buffer
*eb
)
669 ret
= pread(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
672 if (ret
!= eb
->len
) {
/*
 * Write eb->len bytes from eb->data to eb->fd at offset eb->dev_bytenr with
 * pwrite(), then compare the byte count returned with eb->len to detect a
 * short write.
 *
 * NOTE(review): lines 682-683, 685-686, 688 onwards are missing from this
 * listing (the 'ret' declaration, the handling of a negative result, the
 * body of the short-write branch and the return value).
 */
681 int write_extent_to_disk(struct extent_buffer
*eb
)
684 ret
= pwrite(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
687 if (ret
!= eb
->len
) {
/*
 * Set the EXTENT_UPTODATE flag on 'eb'.
 *
 * NOTE(review): the braces and the return statement (lines 697, 699-700) are
 * missing from this listing, so the returned value is not visible here.
 */
696 int set_extent_buffer_uptodate(struct extent_buffer
*eb
)
698 eb
->flags
|= EXTENT_UPTODATE
;
/*
 * Clear the EXTENT_UPTODATE flag on 'eb'.  'tree' is not referenced by the
 * surviving statement.
 *
 * NOTE(review): the braces and the return statement (lines 704, 706-707) are
 * missing from this listing, so the returned value is not visible here.
 */
702 int clear_extent_buffer_uptodate(struct extent_io_tree
*tree
,
703 struct extent_buffer
*eb
)
705 eb
->flags
&= ~EXTENT_UPTODATE
;
/*
 * Test whether 'eb' has the EXTENT_UPTODATE flag set.
 *
 * NOTE(review): lines 710-713 and 715-717 are missing from this listing
 * (whatever precedes the flag test, and both return statements), so the
 * exact values returned for set / not set are not visible here.
 */
709 int extent_buffer_uptodate(struct extent_buffer
*eb
)
714 if (eb
->flags
& EXTENT_UPTODATE
)
/*
 * Mark 'eb' dirty.  If the buffer is not dirty yet, EXTENT_DIRTY is set in
 * eb->flags, the byte range [eb->start, eb->start + eb->len - 1] is marked
 * dirty in the owning tree, and extent_buffer_get() is called on the buffer.
 * An already-dirty buffer is left untouched by the surviving lines.
 *
 * NOTE(review): lines 720 and 726-728 (braces and the return statement) are
 * missing from this listing, so the returned value is not visible here.
 */
719 int set_extent_buffer_dirty(struct extent_buffer
*eb
)
721 struct extent_io_tree
*tree
= eb
->tree
;
722 if (!(eb
->flags
& EXTENT_DIRTY
)) {
723 eb
->flags
|= EXTENT_DIRTY
;
724 set_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
/* Paired with the free_extent_buffer() call made when the flag is cleared. */
725 extent_buffer_get(eb
);
/*
 * Remove the dirty mark from 'eb'.  If the buffer is dirty, EXTENT_DIRTY is
 * cleared in eb->flags, the byte range [eb->start, eb->start + eb->len - 1]
 * is cleaned in the owning tree, and free_extent_buffer() is called on the
 * buffer.  A clean buffer is left untouched by the surviving lines.
 *
 * NOTE(review): lines 731 and 737-739 (braces and the return statement) are
 * missing from this listing, so the returned value is not visible here.
 */
730 int clear_extent_buffer_dirty(struct extent_buffer
*eb
)
732 struct extent_io_tree
*tree
= eb
->tree
;
733 if (eb
->flags
& EXTENT_DIRTY
) {
734 eb
->flags
&= ~EXTENT_DIRTY
;
735 clear_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
/* Paired with the extent_buffer_get() call made when the flag is set. */
736 free_extent_buffer(eb
);
741 int memcmp_extent_buffer(struct extent_buffer
*eb
, const void *ptrv
,
742 unsigned long start
, unsigned long len
)
744 return memcmp(eb
->data
+ start
, ptrv
, len
);
747 void read_extent_buffer(struct extent_buffer
*eb
, void *dst
,
748 unsigned long start
, unsigned long len
)
750 memcpy(dst
, eb
->data
+ start
, len
);
753 void write_extent_buffer(struct extent_buffer
*eb
, const void *src
,
754 unsigned long start
, unsigned long len
)
756 memcpy(eb
->data
+ start
, src
, len
);
759 void copy_extent_buffer(struct extent_buffer
*dst
, struct extent_buffer
*src
,
760 unsigned long dst_offset
, unsigned long src_offset
,
763 memcpy(dst
->data
+ dst_offset
, src
->data
+ src_offset
, len
);
766 void memcpy_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
767 unsigned long src_offset
, unsigned long len
)
769 memcpy(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
772 void memmove_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
773 unsigned long src_offset
, unsigned long len
)
775 memmove(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
778 void memset_extent_buffer(struct extent_buffer
*eb
, char c
,
779 unsigned long start
, unsigned long len
)
781 memset(eb
->data
+ start
, c
, len
);