2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #define _XOPEN_SOURCE 600
22 #include <sys/types.h>
26 #include "kerncompat.h"
27 #include "extent_io.h"
30 u64 cache_max
= 1024 * 1024 * 32;
32 void extent_io_tree_init(struct extent_io_tree
*tree
)
34 cache_tree_init(&tree
->state
);
35 cache_tree_init(&tree
->cache
);
36 INIT_LIST_HEAD(&tree
->lru
);
/*
 * Obtain storage for one struct extent_state with malloc().
 *
 * NOTE(review): this listing jumps from embedded line 44 straight to 53, so
 * whatever follows the malloc() (error check, field set-up, return statement)
 * is not visible here -- confirm against the complete file.
 */
40 static struct extent_state
*alloc_extent_state(void)
42 struct extent_state
*state
;
44 state
= malloc(sizeof(*state
));
/*
 * Release helper for an extent_state.  The only visible statement asserts
 * that state->refs has not become negative.
 *
 * NOTE(review): embedded lines 54-55 and 57-59 are absent from this listing,
 * so any reference-count update and the actual release of the memory are not
 * shown -- confirm against the complete file.
 */
53 static void free_extent_state(struct extent_state
*state
)
56 BUG_ON(state
->refs
< 0);
/*
 * Tear down a tree.
 *
 * Phase 1: while tree->lru is non-empty, take the first extent buffer on the
 * list, print an "extent buffer leak" message with its start and length to
 * stderr and hand it to free_extent_buffer().
 * Phase 2: look up the first cache extent at offset 0 in tree->state, remove
 * the owning extent_state from the tree and free it.
 *
 * NOTE(review): embedded lines 69, 73-74, 76-77, 79-80 and 84-85 are missing
 * from this listing.  Any condition guarding the leak message, the loop
 * around phase 2 and its termination test are therefore not visible --
 * confirm against the complete file.
 */
61 void extent_io_tree_cleanup(struct extent_io_tree
*tree
)
63 struct extent_state
*es
;
64 struct extent_buffer
*eb
;
65 struct cache_extent
*cache
;
67 while(!list_empty(&tree
->lru
)) {
68 eb
= list_entry(tree
->lru
.next
, struct extent_buffer
, lru
);
70 fprintf(stderr
, "extent buffer leak: "
71 "start %llu len %u\n",
72 (unsigned long long)eb
->start
, eb
->len
);
75 free_extent_buffer(eb
);
78 cache
= find_first_cache_extent(&tree
->state
, 0);
81 es
= container_of(cache
, struct extent_state
, cache_node
);
82 remove_cache_extent(&tree
->state
, &es
->cache_node
);
83 free_extent_state(es
);
87 static inline void update_extent_state(struct extent_state
*state
)
89 state
->cache_node
.start
= state
->start
;
90 state
->cache_node
.size
= state
->end
+ 1 - state
->start
;
94 * Utility function to look for merge candidates inside a given range.
95 * Any extents with matching state are merged together into a single
96 * extent in the tree. Extents with EXTENT_IO in their state field are
/*
 * Try to coalesce 'state' with its direct neighbours in tree->state.
 *
 * Visible logic:
 *  - state->state is tested against EXTENT_IOBITS (the consequence of that
 *    test is on a line missing from this listing).
 *  - Previous neighbour: when it ends exactly one byte before state->start
 *    and carries identical state bits, 'state' is extended backwards to
 *    other->start, its cache key is refreshed, and the neighbour is removed
 *    from the tree and freed.
 *  - Next neighbour: when it begins at state->end + 1 with identical state
 *    bits, the neighbour takes over state->start, its cache key is
 *    refreshed, and 'state' ITSELF is removed from the tree and passed to
 *    free_extent_state().  Callers must not rely on 'state' after the call.
 *
 * NOTE(review): embedded lines 106-107, 109, 111, 118-119, 121, 123 and
 * 130-134 are missing, so NULL checks on other_node, the last argument of
 * both container_of() calls and the return statements are not visible.
 */
99 static int merge_state(struct extent_io_tree
*tree
,
100 struct extent_state
*state
)
102 struct extent_state
*other
;
103 struct cache_extent
*other_node
;
105 if (state
->state
& EXTENT_IOBITS
)
108 other_node
= prev_cache_extent(&state
->cache_node
);
110 other
= container_of(other_node
, struct extent_state
,
112 if (other
->end
== state
->start
- 1 &&
113 other
->state
== state
->state
) {
114 state
->start
= other
->start
;
115 update_extent_state(state
);
116 remove_cache_extent(&tree
->state
, &other
->cache_node
);
117 free_extent_state(other
);
120 other_node
= next_cache_extent(&state
->cache_node
);
122 other
= container_of(other_node
, struct extent_state
,
124 if (other
->start
== state
->end
+ 1 &&
125 other
->state
== state
->state
) {
126 other
->start
= state
->start
;
127 update_extent_state(other
);
128 remove_cache_extent(&tree
->state
, &state
->cache_node
);
129 free_extent_state(state
);
136 * insert an extent_state struct into the tree. 'bits' are set on the
137 * struct before it is inserted.
/*
 * Fill in 'state' and add it to tree->state.
 *
 * Visible steps: OR 'bits' into state->state, record 'start', refresh the
 * embedded cache key with update_extent_state(), insert the key with
 * insert_existing_cache_extent() (result kept in 'ret'), then call
 * merge_state() on the new entry.
 *
 * NOTE(review): embedded lines 141-145, 148, 151 and 153-155 are missing.
 * The remainder of the parameter list (the body uses 'bits'), the local
 * declarations, the statement storing 'end', the handling of 'ret' and the
 * return statement are therefore not visible here.
 */
139 static int insert_state(struct extent_io_tree
*tree
,
140 struct extent_state
*state
, u64 start
, u64 end
,
146 state
->state
|= bits
;
147 state
->start
= start
;
149 update_extent_state(state
);
150 ret
= insert_existing_cache_extent(&tree
->state
, &state
->cache_node
);
152 merge_state(tree
, state
);
157 * split a given extent state struct in two, inserting the preallocated
158 * struct 'prealloc' as the newly created first half. 'split' indicates an
159 * offset inside 'orig' where it should be split.
/*
 * Split 'orig' at offset 'split' using the caller-provided 'prealloc'.
 *
 * Visible steps: 'prealloc' is given the range [orig->start, split - 1] and
 * a copy of orig->state, its cache key is refreshed, and it is inserted into
 * tree->state with insert_existing_cache_extent() (result kept in 'ret').
 * update_extent_state(orig) is called as well, i.e. orig's key is refreshed
 * after orig has been modified.
 *
 * NOTE(review): embedded lines 163-164, 169 and 173-176 are missing, so the
 * local declarations, the statement that adjusts 'orig' before its key is
 * refreshed (presumably moving orig->start to 'split' -- confirm), the
 * handling of 'ret' and the return statement are not visible.
 */
161 static int split_state(struct extent_io_tree
*tree
, struct extent_state
*orig
,
162 struct extent_state
*prealloc
, u64 split
)
165 prealloc
->start
= orig
->start
;
166 prealloc
->end
= split
- 1;
167 prealloc
->state
= orig
->state
;
168 update_extent_state(prealloc
);
170 update_extent_state(orig
);
171 ret
= insert_existing_cache_extent(&tree
->state
,
172 &prealloc
->cache_node
);
178 * clear some bits on a range in the tree.
/*
 * Clear 'bits' on a single extent_state.
 *
 * 'ret' captures which of the requested bits were set before the update.
 * After masking the bits out, a state left with no bits at all is removed
 * from tree->state and freed.  merge_state() is also invoked on 'state'.
 *
 * NOTE(review): embedded lines 182, 184, 189 and 191-194 are missing.  Line
 * 189 sits between the free and the merge_state() call -- presumably an
 * 'else', which would keep merge_state() away from the freed state; confirm
 * against the complete file.  The return statement is not visible either.
 */
180 static int clear_state_bit(struct extent_io_tree
*tree
,
181 struct extent_state
*state
, int bits
)
183 int ret
= state
->state
& bits
;
185 state
->state
&= ~bits
;
186 if (state
->state
== 0) {
187 remove_cache_extent(&tree
->state
, &state
->cache_node
);
188 free_extent_state(state
);
190 merge_state(tree
, state
);
196 * clear some bits on a range in the tree.
/*
 * Clear 'bits' on the inclusive byte range [start, end] of tree->state.
 *
 * Visible flow:
 *  - a spare extent_state is obtained up front with alloc_extent_state();
 *  - the first cache extent found for 'start' is examined, and a state that
 *    begins beyond 'end' is treated specially (action not visible);
 *  - a state beginning before 'start' is split at 'start'; if what remains
 *    in 'state' ends at or before 'end' the bits are cleared on it and
 *    'start' advances to state->end + 1, otherwise 'start' is reset to
 *    state->start;
 *  - a state that begins inside the range but ends after 'end' is split at
 *    end + 1 and the bits are cleared on 'prealloc', which split_state()
 *    fills with the leading part of the split;
 *  - otherwise the bits are cleared on the whole state and 'start' advances;
 *  - 'set' accumulates the values returned by clear_state_bit();
 *  - free_extent_state(prealloc) appears near the end.
 * 'mask' is not referenced on any visible line.
 *
 * NOTE(review): many embedded lines (204-207, 209-212, 214-215, 217-218,
 * 221-223, 241-243, 247, 249-252, 261, 263-266, 269-271, 273-280, ...) are
 * missing from this listing: local declarations of 'err' and 'set', labels
 * and gotos, NULL checks after allocation and lookup, re-allocation of
 * 'prealloc' after it has been consumed, and the return statement are not
 * visible -- confirm against the complete file before changing anything.
 */
198 int clear_extent_bits(struct extent_io_tree
*tree
, u64 start
,
199 u64 end
, int bits
, gfp_t mask
)
201 struct extent_state
*state
;
202 struct extent_state
*prealloc
= NULL
;
203 struct cache_extent
*node
;
208 prealloc
= alloc_extent_state();
213 * this search will find the extents that end after
216 node
= find_first_cache_extent(&tree
->state
, start
);
219 state
= container_of(node
, struct extent_state
, cache_node
);
220 if (state
->start
> end
)
224 * | ---- desired range ---- |
226 * | ------------- state -------------- |
228 * We need to split the extent we found, and may flip
229 * bits on second half.
231 * If the extent we found extends past our range, we
232 * just split and search again. It'll get split again
233 * the next time though.
235 * If the extent we found is inside our range, we clear
236 * the desired bit on it.
238 if (state
->start
< start
) {
239 err
= split_state(tree
, state
, prealloc
, start
);
240 BUG_ON(err
== -EEXIST
);
244 if (state
->end
<= end
) {
245 start
= state
->end
+ 1;
246 set
|= clear_state_bit(tree
, state
, bits
);
248 start
= state
->start
;
253 * | ---- desired range ---- |
255 * We need to split the extent, and clear the bit
258 if (state
->start
<= end
&& state
->end
> end
) {
259 err
= split_state(tree
, state
, prealloc
, end
+ 1);
260 BUG_ON(err
== -EEXIST
);
262 set
|= clear_state_bit(tree
, prealloc
, bits
);
267 start
= state
->end
+ 1;
268 set
|= clear_state_bit(tree
, state
, bits
);
272 free_extent_state(prealloc
);
282 * set some bits on a range in the tree.
/*
 * Set 'bits' on the inclusive byte range [start, end] of tree->state.
 *
 * Visible flow:
 *  - a spare extent_state is obtained up front with alloc_extent_state();
 *  - the first cache extent found for 'start' is looked up; one path inserts
 *    'prealloc' for the whole [start, end] range via insert_state()
 *    (presumably when nothing was found -- the guarding test is not visible);
 *  - state begins exactly at 'start' and ends within the range: remember the
 *    previously set bits in 'set', OR in 'bits', advance 'start' past the
 *    state, then merge_state();
 *  - state begins before 'start': split at 'start'; if the remainder ends
 *    within the range, OR in 'bits', advance and merge, otherwise reset
 *    'start' to state->start;
 *  - state begins after 'start' (a hole): insert 'prealloc' for
 *    [start, this_end], where this_end is last_start - 1 on the visible
 *    path, and continue from this_end + 1;
 *  - state begins inside the range but extends past 'end': split at end + 1,
 *    OR 'bits' into state->state and call merge_state() on 'prealloc';
 *  - free_extent_state(prealloc) appears near the end.
 * 'mask' is not referenced on any visible line.
 *
 * NOTE(review): in the last case split_state() gives 'prealloc' the leading
 * part [old start, end], i.e. the part inside the requested range, yet the
 * visible line 392 ORs 'bits' into 'state', not 'prealloc'.
 * clear_extent_bits() operates on 'prealloc' in the equivalent situation.
 * This looks like the bits land on the wrong half -- confirm against the
 * complete file (split_state()'s adjustment of 'orig' is not visible here).
 *
 * NOTE(review): numerous embedded lines (290-294, 296-299, 301-302, 304,
 * 307-310, 314-315, 326-328, 348-350, 355, 357-360, 368, 370-371, 374,
 * 376-378, 380-382, 386-387, 391, 394-396, 398-403) are missing: local
 * declarations, labels/gotos, NULL checks, the alternative assignment of
 * this_end, the final argument of the second insert_state() call and the
 * return statement are not visible.
 */
284 int set_extent_bits(struct extent_io_tree
*tree
, u64 start
,
285 u64 end
, int bits
, gfp_t mask
)
287 struct extent_state
*state
;
288 struct extent_state
*prealloc
= NULL
;
289 struct cache_extent
*node
;
295 prealloc
= alloc_extent_state();
300 * this search will find the extents that end after
303 node
= find_first_cache_extent(&tree
->state
, start
);
305 err
= insert_state(tree
, prealloc
, start
, end
, bits
);
306 BUG_ON(err
== -EEXIST
);
311 state
= container_of(node
, struct extent_state
, cache_node
);
312 last_start
= state
->start
;
313 last_end
= state
->end
;
316 * | ---- desired range ---- |
319 * Just lock what we found and keep going
321 if (state
->start
== start
&& state
->end
<= end
) {
322 set
= state
->state
& bits
;
323 state
->state
|= bits
;
324 start
= state
->end
+ 1;
325 merge_state(tree
, state
);
329 * | ---- desired range ---- |
332 * | ------------- state -------------- |
334 * We need to split the extent we found, and may flip bits on
337 * If the extent we found extends past our
338 * range, we just split and search again. It'll get split
339 * again the next time though.
341 * If the extent we found is inside our range, we set the
344 if (state
->start
< start
) {
345 set
= state
->state
& bits
;
346 err
= split_state(tree
, state
, prealloc
, start
);
347 BUG_ON(err
== -EEXIST
);
351 if (state
->end
<= end
) {
352 state
->state
|= bits
;
353 start
= state
->end
+ 1;
354 merge_state(tree
, state
);
356 start
= state
->start
;
361 * | ---- desired range ---- |
362 * | state | or | state |
364 * There's a hole, we need to insert something in it and
365 * ignore the extent we found.
367 if (state
->start
> start
) {
369 if (end
< last_start
)
372 this_end
= last_start
-1;
373 err
= insert_state(tree
, prealloc
, start
, this_end
,
375 BUG_ON(err
== -EEXIST
);
379 start
= this_end
+ 1;
383 * | ---- desired range ---- |
384 * | ---------- state ---------- |
385 * We need to split the extent, and set the bit
388 set
= state
->state
& bits
;
389 err
= split_state(tree
, state
, prealloc
, end
+ 1);
390 BUG_ON(err
== -EEXIST
);
392 state
->state
|= bits
;
393 merge_state(tree
, prealloc
);
397 free_extent_state(prealloc
);
405 int set_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
408 return set_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
411 int clear_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
414 return clear_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
/*
 * Search tree->state, beginning with the first cache extent found for
 * 'start', for a state that has at least one of 'bits' set.
 *
 * For a state whose end is >= 'start' and whose state word intersects
 * 'bits', the state's start and end are stored through start_ret and
 * end_ret.  The walk moves on with next_cache_extent().
 *
 * NOTE(review): embedded lines 419, 422-424, 426-427, 429-432, 437-439 and
 * 441-446 are missing, so the loop construct, NULL checks on 'node', the
 * value returned on success versus failure, and whether the outputs are left
 * untouched when nothing matches are not visible -- confirm against the
 * complete file.
 */
417 int find_first_extent_bit(struct extent_io_tree
*tree
, u64 start
,
418 u64
*start_ret
, u64
*end_ret
, int bits
)
420 struct cache_extent
*node
;
421 struct extent_state
*state
;
425 * this search will find all the extents that end after
428 node
= find_first_cache_extent(&tree
->state
, start
);
433 state
= container_of(node
, struct extent_state
, cache_node
);
434 if (state
->end
>= start
&& (state
->state
& bits
)) {
435 *start_ret
= state
->start
;
436 *end_ret
= state
->end
;
440 node
= next_cache_extent(node
);
/*
 * Test 'bits' over the inclusive byte range [start, end] of tree->state.
 *
 * Visible flow: starting from the first cache extent found for 'start', the
 * loop runs while a node exists and start <= end.  Each iteration checks
 * (a) with 'filled' set, whether the state begins after the current 'start'
 * (a gap), (b) whether the state begins beyond 'end', and (c) whether the
 * state carries any of 'bits'; then 'start' advances to state->end + 1 and
 * the walk continues with next_cache_extent().
 *
 * NOTE(review): embedded lines 450, 453-454, 458, 460-462, 464, 466-472,
 * 474-475 and 477-484 are missing, so the body of every branch, the result
 * variable and the return statement are not visible; the exact meaning of
 * 'filled' should be confirmed against the complete file.
 */
448 int test_range_bit(struct extent_io_tree
*tree
, u64 start
, u64 end
,
449 int bits
, int filled
)
451 struct extent_state
*state
= NULL
;
452 struct cache_extent
*node
;
455 node
= find_first_cache_extent(&tree
->state
, start
);
456 while (node
&& start
<= end
) {
457 state
= container_of(node
, struct extent_state
, cache_node
);
459 if (filled
&& state
->start
> start
) {
463 if (state
->start
> end
)
465 if (state
->state
& bits
) {
473 start
= state
->end
+ 1;
476 node
= next_cache_extent(node
);
/*
 * Store 'private' in the extent_state located for 'start'.
 *
 * The first cache extent found for 'start' in tree->state is converted to
 * its extent_state; a state whose start differs from 'start' takes a
 * separate branch (body not visible); otherwise state->private is assigned.
 *
 * NOTE(review): embedded lines 487, 490-491, 493-496, 499-501 and 503-505
 * are missing, so the NULL check on 'node', the mismatch handling and the
 * return values are not visible -- confirm against the complete file.
 */
486 int set_state_private(struct extent_io_tree
*tree
, u64 start
, u64
private)
488 struct cache_extent
*node
;
489 struct extent_state
*state
;
492 node
= find_first_cache_extent(&tree
->state
, start
);
497 state
= container_of(node
, struct extent_state
, cache_node
);
498 if (state
->start
!= start
) {
502 state
->private = private;
/*
 * Read back the 'private' value of the extent_state located for 'start'.
 *
 * The first cache extent found for 'start' in tree->state is converted to
 * its extent_state; a state whose start differs from 'start' takes a
 * separate branch (body not visible); otherwise state->private is copied to
 * the caller through the 'private' pointer.
 *
 * NOTE(review): embedded lines 508, 511-512, 514-517, 520-522 and 524-526
 * are missing, so the NULL check on 'node', the mismatch handling and the
 * return values are not visible -- confirm against the complete file.
 */
507 int get_state_private(struct extent_io_tree
*tree
, u64 start
, u64
*private)
509 struct cache_extent
*node
;
510 struct extent_state
*state
;
513 node
= find_first_cache_extent(&tree
->state
, start
);
518 state
= container_of(node
, struct extent_state
, cache_node
);
519 if (state
->start
!= start
) {
523 *private = state
->private;
/*
 * Shrink the extent buffer cache of 'tree' when it has grown to cache_max
 * or beyond.
 *
 * Visible flow: tree->cache_size is compared with cache_max up front; the
 * LRU list is then walked with list_for_each_safe() (safe against removal of
 * the current entry), each visited buffer is handed to free_extent_buffer(),
 * and the size is compared with cache_max again inside the loop.
 *
 * NOTE(review): embedded lines 529-530, 533, 535, 538 and 541-547 are
 * missing, so the actions taken by both size checks (presumably early return
 * and break), any per-buffer eligibility test on line 538, and the return
 * value are not visible -- confirm against the complete file.
 */
528 static int free_some_buffers(struct extent_io_tree
*tree
)
531 struct extent_buffer
*eb
;
532 struct list_head
*node
, *next
;
534 if (tree
->cache_size
< cache_max
)
536 list_for_each_safe(node
, next
, &tree
->lru
) {
537 eb
= list_entry(node
, struct extent_buffer
, lru
);
539 free_extent_buffer(eb
);
540 if (tree
->cache_size
< cache_max
)
/*
 * Create a new extent buffer for the block at 'bytenr' and register it with
 * 'tree'.
 *
 * Visible steps: one malloc() covers the struct plus 'blocksize' bytes of
 * payload; dev_bytenr is preset to (u64)-1; the cache key is set to
 * [bytenr, bytenr + blocksize); free_some_buffers() gets a chance to trim the
 * cache first; the buffer is inserted into tree->cache, appended to the tail
 * of tree->lru, and tree->cache_size grows by 'blocksize'.
 *
 * NOTE(review): embedded lines 551, 553-554, 556-566, 570, 573-576 and
 * 579-580 are missing, so the malloc() failure check, initialisation of the
 * remaining fields, the handling of a failed insert ('ret') and the return
 * statement are not visible -- confirm against the complete file.
 */
549 static struct extent_buffer
*__alloc_extent_buffer(struct extent_io_tree
*tree
,
550 u64 bytenr
, u32 blocksize
)
552 struct extent_buffer
*eb
;
555 eb
= malloc(sizeof(struct extent_buffer
) + blocksize
);
567 eb
->dev_bytenr
= (u64
)-1;
568 eb
->cache_node
.start
= bytenr
;
569 eb
->cache_node
.size
= blocksize
;
571 free_some_buffers(tree
);
572 ret
= insert_existing_cache_extent(&tree
->cache
, &eb
->cache_node
);
577 list_add_tail(&eb
->lru
, &tree
->lru
);
578 tree
->cache_size
+= blocksize
;
/*
 * Release a reference on an extent buffer and dismantle it.
 *
 * Visible steps: assert eb->refs is not negative; then, with 'tree' taken
 * from eb->tree: assert the buffer is not flagged EXTENT_DIRTY, unlink it
 * from the LRU list, remove it from tree->cache, assert the cache size
 * accounting will not underflow and subtract eb->len from tree->cache_size.
 *
 * NOTE(review): embedded lines 583-587, 589 and 596-598 are missing, so any
 * NULL check on 'eb', the reference-count decrement, the condition under
 * which the teardown runs (presumably refs reaching zero) and the final
 * release of the memory are not visible -- confirm against the complete file.
 */
582 void free_extent_buffer(struct extent_buffer
*eb
)
588 BUG_ON(eb
->refs
< 0);
590 struct extent_io_tree
*tree
= eb
->tree
;
591 BUG_ON(eb
->flags
& EXTENT_DIRTY
);
592 list_del_init(&eb
->lru
);
593 remove_cache_extent(&tree
->cache
, &eb
->cache_node
);
594 BUG_ON(tree
->cache_size
< eb
->len
);
595 tree
->cache_size
-= eb
->len
;
/*
 * Look up a cached extent buffer that covers exactly
 * [bytenr, bytenr + blocksize).
 *
 * tree->cache is queried with find_cache_extent(); only an entry whose start
 * and size both match is accepted.  A hit is moved to the tail of tree->lru.
 * 'eb' starts out as NULL, so a miss leaves it NULL.
 *
 * NOTE(review): embedded lines 602, 605 and 610-613 are missing, so any
 * reference-count change on a hit and the return statement are not visible
 * -- confirm against the complete file.
 */
600 struct extent_buffer
*find_extent_buffer(struct extent_io_tree
*tree
,
601 u64 bytenr
, u32 blocksize
)
603 struct extent_buffer
*eb
= NULL
;
604 struct cache_extent
*cache
;
606 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
607 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
608 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
609 list_move_tail(&eb
->lru
, &tree
->lru
);
/*
 * Return the first cached extent buffer that find_first_cache_extent()
 * reports for offset 'start' in tree->cache.
 *
 * A hit is converted to its extent_buffer and moved to the tail of
 * tree->lru.  'eb' starts out as NULL.
 *
 * NOTE(review): embedded lines 616-617, 620, 622 and 625-628 are missing, so
 * the rest of the parameter list (the body uses 'start'), the NULL check on
 * 'cache', any reference-count change and the return statement are not
 * visible -- confirm against the complete file.
 */
615 struct extent_buffer
*find_first_extent_buffer(struct extent_io_tree
*tree
,
618 struct extent_buffer
*eb
= NULL
;
619 struct cache_extent
*cache
;
621 cache
= find_first_cache_extent(&tree
->cache
, start
);
623 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
624 list_move_tail(&eb
->lru
, &tree
->lru
);
/*
 * Get an extent buffer for [bytenr, bytenr + blocksize), reusing a cached
 * one when possible.
 *
 * Visible flow: tree->cache is queried with find_cache_extent().  An entry
 * matching both start and size is reused and moved to the tail of tree->lru.
 * On another path a found entry is converted to its extent_buffer, asserted
 * to have exactly one reference and freed, after which a fresh buffer is
 * created with __alloc_extent_buffer().
 *
 * NOTE(review): embedded lines 632, 635, 640-642, 644, 647 and 649-651 are
 * missing, so the reference-count change on reuse, the condition selecting
 * the free-and-reallocate path (presumably an overlapping entry of different
 * geometry), the last argument of the second container_of() and the return
 * statement are not visible -- confirm against the complete file.
 */
630 struct extent_buffer
*alloc_extent_buffer(struct extent_io_tree
*tree
,
631 u64 bytenr
, u32 blocksize
)
633 struct extent_buffer
*eb
;
634 struct cache_extent
*cache
;
636 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
637 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
638 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
639 list_move_tail(&eb
->lru
, &tree
->lru
);
643 eb
= container_of(cache
, struct extent_buffer
,
645 BUG_ON(eb
->refs
!= 1);
646 free_extent_buffer(eb
);
648 eb
= __alloc_extent_buffer(tree
, bytenr
, blocksize
);
/*
 * Fill eb->data from disk: a single pread() of eb->len bytes from
 * descriptor eb->fd at offset eb->dev_bytenr.  The result is then compared
 * with eb->len, so a transfer of any other size takes a separate branch.
 *
 * NOTE(review): embedded lines 654-655, 657-658, 660-666 are missing, so the
 * declaration of 'ret', the handling of a negative result, the handling of a
 * short read and the return values are not visible -- confirm against the
 * complete file.
 */
653 int read_extent_from_disk(struct extent_buffer
*eb
)
656 ret
= pread(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
659 if (ret
!= eb
->len
) {
/*
 * Write eb->data to disk: a single pwrite() of eb->len bytes to descriptor
 * eb->fd at offset eb->dev_bytenr.  The result is then compared with
 * eb->len, so a transfer of any other size takes a separate branch.
 *
 * NOTE(review): embedded lines 669-670, 672-673, 675-681 are missing, so the
 * declaration of 'ret', the handling of a negative result, the handling of a
 * short write and the return values are not visible -- confirm against the
 * complete file.
 */
668 int write_extent_to_disk(struct extent_buffer
*eb
)
671 ret
= pwrite(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
674 if (ret
!= eb
->len
) {
/*
 * Flag the buffer as up to date by setting EXTENT_UPTODATE in eb->flags.
 *
 * NOTE(review): embedded lines 684 and 686-687 are missing, so the value
 * returned is not visible in this listing.
 */
683 int set_extent_buffer_uptodate(struct extent_buffer
*eb
)
685 eb
->flags
|= EXTENT_UPTODATE
;
/*
 * Query whether EXTENT_UPTODATE is set in eb->flags.
 *
 * NOTE(review): embedded lines 690 and 692-694 are missing, so the values
 * returned for the set and the unset case are not visible in this listing.
 */
689 int extent_buffer_uptodate(struct extent_buffer
*eb
)
691 if (eb
->flags
& EXTENT_UPTODATE
)
/*
 * Mark an extent buffer dirty.
 *
 * Only acts when EXTENT_DIRTY is not already set: the flag is set on the
 * buffer, the byte range [eb->start, eb->start + eb->len - 1] is marked dirty
 * in the owning tree (mask argument 0), and extent_buffer_get() is called on
 * the buffer -- the counterpart of the free_extent_buffer() call visible in
 * clear_extent_buffer_dirty().
 *
 * NOTE(review): embedded lines 697 and 703-705 are missing, so the return
 * value is not visible in this listing.
 */
696 int set_extent_buffer_dirty(struct extent_buffer
*eb
)
698 struct extent_io_tree
*tree
= eb
->tree
;
699 if (!(eb
->flags
& EXTENT_DIRTY
)) {
700 eb
->flags
|= EXTENT_DIRTY
;
701 set_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
702 extent_buffer_get(eb
);
/*
 * Remove the dirty marking from an extent buffer.
 *
 * Only acts when EXTENT_DIRTY is currently set: the flag is cleared on the
 * buffer, the byte range [eb->start, eb->start + eb->len - 1] is cleared in
 * the owning tree (mask argument 0), and free_extent_buffer() is called on
 * the buffer -- the counterpart of the extent_buffer_get() call visible in
 * set_extent_buffer_dirty().
 *
 * NOTE(review): embedded lines 708 and 714-716 are missing, so the return
 * value is not visible in this listing.
 */
707 int clear_extent_buffer_dirty(struct extent_buffer
*eb
)
709 struct extent_io_tree
*tree
= eb
->tree
;
710 if (eb
->flags
& EXTENT_DIRTY
) {
711 eb
->flags
&= ~EXTENT_DIRTY
;
712 clear_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
713 free_extent_buffer(eb
);
718 int memcmp_extent_buffer(struct extent_buffer
*eb
, const void *ptrv
,
719 unsigned long start
, unsigned long len
)
721 return memcmp(eb
->data
+ start
, ptrv
, len
);
724 void read_extent_buffer(struct extent_buffer
*eb
, void *dst
,
725 unsigned long start
, unsigned long len
)
727 memcpy(dst
, eb
->data
+ start
, len
);
730 void write_extent_buffer(struct extent_buffer
*eb
, const void *src
,
731 unsigned long start
, unsigned long len
)
733 memcpy(eb
->data
+ start
, src
, len
);
736 void copy_extent_buffer(struct extent_buffer
*dst
, struct extent_buffer
*src
,
737 unsigned long dst_offset
, unsigned long src_offset
,
740 memcpy(dst
->data
+ dst_offset
, src
->data
+ src_offset
, len
);
743 void memcpy_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
744 unsigned long src_offset
, unsigned long len
)
746 memcpy(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
749 void memmove_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
750 unsigned long src_offset
, unsigned long len
)
752 memmove(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
755 void memset_extent_buffer(struct extent_buffer
*eb
, char c
,
756 unsigned long start
, unsigned long len
)
758 memset(eb
->data
+ start
, c
, len
);