/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#define _XOPEN_SOURCE 600
#define __USE_XOPEN2K
#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* memcpy/memmove/memset used by the helpers below */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
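
/*
 * Soft cap, in bytes, on the extent buffer cache; once tree->cache_size
 * crosses this, free_some_buffers() starts reclaiming unreferenced
 * buffers from the LRU list.
 */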
u64 cache_max = 1024 * 1024 * 32;

void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}
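
/*
 * extent_state objects are reference counted: alloc_extent_state() returns
 * one with refs == 1, and free_extent_state() only frees once the count
 * drops to zero.
 */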
static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->refs = 1;
	state->state = 0;
	state->private = 0;
	return state;
}

static void free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}
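
/*
 * Tear down an entire tree: drop every extent buffer on the LRU (warning
 * about any that still hold extra references) and free all remaining
 * extent_state records.
 */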
void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_state *es;
	struct extent_buffer *eb;
	struct cache_extent *cache;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs != 1) {
			fprintf(stderr, "extent buffer leak: "
				"start %Lu len %u\n", eb->start, eb->len);
			eb->refs = 1;
		}
		free_extent_buffer(eb);
	}
	while (1) {
		cache = find_first_cache_extent(&tree->state, 0);
		if (!cache)
			break;
		es = container_of(cache, struct extent_state, cache_node);
		remove_cache_extent(&tree->state, &es->cache_node);
		free_extent_state(es);
	}
}
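
/*
 * Keep the embedded cache_node's key in sync with the state's
 * [start, end] range after either endpoint changes.
 */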
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}

/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS in their state field
 * are not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates
 * an offset inside 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_existing_cache_extent(&tree->state,
					   &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}

/*
 * clear some bits in a single extent_state struct; the state is freed
 * if it ends up with no bits set at all.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		/* reuse any preallocation left over from an earlier pass */
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit
	 * on the first half.
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	goto search_again;
out:
	if (prealloc)
		free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}
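
/*
 * Example (illustrative, not part of the original source): with a single
 * state covering [0, 16383] that has EXTENT_DIRTY set, calling
 * clear_extent_bits(tree, 4096, 8191, EXTENT_DIRTY, 0) splits off
 * [0, 4095] and [8192, 16383], which keep the bit, and drops the fully
 * cleared middle state [4096, 8191] from the tree entirely.
 */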

/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc) {
		/* reuse any preallocation left over from an earlier pass */
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	set = state->state & bits;
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	/* prealloc is the first half, which covers the desired range */
	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}

int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}
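
/*
 * Example usage (a minimal sketch, not from the original source):
 *
 *	struct extent_io_tree tree;
 *	u64 found_start, found_end;
 *
 *	extent_io_tree_init(&tree);
 *	set_extent_dirty(&tree, 0, 4095, 0);
 *	if (!find_first_extent_bit(&tree, 0, &found_start, &found_end,
 *				   EXTENT_DIRTY))
 *		clear_extent_dirty(&tree, found_start, found_end, 0);
 *	extent_io_tree_cleanup(&tree);
 */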

int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = find_first_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}
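
/*
 * Check a range for the given bits.  With filled == 0 this returns 1 if
 * any part of [start, end] has the bits set; with filled == 1 it returns
 * 1 only if the whole range is covered by states that all have them set.
 */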
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = find_first_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	return bitset;
}
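
/*
 * Each extent_state can carry a single u64 of private data.  Both helpers
 * require a state that starts exactly at 'start'; otherwise they return
 * -ENOENT.
 */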
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = find_first_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	return ret;
}
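
/*
 * Walk the LRU list and drop unreferenced buffers (refs == 1, held only
 * by the cache) until the cache is back under cache_max, scanning at
 * most 64 entries per call.
 */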
static int free_some_buffers(struct extent_io_tree *tree)
{
	u32 nrscan = 0;
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_max)
		return 0;
	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		if (eb->refs == 1) {
			free_extent_buffer(eb);
			if (tree->cache_size < cache_max)
				break;
		}
		if (nrscan++ > 64)
			break;
	}
	return 0;
}
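
/*
 * Allocate a fresh buffer and insert it into the cache.  The new buffer
 * starts with refs == 2: one reference for the caller and one held by
 * the cache itself.
 */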
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	int ret;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	if (!eb) {
		BUG();
		return NULL;
	}

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 2;
	eb->flags = 0;
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;

	free_some_buffers(tree);
	ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
	if (ret) {
		free(eb);
		return NULL;
	}
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	return eb;
}
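
/*
 * Drop one reference.  When the count hits zero the buffer is removed
 * from the cache and the LRU list and freed; it must not be dirty at
 * that point.
 */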
void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb)
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		remove_cache_extent(&tree->cache, &eb->cache_node);
		BUG_ON(tree->cache_size < eb->len);
		tree->cache_size -= eb->len;
		free(eb);
	}
}

struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = find_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr && cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = find_first_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}
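
/*
 * Look up a buffer in the cache, or allocate a new one if no exact match
 * exists.  A cached buffer that merely overlaps [bytenr, bytenr + blocksize)
 * is dropped first (it must be otherwise unreferenced) before the
 * replacement is allocated.
 */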
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = find_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr && cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			BUG_ON(eb->refs != 1);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
	}
	return eb;
}
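
/*
 * Read or write the buffer's contents at its current device location.
 * The caller is expected to have set eb->fd and eb->dev_bytenr first;
 * short reads and writes are reported as -EIO.
 */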
int read_extent_from_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	eb->flags |= EXTENT_UPTODATE;
	return 0;
}

int extent_buffer_uptodate(struct extent_buffer *eb)
{
	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return 0;
}
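
/*
 * Dirtying a buffer records the range in the tree's state and takes an
 * extra reference so the buffer cannot be reclaimed until the matching
 * clear_extent_buffer_dirty() drops it.
 */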
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
	}
	return 0;
}
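
/*
 * The helpers below operate on byte offsets within eb->data.  None of
 * them do any bounds checking, so callers must stay within eb->len.
 * memcpy_extent_buffer() assumes non-overlapping ranges; use
 * memmove_extent_buffer() when they may overlap.
 */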
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	memcpy(dst->data + dst_offset, dst->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}
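
/*
 * Example usage of the buffer cache (a minimal sketch, not from the
 * original source; 'fd', 'bytenr' and 'blocksize' are assumed to come
 * from the caller's device setup):
 *
 *	struct extent_io_tree tree;
 *	struct extent_buffer *eb;
 *
 *	extent_io_tree_init(&tree);
 *	eb = alloc_extent_buffer(&tree, bytenr, blocksize);
 *	eb->fd = fd;
 *	eb->dev_bytenr = bytenr;
 *	if (read_extent_from_disk(eb) == 0)
 *		set_extent_buffer_uptodate(eb);
 *	free_extent_buffer(eb);
 *	extent_io_tree_cleanup(&tree);
 */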