/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
#include "ctree.h"
#include "volumes.h"
#include "internal.h"

void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}

static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->cache_node.objectid = 0;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}

static void btrfs_free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}

static void free_extent_state_func(struct cache_extent *cache)
{
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
}

void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		fprintf(stderr, "extent buffer leak: "
			"start %llu len %u\n",
			(unsigned long long)eb->start, eb->len);
		free_extent_buffer(eb);
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}

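/*
 * Lifecycle sketch (hypothetical caller, not part of this file): the
 * tree is embedded in some longer-lived structure, initialized once,
 * and torn down with extent_io_tree_cleanup(), which also reports any
 * extent buffers still on the LRU as leaks:
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(&tree);
 *	// ... set/clear extent bits, use extent buffers ...
 *	extent_io_tree_cleanup(&tree);
 */
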
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}

/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS set in their state
 * field are not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			btrfs_free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}

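/*
 * Example of the merge behaviour: if [0, 4095] already carries
 * EXTENT_DIRTY and a caller then dirties the adjacent [4096, 8191],
 * merge_state() collapses the two identically-flagged neighbours into
 * a single record:
 *
 *	set_extent_dirty(&tree, 0, 4095);
 *	set_extent_dirty(&tree, 4096, 8191);
 *	// the tree now holds one extent_state spanning [0, 8191]
 */
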
/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half and shrinking 'orig'
 * to the second. 'split' indicates an offset inside 'orig' where it
 * should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}

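/*
 * Worked example: splitting an extent state covering [0, 8191] at
 * offset 4096 leaves 'prealloc' as [0, 4095] and shrinks 'orig' to
 * [4096, 8191]. Callers pass in a preallocated state, so the split
 * itself never has to allocate and cannot fail with -ENOMEM.
 */
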
/*
 * clear some bits on a single extent state struct. Returns the bits
 * that were actually cleared; frees the state if no bits remain,
 * otherwise tries to merge it with its neighbours.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit
	 * on the first half.
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}

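/*
 * Note on the return value: on success clear_extent_bits() returns the
 * OR of the bits that were actually found and cleared, so a caller can
 * tell "the range was already clean" (0) from "something was cleared"
 * (non-zero), e.g.:
 *
 *	if (clear_extent_bits(&tree, start, end, EXTENT_DIRTY) > 0)
 *		// at least part of [start, end] had been dirty
 */
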
/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half.
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	/* 'prealloc' is now the first half ([start, end]); the bits belong there */
	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}

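/*
 * Both set_extent_bits() and clear_extent_bits() treat [start, end] as
 * an inclusive byte range: a single 4KiB block at offset 0 is the range
 * (0, 4095), not (0, 4096). The EXTENT_DIRTY helpers below are thin
 * wrappers that follow the same convention.
 */
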
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
}

int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}

int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	return bitset;
}

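/*
 * Example of the 'filled' flag: with EXTENT_DIRTY set only on [0, 4095]
 * of a queried range [0, 8191],
 *
 *	test_range_bit(&tree, 0, 8191, EXTENT_DIRTY, 0) returns 1
 *	    (some part of the range has the bit set), while
 *	test_range_bit(&tree, 0, 8191, EXTENT_DIRTY, 1) returns 0
 *	    (the bit does not cover the whole range).
 */
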
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}

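/*
 * Usage sketch for the private value (hypothetical caller, assuming no
 * adjacent state that would merge): the value is attached to the
 * extent_state that starts exactly at 'start', so such a state must
 * exist first, e.g. one created by set_extent_bits():
 *
 *	u64 val;
 *
 *	set_extent_bits(&tree, bytenr, bytenr + len - 1, EXTENT_DIRTY);
 *	set_state_private(&tree, bytenr, 42);
 *	if (get_state_private(&tree, bytenr, &val) == 0)
 *		// val == 42
 */
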
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;

	eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
	if (!eb)
		return NULL;

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 1;
	eb->flags = 0;
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	/*
	 * Initialize the lru head as well: cloned (dummy) buffers are
	 * never put on a tree's lru list, but free_extent_buffer() does
	 * an unconditional list_del_init(&eb->lru).
	 */
	INIT_LIST_HEAD(&eb->lru);
	INIT_LIST_HEAD(&eb->recow);

	return eb;
}

struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
	struct extent_buffer *new;

	new = __alloc_extent_buffer(NULL, src->start, src->len);
	if (!new)
		return NULL;

	copy_extent_buffer(new, src, 0, 0, src->len);
	new->flags |= EXTENT_BUFFER_DUMMY;

	return new;
}

void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb || IS_ERR(eb))
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;

		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		list_del_init(&eb->recow);
		if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
			BUG_ON(tree->cache_size < eb->len);
			remove_cache_extent(&tree->cache, &eb->cache_node);
			tree->cache_size -= eb->len;
		}
		free(eb);
	}
}

struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		int ret;

		if (cache) {
			/*
			 * An overlapping but not identical buffer is
			 * cached; drop one reference so it can go away.
			 */
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
		if (!eb)
			return NULL;
		ret = insert_cache_extent(&tree->cache, &eb->cache_node);
		if (ret) {
			free(eb);
			return NULL;
		}
		list_add_tail(&eb->lru, &tree->lru);
		tree->cache_size += blocksize;
	}
	return eb;
}

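/*
 * Reference-count sketch (hypothetical caller): each of the lookup and
 * allocation helpers above returns the buffer with its refcount raised,
 * and must be balanced with free_extent_buffer(); the memory and the
 * cache entry only go away once the last reference is dropped:
 *
 *	struct extent_buffer *eb;
 *
 *	eb = alloc_extent_buffer(tree, bytenr, blocksize);
 *	if (eb) {
 *		// ... read or modify eb->data ...
 *		free_extent_buffer(eb);
 *	}
 */
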
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
	if (ret < 0) {
		ret = -errno;
		goto out;
	}
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0) {
		/* report the real errno, like the read side does */
		ret = -errno;
		goto out;
	}
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

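/*
 * I/O sketch: eb->fd and eb->dev_bytenr must be filled in (normally by
 * the volume mapping code) before either helper is used. Both return 0
 * only on a full-length transfer:
 *
 *	eb->fd = device_fd;		// hypothetical, caller-provided fd
 *	eb->dev_bytenr = physical;	// physical byte offset on that fd
 *	if (read_extent_from_disk(eb, 0, eb->len) == 0)
 *		// eb->data now holds the on-disk bytes
 */
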
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
				      &read_len, &multi, mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd <= 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return ret;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}

	return 0;
}

int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->nodesize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			if (!eb) {
				fprintf(stderr, "cannot allocate memory for eb\n");
				ret = -ENOMEM;
				goto out;
			}

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd <= 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					ret = errno;
					kfree(multi);
					return ret;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;

out:
	kfree(raid_map);
	return ret;
}

int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
		free_extent_buffer(eb);
	}
	return 0;
}

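/*
 * Dirty-tracking sketch: marking a buffer dirty pins it (an extra
 * reference plus an EXTENT_DIRTY range in eb->tree) until writeback
 * clears the flag again, so the calls come in pairs:
 *
 *	set_extent_buffer_dirty(eb);	// pins eb in the cache
 *	// ... later, once the block has been written out ...
 *	clear_extent_buffer_dirty(eb);	// drops the extra reference
 */
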
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}

int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	return le_test_bit(nr, (u8 *)eb->data + start);
}