/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#define _XOPEN_SOURCE 600
#define __USE_XOPEN2K
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
#include "ctree.h"
#include "volumes.h"

static u64 cache_soft_max = 1024 * 1024 * 256;
static u64 cache_hard_max = 1 * 1024 * 1024 * 1024;

void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
}

static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->cache_node.objectid = 0;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}

static void btrfs_free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}

static void free_extent_state_func(struct cache_extent *cache)
{
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
}

void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs != 1) {
			fprintf(stderr, "extent buffer leak: "
				"start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			eb->refs = 1;
		}
		free_extent_buffer(eb);
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}
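
/*
 * Usage sketch (illustrative, not part of the original file): the
 * typical tree lifecycle. Cleanup reports buffers that still hold
 * extra references before freeing them:
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(&tree);
 *	// ... allocate buffers, set and clear bits ...
 *	extent_io_tree_cleanup(&tree);
 */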

static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}

/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS in their state field
 * are not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			btrfs_free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}
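
/*
 * Illustrative sketch (not in the original source): because of
 * merge_state(), setting the same bit on two adjacent ranges leaves a
 * single state record behind:
 *
 *	set_extent_bits(&tree, 0, 4095, EXTENT_DIRTY, 0);
 *	set_extent_bits(&tree, 4096, 8191, EXTENT_DIRTY, 0);
 *	// the tree now holds one EXTENT_DIRTY state covering [0, 8191]
 */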

/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the first half [orig->start, split - 1], while
 * 'orig' keeps [split, orig->end]. 'split' indicates an offset inside
 * 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}

/*
 * clear some bits on a state record, removing it from the tree if no
 * bits remain set.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		      u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}
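
/*
 * Illustrative sketch (not in the original source): clearing the middle
 * of a larger state splits it, leaving the two outer pieces behind:
 *
 *	set_extent_bits(&tree, 0, 12287, EXTENT_DIRTY, 0);
 *	clear_extent_bits(&tree, 4096, 8191, EXTENT_DIRTY, 0);
 *	// the tree now holds [0, 4095] and [8192, 12287]
 */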

/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start,
		    u64 end, int bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	state->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;
search_again:
	if (start > end)
		goto out;
	goto again;
}

int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
}
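
/*
 * Usage sketch (not in the original source): the dirty helpers are thin
 * wrappers around the bit operations, so callers track a block range
 * with one call each way:
 *
 *	set_extent_dirty(&tree, bytenr, bytenr + blocksize - 1, 0);
 *	// ... write the range out ...
 *	clear_extent_dirty(&tree, bytenr, bytenr + blocksize - 1, 0);
 */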

int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}
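
/*
 * Usage sketch (not in the original source): walking every dirty range
 * in a tree; find_first_extent_bit() returns 0 while a match exists:
 *
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(&tree, start, &found_start,
 *				      &found_end, EXTENT_DIRTY)) {
 *		// process [found_start, found_end]
 *		start = found_end + 1;
 *	}
 */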

int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	return bitset;
}
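
/*
 * Illustrative note (not in the original source): 'filled' selects
 * between "every byte of the range has the bits" and "any part of the
 * range has the bits":
 *
 *	test_range_bit(&tree, 0, 8191, EXTENT_DIRTY, 1);  // fully dirty?
 *	test_range_bit(&tree, 0, 8191, EXTENT_DIRTY, 0);  // any byte dirty?
 */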

int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}
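
/*
 * Usage sketch (not in the original source): the private value is keyed
 * by the exact start offset of a state record, so both calls must use
 * the same 'start' or they fail with -ENOENT:
 *
 *	u64 val;
 *
 *	if (!set_state_private(&tree, bytenr, 42) &&
 *	    !get_state_private(&tree, bytenr, &val))
 *		printf("stored %llu\n", (unsigned long long)val);
 */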

/*
 * Called before allocating a new buffer: once the cache has grown past
 * the soft limit, scan the LRU and drop buffers that hold no extra
 * references.
 */
static int free_some_buffers(struct extent_io_tree *tree)
{
	u32 nrscan = 0;
	struct extent_buffer *eb;
	struct list_head *node, *next;

	if (tree->cache_size < cache_soft_max)
		return 0;

	list_for_each_safe(node, next, &tree->lru) {
		eb = list_entry(node, struct extent_buffer, lru);
		if (eb->refs == 1) {
			free_extent_buffer(eb);
			if (tree->cache_size < cache_hard_max)
				break;
		} else {
			list_move_tail(&eb->lru, &tree->lru);
		}
		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
			break;
	}
	return 0;
}

static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	int ret;

	eb = malloc(sizeof(struct extent_buffer) + blocksize);
	if (!eb) {
		BUG();
		return NULL;
	}
	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 1;
	eb->flags = 0;
	eb->tree = tree;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	INIT_LIST_HEAD(&eb->recow);

	free_some_buffers(tree);
	ret = insert_cache_extent(&tree->cache, &eb->cache_node);
	if (ret) {
		free(eb);
		return NULL;
	}
	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	return eb;
}

void free_extent_buffer(struct extent_buffer *eb)
{
	if (!eb)
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		struct extent_io_tree *tree = eb->tree;
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->lru);
		list_del_init(&eb->recow);
		remove_cache_extent(&tree->cache, &eb->cache_node);
		BUG_ON(tree->cache_size < eb->len);
		tree->cache_size -= eb->len;
		free(eb);
	}
}

struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
	}
	return eb;
}
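
/*
 * Usage sketch (not in the original source): alloc_extent_buffer() and
 * find_extent_buffer() both hand back a reference that must be dropped
 * with free_extent_buffer() when the caller is done:
 *
 *	struct extent_buffer *eb;
 *
 *	eb = alloc_extent_buffer(&tree, bytenr, blocksize);
 *	if (eb) {
 *		// ... use eb->data ...
 *		free_extent_buffer(eb);
 *	}
 */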

int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}
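
/*
 * Usage sketch (not in the original source): an extent buffer carries
 * its own file descriptor and device offset, which must be filled in
 * before the disk helpers are called:
 *
 *	eb->fd = fd;
 *	eb->dev_bytenr = physical;
 *	ret = read_extent_from_disk(eb, 0, eb->len);
 */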

int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(&info->mapping_tree, READ, offset,
				      &read_len, &multi, mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd == 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return ret;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}

	return 0;
}

int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->leafsize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			BUG_ON(!eb);

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len,
				     dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					ret = errno;
					kfree(multi);
					return ret;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;
}

int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	eb->flags |= EXTENT_UPTODATE;
	return 0;
}

int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
				 struct extent_buffer *eb)
{
	eb->flags &= ~EXTENT_UPTODATE;
	return 0;
}

int extent_buffer_uptodate(struct extent_buffer *eb)
{
	if (!eb)
		return 0;

	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return 0;
}

int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
		free_extent_buffer(eb);
	}
	return 0;
}
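
/*
 * Illustrative note (not in the original source): the dirty flag pins
 * an extra reference via extent_buffer_get(), so every
 * set_extent_buffer_dirty() must be balanced by a
 * clear_extent_buffer_dirty() or the buffer is reported as leaked at
 * cleanup:
 *
 *	set_extent_buffer_dirty(eb);	// takes a reference
 *	// ... write_extent_to_disk(eb) ...
 *	clear_extent_buffer_dirty(eb);	// drops it again
 */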

int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}