btrfs-progs: fi du: add long options for units
[btrfs-progs-unstable/devel.git] / extent_io.c
blob88e92736280edb5e5ad9669db185ee6a4ed4646e
2 /*
3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include "kerncompat.h"
26 #include "extent_io.h"
27 #include "list.h"
28 #include "ctree.h"
29 #include "volumes.h"
31 void extent_io_tree_init(struct extent_io_tree *tree)
33 cache_tree_init(&tree->state);
34 cache_tree_init(&tree->cache);
35 INIT_LIST_HEAD(&tree->lru);
36 tree->cache_size = 0;
39 static struct extent_state *alloc_extent_state(void)
41 struct extent_state *state;
43 state = malloc(sizeof(*state));
44 if (!state)
45 return NULL;
46 state->cache_node.objectid = 0;
47 state->refs = 1;
48 state->state = 0;
49 state->xprivate = 0;
50 return state;
53 static void btrfs_free_extent_state(struct extent_state *state)
55 state->refs--;
56 BUG_ON(state->refs < 0);
57 if (state->refs == 0)
58 free(state);
61 static void free_extent_state_func(struct cache_extent *cache)
63 struct extent_state *es;
65 es = container_of(cache, struct extent_state, cache_node);
66 btrfs_free_extent_state(es);
69 void extent_io_tree_cleanup(struct extent_io_tree *tree)
71 struct extent_buffer *eb;
73 while(!list_empty(&tree->lru)) {
74 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
75 fprintf(stderr, "extent buffer leak: "
76 "start %llu len %u\n",
77 (unsigned long long)eb->start, eb->len);
78 free_extent_buffer(eb);
81 cache_tree_free_extents(&tree->state, free_extent_state_func);
84 static inline void update_extent_state(struct extent_state *state)
86 state->cache_node.start = state->start;
87 state->cache_node.size = state->end + 1 - state->start;
91 * Utility function to look for merge candidates inside a given range.
92 * Any extents with matching state are merged together into a single
93 * extent in the tree. Extents with EXTENT_IO in their state field are
94 * not merged
96 static int merge_state(struct extent_io_tree *tree,
97 struct extent_state *state)
99 struct extent_state *other;
100 struct cache_extent *other_node;
102 if (state->state & EXTENT_IOBITS)
103 return 0;
105 other_node = prev_cache_extent(&state->cache_node);
106 if (other_node) {
107 other = container_of(other_node, struct extent_state,
108 cache_node);
109 if (other->end == state->start - 1 &&
110 other->state == state->state) {
111 state->start = other->start;
112 update_extent_state(state);
113 remove_cache_extent(&tree->state, &other->cache_node);
114 btrfs_free_extent_state(other);
117 other_node = next_cache_extent(&state->cache_node);
118 if (other_node) {
119 other = container_of(other_node, struct extent_state,
120 cache_node);
121 if (other->start == state->end + 1 &&
122 other->state == state->state) {
123 other->start = state->start;
124 update_extent_state(other);
125 remove_cache_extent(&tree->state, &state->cache_node);
126 btrfs_free_extent_state(state);
129 return 0;
133 * insert an extent_state struct into the tree. 'bits' are set on the
134 * struct before it is inserted.
136 static int insert_state(struct extent_io_tree *tree,
137 struct extent_state *state, u64 start, u64 end,
138 int bits)
140 int ret;
142 BUG_ON(end < start);
143 state->state |= bits;
144 state->start = start;
145 state->end = end;
146 update_extent_state(state);
147 ret = insert_cache_extent(&tree->state, &state->cache_node);
148 BUG_ON(ret);
149 merge_state(tree, state);
150 return 0;
154 * split a given extent state struct in two, inserting the preallocated
155 * struct 'prealloc' as the newly created second half. 'split' indicates an
156 * offset inside 'orig' where it should be split.
158 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
159 struct extent_state *prealloc, u64 split)
161 int ret;
162 prealloc->start = orig->start;
163 prealloc->end = split - 1;
164 prealloc->state = orig->state;
165 update_extent_state(prealloc);
166 orig->start = split;
167 update_extent_state(orig);
168 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
169 BUG_ON(ret);
170 return 0;
174 * clear some bits on a range in the tree.
176 static int clear_state_bit(struct extent_io_tree *tree,
177 struct extent_state *state, int bits)
179 int ret = state->state & bits;
181 state->state &= ~bits;
182 if (state->state == 0) {
183 remove_cache_extent(&tree->state, &state->cache_node);
184 btrfs_free_extent_state(state);
185 } else {
186 merge_state(tree, state);
188 return ret;
192 * clear some bits on a range in the tree.
194 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
195 u64 end, int bits, gfp_t mask)
197 struct extent_state *state;
198 struct extent_state *prealloc = NULL;
199 struct cache_extent *node;
200 u64 last_end;
201 int err;
202 int set = 0;
204 again:
205 if (!prealloc) {
206 prealloc = alloc_extent_state();
207 if (!prealloc)
208 return -ENOMEM;
212 * this search will find the extents that end after
213 * our range starts
215 node = search_cache_extent(&tree->state, start);
216 if (!node)
217 goto out;
218 state = container_of(node, struct extent_state, cache_node);
219 if (state->start > end)
220 goto out;
221 last_end = state->end;
224 * | ---- desired range ---- |
225 * | state | or
226 * | ------------- state -------------- |
228 * We need to split the extent we found, and may flip
229 * bits on second half.
231 * If the extent we found extends past our range, we
232 * just split and search again. It'll get split again
233 * the next time though.
235 * If the extent we found is inside our range, we clear
236 * the desired bit on it.
238 if (state->start < start) {
239 err = split_state(tree, state, prealloc, start);
240 BUG_ON(err == -EEXIST);
241 prealloc = NULL;
242 if (err)
243 goto out;
244 if (state->end <= end) {
245 set |= clear_state_bit(tree, state, bits);
246 if (last_end == (u64)-1)
247 goto out;
248 start = last_end + 1;
249 } else {
250 start = state->start;
252 goto search_again;
255 * | ---- desired range ---- |
256 * | state |
257 * We need to split the extent, and clear the bit
258 * on the first half
260 if (state->start <= end && state->end > end) {
261 err = split_state(tree, state, prealloc, end + 1);
262 BUG_ON(err == -EEXIST);
264 set |= clear_state_bit(tree, prealloc, bits);
265 prealloc = NULL;
266 goto out;
269 start = state->end + 1;
270 set |= clear_state_bit(tree, state, bits);
271 if (last_end == (u64)-1)
272 goto out;
273 start = last_end + 1;
274 goto search_again;
275 out:
276 if (prealloc)
277 btrfs_free_extent_state(prealloc);
278 return set;
280 search_again:
281 if (start > end)
282 goto out;
283 goto again;
287 * set some bits on a range in the tree.
289 int set_extent_bits(struct extent_io_tree *tree, u64 start,
290 u64 end, int bits, gfp_t mask)
292 struct extent_state *state;
293 struct extent_state *prealloc = NULL;
294 struct cache_extent *node;
295 int err = 0;
296 u64 last_start;
297 u64 last_end;
298 again:
299 if (!prealloc) {
300 prealloc = alloc_extent_state();
301 if (!prealloc)
302 return -ENOMEM;
306 * this search will find the extents that end after
307 * our range starts
309 node = search_cache_extent(&tree->state, start);
310 if (!node) {
311 err = insert_state(tree, prealloc, start, end, bits);
312 BUG_ON(err == -EEXIST);
313 prealloc = NULL;
314 goto out;
317 state = container_of(node, struct extent_state, cache_node);
318 last_start = state->start;
319 last_end = state->end;
322 * | ---- desired range ---- |
323 * | state |
325 * Just lock what we found and keep going
327 if (state->start == start && state->end <= end) {
328 state->state |= bits;
329 merge_state(tree, state);
330 if (last_end == (u64)-1)
331 goto out;
332 start = last_end + 1;
333 goto search_again;
336 * | ---- desired range ---- |
337 * | state |
338 * or
339 * | ------------- state -------------- |
341 * We need to split the extent we found, and may flip bits on
342 * second half.
344 * If the extent we found extends past our
345 * range, we just split and search again. It'll get split
346 * again the next time though.
348 * If the extent we found is inside our range, we set the
349 * desired bit on it.
351 if (state->start < start) {
352 err = split_state(tree, state, prealloc, start);
353 BUG_ON(err == -EEXIST);
354 prealloc = NULL;
355 if (err)
356 goto out;
357 if (state->end <= end) {
358 state->state |= bits;
359 start = state->end + 1;
360 merge_state(tree, state);
361 if (last_end == (u64)-1)
362 goto out;
363 start = last_end + 1;
364 } else {
365 start = state->start;
367 goto search_again;
370 * | ---- desired range ---- |
371 * | state | or | state |
373 * There's a hole, we need to insert something in it and
374 * ignore the extent we found.
376 if (state->start > start) {
377 u64 this_end;
378 if (end < last_start)
379 this_end = end;
380 else
381 this_end = last_start -1;
382 err = insert_state(tree, prealloc, start, this_end,
383 bits);
384 BUG_ON(err == -EEXIST);
385 prealloc = NULL;
386 if (err)
387 goto out;
388 start = this_end + 1;
389 goto search_again;
392 * | ---- desired range ---- |
393 * | ---------- state ---------- |
394 * We need to split the extent, and set the bit
395 * on the first half
397 err = split_state(tree, state, prealloc, end + 1);
398 BUG_ON(err == -EEXIST);
400 state->state |= bits;
401 merge_state(tree, prealloc);
402 prealloc = NULL;
403 out:
404 if (prealloc)
405 btrfs_free_extent_state(prealloc);
406 return err;
407 search_again:
408 if (start > end)
409 goto out;
410 goto again;
413 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
414 gfp_t mask)
416 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
419 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
420 gfp_t mask)
422 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
425 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
426 u64 *start_ret, u64 *end_ret, int bits)
428 struct cache_extent *node;
429 struct extent_state *state;
430 int ret = 1;
433 * this search will find all the extents that end after
434 * our range starts.
436 node = search_cache_extent(&tree->state, start);
437 if (!node)
438 goto out;
440 while(1) {
441 state = container_of(node, struct extent_state, cache_node);
442 if (state->end >= start && (state->state & bits)) {
443 *start_ret = state->start;
444 *end_ret = state->end;
445 ret = 0;
446 break;
448 node = next_cache_extent(node);
449 if (!node)
450 break;
452 out:
453 return ret;
456 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
457 int bits, int filled)
459 struct extent_state *state = NULL;
460 struct cache_extent *node;
461 int bitset = 0;
463 node = search_cache_extent(&tree->state, start);
464 while (node && start <= end) {
465 state = container_of(node, struct extent_state, cache_node);
467 if (filled && state->start > start) {
468 bitset = 0;
469 break;
471 if (state->start > end)
472 break;
473 if (state->state & bits) {
474 bitset = 1;
475 if (!filled)
476 break;
477 } else if (filled) {
478 bitset = 0;
479 break;
481 start = state->end + 1;
482 if (start > end)
483 break;
484 node = next_cache_extent(node);
485 if (!node) {
486 if (filled)
487 bitset = 0;
488 break;
491 return bitset;
494 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
496 struct cache_extent *node;
497 struct extent_state *state;
498 int ret = 0;
500 node = search_cache_extent(&tree->state, start);
501 if (!node) {
502 ret = -ENOENT;
503 goto out;
505 state = container_of(node, struct extent_state, cache_node);
506 if (state->start != start) {
507 ret = -ENOENT;
508 goto out;
510 state->xprivate = private;
511 out:
512 return ret;
515 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
517 struct cache_extent *node;
518 struct extent_state *state;
519 int ret = 0;
521 node = search_cache_extent(&tree->state, start);
522 if (!node) {
523 ret = -ENOENT;
524 goto out;
526 state = container_of(node, struct extent_state, cache_node);
527 if (state->start != start) {
528 ret = -ENOENT;
529 goto out;
531 *private = state->xprivate;
532 out:
533 return ret;
536 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
537 u64 bytenr, u32 blocksize)
539 struct extent_buffer *eb;
541 eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
542 if (!eb) {
543 BUG();
544 return NULL;
547 eb->start = bytenr;
548 eb->len = blocksize;
549 eb->refs = 1;
550 eb->flags = 0;
551 eb->tree = tree;
552 eb->fd = -1;
553 eb->dev_bytenr = (u64)-1;
554 eb->cache_node.start = bytenr;
555 eb->cache_node.size = blocksize;
556 INIT_LIST_HEAD(&eb->recow);
558 return eb;
561 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
563 struct extent_buffer *new;
565 new = __alloc_extent_buffer(NULL, src->start, src->len);
566 if (new == NULL)
567 return NULL;
569 copy_extent_buffer(new, src, 0, 0, src->len);
570 new->flags |= EXTENT_BUFFER_DUMMY;
572 return new;
575 void free_extent_buffer(struct extent_buffer *eb)
577 if (!eb || IS_ERR(eb))
578 return;
580 eb->refs--;
581 BUG_ON(eb->refs < 0);
582 if (eb->refs == 0) {
583 struct extent_io_tree *tree = eb->tree;
584 BUG_ON(eb->flags & EXTENT_DIRTY);
585 list_del_init(&eb->lru);
586 list_del_init(&eb->recow);
587 if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
588 BUG_ON(tree->cache_size < eb->len);
589 remove_cache_extent(&tree->cache, &eb->cache_node);
590 tree->cache_size -= eb->len;
592 free(eb);
596 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
597 u64 bytenr, u32 blocksize)
599 struct extent_buffer *eb = NULL;
600 struct cache_extent *cache;
602 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
603 if (cache && cache->start == bytenr &&
604 cache->size == blocksize) {
605 eb = container_of(cache, struct extent_buffer, cache_node);
606 list_move_tail(&eb->lru, &tree->lru);
607 eb->refs++;
609 return eb;
612 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
613 u64 start)
615 struct extent_buffer *eb = NULL;
616 struct cache_extent *cache;
618 cache = search_cache_extent(&tree->cache, start);
619 if (cache) {
620 eb = container_of(cache, struct extent_buffer, cache_node);
621 list_move_tail(&eb->lru, &tree->lru);
622 eb->refs++;
624 return eb;
627 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
628 u64 bytenr, u32 blocksize)
630 struct extent_buffer *eb;
631 struct cache_extent *cache;
633 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
634 if (cache && cache->start == bytenr &&
635 cache->size == blocksize) {
636 eb = container_of(cache, struct extent_buffer, cache_node);
637 list_move_tail(&eb->lru, &tree->lru);
638 eb->refs++;
639 } else {
640 int ret;
642 if (cache) {
643 eb = container_of(cache, struct extent_buffer,
644 cache_node);
645 free_extent_buffer(eb);
647 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
648 if (!eb)
649 return NULL;
650 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
651 if (ret) {
652 free(eb);
653 return NULL;
655 list_add_tail(&eb->lru, &tree->lru);
656 tree->cache_size += blocksize;
658 return eb;
661 int read_extent_from_disk(struct extent_buffer *eb,
662 unsigned long offset, unsigned long len)
664 int ret;
665 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
666 if (ret < 0) {
667 ret = -errno;
668 goto out;
670 if (ret != len) {
671 ret = -EIO;
672 goto out;
674 ret = 0;
675 out:
676 return ret;
679 int write_extent_to_disk(struct extent_buffer *eb)
681 int ret;
682 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
683 if (ret < 0)
684 goto out;
685 if (ret != eb->len) {
686 ret = -EIO;
687 goto out;
689 ret = 0;
690 out:
691 return ret;
694 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
695 u64 bytes, int mirror)
697 struct btrfs_multi_bio *multi = NULL;
698 struct btrfs_device *device;
699 u64 bytes_left = bytes;
700 u64 read_len;
701 u64 total_read = 0;
702 int ret;
704 while (bytes_left) {
705 read_len = bytes_left;
706 ret = btrfs_map_block(&info->mapping_tree, READ, offset,
707 &read_len, &multi, mirror, NULL);
708 if (ret) {
709 fprintf(stderr, "Couldn't map the block %Lu\n",
710 offset);
711 return -EIO;
713 device = multi->stripes[0].dev;
715 read_len = min(bytes_left, read_len);
716 if (device->fd <= 0) {
717 kfree(multi);
718 return -EIO;
721 ret = pread(device->fd, buf + total_read, read_len,
722 multi->stripes[0].physical);
723 kfree(multi);
724 if (ret < 0) {
725 fprintf(stderr, "Error reading %Lu, %d\n", offset,
726 ret);
727 return ret;
729 if (ret != read_len) {
730 fprintf(stderr, "Short read for %Lu, read %d, "
731 "read_len %Lu\n", offset, ret, read_len);
732 return -EIO;
735 bytes_left -= read_len;
736 offset += read_len;
737 total_read += read_len;
740 return 0;
743 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
744 u64 bytes, int mirror)
746 struct btrfs_multi_bio *multi = NULL;
747 struct btrfs_device *device;
748 u64 bytes_left = bytes;
749 u64 this_len;
750 u64 total_write = 0;
751 u64 *raid_map = NULL;
752 u64 dev_bytenr;
753 int dev_nr;
754 int ret = 0;
756 while (bytes_left > 0) {
757 this_len = bytes_left;
758 dev_nr = 0;
760 ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
761 &this_len, &multi, mirror, &raid_map);
762 if (ret) {
763 fprintf(stderr, "Couldn't map the block %Lu\n",
764 offset);
765 return -EIO;
768 if (raid_map) {
769 struct extent_buffer *eb;
770 u64 stripe_len = this_len;
772 this_len = min(this_len, bytes_left);
773 this_len = min(this_len, (u64)info->tree_root->leafsize);
775 eb = malloc(sizeof(struct extent_buffer) + this_len);
776 BUG_ON(!eb);
778 memset(eb, 0, sizeof(struct extent_buffer) + this_len);
779 eb->start = offset;
780 eb->len = this_len;
782 memcpy(eb->data, buf + total_write, this_len);
783 ret = write_raid56_with_parity(info, eb, multi,
784 stripe_len, raid_map);
785 BUG_ON(ret);
787 free(eb);
788 kfree(raid_map);
789 raid_map = NULL;
790 } else while (dev_nr < multi->num_stripes) {
791 device = multi->stripes[dev_nr].dev;
792 if (device->fd <= 0) {
793 kfree(multi);
794 return -EIO;
797 dev_bytenr = multi->stripes[dev_nr].physical;
798 this_len = min(this_len, bytes_left);
799 dev_nr++;
801 ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
802 if (ret != this_len) {
803 if (ret < 0) {
804 fprintf(stderr, "Error writing to "
805 "device %d\n", errno);
806 ret = errno;
807 kfree(multi);
808 return ret;
809 } else {
810 fprintf(stderr, "Short write\n");
811 kfree(multi);
812 return -EIO;
817 BUG_ON(bytes_left < this_len);
819 bytes_left -= this_len;
820 offset += this_len;
821 total_write += this_len;
823 kfree(multi);
824 multi = NULL;
826 return 0;
829 int set_extent_buffer_dirty(struct extent_buffer *eb)
831 struct extent_io_tree *tree = eb->tree;
832 if (!(eb->flags & EXTENT_DIRTY)) {
833 eb->flags |= EXTENT_DIRTY;
834 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
835 extent_buffer_get(eb);
837 return 0;
840 int clear_extent_buffer_dirty(struct extent_buffer *eb)
842 struct extent_io_tree *tree = eb->tree;
843 if (eb->flags & EXTENT_DIRTY) {
844 eb->flags &= ~EXTENT_DIRTY;
845 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
846 free_extent_buffer(eb);
848 return 0;
851 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
852 unsigned long start, unsigned long len)
854 return memcmp(eb->data + start, ptrv, len);
857 void read_extent_buffer(struct extent_buffer *eb, void *dst,
858 unsigned long start, unsigned long len)
860 memcpy(dst, eb->data + start, len);
863 void write_extent_buffer(struct extent_buffer *eb, const void *src,
864 unsigned long start, unsigned long len)
866 memcpy(eb->data + start, src, len);
869 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
870 unsigned long dst_offset, unsigned long src_offset,
871 unsigned long len)
873 memcpy(dst->data + dst_offset, src->data + src_offset, len);
876 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
877 unsigned long src_offset, unsigned long len)
879 memmove(dst->data + dst_offset, dst->data + src_offset, len);
882 void memset_extent_buffer(struct extent_buffer *eb, char c,
883 unsigned long start, unsigned long len)
885 memset(eb->data + start, c, len);
888 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
889 unsigned long nr)
891 return test_bit(nr, (unsigned long *)(eb->data + start));