btrfs-progs: move prefixcmp helper to utils
[btrfs-progs-unstable/devel.git] / extent_io.c
blob7b9eb8eff2e97056273ec0d0f7f1371a91b3aecd
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include "kerncompat.h"
26 #include "extent_io.h"
27 #include "list.h"
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "internal.h"
32 void extent_io_tree_init(struct extent_io_tree *tree)
34 cache_tree_init(&tree->state);
35 cache_tree_init(&tree->cache);
36 INIT_LIST_HEAD(&tree->lru);
37 tree->cache_size = 0;
40 static struct extent_state *alloc_extent_state(void)
42 struct extent_state *state;
44 state = malloc(sizeof(*state));
45 if (!state)
46 return NULL;
47 state->cache_node.objectid = 0;
48 state->refs = 1;
49 state->state = 0;
50 state->xprivate = 0;
51 return state;
54 static void btrfs_free_extent_state(struct extent_state *state)
56 state->refs--;
57 BUG_ON(state->refs < 0);
58 if (state->refs == 0)
59 free(state);
62 static void free_extent_state_func(struct cache_extent *cache)
64 struct extent_state *es;
66 es = container_of(cache, struct extent_state, cache_node);
67 btrfs_free_extent_state(es);
70 void extent_io_tree_cleanup(struct extent_io_tree *tree)
72 struct extent_buffer *eb;
74 while(!list_empty(&tree->lru)) {
75 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
76 fprintf(stderr, "extent buffer leak: "
77 "start %llu len %u\n",
78 (unsigned long long)eb->start, eb->len);
79 free_extent_buffer(eb);
82 cache_tree_free_extents(&tree->state, free_extent_state_func);
85 static inline void update_extent_state(struct extent_state *state)
87 state->cache_node.start = state->start;
88 state->cache_node.size = state->end + 1 - state->start;
92 * Utility function to look for merge candidates inside a given range.
93 * Any extents with matching state are merged together into a single
94 * extent in the tree. Extents with EXTENT_IO in their state field are
95 * not merged
97 static int merge_state(struct extent_io_tree *tree,
98 struct extent_state *state)
100 struct extent_state *other;
101 struct cache_extent *other_node;
103 if (state->state & EXTENT_IOBITS)
104 return 0;
106 other_node = prev_cache_extent(&state->cache_node);
107 if (other_node) {
108 other = container_of(other_node, struct extent_state,
109 cache_node);
110 if (other->end == state->start - 1 &&
111 other->state == state->state) {
112 state->start = other->start;
113 update_extent_state(state);
114 remove_cache_extent(&tree->state, &other->cache_node);
115 btrfs_free_extent_state(other);
118 other_node = next_cache_extent(&state->cache_node);
119 if (other_node) {
120 other = container_of(other_node, struct extent_state,
121 cache_node);
122 if (other->start == state->end + 1 &&
123 other->state == state->state) {
124 other->start = state->start;
125 update_extent_state(other);
126 remove_cache_extent(&tree->state, &state->cache_node);
127 btrfs_free_extent_state(state);
130 return 0;
134 * insert an extent_state struct into the tree. 'bits' are set on the
135 * struct before it is inserted.
137 static int insert_state(struct extent_io_tree *tree,
138 struct extent_state *state, u64 start, u64 end,
139 int bits)
141 int ret;
143 BUG_ON(end < start);
144 state->state |= bits;
145 state->start = start;
146 state->end = end;
147 update_extent_state(state);
148 ret = insert_cache_extent(&tree->state, &state->cache_node);
149 BUG_ON(ret);
150 merge_state(tree, state);
151 return 0;
155 * split a given extent state struct in two, inserting the preallocated
156 * struct 'prealloc' as the newly created second half. 'split' indicates an
157 * offset inside 'orig' where it should be split.
159 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
160 struct extent_state *prealloc, u64 split)
162 int ret;
163 prealloc->start = orig->start;
164 prealloc->end = split - 1;
165 prealloc->state = orig->state;
166 update_extent_state(prealloc);
167 orig->start = split;
168 update_extent_state(orig);
169 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
170 BUG_ON(ret);
171 return 0;
175 * clear some bits on a range in the tree.
177 static int clear_state_bit(struct extent_io_tree *tree,
178 struct extent_state *state, int bits)
180 int ret = state->state & bits;
182 state->state &= ~bits;
183 if (state->state == 0) {
184 remove_cache_extent(&tree->state, &state->cache_node);
185 btrfs_free_extent_state(state);
186 } else {
187 merge_state(tree, state);
189 return ret;
193 * clear some bits on a range in the tree.
195 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
196 u64 end, int bits, gfp_t mask)
198 struct extent_state *state;
199 struct extent_state *prealloc = NULL;
200 struct cache_extent *node;
201 u64 last_end;
202 int err;
203 int set = 0;
205 again:
206 if (!prealloc) {
207 prealloc = alloc_extent_state();
208 if (!prealloc)
209 return -ENOMEM;
213 * this search will find the extents that end after
214 * our range starts
216 node = search_cache_extent(&tree->state, start);
217 if (!node)
218 goto out;
219 state = container_of(node, struct extent_state, cache_node);
220 if (state->start > end)
221 goto out;
222 last_end = state->end;
225 * | ---- desired range ---- |
226 * | state | or
227 * | ------------- state -------------- |
229 * We need to split the extent we found, and may flip
230 * bits on second half.
232 * If the extent we found extends past our range, we
233 * just split and search again. It'll get split again
234 * the next time though.
236 * If the extent we found is inside our range, we clear
237 * the desired bit on it.
239 if (state->start < start) {
240 err = split_state(tree, state, prealloc, start);
241 BUG_ON(err == -EEXIST);
242 prealloc = NULL;
243 if (err)
244 goto out;
245 if (state->end <= end) {
246 set |= clear_state_bit(tree, state, bits);
247 if (last_end == (u64)-1)
248 goto out;
249 start = last_end + 1;
250 } else {
251 start = state->start;
253 goto search_again;
256 * | ---- desired range ---- |
257 * | state |
258 * We need to split the extent, and clear the bit
259 * on the first half
261 if (state->start <= end && state->end > end) {
262 err = split_state(tree, state, prealloc, end + 1);
263 BUG_ON(err == -EEXIST);
265 set |= clear_state_bit(tree, prealloc, bits);
266 prealloc = NULL;
267 goto out;
270 start = state->end + 1;
271 set |= clear_state_bit(tree, state, bits);
272 if (last_end == (u64)-1)
273 goto out;
274 start = last_end + 1;
275 goto search_again;
276 out:
277 if (prealloc)
278 btrfs_free_extent_state(prealloc);
279 return set;
281 search_again:
282 if (start > end)
283 goto out;
284 goto again;
288 * set some bits on a range in the tree.
290 int set_extent_bits(struct extent_io_tree *tree, u64 start,
291 u64 end, int bits, gfp_t mask)
293 struct extent_state *state;
294 struct extent_state *prealloc = NULL;
295 struct cache_extent *node;
296 int err = 0;
297 u64 last_start;
298 u64 last_end;
299 again:
300 if (!prealloc) {
301 prealloc = alloc_extent_state();
302 if (!prealloc)
303 return -ENOMEM;
307 * this search will find the extents that end after
308 * our range starts
310 node = search_cache_extent(&tree->state, start);
311 if (!node) {
312 err = insert_state(tree, prealloc, start, end, bits);
313 BUG_ON(err == -EEXIST);
314 prealloc = NULL;
315 goto out;
318 state = container_of(node, struct extent_state, cache_node);
319 last_start = state->start;
320 last_end = state->end;
323 * | ---- desired range ---- |
324 * | state |
326 * Just lock what we found and keep going
328 if (state->start == start && state->end <= end) {
329 state->state |= bits;
330 merge_state(tree, state);
331 if (last_end == (u64)-1)
332 goto out;
333 start = last_end + 1;
334 goto search_again;
337 * | ---- desired range ---- |
338 * | state |
339 * or
340 * | ------------- state -------------- |
342 * We need to split the extent we found, and may flip bits on
343 * second half.
345 * If the extent we found extends past our
346 * range, we just split and search again. It'll get split
347 * again the next time though.
349 * If the extent we found is inside our range, we set the
350 * desired bit on it.
352 if (state->start < start) {
353 err = split_state(tree, state, prealloc, start);
354 BUG_ON(err == -EEXIST);
355 prealloc = NULL;
356 if (err)
357 goto out;
358 if (state->end <= end) {
359 state->state |= bits;
360 start = state->end + 1;
361 merge_state(tree, state);
362 if (last_end == (u64)-1)
363 goto out;
364 start = last_end + 1;
365 } else {
366 start = state->start;
368 goto search_again;
371 * | ---- desired range ---- |
372 * | state | or | state |
374 * There's a hole, we need to insert something in it and
375 * ignore the extent we found.
377 if (state->start > start) {
378 u64 this_end;
379 if (end < last_start)
380 this_end = end;
381 else
382 this_end = last_start -1;
383 err = insert_state(tree, prealloc, start, this_end,
384 bits);
385 BUG_ON(err == -EEXIST);
386 prealloc = NULL;
387 if (err)
388 goto out;
389 start = this_end + 1;
390 goto search_again;
393 * | ---- desired range ---- |
394 * | ---------- state ---------- |
395 * We need to split the extent, and set the bit
396 * on the first half
398 err = split_state(tree, state, prealloc, end + 1);
399 BUG_ON(err == -EEXIST);
401 state->state |= bits;
402 merge_state(tree, prealloc);
403 prealloc = NULL;
404 out:
405 if (prealloc)
406 btrfs_free_extent_state(prealloc);
407 return err;
408 search_again:
409 if (start > end)
410 goto out;
411 goto again;
414 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
415 gfp_t mask)
417 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
420 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
421 gfp_t mask)
423 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
426 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
427 u64 *start_ret, u64 *end_ret, int bits)
429 struct cache_extent *node;
430 struct extent_state *state;
431 int ret = 1;
434 * this search will find all the extents that end after
435 * our range starts.
437 node = search_cache_extent(&tree->state, start);
438 if (!node)
439 goto out;
441 while(1) {
442 state = container_of(node, struct extent_state, cache_node);
443 if (state->end >= start && (state->state & bits)) {
444 *start_ret = state->start;
445 *end_ret = state->end;
446 ret = 0;
447 break;
449 node = next_cache_extent(node);
450 if (!node)
451 break;
453 out:
454 return ret;
457 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
458 int bits, int filled)
460 struct extent_state *state = NULL;
461 struct cache_extent *node;
462 int bitset = 0;
464 node = search_cache_extent(&tree->state, start);
465 while (node && start <= end) {
466 state = container_of(node, struct extent_state, cache_node);
468 if (filled && state->start > start) {
469 bitset = 0;
470 break;
472 if (state->start > end)
473 break;
474 if (state->state & bits) {
475 bitset = 1;
476 if (!filled)
477 break;
478 } else if (filled) {
479 bitset = 0;
480 break;
482 start = state->end + 1;
483 if (start > end)
484 break;
485 node = next_cache_extent(node);
486 if (!node) {
487 if (filled)
488 bitset = 0;
489 break;
492 return bitset;
495 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
497 struct cache_extent *node;
498 struct extent_state *state;
499 int ret = 0;
501 node = search_cache_extent(&tree->state, start);
502 if (!node) {
503 ret = -ENOENT;
504 goto out;
506 state = container_of(node, struct extent_state, cache_node);
507 if (state->start != start) {
508 ret = -ENOENT;
509 goto out;
511 state->xprivate = private;
512 out:
513 return ret;
516 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
518 struct cache_extent *node;
519 struct extent_state *state;
520 int ret = 0;
522 node = search_cache_extent(&tree->state, start);
523 if (!node) {
524 ret = -ENOENT;
525 goto out;
527 state = container_of(node, struct extent_state, cache_node);
528 if (state->start != start) {
529 ret = -ENOENT;
530 goto out;
532 *private = state->xprivate;
533 out:
534 return ret;
537 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
538 u64 bytenr, u32 blocksize)
540 struct extent_buffer *eb;
542 eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
543 if (!eb)
544 return NULL;
546 eb->start = bytenr;
547 eb->len = blocksize;
548 eb->refs = 1;
549 eb->flags = 0;
550 eb->tree = tree;
551 eb->fd = -1;
552 eb->dev_bytenr = (u64)-1;
553 eb->cache_node.start = bytenr;
554 eb->cache_node.size = blocksize;
555 INIT_LIST_HEAD(&eb->recow);
557 return eb;
560 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
562 struct extent_buffer *new;
564 new = __alloc_extent_buffer(NULL, src->start, src->len);
565 if (!new)
566 return NULL;
568 copy_extent_buffer(new, src, 0, 0, src->len);
569 new->flags |= EXTENT_BUFFER_DUMMY;
571 return new;
574 void free_extent_buffer(struct extent_buffer *eb)
576 if (!eb || IS_ERR(eb))
577 return;
579 eb->refs--;
580 BUG_ON(eb->refs < 0);
581 if (eb->refs == 0) {
582 struct extent_io_tree *tree = eb->tree;
583 BUG_ON(eb->flags & EXTENT_DIRTY);
584 list_del_init(&eb->lru);
585 list_del_init(&eb->recow);
586 if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
587 BUG_ON(tree->cache_size < eb->len);
588 remove_cache_extent(&tree->cache, &eb->cache_node);
589 tree->cache_size -= eb->len;
591 free(eb);
595 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
596 u64 bytenr, u32 blocksize)
598 struct extent_buffer *eb = NULL;
599 struct cache_extent *cache;
601 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
602 if (cache && cache->start == bytenr &&
603 cache->size == blocksize) {
604 eb = container_of(cache, struct extent_buffer, cache_node);
605 list_move_tail(&eb->lru, &tree->lru);
606 eb->refs++;
608 return eb;
611 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
612 u64 start)
614 struct extent_buffer *eb = NULL;
615 struct cache_extent *cache;
617 cache = search_cache_extent(&tree->cache, start);
618 if (cache) {
619 eb = container_of(cache, struct extent_buffer, cache_node);
620 list_move_tail(&eb->lru, &tree->lru);
621 eb->refs++;
623 return eb;
626 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
627 u64 bytenr, u32 blocksize)
629 struct extent_buffer *eb;
630 struct cache_extent *cache;
632 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
633 if (cache && cache->start == bytenr &&
634 cache->size == blocksize) {
635 eb = container_of(cache, struct extent_buffer, cache_node);
636 list_move_tail(&eb->lru, &tree->lru);
637 eb->refs++;
638 } else {
639 int ret;
641 if (cache) {
642 eb = container_of(cache, struct extent_buffer,
643 cache_node);
644 free_extent_buffer(eb);
646 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
647 if (!eb)
648 return NULL;
649 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
650 if (ret) {
651 free(eb);
652 return NULL;
654 list_add_tail(&eb->lru, &tree->lru);
655 tree->cache_size += blocksize;
657 return eb;
660 int read_extent_from_disk(struct extent_buffer *eb,
661 unsigned long offset, unsigned long len)
663 int ret;
664 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
665 if (ret < 0) {
666 ret = -errno;
667 goto out;
669 if (ret != len) {
670 ret = -EIO;
671 goto out;
673 ret = 0;
674 out:
675 return ret;
678 int write_extent_to_disk(struct extent_buffer *eb)
680 int ret;
681 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
682 if (ret < 0)
683 goto out;
684 if (ret != eb->len) {
685 ret = -EIO;
686 goto out;
688 ret = 0;
689 out:
690 return ret;
693 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
694 u64 bytes, int mirror)
696 struct btrfs_multi_bio *multi = NULL;
697 struct btrfs_device *device;
698 u64 bytes_left = bytes;
699 u64 read_len;
700 u64 total_read = 0;
701 int ret;
703 while (bytes_left) {
704 read_len = bytes_left;
705 ret = btrfs_map_block(&info->mapping_tree, READ, offset,
706 &read_len, &multi, mirror, NULL);
707 if (ret) {
708 fprintf(stderr, "Couldn't map the block %Lu\n",
709 offset);
710 return -EIO;
712 device = multi->stripes[0].dev;
714 read_len = min(bytes_left, read_len);
715 if (device->fd <= 0) {
716 kfree(multi);
717 return -EIO;
720 ret = pread(device->fd, buf + total_read, read_len,
721 multi->stripes[0].physical);
722 kfree(multi);
723 if (ret < 0) {
724 fprintf(stderr, "Error reading %Lu, %d\n", offset,
725 ret);
726 return ret;
728 if (ret != read_len) {
729 fprintf(stderr, "Short read for %Lu, read %d, "
730 "read_len %Lu\n", offset, ret, read_len);
731 return -EIO;
734 bytes_left -= read_len;
735 offset += read_len;
736 total_read += read_len;
739 return 0;
742 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
743 u64 bytes, int mirror)
745 struct btrfs_multi_bio *multi = NULL;
746 struct btrfs_device *device;
747 u64 bytes_left = bytes;
748 u64 this_len;
749 u64 total_write = 0;
750 u64 *raid_map = NULL;
751 u64 dev_bytenr;
752 int dev_nr;
753 int ret = 0;
755 while (bytes_left > 0) {
756 this_len = bytes_left;
757 dev_nr = 0;
759 ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
760 &this_len, &multi, mirror, &raid_map);
761 if (ret) {
762 fprintf(stderr, "Couldn't map the block %Lu\n",
763 offset);
764 return -EIO;
767 if (raid_map) {
768 struct extent_buffer *eb;
769 u64 stripe_len = this_len;
771 this_len = min(this_len, bytes_left);
772 this_len = min(this_len, (u64)info->tree_root->nodesize);
774 eb = malloc(sizeof(struct extent_buffer) + this_len);
775 if (!eb) {
776 fprintf(stderr, "cannot allocate memory for eb\n");
777 ret = -ENOMEM;
778 goto out;
781 memset(eb, 0, sizeof(struct extent_buffer) + this_len);
782 eb->start = offset;
783 eb->len = this_len;
785 memcpy(eb->data, buf + total_write, this_len);
786 ret = write_raid56_with_parity(info, eb, multi,
787 stripe_len, raid_map);
788 BUG_ON(ret);
790 free(eb);
791 kfree(raid_map);
792 raid_map = NULL;
793 } else while (dev_nr < multi->num_stripes) {
794 device = multi->stripes[dev_nr].dev;
795 if (device->fd <= 0) {
796 kfree(multi);
797 return -EIO;
800 dev_bytenr = multi->stripes[dev_nr].physical;
801 this_len = min(this_len, bytes_left);
802 dev_nr++;
804 ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
805 if (ret != this_len) {
806 if (ret < 0) {
807 fprintf(stderr, "Error writing to "
808 "device %d\n", errno);
809 ret = errno;
810 kfree(multi);
811 return ret;
812 } else {
813 fprintf(stderr, "Short write\n");
814 kfree(multi);
815 return -EIO;
820 BUG_ON(bytes_left < this_len);
822 bytes_left -= this_len;
823 offset += this_len;
824 total_write += this_len;
826 kfree(multi);
827 multi = NULL;
829 return 0;
831 out:
832 kfree(raid_map);
833 return ret;
836 int set_extent_buffer_dirty(struct extent_buffer *eb)
838 struct extent_io_tree *tree = eb->tree;
839 if (!(eb->flags & EXTENT_DIRTY)) {
840 eb->flags |= EXTENT_DIRTY;
841 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
842 extent_buffer_get(eb);
844 return 0;
847 int clear_extent_buffer_dirty(struct extent_buffer *eb)
849 struct extent_io_tree *tree = eb->tree;
850 if (eb->flags & EXTENT_DIRTY) {
851 eb->flags &= ~EXTENT_DIRTY;
852 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
853 free_extent_buffer(eb);
855 return 0;
858 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
859 unsigned long start, unsigned long len)
861 return memcmp(eb->data + start, ptrv, len);
864 void read_extent_buffer(struct extent_buffer *eb, void *dst,
865 unsigned long start, unsigned long len)
867 memcpy(dst, eb->data + start, len);
870 void write_extent_buffer(struct extent_buffer *eb, const void *src,
871 unsigned long start, unsigned long len)
873 memcpy(eb->data + start, src, len);
876 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
877 unsigned long dst_offset, unsigned long src_offset,
878 unsigned long len)
880 memcpy(dst->data + dst_offset, src->data + src_offset, len);
883 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
884 unsigned long src_offset, unsigned long len)
886 memmove(dst->data + dst_offset, dst->data + src_offset, len);
889 void memset_extent_buffer(struct extent_buffer *eb, char c,
890 unsigned long start, unsigned long len)
892 memset(eb->data + start, c, len);
895 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
896 unsigned long nr)
898 return le_test_bit(nr, (u8 *)eb->data + start);