Recow all roots at the end of mkfs
[btrfs-progs-unstable.git] / extent_io.c
blob9071644db4532ae7ccb030ac9e4219d69e40218d
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18 #define _XOPEN_SOURCE 600
19 #define __USE_XOPEN2K
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include "kerncompat.h"
27 #include "extent_io.h"
28 #include "list.h"
30 u64 cache_max = 1024 * 1024 * 32;
32 void extent_io_tree_init(struct extent_io_tree *tree)
34 cache_tree_init(&tree->state);
35 cache_tree_init(&tree->cache);
36 INIT_LIST_HEAD(&tree->lru);
37 tree->cache_size = 0;
40 static struct extent_state *alloc_extent_state(void)
42 struct extent_state *state;
44 state = malloc(sizeof(*state));
45 if (!state)
46 return NULL;
47 state->refs = 1;
48 state->state = 0;
49 state->private = 0;
50 return state;
53 static void free_extent_state(struct extent_state *state)
55 state->refs--;
56 BUG_ON(state->refs < 0);
57 if (state->refs == 0)
58 free(state);
61 void extent_io_tree_cleanup(struct extent_io_tree *tree)
63 struct extent_state *es;
64 struct extent_buffer *eb;
65 struct cache_extent *cache;
67 while(!list_empty(&tree->lru)) {
68 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
69 if (eb->refs != 1) {
70 fprintf(stderr, "extent buffer leak: "
71 "start %llu len %u\n",
72 (unsigned long long)eb->start, eb->len);
73 eb->refs = 1;
75 free_extent_buffer(eb);
77 while (1) {
78 cache = find_first_cache_extent(&tree->state, 0);
79 if (!cache)
80 break;
81 es = container_of(cache, struct extent_state, cache_node);
82 remove_cache_extent(&tree->state, &es->cache_node);
83 free_extent_state(es);
87 static inline void update_extent_state(struct extent_state *state)
89 state->cache_node.start = state->start;
90 state->cache_node.size = state->end + 1 - state->start;
94 * Utility function to look for merge candidates inside a given range.
95 * Any extents with matching state are merged together into a single
96 * extent in the tree. Extents with EXTENT_IO in their state field are
97 * not merged
99 static int merge_state(struct extent_io_tree *tree,
100 struct extent_state *state)
102 struct extent_state *other;
103 struct cache_extent *other_node;
105 if (state->state & EXTENT_IOBITS)
106 return 0;
108 other_node = prev_cache_extent(&state->cache_node);
109 if (other_node) {
110 other = container_of(other_node, struct extent_state,
111 cache_node);
112 if (other->end == state->start - 1 &&
113 other->state == state->state) {
114 state->start = other->start;
115 update_extent_state(state);
116 remove_cache_extent(&tree->state, &other->cache_node);
117 free_extent_state(other);
120 other_node = next_cache_extent(&state->cache_node);
121 if (other_node) {
122 other = container_of(other_node, struct extent_state,
123 cache_node);
124 if (other->start == state->end + 1 &&
125 other->state == state->state) {
126 other->start = state->start;
127 update_extent_state(other);
128 remove_cache_extent(&tree->state, &state->cache_node);
129 free_extent_state(state);
132 return 0;
136 * insert an extent_state struct into the tree. 'bits' are set on the
137 * struct before it is inserted.
139 static int insert_state(struct extent_io_tree *tree,
140 struct extent_state *state, u64 start, u64 end,
141 int bits)
143 int ret;
145 BUG_ON(end < start);
146 state->state |= bits;
147 state->start = start;
148 state->end = end;
149 update_extent_state(state);
150 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
151 BUG_ON(ret);
152 merge_state(tree, state);
153 return 0;
157 * split a given extent state struct in two, inserting the preallocated
158 * struct 'prealloc' as the newly created second half. 'split' indicates an
159 * offset inside 'orig' where it should be split.
161 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
162 struct extent_state *prealloc, u64 split)
164 int ret;
165 prealloc->start = orig->start;
166 prealloc->end = split - 1;
167 prealloc->state = orig->state;
168 update_extent_state(prealloc);
169 orig->start = split;
170 update_extent_state(orig);
171 ret = insert_existing_cache_extent(&tree->state,
172 &prealloc->cache_node);
173 BUG_ON(ret);
174 return 0;
178 * clear some bits on a range in the tree.
180 static int clear_state_bit(struct extent_io_tree *tree,
181 struct extent_state *state, int bits)
183 int ret = state->state & bits;
185 state->state &= ~bits;
186 if (state->state == 0) {
187 remove_cache_extent(&tree->state, &state->cache_node);
188 free_extent_state(state);
189 } else {
190 merge_state(tree, state);
192 return ret;
196 * set some bits on a range in the tree.
198 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
199 u64 end, int bits, gfp_t mask)
201 struct extent_state *state;
202 struct extent_state *prealloc = NULL;
203 struct cache_extent *node;
204 int err;
205 int set = 0;
207 again:
208 prealloc = alloc_extent_state();
209 if (!prealloc)
210 return -ENOMEM;
213 * this search will find the extents that end after
214 * our range starts
216 node = find_first_cache_extent(&tree->state, start);
217 if (!node)
218 goto out;
219 state = container_of(node, struct extent_state, cache_node);
220 if (state->start > end)
221 goto out;
224 * | ---- desired range ---- |
225 * | state | or
226 * | ------------- state -------------- |
228 * We need to split the extent we found, and may flip
229 * bits on second half.
231 * If the extent we found extends past our range, we
232 * just split and search again. It'll get split again
233 * the next time though.
235 * If the extent we found is inside our range, we clear
236 * the desired bit on it.
238 if (state->start < start) {
239 err = split_state(tree, state, prealloc, start);
240 BUG_ON(err == -EEXIST);
241 prealloc = NULL;
242 if (err)
243 goto out;
244 if (state->end <= end) {
245 start = state->end + 1;
246 set |= clear_state_bit(tree, state, bits);
247 } else {
248 start = state->start;
250 goto search_again;
253 * | ---- desired range ---- |
254 * | state |
255 * We need to split the extent, and clear the bit
256 * on the first half
258 if (state->start <= end && state->end > end) {
259 err = split_state(tree, state, prealloc, end + 1);
260 BUG_ON(err == -EEXIST);
262 set |= clear_state_bit(tree, prealloc, bits);
263 prealloc = NULL;
264 goto out;
267 start = state->end + 1;
268 set |= clear_state_bit(tree, state, bits);
269 goto search_again;
270 out:
271 if (prealloc)
272 free_extent_state(prealloc);
273 return set;
275 search_again:
276 if (start > end)
277 goto out;
278 goto again;
282 * set some bits on a range in the tree.
284 int set_extent_bits(struct extent_io_tree *tree, u64 start,
285 u64 end, int bits, gfp_t mask)
287 struct extent_state *state;
288 struct extent_state *prealloc = NULL;
289 struct cache_extent *node;
290 int err = 0;
291 int set;
292 u64 last_start;
293 u64 last_end;
294 again:
295 prealloc = alloc_extent_state();
296 if (!prealloc)
297 return -ENOMEM;
300 * this search will find the extents that end after
301 * our range starts
303 node = find_first_cache_extent(&tree->state, start);
304 if (!node) {
305 err = insert_state(tree, prealloc, start, end, bits);
306 BUG_ON(err == -EEXIST);
307 prealloc = NULL;
308 goto out;
311 state = container_of(node, struct extent_state, cache_node);
312 last_start = state->start;
313 last_end = state->end;
316 * | ---- desired range ---- |
317 * | state |
319 * Just lock what we found and keep going
321 if (state->start == start && state->end <= end) {
322 set = state->state & bits;
323 state->state |= bits;
324 start = state->end + 1;
325 merge_state(tree, state);
326 goto search_again;
329 * | ---- desired range ---- |
330 * | state |
331 * or
332 * | ------------- state -------------- |
334 * We need to split the extent we found, and may flip bits on
335 * second half.
337 * If the extent we found extends past our
338 * range, we just split and search again. It'll get split
339 * again the next time though.
341 * If the extent we found is inside our range, we set the
342 * desired bit on it.
344 if (state->start < start) {
345 set = state->state & bits;
346 err = split_state(tree, state, prealloc, start);
347 BUG_ON(err == -EEXIST);
348 prealloc = NULL;
349 if (err)
350 goto out;
351 if (state->end <= end) {
352 state->state |= bits;
353 start = state->end + 1;
354 merge_state(tree, state);
355 } else {
356 start = state->start;
358 goto search_again;
361 * | ---- desired range ---- |
362 * | state | or | state |
364 * There's a hole, we need to insert something in it and
365 * ignore the extent we found.
367 if (state->start > start) {
368 u64 this_end;
369 if (end < last_start)
370 this_end = end;
371 else
372 this_end = last_start -1;
373 err = insert_state(tree, prealloc, start, this_end,
374 bits);
375 BUG_ON(err == -EEXIST);
376 prealloc = NULL;
377 if (err)
378 goto out;
379 start = this_end + 1;
380 goto search_again;
383 * | ---- desired range ---- |
384 * | ---------- state ---------- |
385 * We need to split the extent, and set the bit
386 * on the first half
388 set = state->state & bits;
389 err = split_state(tree, state, prealloc, end + 1);
390 BUG_ON(err == -EEXIST);
392 state->state |= bits;
393 merge_state(tree, prealloc);
394 prealloc = NULL;
395 out:
396 if (prealloc)
397 free_extent_state(prealloc);
398 return err;
399 search_again:
400 if (start > end)
401 goto out;
402 goto again;
405 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
406 gfp_t mask)
408 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
411 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
412 gfp_t mask)
414 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
417 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
418 u64 *start_ret, u64 *end_ret, int bits)
420 struct cache_extent *node;
421 struct extent_state *state;
422 int ret = 1;
425 * this search will find all the extents that end after
426 * our range starts.
428 node = find_first_cache_extent(&tree->state, start);
429 if (!node)
430 goto out;
432 while(1) {
433 state = container_of(node, struct extent_state, cache_node);
434 if (state->end >= start && (state->state & bits)) {
435 *start_ret = state->start;
436 *end_ret = state->end;
437 ret = 0;
438 break;
440 node = next_cache_extent(node);
441 if (!node)
442 break;
444 out:
445 return ret;
448 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
449 int bits, int filled)
451 struct extent_state *state = NULL;
452 struct cache_extent *node;
453 int bitset = 0;
455 node = find_first_cache_extent(&tree->state, start);
456 while (node && start <= end) {
457 state = container_of(node, struct extent_state, cache_node);
459 if (filled && state->start > start) {
460 bitset = 0;
461 break;
463 if (state->start > end)
464 break;
465 if (state->state & bits) {
466 bitset = 1;
467 if (!filled)
468 break;
469 } else if (filled) {
470 bitset = 0;
471 break;
473 start = state->end + 1;
474 if (start > end)
475 break;
476 node = next_cache_extent(node);
477 if (!node) {
478 if (filled)
479 bitset = 0;
480 break;
483 return bitset;
486 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
488 struct cache_extent *node;
489 struct extent_state *state;
490 int ret = 0;
492 node = find_first_cache_extent(&tree->state, start);
493 if (!node) {
494 ret = -ENOENT;
495 goto out;
497 state = container_of(node, struct extent_state, cache_node);
498 if (state->start != start) {
499 ret = -ENOENT;
500 goto out;
502 state->private = private;
503 out:
504 return ret;
507 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
509 struct cache_extent *node;
510 struct extent_state *state;
511 int ret = 0;
513 node = find_first_cache_extent(&tree->state, start);
514 if (!node) {
515 ret = -ENOENT;
516 goto out;
518 state = container_of(node, struct extent_state, cache_node);
519 if (state->start != start) {
520 ret = -ENOENT;
521 goto out;
523 *private = state->private;
524 out:
525 return ret;
528 static int free_some_buffers(struct extent_io_tree *tree)
530 u32 nrscan = 0;
531 struct extent_buffer *eb;
532 struct list_head *node, *next;
534 if (tree->cache_size < cache_max)
535 return 0;
536 list_for_each_safe(node, next, &tree->lru) {
537 eb = list_entry(node, struct extent_buffer, lru);
538 if (eb->refs == 1) {
539 free_extent_buffer(eb);
540 if (tree->cache_size < cache_max)
541 break;
543 if (nrscan++ > 64)
544 break;
546 return 0;
549 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
550 u64 bytenr, u32 blocksize)
552 struct extent_buffer *eb;
553 int ret;
555 eb = malloc(sizeof(struct extent_buffer) + blocksize);
556 if (!eb) {
557 BUG();
558 return NULL;
561 eb->start = bytenr;
562 eb->len = blocksize;
563 eb->refs = 2;
564 eb->flags = 0;
565 eb->tree = tree;
566 eb->fd = -1;
567 eb->dev_bytenr = (u64)-1;
568 eb->cache_node.start = bytenr;
569 eb->cache_node.size = blocksize;
571 free_some_buffers(tree);
572 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
573 if (ret) {
574 free(eb);
575 return NULL;
577 list_add_tail(&eb->lru, &tree->lru);
578 tree->cache_size += blocksize;
579 return eb;
582 void free_extent_buffer(struct extent_buffer *eb)
584 if (!eb)
585 return;
587 eb->refs--;
588 BUG_ON(eb->refs < 0);
589 if (eb->refs == 0) {
590 struct extent_io_tree *tree = eb->tree;
591 BUG_ON(eb->flags & EXTENT_DIRTY);
592 list_del_init(&eb->lru);
593 remove_cache_extent(&tree->cache, &eb->cache_node);
594 BUG_ON(tree->cache_size < eb->len);
595 tree->cache_size -= eb->len;
596 free(eb);
600 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
601 u64 bytenr, u32 blocksize)
603 struct extent_buffer *eb = NULL;
604 struct cache_extent *cache;
606 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
607 if (cache && cache->start == bytenr && cache->size == blocksize) {
608 eb = container_of(cache, struct extent_buffer, cache_node);
609 list_move_tail(&eb->lru, &tree->lru);
610 eb->refs++;
612 return eb;
615 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
616 u64 start)
618 struct extent_buffer *eb = NULL;
619 struct cache_extent *cache;
621 cache = find_first_cache_extent(&tree->cache, start);
622 if (cache) {
623 eb = container_of(cache, struct extent_buffer, cache_node);
624 list_move_tail(&eb->lru, &tree->lru);
625 eb->refs++;
627 return eb;
630 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
631 u64 bytenr, u32 blocksize)
633 struct extent_buffer *eb;
634 struct cache_extent *cache;
636 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
637 if (cache && cache->start == bytenr && cache->size == blocksize) {
638 eb = container_of(cache, struct extent_buffer, cache_node);
639 list_move_tail(&eb->lru, &tree->lru);
640 eb->refs++;
641 } else {
642 if (cache) {
643 eb = container_of(cache, struct extent_buffer,
644 cache_node);
645 BUG_ON(eb->refs != 1);
646 free_extent_buffer(eb);
648 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
650 return eb;
653 int read_extent_from_disk(struct extent_buffer *eb)
655 int ret;
656 ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
657 if (ret < 0)
658 goto out;
659 if (ret != eb->len) {
660 ret = -EIO;
661 goto out;
663 ret = 0;
664 out:
665 return ret;
668 int write_extent_to_disk(struct extent_buffer *eb)
670 int ret;
671 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
672 if (ret < 0)
673 goto out;
674 if (ret != eb->len) {
675 ret = -EIO;
676 goto out;
678 ret = 0;
679 out:
680 return ret;
683 int set_extent_buffer_uptodate(struct extent_buffer *eb)
685 eb->flags |= EXTENT_UPTODATE;
686 return 0;
689 int extent_buffer_uptodate(struct extent_buffer *eb)
691 if (eb->flags & EXTENT_UPTODATE)
692 return 1;
693 return 0;
696 int set_extent_buffer_dirty(struct extent_buffer *eb)
698 struct extent_io_tree *tree = eb->tree;
699 if (!(eb->flags & EXTENT_DIRTY)) {
700 eb->flags |= EXTENT_DIRTY;
701 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
702 extent_buffer_get(eb);
704 return 0;
707 int clear_extent_buffer_dirty(struct extent_buffer *eb)
709 struct extent_io_tree *tree = eb->tree;
710 if (eb->flags & EXTENT_DIRTY) {
711 eb->flags &= ~EXTENT_DIRTY;
712 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
713 free_extent_buffer(eb);
715 return 0;
718 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
719 unsigned long start, unsigned long len)
721 return memcmp(eb->data + start, ptrv, len);
724 void read_extent_buffer(struct extent_buffer *eb, void *dst,
725 unsigned long start, unsigned long len)
727 memcpy(dst, eb->data + start, len);
730 void write_extent_buffer(struct extent_buffer *eb, const void *src,
731 unsigned long start, unsigned long len)
733 memcpy(eb->data + start, src, len);
736 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
737 unsigned long dst_offset, unsigned long src_offset,
738 unsigned long len)
740 memcpy(dst->data + dst_offset, src->data + src_offset, len);
743 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
744 unsigned long src_offset, unsigned long len)
746 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
749 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
750 unsigned long src_offset, unsigned long len)
752 memmove(dst->data + dst_offset, dst->data + src_offset, len);
755 void memset_extent_buffer(struct extent_buffer *eb, char c,
756 unsigned long start, unsigned long len)
758 memset(eb->data + start, c, len);