Fix inode link count checks in btrfsck
[btrfs-progs-unstable.git] / extent_io.c
blob069c199c7d9db2f6fea57a5886c7635acfc67028
2 /*
3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
19 #define _XOPEN_SOURCE 600
20 #define __USE_XOPEN2K
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
29 #include "list.h"
31 u64 cache_max = 1024 * 1024 * 32;
33 void extent_io_tree_init(struct extent_io_tree *tree)
35 cache_tree_init(&tree->state);
36 cache_tree_init(&tree->cache);
37 INIT_LIST_HEAD(&tree->lru);
38 tree->cache_size = 0;
41 static struct extent_state *alloc_extent_state(void)
43 struct extent_state *state;
45 state = malloc(sizeof(*state));
46 if (!state)
47 return NULL;
48 state->refs = 1;
49 state->state = 0;
50 state->private = 0;
51 return state;
54 static void free_extent_state(struct extent_state *state)
56 state->refs--;
57 BUG_ON(state->refs < 0);
58 if (state->refs == 0)
59 free(state);
62 void extent_io_tree_cleanup(struct extent_io_tree *tree)
64 struct extent_state *es;
65 struct extent_buffer *eb;
66 struct cache_extent *cache;
68 while(!list_empty(&tree->lru)) {
69 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
70 if (eb->refs != 1) {
71 fprintf(stderr, "extent buffer leak: "
72 "start %llu len %u\n",
73 (unsigned long long)eb->start, eb->len);
74 eb->refs = 1;
76 free_extent_buffer(eb);
78 while (1) {
79 cache = find_first_cache_extent(&tree->state, 0);
80 if (!cache)
81 break;
82 es = container_of(cache, struct extent_state, cache_node);
83 remove_cache_extent(&tree->state, &es->cache_node);
84 free_extent_state(es);
88 static inline void update_extent_state(struct extent_state *state)
90 state->cache_node.start = state->start;
91 state->cache_node.size = state->end + 1 - state->start;
95 * Utility function to look for merge candidates inside a given range.
96 * Any extents with matching state are merged together into a single
97 * extent in the tree. Extents with EXTENT_IO in their state field are
98 * not merged
100 static int merge_state(struct extent_io_tree *tree,
101 struct extent_state *state)
103 struct extent_state *other;
104 struct cache_extent *other_node;
106 if (state->state & EXTENT_IOBITS)
107 return 0;
109 other_node = prev_cache_extent(&state->cache_node);
110 if (other_node) {
111 other = container_of(other_node, struct extent_state,
112 cache_node);
113 if (other->end == state->start - 1 &&
114 other->state == state->state) {
115 state->start = other->start;
116 update_extent_state(state);
117 remove_cache_extent(&tree->state, &other->cache_node);
118 free_extent_state(other);
121 other_node = next_cache_extent(&state->cache_node);
122 if (other_node) {
123 other = container_of(other_node, struct extent_state,
124 cache_node);
125 if (other->start == state->end + 1 &&
126 other->state == state->state) {
127 other->start = state->start;
128 update_extent_state(other);
129 remove_cache_extent(&tree->state, &state->cache_node);
130 free_extent_state(state);
133 return 0;
137 * insert an extent_state struct into the tree. 'bits' are set on the
138 * struct before it is inserted.
140 static int insert_state(struct extent_io_tree *tree,
141 struct extent_state *state, u64 start, u64 end,
142 int bits)
144 int ret;
146 BUG_ON(end < start);
147 state->state |= bits;
148 state->start = start;
149 state->end = end;
150 update_extent_state(state);
151 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
152 BUG_ON(ret);
153 merge_state(tree, state);
154 return 0;
158 * split a given extent state struct in two, inserting the preallocated
159 * struct 'prealloc' as the newly created second half. 'split' indicates an
160 * offset inside 'orig' where it should be split.
162 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
163 struct extent_state *prealloc, u64 split)
165 int ret;
166 prealloc->start = orig->start;
167 prealloc->end = split - 1;
168 prealloc->state = orig->state;
169 update_extent_state(prealloc);
170 orig->start = split;
171 update_extent_state(orig);
172 ret = insert_existing_cache_extent(&tree->state,
173 &prealloc->cache_node);
174 BUG_ON(ret);
175 return 0;
179 * clear some bits on a range in the tree.
181 static int clear_state_bit(struct extent_io_tree *tree,
182 struct extent_state *state, int bits)
184 int ret = state->state & bits;
186 state->state &= ~bits;
187 if (state->state == 0) {
188 remove_cache_extent(&tree->state, &state->cache_node);
189 free_extent_state(state);
190 } else {
191 merge_state(tree, state);
193 return ret;
197 * set some bits on a range in the tree.
199 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
200 u64 end, int bits, gfp_t mask)
202 struct extent_state *state;
203 struct extent_state *prealloc = NULL;
204 struct cache_extent *node;
205 u64 last_end;
206 int err;
207 int set = 0;
209 again:
210 prealloc = alloc_extent_state();
211 if (!prealloc)
212 return -ENOMEM;
215 * this search will find the extents that end after
216 * our range starts
218 node = find_first_cache_extent(&tree->state, start);
219 if (!node)
220 goto out;
221 state = container_of(node, struct extent_state, cache_node);
222 if (state->start > end)
223 goto out;
224 last_end = state->end;
227 * | ---- desired range ---- |
228 * | state | or
229 * | ------------- state -------------- |
231 * We need to split the extent we found, and may flip
232 * bits on second half.
234 * If the extent we found extends past our range, we
235 * just split and search again. It'll get split again
236 * the next time though.
238 * If the extent we found is inside our range, we clear
239 * the desired bit on it.
241 if (state->start < start) {
242 err = split_state(tree, state, prealloc, start);
243 BUG_ON(err == -EEXIST);
244 prealloc = NULL;
245 if (err)
246 goto out;
247 if (state->end <= end) {
248 set |= clear_state_bit(tree, state, bits);
249 if (last_end == (u64)-1)
250 goto out;
251 start = last_end + 1;
252 } else {
253 start = state->start;
255 goto search_again;
258 * | ---- desired range ---- |
259 * | state |
260 * We need to split the extent, and clear the bit
261 * on the first half
263 if (state->start <= end && state->end > end) {
264 err = split_state(tree, state, prealloc, end + 1);
265 BUG_ON(err == -EEXIST);
267 set |= clear_state_bit(tree, prealloc, bits);
268 prealloc = NULL;
269 goto out;
272 start = state->end + 1;
273 set |= clear_state_bit(tree, state, bits);
274 if (last_end == (u64)-1)
275 goto out;
276 start = last_end + 1;
277 goto search_again;
278 out:
279 if (prealloc)
280 free_extent_state(prealloc);
281 return set;
283 search_again:
284 if (start > end)
285 goto out;
286 goto again;
290 * set some bits on a range in the tree.
292 int set_extent_bits(struct extent_io_tree *tree, u64 start,
293 u64 end, int bits, gfp_t mask)
295 struct extent_state *state;
296 struct extent_state *prealloc = NULL;
297 struct cache_extent *node;
298 int err = 0;
299 int set;
300 u64 last_start;
301 u64 last_end;
302 again:
303 prealloc = alloc_extent_state();
304 if (!prealloc)
305 return -ENOMEM;
308 * this search will find the extents that end after
309 * our range starts
311 node = find_first_cache_extent(&tree->state, start);
312 if (!node) {
313 err = insert_state(tree, prealloc, start, end, bits);
314 BUG_ON(err == -EEXIST);
315 prealloc = NULL;
316 goto out;
319 state = container_of(node, struct extent_state, cache_node);
320 last_start = state->start;
321 last_end = state->end;
324 * | ---- desired range ---- |
325 * | state |
327 * Just lock what we found and keep going
329 if (state->start == start && state->end <= end) {
330 set = state->state & bits;
331 state->state |= bits;
332 merge_state(tree, state);
333 if (last_end == (u64)-1)
334 goto out;
335 start = last_end + 1;
336 goto search_again;
339 * | ---- desired range ---- |
340 * | state |
341 * or
342 * | ------------- state -------------- |
344 * We need to split the extent we found, and may flip bits on
345 * second half.
347 * If the extent we found extends past our
348 * range, we just split and search again. It'll get split
349 * again the next time though.
351 * If the extent we found is inside our range, we set the
352 * desired bit on it.
354 if (state->start < start) {
355 set = state->state & bits;
356 err = split_state(tree, state, prealloc, start);
357 BUG_ON(err == -EEXIST);
358 prealloc = NULL;
359 if (err)
360 goto out;
361 if (state->end <= end) {
362 state->state |= bits;
363 start = state->end + 1;
364 merge_state(tree, state);
365 if (last_end == (u64)-1)
366 goto out;
367 start = last_end + 1;
368 } else {
369 start = state->start;
371 goto search_again;
374 * | ---- desired range ---- |
375 * | state | or | state |
377 * There's a hole, we need to insert something in it and
378 * ignore the extent we found.
380 if (state->start > start) {
381 u64 this_end;
382 if (end < last_start)
383 this_end = end;
384 else
385 this_end = last_start -1;
386 err = insert_state(tree, prealloc, start, this_end,
387 bits);
388 BUG_ON(err == -EEXIST);
389 prealloc = NULL;
390 if (err)
391 goto out;
392 start = this_end + 1;
393 goto search_again;
396 * | ---- desired range ---- |
397 * | ---------- state ---------- |
398 * We need to split the extent, and set the bit
399 * on the first half
401 set = state->state & bits;
402 err = split_state(tree, state, prealloc, end + 1);
403 BUG_ON(err == -EEXIST);
405 state->state |= bits;
406 merge_state(tree, prealloc);
407 prealloc = NULL;
408 out:
409 if (prealloc)
410 free_extent_state(prealloc);
411 return err;
412 search_again:
413 if (start > end)
414 goto out;
415 goto again;
418 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
419 gfp_t mask)
421 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
424 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
425 gfp_t mask)
427 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
430 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
431 u64 *start_ret, u64 *end_ret, int bits)
433 struct cache_extent *node;
434 struct extent_state *state;
435 int ret = 1;
438 * this search will find all the extents that end after
439 * our range starts.
441 node = find_first_cache_extent(&tree->state, start);
442 if (!node)
443 goto out;
445 while(1) {
446 state = container_of(node, struct extent_state, cache_node);
447 if (state->end >= start && (state->state & bits)) {
448 *start_ret = state->start;
449 *end_ret = state->end;
450 ret = 0;
451 break;
453 node = next_cache_extent(node);
454 if (!node)
455 break;
457 out:
458 return ret;
461 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
462 int bits, int filled)
464 struct extent_state *state = NULL;
465 struct cache_extent *node;
466 int bitset = 0;
468 node = find_first_cache_extent(&tree->state, start);
469 while (node && start <= end) {
470 state = container_of(node, struct extent_state, cache_node);
472 if (filled && state->start > start) {
473 bitset = 0;
474 break;
476 if (state->start > end)
477 break;
478 if (state->state & bits) {
479 bitset = 1;
480 if (!filled)
481 break;
482 } else if (filled) {
483 bitset = 0;
484 break;
486 start = state->end + 1;
487 if (start > end)
488 break;
489 node = next_cache_extent(node);
490 if (!node) {
491 if (filled)
492 bitset = 0;
493 break;
496 return bitset;
499 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
501 struct cache_extent *node;
502 struct extent_state *state;
503 int ret = 0;
505 node = find_first_cache_extent(&tree->state, start);
506 if (!node) {
507 ret = -ENOENT;
508 goto out;
510 state = container_of(node, struct extent_state, cache_node);
511 if (state->start != start) {
512 ret = -ENOENT;
513 goto out;
515 state->private = private;
516 out:
517 return ret;
520 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
522 struct cache_extent *node;
523 struct extent_state *state;
524 int ret = 0;
526 node = find_first_cache_extent(&tree->state, start);
527 if (!node) {
528 ret = -ENOENT;
529 goto out;
531 state = container_of(node, struct extent_state, cache_node);
532 if (state->start != start) {
533 ret = -ENOENT;
534 goto out;
536 *private = state->private;
537 out:
538 return ret;
541 static int free_some_buffers(struct extent_io_tree *tree)
543 u32 nrscan = 0;
544 struct extent_buffer *eb;
545 struct list_head *node, *next;
547 if (tree->cache_size < cache_max)
548 return 0;
549 list_for_each_safe(node, next, &tree->lru) {
550 eb = list_entry(node, struct extent_buffer, lru);
551 if (eb->refs == 1) {
552 free_extent_buffer(eb);
553 if (tree->cache_size < cache_max)
554 break;
555 } else {
556 list_move_tail(&eb->lru, &tree->lru);
558 if (nrscan++ > 64)
559 break;
561 return 0;
564 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
565 u64 bytenr, u32 blocksize)
567 struct extent_buffer *eb;
568 int ret;
570 eb = malloc(sizeof(struct extent_buffer) + blocksize);
571 if (!eb) {
572 BUG();
573 return NULL;
576 eb->start = bytenr;
577 eb->len = blocksize;
578 eb->refs = 2;
579 eb->flags = 0;
580 eb->tree = tree;
581 eb->fd = -1;
582 eb->dev_bytenr = (u64)-1;
583 eb->cache_node.start = bytenr;
584 eb->cache_node.size = blocksize;
586 free_some_buffers(tree);
587 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
588 if (ret) {
589 free(eb);
590 return NULL;
592 list_add_tail(&eb->lru, &tree->lru);
593 tree->cache_size += blocksize;
594 return eb;
597 void free_extent_buffer(struct extent_buffer *eb)
599 if (!eb)
600 return;
602 eb->refs--;
603 BUG_ON(eb->refs < 0);
604 if (eb->refs == 0) {
605 struct extent_io_tree *tree = eb->tree;
606 BUG_ON(eb->flags & EXTENT_DIRTY);
607 list_del_init(&eb->lru);
608 remove_cache_extent(&tree->cache, &eb->cache_node);
609 BUG_ON(tree->cache_size < eb->len);
610 tree->cache_size -= eb->len;
611 free(eb);
615 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
616 u64 bytenr, u32 blocksize)
618 struct extent_buffer *eb = NULL;
619 struct cache_extent *cache;
621 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
622 if (cache && cache->start == bytenr && cache->size == blocksize) {
623 eb = container_of(cache, struct extent_buffer, cache_node);
624 list_move_tail(&eb->lru, &tree->lru);
625 eb->refs++;
627 return eb;
630 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
631 u64 start)
633 struct extent_buffer *eb = NULL;
634 struct cache_extent *cache;
636 cache = find_first_cache_extent(&tree->cache, start);
637 if (cache) {
638 eb = container_of(cache, struct extent_buffer, cache_node);
639 list_move_tail(&eb->lru, &tree->lru);
640 eb->refs++;
642 return eb;
645 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
646 u64 bytenr, u32 blocksize)
648 struct extent_buffer *eb;
649 struct cache_extent *cache;
651 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
652 if (cache && cache->start == bytenr && cache->size == blocksize) {
653 eb = container_of(cache, struct extent_buffer, cache_node);
654 list_move_tail(&eb->lru, &tree->lru);
655 eb->refs++;
656 } else {
657 if (cache) {
658 eb = container_of(cache, struct extent_buffer,
659 cache_node);
660 BUG_ON(eb->refs != 1);
661 free_extent_buffer(eb);
663 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
665 return eb;
668 int read_extent_from_disk(struct extent_buffer *eb)
670 int ret;
671 ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
672 if (ret < 0)
673 goto out;
674 if (ret != eb->len) {
675 ret = -EIO;
676 goto out;
678 ret = 0;
679 out:
680 return ret;
683 int write_extent_to_disk(struct extent_buffer *eb)
685 int ret;
686 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
687 if (ret < 0)
688 goto out;
689 if (ret != eb->len) {
690 ret = -EIO;
691 goto out;
693 ret = 0;
694 out:
695 return ret;
698 int set_extent_buffer_uptodate(struct extent_buffer *eb)
700 eb->flags |= EXTENT_UPTODATE;
701 return 0;
704 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
705 struct extent_buffer *eb)
707 eb->flags &= ~EXTENT_UPTODATE;
708 return 0;
711 int extent_buffer_uptodate(struct extent_buffer *eb)
713 if (eb->flags & EXTENT_UPTODATE)
714 return 1;
715 return 0;
718 int set_extent_buffer_dirty(struct extent_buffer *eb)
720 struct extent_io_tree *tree = eb->tree;
721 if (!(eb->flags & EXTENT_DIRTY)) {
722 eb->flags |= EXTENT_DIRTY;
723 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
724 extent_buffer_get(eb);
726 return 0;
729 int clear_extent_buffer_dirty(struct extent_buffer *eb)
731 struct extent_io_tree *tree = eb->tree;
732 if (eb->flags & EXTENT_DIRTY) {
733 eb->flags &= ~EXTENT_DIRTY;
734 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
735 free_extent_buffer(eb);
737 return 0;
740 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
741 unsigned long start, unsigned long len)
743 return memcmp(eb->data + start, ptrv, len);
746 void read_extent_buffer(struct extent_buffer *eb, void *dst,
747 unsigned long start, unsigned long len)
749 memcpy(dst, eb->data + start, len);
752 void write_extent_buffer(struct extent_buffer *eb, const void *src,
753 unsigned long start, unsigned long len)
755 memcpy(eb->data + start, src, len);
758 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
759 unsigned long dst_offset, unsigned long src_offset,
760 unsigned long len)
762 memcpy(dst->data + dst_offset, src->data + src_offset, len);
765 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
766 unsigned long src_offset, unsigned long len)
768 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
771 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
772 unsigned long src_offset, unsigned long len)
774 memmove(dst->data + dst_offset, dst->data + src_offset, len);
777 void memset_extent_buffer(struct extent_buffer *eb, char c,
778 unsigned long start, unsigned long len)
780 memset(eb->data + start, c, len);