btrfs-progs: mkfs: rename nodiscard option to K
[btrfs-progs-unstable/devel.git] / extent_io.c
blobebb35b28c44e98547adc2d61877686e8b29419fb
2 /*
3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
19 #define _XOPEN_SOURCE 600
20 #define __USE_XOPEN2K
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
29 #include "list.h"
31 u64 cache_soft_max = 1024 * 1024 * 256;
32 u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
34 void extent_io_tree_init(struct extent_io_tree *tree)
36 cache_tree_init(&tree->state);
37 cache_tree_init(&tree->cache);
38 INIT_LIST_HEAD(&tree->lru);
39 tree->cache_size = 0;
42 static struct extent_state *alloc_extent_state(void)
44 struct extent_state *state;
46 state = malloc(sizeof(*state));
47 if (!state)
48 return NULL;
49 state->refs = 1;
50 state->state = 0;
51 state->private = 0;
52 return state;
55 static void free_extent_state(struct extent_state *state)
57 state->refs--;
58 BUG_ON(state->refs < 0);
59 if (state->refs == 0)
60 free(state);
63 void extent_io_tree_cleanup(struct extent_io_tree *tree)
65 struct extent_state *es;
66 struct extent_buffer *eb;
67 struct cache_extent *cache;
69 while(!list_empty(&tree->lru)) {
70 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
71 if (eb->refs != 1) {
72 fprintf(stderr, "extent buffer leak: "
73 "start %llu len %u\n",
74 (unsigned long long)eb->start, eb->len);
75 eb->refs = 1;
77 free_extent_buffer(eb);
79 while (1) {
80 cache = find_first_cache_extent(&tree->state, 0);
81 if (!cache)
82 break;
83 es = container_of(cache, struct extent_state, cache_node);
84 remove_cache_extent(&tree->state, &es->cache_node);
85 free_extent_state(es);
89 static inline void update_extent_state(struct extent_state *state)
91 state->cache_node.start = state->start;
92 state->cache_node.size = state->end + 1 - state->start;
96 * Utility function to look for merge candidates inside a given range.
97 * Any extents with matching state are merged together into a single
98 * extent in the tree. Extents with EXTENT_IO in their state field are
99 * not merged
101 static int merge_state(struct extent_io_tree *tree,
102 struct extent_state *state)
104 struct extent_state *other;
105 struct cache_extent *other_node;
107 if (state->state & EXTENT_IOBITS)
108 return 0;
110 other_node = prev_cache_extent(&state->cache_node);
111 if (other_node) {
112 other = container_of(other_node, struct extent_state,
113 cache_node);
114 if (other->end == state->start - 1 &&
115 other->state == state->state) {
116 state->start = other->start;
117 update_extent_state(state);
118 remove_cache_extent(&tree->state, &other->cache_node);
119 free_extent_state(other);
122 other_node = next_cache_extent(&state->cache_node);
123 if (other_node) {
124 other = container_of(other_node, struct extent_state,
125 cache_node);
126 if (other->start == state->end + 1 &&
127 other->state == state->state) {
128 other->start = state->start;
129 update_extent_state(other);
130 remove_cache_extent(&tree->state, &state->cache_node);
131 free_extent_state(state);
134 return 0;
138 * insert an extent_state struct into the tree. 'bits' are set on the
139 * struct before it is inserted.
141 static int insert_state(struct extent_io_tree *tree,
142 struct extent_state *state, u64 start, u64 end,
143 int bits)
145 int ret;
147 BUG_ON(end < start);
148 state->state |= bits;
149 state->start = start;
150 state->end = end;
151 update_extent_state(state);
152 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
153 BUG_ON(ret);
154 merge_state(tree, state);
155 return 0;
159 * split a given extent state struct in two, inserting the preallocated
160 * struct 'prealloc' as the newly created second half. 'split' indicates an
161 * offset inside 'orig' where it should be split.
163 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
164 struct extent_state *prealloc, u64 split)
166 int ret;
167 prealloc->start = orig->start;
168 prealloc->end = split - 1;
169 prealloc->state = orig->state;
170 update_extent_state(prealloc);
171 orig->start = split;
172 update_extent_state(orig);
173 ret = insert_existing_cache_extent(&tree->state,
174 &prealloc->cache_node);
175 BUG_ON(ret);
176 return 0;
180 * clear some bits on a range in the tree.
182 static int clear_state_bit(struct extent_io_tree *tree,
183 struct extent_state *state, int bits)
185 int ret = state->state & bits;
187 state->state &= ~bits;
188 if (state->state == 0) {
189 remove_cache_extent(&tree->state, &state->cache_node);
190 free_extent_state(state);
191 } else {
192 merge_state(tree, state);
194 return ret;
198 * set some bits on a range in the tree.
200 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
201 u64 end, int bits, gfp_t mask)
203 struct extent_state *state;
204 struct extent_state *prealloc = NULL;
205 struct cache_extent *node;
206 u64 last_end;
207 int err;
208 int set = 0;
210 again:
211 prealloc = alloc_extent_state();
212 if (!prealloc)
213 return -ENOMEM;
216 * this search will find the extents that end after
217 * our range starts
219 node = find_first_cache_extent(&tree->state, start);
220 if (!node)
221 goto out;
222 state = container_of(node, struct extent_state, cache_node);
223 if (state->start > end)
224 goto out;
225 last_end = state->end;
228 * | ---- desired range ---- |
229 * | state | or
230 * | ------------- state -------------- |
232 * We need to split the extent we found, and may flip
233 * bits on second half.
235 * If the extent we found extends past our range, we
236 * just split and search again. It'll get split again
237 * the next time though.
239 * If the extent we found is inside our range, we clear
240 * the desired bit on it.
242 if (state->start < start) {
243 err = split_state(tree, state, prealloc, start);
244 BUG_ON(err == -EEXIST);
245 prealloc = NULL;
246 if (err)
247 goto out;
248 if (state->end <= end) {
249 set |= clear_state_bit(tree, state, bits);
250 if (last_end == (u64)-1)
251 goto out;
252 start = last_end + 1;
253 } else {
254 start = state->start;
256 goto search_again;
259 * | ---- desired range ---- |
260 * | state |
261 * We need to split the extent, and clear the bit
262 * on the first half
264 if (state->start <= end && state->end > end) {
265 err = split_state(tree, state, prealloc, end + 1);
266 BUG_ON(err == -EEXIST);
268 set |= clear_state_bit(tree, prealloc, bits);
269 prealloc = NULL;
270 goto out;
273 start = state->end + 1;
274 set |= clear_state_bit(tree, state, bits);
275 if (last_end == (u64)-1)
276 goto out;
277 start = last_end + 1;
278 goto search_again;
279 out:
280 if (prealloc)
281 free_extent_state(prealloc);
282 return set;
284 search_again:
285 if (start > end)
286 goto out;
287 goto again;
291 * set some bits on a range in the tree.
293 int set_extent_bits(struct extent_io_tree *tree, u64 start,
294 u64 end, int bits, gfp_t mask)
296 struct extent_state *state;
297 struct extent_state *prealloc = NULL;
298 struct cache_extent *node;
299 int err = 0;
300 u64 last_start;
301 u64 last_end;
302 again:
303 prealloc = alloc_extent_state();
304 if (!prealloc)
305 return -ENOMEM;
308 * this search will find the extents that end after
309 * our range starts
311 node = find_first_cache_extent(&tree->state, start);
312 if (!node) {
313 err = insert_state(tree, prealloc, start, end, bits);
314 BUG_ON(err == -EEXIST);
315 prealloc = NULL;
316 goto out;
319 state = container_of(node, struct extent_state, cache_node);
320 last_start = state->start;
321 last_end = state->end;
324 * | ---- desired range ---- |
325 * | state |
327 * Just lock what we found and keep going
329 if (state->start == start && state->end <= end) {
330 state->state |= bits;
331 merge_state(tree, state);
332 if (last_end == (u64)-1)
333 goto out;
334 start = last_end + 1;
335 goto search_again;
338 * | ---- desired range ---- |
339 * | state |
340 * or
341 * | ------------- state -------------- |
343 * We need to split the extent we found, and may flip bits on
344 * second half.
346 * If the extent we found extends past our
347 * range, we just split and search again. It'll get split
348 * again the next time though.
350 * If the extent we found is inside our range, we set the
351 * desired bit on it.
353 if (state->start < start) {
354 err = split_state(tree, state, prealloc, start);
355 BUG_ON(err == -EEXIST);
356 prealloc = NULL;
357 if (err)
358 goto out;
359 if (state->end <= end) {
360 state->state |= bits;
361 start = state->end + 1;
362 merge_state(tree, state);
363 if (last_end == (u64)-1)
364 goto out;
365 start = last_end + 1;
366 } else {
367 start = state->start;
369 goto search_again;
372 * | ---- desired range ---- |
373 * | state | or | state |
375 * There's a hole, we need to insert something in it and
376 * ignore the extent we found.
378 if (state->start > start) {
379 u64 this_end;
380 if (end < last_start)
381 this_end = end;
382 else
383 this_end = last_start -1;
384 err = insert_state(tree, prealloc, start, this_end,
385 bits);
386 BUG_ON(err == -EEXIST);
387 prealloc = NULL;
388 if (err)
389 goto out;
390 start = this_end + 1;
391 goto search_again;
394 * | ---- desired range ---- |
395 * | ---------- state ---------- |
396 * We need to split the extent, and set the bit
397 * on the first half
399 err = split_state(tree, state, prealloc, end + 1);
400 BUG_ON(err == -EEXIST);
402 state->state |= bits;
403 merge_state(tree, prealloc);
404 prealloc = NULL;
405 out:
406 if (prealloc)
407 free_extent_state(prealloc);
408 return err;
409 search_again:
410 if (start > end)
411 goto out;
412 goto again;
415 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
416 gfp_t mask)
418 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
421 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
422 gfp_t mask)
424 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
427 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
428 u64 *start_ret, u64 *end_ret, int bits)
430 struct cache_extent *node;
431 struct extent_state *state;
432 int ret = 1;
435 * this search will find all the extents that end after
436 * our range starts.
438 node = find_first_cache_extent(&tree->state, start);
439 if (!node)
440 goto out;
442 while(1) {
443 state = container_of(node, struct extent_state, cache_node);
444 if (state->end >= start && (state->state & bits)) {
445 *start_ret = state->start;
446 *end_ret = state->end;
447 ret = 0;
448 break;
450 node = next_cache_extent(node);
451 if (!node)
452 break;
454 out:
455 return ret;
458 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
459 int bits, int filled)
461 struct extent_state *state = NULL;
462 struct cache_extent *node;
463 int bitset = 0;
465 node = find_first_cache_extent(&tree->state, start);
466 while (node && start <= end) {
467 state = container_of(node, struct extent_state, cache_node);
469 if (filled && state->start > start) {
470 bitset = 0;
471 break;
473 if (state->start > end)
474 break;
475 if (state->state & bits) {
476 bitset = 1;
477 if (!filled)
478 break;
479 } else if (filled) {
480 bitset = 0;
481 break;
483 start = state->end + 1;
484 if (start > end)
485 break;
486 node = next_cache_extent(node);
487 if (!node) {
488 if (filled)
489 bitset = 0;
490 break;
493 return bitset;
496 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
498 struct cache_extent *node;
499 struct extent_state *state;
500 int ret = 0;
502 node = find_first_cache_extent(&tree->state, start);
503 if (!node) {
504 ret = -ENOENT;
505 goto out;
507 state = container_of(node, struct extent_state, cache_node);
508 if (state->start != start) {
509 ret = -ENOENT;
510 goto out;
512 state->private = private;
513 out:
514 return ret;
517 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
519 struct cache_extent *node;
520 struct extent_state *state;
521 int ret = 0;
523 node = find_first_cache_extent(&tree->state, start);
524 if (!node) {
525 ret = -ENOENT;
526 goto out;
528 state = container_of(node, struct extent_state, cache_node);
529 if (state->start != start) {
530 ret = -ENOENT;
531 goto out;
533 *private = state->private;
534 out:
535 return ret;
538 static int free_some_buffers(struct extent_io_tree *tree)
540 u32 nrscan = 0;
541 struct extent_buffer *eb;
542 struct list_head *node, *next;
544 if (tree->cache_size < cache_soft_max)
545 return 0;
547 list_for_each_safe(node, next, &tree->lru) {
548 eb = list_entry(node, struct extent_buffer, lru);
549 if (eb->refs == 1) {
550 free_extent_buffer(eb);
551 if (tree->cache_size < cache_hard_max)
552 break;
553 } else {
554 list_move_tail(&eb->lru, &tree->lru);
556 if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
557 break;
559 return 0;
562 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
563 u64 bytenr, u32 blocksize)
565 struct extent_buffer *eb;
566 int ret;
568 eb = malloc(sizeof(struct extent_buffer) + blocksize);
569 if (!eb) {
570 BUG();
571 return NULL;
573 memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
575 eb->start = bytenr;
576 eb->len = blocksize;
577 eb->refs = 2;
578 eb->flags = 0;
579 eb->tree = tree;
580 eb->fd = -1;
581 eb->dev_bytenr = (u64)-1;
582 eb->cache_node.start = bytenr;
583 eb->cache_node.size = blocksize;
585 free_some_buffers(tree);
586 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
587 if (ret) {
588 free(eb);
589 return NULL;
591 list_add_tail(&eb->lru, &tree->lru);
592 tree->cache_size += blocksize;
593 return eb;
596 void free_extent_buffer(struct extent_buffer *eb)
598 if (!eb)
599 return;
601 eb->refs--;
602 BUG_ON(eb->refs < 0);
603 if (eb->refs == 0) {
604 struct extent_io_tree *tree = eb->tree;
605 BUG_ON(eb->flags & EXTENT_DIRTY);
606 list_del_init(&eb->lru);
607 remove_cache_extent(&tree->cache, &eb->cache_node);
608 BUG_ON(tree->cache_size < eb->len);
609 tree->cache_size -= eb->len;
610 free(eb);
614 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
615 u64 bytenr, u32 blocksize)
617 struct extent_buffer *eb = NULL;
618 struct cache_extent *cache;
620 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
621 if (cache && cache->start == bytenr && cache->size == blocksize) {
622 eb = container_of(cache, struct extent_buffer, cache_node);
623 list_move_tail(&eb->lru, &tree->lru);
624 eb->refs++;
626 return eb;
629 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
630 u64 start)
632 struct extent_buffer *eb = NULL;
633 struct cache_extent *cache;
635 cache = find_first_cache_extent(&tree->cache, start);
636 if (cache) {
637 eb = container_of(cache, struct extent_buffer, cache_node);
638 list_move_tail(&eb->lru, &tree->lru);
639 eb->refs++;
641 return eb;
644 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
645 u64 bytenr, u32 blocksize)
647 struct extent_buffer *eb;
648 struct cache_extent *cache;
650 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
651 if (cache && cache->start == bytenr && cache->size == blocksize) {
652 eb = container_of(cache, struct extent_buffer, cache_node);
653 list_move_tail(&eb->lru, &tree->lru);
654 eb->refs++;
655 } else {
656 if (cache) {
657 eb = container_of(cache, struct extent_buffer,
658 cache_node);
659 free_extent_buffer(eb);
661 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
663 return eb;
666 int read_extent_from_disk(struct extent_buffer *eb)
668 int ret;
669 ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
670 if (ret < 0)
671 goto out;
672 if (ret != eb->len) {
673 ret = -EIO;
674 goto out;
676 ret = 0;
677 out:
678 return ret;
681 int write_extent_to_disk(struct extent_buffer *eb)
683 int ret;
684 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
685 if (ret < 0)
686 goto out;
687 if (ret != eb->len) {
688 ret = -EIO;
689 goto out;
691 ret = 0;
692 out:
693 return ret;
696 int set_extent_buffer_uptodate(struct extent_buffer *eb)
698 eb->flags |= EXTENT_UPTODATE;
699 return 0;
702 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
703 struct extent_buffer *eb)
705 eb->flags &= ~EXTENT_UPTODATE;
706 return 0;
709 int extent_buffer_uptodate(struct extent_buffer *eb)
711 if (!eb)
712 return 0;
714 if (eb->flags & EXTENT_UPTODATE)
715 return 1;
716 return 0;
719 int set_extent_buffer_dirty(struct extent_buffer *eb)
721 struct extent_io_tree *tree = eb->tree;
722 if (!(eb->flags & EXTENT_DIRTY)) {
723 eb->flags |= EXTENT_DIRTY;
724 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
725 extent_buffer_get(eb);
727 return 0;
730 int clear_extent_buffer_dirty(struct extent_buffer *eb)
732 struct extent_io_tree *tree = eb->tree;
733 if (eb->flags & EXTENT_DIRTY) {
734 eb->flags &= ~EXTENT_DIRTY;
735 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
736 free_extent_buffer(eb);
738 return 0;
741 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
742 unsigned long start, unsigned long len)
744 return memcmp(eb->data + start, ptrv, len);
747 void read_extent_buffer(struct extent_buffer *eb, void *dst,
748 unsigned long start, unsigned long len)
750 memcpy(dst, eb->data + start, len);
753 void write_extent_buffer(struct extent_buffer *eb, const void *src,
754 unsigned long start, unsigned long len)
756 memcpy(eb->data + start, src, len);
759 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
760 unsigned long dst_offset, unsigned long src_offset,
761 unsigned long len)
763 memcpy(dst->data + dst_offset, src->data + src_offset, len);
766 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
767 unsigned long src_offset, unsigned long len)
769 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
772 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
773 unsigned long src_offset, unsigned long len)
775 memmove(dst->data + dst_offset, dst->data + src_offset, len);
778 void memset_extent_buffer(struct extent_buffer *eb, char c,
779 unsigned long start, unsigned long len)
781 memset(eb->data + start, c, len);