/* btrfs-progs: extent-tree.c */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "crc32c.h"
#include "volumes.h"
#define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_SYSTEM   EXTENT_NEW

#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2
struct pending_extent_op {
	int type;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 orig_parent;
	u64 generation;
	u64 orig_generation;
	int level;
};
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);

void maybe_lock_mutex(struct btrfs_root *root)
{
}

void maybe_unlock_mutex(struct btrfs_root *root)
{
}
static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;
	struct extent_io_tree *free_space_cache;

	free_space_cache = &root->fs_info->free_space_cache;
	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr, 0,
				       &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			clear_extent_dirty(free_space_cache, logical[nr],
					   logical[nr] + stripe_len - 1,
					   GFP_NOFS);
		}
		kfree(logical);
	}
	return 0;
}
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct extent_io_tree *free_space_cache;
	int slot;
	u64 last;
	u64 hole_size;

	if (!block_group)
		return 0;

	root = root->fs_info->extent_root;
	free_space_cache = &root->fs_info->free_space_cache;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0) {
				continue;
			} else {
				break;
			}
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid) {
			goto next;
		}
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset) {
			break;
		}

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (key.objectid > last) {
				hole_size = key.objectid - last;
				set_extent_dirty(free_space_cache, last,
						 last + hole_size - 1,
						 GFP_NOFS);
			}
			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	if (block_group->key.objectid +
	    block_group->key.offset > last) {
		hole_size = block_group->key.objectid +
			    block_group->key.offset - last;
		set_extent_dirty(free_space_cache, last,
				 last + hole_size - 1, GFP_NOFS);
	}
	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
err:
	btrfs_free_path(path);
	return 0;
}
struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
						       btrfs_fs_info *info,
						       u64 bytenr)
{
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
	int ret;

	bytenr = max_t(u64, bytenr,
		       BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
				    bytenr, &start, &end,
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
				    BLOCK_GROUP_SYSTEM);
	if (ret) {
		return NULL;
	}
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
	return block_group;
}
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 bytenr)
{
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 ptr;
	u64 start;
	u64 end;
	int ret;

	block_group_cache = &info->block_group_cache;
	ret = find_first_extent_bit(block_group_cache,
				    bytenr, &start, &end,
				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
				    BLOCK_GROUP_SYSTEM);
	if (ret) {
		return NULL;
	}
	ret = get_state_private(block_group_cache, start, &ptr);
	if (ret)
		return NULL;

	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
	if (block_group->key.objectid <= bytenr && bytenr <
	    block_group->key.objectid + block_group->key.offset)
		return block_group;
	return NULL;
}
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}
static int noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group_cache **cache_ret,
			      u64 *start_ret, int num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last;
	u64 start = 0;
	u64 end = 0;
	u64 search_start = *start_ret;
	int wrapped = 0;

	if (!cache) {
		goto out;
	}
again:
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;

	last = max(search_start, cache->key.objectid);
	if (cache->ro || !block_group_bits(cache, data)) {
		goto new_group;
	}

	while(1) {
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
		if (ret) {
			goto new_group;
		}

		start = max(last, start);
		last = end + 1;
		if (last - start < num) {
			continue;
		}
		if (start + num > cache->key.objectid + cache->key.offset) {
			goto new_group;
		}
		*start_ret = start;
		return 0;
	}
out:
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %llu\n",
		       (unsigned long long)search_start);
		WARN_ON(1);
	}
	return -ENOSPC;

new_group:
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_first_block_group(root->fs_info, last);
	if (!cache) {
no_cache:
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			goto wrapped;
		}
		goto out;
	}
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;

	*cache_ret = cache;
	goto again;
}
static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	num /= 10;
	return num;
}
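/*
 * Illustrative note (hypothetical numbers, not from the code above):
 * div_factor() returns factor tenths of num, i.e. num * factor / 10.
 * do_chunk_alloc() below uses div_factor(space_info->total_bytes, 7) as a
 * roughly-70%-full threshold, so for a 100GB space_info the threshold would
 * be 70GB; a factor of 10 returns num unchanged.
 */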
static int block_group_state_bits(u64 flags)
{
	int bits = 0;
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		bits |= BLOCK_GROUP_DATA;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		bits |= BLOCK_GROUP_METADATA;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		bits |= BLOCK_GROUP_SYSTEM;
	return bits;
}
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, u64 search_start,
						 int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 hint_last;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
	int ret;
	int full_search = 0;
	int factor = 10;

	block_group_cache = &info->block_group_cache;

	if (!owner)
		factor = 10;

	bit = block_group_state_bits(data);

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_block_group(info, search_start);
		if (shint && !shint->ro && block_group_bits(shint, data)) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				return shint;
			}
		}
	}
	if (hint && !hint->ro && block_group_bits(hint, data)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
		last = hint->key.objectid + hint->key.offset;
		hint_last = last;
	} else {
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		last = hint_last;
	}
again:
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (!cache->ro && block_group_bits(cache, data)) {
			if (full_search)
				free_check = cache->key.offset;
			else
				free_check = div_factor(cache->key.offset,
							factor);

			if (used + cache->pinned < free_check) {
				found_group = cache;
				goto found;
			}
		}
		cond_resched();
	}
	if (!full_search) {
		last = search_start;
		full_search = 1;
		goto again;
	}
found:
	return found_group;
}
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together.  The inode objectid and offset in the
 * file are just hints: they tell us where in the btree the references can
 * be found and when we can stop searching.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file,
 *      number of references in the leaf)
 *
 * Because the inode objectid and offset in file are just hints, they are not
 * used when backrefs are deleted.  When a file extent is removed either
 * during snapshot deletion or file truncation, we find the corresponding
 * back reference and check the following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 *     (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to.  If the tree block isn't in a reference counted root,
 * the old back references are removed.  These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 *     (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a backref is deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself))
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent))
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent.  If an extent is a tree root, the key offset
 * is set to the key objectid.
 */
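/*
 * A concrete (hypothetical) illustration of the rules above: a file extent
 * allocated in transaction 100 by subvolume tree 5 for inode 257 at file
 * offset 0 gets the back reference
 *
 *     (5, 100, 257, 0, 1)
 *
 * If the leaf holding that file extent item is later cow'd in transaction
 * 120 and by then contains three references to the same extent, the back
 * reference becomes
 *
 *     (5, 120, 257, 0, 3)
 *
 * and on deletion only the owner/generation pair taken from the leaf header
 * is compared, not the inode objectid or offset hints.
 */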
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, int del)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	u64 ref_objectid;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	ref_objectid = btrfs_ref_objectid(leaf, ref);
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != ref_generation ||
	    (ref_objectid != owner_objectid &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		ret = -EIO;
		WARN_ON(1);
		goto out;
	}
	ret = 0;
out:
	return ret;
}
static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner != owner_objectid &&
		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					       BTRFS_MULTIPLE_OBJECTIDS);
		}
		ret = 0;
	} else {
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret = 0;

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	num_refs = btrfs_ref_num_refs(leaf, ref);
	BUG_ON(num_refs == 0);
	num_refs -= 1;
	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		btrfs_set_ref_num_refs(leaf, ref, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_release_path(root, path);
	return ret;
}
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 bytenr,
				     u64 orig_parent, u64 parent,
				     u64 orig_root, u64 ref_root,
				     u64 orig_generation, u64 ref_generation,
				     u64 owner_objectid)
{
	int ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;

	if (root == root->fs_info->extent_root) {
		struct pending_extent_op *extent_op;
		u64 num_bytes;

		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
		num_bytes = btrfs_level_size(root, (int)owner_objectid);
		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
				   bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
			u64 priv;
			ret = get_state_private(&root->fs_info->extent_ins,
						bytenr, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
				    (unsigned long)priv;
			BUG_ON(extent_op->parent != orig_parent);
			BUG_ON(extent_op->generation != orig_generation);
			extent_op->parent = parent;
			extent_op->generation = ref_generation;
		} else {
			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
			BUG_ON(!extent_op);

			extent_op->type = PENDING_BACKREF_UPDATE;
			extent_op->bytenr = bytenr;
			extent_op->num_bytes = num_bytes;
			extent_op->parent = parent;
			extent_op->orig_parent = orig_parent;
			extent_op->generation = ref_generation;
			extent_op->orig_generation = orig_generation;
			extent_op->level = (int)owner_objectid;

			set_extent_bits(&root->fs_info->extent_ins,
					bytenr, bytenr + num_bytes - 1,
					EXTENT_LOCKED, GFP_NOFS);
			set_state_private(&root->fs_info->extent_ins,
					  bytenr, (unsigned long)extent_op);
		}
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, orig_parent, orig_root,
				    orig_generation, owner_objectid, 1);
	if (ret)
		goto out;
	ret = remove_extent_backref(trans, extent_root, path);
	if (ret)
		goto out;
	ret = insert_extent_backref(trans, extent_root, path, bytenr,
				    parent, ref_root, ref_generation,
				    owner_objectid);
	BUG_ON(ret);
	finish_current_insert(trans, extent_root);
	del_pending_extents(trans, extent_root);
out:
	btrfs_free_path(path);
	return ret;
}
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr,
			    u64 orig_parent, u64 parent,
			    u64 ref_root, u64 ref_generation,
			    u64 owner_objectid)
{
	int ret;
	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		return 0;
	maybe_lock_mutex(root);
	ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
					parent, ref_root, ref_root,
					ref_generation, ref_generation,
					owner_objectid);
	maybe_unlock_mutex(root);
	return ret;
}
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, u64 bytenr,
				  u64 orig_parent, u64 parent,
				  u64 orig_root, u64 ref_root,
				  u64 orig_generation, u64 ref_generation,
				  u64 owner_objectid)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 refs;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
				0, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret == 0 || path->slots[0] == 0);

	path->slots[0]--;
	l = path->nodes[0];

	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	BUG_ON(key.objectid != bytenr);
	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_release_path(root->fs_info->extent_root, path);

	path->reada = 1;
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent,
				    ref_root, ref_generation,
				    owner_objectid);
	BUG_ON(ret);
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);

	btrfs_free_path(path);
	return 0;
}
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 ref_root, u64 ref_generation,
			 u64 owner_objectid)
{
	int ret;
	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		return 0;
	maybe_lock_mutex(root);
	ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
				     0, ref_root, 0, ref_generation,
				     owner_objectid);
	maybe_unlock_mutex(root);
	return ret;
}
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}
int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr,
			    u64 num_bytes, u32 *refs)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;

	WARN_ON(num_bytes < root->sectorsize);
	path = btrfs_alloc_path();
	path->reada = 1;
	key.objectid = bytenr;
	key.offset = num_bytes;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret < 0)
		goto out;
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
		printk("failed to find block number %Lu\n",
		       (unsigned long long)bytenr);
		BUG();
	}
	l = path->nodes[0];
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	*refs = btrfs_extent_refs(l, item);
out:
	btrfs_free_path(path);
	return 0;
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
		  u32 *nr_extents)
{
	u64 bytenr;
	u64 ref_root;
	u64 orig_root;
	u64 ref_generation;
	u64 orig_generation;
	u32 nritems;
	u32 nr_file_extents = 0;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	int faili = 0;
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	ref_generation = btrfs_header_generation(buf);
	orig_root = btrfs_header_owner(orig_buf);
	orig_generation = btrfs_header_generation(orig_buf);

	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	if (root->ref_cows) {
		process_func = __btrfs_inc_extent_ref;
	} else {
		if (level == 0 &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
			goto out;
		process_func = __btrfs_update_extent_ref;
	}

	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			nr_file_extents++;

			maybe_lock_mutex(root);
			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   key.objectid);
			maybe_unlock_mutex(root);

			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			maybe_lock_mutex(root);
			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   level - 1);
			maybe_unlock_mutex(root);
			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		}
	}
out:
	if (nr_extents) {
		if (level == 0)
			*nr_extents = nr_file_extents;
		else
			*nr_extents = nritems;
	}
	return 0;
fail:
	WARN_ON(1);
#if 0
	for (i = 0; i < faili; i++) {
		if (level == 0) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
				continue;
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
								     fi), 0);
			BUG_ON(err);
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
			BUG_ON(err);
		}
	}
#endif
	return ret;
}
int btrfs_update_ref(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root, struct extent_buffer *orig_buf,
		     struct extent_buffer *buf, int start_slot, int nr)
{
	u64 bytenr;
	u64 ref_root;
	u64 orig_root;
	u64 ref_generation;
	u64 orig_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int ret;
	int slot;
	int level;

	BUG_ON(start_slot < 0);
	BUG_ON(start_slot + nr > btrfs_header_nritems(buf));

	ref_root = btrfs_header_owner(buf);
	ref_generation = btrfs_header_generation(buf);
	orig_root = btrfs_header_owner(orig_buf);
	orig_generation = btrfs_header_generation(orig_buf);
	level = btrfs_header_level(buf);

	if (!root->ref_cows) {
		if (level == 0 &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
			return 0;
	}

	for (i = 0, slot = start_slot; i < nr; i++, slot++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, slot);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, slot,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			maybe_lock_mutex(root);
			ret = __btrfs_update_extent_ref(trans, root, bytenr,
					    orig_buf->start, buf->start,
					    orig_root, ref_root,
					    orig_generation, ref_generation,
					    key.objectid);
			maybe_unlock_mutex(root);
			if (ret)
				goto fail;
		} else {
			bytenr = btrfs_node_blockptr(buf, slot);
			maybe_lock_mutex(root);
			ret = __btrfs_update_extent_ref(trans, root, bytenr,
					    orig_buf->start, buf->start,
					    orig_root, ref_root,
					    orig_generation, ref_generation,
					    level - 1);
			maybe_unlock_mutex(root);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	WARN_ON(1);
	return -1;
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;
}
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;

	block_group_cache = &root->fs_info->block_group_cache;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
			break;

		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
		}
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
	}
	btrfs_free_path(path);
	return werr;
}
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct list_head *cur;
	struct btrfs_space_info *found;
	list_for_each(cur, head) {
		found = list_entry(cur, struct btrfs_space_info, list);
		if (found->flags == flags)
			return found;
	}
	return NULL;
}
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;

	found = __find_space_info(info, flags);
	if (found) {
		found->total_bytes += total_bytes;
		found->bytes_used += bytes_used;
		WARN_ON(found->total_bytes < found->bytes_used);
		*space_info = found;
		return 0;
	}
	found = kmalloc(sizeof(*found), GFP_NOFS);
	if (!found)
		return -ENOMEM;

	list_add(&found->list, &info->space_info);
	found->flags = flags;
	found->total_bytes = total_bytes;
	found->bytes_used = bytes_used;
	found->bytes_pinned = 0;
	found->full = 0;
	*space_info = found;
	return 0;
}
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
				   BTRFS_BLOCK_GROUP_RAID1 |
				   BTRFS_BLOCK_GROUP_DUP);
	if (extra_flags) {
		if (flags & BTRFS_BLOCK_GROUP_DATA)
			fs_info->avail_data_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_METADATA)
			fs_info->avail_metadata_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
			fs_info->avail_system_alloc_bits |= extra_flags;
	}
}
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret;

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	if (space_info->full)
		return 0;

	thresh = div_factor(space_info->total_bytes, 7);
	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
	    thresh)
		return 0;

	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
	if (ret == -ENOSPC) {
		space_info->full = 1;
		return 0;
	}

	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
	BUG_ON(ret);
	return 0;
}
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr,
						 bytenr + num_bytes - 1,
						 GFP_NOFS);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
static int update_pinned_extents(struct btrfs_root *root,
				 u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				 bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				   bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			cache->space_info->bytes_pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			cache->space_info->bytes_pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
{
	u64 last = 0;
	u64 start;
	u64 end;
	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
	int ret;

	while(1) {
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
			break;
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
	}
	return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_io_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;
	struct extent_io_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;

	while(1) {
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		update_pinned_extents(root, start, end + 1 - start, 0);
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
	}
	return 0;
}
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
{
	u64 start;
	u64 end;
	u64 priv;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct pending_extent_op *extent_op;
	struct btrfs_key key;
	struct btrfs_extent_item extent_item;
	int ret;
	int err = 0;

	btrfs_set_stack_extent_refs(&extent_item, 1);
	path = btrfs_alloc_path();

	while(1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		ret = get_state_private(&info->extent_ins, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		if (extent_op->type == PENDING_EXTENT_INSERT) {
			key.objectid = start;
			key.offset = end + 1 - start;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			err = btrfs_insert_item(trans, extent_root, &key,
					&extent_item, sizeof(extent_item));
			BUG_ON(err);

			clear_extent_bits(&info->extent_ins, start, end,
					  EXTENT_LOCKED, GFP_NOFS);

			err = insert_extent_backref(trans, extent_root, path,
						start, extent_op->parent,
						extent_root->root_key.objectid,
						extent_op->generation,
						extent_op->level);
			BUG_ON(err);
		} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
			err = lookup_extent_backref(trans, extent_root, path,
						start, extent_op->orig_parent,
						extent_root->root_key.objectid,
						extent_op->orig_generation,
						extent_op->level, 0);
			BUG_ON(err);

			clear_extent_bits(&info->extent_ins, start, end,
					  EXTENT_LOCKED, GFP_NOFS);

			key.objectid = start;
			key.offset = extent_op->parent;
			key.type = BTRFS_EXTENT_REF_KEY;
			err = btrfs_set_item_key_safe(trans, extent_root, path,
						      &key);
			BUG_ON(err);
			ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					     struct btrfs_extent_ref);
			btrfs_set_ref_generation(path->nodes[0], ref,
						 extent_op->generation);
			btrfs_mark_buffer_dirty(path->nodes[0]);
			btrfs_release_path(extent_root, path);
		} else {
			BUG_ON(1);
		}
		kfree(extent_op);
	}
	btrfs_free_path(path);
	return 0;
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  u64 bytenr, u64 num_bytes, int is_data)
{
	int err = 0;
	struct extent_buffer *buf;

	if (is_data)
		goto pinit;

	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
	if (!buf)
		goto pinit;

	/* we can reuse a block if it hasn't been written
	 * and it is from this transaction.  We can't
	 * reuse anything from the tree log root because
	 * it has tiny sub-transactions.
	 */
	if (btrfs_buffer_uptodate(buf, 0)) {
		u64 header_owner = btrfs_header_owner(buf);
		u64 header_transid = btrfs_header_generation(buf);
		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
		    header_owner != BTRFS_TREE_RELOC_OBJECTID &&
		    header_transid == trans->transid &&
		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			clean_tree_block(NULL, root, buf);
			free_extent_buffer(buf);
			return 1;
		}
	}
	free_extent_buffer(buf);
pinit:
	update_pinned_extents(root, bytenr, num_bytes, 1);

	BUG_ON(err < 0);
	return 0;
}
/*
 * remove an extent from the root, returns 0 on success
 */
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			 *root, u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner_objectid, int pin, int mark_free)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_extent_ops *ops = info->extent_ops;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	int ret;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	struct btrfs_extent_item *ei;
	u32 refs;

	key.objectid = bytenr;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	key.offset = num_bytes;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, parent, root_objectid,
				    ref_generation, owner_objectid, 1);
	if (ret == 0) {
		struct btrfs_key found_key;
		extent_slot = path->slots[0];
		while(extent_slot > 0) {
			extent_slot--;
			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
					      extent_slot);
			if (found_key.objectid != bytenr)
				break;
			if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
			    found_key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
		}
		if (!found_extent) {
			ret = remove_extent_backref(trans, extent_root, path);
			BUG_ON(ret);
			btrfs_release_path(extent_root, path);
			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		printk("Unable to find ref byte nr %llu root %llu "
		       "gen %llu owner %llu\n",
		       (unsigned long long)bytenr,
		       (unsigned long long)root_objectid,
		       (unsigned long long)ref_generation,
		       (unsigned long long)owner_objectid);
		BUG_ON(1);
	}

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);

	btrfs_mark_buffer_dirty(leaf);

	if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
		struct btrfs_extent_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
		/* if the back ref and the extent are next to each other
		 * they get deleted below in one shot
		 */
		path->slots[0] = extent_slot;
		num_to_del = 2;
	} else if (found_extent) {
		/* otherwise delete the extent back ref */
		ret = remove_extent_backref(trans, extent_root, path);
		BUG_ON(ret);
		/* if refs are 0, we need to setup the path for deletion */
		if (refs == 0) {
			btrfs_release_path(extent_root, path);
			ret = btrfs_search_slot(trans, extent_root, &key, path,
						-1, 1);
			if (ret < 0)
				return ret;
			BUG_ON(ret);
		}
	}

	if (refs == 0) {
		u64 super_used;
		u64 root_used;

		/* block accounting for super block */
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);

		/* block accounting for root item */
		root_used = btrfs_root_used(&root->root_item);
		btrfs_set_root_used(&root->root_item,
				    root_used - num_bytes);
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret)
			return ret;

		if (ops && ops->free_extent) {
			ret = ops->free_extent(root, bytenr, num_bytes);
			if (ret > 0) {
				pin = 0;
				mark_free = 0;
			}
		}

		if (pin) {
			ret = pin_down_bytes(trans, root, bytenr, num_bytes, 0);
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
		}

		if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			BUG_ON(ret);
		}

		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
					 mark_free);
		BUG_ON(ret);
	}
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	return ret;
}
/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
{
	int ret;
	int err = 0;
	int mark_free = 0;
	u64 start;
	u64 end;
	u64 priv;
	struct extent_io_tree *pending_del;
	struct extent_io_tree *extent_ins;
	struct pending_extent_op *extent_op;

	extent_ins = &extent_root->fs_info->extent_ins;
	pending_del = &extent_root->fs_info->pending_del;

	while(1) {
		ret = find_first_extent_bit(pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
		if (ret)
			break;

		ret = get_state_private(pending_del, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
				  GFP_NOFS);

		ret = pin_down_bytes(trans, extent_root, start,
				     end + 1 - start, 0);
		mark_free = ret > 0;
		if (!test_range_bit(extent_ins, start, end,
				    EXTENT_LOCKED, 0)) {
free_extent:
			ret = __free_extent(trans, extent_root,
					    start, end + 1 - start,
					    extent_op->orig_parent,
					    extent_root->root_key.objectid,
					    extent_op->orig_generation,
					    extent_op->level, 0, mark_free);
			kfree(extent_op);
		} else {
			kfree(extent_op);
			ret = get_state_private(extent_ins, start, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
				    (unsigned long)priv;

			clear_extent_bits(extent_ins, start, end,
					  EXTENT_LOCKED, GFP_NOFS);

			if (extent_op->type == PENDING_BACKREF_UPDATE)
				goto free_extent;

			ret = update_block_group(trans, extent_root, start,
						 end + 1 - start, 0,
						 mark_free);
			BUG_ON(ret);
			kfree(extent_op);
		}
		if (ret)
			err = ret;
	}
	return err;
}
/*
 * remove an extent from the root, returns 0 on success
 */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 bytenr, u64 num_bytes, u64 parent,
		      u64 root_objectid, u64 ref_generation,
		      u64 owner_objectid, int pin)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	int pending_ret;
	int ret;

	WARN_ON(num_bytes < root->sectorsize);
	if (root == extent_root) {
		struct pending_extent_op *extent_op;

		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);

		extent_op->type = PENDING_EXTENT_DELETE;
		extent_op->bytenr = bytenr;
		extent_op->num_bytes = num_bytes;
		extent_op->parent = parent;
		extent_op->orig_parent = parent;
		extent_op->generation = ref_generation;
		extent_op->orig_generation = ref_generation;
		extent_op->level = (int)owner_objectid;

		set_extent_bits(&root->fs_info->pending_del,
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
		set_state_private(&root->fs_info->pending_del,
				  bytenr, (unsigned long)extent_op);
		return 0;
	}
	ret = __free_extent(trans, root, bytenr, num_bytes, parent,
			    root_objectid, ref_generation,
			    owner_objectid, pin, pin == 0);
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
	return ret ? ret : pending_ret;
}
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}
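/*
 * Illustrative example (hypothetical stripesize, not from the code above):
 * with root->stripesize == 64K the mask is 0xffff, so
 * stripe_align(root, 0x12345) returns 0x20000, while an already aligned
 * value such as 0x30000 is returned unchanged.
 */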
/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
{
	int ret;
	u64 orig_search_start = search_start;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total_needed = num_bytes;
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;
	int wrapped = 0;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

	if (hint_byte) {
		block_group = btrfs_lookup_first_block_group(info, hint_byte);
		if (!block_group)
			hint_byte = search_start;
		block_group = btrfs_find_block_group(root, block_group,
						     hint_byte, data, 1);
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group,
						     search_start, data, 1);
	}

	total_needed += empty_size;

check_failed:
	if (!block_group) {
		block_group = btrfs_lookup_first_block_group(info,
							     search_start);
		if (!block_group)
			block_group = btrfs_lookup_first_block_group(info,
						       orig_search_start);
	}
	ret = find_search_start(root, &block_group, &search_start,
				total_needed, data);
	if (ret)
		goto error;

	search_start = stripe_align(root, search_start);
	ins->objectid = search_start;
	ins->offset = num_bytes;

	if (ins->objectid + num_bytes >
	    block_group->key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			       block_group->key.offset;
		goto new_group;
	}

	if (test_range_bit(&info->extent_ins, ins->objectid,
			   ins->objectid + num_bytes - 1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}

	if (test_range_bit(&info->pinned_extents, ins->objectid,
			   ins->objectid + num_bytes - 1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}

	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}

	if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
		block_group = btrfs_lookup_block_group(info, ins->objectid);
		if (block_group)
			trans->block_group = block_group;
	}
	ins->offset = num_bytes;
	return 0;

new_group:
	block_group = btrfs_lookup_first_block_group(info, search_start);
	if (!block_group) {
		search_start = orig_search_start;
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
			full_scan = 1;
		} else
			wrapped = 1;
	}
	cond_resched();
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
	goto check_failed;

error:
	return ret;
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       u64 num_bytes, u64 parent,
		       u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, int data)
{
	int ret;
	int pending_ret;
	u64 super_used, root_used;
	u64 search_start = 0;
	u64 alloc_profile;
	u32 sizes[2];
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_ref *ref;
	struct btrfs_key keys[2];

	if (info->extent_ops) {
		struct btrfs_extent_ops *ops = info->extent_ops;
		ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
		BUG_ON(ret);
		goto found;
	}

	if (data) {
		alloc_profile = info->avail_data_alloc_bits &
				info->data_alloc_profile;
		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
	} else if ((info->system_allocs > 0 || root == info->chunk_root) &&
		   info->system_allocs >= 0) {
		alloc_profile = info->avail_system_alloc_bits &
				info->system_alloc_profile;
		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
	} else {
		alloc_profile = info->avail_metadata_alloc_bits &
				info->metadata_alloc_profile;
		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
	}

	if (root->ref_cows) {
		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     num_bytes,
					     BTRFS_BLOCK_GROUP_METADATA);
			BUG_ON(ret);
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     num_bytes + 2 * 1024 * 1024, data);
		BUG_ON(ret);
	}

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
	BUG_ON(ret);
found:
	if (ret)
		return ret;

	if (parent == 0)
		parent = ins->objectid;

	/* block accounting for super block */
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);

	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

	if (root == extent_root) {
		struct pending_extent_op *extent_op;

		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);

		extent_op->type = PENDING_EXTENT_INSERT;
		extent_op->bytenr = ins->objectid;
		extent_op->num_bytes = ins->offset;
		extent_op->parent = parent;
		extent_op->orig_parent = 0;
		extent_op->generation = ref_generation;
		extent_op->orig_generation = 0;
		extent_op->level = (int)owner;

		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
		set_state_private(&root->fs_info->extent_ins,
				  ins->objectid, (unsigned long)extent_op);
		goto update_block;
	}

	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;

	memcpy(&keys[0], ins, sizeof(*ins));
	keys[1].objectid = ins->objectid;
	keys[1].type = BTRFS_EXTENT_REF_KEY;
	keys[1].offset = parent;
	sizes[0] = sizeof(*extent_item);
	sizes[1] = sizeof(*ref);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
				       sizes, 2);

	BUG_ON(ret);
	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
			     struct btrfs_extent_ref);

	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
	btrfs_set_ref_num_refs(path->nodes[0], ref, 1);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);

	if (ret) {
		return ret;
	}
	if (pending_ret) {
		return pending_ret;
	}

update_block:
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
	if (ret) {
		printk("update block group failed for %llu %llu\n",
		       (unsigned long long)ins->objectid,
		       (unsigned long long)ins->offset);
		BUG();
	}
	return 0;
}
/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize, u64 parent,
					     u64 root_objectid,
					     u64 ref_generation,
					     int level,
					     u64 hint,
					     u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	ret = btrfs_alloc_extent(trans, root, blocksize, parent,
				 root_objectid, ref_generation,
				 level, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		if (parent == 0)
			parent = ins.objectid;
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  parent, root->root_key.objectid,
				  ref_generation, level, 0);
		BUG_ON(1);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_buffer_uptodate(buf);
	trans->blocks_used++;
	return buf;
}
2043 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2044 struct btrfs_root *root,
2045 struct extent_buffer *leaf)
2047 u64 leaf_owner;
2048 u64 leaf_generation;
2049 struct btrfs_key key;
2050 struct btrfs_file_extent_item *fi;
2051 int i;
2052 int nritems;
2053 int ret;
2055 BUG_ON(!btrfs_is_leaf(leaf));
2056 nritems = btrfs_header_nritems(leaf);
2057 leaf_owner = btrfs_header_owner(leaf);
2058 leaf_generation = btrfs_header_generation(leaf);
2060 for (i = 0; i < nritems; i++) {
2061 u64 disk_bytenr;
2063 btrfs_item_key_to_cpu(leaf, &key, i);
2064 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2065 continue;
2066 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2067 if (btrfs_file_extent_type(leaf, fi) ==
2068 BTRFS_FILE_EXTENT_INLINE)
2069 continue;
2070 /*
2071 * FIXME make sure to insert a trans record that
2072 * repeats the snapshot del on crash
2073 */
2074 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2075 if (disk_bytenr == 0)
2076 continue;
2077 ret = btrfs_free_extent(trans, root, disk_bytenr,
2078 btrfs_file_extent_disk_num_bytes(leaf, fi),
2079 leaf->start, leaf_owner, leaf_generation,
2080 key.objectid, 0);
2081 BUG_ON(ret);
2083 return 0;
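/*
 * reada_walk_down: best-effort readahead of the blocks referenced from
 * 'node', starting at 'slot'.  Shared blocks (ref count != 1) and blocks
 * far from the previously read one are skipped; the pass gives up after
 * 32 skips.
 */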
2086 static void noinline reada_walk_down(struct btrfs_root *root,
2087 struct extent_buffer *node,
2088 int slot)
2090 u64 bytenr;
2091 u64 last = 0;
2092 u32 nritems;
2093 u32 refs;
2094 u32 blocksize;
2095 int ret;
2096 int i;
2097 int level;
2098 int skipped = 0;
2100 nritems = btrfs_header_nritems(node);
2101 level = btrfs_header_level(node);
2102 if (level)
2103 return;
2105 for (i = slot; i < nritems && skipped < 32; i++) {
2106 bytenr = btrfs_node_blockptr(node, i);
2107 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2108 (last > bytenr && last - bytenr > 32 * 1024))) {
2109 skipped++;
2110 continue;
2112 blocksize = btrfs_level_size(root, level - 1);
2113 if (i != slot) {
2114 ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2115 blocksize, &refs);
2116 BUG_ON(ret);
2117 if (refs != 1) {
2118 skipped++;
2119 continue;
2122 mutex_unlock(&root->fs_info->fs_mutex);
2123 ret = readahead_tree_block(root, bytenr, blocksize,
2124 btrfs_node_ptr_generation(node, i));
2125 last = bytenr + blocksize;
2126 cond_resched();
2127 mutex_lock(&root->fs_info->fs_mutex);
2128 if (ret)
2129 break;
2130 }
2131 }
2133 /*
2134 * helper function for drop_snapshot, this walks down the tree dropping ref
2135 * counts as it goes.
2136 */
2137 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path, int *level)
2141 u64 root_owner;
2142 u64 root_gen;
2143 u64 bytenr;
2144 u64 ptr_gen;
2145 struct extent_buffer *next;
2146 struct extent_buffer *cur;
2147 struct extent_buffer *parent;
2148 u32 blocksize;
2149 int ret;
2150 u32 refs;
2152 WARN_ON(*level < 0);
2153 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2154 ret = btrfs_lookup_extent_ref(trans, root,
2155 path->nodes[*level]->start,
2156 path->nodes[*level]->len, &refs);
2157 BUG_ON(ret);
2158 if (refs > 1)
2159 goto out;
2161 /*
2162 * walk down to the last node level and free all the leaves
2163 */
2164 while(*level >= 0) {
2165 WARN_ON(*level < 0);
2166 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2167 cur = path->nodes[*level];
2169 if (btrfs_header_level(cur) != *level)
2170 WARN_ON(1);
2172 if (path->slots[*level] >=
2173 btrfs_header_nritems(cur))
2174 break;
2175 if (*level == 0) {
2176 ret = drop_leaf_ref(trans, root, cur);
2177 BUG_ON(ret);
2178 break;
2180 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2181 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2182 blocksize = btrfs_level_size(root, *level - 1);
2183 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2184 &refs);
2185 BUG_ON(ret);
2186 if (refs != 1) {
2187 parent = path->nodes[*level];
2188 root_owner = btrfs_header_owner(parent);
2189 root_gen = btrfs_header_generation(parent);
2190 path->slots[*level]++;
2191 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
2192 parent->start, root_owner,
2193 root_gen, *level - 1, 1);
2194 BUG_ON(ret);
2195 continue;
2197 next = btrfs_find_tree_block(root, bytenr, blocksize);
2198 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2199 free_extent_buffer(next);
2200 reada_walk_down(root, cur, path->slots[*level]);
2201 mutex_unlock(&root->fs_info->fs_mutex);
2202 next = read_tree_block(root, bytenr, blocksize,
2203 ptr_gen);
2204 mutex_lock(&root->fs_info->fs_mutex);
2206 WARN_ON(*level <= 0);
2207 if (path->nodes[*level-1])
2208 free_extent_buffer(path->nodes[*level-1]);
2209 path->nodes[*level-1] = next;
2210 *level = btrfs_header_level(next);
2211 path->slots[*level] = 0;
2213 out:
2214 WARN_ON(*level < 0);
2215 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2217 if (path->nodes[*level] == root->node) {
2218 root_owner = root->root_key.objectid;
2219 parent = path->nodes[*level];
2220 } else {
2221 parent = path->nodes[*level + 1];
2222 root_owner = btrfs_header_owner(parent);
2225 root_gen = btrfs_header_generation(parent);
2226 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2227 path->nodes[*level]->len, parent->start,
2228 root_owner, root_gen, *level, 1);
2229 free_extent_buffer(path->nodes[*level]);
2230 path->nodes[*level] = NULL;
2231 *level += 1;
2232 BUG_ON(ret);
2233 return 0;
2234 }
2236 /*
2237 * helper for dropping snapshots. This walks back up the tree in the path
2238 * to find the first node higher up where we haven't yet gone through
2239 * all the slots
2240 */
2241 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2242 struct btrfs_root *root,
2243 struct btrfs_path *path, int *level)
2245 u64 root_owner;
2246 u64 root_gen;
2247 struct btrfs_root_item *root_item = &root->root_item;
2248 int i;
2249 int slot;
2250 int ret;
2252 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2253 slot = path->slots[i];
2254 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2255 struct extent_buffer *node;
2256 struct btrfs_disk_key disk_key;
2257 node = path->nodes[i];
2258 path->slots[i]++;
2259 *level = i;
2260 WARN_ON(*level == 0);
2261 btrfs_node_key(node, &disk_key, path->slots[i]);
2262 memcpy(&root_item->drop_progress,
2263 &disk_key, sizeof(disk_key));
2264 root_item->drop_level = i;
2265 return 0;
2266 } else {
2267 struct extent_buffer *parent;
2268 if (path->nodes[*level] == root->node)
2269 parent = path->nodes[*level];
2270 else
2271 parent = path->nodes[*level + 1];
2273 root_owner = btrfs_header_owner(parent);
2274 root_gen = btrfs_header_generation(parent);
2275 ret = btrfs_free_extent(trans, root,
2276 path->nodes[*level]->start,
2277 path->nodes[*level]->len,
2278 parent->start, root_owner,
2279 root_gen, *level, 1);
2280 BUG_ON(ret);
2281 free_extent_buffer(path->nodes[*level]);
2282 path->nodes[*level] = NULL;
2283 *level = i + 1;
2286 return 1;
2287 }
2289 /*
2290 * drop the reference count on the tree rooted at 'snap'. This traverses
2291 * the tree freeing any blocks that have a ref count of zero after being
2292 * decremented.
2293 */
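/*
 * A sketch of the calling pattern this interface implies (illustrative
 * only, not lifted from an actual caller):
 *
 *	do {
 *		trans = btrfs_start_transaction(snap, 1);
 *		ret = btrfs_drop_snapshot(trans, snap);
 *		btrfs_commit_transaction(trans, snap);
 *	} while (ret == -EAGAIN);
 *
 * progress is recorded in root_item->drop_progress, so each call resumes
 * where the previous one left off.
 */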
2294 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2295 *root)
2297 int ret = 0;
2298 int wret;
2299 int level;
2300 struct btrfs_path *path;
2301 int i;
2302 int orig_level;
2303 struct btrfs_root_item *root_item = &root->root_item;
2305 path = btrfs_alloc_path();
2306 BUG_ON(!path);
2308 level = btrfs_header_level(root->node);
2309 orig_level = level;
2310 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2311 path->nodes[level] = root->node;
2312 extent_buffer_get(root->node);
2313 path->slots[level] = 0;
2314 } else {
2315 struct btrfs_key key;
2316 struct btrfs_disk_key found_key;
2317 struct extent_buffer *node;
2319 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2320 level = root_item->drop_level;
2321 path->lowest_level = level;
2322 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2323 if (wret < 0) {
2324 ret = wret;
2325 goto out;
2327 node = path->nodes[level];
2328 btrfs_node_key(node, &found_key, path->slots[level]);
2329 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2330 sizeof(found_key)));
2332 while(1) {
2333 wret = walk_down_tree(trans, root, path, &level);
2334 if (wret < 0)
2335 ret = wret;
2336 if (wret != 0)
2337 break;
2339 wret = walk_up_tree(trans, root, path, &level);
2340 if (wret < 0)
2341 ret = wret;
2342 if (wret != 0)
2343 break;
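/*
 * only one down/up pass is made per call; -EAGAIN tells the caller to
 * commit the transaction and call again, resuming from the key that
 * walk_up_tree() saved in root_item->drop_progress.
 */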
2345 ret = -EAGAIN;
2346 break;
2349 for (i = 0; i <= orig_level; i++) {
2350 if (path->nodes[i]) {
2351 free_extent_buffer(path->nodes[i]);
2352 path->nodes[i] = NULL;
2355 out:
2356 btrfs_free_path(path);
2357 return ret;
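/*
 * btrfs_free_block_groups: release the in-memory block group and free
 * space caches.  The private pointer stored for each block group range is
 * the kzalloc'd cache struct, so it is freed before the extent bits are
 * cleared.
 */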
2360 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2362 u64 start;
2363 u64 end;
2364 u64 ptr;
2365 int ret;
2366 while(1) {
2367 ret = find_first_extent_bit(&info->block_group_cache, 0,
2368 &start, &end, (unsigned int)-1);
2369 if (ret)
2370 break;
2371 ret = get_state_private(&info->block_group_cache, start, &ptr);
2372 if (!ret)
2373 kfree((void *)(unsigned long)ptr);
2374 clear_extent_bits(&info->block_group_cache, start,
2375 end, (unsigned int)-1, GFP_NOFS);
2377 while(1) {
2378 ret = find_first_extent_bit(&info->free_space_cache, 0,
2379 &start, &end, EXTENT_DIRTY);
2380 if (ret)
2381 break;
2382 clear_extent_dirty(&info->free_space_cache, start,
2383 end, GFP_NOFS);
2385 return 0;
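/*
 * find_first_block_group: position 'path' at the first block group item
 * at or beyond key->objectid.  Returns 0 when one is found, -ENOENT when
 * the tree runs out, or a negative error from the search.
 */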
2388 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2389 struct btrfs_key *key)
2391 int ret;
2392 struct btrfs_key found_key;
2393 struct extent_buffer *leaf;
2394 int slot;
2396 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2397 if (ret < 0)
2398 return ret;
2399 while(1) {
2400 slot = path->slots[0];
2401 leaf = path->nodes[0];
2402 if (slot >= btrfs_header_nritems(leaf)) {
2403 ret = btrfs_next_leaf(root, path);
2404 if (ret == 0)
2405 continue;
2406 if (ret < 0)
2407 goto error;
2408 break;
2410 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2412 if (found_key.objectid >= key->objectid &&
2413 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2414 return 0;
2415 path->slots[0]++;
2417 ret = -ENOENT;
2418 error:
2419 return ret;
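/*
 * btrfs_read_block_groups: walk every block group item in the extent tree
 * and build its in-memory cache entry: flags, read-only state, space_info
 * accounting and the extent-bits record used for later lookups.
 */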
2422 int btrfs_read_block_groups(struct btrfs_root *root)
2424 struct btrfs_path *path;
2425 int ret;
2426 int bit;
2427 struct btrfs_block_group_cache *cache;
2428 struct btrfs_fs_info *info = root->fs_info;
2429 struct btrfs_space_info *space_info;
2430 struct extent_io_tree *block_group_cache;
2431 struct btrfs_key key;
2432 struct btrfs_key found_key;
2433 struct extent_buffer *leaf;
2435 block_group_cache = &info->block_group_cache;
2437 root = info->extent_root;
2438 key.objectid = 0;
2439 key.offset = 0;
2440 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2441 path = btrfs_alloc_path();
2442 if (!path)
2443 return -ENOMEM;
2445 while(1) {
2446 ret = find_first_block_group(root, path, &key);
2447 if (ret > 0) {
2448 ret = 0;
2449 goto error;
2451 if (ret != 0) {
2452 goto error;
2454 leaf = path->nodes[0];
2455 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2456 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2457 if (!cache) {
2458 ret = -ENOMEM;
2459 break;
2462 read_extent_buffer(leaf, &cache->item,
2463 btrfs_item_ptr_offset(leaf, path->slots[0]),
2464 sizeof(cache->item));
2465 memcpy(&cache->key, &found_key, sizeof(found_key));
2466 cache->cached = 0;
2467 cache->pinned = 0;
2468 key.objectid = found_key.objectid + found_key.offset;
2469 btrfs_release_path(root, path);
2470 cache->flags = btrfs_block_group_flags(&cache->item);
2471 bit = 0;
2472 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2473 bit = BLOCK_GROUP_DATA;
2474 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2475 bit = BLOCK_GROUP_SYSTEM;
2476 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2477 bit = BLOCK_GROUP_METADATA;
2479 set_avail_alloc_bits(info, cache->flags);
2480 if (btrfs_chunk_readonly(root, cache->key.objectid))
2481 cache->ro = 1;
2483 ret = update_space_info(info, cache->flags, found_key.offset,
2484 btrfs_block_group_used(&cache->item),
2485 &space_info);
2486 BUG_ON(ret);
2487 cache->space_info = space_info;
2489 /* use EXTENT_LOCKED to prevent merging */
2490 set_extent_bits(block_group_cache, found_key.objectid,
2491 found_key.objectid + found_key.offset - 1,
2492 bit | EXTENT_LOCKED, GFP_NOFS);
2493 set_state_private(block_group_cache, found_key.objectid,
2494 (unsigned long)cache);
2496 ret = 0;
2497 error:
2498 btrfs_free_path(path);
2499 return ret;
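/*
 * btrfs_make_block_group: set up the in-memory cache entry and insert the
 * on-disk block group item for a freshly allocated chunk.
 */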
2502 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, u64 bytes_used,
2504 u64 type, u64 chunk_objectid, u64 chunk_offset,
2505 u64 size)
2507 int ret;
2508 int bit = 0;
2509 struct btrfs_root *extent_root;
2510 struct btrfs_block_group_cache *cache;
2511 struct extent_io_tree *block_group_cache;
2513 extent_root = root->fs_info->extent_root;
2514 block_group_cache = &root->fs_info->block_group_cache;
2516 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2517 BUG_ON(!cache);
2518 cache->key.objectid = chunk_offset;
2519 cache->key.offset = size;
2521 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2522 btrfs_set_block_group_used(&cache->item, bytes_used);
2523 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2524 cache->flags = type;
2525 btrfs_set_block_group_flags(&cache->item, type);
2527 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2528 &cache->space_info);
2529 BUG_ON(ret);
2531 bit = block_group_state_bits(type);
2532 set_extent_bits(block_group_cache, chunk_offset,
2533 chunk_offset + size - 1,
2534 bit | EXTENT_LOCKED, GFP_NOFS);
2536 set_state_private(block_group_cache, chunk_offset,
2537 (unsigned long)cache);
2538 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2539 sizeof(cache->item));
2540 BUG_ON(ret);
2542 finish_current_insert(trans, extent_root);
2543 ret = del_pending_extents(trans, extent_root);
2544 BUG_ON(ret);
2545 set_avail_alloc_bits(extent_root->fs_info, type);
2546 return 0;
2547 }
2549 /*
2550 * This is for converter use only.
2551 *
2552 * In that case, we don't know where the free blocks are located.
2553 * Therefore all block group cache entries must be set up properly
2554 * before doing any block allocation.
2555 */
2556 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
2557 struct btrfs_root *root)
2559 u64 total_bytes;
2560 u64 cur_start;
2561 u64 group_type;
2562 u64 group_size;
2563 u64 group_align;
2564 u64 total_data = 0;
2565 u64 total_metadata = 0;
2566 u64 chunk_objectid;
2567 int ret;
2568 int bit;
2569 struct btrfs_root *extent_root;
2570 struct btrfs_block_group_cache *cache;
2571 struct extent_io_tree *block_group_cache;
2573 extent_root = root->fs_info->extent_root;
2574 block_group_cache = &root->fs_info->block_group_cache;
2575 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2576 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
2577 group_align = 64 * root->sectorsize;
2579 cur_start = 0;
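/*
 * carve the space into block groups: the first group is a small system
 * group (8MB-32MB), later groups are assigned to metadata (capped at 1GB)
 * or data (capped at 5GB), keeping data at roughly twice the metadata
 * allocation; the final group absorbs whatever is left over.
 */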
2580 while (cur_start < total_bytes) {
2581 group_size = total_bytes / 12;
2582 group_size = min_t(u64, group_size, total_bytes - cur_start);
2583 if (cur_start == 0) {
2584 bit = BLOCK_GROUP_SYSTEM;
2585 group_type = BTRFS_BLOCK_GROUP_SYSTEM;
2586 group_size /= 4;
2587 group_size &= ~(group_align - 1);
2588 group_size = max_t(u64, group_size, 8 * 1024 * 1024);
2589 group_size = min_t(u64, group_size, 32 * 1024 * 1024);
2590 } else {
2591 group_size &= ~(group_align - 1);
2592 if (total_data >= total_metadata * 2) {
2593 group_type = BTRFS_BLOCK_GROUP_METADATA;
2594 group_size = min_t(u64, group_size,
2595 1ULL * 1024 * 1024 * 1024);
2596 total_metadata += group_size;
2597 } else {
2598 group_type = BTRFS_BLOCK_GROUP_DATA;
2599 group_size = min_t(u64, group_size,
2600 5ULL * 1024 * 1024 * 1024);
2601 total_data += group_size;
2603 if ((total_bytes - cur_start) * 4 < group_size * 5)
2604 group_size = total_bytes - cur_start;
2607 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2608 BUG_ON(!cache);
2610 cache->key.objectid = cur_start;
2611 cache->key.offset = group_size;
2612 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2614 btrfs_set_block_group_used(&cache->item, 0);
2615 btrfs_set_block_group_chunk_objectid(&cache->item,
2616 chunk_objectid);
2617 btrfs_set_block_group_flags(&cache->item, group_type);
2619 cache->flags = group_type;
2621 ret = update_space_info(root->fs_info, group_type, group_size,
2622 0, &cache->space_info);
2623 BUG_ON(ret);
2624 set_avail_alloc_bits(extent_root->fs_info, group_type);
2626 set_extent_bits(block_group_cache, cur_start,
2627 cur_start + group_size - 1,
2628 bit | EXTENT_LOCKED, GFP_NOFS);
2629 set_state_private(block_group_cache, cur_start,
2630 (unsigned long)cache);
2631 cur_start += group_size;
2633 /* then insert all the items */
2634 cur_start = 0;
2635 while(cur_start < total_bytes) {
2636 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
2637 BUG_ON(!cache);
2639 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2640 sizeof(cache->item));
2641 BUG_ON(ret);
2643 finish_current_insert(trans, extent_root);
2644 ret = del_pending_extents(trans, extent_root);
2645 BUG_ON(ret);
2647 cur_start = cache->key.objectid + cache->key.offset;
2649 return 0;
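/*
 * btrfs_update_block_group: thin exported wrapper around
 * update_block_group() for callers outside this file.
 */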
2652 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2653 struct btrfs_root *root,
2654 u64 bytenr, u64 num_bytes, int alloc,
2655 int mark_free)
2657 return update_block_group(trans, root, bytenr, num_bytes,
2658 alloc, mark_free);