2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
21 #include "kerncompat.h"
22 #include "radix-tree.h"
25 #include "print-tree.h"
26 #include "transaction.h"
28 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
29 btrfs_root
*extent_root
);
30 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
33 static int inc_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
34 *root
, u64 bytenr
, u32 blocksize
)
36 struct btrfs_path path
;
40 struct btrfs_extent_item
*item
;
43 btrfs_init_path(&path
);
44 key
.objectid
= bytenr
;
45 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
46 key
.offset
= blocksize
;
47 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
52 l
= &path
.nodes
[0]->leaf
;
53 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
54 refs
= btrfs_extent_refs(item
);
55 btrfs_set_extent_refs(item
, refs
+ 1);
57 BUG_ON(list_empty(&path
.nodes
[0]->dirty
));
58 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
59 finish_current_insert(trans
, root
->fs_info
->extent_root
);
60 run_pending(trans
, root
->fs_info
->extent_root
);
64 static int lookup_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
65 *root
, u64 bytenr
, u32 blocksize
, u32
*refs
)
67 struct btrfs_path path
;
71 struct btrfs_extent_item
*item
;
72 btrfs_init_path(&path
);
73 key
.objectid
= bytenr
;
74 key
.offset
= blocksize
;
75 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
76 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
80 l
= &path
.nodes
[0]->leaf
;
81 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
82 *refs
= btrfs_extent_refs(item
);
83 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
87 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
88 struct btrfs_buffer
*buf
)
98 level
= btrfs_header_level(&buf
->node
.header
) - 1;
99 blocksize
= btrfs_level_size(root
, level
);
101 if (btrfs_is_leaf(&buf
->node
))
104 for (i
= 0; i
< btrfs_header_nritems(&buf
->node
.header
); i
++) {
105 bytenr
= btrfs_node_blockptr(&buf
->node
, i
);
106 inc_block_ref(trans
, root
, bytenr
, blocksize
);
112 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
113 struct btrfs_root
*root
)
115 return inc_block_ref(trans
, root
, root
->node
->bytenr
,
119 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
120 struct btrfs_root
*root
,
121 struct btrfs_path
*path
,
122 struct btrfs_block_group_cache
*cache
)
126 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
127 struct btrfs_block_group_item
*bi
;
129 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
,
130 &cache
->key
, path
, 0, 1);
132 bi
= btrfs_item_ptr(&path
->nodes
[0]->leaf
, path
->slots
[0],
133 struct btrfs_block_group_item
);
134 memcpy(bi
, &cache
->item
, sizeof(*bi
));
135 dirty_tree_block(trans
, extent_root
, path
->nodes
[0]);
136 btrfs_release_path(extent_root
, path
);
137 finish_current_insert(trans
, root
);
138 pending_ret
= run_pending(trans
, root
);
147 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
148 struct btrfs_root
*root
)
150 struct btrfs_block_group_cache
*bg
;
151 struct cache_extent
*cache
;
154 struct cache_tree
*bg_cache
= &root
->fs_info
->block_group_cache
;
155 struct btrfs_path path
;
156 btrfs_init_path(&path
);
160 cache
= find_first_cache_extent(bg_cache
, start
);
163 bg
= container_of(cache
, struct btrfs_block_group_cache
,
165 start
= cache
->start
+ cache
->size
;
167 err
= write_one_cache_group(trans
, root
,
177 static int update_block_group(struct btrfs_trans_handle
*trans
,
178 struct btrfs_root
*root
,
179 u64 bytenr
, u64 num
, int alloc
)
181 struct btrfs_block_group_cache
*bg
;
182 struct cache_extent
*cache
;
183 struct btrfs_fs_info
*info
= root
->fs_info
;
189 cache
= find_first_cache_extent(&info
->block_group_cache
,
193 bg
= container_of(cache
, struct btrfs_block_group_cache
,
196 byte_in_group
= bytenr
- bg
->key
.objectid
;
197 old_val
= btrfs_block_group_used(&bg
->item
);
198 if (total
> bg
->key
.offset
- byte_in_group
)
199 num
= bg
->key
.offset
- byte_in_group
;
208 btrfs_set_block_group_used(&bg
->item
, old_val
);
213 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
, struct
217 struct cache_extent
*pe
;
218 struct cache_extent
*next
;
220 pe
= find_first_cache_extent(&root
->fs_info
->pinned_tree
, 0);
224 next
= next_cache_extent(pe
);
225 remove_cache_extent(&root
->fs_info
->pinned_tree
, pe
);
226 free_cache_extent(pe
);
229 root
->fs_info
->last_insert
.objectid
= first
;
230 root
->fs_info
->last_insert
.offset
= 0;
234 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
235 btrfs_root
*extent_root
)
237 struct btrfs_key ins
;
238 struct btrfs_extent_item extent_item
;
240 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
241 struct cache_extent
*pe
;
242 struct cache_extent
*next
;
243 struct cache_tree
*pending_tree
= &info
->pending_tree
;
245 btrfs_set_extent_refs(&extent_item
, 1);
246 btrfs_set_extent_owner(&extent_item
, extent_root
->root_key
.objectid
);
248 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
249 pe
= find_first_cache_extent(pending_tree
, 0);
251 ins
.offset
= pe
->size
;
252 ins
.objectid
= pe
->start
;
254 remove_cache_extent(pending_tree
, pe
);
255 next
= next_cache_extent(pe
);
257 next
= find_first_cache_extent(pending_tree
, 0);
259 free_cache_extent(pe
);
262 ret
= btrfs_insert_item(trans
, extent_root
, &ins
, &extent_item
,
263 sizeof(extent_item
));
265 btrfs_print_tree(extent_root
, extent_root
->node
);
273 * remove an extent from the root, returns 0 on success
275 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
276 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
278 struct btrfs_path path
;
279 struct btrfs_key key
;
280 struct btrfs_fs_info
*info
= root
->fs_info
;
281 struct btrfs_root
*extent_root
= info
->extent_root
;
283 struct btrfs_extent_item
*ei
;
286 key
.objectid
= bytenr
;
287 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
288 key
.offset
= num_bytes
;
290 btrfs_init_path(&path
);
291 ret
= btrfs_search_slot(trans
, extent_root
, &key
, &path
, -1, 1);
293 btrfs_print_tree(extent_root
, extent_root
->node
);
294 printf("failed to find %llu\n",
295 (unsigned long long)key
.objectid
);
298 ei
= btrfs_item_ptr(&path
.nodes
[0]->leaf
, path
.slots
[0],
299 struct btrfs_extent_item
);
300 BUG_ON(ei
->refs
== 0);
301 refs
= btrfs_extent_refs(ei
) - 1;
302 btrfs_set_extent_refs(ei
, refs
);
304 u64 super_bytes_used
, root_bytes_used
;
307 err
= insert_cache_extent(&info
->pinned_tree
,
311 super_bytes_used
= btrfs_super_bytes_used(info
->disk_super
);
312 btrfs_set_super_bytes_used(info
->disk_super
,
313 super_bytes_used
- num_bytes
);
314 root_bytes_used
= btrfs_root_bytes_used(&root
->root_item
);
315 btrfs_set_root_bytes_used(&root
->root_item
,
316 root_bytes_used
- num_bytes
);
318 ret
= btrfs_del_item(trans
, extent_root
, &path
);
319 if (!pin
&& extent_root
->fs_info
->last_insert
.objectid
>
321 extent_root
->fs_info
->last_insert
.objectid
= bytenr
;
324 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0);
327 btrfs_release_path(extent_root
, &path
);
328 finish_current_insert(trans
, extent_root
);
333 * find all the blocks marked as pending in the radix tree and remove
334 * them from the extent map
336 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
337 btrfs_root
*extent_root
)
340 struct cache_extent
*pe
;
341 struct cache_extent
*next
;
342 struct cache_tree
*del_pending
= &extent_root
->fs_info
->del_pending
;
344 pe
= find_first_cache_extent(del_pending
, 0);
346 remove_cache_extent(del_pending
, pe
);
347 ret
= __free_extent(trans
, extent_root
,
348 pe
->start
, pe
->size
, 1);
350 next
= next_cache_extent(pe
);
352 next
= find_first_cache_extent(del_pending
, 0);
353 free_cache_extent(pe
);
359 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
362 del_pending_extents(trans
, extent_root
);
368 * remove an extent from the root, returns 0 on success
370 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
371 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
373 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
377 if (root
== extent_root
) {
378 ret
= insert_cache_extent(&root
->fs_info
->del_pending
,
383 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, pin
);
384 pending_ret
= run_pending(trans
, root
->fs_info
->extent_root
);
385 return ret
? ret
: pending_ret
;
388 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
390 u64 mask
= ((u64
)root
->stripesize
- 1);
391 u64 ret
= (val
+ mask
) & ~mask
;
396 * walks the btree of allocated extents and find a hole of a given size.
397 * The key ins is changed to record the hole:
398 * ins->objectid == block start
399 * ins->flags = BTRFS_EXTENT_ITEM_KEY
400 * ins->offset == number of blocks
401 * Any available blocks before search_start are skipped.
403 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
404 *orig_root
, u64 total_needed
, u64 search_start
,
405 u64 search_end
, struct btrfs_key
*ins
)
407 struct btrfs_path path
;
408 struct btrfs_key key
;
415 struct btrfs_leaf
*l
;
416 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
418 if (root
->fs_info
->last_insert
.objectid
> search_start
)
419 search_start
= root
->fs_info
->last_insert
.objectid
;
421 search_start
= stripe_align(root
, search_start
);
422 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
425 btrfs_init_path(&path
);
426 ins
->objectid
= search_start
;
429 ret
= btrfs_search_slot(trans
, root
, ins
, &path
, 0, 0);
433 if (path
.slots
[0] > 0)
437 l
= &path
.nodes
[0]->leaf
;
438 slot
= path
.slots
[0];
439 if (slot
>= btrfs_header_nritems(&l
->header
)) {
440 ret
= btrfs_next_leaf(root
, &path
);
446 aligned
= stripe_align(root
, search_start
);
447 ins
->objectid
= aligned
;
448 ins
->offset
= (u64
)-1 - aligned
;
452 ins
->objectid
= stripe_align(root
,
453 last_byte
> search_start
?
454 last_byte
: search_start
);
455 ins
->offset
= (u64
)-1 - ins
->objectid
;
458 btrfs_disk_key_to_cpu(&key
, &l
->items
[slot
].key
);
459 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
)
461 if (key
.objectid
>= search_start
) {
463 if (last_byte
< search_start
)
464 last_byte
= search_start
;
465 aligned
= stripe_align(root
, last_byte
);
466 hole_size
= key
.objectid
- aligned
;
467 if (key
.objectid
> aligned
&&
468 hole_size
> total_needed
) {
469 ins
->objectid
= aligned
;
470 ins
->offset
= hole_size
;
476 last_byte
= key
.objectid
+ key
.offset
;
482 /* we have to make sure we didn't find an extent that has already
483 * been allocated by the map tree or the original allocation
485 btrfs_release_path(root
, &path
);
486 BUG_ON(ins
->objectid
< search_start
);
487 if (find_cache_extent(&root
->fs_info
->pinned_tree
,
488 ins
->objectid
, total_needed
)) {
489 search_start
= ins
->objectid
+ total_needed
;
492 if (find_cache_extent(&root
->fs_info
->pending_tree
,
493 ins
->objectid
, total_needed
)) {
494 search_start
= ins
->objectid
+ total_needed
;
497 root
->fs_info
->last_insert
.objectid
= ins
->objectid
;
498 ins
->offset
= total_needed
;
501 btrfs_release_path(root
, &path
);
505 * finds a free extent and does all the dirty work required for allocation
506 * returns the key for the extent through ins, and a tree buffer for
507 * the first block of the extent through buf.
509 * returns 0 if everything worked, non-zero otherwise.
511 static int alloc_extent(struct btrfs_trans_handle
*trans
,
512 struct btrfs_root
*root
, u64 owner
,
513 u64 num_bytes
, u64 search_start
,
514 u64 search_end
, struct btrfs_key
*ins
)
518 u64 super_bytes_used
, root_bytes_used
;
519 struct btrfs_fs_info
*info
= root
->fs_info
;
520 struct btrfs_root
*extent_root
= info
->extent_root
;
521 struct btrfs_extent_item extent_item
;
523 btrfs_set_extent_refs(&extent_item
, 1);
524 btrfs_set_extent_owner(&extent_item
, owner
);
526 ret
= find_free_extent(trans
, root
, num_bytes
, search_start
,
531 super_bytes_used
= btrfs_super_bytes_used(info
->disk_super
);
532 btrfs_set_super_bytes_used(info
->disk_super
, super_bytes_used
+
534 root_bytes_used
= btrfs_root_bytes_used(&root
->root_item
);
535 btrfs_set_root_bytes_used(&root
->root_item
, root_bytes_used
+
537 if (root
== extent_root
) {
538 ret
= insert_cache_extent(&root
->fs_info
->pending_tree
,
539 ins
->objectid
, ins
->offset
);
543 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
544 sizeof(extent_item
));
546 finish_current_insert(trans
, extent_root
);
547 pending_ret
= run_pending(trans
, extent_root
);
553 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1);
558 * helper function to allocate a block for a given tree
559 * returns the tree buffer or NULL.
561 struct btrfs_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
562 struct btrfs_root
*root
,
565 struct btrfs_key ins
;
567 struct btrfs_buffer
*buf
;
568 ret
= alloc_extent(trans
, root
, root
->root_key
.objectid
,
569 blocksize
, 0, (u64
)-1, &ins
);
574 buf
= find_tree_block(root
, ins
.objectid
, blocksize
);
575 btrfs_set_header_generation(&buf
->node
.header
, trans
->transid
);
576 btrfs_set_header_bytenr(&buf
->node
.header
, buf
->bytenr
);
577 memcpy(buf
->node
.header
.fsid
, root
->fs_info
->disk_super
->fsid
,
578 sizeof(buf
->node
.header
.fsid
));
579 dirty_tree_block(trans
, root
, buf
);
585 * helper function for drop_snapshot, this walks down the tree dropping ref
588 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
589 *root
, struct btrfs_path
*path
, int *level
)
591 struct btrfs_buffer
*next
;
592 struct btrfs_buffer
*cur
;
597 ret
= lookup_block_ref(trans
, root
, path
->nodes
[*level
]->bytenr
,
598 btrfs_level_size(root
, *level
), &refs
);
603 * walk down to the last node level and free all the leaves
606 u32 size
= btrfs_level_size(root
, *level
- 1);
608 cur
= path
->nodes
[*level
];
609 if (path
->slots
[*level
] >=
610 btrfs_header_nritems(&cur
->node
.header
))
612 bytenr
= btrfs_node_blockptr(&cur
->node
, path
->slots
[*level
]);
613 ret
= lookup_block_ref(trans
, root
, bytenr
, size
, &refs
);
614 if (refs
!= 1 || *level
== 1) {
615 path
->slots
[*level
]++;
616 ret
= btrfs_free_extent(trans
, root
, bytenr
, size
, 1);
621 next
= read_tree_block(root
, bytenr
, size
);
622 if (path
->nodes
[*level
-1])
623 btrfs_block_release(root
, path
->nodes
[*level
-1]);
624 path
->nodes
[*level
-1] = next
;
625 *level
= btrfs_header_level(&next
->node
.header
);
626 path
->slots
[*level
] = 0;
629 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->bytenr
,
630 btrfs_level_size(root
, *level
), 1);
631 btrfs_block_release(root
, path
->nodes
[*level
]);
632 path
->nodes
[*level
] = NULL
;
639 * helper for dropping snapshots. This walks back up the tree in the path
640 * to find the first node higher up where we haven't yet gone through
643 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
644 *root
, struct btrfs_path
*path
, int *level
)
649 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
650 slot
= path
->slots
[i
];
652 btrfs_header_nritems(&path
->nodes
[i
]->node
.header
)- 1) {
657 ret
= btrfs_free_extent(trans
, root
,
658 path
->nodes
[*level
]->bytenr
,
659 btrfs_level_size(root
, *level
), 1);
660 btrfs_block_release(root
, path
->nodes
[*level
]);
661 path
->nodes
[*level
] = NULL
;
670 * drop the reference count on the tree rooted at 'snap'. This traverses
671 * the tree freeing any blocks that have a ref count of zero after being
674 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
675 *root
, struct btrfs_buffer
*snap
)
680 struct btrfs_path path
;
684 btrfs_init_path(&path
);
686 level
= btrfs_header_level(&snap
->node
.header
);
688 path
.nodes
[level
] = snap
;
689 path
.slots
[level
] = 0;
691 wret
= walk_down_tree(trans
, root
, &path
, &level
);
697 wret
= walk_up_tree(trans
, root
, &path
, &level
);
703 for (i
= 0; i
<= orig_level
; i
++) {
705 btrfs_block_release(root
, path
.nodes
[i
]);
711 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
713 struct btrfs_block_group_cache
*bg
;
714 struct cache_extent
*cache
;
717 cache
= find_first_cache_extent(&info
->block_group_cache
, 0);
720 bg
= container_of(cache
, struct btrfs_block_group_cache
,
722 remove_cache_extent(&info
->block_group_cache
, cache
);
728 int btrfs_read_block_groups(struct btrfs_root
*root
)
730 struct btrfs_path path
;
733 struct btrfs_block_group_item
*bi
;
734 struct btrfs_block_group_cache
*bg
;
735 struct cache_tree
*bg_cache
;
736 struct btrfs_key key
;
737 struct btrfs_key found_key
;
738 struct btrfs_leaf
*leaf
;
739 u64 group_size
= BTRFS_BLOCK_GROUP_SIZE
;
741 root
= root
->fs_info
->extent_root
;
742 bg_cache
= &root
->fs_info
->block_group_cache
;
744 key
.offset
= group_size
;
745 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
746 btrfs_init_path(&path
);
749 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
,
755 leaf
= &path
.nodes
[0]->leaf
;
756 btrfs_disk_key_to_cpu(&found_key
,
757 &leaf
->items
[path
.slots
[0]].key
);
758 bg
= malloc(sizeof(*bg
));
763 bi
= btrfs_item_ptr(leaf
, path
.slots
[0],
764 struct btrfs_block_group_item
);
765 memcpy(&bg
->item
, bi
, sizeof(*bi
));
766 memcpy(&bg
->key
, &found_key
, sizeof(found_key
));
767 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
768 btrfs_release_path(root
, &path
);
769 bg
->cache
.start
= found_key
.objectid
;
770 bg
->cache
.size
= found_key
.offset
;
772 ret
= insert_existing_cache_extent(bg_cache
, &bg
->cache
);
775 btrfs_super_total_bytes(root
->fs_info
->disk_super
))
778 btrfs_release_path(root
, &path
);
782 int btrfs_insert_block_group(struct btrfs_trans_handle
*trans
,
783 struct btrfs_root
*root
,
784 struct btrfs_key
*key
,
785 struct btrfs_block_group_item
*bi
)
790 root
= root
->fs_info
->extent_root
;
791 ret
= btrfs_insert_item(trans
, root
, key
, bi
, sizeof(*bi
));
792 finish_current_insert(trans
, root
);
793 pending_ret
= run_pending(trans
, root
);