2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
21 #include "kerncompat.h"
22 #include "radix-tree.h"
25 #include "print-tree.h"
26 #include "transaction.h"
28 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
29 btrfs_root
*extent_root
);
30 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
33 static int inc_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
34 *root
, u64 bytenr
, u32 blocksize
)
36 struct btrfs_path path
;
40 struct btrfs_extent_item
*item
;
43 btrfs_init_path(&path
);
44 key
.objectid
= bytenr
;
45 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
46 key
.offset
= blocksize
;
47 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
52 l
= &path
.nodes
[0]->leaf
;
53 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
54 refs
= btrfs_extent_refs(item
);
55 btrfs_set_extent_refs(item
, refs
+ 1);
57 BUG_ON(list_empty(&path
.nodes
[0]->dirty
));
58 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
59 finish_current_insert(trans
, root
->fs_info
->extent_root
);
60 run_pending(trans
, root
->fs_info
->extent_root
);
64 static int lookup_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
65 *root
, u64 bytenr
, u32 blocksize
, u32
*refs
)
67 struct btrfs_path path
;
71 struct btrfs_extent_item
*item
;
72 btrfs_init_path(&path
);
73 key
.objectid
= bytenr
;
74 key
.offset
= blocksize
;
75 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
76 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
80 l
= &path
.nodes
[0]->leaf
;
81 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
82 *refs
= btrfs_extent_refs(item
);
83 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
87 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
88 struct btrfs_buffer
*buf
)
96 if (btrfs_is_leaf(&buf
->node
))
99 for (i
= 0; i
< btrfs_header_nritems(&buf
->node
.header
); i
++) {
100 bytenr
= btrfs_node_blockptr(&buf
->node
, i
);
101 inc_block_ref(trans
, root
, bytenr
, root
->nodesize
);
106 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
107 struct btrfs_root
*root
,
108 struct btrfs_path
*path
,
109 struct btrfs_block_group_cache
*cache
)
113 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
114 struct btrfs_block_group_item
*bi
;
116 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
,
117 &cache
->key
, path
, 0, 1);
119 bi
= btrfs_item_ptr(&path
->nodes
[0]->leaf
, path
->slots
[0],
120 struct btrfs_block_group_item
);
121 memcpy(bi
, &cache
->item
, sizeof(*bi
));
122 dirty_tree_block(trans
, extent_root
, path
->nodes
[0]);
123 btrfs_release_path(extent_root
, path
);
124 finish_current_insert(trans
, root
);
125 pending_ret
= run_pending(trans
, root
);
134 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
135 struct btrfs_root
*root
)
137 struct btrfs_block_group_cache
*cache
[8];
141 struct radix_tree_root
*radix
= &root
->fs_info
->block_group_radix
;
143 struct btrfs_path path
;
144 btrfs_init_path(&path
);
147 ret
= radix_tree_gang_lookup_tag(radix
, (void *)cache
,
148 0, ARRAY_SIZE(cache
),
149 BTRFS_BLOCK_GROUP_DIRTY
);
152 for (i
= 0; i
< ret
; i
++) {
153 radix_tree_tag_clear(radix
, cache
[i
]->key
.objectid
+
154 cache
[i
]->key
.offset
-1,
155 BTRFS_BLOCK_GROUP_DIRTY
);
156 err
= write_one_cache_group(trans
, root
,
165 static int update_block_group(struct btrfs_trans_handle
*trans
,
166 struct btrfs_root
*root
,
167 u64 bytenr
, u64 num
, int alloc
)
169 struct btrfs_block_group_cache
*cache
;
170 struct btrfs_fs_info
*info
= root
->fs_info
;
177 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
178 (void *)&cache
, bytenr
, 1);
181 radix_tree_tag_set(&info
->block_group_radix
,
182 cache
->key
.objectid
+ cache
->key
.offset
- 1,
183 BTRFS_BLOCK_GROUP_DIRTY
);
185 byte_in_group
= bytenr
- cache
->key
.objectid
;
186 old_val
= btrfs_block_group_used(&cache
->item
);
187 if (total
> cache
->key
.offset
- byte_in_group
)
188 num
= cache
->key
.offset
- byte_in_group
;
197 btrfs_set_block_group_used(&cache
->item
, old_val
);
202 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
, struct
206 struct pending_extent
*pe
;
207 struct pending_extent
*next
;
209 pe
= find_first_pending_extent(&root
->fs_info
->pinned_tree
, 0);
213 next
= next_pending_extent(pe
);
214 remove_pending_extent(&root
->fs_info
->pinned_tree
, pe
);
215 free_pending_extent(pe
);
218 root
->fs_info
->last_insert
.objectid
= first
;
219 root
->fs_info
->last_insert
.offset
= 0;
223 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
224 btrfs_root
*extent_root
)
226 struct btrfs_key ins
;
227 struct btrfs_extent_item extent_item
;
229 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
230 struct pending_extent
*pe
;
231 struct pending_extent
*next
;
232 struct pending_tree
*pending_tree
= &info
->pending_tree
;
234 btrfs_set_extent_refs(&extent_item
, 1);
235 btrfs_set_extent_owner(&extent_item
, extent_root
->root_key
.objectid
);
237 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
238 pe
= find_first_pending_extent(pending_tree
, 0);
240 ins
.offset
= pe
->size
;
241 ins
.objectid
= pe
->start
;
243 remove_pending_extent(pending_tree
, pe
);
244 next
= next_pending_extent(pe
);
246 next
= find_first_pending_extent(pending_tree
, 0);
248 free_pending_extent(pe
);
251 ret
= btrfs_insert_item(trans
, extent_root
, &ins
, &extent_item
,
252 sizeof(extent_item
));
254 btrfs_print_tree(extent_root
, extent_root
->node
);
262 * remove an extent from the root, returns 0 on success
264 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
265 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
267 struct btrfs_path path
;
268 struct btrfs_key key
;
269 struct btrfs_fs_info
*info
= root
->fs_info
;
270 struct btrfs_root
*extent_root
= info
->extent_root
;
272 struct btrfs_extent_item
*ei
;
275 key
.objectid
= bytenr
;
276 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
277 key
.offset
= num_bytes
;
279 btrfs_init_path(&path
);
280 ret
= btrfs_search_slot(trans
, extent_root
, &key
, &path
, -1, 1);
282 btrfs_print_tree(extent_root
, extent_root
->node
);
283 printf("failed to find %llu\n",
284 (unsigned long long)key
.objectid
);
287 ei
= btrfs_item_ptr(&path
.nodes
[0]->leaf
, path
.slots
[0],
288 struct btrfs_extent_item
);
289 BUG_ON(ei
->refs
== 0);
290 refs
= btrfs_extent_refs(ei
) - 1;
291 btrfs_set_extent_refs(ei
, refs
);
293 u64 super_bytes_used
, root_bytes_used
;
296 err
= insert_pending_extent(&info
->pinned_tree
,
300 super_bytes_used
= btrfs_super_bytes_used(info
->disk_super
);
301 btrfs_set_super_bytes_used(info
->disk_super
,
302 super_bytes_used
- num_bytes
);
303 root_bytes_used
= btrfs_root_bytes_used(&root
->root_item
);
304 btrfs_set_root_bytes_used(&root
->root_item
,
305 root_bytes_used
- num_bytes
);
307 ret
= btrfs_del_item(trans
, extent_root
, &path
);
308 if (!pin
&& extent_root
->fs_info
->last_insert
.objectid
>
310 extent_root
->fs_info
->last_insert
.objectid
= bytenr
;
313 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0);
316 btrfs_release_path(extent_root
, &path
);
317 finish_current_insert(trans
, extent_root
);
322 * find all the blocks marked as pending in the radix tree and remove
323 * them from the extent map
325 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
326 btrfs_root
*extent_root
)
329 struct pending_extent
*pe
;
330 struct pending_extent
*next
;
331 struct pending_tree
*del_pending
= &extent_root
->fs_info
->del_pending
;
333 pe
= find_first_pending_extent(del_pending
, 0);
335 remove_pending_extent(del_pending
, pe
);
336 ret
= __free_extent(trans
, extent_root
,
337 pe
->start
, pe
->size
, 1);
339 next
= next_pending_extent(pe
);
341 next
= find_first_pending_extent(del_pending
, 0);
342 free_pending_extent(pe
);
348 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
351 del_pending_extents(trans
, extent_root
);
357 * remove an extent from the root, returns 0 on success
359 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
360 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
362 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
366 if (root
== extent_root
) {
367 ret
= insert_pending_extent(&root
->fs_info
->del_pending
,
372 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, pin
);
373 pending_ret
= run_pending(trans
, root
->fs_info
->extent_root
);
374 return ret
? ret
: pending_ret
;
378 * walks the btree of allocated extents and find a hole of a given size.
379 * The key ins is changed to record the hole:
380 * ins->objectid == block start
381 * ins->flags = BTRFS_EXTENT_ITEM_KEY
382 * ins->offset == number of blocks
383 * Any available blocks before search_start are skipped.
385 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
386 *orig_root
, u64 total_needed
, u64 search_start
,
387 u64 search_end
, struct btrfs_key
*ins
)
389 struct btrfs_path path
;
390 struct btrfs_key key
;
396 struct btrfs_leaf
*l
;
397 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
399 if (root
->fs_info
->last_insert
.objectid
> search_start
)
400 search_start
= root
->fs_info
->last_insert
.objectid
;
402 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
405 btrfs_init_path(&path
);
406 ins
->objectid
= search_start
;
409 ret
= btrfs_search_slot(trans
, root
, ins
, &path
, 0, 0);
413 if (path
.slots
[0] > 0)
417 l
= &path
.nodes
[0]->leaf
;
418 slot
= path
.slots
[0];
419 if (slot
>= btrfs_header_nritems(&l
->header
)) {
420 ret
= btrfs_next_leaf(root
, &path
);
426 ins
->objectid
= search_start
;
427 ins
->offset
= (u64
)-1 - search_start
;
431 ins
->objectid
= last_byte
> search_start
?
432 last_byte
: search_start
;
433 ins
->offset
= (u64
)-1 - ins
->objectid
;
436 btrfs_disk_key_to_cpu(&key
, &l
->items
[slot
].key
);
437 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
)
439 if (key
.objectid
>= search_start
) {
441 if (last_byte
< search_start
)
442 last_byte
= search_start
;
443 hole_size
= key
.objectid
- last_byte
;
444 if (hole_size
> total_needed
) {
445 ins
->objectid
= last_byte
;
446 ins
->offset
= hole_size
;
452 last_byte
= key
.objectid
+ key
.offset
;
458 /* we have to make sure we didn't find an extent that has already
459 * been allocated by the map tree or the original allocation
461 btrfs_release_path(root
, &path
);
462 BUG_ON(ins
->objectid
< search_start
);
463 if (find_pending_extent(&root
->fs_info
->pinned_tree
,
464 ins
->objectid
, total_needed
)) {
465 search_start
= ins
->objectid
+ total_needed
;
468 if (find_pending_extent(&root
->fs_info
->pending_tree
,
469 ins
->objectid
, total_needed
)) {
470 search_start
= ins
->objectid
+ total_needed
;
473 root
->fs_info
->last_insert
.objectid
= ins
->objectid
;
474 ins
->offset
= total_needed
;
477 btrfs_release_path(root
, &path
);
481 * finds a free extent and does all the dirty work required for allocation
482 * returns the key for the extent through ins, and a tree buffer for
483 * the first block of the extent through buf.
485 * returns 0 if everything worked, non-zero otherwise.
487 static int alloc_extent(struct btrfs_trans_handle
*trans
,
488 struct btrfs_root
*root
, u64 owner
,
489 u64 num_bytes
, u64 search_start
,
490 u64 search_end
, struct btrfs_key
*ins
)
494 u64 super_bytes_used
, root_bytes_used
;
495 struct btrfs_fs_info
*info
= root
->fs_info
;
496 struct btrfs_root
*extent_root
= info
->extent_root
;
497 struct btrfs_extent_item extent_item
;
499 btrfs_set_extent_refs(&extent_item
, 1);
500 btrfs_set_extent_owner(&extent_item
, owner
);
502 ret
= find_free_extent(trans
, root
, num_bytes
, search_start
,
507 super_bytes_used
= btrfs_super_bytes_used(info
->disk_super
);
508 btrfs_set_super_bytes_used(info
->disk_super
, super_bytes_used
+
510 root_bytes_used
= btrfs_root_bytes_used(&root
->root_item
);
511 btrfs_set_root_bytes_used(&root
->root_item
, root_bytes_used
+
513 if (root
== extent_root
) {
514 ret
= insert_pending_extent(&root
->fs_info
->pending_tree
,
515 ins
->objectid
, ins
->offset
);
520 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
521 sizeof(extent_item
));
523 finish_current_insert(trans
, extent_root
);
524 pending_ret
= run_pending(trans
, extent_root
);
533 * helper function to allocate a block for a given tree
534 * returns the tree buffer or NULL.
536 struct btrfs_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
537 struct btrfs_root
*root
,
540 struct btrfs_key ins
;
542 struct btrfs_buffer
*buf
;
544 ret
= alloc_extent(trans
, root
, root
->root_key
.objectid
,
545 blocksize
, 0, (unsigned long)-1, &ins
);
550 ret
= update_block_group(trans
, root
, ins
.objectid
, ins
.offset
, 1);
551 buf
= find_tree_block(root
, ins
.objectid
, blocksize
);
552 btrfs_set_header_generation(&buf
->node
.header
,
553 root
->root_key
.offset
+ 1);
554 btrfs_set_header_bytenr(&buf
->node
.header
, buf
->bytenr
);
555 memcpy(buf
->node
.header
.fsid
, root
->fs_info
->disk_super
->fsid
,
556 sizeof(buf
->node
.header
.fsid
));
557 dirty_tree_block(trans
, root
, buf
);
563 * helper function for drop_snapshot, this walks down the tree dropping ref
566 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
567 *root
, struct btrfs_path
*path
, int *level
)
569 struct btrfs_buffer
*next
;
570 struct btrfs_buffer
*cur
;
575 ret
= lookup_block_ref(trans
, root
, path
->nodes
[*level
]->bytenr
,
576 btrfs_level_size(root
, *level
), &refs
);
581 * walk down to the last node level and free all the leaves
584 u32 size
= btrfs_level_size(root
, *level
- 1);
586 cur
= path
->nodes
[*level
];
587 if (path
->slots
[*level
] >=
588 btrfs_header_nritems(&cur
->node
.header
))
590 bytenr
= btrfs_node_blockptr(&cur
->node
, path
->slots
[*level
]);
591 ret
= lookup_block_ref(trans
, root
, bytenr
, size
, &refs
);
592 if (refs
!= 1 || *level
== 1) {
593 path
->slots
[*level
]++;
594 ret
= btrfs_free_extent(trans
, root
, bytenr
, size
, 1);
599 next
= read_tree_block(root
, bytenr
, size
);
600 if (path
->nodes
[*level
-1])
601 btrfs_block_release(root
, path
->nodes
[*level
-1]);
602 path
->nodes
[*level
-1] = next
;
603 *level
= btrfs_header_level(&next
->node
.header
);
604 path
->slots
[*level
] = 0;
607 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->bytenr
,
608 btrfs_level_size(root
, *level
), 1);
609 btrfs_block_release(root
, path
->nodes
[*level
]);
610 path
->nodes
[*level
] = NULL
;
617 * helper for dropping snapshots. This walks back up the tree in the path
618 * to find the first node higher up where we haven't yet gone through
621 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
622 *root
, struct btrfs_path
*path
, int *level
)
627 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
628 slot
= path
->slots
[i
];
630 btrfs_header_nritems(&path
->nodes
[i
]->node
.header
)- 1) {
635 ret
= btrfs_free_extent(trans
, root
,
636 path
->nodes
[*level
]->bytenr
,
637 btrfs_level_size(root
, *level
), 1);
638 btrfs_block_release(root
, path
->nodes
[*level
]);
639 path
->nodes
[*level
] = NULL
;
648 * drop the reference count on the tree rooted at 'snap'. This traverses
649 * the tree freeing any blocks that have a ref count of zero after being
652 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
653 *root
, struct btrfs_buffer
*snap
)
658 struct btrfs_path path
;
662 btrfs_init_path(&path
);
664 level
= btrfs_header_level(&snap
->node
.header
);
666 path
.nodes
[level
] = snap
;
667 path
.slots
[level
] = 0;
669 wret
= walk_down_tree(trans
, root
, &path
, &level
);
675 wret
= walk_up_tree(trans
, root
, &path
, &level
);
681 for (i
= 0; i
<= orig_level
; i
++) {
683 btrfs_block_release(root
, path
.nodes
[i
]);
689 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
692 struct btrfs_block_group_cache
*cache
[8];
696 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
701 for (i
= 0; i
< ret
; i
++) {
702 radix_tree_delete(&info
->block_group_radix
,
703 cache
[i
]->key
.objectid
+
704 cache
[i
]->key
.offset
- 1);
711 int btrfs_read_block_groups(struct btrfs_root
*root
)
713 struct btrfs_path path
;
716 struct btrfs_block_group_item
*bi
;
717 struct btrfs_block_group_cache
*cache
;
718 struct btrfs_key key
;
719 struct btrfs_key found_key
;
720 struct btrfs_leaf
*leaf
;
721 u64 group_size
= BTRFS_BLOCK_GROUP_SIZE
;
723 root
= root
->fs_info
->extent_root
;
725 key
.offset
= group_size
;
726 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
727 btrfs_init_path(&path
);
730 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
,
736 leaf
= &path
.nodes
[0]->leaf
;
737 btrfs_disk_key_to_cpu(&found_key
,
738 &leaf
->items
[path
.slots
[0]].key
);
739 cache
= malloc(sizeof(*cache
));
744 bi
= btrfs_item_ptr(leaf
, path
.slots
[0],
745 struct btrfs_block_group_item
);
746 memcpy(&cache
->item
, bi
, sizeof(*bi
));
747 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
748 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
749 btrfs_release_path(root
, &path
);
750 ret
= radix_tree_insert(&root
->fs_info
->block_group_radix
,
752 found_key
.offset
- 1, (void *)cache
);
755 btrfs_super_total_bytes(root
->fs_info
->disk_super
))
758 btrfs_release_path(root
, &path
);
762 int btrfs_insert_block_group(struct btrfs_trans_handle
*trans
,
763 struct btrfs_root
*root
,
764 struct btrfs_key
*key
,
765 struct btrfs_block_group_item
*bi
)
770 root
= root
->fs_info
->extent_root
;
771 ret
= btrfs_insert_item(trans
, root
, key
, bi
, sizeof(*bi
));
772 finish_current_insert(trans
, root
);
773 pending_ret
= run_pending(trans
, root
);