3 #include "kerncompat.h"
4 #include "radix-tree.h"
7 #include "print-tree.h"
8 #include "transaction.h"
10 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
11 *orig_root
, u64 num_blocks
, u64 search_start
, u64
12 search_end
, struct btrfs_key
*ins
);
13 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
14 btrfs_root
*extent_root
);
15 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
19 * pending extents are blocks that we're trying to allocate in the extent
20 * map while trying to grow the map because of other allocations. To avoid
21 * recursing, they are tagged in the radix tree and cleaned up after
22 * other allocations are done. The pending tag is also used in the same
25 #define CTREE_EXTENT_PENDING_DEL 0
27 static int inc_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
30 struct btrfs_path path
;
34 struct btrfs_extent_item
*item
;
38 find_free_extent(trans
, root
->fs_info
->extent_root
, 0, 0, (u64
)-1,
40 btrfs_init_path(&path
);
41 key
.objectid
= blocknr
;
43 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
45 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
50 l
= &path
.nodes
[0]->leaf
;
51 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
52 refs
= btrfs_extent_refs(item
);
53 btrfs_set_extent_refs(item
, refs
+ 1);
55 BUG_ON(list_empty(&path
.nodes
[0]->dirty
));
56 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
57 finish_current_insert(trans
, root
->fs_info
->extent_root
);
58 run_pending(trans
, root
->fs_info
->extent_root
);
62 static int lookup_block_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
63 *root
, u64 blocknr
, u32
*refs
)
65 struct btrfs_path path
;
69 struct btrfs_extent_item
*item
;
70 btrfs_init_path(&path
);
71 key
.objectid
= blocknr
;
74 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
75 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, &path
,
79 l
= &path
.nodes
[0]->leaf
;
80 item
= btrfs_item_ptr(l
, path
.slots
[0], struct btrfs_extent_item
);
81 *refs
= btrfs_extent_refs(item
);
82 btrfs_release_path(root
->fs_info
->extent_root
, &path
);
86 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
87 struct btrfs_buffer
*buf
)
94 if (btrfs_is_leaf(&buf
->node
))
97 for (i
= 0; i
< btrfs_header_nritems(&buf
->node
.header
); i
++) {
98 blocknr
= btrfs_node_blockptr(&buf
->node
, i
);
99 inc_block_ref(trans
, root
, blocknr
);
104 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
105 struct btrfs_root
*root
,
106 struct btrfs_path
*path
,
107 struct btrfs_block_group_cache
*cache
)
111 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
112 struct btrfs_block_group_item
*bi
;
113 struct btrfs_key ins
;
115 ret
= find_free_extent(trans
, root
, 0, 0, (u64
)-1, &ins
);
118 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
,
119 &cache
->key
, path
, 0, 1);
121 bi
= btrfs_item_ptr(&path
->nodes
[0]->leaf
, path
->slots
[0],
122 struct btrfs_block_group_item
);
123 memcpy(bi
, &cache
->item
, sizeof(*bi
));
124 dirty_tree_block(trans
, extent_root
, path
->nodes
[0]);
125 btrfs_release_path(extent_root
, path
);
126 finish_current_insert(trans
, root
);
127 pending_ret
= run_pending(trans
, root
);
136 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
137 struct btrfs_root
*root
)
139 struct btrfs_block_group_cache
*cache
[8];
143 struct radix_tree_root
*radix
= &root
->fs_info
->block_group_radix
;
145 struct btrfs_path path
;
146 btrfs_init_path(&path
);
149 ret
= radix_tree_gang_lookup_tag(radix
, (void **)cache
,
150 0, ARRAY_SIZE(cache
),
151 BTRFS_BLOCK_GROUP_DIRTY
);
154 for (i
= 0; i
< ret
; i
++) {
155 radix_tree_tag_clear(radix
, cache
[i
]->key
.objectid
+
156 cache
[i
]->key
.offset
-1,
157 BTRFS_BLOCK_GROUP_DIRTY
);
158 err
= write_one_cache_group(trans
, root
,
167 static int update_block_group(struct btrfs_trans_handle
*trans
,
168 struct btrfs_root
*root
,
169 u64 blocknr
, u64 num
, int alloc
)
171 struct btrfs_block_group_cache
*cache
;
172 struct btrfs_fs_info
*info
= root
->fs_info
;
179 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
180 (void **)&cache
, blocknr
, 1);
183 radix_tree_tag_set(&info
->block_group_radix
,
184 cache
->key
.objectid
+ cache
->key
.offset
- 1,
185 BTRFS_BLOCK_GROUP_DIRTY
);
187 block_in_group
= blocknr
- cache
->key
.objectid
;
188 old_val
= btrfs_block_group_used(&cache
->item
);
189 if (total
> cache
->key
.offset
- block_in_group
)
190 num
= cache
->key
.offset
- block_in_group
;
199 btrfs_set_block_group_used(&cache
->item
, old_val
);
204 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
, struct
207 unsigned long gang
[8];
213 ret
= radix_tree_gang_lookup(&root
->fs_info
->pinned_radix
,
220 for (i
= 0; i
< ret
; i
++) {
221 radix_tree_delete(&root
->fs_info
->pinned_radix
,
225 root
->fs_info
->last_insert
.objectid
= first
;
226 root
->fs_info
->last_insert
.offset
= 0;
230 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
231 btrfs_root
*extent_root
)
233 struct btrfs_key ins
;
234 struct btrfs_extent_item extent_item
;
237 u64 super_blocks_used
;
238 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
240 btrfs_set_extent_refs(&extent_item
, 1);
241 btrfs_set_extent_owner(&extent_item
, extent_root
->root_key
.objectid
);
244 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
246 for (i
= 0; i
< extent_root
->fs_info
->current_insert
.flags
; i
++) {
247 ins
.objectid
= extent_root
->fs_info
->current_insert
.objectid
+
249 super_blocks_used
= btrfs_super_blocks_used(info
->disk_super
);
250 btrfs_set_super_blocks_used(info
->disk_super
,
251 super_blocks_used
+ 1);
252 ret
= btrfs_insert_item(trans
, extent_root
, &ins
, &extent_item
,
253 sizeof(extent_item
));
255 btrfs_print_tree(extent_root
, extent_root
->node
);
259 extent_root
->fs_info
->current_insert
.offset
= 0;
264 * remove an extent from the root, returns 0 on success
266 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
267 *root
, u64 blocknr
, u64 num_blocks
, int pin
)
269 struct btrfs_path path
;
270 struct btrfs_key key
;
271 struct btrfs_fs_info
*info
= root
->fs_info
;
272 struct btrfs_root
*extent_root
= info
->extent_root
;
274 struct btrfs_extent_item
*ei
;
275 struct btrfs_key ins
;
278 BUG_ON(pin
&& num_blocks
!= 1);
279 key
.objectid
= blocknr
;
281 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
282 key
.offset
= num_blocks
;
284 find_free_extent(trans
, root
, 0, 0, (u64
)-1, &ins
);
285 btrfs_init_path(&path
);
286 ret
= btrfs_search_slot(trans
, extent_root
, &key
, &path
, -1, 1);
288 printf("failed to find %Lu\n", key
.objectid
);
289 btrfs_print_tree(extent_root
, extent_root
->node
);
290 printf("failed to find %Lu\n", key
.objectid
);
293 ei
= btrfs_item_ptr(&path
.nodes
[0]->leaf
, path
.slots
[0],
294 struct btrfs_extent_item
);
295 BUG_ON(ei
->refs
== 0);
296 refs
= btrfs_extent_refs(ei
) - 1;
297 btrfs_set_extent_refs(ei
, refs
);
299 u64 super_blocks_used
;
302 radix_tree_preload(GFP_KERNEL
);
303 err
= radix_tree_insert(&info
->pinned_radix
,
304 blocknr
, (void *)blocknr
);
306 radix_tree_preload_end();
308 super_blocks_used
= btrfs_super_blocks_used(info
->disk_super
);
309 btrfs_set_super_blocks_used(info
->disk_super
,
310 super_blocks_used
- num_blocks
);
311 ret
= btrfs_del_item(trans
, extent_root
, &path
);
312 if (!pin
&& extent_root
->fs_info
->last_insert
.objectid
>
314 extent_root
->fs_info
->last_insert
.objectid
= blocknr
;
317 ret
= update_block_group(trans
, root
, blocknr
, num_blocks
, 0);
320 btrfs_release_path(extent_root
, &path
);
321 finish_current_insert(trans
, extent_root
);
326 * find all the blocks marked as pending in the radix tree and remove
327 * them from the extent map
329 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
330 btrfs_root
*extent_root
)
333 struct btrfs_buffer
*gang
[4];
337 ret
= radix_tree_gang_lookup_tag(
338 &extent_root
->fs_info
->cache_radix
,
341 CTREE_EXTENT_PENDING_DEL
);
344 for (i
= 0; i
< ret
; i
++) {
345 ret
= __free_extent(trans
, extent_root
,
346 gang
[i
]->blocknr
, 1, 1);
347 radix_tree_tag_clear(&extent_root
->fs_info
->cache_radix
,
349 CTREE_EXTENT_PENDING_DEL
);
350 btrfs_block_release(extent_root
, gang
[i
]);
356 static int run_pending(struct btrfs_trans_handle
*trans
, struct btrfs_root
359 while(radix_tree_tagged(&extent_root
->fs_info
->cache_radix
,
360 CTREE_EXTENT_PENDING_DEL
))
361 del_pending_extents(trans
, extent_root
);
367 * remove an extent from the root, returns 0 on success
369 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
370 *root
, u64 blocknr
, u64 num_blocks
, int pin
)
372 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
373 struct btrfs_buffer
*t
;
377 if (root
== extent_root
) {
378 t
= find_tree_block(root
, blocknr
);
379 radix_tree_tag_set(&root
->fs_info
->cache_radix
, blocknr
,
380 CTREE_EXTENT_PENDING_DEL
);
383 ret
= __free_extent(trans
, root
, blocknr
, num_blocks
, pin
);
384 pending_ret
= run_pending(trans
, root
->fs_info
->extent_root
);
385 return ret
? ret
: pending_ret
;
389 * walks the btree of allocated extents and find a hole of a given size.
390 * The key ins is changed to record the hole:
391 * ins->objectid == block start
392 * ins->flags = BTRFS_EXTENT_ITEM_KEY
393 * ins->offset == number of blocks
394 * Any available blocks before search_start are skipped.
396 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
397 *orig_root
, u64 num_blocks
, u64 search_start
, u64
398 search_end
, struct btrfs_key
*ins
)
400 struct btrfs_path path
;
401 struct btrfs_key key
;
408 struct btrfs_leaf
*l
;
409 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
410 int total_needed
= num_blocks
;
412 total_needed
+= (btrfs_header_level(&root
->node
->node
.header
) + 1) * 3;
413 if (root
->fs_info
->last_insert
.objectid
> search_start
)
414 search_start
= root
->fs_info
->last_insert
.objectid
;
417 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
420 btrfs_init_path(&path
);
421 ins
->objectid
= search_start
;
424 ret
= btrfs_search_slot(trans
, root
, ins
, &path
, 0, 0);
428 if (path
.slots
[0] > 0)
432 l
= &path
.nodes
[0]->leaf
;
433 slot
= path
.slots
[0];
434 if (slot
>= btrfs_header_nritems(&l
->header
)) {
435 ret
= btrfs_next_leaf(root
, &path
);
441 ins
->objectid
= search_start
;
442 ins
->offset
= (u64
)-1 - search_start
;
446 ins
->objectid
= last_block
> search_start
?
447 last_block
: search_start
;
448 ins
->offset
= (u64
)-1 - ins
->objectid
;
451 btrfs_disk_key_to_cpu(&key
, &l
->items
[slot
].key
);
452 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
)
454 if (key
.objectid
>= search_start
) {
456 if (last_block
< search_start
)
457 last_block
= search_start
;
458 hole_size
= key
.objectid
- last_block
;
459 if (hole_size
> total_needed
) {
460 ins
->objectid
= last_block
;
461 ins
->offset
= hole_size
;
467 last_block
= key
.objectid
+ key
.offset
;
473 /* we have to make sure we didn't find an extent that has already
474 * been allocated by the map tree or the original allocation
476 btrfs_release_path(root
, &path
);
477 BUG_ON(ins
->objectid
< search_start
);
478 for (test_block
= ins
->objectid
;
479 test_block
< ins
->objectid
+ total_needed
; test_block
++) {
480 if (radix_tree_lookup(&root
->fs_info
->pinned_radix
,
482 search_start
= test_block
+ 1;
486 BUG_ON(root
->fs_info
->current_insert
.offset
);
487 root
->fs_info
->current_insert
.offset
= total_needed
- num_blocks
;
488 root
->fs_info
->current_insert
.objectid
= ins
->objectid
+ num_blocks
;
489 root
->fs_info
->current_insert
.flags
= 0;
490 root
->fs_info
->last_insert
.objectid
= ins
->objectid
;
491 ins
->offset
= num_blocks
;
494 btrfs_release_path(root
, &path
);
498 * finds a free extent and does all the dirty work required for allocation
499 * returns the key for the extent through ins, and a tree buffer for
500 * the first block of the extent through buf.
502 * returns 0 if everything worked, non-zero otherwise.
504 static int alloc_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
505 *root
, u64 owner
, u64 num_blocks
,
506 u64 search_start
, u64
507 search_end
, struct btrfs_key
*ins
)
511 u64 super_blocks_used
;
512 struct btrfs_fs_info
*info
= root
->fs_info
;
513 struct btrfs_root
*extent_root
= info
->extent_root
;
514 struct btrfs_extent_item extent_item
;
516 btrfs_set_extent_refs(&extent_item
, 1);
517 btrfs_set_extent_owner(&extent_item
, owner
);
519 if (root
== extent_root
) {
520 BUG_ON(extent_root
->fs_info
->current_insert
.offset
== 0);
521 BUG_ON(num_blocks
!= 1);
522 BUG_ON(extent_root
->fs_info
->current_insert
.flags
==
523 extent_root
->fs_info
->current_insert
.offset
);
525 ins
->objectid
= extent_root
->fs_info
->current_insert
.objectid
+
526 extent_root
->fs_info
->current_insert
.flags
++;
529 ret
= find_free_extent(trans
, root
, num_blocks
, search_start
,
534 super_blocks_used
= btrfs_super_blocks_used(info
->disk_super
);
535 btrfs_set_super_blocks_used(info
->disk_super
, super_blocks_used
+
537 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
538 sizeof(extent_item
));
540 finish_current_insert(trans
, extent_root
);
541 pending_ret
= run_pending(trans
, extent_root
);
550 * helper function to allocate a block for a given tree
551 * returns the tree buffer or NULL.
553 struct btrfs_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
554 struct btrfs_root
*root
)
556 struct btrfs_key ins
;
558 struct btrfs_buffer
*buf
;
560 ret
= alloc_extent(trans
, root
, root
->root_key
.objectid
,
561 1, 0, (unsigned long)-1, &ins
);
566 ret
= update_block_group(trans
, root
, ins
.objectid
, ins
.offset
, 1);
567 buf
= find_tree_block(root
, ins
.objectid
);
568 btrfs_set_header_generation(&buf
->node
.header
,
569 root
->root_key
.offset
+ 1);
570 btrfs_set_header_blocknr(&buf
->node
.header
, buf
->blocknr
);
571 memcpy(buf
->node
.header
.fsid
, root
->fs_info
->disk_super
->fsid
,
572 sizeof(buf
->node
.header
.fsid
));
573 dirty_tree_block(trans
, root
, buf
);
579 * helper function for drop_snapshot, this walks down the tree dropping ref
582 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
583 *root
, struct btrfs_path
*path
, int *level
)
585 struct btrfs_buffer
*next
;
586 struct btrfs_buffer
*cur
;
591 ret
= lookup_block_ref(trans
, root
, path
->nodes
[*level
]->blocknr
,
597 * walk down to the last node level and free all the leaves
600 cur
= path
->nodes
[*level
];
601 if (path
->slots
[*level
] >=
602 btrfs_header_nritems(&cur
->node
.header
))
604 blocknr
= btrfs_node_blockptr(&cur
->node
, path
->slots
[*level
]);
605 ret
= lookup_block_ref(trans
, root
, blocknr
, &refs
);
606 if (refs
!= 1 || *level
== 1) {
607 path
->slots
[*level
]++;
608 ret
= btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
613 next
= read_tree_block(root
, blocknr
);
614 if (path
->nodes
[*level
-1])
615 btrfs_block_release(root
, path
->nodes
[*level
-1]);
616 path
->nodes
[*level
-1] = next
;
617 *level
= btrfs_header_level(&next
->node
.header
);
618 path
->slots
[*level
] = 0;
621 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->blocknr
, 1,
623 btrfs_block_release(root
, path
->nodes
[*level
]);
624 path
->nodes
[*level
] = NULL
;
631 * helper for dropping snapshots. This walks back up the tree in the path
632 * to find the first node higher up where we haven't yet gone through
635 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
636 *root
, struct btrfs_path
*path
, int *level
)
641 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
642 slot
= path
->slots
[i
];
644 btrfs_header_nritems(&path
->nodes
[i
]->node
.header
)- 1) {
649 ret
= btrfs_free_extent(trans
, root
,
650 path
->nodes
[*level
]->blocknr
,
652 btrfs_block_release(root
, path
->nodes
[*level
]);
653 path
->nodes
[*level
] = NULL
;
662 * drop the reference count on the tree rooted at 'snap'. This traverses
663 * the tree freeing any blocks that have a ref count of zero after being
666 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
667 *root
, struct btrfs_buffer
*snap
)
672 struct btrfs_path path
;
676 btrfs_init_path(&path
);
678 level
= btrfs_header_level(&snap
->node
.header
);
680 path
.nodes
[level
] = snap
;
681 path
.slots
[level
] = 0;
683 wret
= walk_down_tree(trans
, root
, &path
, &level
);
689 wret
= walk_up_tree(trans
, root
, &path
, &level
);
695 for (i
= 0; i
<= orig_level
; i
++) {
697 btrfs_block_release(root
, path
.nodes
[i
]);
703 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
706 struct btrfs_block_group_cache
*cache
[8];
710 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
715 for (i
= 0; i
< ret
; i
++) {
716 radix_tree_delete(&info
->block_group_radix
,
717 cache
[i
]->key
.objectid
+
718 cache
[i
]->key
.offset
- 1);
725 int btrfs_read_block_groups(struct btrfs_root
*root
)
727 struct btrfs_path path
;
730 struct btrfs_block_group_item
*bi
;
731 struct btrfs_block_group_cache
*cache
;
732 struct btrfs_key key
;
733 struct btrfs_key found_key
;
734 struct btrfs_leaf
*leaf
;
735 u64 group_size_blocks
= BTRFS_BLOCK_GROUP_SIZE
/ root
->blocksize
;
737 root
= root
->fs_info
->extent_root
;
739 key
.offset
= group_size_blocks
;
741 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
742 btrfs_init_path(&path
);
745 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
,
751 leaf
= &path
.nodes
[0]->leaf
;
752 btrfs_disk_key_to_cpu(&found_key
,
753 &leaf
->items
[path
.slots
[0]].key
);
754 cache
= malloc(sizeof(*cache
));
759 bi
= btrfs_item_ptr(leaf
, path
.slots
[0],
760 struct btrfs_block_group_item
);
761 memcpy(&cache
->item
, bi
, sizeof(*bi
));
762 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
763 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
764 btrfs_release_path(root
, &path
);
765 ret
= radix_tree_insert(&root
->fs_info
->block_group_radix
,
767 found_key
.offset
- 1, (void *)cache
);
770 btrfs_super_total_blocks(root
->fs_info
->disk_super
))
773 btrfs_release_path(root
, &path
);
777 int btrfs_insert_block_group(struct btrfs_trans_handle
*trans
,
778 struct btrfs_root
*root
,
779 struct btrfs_key
*key
,
780 struct btrfs_block_group_item
*bi
)
782 struct btrfs_key ins
;
786 root
= root
->fs_info
->extent_root
;
787 ret
= find_free_extent(trans
, root
, 0, 0, (u64
)-1, &ins
);
790 ret
= btrfs_insert_item(trans
, root
, key
, bi
, sizeof(*bi
));
791 finish_current_insert(trans
, root
);
792 pending_ret
= run_pending(trans
, root
);