/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
25 struct cache_extent ce
;
26 struct btrfs_device
*dev
;
31 * this uses a pretty simple search, the expectation is that it is
32 * called very infrequently and that a given device has a small number
35 static int find_free_dev_extent(struct btrfs_trans_handle
*trans
,
36 struct btrfs_device
*device
,
37 struct btrfs_path
*path
,
38 u64 num_bytes
, u64
*start
)
41 struct btrfs_root
*root
= device
->dev_root
;
42 struct btrfs_dev_extent
*dev_extent
= NULL
;
46 u64 search_end
= device
->total_bytes
;
50 struct extent_buffer
*l
;
55 /* FIXME use last free of some kind */
57 key
.objectid
= device
->devid
;
58 key
.offset
= search_start
;
59 key
.type
= BTRFS_DEV_EXTENT_KEY
;
60 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 0);
63 ret
= btrfs_previous_item(root
, path
, 0, key
.type
);
67 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
70 slot
= path
->slots
[0];
71 if (slot
>= btrfs_header_nritems(l
)) {
72 ret
= btrfs_next_leaf(root
, path
);
79 if (search_start
>= search_end
) {
83 *start
= search_start
;
87 *start
= last_byte
> search_start
?
88 last_byte
: search_start
;
89 if (search_end
<= *start
) {
95 btrfs_item_key_to_cpu(l
, &key
, slot
);
97 if (key
.objectid
< device
->devid
)
100 if (key
.objectid
> device
->devid
)
103 if (key
.offset
>= search_start
&& key
.offset
> last_byte
&&
105 if (last_byte
< search_start
)
106 last_byte
= search_start
;
107 hole_size
= key
.offset
- last_byte
;
108 if (key
.offset
> last_byte
&&
109 hole_size
>= num_bytes
) {
114 if (btrfs_key_type(&key
) != BTRFS_DEV_EXTENT_KEY
) {
119 dev_extent
= btrfs_item_ptr(l
, slot
, struct btrfs_dev_extent
);
120 last_byte
= key
.offset
+ btrfs_dev_extent_length(l
, dev_extent
);
126 /* we have to make sure we didn't find an extent that has already
127 * been allocated by the map tree or the original allocation
129 btrfs_release_path(root
, path
);
130 BUG_ON(*start
< search_start
);
132 if (*start
+ num_bytes
> search_end
) {
136 /* check for pending inserts here */
140 btrfs_release_path(root
, path
);
144 int btrfs_alloc_dev_extent(struct btrfs_trans_handle
*trans
,
145 struct btrfs_device
*device
,
146 u64 owner
, u64 num_bytes
, u64
*start
)
149 struct btrfs_path
*path
;
150 struct btrfs_root
*root
= device
->dev_root
;
151 struct btrfs_dev_extent
*extent
;
152 struct extent_buffer
*leaf
;
153 struct btrfs_key key
;
155 path
= btrfs_alloc_path();
159 ret
= find_free_dev_extent(trans
, device
, path
, num_bytes
, start
);
164 key
.objectid
= device
->devid
;
166 key
.type
= BTRFS_DEV_EXTENT_KEY
;
167 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
171 leaf
= path
->nodes
[0];
172 extent
= btrfs_item_ptr(leaf
, path
->slots
[0],
173 struct btrfs_dev_extent
);
174 btrfs_set_dev_extent_owner(leaf
, extent
, owner
);
175 btrfs_set_dev_extent_length(leaf
, extent
, num_bytes
);
176 btrfs_mark_buffer_dirty(leaf
);
178 btrfs_free_path(path
);
182 static int find_next_chunk(struct btrfs_root
*root
, u64
*objectid
)
184 struct btrfs_path
*path
;
186 struct btrfs_key key
;
187 struct btrfs_key found_key
;
189 path
= btrfs_alloc_path();
192 key
.objectid
= (u64
)-1;
193 key
.offset
= (u64
)-1;
194 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
196 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
202 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_CHUNK_ITEM_KEY
);
206 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
208 *objectid
= found_key
.objectid
+ found_key
.offset
;
212 btrfs_free_path(path
);
216 static int find_next_devid(struct btrfs_root
*root
, struct btrfs_path
*path
,
220 struct btrfs_key key
;
221 struct btrfs_key found_key
;
223 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
224 key
.type
= BTRFS_DEV_ITEM_KEY
;
225 key
.offset
= (u64
)-1;
227 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
233 ret
= btrfs_previous_item(root
, path
, BTRFS_DEV_ITEMS_OBJECTID
,
238 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
240 *objectid
= found_key
.offset
+ 1;
244 btrfs_release_path(root
, path
);
249 * the device information is stored in the chunk root
250 * the btrfs_device struct should be fully filled in
252 int btrfs_add_device(struct btrfs_trans_handle
*trans
,
253 struct btrfs_root
*root
,
254 struct btrfs_device
*device
)
257 struct btrfs_path
*path
;
258 struct btrfs_dev_item
*dev_item
;
259 struct extent_buffer
*leaf
;
260 struct btrfs_key key
;
264 root
= root
->fs_info
->chunk_root
;
266 path
= btrfs_alloc_path();
270 ret
= find_next_devid(root
, path
, &free_devid
);
274 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
275 key
.type
= BTRFS_DEV_ITEM_KEY
;
276 key
.offset
= free_devid
;
278 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
279 sizeof(*dev_item
) + device
->name_len
);
283 leaf
= path
->nodes
[0];
284 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
286 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
287 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
288 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
289 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
290 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
291 btrfs_set_device_rdev(leaf
, dev_item
, device
->rdev
);
292 btrfs_set_device_partition(leaf
, dev_item
, device
->partition
);
293 btrfs_set_device_name_len(leaf
, dev_item
, device
->name_len
);
294 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
295 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
297 ptr
= (unsigned long)btrfs_device_name(dev_item
);
298 write_extent_buffer(leaf
, device
->name
, ptr
, device
->name_len
);
300 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
301 write_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_DEV_UUID_SIZE
);
302 btrfs_mark_buffer_dirty(leaf
);
306 btrfs_free_path(path
);
309 int btrfs_update_device(struct btrfs_trans_handle
*trans
,
310 struct btrfs_device
*device
)
313 struct btrfs_path
*path
;
314 struct btrfs_root
*root
;
315 struct btrfs_dev_item
*dev_item
;
316 struct extent_buffer
*leaf
;
317 struct btrfs_key key
;
319 root
= device
->dev_root
->fs_info
->chunk_root
;
321 path
= btrfs_alloc_path();
325 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
326 key
.type
= BTRFS_DEV_ITEM_KEY
;
327 key
.offset
= device
->devid
;
329 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
338 leaf
= path
->nodes
[0];
339 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
341 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
342 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
343 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
344 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
345 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
346 btrfs_set_device_rdev(leaf
, dev_item
, device
->rdev
);
347 btrfs_set_device_partition(leaf
, dev_item
, device
->partition
);
348 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
349 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
350 btrfs_mark_buffer_dirty(leaf
);
353 btrfs_free_path(path
);
357 int btrfs_add_system_chunk(struct btrfs_trans_handle
*trans
,
358 struct btrfs_root
*root
,
359 struct btrfs_key
*key
,
360 struct btrfs_chunk
*chunk
, int item_size
)
362 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
363 struct btrfs_disk_key disk_key
;
367 array_size
= btrfs_super_sys_array_size(super_copy
);
368 if (array_size
+ item_size
> BTRFS_SYSTEM_CHUNK_ARRAY_SIZE
)
371 ptr
= super_copy
->sys_chunk_array
+ array_size
;
372 btrfs_cpu_key_to_disk(&disk_key
, key
);
373 memcpy(ptr
, &disk_key
, sizeof(disk_key
));
374 ptr
+= sizeof(disk_key
);
375 memcpy(ptr
, chunk
, item_size
);
376 item_size
+= sizeof(disk_key
);
377 btrfs_set_super_sys_array_size(super_copy
, array_size
+ item_size
);
381 int btrfs_alloc_chunk(struct btrfs_trans_handle
*trans
,
382 struct btrfs_root
*extent_root
, u64
*start
,
383 u64
*num_bytes
, u64 type
)
386 struct btrfs_root
*chunk_root
= extent_root
->fs_info
->chunk_root
;
387 struct btrfs_stripe
*stripes
;
388 struct btrfs_device
*device
= NULL
;
389 struct btrfs_chunk
*chunk
;
390 struct list_head private_devs
;
391 struct list_head
*dev_list
= &extent_root
->fs_info
->devices
;
392 struct list_head
*cur
;
393 struct map_lookup
*map
;
395 u64 calc_size
= 1024 * 1024 * 1024;
402 struct btrfs_key key
;
404 if (list_empty(dev_list
))
407 INIT_LIST_HEAD(&private_devs
);
408 cur
= dev_list
->next
;
410 /* build a private list of devices we will allocate from */
411 while(index
< num_stripes
) {
412 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
413 avail
= device
->total_bytes
- device
->bytes_used
;
415 if (avail
> max_avail
)
417 if (avail
>= calc_size
) {
418 list_move_tail(&device
->dev_list
, &private_devs
);
424 if (index
< num_stripes
) {
425 list_splice(&private_devs
, dev_list
);
426 if (!looped
&& max_avail
> 0) {
428 calc_size
= max_avail
;
434 ret
= find_next_chunk(chunk_root
, &key
.objectid
);
438 chunk
= kmalloc(btrfs_chunk_item_size(num_stripes
), GFP_NOFS
);
442 stripes
= &chunk
->stripe
;
444 *num_bytes
= calc_size
;
446 while(index
< num_stripes
) {
447 BUG_ON(list_empty(&private_devs
));
448 cur
= private_devs
.next
;
449 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
450 list_move_tail(&device
->dev_list
, dev_list
);
452 ret
= btrfs_alloc_dev_extent(trans
, device
,
454 calc_size
, &dev_offset
);
457 device
->bytes_used
+= calc_size
;
458 ret
= btrfs_update_device(trans
, device
);
461 btrfs_set_stack_stripe_devid(stripes
+ index
, device
->devid
);
462 btrfs_set_stack_stripe_offset(stripes
+ index
, dev_offset
);
463 physical
= dev_offset
;
466 BUG_ON(!list_empty(&private_devs
));
468 /* key.objectid was set above */
469 key
.offset
= *num_bytes
;
470 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
471 btrfs_set_stack_chunk_owner(chunk
, extent_root
->root_key
.objectid
);
472 btrfs_set_stack_chunk_stripe_len(chunk
, 64 * 1024);
473 btrfs_set_stack_chunk_type(chunk
, type
);
474 btrfs_set_stack_chunk_num_stripes(chunk
, num_stripes
);
475 btrfs_set_stack_chunk_io_align(chunk
, extent_root
->sectorsize
);
476 btrfs_set_stack_chunk_io_width(chunk
, extent_root
->sectorsize
);
477 btrfs_set_stack_chunk_sector_size(chunk
, extent_root
->sectorsize
);
479 ret
= btrfs_insert_item(trans
, chunk_root
, &key
, chunk
,
480 btrfs_chunk_item_size(num_stripes
));
482 *start
= key
.objectid
;
484 map
= kmalloc(sizeof(*map
), GFP_NOFS
);
488 map
->ce
.start
= key
.objectid
;
489 map
->ce
.size
= key
.offset
;
491 map
->physical
= physical
;
498 ret
= insert_existing_cache_extent(
499 &extent_root
->fs_info
->mapping_tree
.cache_tree
,
507 void btrfs_mapping_init(struct btrfs_mapping_tree
*tree
)
509 cache_tree_init(&tree
->cache_tree
);
512 int btrfs_map_block(struct btrfs_mapping_tree
*map_tree
,
513 u64 logical
, u64
*phys
, u64
*length
,
514 struct btrfs_device
**dev
)
516 struct cache_extent
*ce
;
517 struct map_lookup
*map
;
520 ce
= find_first_cache_extent(&map_tree
->cache_tree
, logical
);
522 BUG_ON(ce
->start
> logical
|| ce
->start
+ ce
->size
< logical
);
523 map
= container_of(ce
, struct map_lookup
, ce
);
524 offset
= logical
- ce
->start
;
525 *phys
= map
->physical
+ offset
;
526 *length
= ce
->size
- offset
;
531 struct btrfs_device
*btrfs_find_device(struct btrfs_root
*root
, u64 devid
)
533 struct btrfs_device
*dev
;
534 struct list_head
*cur
= root
->fs_info
->devices
.next
;
535 struct list_head
*head
= &root
->fs_info
->devices
;
538 dev
= list_entry(cur
, struct btrfs_device
, dev_list
);
539 if (dev
->devid
== devid
)
546 static int read_one_chunk(struct btrfs_root
*root
, struct btrfs_key
*key
,
547 struct extent_buffer
*leaf
,
548 struct btrfs_chunk
*chunk
)
550 struct btrfs_mapping_tree
*map_tree
= &root
->fs_info
->mapping_tree
;
551 struct map_lookup
*map
;
552 struct cache_extent
*ce
;
558 logical
= key
->objectid
;
559 length
= key
->offset
;
560 ce
= find_first_cache_extent(&map_tree
->cache_tree
, logical
);
562 /* already mapped? */
563 if (ce
&& ce
->start
<= logical
&& ce
->start
+ ce
->size
> logical
) {
567 map
= kmalloc(sizeof(*map
), GFP_NOFS
);
571 map
->ce
.start
= logical
;
572 map
->ce
.size
= length
;
574 map
->physical
= btrfs_stripe_offset_nr(leaf
, chunk
, 0);
575 devid
= btrfs_stripe_devid_nr(leaf
, chunk
, 0);
576 map
->dev
= btrfs_find_device(root
, devid
);
582 ret
= insert_existing_cache_extent(&map_tree
->cache_tree
, &map
->ce
);
588 static int fill_device_from_item(struct extent_buffer
*leaf
,
589 struct btrfs_dev_item
*dev_item
,
590 struct btrfs_device
*device
)
595 device
->devid
= btrfs_device_id(leaf
, dev_item
);
596 device
->total_bytes
= btrfs_device_total_bytes(leaf
, dev_item
);
597 device
->bytes_used
= btrfs_device_bytes_used(leaf
, dev_item
);
598 device
->type
= btrfs_device_type(leaf
, dev_item
);
599 device
->io_align
= btrfs_device_io_align(leaf
, dev_item
);
600 device
->io_width
= btrfs_device_io_width(leaf
, dev_item
);
601 device
->sector_size
= btrfs_device_sector_size(leaf
, dev_item
);
602 device
->rdev
= btrfs_device_rdev(leaf
, dev_item
);
603 device
->partition
= btrfs_device_partition(leaf
, dev_item
);
604 device
->name_len
= btrfs_device_name_len(leaf
, dev_item
);
606 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
607 read_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_DEV_UUID_SIZE
);
609 name
= kmalloc(device
->name_len
+ 1, GFP_NOFS
);
613 ptr
= (unsigned long)btrfs_device_name(dev_item
);
614 read_extent_buffer(leaf
, name
, ptr
, device
->name_len
);
615 name
[device
->name_len
] = '\0';
619 static int read_one_dev(struct btrfs_root
*root
, struct btrfs_key
*key
,
620 struct extent_buffer
*leaf
,
621 struct btrfs_dev_item
*dev_item
)
623 struct btrfs_device
*device
;
627 devid
= btrfs_device_id(leaf
, dev_item
);
628 device
= btrfs_find_device(root
, devid
);
630 device
= kmalloc(sizeof(*device
), GFP_NOFS
);
633 list_add(&device
->dev_list
, &root
->fs_info
->devices
);
636 fill_device_from_item(leaf
, dev_item
, device
);
637 device
->dev_root
= root
->fs_info
->dev_root
;
639 memcpy(&device
->dev_key
, key
, sizeof(*key
));
640 ret
= btrfs_open_device(device
);
647 int btrfs_read_sys_array(struct btrfs_root
*root
)
649 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
650 struct extent_buffer
*sb
= root
->fs_info
->sb_buffer
;
651 struct btrfs_disk_key
*disk_key
;
652 struct btrfs_dev_item
*dev_item
;
653 struct btrfs_chunk
*chunk
;
654 struct btrfs_key key
;
659 unsigned long sb_ptr
;
664 array_size
= btrfs_super_sys_array_size(super_copy
);
667 * we do this loop twice, once for the device items and
668 * once for all of the chunks. This way there are device
669 * structs filled in for every chunk
672 ptr
= super_copy
->sys_chunk_array
;
673 sb_ptr
= offsetof(struct btrfs_super_block
, sys_chunk_array
);
676 while (cur
< array_size
) {
677 disk_key
= (struct btrfs_disk_key
*)ptr
;
678 btrfs_disk_key_to_cpu(&key
, disk_key
);
680 len
= sizeof(*disk_key
);
685 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
&&
686 key
.type
== BTRFS_DEV_ITEM_KEY
) {
687 dev_item
= (struct btrfs_dev_item
*)sb_ptr
;
689 ret
= read_one_dev(root
, &key
, sb
, dev_item
);
692 len
= sizeof(*dev_item
);
693 len
+= btrfs_device_name_len(sb
, dev_item
);
694 } else if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
696 chunk
= (struct btrfs_chunk
*)sb_ptr
;
698 ret
= read_one_chunk(root
, &key
, sb
, chunk
);
701 num_stripes
= btrfs_chunk_num_stripes(sb
, chunk
);
702 len
= btrfs_chunk_item_size(num_stripes
);
717 int btrfs_read_chunk_tree(struct btrfs_root
*root
)
719 struct btrfs_path
*path
;
720 struct extent_buffer
*leaf
;
721 struct btrfs_key key
;
722 struct btrfs_key found_key
;
726 root
= root
->fs_info
->chunk_root
;
728 path
= btrfs_alloc_path();
732 /* first we search for all of the device items, and then we
733 * read in all of the chunk items. This way we can create chunk
734 * mappings that reference all of the devices that are afound
736 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
740 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
742 leaf
= path
->nodes
[0];
743 slot
= path
->slots
[0];
744 if (slot
>= btrfs_header_nritems(leaf
)) {
745 ret
= btrfs_next_leaf(root
, path
);
752 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
753 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
754 if (found_key
.objectid
!= BTRFS_DEV_ITEMS_OBJECTID
)
756 if (found_key
.type
== BTRFS_DEV_ITEM_KEY
) {
757 struct btrfs_dev_item
*dev_item
;
758 dev_item
= btrfs_item_ptr(leaf
, slot
,
759 struct btrfs_dev_item
);
760 ret
= read_one_dev(root
, &found_key
, leaf
,
764 } else if (found_key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
765 struct btrfs_chunk
*chunk
;
766 chunk
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
767 ret
= read_one_chunk(root
, &found_key
, leaf
, chunk
);
771 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
773 btrfs_release_path(root
, path
);
777 btrfs_free_path(path
);