2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 #include "transaction.h"
21 #include "print-tree.h"
25 struct cache_extent ce
;
26 struct btrfs_device
*dev
;
31 * this uses a pretty simple search, the expectation is that it is
32 * called very infrequently and that a given device has a small number
35 static int find_free_dev_extent(struct btrfs_trans_handle
*trans
,
36 struct btrfs_device
*device
,
37 struct btrfs_path
*path
,
38 u64 num_bytes
, u64
*start
)
41 struct btrfs_root
*root
= device
->dev_root
;
42 struct btrfs_dev_extent
*dev_extent
= NULL
;
46 u64 search_end
= device
->total_bytes
;
50 struct extent_buffer
*l
;
55 /* FIXME use last free of some kind */
57 key
.objectid
= device
->devid
;
58 key
.offset
= search_start
;
59 key
.type
= BTRFS_DEV_EXTENT_KEY
;
60 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 0);
63 ret
= btrfs_previous_item(root
, path
, 0, key
.type
);
67 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
70 slot
= path
->slots
[0];
71 if (slot
>= btrfs_header_nritems(l
)) {
72 ret
= btrfs_next_leaf(root
, path
);
79 if (search_start
>= search_end
) {
83 *start
= search_start
;
87 *start
= last_byte
> search_start
?
88 last_byte
: search_start
;
89 if (search_end
<= *start
) {
95 btrfs_item_key_to_cpu(l
, &key
, slot
);
97 if (key
.objectid
< device
->devid
)
100 if (key
.objectid
> device
->devid
)
103 if (key
.offset
>= search_start
&& key
.offset
> last_byte
&&
105 if (last_byte
< search_start
)
106 last_byte
= search_start
;
107 hole_size
= key
.offset
- last_byte
;
108 if (key
.offset
> last_byte
&&
109 hole_size
>= num_bytes
) {
114 if (btrfs_key_type(&key
) != BTRFS_DEV_EXTENT_KEY
) {
119 dev_extent
= btrfs_item_ptr(l
, slot
, struct btrfs_dev_extent
);
120 last_byte
= key
.offset
+ btrfs_dev_extent_length(l
, dev_extent
);
126 /* we have to make sure we didn't find an extent that has already
127 * been allocated by the map tree or the original allocation
129 btrfs_release_path(root
, path
);
130 BUG_ON(*start
< search_start
);
132 if (*start
+ num_bytes
>= search_end
) {
136 /* check for pending inserts here */
140 btrfs_release_path(root
, path
);
144 int btrfs_alloc_dev_extent(struct btrfs_trans_handle
*trans
,
145 struct btrfs_device
*device
,
146 u64 owner
, u64 num_bytes
, u64
*start
)
149 struct btrfs_path
*path
;
150 struct btrfs_root
*root
= device
->dev_root
;
151 struct btrfs_dev_extent
*extent
;
152 struct extent_buffer
*leaf
;
153 struct btrfs_key key
;
155 path
= btrfs_alloc_path();
159 ret
= find_free_dev_extent(trans
, device
, path
, num_bytes
, start
);
163 key
.objectid
= device
->devid
;
165 key
.type
= BTRFS_DEV_EXTENT_KEY
;
166 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
170 leaf
= path
->nodes
[0];
171 extent
= btrfs_item_ptr(leaf
, path
->slots
[0],
172 struct btrfs_dev_extent
);
173 btrfs_set_dev_extent_owner(leaf
, extent
, owner
);
174 btrfs_set_dev_extent_length(leaf
, extent
, num_bytes
);
175 btrfs_mark_buffer_dirty(leaf
);
177 btrfs_free_path(path
);
181 static int find_next_chunk(struct btrfs_root
*root
, u64
*objectid
)
183 struct btrfs_path
*path
;
185 struct btrfs_key key
;
186 struct btrfs_key found_key
;
188 path
= btrfs_alloc_path();
191 key
.objectid
= (u64
)-1;
192 key
.offset
= (u64
)-1;
193 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
195 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
201 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_CHUNK_ITEM_KEY
);
205 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
207 *objectid
= found_key
.objectid
+ found_key
.offset
;
211 btrfs_free_path(path
);
215 static struct btrfs_device
*next_device(struct list_head
*head
,
216 struct list_head
*last
)
218 struct list_head
*next
= last
->next
;
219 struct btrfs_device
*dev
;
221 if (list_empty(head
))
227 dev
= list_entry(next
, struct btrfs_device
, dev_list
);
231 static int find_next_devid(struct btrfs_root
*root
, struct btrfs_path
*path
,
235 struct btrfs_key key
;
236 struct btrfs_key found_key
;
238 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
239 key
.type
= BTRFS_DEV_ITEM_KEY
;
240 key
.offset
= (u64
)-1;
242 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
248 ret
= btrfs_previous_item(root
, path
, BTRFS_DEV_ITEMS_OBJECTID
,
253 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
255 *objectid
= found_key
.offset
+ 1;
259 btrfs_release_path(root
, path
);
264 * the device information is stored in the chunk root
265 * the btrfs_device struct should be fully filled in
267 int btrfs_add_device(struct btrfs_trans_handle
*trans
,
268 struct btrfs_root
*root
,
269 struct btrfs_device
*device
)
272 struct btrfs_path
*path
;
273 struct btrfs_dev_item
*dev_item
;
274 struct extent_buffer
*leaf
;
275 struct btrfs_key key
;
279 root
= root
->fs_info
->chunk_root
;
281 path
= btrfs_alloc_path();
285 ret
= find_next_devid(root
, path
, &free_devid
);
289 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
290 key
.type
= BTRFS_DEV_ITEM_KEY
;
291 key
.offset
= free_devid
;
293 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
294 sizeof(*dev_item
) + device
->name_len
);
298 leaf
= path
->nodes
[0];
299 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
301 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
302 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
303 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
304 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
305 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
306 btrfs_set_device_rdev(leaf
, dev_item
, device
->rdev
);
307 btrfs_set_device_partition(leaf
, dev_item
, device
->partition
);
308 btrfs_set_device_name_len(leaf
, dev_item
, device
->name_len
);
309 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
310 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
312 ptr
= (unsigned long)btrfs_device_name(dev_item
);
313 write_extent_buffer(leaf
, device
->name
, ptr
, device
->name_len
);
315 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
316 write_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_DEV_UUID_SIZE
);
317 btrfs_mark_buffer_dirty(leaf
);
321 btrfs_free_path(path
);
324 int btrfs_update_device(struct btrfs_trans_handle
*trans
,
325 struct btrfs_device
*device
)
328 struct btrfs_path
*path
;
329 struct btrfs_root
*root
;
330 struct btrfs_dev_item
*dev_item
;
331 struct extent_buffer
*leaf
;
332 struct btrfs_key key
;
334 root
= device
->dev_root
->fs_info
->chunk_root
;
336 path
= btrfs_alloc_path();
340 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
341 key
.type
= BTRFS_DEV_ITEM_KEY
;
342 key
.offset
= device
->devid
;
344 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
353 leaf
= path
->nodes
[0];
354 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
356 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
357 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
358 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
359 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
360 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
361 btrfs_set_device_rdev(leaf
, dev_item
, device
->rdev
);
362 btrfs_set_device_partition(leaf
, dev_item
, device
->partition
);
363 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
364 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
365 btrfs_mark_buffer_dirty(leaf
);
368 btrfs_free_path(path
);
372 int btrfs_add_system_chunk(struct btrfs_trans_handle
*trans
,
373 struct btrfs_root
*root
,
374 struct btrfs_key
*key
,
375 struct btrfs_chunk
*chunk
, int item_size
)
377 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
378 struct btrfs_disk_key disk_key
;
382 array_size
= btrfs_super_sys_array_size(super_copy
);
383 if (array_size
+ item_size
> BTRFS_SYSTEM_CHUNK_ARRAY_SIZE
)
386 ptr
= super_copy
->sys_chunk_array
+ array_size
;
387 btrfs_cpu_key_to_disk(&disk_key
, key
);
388 memcpy(ptr
, &disk_key
, sizeof(disk_key
));
389 ptr
+= sizeof(disk_key
);
390 memcpy(ptr
, chunk
, item_size
);
391 item_size
+= sizeof(disk_key
);
392 btrfs_set_super_sys_array_size(super_copy
, array_size
+ item_size
);
396 int btrfs_alloc_chunk(struct btrfs_trans_handle
*trans
,
397 struct btrfs_root
*extent_root
, u64
*start
,
398 u64
*num_bytes
, u32 type
)
401 struct btrfs_root
*chunk_root
= extent_root
->fs_info
->chunk_root
;
402 struct btrfs_stripe
*stripes
;
403 struct btrfs_device
*device
= NULL
;
404 struct btrfs_chunk
*chunk
;
405 struct list_head
*dev_list
= &extent_root
->fs_info
->devices
;
406 struct list_head
*last_dev
= extent_root
->fs_info
->last_device
;
407 struct map_lookup
*map
;
413 struct btrfs_key key
;
416 ret
= find_next_chunk(chunk_root
, &key
.objectid
);
421 chunk
= kmalloc(btrfs_chunk_item_size(num_stripes
), GFP_NOFS
);
425 stripes
= &chunk
->stripe
;
427 while(index
< num_stripes
) {
428 device
= next_device(dev_list
, last_dev
);
430 last_dev
= &device
->dev_list
;
431 extent_root
->fs_info
->last_device
= last_dev
;
434 int mask
= device
->io_align
;
435 calc_size
= (device
->total_bytes
* 95) / 100;
436 calc_size
= device
->total_bytes
- calc_size
;
437 calc_size
= (calc_size
/ mask
) * mask
;
438 *num_bytes
= calc_size
;
441 ret
= btrfs_alloc_dev_extent(trans
, device
,
443 calc_size
, &dev_offset
);
446 device
->bytes_used
+= calc_size
;
447 ret
= btrfs_update_device(trans
, device
);
450 btrfs_set_stack_stripe_devid(stripes
+ index
, device
->devid
);
451 btrfs_set_stack_stripe_offset(stripes
+ index
, dev_offset
);
452 physical
= dev_offset
;
456 /* key.objectid was set above */
457 key
.offset
= *num_bytes
;
458 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
459 btrfs_set_stack_chunk_owner(chunk
, extent_root
->root_key
.objectid
);
460 btrfs_set_stack_chunk_stripe_len(chunk
, 64 * 1024);
461 btrfs_set_stack_chunk_type(chunk
, type
);
462 btrfs_set_stack_chunk_num_stripes(chunk
, num_stripes
);
463 btrfs_set_stack_chunk_io_align(chunk
, extent_root
->sectorsize
);
464 btrfs_set_stack_chunk_io_width(chunk
, extent_root
->sectorsize
);
465 btrfs_set_stack_chunk_sector_size(chunk
, extent_root
->sectorsize
);
467 ret
= btrfs_insert_item(trans
, chunk_root
, &key
, chunk
,
468 btrfs_chunk_item_size(num_stripes
));
470 *start
= key
.objectid
;
472 map
= kmalloc(sizeof(*map
), GFP_NOFS
);
476 map
->ce
.start
= key
.objectid
;
477 map
->ce
.size
= key
.offset
;
479 map
->physical
= physical
;
486 ret
= insert_existing_cache_extent(
487 &extent_root
->fs_info
->mapping_tree
.cache_tree
,
495 void btrfs_mapping_init(struct btrfs_mapping_tree
*tree
)
497 cache_tree_init(&tree
->cache_tree
);
500 int btrfs_map_block(struct btrfs_mapping_tree
*map_tree
,
501 u64 logical
, u64
*phys
, u64
*length
,
502 struct btrfs_device
**dev
)
504 struct cache_extent
*ce
;
505 struct map_lookup
*map
;
508 ce
= find_first_cache_extent(&map_tree
->cache_tree
, logical
);
510 BUG_ON(ce
->start
> logical
|| ce
->start
+ ce
->size
< logical
);
511 map
= container_of(ce
, struct map_lookup
, ce
);
512 offset
= logical
- ce
->start
;
513 *phys
= map
->physical
+ offset
;
514 *length
= ce
->size
- offset
;
519 struct btrfs_device
*btrfs_find_device(struct btrfs_root
*root
, u64 devid
)
521 struct btrfs_device
*dev
;
522 struct list_head
*cur
= root
->fs_info
->devices
.next
;
523 struct list_head
*head
= &root
->fs_info
->devices
;
526 dev
= list_entry(cur
, struct btrfs_device
, dev_list
);
527 if (dev
->devid
== devid
)
534 static int read_one_chunk(struct btrfs_root
*root
, struct btrfs_key
*key
,
535 struct extent_buffer
*leaf
,
536 struct btrfs_chunk
*chunk
)
538 struct btrfs_mapping_tree
*map_tree
= &root
->fs_info
->mapping_tree
;
539 struct map_lookup
*map
;
540 struct cache_extent
*ce
;
546 logical
= key
->objectid
;
547 length
= key
->offset
;
548 ce
= find_first_cache_extent(&map_tree
->cache_tree
, logical
);
550 /* already mapped? */
551 if (ce
&& ce
->start
<= logical
&& ce
->start
+ ce
->size
> logical
) {
555 map
= kmalloc(sizeof(*map
), GFP_NOFS
);
559 map
->ce
.start
= logical
;
560 map
->ce
.size
= length
;
562 map
->physical
= btrfs_stripe_offset_nr(leaf
, chunk
, 0);
563 devid
= btrfs_stripe_devid_nr(leaf
, chunk
, 0);
564 map
->dev
= btrfs_find_device(root
, devid
);
570 ret
= insert_existing_cache_extent(&map_tree
->cache_tree
, &map
->ce
);
576 static int fill_device_from_item(struct extent_buffer
*leaf
,
577 struct btrfs_dev_item
*dev_item
,
578 struct btrfs_device
*device
)
583 device
->devid
= btrfs_device_id(leaf
, dev_item
);
584 device
->total_bytes
= btrfs_device_total_bytes(leaf
, dev_item
);
585 device
->bytes_used
= btrfs_device_bytes_used(leaf
, dev_item
);
586 device
->type
= btrfs_device_type(leaf
, dev_item
);
587 device
->io_align
= btrfs_device_io_align(leaf
, dev_item
);
588 device
->io_width
= btrfs_device_io_width(leaf
, dev_item
);
589 device
->sector_size
= btrfs_device_sector_size(leaf
, dev_item
);
590 device
->rdev
= btrfs_device_rdev(leaf
, dev_item
);
591 device
->partition
= btrfs_device_partition(leaf
, dev_item
);
592 device
->name_len
= btrfs_device_name_len(leaf
, dev_item
);
594 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
595 read_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_DEV_UUID_SIZE
);
597 name
= kmalloc(device
->name_len
+ 1, GFP_NOFS
);
601 ptr
= (unsigned long)btrfs_device_name(dev_item
);
602 read_extent_buffer(leaf
, name
, ptr
, device
->name_len
);
603 name
[device
->name_len
] = '\0';
607 static int read_one_dev(struct btrfs_root
*root
, struct btrfs_key
*key
,
608 struct extent_buffer
*leaf
,
609 struct btrfs_dev_item
*dev_item
)
611 struct btrfs_device
*device
;
615 devid
= btrfs_device_id(leaf
, dev_item
);
616 if (btrfs_find_device(root
, devid
))
619 device
= kmalloc(sizeof(*device
), GFP_NOFS
);
623 fill_device_from_item(leaf
, dev_item
, device
);
624 device
->dev_root
= root
->fs_info
->dev_root
;
626 list_add(&device
->dev_list
, &root
->fs_info
->devices
);
627 memcpy(&device
->dev_key
, key
, sizeof(*key
));
629 ret
= btrfs_open_device(device
);
636 int btrfs_read_sys_array(struct btrfs_root
*root
)
638 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
639 struct extent_buffer
*sb
= root
->fs_info
->sb_buffer
;
640 struct btrfs_disk_key
*disk_key
;
641 struct btrfs_dev_item
*dev_item
;
642 struct btrfs_chunk
*chunk
;
643 struct btrfs_key key
;
648 unsigned long sb_ptr
;
653 array_size
= btrfs_super_sys_array_size(super_copy
);
656 * we do this loop twice, once for the device items and
657 * once for all of the chunks. This way there are device
658 * structs filled in for every chunk
661 ptr
= super_copy
->sys_chunk_array
;
662 sb_ptr
= offsetof(struct btrfs_super_block
, sys_chunk_array
);
665 while (cur
< array_size
) {
666 disk_key
= (struct btrfs_disk_key
*)ptr
;
667 btrfs_disk_key_to_cpu(&key
, disk_key
);
669 len
= sizeof(*disk_key
);
674 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
&&
675 key
.type
== BTRFS_DEV_ITEM_KEY
) {
676 dev_item
= (struct btrfs_dev_item
*)sb_ptr
;
678 ret
= read_one_dev(root
, &key
, sb
, dev_item
);
681 len
= sizeof(*dev_item
);
682 len
+= btrfs_device_name_len(sb
, dev_item
);
683 } else if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
685 chunk
= (struct btrfs_chunk
*)sb_ptr
;
687 ret
= read_one_chunk(root
, &key
, sb
, chunk
);
690 num_stripes
= btrfs_chunk_num_stripes(sb
, chunk
);
691 len
= btrfs_chunk_item_size(num_stripes
);
706 int btrfs_read_chunk_tree(struct btrfs_root
*root
)
708 struct btrfs_path
*path
;
709 struct extent_buffer
*leaf
;
710 struct btrfs_key key
;
711 struct btrfs_key found_key
;
715 root
= root
->fs_info
->chunk_root
;
717 path
= btrfs_alloc_path();
721 /* first we search for all of the device items, and then we
722 * read in all of the chunk items. This way we can create chunk
723 * mappings that reference all of the devices that are afound
725 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
729 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
731 leaf
= path
->nodes
[0];
732 slot
= path
->slots
[0];
733 if (slot
>= btrfs_header_nritems(leaf
)) {
734 ret
= btrfs_next_leaf(root
, path
);
741 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
742 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
743 if (found_key
.objectid
!= BTRFS_DEV_ITEMS_OBJECTID
)
745 if (found_key
.type
== BTRFS_DEV_ITEM_KEY
) {
746 struct btrfs_dev_item
*dev_item
;
747 dev_item
= btrfs_item_ptr(leaf
, slot
,
748 struct btrfs_dev_item
);
749 ret
= read_one_dev(root
, &found_key
, leaf
,
753 } else if (found_key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
754 struct btrfs_chunk
*chunk
;
755 chunk
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
756 ret
= read_one_chunk(root
, &found_key
, leaf
, chunk
);
760 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
762 btrfs_release_path(root
, path
);
766 btrfs_free_path(path
);