Update the super magic string to match the seed and root format changes
[btrfs-progs-unstable/devel.git] / volumes.c
blob 249cdc7202e041acc630cf2f1beafa1f439fd5f4
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #define _XOPEN_SOURCE 600
19 #define __USE_XOPEN2K
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <uuid/uuid.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include "ctree.h"
28 #include "disk-io.h"
29 #include "transaction.h"
30 #include "print-tree.h"
31 #include "volumes.h"
33 struct stripe {
34 struct btrfs_device *dev;
35 u64 physical;
38 struct map_lookup {
39 struct cache_extent ce;
40 u64 type;
41 int io_align;
42 int io_width;
43 int stripe_len;
44 int sector_size;
45 int num_stripes;
46 int sub_stripes;
47 struct btrfs_bio_stripe stripes[];
/* allocation size of a map_lookup that carries n trailing stripes */
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

/* every btrfs_fs_devices created by device_list_add() is linked on this list */
static LIST_HEAD(fs_uuids);
55 static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
56 u8 *uuid)
58 struct btrfs_device *dev;
59 struct list_head *cur;
61 list_for_each(cur, head) {
62 dev = list_entry(cur, struct btrfs_device, dev_list);
63 if (dev->devid == devid &&
64 !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) {
65 return dev;
68 return NULL;
71 static struct btrfs_fs_devices *find_fsid(u8 *fsid)
73 struct list_head *cur;
74 struct btrfs_fs_devices *fs_devices;
76 list_for_each(cur, &fs_uuids) {
77 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
78 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
79 return fs_devices;
81 return NULL;
84 static int device_list_add(const char *path,
85 struct btrfs_super_block *disk_super,
86 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
88 struct btrfs_device *device;
89 struct btrfs_fs_devices *fs_devices;
90 u64 found_transid = btrfs_super_generation(disk_super);
92 fs_devices = find_fsid(disk_super->fsid);
93 if (!fs_devices) {
94 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
95 if (!fs_devices)
96 return -ENOMEM;
97 INIT_LIST_HEAD(&fs_devices->devices);
98 list_add(&fs_devices->list, &fs_uuids);
99 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
100 fs_devices->latest_devid = devid;
101 fs_devices->latest_trans = found_transid;
102 fs_devices->lowest_devid = (u64)-1;
103 device = NULL;
104 } else {
105 device = __find_device(&fs_devices->devices, devid,
106 disk_super->dev_item.uuid);
108 if (!device) {
109 device = kzalloc(sizeof(*device), GFP_NOFS);
110 if (!device) {
111 /* we can safely leave the fs_devices entry around */
112 return -ENOMEM;
114 device->devid = devid;
115 memcpy(device->uuid, disk_super->dev_item.uuid,
116 BTRFS_UUID_SIZE);
117 device->name = kstrdup(path, GFP_NOFS);
118 if (!device->name) {
119 kfree(device);
120 return -ENOMEM;
122 device->label = kstrdup(disk_super->label, GFP_NOFS);
123 device->total_devs = btrfs_super_num_devices(disk_super);
124 device->super_bytes_used = btrfs_super_bytes_used(disk_super);
125 device->total_bytes =
126 btrfs_stack_device_total_bytes(&disk_super->dev_item);
127 device->bytes_used =
128 btrfs_stack_device_bytes_used(&disk_super->dev_item);
129 list_add(&device->dev_list, &fs_devices->devices);
130 device->fs_devices = fs_devices;
133 if (found_transid > fs_devices->latest_trans) {
134 fs_devices->latest_devid = devid;
135 fs_devices->latest_trans = found_transid;
137 if (fs_devices->lowest_devid > devid) {
138 fs_devices->lowest_devid = devid;
140 *fs_devices_ret = fs_devices;
141 return 0;
144 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
146 struct btrfs_fs_devices *seed_devices;
147 struct list_head *cur;
148 struct btrfs_device *device;
149 again:
150 list_for_each(cur, &fs_devices->devices) {
151 device = list_entry(cur, struct btrfs_device, dev_list);
152 close(device->fd);
153 device->fd = -1;
154 device->writeable = 0;
157 seed_devices = fs_devices->seed;
158 fs_devices->seed = NULL;
159 if (seed_devices) {
160 fs_devices = seed_devices;
161 goto again;
164 return 0;
167 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
169 int fd;
170 struct list_head *head = &fs_devices->devices;
171 struct list_head *cur;
172 struct btrfs_device *device;
173 int ret;
175 list_for_each(cur, head) {
176 device = list_entry(cur, struct btrfs_device, dev_list);
178 fd = open(device->name, flags);
179 if (fd < 0) {
180 ret = -errno;
181 goto fail;
184 if (device->devid == fs_devices->latest_devid)
185 fs_devices->latest_bdev = fd;
186 if (device->devid == fs_devices->lowest_devid)
187 fs_devices->lowest_bdev = fd;
188 device->fd = fd;
189 if (flags == O_RDWR)
190 device->writeable = 1;
192 return 0;
193 fail:
194 btrfs_close_devices(fs_devices);
195 return ret;
198 int btrfs_scan_one_device(int fd, const char *path,
199 struct btrfs_fs_devices **fs_devices_ret,
200 u64 *total_devs, u64 super_offset)
202 struct btrfs_super_block *disk_super;
203 char *buf;
204 int ret;
205 u64 devid;
206 char uuidbuf[37];
208 buf = malloc(4096);
209 if (!buf) {
210 ret = -ENOMEM;
211 goto error;
213 ret = pread(fd, buf, 4096, super_offset);
214 if (ret != 4096) {
215 ret = -EIO;
216 goto error;
218 disk_super = (struct btrfs_super_block *)buf;
219 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
220 sizeof(disk_super->magic))) {
221 ret = -ENOENT;
222 goto error_brelse;
224 devid = le64_to_cpu(disk_super->dev_item.devid);
225 *total_devs = btrfs_super_num_devices(disk_super);
226 uuid_unparse(disk_super->fsid, uuidbuf);
228 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
230 error_brelse:
231 free(buf);
232 error:
233 return ret;
237 * this uses a pretty simple search, the expectation is that it is
238 * called very infrequently and that a given device has a small number
239 * of extents
241 static int find_free_dev_extent(struct btrfs_trans_handle *trans,
242 struct btrfs_device *device,
243 struct btrfs_path *path,
244 u64 num_bytes, u64 *start)
246 struct btrfs_key key;
247 struct btrfs_root *root = device->dev_root;
248 struct btrfs_dev_extent *dev_extent = NULL;
249 u64 hole_size = 0;
250 u64 last_byte = 0;
251 u64 search_start = 0;
252 u64 search_end = device->total_bytes;
253 int ret;
254 int slot = 0;
255 int start_found;
256 struct extent_buffer *l;
258 start_found = 0;
259 path->reada = 2;
261 /* FIXME use last free of some kind */
263 /* we don't want to overwrite the superblock on the drive,
264 * so we make sure to start at an offset of at least 1MB
266 search_start = max((u64)1024 * 1024, search_start);
268 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
269 search_start = max(root->fs_info->alloc_start, search_start);
271 key.objectid = device->devid;
272 key.offset = search_start;
273 key.type = BTRFS_DEV_EXTENT_KEY;
274 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
275 if (ret < 0)
276 goto error;
277 ret = btrfs_previous_item(root, path, 0, key.type);
278 if (ret < 0)
279 goto error;
280 l = path->nodes[0];
281 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
282 while (1) {
283 l = path->nodes[0];
284 slot = path->slots[0];
285 if (slot >= btrfs_header_nritems(l)) {
286 ret = btrfs_next_leaf(root, path);
287 if (ret == 0)
288 continue;
289 if (ret < 0)
290 goto error;
291 no_more_items:
292 if (!start_found) {
293 if (search_start >= search_end) {
294 ret = -ENOSPC;
295 goto error;
297 *start = search_start;
298 start_found = 1;
299 goto check_pending;
301 *start = last_byte > search_start ?
302 last_byte : search_start;
303 if (search_end <= *start) {
304 ret = -ENOSPC;
305 goto error;
307 goto check_pending;
309 btrfs_item_key_to_cpu(l, &key, slot);
311 if (key.objectid < device->devid)
312 goto next;
314 if (key.objectid > device->devid)
315 goto no_more_items;
317 if (key.offset >= search_start && key.offset > last_byte &&
318 start_found) {
319 if (last_byte < search_start)
320 last_byte = search_start;
321 hole_size = key.offset - last_byte;
322 if (key.offset > last_byte &&
323 hole_size >= num_bytes) {
324 *start = last_byte;
325 goto check_pending;
328 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
329 goto next;
332 start_found = 1;
333 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
334 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
335 next:
336 path->slots[0]++;
337 cond_resched();
339 check_pending:
340 /* we have to make sure we didn't find an extent that has already
341 * been allocated by the map tree or the original allocation
343 btrfs_release_path(root, path);
344 BUG_ON(*start < search_start);
346 if (*start + num_bytes > search_end) {
347 ret = -ENOSPC;
348 goto error;
350 /* check for pending inserts here */
351 return 0;
353 error:
354 btrfs_release_path(root, path);
355 return ret;
358 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
359 struct btrfs_device *device,
360 u64 chunk_tree, u64 chunk_objectid,
361 u64 chunk_offset,
362 u64 num_bytes, u64 *start)
364 int ret;
365 struct btrfs_path *path;
366 struct btrfs_root *root = device->dev_root;
367 struct btrfs_dev_extent *extent;
368 struct extent_buffer *leaf;
369 struct btrfs_key key;
371 path = btrfs_alloc_path();
372 if (!path)
373 return -ENOMEM;
375 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
376 if (ret) {
377 goto err;
380 key.objectid = device->devid;
381 key.offset = *start;
382 key.type = BTRFS_DEV_EXTENT_KEY;
383 ret = btrfs_insert_empty_item(trans, root, path, &key,
384 sizeof(*extent));
385 BUG_ON(ret);
387 leaf = path->nodes[0];
388 extent = btrfs_item_ptr(leaf, path->slots[0],
389 struct btrfs_dev_extent);
390 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
391 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
392 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
394 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
395 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
396 BTRFS_UUID_SIZE);
398 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
399 btrfs_mark_buffer_dirty(leaf);
400 err:
401 btrfs_free_path(path);
402 return ret;
405 static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
407 struct btrfs_path *path;
408 int ret;
409 struct btrfs_key key;
410 struct btrfs_chunk *chunk;
411 struct btrfs_key found_key;
413 path = btrfs_alloc_path();
414 BUG_ON(!path);
416 key.objectid = objectid;
417 key.offset = (u64)-1;
418 key.type = BTRFS_CHUNK_ITEM_KEY;
420 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
421 if (ret < 0)
422 goto error;
424 BUG_ON(ret == 0);
426 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
427 if (ret) {
428 *offset = 0;
429 } else {
430 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
431 path->slots[0]);
432 if (found_key.objectid != objectid)
433 *offset = 0;
434 else {
435 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
436 struct btrfs_chunk);
437 *offset = found_key.offset +
438 btrfs_chunk_length(path->nodes[0], chunk);
441 ret = 0;
442 error:
443 btrfs_free_path(path);
444 return ret;
447 static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
448 u64 *objectid)
450 int ret;
451 struct btrfs_key key;
452 struct btrfs_key found_key;
454 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
455 key.type = BTRFS_DEV_ITEM_KEY;
456 key.offset = (u64)-1;
458 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
459 if (ret < 0)
460 goto error;
462 BUG_ON(ret == 0);
464 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
465 BTRFS_DEV_ITEM_KEY);
466 if (ret) {
467 *objectid = 1;
468 } else {
469 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
470 path->slots[0]);
471 *objectid = found_key.offset + 1;
473 ret = 0;
474 error:
475 btrfs_release_path(root, path);
476 return ret;
480 * the device information is stored in the chunk root
481 * the btrfs_device struct should be fully filled in
483 int btrfs_add_device(struct btrfs_trans_handle *trans,
484 struct btrfs_root *root,
485 struct btrfs_device *device)
487 int ret;
488 struct btrfs_path *path;
489 struct btrfs_dev_item *dev_item;
490 struct extent_buffer *leaf;
491 struct btrfs_key key;
492 unsigned long ptr;
493 u64 free_devid = 0;
495 root = root->fs_info->chunk_root;
497 path = btrfs_alloc_path();
498 if (!path)
499 return -ENOMEM;
501 ret = find_next_devid(root, path, &free_devid);
502 if (ret)
503 goto out;
505 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
506 key.type = BTRFS_DEV_ITEM_KEY;
507 key.offset = free_devid;
509 ret = btrfs_insert_empty_item(trans, root, path, &key,
510 sizeof(*dev_item));
511 if (ret)
512 goto out;
514 leaf = path->nodes[0];
515 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
517 device->devid = free_devid;
518 btrfs_set_device_id(leaf, dev_item, device->devid);
519 btrfs_set_device_generation(leaf, dev_item, 0);
520 btrfs_set_device_type(leaf, dev_item, device->type);
521 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
522 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
523 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
524 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
525 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
526 btrfs_set_device_group(leaf, dev_item, 0);
527 btrfs_set_device_seek_speed(leaf, dev_item, 0);
528 btrfs_set_device_bandwidth(leaf, dev_item, 0);
530 ptr = (unsigned long)btrfs_device_uuid(dev_item);
531 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
532 ptr = (unsigned long)btrfs_device_fsid(dev_item);
533 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
534 btrfs_mark_buffer_dirty(leaf);
535 ret = 0;
537 out:
538 btrfs_free_path(path);
539 return ret;
542 int btrfs_update_device(struct btrfs_trans_handle *trans,
543 struct btrfs_device *device)
545 int ret;
546 struct btrfs_path *path;
547 struct btrfs_root *root;
548 struct btrfs_dev_item *dev_item;
549 struct extent_buffer *leaf;
550 struct btrfs_key key;
552 root = device->dev_root->fs_info->chunk_root;
554 path = btrfs_alloc_path();
555 if (!path)
556 return -ENOMEM;
558 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
559 key.type = BTRFS_DEV_ITEM_KEY;
560 key.offset = device->devid;
562 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
563 if (ret < 0)
564 goto out;
566 if (ret > 0) {
567 ret = -ENOENT;
568 goto out;
571 leaf = path->nodes[0];
572 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
574 btrfs_set_device_id(leaf, dev_item, device->devid);
575 btrfs_set_device_type(leaf, dev_item, device->type);
576 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
577 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
578 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
579 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
580 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
581 btrfs_mark_buffer_dirty(leaf);
583 out:
584 btrfs_free_path(path);
585 return ret;
588 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
589 struct btrfs_root *root,
590 struct btrfs_key *key,
591 struct btrfs_chunk *chunk, int item_size)
593 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
594 struct btrfs_disk_key disk_key;
595 u32 array_size;
596 u8 *ptr;
598 array_size = btrfs_super_sys_array_size(super_copy);
599 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
600 return -EFBIG;
602 ptr = super_copy->sys_chunk_array + array_size;
603 btrfs_cpu_key_to_disk(&disk_key, key);
604 memcpy(ptr, &disk_key, sizeof(disk_key));
605 ptr += sizeof(disk_key);
606 memcpy(ptr, chunk, item_size);
607 item_size += sizeof(disk_key);
608 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
609 return 0;
612 static u64 div_factor(u64 num, int factor)
614 if (factor == 10)
615 return num;
616 num *= factor;
617 return num / 10;
620 static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
621 int sub_stripes)
623 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
624 return calc_size;
625 else if (type & BTRFS_BLOCK_GROUP_RAID10)
626 return calc_size * (num_stripes / sub_stripes);
627 else
628 return calc_size * num_stripes;
632 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
633 struct btrfs_root *extent_root, u64 *start,
634 u64 *num_bytes, u64 type)
636 u64 dev_offset;
637 struct btrfs_fs_info *info = extent_root->fs_info;
638 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
639 struct btrfs_stripe *stripes;
640 struct btrfs_device *device = NULL;
641 struct btrfs_chunk *chunk;
642 struct list_head private_devs;
643 struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
644 struct list_head *cur;
645 struct map_lookup *map;
646 int min_stripe_size = 1 * 1024 * 1024;
647 u64 physical;
648 u64 calc_size = 8 * 1024 * 1024;
649 u64 min_free;
650 u64 max_chunk_size = 4 * calc_size;
651 u64 avail;
652 u64 max_avail = 0;
653 u64 percent_max;
654 int num_stripes = 1;
655 int min_stripes = 1;
656 int sub_stripes = 0;
657 int looped = 0;
658 int ret;
659 int index;
660 int stripe_len = 64 * 1024;
661 struct btrfs_key key;
663 if (list_empty(dev_list)) {
664 return -ENOSPC;
667 if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
668 BTRFS_BLOCK_GROUP_RAID10 |
669 BTRFS_BLOCK_GROUP_DUP)) {
670 if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
671 calc_size = 8 * 1024 * 1024;
672 max_chunk_size = calc_size * 2;
673 min_stripe_size = 1 * 1024 * 1024;
674 } else if (type & BTRFS_BLOCK_GROUP_DATA) {
675 calc_size = 1024 * 1024 * 1024;
676 max_chunk_size = 10 * calc_size;
677 min_stripe_size = 64 * 1024 * 1024;
678 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
679 calc_size = 1024 * 1024 * 1024;
680 max_chunk_size = 4 * calc_size;
681 min_stripe_size = 32 * 1024 * 1024;
684 if (type & BTRFS_BLOCK_GROUP_RAID1) {
685 num_stripes = min_t(u64, 2,
686 btrfs_super_num_devices(&info->super_copy));
687 if (num_stripes < 2)
688 return -ENOSPC;
689 min_stripes = 2;
691 if (type & BTRFS_BLOCK_GROUP_DUP) {
692 num_stripes = 2;
693 min_stripes = 2;
695 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
696 num_stripes = btrfs_super_num_devices(&info->super_copy);
697 min_stripes = 2;
699 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
700 num_stripes = btrfs_super_num_devices(&info->super_copy);
701 if (num_stripes < 4)
702 return -ENOSPC;
703 num_stripes &= ~(u32)1;
704 sub_stripes = 2;
705 min_stripes = 4;
708 /* we don't want a chunk larger than 10% of the FS */
709 percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
710 max_chunk_size = min(percent_max, max_chunk_size);
712 again:
713 if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
714 max_chunk_size) {
715 calc_size = max_chunk_size;
716 calc_size /= num_stripes;
717 calc_size /= stripe_len;
718 calc_size *= stripe_len;
720 /* we don't want tiny stripes */
721 calc_size = max_t(u64, calc_size, min_stripe_size);
723 calc_size /= stripe_len;
724 calc_size *= stripe_len;
725 INIT_LIST_HEAD(&private_devs);
726 cur = dev_list->next;
727 index = 0;
729 if (type & BTRFS_BLOCK_GROUP_DUP)
730 min_free = calc_size * 2;
731 else
732 min_free = calc_size;
734 /* build a private list of devices we will allocate from */
735 while(index < num_stripes) {
736 device = list_entry(cur, struct btrfs_device, dev_list);
737 avail = device->total_bytes - device->bytes_used;
738 cur = cur->next;
739 if (avail >= min_free) {
740 list_move_tail(&device->dev_list, &private_devs);
741 index++;
742 if (type & BTRFS_BLOCK_GROUP_DUP)
743 index++;
744 } else if (avail > max_avail)
745 max_avail = avail;
746 if (cur == dev_list)
747 break;
749 if (index < num_stripes) {
750 list_splice(&private_devs, dev_list);
751 if (index >= min_stripes) {
752 num_stripes = index;
753 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
754 num_stripes /= sub_stripes;
755 num_stripes *= sub_stripes;
757 looped = 1;
758 goto again;
760 if (!looped && max_avail > 0) {
761 looped = 1;
762 calc_size = max_avail;
763 goto again;
765 return -ENOSPC;
767 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
768 key.type = BTRFS_CHUNK_ITEM_KEY;
769 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
770 &key.offset);
771 if (ret)
772 return ret;
774 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
775 if (!chunk)
776 return -ENOMEM;
778 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
779 if (!map) {
780 kfree(chunk);
781 return -ENOMEM;
784 stripes = &chunk->stripe;
785 *num_bytes = chunk_bytes_by_type(type, calc_size,
786 num_stripes, sub_stripes);
787 index = 0;
788 while(index < num_stripes) {
789 struct btrfs_stripe *stripe;
790 BUG_ON(list_empty(&private_devs));
791 cur = private_devs.next;
792 device = list_entry(cur, struct btrfs_device, dev_list);
794 /* loop over this device again if we're doing a dup group */
795 if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
796 (index == num_stripes - 1))
797 list_move_tail(&device->dev_list, dev_list);
799 ret = btrfs_alloc_dev_extent(trans, device,
800 info->chunk_root->root_key.objectid,
801 BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
802 calc_size, &dev_offset);
803 BUG_ON(ret);
805 device->bytes_used += calc_size;
806 ret = btrfs_update_device(trans, device);
807 BUG_ON(ret);
809 map->stripes[index].dev = device;
810 map->stripes[index].physical = dev_offset;
811 stripe = stripes + index;
812 btrfs_set_stack_stripe_devid(stripe, device->devid);
813 btrfs_set_stack_stripe_offset(stripe, dev_offset);
814 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
815 physical = dev_offset;
816 index++;
818 BUG_ON(!list_empty(&private_devs));
820 /* key was set above */
821 btrfs_set_stack_chunk_length(chunk, *num_bytes);
822 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
823 btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
824 btrfs_set_stack_chunk_type(chunk, type);
825 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
826 btrfs_set_stack_chunk_io_align(chunk, stripe_len);
827 btrfs_set_stack_chunk_io_width(chunk, stripe_len);
828 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
829 btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
830 map->sector_size = extent_root->sectorsize;
831 map->stripe_len = stripe_len;
832 map->io_align = stripe_len;
833 map->io_width = stripe_len;
834 map->type = type;
835 map->num_stripes = num_stripes;
836 map->sub_stripes = sub_stripes;
838 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
839 btrfs_chunk_item_size(num_stripes));
840 BUG_ON(ret);
841 *start = key.offset;;
843 map->ce.start = key.offset;
844 map->ce.size = *num_bytes;
846 ret = insert_existing_cache_extent(
847 &extent_root->fs_info->mapping_tree.cache_tree,
848 &map->ce);
849 BUG_ON(ret);
851 if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
852 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
853 chunk, btrfs_chunk_item_size(num_stripes));
854 BUG_ON(ret);
857 kfree(chunk);
858 return ret;
861 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
863 cache_tree_init(&tree->cache_tree);
866 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
868 struct cache_extent *ce;
869 struct map_lookup *map;
870 int ret;
871 u64 offset;
873 ce = find_first_cache_extent(&map_tree->cache_tree, logical);
874 BUG_ON(!ce);
875 BUG_ON(ce->start > logical || ce->start + ce->size < logical);
876 map = container_of(ce, struct map_lookup, ce);
878 offset = logical - ce->start;
879 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
880 ret = map->num_stripes;
881 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
882 ret = map->sub_stripes;
883 else
884 ret = 1;
885 return ret;
888 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
889 u64 logical, u64 *length,
890 struct btrfs_multi_bio **multi_ret, int mirror_num)
892 struct cache_extent *ce;
893 struct map_lookup *map;
894 u64 offset;
895 u64 stripe_offset;
896 u64 stripe_nr;
897 int stripes_allocated = 8;
898 int stripes_required = 1;
899 int stripe_index;
900 int i;
901 struct btrfs_multi_bio *multi = NULL;
903 if (multi_ret && rw == READ) {
904 stripes_allocated = 1;
906 again:
907 if (multi_ret) {
908 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
909 GFP_NOFS);
910 if (!multi)
911 return -ENOMEM;
914 ce = find_first_cache_extent(&map_tree->cache_tree, logical);
915 BUG_ON(!ce);
916 BUG_ON(ce->start > logical || ce->start + ce->size < logical);
917 map = container_of(ce, struct map_lookup, ce);
918 offset = logical - ce->start;
920 if (rw == WRITE) {
921 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
922 BTRFS_BLOCK_GROUP_DUP)) {
923 stripes_required = map->num_stripes;
924 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
925 stripes_required = map->sub_stripes;
928 /* if our multi bio struct is too small, back off and try again */
929 if (multi_ret && rw == WRITE &&
930 stripes_allocated < stripes_required) {
931 stripes_allocated = map->num_stripes;
932 kfree(multi);
933 goto again;
935 stripe_nr = offset;
937 * stripe_nr counts the total number of stripes we have to stride
938 * to get to this block
940 stripe_nr = stripe_nr / map->stripe_len;
942 stripe_offset = stripe_nr * map->stripe_len;
943 BUG_ON(offset < stripe_offset);
945 /* stripe_offset is the offset of this block in its stripe*/
946 stripe_offset = offset - stripe_offset;
948 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
949 BTRFS_BLOCK_GROUP_RAID10 |
950 BTRFS_BLOCK_GROUP_DUP)) {
951 /* we limit the length of each bio to what fits in a stripe */
952 *length = min_t(u64, ce->size - offset,
953 map->stripe_len - stripe_offset);
954 } else {
955 *length = ce->size - offset;
958 if (!multi_ret)
959 goto out;
961 multi->num_stripes = 1;
962 stripe_index = 0;
963 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
964 if (rw == WRITE)
965 multi->num_stripes = map->num_stripes;
966 else if (mirror_num)
967 stripe_index = mirror_num - 1;
968 else
969 stripe_index = stripe_nr % map->num_stripes;
970 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
971 int factor = map->num_stripes / map->sub_stripes;
973 stripe_index = stripe_nr % factor;
974 stripe_index *= map->sub_stripes;
976 if (rw == WRITE)
977 multi->num_stripes = map->sub_stripes;
978 else if (mirror_num)
979 stripe_index += mirror_num - 1;
980 else
981 stripe_index = stripe_nr % map->sub_stripes;
983 stripe_nr = stripe_nr / factor;
984 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
985 if (rw == WRITE)
986 multi->num_stripes = map->num_stripes;
987 else if (mirror_num)
988 stripe_index = mirror_num - 1;
989 } else {
991 * after this do_div call, stripe_nr is the number of stripes
992 * on this device we have to walk to find the data, and
993 * stripe_index is the number of our device in the stripe array
995 stripe_index = stripe_nr % map->num_stripes;
996 stripe_nr = stripe_nr / map->num_stripes;
998 BUG_ON(stripe_index >= map->num_stripes);
1000 BUG_ON(stripe_index != 0 && multi->num_stripes > 1);
1001 for (i = 0; i < multi->num_stripes; i++) {
1002 multi->stripes[i].physical =
1003 map->stripes[stripe_index].physical + stripe_offset +
1004 stripe_nr * map->stripe_len;
1005 multi->stripes[i].dev = map->stripes[stripe_index].dev;
1006 stripe_index++;
1008 *multi_ret = multi;
1009 out:
1010 return 0;
1013 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
1014 u8 *uuid, u8 *fsid)
1016 struct btrfs_device *device;
1017 struct btrfs_fs_devices *cur_devices;
1019 cur_devices = root->fs_info->fs_devices;
1020 while (cur_devices) {
1021 if (!fsid ||
1022 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
1023 device = __find_device(&cur_devices->devices,
1024 devid, uuid);
1025 if (device)
1026 return device;
1028 cur_devices = cur_devices->seed;
1030 return NULL;
1033 int btrfs_bootstrap_super_map(struct btrfs_mapping_tree *map_tree,
1034 struct btrfs_fs_devices *fs_devices)
1036 struct map_lookup *map;
1037 u64 logical = BTRFS_SUPER_INFO_OFFSET;
1038 u64 length = BTRFS_SUPER_INFO_SIZE;
1039 int num_stripes = 0;
1040 int sub_stripes = 0;
1041 int ret;
1042 int i;
1043 struct list_head *cur;
1045 list_for_each(cur, &fs_devices->devices) {
1046 num_stripes++;
1048 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1049 if (!map)
1050 return -ENOMEM;
1052 map->ce.start = logical;
1053 map->ce.size = length;
1054 map->num_stripes = num_stripes;
1055 map->sub_stripes = sub_stripes;
1056 map->io_width = length;
1057 map->io_align = length;
1058 map->sector_size = length;
1059 map->stripe_len = length;
1060 map->type = BTRFS_BLOCK_GROUP_RAID1;
1062 i = 0;
1063 list_for_each(cur, &fs_devices->devices) {
1064 struct btrfs_device *device = list_entry(cur,
1065 struct btrfs_device,
1066 dev_list);
1067 map->stripes[i].physical = logical;
1068 map->stripes[i].dev = device;
1069 i++;
1071 ret = insert_existing_cache_extent(&map_tree->cache_tree, &map->ce);
1072 if (ret == -EEXIST) {
1073 struct cache_extent *old;
1074 struct map_lookup *old_map;
1075 old = find_cache_extent(&map_tree->cache_tree, logical, length);
1076 old_map = container_of(old, struct map_lookup, ce);
1077 remove_cache_extent(&map_tree->cache_tree, old);
1078 kfree(old_map);
1079 ret = insert_existing_cache_extent(&map_tree->cache_tree,
1080 &map->ce);
1082 BUG_ON(ret);
1083 return 0;
1086 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
1088 struct cache_extent *ce;
1089 struct map_lookup *map;
1090 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
1091 int readonly = 0;
1092 int i;
1094 ce = find_first_cache_extent(&map_tree->cache_tree, chunk_offset);
1095 BUG_ON(!ce);
1097 map = container_of(ce, struct map_lookup, ce);
1098 for (i = 0; i < map->num_stripes; i++) {
1099 if (!map->stripes[i].dev->writeable) {
1100 readonly = 1;
1101 break;
1105 return readonly;
1108 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
1109 struct extent_buffer *leaf,
1110 struct btrfs_chunk *chunk)
1112 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
1113 struct map_lookup *map;
1114 struct cache_extent *ce;
1115 u64 logical;
1116 u64 length;
1117 u64 devid;
1118 u64 super_offset_diff = 0;
1119 u8 uuid[BTRFS_UUID_SIZE];
1120 int num_stripes;
1121 int ret;
1122 int i;
1124 logical = key->offset;
1125 length = btrfs_chunk_length(leaf, chunk);
1127 if (logical < BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE) {
1128 super_offset_diff = BTRFS_SUPER_INFO_OFFSET +
1129 BTRFS_SUPER_INFO_SIZE - logical;
1130 logical = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
1133 ce = find_first_cache_extent(&map_tree->cache_tree, logical);
1135 /* already mapped? */
1136 if (ce && ce->start <= logical && ce->start + ce->size > logical) {
1137 return 0;
1140 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
1141 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1142 if (!map)
1143 return -ENOMEM;
1145 map->ce.start = logical;
1146 map->ce.size = length - super_offset_diff;
1147 map->num_stripes = num_stripes;
1148 map->io_width = btrfs_chunk_io_width(leaf, chunk);
1149 map->io_align = btrfs_chunk_io_align(leaf, chunk);
1150 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
1151 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
1152 map->type = btrfs_chunk_type(leaf, chunk);
1153 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
1155 for (i = 0; i < num_stripes; i++) {
1156 map->stripes[i].physical =
1157 btrfs_stripe_offset_nr(leaf, chunk, i) +
1158 super_offset_diff;
1159 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
1160 read_extent_buffer(leaf, uuid, (unsigned long)
1161 btrfs_stripe_dev_uuid_nr(chunk, i),
1162 BTRFS_UUID_SIZE);
1163 map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
1164 NULL);
1165 if (!map->stripes[i].dev) {
1166 kfree(map);
1167 return -EIO;
1171 ret = insert_existing_cache_extent(&map_tree->cache_tree, &map->ce);
1172 BUG_ON(ret);
1174 return 0;
1177 static int fill_device_from_item(struct extent_buffer *leaf,
1178 struct btrfs_dev_item *dev_item,
1179 struct btrfs_device *device)
1181 unsigned long ptr;
1183 device->devid = btrfs_device_id(leaf, dev_item);
1184 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
1185 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
1186 device->type = btrfs_device_type(leaf, dev_item);
1187 device->io_align = btrfs_device_io_align(leaf, dev_item);
1188 device->io_width = btrfs_device_io_width(leaf, dev_item);
1189 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
1191 ptr = (unsigned long)btrfs_device_uuid(dev_item);
1192 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1194 return 0;
1197 static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
1199 struct btrfs_fs_devices *fs_devices;
1200 int ret;
1202 fs_devices = root->fs_info->fs_devices->seed;
1203 while (fs_devices) {
1204 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
1205 ret = 0;
1206 goto out;
1208 fs_devices = fs_devices->seed;
1211 fs_devices = find_fsid(fsid);
1212 if (!fs_devices) {
1213 ret = -ENOENT;
1214 goto out;
1217 ret = btrfs_open_devices(fs_devices, O_RDONLY);
1218 if (ret)
1219 goto out;
1221 fs_devices->seed = root->fs_info->fs_devices->seed;
1222 root->fs_info->fs_devices->seed = fs_devices;
1223 out:
1224 return ret;
1227 static int read_one_dev(struct btrfs_root *root,
1228 struct extent_buffer *leaf,
1229 struct btrfs_dev_item *dev_item)
1231 struct btrfs_device *device;
1232 u64 devid;
1233 int ret = 0;
1234 u8 fs_uuid[BTRFS_UUID_SIZE];
1235 u8 dev_uuid[BTRFS_UUID_SIZE];
1237 devid = btrfs_device_id(leaf, dev_item);
1238 read_extent_buffer(leaf, dev_uuid,
1239 (unsigned long)btrfs_device_uuid(dev_item),
1240 BTRFS_UUID_SIZE);
1241 read_extent_buffer(leaf, fs_uuid,
1242 (unsigned long)btrfs_device_fsid(dev_item),
1243 BTRFS_UUID_SIZE);
1245 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
1246 ret = open_seed_devices(root, fs_uuid);
1247 if (ret)
1248 return ret;
1251 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1252 if (!device) {
1253 printk("warning devid %llu not found already\n",
1254 (unsigned long long)devid);
1255 device = kmalloc(sizeof(*device), GFP_NOFS);
1256 if (!device)
1257 return -ENOMEM;
1258 device->total_ios = 0;
1259 list_add(&device->dev_list,
1260 &root->fs_info->fs_devices->devices);
1263 fill_device_from_item(leaf, dev_item, device);
1264 device->dev_root = root->fs_info->dev_root;
1265 return ret;
1268 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
1270 struct btrfs_dev_item *dev_item;
1272 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
1273 dev_item);
1274 return read_one_dev(root, buf, dev_item);
1277 int btrfs_read_sys_array(struct btrfs_root *root)
1279 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1280 struct extent_buffer *sb = root->fs_info->sb_buffer;
1281 struct btrfs_disk_key *disk_key;
1282 struct btrfs_chunk *chunk;
1283 struct btrfs_key key;
1284 u32 num_stripes;
1285 u32 array_size;
1286 u32 len = 0;
1287 u8 *ptr;
1288 unsigned long sb_ptr;
1289 u32 cur;
1290 int ret;
1292 array_size = btrfs_super_sys_array_size(super_copy);
1295 * we do this loop twice, once for the device items and
1296 * once for all of the chunks. This way there are device
1297 * structs filled in for every chunk
1299 ptr = super_copy->sys_chunk_array;
1300 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
1301 cur = 0;
1303 while (cur < array_size) {
1304 disk_key = (struct btrfs_disk_key *)ptr;
1305 btrfs_disk_key_to_cpu(&key, disk_key);
1307 len = sizeof(*disk_key);
1308 ptr += len;
1309 sb_ptr += len;
1310 cur += len;
1312 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1313 chunk = (struct btrfs_chunk *)sb_ptr;
1314 ret = read_one_chunk(root, &key, sb, chunk);
1315 BUG_ON(ret);
1316 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
1317 len = btrfs_chunk_item_size(num_stripes);
1318 } else {
1319 BUG();
1321 ptr += len;
1322 sb_ptr += len;
1323 cur += len;
1325 return 0;
1328 int btrfs_read_chunk_tree(struct btrfs_root *root)
1330 struct btrfs_path *path;
1331 struct extent_buffer *leaf;
1332 struct btrfs_key key;
1333 struct btrfs_key found_key;
1334 int ret;
1335 int slot;
1337 root = root->fs_info->chunk_root;
1339 path = btrfs_alloc_path();
1340 if (!path)
1341 return -ENOMEM;
1343 /* first we search for all of the device items, and then we
1344 * read in all of the chunk items. This way we can create chunk
1345 * mappings that reference all of the devices that are afound
1347 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1348 key.offset = 0;
1349 key.type = 0;
1350 again:
1351 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1352 while(1) {
1353 leaf = path->nodes[0];
1354 slot = path->slots[0];
1355 if (slot >= btrfs_header_nritems(leaf)) {
1356 ret = btrfs_next_leaf(root, path);
1357 if (ret == 0)
1358 continue;
1359 if (ret < 0)
1360 goto error;
1361 break;
1363 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1364 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
1365 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
1366 break;
1367 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
1368 struct btrfs_dev_item *dev_item;
1369 dev_item = btrfs_item_ptr(leaf, slot,
1370 struct btrfs_dev_item);
1371 ret = read_one_dev(root, leaf, dev_item);
1372 BUG_ON(ret);
1374 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
1375 struct btrfs_chunk *chunk;
1376 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1377 ret = read_one_chunk(root, &found_key, leaf, chunk);
1378 BUG_ON(ret);
1380 path->slots[0]++;
1382 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
1383 key.objectid = 0;
1384 btrfs_release_path(root, path);
1385 goto again;
1388 btrfs_free_path(path);
1389 ret = 0;
1390 error:
1391 return ret;
1394 struct list_head *btrfs_scanned_uuids(void)
1396 return &fs_uuids;