faster btrfsck
[btrfs-progs-unstable/devel.git] / disk-io.c
blob0d921f16c878f6ab4d0516d8a2431efda16cf18a
1 #define _XOPEN_SOURCE 600
2 #define __USE_XOPEN2K
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/types.h>
6 #include <sys/stat.h>
7 #include <fcntl.h>
8 #include <unistd.h>
9 #include "kerncompat.h"
10 #include "radix-tree.h"
11 #include "ctree.h"
12 #include "disk-io.h"
13 #include "transaction.h"
15 static int allocated_blocks = 0;
16 int cache_max = 10000;
18 struct dev_lookup {
19 u64 block_start;
20 u64 num_blocks;
21 u64 device_id;
22 int fd;
25 int btrfs_insert_dev_radix(struct btrfs_root *root,
26 int fd,
27 u64 device_id,
28 u64 block_start,
29 u64 num_blocks)
31 struct dev_lookup *lookup;
32 int ret;
34 lookup = malloc(sizeof(*lookup));
35 if (!lookup)
36 return -ENOMEM;
37 lookup->block_start = block_start;
38 lookup->num_blocks = num_blocks;
39 lookup->fd = fd;
40 lookup->device_id = device_id;
41 printf("inserting into dev radix %Lu %Lu\n", block_start, num_blocks);
43 ret = radix_tree_insert(&root->fs_info->dev_radix, block_start +
44 num_blocks - 1, lookup);
45 return ret;
48 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
49 u64 logical)
51 struct dev_lookup *lookup[2];
53 int ret;
55 root = root->fs_info->dev_root;
56 ret = radix_tree_gang_lookup(&root->fs_info->dev_radix,
57 (void **)lookup,
58 (unsigned long)logical,
59 ARRAY_SIZE(lookup));
60 if (ret == 0 || lookup[0]->block_start > logical ||
61 lookup[0]->block_start + lookup[0]->num_blocks <= logical) {
62 ret = -1;
63 goto out;
65 bh->fd = lookup[0]->fd;
66 bh->dev_blocknr = logical - lookup[0]->block_start;
67 ret = 0;
68 out:
69 return ret;
72 static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf)
74 if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
75 BUG();
76 if (memcmp(root->fs_info->disk_super->fsid, buf->node.header.fsid,
77 sizeof(buf->node.header.fsid)))
78 BUG();
79 return 0;
82 static int free_some_buffers(struct btrfs_root *root)
84 struct list_head *node, *next;
85 struct btrfs_buffer *b;
86 if (root->fs_info->cache_size < cache_max)
87 return 0;
88 list_for_each_safe(node, next, &root->fs_info->cache) {
89 b = list_entry(node, struct btrfs_buffer, cache);
90 if (b->count == 1) {
91 BUG_ON(!list_empty(&b->dirty));
92 list_del_init(&b->cache);
93 btrfs_block_release(root, b);
94 if (root->fs_info->cache_size < cache_max)
95 break;
98 return 0;
101 struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr)
103 struct btrfs_buffer *buf;
104 int ret;
106 buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize);
107 if (!buf)
108 return buf;
109 allocated_blocks++;
110 buf->blocknr = blocknr;
111 buf->count = 2;
112 INIT_LIST_HEAD(&buf->dirty);
113 free_some_buffers(root);
114 radix_tree_preload(GFP_KERNEL);
115 ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf);
116 radix_tree_preload_end();
117 list_add_tail(&buf->cache, &root->fs_info->cache);
118 root->fs_info->cache_size++;
119 if (ret) {
120 free(buf);
121 return NULL;
123 return buf;
126 struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr)
128 struct btrfs_buffer *buf;
129 buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
130 if (buf) {
131 buf->count++;
132 } else {
133 buf = alloc_tree_block(root, blocknr);
134 if (!buf) {
135 BUG();
136 return NULL;
139 return buf;
142 struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr)
144 struct btrfs_buffer *buf;
145 int ret;
146 buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
147 if (buf) {
148 buf->count++;
149 } else {
150 buf = alloc_tree_block(root, blocknr);
151 if (!buf)
152 return NULL;
153 btrfs_map_bh_to_logical(root, buf, blocknr);
154 ret = pread(buf->fd, &buf->node, root->blocksize,
155 buf->dev_blocknr * root->blocksize);
156 if (ret != root->blocksize) {
157 free(buf);
158 return NULL;
161 if (check_tree_block(root, buf))
162 BUG();
163 return buf;
166 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
167 struct btrfs_buffer *buf)
169 if (!list_empty(&buf->dirty))
170 return 0;
171 list_add_tail(&buf->dirty, &root->fs_info->trans);
172 buf->count++;
173 return 0;
176 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
177 struct btrfs_buffer *buf)
179 if (!list_empty(&buf->dirty)) {
180 list_del_init(&buf->dirty);
181 btrfs_block_release(root, buf);
183 return 0;
186 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
187 struct btrfs_buffer *buf)
189 int ret;
191 if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
192 BUG();
193 btrfs_map_bh_to_logical(root, buf, buf->blocknr);
194 ret = pwrite(buf->fd, &buf->node, root->blocksize,
195 buf->dev_blocknr * root->blocksize);
196 if (ret != root->blocksize)
197 return ret;
198 return 0;
201 static int __commit_transaction(struct btrfs_trans_handle *trans, struct
202 btrfs_root *root)
204 struct btrfs_buffer *b;
205 int ret = 0;
206 int wret;
207 while(!list_empty(&root->fs_info->trans)) {
208 b = list_entry(root->fs_info->trans.next, struct btrfs_buffer,
209 dirty);
210 list_del_init(&b->dirty);
211 wret = write_tree_block(trans, root, b);
212 if (wret)
213 ret = wret;
214 btrfs_block_release(root, b);
216 return ret;
219 static int commit_tree_roots(struct btrfs_trans_handle *trans,
220 struct btrfs_fs_info *fs_info)
222 int ret;
223 u64 old_extent_block;
224 struct btrfs_root *tree_root = fs_info->tree_root;
225 struct btrfs_root *extent_root = fs_info->extent_root;
227 if (btrfs_super_device_root(fs_info->disk_super) !=
228 fs_info->dev_root->node->blocknr) {
229 btrfs_set_super_device_root(fs_info->disk_super,
230 fs_info->dev_root->node->blocknr);
232 while(1) {
233 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
234 if (old_extent_block == extent_root->node->blocknr)
235 break;
236 btrfs_set_root_blocknr(&extent_root->root_item,
237 extent_root->node->blocknr);
238 ret = btrfs_update_root(trans, tree_root,
239 &extent_root->root_key,
240 &extent_root->root_item);
241 BUG_ON(ret);
243 return 0;
246 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
247 btrfs_root *root, struct btrfs_super_block *s)
249 int ret = 0;
250 struct btrfs_buffer *snap = root->commit_root;
251 struct btrfs_key snap_key;
253 if (root->commit_root == root->node)
254 return 0;
256 memcpy(&snap_key, &root->root_key, sizeof(snap_key));
257 root->root_key.offset++;
259 btrfs_set_root_blocknr(&root->root_item, root->node->blocknr);
260 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
261 &root->root_key, &root->root_item);
262 BUG_ON(ret);
264 ret = commit_tree_roots(trans, root->fs_info);
265 BUG_ON(ret);
267 ret = __commit_transaction(trans, root);
268 BUG_ON(ret);
270 write_ctree_super(trans, root, s);
271 btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
272 btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
274 root->commit_root = root->node;
275 root->node->count++;
276 ret = btrfs_drop_snapshot(trans, root, snap);
277 BUG_ON(ret);
279 ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
280 BUG_ON(ret);
281 root->fs_info->generation = root->root_key.offset + 1;
283 return ret;
286 static int __setup_root(struct btrfs_super_block *super,
287 struct btrfs_root *root,
288 struct btrfs_fs_info *fs_info,
289 u64 objectid, int fp)
291 root->node = NULL;
292 root->commit_root = NULL;
293 root->blocksize = btrfs_super_blocksize(super);
294 root->ref_cows = 0;
295 root->fs_info = fs_info;
296 memset(&root->root_key, 0, sizeof(root->root_key));
297 memset(&root->root_item, 0, sizeof(root->root_item));
298 root->root_key.objectid = objectid;
299 return 0;
302 static int find_and_setup_root(struct btrfs_super_block *super,
303 struct btrfs_root *tree_root,
304 struct btrfs_fs_info *fs_info,
305 u64 objectid,
306 struct btrfs_root *root, int fp)
308 int ret;
310 __setup_root(super, root, fs_info, objectid, fp);
311 ret = btrfs_find_last_root(tree_root, objectid,
312 &root->root_item, &root->root_key);
313 BUG_ON(ret);
315 root->node = read_tree_block(root,
316 btrfs_root_blocknr(&root->root_item));
317 BUG_ON(!root->node);
318 return 0;
321 int btrfs_open_disk(struct btrfs_root *root, u64 device_id,
322 u64 block_start, u64 num_blocks,
323 char *filename, int name_len)
325 char *null_filename;
326 int fd;
327 int ret;
329 null_filename = malloc(name_len + 1);
330 if (!null_filename)
331 return -ENOMEM;
332 memcpy(null_filename, filename, name_len);
333 null_filename[name_len] = '\0';
335 fd = open(null_filename, O_RDWR);
336 if (fd < 0) {
337 ret = -1;
338 goto out;
341 posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
342 posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
343 ret = btrfs_insert_dev_radix(root, fd, device_id,
344 block_start, num_blocks);
345 BUG_ON(ret);
346 ret = 0;
347 out:
348 free(null_filename);
349 return ret;
352 static int read_device_info(struct btrfs_root *root)
354 struct btrfs_path path;
355 int ret;
356 struct btrfs_key key;
357 struct btrfs_leaf *leaf;
358 struct btrfs_device_item *dev_item;
359 int nritems;
360 int slot;
362 root = root->fs_info->dev_root;
364 btrfs_init_path(&path);
365 key.objectid = 0;
366 key.offset = 0;
367 key.flags = 0;
368 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
370 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
371 leaf = &path.nodes[0]->leaf;
372 nritems = btrfs_header_nritems(&leaf->header);
373 while(1) {
374 slot = path.slots[0];
375 if (slot >= nritems) {
376 ret = btrfs_next_leaf(root, &path);
377 if (ret)
378 break;
379 leaf = &path.nodes[0]->leaf;
380 nritems = btrfs_header_nritems(&leaf->header);
381 slot = path.slots[0];
383 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
384 if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) {
385 path.slots[0]++;
386 continue;
388 dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item);
389 if (btrfs_device_id(dev_item) !=
390 btrfs_super_device_id(root->fs_info->disk_super)) {
391 printf("found key %Lu %Lu\n", key.objectid, key.offset);
392 ret = btrfs_open_disk(root, btrfs_device_id(dev_item),
393 key.objectid, key.offset,
394 (char *)(dev_item + 1),
395 btrfs_device_pathlen(dev_item));
396 BUG_ON(ret);
398 path.slots[0]++;
400 btrfs_release_path(root, &path);
401 return 0;
404 struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super)
406 int fp;
408 fp = open(filename, O_CREAT | O_RDWR, 0600);
409 if (fp < 0) {
410 return NULL;
412 return open_ctree_fd(fp, super);
415 struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super)
417 struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
418 struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
419 struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
420 struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
421 struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
422 struct dev_lookup *dev_lookup;
423 int ret;
425 INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL);
426 INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL);
427 INIT_RADIX_TREE(&fs_info->dev_radix, GFP_KERNEL);
428 INIT_LIST_HEAD(&fs_info->trans);
429 INIT_LIST_HEAD(&fs_info->cache);
430 fs_info->cache_size = 0;
431 fs_info->fp = fp;
432 fs_info->running_transaction = NULL;
433 fs_info->fs_root = root;
434 fs_info->tree_root = tree_root;
435 fs_info->extent_root = extent_root;
436 fs_info->dev_root = dev_root;
437 fs_info->last_inode_alloc = 0;
438 fs_info->last_inode_alloc_dirid = 0;
439 fs_info->disk_super = super;
440 memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
441 memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
443 ret = pread(fp, super, sizeof(struct btrfs_super_block),
444 BTRFS_SUPER_INFO_OFFSET);
445 if (ret == 0 || btrfs_super_root(super) == 0) {
446 BUG();
447 return NULL;
449 BUG_ON(ret < 0);
450 __setup_root(super, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID, fp);
452 dev_lookup = malloc(sizeof(*dev_lookup));
453 dev_lookup->fd = fp;
454 dev_lookup->device_id = btrfs_super_device_id(super);
455 dev_lookup->block_start = btrfs_super_device_block_start(super);
456 dev_lookup->num_blocks = btrfs_super_device_num_blocks(super);
457 ret = radix_tree_insert(&fs_info->dev_radix,
458 dev_lookup->block_start +
459 dev_lookup->num_blocks - 1, dev_lookup);
460 BUG_ON(ret);
462 dev_root->node = read_tree_block(dev_root,
463 btrfs_super_device_root(super));
465 ret = read_device_info(dev_root);
466 BUG_ON(ret);
468 __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp);
469 tree_root->node = read_tree_block(tree_root, btrfs_super_root(super));
470 BUG_ON(!tree_root->node);
472 ret = find_and_setup_root(super, tree_root, fs_info,
473 BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp);
474 BUG_ON(ret);
476 ret = find_and_setup_root(super, tree_root, fs_info,
477 BTRFS_FS_TREE_OBJECTID, root, fp);
478 BUG_ON(ret);
480 root->commit_root = root->node;
481 root->node->count++;
482 root->ref_cows = 1;
483 root->fs_info->generation = root->root_key.offset + 1;
484 return root;
487 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
488 *root, struct btrfs_super_block *s)
490 int ret;
491 btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr);
492 ret = pwrite(root->fs_info->fp, s, sizeof(*s),
493 BTRFS_SUPER_INFO_OFFSET);
494 if (ret != sizeof(*s)) {
495 fprintf(stderr, "failed to write new super block err %d\n", ret);
496 return ret;
498 return 0;
501 static int drop_cache(struct btrfs_root *root)
503 while(!list_empty(&root->fs_info->cache)) {
504 struct btrfs_buffer *b = list_entry(root->fs_info->cache.next,
505 struct btrfs_buffer,
506 cache);
507 list_del_init(&b->cache);
508 btrfs_block_release(root, b);
510 return 0;
513 static int free_dev_radix(struct btrfs_fs_info *fs_info)
515 struct dev_lookup *lookup[8];
516 int ret;
517 int i;
518 while(1) {
519 ret = radix_tree_gang_lookup(&fs_info->dev_radix,
520 (void **)lookup, 0,
521 ARRAY_SIZE(lookup));
522 if (!ret)
523 break;
524 for (i = 0; i < ret; i++) {
525 if (lookup[i]->device_id !=
526 btrfs_super_device_id(fs_info->disk_super))
527 close(lookup[i]->fd);
528 radix_tree_delete(&fs_info->dev_radix,
529 lookup[i]->block_start +
530 lookup[i]->num_blocks - 1);
531 free(lookup[i]);
534 return 0;
537 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
539 int ret;
540 struct btrfs_trans_handle *trans;
542 trans = root->fs_info->running_transaction;
543 btrfs_commit_transaction(trans, root, s);
544 ret = commit_tree_roots(trans, root->fs_info);
545 BUG_ON(ret);
546 ret = __commit_transaction(trans, root);
547 BUG_ON(ret);
548 write_ctree_super(trans, root, s);
549 drop_cache(root);
550 BUG_ON(!list_empty(&root->fs_info->trans));
552 free_dev_radix(root->fs_info);
553 close(root->fs_info->fp);
554 if (root->node)
555 btrfs_block_release(root, root->node);
556 if (root->fs_info->extent_root->node)
557 btrfs_block_release(root->fs_info->extent_root,
558 root->fs_info->extent_root->node);
559 if (root->fs_info->tree_root->node)
560 btrfs_block_release(root->fs_info->tree_root,
561 root->fs_info->tree_root->node);
562 if (root->fs_info->dev_root->node)
563 btrfs_block_release(root->fs_info->dev_root,
564 root->fs_info->dev_root->node);
565 btrfs_block_release(root, root->commit_root);
566 free(root);
567 printf("on close %d blocks are allocated\n", allocated_blocks);
568 return 0;
571 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf)
573 buf->count--;
574 if (buf->count < 0)
575 BUG();
576 if (buf->count == 0) {
577 BUG_ON(!list_empty(&buf->cache));
578 BUG_ON(!list_empty(&buf->dirty));
579 if (!radix_tree_lookup(&root->fs_info->cache_radix,
580 buf->blocknr))
581 BUG();
582 radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr);
583 memset(buf, 0, sizeof(*buf));
584 free(buf);
585 BUG_ON(allocated_blocks == 0);
586 allocated_blocks--;
587 BUG_ON(root->fs_info->cache_size == 0);
588 root->fs_info->cache_size--;