Switch to byte granular allocations
[btrfs-progs-unstable/devel.git] / disk-io.c
blob72b97c8a590b12c20cbee2e6bbf2b3956cdb1e1e
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
20 #define __USE_XOPEN2K
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include "kerncompat.h"
28 #include "radix-tree.h"
29 #include "ctree.h"
30 #include "disk-io.h"
31 #include "transaction.h"
32 #include "crc32c.h"
/*
 * Running total of block payload bytes: alloc_tree_block() adds the block
 * size, btrfs_block_release() subtracts it again when a buffer is freed, and
 * close_ctree() prints the remainder.
 */
34 static u64 allocated_bytes = 0;
/*
 * Threshold compared against fs_info->cache_size by free_some_buffers();
 * eviction of idle buffers starts once the cache size reaches this value.
 */
35 int cache_max = 10000;
37 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
38 u64 logical)
40 bh->fd = root->fs_info->fp;
41 bh->dev_bytenr = logical;
42 return 0;
45 static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf)
47 if (buf->bytenr != btrfs_header_bytenr(&buf->node.header))
48 BUG();
49 if (memcmp(root->fs_info->disk_super->fsid, buf->node.header.fsid,
50 sizeof(buf->node.header.fsid)))
51 BUG();
52 return 0;
55 static int free_some_buffers(struct btrfs_root *root)
57 struct list_head *node, *next;
58 struct btrfs_buffer *b;
59 if (root->fs_info->cache_size < cache_max)
60 return 0;
61 list_for_each_safe(node, next, &root->fs_info->cache) {
62 b = list_entry(node, struct btrfs_buffer, cache);
63 if (b->count == 1) {
64 BUG_ON(!list_empty(&b->dirty));
65 list_del_init(&b->cache);
66 btrfs_block_release(root, b);
67 if (root->fs_info->cache_size < cache_max)
68 break;
71 return 0;
74 struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 bytenr,
75 u32 blocksize)
77 struct btrfs_buffer *buf;
78 int ret;
80 buf = malloc(sizeof(struct btrfs_buffer) + blocksize);
81 if (!buf)
82 return buf;
83 allocated_bytes += blocksize;
85 buf->bytenr = bytenr;
86 buf->count = 2;
87 buf->size = blocksize;
89 INIT_LIST_HEAD(&buf->dirty);
90 free_some_buffers(root);
91 radix_tree_preload(GFP_KERNEL);
92 ret = radix_tree_insert(&root->fs_info->cache_radix, bytenr, buf);
93 radix_tree_preload_end();
94 list_add_tail(&buf->cache, &root->fs_info->cache);
95 root->fs_info->cache_size += blocksize;
96 if (ret) {
97 free(buf);
98 return NULL;
100 return buf;
103 struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 bytenr,
104 u32 blocksize)
106 struct btrfs_buffer *buf;
107 buf = radix_tree_lookup(&root->fs_info->cache_radix, bytenr);
108 if (buf) {
109 buf->count++;
110 } else {
111 buf = alloc_tree_block(root, bytenr, blocksize);
112 if (!buf) {
113 BUG();
114 return NULL;
117 return buf;
120 struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
121 u32 blocksize)
123 struct btrfs_buffer *buf;
124 int ret;
125 buf = radix_tree_lookup(&root->fs_info->cache_radix, bytenr);
126 if (buf) {
127 buf->count++;
128 if (check_tree_block(root, buf))
129 BUG();
130 } else {
131 buf = alloc_tree_block(root, bytenr, blocksize);
132 if (!buf)
133 return NULL;
134 btrfs_map_bh_to_logical(root, buf, bytenr);
135 ret = pread(buf->fd, &buf->node, blocksize,
136 buf->dev_bytenr);
137 if (ret != blocksize) {
138 free(buf);
139 return NULL;
141 if (check_tree_block(root, buf))
142 BUG();
144 return buf;
147 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
148 struct btrfs_buffer *buf)
150 if (!list_empty(&buf->dirty))
151 return 0;
152 list_add_tail(&buf->dirty, &root->fs_info->trans);
153 buf->count++;
154 if (check_tree_block(root, buf))
155 BUG();
156 return 0;
159 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
160 struct btrfs_buffer *buf)
162 if (!list_empty(&buf->dirty)) {
163 list_del_init(&buf->dirty);
164 btrfs_block_release(root, buf);
166 return 0;
169 int btrfs_csum_node(struct btrfs_root *root, struct btrfs_node *node)
171 u32 crc;
172 size_t len = btrfs_level_size(root, btrfs_header_level(&node->header)) -
173 BTRFS_CSUM_SIZE;
175 crc = crc32c(0, (char *)(node) + BTRFS_CSUM_SIZE, len);
176 memcpy(node->header.csum, &crc, BTRFS_CRC32_SIZE);
177 return 0;
180 int btrfs_csum_super(struct btrfs_root *root, struct btrfs_super_block *super)
182 u32 crc;
183 char block[root->sectorsize];
184 size_t len = root->sectorsize - BTRFS_CSUM_SIZE;
186 memset(block, 0, root->sectorsize);
187 memcpy(block, super, sizeof(*super));
189 crc = crc32c(0, block + BTRFS_CSUM_SIZE, len);
190 memcpy(super->csum, &crc, BTRFS_CRC32_SIZE);
191 return 0;
194 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
195 struct btrfs_buffer *buf)
197 int ret;
199 if (buf->bytenr != btrfs_header_bytenr(&buf->node.header))
200 BUG();
201 btrfs_map_bh_to_logical(root, buf, buf->bytenr);
202 if (check_tree_block(root, buf))
203 BUG();
205 btrfs_csum_node(root, &buf->node);
207 ret = pwrite(buf->fd, &buf->node, buf->size,
208 buf->dev_bytenr);
209 if (ret != buf->size)
210 return ret;
211 return 0;
214 static int __commit_transaction(struct btrfs_trans_handle *trans, struct
215 btrfs_root *root)
217 struct btrfs_buffer *b;
218 int ret = 0;
219 int wret;
220 while(!list_empty(&root->fs_info->trans)) {
221 b = list_entry(root->fs_info->trans.next, struct btrfs_buffer,
222 dirty);
223 list_del_init(&b->dirty);
224 wret = write_tree_block(trans, root, b);
225 if (wret)
226 ret = wret;
227 btrfs_block_release(root, b);
229 return ret;
232 static int commit_tree_roots(struct btrfs_trans_handle *trans,
233 struct btrfs_fs_info *fs_info)
235 int ret;
236 u64 old_extent_bytenr;
237 struct btrfs_root *tree_root = fs_info->tree_root;
238 struct btrfs_root *extent_root = fs_info->extent_root;
240 btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
241 while(1) {
242 old_extent_bytenr = btrfs_root_bytenr(&extent_root->root_item);
243 if (old_extent_bytenr == extent_root->node->bytenr)
244 break;
245 btrfs_set_root_bytenr(&extent_root->root_item,
246 extent_root->node->bytenr);
247 extent_root->root_item.level =
248 btrfs_header_level(&extent_root->node->node.header);
249 ret = btrfs_update_root(trans, tree_root,
250 &extent_root->root_key,
251 &extent_root->root_item);
252 BUG_ON(ret);
253 btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
255 return 0;
258 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
259 btrfs_root *root, struct btrfs_super_block *s)
261 int ret = 0;
262 struct btrfs_buffer *snap = root->commit_root;
263 struct btrfs_key snap_key;
265 if (root->commit_root == root->node)
266 return 0;
268 memcpy(&snap_key, &root->root_key, sizeof(snap_key));
269 root->root_key.offset++;
271 btrfs_set_root_bytenr(&root->root_item, root->node->bytenr);
272 root->root_item.level =
273 btrfs_header_level(&root->node->node.header);
274 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
275 &root->root_key, &root->root_item);
276 BUG_ON(ret);
278 ret = commit_tree_roots(trans, root->fs_info);
279 BUG_ON(ret);
281 ret = __commit_transaction(trans, root);
282 BUG_ON(ret);
284 write_ctree_super(trans, root, s);
285 btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
286 btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
288 root->commit_root = root->node;
289 root->node->count++;
290 ret = btrfs_drop_snapshot(trans, root, snap);
291 BUG_ON(ret);
293 ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
294 BUG_ON(ret);
295 root->fs_info->generation = root->root_key.offset + 1;
297 return ret;
300 static int __setup_root(struct btrfs_super_block *super,
301 struct btrfs_root *root,
302 struct btrfs_fs_info *fs_info,
303 u64 objectid, int fp)
305 root->node = NULL;
306 root->commit_root = NULL;
307 root->sectorsize = btrfs_super_sectorsize(super);
308 root->nodesize = btrfs_super_nodesize(super);
309 root->leafsize = btrfs_super_leafsize(super);
310 root->ref_cows = 0;
311 root->fs_info = fs_info;
312 memset(&root->root_key, 0, sizeof(root->root_key));
313 memset(&root->root_item, 0, sizeof(root->root_item));
314 root->root_key.objectid = objectid;
315 return 0;
318 struct btrfs_buffer *read_root_block(struct btrfs_root *root, u64 bytenr,
319 u8 level)
321 struct btrfs_buffer *node;
322 u32 size = btrfs_level_size(root, level);
324 node = read_tree_block(root, bytenr, size);
325 BUG_ON(!node);
326 return node;
329 static int find_and_setup_root(struct btrfs_super_block *super,
330 struct btrfs_root *tree_root,
331 struct btrfs_fs_info *fs_info,
332 u64 objectid,
333 struct btrfs_root *root, int fp)
335 int ret;
337 __setup_root(super, root, fs_info, objectid, fp);
338 ret = btrfs_find_last_root(tree_root, objectid,
339 &root->root_item, &root->root_key);
340 BUG_ON(ret);
341 root->node = read_root_block(root,
342 btrfs_root_bytenr(&root->root_item),
343 root->root_item.level);
344 BUG_ON(!root->node);
345 return 0;
/*
 * Open (creating it if necessary, mode 0600) the file @filename read/write
 * and set up the filesystem from it via open_ctree_fd().
 *
 * Returns the fs root, or NULL if the file cannot be opened or
 * open_ctree_fd() fails.  The descriptor is closed on that failure path so
 * it does not leak; on success it is owned by the returned filesystem and
 * closed by close_ctree().
 */
struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super)
{
	struct btrfs_root *root;
	int fp;

	fp = open(filename, O_CREAT | O_RDWR, 0600);
	if (fp < 0)
		return NULL;

	root = open_ctree_fd(fp, super);
	if (!root)
		close(fp);
	return root;
}
359 struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super)
361 struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
362 struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
363 struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
364 struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
365 int ret;
367 INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL);
368 INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
369 INIT_LIST_HEAD(&fs_info->trans);
370 INIT_LIST_HEAD(&fs_info->cache);
371 pending_tree_init(&fs_info->pending_tree);
372 pending_tree_init(&fs_info->pinned_tree);
373 pending_tree_init(&fs_info->del_pending);
374 fs_info->cache_size = 0;
375 fs_info->fp = fp;
376 fs_info->running_transaction = NULL;
377 fs_info->fs_root = root;
378 fs_info->tree_root = tree_root;
379 fs_info->extent_root = extent_root;
380 fs_info->last_inode_alloc = 0;
381 fs_info->last_inode_alloc_dirid = 0;
382 fs_info->disk_super = super;
383 memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
385 ret = pread(fp, super, sizeof(struct btrfs_super_block),
386 BTRFS_SUPER_INFO_OFFSET);
387 if (ret == 0 || btrfs_super_root(super) == 0) {
388 BUG();
389 return NULL;
391 BUG_ON(ret < 0);
393 __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp);
394 tree_root->node = read_root_block(tree_root, btrfs_super_root(super),
395 btrfs_super_root_level(super));
396 BUG_ON(!tree_root->node);
398 ret = find_and_setup_root(super, tree_root, fs_info,
399 BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp);
400 BUG_ON(ret);
402 ret = find_and_setup_root(super, tree_root, fs_info,
403 BTRFS_FS_TREE_OBJECTID, root, fp);
404 BUG_ON(ret);
406 root->commit_root = root->node;
407 root->node->count++;
408 root->ref_cows = 1;
409 root->fs_info->generation = root->root_key.offset + 1;
410 btrfs_read_block_groups(root);
411 return root;
414 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
415 *root, struct btrfs_super_block *s)
417 int ret;
419 btrfs_set_super_root(s, root->fs_info->tree_root->node->bytenr);
420 btrfs_set_super_root_level(s,
421 btrfs_header_level(&root->fs_info->tree_root->node->node.header));
422 btrfs_csum_super(root, s);
424 ret = pwrite(root->fs_info->fp, s, sizeof(*s),
425 BTRFS_SUPER_INFO_OFFSET);
426 if (ret != sizeof(*s)) {
427 fprintf(stderr, "failed to write new super block err %d\n", ret);
428 return ret;
430 return 0;
433 static int drop_cache(struct btrfs_root *root)
435 while(!list_empty(&root->fs_info->cache)) {
436 struct btrfs_buffer *b = list_entry(root->fs_info->cache.next,
437 struct btrfs_buffer,
438 cache);
439 list_del_init(&b->cache);
440 btrfs_block_release(root, b);
442 return 0;
445 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
447 int ret;
448 struct btrfs_trans_handle *trans;
450 trans = root->fs_info->running_transaction;
451 btrfs_commit_transaction(trans, root, s);
452 ret = commit_tree_roots(trans, root->fs_info);
453 BUG_ON(ret);
454 ret = __commit_transaction(trans, root);
455 BUG_ON(ret);
456 write_ctree_super(trans, root, s);
457 drop_cache(root);
458 BUG_ON(!list_empty(&root->fs_info->trans));
460 btrfs_free_block_groups(root->fs_info);
461 close(root->fs_info->fp);
462 if (root->node)
463 btrfs_block_release(root, root->node);
464 if (root->fs_info->extent_root->node)
465 btrfs_block_release(root->fs_info->extent_root,
466 root->fs_info->extent_root->node);
467 if (root->fs_info->tree_root->node)
468 btrfs_block_release(root->fs_info->tree_root,
469 root->fs_info->tree_root->node);
470 btrfs_block_release(root, root->commit_root);
471 free(root);
472 printf("on close %llu blocks are allocated\n",
473 (unsigned long long)allocated_bytes);
474 return 0;
477 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf)
479 buf->count--;
480 if (buf->count < 0)
481 BUG();
482 if (buf->count == 0) {
483 BUG_ON(!list_empty(&buf->cache));
484 BUG_ON(!list_empty(&buf->dirty));
485 if (!radix_tree_lookup(&root->fs_info->cache_radix,
486 buf->bytenr))
487 BUG();
489 radix_tree_delete(&root->fs_info->cache_radix, buf->bytenr);
490 BUG_ON(allocated_bytes == 0);
491 allocated_bytes -= buf->size;
492 BUG_ON(root->fs_info->cache_size == 0);
493 root->fs_info->cache_size -= buf->size;
495 memset(buf, 0, sizeof(*buf));
496 free(buf);