i386 fixes from axboe
[btrfs-progs-unstable.git] / btrfsck.c
blobeb8e38e0a45759f4cece0486b5fa3b69636d763a
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #define _XOPEN_SOURCE 500
20 #include <stdio.h>
21 #include <stdlib.h>
22 #define __USE_GNU
23 #include <fcntl.h>
24 #include "kerncompat.h"
25 #include "radix-tree.h"
26 #include "ctree.h"
27 #include "disk-io.h"
28 #include "print-tree.h"
29 #include "transaction.h"
30 #include "bit-radix.h"
32 static u64 blocks_used = 0;
33 static u64 total_csum_bytes = 0;
34 static u64 total_btree_blocks = 0;
35 static u64 btree_space_waste = 0;
37 struct extent_record {
38 struct btrfs_disk_key parent_key;
39 u64 start;
40 u64 nr;
41 u64 owner;
42 u32 refs;
43 u32 extent_item_refs;
44 int checked;
47 static int check_node(struct btrfs_root *root,
48 struct btrfs_disk_key *parent_key,
49 struct btrfs_node *node)
51 int i;
52 u32 nritems = btrfs_header_nritems(&node->header);
54 if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
55 return 1;
56 if (parent_key->flags) {
57 if (memcmp(parent_key, &node->ptrs[0].key,
58 sizeof(struct btrfs_disk_key)))
59 return 1;
61 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
62 struct btrfs_key cpukey;
63 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
64 if (btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0)
65 return 1;
67 return 0;
70 static int check_leaf(struct btrfs_root *root,
71 struct btrfs_disk_key *parent_key,
72 struct btrfs_leaf *leaf)
74 int i;
75 u32 nritems = btrfs_header_nritems(&leaf->header);
77 if (btrfs_header_level(&leaf->header) != 0) {
78 fprintf(stderr, "leaf is not a leaf %llu\n",
79 (unsigned long long)btrfs_header_blocknr(&leaf->header));
80 return 1;
82 if (btrfs_leaf_free_space(root, leaf) < 0) {
83 fprintf(stderr, "leaf free space incorrect %llu %d\n",
84 (unsigned long long)btrfs_header_blocknr(&leaf->header),
85 btrfs_leaf_free_space(root, leaf));
86 return 1;
89 if (nritems == 0)
90 return 0;
92 if (parent_key->flags && memcmp(parent_key, &leaf->items[0].key,
93 sizeof(struct btrfs_disk_key))) {
94 fprintf(stderr, "leaf parent key incorrect %llu\n",
95 (unsigned long long)btrfs_header_blocknr(&leaf->header));
96 return 1;
98 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
99 struct btrfs_key cpukey;
100 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
101 if (btrfs_comp_keys(&leaf->items[i].key,
102 &cpukey) >= 0)
103 return 1;
104 if (btrfs_item_offset(leaf->items + i) !=
105 btrfs_item_end(leaf->items + i + 1))
106 return 1;
107 if (i == 0) {
108 if (btrfs_item_offset(leaf->items + i) +
109 btrfs_item_size(leaf->items + i) !=
110 BTRFS_LEAF_DATA_SIZE(root))
111 return 1;
114 return 0;
117 static int maybe_free_extent_rec(struct radix_tree_root *extent_radix,
118 struct extent_record *rec)
120 if (rec->checked && rec->extent_item_refs == rec->refs &&
121 rec->refs > 0) {
122 radix_tree_delete(extent_radix, rec->start);
123 free(rec);
125 return 0;
128 static int check_block(struct btrfs_root *root,
129 struct radix_tree_root *extent_radix,
130 struct btrfs_buffer *buf)
132 struct extent_record *rec;
133 int ret = 1;
135 rec = radix_tree_lookup(extent_radix, buf->blocknr);
136 if (!rec)
137 return 1;
138 if (btrfs_is_leaf(&buf->node)) {
139 ret = check_leaf(root, &rec->parent_key, &buf->leaf);
140 } else {
141 ret = check_node(root, &rec->parent_key, &buf->node);
143 rec->checked = 1;
144 if (!ret)
145 maybe_free_extent_rec(extent_radix, rec);
146 return ret;
149 static int add_extent_rec(struct radix_tree_root *extent_radix,
150 struct btrfs_disk_key *parent_key,
151 u64 ref, u64 start, u64 nr, u64 owner,
152 u32 extent_item_refs, int inc_ref, int set_checked)
154 struct extent_record *rec;
155 int ret = 0;
156 rec = radix_tree_lookup(extent_radix, start);
157 if (rec) {
158 if (inc_ref)
159 rec->refs++;
160 if (start != rec->start) {
161 fprintf(stderr, "warning, start mismatch %llu %llu\n",
162 (unsigned long long)rec->start,
163 (unsigned long long)start);
164 ret = 1;
166 if (extent_item_refs) {
167 if (rec->extent_item_refs) {
168 fprintf(stderr, "block %llu rec extent_item_refs %u, passed %u\n", (unsigned long long)start, rec->extent_item_refs, extent_item_refs);
170 rec->extent_item_refs = extent_item_refs;
172 if (set_checked)
173 rec->checked = 1;
174 maybe_free_extent_rec(extent_radix, rec);
175 return ret;
177 rec = malloc(sizeof(*rec));
178 if (start == 0)
179 extent_item_refs = 0;
180 rec->start = start;
181 rec->nr = nr;
182 rec->owner = owner;
183 rec->checked = 0;
185 if (inc_ref)
186 rec->refs = 1;
187 else
188 rec->refs = 0;
190 if (extent_item_refs)
191 rec->extent_item_refs = extent_item_refs;
192 else
193 rec->extent_item_refs = 0;
195 if (parent_key)
196 memcpy(&rec->parent_key, parent_key, sizeof(*parent_key));
197 else
198 memset(&rec->parent_key, 0, sizeof(*parent_key));
200 ret = radix_tree_insert(extent_radix, start, rec);
201 BUG_ON(ret);
202 blocks_used += nr;
203 if (set_checked)
204 rec->checked = 1;
205 return ret;
208 static int add_pending(struct radix_tree_root *pending,
209 struct radix_tree_root *seen, u64 blocknr)
211 if (test_radix_bit(seen, blocknr))
212 return -EEXIST;
213 set_radix_bit(pending, blocknr);
214 set_radix_bit(seen, blocknr);
215 return 0;
217 static int pick_next_pending(struct radix_tree_root *pending,
218 struct radix_tree_root *reada,
219 struct radix_tree_root *nodes,
220 u64 last, unsigned long *bits, int bits_nr,
221 int *reada_bits)
223 unsigned long node_start = last;
224 int ret;
225 ret = find_first_radix_bit(reada, bits, 0, 1);
226 if (ret) {
227 *reada_bits = 1;
228 return ret;
230 *reada_bits = 0;
231 if (node_start > 8)
232 node_start -= 8;
233 ret = find_first_radix_bit(nodes, bits, node_start, bits_nr);
234 if (!ret)
235 ret = find_first_radix_bit(nodes, bits, 0, bits_nr);
236 if (ret) {
237 if (bits_nr - ret > 8) {
238 int ret2;
239 u64 sequential;
240 ret2 = find_first_radix_bit(pending, bits + ret,
241 bits[0], bits_nr - ret);
242 sequential = bits[0];
243 while(ret2 > 0) {
244 if (bits[ret] - sequential > 8)
245 break;
246 sequential = bits[ret];
247 ret++;
248 ret2--;
251 return ret;
253 return find_first_radix_bit(pending, bits, 0, bits_nr);
255 static struct btrfs_buffer reada_buf;
257 static int run_next_block(struct btrfs_root *root,
258 unsigned long *bits,
259 int bits_nr,
260 u64 *last,
261 struct radix_tree_root *pending,
262 struct radix_tree_root *seen,
263 struct radix_tree_root *reada,
264 struct radix_tree_root *nodes,
265 struct radix_tree_root *extent_radix)
267 struct btrfs_buffer *buf;
268 u64 blocknr;
269 int ret;
270 int i;
271 int nritems;
272 struct btrfs_leaf *leaf;
273 struct btrfs_node *node;
274 struct btrfs_disk_key *disk_key;
275 int reada_bits;
277 u64 last_block = 0;
278 ret = pick_next_pending(pending, reada, nodes, *last, bits,
279 bits_nr, &reada_bits);
280 if (ret == 0) {
281 return 1;
283 if (!reada_bits) {
284 for(i = 0; i < ret; i++) {
285 u64 offset;
286 set_radix_bit(reada, bits[i]);
287 btrfs_map_bh_to_logical(root, &reada_buf, bits[i]);
288 offset = reada_buf.dev_blocknr * root->blocksize;
289 last_block = bits[i];
290 readahead(reada_buf.fd, offset, root->blocksize);
293 *last = bits[0];
294 blocknr = bits[0];
295 clear_radix_bit(pending, blocknr);
296 clear_radix_bit(reada, blocknr);
297 clear_radix_bit(nodes, blocknr);
298 buf = read_tree_block(root, blocknr);
299 nritems = btrfs_header_nritems(&buf->node.header);
300 ret = check_block(root, extent_radix, buf);
301 if (ret) {
302 fprintf(stderr, "bad block %llu\n",
303 (unsigned long long)blocknr);
305 if (btrfs_is_leaf(&buf->node)) {
306 leaf = &buf->leaf;
307 btree_space_waste += btrfs_leaf_free_space(root, leaf);
308 for (i = 0; i < nritems; i++) {
309 struct btrfs_file_extent_item *fi;
310 disk_key = &leaf->items[i].key;
311 if (btrfs_disk_key_type(disk_key) ==
312 BTRFS_EXTENT_ITEM_KEY) {
313 struct btrfs_key found;
314 struct btrfs_extent_item *ei;
315 btrfs_disk_key_to_cpu(&found,
316 &leaf->items[i].key);
317 ei = btrfs_item_ptr(leaf, i,
318 struct btrfs_extent_item);
319 add_extent_rec(extent_radix, NULL, 0,
320 found.objectid,
321 found.offset,
322 btrfs_extent_owner(ei),
323 btrfs_extent_refs(ei), 0, 0);
324 continue;
326 if (btrfs_disk_key_type(disk_key) ==
327 BTRFS_CSUM_ITEM_KEY) {
328 total_csum_bytes +=
329 btrfs_item_size(leaf->items + i);
330 continue;
332 if (btrfs_disk_key_type(disk_key) ==
333 BTRFS_BLOCK_GROUP_ITEM_KEY) {
334 struct btrfs_block_group_item *bi;
335 bi = btrfs_item_ptr(leaf, i,
336 struct btrfs_block_group_item);
337 #if 0
338 fprintf(stderr,"block group %Lu %Lu used %Lu ",
339 btrfs_disk_key_objectid(disk_key),
340 btrfs_disk_key_offset(disk_key),
341 btrfs_block_group_used(bi));
342 fprintf(stderr, "flags %x\n", bi->flags);
343 #endif
344 continue;
346 if (btrfs_disk_key_type(&leaf->items[i].key) !=
347 BTRFS_EXTENT_DATA_KEY)
348 continue;
349 fi = btrfs_item_ptr(leaf, i,
350 struct btrfs_file_extent_item);
351 if (btrfs_file_extent_type(fi) !=
352 BTRFS_FILE_EXTENT_REG)
353 continue;
354 if (btrfs_file_extent_disk_blocknr(fi) == 0)
355 continue;
356 ret = add_extent_rec(extent_radix, NULL, blocknr,
357 btrfs_file_extent_disk_blocknr(fi),
358 btrfs_file_extent_disk_num_blocks(fi),
359 btrfs_disk_key_objectid(&leaf->items[i].key),
360 0, 1, 1);
361 BUG_ON(ret);
363 } else {
364 int level;
365 node = &buf->node;
366 level = btrfs_header_level(&node->header);
367 for (i = 0; i < nritems; i++) {
368 u64 ptr = btrfs_node_blockptr(node, i);
369 ret = add_extent_rec(extent_radix,
370 &node->ptrs[i].key,
371 blocknr, ptr, 1,
372 btrfs_header_owner(&node->header),
373 0, 1, 0);
374 BUG_ON(ret);
375 if (level > 1) {
376 add_pending(nodes, seen, ptr);
377 } else {
378 add_pending(pending, seen, ptr);
381 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
382 nritems) * sizeof(struct btrfs_key_ptr);
384 btrfs_block_release(root, buf);
385 total_btree_blocks++;
386 return 0;
389 static int add_root_to_pending(struct btrfs_buffer *buf,
390 unsigned long *bits,
391 int bits_nr,
392 struct radix_tree_root *extent_radix,
393 struct radix_tree_root *pending,
394 struct radix_tree_root *seen,
395 struct radix_tree_root *reada,
396 struct radix_tree_root *nodes)
398 if (btrfs_header_level(&buf->node.header) > 0)
399 add_pending(nodes, seen, buf->blocknr);
400 else
401 add_pending(pending, seen, buf->blocknr);
402 add_extent_rec(extent_radix, NULL, 0, buf->blocknr, 1,
403 btrfs_header_owner(&buf->node.header), 0, 1, 0);
404 return 0;
407 int check_extent_refs(struct btrfs_root *root,
408 struct radix_tree_root *extent_radix)
410 struct extent_record *rec[64];
411 int i;
412 int ret;
413 int err = 0;
415 while(1) {
416 ret = radix_tree_gang_lookup(extent_radix, (void **)rec, 0,
417 ARRAY_SIZE(rec));
418 if (!ret)
419 break;
420 for (i = 0; i < ret; i++) {
421 if (rec[i]->refs != rec[i]->extent_item_refs) {
422 fprintf(stderr, "ref mismatch on [%llu %llu] ",
423 (unsigned long long)rec[i]->start,
424 (unsigned long long)rec[i]->nr);
425 fprintf(stderr, "extent item %u, found %u\n",
426 rec[i]->extent_item_refs,
427 rec[i]->refs);
428 err = 1;
430 radix_tree_delete(extent_radix, rec[i]->start);
431 free(rec[i]);
434 return err;
437 int main(int ac, char **av) {
438 struct btrfs_super_block super;
439 struct btrfs_root *root;
440 struct radix_tree_root extent_radix;
441 struct radix_tree_root seen;
442 struct radix_tree_root pending;
443 struct radix_tree_root reada;
444 struct radix_tree_root nodes;
445 struct btrfs_path path;
446 struct btrfs_key key;
447 struct btrfs_key found_key;
448 int ret;
449 u64 last = 0;
450 unsigned long *bits;
451 int bits_nr;
452 struct btrfs_leaf *leaf;
453 int slot;
454 struct btrfs_root_item *ri;
456 radix_tree_init();
459 INIT_RADIX_TREE(&extent_radix, GFP_NOFS);
460 init_bit_radix(&seen);
461 init_bit_radix(&pending);
462 init_bit_radix(&reada);
463 init_bit_radix(&nodes);
465 root = open_ctree(av[1], &super);
467 bits_nr = 1024 * 1024 / root->blocksize;
468 bits = malloc(bits_nr * sizeof(unsigned long));
469 if (!bits) {
470 perror("malloc");
471 exit(1);
474 add_root_to_pending(root->fs_info->tree_root->node, bits, bits_nr,
475 &extent_radix, &pending, &seen, &reada, &nodes);
477 btrfs_init_path(&path);
478 key.offset = 0;
479 key.objectid = 0;
480 key.flags = 0;
481 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
482 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
483 &key, &path, 0, 0);
484 BUG_ON(ret < 0);
485 while(1) {
486 leaf = &path.nodes[0]->leaf;
487 slot = path.slots[0];
488 if (slot >= btrfs_header_nritems(&leaf->header)) {
489 ret = btrfs_next_leaf(root, &path);
490 if (ret != 0)
491 break;
492 leaf = &path.nodes[0]->leaf;
493 slot = path.slots[0];
495 btrfs_disk_key_to_cpu(&found_key,
496 &leaf->items[path.slots[0]].key);
497 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
498 struct btrfs_buffer *buf;
499 ri = btrfs_item_ptr(leaf, path.slots[0],
500 struct btrfs_root_item);
501 buf = read_tree_block(root->fs_info->tree_root,
502 btrfs_root_blocknr(ri));
503 add_root_to_pending(buf, bits, bits_nr, &extent_radix,
504 &pending, &seen, &reada, &nodes);
505 btrfs_block_release(root->fs_info->tree_root, buf);
507 path.slots[0]++;
509 btrfs_release_path(root, &path);
510 while(1) {
511 ret = run_next_block(root, bits, bits_nr, &last, &pending,
512 &seen, &reada, &nodes, &extent_radix);
513 if (ret != 0)
514 break;
516 ret = check_extent_refs(root, &extent_radix);
517 close_ctree(root, &super);
518 printf("found %llu blocks used err is %d\n",
519 (unsigned long long)blocks_used, ret);
520 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
521 printf("total tree blocks: %llu\n",
522 (unsigned long long)total_btree_blocks);
523 printf("btree space waste bytes: %llu\n",
524 (unsigned long long)btree_space_waste);
525 return ret;