btrfs-progs: check: make low memory mode support partially dropped snapshots
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob df97d3b6eea841c9df0a6a330e2ed2c2c84fed4a
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
45 enum task_position {
46 TASK_EXTENTS,
47 TASK_FREE_SPACE,
48 TASK_FS_ROOTS,
49 TASK_NOTHING, /* must be the last element */
52 struct task_ctx {
53 int progress_enabled;
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
78 CHECK_MODE_ORIGINAL,
79 CHECK_MODE_LOWMEM,
80 CHECK_MODE_UNKNOWN,
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
102 union {
103 u64 parent;
104 u64 root;
106 u64 owner;
107 u64 offset;
108 u64 disk_bytenr;
109 u64 bytes;
110 u64 ram_bytes;
111 u32 num_refs;
112 u32 found_ref;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, but with the undetermined members removed
122 * and changed to use a list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
128 u64 root;
129 u64 objectid;
130 u64 offset;
131 u64 disk_bytenr;
132 u64 disk_len;
135 struct tree_backref {
136 struct extent_backref node;
137 union {
138 u64 parent;
139 u64 root;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
157 u64 start;
158 u64 max_size;
159 u64 nr;
160 u64 refs;
161 u64 extent_item_refs;
162 u64 generation;
163 u64 parent_generation;
164 u64 info_objectid;
165 u32 num_duplicates;
166 u8 info_level;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
188 unsigned int filetype:8;
189 int errors;
190 unsigned int ref_type;
191 u64 dir;
192 u64 index;
193 u16 namelen;
194 char name[0];
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
204 u64 objectid;
205 u64 bytenr;
206 u64 last_snapshot;
207 u8 level;
208 u8 drop_level;
209 int level_size;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
228 struct rb_node node;
229 u64 start;
230 u64 len;
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
243 int errors;
245 u64 ino;
246 u32 nlink;
247 u32 imode;
248 u64 isize;
249 u64 nbytes;
251 u32 found_link;
252 u64 found_size;
253 u64 extent_start;
254 u64 extent_end;
255 struct rb_root holes;
256 struct list_head orphan_extents;
258 u32 refs;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
284 int errors;
285 u64 ref_root;
286 u64 dir;
287 u64 index;
288 u16 namelen;
289 char name[0];
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
297 struct root_record {
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
301 u64 objectid;
302 u32 found_ref;
305 struct ptr_node {
306 struct cache_extent cache;
307 void *data;
310 struct shared_node {
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
315 u32 refs;
318 struct block_info {
319 u64 start;
320 u32 size;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
326 int active_node;
327 int root_level;
330 struct bad_item {
331 struct btrfs_key key;
332 u64 root_id;
333 struct list_head list;
336 struct extent_entry {
337 u64 bytenr;
338 u64 bytes;
339 int count;
340 int broken;
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
346 u8 level;
347 /* number of nodes at this level, must be 1 for a root */
348 int node_count;
349 u64 bytenr;
350 u64 gen;
351 struct cache_extent cache_extent;
355 * Error bits for the low memory mode check.
357 * Currently no caller cares about them yet; they are only used internally
358 * for error classification.
360 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
361 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
362 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
363 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
364 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
365 #define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
366 #define ITEM_SIZE_MISMATCH (1 << 6) /* Bad item size */
367 #define UNKNOWN_TYPE (1 << 7) /* Unknown type */
368 #define ACCOUNTING_MISMATCH (1 << 8) /* Used space accounting error */
369 #define CHUNK_TYPE_MISMATCH (1 << 9)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
375 uint32_t count = 0;
376 static char *task_position_string[] = {
377 "checking extents",
378 "checking free space cache",
379 "checking fs roots",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
385 return NULL;
387 while (1) {
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
390 count++;
391 fflush(stdout);
392 task_period_wait(priv->info);
394 return NULL;
397 static int print_status_return(void *p)
399 printf("\n");
400 fflush(stdout);
402 return 0;
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatibility function to allow reuse of old code */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
423 return (u64)-1;
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426 return hole->start;
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
438 return -1;
439 if (hole1->start < hole2->start)
440 return 1;
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be the merge center.
445 * Identical holes will be merged later.
447 return -1;
448 /* Hole 2 will be merge center */
449 return 1;
453 * Add a hole to the record
455 * This will do hole merging for copy_file_extent_holes(),
456 * which ensures there won't be adjacent holes left over.
458 static int add_file_extent_hole(struct rb_root *holes,
459 u64 start, u64 len)
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
466 if (!hole)
467 return -ENOMEM;
468 hole->start = start;
469 hole->len = len;
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476 node);
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
481 free(prev);
482 prev = NULL;
485 /* iterate merge with next holes */
486 while (1) {
487 if (!rb_next(&hole->node))
488 break;
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490 node);
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
494 hole->start;
495 rb_erase(&next->node, holes);
496 free(next);
497 next = NULL;
498 } else
499 break;
501 return 0;
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
507 u64 start;
509 hole = (struct file_extent_hole *)data;
510 start = hole->start;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
514 return -1;
515 if (start >= hole->start && start < hole->start + hole->len)
516 return 0;
517 return 1;
521 * Delete a hole in the record
523 * This will split the hole if needed and is much stricter than add.
525 static int del_file_extent_hole(struct rb_root *holes,
526 u64 start, u64 len)
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
530 u64 prev_start = 0;
531 u64 prev_len = 0;
532 u64 next_start = 0;
533 u64 next_len = 0;
534 struct rb_node *node;
535 int have_prev = 0;
536 int have_next = 0;
537 int ret = 0;
539 tmp.start = start;
540 tmp.len = len;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
542 if (!node)
543 return -EEXIST;
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
546 return -EEXIST;
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exist.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
555 have_prev = 1;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
560 have_next = 1;
562 rb_erase(node, holes);
563 free(hole);
564 if (have_prev) {
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
566 if (ret < 0)
567 return ret;
569 if (have_next) {
570 ret = add_file_extent_hole(holes, next_start, next_len);
571 if (ret < 0)
572 return ret;
574 return 0;
577 static int copy_file_extent_holes(struct rb_root *dst,
578 struct rb_root *src)
580 struct file_extent_hole *hole;
581 struct rb_node *node;
582 int ret = 0;
584 node = rb_first(src);
585 while (node) {
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
588 if (ret)
589 break;
590 node = rb_next(node);
592 return ret;
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
601 while (node) {
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
604 free(hole);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
624 #define S_SHIFT 12
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
647 return -1;
648 else if (rec1->devid < rec2->devid)
649 return 1;
650 else
651 return 0;
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
662 size_t size;
663 int ret;
665 rec = malloc(sizeof(*rec));
666 if (!rec)
667 return ERR_PTR(-ENOMEM);
668 memcpy(rec, orig_rec, sizeof(*rec));
669 rec->refs = 1;
670 INIT_LIST_HEAD(&rec->backrefs);
671 INIT_LIST_HEAD(&rec->orphan_extents);
672 rec->holes = RB_ROOT;
674 list_for_each_entry(orig, &orig_rec->backrefs, list) {
675 size = sizeof(*orig) + orig->namelen + 1;
676 backref = malloc(size);
677 if (!backref) {
678 ret = -ENOMEM;
679 goto cleanup;
681 memcpy(backref, orig, size);
682 list_add_tail(&backref->list, &rec->backrefs);
684 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685 dst_orphan = malloc(sizeof(*dst_orphan));
686 if (!dst_orphan) {
687 ret = -ENOMEM;
688 goto cleanup;
690 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
694 BUG_ON(ret < 0);
696 return rec;
698 cleanup:
699 if (!list_empty(&rec->backrefs))
700 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701 list_del(&orig->list);
702 free(orig);
705 if (!list_empty(&rec->orphan_extents))
706 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707 list_del(&orig->list);
708 free(orig);
711 free(rec);
713 return ERR_PTR(ret);
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
717 u64 objectid)
719 struct orphan_data_extent *orphan;
721 if (list_empty(orphan_extents))
722 return;
723 printf("The following data extent is lost in tree %llu:\n",
724 objectid);
725 list_for_each_entry(orphan, orphan_extents, list) {
726 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727 orphan->objectid, orphan->offset, orphan->disk_bytenr,
728 orphan->disk_len);
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
734 u64 root_objectid = root->root_key.objectid;
735 int errors = rec->errors;
737 if (!errors)
738 return;
739 /* For reloc root errors, print the corresponding fs root objectid */
740 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741 root_objectid = root->root_key.offset;
742 fprintf(stderr, "reloc");
744 fprintf(stderr, "root %llu inode %llu errors %x",
745 (unsigned long long) root_objectid,
746 (unsigned long long) rec->ino, rec->errors);
748 if (errors & I_ERR_NO_INODE_ITEM)
749 fprintf(stderr, ", no inode item");
750 if (errors & I_ERR_NO_ORPHAN_ITEM)
751 fprintf(stderr, ", no orphan item");
752 if (errors & I_ERR_DUP_INODE_ITEM)
753 fprintf(stderr, ", dup inode item");
754 if (errors & I_ERR_DUP_DIR_INDEX)
755 fprintf(stderr, ", dup dir index");
756 if (errors & I_ERR_ODD_DIR_ITEM)
757 fprintf(stderr, ", odd dir item");
758 if (errors & I_ERR_ODD_FILE_EXTENT)
759 fprintf(stderr, ", odd file extent");
760 if (errors & I_ERR_BAD_FILE_EXTENT)
761 fprintf(stderr, ", bad file extent");
762 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763 fprintf(stderr, ", file extent overlap");
764 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765 fprintf(stderr, ", file extent discount");
766 if (errors & I_ERR_DIR_ISIZE_WRONG)
767 fprintf(stderr, ", dir isize wrong");
768 if (errors & I_ERR_FILE_NBYTES_WRONG)
769 fprintf(stderr, ", nbytes wrong");
770 if (errors & I_ERR_ODD_CSUM_ITEM)
771 fprintf(stderr, ", odd csum item");
772 if (errors & I_ERR_SOME_CSUM_MISSING)
773 fprintf(stderr, ", some csum missing");
774 if (errors & I_ERR_LINK_COUNT_WRONG)
775 fprintf(stderr, ", link count wrong");
776 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777 fprintf(stderr, ", orphan file extent");
778 fprintf(stderr, "\n");
779 /* Print the orphan extents if needed */
780 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
783 /* Print the holes if needed */
784 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785 struct file_extent_hole *hole;
786 struct rb_node *node;
787 int found = 0;
789 node = rb_first(&rec->holes);
790 fprintf(stderr, "Found file extent holes:\n");
791 while (node) {
792 found = 1;
793 hole = rb_entry(node, struct file_extent_hole, node);
794 fprintf(stderr, "\tstart: %llu, len: %llu\n",
795 hole->start, hole->len);
796 node = rb_next(node);
798 if (!found)
799 fprintf(stderr, "\tstart: 0, len: %llu\n",
800 round_up(rec->isize, root->sectorsize));
804 static void print_ref_error(int errors)
806 if (errors & REF_ERR_NO_DIR_ITEM)
807 fprintf(stderr, ", no dir item");
808 if (errors & REF_ERR_NO_DIR_INDEX)
809 fprintf(stderr, ", no dir index");
810 if (errors & REF_ERR_NO_INODE_REF)
811 fprintf(stderr, ", no inode ref");
812 if (errors & REF_ERR_DUP_DIR_ITEM)
813 fprintf(stderr, ", dup dir item");
814 if (errors & REF_ERR_DUP_DIR_INDEX)
815 fprintf(stderr, ", dup dir index");
816 if (errors & REF_ERR_DUP_INODE_REF)
817 fprintf(stderr, ", dup inode ref");
818 if (errors & REF_ERR_INDEX_UNMATCH)
819 fprintf(stderr, ", index mismatch");
820 if (errors & REF_ERR_FILETYPE_UNMATCH)
821 fprintf(stderr, ", filetype mismatch");
822 if (errors & REF_ERR_NAME_TOO_LONG)
823 fprintf(stderr, ", name too long");
824 if (errors & REF_ERR_NO_ROOT_REF)
825 fprintf(stderr, ", no root ref");
826 if (errors & REF_ERR_NO_ROOT_BACKREF)
827 fprintf(stderr, ", no root backref");
828 if (errors & REF_ERR_DUP_ROOT_REF)
829 fprintf(stderr, ", dup root ref");
830 if (errors & REF_ERR_DUP_ROOT_BACKREF)
831 fprintf(stderr, ", dup root backref");
832 fprintf(stderr, "\n");
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
836 u64 ino, int mod)
838 struct ptr_node *node;
839 struct cache_extent *cache;
840 struct inode_record *rec = NULL;
841 int ret;
843 cache = lookup_cache_extent(inode_cache, ino, 1);
844 if (cache) {
845 node = container_of(cache, struct ptr_node, cache);
846 rec = node->data;
847 if (mod && rec->refs > 1) {
848 node->data = clone_inode_rec(rec);
849 if (IS_ERR(node->data))
850 return node->data;
851 rec->refs--;
852 rec = node->data;
854 } else if (mod) {
855 rec = calloc(1, sizeof(*rec));
856 if (!rec)
857 return ERR_PTR(-ENOMEM);
858 rec->ino = ino;
859 rec->extent_start = (u64)-1;
860 rec->refs = 1;
861 INIT_LIST_HEAD(&rec->backrefs);
862 INIT_LIST_HEAD(&rec->orphan_extents);
863 rec->holes = RB_ROOT;
865 node = malloc(sizeof(*node));
866 if (!node) {
867 free(rec);
868 return ERR_PTR(-ENOMEM);
870 node->cache.start = ino;
871 node->cache.size = 1;
872 node->data = rec;
874 if (ino == BTRFS_FREE_INO_OBJECTID)
875 rec->found_link = 1;
877 ret = insert_cache_extent(inode_cache, &node->cache);
878 if (ret)
879 return ERR_PTR(-EEXIST);
881 return rec;
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
886 struct orphan_data_extent *orphan;
888 while (!list_empty(orphan_extents)) {
889 orphan = list_entry(orphan_extents->next,
890 struct orphan_data_extent, list);
891 list_del(&orphan->list);
892 free(orphan);
896 static void free_inode_rec(struct inode_record *rec)
898 struct inode_backref *backref;
900 if (--rec->refs > 0)
901 return;
903 while (!list_empty(&rec->backrefs)) {
904 backref = to_inode_backref(rec->backrefs.next);
905 list_del(&backref->list);
906 free(backref);
908 free_orphan_data_extents(&rec->orphan_extents);
909 free_file_extent_holes(&rec->holes);
910 free(rec);
913 static int can_free_inode_rec(struct inode_record *rec)
915 if (!rec->errors && rec->checked && rec->found_inode_item &&
916 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
917 return 1;
918 return 0;
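/*
 * Cross-check the state collected for an inode record (backref filetype,
 * dir isize, nbytes, link count, csum coverage), record any error bits,
 * and drop the record from the cache once it is fully consistent.
 */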
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922 struct inode_record *rec)
924 struct cache_extent *cache;
925 struct inode_backref *tmp, *backref;
926 struct ptr_node *node;
927 unsigned char filetype;
929 if (!rec->found_inode_item)
930 return;
932 filetype = imode_to_type(rec->imode);
933 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934 if (backref->found_dir_item && backref->found_dir_index) {
935 if (backref->filetype != filetype)
936 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937 if (!backref->errors && backref->found_inode_ref &&
938 rec->nlink == rec->found_link) {
939 list_del(&backref->list);
940 free(backref);
945 if (!rec->checked || rec->merging)
946 return;
948 if (S_ISDIR(rec->imode)) {
949 if (rec->found_size != rec->isize)
950 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951 if (rec->found_file_extent)
952 rec->errors |= I_ERR_ODD_FILE_EXTENT;
953 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954 if (rec->found_dir_item)
955 rec->errors |= I_ERR_ODD_DIR_ITEM;
956 if (rec->found_size != rec->nbytes)
957 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958 if (rec->nlink > 0 && !no_holes &&
959 (rec->extent_end < rec->isize ||
960 first_extent_gap(&rec->holes) < rec->isize))
961 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
964 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965 if (rec->found_csum_item && rec->nodatasum)
966 rec->errors |= I_ERR_ODD_CSUM_ITEM;
967 if (rec->some_csum_missing && !rec->nodatasum)
968 rec->errors |= I_ERR_SOME_CSUM_MISSING;
971 BUG_ON(rec->refs != 1);
972 if (can_free_inode_rec(rec)) {
973 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974 node = container_of(cache, struct ptr_node, cache);
975 BUG_ON(node->data != rec);
976 remove_cache_extent(inode_cache, &node->cache);
977 free(node);
978 free_inode_rec(rec);
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
984 struct btrfs_path path;
985 struct btrfs_key key;
986 int ret;
988 key.objectid = BTRFS_ORPHAN_OBJECTID;
989 key.type = BTRFS_ORPHAN_ITEM_KEY;
990 key.offset = ino;
992 btrfs_init_path(&path);
993 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994 btrfs_release_path(&path);
995 if (ret > 0)
996 ret = -ENOENT;
997 return ret;
1000 static int process_inode_item(struct extent_buffer *eb,
1001 int slot, struct btrfs_key *key,
1002 struct shared_node *active_node)
1004 struct inode_record *rec;
1005 struct btrfs_inode_item *item;
1007 rec = active_node->current;
1008 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009 if (rec->found_inode_item) {
1010 rec->errors |= I_ERR_DUP_INODE_ITEM;
1011 return 1;
1013 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014 rec->nlink = btrfs_inode_nlink(eb, item);
1015 rec->isize = btrfs_inode_size(eb, item);
1016 rec->nbytes = btrfs_inode_nbytes(eb, item);
1017 rec->imode = btrfs_inode_mode(eb, item);
1018 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1019 rec->nodatasum = 1;
1020 rec->found_inode_item = 1;
1021 if (rec->nlink == 0)
1022 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023 maybe_free_inode_rec(&active_node->inode_cache, rec);
1024 return 0;
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1028 const char *name,
1029 int namelen, u64 dir)
1031 struct inode_backref *backref;
1033 list_for_each_entry(backref, &rec->backrefs, list) {
1034 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1035 break;
1036 if (backref->dir != dir || backref->namelen != namelen)
1037 continue;
1038 if (memcmp(name, backref->name, namelen))
1039 continue;
1040 return backref;
1043 backref = malloc(sizeof(*backref) + namelen + 1);
1044 if (!backref)
1045 return NULL;
1046 memset(backref, 0, sizeof(*backref));
1047 backref->dir = dir;
1048 backref->namelen = namelen;
1049 memcpy(backref->name, name, namelen);
1050 backref->name[namelen] = '\0';
1051 list_add_tail(&backref->list, &rec->backrefs);
1052 return backref;
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056 u64 ino, u64 dir, u64 index,
1057 const char *name, int namelen,
1058 int filetype, int itemtype, int errors)
1060 struct inode_record *rec;
1061 struct inode_backref *backref;
1063 rec = get_inode_rec(inode_cache, ino, 1);
1064 BUG_ON(IS_ERR(rec));
1065 backref = get_inode_backref(rec, name, namelen, dir);
1066 BUG_ON(!backref);
1067 if (errors)
1068 backref->errors |= errors;
1069 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070 if (backref->found_dir_index)
1071 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072 if (backref->found_inode_ref && backref->index != index)
1073 backref->errors |= REF_ERR_INDEX_UNMATCH;
1074 if (backref->found_dir_item && backref->filetype != filetype)
1075 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1077 backref->index = index;
1078 backref->filetype = filetype;
1079 backref->found_dir_index = 1;
1080 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1081 rec->found_link++;
1082 if (backref->found_dir_item)
1083 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084 if (backref->found_dir_index && backref->filetype != filetype)
1085 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1087 backref->filetype = filetype;
1088 backref->found_dir_item = 1;
1089 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091 if (backref->found_inode_ref)
1092 backref->errors |= REF_ERR_DUP_INODE_REF;
1093 if (backref->found_dir_index && backref->index != index)
1094 backref->errors |= REF_ERR_INDEX_UNMATCH;
1095 else
1096 backref->index = index;
1098 backref->ref_type = itemtype;
1099 backref->found_inode_ref = 1;
1100 } else {
1101 BUG_ON(1);
1104 maybe_free_inode_rec(inode_cache, rec);
1105 return 0;
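/*
 * Merge one inode record into another: re-add the source's backrefs into
 * the destination cache and combine size, link count and extent/hole state.
 */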
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109 struct cache_tree *dst_cache)
1111 struct inode_backref *backref;
1112 u32 dir_count = 0;
1113 int ret = 0;
1115 dst->merging = 1;
1116 list_for_each_entry(backref, &src->backrefs, list) {
1117 if (backref->found_dir_index) {
1118 add_inode_backref(dst_cache, dst->ino, backref->dir,
1119 backref->index, backref->name,
1120 backref->namelen, backref->filetype,
1121 BTRFS_DIR_INDEX_KEY, backref->errors);
1123 if (backref->found_dir_item) {
1124 dir_count++;
1125 add_inode_backref(dst_cache, dst->ino,
1126 backref->dir, 0, backref->name,
1127 backref->namelen, backref->filetype,
1128 BTRFS_DIR_ITEM_KEY, backref->errors);
1130 if (backref->found_inode_ref) {
1131 add_inode_backref(dst_cache, dst->ino,
1132 backref->dir, backref->index,
1133 backref->name, backref->namelen, 0,
1134 backref->ref_type, backref->errors);
1138 if (src->found_dir_item)
1139 dst->found_dir_item = 1;
1140 if (src->found_file_extent)
1141 dst->found_file_extent = 1;
1142 if (src->found_csum_item)
1143 dst->found_csum_item = 1;
1144 if (src->some_csum_missing)
1145 dst->some_csum_missing = 1;
1146 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1148 if (ret < 0)
1149 return ret;
1152 BUG_ON(src->found_link < dir_count);
1153 dst->found_link += src->found_link - dir_count;
1154 dst->found_size += src->found_size;
1155 if (src->extent_start != (u64)-1) {
1156 if (dst->extent_start == (u64)-1) {
1157 dst->extent_start = src->extent_start;
1158 dst->extent_end = src->extent_end;
1159 } else {
1160 if (dst->extent_end > src->extent_start)
1161 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162 else if (dst->extent_end < src->extent_start) {
1163 ret = add_file_extent_hole(&dst->holes,
1164 dst->extent_end,
1165 src->extent_start - dst->extent_end);
1167 if (dst->extent_end < src->extent_end)
1168 dst->extent_end = src->extent_end;
1172 dst->errors |= src->errors;
1173 if (src->found_inode_item) {
1174 if (!dst->found_inode_item) {
1175 dst->nlink = src->nlink;
1176 dst->isize = src->isize;
1177 dst->nbytes = src->nbytes;
1178 dst->imode = src->imode;
1179 dst->nodatasum = src->nodatasum;
1180 dst->found_inode_item = 1;
1181 } else {
1182 dst->errors |= I_ERR_DUP_INODE_ITEM;
1185 dst->merging = 0;
1187 return 0;
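/*
 * Move (or copy, while the source node is still referenced) every record
 * from src_node's root and inode caches into dst_node, merging records
 * that already exist in the destination.
 */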
1190 static int splice_shared_node(struct shared_node *src_node,
1191 struct shared_node *dst_node)
1193 struct cache_extent *cache;
1194 struct ptr_node *node, *ins;
1195 struct cache_tree *src, *dst;
1196 struct inode_record *rec, *conflict;
1197 u64 current_ino = 0;
1198 int splice = 0;
1199 int ret;
1201 if (--src_node->refs == 0)
1202 splice = 1;
1203 if (src_node->current)
1204 current_ino = src_node->current->ino;
1206 src = &src_node->root_cache;
1207 dst = &dst_node->root_cache;
1208 again:
1209 cache = search_cache_extent(src, 0);
1210 while (cache) {
1211 node = container_of(cache, struct ptr_node, cache);
1212 rec = node->data;
1213 cache = next_cache_extent(cache);
1215 if (splice) {
1216 remove_cache_extent(src, &node->cache);
1217 ins = node;
1218 } else {
1219 ins = malloc(sizeof(*ins));
1220 BUG_ON(!ins);
1221 ins->cache.start = node->cache.start;
1222 ins->cache.size = node->cache.size;
1223 ins->data = rec;
1224 rec->refs++;
1226 ret = insert_cache_extent(dst, &ins->cache);
1227 if (ret == -EEXIST) {
1228 conflict = get_inode_rec(dst, rec->ino, 1);
1229 BUG_ON(IS_ERR(conflict));
1230 merge_inode_recs(rec, conflict, dst);
1231 if (rec->checked) {
1232 conflict->checked = 1;
1233 if (dst_node->current == conflict)
1234 dst_node->current = NULL;
1236 maybe_free_inode_rec(dst, conflict);
1237 free_inode_rec(rec);
1238 free(ins);
1239 } else {
1240 BUG_ON(ret);
1244 if (src == &src_node->root_cache) {
1245 src = &src_node->inode_cache;
1246 dst = &dst_node->inode_cache;
1247 goto again;
1250 if (current_ino > 0 && (!dst_node->current ||
1251 current_ino > dst_node->current->ino)) {
1252 if (dst_node->current) {
1253 dst_node->current->checked = 1;
1254 maybe_free_inode_rec(dst, dst_node->current);
1256 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257 BUG_ON(IS_ERR(dst_node->current));
1259 return 0;
1262 static void free_inode_ptr(struct cache_extent *cache)
1264 struct ptr_node *node;
1265 struct inode_record *rec;
1267 node = container_of(cache, struct ptr_node, cache);
1268 rec = node->data;
1269 free_inode_rec(rec);
1270 free(node);
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1276 u64 bytenr)
1278 struct cache_extent *cache;
1279 struct shared_node *node;
1281 cache = lookup_cache_extent(shared, bytenr, 1);
1282 if (cache) {
1283 node = container_of(cache, struct shared_node, cache);
1284 return node;
1286 return NULL;
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1291 int ret;
1292 struct shared_node *node;
1294 node = calloc(1, sizeof(*node));
1295 if (!node)
1296 return -ENOMEM;
1297 node->cache.start = bytenr;
1298 node->cache.size = 1;
1299 cache_tree_init(&node->root_cache);
1300 cache_tree_init(&node->inode_cache);
1301 node->refs = refs;
1303 ret = insert_cache_extent(shared, &node->cache);
1305 return ret;
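/*
 * Track entry into a shared tree block.  The first visitor records a
 * shared_node for the block and keeps walking; later visitors splice the
 * records collected under that block into their active node and skip it.
 */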
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309 struct walk_control *wc, int level)
1311 struct shared_node *node;
1312 struct shared_node *dest;
1313 int ret;
1315 if (level == wc->active_node)
1316 return 0;
1318 BUG_ON(wc->active_node <= level);
1319 node = find_shared_node(&wc->shared, bytenr);
1320 if (!node) {
1321 ret = add_shared_node(&wc->shared, bytenr, refs);
1322 BUG_ON(ret);
1323 node = find_shared_node(&wc->shared, bytenr);
1324 wc->nodes[level] = node;
1325 wc->active_node = level;
1326 return 0;
1329 if (wc->root_level == wc->active_node &&
1330 btrfs_root_refs(&root->root_item) == 0) {
1331 if (--node->refs == 0) {
1332 free_inode_recs_tree(&node->root_cache);
1333 free_inode_recs_tree(&node->inode_cache);
1334 remove_cache_extent(&wc->shared, &node->cache);
1335 free(node);
1337 return 1;
1340 dest = wc->nodes[wc->active_node];
1341 splice_shared_node(node, dest);
1342 if (node->refs == 0) {
1343 remove_cache_extent(&wc->shared, &node->cache);
1344 free(node);
1346 return 1;
1349 static int leave_shared_node(struct btrfs_root *root,
1350 struct walk_control *wc, int level)
1352 struct shared_node *node;
1353 struct shared_node *dest;
1354 int i;
1356 if (level == wc->root_level)
1357 return 0;
1359 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1360 if (wc->nodes[i])
1361 break;
1363 BUG_ON(i >= BTRFS_MAX_LEVEL);
1365 node = wc->nodes[wc->active_node];
1366 wc->nodes[wc->active_node] = NULL;
1367 wc->active_node = i;
1369 dest = wc->nodes[wc->active_node];
1370 if (wc->active_node < wc->root_level ||
1371 btrfs_root_refs(&root->root_item) > 0) {
1372 BUG_ON(node->refs <= 1);
1373 splice_shared_node(node, dest);
1374 } else {
1375 BUG_ON(node->refs < 2);
1376 node->refs--;
1378 return 0;
1382 * Returns:
1383 * < 0 - on error
1384 * 1 - if the root with id child_root_id is a child of root parent_root_id
1385 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1386 * has other root(s) as parent(s)
1387 * 2 - if the root child_root_id doesn't have any parent roots
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1390 u64 child_root_id)
1392 struct btrfs_path path;
1393 struct btrfs_key key;
1394 struct extent_buffer *leaf;
1395 int has_parent = 0;
1396 int ret;
1398 btrfs_init_path(&path);
1400 key.objectid = parent_root_id;
1401 key.type = BTRFS_ROOT_REF_KEY;
1402 key.offset = child_root_id;
1403 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1404 0, 0);
1405 if (ret < 0)
1406 return ret;
1407 btrfs_release_path(&path);
1408 if (!ret)
1409 return 1;
1411 key.objectid = child_root_id;
1412 key.type = BTRFS_ROOT_BACKREF_KEY;
1413 key.offset = 0;
1414 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1415 0, 0);
1416 if (ret < 0)
1417 goto out;
1419 while (1) {
1420 leaf = path.nodes[0];
1421 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1423 if (ret)
1424 break;
1425 leaf = path.nodes[0];
1428 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1429 if (key.objectid != child_root_id ||
1430 key.type != BTRFS_ROOT_BACKREF_KEY)
1431 break;
1433 has_parent = 1;
1435 if (key.offset == parent_root_id) {
1436 btrfs_release_path(&path);
1437 return 1;
1440 path.slots[0]++;
1442 out:
1443 btrfs_release_path(&path);
1444 if (ret < 0)
1445 return ret;
1446 return has_parent ? 0 : 2;
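/*
 * Process one DIR_ITEM/DIR_INDEX item: record a backref for every name it
 * contains, in the inode cache or the root cache depending on where the
 * entry points.
 */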
1449 static int process_dir_item(struct btrfs_root *root,
1450 struct extent_buffer *eb,
1451 int slot, struct btrfs_key *key,
1452 struct shared_node *active_node)
1454 u32 total;
1455 u32 cur = 0;
1456 u32 len;
1457 u32 name_len;
1458 u32 data_len;
1459 int error;
1460 int nritems = 0;
1461 int filetype;
1462 struct btrfs_dir_item *di;
1463 struct inode_record *rec;
1464 struct cache_tree *root_cache;
1465 struct cache_tree *inode_cache;
1466 struct btrfs_key location;
1467 char namebuf[BTRFS_NAME_LEN];
1469 root_cache = &active_node->root_cache;
1470 inode_cache = &active_node->inode_cache;
1471 rec = active_node->current;
1472 rec->found_dir_item = 1;
1474 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475 total = btrfs_item_size_nr(eb, slot);
1476 while (cur < total) {
1477 nritems++;
1478 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479 name_len = btrfs_dir_name_len(eb, di);
1480 data_len = btrfs_dir_data_len(eb, di);
1481 filetype = btrfs_dir_type(eb, di);
1483 rec->found_size += name_len;
1484 if (name_len <= BTRFS_NAME_LEN) {
1485 len = name_len;
1486 error = 0;
1487 } else {
1488 len = BTRFS_NAME_LEN;
1489 error = REF_ERR_NAME_TOO_LONG;
1491 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1493 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494 add_inode_backref(inode_cache, location.objectid,
1495 key->objectid, key->offset, namebuf,
1496 len, filetype, key->type, error);
1497 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498 add_inode_backref(root_cache, location.objectid,
1499 key->objectid, key->offset,
1500 namebuf, len, filetype,
1501 key->type, error);
1502 } else {
1503 fprintf(stderr, "invalid location in dir item %u\n",
1504 location.type);
1505 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506 key->objectid, key->offset, namebuf,
1507 len, filetype, key->type, error);
1510 len = sizeof(*di) + name_len + data_len;
1511 di = (struct btrfs_dir_item *)((char *)di + len);
1512 cur += len;
1514 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515 rec->errors |= I_ERR_DUP_DIR_INDEX;
1517 return 0;
1520 static int process_inode_ref(struct extent_buffer *eb,
1521 int slot, struct btrfs_key *key,
1522 struct shared_node *active_node)
1524 u32 total;
1525 u32 cur = 0;
1526 u32 len;
1527 u32 name_len;
1528 u64 index;
1529 int error;
1530 struct cache_tree *inode_cache;
1531 struct btrfs_inode_ref *ref;
1532 char namebuf[BTRFS_NAME_LEN];
1534 inode_cache = &active_node->inode_cache;
1536 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537 total = btrfs_item_size_nr(eb, slot);
1538 while (cur < total) {
1539 name_len = btrfs_inode_ref_name_len(eb, ref);
1540 index = btrfs_inode_ref_index(eb, ref);
1541 if (name_len <= BTRFS_NAME_LEN) {
1542 len = name_len;
1543 error = 0;
1544 } else {
1545 len = BTRFS_NAME_LEN;
1546 error = REF_ERR_NAME_TOO_LONG;
1548 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549 add_inode_backref(inode_cache, key->objectid, key->offset,
1550 index, namebuf, len, 0, key->type, error);
1552 len = sizeof(*ref) + name_len;
1553 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1554 cur += len;
1556 return 0;
1559 static int process_inode_extref(struct extent_buffer *eb,
1560 int slot, struct btrfs_key *key,
1561 struct shared_node *active_node)
1563 u32 total;
1564 u32 cur = 0;
1565 u32 len;
1566 u32 name_len;
1567 u64 index;
1568 u64 parent;
1569 int error;
1570 struct cache_tree *inode_cache;
1571 struct btrfs_inode_extref *extref;
1572 char namebuf[BTRFS_NAME_LEN];
1574 inode_cache = &active_node->inode_cache;
1576 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577 total = btrfs_item_size_nr(eb, slot);
1578 while (cur < total) {
1579 name_len = btrfs_inode_extref_name_len(eb, extref);
1580 index = btrfs_inode_extref_index(eb, extref);
1581 parent = btrfs_inode_extref_parent(eb, extref);
1582 if (name_len <= BTRFS_NAME_LEN) {
1583 len = name_len;
1584 error = 0;
1585 } else {
1586 len = BTRFS_NAME_LEN;
1587 error = REF_ERR_NAME_TOO_LONG;
1589 read_extent_buffer(eb, namebuf,
1590 (unsigned long)(extref + 1), len);
1591 add_inode_backref(inode_cache, key->objectid, parent,
1592 index, namebuf, len, 0, key->type, error);
1594 len = sizeof(*extref) + name_len;
1595 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1596 cur += len;
1598 return 0;
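/*
 * Count how many bytes of the range [start, start + len) are covered by
 * csum items and return the result in *found.
 */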
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603 u64 len, u64 *found)
1605 struct btrfs_key key;
1606 struct btrfs_path path;
1607 struct extent_buffer *leaf;
1608 int ret;
1609 size_t size;
1610 *found = 0;
1611 u64 csum_end;
1612 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1614 btrfs_init_path(&path);
1616 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1617 key.offset = start;
1618 key.type = BTRFS_EXTENT_CSUM_KEY;
1620 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1621 &key, &path, 0, 0);
1622 if (ret < 0)
1623 goto out;
1624 if (ret > 0 && path.slots[0] > 0) {
1625 leaf = path.nodes[0];
1626 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628 key.type == BTRFS_EXTENT_CSUM_KEY)
1629 path.slots[0]--;
1632 while (len > 0) {
1633 leaf = path.nodes[0];
1634 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1636 if (ret > 0)
1637 break;
1638 else if (ret < 0)
1639 goto out;
1640 leaf = path.nodes[0];
1643 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645 key.type != BTRFS_EXTENT_CSUM_KEY)
1646 break;
1648 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1649 if (key.offset >= start + len)
1650 break;
1652 if (key.offset > start)
1653 start = key.offset;
1655 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657 if (csum_end > start) {
1658 size = min(csum_end - start, len);
1659 len -= size;
1660 start += size;
1661 *found += size;
1664 path.slots[0]++;
1666 out:
1667 btrfs_release_path(&path);
1668 if (ret < 0)
1669 return ret;
1670 return 0;
1673 static int process_file_extent(struct btrfs_root *root,
1674 struct extent_buffer *eb,
1675 int slot, struct btrfs_key *key,
1676 struct shared_node *active_node)
1678 struct inode_record *rec;
1679 struct btrfs_file_extent_item *fi;
1680 u64 num_bytes = 0;
1681 u64 disk_bytenr = 0;
1682 u64 extent_offset = 0;
1683 u64 mask = root->sectorsize - 1;
1684 int extent_type;
1685 int ret;
1687 rec = active_node->current;
1688 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689 rec->found_file_extent = 1;
1691 if (rec->extent_start == (u64)-1) {
1692 rec->extent_start = key->offset;
1693 rec->extent_end = key->offset;
1696 if (rec->extent_end > key->offset)
1697 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698 else if (rec->extent_end < key->offset) {
1699 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700 key->offset - rec->extent_end);
1701 if (ret < 0)
1702 return ret;
1705 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706 extent_type = btrfs_file_extent_type(eb, fi);
1708 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1710 if (num_bytes == 0)
1711 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712 rec->found_size += num_bytes;
1713 num_bytes = (num_bytes + mask) & ~mask;
1714 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718 extent_offset = btrfs_file_extent_offset(eb, fi);
1719 if (num_bytes == 0 || (num_bytes & mask))
1720 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721 if (num_bytes + extent_offset >
1722 btrfs_file_extent_ram_bytes(eb, fi))
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725 (btrfs_file_extent_compression(eb, fi) ||
1726 btrfs_file_extent_encryption(eb, fi) ||
1727 btrfs_file_extent_other_encoding(eb, fi)))
1728 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1729 if (disk_bytenr > 0)
1730 rec->found_size += num_bytes;
1731 } else {
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734 rec->extent_end = key->offset + num_bytes;
1737 * The data reloc tree will copy full extents into its inode and then
1738 * copy the corresponding csums. Because the extent it copied could be
1739 * a preallocated extent that hasn't been written to yet, there may be no
1740 * csums to copy, so we won't have csums for our file extent. This is
1741 * ok so just don't bother checking csums if the inode belongs to the
1742 * data reloc tree.
1744 if (disk_bytenr > 0 &&
1745 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1746 u64 found;
1747 if (btrfs_file_extent_compression(eb, fi))
1748 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1749 else
1750 disk_bytenr += extent_offset;
1752 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1753 if (ret < 0)
1754 return ret;
1755 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1756 if (found > 0)
1757 rec->found_csum_item = 1;
1758 if (found < num_bytes)
1759 rec->some_csum_missing = 1;
1760 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1761 if (found > 0)
1762 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1765 return 0;
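/*
 * Process every item in a leaf, dispatching dir items, inode refs/extrefs,
 * inode items and file extents to the record-building helpers above.
 */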
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769 struct walk_control *wc)
1771 struct btrfs_key key;
1772 u32 nritems;
1773 int i;
1774 int ret = 0;
1775 struct cache_tree *inode_cache;
1776 struct shared_node *active_node;
1778 if (wc->root_level == wc->active_node &&
1779 btrfs_root_refs(&root->root_item) == 0)
1780 return 0;
1782 active_node = wc->nodes[wc->active_node];
1783 inode_cache = &active_node->inode_cache;
1784 nritems = btrfs_header_nritems(eb);
1785 for (i = 0; i < nritems; i++) {
1786 btrfs_item_key_to_cpu(eb, &key, i);
1788 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1789 continue;
1790 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1791 continue;
1793 if (active_node->current == NULL ||
1794 active_node->current->ino < key.objectid) {
1795 if (active_node->current) {
1796 active_node->current->checked = 1;
1797 maybe_free_inode_rec(inode_cache,
1798 active_node->current);
1800 active_node->current = get_inode_rec(inode_cache,
1801 key.objectid, 1);
1802 BUG_ON(IS_ERR(active_node->current));
1804 switch (key.type) {
1805 case BTRFS_DIR_ITEM_KEY:
1806 case BTRFS_DIR_INDEX_KEY:
1807 ret = process_dir_item(root, eb, i, &key, active_node);
1808 break;
1809 case BTRFS_INODE_REF_KEY:
1810 ret = process_inode_ref(eb, i, &key, active_node);
1811 break;
1812 case BTRFS_INODE_EXTREF_KEY:
1813 ret = process_inode_extref(eb, i, &key, active_node);
1814 break;
1815 case BTRFS_INODE_ITEM_KEY:
1816 ret = process_inode_item(eb, i, &key, active_node);
1817 break;
1818 case BTRFS_EXTENT_DATA_KEY:
1819 ret = process_file_extent(root, eb, i, &key,
1820 active_node);
1821 break;
1822 default:
1823 break;
1826 return ret;
1829 static void reada_walk_down(struct btrfs_root *root,
1830 struct extent_buffer *node, int slot)
1832 u64 bytenr;
1833 u64 ptr_gen;
1834 u32 nritems;
1835 u32 blocksize;
1836 int i;
1837 int level;
1839 level = btrfs_header_level(node);
1840 if (level != 1)
1841 return;
1843 nritems = btrfs_header_nritems(node);
1844 blocksize = root->nodesize;
1845 for (i = slot; i < nritems; i++) {
1846 bytenr = btrfs_node_blockptr(node, i);
1847 ptr_gen = btrfs_node_ptr_generation(node, i);
1848 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1853 * Check the child node/leaf against the following conditions:
1854 * 1. the first item key of the node/leaf should be the same as the one
1855 * in the parent.
1856 * 2. the block pointer in the parent node should match the child node/leaf.
1857 * 3. the generation of the parent's pointer and the child's header should match.
1859 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1861 * We would like to check the leaf owner too, but since subvolumes may share
1862 * leaves, the leaf owner check is not very strong; the key check should be
1863 * sufficient for that case.
1865 static int check_child_node(struct btrfs_root *root,
1866 struct extent_buffer *parent, int slot,
1867 struct extent_buffer *child)
1869 struct btrfs_key parent_key;
1870 struct btrfs_key child_key;
1871 int ret = 0;
1873 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1874 if (btrfs_header_level(child) == 0)
1875 btrfs_item_key_to_cpu(child, &child_key, 0);
1876 else
1877 btrfs_node_key_to_cpu(child, &child_key, 0);
1879 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1880 ret = -EINVAL;
1881 fprintf(stderr,
1882 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883 parent_key.objectid, parent_key.type, parent_key.offset,
1884 child_key.objectid, child_key.type, child_key.offset);
1886 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1887 ret = -EINVAL;
1888 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889 btrfs_node_blockptr(parent, slot),
1890 btrfs_header_bytenr(child));
1892 if (btrfs_node_ptr_generation(parent, slot) !=
1893 btrfs_header_generation(child)) {
1894 ret = -EINVAL;
1895 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896 btrfs_node_ptr_generation(parent, slot),
1897 btrfs_header_generation(child));
1899 return ret;
1902 struct node_refs {
1903 u64 bytenr[BTRFS_MAX_LEVEL];
1904 u64 refs[BTRFS_MAX_LEVEL];
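/*
 * Walk down from the current path position toward a leaf, looking up extent
 * refcounts to detect shared blocks, and validating parent/child consistency
 * and block contents on the way down.
 */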
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908 struct walk_control *wc, int *level,
1909 struct node_refs *nrefs)
1911 enum btrfs_tree_block_status status;
1912 u64 bytenr;
1913 u64 ptr_gen;
1914 struct extent_buffer *next;
1915 struct extent_buffer *cur;
1916 u32 blocksize;
1917 int ret, err = 0;
1918 u64 refs;
1920 WARN_ON(*level < 0);
1921 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1923 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924 refs = nrefs->refs[*level];
1925 ret = 0;
1926 } else {
1927 ret = btrfs_lookup_extent_info(NULL, root,
1928 path->nodes[*level]->start,
1929 *level, 1, &refs, NULL);
1930 if (ret < 0) {
1931 err = ret;
1932 goto out;
1934 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935 nrefs->refs[*level] = refs;
1938 if (refs > 1) {
1939 ret = enter_shared_node(root, path->nodes[*level]->start,
1940 refs, wc, *level);
1941 if (ret > 0) {
1942 err = ret;
1943 goto out;
1947 while (*level >= 0) {
1948 WARN_ON(*level < 0);
1949 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950 cur = path->nodes[*level];
1952 if (btrfs_header_level(cur) != *level)
1953 WARN_ON(1);
1955 if (path->slots[*level] >= btrfs_header_nritems(cur))
1956 break;
1957 if (*level == 0) {
1958 ret = process_one_leaf(root, cur, wc);
1959 if (ret < 0)
1960 err = ret;
1961 break;
1963 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965 blocksize = root->nodesize;
1967 if (bytenr == nrefs->bytenr[*level - 1]) {
1968 refs = nrefs->refs[*level - 1];
1969 } else {
1970 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971 *level - 1, 1, &refs, NULL);
1972 if (ret < 0) {
1973 refs = 0;
1974 } else {
1975 nrefs->bytenr[*level - 1] = bytenr;
1976 nrefs->refs[*level - 1] = refs;
1980 if (refs > 1) {
1981 ret = enter_shared_node(root, bytenr, refs,
1982 wc, *level - 1);
1983 if (ret > 0) {
1984 path->slots[*level]++;
1985 continue;
1989 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991 free_extent_buffer(next);
1992 reada_walk_down(root, cur, path->slots[*level]);
1993 next = read_tree_block(root, bytenr, blocksize,
1994 ptr_gen);
1995 if (!extent_buffer_uptodate(next)) {
1996 struct btrfs_key node_key;
1998 btrfs_node_key_to_cpu(path->nodes[*level],
1999 &node_key,
2000 path->slots[*level]);
2001 btrfs_add_corrupt_extent_record(root->fs_info,
2002 &node_key,
2003 path->nodes[*level]->start,
2004 root->nodesize, *level);
2005 err = -EIO;
2006 goto out;
2010 ret = check_child_node(root, cur, path->slots[*level], next);
2011 if (ret) {
2012 err = ret;
2013 goto out;
2016 if (btrfs_is_leaf(next))
2017 status = btrfs_check_leaf(root, NULL, next);
2018 else
2019 status = btrfs_check_node(root, NULL, next);
2020 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021 free_extent_buffer(next);
2022 err = -EIO;
2023 goto out;
2026 *level = *level - 1;
2027 free_extent_buffer(path->nodes[*level]);
2028 path->nodes[*level] = next;
2029 path->slots[*level] = 0;
2031 out:
2032 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2033 return err;
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037 struct walk_control *wc, int *level)
2039 int i;
2040 struct extent_buffer *leaf;
2042 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043 leaf = path->nodes[i];
2044 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2045 path->slots[i]++;
2046 *level = i;
2047 return 0;
2048 } else {
2049 free_extent_buffer(path->nodes[*level]);
2050 path->nodes[*level] = NULL;
2051 BUG_ON(*level > wc->active_node);
2052 if (*level == wc->active_node)
2053 leave_shared_node(root, wc, *level);
2054 *level = i + 1;
2057 return 1;
2060 static int check_root_dir(struct inode_record *rec)
2062 struct inode_backref *backref;
2063 int ret = -1;
2065 if (!rec->found_inode_item || rec->errors)
2066 goto out;
2067 if (rec->nlink != 1 || rec->found_link != 0)
2068 goto out;
2069 if (list_empty(&rec->backrefs))
2070 goto out;
2071 backref = to_inode_backref(rec->backrefs.next);
2072 if (!backref->found_inode_ref)
2073 goto out;
2074 if (backref->index != 0 || backref->namelen != 2 ||
2075 memcmp(backref->name, "..", 2))
2076 goto out;
2077 if (backref->found_dir_index || backref->found_dir_item)
2078 goto out;
2079 ret = 0;
2080 out:
2081 return ret;
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085 struct btrfs_root *root, struct btrfs_path *path,
2086 struct inode_record *rec)
2088 struct btrfs_inode_item *ei;
2089 struct btrfs_key key;
2090 int ret;
2092 key.objectid = rec->ino;
2093 key.type = BTRFS_INODE_ITEM_KEY;
2094 key.offset = (u64)-1;
2096 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2097 if (ret < 0)
2098 goto out;
2099 if (ret) {
2100 if (!path->slots[0]) {
2101 ret = -ENOENT;
2102 goto out;
2104 path->slots[0]--;
2105 ret = 0;
2107 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108 if (key.objectid != rec->ino) {
2109 ret = -ENOENT;
2110 goto out;
2113 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114 struct btrfs_inode_item);
2115 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116 btrfs_mark_buffer_dirty(path->nodes[0]);
2117 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119 root->root_key.objectid);
2120 out:
2121 btrfs_release_path(path);
2122 return ret;
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126 struct btrfs_root *root,
2127 struct btrfs_path *path,
2128 struct inode_record *rec)
2130 int ret;
2132 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133 btrfs_release_path(path);
2134 if (!ret)
2135 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2136 return ret;
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140 struct btrfs_root *root,
2141 struct btrfs_path *path,
2142 struct inode_record *rec)
2144 struct btrfs_inode_item *ei;
2145 struct btrfs_key key;
2146 int ret = 0;
2148 key.objectid = rec->ino;
2149 key.type = BTRFS_INODE_ITEM_KEY;
2150 key.offset = 0;
2152 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2153 if (ret) {
2154 if (ret > 0)
2155 ret = -ENOENT;
2156 goto out;
2159 /* Since ret == 0, no need to check anything */
2160 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161 struct btrfs_inode_item);
2162 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163 btrfs_mark_buffer_dirty(path->nodes[0]);
2164 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165 printf("reset nbytes for ino %llu root %llu\n",
2166 rec->ino, root->root_key.objectid);
2167 out:
2168 btrfs_release_path(path);
2169 return ret;
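/*
 * Insert the missing DIR_INDEX item described by @backref into its parent
 * directory and bump the parent inode record's accounted size so the
 * directory isize check stays consistent.
 */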
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173 struct cache_tree *inode_cache,
2174 struct inode_record *rec,
2175 struct inode_backref *backref)
2177 struct btrfs_path *path;
2178 struct btrfs_trans_handle *trans;
2179 struct btrfs_dir_item *dir_item;
2180 struct extent_buffer *leaf;
2181 struct btrfs_key key;
2182 struct btrfs_disk_key disk_key;
2183 struct inode_record *dir_rec;
2184 unsigned long name_ptr;
2185 u32 data_size = sizeof(*dir_item) + backref->namelen;
2186 int ret;
2188 path = btrfs_alloc_path();
2189 if (!path)
2190 return -ENOMEM;
2192 trans = btrfs_start_transaction(root, 1);
2193 if (IS_ERR(trans)) {
2194 btrfs_free_path(path);
2195 return PTR_ERR(trans);
2198 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199 (unsigned long long)rec->ino);
2200 key.objectid = backref->dir;
2201 key.type = BTRFS_DIR_INDEX_KEY;
2202 key.offset = backref->index;
2204 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2205 BUG_ON(ret);
2207 leaf = path->nodes[0];
2208 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2210 disk_key.objectid = cpu_to_le64(rec->ino);
2211 disk_key.type = BTRFS_INODE_ITEM_KEY;
2212 disk_key.offset = 0;
2214 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216 btrfs_set_dir_data_len(leaf, dir_item, 0);
2217 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218 name_ptr = (unsigned long)(dir_item + 1);
2219 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220 btrfs_mark_buffer_dirty(leaf);
2221 btrfs_free_path(path);
2222 btrfs_commit_transaction(trans, root);
2224 backref->found_dir_index = 1;
2225 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226 BUG_ON(IS_ERR(dir_rec));
2227 if (!dir_rec)
2228 return 0;
2229 dir_rec->found_size += backref->namelen;
2230 if (dir_rec->found_size == dir_rec->isize &&
2231 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233 if (dir_rec->found_size != dir_rec->isize)
2234 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2236 return 0;
2239 static int delete_dir_index(struct btrfs_root *root,
2240 struct cache_tree *inode_cache,
2241 struct inode_record *rec,
2242 struct inode_backref *backref)
2244 struct btrfs_trans_handle *trans;
2245 struct btrfs_dir_item *di;
2246 struct btrfs_path *path;
2247 int ret = 0;
2249 path = btrfs_alloc_path();
2250 if (!path)
2251 return -ENOMEM;
2253 trans = btrfs_start_transaction(root, 1);
2254 if (IS_ERR(trans)) {
2255 btrfs_free_path(path);
2256 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
2265 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266 backref->name, backref->namelen,
2267 backref->index, -1);
2268 if (IS_ERR(di)) {
2269 ret = PTR_ERR(di);
2270 btrfs_free_path(path);
2271 btrfs_commit_transaction(trans, root);
2272 if (ret == -ENOENT)
2273 return 0;
2274 return ret;
2277 if (!di)
2278 ret = btrfs_del_item(trans, root, path);
2279 else
2280 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2281 BUG_ON(ret);
2282 btrfs_free_path(path);
2283 btrfs_commit_transaction(trans, root);
2284 return ret;
2287 static int create_inode_item(struct btrfs_root *root,
2288 struct inode_record *rec,
2289 struct inode_backref *backref, int root_dir)
2291 struct btrfs_trans_handle *trans;
2292 struct btrfs_inode_item inode_item;
2293 time_t now = time(NULL);
2294 int ret;
2296 trans = btrfs_start_transaction(root, 1);
2297 if (IS_ERR(trans)) {
2298 ret = PTR_ERR(trans);
2299 return ret;
2302 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303 "be incomplete, please check permissions and content after "
2304 "the fsck completes.\n", (unsigned long long)root->objectid,
2305 (unsigned long long)rec->ino);
2307 memset(&inode_item, 0, sizeof(inode_item));
2308 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2309 if (root_dir)
2310 btrfs_set_stack_inode_nlink(&inode_item, 1);
2311 else
2312 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314 if (rec->found_dir_item) {
2315 if (rec->found_file_extent)
2316 fprintf(stderr, "root %llu inode %llu has both a dir "
2317 "item and extents, unsure if it is a dir or a "
2318 "regular file so setting it as a directory\n",
2319 (unsigned long long)root->objectid,
2320 (unsigned long long)rec->ino);
2321 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323 } else if (!rec->found_dir_item) {
2324 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2337 BUG_ON(ret);
2338 btrfs_commit_transaction(trans, root);
2339 return 0;
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343 struct inode_record *rec,
2344 struct cache_tree *inode_cache,
2345 int delete)
2347 struct inode_backref *tmp, *backref;
2348 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2349 int ret = 0;
2350 int repaired = 0;
2352 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2353 if (!delete && rec->ino == root_dirid) {
2354 if (!rec->found_inode_item) {
2355 ret = create_inode_item(root, rec, backref, 1);
2356 if (ret)
2357 break;
2358 repaired++;
2362 /* Index 0 for the root dir is special, don't mess with it */
2363 if (rec->ino == root_dirid && backref->index == 0)
2364 continue;
2366 if (delete &&
2367 ((backref->found_dir_index && !backref->found_inode_ref) ||
2368 (backref->found_dir_index && backref->found_inode_ref &&
2369 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370 ret = delete_dir_index(root, inode_cache, rec, backref);
2371 if (ret)
2372 break;
2373 repaired++;
2374 list_del(&backref->list);
2375 free(backref);
2378 if (!delete && !backref->found_dir_index &&
2379 backref->found_dir_item && backref->found_inode_ref) {
2380 ret = add_missing_dir_index(root, inode_cache, rec,
2381 backref);
2382 if (ret)
2383 break;
2384 repaired++;
2385 if (backref->found_dir_item &&
2386 backref->found_dir_index &&
2387 backref->found_dir_index) {
2388 if (!backref->errors &&
2389 backref->found_inode_ref) {
2390 list_del(&backref->list);
2391 free(backref);
2396 if (!delete && (!backref->found_dir_index &&
2397 !backref->found_dir_item &&
2398 backref->found_inode_ref)) {
2399 struct btrfs_trans_handle *trans;
2400 struct btrfs_key location;
2402 ret = check_dir_conflict(root, backref->name,
2403 backref->namelen,
2404 backref->dir,
2405 backref->index);
2406 if (ret) {
2408 * let the nlink fixing routine handle it,
2409 * which can do it better.
2411 ret = 0;
2412 break;
2414 location.objectid = rec->ino;
2415 location.type = BTRFS_INODE_ITEM_KEY;
2416 location.offset = 0;
2418 trans = btrfs_start_transaction(root, 1);
2419 if (IS_ERR(trans)) {
2420 ret = PTR_ERR(trans);
2421 break;
2423 fprintf(stderr, "adding missing dir index/item pair "
2424 "for inode %llu\n",
2425 (unsigned long long)rec->ino);
2426 ret = btrfs_insert_dir_item(trans, root, backref->name,
2427 backref->namelen,
2428 backref->dir, &location,
2429 imode_to_type(rec->imode),
2430 backref->index);
2431 BUG_ON(ret);
2432 btrfs_commit_transaction(trans, root);
2433 repaired++;
2436 if (!delete && (backref->found_inode_ref &&
2437 backref->found_dir_index &&
2438 backref->found_dir_item &&
2439 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440 !rec->found_inode_item)) {
2441 ret = create_inode_item(root, rec, backref, 0);
2442 if (ret)
2443 break;
2444 repaired++;
2448 return ret ? ret : repaired;
2452 * Determine the file type for nlink/inode_item repair.
2454 * Return 0 if the file type is found and the BTRFS_FT_* value is stored into *type.
2455 * Return -ENOENT if the file type is not found.
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2459 struct inode_backref *backref;
2461 /* For the case where the inode item was recovered */
2462 if (rec->found_inode_item) {
2463 *type = imode_to_type(rec->imode);
2464 return 0;
2467 list_for_each_entry(backref, &rec->backrefs, list) {
2468 if (backref->found_dir_index || backref->found_dir_item) {
2469 *type = backref->filetype;
2470 return 0;
2473 return -ENOENT;
2477 * Determine the file name for nlink repair.
2479 * Return 0 if a file name is found; name and namelen are set.
2480 * Return -ENOENT if no file name is found.
2482 static int find_file_name(struct inode_record *rec,
2483 char *name, int *namelen)
2485 struct inode_backref *backref;
2487 list_for_each_entry(backref, &rec->backrefs, list) {
2488 if (backref->found_dir_index || backref->found_dir_item ||
2489 backref->found_inode_ref) {
2490 memcpy(name, backref->name, backref->namelen);
2491 *namelen = backref->namelen;
2492 return 0;
2495 return -ENOENT;
2498 /* Reset the nlink of the inode to the correct one */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500 struct btrfs_root *root,
2501 struct btrfs_path *path,
2502 struct inode_record *rec)
2504 struct inode_backref *backref;
2505 struct inode_backref *tmp;
2506 struct btrfs_key key;
2507 struct btrfs_inode_item *inode_item;
2508 int ret = 0;
2510 /* We don't trust this value either; reset it and recount while iterating the backrefs */
2511 rec->found_link = 0;
2513 /* Remove all backrefs, including the valid ones */
2514 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516 backref->index, backref->name,
2517 backref->namelen, 0);
2518 if (ret < 0)
2519 goto out;
2521 /* remove invalid backref, so it won't be added back */
2522 if (!(backref->found_dir_index &&
2523 backref->found_dir_item &&
2524 backref->found_inode_ref)) {
2525 list_del(&backref->list);
2526 free(backref);
2527 } else {
2528 rec->found_link++;
2532 /* Set nlink to 0 */
2533 key.objectid = rec->ino;
2534 key.type = BTRFS_INODE_ITEM_KEY;
2535 key.offset = 0;
2536 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2537 if (ret < 0)
2538 goto out;
2539 if (ret > 0) {
2540 ret = -ENOENT;
2541 goto out;
2543 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544 struct btrfs_inode_item);
2545 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546 btrfs_mark_buffer_dirty(path->nodes[0]);
2547 btrfs_release_path(path);
2550 * Add back the valid inode_ref/dir_item/dir_index entries;
2551 * btrfs_add_link() handles the nlink increment, so the new nlink will be correct
2553 list_for_each_entry(backref, &rec->backrefs, list) {
2554 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555 backref->name, backref->namelen,
2556 backref->filetype, &backref->index, 1);
2557 if (ret < 0)
2558 goto out;
2560 out:
2561 btrfs_release_path(path);
2562 return ret;
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566 struct btrfs_root *root,
2567 struct btrfs_path *path,
2568 struct inode_record *rec)
2570 char *dir_name = "lost+found";
2571 char namebuf[BTRFS_NAME_LEN] = {0};
2572 u64 lost_found_ino;
2573 u32 mode = 0700;
2574 u8 type = 0;
2575 int namelen = 0;
2576 int name_recovered = 0;
2577 int type_recovered = 0;
2578 int ret = 0;
2581 * Get the file name and type first, before the invalid inode refs
2582 * are deleted by remove_all_invalid_backref()
2584 name_recovered = !find_file_name(rec, namebuf, &namelen);
2585 type_recovered = !find_file_type(rec, &type);
2587 if (!name_recovered) {
2588 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589 rec->ino, rec->ino);
2590 namelen = count_digits(rec->ino);
2591 sprintf(namebuf, "%llu", rec->ino);
2592 name_recovered = 1;
2594 if (!type_recovered) {
2595 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2596 rec->ino);
2597 type = BTRFS_FT_REG_FILE;
2598 type_recovered = 1;
2601 ret = reset_nlink(trans, root, path, rec);
2602 if (ret < 0) {
2603 fprintf(stderr,
2604 "Failed to reset nlink for inode %llu: %s\n",
2605 rec->ino, strerror(-ret));
2606 goto out;
2609 if (rec->found_link == 0) {
2610 lost_found_ino = root->highest_inode;
2611 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2612 ret = -EOVERFLOW;
2613 goto out;
2615 lost_found_ino++;
2616 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2618 mode);
2619 if (ret < 0) {
2620 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621 dir_name, strerror(-ret));
2622 goto out;
2624 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625 namebuf, namelen, type, NULL, 1);
2627 * Add the ".INO" suffix repeatedly to handle the case where
2628 * "FILENAME.INO" is already taken by another file.
2630 while (ret == -EEXIST) {
2632 * Conflicting file name, add ".INO" as suffix; the +1 below is for the '.'
2634 if (namelen + count_digits(rec->ino) + 1 >
2635 BTRFS_NAME_LEN) {
2636 ret = -EFBIG;
2637 goto out;
2639 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2640 ".%llu", rec->ino);
2641 namelen += count_digits(rec->ino) + 1;
2642 ret = btrfs_add_link(trans, root, rec->ino,
2643 lost_found_ino, namebuf,
2644 namelen, type, NULL, 1);
2646 if (ret < 0) {
2647 fprintf(stderr,
2648 "Failed to link the inode %llu to %s dir: %s\n",
2649 rec->ino, dir_name, strerror(-ret));
2650 goto out;
2653 * Just increase found_link, don't actually add the
2654 * backref. This keeps things simple, and this inode
2655 * record will be freed after the repair is done,
2656 * so fsck will not report a problem about this inode.
2658 rec->found_link++;
2659 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660 namelen, namebuf, dir_name);
2662 printf("Fixed the nlink of inode %llu\n", rec->ino);
2663 out:
2665 * Clear the flag anyway, or we will loop forever on the same inode,
2666 * as it would never be removed from the bad inode list and we would
2667 * end up in an endless loop.
2669 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670 btrfs_release_path(path);
2671 return ret;
2675 * Check if there is any normal (reg or prealloc) file extent for the given
2676 * ino.
2677 * This is used to determine the file type when neither its dir_index/item nor
2678 * its inode_item exists.
2680 * This will *NOT* report errors; if any error happens, just consider the inode
2681 * to have no normal file extent.
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 struct btrfs_path *path;
2686 struct btrfs_key key;
2687 struct btrfs_key found_key;
2688 struct btrfs_file_extent_item *fi;
2689 u8 type;
2690 int ret = 0;
2692 path = btrfs_alloc_path();
2693 if (!path)
2694 goto out;
2695 key.objectid = ino;
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2697 key.offset = 0;
2699 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2700 if (ret < 0) {
2701 ret = 0;
2702 goto out;
2704 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705 ret = btrfs_next_leaf(root, path);
2706 if (ret) {
2707 ret = 0;
2708 goto out;
2711 while (1) {
2712 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2713 path->slots[0]);
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2716 break;
2717 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path->nodes[0], fi);
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721 ret = 1;
2722 goto out;
2725 out:
2726 btrfs_free_path(path);
2727 return ret;
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2750 u8 filetype;
2751 u32 mode = 0700;
2752 int type_recovered = 0;
2753 int ret = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine the inode type if it was not found.
2762 * If a regular file extent is found, it must be a FILE.
2763 * If a dir_item/index is found, it must be a DIR.
2765 * For an undetermined one, use FILE as the fallback.
2767 * TODO:
2768 * 1. If a backref (inode_index/item is already handled) to it is found,
2769 * it must be a DIR.
2770 * A new inode-inode ref structure is needed to allow searching for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2775 type_recovered = 1;
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2778 type_recovered = 1;
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2781 type_recovered = 1;
2782 filetype = BTRFS_FT_REG_FILE;
2783 } else {
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785 rec->ino);
2786 type_recovered = 1;
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2793 if (ret < 0)
2794 goto out;
2797 * Here the inode rebuild is done; we only rebuild the inode item and
2798 * don't repair the nlink (like moving to lost+found).
2799 * That is the job of the nlink repair.
2801 * We just fill the record and return.
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2805 rec->nlink = 0;
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810 return ret;
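/*
 * Re-link the orphan data extents recorded for this inode as regular
 * file extents, updating the inode's found size and file extent hole
 * records as extents are added back.
 */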
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
2820 int ret = 0;
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents.
2826 * Here we don't know whether the extent is compressed or not,
2827 * so we can only assume it is not compressed and has no data offset,
2828 * and use its disk_len as the extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2833 if (ret < 0)
2834 goto out;
2835 if (!ret) {
2836 fprintf(stderr,
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2843 orphan->offset);
2844 if (ret < 0)
2845 goto out;
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2850 if (ret < 0)
2851 goto out;
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860 orphan->disk_len);
2861 if (ret < 0)
2862 goto out;
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2867 free(orphan);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871 return ret;
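/*
 * Punch a file hole for every recorded extent hole of this inode so its
 * extent layout becomes contiguous again; if no hole record exists at all
 * (the file lost every extent), punch one hole covering the whole isize.
 */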
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2881 int found = 0;
2882 int ret = 0;
2884 node = rb_first(&rec->holes);
2886 while (node) {
2887 found = 1;
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2891 if (ret < 0)
2892 goto out;
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2894 hole->len);
2895 if (ret < 0)
2896 goto out;
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899 node = rb_first(&rec->holes);
2901 /* special case for a file that lost all its file extents */
2902 if (!found) {
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2905 if (ret < 0)
2906 goto out;
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
2910 out:
2911 return ret;
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path *path;
2918 int ret = 0;
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2927 return rec->errors;
2929 path = btrfs_alloc_path();
2930 if (!path)
2931 return -ENOMEM;
2934 * For nlink repair, it may create a dir and add a link, so we need:
2935 * 2 for parent(256)'s dir_index and dir_item
2936 * 2 for lost+found dir's inode_item and inode_ref
2937 * 1 for the new inode_ref of the file
2938 * 2 for lost+found dir's dir_index and dir_item for the file
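/* 2 + 2 + 1 + 2 = 7 items in total, hence the reservation of 7 below */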
2940 trans = btrfs_start_transaction(root, 7);
2941 if (IS_ERR(trans)) {
2942 btrfs_free_path(path);
2943 return PTR_ERR(trans);
2946 if (rec->errors & I_ERR_NO_INODE_ITEM)
2947 ret = repair_inode_no_item(trans, root, path, rec);
2948 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951 ret = repair_inode_discount_extent(trans, root, path, rec);
2952 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953 ret = repair_inode_isize(trans, root, path, rec);
2954 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955 ret = repair_inode_orphan_item(trans, root, path, rec);
2956 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957 ret = repair_inode_nlinks(trans, root, path, rec);
2958 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959 ret = repair_inode_nbytes(trans, root, path, rec);
2960 btrfs_commit_transaction(trans, root);
2961 btrfs_free_path(path);
2962 return ret;
2965 static int check_inode_recs(struct btrfs_root *root,
2966 struct cache_tree *inode_cache)
2968 struct cache_extent *cache;
2969 struct ptr_node *node;
2970 struct inode_record *rec;
2971 struct inode_backref *backref;
2972 int stage = 0;
2973 int ret = 0;
2974 int err = 0;
2975 u64 error = 0;
2976 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2978 if (btrfs_root_refs(&root->root_item) == 0) {
2979 if (!cache_tree_empty(inode_cache))
2980 fprintf(stderr, "warning line %d\n", __LINE__);
2981 return 0;
2985 * We need to record the highest inode number for later 'lost+found'
2986 * dir creation.
2987 * We must select an ino not used/referred to by any existing inode, or
2988 * the 'lost+found' ino may collide with a missing ino from a corrupted leaf,
2989 * which could give the 'lost+found' dir wrong nlinks.
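/*
 * The inode cache is indexed by inode number, so the last cache extent
 * holds the highest ino seen in this tree.
 */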
2991 cache = last_cache_extent(inode_cache);
2992 if (cache) {
2993 node = container_of(cache, struct ptr_node, cache);
2994 rec = node->data;
2995 if (rec->ino > root->highest_inode)
2996 root->highest_inode = rec->ino;
3000 * We need to repair backrefs first because we could change some of the
3001 * errors in the inode recs.
3003 * We also need to go through and delete invalid backrefs first and then
3004 * add the correct ones second. We do this because we may get EEXIST
3005 * when adding back the correct index because we hadn't yet deleted the
3006 * invalid index.
3008 * For example, if we were missing a dir index then the directories
3009 * isize would be wrong, so if we fixed the isize to what we thought it
3010 * would be and then fixed the backref we'd still have an invalid fs, so
3011 * we need to add back the dir index and then check to see if the isize
3012 * is still wrong.
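/*
 * In repair mode, stage 1 calls repair_inode_backrefs() with delete == 1
 * to drop the invalid dir indexes, stage 2 runs it again with delete == 0
 * to add the missing items back. If anything was repaired, err is set to
 * -EAGAIN and stage 3 frees every inode record so the caller rescans the
 * tree from scratch.
 */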
3014 while (stage < 3) {
3015 stage++;
3016 if (stage == 3 && !err)
3017 break;
3019 cache = search_cache_extent(inode_cache, 0);
3020 while (repair && cache) {
3021 node = container_of(cache, struct ptr_node, cache);
3022 rec = node->data;
3023 cache = next_cache_extent(cache);
3025 /* Need to free everything up and rescan */
3026 if (stage == 3) {
3027 remove_cache_extent(inode_cache, &node->cache);
3028 free(node);
3029 free_inode_rec(rec);
3030 continue;
3033 if (list_empty(&rec->backrefs))
3034 continue;
3036 ret = repair_inode_backrefs(root, rec, inode_cache,
3037 stage == 1);
3038 if (ret < 0) {
3039 err = ret;
3040 stage = 2;
3041 break;
3042 } if (ret > 0) {
3043 err = -EAGAIN;
3047 if (err)
3048 return err;
3050 rec = get_inode_rec(inode_cache, root_dirid, 0);
3051 BUG_ON(IS_ERR(rec));
3052 if (rec) {
3053 ret = check_root_dir(rec);
3054 if (ret) {
3055 fprintf(stderr, "root %llu root dir %llu error\n",
3056 (unsigned long long)root->root_key.objectid,
3057 (unsigned long long)root_dirid);
3058 print_inode_error(root, rec);
3059 error++;
3061 } else {
3062 if (repair) {
3063 struct btrfs_trans_handle *trans;
3065 trans = btrfs_start_transaction(root, 1);
3066 if (IS_ERR(trans)) {
3067 err = PTR_ERR(trans);
3068 return err;
3071 fprintf(stderr,
3072 "root %llu missing its root dir, recreating\n",
3073 (unsigned long long)root->objectid);
3075 ret = btrfs_make_root_dir(trans, root, root_dirid);
3076 BUG_ON(ret);
3078 btrfs_commit_transaction(trans, root);
3079 return -EAGAIN;
3082 fprintf(stderr, "root %llu root dir %llu not found\n",
3083 (unsigned long long)root->root_key.objectid,
3084 (unsigned long long)root_dirid);
3087 while (1) {
3088 cache = search_cache_extent(inode_cache, 0);
3089 if (!cache)
3090 break;
3091 node = container_of(cache, struct ptr_node, cache);
3092 rec = node->data;
3093 remove_cache_extent(inode_cache, &node->cache);
3094 free(node);
3095 if (rec->ino == root_dirid ||
3096 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097 free_inode_rec(rec);
3098 continue;
3101 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102 ret = check_orphan_item(root, rec->ino);
3103 if (ret == 0)
3104 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105 if (can_free_inode_rec(rec)) {
3106 free_inode_rec(rec);
3107 continue;
3111 if (!rec->found_inode_item)
3112 rec->errors |= I_ERR_NO_INODE_ITEM;
3113 if (rec->found_link != rec->nlink)
3114 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115 if (repair) {
3116 ret = try_repair_inode(root, rec);
3117 if (ret == 0 && can_free_inode_rec(rec)) {
3118 free_inode_rec(rec);
3119 continue;
3121 ret = 0;
3124 if (!(repair && ret == 0))
3125 error++;
3126 print_inode_error(root, rec);
3127 list_for_each_entry(backref, &rec->backrefs, list) {
3128 if (!backref->found_dir_item)
3129 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130 if (!backref->found_dir_index)
3131 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132 if (!backref->found_inode_ref)
3133 backref->errors |= REF_ERR_NO_INODE_REF;
3134 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135 " namelen %u name %s filetype %d errors %x",
3136 (unsigned long long)backref->dir,
3137 (unsigned long long)backref->index,
3138 backref->namelen, backref->name,
3139 backref->filetype, backref->errors);
3140 print_ref_error(backref->errors);
3142 free_inode_rec(rec);
3144 return (error > 0) ? -1 : 0;
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3148 u64 objectid)
3150 struct cache_extent *cache;
3151 struct root_record *rec = NULL;
3152 int ret;
3154 cache = lookup_cache_extent(root_cache, objectid, 1);
3155 if (cache) {
3156 rec = container_of(cache, struct root_record, cache);
3157 } else {
3158 rec = calloc(1, sizeof(*rec));
3159 if (!rec)
3160 return ERR_PTR(-ENOMEM);
3161 rec->objectid = objectid;
3162 INIT_LIST_HEAD(&rec->backrefs);
3163 rec->cache.start = objectid;
3164 rec->cache.size = 1;
3166 ret = insert_cache_extent(root_cache, &rec->cache);
3167 if (ret)
3168 return ERR_PTR(-EEXIST);
3170 return rec;
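/*
 * Find the root backref matching (ref_root, dir, name) in rec->backrefs,
 * or allocate a new one and append it to the list if it doesn't exist yet.
 * Returns NULL on allocation failure.
 */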
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174 u64 ref_root, u64 dir, u64 index,
3175 const char *name, int namelen)
3177 struct root_backref *backref;
3179 list_for_each_entry(backref, &rec->backrefs, list) {
3180 if (backref->ref_root != ref_root || backref->dir != dir ||
3181 backref->namelen != namelen)
3182 continue;
3183 if (memcmp(name, backref->name, namelen))
3184 continue;
3185 return backref;
3188 backref = calloc(1, sizeof(*backref) + namelen + 1);
3189 if (!backref)
3190 return NULL;
3191 backref->ref_root = ref_root;
3192 backref->dir = dir;
3193 backref->index = index;
3194 backref->namelen = namelen;
3195 memcpy(backref->name, name, namelen);
3196 backref->name[namelen] = '\0';
3197 list_add_tail(&backref->list, &rec->backrefs);
3198 return backref;
3201 static void free_root_record(struct cache_extent *cache)
3203 struct root_record *rec;
3204 struct root_backref *backref;
3206 rec = container_of(cache, struct root_record, cache);
3207 while (!list_empty(&rec->backrefs)) {
3208 backref = to_root_backref(rec->backrefs.next);
3209 list_del(&backref->list);
3210 free(backref);
3213 kfree(rec);
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3218 static int add_root_backref(struct cache_tree *root_cache,
3219 u64 root_id, u64 ref_root, u64 dir, u64 index,
3220 const char *name, int namelen,
3221 int item_type, int errors)
3223 struct root_record *rec;
3224 struct root_backref *backref;
3226 rec = get_root_rec(root_cache, root_id);
3227 BUG_ON(IS_ERR(rec));
3228 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3229 BUG_ON(!backref);
3231 backref->errors |= errors;
3233 if (item_type != BTRFS_DIR_ITEM_KEY) {
3234 if (backref->found_dir_index || backref->found_back_ref ||
3235 backref->found_forward_ref) {
3236 if (backref->index != index)
3237 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238 } else {
3239 backref->index = index;
3243 if (item_type == BTRFS_DIR_ITEM_KEY) {
3244 if (backref->found_forward_ref)
3245 rec->found_ref++;
3246 backref->found_dir_item = 1;
3247 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248 backref->found_dir_index = 1;
3249 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3250 if (backref->found_forward_ref)
3251 backref->errors |= REF_ERR_DUP_ROOT_REF;
3252 else if (backref->found_dir_item)
3253 rec->found_ref++;
3254 backref->found_forward_ref = 1;
3255 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256 if (backref->found_back_ref)
3257 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258 backref->found_back_ref = 1;
3259 } else {
3260 BUG_ON(1);
3263 if (backref->found_forward_ref && backref->found_dir_item)
3264 backref->reachable = 1;
3265 return 0;
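/*
 * Convert the subvolume entries collected in a fs tree's inode cache into
 * root backrefs in the global root cache. Records of the reloc tree are
 * simply dropped, and inode records that are not child roots are skipped.
 */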
3268 static int merge_root_recs(struct btrfs_root *root,
3269 struct cache_tree *src_cache,
3270 struct cache_tree *dst_cache)
3272 struct cache_extent *cache;
3273 struct ptr_node *node;
3274 struct inode_record *rec;
3275 struct inode_backref *backref;
3276 int ret = 0;
3278 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279 free_inode_recs_tree(src_cache);
3280 return 0;
3283 while (1) {
3284 cache = search_cache_extent(src_cache, 0);
3285 if (!cache)
3286 break;
3287 node = container_of(cache, struct ptr_node, cache);
3288 rec = node->data;
3289 remove_cache_extent(src_cache, &node->cache);
3290 free(node);
3292 ret = is_child_root(root, root->objectid, rec->ino);
3293 if (ret < 0)
3294 break;
3295 else if (ret == 0)
3296 goto skip;
3298 list_for_each_entry(backref, &rec->backrefs, list) {
3299 BUG_ON(backref->found_inode_ref);
3300 if (backref->found_dir_item)
3301 add_root_backref(dst_cache, rec->ino,
3302 root->root_key.objectid, backref->dir,
3303 backref->index, backref->name,
3304 backref->namelen, BTRFS_DIR_ITEM_KEY,
3305 backref->errors);
3306 if (backref->found_dir_index)
3307 add_root_backref(dst_cache, rec->ino,
3308 root->root_key.objectid, backref->dir,
3309 backref->index, backref->name,
3310 backref->namelen, BTRFS_DIR_INDEX_KEY,
3311 backref->errors);
3313 skip:
3314 free_inode_rec(rec);
3316 if (ret < 0)
3317 return ret;
3318 return 0;
3321 static int check_root_refs(struct btrfs_root *root,
3322 struct cache_tree *root_cache)
3324 struct root_record *rec;
3325 struct root_record *ref_root;
3326 struct root_backref *backref;
3327 struct cache_extent *cache;
3328 int loop = 1;
3329 int ret;
3330 int error;
3331 int errors = 0;
3333 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334 BUG_ON(IS_ERR(rec));
3335 rec->found_ref = 1;
3337 /* fixme: this cannot detect circular references */
3338 while (loop) {
3339 loop = 0;
3340 cache = search_cache_extent(root_cache, 0);
3341 while (1) {
3342 if (!cache)
3343 break;
3344 rec = container_of(cache, struct root_record, cache);
3345 cache = next_cache_extent(cache);
3347 if (rec->found_ref == 0)
3348 continue;
3350 list_for_each_entry(backref, &rec->backrefs, list) {
3351 if (!backref->reachable)
3352 continue;
3354 ref_root = get_root_rec(root_cache,
3355 backref->ref_root);
3356 BUG_ON(IS_ERR(ref_root));
3357 if (ref_root->found_ref > 0)
3358 continue;
3360 backref->reachable = 0;
3361 rec->found_ref--;
3362 if (rec->found_ref == 0)
3363 loop = 1;
3368 cache = search_cache_extent(root_cache, 0);
3369 while (1) {
3370 if (!cache)
3371 break;
3372 rec = container_of(cache, struct root_record, cache);
3373 cache = next_cache_extent(cache);
3375 if (rec->found_ref == 0 &&
3376 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3378 ret = check_orphan_item(root->fs_info->tree_root,
3379 rec->objectid);
3380 if (ret == 0)
3381 continue;
3384 * If we don't have a root item then we likely just have
3385 * a dir item in a snapshot for this root but no actual
3386 * ref key or anything so it's meaningless.
3388 if (!rec->found_root_item)
3389 continue;
3390 errors++;
3391 fprintf(stderr, "fs tree %llu not referenced\n",
3392 (unsigned long long)rec->objectid);
3395 error = 0;
3396 if (rec->found_ref > 0 && !rec->found_root_item)
3397 error = 1;
3398 list_for_each_entry(backref, &rec->backrefs, list) {
3399 if (!backref->found_dir_item)
3400 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401 if (!backref->found_dir_index)
3402 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403 if (!backref->found_back_ref)
3404 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405 if (!backref->found_forward_ref)
3406 backref->errors |= REF_ERR_NO_ROOT_REF;
3407 if (backref->reachable && backref->errors)
3408 error = 1;
3410 if (!error)
3411 continue;
3413 errors++;
3414 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415 (unsigned long long)rec->objectid, rec->found_ref,
3416 rec->found_root_item ? "" : "not found");
3418 list_for_each_entry(backref, &rec->backrefs, list) {
3419 if (!backref->reachable)
3420 continue;
3421 if (!backref->errors && rec->found_root_item)
3422 continue;
3423 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424 " index %llu namelen %u name %s errors %x\n",
3425 (unsigned long long)backref->ref_root,
3426 (unsigned long long)backref->dir,
3427 (unsigned long long)backref->index,
3428 backref->namelen, backref->name,
3429 backref->errors);
3430 print_ref_error(backref->errors);
3433 return errors > 0 ? 1 : 0;
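/*
 * Record a ROOT_REF/ROOT_BACKREF item as a root backref. For ROOT_REF the
 * key offset is the referenced (child) root and the objectid the referring
 * root; for ROOT_BACKREF the roles are swapped. Overlong names are
 * truncated to BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 */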
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437 struct btrfs_key *key,
3438 struct cache_tree *root_cache)
3440 u64 dirid;
3441 u64 index;
3442 u32 len;
3443 u32 name_len;
3444 struct btrfs_root_ref *ref;
3445 char namebuf[BTRFS_NAME_LEN];
3446 int error;
3448 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3450 dirid = btrfs_root_ref_dirid(eb, ref);
3451 index = btrfs_root_ref_sequence(eb, ref);
3452 name_len = btrfs_root_ref_name_len(eb, ref);
3454 if (name_len <= BTRFS_NAME_LEN) {
3455 len = name_len;
3456 error = 0;
3457 } else {
3458 len = BTRFS_NAME_LEN;
3459 error = REF_ERR_NAME_TOO_LONG;
3461 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3463 if (key->type == BTRFS_ROOT_REF_KEY) {
3464 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465 index, namebuf, len, key->type, error);
3466 } else {
3467 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468 index, namebuf, len, key->type, error);
3470 return 0;
3473 static void free_corrupt_block(struct cache_extent *cache)
3475 struct btrfs_corrupt_block *corrupt;
3477 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3478 free(corrupt);
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3484 * Repair the btree of the given root.
3486 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3487 * and rebalance the tree.
3488 * After the fix, the btree should be writable.
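/*
 * The repair runs in two passes: the first deletes the pointers to the
 * corrupt blocks and frees their extents, the second re-searches each
 * recorded key with ins_len == -1 so btrfs_search_slot() rebalances the
 * now-underfilled nodes.
 */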
3490 static int repair_btree(struct btrfs_root *root,
3491 struct cache_tree *corrupt_blocks)
3493 struct btrfs_trans_handle *trans;
3494 struct btrfs_path *path;
3495 struct btrfs_corrupt_block *corrupt;
3496 struct cache_extent *cache;
3497 struct btrfs_key key;
3498 u64 offset;
3499 int level;
3500 int ret = 0;
3502 if (cache_tree_empty(corrupt_blocks))
3503 return 0;
3505 path = btrfs_alloc_path();
3506 if (!path)
3507 return -ENOMEM;
3509 trans = btrfs_start_transaction(root, 1);
3510 if (IS_ERR(trans)) {
3511 ret = PTR_ERR(trans);
3512 fprintf(stderr, "Error starting transaction: %s\n",
3513 strerror(-ret));
3514 goto out_free_path;
3516 cache = first_cache_extent(corrupt_blocks);
3517 while (cache) {
3518 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519 cache);
3520 level = corrupt->level;
3521 path->lowest_level = level;
3522 key.objectid = corrupt->key.objectid;
3523 key.type = corrupt->key.type;
3524 key.offset = corrupt->key.offset;
3527 * Here we don't want to do any tree balance, since it may
3528 * trigger a balance against a corrupted sibling leaf/node,
3529 * so ins_len is set to 0 here.
3530 * Balancing will be done after all corrupt nodes/leaves are deleted.
3532 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3533 if (ret < 0)
3534 goto out;
3535 offset = btrfs_node_blockptr(path->nodes[level],
3536 path->slots[level]);
3538 /* Remove the ptr */
3539 ret = btrfs_del_ptr(trans, root, path, level,
3540 path->slots[level]);
3541 if (ret < 0)
3542 goto out;
3544 * Remove the corresponding extent;
3545 * the return value is not checked.
3547 btrfs_release_path(path);
3548 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549 0, root->root_key.objectid,
3550 level - 1, 0);
3551 cache = next_cache_extent(cache);
3554 /* Balance the btree using btrfs_search_slot() */
3555 cache = first_cache_extent(corrupt_blocks);
3556 while (cache) {
3557 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558 cache);
3559 memcpy(&key, &corrupt->key, sizeof(key));
3560 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3561 if (ret < 0)
3562 goto out;
3563 /* the return value will always be > 0 since it won't find the item */
3564 ret = 0;
3565 btrfs_release_path(path);
3566 cache = next_cache_extent(cache);
3568 out:
3569 btrfs_commit_transaction(trans, root);
3570 out_free_path:
3571 btrfs_free_path(path);
3572 return ret;
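/*
 * Check a single fs/subvolume tree: walk it to build inode and root-ref
 * records, track shared nodes, resume from drop_progress for partially
 * dropped roots, record corrupted tree blocks and, in repair mode, try to
 * fix them before validating the collected inode records.
 */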
3575 static int check_fs_root(struct btrfs_root *root,
3576 struct cache_tree *root_cache,
3577 struct walk_control *wc)
3579 int ret = 0;
3580 int err = 0;
3581 int wret;
3582 int level;
3583 struct btrfs_path path;
3584 struct shared_node root_node;
3585 struct root_record *rec;
3586 struct btrfs_root_item *root_item = &root->root_item;
3587 struct cache_tree corrupt_blocks;
3588 struct orphan_data_extent *orphan;
3589 struct orphan_data_extent *tmp;
3590 enum btrfs_tree_block_status status;
3591 struct node_refs nrefs;
3594 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
3596 * Unlike its usage in the extent tree check, here we do it on a per
3597 * fs/subvol tree basis.
3599 cache_tree_init(&corrupt_blocks);
3600 root->fs_info->corrupt_blocks = &corrupt_blocks;
3602 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603 rec = get_root_rec(root_cache, root->root_key.objectid);
3604 BUG_ON(IS_ERR(rec));
3605 if (btrfs_root_refs(root_item) > 0)
3606 rec->found_root_item = 1;
3609 btrfs_init_path(&path);
3610 memset(&root_node, 0, sizeof(root_node));
3611 cache_tree_init(&root_node.root_cache);
3612 cache_tree_init(&root_node.inode_cache);
3613 memset(&nrefs, 0, sizeof(nrefs));
3615 /* Move the orphan extent records to the corresponding inode_record */
3616 list_for_each_entry_safe(orphan, tmp,
3617 &root->orphan_data_extents, list) {
3618 struct inode_record *inode;
3620 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid, 1);
3622 BUG_ON(IS_ERR(inode));
3623 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624 list_move(&orphan->list, &inode->orphan_extents);
3627 level = btrfs_header_level(root->node);
3628 memset(wc->nodes, 0, sizeof(wc->nodes));
3629 wc->nodes[level] = &root_node;
3630 wc->active_node = level;
3631 wc->root_level = level;
3633 /* We may not have checked the root block, let's do that now */
3634 if (btrfs_is_leaf(root->node))
3635 status = btrfs_check_leaf(root, NULL, root->node);
3636 else
3637 status = btrfs_check_node(root, NULL, root->node);
3638 if (status != BTRFS_TREE_BLOCK_CLEAN)
3639 return -EIO;
3641 if (btrfs_root_refs(root_item) > 0 ||
3642 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643 path.nodes[level] = root->node;
3644 extent_buffer_get(root->node);
3645 path.slots[level] = 0;
3646 } else {
3647 struct btrfs_key key;
3648 struct btrfs_disk_key found_key;
3650 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651 level = root_item->drop_level;
3652 path.lowest_level = level;
3653 if (level > btrfs_header_level(root->node) ||
3654 level >= BTRFS_MAX_LEVEL) {
3655 error("ignoring invalid drop level: %u", level);
3656 goto skip_walking;
3658 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3659 if (wret < 0)
3660 goto skip_walking;
3661 btrfs_node_key(path.nodes[level], &found_key,
3662 path.slots[level]);
3663 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664 sizeof(found_key)));
3667 while (1) {
3668 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3669 if (wret < 0)
3670 ret = wret;
3671 if (wret != 0)
3672 break;
3674 wret = walk_up_tree(root, &path, wc, &level);
3675 if (wret < 0)
3676 ret = wret;
3677 if (wret != 0)
3678 break;
3680 skip_walking:
3681 btrfs_release_path(&path);
3683 if (!cache_tree_empty(&corrupt_blocks)) {
3684 struct cache_extent *cache;
3685 struct btrfs_corrupt_block *corrupt;
3687 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688 root->root_key.objectid);
3689 cache = first_cache_extent(&corrupt_blocks);
3690 while (cache) {
3691 corrupt = container_of(cache,
3692 struct btrfs_corrupt_block,
3693 cache);
3694 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695 cache->start, corrupt->level,
3696 corrupt->key.objectid, corrupt->key.type,
3697 corrupt->key.offset);
3698 cache = next_cache_extent(cache);
3700 if (repair) {
3701 printf("Try to repair the btree for root %llu\n",
3702 root->root_key.objectid);
3703 ret = repair_btree(root, &corrupt_blocks);
3704 if (ret < 0)
3705 fprintf(stderr, "Failed to repair btree: %s\n",
3706 strerror(-ret));
3707 if (!ret)
3708 printf("Btree for root %llu is fixed\n",
3709 root->root_key.objectid);
3713 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3714 if (err < 0)
3715 ret = err;
3717 if (root_node.current) {
3718 root_node.current->checked = 1;
3719 maybe_free_inode_rec(&root_node.inode_cache,
3720 root_node.current);
3723 err = check_inode_recs(root, &root_node.inode_cache);
3724 if (!ret)
3725 ret = err;
3727 free_corrupt_blocks_tree(&corrupt_blocks);
3728 root->fs_info->corrupt_blocks = NULL;
3729 free_orphan_data_extents(&root->orphan_data_extents);
3730 return ret;
3733 static int fs_root_objectid(u64 objectid)
3735 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737 return 1;
3738 return is_fstree(objectid);
3741 static int check_fs_roots(struct btrfs_root *root,
3742 struct cache_tree *root_cache)
3744 struct btrfs_path path;
3745 struct btrfs_key key;
3746 struct walk_control wc;
3747 struct extent_buffer *leaf, *tree_node;
3748 struct btrfs_root *tmp_root;
3749 struct btrfs_root *tree_root = root->fs_info->tree_root;
3750 int ret;
3751 int err = 0;
3753 if (ctx.progress_enabled) {
3754 ctx.tp = TASK_FS_ROOTS;
3755 task_start(ctx.info);
3759 * Just in case we made any changes to the extent tree that weren't
3760 * reflected into the free space cache yet.
3762 if (repair)
3763 reset_cached_block_groups(root->fs_info);
3764 memset(&wc, 0, sizeof(wc));
3765 cache_tree_init(&wc.shared);
3766 btrfs_init_path(&path);
3768 again:
3769 key.offset = 0;
3770 key.objectid = 0;
3771 key.type = BTRFS_ROOT_ITEM_KEY;
3772 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3773 if (ret < 0) {
3774 err = 1;
3775 goto out;
3777 tree_node = tree_root->node;
3778 while (1) {
3779 if (tree_node != tree_root->node) {
3780 free_root_recs_tree(root_cache);
3781 btrfs_release_path(&path);
3782 goto again;
3784 leaf = path.nodes[0];
3785 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786 ret = btrfs_next_leaf(tree_root, &path);
3787 if (ret) {
3788 if (ret < 0)
3789 err = 1;
3790 break;
3792 leaf = path.nodes[0];
3794 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796 fs_root_objectid(key.objectid)) {
3797 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798 tmp_root = btrfs_read_fs_root_no_cache(
3799 root->fs_info, &key);
3800 } else {
3801 key.offset = (u64)-1;
3802 tmp_root = btrfs_read_fs_root(
3803 root->fs_info, &key);
3805 if (IS_ERR(tmp_root)) {
3806 err = 1;
3807 goto next;
3809 ret = check_fs_root(tmp_root, root_cache, &wc);
3810 if (ret == -EAGAIN) {
3811 free_root_recs_tree(root_cache);
3812 btrfs_release_path(&path);
3813 goto again;
3815 if (ret)
3816 err = 1;
3817 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818 btrfs_free_fs_root(tmp_root);
3819 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820 key.type == BTRFS_ROOT_BACKREF_KEY) {
3821 process_root_ref(leaf, path.slots[0], &key,
3822 root_cache);
3824 next:
3825 path.slots[0]++;
3827 out:
3828 btrfs_release_path(&path);
3829 if (err)
3830 free_extent_cache_tree(&wc.shared);
3831 if (!cache_tree_empty(&wc.shared))
3832 fprintf(stderr, "warning line %d\n", __LINE__);
3834 task_stop(ctx.info);
3836 return err;
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3841 struct list_head *cur = rec->backrefs.next;
3842 struct extent_backref *back;
3843 struct tree_backref *tback;
3844 struct data_backref *dback;
3845 u64 found = 0;
3846 int err = 0;
3848 while(cur != &rec->backrefs) {
3849 back = to_extent_backref(cur);
3850 cur = cur->next;
3851 if (!back->found_extent_tree) {
3852 err = 1;
3853 if (!print_errs)
3854 goto out;
3855 if (back->is_data) {
3856 dback = to_data_backref(back);
3857 fprintf(stderr, "Backref %llu %s %llu"
3858 " owner %llu offset %llu num_refs %lu"
3859 " not found in extent tree\n",
3860 (unsigned long long)rec->start,
3861 back->full_backref ?
3862 "parent" : "root",
3863 back->full_backref ?
3864 (unsigned long long)dback->parent:
3865 (unsigned long long)dback->root,
3866 (unsigned long long)dback->owner,
3867 (unsigned long long)dback->offset,
3868 (unsigned long)dback->num_refs);
3869 } else {
3870 tback = to_tree_backref(back);
3871 fprintf(stderr, "Backref %llu parent %llu"
3872 " root %llu not found in extent tree\n",
3873 (unsigned long long)rec->start,
3874 (unsigned long long)tback->parent,
3875 (unsigned long long)tback->root);
3878 if (!back->is_data && !back->found_ref) {
3879 err = 1;
3880 if (!print_errs)
3881 goto out;
3882 tback = to_tree_backref(back);
3883 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884 (unsigned long long)rec->start,
3885 back->full_backref ? "parent" : "root",
3886 back->full_backref ?
3887 (unsigned long long)tback->parent :
3888 (unsigned long long)tback->root, back);
3890 if (back->is_data) {
3891 dback = to_data_backref(back);
3892 if (dback->found_ref != dback->num_refs) {
3893 err = 1;
3894 if (!print_errs)
3895 goto out;
3896 fprintf(stderr, "Incorrect local backref count"
3897 " on %llu %s %llu owner %llu"
3898 " offset %llu found %u wanted %u back %p\n",
3899 (unsigned long long)rec->start,
3900 back->full_backref ?
3901 "parent" : "root",
3902 back->full_backref ?
3903 (unsigned long long)dback->parent:
3904 (unsigned long long)dback->root,
3905 (unsigned long long)dback->owner,
3906 (unsigned long long)dback->offset,
3907 dback->found_ref, dback->num_refs, back);
3909 if (dback->disk_bytenr != rec->start) {
3910 err = 1;
3911 if (!print_errs)
3912 goto out;
3913 fprintf(stderr, "Backref disk bytenr does not"
3914 " match extent record, bytenr=%llu, "
3915 "ref bytenr=%llu\n",
3916 (unsigned long long)rec->start,
3917 (unsigned long long)dback->disk_bytenr);
3920 if (dback->bytes != rec->nr) {
3921 err = 1;
3922 if (!print_errs)
3923 goto out;
3924 fprintf(stderr, "Backref bytes do not match "
3925 "extent backref, bytenr=%llu, ref "
3926 "bytes=%llu, backref bytes=%llu\n",
3927 (unsigned long long)rec->start,
3928 (unsigned long long)rec->nr,
3929 (unsigned long long)dback->bytes);
3932 if (!back->is_data) {
3933 found += 1;
3934 } else {
3935 dback = to_data_backref(back);
3936 found += dback->found_ref;
3939 if (found != rec->refs) {
3940 err = 1;
3941 if (!print_errs)
3942 goto out;
3943 fprintf(stderr, "Incorrect global backref count "
3944 "on %llu found %llu wanted %llu\n",
3945 (unsigned long long)rec->start,
3946 (unsigned long long)found,
3947 (unsigned long long)rec->refs);
3949 out:
3950 return err;
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3955 struct extent_backref *back;
3956 struct list_head *cur;
3957 while (!list_empty(&rec->backrefs)) {
3958 cur = rec->backrefs.next;
3959 back = to_extent_backref(cur);
3960 list_del(cur);
3961 free(back);
3963 return 0;
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967 struct cache_tree *extent_cache)
3969 struct cache_extent *cache;
3970 struct extent_record *rec;
3972 while (1) {
3973 cache = first_cache_extent(extent_cache);
3974 if (!cache)
3975 break;
3976 rec = container_of(cache, struct extent_record, cache);
3977 remove_cache_extent(extent_cache, cache);
3978 free_all_extent_backrefs(rec);
3979 free(rec);
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984 struct extent_record *rec)
3986 if (rec->content_checked && rec->owner_ref_checked &&
3987 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989 !rec->bad_full_backref && !rec->crossing_stripes &&
3990 !rec->wrong_chunk_type) {
3991 remove_cache_extent(extent_cache, &rec->cache);
3992 free_all_extent_backrefs(rec);
3993 list_del_init(&rec->list);
3994 free(rec);
3996 return 0;
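/*
 * Verify the owner recorded in a tree block's header: either a matching
 * non-full-backref tree backref exists for that root, or the owner's fs
 * tree still points to this block at the expected slot one level up.
 */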
3999 static int check_owner_ref(struct btrfs_root *root,
4000 struct extent_record *rec,
4001 struct extent_buffer *buf)
4003 struct extent_backref *node;
4004 struct tree_backref *back;
4005 struct btrfs_root *ref_root;
4006 struct btrfs_key key;
4007 struct btrfs_path path;
4008 struct extent_buffer *parent;
4009 int level;
4010 int found = 0;
4011 int ret;
4013 list_for_each_entry(node, &rec->backrefs, list) {
4014 if (node->is_data)
4015 continue;
4016 if (!node->found_ref)
4017 continue;
4018 if (node->full_backref)
4019 continue;
4020 back = to_tree_backref(node);
4021 if (btrfs_header_owner(buf) == back->root)
4022 return 0;
4024 BUG_ON(rec->is_root);
4026 /* try to find the block by search corresponding fs tree */
4027 key.objectid = btrfs_header_owner(buf);
4028 key.type = BTRFS_ROOT_ITEM_KEY;
4029 key.offset = (u64)-1;
4031 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032 if (IS_ERR(ref_root))
4033 return 1;
4035 level = btrfs_header_level(buf);
4036 if (level == 0)
4037 btrfs_item_key_to_cpu(buf, &key, 0);
4038 else
4039 btrfs_node_key_to_cpu(buf, &key, 0);
4041 btrfs_init_path(&path);
4042 path.lowest_level = level + 1;
4043 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4044 if (ret < 0)
4045 return 0;
4047 parent = path.nodes[level + 1];
4048 if (parent && buf->start == btrfs_node_blockptr(parent,
4049 path.slots[level + 1]))
4050 found = 1;
4052 btrfs_release_path(&path);
4053 return found ? 0 : 1;
4056 static int is_extent_tree_record(struct extent_record *rec)
4058 struct list_head *cur = rec->backrefs.next;
4059 struct extent_backref *node;
4060 struct tree_backref *back;
4061 int is_extent = 0;
4063 while(cur != &rec->backrefs) {
4064 node = to_extent_backref(cur);
4065 cur = cur->next;
4066 if (node->is_data)
4067 return 0;
4068 back = to_tree_backref(node);
4069 if (node->full_backref)
4070 return 0;
4071 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4072 is_extent = 1;
4074 return is_extent;
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079 struct cache_tree *extent_cache,
4080 u64 start, u64 len)
4082 struct extent_record *rec;
4083 struct cache_extent *cache;
4084 struct btrfs_key key;
4086 cache = lookup_cache_extent(extent_cache, start, len);
4087 if (!cache)
4088 return 0;
4090 rec = container_of(cache, struct extent_record, cache);
4091 if (!is_extent_tree_record(rec))
4092 return 0;
4094 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
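/*
 * Swap the keys/items at @slot and @slot + 1 in place. For nodes the two
 * key pointers are exchanged; for leaves the item data and offsets are
 * swapped and the item keys updated via btrfs_set_item_key_unsafe().
 * Used by fix_key_order() to sort mis-ordered keys.
 */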
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099 struct extent_buffer *buf, int slot)
4101 if (btrfs_header_level(buf)) {
4102 struct btrfs_key_ptr ptr1, ptr2;
4104 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105 sizeof(struct btrfs_key_ptr));
4106 read_extent_buffer(buf, &ptr2,
4107 btrfs_node_key_ptr_offset(slot + 1),
4108 sizeof(struct btrfs_key_ptr));
4109 write_extent_buffer(buf, &ptr1,
4110 btrfs_node_key_ptr_offset(slot + 1),
4111 sizeof(struct btrfs_key_ptr));
4112 write_extent_buffer(buf, &ptr2,
4113 btrfs_node_key_ptr_offset(slot),
4114 sizeof(struct btrfs_key_ptr));
4115 if (slot == 0) {
4116 struct btrfs_disk_key key;
4117 btrfs_node_key(buf, &key, 0);
4118 btrfs_fixup_low_keys(root, path, &key,
4119 btrfs_header_level(buf) + 1);
4121 } else {
4122 struct btrfs_item *item1, *item2;
4123 struct btrfs_key k1, k2;
4124 char *item1_data, *item2_data;
4125 u32 item1_offset, item2_offset, item1_size, item2_size;
4127 item1 = btrfs_item_nr(slot);
4128 item2 = btrfs_item_nr(slot + 1);
4129 btrfs_item_key_to_cpu(buf, &k1, slot);
4130 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131 item1_offset = btrfs_item_offset(buf, item1);
4132 item2_offset = btrfs_item_offset(buf, item2);
4133 item1_size = btrfs_item_size(buf, item1);
4134 item2_size = btrfs_item_size(buf, item2);
4136 item1_data = malloc(item1_size);
4137 if (!item1_data)
4138 return -ENOMEM;
4139 item2_data = malloc(item2_size);
4140 if (!item2_data) {
4141 free(item1_data);
4142 return -ENOMEM;
4145 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4148 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4150 free(item1_data);
4151 free(item2_data);
4153 btrfs_set_item_offset(buf, item1, item2_offset);
4154 btrfs_set_item_offset(buf, item2, item1_offset);
4155 btrfs_set_item_size(buf, item1, item2_size);
4156 btrfs_set_item_size(buf, item2, item1_size);
4158 path->slots[0] = slot;
4159 btrfs_set_item_key_unsafe(root, path, &k2);
4160 path->slots[0] = slot + 1;
4161 btrfs_set_item_key_unsafe(root, path, &k1);
4163 return 0;
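/*
 * Walk the block at path->lowest_level and swap any adjacent entries whose
 * keys compare out of order, until the whole block is sorted or a swap
 * fails.
 */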
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167 struct btrfs_root *root,
4168 struct btrfs_path *path)
4170 struct extent_buffer *buf;
4171 struct btrfs_key k1, k2;
4172 int i;
4173 int level = path->lowest_level;
4174 int ret = -EIO;
4176 buf = path->nodes[level];
4177 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178 if (level) {
4179 btrfs_node_key_to_cpu(buf, &k1, i);
4180 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181 } else {
4182 btrfs_item_key_to_cpu(buf, &k1, i);
4183 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4185 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186 continue;
4187 ret = swap_values(root, path, buf, i);
4188 if (ret)
4189 break;
4190 btrfs_mark_buffer_dirty(buf);
4191 i = 0;
4193 return ret;
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197 struct btrfs_root *root,
4198 struct btrfs_path *path,
4199 struct extent_buffer *buf, int slot)
4201 struct btrfs_key key;
4202 int nritems = btrfs_header_nritems(buf);
4204 btrfs_item_key_to_cpu(buf, &key, slot);
4206 /* These are all the keys we can deal with missing. */
4207 if (key.type != BTRFS_DIR_INDEX_KEY &&
4208 key.type != BTRFS_EXTENT_ITEM_KEY &&
4209 key.type != BTRFS_METADATA_ITEM_KEY &&
4210 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4212 return -1;
4214 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215 (unsigned long long)key.objectid, key.type,
4216 (unsigned long long)key.offset, slot, buf->start);
4217 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218 btrfs_item_nr_offset(slot + 1),
4219 sizeof(struct btrfs_item) *
4220 (nritems - slot - 1));
4221 btrfs_set_header_nritems(buf, nritems - 1);
4222 if (slot == 0) {
4223 struct btrfs_disk_key disk_key;
4225 btrfs_item_key(buf, &disk_key, 0);
4226 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4228 btrfs_mark_buffer_dirty(buf);
4229 return 0;
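/*
 * Repair leaf item offsets: the first item must end exactly at the leaf data
 * size and every later item must end where the previous item's data begins.
 * Items that would overflow those bounds are deleted when their key type is
 * safe to drop, otherwise the item data is shifted into place.
 */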
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233 struct btrfs_root *root,
4234 struct btrfs_path *path)
4236 struct extent_buffer *buf;
4237 int i;
4238 int ret = 0;
4240 /* We should only get this for leaves */
4241 BUG_ON(path->lowest_level);
4242 buf = path->nodes[0];
4243 again:
4244 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245 unsigned int shift = 0, offset;
4247 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248 BTRFS_LEAF_DATA_SIZE(root)) {
4249 if (btrfs_item_end_nr(buf, i) >
4250 BTRFS_LEAF_DATA_SIZE(root)) {
4251 ret = delete_bogus_item(trans, root, path,
4252 buf, i);
4253 if (!ret)
4254 goto again;
4255 fprintf(stderr, "item is off the end of the "
4256 "leaf, can't fix\n");
4257 ret = -EIO;
4258 break;
4260 shift = BTRFS_LEAF_DATA_SIZE(root) -
4261 btrfs_item_end_nr(buf, i);
4262 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263 btrfs_item_offset_nr(buf, i - 1)) {
4264 if (btrfs_item_end_nr(buf, i) >
4265 btrfs_item_offset_nr(buf, i - 1)) {
4266 ret = delete_bogus_item(trans, root, path,
4267 buf, i);
4268 if (!ret)
4269 goto again;
4270 fprintf(stderr, "items overlap, can't fix\n");
4271 ret = -EIO;
4272 break;
4274 shift = btrfs_item_offset_nr(buf, i - 1) -
4275 btrfs_item_end_nr(buf, i);
4277 if (!shift)
4278 continue;
4280 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281 i, shift, (unsigned long long)buf->start);
4282 offset = btrfs_item_offset_nr(buf, i);
4283 memmove_extent_buffer(buf,
4284 btrfs_leaf_data(buf) + offset + shift,
4285 btrfs_leaf_data(buf) + offset,
4286 btrfs_item_size_nr(buf, i));
4287 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288 offset + shift);
4289 btrfs_mark_buffer_dirty(buf);
4293 * We may have moved things, in which case we want to exit so we don't
4294 * write those changes out. Once we have proper abort functionality in
4295 * progs this can be changed to something nicer.
4297 BUG_ON(ret);
4298 return ret;
4302 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4303 * then just return -EIO.
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306 struct extent_buffer *buf,
4307 enum btrfs_tree_block_status status)
4309 struct btrfs_trans_handle *trans;
4310 struct ulist *roots;
4311 struct ulist_node *node;
4312 struct btrfs_root *search_root;
4313 struct btrfs_path *path;
4314 struct ulist_iterator iter;
4315 struct btrfs_key root_key, key;
4316 int ret;
4318 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4320 return -EIO;
4322 path = btrfs_alloc_path();
4323 if (!path)
4324 return -EIO;
4326 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4327 0, &roots);
4328 if (ret) {
4329 btrfs_free_path(path);
4330 return -EIO;
4333 ULIST_ITER_INIT(&iter);
4334 while ((node = ulist_next(roots, &iter))) {
4335 root_key.objectid = node->val;
4336 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337 root_key.offset = (u64)-1;
4339 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340 if (IS_ERR(search_root)) {
4341 ret = -EIO;
4342 break;
4346 trans = btrfs_start_transaction(search_root, 0);
4347 if (IS_ERR(trans)) {
4348 ret = PTR_ERR(trans);
4349 break;
4352 path->lowest_level = btrfs_header_level(buf);
4353 path->skip_check_block = 1;
4354 if (path->lowest_level)
4355 btrfs_node_key_to_cpu(buf, &key, 0);
4356 else
4357 btrfs_item_key_to_cpu(buf, &key, 0);
4358 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4359 if (ret) {
4360 ret = -EIO;
4361 btrfs_commit_transaction(trans, search_root);
4362 break;
4364 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365 ret = fix_key_order(trans, search_root, path);
4366 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367 ret = fix_item_offset(trans, search_root, path);
4368 if (ret) {
4369 btrfs_commit_transaction(trans, search_root);
4370 break;
4372 btrfs_release_path(path);
4373 btrfs_commit_transaction(trans, search_root);
4375 ulist_free(roots);
4376 btrfs_free_path(path);
4377 return ret;
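/*
 * Validate one tree block against its cached extent record: run the
 * leaf/node sanity checks, optionally attempt a repair, and mark the
 * record's content and owner refs as checked.  Returns -EAGAIN when a
 * repair COWed blocks and the caller needs to restart the scan.
 */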
4380 static int check_block(struct btrfs_root *root,
4381 struct cache_tree *extent_cache,
4382 struct extent_buffer *buf, u64 flags)
4384 struct extent_record *rec;
4385 struct cache_extent *cache;
4386 struct btrfs_key key;
4387 enum btrfs_tree_block_status status;
4388 int ret = 0;
4389 int level;
4391 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4392 if (!cache)
4393 return 1;
4394 rec = container_of(cache, struct extent_record, cache);
4395 rec->generation = btrfs_header_generation(buf);
4397 level = btrfs_header_level(buf);
4398 if (btrfs_header_nritems(buf) > 0) {
4400 if (level == 0)
4401 btrfs_item_key_to_cpu(buf, &key, 0);
4402 else
4403 btrfs_node_key_to_cpu(buf, &key, 0);
4405 rec->info_objectid = key.objectid;
4407 rec->info_level = level;
4409 if (btrfs_is_leaf(buf))
4410 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411 else
4412 status = btrfs_check_node(root, &rec->parent_key, buf);
4414 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415 if (repair)
4416 status = try_to_fix_bad_block(root, buf, status);
4417 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418 ret = -EIO;
4419 fprintf(stderr, "bad block %llu\n",
4420 (unsigned long long)buf->start);
4421 } else {
4423 * Signal to callers we need to start the scan over
4424 * again since we'll have cowed blocks.
4426 ret = -EAGAIN;
4428 } else {
4429 rec->content_checked = 1;
4430 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431 rec->owner_ref_checked = 1;
4432 else {
4433 ret = check_owner_ref(root, rec, buf);
4434 if (!ret)
4435 rec->owner_ref_checked = 1;
4438 if (!ret)
4439 maybe_free_extent_rec(extent_cache, rec);
4440 return ret;
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444 u64 parent, u64 root)
4446 struct list_head *cur = rec->backrefs.next;
4447 struct extent_backref *node;
4448 struct tree_backref *back;
4450 while(cur != &rec->backrefs) {
4451 node = to_extent_backref(cur);
4452 cur = cur->next;
4453 if (node->is_data)
4454 continue;
4455 back = to_tree_backref(node);
4456 if (parent > 0) {
4457 if (!node->full_backref)
4458 continue;
4459 if (parent == back->parent)
4460 return back;
4461 } else {
4462 if (node->full_backref)
4463 continue;
4464 if (back->root == root)
4465 return back;
4468 return NULL;
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472 u64 parent, u64 root)
4474 struct tree_backref *ref = malloc(sizeof(*ref));
4476 if (!ref)
4477 return NULL;
4478 memset(&ref->node, 0, sizeof(ref->node));
4479 if (parent > 0) {
4480 ref->parent = parent;
4481 ref->node.full_backref = 1;
4482 } else {
4483 ref->root = root;
4484 ref->node.full_backref = 0;
4486 list_add_tail(&ref->node.list, &rec->backrefs);
4488 return ref;
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492 u64 parent, u64 root,
4493 u64 owner, u64 offset,
4494 int found_ref,
4495 u64 disk_bytenr, u64 bytes)
4497 struct list_head *cur = rec->backrefs.next;
4498 struct extent_backref *node;
4499 struct data_backref *back;
4501 while(cur != &rec->backrefs) {
4502 node = to_extent_backref(cur);
4503 cur = cur->next;
4504 if (!node->is_data)
4505 continue;
4506 back = to_data_backref(node);
4507 if (parent > 0) {
4508 if (!node->full_backref)
4509 continue;
4510 if (parent == back->parent)
4511 return back;
4512 } else {
4513 if (node->full_backref)
4514 continue;
4515 if (back->root == root && back->owner == owner &&
4516 back->offset == offset) {
4517 if (found_ref && node->found_ref &&
4518 (back->bytes != bytes ||
4519 back->disk_bytenr != disk_bytenr))
4520 continue;
4521 return back;
4525 return NULL;
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529 u64 parent, u64 root,
4530 u64 owner, u64 offset,
4531 u64 max_size)
4533 struct data_backref *ref = malloc(sizeof(*ref));
4535 if (!ref)
4536 return NULL;
4537 memset(&ref->node, 0, sizeof(ref->node));
4538 ref->node.is_data = 1;
4540 if (parent > 0) {
4541 ref->parent = parent;
4542 ref->owner = 0;
4543 ref->offset = 0;
4544 ref->node.full_backref = 1;
4545 } else {
4546 ref->root = root;
4547 ref->owner = owner;
4548 ref->offset = offset;
4549 ref->node.full_backref = 0;
4551 ref->bytes = max_size;
4552 ref->found_ref = 0;
4553 ref->num_refs = 0;
4554 list_add_tail(&ref->node.list, &rec->backrefs);
4555 if (max_size > rec->max_size)
4556 rec->max_size = max_size;
4557 return ref;
4560 /* Check if the type of an extent matches its chunk */
4561 static void check_extent_type(struct extent_record *rec)
4563 struct btrfs_block_group_cache *bg_cache;
4565 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4566 if (!bg_cache)
4567 return;
4569 /* data extent, check chunk directly */
4570 if (!rec->metadata) {
4571 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572 rec->wrong_chunk_type = 1;
4573 return;
4576 /* metadata extent, check the obvious case first */
4577 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578 BTRFS_BLOCK_GROUP_METADATA))) {
4579 rec->wrong_chunk_type = 1;
4580 return;
4584 * Check SYSTEM extents: since they are also marked as metadata, we can
4585 * only tell one is really a SYSTEM extent by looking at its backref
4587 if (!list_empty(&rec->backrefs)) {
4588 struct extent_backref *node;
4589 struct tree_backref *tback;
4590 u64 bg_type;
4592 node = to_extent_backref(rec->backrefs.next);
4593 if (node->is_data) {
4594 /* tree block shouldn't have data backref */
4595 rec->wrong_chunk_type = 1;
4596 return;
4598 tback = container_of(node, struct tree_backref, node);
4600 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602 else
4603 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604 if (!(bg_cache->flags & bg_type))
4605 rec->wrong_chunk_type = 1;
4610 * Allocate a new extent record, fill default values from @tmpl and insert it
4611 * into @extent_cache. The caller is supposed to make sure [start, nr) is not
4612 * already in the cache, otherwise this fails.
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615 struct extent_record *tmpl)
4617 struct extent_record *rec;
4618 int ret = 0;
4620 rec = malloc(sizeof(*rec));
4621 if (!rec)
4622 return -ENOMEM;
4623 rec->start = tmpl->start;
4624 rec->max_size = tmpl->max_size;
4625 rec->nr = max(tmpl->nr, tmpl->max_size);
4626 rec->found_rec = tmpl->found_rec;
4627 rec->content_checked = tmpl->content_checked;
4628 rec->owner_ref_checked = tmpl->owner_ref_checked;
4629 rec->num_duplicates = 0;
4630 rec->metadata = tmpl->metadata;
4631 rec->flag_block_full_backref = FLAG_UNSET;
4632 rec->bad_full_backref = 0;
4633 rec->crossing_stripes = 0;
4634 rec->wrong_chunk_type = 0;
4635 rec->is_root = tmpl->is_root;
4636 rec->refs = tmpl->refs;
4637 rec->extent_item_refs = tmpl->extent_item_refs;
4638 rec->parent_generation = tmpl->parent_generation;
4639 INIT_LIST_HEAD(&rec->backrefs);
4640 INIT_LIST_HEAD(&rec->dups);
4641 INIT_LIST_HEAD(&rec->list);
4642 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643 rec->cache.start = tmpl->start;
4644 rec->cache.size = tmpl->nr;
4645 ret = insert_cache_extent(extent_cache, &rec->cache);
4646 BUG_ON(ret);
4647 bytes_used += rec->nr;
4649 if (tmpl->metadata)
4650 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651 global_info->tree_root->nodesize);
4652 check_extent_type(rec);
4653 return ret;
4657 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4658 * some are hints:
4659 * - refs - if found, increase refs
4660 * - is_root - if found, set
4661 * - content_checked - if found, set
4662 * - owner_ref_checked - if found, set
4664 * If not found, create a new one, initialize and insert.
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667 struct extent_record *tmpl)
4669 struct extent_record *rec;
4670 struct cache_extent *cache;
4671 int ret = 0;
4672 int dup = 0;
4674 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4675 if (cache) {
4676 rec = container_of(cache, struct extent_record, cache);
4677 if (tmpl->refs)
4678 rec->refs++;
4679 if (rec->nr == 1)
4680 rec->nr = max(tmpl->nr, tmpl->max_size);
4683 * We need to make sure to reset nr to whatever the extent
4684 * record says was the real size, so that we can compare it to
4685 * the backrefs.
4687 if (tmpl->found_rec) {
4688 if (tmpl->start != rec->start || rec->found_rec) {
4689 struct extent_record *tmp;
4691 dup = 1;
4692 if (list_empty(&rec->list))
4693 list_add_tail(&rec->list,
4694 &duplicate_extents);
4697 * We have to do this song and dance in case we
4698 * find an extent record that falls inside of
4699 * our current extent record but does not have
4700 * the same objectid.
4702 tmp = malloc(sizeof(*tmp));
4703 if (!tmp)
4704 return -ENOMEM;
4705 tmp->start = tmpl->start;
4706 tmp->max_size = tmpl->max_size;
4707 tmp->nr = tmpl->nr;
4708 tmp->found_rec = 1;
4709 tmp->metadata = tmpl->metadata;
4710 tmp->extent_item_refs = tmpl->extent_item_refs;
4711 INIT_LIST_HEAD(&tmp->list);
4712 list_add_tail(&tmp->list, &rec->dups);
4713 rec->num_duplicates++;
4714 } else {
4715 rec->nr = tmpl->nr;
4716 rec->found_rec = 1;
4720 if (tmpl->extent_item_refs && !dup) {
4721 if (rec->extent_item_refs) {
4722 fprintf(stderr, "block %llu rec "
4723 "extent_item_refs %llu, passed %llu\n",
4724 (unsigned long long)tmpl->start,
4725 (unsigned long long)
4726 rec->extent_item_refs,
4727 (unsigned long long)tmpl->extent_item_refs);
4729 rec->extent_item_refs = tmpl->extent_item_refs;
4731 if (tmpl->is_root)
4732 rec->is_root = 1;
4733 if (tmpl->content_checked)
4734 rec->content_checked = 1;
4735 if (tmpl->owner_ref_checked)
4736 rec->owner_ref_checked = 1;
4737 memcpy(&rec->parent_key, &tmpl->parent_key,
4738 sizeof(tmpl->parent_key));
4739 if (tmpl->parent_generation)
4740 rec->parent_generation = tmpl->parent_generation;
4741 if (rec->max_size < tmpl->max_size)
4742 rec->max_size = tmpl->max_size;
4745 * A metadata extent can't cross a stripe_len boundary, otherwise
4746 * kernel scrub won't be able to handle it.
4747 * As stripe_len is now fixed to BTRFS_STRIPE_LEN, just check
4748 * against it.
4750 if (tmpl->metadata)
4751 rec->crossing_stripes = check_crossing_stripes(
4752 rec->start, global_info->tree_root->nodesize);
4753 check_extent_type(rec);
4754 maybe_free_extent_rec(extent_cache, rec);
4755 return ret;
4758 ret = add_extent_rec_nolookup(extent_cache, tmpl);
4760 return ret;
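/*
 * Record a tree backref for @bytenr in the extent cache, creating a stub
 * metadata extent record first if none exists yet.  @found_ref tells a
 * reference discovered while walking the trees apart from one read out of
 * the extent tree.
 */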
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764 u64 parent, u64 root, int found_ref)
4766 struct extent_record *rec;
4767 struct tree_backref *back;
4768 struct cache_extent *cache;
4770 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771 if (!cache) {
4772 struct extent_record tmpl;
4774 memset(&tmpl, 0, sizeof(tmpl));
4775 tmpl.start = bytenr;
4776 tmpl.nr = 1;
4777 tmpl.metadata = 1;
4779 add_extent_rec_nolookup(extent_cache, &tmpl);
4781 /* really a bug in the cache_extent implementation if this happens */
4782 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4783 if (!cache)
4784 return -ENOENT;
4787 rec = container_of(cache, struct extent_record, cache);
4788 if (rec->start != bytenr) {
4790 * Several causes, from an unaligned bytenr to overlapping extents
4792 return -EEXIST;
4795 back = find_tree_backref(rec, parent, root);
4796 if (!back) {
4797 back = alloc_tree_backref(rec, parent, root);
4798 if (!back)
4799 return -ENOMEM;
4802 if (found_ref) {
4803 if (back->node.found_ref) {
4804 fprintf(stderr, "Extent back ref already exists "
4805 "for %llu parent %llu root %llu \n",
4806 (unsigned long long)bytenr,
4807 (unsigned long long)parent,
4808 (unsigned long long)root);
4810 back->node.found_ref = 1;
4811 } else {
4812 if (back->node.found_extent_tree) {
4813 fprintf(stderr, "Extent back ref already exists "
4814 "for %llu parent %llu root %llu \n",
4815 (unsigned long long)bytenr,
4816 (unsigned long long)parent,
4817 (unsigned long long)root);
4819 back->node.found_extent_tree = 1;
4821 check_extent_type(rec);
4822 maybe_free_extent_rec(extent_cache, rec);
4823 return 0;
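/*
 * Record a data backref for @bytenr, creating a stub extent record if
 * needed.  With @found_ref set the ref was seen as an EXTENT_DATA item in
 * a file tree, otherwise it came from the extent tree and carries
 * @num_refs.
 */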
4826 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4827 u64 parent, u64 root, u64 owner, u64 offset,
4828 u32 num_refs, int found_ref, u64 max_size)
4830 struct extent_record *rec;
4831 struct data_backref *back;
4832 struct cache_extent *cache;
4834 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4835 if (!cache) {
4836 struct extent_record tmpl;
4838 memset(&tmpl, 0, sizeof(tmpl));
4839 tmpl.start = bytenr;
4840 tmpl.nr = 1;
4841 tmpl.max_size = max_size;
4843 add_extent_rec_nolookup(extent_cache, &tmpl);
4845 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846 if (!cache)
4847 abort();
4850 rec = container_of(cache, struct extent_record, cache);
4851 if (rec->max_size < max_size)
4852 rec->max_size = max_size;
4855 * If found_ref is set then max_size is the real size and must match the
4856 * existing refs. So if we have already found a ref then we need to
4857 * make sure that this ref matches the existing one, otherwise we need
4858 * to add a new backref so we can notice that the backrefs don't match
4859 * and we need to figure out who is telling the truth. This is to
4860 * account for that awful fsync bug I introduced where we'd end up with
4861 * a btrfs_file_extent_item that would have its length include multiple
4862 * prealloc extents or point inside of a prealloc extent.
4864 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4865 bytenr, max_size);
4866 if (!back) {
4867 back = alloc_data_backref(rec, parent, root, owner, offset,
4868 max_size);
4869 BUG_ON(!back);
4872 if (found_ref) {
4873 BUG_ON(num_refs != 1);
4874 if (back->node.found_ref)
4875 BUG_ON(back->bytes != max_size);
4876 back->node.found_ref = 1;
4877 back->found_ref += 1;
4878 back->bytes = max_size;
4879 back->disk_bytenr = bytenr;
4880 rec->refs += 1;
4881 rec->content_checked = 1;
4882 rec->owner_ref_checked = 1;
4883 } else {
4884 if (back->node.found_extent_tree) {
4885 fprintf(stderr, "Extent back ref already exists "
4886 "for %llu parent %llu root %llu "
4887 "owner %llu offset %llu num_refs %lu\n",
4888 (unsigned long long)bytenr,
4889 (unsigned long long)parent,
4890 (unsigned long long)root,
4891 (unsigned long long)owner,
4892 (unsigned long long)offset,
4893 (unsigned long)num_refs);
4895 back->num_refs = num_refs;
4896 back->node.found_extent_tree = 1;
4898 maybe_free_extent_rec(extent_cache, rec);
4899 return 0;
4902 static int add_pending(struct cache_tree *pending,
4903 struct cache_tree *seen, u64 bytenr, u32 size)
4905 int ret;
4906 ret = add_cache_extent(seen, bytenr, size);
4907 if (ret)
4908 return ret;
4909 add_cache_extent(pending, bytenr, size);
4910 return 0;
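/*
 * Pick the next batch of block addresses to read: prefer queued readahead
 * extents, then tree nodes close to @last, and finally plain pending
 * extents.  Fills @bits with up to @bits_nr entries and returns the count.
 */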
4913 static int pick_next_pending(struct cache_tree *pending,
4914 struct cache_tree *reada,
4915 struct cache_tree *nodes,
4916 u64 last, struct block_info *bits, int bits_nr,
4917 int *reada_bits)
4919 unsigned long node_start = last;
4920 struct cache_extent *cache;
4921 int ret;
4923 cache = search_cache_extent(reada, 0);
4924 if (cache) {
4925 bits[0].start = cache->start;
4926 bits[0].size = cache->size;
4927 *reada_bits = 1;
4928 return 1;
4930 *reada_bits = 0;
4931 if (node_start > 32768)
4932 node_start -= 32768;
4934 cache = search_cache_extent(nodes, node_start);
4935 if (!cache)
4936 cache = search_cache_extent(nodes, 0);
4938 if (!cache) {
4939 cache = search_cache_extent(pending, 0);
4940 if (!cache)
4941 return 0;
4942 ret = 0;
4943 do {
4944 bits[ret].start = cache->start;
4945 bits[ret].size = cache->size;
4946 cache = next_cache_extent(cache);
4947 ret++;
4948 } while (cache && ret < bits_nr);
4949 return ret;
4952 ret = 0;
4953 do {
4954 bits[ret].start = cache->start;
4955 bits[ret].size = cache->size;
4956 cache = next_cache_extent(cache);
4957 ret++;
4958 } while (cache && ret < bits_nr);
4960 if (bits_nr - ret > 8) {
4961 u64 lookup = bits[0].start + bits[0].size;
4962 struct cache_extent *next;
4963 next = search_cache_extent(pending, lookup);
4964 while(next) {
4965 if (next->start - lookup > 32768)
4966 break;
4967 bits[ret].start = next->start;
4968 bits[ret].size = next->size;
4969 lookup = next->start + next->size;
4970 ret++;
4971 if (ret == bits_nr)
4972 break;
4973 next = next_cache_extent(next);
4974 if (!next)
4975 break;
4978 return ret;
4981 static void free_chunk_record(struct cache_extent *cache)
4983 struct chunk_record *rec;
4985 rec = container_of(cache, struct chunk_record, cache);
4986 list_del_init(&rec->list);
4987 list_del_init(&rec->dextents);
4988 free(rec);
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4993 cache_tree_free_extents(chunk_cache, free_chunk_record);
4996 static void free_device_record(struct rb_node *node)
4998 struct device_record *rec;
5000 rec = container_of(node, struct device_record, node);
5001 free(rec);
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
5006 int insert_block_group_record(struct block_group_tree *tree,
5007 struct block_group_record *bg_rec)
5009 int ret;
5011 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5012 if (ret)
5013 return ret;
5015 list_add_tail(&bg_rec->list, &tree->block_groups);
5016 return 0;
5019 static void free_block_group_record(struct cache_extent *cache)
5021 struct block_group_record *rec;
5023 rec = container_of(cache, struct block_group_record, cache);
5024 list_del_init(&rec->list);
5025 free(rec);
5028 void free_block_group_tree(struct block_group_tree *tree)
5030 cache_tree_free_extents(&tree->tree, free_block_group_record);
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034 struct device_extent_record *de_rec)
5036 int ret;
5039 * Device extents are a bit different from the other extents, because
5040 * extents that belong to different devices may have the same start
5041 * and size, so we need to use the special extent cache
5042 * search/insert functions.
5044 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5045 if (ret)
5046 return ret;
5048 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5050 return 0;
5053 static void free_device_extent_record(struct cache_extent *cache)
5055 struct device_extent_record *rec;
5057 rec = container_of(cache, struct device_extent_record, cache);
5058 if (!list_empty(&rec->chunk_list))
5059 list_del_init(&rec->chunk_list);
5060 if (!list_empty(&rec->device_list))
5061 list_del_init(&rec->device_list);
5062 free(rec);
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5067 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5070 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5071 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5072 struct extent_buffer *leaf, int slot)
5074 struct btrfs_extent_ref_v0 *ref0;
5075 struct btrfs_key key;
5076 int ret;
5078 btrfs_item_key_to_cpu(leaf, &key, slot);
5079 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5080 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5081 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5082 0, 0);
5083 } else {
5084 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5085 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5087 return ret;
5089 #endif
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092 struct btrfs_key *key,
5093 int slot)
5095 struct btrfs_chunk *ptr;
5096 struct chunk_record *rec;
5097 int num_stripes, i;
5099 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5102 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5103 if (!rec) {
5104 fprintf(stderr, "memory allocation failed\n");
5105 exit(-1);
5108 INIT_LIST_HEAD(&rec->list);
5109 INIT_LIST_HEAD(&rec->dextents);
5110 rec->bg_rec = NULL;
5112 rec->cache.start = key->offset;
5113 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5115 rec->generation = btrfs_header_generation(leaf);
5117 rec->objectid = key->objectid;
5118 rec->type = key->type;
5119 rec->offset = key->offset;
5121 rec->length = rec->cache.size;
5122 rec->owner = btrfs_chunk_owner(leaf, ptr);
5123 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128 rec->num_stripes = num_stripes;
5129 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5131 for (i = 0; i < rec->num_stripes; ++i) {
5132 rec->stripes[i].devid =
5133 btrfs_stripe_devid_nr(leaf, ptr, i);
5134 rec->stripes[i].offset =
5135 btrfs_stripe_offset_nr(leaf, ptr, i);
5136 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5138 BTRFS_UUID_SIZE);
5141 return rec;
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145 struct btrfs_key *key, struct extent_buffer *eb,
5146 int slot)
5148 struct chunk_record *rec;
5149 struct btrfs_chunk *chunk;
5150 int ret = 0;
5152 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5154 * Do an extra check for this chunk item.
5156 * It's still possible to craft a leaf with a CHUNK_ITEM that has the
5157 * wrong owner (3) outside the chunk tree, and still pass both the
5158 * chunk tree check and the owner<->key_type check.
5160 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5161 key->offset);
5162 if (ret < 0) {
5163 error("chunk(%llu, %llu) is not valid, ignore it",
5164 key->offset, btrfs_chunk_length(eb, chunk));
5165 return 0;
5167 rec = btrfs_new_chunk_record(eb, key, slot);
5168 ret = insert_cache_extent(chunk_cache, &rec->cache);
5169 if (ret) {
5170 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171 rec->offset, rec->length);
5172 free(rec);
5175 return ret;
5178 static int process_device_item(struct rb_root *dev_cache,
5179 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5181 struct btrfs_dev_item *ptr;
5182 struct device_record *rec;
5183 int ret = 0;
5185 ptr = btrfs_item_ptr(eb,
5186 slot, struct btrfs_dev_item);
5188 rec = malloc(sizeof(*rec));
5189 if (!rec) {
5190 fprintf(stderr, "memory allocation failed\n");
5191 return -ENOMEM;
5194 rec->devid = key->offset;
5195 rec->generation = btrfs_header_generation(eb);
5197 rec->objectid = key->objectid;
5198 rec->type = key->type;
5199 rec->offset = key->offset;
5201 rec->devid = btrfs_device_id(eb, ptr);
5202 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5205 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5206 if (ret) {
5207 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5208 free(rec);
5211 return ret;
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5216 int slot)
5218 struct btrfs_block_group_item *ptr;
5219 struct block_group_record *rec;
5221 rec = calloc(1, sizeof(*rec));
5222 if (!rec) {
5223 fprintf(stderr, "memory allocation failed\n");
5224 exit(-1);
5227 rec->cache.start = key->objectid;
5228 rec->cache.size = key->offset;
5230 rec->generation = btrfs_header_generation(leaf);
5232 rec->objectid = key->objectid;
5233 rec->type = key->type;
5234 rec->offset = key->offset;
5236 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5239 INIT_LIST_HEAD(&rec->list);
5241 return rec;
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245 struct btrfs_key *key,
5246 struct extent_buffer *eb, int slot)
5248 struct block_group_record *rec;
5249 int ret = 0;
5251 rec = btrfs_new_block_group_record(eb, key, slot);
5252 ret = insert_block_group_record(block_group_cache, rec);
5253 if (ret) {
5254 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255 rec->objectid, rec->offset);
5256 free(rec);
5259 return ret;
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264 struct btrfs_key *key, int slot)
5266 struct device_extent_record *rec;
5267 struct btrfs_dev_extent *ptr;
5269 rec = calloc(1, sizeof(*rec));
5270 if (!rec) {
5271 fprintf(stderr, "memory allocation failed\n");
5272 exit(-1);
5275 rec->cache.objectid = key->objectid;
5276 rec->cache.start = key->offset;
5278 rec->generation = btrfs_header_generation(leaf);
5280 rec->objectid = key->objectid;
5281 rec->type = key->type;
5282 rec->offset = key->offset;
5284 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285 rec->chunk_objecteid =
5286 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5287 rec->chunk_offset =
5288 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289 rec->length = btrfs_dev_extent_length(leaf, ptr);
5290 rec->cache.size = rec->length;
5292 INIT_LIST_HEAD(&rec->chunk_list);
5293 INIT_LIST_HEAD(&rec->device_list);
5295 return rec;
5298 static int
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300 struct btrfs_key *key, struct extent_buffer *eb,
5301 int slot)
5303 struct device_extent_record *rec;
5304 int ret;
5306 rec = btrfs_new_device_extent_record(eb, key, slot);
5307 ret = insert_device_extent_record(dev_extent_cache, rec);
5308 if (ret) {
5309 fprintf(stderr,
5310 "Device extent[%llu, %llu, %llu] existed.\n",
5311 rec->objectid, rec->offset, rec->length);
5312 free(rec);
5315 return ret;
5318 static int process_extent_item(struct btrfs_root *root,
5319 struct cache_tree *extent_cache,
5320 struct extent_buffer *eb, int slot)
5322 struct btrfs_extent_item *ei;
5323 struct btrfs_extent_inline_ref *iref;
5324 struct btrfs_extent_data_ref *dref;
5325 struct btrfs_shared_data_ref *sref;
5326 struct btrfs_key key;
5327 struct extent_record tmpl;
5328 unsigned long end;
5329 unsigned long ptr;
5330 int ret;
5331 int type;
5332 u32 item_size = btrfs_item_size_nr(eb, slot);
5333 u64 refs = 0;
5334 u64 offset;
5335 u64 num_bytes;
5336 int metadata = 0;
5338 btrfs_item_key_to_cpu(eb, &key, slot);
5340 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5341 metadata = 1;
5342 num_bytes = root->nodesize;
5343 } else {
5344 num_bytes = key.offset;
5347 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349 key.objectid, root->sectorsize);
5350 return -EIO;
5352 if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354 struct btrfs_extent_item_v0 *ei0;
5355 BUG_ON(item_size != sizeof(*ei0));
5356 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357 refs = btrfs_extent_refs_v0(eb, ei0);
5358 #else
5359 BUG();
5360 #endif
5361 memset(&tmpl, 0, sizeof(tmpl));
5362 tmpl.start = key.objectid;
5363 tmpl.nr = num_bytes;
5364 tmpl.extent_item_refs = refs;
5365 tmpl.metadata = metadata;
5366 tmpl.found_rec = 1;
5367 tmpl.max_size = num_bytes;
5369 return add_extent_rec(extent_cache, &tmpl);
5372 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373 refs = btrfs_extent_refs(eb, ei);
5374 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5375 metadata = 1;
5376 else
5377 metadata = 0;
5378 if (metadata && num_bytes != root->nodesize) {
5379 error("ignoring invalid metadata extent, length %llu does not equal %u",
5380 num_bytes, root->nodesize);
5381 return -EIO;
5383 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384 error("ignoring invalid data extent, length %llu is not aligned to %u",
5385 num_bytes, root->sectorsize);
5386 return -EIO;
5389 memset(&tmpl, 0, sizeof(tmpl));
5390 tmpl.start = key.objectid;
5391 tmpl.nr = num_bytes;
5392 tmpl.extent_item_refs = refs;
5393 tmpl.metadata = metadata;
5394 tmpl.found_rec = 1;
5395 tmpl.max_size = num_bytes;
5396 add_extent_rec(extent_cache, &tmpl);
5398 ptr = (unsigned long)(ei + 1);
5399 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400 key.type == BTRFS_EXTENT_ITEM_KEY)
5401 ptr += sizeof(struct btrfs_tree_block_info);
5403 end = (unsigned long)ei + item_size;
5404 while (ptr < end) {
5405 iref = (struct btrfs_extent_inline_ref *)ptr;
5406 type = btrfs_extent_inline_ref_type(eb, iref);
5407 offset = btrfs_extent_inline_ref_offset(eb, iref);
5408 switch (type) {
5409 case BTRFS_TREE_BLOCK_REF_KEY:
5410 ret = add_tree_backref(extent_cache, key.objectid,
5411 0, offset, 0);
5412 if (ret < 0)
5413 error("add_tree_backref failed: %s",
5414 strerror(-ret));
5415 break;
5416 case BTRFS_SHARED_BLOCK_REF_KEY:
5417 ret = add_tree_backref(extent_cache, key.objectid,
5418 offset, 0, 0);
5419 if (ret < 0)
5420 error("add_tree_backref failed: %s",
5421 strerror(-ret));
5422 break;
5423 case BTRFS_EXTENT_DATA_REF_KEY:
5424 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425 add_data_backref(extent_cache, key.objectid, 0,
5426 btrfs_extent_data_ref_root(eb, dref),
5427 btrfs_extent_data_ref_objectid(eb,
5428 dref),
5429 btrfs_extent_data_ref_offset(eb, dref),
5430 btrfs_extent_data_ref_count(eb, dref),
5431 0, num_bytes);
5432 break;
5433 case BTRFS_SHARED_DATA_REF_KEY:
5434 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435 add_data_backref(extent_cache, key.objectid, offset,
5436 0, 0, 0,
5437 btrfs_shared_data_ref_count(eb, sref),
5438 0, num_bytes);
5439 break;
5440 default:
5441 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442 key.objectid, key.type, num_bytes);
5443 goto out;
5445 ptr += btrfs_extent_inline_ref_size(type);
5447 WARN_ON(ptr > end);
5448 out:
5449 return 0;
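/*
 * Check that the free space cache of @cache has an entry covering exactly
 * [offset, offset + bytes).  Superblock mirror stripes inside the range are
 * skipped (recursing when one lands in the middle), and the matching entry
 * is removed from the cache once verified.
 */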
5452 static int check_cache_range(struct btrfs_root *root,
5453 struct btrfs_block_group_cache *cache,
5454 u64 offset, u64 bytes)
5456 struct btrfs_free_space *entry;
5457 u64 *logical;
5458 u64 bytenr;
5459 int stripe_len;
5460 int i, nr, ret;
5462 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463 bytenr = btrfs_sb_offset(i);
5464 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465 cache->key.objectid, bytenr, 0,
5466 &logical, &nr, &stripe_len);
5467 if (ret)
5468 return ret;
5470 while (nr--) {
5471 if (logical[nr] + stripe_len <= offset)
5472 continue;
5473 if (offset + bytes <= logical[nr])
5474 continue;
5475 if (logical[nr] == offset) {
5476 if (stripe_len >= bytes) {
5477 kfree(logical);
5478 return 0;
5480 bytes -= stripe_len;
5481 offset += stripe_len;
5482 } else if (logical[nr] < offset) {
5483 if (logical[nr] + stripe_len >=
5484 offset + bytes) {
5485 kfree(logical);
5486 return 0;
5488 bytes = (offset + bytes) -
5489 (logical[nr] + stripe_len);
5490 offset = logical[nr] + stripe_len;
5491 } else {
5493 * Could be tricky, the super may land in the
5494 * middle of the area we're checking. First
5495 * check the easiest case, it's at the end.
5497 if (logical[nr] + stripe_len >=
5498 bytes + offset) {
5499 bytes = logical[nr] - offset;
5500 continue;
5503 /* Check the left side */
5504 ret = check_cache_range(root, cache,
5505 offset,
5506 logical[nr] - offset);
5507 if (ret) {
5508 kfree(logical);
5509 return ret;
5512 /* Now we continue with the right side */
5513 bytes = (offset + bytes) -
5514 (logical[nr] + stripe_len);
5515 offset = logical[nr] + stripe_len;
5519 kfree(logical);
5522 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5523 if (!entry) {
5524 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525 offset, offset+bytes);
5526 return -EINVAL;
5529 if (entry->offset != offset) {
5530 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5531 entry->offset);
5532 return -EINVAL;
5535 if (entry->bytes != bytes) {
5536 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537 bytes, entry->bytes, offset);
5538 return -EINVAL;
5541 unlink_free_space(cache->free_space_ctl, entry);
5542 free(entry);
5543 return 0;
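/*
 * Walk the extent tree across a block group and feed every gap between
 * allocated extents to check_cache_range().  Any free space entries left
 * over afterwards mean the cache does not match the extent tree.
 */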
5546 static int verify_space_cache(struct btrfs_root *root,
5547 struct btrfs_block_group_cache *cache)
5549 struct btrfs_path *path;
5550 struct extent_buffer *leaf;
5551 struct btrfs_key key;
5552 u64 last;
5553 int ret = 0;
5555 path = btrfs_alloc_path();
5556 if (!path)
5557 return -ENOMEM;
5559 root = root->fs_info->extent_root;
5561 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5563 key.objectid = last;
5564 key.offset = 0;
5565 key.type = BTRFS_EXTENT_ITEM_KEY;
5567 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5568 if (ret < 0)
5569 goto out;
5570 ret = 0;
5571 while (1) {
5572 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573 ret = btrfs_next_leaf(root, path);
5574 if (ret < 0)
5575 goto out;
5576 if (ret > 0) {
5577 ret = 0;
5578 break;
5581 leaf = path->nodes[0];
5582 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5583 if (key.objectid >= cache->key.offset + cache->key.objectid)
5584 break;
5585 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586 key.type != BTRFS_METADATA_ITEM_KEY) {
5587 path->slots[0]++;
5588 continue;
5591 if (last == key.objectid) {
5592 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593 last = key.objectid + key.offset;
5594 else
5595 last = key.objectid + root->nodesize;
5596 path->slots[0]++;
5597 continue;
5600 ret = check_cache_range(root, cache, last,
5601 key.objectid - last);
5602 if (ret)
5603 break;
5604 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605 last = key.objectid + key.offset;
5606 else
5607 last = key.objectid + root->nodesize;
5608 path->slots[0]++;
5611 if (last < cache->key.objectid + cache->key.offset)
5612 ret = check_cache_range(root, cache, last,
5613 cache->key.objectid +
5614 cache->key.offset - last);
5616 out:
5617 btrfs_free_path(path);
5619 if (!ret &&
5620 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621 fprintf(stderr, "There are still entries left in the space "
5622 "cache\n");
5623 ret = -EINVAL;
5626 return ret;
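/*
 * Cross-check free space accounting for every block group: load either the
 * free space tree or the v1 space cache and verify it against the extent
 * tree with verify_space_cache().
 */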
5629 static int check_space_cache(struct btrfs_root *root)
5631 struct btrfs_block_group_cache *cache;
5632 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5633 int ret;
5634 int error = 0;
5636 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637 btrfs_super_generation(root->fs_info->super_copy) !=
5638 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639 printf("cache and super generation don't match, space cache "
5640 "will be invalidated\n");
5641 return 0;
5644 if (ctx.progress_enabled) {
5645 ctx.tp = TASK_FREE_SPACE;
5646 task_start(ctx.info);
5649 while (1) {
5650 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5651 if (!cache)
5652 break;
5654 start = cache->key.objectid + cache->key.offset;
5655 if (!cache->free_space_ctl) {
5656 if (btrfs_init_free_space_ctl(cache,
5657 root->sectorsize)) {
5658 ret = -ENOMEM;
5659 break;
5661 } else {
5662 btrfs_remove_free_space_cache(cache);
5665 if (btrfs_fs_compat_ro(root->fs_info,
5666 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667 ret = exclude_super_stripes(root, cache);
5668 if (ret) {
5669 fprintf(stderr, "could not exclude super stripes: %s\n",
5670 strerror(-ret));
5671 error++;
5672 continue;
5674 ret = load_free_space_tree(root->fs_info, cache);
5675 free_excluded_extents(root, cache);
5676 if (ret < 0) {
5677 fprintf(stderr, "could not load free space tree: %s\n",
5678 strerror(-ret));
5679 error++;
5680 continue;
5682 error += ret;
5683 } else {
5684 ret = load_free_space_cache(root->fs_info, cache);
5685 if (!ret)
5686 continue;
5689 ret = verify_space_cache(root, cache);
5690 if (ret) {
5691 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692 cache->key.objectid);
5693 error++;
5697 task_stop(ctx.info);
5699 return error ? -EINVAL : 0;
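/*
 * Read back the data covered by a csum item and verify each sectorsize
 * block against the checksums stored at @leaf_offset in @eb, retrying the
 * remaining mirrors on a mismatch.
 */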
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703 u64 num_bytes, unsigned long leaf_offset,
5704 struct extent_buffer *eb) {
5706 u64 offset = 0;
5707 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5708 char *data;
5709 unsigned long csum_offset;
5710 u32 csum;
5711 u32 csum_expected;
5712 u64 read_len;
5713 u64 data_checked = 0;
5714 u64 tmp;
5715 int ret = 0;
5716 int mirror;
5717 int num_copies;
5719 if (num_bytes % root->sectorsize)
5720 return -EINVAL;
5722 data = malloc(num_bytes);
5723 if (!data)
5724 return -ENOMEM;
5726 while (offset < num_bytes) {
5727 mirror = 0;
5728 again:
5729 read_len = num_bytes - offset;
5730 /* read as much data as we can in one go */
5731 ret = read_extent_data(root, data + offset,
5732 bytenr + offset, &read_len, mirror);
5733 if (ret)
5734 goto out;
5735 data_checked = 0;
5736 /* verify the checksum of each sectorsize block */
5737 while (data_checked < read_len) {
5738 csum = ~(u32)0;
5739 tmp = offset + data_checked;
5741 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742 csum, root->sectorsize);
5743 btrfs_csum_final(csum, (char *)&csum);
5745 csum_offset = leaf_offset +
5746 tmp / root->sectorsize * csum_size;
5747 read_extent_buffer(eb, (char *)&csum_expected,
5748 csum_offset, csum_size);
5749 /* try another mirror */
5750 if (csum != csum_expected) {
5751 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752 mirror, bytenr + tmp,
5753 csum, csum_expected);
5754 num_copies = btrfs_num_copies(
5755 &root->fs_info->mapping_tree,
5756 bytenr, num_bytes);
5757 if (mirror < num_copies - 1) {
5758 mirror += 1;
5759 goto again;
5762 data_checked += root->sectorsize;
5764 offset += read_len;
5766 out:
5767 free(data);
5768 return ret;
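/*
 * Make sure every byte of [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree, splitting the search when an extent
 * only covers the middle of the range.  Returns 1 if part of the range has
 * no extent record.
 */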
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772 u64 num_bytes)
5774 struct btrfs_path *path;
5775 struct extent_buffer *leaf;
5776 struct btrfs_key key;
5777 int ret;
5779 path = btrfs_alloc_path();
5780 if (!path) {
5781 fprintf(stderr, "Error allocating path\n");
5782 return -ENOMEM;
5785 key.objectid = bytenr;
5786 key.type = BTRFS_EXTENT_ITEM_KEY;
5787 key.offset = (u64)-1;
5789 again:
5790 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791 0, 0);
5792 if (ret < 0) {
5793 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794 btrfs_free_path(path);
5795 return ret;
5796 } else if (ret) {
5797 if (path->slots[0] > 0) {
5798 path->slots[0]--;
5799 } else {
5800 ret = btrfs_prev_leaf(root, path);
5801 if (ret < 0) {
5802 goto out;
5803 } else if (ret > 0) {
5804 ret = 0;
5805 goto out;
5810 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5813 * Block group items come before extent items if they have the same
5814 * bytenr, so walk back one more just in case. Dear future traveller,
5815 * first congrats on mastering time travel. Now if it's not too much
5816 * trouble could you go back to 2006 and tell Chris to make the
5817 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818 * EXTENT_ITEM_KEY please?
5820 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821 if (path->slots[0] > 0) {
5822 path->slots[0]--;
5823 } else {
5824 ret = btrfs_prev_leaf(root, path);
5825 if (ret < 0) {
5826 goto out;
5827 } else if (ret > 0) {
5828 ret = 0;
5829 goto out;
5832 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5835 while (num_bytes) {
5836 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837 ret = btrfs_next_leaf(root, path);
5838 if (ret < 0) {
5839 fprintf(stderr, "Error going to next leaf "
5840 "%d\n", ret);
5841 btrfs_free_path(path);
5842 return ret;
5843 } else if (ret) {
5844 break;
5847 leaf = path->nodes[0];
5848 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5850 path->slots[0]++;
5851 continue;
5853 if (key.objectid + key.offset < bytenr) {
5854 path->slots[0]++;
5855 continue;
5857 if (key.objectid > bytenr + num_bytes)
5858 break;
5860 if (key.objectid == bytenr) {
5861 if (key.offset >= num_bytes) {
5862 num_bytes = 0;
5863 break;
5865 num_bytes -= key.offset;
5866 bytenr += key.offset;
5867 } else if (key.objectid < bytenr) {
5868 if (key.objectid + key.offset >= bytenr + num_bytes) {
5869 num_bytes = 0;
5870 break;
5872 num_bytes = (bytenr + num_bytes) -
5873 (key.objectid + key.offset);
5874 bytenr = key.objectid + key.offset;
5875 } else {
5876 if (key.objectid + key.offset < bytenr + num_bytes) {
5877 u64 new_start = key.objectid + key.offset;
5878 u64 new_bytes = bytenr + num_bytes - new_start;
5881 * Weird case, the extent is in the middle of
5882 * our range, we'll have to search one side
5883 * and then the other. Not sure if this happens
5884 * in real life, but no harm in coding it up
5885 * anyway just in case.
5887 btrfs_release_path(path);
5888 ret = check_extent_exists(root, new_start,
5889 new_bytes);
5890 if (ret) {
5891 fprintf(stderr, "Right section didn't "
5892 "have a record\n");
5893 break;
5895 num_bytes = key.objectid - bytenr;
5896 goto again;
5898 num_bytes = key.objectid - bytenr;
5900 path->slots[0]++;
5902 ret = 0;
5904 out:
5905 if (num_bytes && !ret) {
5906 fprintf(stderr, "There are no extents for csum range "
5907 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5908 ret = 1;
5911 btrfs_free_path(path);
5912 return ret;
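/*
 * Iterate the csum tree: verify the data checksums themselves when data
 * csum checking is enabled, and make sure every contiguous csum range is
 * backed by extent records in the extent tree.
 */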
5915 static int check_csums(struct btrfs_root *root)
5917 struct btrfs_path *path;
5918 struct extent_buffer *leaf;
5919 struct btrfs_key key;
5920 u64 offset = 0, num_bytes = 0;
5921 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5922 int errors = 0;
5923 int ret;
5924 u64 data_len;
5925 unsigned long leaf_offset;
5927 root = root->fs_info->csum_root;
5928 if (!extent_buffer_uptodate(root->node)) {
5929 fprintf(stderr, "No valid csum tree found\n");
5930 return -ENOENT;
5933 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934 key.type = BTRFS_EXTENT_CSUM_KEY;
5935 key.offset = 0;
5937 path = btrfs_alloc_path();
5938 if (!path)
5939 return -ENOMEM;
5941 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5942 if (ret < 0) {
5943 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944 btrfs_free_path(path);
5945 return ret;
5948 if (ret > 0 && path->slots[0])
5949 path->slots[0]--;
5950 ret = 0;
5952 while (1) {
5953 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954 ret = btrfs_next_leaf(root, path);
5955 if (ret < 0) {
5956 fprintf(stderr, "Error going to next leaf "
5957 "%d\n", ret);
5958 break;
5960 if (ret)
5961 break;
5963 leaf = path->nodes[0];
5965 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5967 path->slots[0]++;
5968 continue;
5971 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972 csum_size) * root->sectorsize;
5973 if (!check_data_csum)
5974 goto skip_csum_check;
5975 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976 ret = check_extent_csums(root, key.offset, data_len,
5977 leaf_offset, leaf);
5978 if (ret)
5979 break;
5980 skip_csum_check:
5981 if (!num_bytes) {
5982 offset = key.offset;
5983 } else if (key.offset != offset + num_bytes) {
5984 ret = check_extent_exists(root, offset, num_bytes);
5985 if (ret) {
5986 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987 "there is no extent record\n",
5988 offset, offset+num_bytes);
5989 errors++;
5991 offset = key.offset;
5992 num_bytes = 0;
5994 num_bytes += data_len;
5995 path->slots[0]++;
5998 btrfs_free_path(path);
5999 return errors;
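/*
 * Return 1 if @key sorts strictly before @drop_key, i.e. the item falls in
 * the part of a partially dropped snapshot that has already been deleted.
 */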
6002 static int is_dropped_key(struct btrfs_key *key,
6003 struct btrfs_key *drop_key) {
6004 if (key->objectid < drop_key->objectid)
6005 return 1;
6006 else if (key->objectid == drop_key->objectid) {
6007 if (key->type < drop_key->type)
6008 return 1;
6009 else if (key->type == drop_key->type) {
6010 if (key->offset < drop_key->offset)
6011 return 1;
6014 return 0;
6018 * Here are the rules for FULL_BACKREF.
6020 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6022 * FULL_BACKREF set.
6023 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6024 * if it happened after the relocation occurred since we'll have dropped the
6025 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026 * have no real way to know for sure.
6028 * We process the blocks one root at a time, and we start from the lowest root
6029 * objectid and go to the highest. So we can just lookup the owner backref for
6030 * the record and if we don't find it then we know it doesn't exist and we have
6031 * a FULL BACKREF.
6033 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035 * be set or not and then we can check later once we've gathered all the refs.
6037 static int calc_extent_flag(struct btrfs_root *root,
6038 struct cache_tree *extent_cache,
6039 struct extent_buffer *buf,
6040 struct root_item_record *ri,
6041 u64 *flags)
6043 struct extent_record *rec;
6044 struct cache_extent *cache;
6045 struct tree_backref *tback;
6046 u64 owner = 0;
6048 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049 /* we have added this extent before */
6050 BUG_ON(!cache);
6051 rec = container_of(cache, struct extent_record, cache);
6054 * Except for file/reloc trees, we cannot have
6055 * FULL BACKREF MODE
6057 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6058 goto normal;
6060 * root node
6062 if (buf->start == ri->bytenr)
6063 goto normal;
6065 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6066 goto full_backref;
6068 owner = btrfs_header_owner(buf);
6069 if (owner == ri->objectid)
6070 goto normal;
6072 tback = find_tree_backref(rec, 0, owner);
6073 if (!tback)
6074 goto full_backref;
6075 normal:
6076 *flags = 0;
6077 if (rec->flag_block_full_backref != FLAG_UNSET &&
6078 rec->flag_block_full_backref != 0)
6079 rec->bad_full_backref = 1;
6080 return 0;
6081 full_backref:
6082 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6083 if (rec->flag_block_full_backref != FLAG_UNSET &&
6084 rec->flag_block_full_backref != 1)
6085 rec->bad_full_backref = 1;
6086 return 0;
6089 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6091 fprintf(stderr, "Invalid key type(");
6092 print_key_type(stderr, 0, key_type);
6093 fprintf(stderr, ") found in root(");
6094 print_objectid(stderr, rootid, 0);
6095 fprintf(stderr, ")\n");
6099 * Check if the key is valid with its extent buffer.
6101 * This is an early check in case an invalid key exists in an extent buffer.
6102 * It is not comprehensive yet, but should prevent an obviously wrong key/item
6103 * from being passed further down.
6105 static int check_type_with_root(u64 rootid, u8 key_type)
6107 switch (key_type) {
6108 /* Only valid in chunk tree */
6109 case BTRFS_DEV_ITEM_KEY:
6110 case BTRFS_CHUNK_ITEM_KEY:
6111 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6112 goto err;
6113 break;
6114 /* valid in csum and log tree */
6115 case BTRFS_EXTENT_CSUM_KEY:
6116 if (!(rootid == BTRFS_CSUM_TREE_OBJECTID ||
6117 rootid == BTRFS_TREE_LOG_OBJECTID))
6118 goto err;
6119 break;
6120 case BTRFS_EXTENT_ITEM_KEY:
6121 case BTRFS_METADATA_ITEM_KEY:
6122 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6123 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6124 goto err;
6125 break;
6126 case BTRFS_ROOT_ITEM_KEY:
6127 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6128 goto err;
6129 break;
6130 case BTRFS_DEV_EXTENT_KEY:
6131 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6132 goto err;
6133 break;
6135 return 0;
6136 err:
6137 report_mismatch_key_root(key_type, rootid);
6138 return -EINVAL;
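/*
 * Process one pending tree block: read it (issuing readahead for the rest
 * of the batch), work out whether it uses FULL_BACKREF, run check_block(),
 * and record everything found inside - chunks, devices, block groups,
 * device extents, csums, extent items and backrefs.
 */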
6141 static int run_next_block(struct btrfs_root *root,
6142 struct block_info *bits,
6143 int bits_nr,
6144 u64 *last,
6145 struct cache_tree *pending,
6146 struct cache_tree *seen,
6147 struct cache_tree *reada,
6148 struct cache_tree *nodes,
6149 struct cache_tree *extent_cache,
6150 struct cache_tree *chunk_cache,
6151 struct rb_root *dev_cache,
6152 struct block_group_tree *block_group_cache,
6153 struct device_extent_tree *dev_extent_cache,
6154 struct root_item_record *ri)
6156 struct extent_buffer *buf;
6157 struct extent_record *rec = NULL;
6158 u64 bytenr;
6159 u32 size;
6160 u64 parent;
6161 u64 owner;
6162 u64 flags;
6163 u64 ptr;
6164 u64 gen = 0;
6165 int ret = 0;
6166 int i;
6167 int nritems;
6168 struct btrfs_key key;
6169 struct cache_extent *cache;
6170 int reada_bits;
6172 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6173 bits_nr, &reada_bits);
6174 if (nritems == 0)
6175 return 1;
6177 if (!reada_bits) {
6178 for(i = 0; i < nritems; i++) {
6179 ret = add_cache_extent(reada, bits[i].start,
6180 bits[i].size);
6181 if (ret == -EEXIST)
6182 continue;
6184 /* fixme, get the parent transid */
6185 readahead_tree_block(root, bits[i].start,
6186 bits[i].size, 0);
6189 *last = bits[0].start;
6190 bytenr = bits[0].start;
6191 size = bits[0].size;
6193 cache = lookup_cache_extent(pending, bytenr, size);
6194 if (cache) {
6195 remove_cache_extent(pending, cache);
6196 free(cache);
6198 cache = lookup_cache_extent(reada, bytenr, size);
6199 if (cache) {
6200 remove_cache_extent(reada, cache);
6201 free(cache);
6203 cache = lookup_cache_extent(nodes, bytenr, size);
6204 if (cache) {
6205 remove_cache_extent(nodes, cache);
6206 free(cache);
6208 cache = lookup_cache_extent(extent_cache, bytenr, size);
6209 if (cache) {
6210 rec = container_of(cache, struct extent_record, cache);
6211 gen = rec->parent_generation;
6214 /* fixme, get the real parent transid */
6215 buf = read_tree_block(root, bytenr, size, gen);
6216 if (!extent_buffer_uptodate(buf)) {
6217 record_bad_block_io(root->fs_info,
6218 extent_cache, bytenr, size);
6219 goto out;
6222 nritems = btrfs_header_nritems(buf);
6224 flags = 0;
6225 if (!init_extent_tree) {
6226 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6227 btrfs_header_level(buf), 1, NULL,
6228 &flags);
6229 if (ret < 0) {
6230 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6231 if (ret < 0) {
6232 fprintf(stderr, "Couldn't calc extent flags\n");
6233 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6236 } else {
6237 flags = 0;
6238 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6239 if (ret < 0) {
6240 fprintf(stderr, "Couldn't calc extent flags\n");
6241 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6245 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6246 if (ri != NULL &&
6247 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6248 ri->objectid == btrfs_header_owner(buf)) {
6250 * Ok we got to this block from its original owner and
6251 * we have FULL_BACKREF set. Relocation can leave
6252 * converted blocks over so this is altogether possible,
6253 * however it's not possible if the generation > the
6254 * last snapshot, so check for this case.
6256 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6257 btrfs_header_generation(buf) > ri->last_snapshot) {
6258 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6259 rec->bad_full_backref = 1;
6262 } else {
6263 if (ri != NULL &&
6264 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6265 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6266 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6267 rec->bad_full_backref = 1;
6271 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6272 rec->flag_block_full_backref = 1;
6273 parent = bytenr;
6274 owner = 0;
6275 } else {
6276 rec->flag_block_full_backref = 0;
6277 parent = 0;
6278 owner = btrfs_header_owner(buf);
6281 ret = check_block(root, extent_cache, buf, flags);
6282 if (ret)
6283 goto out;
6285 if (btrfs_is_leaf(buf)) {
6286 btree_space_waste += btrfs_leaf_free_space(root, buf);
6287 for (i = 0; i < nritems; i++) {
6288 struct btrfs_file_extent_item *fi;
6289 btrfs_item_key_to_cpu(buf, &key, i);
6291 * Check the key type against the leaf owner.
6292 * This can filter out quite a lot of early errors if
6293 * the owner is correct.
6295 if (check_type_with_root(btrfs_header_owner(buf),
6296 key.type)) {
6297 fprintf(stderr, "ignoring invalid key\n");
6298 continue;
6300 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6301 process_extent_item(root, extent_cache, buf,
6302 i);
6303 continue;
6305 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6306 process_extent_item(root, extent_cache, buf,
6307 i);
6308 continue;
6310 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6311 total_csum_bytes +=
6312 btrfs_item_size_nr(buf, i);
6313 continue;
6315 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6316 process_chunk_item(chunk_cache, &key, buf, i);
6317 continue;
6319 if (key.type == BTRFS_DEV_ITEM_KEY) {
6320 process_device_item(dev_cache, &key, buf, i);
6321 continue;
6323 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6324 process_block_group_item(block_group_cache,
6325 &key, buf, i);
6326 continue;
6328 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6329 process_device_extent_item(dev_extent_cache,
6330 &key, buf, i);
6331 continue;
6334 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6335 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6336 process_extent_ref_v0(extent_cache, buf, i);
6337 #else
6338 BUG();
6339 #endif
6340 continue;
6343 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6344 ret = add_tree_backref(extent_cache,
6345 key.objectid, 0, key.offset, 0);
6346 if (ret < 0)
6347 error("add_tree_backref failed: %s",
6348 strerror(-ret));
6349 continue;
6351 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6352 ret = add_tree_backref(extent_cache,
6353 key.objectid, key.offset, 0, 0);
6354 if (ret < 0)
6355 error("add_tree_backref failed: %s",
6356 strerror(-ret));
6357 continue;
6359 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6360 struct btrfs_extent_data_ref *ref;
6361 ref = btrfs_item_ptr(buf, i,
6362 struct btrfs_extent_data_ref);
6363 add_data_backref(extent_cache,
6364 key.objectid, 0,
6365 btrfs_extent_data_ref_root(buf, ref),
6366 btrfs_extent_data_ref_objectid(buf,
6367 ref),
6368 btrfs_extent_data_ref_offset(buf, ref),
6369 btrfs_extent_data_ref_count(buf, ref),
6370 0, root->sectorsize);
6371 continue;
6373 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6374 struct btrfs_shared_data_ref *ref;
6375 ref = btrfs_item_ptr(buf, i,
6376 struct btrfs_shared_data_ref);
6377 add_data_backref(extent_cache,
6378 key.objectid, key.offset, 0, 0, 0,
6379 btrfs_shared_data_ref_count(buf, ref),
6380 0, root->sectorsize);
6381 continue;
6383 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6384 struct bad_item *bad;
6386 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6387 continue;
6388 if (!owner)
6389 continue;
6390 bad = malloc(sizeof(struct bad_item));
6391 if (!bad)
6392 continue;
6393 INIT_LIST_HEAD(&bad->list);
6394 memcpy(&bad->key, &key,
6395 sizeof(struct btrfs_key));
6396 bad->root_id = owner;
6397 list_add_tail(&bad->list, &delete_items);
6398 continue;
6400 if (key.type != BTRFS_EXTENT_DATA_KEY)
6401 continue;
6402 fi = btrfs_item_ptr(buf, i,
6403 struct btrfs_file_extent_item);
6404 if (btrfs_file_extent_type(buf, fi) ==
6405 BTRFS_FILE_EXTENT_INLINE)
6406 continue;
6407 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6408 continue;
6410 data_bytes_allocated +=
6411 btrfs_file_extent_disk_num_bytes(buf, fi);
6412 if (data_bytes_allocated < root->sectorsize) {
6413 abort();
6415 data_bytes_referenced +=
6416 btrfs_file_extent_num_bytes(buf, fi);
6417 add_data_backref(extent_cache,
6418 btrfs_file_extent_disk_bytenr(buf, fi),
6419 parent, owner, key.objectid, key.offset -
6420 btrfs_file_extent_offset(buf, fi), 1, 1,
6421 btrfs_file_extent_disk_num_bytes(buf, fi));
6423 } else {
6424 int level;
6425 struct btrfs_key first_key;
6427 first_key.objectid = 0;
6429 if (nritems > 0)
6430 btrfs_item_key_to_cpu(buf, &first_key, 0);
6431 level = btrfs_header_level(buf);
6432 for (i = 0; i < nritems; i++) {
6433 struct extent_record tmpl;
6435 ptr = btrfs_node_blockptr(buf, i);
6436 size = root->nodesize;
6437 btrfs_node_key_to_cpu(buf, &key, i);
6438 if (ri != NULL) {
6439 if ((level == ri->drop_level)
6440 && is_dropped_key(&key, &ri->drop_key)) {
6441 continue;
6445 memset(&tmpl, 0, sizeof(tmpl));
6446 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6447 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6448 tmpl.start = ptr;
6449 tmpl.nr = size;
6450 tmpl.refs = 1;
6451 tmpl.metadata = 1;
6452 tmpl.max_size = size;
6453 ret = add_extent_rec(extent_cache, &tmpl);
6454 if (ret < 0)
6455 goto out;
6457 ret = add_tree_backref(extent_cache, ptr, parent,
6458 owner, 1);
6459 if (ret < 0) {
6460 error("add_tree_backref failed: %s",
6461 strerror(-ret));
6462 continue;
6465 if (level > 1) {
6466 add_pending(nodes, seen, ptr, size);
6467 } else {
6468 add_pending(pending, seen, ptr, size);
6471 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6472 nritems) * sizeof(struct btrfs_key_ptr);
6474 total_btree_bytes += buf->len;
6475 if (fs_root_objectid(btrfs_header_owner(buf)))
6476 total_fs_tree_bytes += buf->len;
6477 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6478 total_extent_tree_bytes += buf->len;
6479 if (!found_old_backref &&
6480 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6481 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6482 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6483 found_old_backref = 1;
6484 out:
6485 free_extent_buffer(buf);
6486 return ret;
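/*
 * Queue a tree root for the scan above: add its block to the pending or
 * node cache, record it as a metadata extent, and add a tree backref
 * for it (a full backref for reloc/old-style roots, otherwise keyed on
 * the root objectid).
 */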
6489 static int add_root_to_pending(struct extent_buffer *buf,
6490 struct cache_tree *extent_cache,
6491 struct cache_tree *pending,
6492 struct cache_tree *seen,
6493 struct cache_tree *nodes,
6494 u64 objectid)
6496 struct extent_record tmpl;
6497 int ret;
6499 if (btrfs_header_level(buf) > 0)
6500 add_pending(nodes, seen, buf->start, buf->len);
6501 else
6502 add_pending(pending, seen, buf->start, buf->len);
6504 memset(&tmpl, 0, sizeof(tmpl));
6505 tmpl.start = buf->start;
6506 tmpl.nr = buf->len;
6507 tmpl.is_root = 1;
6508 tmpl.refs = 1;
6509 tmpl.metadata = 1;
6510 tmpl.max_size = buf->len;
6511 add_extent_rec(extent_cache, &tmpl);
6513 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6514 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6515 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6516 0, 1);
6517 else
6518 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6519 1);
6520 return ret;
6523 /* as we fix the tree, we might be deleting blocks that
6524 * we're tracking for repair. This hook makes sure we
6525 * remove any backrefs for blocks as we are fixing them.
6527 static int free_extent_hook(struct btrfs_trans_handle *trans,
6528 struct btrfs_root *root,
6529 u64 bytenr, u64 num_bytes, u64 parent,
6530 u64 root_objectid, u64 owner, u64 offset,
6531 int refs_to_drop)
6533 struct extent_record *rec;
6534 struct cache_extent *cache;
6535 int is_data;
6536 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6538 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6539 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6540 if (!cache)
6541 return 0;
6543 rec = container_of(cache, struct extent_record, cache);
6544 if (is_data) {
6545 struct data_backref *back;
6546 back = find_data_backref(rec, parent, root_objectid, owner,
6547 offset, 1, bytenr, num_bytes);
6548 if (!back)
6549 goto out;
6550 if (back->node.found_ref) {
6551 back->found_ref -= refs_to_drop;
6552 if (rec->refs)
6553 rec->refs -= refs_to_drop;
6555 if (back->node.found_extent_tree) {
6556 back->num_refs -= refs_to_drop;
6557 if (rec->extent_item_refs)
6558 rec->extent_item_refs -= refs_to_drop;
6560 if (back->found_ref == 0)
6561 back->node.found_ref = 0;
6562 if (back->num_refs == 0)
6563 back->node.found_extent_tree = 0;
6565 if (!back->node.found_extent_tree && back->node.found_ref) {
6566 list_del(&back->node.list);
6567 free(back);
6569 } else {
6570 struct tree_backref *back;
6571 back = find_tree_backref(rec, parent, root_objectid);
6572 if (!back)
6573 goto out;
6574 if (back->node.found_ref) {
6575 if (rec->refs)
6576 rec->refs--;
6577 back->node.found_ref = 0;
6579 if (back->node.found_extent_tree) {
6580 if (rec->extent_item_refs)
6581 rec->extent_item_refs--;
6582 back->node.found_extent_tree = 0;
6584 if (!back->node.found_extent_tree && back->node.found_ref) {
6585 list_del(&back->node.list);
6586 free(back);
6589 maybe_free_extent_rec(extent_cache, rec);
6590 out:
6591 return 0;
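/*
 * Delete every extent tree item starting at @bytenr (extent items,
 * metadata items and all the backref item types), updating the block
 * group accounting for the extent/metadata items that get removed.
 */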
6594 static int delete_extent_records(struct btrfs_trans_handle *trans,
6595 struct btrfs_root *root,
6596 struct btrfs_path *path,
6597 u64 bytenr, u64 new_len)
6599 struct btrfs_key key;
6600 struct btrfs_key found_key;
6601 struct extent_buffer *leaf;
6602 int ret;
6603 int slot;
6606 key.objectid = bytenr;
6607 key.type = (u8)-1;
6608 key.offset = (u64)-1;
6610 while(1) {
6611 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6612 &key, path, 0, 1);
6613 if (ret < 0)
6614 break;
6616 if (ret > 0) {
6617 ret = 0;
6618 if (path->slots[0] == 0)
6619 break;
6620 path->slots[0]--;
6622 ret = 0;
6624 leaf = path->nodes[0];
6625 slot = path->slots[0];
6627 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6628 if (found_key.objectid != bytenr)
6629 break;
6631 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6632 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6633 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6634 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6635 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6636 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6637 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6638 btrfs_release_path(path);
6639 if (found_key.type == 0) {
6640 if (found_key.offset == 0)
6641 break;
6642 key.offset = found_key.offset - 1;
6643 key.type = found_key.type;
6645 key.type = found_key.type - 1;
6646 key.offset = (u64)-1;
6647 continue;
6650 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6651 found_key.objectid, found_key.type, found_key.offset);
6653 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6654 if (ret)
6655 break;
6656 btrfs_release_path(path);
6658 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6659 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6660 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6661 found_key.offset : root->nodesize;
6663 ret = btrfs_update_block_group(trans, root, bytenr,
6664 bytes, 0, 0);
6665 if (ret)
6666 break;
6670 btrfs_release_path(path);
6671 return ret;
6675 * for a single backref, this will allocate a new extent
6676 * and add the backref to it.
6678 static int record_extent(struct btrfs_trans_handle *trans,
6679 struct btrfs_fs_info *info,
6680 struct btrfs_path *path,
6681 struct extent_record *rec,
6682 struct extent_backref *back,
6683 int allocated, u64 flags)
6685 int ret;
6686 struct btrfs_root *extent_root = info->extent_root;
6687 struct extent_buffer *leaf;
6688 struct btrfs_key ins_key;
6689 struct btrfs_extent_item *ei;
6690 struct tree_backref *tback;
6691 struct data_backref *dback;
6692 struct btrfs_tree_block_info *bi;
6694 if (!back->is_data)
6695 rec->max_size = max_t(u64, rec->max_size,
6696 info->extent_root->nodesize);
6698 if (!allocated) {
6699 u32 item_size = sizeof(*ei);
6701 if (!back->is_data)
6702 item_size += sizeof(*bi);
6704 ins_key.objectid = rec->start;
6705 ins_key.offset = rec->max_size;
6706 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6708 ret = btrfs_insert_empty_item(trans, extent_root, path,
6709 &ins_key, item_size);
6710 if (ret)
6711 goto fail;
6713 leaf = path->nodes[0];
6714 ei = btrfs_item_ptr(leaf, path->slots[0],
6715 struct btrfs_extent_item);
6717 btrfs_set_extent_refs(leaf, ei, 0);
6718 btrfs_set_extent_generation(leaf, ei, rec->generation);
6720 if (back->is_data) {
6721 btrfs_set_extent_flags(leaf, ei,
6722 BTRFS_EXTENT_FLAG_DATA);
6723 } else {
6724 struct btrfs_disk_key copy_key;
6726 tback = to_tree_backref(back);
6727 bi = (struct btrfs_tree_block_info *)(ei + 1);
6728 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6729 sizeof(*bi));
6731 btrfs_set_disk_key_objectid(&copy_key,
6732 rec->info_objectid);
6733 btrfs_set_disk_key_type(&copy_key, 0);
6734 btrfs_set_disk_key_offset(&copy_key, 0);
6736 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6737 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6739 btrfs_set_extent_flags(leaf, ei,
6740 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6743 btrfs_mark_buffer_dirty(leaf);
6744 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6745 rec->max_size, 1, 0);
6746 if (ret)
6747 goto fail;
6748 btrfs_release_path(path);
6751 if (back->is_data) {
6752 u64 parent;
6753 int i;
6755 dback = to_data_backref(back);
6756 if (back->full_backref)
6757 parent = dback->parent;
6758 else
6759 parent = 0;
6761 for (i = 0; i < dback->found_ref; i++) {
6762 /* if parent != 0, we're doing a full backref
6763 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6764 * just makes the backref allocator create a data
6765 * backref
6767 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6768 rec->start, rec->max_size,
6769 parent,
6770 dback->root,
6771 parent ?
6772 BTRFS_FIRST_FREE_OBJECTID :
6773 dback->owner,
6774 dback->offset);
6775 if (ret)
6776 break;
6778 fprintf(stderr, "adding new data backref"
6779 " on %llu %s %llu owner %llu"
6780 " offset %llu found %d\n",
6781 (unsigned long long)rec->start,
6782 back->full_backref ?
6783 "parent" : "root",
6784 back->full_backref ?
6785 (unsigned long long)parent :
6786 (unsigned long long)dback->root,
6787 (unsigned long long)dback->owner,
6788 (unsigned long long)dback->offset,
6789 dback->found_ref);
6790 } else {
6791 u64 parent;
6793 tback = to_tree_backref(back);
6794 if (back->full_backref)
6795 parent = tback->parent;
6796 else
6797 parent = 0;
6799 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6800 rec->start, rec->max_size,
6801 parent, tback->root, 0, 0);
6802 fprintf(stderr, "adding new tree backref on "
6803 "start %llu len %llu parent %llu root %llu\n",
6804 rec->start, rec->max_size, parent, tback->root);
6806 fail:
6807 btrfs_release_path(path);
6808 return ret;
6811 static struct extent_entry *find_entry(struct list_head *entries,
6812 u64 bytenr, u64 bytes)
6814 struct extent_entry *entry = NULL;
6816 list_for_each_entry(entry, entries, list) {
6817 if (entry->bytenr == bytenr && entry->bytes == bytes)
6818 return entry;
6821 return NULL;
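/*
 * Pick the entry that most backrefs agree on (the highest count that is
 * not fully broken).  Returns NULL when the best candidates are tied,
 * in which case the caller falls back to the extent record itself.
 */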
6824 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6826 struct extent_entry *entry, *best = NULL, *prev = NULL;
6828 list_for_each_entry(entry, entries, list) {
6829 if (!prev) {
6830 prev = entry;
6831 continue;
6835 * If there are as many broken entries as entries then we know
6836 * not to trust this particular entry.
6838 if (entry->broken == entry->count)
6839 continue;
6842 * If our current entry == best then we can't be sure our best
6843 * is really the best, so we need to keep searching.
6845 if (best && best->count == entry->count) {
6846 prev = entry;
6847 best = NULL;
6848 continue;
6851 /* Prev == entry, not good enough, have to keep searching */
6852 if (!prev->broken && prev->count == entry->count)
6853 continue;
6855 if (!best)
6856 best = (prev->count > entry->count) ? prev : entry;
6857 else if (best->count < entry->count)
6858 best = entry;
6859 prev = entry;
6862 return best;
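/*
 * Rewrite a file extent item whose disk bytenr/bytes disagree with the
 * extent entry the other backrefs agreed on: look the item up in its fs
 * tree, adjust the extent offset as needed and point it at
 * entry->bytenr / entry->bytes.
 */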
6865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6866 struct data_backref *dback, struct extent_entry *entry)
6868 struct btrfs_trans_handle *trans;
6869 struct btrfs_root *root;
6870 struct btrfs_file_extent_item *fi;
6871 struct extent_buffer *leaf;
6872 struct btrfs_key key;
6873 u64 bytenr, bytes;
6874 int ret, err;
6876 key.objectid = dback->root;
6877 key.type = BTRFS_ROOT_ITEM_KEY;
6878 key.offset = (u64)-1;
6879 root = btrfs_read_fs_root(info, &key);
6880 if (IS_ERR(root)) {
6881 fprintf(stderr, "Couldn't find root for our ref\n");
6882 return -EINVAL;
6886 * The backref points to the original offset of the extent if it was
6887 * split, so we need to search down to the offset we have and then walk
6888 * forward until we find the backref we're looking for.
6890 key.objectid = dback->owner;
6891 key.type = BTRFS_EXTENT_DATA_KEY;
6892 key.offset = dback->offset;
6893 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6894 if (ret < 0) {
6895 fprintf(stderr, "Error looking up ref %d\n", ret);
6896 return ret;
6899 while (1) {
6900 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6901 ret = btrfs_next_leaf(root, path);
6902 if (ret) {
6903 fprintf(stderr, "Couldn't find our ref, next\n");
6904 return -EINVAL;
6907 leaf = path->nodes[0];
6908 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6909 if (key.objectid != dback->owner ||
6910 key.type != BTRFS_EXTENT_DATA_KEY) {
6911 fprintf(stderr, "Couldn't find our ref, search\n");
6912 return -EINVAL;
6914 fi = btrfs_item_ptr(leaf, path->slots[0],
6915 struct btrfs_file_extent_item);
6916 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6917 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6919 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6920 break;
6921 path->slots[0]++;
6924 btrfs_release_path(path);
6926 trans = btrfs_start_transaction(root, 1);
6927 if (IS_ERR(trans))
6928 return PTR_ERR(trans);
6931 * Ok we have the key of the file extent we want to fix, now we can cow
6932 * down to the thing and fix it.
6934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6935 if (ret < 0) {
6936 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6937 key.objectid, key.type, key.offset, ret);
6938 goto out;
6940 if (ret > 0) {
6941 fprintf(stderr, "Well that's odd, we just found this key "
6942 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6943 key.offset);
6944 ret = -EINVAL;
6945 goto out;
6947 leaf = path->nodes[0];
6948 fi = btrfs_item_ptr(leaf, path->slots[0],
6949 struct btrfs_file_extent_item);
6951 if (btrfs_file_extent_compression(leaf, fi) &&
6952 dback->disk_bytenr != entry->bytenr) {
6953 fprintf(stderr, "Ref doesn't match the record start and is "
6954 "compressed, please take a btrfs-image of this file "
6955 "system and send it to a btrfs developer so they can "
6956 "complete this functionality for bytenr %Lu\n",
6957 dback->disk_bytenr);
6958 ret = -EINVAL;
6959 goto out;
6962 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6963 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6964 } else if (dback->disk_bytenr > entry->bytenr) {
6965 u64 off_diff, offset;
6967 off_diff = dback->disk_bytenr - entry->bytenr;
6968 offset = btrfs_file_extent_offset(leaf, fi);
6969 if (dback->disk_bytenr + offset +
6970 btrfs_file_extent_num_bytes(leaf, fi) >
6971 entry->bytenr + entry->bytes) {
6972 fprintf(stderr, "Ref is past the entry end, please "
6973 "take a btrfs-image of this file system and "
6974 "send it to a btrfs developer, ref %Lu\n",
6975 dback->disk_bytenr);
6976 ret = -EINVAL;
6977 goto out;
6979 offset += off_diff;
6980 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6981 btrfs_set_file_extent_offset(leaf, fi, offset);
6982 } else if (dback->disk_bytenr < entry->bytenr) {
6983 u64 offset;
6985 offset = btrfs_file_extent_offset(leaf, fi);
6986 if (dback->disk_bytenr + offset < entry->bytenr) {
6987 fprintf(stderr, "Ref is before the entry start, please"
6988 " take a btrfs-image of this file system and "
6989 "send it to a btrfs developer, ref %Lu\n",
6990 dback->disk_bytenr);
6991 ret = -EINVAL;
6992 goto out;
6995 offset += dback->disk_bytenr;
6996 offset -= entry->bytenr;
6997 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6998 btrfs_set_file_extent_offset(leaf, fi, offset);
7001 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7004 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7005 * only do this if we aren't using compression, otherwise it's a
7006 * trickier case.
7008 if (!btrfs_file_extent_compression(leaf, fi))
7009 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7010 else
7011 printf("ram bytes may be wrong?\n");
7012 btrfs_mark_buffer_dirty(leaf);
7013 out:
7014 err = btrfs_commit_transaction(trans, root);
7015 btrfs_release_path(path);
7016 return ret ? ret : err;
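/*
 * Make sure all the data backrefs of @rec agree on the extent's bytenr
 * and size.  Each backref "votes" for a (bytenr, bytes) entry; file
 * extents that disagree with the winning entry are rewritten with
 * repair_ref().  Returns 0 if nothing needed fixing and -EAGAIN if refs
 * were changed and the whole scan has to be redone.
 */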
7019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7020 struct extent_record *rec)
7022 struct extent_backref *back;
7023 struct data_backref *dback;
7024 struct extent_entry *entry, *best = NULL;
7025 LIST_HEAD(entries);
7026 int nr_entries = 0;
7027 int broken_entries = 0;
7028 int ret = 0;
7029 short mismatch = 0;
7032 * Metadata is easy and the backrefs should always agree on bytenr and
7033 * size, if not we've got bigger issues.
7035 if (rec->metadata)
7036 return 0;
7038 list_for_each_entry(back, &rec->backrefs, list) {
7039 if (back->full_backref || !back->is_data)
7040 continue;
7042 dback = to_data_backref(back);
7045 * We only pay attention to backrefs that we found a real
7046 * backref for.
7048 if (dback->found_ref == 0)
7049 continue;
7052 * For now we only catch when the bytes don't match, not the
7053 * bytenr. We can easily do this at the same time, but I want
7054 * to have a fs image to test on before we just add repair
7055 * functionality willy-nilly so we know we won't screw up the
7056 * repair.
7059 entry = find_entry(&entries, dback->disk_bytenr,
7060 dback->bytes);
7061 if (!entry) {
7062 entry = malloc(sizeof(struct extent_entry));
7063 if (!entry) {
7064 ret = -ENOMEM;
7065 goto out;
7067 memset(entry, 0, sizeof(*entry));
7068 entry->bytenr = dback->disk_bytenr;
7069 entry->bytes = dback->bytes;
7070 list_add_tail(&entry->list, &entries);
7071 nr_entries++;
7075 * If we only have one entry we may think the entries agree when
7076 * in reality they don't, so we have to do some extra checking.
7078 if (dback->disk_bytenr != rec->start ||
7079 dback->bytes != rec->nr || back->broken)
7080 mismatch = 1;
7082 if (back->broken) {
7083 entry->broken++;
7084 broken_entries++;
7087 entry->count++;
7090 /* Yay all the backrefs agree, carry on good sir */
7091 if (nr_entries <= 1 && !mismatch)
7092 goto out;
7094 fprintf(stderr, "attempting to repair backref discrepancy for bytenr "
7095 "%Lu\n", rec->start);
7098 * First we want to see if the backrefs can agree amongst themselves who
7099 * is right, so figure out which one of the entries has the highest
7100 * count.
7102 best = find_most_right_entry(&entries);
7105 * Ok so we may have an even split between what the backrefs think, so
7106 * this is where we use the extent ref to see what it thinks.
7108 if (!best) {
7109 entry = find_entry(&entries, rec->start, rec->nr);
7110 if (!entry && (!broken_entries || !rec->found_rec)) {
7111 fprintf(stderr, "Backrefs don't agree with each other "
7112 "and extent record doesn't agree with anybody,"
7113 " so we can't fix bytenr %Lu bytes %Lu\n",
7114 rec->start, rec->nr);
7115 ret = -EINVAL;
7116 goto out;
7117 } else if (!entry) {
7119 * Ok our backrefs were broken, we'll assume this is the
7120 * correct value and add an entry for this range.
7122 entry = malloc(sizeof(struct extent_entry));
7123 if (!entry) {
7124 ret = -ENOMEM;
7125 goto out;
7127 memset(entry, 0, sizeof(*entry));
7128 entry->bytenr = rec->start;
7129 entry->bytes = rec->nr;
7130 list_add_tail(&entry->list, &entries);
7131 nr_entries++;
7133 entry->count++;
7134 best = find_most_right_entry(&entries);
7135 if (!best) {
7136 fprintf(stderr, "Backrefs and extent record evenly "
7137 "split on who is right, this is going to "
7138 "require user input to fix bytenr %Lu bytes "
7139 "%Lu\n", rec->start, rec->nr);
7140 ret = -EINVAL;
7141 goto out;
7146 * I don't think this can happen currently as we'll abort() if we catch
7147 * this case higher up, but in case somebody removes that we still can't
7148 * deal with it properly here yet, so just bail out if that's the case.
7150 if (best->bytenr != rec->start) {
7151 fprintf(stderr, "Extent start and backref starts don't match, "
7152 "please use btrfs-image on this file system and send "
7153 "it to a btrfs developer so they can make fsck fix "
7154 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7155 rec->start, rec->nr);
7156 ret = -EINVAL;
7157 goto out;
7161 * Ok great we all agreed on an extent record, let's go find the real
7162 * references and fix up the ones that don't match.
7164 list_for_each_entry(back, &rec->backrefs, list) {
7165 if (back->full_backref || !back->is_data)
7166 continue;
7168 dback = to_data_backref(back);
7171 * Still ignoring backrefs that don't have a real ref attached
7172 * to them.
7174 if (dback->found_ref == 0)
7175 continue;
7177 if (dback->bytes == best->bytes &&
7178 dback->disk_bytenr == best->bytenr)
7179 continue;
7181 ret = repair_ref(info, path, dback, best);
7182 if (ret)
7183 goto out;
7187 * Ok we messed with the actual refs, which means we need to drop our
7188 * entire cache and go back and rescan. I know this is a huge pain and
7189 * adds a lot of extra work, but it's the only way to be safe. Once all
7190 * the backrefs agree we may not need to do anything to the extent
7191 * record itself.
7193 ret = -EAGAIN;
7194 out:
7195 while (!list_empty(&entries)) {
7196 entry = list_entry(entries.next, struct extent_entry, list);
7197 list_del_init(&entry->list);
7198 free(entry);
7200 return ret;
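/*
 * Handle an extent record that has duplicates: if the record itself was
 * never found in the extent tree, replace it with the duplicate that
 * was, merging the refs and backrefs of any overlapping records.
 * Returns 1 if the duplicates were fully resolved here, 0 if something
 * still has to be deleted by delete_duplicate_records().
 */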
7203 static int process_duplicates(struct btrfs_root *root,
7204 struct cache_tree *extent_cache,
7205 struct extent_record *rec)
7207 struct extent_record *good, *tmp;
7208 struct cache_extent *cache;
7209 int ret;
7212 * If we found an extent record for this extent then return, or if we
7213 * have more than one duplicate we are likely going to need to delete
7214 * something.
7216 if (rec->found_rec || rec->num_duplicates > 1)
7217 return 0;
7219 /* Shouldn't happen but just in case */
7220 BUG_ON(!rec->num_duplicates);
7223 * So this happens if we end up with a backref that doesn't match the
7224 * actual extent entry. So either the backref is bad or the extent
7225 * entry is bad. Either way we want to have the extent_record actually
7226 * reflect what we found in the extent_tree, so we need to take the
7227 * duplicate out and use that as the extent_record since the only way we
7228 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7230 remove_cache_extent(extent_cache, &rec->cache);
7232 good = to_extent_record(rec->dups.next);
7233 list_del_init(&good->list);
7234 INIT_LIST_HEAD(&good->backrefs);
7235 INIT_LIST_HEAD(&good->dups);
7236 good->cache.start = good->start;
7237 good->cache.size = good->nr;
7238 good->content_checked = 0;
7239 good->owner_ref_checked = 0;
7240 good->num_duplicates = 0;
7241 good->refs = rec->refs;
7242 list_splice_init(&rec->backrefs, &good->backrefs);
7243 while (1) {
7244 cache = lookup_cache_extent(extent_cache, good->start,
7245 good->nr);
7246 if (!cache)
7247 break;
7248 tmp = container_of(cache, struct extent_record, cache);
7251 * If we find another overlapping extent and it's found_rec is
7252 * set then it's a duplicate and we need to try and delete
7253 * something.
7255 if (tmp->found_rec || tmp->num_duplicates > 0) {
7256 if (list_empty(&good->list))
7257 list_add_tail(&good->list,
7258 &duplicate_extents);
7259 good->num_duplicates += tmp->num_duplicates + 1;
7260 list_splice_init(&tmp->dups, &good->dups);
7261 list_del_init(&tmp->list);
7262 list_add_tail(&tmp->list, &good->dups);
7263 remove_cache_extent(extent_cache, &tmp->cache);
7264 continue;
7268 * Ok we have another extent rec that isn't backed by an extent item,
7269 * so let's just add it to this extent and carry on like we did above.
7271 good->refs += tmp->refs;
7272 list_splice_init(&tmp->backrefs, &good->backrefs);
7273 remove_cache_extent(extent_cache, &tmp->cache);
7274 free(tmp);
7276 ret = insert_cache_extent(extent_cache, &good->cache);
7277 BUG_ON(ret);
7278 free(rec);
7279 return good->num_duplicates ? 0 : 1;
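/*
 * Find the duplicate record that covers the whole range and delete the
 * extent items of all the others from the extent tree.  Returns the
 * number of items deleted, or a negative errno on failure.
 */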
7282 static int delete_duplicate_records(struct btrfs_root *root,
7283 struct extent_record *rec)
7285 struct btrfs_trans_handle *trans;
7286 LIST_HEAD(delete_list);
7287 struct btrfs_path *path;
7288 struct extent_record *tmp, *good, *n;
7289 int nr_del = 0;
7290 int ret = 0, err;
7291 struct btrfs_key key;
7293 path = btrfs_alloc_path();
7294 if (!path) {
7295 ret = -ENOMEM;
7296 goto out;
7299 good = rec;
7300 /* Find the record that covers all of the duplicates. */
7301 list_for_each_entry(tmp, &rec->dups, list) {
7302 if (good->start < tmp->start)
7303 continue;
7304 if (good->nr > tmp->nr)
7305 continue;
7307 if (tmp->start + tmp->nr < good->start + good->nr) {
7308 fprintf(stderr, "Ok we have overlapping extents that "
7309 "aren't completely covered by each other, this "
7310 "is going to require more careful thought. "
7311 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7312 tmp->start, tmp->nr, good->start, good->nr);
7313 abort();
7315 good = tmp;
7318 if (good != rec)
7319 list_add_tail(&rec->list, &delete_list);
7321 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7322 if (tmp == good)
7323 continue;
7324 list_move_tail(&tmp->list, &delete_list);
7327 root = root->fs_info->extent_root;
7328 trans = btrfs_start_transaction(root, 1);
7329 if (IS_ERR(trans)) {
7330 ret = PTR_ERR(trans);
7331 goto out;
7334 list_for_each_entry(tmp, &delete_list, list) {
7335 if (tmp->found_rec == 0)
7336 continue;
7337 key.objectid = tmp->start;
7338 key.type = BTRFS_EXTENT_ITEM_KEY;
7339 key.offset = tmp->nr;
7341 /* Shouldn't happen but just in case */
7342 if (tmp->metadata) {
7343 fprintf(stderr, "Well this shouldn't happen, extent "
7344 "record overlaps but is metadata? "
7345 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7346 abort();
7349 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7350 if (ret) {
7351 if (ret > 0)
7352 ret = -EINVAL;
7353 break;
7355 ret = btrfs_del_item(trans, root, path);
7356 if (ret)
7357 break;
7358 btrfs_release_path(path);
7359 nr_del++;
7361 err = btrfs_commit_transaction(trans, root);
7362 if (err && !ret)
7363 ret = err;
7364 out:
7365 while (!list_empty(&delete_list)) {
7366 tmp = to_extent_record(delete_list.next);
7367 list_del_init(&tmp->list);
7368 if (tmp == rec)
7369 continue;
7370 free(tmp);
7373 while (!list_empty(&rec->dups)) {
7374 tmp = to_extent_record(rec->dups.next);
7375 list_del_init(&tmp->list);
7376 free(tmp);
7379 btrfs_free_path(path);
7381 if (!ret && !nr_del)
7382 rec->num_duplicates = 0;
7384 return ret ? ret : nr_del;
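/*
 * For data backrefs we never found a real ref for, look the file extent
 * up in the owning fs tree.  If it points at an extent without its own
 * extent record, copy its bytenr/bytes into the backref and mark the
 * backref broken so verify_backrefs() treats those values with care.
 */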
7387 static int find_possible_backrefs(struct btrfs_fs_info *info,
7388 struct btrfs_path *path,
7389 struct cache_tree *extent_cache,
7390 struct extent_record *rec)
7392 struct btrfs_root *root;
7393 struct extent_backref *back;
7394 struct data_backref *dback;
7395 struct cache_extent *cache;
7396 struct btrfs_file_extent_item *fi;
7397 struct btrfs_key key;
7398 u64 bytenr, bytes;
7399 int ret;
7401 list_for_each_entry(back, &rec->backrefs, list) {
7402 /* Don't care about full backrefs (poor unloved backrefs) */
7403 if (back->full_backref || !back->is_data)
7404 continue;
7406 dback = to_data_backref(back);
7408 /* We found this one, we don't need to do a lookup */
7409 if (dback->found_ref)
7410 continue;
7412 key.objectid = dback->root;
7413 key.type = BTRFS_ROOT_ITEM_KEY;
7414 key.offset = (u64)-1;
7416 root = btrfs_read_fs_root(info, &key);
7418 /* No root, definitely a bad ref, skip */
7419 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7420 continue;
7421 /* Other err, exit */
7422 if (IS_ERR(root))
7423 return PTR_ERR(root);
7425 key.objectid = dback->owner;
7426 key.type = BTRFS_EXTENT_DATA_KEY;
7427 key.offset = dback->offset;
7428 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7429 if (ret) {
7430 btrfs_release_path(path);
7431 if (ret < 0)
7432 return ret;
7433 /* Didn't find it, we can carry on */
7434 ret = 0;
7435 continue;
7438 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7439 struct btrfs_file_extent_item);
7440 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7441 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7442 btrfs_release_path(path);
7443 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7444 if (cache) {
7445 struct extent_record *tmp;
7446 tmp = container_of(cache, struct extent_record, cache);
7449 * If we found an extent record for the bytenr for this
7450 * particular backref then we can't add it to our
7451 * current extent record. We only want to add backrefs
7452 * that don't have a corresponding extent item in the
7453 * extent tree since they likely belong to this record
7454 * and we need to fix it if it doesn't match bytenrs.
7456 if (tmp->found_rec)
7457 continue;
7460 dback->found_ref += 1;
7461 dback->disk_bytenr = bytenr;
7462 dback->bytes = bytes;
7465 * Set this so the verify backref code knows not to trust the
7466 * values in this backref.
7468 back->broken = 1;
7471 return 0;
7475 * Record orphan data refs into the corresponding root.
7477 * Return 0 if the extent item contains a data ref and it was recorded.
7478 * Return 1 if the extent item contains no useful data ref.
7479 * In that case it may contain only shared data refs or metadata backrefs,
7480 * or the file extent already exists (this should be handled by the extent
7481 * bytenr recovery routine).
7482 * Return <0 if something goes wrong.
7484 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7485 struct extent_record *rec)
7487 struct btrfs_key key;
7488 struct btrfs_root *dest_root;
7489 struct extent_backref *back;
7490 struct data_backref *dback;
7491 struct orphan_data_extent *orphan;
7492 struct btrfs_path *path;
7493 int recorded_data_ref = 0;
7494 int ret = 0;
7496 if (rec->metadata)
7497 return 1;
7498 path = btrfs_alloc_path();
7499 if (!path)
7500 return -ENOMEM;
7501 list_for_each_entry(back, &rec->backrefs, list) {
7502 if (back->full_backref || !back->is_data ||
7503 !back->found_extent_tree)
7504 continue;
7505 dback = to_data_backref(back);
7506 if (dback->found_ref)
7507 continue;
7508 key.objectid = dback->root;
7509 key.type = BTRFS_ROOT_ITEM_KEY;
7510 key.offset = (u64)-1;
7512 dest_root = btrfs_read_fs_root(fs_info, &key);
7514 /* For a non-existent root we just skip it */
7515 if (IS_ERR(dest_root) || !dest_root)
7516 continue;
7518 key.objectid = dback->owner;
7519 key.type = BTRFS_EXTENT_DATA_KEY;
7520 key.offset = dback->offset;
7522 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7524 * For ret < 0, it's OK since the fs-tree may be corrupted,
7525 * we need to record it for inode/file extent rebuild.
7526 * For ret > 0, we record it only for file extent rebuild.
7527 * For ret == 0, the file extent exists but only the bytenr
7528 * mismatches; let the original bytenr fix routine handle it,
7529 * don't record it.
7531 if (ret == 0)
7532 continue;
7533 ret = 0;
7534 orphan = malloc(sizeof(*orphan));
7535 if (!orphan) {
7536 ret = -ENOMEM;
7537 goto out;
7539 INIT_LIST_HEAD(&orphan->list);
7540 orphan->root = dback->root;
7541 orphan->objectid = dback->owner;
7542 orphan->offset = dback->offset;
7543 orphan->disk_bytenr = rec->cache.start;
7544 orphan->disk_len = rec->cache.size;
7545 list_add(&dest_root->orphan_data_extents, &orphan->list);
7546 recorded_data_ref = 1;
7548 out:
7549 btrfs_free_path(path);
7550 if (!ret)
7551 return !recorded_data_ref;
7552 else
7553 return ret;
7557 * when an incorrect extent item is found, this will delete
7558 * all of the existing entries for it and recreate them
7559 * based on what the tree scan found.
7561 static int fixup_extent_refs(struct btrfs_fs_info *info,
7562 struct cache_tree *extent_cache,
7563 struct extent_record *rec)
7565 struct btrfs_trans_handle *trans = NULL;
7566 int ret;
7567 struct btrfs_path *path;
7568 struct list_head *cur = rec->backrefs.next;
7569 struct cache_extent *cache;
7570 struct extent_backref *back;
7571 int allocated = 0;
7572 u64 flags = 0;
7574 if (rec->flag_block_full_backref)
7575 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7577 path = btrfs_alloc_path();
7578 if (!path)
7579 return -ENOMEM;
7581 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7583 * Sometimes the backrefs themselves are so broken they don't
7584 * get attached to any meaningful rec, so first go back and
7585 * check any of our backrefs that we couldn't find, and if we
7586 * do find the backref throw it into the list so that
7587 * verify_backrefs can figure out what to do.
7589 ret = find_possible_backrefs(info, path, extent_cache, rec);
7590 if (ret < 0)
7591 goto out;
7594 /* step one, make sure all of the backrefs agree */
7595 ret = verify_backrefs(info, path, rec);
7596 if (ret < 0)
7597 goto out;
7599 trans = btrfs_start_transaction(info->extent_root, 1);
7600 if (IS_ERR(trans)) {
7601 ret = PTR_ERR(trans);
7602 goto out;
7605 /* step two, delete all the existing records */
7606 ret = delete_extent_records(trans, info->extent_root, path,
7607 rec->start, rec->max_size);
7609 if (ret < 0)
7610 goto out;
7612 /* was this block corrupt? If so, don't add references to it */
7613 cache = lookup_cache_extent(info->corrupt_blocks,
7614 rec->start, rec->max_size);
7615 if (cache) {
7616 ret = 0;
7617 goto out;
7620 /* step three, recreate all the refs we did find */
7621 while(cur != &rec->backrefs) {
7622 back = to_extent_backref(cur);
7623 cur = cur->next;
7626 * if we didn't find any references, don't create a
7627 * new extent record
7629 if (!back->found_ref)
7630 continue;
7632 rec->bad_full_backref = 0;
7633 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7634 allocated = 1;
7636 if (ret)
7637 goto out;
7639 out:
7640 if (trans) {
7641 int err = btrfs_commit_transaction(trans, info->extent_root);
7642 if (!ret)
7643 ret = err;
7646 btrfs_free_path(path);
7647 return ret;
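/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent (or metadata)
 * item of @rec so it matches what the tree scan decided, then commit
 * the change.
 */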
7650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7651 struct extent_record *rec)
7653 struct btrfs_trans_handle *trans;
7654 struct btrfs_root *root = fs_info->extent_root;
7655 struct btrfs_path *path;
7656 struct btrfs_extent_item *ei;
7657 struct btrfs_key key;
7658 u64 flags;
7659 int ret = 0;
7661 key.objectid = rec->start;
7662 if (rec->metadata) {
7663 key.type = BTRFS_METADATA_ITEM_KEY;
7664 key.offset = rec->info_level;
7665 } else {
7666 key.type = BTRFS_EXTENT_ITEM_KEY;
7667 key.offset = rec->max_size;
7670 path = btrfs_alloc_path();
7671 if (!path)
7672 return -ENOMEM;
7674 trans = btrfs_start_transaction(root, 0);
7675 if (IS_ERR(trans)) {
7676 btrfs_free_path(path);
7677 return PTR_ERR(trans);
7680 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7681 if (ret < 0) {
7682 btrfs_free_path(path);
7683 btrfs_commit_transaction(trans, root);
7684 return ret;
7685 } else if (ret) {
7686 fprintf(stderr, "Didn't find extent for %llu\n",
7687 (unsigned long long)rec->start);
7688 btrfs_free_path(path);
7689 btrfs_commit_transaction(trans, root);
7690 return -ENOENT;
7693 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7694 struct btrfs_extent_item);
7695 flags = btrfs_extent_flags(path->nodes[0], ei);
7696 if (rec->flag_block_full_backref) {
7697 fprintf(stderr, "setting full backref on %llu\n",
7698 (unsigned long long)key.objectid);
7699 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700 } else {
7701 fprintf(stderr, "clearing full backref on %llu\n",
7702 (unsigned long long)key.objectid);
7703 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7705 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7706 btrfs_mark_buffer_dirty(path->nodes[0]);
7707 btrfs_free_path(path);
7708 return btrfs_commit_transaction(trans, root);
7711 /* right now we only prune from the extent allocation tree */
7712 static int prune_one_block(struct btrfs_trans_handle *trans,
7713 struct btrfs_fs_info *info,
7714 struct btrfs_corrupt_block *corrupt)
7716 int ret;
7717 struct btrfs_path path;
7718 struct extent_buffer *eb;
7719 u64 found;
7720 int slot;
7721 int nritems;
7722 int level = corrupt->level + 1;
7724 btrfs_init_path(&path);
7725 again:
7726 /* we want to stop at the parent to our busted block */
7727 path.lowest_level = level;
7729 ret = btrfs_search_slot(trans, info->extent_root,
7730 &corrupt->key, &path, -1, 1);
7732 if (ret < 0)
7733 goto out;
7735 eb = path.nodes[level];
7736 if (!eb) {
7737 ret = -ENOENT;
7738 goto out;
7742 * hopefully the search gave us the block we want to prune,
7743 * let's try that first
7745 slot = path.slots[level];
7746 found = btrfs_node_blockptr(eb, slot);
7747 if (found == corrupt->cache.start)
7748 goto del_ptr;
7750 nritems = btrfs_header_nritems(eb);
7752 /* the search failed, let's scan this node and hope we find it */
7753 for (slot = 0; slot < nritems; slot++) {
7754 found = btrfs_node_blockptr(eb, slot);
7755 if (found == corrupt->cache.start)
7756 goto del_ptr;
7759 * we couldn't find the bad block. TODO, search all the nodes for pointers
7760 * to this block
7762 if (eb == info->extent_root->node) {
7763 ret = -ENOENT;
7764 goto out;
7765 } else {
7766 level++;
7767 btrfs_release_path(&path);
7768 goto again;
7771 del_ptr:
7772 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7773 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7775 out:
7776 btrfs_release_path(&path);
7777 return ret;
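/*
 * For every corrupt tree block recorded during the scan, delete the
 * pointer referencing it from its parent node (only in the extent
 * allocation tree for now) and drop it from the corrupt block cache.
 */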
7780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7782 struct btrfs_trans_handle *trans = NULL;
7783 struct cache_extent *cache;
7784 struct btrfs_corrupt_block *corrupt;
7786 while (1) {
7787 cache = search_cache_extent(info->corrupt_blocks, 0);
7788 if (!cache)
7789 break;
7790 if (!trans) {
7791 trans = btrfs_start_transaction(info->extent_root, 1);
7792 if (IS_ERR(trans))
7793 return PTR_ERR(trans);
7795 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7796 prune_one_block(trans, info, corrupt);
7797 remove_cache_extent(info->corrupt_blocks, cache);
7799 if (trans)
7800 return btrfs_commit_transaction(trans, info->extent_root);
7801 return 0;
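/*
 * Forget the cached free space state: clear all dirty ranges in the
 * free space cache and mark every block group as not cached, so anything
 * that uses them afterwards has to re-read them.
 */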
7804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7806 struct btrfs_block_group_cache *cache;
7807 u64 start, end;
7808 int ret;
7810 while (1) {
7811 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7812 &start, &end, EXTENT_DIRTY);
7813 if (ret)
7814 break;
7815 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7816 GFP_NOFS);
7819 start = 0;
7820 while (1) {
7821 cache = btrfs_lookup_first_block_group(fs_info, start);
7822 if (!cache)
7823 break;
7824 if (cache->cached)
7825 cache->cached = 0;
7826 start = cache->key.objectid + cache->key.offset;
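/*
 * Walk every extent record built up by the scan, report mismatches
 * between the recorded refs/backrefs and the extent tree (duplicates,
 * ref count mismatches, backpointer/owner problems, bad full backref
 * flags, crossing-stripe and chunk type issues) and, in repair mode,
 * try to fix them.  Problem extents are pinned first so the repair
 * transactions don't allocate from them.
 */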
7830 static int check_extent_refs(struct btrfs_root *root,
7831 struct cache_tree *extent_cache)
7833 struct extent_record *rec;
7834 struct cache_extent *cache;
7835 int err = 0;
7836 int ret = 0;
7837 int fixed = 0;
7838 int had_dups = 0;
7839 int recorded = 0;
7841 if (repair) {
7843 * if we're doing a repair, we have to make sure
7844 * we don't allocate from the problem extents.
7845 * In the worst case, this will be all the
7846 * extents in the FS
7848 cache = search_cache_extent(extent_cache, 0);
7849 while(cache) {
7850 rec = container_of(cache, struct extent_record, cache);
7851 set_extent_dirty(root->fs_info->excluded_extents,
7852 rec->start,
7853 rec->start + rec->max_size - 1,
7854 GFP_NOFS);
7855 cache = next_cache_extent(cache);
7858 /* pin down all the corrupted blocks too */
7859 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7860 while(cache) {
7861 set_extent_dirty(root->fs_info->excluded_extents,
7862 cache->start,
7863 cache->start + cache->size - 1,
7864 GFP_NOFS);
7865 cache = next_cache_extent(cache);
7867 prune_corrupt_blocks(root->fs_info);
7868 reset_cached_block_groups(root->fs_info);
7871 reset_cached_block_groups(root->fs_info);
7874 * We need to delete any duplicate entries we find first otherwise we
7875 * could mess up the extent tree when we have backrefs that actually
7876 * belong to a different extent item and not the weird duplicate one.
7878 while (repair && !list_empty(&duplicate_extents)) {
7879 rec = to_extent_record(duplicate_extents.next);
7880 list_del_init(&rec->list);
7882 /* Sometimes we can find a backref before we find an actual
7883 * extent, so we need to process it a little bit to see if there
7884 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7885 * if this is a backref screwup. If we need to delete stuff
7886 * process_duplicates() will return 0, otherwise it will return
7887 * 1 and we can move on to the next record.
7889 if (process_duplicates(root, extent_cache, rec))
7890 continue;
7891 ret = delete_duplicate_records(root, rec);
7892 if (ret < 0)
7893 return ret;
7895 * delete_duplicate_records will return the number of entries
7896 * deleted, so if it's greater than 0 then we know we actually
7897 * did something and we need to rescan.
7899 if (ret)
7900 had_dups = 1;
7903 if (had_dups)
7904 return -EAGAIN;
7906 while(1) {
7907 int cur_err = 0;
7909 fixed = 0;
7910 recorded = 0;
7911 cache = search_cache_extent(extent_cache, 0);
7912 if (!cache)
7913 break;
7914 rec = container_of(cache, struct extent_record, cache);
7915 if (rec->num_duplicates) {
7916 fprintf(stderr, "extent item %llu has multiple extent "
7917 "items\n", (unsigned long long)rec->start);
7918 err = 1;
7919 cur_err = 1;
7922 if (rec->refs != rec->extent_item_refs) {
7923 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7924 (unsigned long long)rec->start,
7925 (unsigned long long)rec->nr);
7926 fprintf(stderr, "extent item %llu, found %llu\n",
7927 (unsigned long long)rec->extent_item_refs,
7928 (unsigned long long)rec->refs);
7929 ret = record_orphan_data_extents(root->fs_info, rec);
7930 if (ret < 0)
7931 goto repair_abort;
7932 if (ret == 0) {
7933 recorded = 1;
7934 } else {
7936 * we can't use the extent to repair file
7937 * extent, let the fallback method handle it.
7939 if (!fixed && repair) {
7940 ret = fixup_extent_refs(
7941 root->fs_info,
7942 extent_cache, rec);
7943 if (ret)
7944 goto repair_abort;
7945 fixed = 1;
7948 err = 1;
7949 cur_err = 1;
7951 if (all_backpointers_checked(rec, 1)) {
7952 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7953 (unsigned long long)rec->start,
7954 (unsigned long long)rec->nr);
7956 if (!fixed && !recorded && repair) {
7957 ret = fixup_extent_refs(root->fs_info,
7958 extent_cache, rec);
7959 if (ret)
7960 goto repair_abort;
7961 fixed = 1;
7963 cur_err = 1;
7964 err = 1;
7966 if (!rec->owner_ref_checked) {
7967 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7968 (unsigned long long)rec->start,
7969 (unsigned long long)rec->nr);
7970 if (!fixed && !recorded && repair) {
7971 ret = fixup_extent_refs(root->fs_info,
7972 extent_cache, rec);
7973 if (ret)
7974 goto repair_abort;
7975 fixed = 1;
7977 err = 1;
7978 cur_err = 1;
7980 if (rec->bad_full_backref) {
7981 fprintf(stderr, "bad full backref, on [%llu]\n",
7982 (unsigned long long)rec->start);
7983 if (repair) {
7984 ret = fixup_extent_flags(root->fs_info, rec);
7985 if (ret)
7986 goto repair_abort;
7987 fixed = 1;
7989 err = 1;
7990 cur_err = 1;
7993 * Although it's not an extent ref's problem, we reuse this
7994 * routine for error reporting.
7995 * No repair function yet.
7997 if (rec->crossing_stripes) {
7998 fprintf(stderr,
7999 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8000 rec->start, rec->start + rec->max_size);
8001 err = 1;
8002 cur_err = 1;
8005 if (rec->wrong_chunk_type) {
8006 fprintf(stderr,
8007 "bad extent [%llu, %llu), type mismatch with chunk\n",
8008 rec->start, rec->start + rec->max_size);
8009 err = 1;
8010 cur_err = 1;
8013 remove_cache_extent(extent_cache, cache);
8014 free_all_extent_backrefs(rec);
8015 if (!init_extent_tree && repair && (!cur_err || fixed))
8016 clear_extent_dirty(root->fs_info->excluded_extents,
8017 rec->start,
8018 rec->start + rec->max_size - 1,
8019 GFP_NOFS);
8020 free(rec);
8022 repair_abort:
8023 if (repair) {
8024 if (ret && ret != -EAGAIN) {
8025 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8026 exit(1);
8027 } else if (!ret) {
8028 struct btrfs_trans_handle *trans;
8030 root = root->fs_info->extent_root;
8031 trans = btrfs_start_transaction(root, 1);
8032 if (IS_ERR(trans)) {
8033 ret = PTR_ERR(trans);
8034 goto repair_abort;
8037 btrfs_fix_block_accounting(trans, root);
8038 ret = btrfs_commit_transaction(trans, root);
8039 if (ret)
8040 goto repair_abort;
8042 if (err)
8043 fprintf(stderr, "repaired damaged extent references\n");
8044 return ret;
8046 return err;
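/*
 * Convert a chunk's logical length into the per-device stripe length
 * for its RAID profile: e.g. RAID0 splits the length evenly across the
 * stripes, RAID10 doubles it first, and RAID5/6 leave out one or two
 * parity stripes.  A 1GiB RAID10 chunk over 4 stripes thus uses 512MiB
 * on each device.
 */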
8049 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8051 u64 stripe_size;
8053 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8054 stripe_size = length;
8055 stripe_size /= num_stripes;
8056 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8057 stripe_size = length * 2;
8058 stripe_size /= num_stripes;
8059 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8060 stripe_size = length;
8061 stripe_size /= (num_stripes - 1);
8062 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8063 stripe_size = length;
8064 stripe_size /= (num_stripes - 2);
8065 } else {
8066 stripe_size = length;
8068 return stripe_size;
8072 * Check the chunk against its block group/dev extent refs:
8073 * Return 0 if all refs seem valid.
8074 * Return 1 if only part of the refs seem valid; a later check is needed to
8075 * rebuild the missing refs, e.g. a missing block group rebuilt from the extent tree.
8076 * Return -1 if essential refs are missing and can't be rebuilt.
8078 static int check_chunk_refs(struct chunk_record *chunk_rec,
8079 struct block_group_tree *block_group_cache,
8080 struct device_extent_tree *dev_extent_cache,
8081 int silent)
8083 struct cache_extent *block_group_item;
8084 struct block_group_record *block_group_rec;
8085 struct cache_extent *dev_extent_item;
8086 struct device_extent_record *dev_extent_rec;
8087 u64 devid;
8088 u64 offset;
8089 u64 length;
8090 int metadump_v2 = 0;
8091 int i;
8092 int ret = 0;
8094 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8095 chunk_rec->offset,
8096 chunk_rec->length);
8097 if (block_group_item) {
8098 block_group_rec = container_of(block_group_item,
8099 struct block_group_record,
8100 cache);
8101 if (chunk_rec->length != block_group_rec->offset ||
8102 chunk_rec->offset != block_group_rec->objectid ||
8103 (!metadump_v2 &&
8104 chunk_rec->type_flags != block_group_rec->flags)) {
8105 if (!silent)
8106 fprintf(stderr,
8107 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8108 chunk_rec->objectid,
8109 chunk_rec->type,
8110 chunk_rec->offset,
8111 chunk_rec->length,
8112 chunk_rec->offset,
8113 chunk_rec->type_flags,
8114 block_group_rec->objectid,
8115 block_group_rec->type,
8116 block_group_rec->offset,
8117 block_group_rec->offset,
8118 block_group_rec->objectid,
8119 block_group_rec->flags);
8120 ret = -1;
8121 } else {
8122 list_del_init(&block_group_rec->list);
8123 chunk_rec->bg_rec = block_group_rec;
8125 } else {
8126 if (!silent)
8127 fprintf(stderr,
8128 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8129 chunk_rec->objectid,
8130 chunk_rec->type,
8131 chunk_rec->offset,
8132 chunk_rec->length,
8133 chunk_rec->offset,
8134 chunk_rec->type_flags);
8135 ret = 1;
8138 if (metadump_v2)
8139 return ret;
8141 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8142 chunk_rec->num_stripes);
8143 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8144 devid = chunk_rec->stripes[i].devid;
8145 offset = chunk_rec->stripes[i].offset;
8146 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8147 devid, offset, length);
8148 if (dev_extent_item) {
8149 dev_extent_rec = container_of(dev_extent_item,
8150 struct device_extent_record,
8151 cache);
8152 if (dev_extent_rec->objectid != devid ||
8153 dev_extent_rec->offset != offset ||
8154 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8155 dev_extent_rec->length != length) {
8156 if (!silent)
8157 fprintf(stderr,
8158 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] mismatches dev extent[%llu, %llu, %llu]\n",
8159 chunk_rec->objectid,
8160 chunk_rec->type,
8161 chunk_rec->offset,
8162 chunk_rec->stripes[i].devid,
8163 chunk_rec->stripes[i].offset,
8164 dev_extent_rec->objectid,
8165 dev_extent_rec->offset,
8166 dev_extent_rec->length);
8167 ret = -1;
8168 } else {
8169 list_move(&dev_extent_rec->chunk_list,
8170 &chunk_rec->dextents);
8172 } else {
8173 if (!silent)
8174 fprintf(stderr,
8175 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8176 chunk_rec->objectid,
8177 chunk_rec->type,
8178 chunk_rec->offset,
8179 chunk_rec->stripes[i].devid,
8180 chunk_rec->stripes[i].offset);
8181 ret = -1;
8184 return ret;
8187 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8188 int check_chunks(struct cache_tree *chunk_cache,
8189 struct block_group_tree *block_group_cache,
8190 struct device_extent_tree *dev_extent_cache,
8191 struct list_head *good, struct list_head *bad,
8192 struct list_head *rebuild, int silent)
8194 struct cache_extent *chunk_item;
8195 struct chunk_record *chunk_rec;
8196 struct block_group_record *bg_rec;
8197 struct device_extent_record *dext_rec;
8198 int err;
8199 int ret = 0;
8201 chunk_item = first_cache_extent(chunk_cache);
8202 while (chunk_item) {
8203 chunk_rec = container_of(chunk_item, struct chunk_record,
8204 cache);
8205 err = check_chunk_refs(chunk_rec, block_group_cache,
8206 dev_extent_cache, silent);
8207 if (err < 0)
8208 ret = err;
8209 if (err == 0 && good)
8210 list_add_tail(&chunk_rec->list, good);
8211 if (err > 0 && rebuild)
8212 list_add_tail(&chunk_rec->list, rebuild);
8213 if (err < 0 && bad)
8214 list_add_tail(&chunk_rec->list, bad);
8215 chunk_item = next_cache_extent(chunk_item);
8218 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8219 if (!silent)
8220 fprintf(stderr,
8221 "Block group[%llu, %llu] (flags = %llu) didn't find the corresponding chunk.\n",
8222 bg_rec->objectid,
8223 bg_rec->offset,
8224 bg_rec->flags);
8225 if (!ret)
8226 ret = 1;
8229 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8230 chunk_list) {
8231 if (!silent)
8232 fprintf(stderr,
8233 "Device extent[%llu, %llu, %llu] didn't find the corresponding chunk.\n",
8234 dext_rec->objectid,
8235 dext_rec->offset,
8236 dext_rec->length);
8237 if (!ret)
8238 ret = 1;
8240 return ret;
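/*
 * Sum the lengths of all device extents recorded for @dev_rec and
 * compare the total against the bytes used value from the dev item.
 * Returns 0 if they match, -1 otherwise.
 */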
8244 static int check_device_used(struct device_record *dev_rec,
8245 struct device_extent_tree *dext_cache)
8247 struct cache_extent *cache;
8248 struct device_extent_record *dev_extent_rec;
8249 u64 total_byte = 0;
8251 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8252 while (cache) {
8253 dev_extent_rec = container_of(cache,
8254 struct device_extent_record,
8255 cache);
8256 if (dev_extent_rec->objectid != dev_rec->devid)
8257 break;
8259 list_del_init(&dev_extent_rec->device_list);
8260 total_byte += dev_extent_rec->length;
8261 cache = next_cache_extent(cache);
8264 if (total_byte != dev_rec->byte_used) {
8265 fprintf(stderr,
8266 "Dev extent's total bytes (%llu) do not match bytes used (%llu) in dev[%llu, %u, %llu]\n",
8267 total_byte, dev_rec->byte_used, dev_rec->objectid,
8268 dev_rec->type, dev_rec->offset);
8269 return -1;
8270 } else {
8271 return 0;
8275 /* check btrfs_dev_item -> btrfs_dev_extent */
8276 static int check_devices(struct rb_root *dev_cache,
8277 struct device_extent_tree *dev_extent_cache)
8279 struct rb_node *dev_node;
8280 struct device_record *dev_rec;
8281 struct device_extent_record *dext_rec;
8282 int err;
8283 int ret = 0;
8285 dev_node = rb_first(dev_cache);
8286 while (dev_node) {
8287 dev_rec = container_of(dev_node, struct device_record, node);
8288 err = check_device_used(dev_rec, dev_extent_cache);
8289 if (err)
8290 ret = err;
8292 dev_node = rb_next(dev_node);
8294 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8295 device_list) {
8296 fprintf(stderr,
8297 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8298 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8299 if (!ret)
8300 ret = 1;
8302 return ret;
8305 static int add_root_item_to_list(struct list_head *head,
8306 u64 objectid, u64 bytenr, u64 last_snapshot,
8307 u8 level, u8 drop_level,
8308 int level_size, struct btrfs_key *drop_key)
8311 struct root_item_record *ri_rec;
8312 ri_rec = malloc(sizeof(*ri_rec));
8313 if (!ri_rec)
8314 return -ENOMEM;
8315 ri_rec->bytenr = bytenr;
8316 ri_rec->objectid = objectid;
8317 ri_rec->level = level;
8318 ri_rec->level_size = level_size;
8319 ri_rec->drop_level = drop_level;
8320 ri_rec->last_snapshot = last_snapshot;
8321 if (drop_key)
8322 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8323 list_add_tail(&ri_rec->list, head);
8325 return 0;
8328 static void free_root_item_list(struct list_head *list)
8330 struct root_item_record *ri_rec;
8332 while (!list_empty(list)) {
8333 ri_rec = list_first_entry(list, struct root_item_record,
8334 list);
8335 list_del_init(&ri_rec->list);
8336 free(ri_rec);
8340 static int deal_root_from_list(struct list_head *list,
8341 struct btrfs_root *root,
8342 struct block_info *bits,
8343 int bits_nr,
8344 struct cache_tree *pending,
8345 struct cache_tree *seen,
8346 struct cache_tree *reada,
8347 struct cache_tree *nodes,
8348 struct cache_tree *extent_cache,
8349 struct cache_tree *chunk_cache,
8350 struct rb_root *dev_cache,
8351 struct block_group_tree *block_group_cache,
8352 struct device_extent_tree *dev_extent_cache)
8354 int ret = 0;
8355 u64 last;
8357 while (!list_empty(list)) {
8358 struct root_item_record *rec;
8359 struct extent_buffer *buf;
8360 rec = list_entry(list->next,
8361 struct root_item_record, list);
8362 last = 0;
8363 buf = read_tree_block(root->fs_info->tree_root,
8364 rec->bytenr, rec->level_size, 0);
8365 if (!extent_buffer_uptodate(buf)) {
8366 free_extent_buffer(buf);
8367 ret = -EIO;
8368 break;
8370 ret = add_root_to_pending(buf, extent_cache, pending,
8371 seen, nodes, rec->objectid);
8372 if (ret < 0)
8373 break;
8375 * To rebuild the extent tree, we need to deal with snapshots
8376 * one by one; otherwise we process nodes first, which
8377 * maximizes readahead.
8379 while (1) {
8380 ret = run_next_block(root, bits, bits_nr, &last,
8381 pending, seen, reada, nodes,
8382 extent_cache, chunk_cache,
8383 dev_cache, block_group_cache,
8384 dev_extent_cache, rec);
8385 if (ret != 0)
8386 break;
8388 free_extent_buffer(buf);
8389 list_del(&rec->list);
8390 free(rec);
8391 if (ret < 0)
8392 break;
8394 while (ret >= 0) {
8395 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8396 reada, nodes, extent_cache, chunk_cache,
8397 dev_cache, block_group_cache,
8398 dev_extent_cache, NULL);
8399 if (ret != 0) {
8400 if (ret > 0)
8401 ret = 0;
8402 break;
8405 return ret;
8408 static int check_chunks_and_extents(struct btrfs_root *root)
8410 struct rb_root dev_cache;
8411 struct cache_tree chunk_cache;
8412 struct block_group_tree block_group_cache;
8413 struct device_extent_tree dev_extent_cache;
8414 struct cache_tree extent_cache;
8415 struct cache_tree seen;
8416 struct cache_tree pending;
8417 struct cache_tree reada;
8418 struct cache_tree nodes;
8419 struct extent_io_tree excluded_extents;
8420 struct cache_tree corrupt_blocks;
8421 struct btrfs_path path;
8422 struct btrfs_key key;
8423 struct btrfs_key found_key;
8424 int ret, err = 0;
8425 struct block_info *bits;
8426 int bits_nr;
8427 struct extent_buffer *leaf;
8428 int slot;
8429 struct btrfs_root_item ri;
8430 struct list_head dropping_trees;
8431 struct list_head normal_trees;
8432 struct btrfs_root *root1;
8433 u64 objectid;
8434 u32 level_size;
8435 u8 level;
8437 dev_cache = RB_ROOT;
8438 cache_tree_init(&chunk_cache);
8439 block_group_tree_init(&block_group_cache);
8440 device_extent_tree_init(&dev_extent_cache);
8442 cache_tree_init(&extent_cache);
8443 cache_tree_init(&seen);
8444 cache_tree_init(&pending);
8445 cache_tree_init(&nodes);
8446 cache_tree_init(&reada);
8447 cache_tree_init(&corrupt_blocks);
8448 extent_io_tree_init(&excluded_extents);
8449 INIT_LIST_HEAD(&dropping_trees);
8450 INIT_LIST_HEAD(&normal_trees);
8452 if (repair) {
8453 root->fs_info->excluded_extents = &excluded_extents;
8454 root->fs_info->fsck_extent_cache = &extent_cache;
8455 root->fs_info->free_extent_hook = free_extent_hook;
8456 root->fs_info->corrupt_blocks = &corrupt_blocks;
8459 bits_nr = 1024;
8460 bits = malloc(bits_nr * sizeof(struct block_info));
8461 if (!bits) {
8462 perror("malloc");
8463 exit(1);
8466 if (ctx.progress_enabled) {
8467 ctx.tp = TASK_EXTENTS;
8468 task_start(ctx.info);
8471 again:
8472 root1 = root->fs_info->tree_root;
8473 level = btrfs_header_level(root1->node);
8474 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8475 root1->node->start, 0, level, 0,
8476 root1->nodesize, NULL);
8477 if (ret < 0)
8478 goto out;
8479 root1 = root->fs_info->chunk_root;
8480 level = btrfs_header_level(root1->node);
8481 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8482 root1->node->start, 0, level, 0,
8483 root1->nodesize, NULL);
8484 if (ret < 0)
8485 goto out;
8486 btrfs_init_path(&path);
8487 key.offset = 0;
8488 key.objectid = 0;
8489 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8490 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8491 &key, &path, 0, 0);
8492 if (ret < 0)
8493 goto out;
8494 while(1) {
8495 leaf = path.nodes[0];
8496 slot = path.slots[0];
8497 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8498 ret = btrfs_next_leaf(root, &path);
8499 if (ret != 0)
8500 break;
8501 leaf = path.nodes[0];
8502 slot = path.slots[0];
8504 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8505 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8506 unsigned long offset;
8507 u64 last_snapshot;
8509 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8510 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8511 last_snapshot = btrfs_root_last_snapshot(&ri);
8512 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8513 level = btrfs_root_level(&ri);
8514 level_size = root->nodesize;
8515 ret = add_root_item_to_list(&normal_trees,
8516 found_key.objectid,
8517 btrfs_root_bytenr(&ri),
8518 last_snapshot, level,
8519 0, level_size, NULL);
8520 if (ret < 0)
8521 goto out;
8522 } else {
8523 level = btrfs_root_level(&ri);
8524 level_size = root->nodesize;
8525 objectid = found_key.objectid;
8526 btrfs_disk_key_to_cpu(&found_key,
8527 &ri.drop_progress);
8528 ret = add_root_item_to_list(&dropping_trees,
8529 objectid,
8530 btrfs_root_bytenr(&ri),
8531 last_snapshot, level,
8532 ri.drop_level,
8533 level_size, &found_key);
8534 if (ret < 0)
8535 goto out;
8538 path.slots[0]++;
8540 btrfs_release_path(&path);
8543 * check_block can return -EAGAIN if it fixes something. Keep this
8544 * in mind when dealing with return values from these functions: if
8545 * we get -EAGAIN we want to fall through and restart the loop.
8547 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8548 &seen, &reada, &nodes, &extent_cache,
8549 &chunk_cache, &dev_cache, &block_group_cache,
8550 &dev_extent_cache);
8551 if (ret < 0) {
8552 if (ret == -EAGAIN)
8553 goto loop;
8554 goto out;
8556 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8557 &pending, &seen, &reada, &nodes,
8558 &extent_cache, &chunk_cache, &dev_cache,
8559 &block_group_cache, &dev_extent_cache);
8560 if (ret < 0) {
8561 if (ret == -EAGAIN)
8562 goto loop;
8563 goto out;
8566 ret = check_chunks(&chunk_cache, &block_group_cache,
8567 &dev_extent_cache, NULL, NULL, NULL, 0);
8568 if (ret) {
8569 if (ret == -EAGAIN)
8570 goto loop;
8571 err = ret;
8574 ret = check_extent_refs(root, &extent_cache);
8575 if (ret < 0) {
8576 if (ret == -EAGAIN)
8577 goto loop;
8578 goto out;
8581 ret = check_devices(&dev_cache, &dev_extent_cache);
8582 if (ret && err)
8583 ret = err;
8585 out:
8586 task_stop(ctx.info);
8587 if (repair) {
8588 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8589 extent_io_tree_cleanup(&excluded_extents);
8590 root->fs_info->fsck_extent_cache = NULL;
8591 root->fs_info->free_extent_hook = NULL;
8592 root->fs_info->corrupt_blocks = NULL;
8593 root->fs_info->excluded_extents = NULL;
8595 free(bits);
8596 free_chunk_cache_tree(&chunk_cache);
8597 free_device_cache_tree(&dev_cache);
8598 free_block_group_tree(&block_group_cache);
8599 free_device_extent_tree(&dev_extent_cache);
8600 free_extent_cache_tree(&seen);
8601 free_extent_cache_tree(&pending);
8602 free_extent_cache_tree(&reada);
8603 free_extent_cache_tree(&nodes);
8604 return ret;
8605 loop:
8606 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8607 free_extent_cache_tree(&seen);
8608 free_extent_cache_tree(&pending);
8609 free_extent_cache_tree(&reada);
8610 free_extent_cache_tree(&nodes);
8611 free_chunk_cache_tree(&chunk_cache);
8612 free_block_group_tree(&block_group_cache);
8613 free_device_cache_tree(&dev_cache);
8614 free_device_extent_tree(&dev_extent_cache);
8615 free_extent_record_cache(root->fs_info, &extent_cache);
8616 free_root_item_list(&normal_trees);
8617 free_root_item_list(&dropping_trees);
8618 extent_io_tree_cleanup(&excluded_extents);
8619 goto again;
8623 * Check backrefs of a tree block given by @bytenr or @eb.
8625 * @root: the root containing the @bytenr or @eb
8626 * @eb: tree block extent buffer, can be NULL
8627 * @bytenr: bytenr of the tree block to search
8628 * @level: tree level of the tree block
8629 * @owner: owner of the tree block
8631 * Return >0 for any error found and output error message
8632 * Return 0 for no error found
8634 static int check_tree_block_ref(struct btrfs_root *root,
8635 struct extent_buffer *eb, u64 bytenr,
8636 int level, u64 owner)
8638 struct btrfs_key key;
8639 struct btrfs_root *extent_root = root->fs_info->extent_root;
8640 struct btrfs_path path;
8641 struct btrfs_extent_item *ei;
8642 struct btrfs_extent_inline_ref *iref;
8643 struct extent_buffer *leaf;
8644 unsigned long end;
8645 unsigned long ptr;
8646 int slot;
8647 int skinny_level;
8648 int type;
8649 u32 nodesize = root->nodesize;
8650 u32 item_size;
8651 u64 offset;
8652 int found_ref = 0;
8653 int err = 0;
8654 int ret;
8656 btrfs_init_path(&path);
8657 key.objectid = bytenr;
8658 if (btrfs_fs_incompat(root->fs_info,
8659 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8660 key.type = BTRFS_METADATA_ITEM_KEY;
8661 else
8662 key.type = BTRFS_EXTENT_ITEM_KEY;
8663 key.offset = (u64)-1;
8665 /* Search for the backref in extent tree */
8666 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8667 if (ret < 0) {
8668 err |= BACKREF_MISSING;
8669 goto out;
8671 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8672 if (ret) {
8673 err |= BACKREF_MISSING;
8674 goto out;
8677 leaf = path.nodes[0];
8678 slot = path.slots[0];
8679 btrfs_item_key_to_cpu(leaf, &key, slot);
8681 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8683 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8684 skinny_level = (int)key.offset;
8685 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8686 } else {
8687 struct btrfs_tree_block_info *info;
8689 info = (struct btrfs_tree_block_info *)(ei + 1);
8690 skinny_level = btrfs_tree_block_level(leaf, info);
8691 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8694 if (eb) {
8695 u64 header_gen;
8696 u64 extent_gen;
8698 if (!(btrfs_extent_flags(leaf, ei) &
8699 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8700 error(
8701 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8702 key.objectid, nodesize,
8703 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8704 err = BACKREF_MISMATCH;
8706 header_gen = btrfs_header_generation(eb);
8707 extent_gen = btrfs_extent_generation(leaf, ei);
8708 if (header_gen != extent_gen) {
8709 error(
8710 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8711 key.objectid, nodesize, header_gen,
8712 extent_gen);
8713 err = BACKREF_MISMATCH;
8715 if (level != skinny_level) {
8716 error(
8717 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8718 key.objectid, nodesize, level, skinny_level);
8719 err = BACKREF_MISMATCH;
8721 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8722 error(
8723 "extent[%llu %u] is referred by other roots than %llu",
8724 key.objectid, nodesize, root->objectid);
8725 err = BACKREF_MISMATCH;
8730 * Iterate the extent/metadata item to find the exact backref
8732 item_size = btrfs_item_size_nr(leaf, slot);
8733 ptr = (unsigned long)iref;
8734 end = (unsigned long)ei + item_size;
8735 while (ptr < end) {
8736 iref = (struct btrfs_extent_inline_ref *)ptr;
8737 type = btrfs_extent_inline_ref_type(leaf, iref);
8738 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8740 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8741 (offset == root->objectid || offset == owner)) {
8742 found_ref = 1;
8743 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8744 /* Check if the backref points to valid referencer */
8745 found_ref = !check_tree_block_ref(root, NULL, offset,
8746 level + 1, owner);
8749 if (found_ref)
8750 break;
8751 ptr += btrfs_extent_inline_ref_size(type);
8755 * Inlined extent item doesn't have what we need, check
8756 * TREE_BLOCK_REF_KEY
8758 if (!found_ref) {
8759 btrfs_release_path(&path);
8760 key.objectid = bytenr;
8761 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8762 key.offset = root->objectid;
8764 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8765 if (!ret)
8766 found_ref = 1;
8768 if (!found_ref)
8769 err |= BACKREF_MISSING;
8770 out:
8771 btrfs_release_path(&path);
8772 if (eb && (err & BACKREF_MISSING))
8773 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8774 bytenr, nodesize, owner, level);
8775 return err;
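/*
 * Illustrative on-disk layouts parsed above (sketch for clarity, values are
 * hypothetical):
 *
 * With skinny metadata (METADATA_ITEM_KEY) the key itself carries the level
 * and the inline refs follow the extent item directly:
 *
 *	key = (bytenr, BTRFS_METADATA_ITEM_KEY, level)
 *	[ btrfs_extent_item | inline refs ... ]
 *
 * Without it (EXTENT_ITEM_KEY), key.offset is the extent size (nodesize) and
 * a btrfs_tree_block_info carrying the level sits before the inline refs:
 *
 *	key = (bytenr, BTRFS_EXTENT_ITEM_KEY, nodesize)
 *	[ btrfs_extent_item | btrfs_tree_block_info | inline refs ... ]
 *
 * If no matching inline ref is found, the keyed fallback looks up
 *	(bytenr, BTRFS_TREE_BLOCK_REF_KEY, root->objectid)
 * in the extent tree, exactly as done right before the out label.
 */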
8779 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8781 * Return >0 for any error found and output error message
8782 * Return 0 for no error found
8784 static int check_extent_data_item(struct btrfs_root *root,
8785 struct extent_buffer *eb, int slot)
8787 struct btrfs_file_extent_item *fi;
8788 struct btrfs_path path;
8789 struct btrfs_root *extent_root = root->fs_info->extent_root;
8790 struct btrfs_key fi_key;
8791 struct btrfs_key dbref_key;
8792 struct extent_buffer *leaf;
8793 struct btrfs_extent_item *ei;
8794 struct btrfs_extent_inline_ref *iref;
8795 struct btrfs_extent_data_ref *dref;
8796 u64 owner;
8797 u64 file_extent_gen;
8798 u64 disk_bytenr;
8799 u64 disk_num_bytes;
8800 u64 extent_num_bytes;
8801 u64 extent_flags;
8802 u64 extent_gen;
8803 u32 item_size;
8804 unsigned long end;
8805 unsigned long ptr;
8806 int type;
8807 u64 ref_root;
8808 int found_dbackref = 0;
8809 int err = 0;
8810 int ret;
8812 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8813 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8814 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8816 /* Nothing to check for hole and inline data extents */
8817 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8818 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8819 return 0;
8821 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8822 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8823 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8825 /* Check unaligned disk_num_bytes and num_bytes */
8826 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8827 error(
8828 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8829 fi_key.objectid, fi_key.offset, disk_num_bytes,
8830 root->sectorsize);
8831 err |= BYTES_UNALIGNED;
8832 } else {
8833 data_bytes_allocated += disk_num_bytes;
8835 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8836 error(
8837 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8838 fi_key.objectid, fi_key.offset, extent_num_bytes,
8839 root->sectorsize);
8840 err |= BYTES_UNALIGNED;
8841 } else {
8842 data_bytes_referenced += extent_num_bytes;
8844 owner = btrfs_header_owner(eb);
8846 /* Check the extent item of the file extent in extent tree */
8847 btrfs_init_path(&path);
8848 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8849 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8850 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8852 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8853 if (ret) {
8854 err |= BACKREF_MISSING;
8855 goto error;
8858 leaf = path.nodes[0];
8859 slot = path.slots[0];
8860 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8862 extent_flags = btrfs_extent_flags(leaf, ei);
8863 extent_gen = btrfs_extent_generation(leaf, ei);
8865 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8866 error(
8867 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8868 disk_bytenr, disk_num_bytes,
8869 BTRFS_EXTENT_FLAG_DATA);
8870 err |= BACKREF_MISMATCH;
8873 if (file_extent_gen < extent_gen) {
8874 error(
8875 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8876 disk_bytenr, disk_num_bytes, file_extent_gen,
8877 extent_gen);
8878 err |= BACKREF_MISMATCH;
8881 /* Check data backref inside that extent item */
8882 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8883 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8884 ptr = (unsigned long)iref;
8885 end = (unsigned long)ei + item_size;
8886 while (ptr < end) {
8887 iref = (struct btrfs_extent_inline_ref *)ptr;
8888 type = btrfs_extent_inline_ref_type(leaf, iref);
8889 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8891 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8892 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8893 if (ref_root == owner || ref_root == root->objectid)
8894 found_dbackref = 1;
8895 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8896 found_dbackref = !check_tree_block_ref(root, NULL,
8897 btrfs_extent_inline_ref_offset(leaf, iref),
8898 0, owner);
8901 if (found_dbackref)
8902 break;
8903 ptr += btrfs_extent_inline_ref_size(type);
8906 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
8907 if (!found_dbackref) {
8908 btrfs_release_path(&path);
8910 btrfs_init_path(&path);
8911 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8912 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8913 dbref_key.offset = hash_extent_data_ref(root->objectid,
8914 fi_key.objectid, fi_key.offset);
8916 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8917 &dbref_key, &path, 0, 0);
8918 if (!ret)
8919 found_dbackref = 1;
8922 if (!found_dbackref)
8923 err |= BACKREF_MISSING;
8924 error:
8925 btrfs_release_path(&path);
8926 if (err & BACKREF_MISSING) {
8927 error("data extent[%llu %llu] backref lost",
8928 disk_bytenr, disk_num_bytes);
8930 return err;
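/*
 * Illustrative sketch (hypothetical values): the keyed data backref looked up
 * above when no inline ref matches uses the hash of (root, objectid, offset)
 * as the key offset.  For a file extent at file offset 0 of inode 257 in the
 * fs tree (root 5), the key searched in the extent tree would be:
 *
 *	dbref_key.objectid = disk bytenr of the extent;
 *	dbref_key.type     = BTRFS_EXTENT_DATA_REF_KEY;
 *	dbref_key.offset   = hash_extent_data_ref(5, 257, 0);
 *
 * A hit from btrfs_search_slot() on that exact key is enough to prove the
 * backref exists, which is why only ret == 0 sets found_dbackref.
 */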
8934 * Get the real tree block level, for cases like a shared block
8935 * Return >= 0 as tree level
8936 * Return <0 for error
8938 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8940 struct extent_buffer *eb;
8941 struct btrfs_path path;
8942 struct btrfs_key key;
8943 struct btrfs_extent_item *ei;
8944 u64 flags;
8945 u64 transid;
8946 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8947 u8 backref_level;
8948 u8 header_level;
8949 int ret;
8951 /* Search extent tree for extent generation and level */
8952 key.objectid = bytenr;
8953 key.type = BTRFS_METADATA_ITEM_KEY;
8954 key.offset = (u64)-1;
8956 btrfs_init_path(&path);
8957 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8958 if (ret < 0)
8959 goto release_out;
8960 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8961 if (ret < 0)
8962 goto release_out;
8963 if (ret > 0) {
8964 ret = -ENOENT;
8965 goto release_out;
8968 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8969 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8970 struct btrfs_extent_item);
8971 flags = btrfs_extent_flags(path.nodes[0], ei);
8972 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8973 ret = -ENOENT;
8974 goto release_out;
8977 /* Get transid for later read_tree_block() check */
8978 transid = btrfs_extent_generation(path.nodes[0], ei);
8980 /* Get backref level as one source */
8981 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8982 backref_level = key.offset;
8983 } else {
8984 struct btrfs_tree_block_info *info;
8986 info = (struct btrfs_tree_block_info *)(ei + 1);
8987 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8989 btrfs_release_path(&path);
8991 /* Get level from tree block as an alternative source */
8992 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8993 if (!extent_buffer_uptodate(eb)) {
8994 free_extent_buffer(eb);
8995 return -EIO;
8997 header_level = btrfs_header_level(eb);
8998 free_extent_buffer(eb);
9000 if (header_level != backref_level)
9001 return -EIO;
9002 return header_level;
9004 release_out:
9005 btrfs_release_path(&path);
9006 return ret;
9010 * Check if a tree block backref is valid (points to a valid tree block)
9011 * if level == -1, level will be resolved
9012 * Return >0 for any error found and print error message
9014 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9015 u64 bytenr, int level)
9017 struct btrfs_root *root;
9018 struct btrfs_key key;
9019 struct btrfs_path path;
9020 struct extent_buffer *eb;
9021 struct extent_buffer *node;
9022 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9023 int err = 0;
9024 int ret;
9026 /* Query level for level == -1 special case */
9027 if (level == -1)
9028 level = query_tree_block_level(fs_info, bytenr);
9029 if (level < 0) {
9030 err |= REFERENCER_MISSING;
9031 goto out;
9034 key.objectid = root_id;
9035 key.type = BTRFS_ROOT_ITEM_KEY;
9036 key.offset = (u64)-1;
9038 root = btrfs_read_fs_root(fs_info, &key);
9039 if (IS_ERR(root)) {
9040 err |= REFERENCER_MISSING;
9041 goto out;
9044 /* Read out the tree block to get item/node key */
9045 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9046 if (!extent_buffer_uptodate(eb)) {
9047 err |= REFERENCER_MISSING;
9048 free_extent_buffer(eb);
9049 goto out;
9052 /* Empty tree, no need to check key */
9053 if (!btrfs_header_nritems(eb) && !level) {
9054 free_extent_buffer(eb);
9055 goto out;
9058 if (level)
9059 btrfs_node_key_to_cpu(eb, &key, 0);
9060 else
9061 btrfs_item_key_to_cpu(eb, &key, 0);
9063 free_extent_buffer(eb);
9065 btrfs_init_path(&path);
9066 path.lowest_level = level;
9067 /* Search with the first key, to ensure we can reach it */
9068 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9069 if (ret < 0) {
9070 err |= REFERENCER_MISSING;
9071 goto release_out;
9074 node = path.nodes[level];
9075 if (btrfs_header_bytenr(node) != bytenr) {
9076 error(
9077 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9078 bytenr, nodesize, bytenr,
9079 btrfs_header_bytenr(node));
9080 err |= REFERENCER_MISMATCH;
9082 if (btrfs_header_level(node) != level) {
9083 error(
9084 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9085 bytenr, nodesize, level,
9086 btrfs_header_level(node));
9087 err |= REFERENCER_MISMATCH;
9090 release_out:
9091 btrfs_release_path(&path);
9092 out:
9093 if (err & REFERENCER_MISSING) {
9094 if (level < 0)
9095 error("extent [%llu %d] lost referencer (owner: %llu)",
9096 bytenr, nodesize, root_id);
9097 else
9098 error(
9099 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9100 bytenr, nodesize, root_id, level);
9103 return err;
9107 * Check referencer for shared block backref
9108 * If level == -1, this function will resolve the level.
9110 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9111 u64 parent, u64 bytenr, int level)
9113 struct extent_buffer *eb;
9114 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9115 u32 nr;
9116 int found_parent = 0;
9117 int i;
9119 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9120 if (!extent_buffer_uptodate(eb))
9121 goto out;
9123 if (level == -1)
9124 level = query_tree_block_level(fs_info, bytenr);
9125 if (level < 0)
9126 goto out;
9128 if (level + 1 != btrfs_header_level(eb))
9129 goto out;
9131 nr = btrfs_header_nritems(eb);
9132 for (i = 0; i < nr; i++) {
9133 if (bytenr == btrfs_node_blockptr(eb, i)) {
9134 found_parent = 1;
9135 break;
9138 out:
9139 free_extent_buffer(eb);
9140 if (!found_parent) {
9141 error(
9142 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9143 bytenr, nodesize, parent, level);
9144 return REFERENCER_MISSING;
9146 return 0;
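/*
 * Illustrative note (sketch for clarity): a SHARED_BLOCK_REF stores the
 * bytenr of the parent node rather than a root objectid.  Validating it
 * therefore means reading the parent (one level above the child, hence the
 * level + 1 check) and confirming one of its block pointers equals the
 * child's bytenr, which is exactly what the loop over btrfs_node_blockptr()
 * above does.
 */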
9150 * Check referencer for normal (inlined) data ref
9151 * If len == 0, it will be resolved by searching in extent tree
9153 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9154 u64 root_id, u64 objectid, u64 offset,
9155 u64 bytenr, u64 len, u32 count)
9157 struct btrfs_root *root;
9158 struct btrfs_root *extent_root = fs_info->extent_root;
9159 struct btrfs_key key;
9160 struct btrfs_path path;
9161 struct extent_buffer *leaf;
9162 struct btrfs_file_extent_item *fi;
9163 u32 found_count = 0;
9164 int slot;
9165 int ret = 0;
9167 if (!len) {
9168 key.objectid = bytenr;
9169 key.type = BTRFS_EXTENT_ITEM_KEY;
9170 key.offset = (u64)-1;
9172 btrfs_init_path(&path);
9173 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9174 if (ret < 0)
9175 goto out;
9176 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9177 if (ret)
9178 goto out;
9179 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9180 if (key.objectid != bytenr ||
9181 key.type != BTRFS_EXTENT_ITEM_KEY)
9182 goto out;
9183 len = key.offset;
9184 btrfs_release_path(&path);
9186 key.objectid = root_id;
9187 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9188 key.offset = (u64)-1;
9189 btrfs_init_path(&path);
9191 root = btrfs_read_fs_root(fs_info, &key);
9192 if (IS_ERR(root))
9193 goto out;
9195 key.objectid = objectid;
9196 key.type = BTRFS_EXTENT_DATA_KEY;
9198 * It can be nasty, as the data backref offset is
9199 * file offset - file extent offset, which is smaller than or
9200 * equal to the original file offset. The only special case is
9201 * overflow, so we need a special check and a further search.
9203 key.offset = offset & (1ULL << 63) ? 0 : offset;
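/*
 * Worked example (hypothetical numbers): for a file extent at file offset
 * 1048576 whose btrfs_file_extent_offset() is 262144, the backref stores
 * offset = 1048576 - 262144 = 786432.  Searching the EXTENT_DATA items from
 * key.offset = 786432 forward is guaranteed to reach the real item at
 * 1048576, since the stored offset never exceeds the file offset.  If the
 * subtraction wrapped around (bit 63 set), we start from 0 instead, as done
 * above.
 */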
9205 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9206 if (ret < 0)
9207 goto out;
9210 * Search forward from there to get the correct one
9211 * NOTE: As we must do a comprehensive check on the data backref to
9212 * make sure the dref count also matches, we must iterate all file
9213 * extents for that inode.
9215 while (1) {
9216 leaf = path.nodes[0];
9217 slot = path.slots[0];
9219 btrfs_item_key_to_cpu(leaf, &key, slot);
9220 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9221 break;
9222 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9224 * Besides the normal disk bytenr and disk num bytes, we still
9225 * need an extra check on the dbackref offset, as
9226 * dbackref offset = file_offset - file_extent_offset
9228 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9229 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9230 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9231 offset)
9232 found_count++;
9234 ret = btrfs_next_item(root, &path);
9235 if (ret)
9236 break;
9238 out:
9239 btrfs_release_path(&path);
9240 if (found_count != count) {
9241 error(
9242 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9243 bytenr, len, root_id, objectid, offset, count, found_count);
9244 return REFERENCER_MISSING;
9246 return 0;
9250 * Check if the referencer of a shared data backref exists
9252 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9253 u64 parent, u64 bytenr)
9255 struct extent_buffer *eb;
9256 struct btrfs_key key;
9257 struct btrfs_file_extent_item *fi;
9258 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9259 u32 nr;
9260 int found_parent = 0;
9261 int i;
9263 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9264 if (!extent_buffer_uptodate(eb))
9265 goto out;
9267 nr = btrfs_header_nritems(eb);
9268 for (i = 0; i < nr; i++) {
9269 btrfs_item_key_to_cpu(eb, &key, i);
9270 if (key.type != BTRFS_EXTENT_DATA_KEY)
9271 continue;
9273 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9274 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9275 continue;
9277 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9278 found_parent = 1;
9279 break;
9283 out:
9284 free_extent_buffer(eb);
9285 if (!found_parent) {
9286 error("shared extent %llu referencer lost (parent: %llu)",
9287 bytenr, parent);
9288 return REFERENCER_MISSING;
9290 return 0;
9294 * This function will check a given extent item, including its backrefs and
9295 * the item itself (e.g. crossing stripe boundary and type)
9297 * Since we don't use extent_record anymore, introduce new error bits
9299 static int check_extent_item(struct btrfs_fs_info *fs_info,
9300 struct extent_buffer *eb, int slot)
9302 struct btrfs_extent_item *ei;
9303 struct btrfs_extent_inline_ref *iref;
9304 struct btrfs_extent_data_ref *dref;
9305 unsigned long end;
9306 unsigned long ptr;
9307 int type;
9308 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9309 u32 item_size = btrfs_item_size_nr(eb, slot);
9310 u64 flags;
9311 u64 offset;
9312 int metadata = 0;
9313 int level;
9314 struct btrfs_key key;
9315 int ret;
9316 int err = 0;
9318 btrfs_item_key_to_cpu(eb, &key, slot);
9319 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9320 bytes_used += key.offset;
9321 else
9322 bytes_used += nodesize;
9324 if (item_size < sizeof(*ei)) {
9326 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9327 * old thing from when the on-disk format was still undetermined.
9328 * No need to care about it anymore
9330 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9331 return -ENOTTY;
9334 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9335 flags = btrfs_extent_flags(eb, ei);
9337 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9338 metadata = 1;
9339 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9340 error("bad metadata [%llu, %llu) crossing stripe boundary",
9341 key.objectid, key.objectid + nodesize);
9342 err |= CROSSING_STRIPE_BOUNDARY;
9345 ptr = (unsigned long)(ei + 1);
9347 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9348 /* Old EXTENT_ITEM metadata */
9349 struct btrfs_tree_block_info *info;
9351 info = (struct btrfs_tree_block_info *)ptr;
9352 level = btrfs_tree_block_level(eb, info);
9353 ptr += sizeof(struct btrfs_tree_block_info);
9354 } else {
9355 /* New METADATA_ITEM */
9356 level = key.offset;
9358 end = (unsigned long)ei + item_size;
9360 if (ptr >= end) {
9361 err |= ITEM_SIZE_MISMATCH;
9362 goto out;
9365 /* Now check every backref in this extent item */
9366 next:
9367 iref = (struct btrfs_extent_inline_ref *)ptr;
9368 type = btrfs_extent_inline_ref_type(eb, iref);
9369 offset = btrfs_extent_inline_ref_offset(eb, iref);
9370 switch (type) {
9371 case BTRFS_TREE_BLOCK_REF_KEY:
9372 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9373 level);
9374 err |= ret;
9375 break;
9376 case BTRFS_SHARED_BLOCK_REF_KEY:
9377 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9378 level);
9379 err |= ret;
9380 break;
9381 case BTRFS_EXTENT_DATA_REF_KEY:
9382 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9383 ret = check_extent_data_backref(fs_info,
9384 btrfs_extent_data_ref_root(eb, dref),
9385 btrfs_extent_data_ref_objectid(eb, dref),
9386 btrfs_extent_data_ref_offset(eb, dref),
9387 key.objectid, key.offset,
9388 btrfs_extent_data_ref_count(eb, dref));
9389 err |= ret;
9390 break;
9391 case BTRFS_SHARED_DATA_REF_KEY:
9392 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9393 err |= ret;
9394 break;
9395 default:
9396 error("extent[%llu %d %llu] has unknown ref type: %d",
9397 key.objectid, key.type, key.offset, type);
9398 err |= UNKNOWN_TYPE;
9399 goto out;
9402 ptr += btrfs_extent_inline_ref_size(type);
9403 if (ptr < end)
9404 goto next;
9406 out:
9407 return err;
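/*
 * Quick reference for the inline backref types dispatched above (sketch for
 * clarity): the meaning of btrfs_extent_inline_ref_offset() depends on the
 * ref type.
 *
 *	BTRFS_TREE_BLOCK_REF_KEY:   offset = objectid of the owning root
 *	BTRFS_SHARED_BLOCK_REF_KEY: offset = bytenr of the parent tree node
 *	BTRFS_EXTENT_DATA_REF_KEY:  a btrfs_extent_data_ref follows, carrying
 *	                            root/objectid/offset/count
 *	BTRFS_SHARED_DATA_REF_KEY:  offset = bytenr of the parent leaf
 *
 * Each type is validated by the matching check_*_backref() helper above.
 */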
9411 * Check if a dev extent item is correctly referenced by its chunk
9413 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9414 struct extent_buffer *eb, int slot)
9416 struct btrfs_root *chunk_root = fs_info->chunk_root;
9417 struct btrfs_dev_extent *ptr;
9418 struct btrfs_path path;
9419 struct btrfs_key chunk_key;
9420 struct btrfs_key devext_key;
9421 struct btrfs_chunk *chunk;
9422 struct extent_buffer *l;
9423 int num_stripes;
9424 u64 length;
9425 int i;
9426 int found_chunk = 0;
9427 int ret;
9429 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9430 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9431 length = btrfs_dev_extent_length(eb, ptr);
9433 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9434 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9435 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9437 btrfs_init_path(&path);
9438 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9439 if (ret)
9440 goto out;
9442 l = path.nodes[0];
9443 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9444 if (btrfs_chunk_length(l, chunk) != length)
9445 goto out;
9447 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9448 for (i = 0; i < num_stripes; i++) {
9449 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9450 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9452 if (devid == devext_key.objectid &&
9453 offset == devext_key.offset) {
9454 found_chunk = 1;
9455 break;
9458 out:
9459 btrfs_release_path(&path);
9460 if (!found_chunk) {
9461 error(
9462 "device extent[%llu, %llu, %llu] did not find the related chunk",
9463 devext_key.objectid, devext_key.offset, length);
9464 return REFERENCER_MISSING;
9466 return 0;
9470 * Check if the used space recorded in the dev item is correct
9472 static int check_dev_item(struct btrfs_fs_info *fs_info,
9473 struct extent_buffer *eb, int slot)
9475 struct btrfs_root *dev_root = fs_info->dev_root;
9476 struct btrfs_dev_item *dev_item;
9477 struct btrfs_path path;
9478 struct btrfs_key key;
9479 struct btrfs_dev_extent *ptr;
9480 u64 dev_id;
9481 u64 used;
9482 u64 total = 0;
9483 int ret;
9485 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9486 dev_id = btrfs_device_id(eb, dev_item);
9487 used = btrfs_device_bytes_used(eb, dev_item);
9489 key.objectid = dev_id;
9490 key.type = BTRFS_DEV_EXTENT_KEY;
9491 key.offset = 0;
9493 btrfs_init_path(&path);
9494 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9495 if (ret < 0) {
9496 btrfs_item_key_to_cpu(eb, &key, slot);
9497 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9498 key.objectid, key.type, key.offset);
9499 btrfs_release_path(&path);
9500 return REFERENCER_MISSING;
9503 /* Iterate dev_extents to calculate the used space of a device */
9504 while (1) {
9505 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9507 if (key.objectid > dev_id)
9508 break;
9509 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9510 goto next;
9512 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9513 struct btrfs_dev_extent);
9514 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9515 next:
9516 ret = btrfs_next_item(dev_root, &path);
9517 if (ret)
9518 break;
9520 btrfs_release_path(&path);
9522 if (used != total) {
9523 btrfs_item_key_to_cpu(eb, &key, slot);
9524 error(
9525 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9526 total, used, BTRFS_ROOT_TREE_OBJECTID,
9527 BTRFS_DEV_EXTENT_KEY, dev_id);
9528 return ACCOUNTING_MISMATCH;
9530 return 0;
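/*
 * Accounting example (hypothetical numbers): a device whose DEV_ITEM reports
 * bytes_used = 2147483648 must have dev extents in the device tree whose
 * lengths sum to exactly that value, e.g. two 1 GiB extents:
 *
 *	1073741824 + 1073741824 = 2147483648
 *
 * Any other sum makes the loop above report ACCOUNTING_MISMATCH.
 */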
9534 * Check a block group item with its referencer (chunk) and its used space
9535 * with extent/metadata item
9537 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9538 struct extent_buffer *eb, int slot)
9540 struct btrfs_root *extent_root = fs_info->extent_root;
9541 struct btrfs_root *chunk_root = fs_info->chunk_root;
9542 struct btrfs_block_group_item *bi;
9543 struct btrfs_block_group_item bg_item;
9544 struct btrfs_path path;
9545 struct btrfs_key bg_key;
9546 struct btrfs_key chunk_key;
9547 struct btrfs_key extent_key;
9548 struct btrfs_chunk *chunk;
9549 struct extent_buffer *leaf;
9550 struct btrfs_extent_item *ei;
9551 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9552 u64 flags;
9553 u64 bg_flags;
9554 u64 used;
9555 u64 total = 0;
9556 int ret;
9557 int err = 0;
9559 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9560 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9561 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9562 used = btrfs_block_group_used(&bg_item);
9563 bg_flags = btrfs_block_group_flags(&bg_item);
9565 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9566 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9567 chunk_key.offset = bg_key.objectid;
9569 btrfs_init_path(&path);
9570 /* Search for the referencer chunk */
9571 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9572 if (ret) {
9573 error(
9574 "block group[%llu %llu] did not find the related chunk item",
9575 bg_key.objectid, bg_key.offset);
9576 err |= REFERENCER_MISSING;
9577 } else {
9578 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9579 struct btrfs_chunk);
9580 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9581 bg_key.offset) {
9582 error(
9583 "block group[%llu %llu] related chunk item length does not match",
9584 bg_key.objectid, bg_key.offset);
9585 err |= REFERENCER_MISMATCH;
9588 btrfs_release_path(&path);
9590 /* Search from the block group bytenr */
9591 extent_key.objectid = bg_key.objectid;
9592 extent_key.type = 0;
9593 extent_key.offset = 0;
9595 btrfs_init_path(&path);
9596 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9597 if (ret < 0)
9598 goto out;
9600 /* Iterate extent tree to account used space */
9601 while (1) {
9602 leaf = path.nodes[0];
9603 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9604 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9605 break;
9607 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9608 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9609 goto next;
9610 if (extent_key.objectid < bg_key.objectid)
9611 goto next;
9613 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9614 total += nodesize;
9615 else
9616 total += extent_key.offset;
9618 ei = btrfs_item_ptr(leaf, path.slots[0],
9619 struct btrfs_extent_item);
9620 flags = btrfs_extent_flags(leaf, ei);
9621 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9622 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9623 error(
9624 "bad extent[%llu, %llu) type mismatch with chunk",
9625 extent_key.objectid,
9626 extent_key.objectid + extent_key.offset);
9627 err |= CHUNK_TYPE_MISMATCH;
9629 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9630 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9631 BTRFS_BLOCK_GROUP_METADATA))) {
9632 error(
9633 "bad extent[%llu, %llu) type mismatch with chunk",
9634 extent_key.objectid,
9635 extent_key.objectid + nodesize);
9636 err |= CHUNK_TYPE_MISMATCH;
9639 next:
9640 ret = btrfs_next_item(extent_root, &path);
9641 if (ret)
9642 break;
9645 out:
9646 btrfs_release_path(&path);
9648 if (total != used) {
9649 error(
9650 "block group[%llu %llu] used %llu but extent items used %llu",
9651 bg_key.objectid, bg_key.offset, used, total);
9652 err |= ACCOUNTING_MISMATCH;
9654 return err;
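/*
 * Accounting example (hypothetical numbers): for a 1 GiB data block group
 * starting at 13631488 with block_group_used = 268435456, the EXTENT_ITEM
 * sizes inside [13631488, 13631488 + 1073741824) must sum to 268435456;
 * each METADATA_ITEM would instead contribute nodesize bytes.  A different
 * sum sets ACCOUNTING_MISMATCH, and a data extent inside a metadata block
 * group (or vice versa) sets CHUNK_TYPE_MISMATCH.
 */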
9658 * Check a chunk item.
9659 * This includes checking all referred dev_extents and the block group item
9661 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9662 struct extent_buffer *eb, int slot)
9664 struct btrfs_root *extent_root = fs_info->extent_root;
9665 struct btrfs_root *dev_root = fs_info->dev_root;
9666 struct btrfs_path path;
9667 struct btrfs_key chunk_key;
9668 struct btrfs_key bg_key;
9669 struct btrfs_key devext_key;
9670 struct btrfs_chunk *chunk;
9671 struct extent_buffer *leaf;
9672 struct btrfs_block_group_item *bi;
9673 struct btrfs_block_group_item bg_item;
9674 struct btrfs_dev_extent *ptr;
9675 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9676 u64 length;
9677 u64 chunk_end;
9678 u64 type;
9679 u64 profile;
9680 int num_stripes;
9681 u64 offset;
9682 u64 objectid;
9683 int i;
9684 int ret;
9685 int err = 0;
9687 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9688 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9689 length = btrfs_chunk_length(eb, chunk);
9690 chunk_end = chunk_key.offset + length;
9691 if (!IS_ALIGNED(length, sectorsize)) {
9692 error("chunk[%llu %llu) not aligned to %u",
9693 chunk_key.offset, chunk_end, sectorsize);
9694 err |= BYTES_UNALIGNED;
9695 goto out;
9698 type = btrfs_chunk_type(eb, chunk);
9699 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9700 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9701 error("chunk[%llu %llu) has no chunk type",
9702 chunk_key.offset, chunk_end);
9703 err |= UNKNOWN_TYPE;
9705 if (profile && (profile & (profile - 1))) {
9706 error("chunk[%llu %llu) multiple profiles detected: %llx",
9707 chunk_key.offset, chunk_end, profile);
9708 err |= UNKNOWN_TYPE;
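/*
 * The check above relies on the usual power-of-two trick: a valid chunk
 * carries at most one profile bit, so profile & (profile - 1) must be 0.
 * Hypothetical example with two profile bits set, say 0x10 and 0x20:
 *
 *	0x30 & (0x30 - 1) = 0x30 & 0x2f = 0x20 != 0
 *
 * which is reported as multiple profiles.
 */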
9711 bg_key.objectid = chunk_key.offset;
9712 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9713 bg_key.offset = length;
9715 btrfs_init_path(&path);
9716 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9717 if (ret) {
9718 error(
9719 "chunk[%llu %llu) did not find the related block group item",
9720 chunk_key.offset, chunk_end);
9721 err |= REFERENCER_MISSING;
9722 } else {
9723 leaf = path.nodes[0];
9724 bi = btrfs_item_ptr(leaf, path.slots[0],
9725 struct btrfs_block_group_item);
9726 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9727 sizeof(bg_item));
9728 if (btrfs_block_group_flags(&bg_item) != type) {
9729 error(
9730 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9731 chunk_key.offset, chunk_end, type,
9732 btrfs_block_group_flags(&bg_item));
9733 err |= REFERENCER_MISSING;
9737 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9738 for (i = 0; i < num_stripes; i++) {
9739 btrfs_release_path(&path);
9740 btrfs_init_path(&path);
9741 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9742 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9743 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9745 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9746 0, 0);
9747 if (ret)
9748 goto not_match_dev;
9750 leaf = path.nodes[0];
9751 ptr = btrfs_item_ptr(leaf, path.slots[0],
9752 struct btrfs_dev_extent);
9753 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9754 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9755 if (objectid != chunk_key.objectid ||
9756 offset != chunk_key.offset ||
9757 btrfs_dev_extent_length(leaf, ptr) != length)
9758 goto not_match_dev;
9759 continue;
9760 not_match_dev:
9761 err |= BACKREF_MISSING;
9762 error(
9763 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9764 chunk_key.offset, chunk_end, i);
9765 continue;
9767 btrfs_release_path(&path);
9768 out:
9769 return err;
9773 * Main entry function to check known items and update related accounting info
9775 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9777 struct btrfs_fs_info *fs_info = root->fs_info;
9778 struct btrfs_key key;
9779 int slot = 0;
9780 int type;
9781 struct btrfs_extent_data_ref *dref;
9782 int ret;
9783 int err = 0;
9785 next:
9786 btrfs_item_key_to_cpu(eb, &key, slot);
9787 type = btrfs_key_type(&key);
9789 switch (type) {
9790 case BTRFS_EXTENT_DATA_KEY:
9791 ret = check_extent_data_item(root, eb, slot);
9792 err |= ret;
9793 break;
9794 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9795 ret = check_block_group_item(fs_info, eb, slot);
9796 err |= ret;
9797 break;
9798 case BTRFS_DEV_ITEM_KEY:
9799 ret = check_dev_item(fs_info, eb, slot);
9800 err |= ret;
9801 break;
9802 case BTRFS_CHUNK_ITEM_KEY:
9803 ret = check_chunk_item(fs_info, eb, slot);
9804 err |= ret;
9805 break;
9806 case BTRFS_DEV_EXTENT_KEY:
9807 ret = check_dev_extent_item(fs_info, eb, slot);
9808 err |= ret;
9809 break;
9810 case BTRFS_EXTENT_ITEM_KEY:
9811 case BTRFS_METADATA_ITEM_KEY:
9812 ret = check_extent_item(fs_info, eb, slot);
9813 err |= ret;
9814 break;
9815 case BTRFS_EXTENT_CSUM_KEY:
9816 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9817 break;
9818 case BTRFS_TREE_BLOCK_REF_KEY:
9819 ret = check_tree_block_backref(fs_info, key.offset,
9820 key.objectid, -1);
9821 err |= ret;
9822 break;
9823 case BTRFS_EXTENT_DATA_REF_KEY:
9824 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9825 ret = check_extent_data_backref(fs_info,
9826 btrfs_extent_data_ref_root(eb, dref),
9827 btrfs_extent_data_ref_objectid(eb, dref),
9828 btrfs_extent_data_ref_offset(eb, dref),
9829 key.objectid, 0,
9830 btrfs_extent_data_ref_count(eb, dref));
9831 err |= ret;
9832 break;
9833 case BTRFS_SHARED_BLOCK_REF_KEY:
9834 ret = check_shared_block_backref(fs_info, key.offset,
9835 key.objectid, -1);
9836 err |= ret;
9837 break;
9838 case BTRFS_SHARED_DATA_REF_KEY:
9839 ret = check_shared_data_backref(fs_info, key.offset,
9840 key.objectid);
9841 err |= ret;
9842 break;
9843 default:
9844 break;
9847 if (++slot < btrfs_header_nritems(eb))
9848 goto next;
9850 return err;
9854 * Helper function for the later fs/subvol tree check, to determine if a
9855 * tree block should be checked.
9856 * This function ensures that only the direct referencer with the lowest
9857 * rootid checks a fs/subvolume tree block.
9859 * The backref check at the extent tree would detect errors like a missing
9860 * subvolume tree, so we can check aggressively to reduce duplicated checks.
9862 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9864 struct btrfs_root *extent_root = root->fs_info->extent_root;
9865 struct btrfs_key key;
9866 struct btrfs_path path;
9867 struct extent_buffer *leaf;
9868 int slot;
9869 struct btrfs_extent_item *ei;
9870 unsigned long ptr;
9871 unsigned long end;
9872 int type;
9873 u32 item_size;
9874 u64 offset;
9875 struct btrfs_extent_inline_ref *iref;
9876 int ret;
9878 btrfs_init_path(&path);
9879 key.objectid = btrfs_header_bytenr(eb);
9880 key.type = BTRFS_METADATA_ITEM_KEY;
9881 key.offset = (u64)-1;
9884 * Any failure in backref resolving means we can't determine
9885 * which root the tree block belongs to.
9886 * So in that case, we need to check that tree block.
9888 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9889 if (ret < 0)
9890 goto need_check;
9892 ret = btrfs_previous_extent_item(extent_root, &path,
9893 btrfs_header_bytenr(eb));
9894 if (ret)
9895 goto need_check;
9897 leaf = path.nodes[0];
9898 slot = path.slots[0];
9899 btrfs_item_key_to_cpu(leaf, &key, slot);
9900 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9902 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9903 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9904 } else {
9905 struct btrfs_tree_block_info *info;
9907 info = (struct btrfs_tree_block_info *)(ei + 1);
9908 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9911 item_size = btrfs_item_size_nr(leaf, slot);
9912 ptr = (unsigned long)iref;
9913 end = (unsigned long)ei + item_size;
9914 while (ptr < end) {
9915 iref = (struct btrfs_extent_inline_ref *)ptr;
9916 type = btrfs_extent_inline_ref_type(leaf, iref);
9917 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9920 * We only check the tree block if the current root is
9921 * its lowest referencer.
9923 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9924 offset < root->objectid) {
9925 btrfs_release_path(&path);
9926 return 0;
9929 ptr += btrfs_extent_inline_ref_size(type);
9932 * Normally we should also check the keyed tree block ref, but that may
9933 * be very time consuming. Inlined refs should already let us skip a lot
9934 * of refs now, so skip searching for keyed tree block refs.
9937 need_check:
9938 btrfs_release_path(&path);
9939 return 1;
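/*
 * Illustrative example (hypothetical root ids): if a subvolume tree block is
 * shared via inline TREE_BLOCK_REF backrefs by roots 257 and 260, then when
 * root 260 traverses it, the loop above finds the ref with offset 257 < 260
 * and returns 0, so only the walk of root 257 actually checks the block.
 */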
9943 * Traversal function for tree block. We will do:
9944 * 1) Skip shared fs/subvolume tree blocks
9945 * 2) Update related bytes accounting
9946 * 3) Pre-order traversal
9948 static int traverse_tree_block(struct btrfs_root *root,
9949 struct extent_buffer *node)
9951 struct extent_buffer *eb;
9952 struct btrfs_key key;
9953 struct btrfs_key drop_key;
9954 int level;
9955 u64 nr;
9956 int i;
9957 int err = 0;
9958 int ret;
9961 * Skip shared fs/subvolume tree blocks; in that case they will
9962 * be checked by the referencer with the lowest rootid.
9964 if (is_fstree(root->objectid) && !should_check(root, node))
9965 return 0;
9967 /* Update bytes accounting */
9968 total_btree_bytes += node->len;
9969 if (fs_root_objectid(btrfs_header_owner(node)))
9970 total_fs_tree_bytes += node->len;
9971 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9972 total_extent_tree_bytes += node->len;
9973 if (!found_old_backref &&
9974 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9975 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9976 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9977 found_old_backref = 1;
9979 /* pre-order traversal, check itself first */
9980 level = btrfs_header_level(node);
9981 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9982 btrfs_header_level(node),
9983 btrfs_header_owner(node));
9984 err |= ret;
9985 if (err)
9986 error(
9987 "check %s failed root %llu bytenr %llu level %d, force continue check",
9988 level ? "node":"leaf", root->objectid,
9989 btrfs_header_bytenr(node), btrfs_header_level(node));
9991 if (!level) {
9992 btree_space_waste += btrfs_leaf_free_space(root, node);
9993 ret = check_leaf_items(root, node);
9994 err |= ret;
9995 return err;
9998 nr = btrfs_header_nritems(node);
9999 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10000 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10001 sizeof(struct btrfs_key_ptr);
10003 /* Then check all its children */
10004 for (i = 0; i < nr; i++) {
10005 u64 blocknr = btrfs_node_blockptr(node, i);
10007 btrfs_node_key_to_cpu(node, &key, i);
10008 if (level == root->root_item.drop_level &&
10009 is_dropped_key(&key, &drop_key))
10010 continue;
10013 * As a btrfs tree has at most 8 levels (0..7), it's quite safe
10014 * for the function to call itself recursively.
10016 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10017 if (extent_buffer_uptodate(eb)) {
10018 ret = traverse_tree_block(root, eb);
10019 err |= ret;
10021 free_extent_buffer(eb);
10024 return err;
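/*
 * Partially dropped snapshot example (hypothetical key): if a snapshot is
 * half-deleted, its root item records drop_progress/drop_level, e.g.
 * drop_progress = (266, BTRFS_EXTENT_DATA_KEY, 0) at drop_level 1.  At that
 * level, children whose first key compares below drop_progress are treated
 * as already freed by the interrupted deletion, so is_dropped_key() makes
 * the loop above skip them instead of reporting their backrefs as missing.
 */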
10028 * Low memory usage version of check_chunks_and_extents.
10030 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10032 struct btrfs_path path;
10033 struct btrfs_key key;
10034 struct btrfs_root *root1;
10035 struct btrfs_root *cur_root;
10036 int err = 0;
10037 int ret;
10039 root1 = root->fs_info->chunk_root;
10040 ret = traverse_tree_block(root1, root1->node);
10041 err |= ret;
10043 root1 = root->fs_info->tree_root;
10044 ret = traverse_tree_block(root1, root1->node);
10045 err |= ret;
10047 btrfs_init_path(&path);
10048 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10049 key.offset = 0;
10050 key.type = BTRFS_ROOT_ITEM_KEY;
10052 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10053 if (ret) {
10054 error("cannot find extent treet in tree_root");
10055 goto out;
10058 while (1) {
10059 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10060 if (key.type != BTRFS_ROOT_ITEM_KEY)
10061 goto next;
10062 key.offset = (u64)-1;
10064 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10065 if (IS_ERR(cur_root) || !cur_root) {
10066 error("failed to read tree: %lld", key.objectid);
10067 goto next;
10070 ret = traverse_tree_block(cur_root, cur_root->node);
10071 err |= ret;
10073 next:
10074 ret = btrfs_next_item(root1, &path);
10075 if (ret)
10076 goto out;
10079 out:
10080 btrfs_release_path(&path);
10081 return err;
10084 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10085 struct btrfs_root *root, int overwrite)
10087 struct extent_buffer *c;
10088 struct extent_buffer *old = root->node;
10089 int level;
10090 int ret;
10091 struct btrfs_disk_key disk_key = {0,0,0};
10093 level = 0;
10095 if (overwrite) {
10096 c = old;
10097 extent_buffer_get(c);
10098 goto init;
10100 c = btrfs_alloc_free_block(trans, root,
10101 root->nodesize,
10102 root->root_key.objectid,
10103 &disk_key, level, 0, 0);
10104 if (IS_ERR(c)) {
10105 c = old;
10106 extent_buffer_get(c);
10107 overwrite = 1;
10109 init:
10110 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10111 btrfs_set_header_level(c, level);
10112 btrfs_set_header_bytenr(c, c->start);
10113 btrfs_set_header_generation(c, trans->transid);
10114 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10115 btrfs_set_header_owner(c, root->root_key.objectid);
10117 write_extent_buffer(c, root->fs_info->fsid,
10118 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10120 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10121 btrfs_header_chunk_tree_uuid(c),
10122 BTRFS_UUID_SIZE);
10124 btrfs_mark_buffer_dirty(c);
10126 * this case can happen in the following cases:
10128 * 1. overwriting the previous root.
10130 * 2. reinitializing the reloc data root; this is because we skipped
10131 * pinning down the reloc data tree before, which means we can
10132 * allocate the same block bytenr here.
10134 if (old->start == c->start) {
10135 btrfs_set_root_generation(&root->root_item,
10136 trans->transid);
10137 root->root_item.level = btrfs_header_level(root->node);
10138 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10139 &root->root_key, &root->root_item);
10140 if (ret) {
10141 free_extent_buffer(c);
10142 return ret;
10145 free_extent_buffer(old);
10146 root->node = c;
10147 add_root_to_dirty_list(root);
10148 return 0;
10151 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10152 struct extent_buffer *eb, int tree_root)
10154 struct extent_buffer *tmp;
10155 struct btrfs_root_item *ri;
10156 struct btrfs_key key;
10157 u64 bytenr;
10158 u32 nodesize;
10159 int level = btrfs_header_level(eb);
10160 int nritems;
10161 int ret;
10162 int i;
10165 * If we have pinned this block before, don't pin it again.
10166 * This not only avoids an endless loop on a broken filesystem
10167 * but also gives us some speedup.
10169 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10170 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10171 return 0;
10173 btrfs_pin_extent(fs_info, eb->start, eb->len);
10175 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10176 nritems = btrfs_header_nritems(eb);
10177 for (i = 0; i < nritems; i++) {
10178 if (level == 0) {
10179 btrfs_item_key_to_cpu(eb, &key, i);
10180 if (key.type != BTRFS_ROOT_ITEM_KEY)
10181 continue;
10182 /* Skip the extent root and reloc roots */
10183 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10184 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10185 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10186 continue;
10187 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10188 bytenr = btrfs_disk_root_bytenr(eb, ri);
10191 * If at any point we start needing the real root we
10192 * will have to build a stump root for the root we are
10193 * in, but for now this doesn't actually use the root so
10194 * just pass in extent_root.
10196 tmp = read_tree_block(fs_info->extent_root, bytenr,
10197 nodesize, 0);
10198 if (!extent_buffer_uptodate(tmp)) {
10199 fprintf(stderr, "Error reading root block\n");
10200 return -EIO;
10202 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10203 free_extent_buffer(tmp);
10204 if (ret)
10205 return ret;
10206 } else {
10207 bytenr = btrfs_node_blockptr(eb, i);
10209 /* If we aren't the tree root don't read the block */
10210 if (level == 1 && !tree_root) {
10211 btrfs_pin_extent(fs_info, bytenr, nodesize);
10212 continue;
10215 tmp = read_tree_block(fs_info->extent_root, bytenr,
10216 nodesize, 0);
10217 if (!extent_buffer_uptodate(tmp)) {
10218 fprintf(stderr, "Error reading tree block\n");
10219 return -EIO;
10221 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10222 free_extent_buffer(tmp);
10223 if (ret)
10224 return ret;
10228 return 0;
10231 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10233 int ret;
10235 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10236 if (ret)
10237 return ret;
10239 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10242 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10244 struct btrfs_block_group_cache *cache;
10245 struct btrfs_path *path;
10246 struct extent_buffer *leaf;
10247 struct btrfs_chunk *chunk;
10248 struct btrfs_key key;
10249 int ret;
10250 u64 start;
10252 path = btrfs_alloc_path();
10253 if (!path)
10254 return -ENOMEM;
10256 key.objectid = 0;
10257 key.type = BTRFS_CHUNK_ITEM_KEY;
10258 key.offset = 0;
10260 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10261 if (ret < 0) {
10262 btrfs_free_path(path);
10263 return ret;
10267 * We do this in case the block groups were screwed up and had alloc
10268 * bits that aren't actually set on the chunks. This happens with
10269 * restored images every time and could happen in real life I guess.
10271 fs_info->avail_data_alloc_bits = 0;
10272 fs_info->avail_metadata_alloc_bits = 0;
10273 fs_info->avail_system_alloc_bits = 0;
10275 /* First we need to create the in-memory block groups */
10276 while (1) {
10277 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10278 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10279 if (ret < 0) {
10280 btrfs_free_path(path);
10281 return ret;
10283 if (ret) {
10284 ret = 0;
10285 break;
10288 leaf = path->nodes[0];
10289 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10290 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10291 path->slots[0]++;
10292 continue;
10295 chunk = btrfs_item_ptr(leaf, path->slots[0],
10296 struct btrfs_chunk);
10297 btrfs_add_block_group(fs_info, 0,
10298 btrfs_chunk_type(leaf, chunk),
10299 key.objectid, key.offset,
10300 btrfs_chunk_length(leaf, chunk));
10301 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10302 key.offset + btrfs_chunk_length(leaf, chunk),
10303 GFP_NOFS);
10304 path->slots[0]++;
10306 start = 0;
10307 while (1) {
10308 cache = btrfs_lookup_first_block_group(fs_info, start);
10309 if (!cache)
10310 break;
10311 cache->cached = 1;
10312 start = cache->key.objectid + cache->key.offset;
10315 btrfs_free_path(path);
10316 return 0;
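/*
 * Remove any pending balance item and all tree reloc root items from the
 * tree of tree roots, then reinitialize the data reloc tree.  Leftover
 * relocation state is meaningless once the extent tree has been wiped.
 */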
10319 static int reset_balance(struct btrfs_trans_handle *trans,
10320 struct btrfs_fs_info *fs_info)
10322 struct btrfs_root *root = fs_info->tree_root;
10323 struct btrfs_path *path;
10324 struct extent_buffer *leaf;
10325 struct btrfs_key key;
10326 int del_slot, del_nr = 0;
10327 int ret;
10328 int found = 0;
10330 path = btrfs_alloc_path();
10331 if (!path)
10332 return -ENOMEM;
10334 key.objectid = BTRFS_BALANCE_OBJECTID;
10335 key.type = BTRFS_BALANCE_ITEM_KEY;
10336 key.offset = 0;
10338 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10339 if (ret) {
10340 if (ret > 0)
10341 ret = 0;
10342 if (!ret)
10343 goto reinit_data_reloc;
10344 else
10345 goto out;
10348 ret = btrfs_del_item(trans, root, path);
10349 if (ret)
10350 goto out;
10351 btrfs_release_path(path);
10353 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10354 key.type = BTRFS_ROOT_ITEM_KEY;
10355 key.offset = 0;
10357 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10358 if (ret < 0)
10359 goto out;
10360 while (1) {
10361 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10362 if (!found)
10363 break;
10365 if (del_nr) {
10366 ret = btrfs_del_items(trans, root, path,
10367 del_slot, del_nr);
10368 del_nr = 0;
10369 if (ret)
10370 goto out;
10372 key.offset++;
10373 btrfs_release_path(path);
10375 found = 0;
10376 ret = btrfs_search_slot(trans, root, &key, path,
10377 -1, 1);
10378 if (ret < 0)
10379 goto out;
10380 continue;
10382 found = 1;
10383 leaf = path->nodes[0];
10384 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10385 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10386 break;
10387 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10388 path->slots[0]++;
10389 continue;
10391 if (!del_nr) {
10392 del_slot = path->slots[0];
10393 del_nr = 1;
10394 } else {
10395 del_nr++;
10397 path->slots[0]++;
10400 if (del_nr) {
10401 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10402 if (ret)
10403 goto out;
10405 btrfs_release_path(path);
10407 reinit_data_reloc:
10408 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10409 key.type = BTRFS_ROOT_ITEM_KEY;
10410 key.offset = (u64)-1;
10411 root = btrfs_read_fs_root(fs_info, &key);
10412 if (IS_ERR(root)) {
10413 fprintf(stderr, "Error reading data reloc tree\n");
10414 ret = PTR_ERR(root);
10415 goto out;
10417 record_root_in_trans(trans, root);
10418 ret = btrfs_fsck_reinit_root(trans, root, 0);
10419 if (ret)
10420 goto out;
10421 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10422 out:
10423 btrfs_free_path(path);
10424 return ret;
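/*
 * Top-level helper for --init-extent-tree.  Roughly:
 *   1) pin all metadata currently referenced by the other trees,
 *   2) throw away and rebuild the in-memory block groups,
 *   3) reinit the extent root and reinsert one block group item per chunk,
 *   4) clear any pending balance / reloc state.
 * The extent items themselves are recreated later by the normal fsck pass.
 */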
10427 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10428 struct btrfs_fs_info *fs_info)
10430 u64 start = 0;
10431 int ret;
10434 * The only reason we don't do this is because right now we're just
10435 * walking the trees we find and pinning down their bytes, we don't look
10436 * at any of the leaves. In order to do mixed groups we'd have to check
10437 * the leaves of any fs roots and pin down the bytes for any file
10438 * extents we find. Not hard but why do it if we don't have to?
10440 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10441 fprintf(stderr, "We don't support re-initing the extent tree "
10442 "for mixed block groups yet, please notify a btrfs "
10443 "developer you want to do this so they can add this "
10444 "functionality.\n");
10445 return -EINVAL;
10449 * first we need to walk all of the trees except the extent tree and pin
10450 * down the bytes that are in use so we don't overwrite any existing
10451 * metadata.
10453 ret = pin_metadata_blocks(fs_info);
10454 if (ret) {
10455 fprintf(stderr, "error pinning down used bytes\n");
10456 return ret;
10460 * Need to drop all the block groups since we're going to recreate all
10461 * of them again.
10463 btrfs_free_block_groups(fs_info);
10464 ret = reset_block_groups(fs_info);
10465 if (ret) {
10466 fprintf(stderr, "error resetting the block groups\n");
10467 return ret;
10470 /* Ok we can allocate now, reinit the extent root */
10471 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10472 if (ret) {
10473 fprintf(stderr, "extent root initialization failed\n");
10475 * When the transaction code is updated we should end the
10476 * transaction, but for now progs only knows about commit so
10477 * just return an error.
10479 return ret;
10483 * Now we have all the in-memory block groups setup so we can make
10484 * allocations properly, and the metadata we care about is safe since we
10485 * pinned all of it above.
10487 while (1) {
10488 struct btrfs_block_group_cache *cache;
10490 cache = btrfs_lookup_first_block_group(fs_info, start);
10491 if (!cache)
10492 break;
10493 start = cache->key.objectid + cache->key.offset;
10494 ret = btrfs_insert_item(trans, fs_info->extent_root,
10495 &cache->key, &cache->item,
10496 sizeof(cache->item));
10497 if (ret) {
10498 fprintf(stderr, "Error adding block group\n");
10499 return ret;
10501 btrfs_extent_post_op(trans, fs_info->extent_root);
10504 ret = reset_balance(trans, fs_info);
10505 if (ret)
10506 fprintf(stderr, "error resetting the pending balance\n");
10508 return ret;
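/*
 * Force a COW of @eb: look up its owner root and do a write search down to
 * the buffer's level, which makes the btree code rewrite every block on
 * that path (including @eb) inside a new transaction.
 */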
10511 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10513 struct btrfs_path *path;
10514 struct btrfs_trans_handle *trans;
10515 struct btrfs_key key;
10516 int ret;
10518 printf("Recowing metadata block %llu\n", eb->start);
10519 key.objectid = btrfs_header_owner(eb);
10520 key.type = BTRFS_ROOT_ITEM_KEY;
10521 key.offset = (u64)-1;
10523 root = btrfs_read_fs_root(root->fs_info, &key);
10524 if (IS_ERR(root)) {
10525 fprintf(stderr, "Couldn't find owner root %llu\n",
10526 key.objectid);
10527 return PTR_ERR(root);
10530 path = btrfs_alloc_path();
10531 if (!path)
10532 return -ENOMEM;
10534 trans = btrfs_start_transaction(root, 1);
10535 if (IS_ERR(trans)) {
10536 btrfs_free_path(path);
10537 return PTR_ERR(trans);
10540 path->lowest_level = btrfs_header_level(eb);
10541 if (path->lowest_level)
10542 btrfs_node_key_to_cpu(eb, &key, 0);
10543 else
10544 btrfs_item_key_to_cpu(eb, &key, 0);
10546 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10547 btrfs_commit_transaction(trans, root);
10548 btrfs_free_path(path);
10549 return ret;
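/*
 * Delete a single corrupted item that an earlier pass queued on the
 * delete_items list: look up the root it belongs to and remove bad->key
 * inside a small transaction.
 */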
10552 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10554 struct btrfs_path *path;
10555 struct btrfs_trans_handle *trans;
10556 struct btrfs_key key;
10557 int ret;
10559 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10560 bad->key.type, bad->key.offset);
10561 key.objectid = bad->root_id;
10562 key.type = BTRFS_ROOT_ITEM_KEY;
10563 key.offset = (u64)-1;
10565 root = btrfs_read_fs_root(root->fs_info, &key);
10566 if (IS_ERR(root)) {
10567 fprintf(stderr, "Couldn't find owner root %llu\n",
10568 key.objectid);
10569 return PTR_ERR(root);
10572 path = btrfs_alloc_path();
10573 if (!path)
10574 return -ENOMEM;
10576 trans = btrfs_start_transaction(root, 1);
10577 if (IS_ERR(trans)) {
10578 btrfs_free_path(path);
10579 return PTR_ERR(trans);
10582 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10583 if (ret) {
10584 if (ret > 0)
10585 ret = 0;
10586 goto out;
10588 ret = btrfs_del_item(trans, root, path);
10589 out:
10590 btrfs_commit_transaction(trans, root);
10591 btrfs_free_path(path);
10592 return ret;
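/*
 * Discard the log tree by clearing the log root pointer and level in the
 * superblock and committing, so a later mount will not try to replay a log
 * that repair has made stale.
 */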
10595 static int zero_log_tree(struct btrfs_root *root)
10597 struct btrfs_trans_handle *trans;
10598 int ret;
10600 trans = btrfs_start_transaction(root, 1);
10601 if (IS_ERR(trans)) {
10602 ret = PTR_ERR(trans);
10603 return ret;
10605 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10606 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10607 ret = btrfs_commit_transaction(trans, root);
10608 return ret;
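/*
 * Compute checksums for the data at [start, start + len) one sector at a
 * time: read each sector from disk and insert its csum into the csum tree
 * via btrfs_csum_file_block().
 */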
10611 static int populate_csum(struct btrfs_trans_handle *trans,
10612 struct btrfs_root *csum_root, char *buf, u64 start,
10613 u64 len)
10615 u64 offset = 0;
10616 u64 sectorsize;
10617 int ret = 0;
10619 while (offset < len) {
10620 sectorsize = csum_root->sectorsize;
10621 ret = read_extent_data(csum_root, buf, start + offset,
10622 &sectorsize, 0);
10623 if (ret)
10624 break;
10625 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10626 start + offset, buf, sectorsize);
10627 if (ret)
10628 break;
10629 offset += sectorsize;
10631 return ret;
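/*
 * Walk one fs/subvolume tree and re-populate csums for every regular
 * (BTRFS_FILE_EXTENT_REG) file extent it finds, using the extent's
 * disk_bytenr/disk_num_bytes as the checksum range.
 */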
10634 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10635 struct btrfs_root *csum_root,
10636 struct btrfs_root *cur_root)
10638 struct btrfs_path *path;
10639 struct btrfs_key key;
10640 struct extent_buffer *node;
10641 struct btrfs_file_extent_item *fi;
10642 char *buf = NULL;
10643 u64 start = 0;
10644 u64 len = 0;
10645 int slot = 0;
10646 int ret = 0;
10648 path = btrfs_alloc_path();
10649 if (!path)
10650 return -ENOMEM;
10651 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10652 if (!buf) {
10653 ret = -ENOMEM;
10654 goto out;
10657 key.objectid = 0;
10658 key.offset = 0;
10659 key.type = 0;
10661 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10662 if (ret < 0)
10663 goto out;
10664 /* Iterate all regular file extents and fill its csum */
10665 while (1) {
10666 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10668 if (key.type != BTRFS_EXTENT_DATA_KEY)
10669 goto next;
10670 node = path->nodes[0];
10671 slot = path->slots[0];
10672 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10673 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10674 goto next;
10675 start = btrfs_file_extent_disk_bytenr(node, fi);
10676 len = btrfs_file_extent_disk_num_bytes(node, fi);
10678 ret = populate_csum(trans, csum_root, buf, start, len);
10679 if (ret == -EEXIST)
10680 ret = 0;
10681 if (ret < 0)
10682 goto out;
10683 next:
10685 * TODO: if next leaf is corrupted, jump to nearest next valid
10686 * leaf.
10688 ret = btrfs_next_item(cur_root, path);
10689 if (ret < 0)
10690 goto out;
10691 if (ret > 0) {
10692 ret = 0;
10693 goto out;
10697 out:
10698 btrfs_free_path(path);
10699 free(buf);
10700 return ret;
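/*
 * Iterate every fs/subvolume root item in the tree of tree roots and call
 * fill_csum_tree_from_one_fs_root() on each, so csums can be rebuilt even
 * when the extent tree is not trustworthy.
 */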
10703 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10704 struct btrfs_root *csum_root)
10706 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10707 struct btrfs_path *path;
10708 struct btrfs_root *tree_root = fs_info->tree_root;
10709 struct btrfs_root *cur_root;
10710 struct extent_buffer *node;
10711 struct btrfs_key key;
10712 int slot = 0;
10713 int ret = 0;
10715 path = btrfs_alloc_path();
10716 if (!path)
10717 return -ENOMEM;
10719 key.objectid = BTRFS_FS_TREE_OBJECTID;
10720 key.offset = 0;
10721 key.type = BTRFS_ROOT_ITEM_KEY;
10723 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10724 if (ret < 0)
10725 goto out;
10726 if (ret > 0) {
10727 ret = -ENOENT;
10728 goto out;
10731 while (1) {
10732 node = path->nodes[0];
10733 slot = path->slots[0];
10734 btrfs_item_key_to_cpu(node, &key, slot);
10735 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10736 goto out;
10737 if (key.type != BTRFS_ROOT_ITEM_KEY)
10738 goto next;
10739 if (!is_fstree(key.objectid))
10740 goto next;
10741 key.offset = (u64)-1;
10743 cur_root = btrfs_read_fs_root(fs_info, &key);
10744 if (IS_ERR(cur_root) || !cur_root) {
10745 fprintf(stderr, "Failed to read fs/subvol tree: %lld\n",
10746 key.objectid);
10747 goto out;
10749 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10750 cur_root);
10751 if (ret < 0)
10752 goto out;
10753 next:
10754 ret = btrfs_next_item(tree_root, path);
10755 if (ret > 0) {
10756 ret = 0;
10757 goto out;
10759 if (ret < 0)
10760 goto out;
10763 out:
10764 btrfs_free_path(path);
10765 return ret;
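/*
 * Re-populate the csum tree by walking the extent tree directly: every
 * extent item flagged BTRFS_EXTENT_FLAG_DATA gets its checksums recomputed
 * from the data on disk.
 */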
10768 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10769 struct btrfs_root *csum_root)
10771 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10772 struct btrfs_path *path;
10773 struct btrfs_extent_item *ei;
10774 struct extent_buffer *leaf;
10775 char *buf;
10776 struct btrfs_key key;
10777 int ret;
10779 path = btrfs_alloc_path();
10780 if (!path)
10781 return -ENOMEM;
10783 key.objectid = 0;
10784 key.type = BTRFS_EXTENT_ITEM_KEY;
10785 key.offset = 0;
10787 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10788 if (ret < 0) {
10789 btrfs_free_path(path);
10790 return ret;
10793 buf = malloc(csum_root->sectorsize);
10794 if (!buf) {
10795 btrfs_free_path(path);
10796 return -ENOMEM;
10799 while (1) {
10800 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10801 ret = btrfs_next_leaf(extent_root, path);
10802 if (ret < 0)
10803 break;
10804 if (ret) {
10805 ret = 0;
10806 break;
10809 leaf = path->nodes[0];
10811 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10812 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10813 path->slots[0]++;
10814 continue;
10817 ei = btrfs_item_ptr(leaf, path->slots[0],
10818 struct btrfs_extent_item);
10819 if (!(btrfs_extent_flags(leaf, ei) &
10820 BTRFS_EXTENT_FLAG_DATA)) {
10821 path->slots[0]++;
10822 continue;
10825 ret = populate_csum(trans, csum_root, buf, key.objectid,
10826 key.offset);
10827 if (ret)
10828 break;
10829 path->slots[0]++;
10832 btrfs_free_path(path);
10833 free(buf);
10834 return ret;
10838 * Recalculate the csum and put it into the csum tree.
10840 * Extent tree init will wipe out all the extent info, so in that case we
10841 * can't depend on the extent tree and have to use the fs trees instead. If
10842 * search_fs_tree is set, the fs/subvol trees are used to init the csum tree.
10844 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10845 struct btrfs_root *csum_root,
10846 int search_fs_tree)
10848 if (search_fs_tree)
10849 return fill_csum_tree_from_fs(trans, csum_root);
10850 else
10851 return fill_csum_tree_from_extent(trans, csum_root);
10854 static void free_roots_info_cache(void)
10856 if (!roots_info_cache)
10857 return;
10859 while (!cache_tree_empty(roots_info_cache)) {
10860 struct cache_extent *entry;
10861 struct root_item_info *rii;
10863 entry = first_cache_extent(roots_info_cache);
10864 if (!entry)
10865 break;
10866 remove_cache_extent(roots_info_cache, entry);
10867 rii = container_of(entry, struct root_item_info, cache_extent);
10868 free(rii);
10871 free(roots_info_cache);
10872 roots_info_cache = NULL;
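/*
 * Scan the extent tree for tree block references and record, for each root
 * id, the highest-level block seen so far (its bytenr, generation and level)
 * plus how many blocks exist at that level.  A healthy root has exactly one
 * block at its top level, and that block is what the root item should point
 * to; repair_root_items() compares root items against this cache.
 */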
10875 static int build_roots_info_cache(struct btrfs_fs_info *info)
10877 int ret = 0;
10878 struct btrfs_key key;
10879 struct extent_buffer *leaf;
10880 struct btrfs_path *path;
10882 if (!roots_info_cache) {
10883 roots_info_cache = malloc(sizeof(*roots_info_cache));
10884 if (!roots_info_cache)
10885 return -ENOMEM;
10886 cache_tree_init(roots_info_cache);
10889 path = btrfs_alloc_path();
10890 if (!path)
10891 return -ENOMEM;
10893 key.objectid = 0;
10894 key.type = BTRFS_EXTENT_ITEM_KEY;
10895 key.offset = 0;
10897 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10898 if (ret < 0)
10899 goto out;
10900 leaf = path->nodes[0];
10902 while (1) {
10903 struct btrfs_key found_key;
10904 struct btrfs_extent_item *ei;
10905 struct btrfs_extent_inline_ref *iref;
10906 int slot = path->slots[0];
10907 int type;
10908 u64 flags;
10909 u64 root_id;
10910 u8 level;
10911 struct cache_extent *entry;
10912 struct root_item_info *rii;
10914 if (slot >= btrfs_header_nritems(leaf)) {
10915 ret = btrfs_next_leaf(info->extent_root, path);
10916 if (ret < 0) {
10917 break;
10918 } else if (ret) {
10919 ret = 0;
10920 break;
10922 leaf = path->nodes[0];
10923 slot = path->slots[0];
10926 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10928 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10929 found_key.type != BTRFS_METADATA_ITEM_KEY)
10930 goto next;
10932 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10933 flags = btrfs_extent_flags(leaf, ei);
10935 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10936 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10937 goto next;
10939 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10940 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10941 level = found_key.offset;
10942 } else {
10943 struct btrfs_tree_block_info *binfo;
10945 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10946 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10947 level = btrfs_tree_block_level(leaf, binfo);
10951 * For a root extent, the backref must be of the following type and be
10952 * the first (and only) iref in the item.
10954 type = btrfs_extent_inline_ref_type(leaf, iref);
10955 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10956 goto next;
10958 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10959 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10960 if (!entry) {
10961 rii = malloc(sizeof(struct root_item_info));
10962 if (!rii) {
10963 ret = -ENOMEM;
10964 goto out;
10966 rii->cache_extent.start = root_id;
10967 rii->cache_extent.size = 1;
10968 rii->level = (u8)-1;
10969 entry = &rii->cache_extent;
10970 ret = insert_cache_extent(roots_info_cache, entry);
10971 ASSERT(ret == 0);
10972 } else {
10973 rii = container_of(entry, struct root_item_info,
10974 cache_extent);
10977 ASSERT(rii->cache_extent.start == root_id);
10978 ASSERT(rii->cache_extent.size == 1);
10980 if (level > rii->level || rii->level == (u8)-1) {
10981 rii->level = level;
10982 rii->bytenr = found_key.objectid;
10983 rii->gen = btrfs_extent_generation(leaf, ei);
10984 rii->node_count = 1;
10985 } else if (level == rii->level) {
10986 rii->node_count++;
10988 next:
10989 path->slots[0]++;
10992 out:
10993 btrfs_free_path(path);
10995 return ret;
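/*
 * Check one root item against the info collected by build_roots_info_cache()
 * and, when not in read-only mode, rewrite its bytenr/level/generation in
 * place to point at the real root node.  Returns 1 if the item was (or needs
 * to be) fixed, 0 if it is consistent, negative on error.
 */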
10998 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10999 struct btrfs_path *path,
11000 const struct btrfs_key *root_key,
11001 const int read_only_mode)
11003 const u64 root_id = root_key->objectid;
11004 struct cache_extent *entry;
11005 struct root_item_info *rii;
11006 struct btrfs_root_item ri;
11007 unsigned long offset;
11009 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11010 if (!entry) {
11011 fprintf(stderr,
11012 "Error: could not find extent items for root %llu\n",
11013 root_key->objectid);
11014 return -ENOENT;
11017 rii = container_of(entry, struct root_item_info, cache_extent);
11018 ASSERT(rii->cache_extent.start == root_id);
11019 ASSERT(rii->cache_extent.size == 1);
11021 if (rii->node_count != 1) {
11022 fprintf(stderr,
11023 "Error: could not find btree root extent for root %llu\n",
11024 root_id);
11025 return -ENOENT;
11028 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11029 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11031 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11032 btrfs_root_level(&ri) != rii->level ||
11033 btrfs_root_generation(&ri) != rii->gen) {
11036 * If we're in repair mode but our caller told us to not update
11037 * the root item, i.e. just check if it needs to be updated, don't
11038 * print this message, since the caller will call us again shortly
11039 * for the same root item without read only mode (the caller will
11040 * open a transaction first).
11042 if (!(read_only_mode && repair))
11043 fprintf(stderr,
11044 "%sroot item for root %llu,"
11045 " current bytenr %llu, current gen %llu, current level %u,"
11046 " new bytenr %llu, new gen %llu, new level %u\n",
11047 (read_only_mode ? "" : "fixing "),
11048 root_id,
11049 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11050 btrfs_root_level(&ri),
11051 rii->bytenr, rii->gen, rii->level);
11053 if (btrfs_root_generation(&ri) > rii->gen) {
11054 fprintf(stderr,
11055 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11056 root_id, btrfs_root_generation(&ri), rii->gen);
11057 return -EINVAL;
11060 if (!read_only_mode) {
11061 btrfs_set_root_bytenr(&ri, rii->bytenr);
11062 btrfs_set_root_level(&ri, rii->level);
11063 btrfs_set_root_generation(&ri, rii->gen);
11064 write_extent_buffer(path->nodes[0], &ri,
11065 offset, sizeof(ri));
11068 return 1;
11071 return 0;
11075 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
11076 * caused read-only snapshots to be corrupted if they were created at a moment
11077 * when the source subvolume/snapshot had orphan items. The issue was that the
11078 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11079 * node instead of the post orphan cleanup root node.
11080 * So this function, and its callees, just detects and fixes those cases. Even
11081 * though the regression was for read-only snapshots, this function applies to
11082 * any snapshot/subvolume root.
11083 * This must be run before any other repair code - not doing so makes other
11084 * repair code delete or modify backrefs in the extent tree for example, which
11085 * will result in an inconsistent fs after repairing the root items.
11087 static int repair_root_items(struct btrfs_fs_info *info)
11089 struct btrfs_path *path = NULL;
11090 struct btrfs_key key;
11091 struct extent_buffer *leaf;
11092 struct btrfs_trans_handle *trans = NULL;
11093 int ret = 0;
11094 int bad_roots = 0;
11095 int need_trans = 0;
11097 ret = build_roots_info_cache(info);
11098 if (ret)
11099 goto out;
11101 path = btrfs_alloc_path();
11102 if (!path) {
11103 ret = -ENOMEM;
11104 goto out;
11107 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11108 key.type = BTRFS_ROOT_ITEM_KEY;
11109 key.offset = 0;
11111 again:
11113 * Avoid opening and committing transactions if a leaf doesn't have
11114 * any root items that need to be fixed, so that we avoid rotating
11115 * backup roots unnecessarily.
11117 if (need_trans) {
11118 trans = btrfs_start_transaction(info->tree_root, 1);
11119 if (IS_ERR(trans)) {
11120 ret = PTR_ERR(trans);
11121 goto out;
11125 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11126 0, trans ? 1 : 0);
11127 if (ret < 0)
11128 goto out;
11129 leaf = path->nodes[0];
11131 while (1) {
11132 struct btrfs_key found_key;
11134 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11135 int no_more_keys = find_next_key(path, &key);
11137 btrfs_release_path(path);
11138 if (trans) {
11139 ret = btrfs_commit_transaction(trans,
11140 info->tree_root);
11141 trans = NULL;
11142 if (ret < 0)
11143 goto out;
11145 need_trans = 0;
11146 if (no_more_keys)
11147 break;
11148 goto again;
11151 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11153 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11154 goto next;
11155 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11156 goto next;
11158 ret = maybe_repair_root_item(info, path, &found_key,
11159 trans ? 0 : 1);
11160 if (ret < 0)
11161 goto out;
11162 if (ret) {
11163 if (!trans && repair) {
11164 need_trans = 1;
11165 key = found_key;
11166 btrfs_release_path(path);
11167 goto again;
11169 bad_roots++;
11171 next:
11172 path->slots[0]++;
11174 ret = 0;
11175 out:
11176 free_roots_info_cache();
11177 btrfs_free_path(path);
11178 if (trans)
11179 btrfs_commit_transaction(trans, info->tree_root);
11180 if (ret < 0)
11181 return ret;
11183 return bad_roots;
11186 const char * const cmd_check_usage[] = {
11187 "btrfs check [options] <device>",
11188 "Check structural integrity of a filesystem (unmounted).",
11189 "Check structural integrity of an unmounted filesystem. Verify internal",
11190 "trees' consistency and item connectivity. In the repair mode try to",
11191 "fix the problems found. ",
11192 "WARNING: the repair mode is considered dangerous",
11194 "-s|--super <superblock> use this superblock copy",
11195 "-b|--backup use the first valid backup root copy",
11196 "--repair try to repair the filesystem",
11197 "--readonly run in read-only mode (default)",
11198 "--init-csum-tree create a new CRC tree",
11199 "--init-extent-tree create a new extent tree",
11200 "--mode <MODE> select mode, allows to make some memory/IO",
11201 " trade-offs, where MODE is one of:",
11202 " original - read inodes and extents to memory (requires",
11203 " more memory, does less IO)",
11204 " lowmem - try to use less memory but read blocks again",
11205 " when needed",
11206 "--check-data-csum verify checksums of data blocks",
11207 "-Q|--qgroup-report print a report on qgroup consistency",
11208 "-E|--subvol-extents <subvolid>",
11209 " print subvolume extents and sharing state",
11210 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11211 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11212 "-p|--progress indicate progress",
11213 NULL
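/*
 * For illustration only, typical invocations of this command look like:
 *
 *   btrfs check /dev/sdx                   (read-only structural check, default)
 *   btrfs check --mode lowmem /dev/sdx
 *   btrfs check --repair /dev/sdx          (dangerous, see warning above)
 *   btrfs check --init-csum-tree /dev/sdx
 *
 * All options are parsed below via getopt_long(); the device must not be
 * mounted.
 */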
11216 int cmd_check(int argc, char **argv)
11218 struct cache_tree root_cache;
11219 struct btrfs_root *root;
11220 struct btrfs_fs_info *info;
11221 u64 bytenr = 0;
11222 u64 subvolid = 0;
11223 u64 tree_root_bytenr = 0;
11224 u64 chunk_root_bytenr = 0;
11225 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11226 int ret;
11227 u64 num;
11228 int init_csum_tree = 0;
11229 int readonly = 0;
11230 int qgroup_report = 0;
11231 int qgroups_repaired = 0;
11232 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11234 while(1) {
11235 int c;
11236 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11237 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11238 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11239 GETOPT_VAL_MODE };
11240 static const struct option long_options[] = {
11241 { "super", required_argument, NULL, 's' },
11242 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11243 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11244 { "init-csum-tree", no_argument, NULL,
11245 GETOPT_VAL_INIT_CSUM },
11246 { "init-extent-tree", no_argument, NULL,
11247 GETOPT_VAL_INIT_EXTENT },
11248 { "check-data-csum", no_argument, NULL,
11249 GETOPT_VAL_CHECK_CSUM },
11250 { "backup", no_argument, NULL, 'b' },
11251 { "subvol-extents", required_argument, NULL, 'E' },
11252 { "qgroup-report", no_argument, NULL, 'Q' },
11253 { "tree-root", required_argument, NULL, 'r' },
11254 { "chunk-root", required_argument, NULL,
11255 GETOPT_VAL_CHUNK_TREE },
11256 { "progress", no_argument, NULL, 'p' },
11257 { "mode", required_argument, NULL,
11258 GETOPT_VAL_MODE },
11259 { NULL, 0, NULL, 0}
11262 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11263 if (c < 0)
11264 break;
11265 switch(c) {
11266 case 'a': /* ignored */ break;
11267 case 'b':
11268 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11269 break;
11270 case 's':
11271 num = arg_strtou64(optarg);
11272 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11273 fprintf(stderr,
11274 "ERROR: super mirror should be less than: %d\n",
11275 BTRFS_SUPER_MIRROR_MAX);
11276 exit(1);
11278 bytenr = btrfs_sb_offset(((int)num));
11279 printf("using SB copy %llu, bytenr %llu\n", num,
11280 (unsigned long long)bytenr);
11281 break;
11282 case 'Q':
11283 qgroup_report = 1;
11284 break;
11285 case 'E':
11286 subvolid = arg_strtou64(optarg);
11287 break;
11288 case 'r':
11289 tree_root_bytenr = arg_strtou64(optarg);
11290 break;
11291 case GETOPT_VAL_CHUNK_TREE:
11292 chunk_root_bytenr = arg_strtou64(optarg);
11293 break;
11294 case 'p':
11295 ctx.progress_enabled = true;
11296 break;
11297 case '?':
11298 case 'h':
11299 usage(cmd_check_usage);
11300 case GETOPT_VAL_REPAIR:
11301 printf("enabling repair mode\n");
11302 repair = 1;
11303 ctree_flags |= OPEN_CTREE_WRITES;
11304 break;
11305 case GETOPT_VAL_READONLY:
11306 readonly = 1;
11307 break;
11308 case GETOPT_VAL_INIT_CSUM:
11309 printf("Creating a new CRC tree\n");
11310 init_csum_tree = 1;
11311 repair = 1;
11312 ctree_flags |= OPEN_CTREE_WRITES;
11313 break;
11314 case GETOPT_VAL_INIT_EXTENT:
11315 init_extent_tree = 1;
11316 ctree_flags |= (OPEN_CTREE_WRITES |
11317 OPEN_CTREE_NO_BLOCK_GROUPS);
11318 repair = 1;
11319 break;
11320 case GETOPT_VAL_CHECK_CSUM:
11321 check_data_csum = 1;
11322 break;
11323 case GETOPT_VAL_MODE:
11324 check_mode = parse_check_mode(optarg);
11325 if (check_mode == CHECK_MODE_UNKNOWN) {
11326 error("unknown mode: %s", optarg);
11327 exit(1);
11329 break;
11333 if (check_argc_exact(argc - optind, 1))
11334 usage(cmd_check_usage);
11336 if (ctx.progress_enabled) {
11337 ctx.tp = TASK_NOTHING;
11338 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11341 /* This check is the only reason for --readonly to exist */
11342 if (readonly && repair) {
11343 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11344 exit(1);
11348 * Not supported yet
11350 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11351 error("Low memory mode doesn't support repair yet");
11352 exit(1);
11355 radix_tree_init();
11356 cache_tree_init(&root_cache);
11358 if((ret = check_mounted(argv[optind])) < 0) {
11359 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11360 goto err_out;
11361 } else if(ret) {
11362 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11363 ret = -EBUSY;
11364 goto err_out;
11367 /* only allow partial opening under repair mode */
11368 if (repair)
11369 ctree_flags |= OPEN_CTREE_PARTIAL;
11371 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11372 chunk_root_bytenr, ctree_flags);
11373 if (!info) {
11374 fprintf(stderr, "Couldn't open file system\n");
11375 ret = -EIO;
11376 goto err_out;
11379 global_info = info;
11380 root = info->fs_root;
11383 * repair mode will force us to commit transactions, which
11384 * will make the log tree fail to load when mounting.
11386 if (repair && btrfs_super_log_root(info->super_copy)) {
11387 ret = ask_user("repair mode will force us to clear out the log tree, are you sure?");
11388 if (!ret) {
11389 ret = 1;
11390 goto close_out;
11392 ret = zero_log_tree(root);
11393 if (ret) {
11394 fprintf(stderr, "failed to zero log tree\n");
11395 goto close_out;
11399 uuid_unparse(info->super_copy->fsid, uuidbuf);
11400 if (qgroup_report) {
11401 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11402 uuidbuf);
11403 ret = qgroup_verify_all(info);
11404 if (ret == 0)
11405 report_qgroups(1);
11406 goto close_out;
11408 if (subvolid) {
11409 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11410 subvolid, argv[optind], uuidbuf);
11411 ret = print_extent_state(info, subvolid);
11412 goto close_out;
11414 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11416 if (!extent_buffer_uptodate(info->tree_root->node) ||
11417 !extent_buffer_uptodate(info->dev_root->node) ||
11418 !extent_buffer_uptodate(info->chunk_root->node)) {
11419 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11420 ret = -EIO;
11421 goto close_out;
11424 if (init_extent_tree || init_csum_tree) {
11425 struct btrfs_trans_handle *trans;
11427 trans = btrfs_start_transaction(info->extent_root, 0);
11428 if (IS_ERR(trans)) {
11429 fprintf(stderr, "Error starting transaction\n");
11430 ret = PTR_ERR(trans);
11431 goto close_out;
11434 if (init_extent_tree) {
11435 printf("Creating a new extent tree\n");
11436 ret = reinit_extent_tree(trans, info);
11437 if (ret)
11438 goto close_out;
11441 if (init_csum_tree) {
11442 fprintf(stderr, "Reinit crc root\n");
11443 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11444 if (ret) {
11445 fprintf(stderr, "crc root initialization failed\n");
11446 ret = -EIO;
11447 goto close_out;
11450 ret = fill_csum_tree(trans, info->csum_root,
11451 init_extent_tree);
11452 if (ret) {
11453 fprintf(stderr, "crc refilling failed\n");
11454 return -EIO;
11458 * Ok now we commit and run the normal fsck, which will add
11459 * extent entries for all of the items it finds.
11461 ret = btrfs_commit_transaction(trans, info->extent_root);
11462 if (ret)
11463 goto close_out;
11465 if (!extent_buffer_uptodate(info->extent_root->node)) {
11466 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11467 ret = -EIO;
11468 goto close_out;
11470 if (!extent_buffer_uptodate(info->csum_root->node)) {
11471 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11472 ret = -EIO;
11473 goto close_out;
11476 if (!ctx.progress_enabled)
11477 fprintf(stderr, "checking extents\n");
11478 if (check_mode == CHECK_MODE_LOWMEM)
11479 ret = check_chunks_and_extents_v2(root);
11480 else
11481 ret = check_chunks_and_extents(root);
11482 if (ret)
11483 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11485 ret = repair_root_items(info);
11486 if (ret < 0)
11487 goto close_out;
11488 if (repair) {
11489 fprintf(stderr, "Fixed %d roots.\n", ret);
11490 ret = 0;
11491 } else if (ret > 0) {
11492 fprintf(stderr,
11493 "Found %d roots with an outdated root item.\n",
11494 ret);
11495 fprintf(stderr,
11496 "Please run a filesystem check with the option --repair to fix them.\n");
11497 ret = 1;
11498 goto close_out;
11501 if (!ctx.progress_enabled) {
11502 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11503 fprintf(stderr, "checking free space tree\n");
11504 else
11505 fprintf(stderr, "checking free space cache\n");
11507 ret = check_space_cache(root);
11508 if (ret)
11509 goto out;
11512 * We used to have to have hole extents in between our real extents,
11513 * so if the no-holes flag is not set we need to make sure there are no
11514 * gaps in an inode's file extents; if the flag is set we can simply
11515 * ignore any gaps we find.
11517 no_holes = btrfs_fs_incompat(root->fs_info,
11518 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11519 if (!ctx.progress_enabled)
11520 fprintf(stderr, "checking fs roots\n");
11521 ret = check_fs_roots(root, &root_cache);
11522 if (ret)
11523 goto out;
11525 fprintf(stderr, "checking csums\n");
11526 ret = check_csums(root);
11527 if (ret)
11528 goto out;
11530 fprintf(stderr, "checking root refs\n");
11531 ret = check_root_refs(root, &root_cache);
11532 if (ret)
11533 goto out;
11535 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11536 struct extent_buffer *eb;
11538 eb = list_first_entry(&root->fs_info->recow_ebs,
11539 struct extent_buffer, recow);
11540 list_del_init(&eb->recow);
11541 ret = recow_extent_buffer(root, eb);
11542 if (ret)
11543 break;
11546 while (!list_empty(&delete_items)) {
11547 struct bad_item *bad;
11549 bad = list_first_entry(&delete_items, struct bad_item, list);
11550 list_del_init(&bad->list);
11551 if (repair)
11552 ret = delete_bad_item(root, bad);
11553 free(bad);
11556 if (info->quota_enabled) {
11557 int err;
11558 fprintf(stderr, "checking quota groups\n");
11559 err = qgroup_verify_all(info);
11560 if (err)
11561 goto out;
11562 report_qgroups(0);
11563 err = repair_qgroups(info, &qgroups_repaired);
11564 if (err)
11565 goto out;
11568 if (!list_empty(&root->fs_info->recow_ebs)) {
11569 fprintf(stderr, "Transid errors in file system\n");
11570 ret = 1;
11572 out:
11573 /* Don't override original ret */
11574 if (!ret && qgroups_repaired)
11575 ret = qgroups_repaired;
11577 if (found_old_backref) { /*
11578 * there was a disk format change when mixed
11579 * backref was in testing tree. The old format
11580 * existed about one week.
11582 printf("\n * Found old mixed backref format. "
11583 "The old format is not supported! *"
11584 "\n * Please mount the FS in readonly mode, "
11585 "backup data and re-format the FS. *\n\n");
11586 ret = 1;
11588 printf("found %llu bytes used err is %d\n",
11589 (unsigned long long)bytes_used, ret);
11590 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11591 printf("total tree bytes: %llu\n",
11592 (unsigned long long)total_btree_bytes);
11593 printf("total fs tree bytes: %llu\n",
11594 (unsigned long long)total_fs_tree_bytes);
11595 printf("total extent tree bytes: %llu\n",
11596 (unsigned long long)total_extent_tree_bytes);
11597 printf("btree space waste bytes: %llu\n",
11598 (unsigned long long)btree_space_waste);
11599 printf("file data blocks allocated: %llu\n referenced %llu\n",
11600 (unsigned long long)data_bytes_allocated,
11601 (unsigned long long)data_bytes_referenced);
11603 free_qgroup_counts();
11604 free_root_recs_tree(&root_cache);
11605 close_out:
11606 close_ctree(root);
11607 err_out:
11608 if (ctx.progress_enabled)
11609 task_deinit(ctx.info);
11611 return ret;