btrfs-progs: remove unused argument from clear_extent_bits
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobdfb34764a4985b6b1af1ddcbc3d4d49c2a0c6874
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/*
 * Phase of the check that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its table of phase
 * names, so every phase listed before TASK_NOTHING needs a string there.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,		/* has to be the last element */
};
54 struct task_ctx {
55 int progress_enabled;
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check implementations selectable by name, see parse_check_mode().
 * CHECK_MODE_UNKNOWN is what an unrecognized name is mapped to.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
104 union {
105 u64 parent;
106 u64 root;
108 u64 owner;
109 u64 offset;
110 u64 disk_bytenr;
111 u64 bytes;
112 u64 ram_bytes;
113 u32 num_refs;
114 u32 found_ref;
/*
 * Inode and root error bits; each flag occupies its own bit, starting at
 * bit 1.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
148 u64 root;
149 u64 objectid;
150 u64 offset;
151 u64 disk_bytenr;
152 u64 disk_len;
155 struct tree_backref {
156 struct extent_backref node;
157 union {
158 u64 parent;
159 u64 root;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Explicit "not decided yet" value for
 * extent_record::flag_block_full_backref, which is two bits wide so that
 * it can hold this value next to 0 and 1.
 */
enum {
	FLAG_UNSET = 2
};
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
177 u64 start;
178 u64 max_size;
179 u64 nr;
180 u64 refs;
181 u64 extent_item_refs;
182 u64 generation;
183 u64 parent_generation;
184 u64 info_objectid;
185 u32 num_duplicates;
186 u8 info_level;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
208 u8 filetype;
209 u8 ref_type;
210 int errors;
211 u64 dir;
212 u64 index;
213 u16 namelen;
214 char name[0];
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
224 u64 objectid;
225 u64 bytenr;
226 u64 last_snapshot;
227 u8 level;
228 u8 drop_level;
229 int level_size;
230 struct btrfs_key drop_key;
/*
 * Error bits kept in inode_backref::errors and root_backref::errors;
 * print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
247 struct file_extent_hole {
248 struct rb_node node;
249 u64 start;
250 u64 len;
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
263 int errors;
265 u64 ino;
266 u32 nlink;
267 u32 imode;
268 u64 isize;
269 u64 nbytes;
271 u32 found_link;
272 u64 found_size;
273 u64 extent_start;
274 u64 extent_end;
275 struct rb_root holes;
276 struct list_head orphan_extents;
278 u32 refs;
/*
 * Error bits kept in inode_record::errors; print_inode_error() turns them
 * into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
304 int errors;
305 u64 ref_root;
306 u64 dir;
307 u64 index;
308 u16 namelen;
309 char name[0];
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
317 struct root_record {
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
321 u64 objectid;
322 u32 found_ref;
325 struct ptr_node {
326 struct cache_extent cache;
327 void *data;
330 struct shared_node {
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
335 u32 refs;
338 struct block_info {
339 u64 start;
340 u32 size;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
346 int active_node;
347 int root_level;
350 struct bad_item {
351 struct btrfs_key key;
352 u64 root_id;
353 struct list_head list;
356 struct extent_entry {
357 u64 bytenr;
358 u64 bytes;
359 int count;
360 int broken;
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
366 u8 level;
367 /* number of nodes at this level, must be 1 for a root */
368 int node_count;
369 u64 bytenr;
370 u64 gen;
371 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit so that a set of errors can be OR-ed
 * together and each one tested on its own.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable; it now uses the next free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
/*
 * Counterpart of print_status_check(): end the progress line with a newline
 * and flush it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868 u64 ino, int mod)
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
873 int ret;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
876 if (cache) {
877 node = container_of(cache, struct ptr_node, cache);
878 rec = node->data;
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
882 return node->data;
883 rec->refs--;
884 rec = node->data;
886 } else if (mod) {
887 rec = calloc(1, sizeof(*rec));
888 if (!rec)
889 return ERR_PTR(-ENOMEM);
890 rec->ino = ino;
891 rec->extent_start = (u64)-1;
892 rec->refs = 1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
898 if (!node) {
899 free(rec);
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
904 node->data = rec;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
907 rec->found_link = 1;
909 ret = insert_cache_extent(inode_cache, &node->cache);
910 if (ret)
911 return ERR_PTR(-EEXIST);
913 return rec;
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
924 free(orphan);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
932 if (--rec->refs > 0)
933 return;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
938 free(backref);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
942 free(rec);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949 return 1;
950 return 0;
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
959 u8 filetype;
961 if (!rec->found_inode_item)
962 return;
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
972 free(backref);
977 if (!rec->checked || rec->merging)
978 return;
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1009 free(node);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1018 int ret;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1022 key.offset = ino;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1027 if (ret > 0)
1028 ret = -ENOENT;
1029 return ret;
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043 return 1;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->nodatasum = 1;
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1056 return 0;
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 const char *name,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 break;
1068 if (backref->dir != dir || backref->namelen != namelen)
1069 continue;
1070 if (memcmp(name, backref->name, namelen))
1071 continue;
1072 return backref;
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1076 if (!backref)
1077 return NULL;
1078 memset(backref, 0, sizeof(*backref));
1079 backref->dir = dir;
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1084 return backref;
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1098 BUG_ON(!backref);
1099 if (errors)
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 rec->found_link++;
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 else
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1132 } else {
1133 BUG_ON(1);
1136 maybe_free_inode_rec(inode_cache, rec);
1137 return 0;
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1144 u32 dir_count = 0;
1145 int ret = 0;
1147 dst->merging = 1;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1156 dir_count++;
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180 if (ret < 0)
1181 return ret;
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1191 } else {
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1196 dst->extent_end,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1213 } else {
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1217 dst->merging = 0;
1219 return 0;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1230 int splice = 0;
1231 int ret;
1233 if (--src_node->refs == 0)
1234 splice = 1;
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1240 again:
1241 cache = search_cache_extent(src, 0);
1242 while (cache) {
1243 node = container_of(cache, struct ptr_node, cache);
1244 rec = node->data;
1245 cache = next_cache_extent(cache);
1247 if (splice) {
1248 remove_cache_extent(src, &node->cache);
1249 ins = node;
1250 } else {
1251 ins = malloc(sizeof(*ins));
1252 BUG_ON(!ins);
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1255 ins->data = rec;
1256 rec->refs++;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1263 if (rec->checked) {
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1270 free(ins);
1271 } else {
1272 BUG_ON(ret);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1279 goto again;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1291 return 0;
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1300 rec = node->data;
1301 free_inode_rec(rec);
1302 free(node);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308 u64 bytenr)
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1314 if (cache) {
1315 node = container_of(cache, struct shared_node, cache);
1316 return node;
1318 return NULL;
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 int ret;
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1327 if (!node)
1328 return -ENOMEM;
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1333 node->refs = refs;
1335 ret = insert_cache_extent(shared, &node->cache);
1337 return ret;
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1345 int ret;
1347 if (level == wc->active_node)
1348 return 0;
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1352 if (!node) {
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 BUG_ON(ret);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1358 return 0;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1367 free(node);
1369 return 1;
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1376 free(node);
1378 return 1;
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1386 int i;
1388 if (level == wc->root_level)
1389 return 0;
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392 if (wc->nodes[i])
1393 break;
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1406 } else {
1407 BUG_ON(node->refs < 2);
1408 node->refs--;
1410 return 0;
1414 * Returns:
1415 * < 0 - on error
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422 u64 child_root_id)
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1427 int has_parent = 0;
1428 int ret;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436 0, 0);
1437 if (ret < 0)
1438 return ret;
1439 btrfs_release_path(&path);
1440 if (!ret)
1441 return 1;
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 key.offset = 0;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447 0, 0);
1448 if (ret < 0)
1449 goto out;
1451 while (1) {
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455 if (ret)
1456 break;
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1463 break;
1465 has_parent = 1;
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1469 return 1;
1472 path.slots[0]++;
1474 out:
1475 btrfs_release_path(&path);
1476 if (ret < 0)
1477 return ret;
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct btrfs_root *root,
1482 struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1486 u32 total;
1487 u32 cur = 0;
1488 u32 len;
1489 u32 name_len;
1490 u32 data_len;
1491 int error;
1492 int nritems = 0;
1493 u8 filetype;
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
1508 while (cur < total) {
1509 nritems++;
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
1515 rec->found_size += name_len;
1516 if (name_len <= BTRFS_NAME_LEN) {
1517 len = name_len;
1518 error = 0;
1519 } else {
1520 len = BTRFS_NAME_LEN;
1521 error = REF_ERR_NAME_TOO_LONG;
1523 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1525 if (location.type == BTRFS_INODE_ITEM_KEY) {
1526 add_inode_backref(inode_cache, location.objectid,
1527 key->objectid, key->offset, namebuf,
1528 len, filetype, key->type, error);
1529 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1530 add_inode_backref(root_cache, location.objectid,
1531 key->objectid, key->offset,
1532 namebuf, len, filetype,
1533 key->type, error);
1534 } else {
1535 fprintf(stderr, "invalid location in dir item %u\n",
1536 location.type);
1537 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1538 key->objectid, key->offset, namebuf,
1539 len, filetype, key->type, error);
1542 len = sizeof(*di) + name_len + data_len;
1543 di = (struct btrfs_dir_item *)((char *)di + len);
1544 cur += len;
1546 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1547 rec->errors |= I_ERR_DUP_DIR_INDEX;
1549 return 0;
1552 static int process_inode_ref(struct extent_buffer *eb,
1553 int slot, struct btrfs_key *key,
1554 struct shared_node *active_node)
1556 u32 total;
1557 u32 cur = 0;
1558 u32 len;
1559 u32 name_len;
1560 u64 index;
1561 int error;
1562 struct cache_tree *inode_cache;
1563 struct btrfs_inode_ref *ref;
1564 char namebuf[BTRFS_NAME_LEN];
1566 inode_cache = &active_node->inode_cache;
1568 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1569 total = btrfs_item_size_nr(eb, slot);
1570 while (cur < total) {
1571 name_len = btrfs_inode_ref_name_len(eb, ref);
1572 index = btrfs_inode_ref_index(eb, ref);
1573 if (name_len <= BTRFS_NAME_LEN) {
1574 len = name_len;
1575 error = 0;
1576 } else {
1577 len = BTRFS_NAME_LEN;
1578 error = REF_ERR_NAME_TOO_LONG;
1580 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1581 add_inode_backref(inode_cache, key->objectid, key->offset,
1582 index, namebuf, len, 0, key->type, error);
1584 len = sizeof(*ref) + name_len;
1585 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1586 cur += len;
1588 return 0;
1591 static int process_inode_extref(struct extent_buffer *eb,
1592 int slot, struct btrfs_key *key,
1593 struct shared_node *active_node)
1595 u32 total;
1596 u32 cur = 0;
1597 u32 len;
1598 u32 name_len;
1599 u64 index;
1600 u64 parent;
1601 int error;
1602 struct cache_tree *inode_cache;
1603 struct btrfs_inode_extref *extref;
1604 char namebuf[BTRFS_NAME_LEN];
1606 inode_cache = &active_node->inode_cache;
1608 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1609 total = btrfs_item_size_nr(eb, slot);
1610 while (cur < total) {
1611 name_len = btrfs_inode_extref_name_len(eb, extref);
1612 index = btrfs_inode_extref_index(eb, extref);
1613 parent = btrfs_inode_extref_parent(eb, extref);
1614 if (name_len <= BTRFS_NAME_LEN) {
1615 len = name_len;
1616 error = 0;
1617 } else {
1618 len = BTRFS_NAME_LEN;
1619 error = REF_ERR_NAME_TOO_LONG;
1621 read_extent_buffer(eb, namebuf,
1622 (unsigned long)(extref + 1), len);
1623 add_inode_backref(inode_cache, key->objectid, parent,
1624 index, namebuf, len, 0, key->type, error);
1626 len = sizeof(*extref) + name_len;
1627 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1628 cur += len;
1630 return 0;
1634 static int count_csum_range(struct btrfs_root *root, u64 start,
1635 u64 len, u64 *found)
1637 struct btrfs_key key;
1638 struct btrfs_path path;
1639 struct extent_buffer *leaf;
1640 int ret;
1641 size_t size;
1642 *found = 0;
1643 u64 csum_end;
1644 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1646 btrfs_init_path(&path);
1648 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.offset = start;
1650 key.type = BTRFS_EXTENT_CSUM_KEY;
1652 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1653 &key, &path, 0, 0);
1654 if (ret < 0)
1655 goto out;
1656 if (ret > 0 && path.slots[0] > 0) {
1657 leaf = path.nodes[0];
1658 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1659 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1660 key.type == BTRFS_EXTENT_CSUM_KEY)
1661 path.slots[0]--;
1664 while (len > 0) {
1665 leaf = path.nodes[0];
1666 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1667 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1668 if (ret > 0)
1669 break;
1670 else if (ret < 0)
1671 goto out;
1672 leaf = path.nodes[0];
1675 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1676 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1677 key.type != BTRFS_EXTENT_CSUM_KEY)
1678 break;
1680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1681 if (key.offset >= start + len)
1682 break;
1684 if (key.offset > start)
1685 start = key.offset;
1687 size = btrfs_item_size_nr(leaf, path.slots[0]);
1688 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1689 if (csum_end > start) {
1690 size = min(csum_end - start, len);
1691 len -= size;
1692 start += size;
1693 *found += size;
1696 path.slots[0]++;
1698 out:
1699 btrfs_release_path(&path);
1700 if (ret < 0)
1701 return ret;
1702 return 0;
1705 static int process_file_extent(struct btrfs_root *root,
1706 struct extent_buffer *eb,
1707 int slot, struct btrfs_key *key,
1708 struct shared_node *active_node)
1710 struct inode_record *rec;
1711 struct btrfs_file_extent_item *fi;
1712 u64 num_bytes = 0;
1713 u64 disk_bytenr = 0;
1714 u64 extent_offset = 0;
1715 u64 mask = root->sectorsize - 1;
1716 int extent_type;
1717 int ret;
1719 rec = active_node->current;
1720 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1721 rec->found_file_extent = 1;
1723 if (rec->extent_start == (u64)-1) {
1724 rec->extent_start = key->offset;
1725 rec->extent_end = key->offset;
1728 if (rec->extent_end > key->offset)
1729 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1730 else if (rec->extent_end < key->offset) {
1731 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1732 key->offset - rec->extent_end);
1733 if (ret < 0)
1734 return ret;
1737 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1738 extent_type = btrfs_file_extent_type(eb, fi);
1740 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1741 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 if (num_bytes == 0)
1743 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1744 rec->found_size += num_bytes;
1745 num_bytes = (num_bytes + mask) & ~mask;
1746 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1747 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1748 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1749 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1750 extent_offset = btrfs_file_extent_offset(eb, fi);
1751 if (num_bytes == 0 || (num_bytes & mask))
1752 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1753 if (num_bytes + extent_offset >
1754 btrfs_file_extent_ram_bytes(eb, fi))
1755 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1756 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1757 (btrfs_file_extent_compression(eb, fi) ||
1758 btrfs_file_extent_encryption(eb, fi) ||
1759 btrfs_file_extent_other_encoding(eb, fi)))
1760 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1761 if (disk_bytenr > 0)
1762 rec->found_size += num_bytes;
1763 } else {
1764 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->extent_end = key->offset + num_bytes;
1769 * The data reloc tree will copy full extents into its inode and then
1770 * copy the corresponding csums. Because the extent it copied could be
1771 * a preallocated extent that hasn't been written to yet there may be no
1772 * csums to copy, ergo we won't have csums for our file extent. This is
1773 * ok so just don't bother checking csums if the inode belongs to the
1774 * data reloc tree.
1776 if (disk_bytenr > 0 &&
1777 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778 u64 found;
1779 if (btrfs_file_extent_compression(eb, fi))
1780 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 else
1782 disk_bytenr += extent_offset;
1784 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1785 if (ret < 0)
1786 return ret;
1787 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 if (found > 0)
1789 rec->found_csum_item = 1;
1790 if (found < num_bytes)
1791 rec->some_csum_missing = 1;
1792 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 if (found > 0)
1794 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1797 return 0;
1800 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1801 struct walk_control *wc)
1803 struct btrfs_key key;
1804 u32 nritems;
1805 int i;
1806 int ret = 0;
1807 struct cache_tree *inode_cache;
1808 struct shared_node *active_node;
1810 if (wc->root_level == wc->active_node &&
1811 btrfs_root_refs(&root->root_item) == 0)
1812 return 0;
1814 active_node = wc->nodes[wc->active_node];
1815 inode_cache = &active_node->inode_cache;
1816 nritems = btrfs_header_nritems(eb);
1817 for (i = 0; i < nritems; i++) {
1818 btrfs_item_key_to_cpu(eb, &key, i);
1820 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 continue;
1822 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1823 continue;
1825 if (active_node->current == NULL ||
1826 active_node->current->ino < key.objectid) {
1827 if (active_node->current) {
1828 active_node->current->checked = 1;
1829 maybe_free_inode_rec(inode_cache,
1830 active_node->current);
1832 active_node->current = get_inode_rec(inode_cache,
1833 key.objectid, 1);
1834 BUG_ON(IS_ERR(active_node->current));
1836 switch (key.type) {
1837 case BTRFS_DIR_ITEM_KEY:
1838 case BTRFS_DIR_INDEX_KEY:
1839 ret = process_dir_item(root, eb, i, &key, active_node);
1840 break;
1841 case BTRFS_INODE_REF_KEY:
1842 ret = process_inode_ref(eb, i, &key, active_node);
1843 break;
1844 case BTRFS_INODE_EXTREF_KEY:
1845 ret = process_inode_extref(eb, i, &key, active_node);
1846 break;
1847 case BTRFS_INODE_ITEM_KEY:
1848 ret = process_inode_item(eb, i, &key, active_node);
1849 break;
1850 case BTRFS_EXTENT_DATA_KEY:
1851 ret = process_file_extent(root, eb, i, &key,
1852 active_node);
1853 break;
1854 default:
1855 break;
1858 return ret;
1861 struct node_refs {
1862 u64 bytenr[BTRFS_MAX_LEVEL];
1863 u64 refs[BTRFS_MAX_LEVEL];
1864 int need_check[BTRFS_MAX_LEVEL];
1867 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1868 struct node_refs *nrefs, u64 level);
1869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1870 unsigned int ext_ref);
1872 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1873 struct node_refs *nrefs, int *level, int ext_ref)
1875 struct extent_buffer *cur = path->nodes[0];
1876 struct btrfs_key key;
1877 u64 cur_bytenr;
1878 u32 nritems;
1879 u64 first_ino = 0;
1880 int root_level = btrfs_header_level(root->node);
1881 int i;
1882 int ret = 0; /* Final return value */
1883 int err = 0; /* Positive error bitmap */
1885 cur_bytenr = cur->start;
1887 /* skip to first inode item or the first inode number change */
1888 nritems = btrfs_header_nritems(cur);
1889 for (i = 0; i < nritems; i++) {
1890 btrfs_item_key_to_cpu(cur, &key, i);
1891 if (i == 0)
1892 first_ino = key.objectid;
1893 if (key.type == BTRFS_INODE_ITEM_KEY ||
1894 (first_ino && first_ino != key.objectid))
1895 break;
1897 if (i == nritems) {
1898 path->slots[0] = nritems;
1899 return 0;
1901 path->slots[0] = i;
1903 again:
1904 err |= check_inode_item(root, path, ext_ref);
1906 if (err & LAST_ITEM)
1907 goto out;
1909 /* still have inode items in thie leaf */
1910 if (cur->start == cur_bytenr)
1911 goto again;
1914 * we have switched to another leaf, above nodes may
1915 * have changed, here walk down the path, if a node
1916 * or leaf is shared, check whether we can skip this
1917 * node or leaf.
1919 for (i = root_level; i >= 0; i--) {
1920 if (path->nodes[i]->start == nrefs->bytenr[i])
1921 continue;
1923 ret = update_nodes_refs(root,
1924 path->nodes[i]->start,
1925 nrefs, i);
1926 if (ret)
1927 goto out;
1929 if (!nrefs->need_check[i]) {
1930 *level += 1;
1931 break;
1935 for (i = 0; i < *level; i++) {
1936 free_extent_buffer(path->nodes[i]);
1937 path->nodes[i] = NULL;
1939 out:
1940 err &= ~LAST_ITEM;
1942 * Convert any error bitmap to -EIO, as we should avoid
1943 * mixing positive and negative return value to represent
1944 * error
1946 if (err && !ret)
1947 ret = -EIO;
1948 return ret;
1951 static void reada_walk_down(struct btrfs_root *root,
1952 struct extent_buffer *node, int slot)
1954 u64 bytenr;
1955 u64 ptr_gen;
1956 u32 nritems;
1957 u32 blocksize;
1958 int i;
1959 int level;
1961 level = btrfs_header_level(node);
1962 if (level != 1)
1963 return;
1965 nritems = btrfs_header_nritems(node);
1966 blocksize = root->nodesize;
1967 for (i = slot; i < nritems; i++) {
1968 bytenr = btrfs_node_blockptr(node, i);
1969 ptr_gen = btrfs_node_ptr_generation(node, i);
1970 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1975 * Check the child node/leaf by the following condition:
1976 * 1. the first item key of the node/leaf should be the same with the one
1977 * in parent.
1978 * 2. block in parent node should match the child node/leaf.
1979 * 3. generation of parent node and child's header should be consistent.
1981 * Or the child node/leaf pointed by the key in parent is not valid.
1983 * We hope to check leaf owner too, but since subvol may share leaves,
1984 * which makes leaf owner check not so strong, key check should be
1985 * sufficient enough for that case.
1987 static int check_child_node(struct btrfs_root *root,
1988 struct extent_buffer *parent, int slot,
1989 struct extent_buffer *child)
1991 struct btrfs_key parent_key;
1992 struct btrfs_key child_key;
1993 int ret = 0;
1995 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1996 if (btrfs_header_level(child) == 0)
1997 btrfs_item_key_to_cpu(child, &child_key, 0);
1998 else
1999 btrfs_node_key_to_cpu(child, &child_key, 0);
2001 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 ret = -EINVAL;
2003 fprintf(stderr,
2004 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2005 parent_key.objectid, parent_key.type, parent_key.offset,
2006 child_key.objectid, child_key.type, child_key.offset);
2008 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2009 ret = -EINVAL;
2010 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2011 btrfs_node_blockptr(parent, slot),
2012 btrfs_header_bytenr(child));
2014 if (btrfs_node_ptr_generation(parent, slot) !=
2015 btrfs_header_generation(child)) {
2016 ret = -EINVAL;
2017 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2018 btrfs_header_generation(child),
2019 btrfs_node_ptr_generation(parent, slot));
2021 return ret;
2025 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2026 * in every fs or file tree check. Here we find its all root ids, and only check
2027 * it in the fs or file tree which has the smallest root id.
2029 static int need_check(struct btrfs_root *root, struct ulist *roots)
2031 struct rb_node *node;
2032 struct ulist_node *u;
2034 if (roots->nnodes == 1)
2035 return 1;
2037 node = rb_first(&roots->root);
2038 u = rb_entry(node, struct ulist_node, rb_node);
2040 * current root id is not smallest, we skip it and let it be checked
2041 * in the fs or file tree who hash the smallest root id.
2043 if (root->objectid != u->val)
2044 return 0;
2046 return 1;
2050 * for a tree node or leaf, we record its reference count, so later if we still
2051 * process this node or leaf, don't need to compute its reference count again.
2053 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2054 struct node_refs *nrefs, u64 level)
2056 int check, ret;
2057 u64 refs;
2058 struct ulist *roots;
2060 if (nrefs->bytenr[level] != bytenr) {
2061 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2062 level, 1, &refs, NULL);
2063 if (ret < 0)
2064 return ret;
2066 nrefs->bytenr[level] = bytenr;
2067 nrefs->refs[level] = refs;
2068 if (refs > 1) {
2069 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2070 0, &roots);
2071 if (ret)
2072 return -EIO;
2074 check = need_check(root, roots);
2075 ulist_free(roots);
2076 nrefs->need_check[level] = check;
2077 } else {
2078 nrefs->need_check[level] = 1;
2082 return 0;
2085 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2086 struct walk_control *wc, int *level,
2087 struct node_refs *nrefs)
2089 enum btrfs_tree_block_status status;
2090 u64 bytenr;
2091 u64 ptr_gen;
2092 struct extent_buffer *next;
2093 struct extent_buffer *cur;
2094 u32 blocksize;
2095 int ret, err = 0;
2096 u64 refs;
2098 WARN_ON(*level < 0);
2099 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2101 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2102 refs = nrefs->refs[*level];
2103 ret = 0;
2104 } else {
2105 ret = btrfs_lookup_extent_info(NULL, root,
2106 path->nodes[*level]->start,
2107 *level, 1, &refs, NULL);
2108 if (ret < 0) {
2109 err = ret;
2110 goto out;
2112 nrefs->bytenr[*level] = path->nodes[*level]->start;
2113 nrefs->refs[*level] = refs;
2116 if (refs > 1) {
2117 ret = enter_shared_node(root, path->nodes[*level]->start,
2118 refs, wc, *level);
2119 if (ret > 0) {
2120 err = ret;
2121 goto out;
2125 while (*level >= 0) {
2126 WARN_ON(*level < 0);
2127 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2128 cur = path->nodes[*level];
2130 if (btrfs_header_level(cur) != *level)
2131 WARN_ON(1);
2133 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 break;
2135 if (*level == 0) {
2136 ret = process_one_leaf(root, cur, wc);
2137 if (ret < 0)
2138 err = ret;
2139 break;
2141 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2142 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2143 blocksize = root->nodesize;
2145 if (bytenr == nrefs->bytenr[*level - 1]) {
2146 refs = nrefs->refs[*level - 1];
2147 } else {
2148 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2149 *level - 1, 1, &refs, NULL);
2150 if (ret < 0) {
2151 refs = 0;
2152 } else {
2153 nrefs->bytenr[*level - 1] = bytenr;
2154 nrefs->refs[*level - 1] = refs;
2158 if (refs > 1) {
2159 ret = enter_shared_node(root, bytenr, refs,
2160 wc, *level - 1);
2161 if (ret > 0) {
2162 path->slots[*level]++;
2163 continue;
2167 next = btrfs_find_tree_block(root, bytenr, blocksize);
2168 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2169 free_extent_buffer(next);
2170 reada_walk_down(root, cur, path->slots[*level]);
2171 next = read_tree_block(root, bytenr, blocksize,
2172 ptr_gen);
2173 if (!extent_buffer_uptodate(next)) {
2174 struct btrfs_key node_key;
2176 btrfs_node_key_to_cpu(path->nodes[*level],
2177 &node_key,
2178 path->slots[*level]);
2179 btrfs_add_corrupt_extent_record(root->fs_info,
2180 &node_key,
2181 path->nodes[*level]->start,
2182 root->nodesize, *level);
2183 err = -EIO;
2184 goto out;
2188 ret = check_child_node(root, cur, path->slots[*level], next);
2189 if (ret) {
2190 err = ret;
2191 goto out;
2194 if (btrfs_is_leaf(next))
2195 status = btrfs_check_leaf(root, NULL, next);
2196 else
2197 status = btrfs_check_node(root, NULL, next);
2198 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2199 free_extent_buffer(next);
2200 err = -EIO;
2201 goto out;
2204 *level = *level - 1;
2205 free_extent_buffer(path->nodes[*level]);
2206 path->nodes[*level] = next;
2207 path->slots[*level] = 0;
2209 out:
2210 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2211 return err;
2214 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2215 unsigned int ext_ref);
2217 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2218 int *level, struct node_refs *nrefs, int ext_ref)
2220 enum btrfs_tree_block_status status;
2221 u64 bytenr;
2222 u64 ptr_gen;
2223 struct extent_buffer *next;
2224 struct extent_buffer *cur;
2225 u32 blocksize;
2226 int ret;
2228 WARN_ON(*level < 0);
2229 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2231 ret = update_nodes_refs(root, path->nodes[*level]->start,
2232 nrefs, *level);
2233 if (ret < 0)
2234 return ret;
2236 while (*level >= 0) {
2237 WARN_ON(*level < 0);
2238 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2239 cur = path->nodes[*level];
2241 if (btrfs_header_level(cur) != *level)
2242 WARN_ON(1);
2244 if (path->slots[*level] >= btrfs_header_nritems(cur))
2245 break;
2246 /* Don't forgot to check leaf/node validation */
2247 if (*level == 0) {
2248 ret = btrfs_check_leaf(root, NULL, cur);
2249 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2250 ret = -EIO;
2251 break;
2253 ret = process_one_leaf_v2(root, path, nrefs,
2254 level, ext_ref);
2255 break;
2256 } else {
2257 ret = btrfs_check_node(root, NULL, cur);
2258 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2259 ret = -EIO;
2260 break;
2263 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2264 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2265 blocksize = root->nodesize;
2267 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2268 if (ret)
2269 break;
2270 if (!nrefs->need_check[*level - 1]) {
2271 path->slots[*level]++;
2272 continue;
2275 next = btrfs_find_tree_block(root, bytenr, blocksize);
2276 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2277 free_extent_buffer(next);
2278 reada_walk_down(root, cur, path->slots[*level]);
2279 next = read_tree_block(root, bytenr, blocksize,
2280 ptr_gen);
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2285 &node_key,
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2288 &node_key,
2289 path->nodes[*level]->start,
2290 root->nodesize, *level);
2291 ret = -EIO;
2292 break;
2296 ret = check_child_node(root, cur, path->slots[*level], next);
2297 if (ret < 0)
2298 break;
2300 if (btrfs_is_leaf(next))
2301 status = btrfs_check_leaf(root, NULL, next);
2302 else
2303 status = btrfs_check_node(root, NULL, next);
2304 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2305 free_extent_buffer(next);
2306 ret = -EIO;
2307 break;
2310 *level = *level - 1;
2311 free_extent_buffer(path->nodes[*level]);
2312 path->nodes[*level] = next;
2313 path->slots[*level] = 0;
2315 return ret;
2318 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2319 struct walk_control *wc, int *level)
2321 int i;
2322 struct extent_buffer *leaf;
2324 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2325 leaf = path->nodes[i];
2326 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2327 path->slots[i]++;
2328 *level = i;
2329 return 0;
2330 } else {
2331 free_extent_buffer(path->nodes[*level]);
2332 path->nodes[*level] = NULL;
2333 BUG_ON(*level > wc->active_node);
2334 if (*level == wc->active_node)
2335 leave_shared_node(root, wc, *level);
2336 *level = i + 1;
2339 return 1;
2342 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2343 int *level)
2345 int i;
2346 struct extent_buffer *leaf;
2348 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349 leaf = path->nodes[i];
2350 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351 path->slots[i]++;
2352 *level = i;
2353 return 0;
2354 } else {
2355 free_extent_buffer(path->nodes[*level]);
2356 path->nodes[*level] = NULL;
2357 *level = i + 1;
2360 return 1;
2363 static int check_root_dir(struct inode_record *rec)
2365 struct inode_backref *backref;
2366 int ret = -1;
2368 if (!rec->found_inode_item || rec->errors)
2369 goto out;
2370 if (rec->nlink != 1 || rec->found_link != 0)
2371 goto out;
2372 if (list_empty(&rec->backrefs))
2373 goto out;
2374 backref = to_inode_backref(rec->backrefs.next);
2375 if (!backref->found_inode_ref)
2376 goto out;
2377 if (backref->index != 0 || backref->namelen != 2 ||
2378 memcmp(backref->name, "..", 2))
2379 goto out;
2380 if (backref->found_dir_index || backref->found_dir_item)
2381 goto out;
2382 ret = 0;
2383 out:
2384 return ret;
2387 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2388 struct btrfs_root *root, struct btrfs_path *path,
2389 struct inode_record *rec)
2391 struct btrfs_inode_item *ei;
2392 struct btrfs_key key;
2393 int ret;
2395 key.objectid = rec->ino;
2396 key.type = BTRFS_INODE_ITEM_KEY;
2397 key.offset = (u64)-1;
2399 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2400 if (ret < 0)
2401 goto out;
2402 if (ret) {
2403 if (!path->slots[0]) {
2404 ret = -ENOENT;
2405 goto out;
2407 path->slots[0]--;
2408 ret = 0;
2410 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2411 if (key.objectid != rec->ino) {
2412 ret = -ENOENT;
2413 goto out;
2416 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2417 struct btrfs_inode_item);
2418 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2419 btrfs_mark_buffer_dirty(path->nodes[0]);
2420 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2421 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2422 root->root_key.objectid);
2423 out:
2424 btrfs_release_path(path);
2425 return ret;
2428 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2429 struct btrfs_root *root,
2430 struct btrfs_path *path,
2431 struct inode_record *rec)
2433 int ret;
2435 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2436 btrfs_release_path(path);
2437 if (!ret)
2438 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2439 return ret;
2442 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2443 struct btrfs_root *root,
2444 struct btrfs_path *path,
2445 struct inode_record *rec)
2447 struct btrfs_inode_item *ei;
2448 struct btrfs_key key;
2449 int ret = 0;
2451 key.objectid = rec->ino;
2452 key.type = BTRFS_INODE_ITEM_KEY;
2453 key.offset = 0;
2455 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2456 if (ret) {
2457 if (ret > 0)
2458 ret = -ENOENT;
2459 goto out;
2462 /* Since ret == 0, no need to check anything */
2463 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2464 struct btrfs_inode_item);
2465 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2466 btrfs_mark_buffer_dirty(path->nodes[0]);
2467 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2468 printf("reset nbytes for ino %llu root %llu\n",
2469 rec->ino, root->root_key.objectid);
2470 out:
2471 btrfs_release_path(path);
2472 return ret;
2475 static int add_missing_dir_index(struct btrfs_root *root,
2476 struct cache_tree *inode_cache,
2477 struct inode_record *rec,
2478 struct inode_backref *backref)
2480 struct btrfs_path path;
2481 struct btrfs_trans_handle *trans;
2482 struct btrfs_dir_item *dir_item;
2483 struct extent_buffer *leaf;
2484 struct btrfs_key key;
2485 struct btrfs_disk_key disk_key;
2486 struct inode_record *dir_rec;
2487 unsigned long name_ptr;
2488 u32 data_size = sizeof(*dir_item) + backref->namelen;
2489 int ret;
2491 trans = btrfs_start_transaction(root, 1);
2492 if (IS_ERR(trans))
2493 return PTR_ERR(trans);
2495 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2496 (unsigned long long)rec->ino);
2498 btrfs_init_path(&path);
2499 key.objectid = backref->dir;
2500 key.type = BTRFS_DIR_INDEX_KEY;
2501 key.offset = backref->index;
2502 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2503 BUG_ON(ret);
2505 leaf = path.nodes[0];
2506 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2508 disk_key.objectid = cpu_to_le64(rec->ino);
2509 disk_key.type = BTRFS_INODE_ITEM_KEY;
2510 disk_key.offset = 0;
2512 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2513 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2514 btrfs_set_dir_data_len(leaf, dir_item, 0);
2515 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2516 name_ptr = (unsigned long)(dir_item + 1);
2517 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2518 btrfs_mark_buffer_dirty(leaf);
2519 btrfs_release_path(&path);
2520 btrfs_commit_transaction(trans, root);
2522 backref->found_dir_index = 1;
2523 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2524 BUG_ON(IS_ERR(dir_rec));
2525 if (!dir_rec)
2526 return 0;
2527 dir_rec->found_size += backref->namelen;
2528 if (dir_rec->found_size == dir_rec->isize &&
2529 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2530 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2531 if (dir_rec->found_size != dir_rec->isize)
2532 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2534 return 0;
2537 static int delete_dir_index(struct btrfs_root *root,
2538 struct cache_tree *inode_cache,
2539 struct inode_record *rec,
2540 struct inode_backref *backref)
2542 struct btrfs_trans_handle *trans;
2543 struct btrfs_dir_item *di;
2544 struct btrfs_path path;
2545 int ret = 0;
2547 trans = btrfs_start_transaction(root, 1);
2548 if (IS_ERR(trans))
2549 return PTR_ERR(trans);
2551 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2552 (unsigned long long)backref->dir,
2553 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2554 (unsigned long long)root->objectid);
2556 btrfs_init_path(&path);
2557 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2558 backref->name, backref->namelen,
2559 backref->index, -1);
2560 if (IS_ERR(di)) {
2561 ret = PTR_ERR(di);
2562 btrfs_release_path(&path);
2563 btrfs_commit_transaction(trans, root);
2564 if (ret == -ENOENT)
2565 return 0;
2566 return ret;
2569 if (!di)
2570 ret = btrfs_del_item(trans, root, &path);
2571 else
2572 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2573 BUG_ON(ret);
2574 btrfs_release_path(&path);
2575 btrfs_commit_transaction(trans, root);
2576 return ret;
2579 static int create_inode_item(struct btrfs_root *root,
2580 struct inode_record *rec,
2581 struct inode_backref *backref, int root_dir)
2583 struct btrfs_trans_handle *trans;
2584 struct btrfs_inode_item inode_item;
2585 time_t now = time(NULL);
2586 int ret;
2588 trans = btrfs_start_transaction(root, 1);
2589 if (IS_ERR(trans)) {
2590 ret = PTR_ERR(trans);
2591 return ret;
2594 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2595 "be incomplete, please check permissions and content after "
2596 "the fsck completes.\n", (unsigned long long)root->objectid,
2597 (unsigned long long)rec->ino);
2599 memset(&inode_item, 0, sizeof(inode_item));
2600 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2601 if (root_dir)
2602 btrfs_set_stack_inode_nlink(&inode_item, 1);
2603 else
2604 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2605 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2606 if (rec->found_dir_item) {
2607 if (rec->found_file_extent)
2608 fprintf(stderr, "root %llu inode %llu has both a dir "
2609 "item and extents, unsure if it is a dir or a "
2610 "regular file so setting it as a directory\n",
2611 (unsigned long long)root->objectid,
2612 (unsigned long long)rec->ino);
2613 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2614 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2615 } else if (!rec->found_dir_item) {
2616 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2617 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2619 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2620 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2621 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2626 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2628 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2629 BUG_ON(ret);
2630 btrfs_commit_transaction(trans, root);
2631 return 0;
2634 static int repair_inode_backrefs(struct btrfs_root *root,
2635 struct inode_record *rec,
2636 struct cache_tree *inode_cache,
2637 int delete)
2639 struct inode_backref *tmp, *backref;
2640 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2641 int ret = 0;
2642 int repaired = 0;
2644 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2645 if (!delete && rec->ino == root_dirid) {
2646 if (!rec->found_inode_item) {
2647 ret = create_inode_item(root, rec, backref, 1);
2648 if (ret)
2649 break;
2650 repaired++;
2654 /* Index 0 for root dir's are special, don't mess with it */
2655 if (rec->ino == root_dirid && backref->index == 0)
2656 continue;
2658 if (delete &&
2659 ((backref->found_dir_index && !backref->found_inode_ref) ||
2660 (backref->found_dir_index && backref->found_inode_ref &&
2661 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2662 ret = delete_dir_index(root, inode_cache, rec, backref);
2663 if (ret)
2664 break;
2665 repaired++;
2666 list_del(&backref->list);
2667 free(backref);
2670 if (!delete && !backref->found_dir_index &&
2671 backref->found_dir_item && backref->found_inode_ref) {
2672 ret = add_missing_dir_index(root, inode_cache, rec,
2673 backref);
2674 if (ret)
2675 break;
2676 repaired++;
2677 if (backref->found_dir_item &&
2678 backref->found_dir_index &&
2679 backref->found_dir_index) {
2680 if (!backref->errors &&
2681 backref->found_inode_ref) {
2682 list_del(&backref->list);
2683 free(backref);
2688 if (!delete && (!backref->found_dir_index &&
2689 !backref->found_dir_item &&
2690 backref->found_inode_ref)) {
2691 struct btrfs_trans_handle *trans;
2692 struct btrfs_key location;
2694 ret = check_dir_conflict(root, backref->name,
2695 backref->namelen,
2696 backref->dir,
2697 backref->index);
2698 if (ret) {
2700 * let nlink fixing routine to handle it,
2701 * which can do it better.
2703 ret = 0;
2704 break;
2706 location.objectid = rec->ino;
2707 location.type = BTRFS_INODE_ITEM_KEY;
2708 location.offset = 0;
2710 trans = btrfs_start_transaction(root, 1);
2711 if (IS_ERR(trans)) {
2712 ret = PTR_ERR(trans);
2713 break;
2715 fprintf(stderr, "adding missing dir index/item pair "
2716 "for inode %llu\n",
2717 (unsigned long long)rec->ino);
2718 ret = btrfs_insert_dir_item(trans, root, backref->name,
2719 backref->namelen,
2720 backref->dir, &location,
2721 imode_to_type(rec->imode),
2722 backref->index);
2723 BUG_ON(ret);
2724 btrfs_commit_transaction(trans, root);
2725 repaired++;
2728 if (!delete && (backref->found_inode_ref &&
2729 backref->found_dir_index &&
2730 backref->found_dir_item &&
2731 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2732 !rec->found_inode_item)) {
2733 ret = create_inode_item(root, rec, backref, 0);
2734 if (ret)
2735 break;
2736 repaired++;
2740 return ret ? ret : repaired;
2744 * To determine the file type for nlink/inode_item repair
2746 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2747 * Return -ENOENT if file type is not found.
2749 static int find_file_type(struct inode_record *rec, u8 *type)
2751 struct inode_backref *backref;
2753 /* For inode item recovered case */
2754 if (rec->found_inode_item) {
2755 *type = imode_to_type(rec->imode);
2756 return 0;
2759 list_for_each_entry(backref, &rec->backrefs, list) {
2760 if (backref->found_dir_index || backref->found_dir_item) {
2761 *type = backref->filetype;
2762 return 0;
2765 return -ENOENT;
2769 * To determine the file name for nlink repair
2771 * Return 0 if file name is found, set name and namelen.
2772 * Return -ENOENT if file name is not found.
2774 static int find_file_name(struct inode_record *rec,
2775 char *name, int *namelen)
2777 struct inode_backref *backref;
2779 list_for_each_entry(backref, &rec->backrefs, list) {
2780 if (backref->found_dir_index || backref->found_dir_item ||
2781 backref->found_inode_ref) {
2782 memcpy(name, backref->name, backref->namelen);
2783 *namelen = backref->namelen;
2784 return 0;
2787 return -ENOENT;
2790 /* Reset the nlink of the inode to the correct one */
2791 static int reset_nlink(struct btrfs_trans_handle *trans,
2792 struct btrfs_root *root,
2793 struct btrfs_path *path,
2794 struct inode_record *rec)
2796 struct inode_backref *backref;
2797 struct inode_backref *tmp;
2798 struct btrfs_key key;
2799 struct btrfs_inode_item *inode_item;
2800 int ret = 0;
2802 /* We don't believe this either, reset it and iterate backref */
2803 rec->found_link = 0;
2805 /* Remove all backref including the valid ones */
2806 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2807 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2808 backref->index, backref->name,
2809 backref->namelen, 0);
2810 if (ret < 0)
2811 goto out;
2813 /* remove invalid backref, so it won't be added back */
2814 if (!(backref->found_dir_index &&
2815 backref->found_dir_item &&
2816 backref->found_inode_ref)) {
2817 list_del(&backref->list);
2818 free(backref);
2819 } else {
2820 rec->found_link++;
2824 /* Set nlink to 0 */
2825 key.objectid = rec->ino;
2826 key.type = BTRFS_INODE_ITEM_KEY;
2827 key.offset = 0;
2828 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2829 if (ret < 0)
2830 goto out;
2831 if (ret > 0) {
2832 ret = -ENOENT;
2833 goto out;
2835 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2836 struct btrfs_inode_item);
2837 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2838 btrfs_mark_buffer_dirty(path->nodes[0]);
2839 btrfs_release_path(path);
2842 * Add back valid inode_ref/dir_item/dir_index,
2843 * add_link() will handle the nlink inc, so new nlink must be correct
2845 list_for_each_entry(backref, &rec->backrefs, list) {
2846 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2847 backref->name, backref->namelen,
2848 backref->filetype, &backref->index, 1);
2849 if (ret < 0)
2850 goto out;
2852 out:
2853 btrfs_release_path(path);
2854 return ret;
2857 static int get_highest_inode(struct btrfs_trans_handle *trans,
2858 struct btrfs_root *root,
2859 struct btrfs_path *path,
2860 u64 *highest_ino)
2862 struct btrfs_key key, found_key;
2863 int ret;
2865 btrfs_init_path(path);
2866 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2867 key.offset = -1;
2868 key.type = BTRFS_INODE_ITEM_KEY;
2869 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2870 if (ret == 1) {
2871 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2872 path->slots[0] - 1);
2873 *highest_ino = found_key.objectid;
2874 ret = 0;
2876 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2877 ret = -EOVERFLOW;
2878 btrfs_release_path(path);
2879 return ret;
2882 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2883 struct btrfs_root *root,
2884 struct btrfs_path *path,
2885 struct inode_record *rec)
2887 char *dir_name = "lost+found";
2888 char namebuf[BTRFS_NAME_LEN] = {0};
2889 u64 lost_found_ino;
2890 u32 mode = 0700;
2891 u8 type = 0;
2892 int namelen = 0;
2893 int name_recovered = 0;
2894 int type_recovered = 0;
2895 int ret = 0;
2898 * Get file name and type first before these invalid inode ref
2899 * are deleted by remove_all_invalid_backref()
2901 name_recovered = !find_file_name(rec, namebuf, &namelen);
2902 type_recovered = !find_file_type(rec, &type);
2904 if (!name_recovered) {
2905 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2906 rec->ino, rec->ino);
2907 namelen = count_digits(rec->ino);
2908 sprintf(namebuf, "%llu", rec->ino);
2909 name_recovered = 1;
2911 if (!type_recovered) {
2912 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2913 rec->ino);
2914 type = BTRFS_FT_REG_FILE;
2915 type_recovered = 1;
2918 ret = reset_nlink(trans, root, path, rec);
2919 if (ret < 0) {
2920 fprintf(stderr,
2921 "Failed to reset nlink for inode %llu: %s\n",
2922 rec->ino, strerror(-ret));
2923 goto out;
2926 if (rec->found_link == 0) {
2927 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2928 if (ret < 0)
2929 goto out;
2930 lost_found_ino++;
2931 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2932 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2933 mode);
2934 if (ret < 0) {
2935 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2936 dir_name, strerror(-ret));
2937 goto out;
2939 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2940 namebuf, namelen, type, NULL, 1);
2942 * Add ".INO" suffix several times to handle case where
2943 * "FILENAME.INO" is already taken by another file.
2945 while (ret == -EEXIST) {
2947 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2949 if (namelen + count_digits(rec->ino) + 1 >
2950 BTRFS_NAME_LEN) {
2951 ret = -EFBIG;
2952 goto out;
2954 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2955 ".%llu", rec->ino);
2956 namelen += count_digits(rec->ino) + 1;
2957 ret = btrfs_add_link(trans, root, rec->ino,
2958 lost_found_ino, namebuf,
2959 namelen, type, NULL, 1);
2961 if (ret < 0) {
2962 fprintf(stderr,
2963 "Failed to link the inode %llu to %s dir: %s\n",
2964 rec->ino, dir_name, strerror(-ret));
2965 goto out;
2968 * Just increase the found_link, don't actually add the
2969 * backref. This will make things easier and this inode
2970 * record will be freed after the repair is done.
2971 * So fsck will not report problem about this inode.
2973 rec->found_link++;
2974 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2975 namelen, namebuf, dir_name);
2977 printf("Fixed the nlink of inode %llu\n", rec->ino);
2978 out:
2980 * Clear the flag anyway, or we will loop forever for the same inode
2981 * as it will not be removed from the bad inode list and the dead loop
2982 * happens.
2984 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2985 btrfs_release_path(path);
2986 return ret;
2990 * Check if there is any normal(reg or prealloc) file extent for given
2991 * ino.
2992 * This is used to determine the file type when neither its dir_index/item or
2993 * inode_item exists.
2995 * This will *NOT* report error, if any error happens, just consider it does
2996 * not have any normal file extent.
2998 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3000 struct btrfs_path path;
3001 struct btrfs_key key;
3002 struct btrfs_key found_key;
3003 struct btrfs_file_extent_item *fi;
3004 u8 type;
3005 int ret = 0;
3007 btrfs_init_path(&path);
3008 key.objectid = ino;
3009 key.type = BTRFS_EXTENT_DATA_KEY;
3010 key.offset = 0;
3012 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3013 if (ret < 0) {
3014 ret = 0;
3015 goto out;
3017 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3018 ret = btrfs_next_leaf(root, &path);
3019 if (ret) {
3020 ret = 0;
3021 goto out;
3024 while (1) {
3025 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3026 path.slots[0]);
3027 if (found_key.objectid != ino ||
3028 found_key.type != BTRFS_EXTENT_DATA_KEY)
3029 break;
3030 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3031 struct btrfs_file_extent_item);
3032 type = btrfs_file_extent_type(path.nodes[0], fi);
3033 if (type != BTRFS_FILE_EXTENT_INLINE) {
3034 ret = 1;
3035 goto out;
3038 out:
3039 btrfs_release_path(&path);
3040 return ret;
3043 static u32 btrfs_type_to_imode(u8 type)
3045 static u32 imode_by_btrfs_type[] = {
3046 [BTRFS_FT_REG_FILE] = S_IFREG,
3047 [BTRFS_FT_DIR] = S_IFDIR,
3048 [BTRFS_FT_CHRDEV] = S_IFCHR,
3049 [BTRFS_FT_BLKDEV] = S_IFBLK,
3050 [BTRFS_FT_FIFO] = S_IFIFO,
3051 [BTRFS_FT_SOCK] = S_IFSOCK,
3052 [BTRFS_FT_SYMLINK] = S_IFLNK,
3055 return imode_by_btrfs_type[(type)];
3058 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3059 struct btrfs_root *root,
3060 struct btrfs_path *path,
3061 struct inode_record *rec)
3063 u8 filetype;
3064 u32 mode = 0700;
3065 int type_recovered = 0;
3066 int ret = 0;
3068 printf("Trying to rebuild inode:%llu\n", rec->ino);
3070 type_recovered = !find_file_type(rec, &filetype);
3073 * Try to determine inode type if type not found.
3075 * For found regular file extent, it must be FILE.
3076 * For found dir_item/index, it must be DIR.
3078 * For undetermined one, use FILE as fallback.
3080 * TODO:
3081 * 1. If found backref(inode_index/item is already handled) to it,
3082 * it must be DIR.
3083 * Need new inode-inode ref structure to allow search for that.
3085 if (!type_recovered) {
3086 if (rec->found_file_extent &&
3087 find_normal_file_extent(root, rec->ino)) {
3088 type_recovered = 1;
3089 filetype = BTRFS_FT_REG_FILE;
3090 } else if (rec->found_dir_item) {
3091 type_recovered = 1;
3092 filetype = BTRFS_FT_DIR;
3093 } else if (!list_empty(&rec->orphan_extents)) {
3094 type_recovered = 1;
3095 filetype = BTRFS_FT_REG_FILE;
3096 } else{
3097 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3098 rec->ino);
3099 type_recovered = 1;
3100 filetype = BTRFS_FT_REG_FILE;
3104 ret = btrfs_new_inode(trans, root, rec->ino,
3105 mode | btrfs_type_to_imode(filetype));
3106 if (ret < 0)
3107 goto out;
3110 * Here inode rebuild is done, we only rebuild the inode item,
3111 * don't repair the nlink(like move to lost+found).
3112 * That is the job of nlink repair.
3114 * We just fill the record and return
3116 rec->found_dir_item = 1;
3117 rec->imode = mode | btrfs_type_to_imode(filetype);
3118 rec->nlink = 0;
3119 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3120 /* Ensure the inode_nlinks repair function will be called */
3121 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3122 out:
3123 return ret;
3126 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3127 struct btrfs_root *root,
3128 struct btrfs_path *path,
3129 struct inode_record *rec)
3131 struct orphan_data_extent *orphan;
3132 struct orphan_data_extent *tmp;
3133 int ret = 0;
3135 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3137 * Check for conflicting file extents
3139 * Here we don't know whether the extents is compressed or not,
3140 * so we can only assume it not compressed nor data offset,
3141 * and use its disk_len as extent length.
3143 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3144 orphan->offset, orphan->disk_len, 0);
3145 btrfs_release_path(path);
3146 if (ret < 0)
3147 goto out;
3148 if (!ret) {
3149 fprintf(stderr,
3150 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3151 orphan->disk_bytenr, orphan->disk_len);
3152 ret = btrfs_free_extent(trans,
3153 root->fs_info->extent_root,
3154 orphan->disk_bytenr, orphan->disk_len,
3155 0, root->objectid, orphan->objectid,
3156 orphan->offset);
3157 if (ret < 0)
3158 goto out;
3160 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3161 orphan->offset, orphan->disk_bytenr,
3162 orphan->disk_len, orphan->disk_len);
3163 if (ret < 0)
3164 goto out;
3166 /* Update file size info */
3167 rec->found_size += orphan->disk_len;
3168 if (rec->found_size == rec->nbytes)
3169 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3171 /* Update the file extent hole info too */
3172 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3173 orphan->disk_len);
3174 if (ret < 0)
3175 goto out;
3176 if (RB_EMPTY_ROOT(&rec->holes))
3177 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3179 list_del(&orphan->list);
3180 free(orphan);
3182 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3183 out:
3184 return ret;
3187 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3188 struct btrfs_root *root,
3189 struct btrfs_path *path,
3190 struct inode_record *rec)
3192 struct rb_node *node;
3193 struct file_extent_hole *hole;
3194 int found = 0;
3195 int ret = 0;
3197 node = rb_first(&rec->holes);
3199 while (node) {
3200 found = 1;
3201 hole = rb_entry(node, struct file_extent_hole, node);
3202 ret = btrfs_punch_hole(trans, root, rec->ino,
3203 hole->start, hole->len);
3204 if (ret < 0)
3205 goto out;
3206 ret = del_file_extent_hole(&rec->holes, hole->start,
3207 hole->len);
3208 if (ret < 0)
3209 goto out;
3210 if (RB_EMPTY_ROOT(&rec->holes))
3211 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212 node = rb_first(&rec->holes);
3214 /* special case for a file losing all its file extent */
3215 if (!found) {
3216 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3217 round_up(rec->isize, root->sectorsize));
3218 if (ret < 0)
3219 goto out;
3221 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3222 rec->ino, root->objectid);
3223 out:
3224 return ret;
3227 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3229 struct btrfs_trans_handle *trans;
3230 struct btrfs_path path;
3231 int ret = 0;
3233 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3234 I_ERR_NO_ORPHAN_ITEM |
3235 I_ERR_LINK_COUNT_WRONG |
3236 I_ERR_NO_INODE_ITEM |
3237 I_ERR_FILE_EXTENT_ORPHAN |
3238 I_ERR_FILE_EXTENT_DISCOUNT|
3239 I_ERR_FILE_NBYTES_WRONG)))
3240 return rec->errors;
3243 * For nlink repair, it may create a dir and add link, so
3244 * 2 for parent(256)'s dir_index and dir_item
3245 * 2 for lost+found dir's inode_item and inode_ref
3246 * 1 for the new inode_ref of the file
3247 * 2 for lost+found dir's dir_index and dir_item for the file
3249 trans = btrfs_start_transaction(root, 7);
3250 if (IS_ERR(trans))
3251 return PTR_ERR(trans);
3253 btrfs_init_path(&path);
3254 if (rec->errors & I_ERR_NO_INODE_ITEM)
3255 ret = repair_inode_no_item(trans, root, &path, rec);
3256 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3257 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3259 ret = repair_inode_discount_extent(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3261 ret = repair_inode_isize(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3263 ret = repair_inode_orphan_item(trans, root, &path, rec);
3264 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3265 ret = repair_inode_nlinks(trans, root, &path, rec);
3266 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3267 ret = repair_inode_nbytes(trans, root, &path, rec);
3268 btrfs_commit_transaction(trans, root);
3269 btrfs_release_path(&path);
3270 return ret;
3273 static int check_inode_recs(struct btrfs_root *root,
3274 struct cache_tree *inode_cache)
3276 struct cache_extent *cache;
3277 struct ptr_node *node;
3278 struct inode_record *rec;
3279 struct inode_backref *backref;
3280 int stage = 0;
3281 int ret = 0;
3282 int err = 0;
3283 u64 error = 0;
3284 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3286 if (btrfs_root_refs(&root->root_item) == 0) {
3287 if (!cache_tree_empty(inode_cache))
3288 fprintf(stderr, "warning line %d\n", __LINE__);
3289 return 0;
3293 * We need to repair backrefs first because we could change some of the
3294 * errors in the inode recs.
3296 * We also need to go through and delete invalid backrefs first and then
3297 * add the correct ones second. We do this because we may get EEXIST
3298 * when adding back the correct index because we hadn't yet deleted the
3299 * invalid index.
3301 * For example, if we were missing a dir index then the directories
3302 * isize would be wrong, so if we fixed the isize to what we thought it
3303 * would be and then fixed the backref we'd still have a invalid fs, so
3304 * we need to add back the dir index and then check to see if the isize
3305 * is still wrong.
3307 while (stage < 3) {
3308 stage++;
3309 if (stage == 3 && !err)
3310 break;
3312 cache = search_cache_extent(inode_cache, 0);
3313 while (repair && cache) {
3314 node = container_of(cache, struct ptr_node, cache);
3315 rec = node->data;
3316 cache = next_cache_extent(cache);
3318 /* Need to free everything up and rescan */
3319 if (stage == 3) {
3320 remove_cache_extent(inode_cache, &node->cache);
3321 free(node);
3322 free_inode_rec(rec);
3323 continue;
3326 if (list_empty(&rec->backrefs))
3327 continue;
3329 ret = repair_inode_backrefs(root, rec, inode_cache,
3330 stage == 1);
3331 if (ret < 0) {
3332 err = ret;
3333 stage = 2;
3334 break;
3335 } if (ret > 0) {
3336 err = -EAGAIN;
3340 if (err)
3341 return err;
3343 rec = get_inode_rec(inode_cache, root_dirid, 0);
3344 BUG_ON(IS_ERR(rec));
3345 if (rec) {
3346 ret = check_root_dir(rec);
3347 if (ret) {
3348 fprintf(stderr, "root %llu root dir %llu error\n",
3349 (unsigned long long)root->root_key.objectid,
3350 (unsigned long long)root_dirid);
3351 print_inode_error(root, rec);
3352 error++;
3354 } else {
3355 if (repair) {
3356 struct btrfs_trans_handle *trans;
3358 trans = btrfs_start_transaction(root, 1);
3359 if (IS_ERR(trans)) {
3360 err = PTR_ERR(trans);
3361 return err;
3364 fprintf(stderr,
3365 "root %llu missing its root dir, recreating\n",
3366 (unsigned long long)root->objectid);
3368 ret = btrfs_make_root_dir(trans, root, root_dirid);
3369 BUG_ON(ret);
3371 btrfs_commit_transaction(trans, root);
3372 return -EAGAIN;
3375 fprintf(stderr, "root %llu root dir %llu not found\n",
3376 (unsigned long long)root->root_key.objectid,
3377 (unsigned long long)root_dirid);
3380 while (1) {
3381 cache = search_cache_extent(inode_cache, 0);
3382 if (!cache)
3383 break;
3384 node = container_of(cache, struct ptr_node, cache);
3385 rec = node->data;
3386 remove_cache_extent(inode_cache, &node->cache);
3387 free(node);
3388 if (rec->ino == root_dirid ||
3389 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3390 free_inode_rec(rec);
3391 continue;
3394 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3395 ret = check_orphan_item(root, rec->ino);
3396 if (ret == 0)
3397 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3398 if (can_free_inode_rec(rec)) {
3399 free_inode_rec(rec);
3400 continue;
3404 if (!rec->found_inode_item)
3405 rec->errors |= I_ERR_NO_INODE_ITEM;
3406 if (rec->found_link != rec->nlink)
3407 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3408 if (repair) {
3409 ret = try_repair_inode(root, rec);
3410 if (ret == 0 && can_free_inode_rec(rec)) {
3411 free_inode_rec(rec);
3412 continue;
3414 ret = 0;
3417 if (!(repair && ret == 0))
3418 error++;
3419 print_inode_error(root, rec);
3420 list_for_each_entry(backref, &rec->backrefs, list) {
3421 if (!backref->found_dir_item)
3422 backref->errors |= REF_ERR_NO_DIR_ITEM;
3423 if (!backref->found_dir_index)
3424 backref->errors |= REF_ERR_NO_DIR_INDEX;
3425 if (!backref->found_inode_ref)
3426 backref->errors |= REF_ERR_NO_INODE_REF;
3427 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3428 " namelen %u name %s filetype %d errors %x",
3429 (unsigned long long)backref->dir,
3430 (unsigned long long)backref->index,
3431 backref->namelen, backref->name,
3432 backref->filetype, backref->errors);
3433 print_ref_error(backref->errors);
3435 free_inode_rec(rec);
3437 return (error > 0) ? -1 : 0;
3440 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3441 u64 objectid)
3443 struct cache_extent *cache;
3444 struct root_record *rec = NULL;
3445 int ret;
3447 cache = lookup_cache_extent(root_cache, objectid, 1);
3448 if (cache) {
3449 rec = container_of(cache, struct root_record, cache);
3450 } else {
3451 rec = calloc(1, sizeof(*rec));
3452 if (!rec)
3453 return ERR_PTR(-ENOMEM);
3454 rec->objectid = objectid;
3455 INIT_LIST_HEAD(&rec->backrefs);
3456 rec->cache.start = objectid;
3457 rec->cache.size = 1;
3459 ret = insert_cache_extent(root_cache, &rec->cache);
3460 if (ret)
3461 return ERR_PTR(-EEXIST);
3463 return rec;
3466 static struct root_backref *get_root_backref(struct root_record *rec,
3467 u64 ref_root, u64 dir, u64 index,
3468 const char *name, int namelen)
3470 struct root_backref *backref;
3472 list_for_each_entry(backref, &rec->backrefs, list) {
3473 if (backref->ref_root != ref_root || backref->dir != dir ||
3474 backref->namelen != namelen)
3475 continue;
3476 if (memcmp(name, backref->name, namelen))
3477 continue;
3478 return backref;
3481 backref = calloc(1, sizeof(*backref) + namelen + 1);
3482 if (!backref)
3483 return NULL;
3484 backref->ref_root = ref_root;
3485 backref->dir = dir;
3486 backref->index = index;
3487 backref->namelen = namelen;
3488 memcpy(backref->name, name, namelen);
3489 backref->name[namelen] = '\0';
3490 list_add_tail(&backref->list, &rec->backrefs);
3491 return backref;
3494 static void free_root_record(struct cache_extent *cache)
3496 struct root_record *rec;
3497 struct root_backref *backref;
3499 rec = container_of(cache, struct root_record, cache);
3500 while (!list_empty(&rec->backrefs)) {
3501 backref = to_root_backref(rec->backrefs.next);
3502 list_del(&backref->list);
3503 free(backref);
3506 free(rec);
3509 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3511 static int add_root_backref(struct cache_tree *root_cache,
3512 u64 root_id, u64 ref_root, u64 dir, u64 index,
3513 const char *name, int namelen,
3514 int item_type, int errors)
3516 struct root_record *rec;
3517 struct root_backref *backref;
3519 rec = get_root_rec(root_cache, root_id);
3520 BUG_ON(IS_ERR(rec));
3521 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3522 BUG_ON(!backref);
3524 backref->errors |= errors;
3526 if (item_type != BTRFS_DIR_ITEM_KEY) {
3527 if (backref->found_dir_index || backref->found_back_ref ||
3528 backref->found_forward_ref) {
3529 if (backref->index != index)
3530 backref->errors |= REF_ERR_INDEX_UNMATCH;
3531 } else {
3532 backref->index = index;
3536 if (item_type == BTRFS_DIR_ITEM_KEY) {
3537 if (backref->found_forward_ref)
3538 rec->found_ref++;
3539 backref->found_dir_item = 1;
3540 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3541 backref->found_dir_index = 1;
3542 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3543 if (backref->found_forward_ref)
3544 backref->errors |= REF_ERR_DUP_ROOT_REF;
3545 else if (backref->found_dir_item)
3546 rec->found_ref++;
3547 backref->found_forward_ref = 1;
3548 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3549 if (backref->found_back_ref)
3550 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3551 backref->found_back_ref = 1;
3552 } else {
3553 BUG_ON(1);
3556 if (backref->found_forward_ref && backref->found_dir_item)
3557 backref->reachable = 1;
3558 return 0;
3561 static int merge_root_recs(struct btrfs_root *root,
3562 struct cache_tree *src_cache,
3563 struct cache_tree *dst_cache)
3565 struct cache_extent *cache;
3566 struct ptr_node *node;
3567 struct inode_record *rec;
3568 struct inode_backref *backref;
3569 int ret = 0;
3571 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3572 free_inode_recs_tree(src_cache);
3573 return 0;
3576 while (1) {
3577 cache = search_cache_extent(src_cache, 0);
3578 if (!cache)
3579 break;
3580 node = container_of(cache, struct ptr_node, cache);
3581 rec = node->data;
3582 remove_cache_extent(src_cache, &node->cache);
3583 free(node);
3585 ret = is_child_root(root, root->objectid, rec->ino);
3586 if (ret < 0)
3587 break;
3588 else if (ret == 0)
3589 goto skip;
3591 list_for_each_entry(backref, &rec->backrefs, list) {
3592 BUG_ON(backref->found_inode_ref);
3593 if (backref->found_dir_item)
3594 add_root_backref(dst_cache, rec->ino,
3595 root->root_key.objectid, backref->dir,
3596 backref->index, backref->name,
3597 backref->namelen, BTRFS_DIR_ITEM_KEY,
3598 backref->errors);
3599 if (backref->found_dir_index)
3600 add_root_backref(dst_cache, rec->ino,
3601 root->root_key.objectid, backref->dir,
3602 backref->index, backref->name,
3603 backref->namelen, BTRFS_DIR_INDEX_KEY,
3604 backref->errors);
3606 skip:
3607 free_inode_rec(rec);
3609 if (ret < 0)
3610 return ret;
3611 return 0;
3614 static int check_root_refs(struct btrfs_root *root,
3615 struct cache_tree *root_cache)
3617 struct root_record *rec;
3618 struct root_record *ref_root;
3619 struct root_backref *backref;
3620 struct cache_extent *cache;
3621 int loop = 1;
3622 int ret;
3623 int error;
3624 int errors = 0;
3626 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3627 BUG_ON(IS_ERR(rec));
3628 rec->found_ref = 1;
3630 /* fixme: this can not detect circular references */
3631 while (loop) {
3632 loop = 0;
3633 cache = search_cache_extent(root_cache, 0);
3634 while (1) {
3635 if (!cache)
3636 break;
3637 rec = container_of(cache, struct root_record, cache);
3638 cache = next_cache_extent(cache);
3640 if (rec->found_ref == 0)
3641 continue;
3643 list_for_each_entry(backref, &rec->backrefs, list) {
3644 if (!backref->reachable)
3645 continue;
3647 ref_root = get_root_rec(root_cache,
3648 backref->ref_root);
3649 BUG_ON(IS_ERR(ref_root));
3650 if (ref_root->found_ref > 0)
3651 continue;
3653 backref->reachable = 0;
3654 rec->found_ref--;
3655 if (rec->found_ref == 0)
3656 loop = 1;
3661 cache = search_cache_extent(root_cache, 0);
3662 while (1) {
3663 if (!cache)
3664 break;
3665 rec = container_of(cache, struct root_record, cache);
3666 cache = next_cache_extent(cache);
3668 if (rec->found_ref == 0 &&
3669 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3670 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3671 ret = check_orphan_item(root->fs_info->tree_root,
3672 rec->objectid);
3673 if (ret == 0)
3674 continue;
3677 * If we don't have a root item then we likely just have
3678 * a dir item in a snapshot for this root but no actual
3679 * ref key or anything so it's meaningless.
3681 if (!rec->found_root_item)
3682 continue;
3683 errors++;
3684 fprintf(stderr, "fs tree %llu not referenced\n",
3685 (unsigned long long)rec->objectid);
3688 error = 0;
3689 if (rec->found_ref > 0 && !rec->found_root_item)
3690 error = 1;
3691 list_for_each_entry(backref, &rec->backrefs, list) {
3692 if (!backref->found_dir_item)
3693 backref->errors |= REF_ERR_NO_DIR_ITEM;
3694 if (!backref->found_dir_index)
3695 backref->errors |= REF_ERR_NO_DIR_INDEX;
3696 if (!backref->found_back_ref)
3697 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3698 if (!backref->found_forward_ref)
3699 backref->errors |= REF_ERR_NO_ROOT_REF;
3700 if (backref->reachable && backref->errors)
3701 error = 1;
3703 if (!error)
3704 continue;
3706 errors++;
3707 fprintf(stderr, "fs tree %llu refs %u %s\n",
3708 (unsigned long long)rec->objectid, rec->found_ref,
3709 rec->found_root_item ? "" : "not found");
3711 list_for_each_entry(backref, &rec->backrefs, list) {
3712 if (!backref->reachable)
3713 continue;
3714 if (!backref->errors && rec->found_root_item)
3715 continue;
3716 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3717 " index %llu namelen %u name %s errors %x\n",
3718 (unsigned long long)backref->ref_root,
3719 (unsigned long long)backref->dir,
3720 (unsigned long long)backref->index,
3721 backref->namelen, backref->name,
3722 backref->errors);
3723 print_ref_error(backref->errors);
3726 return errors > 0 ? 1 : 0;
3729 static int process_root_ref(struct extent_buffer *eb, int slot,
3730 struct btrfs_key *key,
3731 struct cache_tree *root_cache)
3733 u64 dirid;
3734 u64 index;
3735 u32 len;
3736 u32 name_len;
3737 struct btrfs_root_ref *ref;
3738 char namebuf[BTRFS_NAME_LEN];
3739 int error;
3741 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3743 dirid = btrfs_root_ref_dirid(eb, ref);
3744 index = btrfs_root_ref_sequence(eb, ref);
3745 name_len = btrfs_root_ref_name_len(eb, ref);
3747 if (name_len <= BTRFS_NAME_LEN) {
3748 len = name_len;
3749 error = 0;
3750 } else {
3751 len = BTRFS_NAME_LEN;
3752 error = REF_ERR_NAME_TOO_LONG;
3754 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3756 if (key->type == BTRFS_ROOT_REF_KEY) {
3757 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3758 index, namebuf, len, key->type, error);
3759 } else {
3760 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3761 index, namebuf, len, key->type, error);
3763 return 0;
3766 static void free_corrupt_block(struct cache_extent *cache)
3768 struct btrfs_corrupt_block *corrupt;
3770 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3771 free(corrupt);
3774 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3777 * Repair the btree of the given root.
3779 * The fix is to remove the node key in corrupt_blocks cache_tree.
3780 * and rebalance the tree.
3781 * After the fix, the btree should be writeable.
3783 static int repair_btree(struct btrfs_root *root,
3784 struct cache_tree *corrupt_blocks)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
3788 struct btrfs_corrupt_block *corrupt;
3789 struct cache_extent *cache;
3790 struct btrfs_key key;
3791 u64 offset;
3792 int level;
3793 int ret = 0;
3795 if (cache_tree_empty(corrupt_blocks))
3796 return 0;
3798 trans = btrfs_start_transaction(root, 1);
3799 if (IS_ERR(trans)) {
3800 ret = PTR_ERR(trans);
3801 fprintf(stderr, "Error starting transaction: %s\n",
3802 strerror(-ret));
3803 return ret;
3805 btrfs_init_path(&path);
3806 cache = first_cache_extent(corrupt_blocks);
3807 while (cache) {
3808 corrupt = container_of(cache, struct btrfs_corrupt_block,
3809 cache);
3810 level = corrupt->level;
3811 path.lowest_level = level;
3812 key.objectid = corrupt->key.objectid;
3813 key.type = corrupt->key.type;
3814 key.offset = corrupt->key.offset;
3817 * Here we don't want to do any tree balance, since it may
3818 * cause a balance with corrupted brother leaf/node,
3819 * so ins_len set to 0 here.
3820 * Balance will be done after all corrupt node/leaf is deleted.
3822 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3823 if (ret < 0)
3824 goto out;
3825 offset = btrfs_node_blockptr(path.nodes[level],
3826 path.slots[level]);
3828 /* Remove the ptr */
3829 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3830 if (ret < 0)
3831 goto out;
3833 * Remove the corresponding extent
3834 * return value is not concerned.
3836 btrfs_release_path(&path);
3837 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3838 0, root->root_key.objectid,
3839 level - 1, 0);
3840 cache = next_cache_extent(cache);
3843 /* Balance the btree using btrfs_search_slot() */
3844 cache = first_cache_extent(corrupt_blocks);
3845 while (cache) {
3846 corrupt = container_of(cache, struct btrfs_corrupt_block,
3847 cache);
3848 memcpy(&key, &corrupt->key, sizeof(key));
3849 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3850 if (ret < 0)
3851 goto out;
3852 /* return will always >0 since it won't find the item */
3853 ret = 0;
3854 btrfs_release_path(&path);
3855 cache = next_cache_extent(cache);
3857 out:
3858 btrfs_commit_transaction(trans, root);
3859 btrfs_release_path(&path);
3860 return ret;
3863 static int check_fs_root(struct btrfs_root *root,
3864 struct cache_tree *root_cache,
3865 struct walk_control *wc)
3867 int ret = 0;
3868 int err = 0;
3869 int wret;
3870 int level;
3871 struct btrfs_path path;
3872 struct shared_node root_node;
3873 struct root_record *rec;
3874 struct btrfs_root_item *root_item = &root->root_item;
3875 struct cache_tree corrupt_blocks;
3876 struct orphan_data_extent *orphan;
3877 struct orphan_data_extent *tmp;
3878 enum btrfs_tree_block_status status;
3879 struct node_refs nrefs;
3882 * Reuse the corrupt_block cache tree to record corrupted tree block
3884 * Unlike the usage in extent tree check, here we do it in a per
3885 * fs/subvol tree base.
3887 cache_tree_init(&corrupt_blocks);
3888 root->fs_info->corrupt_blocks = &corrupt_blocks;
3890 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3891 rec = get_root_rec(root_cache, root->root_key.objectid);
3892 BUG_ON(IS_ERR(rec));
3893 if (btrfs_root_refs(root_item) > 0)
3894 rec->found_root_item = 1;
3897 btrfs_init_path(&path);
3898 memset(&root_node, 0, sizeof(root_node));
3899 cache_tree_init(&root_node.root_cache);
3900 cache_tree_init(&root_node.inode_cache);
3901 memset(&nrefs, 0, sizeof(nrefs));
3903 /* Move the orphan extent record to corresponding inode_record */
3904 list_for_each_entry_safe(orphan, tmp,
3905 &root->orphan_data_extents, list) {
3906 struct inode_record *inode;
3908 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3910 BUG_ON(IS_ERR(inode));
3911 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3912 list_move(&orphan->list, &inode->orphan_extents);
3915 level = btrfs_header_level(root->node);
3916 memset(wc->nodes, 0, sizeof(wc->nodes));
3917 wc->nodes[level] = &root_node;
3918 wc->active_node = level;
3919 wc->root_level = level;
3921 /* We may not have checked the root block, lets do that now */
3922 if (btrfs_is_leaf(root->node))
3923 status = btrfs_check_leaf(root, NULL, root->node);
3924 else
3925 status = btrfs_check_node(root, NULL, root->node);
3926 if (status != BTRFS_TREE_BLOCK_CLEAN)
3927 return -EIO;
3929 if (btrfs_root_refs(root_item) > 0 ||
3930 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3931 path.nodes[level] = root->node;
3932 extent_buffer_get(root->node);
3933 path.slots[level] = 0;
3934 } else {
3935 struct btrfs_key key;
3936 struct btrfs_disk_key found_key;
3938 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3939 level = root_item->drop_level;
3940 path.lowest_level = level;
3941 if (level > btrfs_header_level(root->node) ||
3942 level >= BTRFS_MAX_LEVEL) {
3943 error("ignoring invalid drop level: %u", level);
3944 goto skip_walking;
3946 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3947 if (wret < 0)
3948 goto skip_walking;
3949 btrfs_node_key(path.nodes[level], &found_key,
3950 path.slots[level]);
3951 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3952 sizeof(found_key)));
3955 while (1) {
3956 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3957 if (wret < 0)
3958 ret = wret;
3959 if (wret != 0)
3960 break;
3962 wret = walk_up_tree(root, &path, wc, &level);
3963 if (wret < 0)
3964 ret = wret;
3965 if (wret != 0)
3966 break;
3968 skip_walking:
3969 btrfs_release_path(&path);
3971 if (!cache_tree_empty(&corrupt_blocks)) {
3972 struct cache_extent *cache;
3973 struct btrfs_corrupt_block *corrupt;
3975 printf("The following tree block(s) is corrupted in tree %llu:\n",
3976 root->root_key.objectid);
3977 cache = first_cache_extent(&corrupt_blocks);
3978 while (cache) {
3979 corrupt = container_of(cache,
3980 struct btrfs_corrupt_block,
3981 cache);
3982 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3983 cache->start, corrupt->level,
3984 corrupt->key.objectid, corrupt->key.type,
3985 corrupt->key.offset);
3986 cache = next_cache_extent(cache);
3988 if (repair) {
3989 printf("Try to repair the btree for root %llu\n",
3990 root->root_key.objectid);
3991 ret = repair_btree(root, &corrupt_blocks);
3992 if (ret < 0)
3993 fprintf(stderr, "Failed to repair btree: %s\n",
3994 strerror(-ret));
3995 if (!ret)
3996 printf("Btree for root %llu is fixed\n",
3997 root->root_key.objectid);
4001 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4002 if (err < 0)
4003 ret = err;
4005 if (root_node.current) {
4006 root_node.current->checked = 1;
4007 maybe_free_inode_rec(&root_node.inode_cache,
4008 root_node.current);
4011 err = check_inode_recs(root, &root_node.inode_cache);
4012 if (!ret)
4013 ret = err;
4015 free_corrupt_blocks_tree(&corrupt_blocks);
4016 root->fs_info->corrupt_blocks = NULL;
4017 free_orphan_data_extents(&root->orphan_data_extents);
4018 return ret;
4021 static int fs_root_objectid(u64 objectid)
4023 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4024 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4025 return 1;
4026 return is_fstree(objectid);
4029 static int check_fs_roots(struct btrfs_root *root,
4030 struct cache_tree *root_cache)
4032 struct btrfs_path path;
4033 struct btrfs_key key;
4034 struct walk_control wc;
4035 struct extent_buffer *leaf, *tree_node;
4036 struct btrfs_root *tmp_root;
4037 struct btrfs_root *tree_root = root->fs_info->tree_root;
4038 int ret;
4039 int err = 0;
4041 if (ctx.progress_enabled) {
4042 ctx.tp = TASK_FS_ROOTS;
4043 task_start(ctx.info);
4047 * Just in case we made any changes to the extent tree that weren't
4048 * reflected into the free space cache yet.
4050 if (repair)
4051 reset_cached_block_groups(root->fs_info);
4052 memset(&wc, 0, sizeof(wc));
4053 cache_tree_init(&wc.shared);
4054 btrfs_init_path(&path);
4056 again:
4057 key.offset = 0;
4058 key.objectid = 0;
4059 key.type = BTRFS_ROOT_ITEM_KEY;
4060 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4061 if (ret < 0) {
4062 err = 1;
4063 goto out;
4065 tree_node = tree_root->node;
4066 while (1) {
4067 if (tree_node != tree_root->node) {
4068 free_root_recs_tree(root_cache);
4069 btrfs_release_path(&path);
4070 goto again;
4072 leaf = path.nodes[0];
4073 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4074 ret = btrfs_next_leaf(tree_root, &path);
4075 if (ret) {
4076 if (ret < 0)
4077 err = 1;
4078 break;
4080 leaf = path.nodes[0];
4082 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4083 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4084 fs_root_objectid(key.objectid)) {
4085 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4086 tmp_root = btrfs_read_fs_root_no_cache(
4087 root->fs_info, &key);
4088 } else {
4089 key.offset = (u64)-1;
4090 tmp_root = btrfs_read_fs_root(
4091 root->fs_info, &key);
4093 if (IS_ERR(tmp_root)) {
4094 err = 1;
4095 goto next;
4097 ret = check_fs_root(tmp_root, root_cache, &wc);
4098 if (ret == -EAGAIN) {
4099 free_root_recs_tree(root_cache);
4100 btrfs_release_path(&path);
4101 goto again;
4103 if (ret)
4104 err = 1;
4105 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4106 btrfs_free_fs_root(tmp_root);
4107 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4108 key.type == BTRFS_ROOT_BACKREF_KEY) {
4109 process_root_ref(leaf, path.slots[0], &key,
4110 root_cache);
4112 next:
4113 path.slots[0]++;
4115 out:
4116 btrfs_release_path(&path);
4117 if (err)
4118 free_extent_cache_tree(&wc.shared);
4119 if (!cache_tree_empty(&wc.shared))
4120 fprintf(stderr, "warning line %d\n", __LINE__);
4122 task_stop(ctx.info);
4124 return err;
4128 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4129 * INODE_REF/INODE_EXTREF match.
4131 * @root: the root of the fs/file tree
4132 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4133 * @key: the key of the DIR_ITEM/DIR_INDEX
4134 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4135 * distinguish root_dir between normal dir/file
4136 * @name: the name in the INODE_REF/INODE_EXTREF
4137 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4138 * @mode: the st_mode of INODE_ITEM
4140 * Return 0 if no error occurred.
4141 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4142 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4143 * dir/file.
4144 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4145 * not match for normal dir/file.
4147 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4148 struct btrfs_key *key, u64 index, char *name,
4149 u32 namelen, u32 mode)
4151 struct btrfs_path path;
4152 struct extent_buffer *node;
4153 struct btrfs_dir_item *di;
4154 struct btrfs_key location;
4155 char namebuf[BTRFS_NAME_LEN] = {0};
4156 u32 total;
4157 u32 cur = 0;
4158 u32 len;
4159 u32 name_len;
4160 u32 data_len;
4161 u8 filetype;
4162 int slot;
4163 int ret;
4165 btrfs_init_path(&path);
4166 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4167 if (ret < 0) {
4168 ret = DIR_ITEM_MISSING;
4169 goto out;
4172 /* Process root dir and goto out*/
4173 if (index == 0) {
4174 if (ret == 0) {
4175 ret = ROOT_DIR_ERROR;
4176 error(
4177 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4178 root->objectid,
4179 ref_key->type == BTRFS_INODE_REF_KEY ?
4180 "REF" : "EXTREF",
4181 ref_key->objectid, ref_key->offset,
4182 key->type == BTRFS_DIR_ITEM_KEY ?
4183 "DIR_ITEM" : "DIR_INDEX");
4184 } else {
4185 ret = 0;
4188 goto out;
4191 /* Process normal file/dir */
4192 if (ret > 0) {
4193 ret = DIR_ITEM_MISSING;
4194 error(
4195 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4196 root->objectid,
4197 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4198 ref_key->objectid, ref_key->offset,
4199 key->type == BTRFS_DIR_ITEM_KEY ?
4200 "DIR_ITEM" : "DIR_INDEX",
4201 key->objectid, key->offset, namelen, name,
4202 imode_to_type(mode));
4203 goto out;
4206 /* Check whether inode_id/filetype/name match */
4207 node = path.nodes[0];
4208 slot = path.slots[0];
4209 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4210 total = btrfs_item_size_nr(node, slot);
4211 while (cur < total) {
4212 ret = DIR_ITEM_MISMATCH;
4213 name_len = btrfs_dir_name_len(node, di);
4214 data_len = btrfs_dir_data_len(node, di);
4216 btrfs_dir_item_key_to_cpu(node, di, &location);
4217 if (location.objectid != ref_key->objectid ||
4218 location.type != BTRFS_INODE_ITEM_KEY ||
4219 location.offset != 0)
4220 goto next;
4222 filetype = btrfs_dir_type(node, di);
4223 if (imode_to_type(mode) != filetype)
4224 goto next;
4226 if (name_len <= BTRFS_NAME_LEN) {
4227 len = name_len;
4228 } else {
4229 len = BTRFS_NAME_LEN;
4230 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4231 root->objectid,
4232 key->type == BTRFS_DIR_ITEM_KEY ?
4233 "DIR_ITEM" : "DIR_INDEX",
4234 key->objectid, key->offset, name_len);
4236 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4237 if (len != namelen || strncmp(namebuf, name, len))
4238 goto next;
4240 ret = 0;
4241 goto out;
4242 next:
4243 len = sizeof(*di) + name_len + data_len;
4244 di = (struct btrfs_dir_item *)((char *)di + len);
4245 cur += len;
4247 if (ret == DIR_ITEM_MISMATCH)
4248 error(
4249 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4250 root->objectid,
4251 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4252 ref_key->objectid, ref_key->offset,
4253 key->type == BTRFS_DIR_ITEM_KEY ?
4254 "DIR_ITEM" : "DIR_INDEX",
4255 key->objectid, key->offset, namelen, name,
4256 imode_to_type(mode));
4257 out:
4258 btrfs_release_path(&path);
4259 return ret;
4263 * Traverse the given INODE_REF and call find_dir_item() to find related
4264 * DIR_ITEM/DIR_INDEX.
4266 * @root: the root of the fs/file tree
4267 * @ref_key: the key of the INODE_REF
4268 * @refs: the count of INODE_REF
4269 * @mode: the st_mode of INODE_ITEM
4271 * Return 0 if no error occurred.
4273 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4274 struct extent_buffer *node, int slot, u64 *refs,
4275 int mode)
4277 struct btrfs_key key;
4278 struct btrfs_inode_ref *ref;
4279 char namebuf[BTRFS_NAME_LEN] = {0};
4280 u32 total;
4281 u32 cur = 0;
4282 u32 len;
4283 u32 name_len;
4284 u64 index;
4285 int ret, err = 0;
4287 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4288 total = btrfs_item_size_nr(node, slot);
4290 next:
4291 /* Update inode ref count */
4292 (*refs)++;
4294 index = btrfs_inode_ref_index(node, ref);
4295 name_len = btrfs_inode_ref_name_len(node, ref);
4296 if (name_len <= BTRFS_NAME_LEN) {
4297 len = name_len;
4298 } else {
4299 len = BTRFS_NAME_LEN;
4300 warning("root %llu INODE_REF[%llu %llu] name too long",
4301 root->objectid, ref_key->objectid, ref_key->offset);
4304 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4306 /* Check root dir ref name */
4307 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4308 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4309 root->objectid, ref_key->objectid, ref_key->offset,
4310 namebuf);
4311 err |= ROOT_DIR_ERROR;
4314 /* Find related DIR_INDEX */
4315 key.objectid = ref_key->offset;
4316 key.type = BTRFS_DIR_INDEX_KEY;
4317 key.offset = index;
4318 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4319 err |= ret;
4321 /* Find related dir_item */
4322 key.objectid = ref_key->offset;
4323 key.type = BTRFS_DIR_ITEM_KEY;
4324 key.offset = btrfs_name_hash(namebuf, len);
4325 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4326 err |= ret;
4328 len = sizeof(*ref) + name_len;
4329 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4330 cur += len;
4331 if (cur < total)
4332 goto next;
4334 return err;
4338 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4339 * DIR_ITEM/DIR_INDEX.
4341 * @root: the root of the fs/file tree
4342 * @ref_key: the key of the INODE_EXTREF
4343 * @refs: the count of INODE_EXTREF
4344 * @mode: the st_mode of INODE_ITEM
4346 * Return 0 if no error occurred.
4348 static int check_inode_extref(struct btrfs_root *root,
4349 struct btrfs_key *ref_key,
4350 struct extent_buffer *node, int slot, u64 *refs,
4351 int mode)
4353 struct btrfs_key key;
4354 struct btrfs_inode_extref *extref;
4355 char namebuf[BTRFS_NAME_LEN] = {0};
4356 u32 total;
4357 u32 cur = 0;
4358 u32 len;
4359 u32 name_len;
4360 u64 index;
4361 u64 parent;
4362 int ret;
4363 int err = 0;
4365 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4366 total = btrfs_item_size_nr(node, slot);
4368 next:
4369 /* update inode ref count */
4370 (*refs)++;
4371 name_len = btrfs_inode_extref_name_len(node, extref);
4372 index = btrfs_inode_extref_index(node, extref);
4373 parent = btrfs_inode_extref_parent(node, extref);
4374 if (name_len <= BTRFS_NAME_LEN) {
4375 len = name_len;
4376 } else {
4377 len = BTRFS_NAME_LEN;
4378 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4379 root->objectid, ref_key->objectid, ref_key->offset);
4381 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4383 /* Check root dir ref name */
4384 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4385 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4386 root->objectid, ref_key->objectid, ref_key->offset,
4387 namebuf);
4388 err |= ROOT_DIR_ERROR;
4391 /* find related dir_index */
4392 key.objectid = parent;
4393 key.type = BTRFS_DIR_INDEX_KEY;
4394 key.offset = index;
4395 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4396 err |= ret;
4398 /* find related dir_item */
4399 key.objectid = parent;
4400 key.type = BTRFS_DIR_ITEM_KEY;
4401 key.offset = btrfs_name_hash(namebuf, len);
4402 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4403 err |= ret;
4405 len = sizeof(*extref) + name_len;
4406 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4407 cur += len;
4409 if (cur < total)
4410 goto next;
4412 return err;
4416 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4417 * DIR_ITEM/DIR_INDEX match.
4419 * @root: the root of the fs/file tree
4420 * @key: the key of the INODE_REF/INODE_EXTREF
4421 * @name: the name in the INODE_REF/INODE_EXTREF
4422 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4423 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4424 * to (u64)-1
4425 * @ext_ref: the EXTENDED_IREF feature
4427 * Return 0 if no error occurred.
4428 * Return >0 for error bitmap
4430 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4431 char *name, int namelen, u64 index,
4432 unsigned int ext_ref)
4434 struct btrfs_path path;
4435 struct btrfs_inode_ref *ref;
4436 struct btrfs_inode_extref *extref;
4437 struct extent_buffer *node;
4438 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4439 u32 total;
4440 u32 cur = 0;
4441 u32 len;
4442 u32 ref_namelen;
4443 u64 ref_index;
4444 u64 parent;
4445 u64 dir_id;
4446 int slot;
4447 int ret;
4449 btrfs_init_path(&path);
4450 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4451 if (ret) {
4452 ret = INODE_REF_MISSING;
4453 goto extref;
4456 node = path.nodes[0];
4457 slot = path.slots[0];
4459 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4460 total = btrfs_item_size_nr(node, slot);
4462 /* Iterate all entry of INODE_REF */
4463 while (cur < total) {
4464 ret = INODE_REF_MISSING;
4466 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4467 ref_index = btrfs_inode_ref_index(node, ref);
4468 if (index != (u64)-1 && index != ref_index)
4469 goto next_ref;
4471 if (ref_namelen <= BTRFS_NAME_LEN) {
4472 len = ref_namelen;
4473 } else {
4474 len = BTRFS_NAME_LEN;
4475 warning("root %llu INODE %s[%llu %llu] name too long",
4476 root->objectid,
4477 key->type == BTRFS_INODE_REF_KEY ?
4478 "REF" : "EXTREF",
4479 key->objectid, key->offset);
4481 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4482 len);
4484 if (len != namelen || strncmp(ref_namebuf, name, len))
4485 goto next_ref;
4487 ret = 0;
4488 goto out;
4489 next_ref:
4490 len = sizeof(*ref) + ref_namelen;
4491 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4492 cur += len;
4495 extref:
4496 /* Skip if not support EXTENDED_IREF feature */
4497 if (!ext_ref)
4498 goto out;
4500 btrfs_release_path(&path);
4501 btrfs_init_path(&path);
4503 dir_id = key->offset;
4504 key->type = BTRFS_INODE_EXTREF_KEY;
4505 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4507 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4508 if (ret) {
4509 ret = INODE_REF_MISSING;
4510 goto out;
4513 node = path.nodes[0];
4514 slot = path.slots[0];
4516 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4517 cur = 0;
4518 total = btrfs_item_size_nr(node, slot);
4520 /* Iterate all entry of INODE_EXTREF */
4521 while (cur < total) {
4522 ret = INODE_REF_MISSING;
4524 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4525 ref_index = btrfs_inode_extref_index(node, extref);
4526 parent = btrfs_inode_extref_parent(node, extref);
4527 if (index != (u64)-1 && index != ref_index)
4528 goto next_extref;
4530 if (parent != dir_id)
4531 goto next_extref;
4533 if (ref_namelen <= BTRFS_NAME_LEN) {
4534 len = ref_namelen;
4535 } else {
4536 len = BTRFS_NAME_LEN;
4537 warning("root %llu INODE %s[%llu %llu] name too long",
4538 root->objectid,
4539 key->type == BTRFS_INODE_REF_KEY ?
4540 "REF" : "EXTREF",
4541 key->objectid, key->offset);
4543 read_extent_buffer(node, ref_namebuf,
4544 (unsigned long)(extref + 1), len);
4546 if (len != namelen || strncmp(ref_namebuf, name, len))
4547 goto next_extref;
4549 ret = 0;
4550 goto out;
4552 next_extref:
4553 len = sizeof(*extref) + ref_namelen;
4554 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4555 cur += len;
4558 out:
4559 btrfs_release_path(&path);
4560 return ret;
4564 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4565 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4567 * @root: the root of the fs/file tree
4568 * @key: the key of the INODE_REF/INODE_EXTREF
4569 * @size: the st_size of the INODE_ITEM
4570 * @ext_ref: the EXTENDED_IREF feature
4572 * Return 0 if no error occurred.
4574 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4575 struct extent_buffer *node, int slot, u64 *size,
4576 unsigned int ext_ref)
4578 struct btrfs_dir_item *di;
4579 struct btrfs_inode_item *ii;
4580 struct btrfs_path path;
4581 struct btrfs_key location;
4582 char namebuf[BTRFS_NAME_LEN] = {0};
4583 u32 total;
4584 u32 cur = 0;
4585 u32 len;
4586 u32 name_len;
4587 u32 data_len;
4588 u8 filetype;
4589 u32 mode;
4590 u64 index;
4591 int ret;
4592 int err = 0;
4595 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4596 * ignore index check.
4598 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4600 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4601 total = btrfs_item_size_nr(node, slot);
4603 while (cur < total) {
4604 data_len = btrfs_dir_data_len(node, di);
4605 if (data_len)
4606 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4607 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4608 "DIR_ITEM" : "DIR_INDEX",
4609 key->objectid, key->offset, data_len);
4611 name_len = btrfs_dir_name_len(node, di);
4612 if (name_len <= BTRFS_NAME_LEN) {
4613 len = name_len;
4614 } else {
4615 len = BTRFS_NAME_LEN;
4616 warning("root %llu %s[%llu %llu] name too long",
4617 root->objectid,
4618 key->type == BTRFS_DIR_ITEM_KEY ?
4619 "DIR_ITEM" : "DIR_INDEX",
4620 key->objectid, key->offset);
4622 (*size) += name_len;
4624 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4625 filetype = btrfs_dir_type(node, di);
4627 btrfs_init_path(&path);
4628 btrfs_dir_item_key_to_cpu(node, di, &location);
4630 /* Ignore related ROOT_ITEM check */
4631 if (location.type == BTRFS_ROOT_ITEM_KEY)
4632 goto next;
4634 /* Check relative INODE_ITEM(existence/filetype) */
4635 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4636 if (ret) {
4637 err |= INODE_ITEM_MISSING;
4638 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4639 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4640 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4641 key->offset, location.objectid, name_len,
4642 namebuf, filetype);
4643 goto next;
4646 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4647 struct btrfs_inode_item);
4648 mode = btrfs_inode_mode(path.nodes[0], ii);
4650 if (imode_to_type(mode) != filetype) {
4651 err |= INODE_ITEM_MISMATCH;
4652 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4653 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4655 key->offset, name_len, namebuf, filetype);
4658 /* Check relative INODE_REF/INODE_EXTREF */
4659 location.type = BTRFS_INODE_REF_KEY;
4660 location.offset = key->objectid;
4661 ret = find_inode_ref(root, &location, namebuf, len,
4662 index, ext_ref);
4663 err |= ret;
4664 if (ret & INODE_REF_MISSING)
4665 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4666 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4667 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4668 key->offset, name_len, namebuf, filetype);
4670 next:
4671 btrfs_release_path(&path);
4672 len = sizeof(*di) + name_len + data_len;
4673 di = (struct btrfs_dir_item *)((char *)di + len);
4674 cur += len;
4676 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4677 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4678 root->objectid, key->objectid, key->offset);
4679 break;
4683 return err;
4687 * Check file extent datasum/hole, update the size of the file extents,
4688 * check and update the last offset of the file extent.
4690 * @root: the root of fs/file tree.
4691 * @fkey: the key of the file extent.
4692 * @nodatasum: INODE_NODATASUM feature.
4693 * @size: the sum of all EXTENT_DATA items size for this inode.
4694 * @end: the offset of the last extent.
4696 * Return 0 if no error occurred.
4698 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4699 struct extent_buffer *node, int slot,
4700 unsigned int nodatasum, u64 *size, u64 *end)
4702 struct btrfs_file_extent_item *fi;
4703 u64 disk_bytenr;
4704 u64 disk_num_bytes;
4705 u64 extent_num_bytes;
4706 u64 found;
4707 unsigned int extent_type;
4708 unsigned int is_hole;
4709 int ret;
4710 int err = 0;
4712 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4714 extent_type = btrfs_file_extent_type(node, fi);
4715 /* Skip if file extent is inline */
4716 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717 struct btrfs_item *e = btrfs_item_nr(slot);
4718 u32 item_inline_len;
4720 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722 if (extent_num_bytes == 0 ||
4723 extent_num_bytes != item_inline_len)
4724 err |= FILE_EXTENT_ERROR;
4725 *size += extent_num_bytes;
4726 return err;
4729 /* Check extent type */
4730 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4731 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4732 err |= FILE_EXTENT_ERROR;
4733 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4734 root->objectid, fkey->objectid, fkey->offset);
4735 return err;
4738 /* Check REG_EXTENT/PREALLOC_EXTENT */
4739 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4740 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4741 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4742 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4744 /* Check EXTENT_DATA datasum */
4745 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4746 if (found > 0 && nodatasum) {
4747 err |= ODD_CSUM_ITEM;
4748 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4749 root->objectid, fkey->objectid, fkey->offset);
4750 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4751 !is_hole &&
4752 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4753 err |= CSUM_ITEM_MISSING;
4754 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4755 root->objectid, fkey->objectid, fkey->offset);
4756 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4757 err |= ODD_CSUM_ITEM;
4758 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4759 root->objectid, fkey->objectid, fkey->offset);
4762 /* Check EXTENT_DATA hole */
4763 if (no_holes && is_hole) {
4764 err |= FILE_EXTENT_ERROR;
4765 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4766 root->objectid, fkey->objectid, fkey->offset);
4767 } else if (!no_holes && *end != fkey->offset) {
4768 err |= FILE_EXTENT_ERROR;
4769 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4770 root->objectid, fkey->objectid, fkey->offset);
4773 *end += extent_num_bytes;
4774 if (!is_hole)
4775 *size += extent_num_bytes;
4777 return err;
4781 * Check INODE_ITEM and related ITEMs (the same inode number)
4782 * 1. check link count
4783 * 2. check inode ref/extref
4784 * 3. check dir item/index
4786 * @ext_ref: the EXTENDED_IREF feature
4788 * Return 0 if no error occurred.
4789 * Return >0 for error or hit the traversal is done(by error bitmap)
4791 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4792 unsigned int ext_ref)
4794 struct extent_buffer *node;
4795 struct btrfs_inode_item *ii;
4796 struct btrfs_key key;
4797 u64 inode_id;
4798 u32 mode;
4799 u64 nlink;
4800 u64 nbytes;
4801 u64 isize;
4802 u64 size = 0;
4803 u64 refs = 0;
4804 u64 extent_end = 0;
4805 u64 extent_size = 0;
4806 unsigned int dir;
4807 unsigned int nodatasum;
4808 int slot;
4809 int ret;
4810 int err = 0;
4812 node = path->nodes[0];
4813 slot = path->slots[0];
4815 btrfs_item_key_to_cpu(node, &key, slot);
4816 inode_id = key.objectid;
4818 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4819 ret = btrfs_next_item(root, path);
4820 if (ret > 0)
4821 err |= LAST_ITEM;
4822 return err;
4825 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4826 isize = btrfs_inode_size(node, ii);
4827 nbytes = btrfs_inode_nbytes(node, ii);
4828 mode = btrfs_inode_mode(node, ii);
4829 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4830 nlink = btrfs_inode_nlink(node, ii);
4831 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4833 while (1) {
4834 ret = btrfs_next_item(root, path);
4835 if (ret < 0) {
4836 /* out will fill 'err' rusing current statistics */
4837 goto out;
4838 } else if (ret > 0) {
4839 err |= LAST_ITEM;
4840 goto out;
4843 node = path->nodes[0];
4844 slot = path->slots[0];
4845 btrfs_item_key_to_cpu(node, &key, slot);
4846 if (key.objectid != inode_id)
4847 goto out;
4849 switch (key.type) {
4850 case BTRFS_INODE_REF_KEY:
4851 ret = check_inode_ref(root, &key, node, slot, &refs,
4852 mode);
4853 err |= ret;
4854 break;
4855 case BTRFS_INODE_EXTREF_KEY:
4856 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4857 warning("root %llu EXTREF[%llu %llu] isn't supported",
4858 root->objectid, key.objectid,
4859 key.offset);
4860 ret = check_inode_extref(root, &key, node, slot, &refs,
4861 mode);
4862 err |= ret;
4863 break;
4864 case BTRFS_DIR_ITEM_KEY:
4865 case BTRFS_DIR_INDEX_KEY:
4866 if (!dir) {
4867 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4868 root->objectid, inode_id,
4869 imode_to_type(mode), key.objectid,
4870 key.offset);
4872 ret = check_dir_item(root, &key, node, slot, &size,
4873 ext_ref);
4874 err |= ret;
4875 break;
4876 case BTRFS_EXTENT_DATA_KEY:
4877 if (dir) {
4878 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4879 root->objectid, inode_id, key.objectid,
4880 key.offset);
4882 ret = check_file_extent(root, &key, node, slot,
4883 nodatasum, &extent_size,
4884 &extent_end);
4885 err |= ret;
4886 break;
4887 case BTRFS_XATTR_ITEM_KEY:
4888 break;
4889 default:
4890 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4891 key.objectid, key.type, key.offset);
4895 out:
4896 /* verify INODE_ITEM nlink/isize/nbytes */
4897 if (dir) {
4898 if (nlink != 1) {
4899 err |= LINK_COUNT_ERROR;
4900 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4901 root->objectid, inode_id, nlink);
4905 * Just a warning, as dir inode nbytes is just an
4906 * instructive value.
4908 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4909 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4910 root->objectid, inode_id, root->nodesize);
4913 if (isize != size) {
4914 err |= ISIZE_ERROR;
4915 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4916 root->objectid, inode_id, isize, size);
4918 } else {
4919 if (nlink != refs) {
4920 err |= LINK_COUNT_ERROR;
4921 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4922 root->objectid, inode_id, nlink, refs);
4923 } else if (!nlink) {
4924 err |= ORPHAN_ITEM;
4927 if (!nbytes && !no_holes && extent_end < isize) {
4928 err |= NBYTES_ERROR;
4929 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4930 root->objectid, inode_id, isize);
4933 if (nbytes != extent_size) {
4934 err |= NBYTES_ERROR;
4935 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4936 root->objectid, inode_id, nbytes, extent_size);
4940 return err;
4943 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4945 struct btrfs_path path;
4946 struct btrfs_key key;
4947 int err = 0;
4948 int ret;
4950 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4951 key.type = BTRFS_INODE_ITEM_KEY;
4952 key.offset = 0;
4954 /* For root being dropped, we don't need to check first inode */
4955 if (btrfs_root_refs(&root->root_item) == 0 &&
4956 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4957 key.objectid)
4958 return 0;
4960 btrfs_init_path(&path);
4962 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4963 if (ret < 0)
4964 goto out;
4965 if (ret > 0) {
4966 ret = 0;
4967 err |= INODE_ITEM_MISSING;
4970 err |= check_inode_item(root, &path, ext_ref);
4971 err &= ~LAST_ITEM;
4972 if (err && !ret)
4973 ret = -EIO;
4974 out:
4975 btrfs_release_path(&path);
4976 return ret;
4980 * Iterate all item on the tree and call check_inode_item() to check.
4982 * @root: the root of the tree to be checked.
4983 * @ext_ref: the EXTENDED_IREF feature
4985 * Return 0 if no error found.
4986 * Return <0 for error.
4988 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4990 struct btrfs_path path;
4991 struct node_refs nrefs;
4992 struct btrfs_root_item *root_item = &root->root_item;
4993 int ret, wret;
4994 int level;
4997 * We need to manually check the first inode item(256)
4998 * As the following traversal function will only start from
4999 * the first inode item in the leaf, if inode item(256) is missing
5000 * we will just skip it forever.
5002 ret = check_fs_first_inode(root, ext_ref);
5003 if (ret < 0)
5004 return ret;
5006 memset(&nrefs, 0, sizeof(nrefs));
5007 level = btrfs_header_level(root->node);
5008 btrfs_init_path(&path);
5010 if (btrfs_root_refs(root_item) > 0 ||
5011 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5012 path.nodes[level] = root->node;
5013 path.slots[level] = 0;
5014 extent_buffer_get(root->node);
5015 } else {
5016 struct btrfs_key key;
5018 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5019 level = root_item->drop_level;
5020 path.lowest_level = level;
5021 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5022 if (ret < 0)
5023 goto out;
5024 ret = 0;
5027 while (1) {
5028 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5029 if (wret < 0)
5030 ret = wret;
5031 if (wret != 0)
5032 break;
5034 wret = walk_up_tree_v2(root, &path, &level);
5035 if (wret < 0)
5036 ret = wret;
5037 if (wret != 0)
5038 break;
5041 out:
5042 btrfs_release_path(&path);
5043 return ret;
5047 * Find the relative ref for root_ref and root_backref.
5049 * @root: the root of the root tree.
5050 * @ref_key: the key of the root ref.
5052 * Return 0 if no error occurred.
5054 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5055 struct extent_buffer *node, int slot)
5057 struct btrfs_path path;
5058 struct btrfs_key key;
5059 struct btrfs_root_ref *ref;
5060 struct btrfs_root_ref *backref;
5061 char ref_name[BTRFS_NAME_LEN] = {0};
5062 char backref_name[BTRFS_NAME_LEN] = {0};
5063 u64 ref_dirid;
5064 u64 ref_seq;
5065 u32 ref_namelen;
5066 u64 backref_dirid;
5067 u64 backref_seq;
5068 u32 backref_namelen;
5069 u32 len;
5070 int ret;
5071 int err = 0;
5073 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5074 ref_dirid = btrfs_root_ref_dirid(node, ref);
5075 ref_seq = btrfs_root_ref_sequence(node, ref);
5076 ref_namelen = btrfs_root_ref_name_len(node, ref);
5078 if (ref_namelen <= BTRFS_NAME_LEN) {
5079 len = ref_namelen;
5080 } else {
5081 len = BTRFS_NAME_LEN;
5082 warning("%s[%llu %llu] ref_name too long",
5083 ref_key->type == BTRFS_ROOT_REF_KEY ?
5084 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5085 ref_key->offset);
5087 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5089 /* Find relative root_ref */
5090 key.objectid = ref_key->offset;
5091 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5092 key.offset = ref_key->objectid;
5094 btrfs_init_path(&path);
5095 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5096 if (ret) {
5097 err |= ROOT_REF_MISSING;
5098 error("%s[%llu %llu] couldn't find relative ref",
5099 ref_key->type == BTRFS_ROOT_REF_KEY ?
5100 "ROOT_REF" : "ROOT_BACKREF",
5101 ref_key->objectid, ref_key->offset);
5102 goto out;
5105 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5106 struct btrfs_root_ref);
5107 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5108 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5109 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5111 if (backref_namelen <= BTRFS_NAME_LEN) {
5112 len = backref_namelen;
5113 } else {
5114 len = BTRFS_NAME_LEN;
5115 warning("%s[%llu %llu] ref_name too long",
5116 key.type == BTRFS_ROOT_REF_KEY ?
5117 "ROOT_REF" : "ROOT_BACKREF",
5118 key.objectid, key.offset);
5120 read_extent_buffer(path.nodes[0], backref_name,
5121 (unsigned long)(backref + 1), len);
5123 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5124 ref_namelen != backref_namelen ||
5125 strncmp(ref_name, backref_name, len)) {
5126 err |= ROOT_REF_MISMATCH;
5127 error("%s[%llu %llu] mismatch relative ref",
5128 ref_key->type == BTRFS_ROOT_REF_KEY ?
5129 "ROOT_REF" : "ROOT_BACKREF",
5130 ref_key->objectid, ref_key->offset);
5132 out:
5133 btrfs_release_path(&path);
5134 return err;
5138 * Check all fs/file tree in low_memory mode.
5140 * 1. for fs tree root item, call check_fs_root_v2()
5141 * 2. for fs tree root ref/backref, call check_root_ref()
5143 * Return 0 if no error occurred.
5145 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5147 struct btrfs_root *tree_root = fs_info->tree_root;
5148 struct btrfs_root *cur_root = NULL;
5149 struct btrfs_path path;
5150 struct btrfs_key key;
5151 struct extent_buffer *node;
5152 unsigned int ext_ref;
5153 int slot;
5154 int ret;
5155 int err = 0;
5157 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5159 btrfs_init_path(&path);
5160 key.objectid = BTRFS_FS_TREE_OBJECTID;
5161 key.offset = 0;
5162 key.type = BTRFS_ROOT_ITEM_KEY;
5164 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5165 if (ret < 0) {
5166 err = ret;
5167 goto out;
5168 } else if (ret > 0) {
5169 err = -ENOENT;
5170 goto out;
5173 while (1) {
5174 node = path.nodes[0];
5175 slot = path.slots[0];
5176 btrfs_item_key_to_cpu(node, &key, slot);
5177 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5178 goto out;
5179 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5180 fs_root_objectid(key.objectid)) {
5181 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5182 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5183 &key);
5184 } else {
5185 key.offset = (u64)-1;
5186 cur_root = btrfs_read_fs_root(fs_info, &key);
5189 if (IS_ERR(cur_root)) {
5190 error("Fail to read fs/subvol tree: %lld",
5191 key.objectid);
5192 err = -EIO;
5193 goto next;
5196 ret = check_fs_root_v2(cur_root, ext_ref);
5197 err |= ret;
5199 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5200 btrfs_free_fs_root(cur_root);
5201 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5202 key.type == BTRFS_ROOT_BACKREF_KEY) {
5203 ret = check_root_ref(tree_root, &key, node, slot);
5204 err |= ret;
5206 next:
5207 ret = btrfs_next_item(tree_root, &path);
5208 if (ret > 0)
5209 goto out;
5210 if (ret < 0) {
5211 err = ret;
5212 goto out;
5216 out:
5217 btrfs_release_path(&path);
5218 return err;
5221 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5223 struct list_head *cur = rec->backrefs.next;
5224 struct extent_backref *back;
5225 struct tree_backref *tback;
5226 struct data_backref *dback;
5227 u64 found = 0;
5228 int err = 0;
5230 while(cur != &rec->backrefs) {
5231 back = to_extent_backref(cur);
5232 cur = cur->next;
5233 if (!back->found_extent_tree) {
5234 err = 1;
5235 if (!print_errs)
5236 goto out;
5237 if (back->is_data) {
5238 dback = to_data_backref(back);
5239 fprintf(stderr, "Backref %llu %s %llu"
5240 " owner %llu offset %llu num_refs %lu"
5241 " not found in extent tree\n",
5242 (unsigned long long)rec->start,
5243 back->full_backref ?
5244 "parent" : "root",
5245 back->full_backref ?
5246 (unsigned long long)dback->parent:
5247 (unsigned long long)dback->root,
5248 (unsigned long long)dback->owner,
5249 (unsigned long long)dback->offset,
5250 (unsigned long)dback->num_refs);
5251 } else {
5252 tback = to_tree_backref(back);
5253 fprintf(stderr, "Backref %llu parent %llu"
5254 " root %llu not found in extent tree\n",
5255 (unsigned long long)rec->start,
5256 (unsigned long long)tback->parent,
5257 (unsigned long long)tback->root);
5260 if (!back->is_data && !back->found_ref) {
5261 err = 1;
5262 if (!print_errs)
5263 goto out;
5264 tback = to_tree_backref(back);
5265 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5266 (unsigned long long)rec->start,
5267 back->full_backref ? "parent" : "root",
5268 back->full_backref ?
5269 (unsigned long long)tback->parent :
5270 (unsigned long long)tback->root, back);
5272 if (back->is_data) {
5273 dback = to_data_backref(back);
5274 if (dback->found_ref != dback->num_refs) {
5275 err = 1;
5276 if (!print_errs)
5277 goto out;
5278 fprintf(stderr, "Incorrect local backref count"
5279 " on %llu %s %llu owner %llu"
5280 " offset %llu found %u wanted %u back %p\n",
5281 (unsigned long long)rec->start,
5282 back->full_backref ?
5283 "parent" : "root",
5284 back->full_backref ?
5285 (unsigned long long)dback->parent:
5286 (unsigned long long)dback->root,
5287 (unsigned long long)dback->owner,
5288 (unsigned long long)dback->offset,
5289 dback->found_ref, dback->num_refs, back);
5291 if (dback->disk_bytenr != rec->start) {
5292 err = 1;
5293 if (!print_errs)
5294 goto out;
5295 fprintf(stderr, "Backref disk bytenr does not"
5296 " match extent record, bytenr=%llu, "
5297 "ref bytenr=%llu\n",
5298 (unsigned long long)rec->start,
5299 (unsigned long long)dback->disk_bytenr);
5302 if (dback->bytes != rec->nr) {
5303 err = 1;
5304 if (!print_errs)
5305 goto out;
5306 fprintf(stderr, "Backref bytes do not match "
5307 "extent backref, bytenr=%llu, ref "
5308 "bytes=%llu, backref bytes=%llu\n",
5309 (unsigned long long)rec->start,
5310 (unsigned long long)rec->nr,
5311 (unsigned long long)dback->bytes);
5314 if (!back->is_data) {
5315 found += 1;
5316 } else {
5317 dback = to_data_backref(back);
5318 found += dback->found_ref;
5321 if (found != rec->refs) {
5322 err = 1;
5323 if (!print_errs)
5324 goto out;
5325 fprintf(stderr, "Incorrect global backref count "
5326 "on %llu found %llu wanted %llu\n",
5327 (unsigned long long)rec->start,
5328 (unsigned long long)found,
5329 (unsigned long long)rec->refs);
5331 out:
5332 return err;
5335 static int free_all_extent_backrefs(struct extent_record *rec)
5337 struct extent_backref *back;
5338 struct list_head *cur;
5339 while (!list_empty(&rec->backrefs)) {
5340 cur = rec->backrefs.next;
5341 back = to_extent_backref(cur);
5342 list_del(cur);
5343 free(back);
5345 return 0;
5348 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5349 struct cache_tree *extent_cache)
5351 struct cache_extent *cache;
5352 struct extent_record *rec;
5354 while (1) {
5355 cache = first_cache_extent(extent_cache);
5356 if (!cache)
5357 break;
5358 rec = container_of(cache, struct extent_record, cache);
5359 remove_cache_extent(extent_cache, cache);
5360 free_all_extent_backrefs(rec);
5361 free(rec);
5365 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5366 struct extent_record *rec)
5368 if (rec->content_checked && rec->owner_ref_checked &&
5369 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5370 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5371 !rec->bad_full_backref && !rec->crossing_stripes &&
5372 !rec->wrong_chunk_type) {
5373 remove_cache_extent(extent_cache, &rec->cache);
5374 free_all_extent_backrefs(rec);
5375 list_del_init(&rec->list);
5376 free(rec);
5378 return 0;
5381 static int check_owner_ref(struct btrfs_root *root,
5382 struct extent_record *rec,
5383 struct extent_buffer *buf)
5385 struct extent_backref *node;
5386 struct tree_backref *back;
5387 struct btrfs_root *ref_root;
5388 struct btrfs_key key;
5389 struct btrfs_path path;
5390 struct extent_buffer *parent;
5391 int level;
5392 int found = 0;
5393 int ret;
5395 list_for_each_entry(node, &rec->backrefs, list) {
5396 if (node->is_data)
5397 continue;
5398 if (!node->found_ref)
5399 continue;
5400 if (node->full_backref)
5401 continue;
5402 back = to_tree_backref(node);
5403 if (btrfs_header_owner(buf) == back->root)
5404 return 0;
5406 BUG_ON(rec->is_root);
5408 /* try to find the block by search corresponding fs tree */
5409 key.objectid = btrfs_header_owner(buf);
5410 key.type = BTRFS_ROOT_ITEM_KEY;
5411 key.offset = (u64)-1;
5413 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5414 if (IS_ERR(ref_root))
5415 return 1;
5417 level = btrfs_header_level(buf);
5418 if (level == 0)
5419 btrfs_item_key_to_cpu(buf, &key, 0);
5420 else
5421 btrfs_node_key_to_cpu(buf, &key, 0);
5423 btrfs_init_path(&path);
5424 path.lowest_level = level + 1;
5425 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5426 if (ret < 0)
5427 return 0;
5429 parent = path.nodes[level + 1];
5430 if (parent && buf->start == btrfs_node_blockptr(parent,
5431 path.slots[level + 1]))
5432 found = 1;
5434 btrfs_release_path(&path);
5435 return found ? 0 : 1;
5438 static int is_extent_tree_record(struct extent_record *rec)
5440 struct list_head *cur = rec->backrefs.next;
5441 struct extent_backref *node;
5442 struct tree_backref *back;
5443 int is_extent = 0;
5445 while(cur != &rec->backrefs) {
5446 node = to_extent_backref(cur);
5447 cur = cur->next;
5448 if (node->is_data)
5449 return 0;
5450 back = to_tree_backref(node);
5451 if (node->full_backref)
5452 return 0;
5453 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5454 is_extent = 1;
5456 return is_extent;
5460 static int record_bad_block_io(struct btrfs_fs_info *info,
5461 struct cache_tree *extent_cache,
5462 u64 start, u64 len)
5464 struct extent_record *rec;
5465 struct cache_extent *cache;
5466 struct btrfs_key key;
5468 cache = lookup_cache_extent(extent_cache, start, len);
5469 if (!cache)
5470 return 0;
5472 rec = container_of(cache, struct extent_record, cache);
5473 if (!is_extent_tree_record(rec))
5474 return 0;
5476 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5477 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5480 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5481 struct extent_buffer *buf, int slot)
5483 if (btrfs_header_level(buf)) {
5484 struct btrfs_key_ptr ptr1, ptr2;
5486 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5487 sizeof(struct btrfs_key_ptr));
5488 read_extent_buffer(buf, &ptr2,
5489 btrfs_node_key_ptr_offset(slot + 1),
5490 sizeof(struct btrfs_key_ptr));
5491 write_extent_buffer(buf, &ptr1,
5492 btrfs_node_key_ptr_offset(slot + 1),
5493 sizeof(struct btrfs_key_ptr));
5494 write_extent_buffer(buf, &ptr2,
5495 btrfs_node_key_ptr_offset(slot),
5496 sizeof(struct btrfs_key_ptr));
5497 if (slot == 0) {
5498 struct btrfs_disk_key key;
5499 btrfs_node_key(buf, &key, 0);
5500 btrfs_fixup_low_keys(root, path, &key,
5501 btrfs_header_level(buf) + 1);
5503 } else {
5504 struct btrfs_item *item1, *item2;
5505 struct btrfs_key k1, k2;
5506 char *item1_data, *item2_data;
5507 u32 item1_offset, item2_offset, item1_size, item2_size;
5509 item1 = btrfs_item_nr(slot);
5510 item2 = btrfs_item_nr(slot + 1);
5511 btrfs_item_key_to_cpu(buf, &k1, slot);
5512 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5513 item1_offset = btrfs_item_offset(buf, item1);
5514 item2_offset = btrfs_item_offset(buf, item2);
5515 item1_size = btrfs_item_size(buf, item1);
5516 item2_size = btrfs_item_size(buf, item2);
5518 item1_data = malloc(item1_size);
5519 if (!item1_data)
5520 return -ENOMEM;
5521 item2_data = malloc(item2_size);
5522 if (!item2_data) {
5523 free(item1_data);
5524 return -ENOMEM;
5527 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5528 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5530 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5531 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5532 free(item1_data);
5533 free(item2_data);
5535 btrfs_set_item_offset(buf, item1, item2_offset);
5536 btrfs_set_item_offset(buf, item2, item1_offset);
5537 btrfs_set_item_size(buf, item1, item2_size);
5538 btrfs_set_item_size(buf, item2, item1_size);
5540 path->slots[0] = slot;
5541 btrfs_set_item_key_unsafe(root, path, &k2);
5542 path->slots[0] = slot + 1;
5543 btrfs_set_item_key_unsafe(root, path, &k1);
5545 return 0;
5548 static int fix_key_order(struct btrfs_trans_handle *trans,
5549 struct btrfs_root *root,
5550 struct btrfs_path *path)
5552 struct extent_buffer *buf;
5553 struct btrfs_key k1, k2;
5554 int i;
5555 int level = path->lowest_level;
5556 int ret = -EIO;
5558 buf = path->nodes[level];
5559 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5560 if (level) {
5561 btrfs_node_key_to_cpu(buf, &k1, i);
5562 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5563 } else {
5564 btrfs_item_key_to_cpu(buf, &k1, i);
5565 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5567 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5568 continue;
5569 ret = swap_values(root, path, buf, i);
5570 if (ret)
5571 break;
5572 btrfs_mark_buffer_dirty(buf);
5573 i = 0;
5575 return ret;
5578 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5579 struct btrfs_root *root,
5580 struct btrfs_path *path,
5581 struct extent_buffer *buf, int slot)
5583 struct btrfs_key key;
5584 int nritems = btrfs_header_nritems(buf);
5586 btrfs_item_key_to_cpu(buf, &key, slot);
5588 /* These are all the keys we can deal with missing. */
5589 if (key.type != BTRFS_DIR_INDEX_KEY &&
5590 key.type != BTRFS_EXTENT_ITEM_KEY &&
5591 key.type != BTRFS_METADATA_ITEM_KEY &&
5592 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5593 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5594 return -1;
5596 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5597 (unsigned long long)key.objectid, key.type,
5598 (unsigned long long)key.offset, slot, buf->start);
5599 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5600 btrfs_item_nr_offset(slot + 1),
5601 sizeof(struct btrfs_item) *
5602 (nritems - slot - 1));
5603 btrfs_set_header_nritems(buf, nritems - 1);
5604 if (slot == 0) {
5605 struct btrfs_disk_key disk_key;
5607 btrfs_item_key(buf, &disk_key, 0);
5608 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5610 btrfs_mark_buffer_dirty(buf);
5611 return 0;
5614 static int fix_item_offset(struct btrfs_trans_handle *trans,
5615 struct btrfs_root *root,
5616 struct btrfs_path *path)
5618 struct extent_buffer *buf;
5619 int i;
5620 int ret = 0;
5622 /* We should only get this for leaves */
5623 BUG_ON(path->lowest_level);
5624 buf = path->nodes[0];
5625 again:
5626 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5627 unsigned int shift = 0, offset;
5629 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5630 BTRFS_LEAF_DATA_SIZE(root)) {
5631 if (btrfs_item_end_nr(buf, i) >
5632 BTRFS_LEAF_DATA_SIZE(root)) {
5633 ret = delete_bogus_item(trans, root, path,
5634 buf, i);
5635 if (!ret)
5636 goto again;
5637 fprintf(stderr, "item is off the end of the "
5638 "leaf, can't fix\n");
5639 ret = -EIO;
5640 break;
5642 shift = BTRFS_LEAF_DATA_SIZE(root) -
5643 btrfs_item_end_nr(buf, i);
5644 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5645 btrfs_item_offset_nr(buf, i - 1)) {
5646 if (btrfs_item_end_nr(buf, i) >
5647 btrfs_item_offset_nr(buf, i - 1)) {
5648 ret = delete_bogus_item(trans, root, path,
5649 buf, i);
5650 if (!ret)
5651 goto again;
5652 fprintf(stderr, "items overlap, can't fix\n");
5653 ret = -EIO;
5654 break;
5656 shift = btrfs_item_offset_nr(buf, i - 1) -
5657 btrfs_item_end_nr(buf, i);
5659 if (!shift)
5660 continue;
5662 printf("Shifting item nr %d by %u bytes in block %llu\n",
5663 i, shift, (unsigned long long)buf->start);
5664 offset = btrfs_item_offset_nr(buf, i);
5665 memmove_extent_buffer(buf,
5666 btrfs_leaf_data(buf) + offset + shift,
5667 btrfs_leaf_data(buf) + offset,
5668 btrfs_item_size_nr(buf, i));
5669 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5670 offset + shift);
5671 btrfs_mark_buffer_dirty(buf);
5675 * We may have moved things, in which case we want to exit so we don't
5676 * write those changes out. Once we have proper abort functionality in
5677 * progs this can be changed to something nicer.
5679 BUG_ON(ret);
5680 return ret;
5684 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5685 * then just return -EIO.
5687 static int try_to_fix_bad_block(struct btrfs_root *root,
5688 struct extent_buffer *buf,
5689 enum btrfs_tree_block_status status)
5691 struct btrfs_trans_handle *trans;
5692 struct ulist *roots;
5693 struct ulist_node *node;
5694 struct btrfs_root *search_root;
5695 struct btrfs_path path;
5696 struct ulist_iterator iter;
5697 struct btrfs_key root_key, key;
5698 int ret;
5700 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5701 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5702 return -EIO;
5704 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5705 if (ret)
5706 return -EIO;
5708 btrfs_init_path(&path);
5709 ULIST_ITER_INIT(&iter);
5710 while ((node = ulist_next(roots, &iter))) {
5711 root_key.objectid = node->val;
5712 root_key.type = BTRFS_ROOT_ITEM_KEY;
5713 root_key.offset = (u64)-1;
5715 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5716 if (IS_ERR(root)) {
5717 ret = -EIO;
5718 break;
5722 trans = btrfs_start_transaction(search_root, 0);
5723 if (IS_ERR(trans)) {
5724 ret = PTR_ERR(trans);
5725 break;
5728 path.lowest_level = btrfs_header_level(buf);
5729 path.skip_check_block = 1;
5730 if (path.lowest_level)
5731 btrfs_node_key_to_cpu(buf, &key, 0);
5732 else
5733 btrfs_item_key_to_cpu(buf, &key, 0);
5734 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5735 if (ret) {
5736 ret = -EIO;
5737 btrfs_commit_transaction(trans, search_root);
5738 break;
5740 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5741 ret = fix_key_order(trans, search_root, &path);
5742 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5743 ret = fix_item_offset(trans, search_root, &path);
5744 if (ret) {
5745 btrfs_commit_transaction(trans, search_root);
5746 break;
5748 btrfs_release_path(&path);
5749 btrfs_commit_transaction(trans, search_root);
5751 ulist_free(roots);
5752 btrfs_release_path(&path);
5753 return ret;
5756 static int check_block(struct btrfs_root *root,
5757 struct cache_tree *extent_cache,
5758 struct extent_buffer *buf, u64 flags)
5760 struct extent_record *rec;
5761 struct cache_extent *cache;
5762 struct btrfs_key key;
5763 enum btrfs_tree_block_status status;
5764 int ret = 0;
5765 int level;
5767 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5768 if (!cache)
5769 return 1;
5770 rec = container_of(cache, struct extent_record, cache);
5771 rec->generation = btrfs_header_generation(buf);
5773 level = btrfs_header_level(buf);
5774 if (btrfs_header_nritems(buf) > 0) {
5776 if (level == 0)
5777 btrfs_item_key_to_cpu(buf, &key, 0);
5778 else
5779 btrfs_node_key_to_cpu(buf, &key, 0);
5781 rec->info_objectid = key.objectid;
5783 rec->info_level = level;
5785 if (btrfs_is_leaf(buf))
5786 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5787 else
5788 status = btrfs_check_node(root, &rec->parent_key, buf);
5790 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5791 if (repair)
5792 status = try_to_fix_bad_block(root, buf, status);
5793 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5794 ret = -EIO;
5795 fprintf(stderr, "bad block %llu\n",
5796 (unsigned long long)buf->start);
5797 } else {
5799 * Signal to callers we need to start the scan over
5800 * again since we'll have cowed blocks.
5802 ret = -EAGAIN;
5804 } else {
5805 rec->content_checked = 1;
5806 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5807 rec->owner_ref_checked = 1;
5808 else {
5809 ret = check_owner_ref(root, rec, buf);
5810 if (!ret)
5811 rec->owner_ref_checked = 1;
5814 if (!ret)
5815 maybe_free_extent_rec(extent_cache, rec);
5816 return ret;
5819 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5820 u64 parent, u64 root)
5822 struct list_head *cur = rec->backrefs.next;
5823 struct extent_backref *node;
5824 struct tree_backref *back;
5826 while(cur != &rec->backrefs) {
5827 node = to_extent_backref(cur);
5828 cur = cur->next;
5829 if (node->is_data)
5830 continue;
5831 back = to_tree_backref(node);
5832 if (parent > 0) {
5833 if (!node->full_backref)
5834 continue;
5835 if (parent == back->parent)
5836 return back;
5837 } else {
5838 if (node->full_backref)
5839 continue;
5840 if (back->root == root)
5841 return back;
5844 return NULL;
5847 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5848 u64 parent, u64 root)
5850 struct tree_backref *ref = malloc(sizeof(*ref));
5852 if (!ref)
5853 return NULL;
5854 memset(&ref->node, 0, sizeof(ref->node));
5855 if (parent > 0) {
5856 ref->parent = parent;
5857 ref->node.full_backref = 1;
5858 } else {
5859 ref->root = root;
5860 ref->node.full_backref = 0;
5862 list_add_tail(&ref->node.list, &rec->backrefs);
5864 return ref;
5867 static struct data_backref *find_data_backref(struct extent_record *rec,
5868 u64 parent, u64 root,
5869 u64 owner, u64 offset,
5870 int found_ref,
5871 u64 disk_bytenr, u64 bytes)
5873 struct list_head *cur = rec->backrefs.next;
5874 struct extent_backref *node;
5875 struct data_backref *back;
5877 while(cur != &rec->backrefs) {
5878 node = to_extent_backref(cur);
5879 cur = cur->next;
5880 if (!node->is_data)
5881 continue;
5882 back = to_data_backref(node);
5883 if (parent > 0) {
5884 if (!node->full_backref)
5885 continue;
5886 if (parent == back->parent)
5887 return back;
5888 } else {
5889 if (node->full_backref)
5890 continue;
5891 if (back->root == root && back->owner == owner &&
5892 back->offset == offset) {
5893 if (found_ref && node->found_ref &&
5894 (back->bytes != bytes ||
5895 back->disk_bytenr != disk_bytenr))
5896 continue;
5897 return back;
5901 return NULL;
5904 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5905 u64 parent, u64 root,
5906 u64 owner, u64 offset,
5907 u64 max_size)
5909 struct data_backref *ref = malloc(sizeof(*ref));
5911 if (!ref)
5912 return NULL;
5913 memset(&ref->node, 0, sizeof(ref->node));
5914 ref->node.is_data = 1;
5916 if (parent > 0) {
5917 ref->parent = parent;
5918 ref->owner = 0;
5919 ref->offset = 0;
5920 ref->node.full_backref = 1;
5921 } else {
5922 ref->root = root;
5923 ref->owner = owner;
5924 ref->offset = offset;
5925 ref->node.full_backref = 0;
5927 ref->bytes = max_size;
5928 ref->found_ref = 0;
5929 ref->num_refs = 0;
5930 list_add_tail(&ref->node.list, &rec->backrefs);
5931 if (max_size > rec->max_size)
5932 rec->max_size = max_size;
5933 return ref;
5936 /* Check if the type of extent matches with its chunk */
5937 static void check_extent_type(struct extent_record *rec)
5939 struct btrfs_block_group_cache *bg_cache;
5941 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5942 if (!bg_cache)
5943 return;
5945 /* data extent, check chunk directly*/
5946 if (!rec->metadata) {
5947 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5948 rec->wrong_chunk_type = 1;
5949 return;
5952 /* metadata extent, check the obvious case first */
5953 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5954 BTRFS_BLOCK_GROUP_METADATA))) {
5955 rec->wrong_chunk_type = 1;
5956 return;
5960 * Check SYSTEM extent, as it's also marked as metadata, we can only
5961 * make sure it's a SYSTEM extent by its backref
5963 if (!list_empty(&rec->backrefs)) {
5964 struct extent_backref *node;
5965 struct tree_backref *tback;
5966 u64 bg_type;
5968 node = to_extent_backref(rec->backrefs.next);
5969 if (node->is_data) {
5970 /* tree block shouldn't have data backref */
5971 rec->wrong_chunk_type = 1;
5972 return;
5974 tback = container_of(node, struct tree_backref, node);
5976 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5977 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5978 else
5979 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5980 if (!(bg_cache->flags & bg_type))
5981 rec->wrong_chunk_type = 1;
5986 * Allocate a new extent record, fill default values from @tmpl and insert int
5987 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5988 * the cache, otherwise it fails.
5990 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5991 struct extent_record *tmpl)
5993 struct extent_record *rec;
5994 int ret = 0;
5996 rec = malloc(sizeof(*rec));
5997 if (!rec)
5998 return -ENOMEM;
5999 rec->start = tmpl->start;
6000 rec->max_size = tmpl->max_size;
6001 rec->nr = max(tmpl->nr, tmpl->max_size);
6002 rec->found_rec = tmpl->found_rec;
6003 rec->content_checked = tmpl->content_checked;
6004 rec->owner_ref_checked = tmpl->owner_ref_checked;
6005 rec->num_duplicates = 0;
6006 rec->metadata = tmpl->metadata;
6007 rec->flag_block_full_backref = FLAG_UNSET;
6008 rec->bad_full_backref = 0;
6009 rec->crossing_stripes = 0;
6010 rec->wrong_chunk_type = 0;
6011 rec->is_root = tmpl->is_root;
6012 rec->refs = tmpl->refs;
6013 rec->extent_item_refs = tmpl->extent_item_refs;
6014 rec->parent_generation = tmpl->parent_generation;
6015 INIT_LIST_HEAD(&rec->backrefs);
6016 INIT_LIST_HEAD(&rec->dups);
6017 INIT_LIST_HEAD(&rec->list);
6018 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6019 rec->cache.start = tmpl->start;
6020 rec->cache.size = tmpl->nr;
6021 ret = insert_cache_extent(extent_cache, &rec->cache);
6022 if (ret) {
6023 free(rec);
6024 return ret;
6026 bytes_used += rec->nr;
6028 if (tmpl->metadata)
6029 rec->crossing_stripes = check_crossing_stripes(global_info,
6030 rec->start, global_info->tree_root->nodesize);
6031 check_extent_type(rec);
6032 return ret;
6036 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6037 * some are hints:
6038 * - refs - if found, increase refs
6039 * - is_root - if found, set
6040 * - content_checked - if found, set
6041 * - owner_ref_checked - if found, set
6043 * If not found, create a new one, initialize and insert.
6045 static int add_extent_rec(struct cache_tree *extent_cache,
6046 struct extent_record *tmpl)
6048 struct extent_record *rec;
6049 struct cache_extent *cache;
6050 int ret = 0;
6051 int dup = 0;
6053 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6054 if (cache) {
6055 rec = container_of(cache, struct extent_record, cache);
6056 if (tmpl->refs)
6057 rec->refs++;
6058 if (rec->nr == 1)
6059 rec->nr = max(tmpl->nr, tmpl->max_size);
6062 * We need to make sure to reset nr to whatever the extent
6063 * record says was the real size, this way we can compare it to
6064 * the backrefs.
6066 if (tmpl->found_rec) {
6067 if (tmpl->start != rec->start || rec->found_rec) {
6068 struct extent_record *tmp;
6070 dup = 1;
6071 if (list_empty(&rec->list))
6072 list_add_tail(&rec->list,
6073 &duplicate_extents);
6076 * We have to do this song and dance in case we
6077 * find an extent record that falls inside of
6078 * our current extent record but does not have
6079 * the same objectid.
6081 tmp = malloc(sizeof(*tmp));
6082 if (!tmp)
6083 return -ENOMEM;
6084 tmp->start = tmpl->start;
6085 tmp->max_size = tmpl->max_size;
6086 tmp->nr = tmpl->nr;
6087 tmp->found_rec = 1;
6088 tmp->metadata = tmpl->metadata;
6089 tmp->extent_item_refs = tmpl->extent_item_refs;
6090 INIT_LIST_HEAD(&tmp->list);
6091 list_add_tail(&tmp->list, &rec->dups);
6092 rec->num_duplicates++;
6093 } else {
6094 rec->nr = tmpl->nr;
6095 rec->found_rec = 1;
6099 if (tmpl->extent_item_refs && !dup) {
6100 if (rec->extent_item_refs) {
6101 fprintf(stderr, "block %llu rec "
6102 "extent_item_refs %llu, passed %llu\n",
6103 (unsigned long long)tmpl->start,
6104 (unsigned long long)
6105 rec->extent_item_refs,
6106 (unsigned long long)tmpl->extent_item_refs);
6108 rec->extent_item_refs = tmpl->extent_item_refs;
6110 if (tmpl->is_root)
6111 rec->is_root = 1;
6112 if (tmpl->content_checked)
6113 rec->content_checked = 1;
6114 if (tmpl->owner_ref_checked)
6115 rec->owner_ref_checked = 1;
6116 memcpy(&rec->parent_key, &tmpl->parent_key,
6117 sizeof(tmpl->parent_key));
6118 if (tmpl->parent_generation)
6119 rec->parent_generation = tmpl->parent_generation;
6120 if (rec->max_size < tmpl->max_size)
6121 rec->max_size = tmpl->max_size;
6124 * A metadata extent can't cross stripe_len boundary, otherwise
6125 * kernel scrub won't be able to handle it.
6126 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6127 * it.
6129 if (tmpl->metadata)
6130 rec->crossing_stripes = check_crossing_stripes(
6131 global_info, rec->start,
6132 global_info->tree_root->nodesize);
6133 check_extent_type(rec);
6134 maybe_free_extent_rec(extent_cache, rec);
6135 return ret;
6138 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6140 return ret;
6143 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6144 u64 parent, u64 root, int found_ref)
6146 struct extent_record *rec;
6147 struct tree_backref *back;
6148 struct cache_extent *cache;
6149 int ret;
6151 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6152 if (!cache) {
6153 struct extent_record tmpl;
6155 memset(&tmpl, 0, sizeof(tmpl));
6156 tmpl.start = bytenr;
6157 tmpl.nr = 1;
6158 tmpl.metadata = 1;
6160 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6161 if (ret)
6162 return ret;
6164 /* really a bug in cache_extent implement now */
6165 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6166 if (!cache)
6167 return -ENOENT;
6170 rec = container_of(cache, struct extent_record, cache);
6171 if (rec->start != bytenr) {
6173 * Several cause, from unaligned bytenr to over lapping extents
6175 return -EEXIST;
6178 back = find_tree_backref(rec, parent, root);
6179 if (!back) {
6180 back = alloc_tree_backref(rec, parent, root);
6181 if (!back)
6182 return -ENOMEM;
6185 if (found_ref) {
6186 if (back->node.found_ref) {
6187 fprintf(stderr, "Extent back ref already exists "
6188 "for %llu parent %llu root %llu \n",
6189 (unsigned long long)bytenr,
6190 (unsigned long long)parent,
6191 (unsigned long long)root);
6193 back->node.found_ref = 1;
6194 } else {
6195 if (back->node.found_extent_tree) {
6196 fprintf(stderr, "Extent back ref already exists "
6197 "for %llu parent %llu root %llu \n",
6198 (unsigned long long)bytenr,
6199 (unsigned long long)parent,
6200 (unsigned long long)root);
6202 back->node.found_extent_tree = 1;
6204 check_extent_type(rec);
6205 maybe_free_extent_rec(extent_cache, rec);
6206 return 0;
6209 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6210 u64 parent, u64 root, u64 owner, u64 offset,
6211 u32 num_refs, int found_ref, u64 max_size)
6213 struct extent_record *rec;
6214 struct data_backref *back;
6215 struct cache_extent *cache;
6216 int ret;
6218 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6219 if (!cache) {
6220 struct extent_record tmpl;
6222 memset(&tmpl, 0, sizeof(tmpl));
6223 tmpl.start = bytenr;
6224 tmpl.nr = 1;
6225 tmpl.max_size = max_size;
6227 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6228 if (ret)
6229 return ret;
6231 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6232 if (!cache)
6233 abort();
6236 rec = container_of(cache, struct extent_record, cache);
6237 if (rec->max_size < max_size)
6238 rec->max_size = max_size;
6241 * If found_ref is set then max_size is the real size and must match the
6242 * existing refs. So if we have already found a ref then we need to
6243 * make sure that this ref matches the existing one, otherwise we need
6244 * to add a new backref so we can notice that the backrefs don't match
6245 * and we need to figure out who is telling the truth. This is to
6246 * account for that awful fsync bug I introduced where we'd end up with
6247 * a btrfs_file_extent_item that would have its length include multiple
6248 * prealloc extents or point inside of a prealloc extent.
6250 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6251 bytenr, max_size);
6252 if (!back) {
6253 back = alloc_data_backref(rec, parent, root, owner, offset,
6254 max_size);
6255 BUG_ON(!back);
6258 if (found_ref) {
6259 BUG_ON(num_refs != 1);
6260 if (back->node.found_ref)
6261 BUG_ON(back->bytes != max_size);
6262 back->node.found_ref = 1;
6263 back->found_ref += 1;
6264 back->bytes = max_size;
6265 back->disk_bytenr = bytenr;
6266 rec->refs += 1;
6267 rec->content_checked = 1;
6268 rec->owner_ref_checked = 1;
6269 } else {
6270 if (back->node.found_extent_tree) {
6271 fprintf(stderr, "Extent back ref already exists "
6272 "for %llu parent %llu root %llu "
6273 "owner %llu offset %llu num_refs %lu\n",
6274 (unsigned long long)bytenr,
6275 (unsigned long long)parent,
6276 (unsigned long long)root,
6277 (unsigned long long)owner,
6278 (unsigned long long)offset,
6279 (unsigned long)num_refs);
6281 back->num_refs = num_refs;
6282 back->node.found_extent_tree = 1;
6284 maybe_free_extent_rec(extent_cache, rec);
6285 return 0;
6288 static int add_pending(struct cache_tree *pending,
6289 struct cache_tree *seen, u64 bytenr, u32 size)
6291 int ret;
6292 ret = add_cache_extent(seen, bytenr, size);
6293 if (ret)
6294 return ret;
6295 add_cache_extent(pending, bytenr, size);
6296 return 0;
6299 static int pick_next_pending(struct cache_tree *pending,
6300 struct cache_tree *reada,
6301 struct cache_tree *nodes,
6302 u64 last, struct block_info *bits, int bits_nr,
6303 int *reada_bits)
6305 unsigned long node_start = last;
6306 struct cache_extent *cache;
6307 int ret;
6309 cache = search_cache_extent(reada, 0);
6310 if (cache) {
6311 bits[0].start = cache->start;
6312 bits[0].size = cache->size;
6313 *reada_bits = 1;
6314 return 1;
6316 *reada_bits = 0;
6317 if (node_start > 32768)
6318 node_start -= 32768;
6320 cache = search_cache_extent(nodes, node_start);
6321 if (!cache)
6322 cache = search_cache_extent(nodes, 0);
6324 if (!cache) {
6325 cache = search_cache_extent(pending, 0);
6326 if (!cache)
6327 return 0;
6328 ret = 0;
6329 do {
6330 bits[ret].start = cache->start;
6331 bits[ret].size = cache->size;
6332 cache = next_cache_extent(cache);
6333 ret++;
6334 } while (cache && ret < bits_nr);
6335 return ret;
6338 ret = 0;
6339 do {
6340 bits[ret].start = cache->start;
6341 bits[ret].size = cache->size;
6342 cache = next_cache_extent(cache);
6343 ret++;
6344 } while (cache && ret < bits_nr);
6346 if (bits_nr - ret > 8) {
6347 u64 lookup = bits[0].start + bits[0].size;
6348 struct cache_extent *next;
6349 next = search_cache_extent(pending, lookup);
6350 while(next) {
6351 if (next->start - lookup > 32768)
6352 break;
6353 bits[ret].start = next->start;
6354 bits[ret].size = next->size;
6355 lookup = next->start + next->size;
6356 ret++;
6357 if (ret == bits_nr)
6358 break;
6359 next = next_cache_extent(next);
6360 if (!next)
6361 break;
6364 return ret;
6367 static void free_chunk_record(struct cache_extent *cache)
6369 struct chunk_record *rec;
6371 rec = container_of(cache, struct chunk_record, cache);
6372 list_del_init(&rec->list);
6373 list_del_init(&rec->dextents);
6374 free(rec);
6377 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6379 cache_tree_free_extents(chunk_cache, free_chunk_record);
6382 static void free_device_record(struct rb_node *node)
6384 struct device_record *rec;
6386 rec = container_of(node, struct device_record, node);
6387 free(rec);
6390 FREE_RB_BASED_TREE(device_cache, free_device_record);
6392 int insert_block_group_record(struct block_group_tree *tree,
6393 struct block_group_record *bg_rec)
6395 int ret;
6397 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6398 if (ret)
6399 return ret;
6401 list_add_tail(&bg_rec->list, &tree->block_groups);
6402 return 0;
6405 static void free_block_group_record(struct cache_extent *cache)
6407 struct block_group_record *rec;
6409 rec = container_of(cache, struct block_group_record, cache);
6410 list_del_init(&rec->list);
6411 free(rec);
6414 void free_block_group_tree(struct block_group_tree *tree)
6416 cache_tree_free_extents(&tree->tree, free_block_group_record);
6419 int insert_device_extent_record(struct device_extent_tree *tree,
6420 struct device_extent_record *de_rec)
6422 int ret;
6425 * Device extent is a bit different from the other extents, because
6426 * the extents which belong to the different devices may have the
6427 * same start and size, so we need use the special extent cache
6428 * search/insert functions.
6430 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6431 if (ret)
6432 return ret;
6434 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6435 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6436 return 0;
6439 static void free_device_extent_record(struct cache_extent *cache)
6441 struct device_extent_record *rec;
6443 rec = container_of(cache, struct device_extent_record, cache);
6444 if (!list_empty(&rec->chunk_list))
6445 list_del_init(&rec->chunk_list);
6446 if (!list_empty(&rec->device_list))
6447 list_del_init(&rec->device_list);
6448 free(rec);
6451 void free_device_extent_tree(struct device_extent_tree *tree)
6453 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Ref objectids below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, everything else as data backrefs carrying the v0 ref count.
 * Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6477 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6478 struct btrfs_key *key,
6479 int slot)
6481 struct btrfs_chunk *ptr;
6482 struct chunk_record *rec;
6483 int num_stripes, i;
6485 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6486 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6488 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6489 if (!rec) {
6490 fprintf(stderr, "memory allocation failed\n");
6491 exit(-1);
6494 INIT_LIST_HEAD(&rec->list);
6495 INIT_LIST_HEAD(&rec->dextents);
6496 rec->bg_rec = NULL;
6498 rec->cache.start = key->offset;
6499 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6501 rec->generation = btrfs_header_generation(leaf);
6503 rec->objectid = key->objectid;
6504 rec->type = key->type;
6505 rec->offset = key->offset;
6507 rec->length = rec->cache.size;
6508 rec->owner = btrfs_chunk_owner(leaf, ptr);
6509 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6510 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6511 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6512 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6513 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6514 rec->num_stripes = num_stripes;
6515 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6517 for (i = 0; i < rec->num_stripes; ++i) {
6518 rec->stripes[i].devid =
6519 btrfs_stripe_devid_nr(leaf, ptr, i);
6520 rec->stripes[i].offset =
6521 btrfs_stripe_offset_nr(leaf, ptr, i);
6522 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6523 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6524 BTRFS_UUID_SIZE);
6527 return rec;
6530 static int process_chunk_item(struct cache_tree *chunk_cache,
6531 struct btrfs_key *key, struct extent_buffer *eb,
6532 int slot)
6534 struct chunk_record *rec;
6535 struct btrfs_chunk *chunk;
6536 int ret = 0;
6538 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6540 * Do extra check for this chunk item,
6542 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6543 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6544 * and owner<->key_type check.
6546 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6547 key->offset);
6548 if (ret < 0) {
6549 error("chunk(%llu, %llu) is not valid, ignore it",
6550 key->offset, btrfs_chunk_length(eb, chunk));
6551 return 0;
6553 rec = btrfs_new_chunk_record(eb, key, slot);
6554 ret = insert_cache_extent(chunk_cache, &rec->cache);
6555 if (ret) {
6556 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6557 rec->offset, rec->length);
6558 free(rec);
6561 return ret;
6564 static int process_device_item(struct rb_root *dev_cache,
6565 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6567 struct btrfs_dev_item *ptr;
6568 struct device_record *rec;
6569 int ret = 0;
6571 ptr = btrfs_item_ptr(eb,
6572 slot, struct btrfs_dev_item);
6574 rec = malloc(sizeof(*rec));
6575 if (!rec) {
6576 fprintf(stderr, "memory allocation failed\n");
6577 return -ENOMEM;
6580 rec->devid = key->offset;
6581 rec->generation = btrfs_header_generation(eb);
6583 rec->objectid = key->objectid;
6584 rec->type = key->type;
6585 rec->offset = key->offset;
6587 rec->devid = btrfs_device_id(eb, ptr);
6588 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6589 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6591 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6592 if (ret) {
6593 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6594 free(rec);
6597 return ret;
6600 struct block_group_record *
6601 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6602 int slot)
6604 struct btrfs_block_group_item *ptr;
6605 struct block_group_record *rec;
6607 rec = calloc(1, sizeof(*rec));
6608 if (!rec) {
6609 fprintf(stderr, "memory allocation failed\n");
6610 exit(-1);
6613 rec->cache.start = key->objectid;
6614 rec->cache.size = key->offset;
6616 rec->generation = btrfs_header_generation(leaf);
6618 rec->objectid = key->objectid;
6619 rec->type = key->type;
6620 rec->offset = key->offset;
6622 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6623 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6625 INIT_LIST_HEAD(&rec->list);
6627 return rec;
6630 static int process_block_group_item(struct block_group_tree *block_group_cache,
6631 struct btrfs_key *key,
6632 struct extent_buffer *eb, int slot)
6634 struct block_group_record *rec;
6635 int ret = 0;
6637 rec = btrfs_new_block_group_record(eb, key, slot);
6638 ret = insert_block_group_record(block_group_cache, rec);
6639 if (ret) {
6640 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6641 rec->objectid, rec->offset);
6642 free(rec);
6645 return ret;
6648 struct device_extent_record *
6649 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6650 struct btrfs_key *key, int slot)
6652 struct device_extent_record *rec;
6653 struct btrfs_dev_extent *ptr;
6655 rec = calloc(1, sizeof(*rec));
6656 if (!rec) {
6657 fprintf(stderr, "memory allocation failed\n");
6658 exit(-1);
6661 rec->cache.objectid = key->objectid;
6662 rec->cache.start = key->offset;
6664 rec->generation = btrfs_header_generation(leaf);
6666 rec->objectid = key->objectid;
6667 rec->type = key->type;
6668 rec->offset = key->offset;
6670 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6671 rec->chunk_objecteid =
6672 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6673 rec->chunk_offset =
6674 btrfs_dev_extent_chunk_offset(leaf, ptr);
6675 rec->length = btrfs_dev_extent_length(leaf, ptr);
6676 rec->cache.size = rec->length;
6678 INIT_LIST_HEAD(&rec->chunk_list);
6679 INIT_LIST_HEAD(&rec->device_list);
6681 return rec;
6684 static int
6685 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6686 struct btrfs_key *key, struct extent_buffer *eb,
6687 int slot)
6689 struct device_extent_record *rec;
6690 int ret;
6692 rec = btrfs_new_device_extent_record(eb, key, slot);
6693 ret = insert_device_extent_record(dev_extent_cache, rec);
6694 if (ret) {
6695 fprintf(stderr,
6696 "Device extent[%llu, %llu, %llu] existed.\n",
6697 rec->objectid, rec->offset, rec->length);
6698 free(rec);
6701 return ret;
6704 static int process_extent_item(struct btrfs_root *root,
6705 struct cache_tree *extent_cache,
6706 struct extent_buffer *eb, int slot)
6708 struct btrfs_extent_item *ei;
6709 struct btrfs_extent_inline_ref *iref;
6710 struct btrfs_extent_data_ref *dref;
6711 struct btrfs_shared_data_ref *sref;
6712 struct btrfs_key key;
6713 struct extent_record tmpl;
6714 unsigned long end;
6715 unsigned long ptr;
6716 int ret;
6717 int type;
6718 u32 item_size = btrfs_item_size_nr(eb, slot);
6719 u64 refs = 0;
6720 u64 offset;
6721 u64 num_bytes;
6722 int metadata = 0;
6724 btrfs_item_key_to_cpu(eb, &key, slot);
6726 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6727 metadata = 1;
6728 num_bytes = root->nodesize;
6729 } else {
6730 num_bytes = key.offset;
6733 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6734 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6735 key.objectid, root->sectorsize);
6736 return -EIO;
6738 if (item_size < sizeof(*ei)) {
6739 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6740 struct btrfs_extent_item_v0 *ei0;
6741 BUG_ON(item_size != sizeof(*ei0));
6742 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6743 refs = btrfs_extent_refs_v0(eb, ei0);
6744 #else
6745 BUG();
6746 #endif
6747 memset(&tmpl, 0, sizeof(tmpl));
6748 tmpl.start = key.objectid;
6749 tmpl.nr = num_bytes;
6750 tmpl.extent_item_refs = refs;
6751 tmpl.metadata = metadata;
6752 tmpl.found_rec = 1;
6753 tmpl.max_size = num_bytes;
6755 return add_extent_rec(extent_cache, &tmpl);
6758 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6759 refs = btrfs_extent_refs(eb, ei);
6760 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6761 metadata = 1;
6762 else
6763 metadata = 0;
6764 if (metadata && num_bytes != root->nodesize) {
6765 error("ignore invalid metadata extent, length %llu does not equal to %u",
6766 num_bytes, root->nodesize);
6767 return -EIO;
6769 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6770 error("ignore invalid data extent, length %llu is not aligned to %u",
6771 num_bytes, root->sectorsize);
6772 return -EIO;
6775 memset(&tmpl, 0, sizeof(tmpl));
6776 tmpl.start = key.objectid;
6777 tmpl.nr = num_bytes;
6778 tmpl.extent_item_refs = refs;
6779 tmpl.metadata = metadata;
6780 tmpl.found_rec = 1;
6781 tmpl.max_size = num_bytes;
6782 add_extent_rec(extent_cache, &tmpl);
6784 ptr = (unsigned long)(ei + 1);
6785 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6786 key.type == BTRFS_EXTENT_ITEM_KEY)
6787 ptr += sizeof(struct btrfs_tree_block_info);
6789 end = (unsigned long)ei + item_size;
6790 while (ptr < end) {
6791 iref = (struct btrfs_extent_inline_ref *)ptr;
6792 type = btrfs_extent_inline_ref_type(eb, iref);
6793 offset = btrfs_extent_inline_ref_offset(eb, iref);
6794 switch (type) {
6795 case BTRFS_TREE_BLOCK_REF_KEY:
6796 ret = add_tree_backref(extent_cache, key.objectid,
6797 0, offset, 0);
6798 if (ret < 0)
6799 error("add_tree_backref failed: %s",
6800 strerror(-ret));
6801 break;
6802 case BTRFS_SHARED_BLOCK_REF_KEY:
6803 ret = add_tree_backref(extent_cache, key.objectid,
6804 offset, 0, 0);
6805 if (ret < 0)
6806 error("add_tree_backref failed: %s",
6807 strerror(-ret));
6808 break;
6809 case BTRFS_EXTENT_DATA_REF_KEY:
6810 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6811 add_data_backref(extent_cache, key.objectid, 0,
6812 btrfs_extent_data_ref_root(eb, dref),
6813 btrfs_extent_data_ref_objectid(eb,
6814 dref),
6815 btrfs_extent_data_ref_offset(eb, dref),
6816 btrfs_extent_data_ref_count(eb, dref),
6817 0, num_bytes);
6818 break;
6819 case BTRFS_SHARED_DATA_REF_KEY:
6820 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6821 add_data_backref(extent_cache, key.objectid, offset,
6822 0, 0, 0,
6823 btrfs_shared_data_ref_count(eb, sref),
6824 0, num_bytes);
6825 break;
6826 default:
6827 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6828 key.objectid, key.type, num_bytes);
6829 goto out;
6831 ptr += btrfs_extent_inline_ref_size(type);
6833 WARN_ON(ptr > end);
6834 out:
6835 return 0;
6838 static int check_cache_range(struct btrfs_root *root,
6839 struct btrfs_block_group_cache *cache,
6840 u64 offset, u64 bytes)
6842 struct btrfs_free_space *entry;
6843 u64 *logical;
6844 u64 bytenr;
6845 int stripe_len;
6846 int i, nr, ret;
6848 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6849 bytenr = btrfs_sb_offset(i);
6850 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6851 cache->key.objectid, bytenr, 0,
6852 &logical, &nr, &stripe_len);
6853 if (ret)
6854 return ret;
6856 while (nr--) {
6857 if (logical[nr] + stripe_len <= offset)
6858 continue;
6859 if (offset + bytes <= logical[nr])
6860 continue;
6861 if (logical[nr] == offset) {
6862 if (stripe_len >= bytes) {
6863 free(logical);
6864 return 0;
6866 bytes -= stripe_len;
6867 offset += stripe_len;
6868 } else if (logical[nr] < offset) {
6869 if (logical[nr] + stripe_len >=
6870 offset + bytes) {
6871 free(logical);
6872 return 0;
6874 bytes = (offset + bytes) -
6875 (logical[nr] + stripe_len);
6876 offset = logical[nr] + stripe_len;
6877 } else {
6879 * Could be tricky, the super may land in the
6880 * middle of the area we're checking. First
6881 * check the easiest case, it's at the end.
6883 if (logical[nr] + stripe_len >=
6884 bytes + offset) {
6885 bytes = logical[nr] - offset;
6886 continue;
6889 /* Check the left side */
6890 ret = check_cache_range(root, cache,
6891 offset,
6892 logical[nr] - offset);
6893 if (ret) {
6894 free(logical);
6895 return ret;
6898 /* Now we continue with the right side */
6899 bytes = (offset + bytes) -
6900 (logical[nr] + stripe_len);
6901 offset = logical[nr] + stripe_len;
6905 free(logical);
6908 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6909 if (!entry) {
6910 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6911 offset, offset+bytes);
6912 return -EINVAL;
6915 if (entry->offset != offset) {
6916 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6917 entry->offset);
6918 return -EINVAL;
6921 if (entry->bytes != bytes) {
6922 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6923 bytes, entry->bytes, offset);
6924 return -EINVAL;
6927 unlink_free_space(cache->free_space_ctl, entry);
6928 free(entry);
6929 return 0;
6932 static int verify_space_cache(struct btrfs_root *root,
6933 struct btrfs_block_group_cache *cache)
6935 struct btrfs_path path;
6936 struct extent_buffer *leaf;
6937 struct btrfs_key key;
6938 u64 last;
6939 int ret = 0;
6941 root = root->fs_info->extent_root;
6943 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6945 btrfs_init_path(&path);
6946 key.objectid = last;
6947 key.offset = 0;
6948 key.type = BTRFS_EXTENT_ITEM_KEY;
6949 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6950 if (ret < 0)
6951 goto out;
6952 ret = 0;
6953 while (1) {
6954 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6955 ret = btrfs_next_leaf(root, &path);
6956 if (ret < 0)
6957 goto out;
6958 if (ret > 0) {
6959 ret = 0;
6960 break;
6963 leaf = path.nodes[0];
6964 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6965 if (key.objectid >= cache->key.offset + cache->key.objectid)
6966 break;
6967 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6968 key.type != BTRFS_METADATA_ITEM_KEY) {
6969 path.slots[0]++;
6970 continue;
6973 if (last == key.objectid) {
6974 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975 last = key.objectid + key.offset;
6976 else
6977 last = key.objectid + root->nodesize;
6978 path.slots[0]++;
6979 continue;
6982 ret = check_cache_range(root, cache, last,
6983 key.objectid - last);
6984 if (ret)
6985 break;
6986 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6987 last = key.objectid + key.offset;
6988 else
6989 last = key.objectid + root->nodesize;
6990 path.slots[0]++;
6993 if (last < cache->key.objectid + cache->key.offset)
6994 ret = check_cache_range(root, cache, last,
6995 cache->key.objectid +
6996 cache->key.offset - last);
6998 out:
6999 btrfs_release_path(&path);
7001 if (!ret &&
7002 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7003 fprintf(stderr, "There are still entries left in the space "
7004 "cache\n");
7005 ret = -EINVAL;
7008 return ret;
7011 static int check_space_cache(struct btrfs_root *root)
7013 struct btrfs_block_group_cache *cache;
7014 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7015 int ret;
7016 int error = 0;
7018 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7019 btrfs_super_generation(root->fs_info->super_copy) !=
7020 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7021 printf("cache and super generation don't match, space cache "
7022 "will be invalidated\n");
7023 return 0;
7026 if (ctx.progress_enabled) {
7027 ctx.tp = TASK_FREE_SPACE;
7028 task_start(ctx.info);
7031 while (1) {
7032 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7033 if (!cache)
7034 break;
7036 start = cache->key.objectid + cache->key.offset;
7037 if (!cache->free_space_ctl) {
7038 if (btrfs_init_free_space_ctl(cache,
7039 root->sectorsize)) {
7040 ret = -ENOMEM;
7041 break;
7043 } else {
7044 btrfs_remove_free_space_cache(cache);
7047 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7048 ret = exclude_super_stripes(root, cache);
7049 if (ret) {
7050 fprintf(stderr, "could not exclude super stripes: %s\n",
7051 strerror(-ret));
7052 error++;
7053 continue;
7055 ret = load_free_space_tree(root->fs_info, cache);
7056 free_excluded_extents(root, cache);
7057 if (ret < 0) {
7058 fprintf(stderr, "could not load free space tree: %s\n",
7059 strerror(-ret));
7060 error++;
7061 continue;
7063 error += ret;
7064 } else {
7065 ret = load_free_space_cache(root->fs_info, cache);
7066 if (!ret)
7067 continue;
7070 ret = verify_space_cache(root, cache);
7071 if (ret) {
7072 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7073 cache->key.objectid);
7074 error++;
7078 task_stop(ctx.info);
7080 return error ? -EINVAL : 0;
7083 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7084 u64 num_bytes, unsigned long leaf_offset,
7085 struct extent_buffer *eb) {
7087 u64 offset = 0;
7088 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7089 char *data;
7090 unsigned long csum_offset;
7091 u32 csum;
7092 u32 csum_expected;
7093 u64 read_len;
7094 u64 data_checked = 0;
7095 u64 tmp;
7096 int ret = 0;
7097 int mirror;
7098 int num_copies;
7100 if (num_bytes % root->sectorsize)
7101 return -EINVAL;
7103 data = malloc(num_bytes);
7104 if (!data)
7105 return -ENOMEM;
7107 while (offset < num_bytes) {
7108 mirror = 0;
7109 again:
7110 read_len = num_bytes - offset;
7111 /* read as much space once a time */
7112 ret = read_extent_data(root, data + offset,
7113 bytenr + offset, &read_len, mirror);
7114 if (ret)
7115 goto out;
7116 data_checked = 0;
7117 /* verify every 4k data's checksum */
7118 while (data_checked < read_len) {
7119 csum = ~(u32)0;
7120 tmp = offset + data_checked;
7122 csum = btrfs_csum_data((char *)data + tmp,
7123 csum, root->sectorsize);
7124 btrfs_csum_final(csum, (u8 *)&csum);
7126 csum_offset = leaf_offset +
7127 tmp / root->sectorsize * csum_size;
7128 read_extent_buffer(eb, (char *)&csum_expected,
7129 csum_offset, csum_size);
7130 /* try another mirror */
7131 if (csum != csum_expected) {
7132 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7133 mirror, bytenr + tmp,
7134 csum, csum_expected);
7135 num_copies = btrfs_num_copies(
7136 &root->fs_info->mapping_tree,
7137 bytenr, num_bytes);
7138 if (mirror < num_copies - 1) {
7139 mirror += 1;
7140 goto again;
7143 data_checked += root->sectorsize;
7145 offset += read_len;
7147 out:
7148 free(data);
7149 return ret;
7152 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7153 u64 num_bytes)
7155 struct btrfs_path path;
7156 struct extent_buffer *leaf;
7157 struct btrfs_key key;
7158 int ret;
7160 btrfs_init_path(&path);
7161 key.objectid = bytenr;
7162 key.type = BTRFS_EXTENT_ITEM_KEY;
7163 key.offset = (u64)-1;
7165 again:
7166 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7167 0, 0);
7168 if (ret < 0) {
7169 fprintf(stderr, "Error looking up extent record %d\n", ret);
7170 btrfs_release_path(&path);
7171 return ret;
7172 } else if (ret) {
7173 if (path.slots[0] > 0) {
7174 path.slots[0]--;
7175 } else {
7176 ret = btrfs_prev_leaf(root, &path);
7177 if (ret < 0) {
7178 goto out;
7179 } else if (ret > 0) {
7180 ret = 0;
7181 goto out;
7186 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7189 * Block group items come before extent items if they have the same
7190 * bytenr, so walk back one more just in case. Dear future traveller,
7191 * first congrats on mastering time travel. Now if it's not too much
7192 * trouble could you go back to 2006 and tell Chris to make the
7193 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7194 * EXTENT_ITEM_KEY please?
7196 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7197 if (path.slots[0] > 0) {
7198 path.slots[0]--;
7199 } else {
7200 ret = btrfs_prev_leaf(root, &path);
7201 if (ret < 0) {
7202 goto out;
7203 } else if (ret > 0) {
7204 ret = 0;
7205 goto out;
7208 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7211 while (num_bytes) {
7212 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7213 ret = btrfs_next_leaf(root, &path);
7214 if (ret < 0) {
7215 fprintf(stderr, "Error going to next leaf "
7216 "%d\n", ret);
7217 btrfs_release_path(&path);
7218 return ret;
7219 } else if (ret) {
7220 break;
7223 leaf = path.nodes[0];
7224 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7225 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7226 path.slots[0]++;
7227 continue;
7229 if (key.objectid + key.offset < bytenr) {
7230 path.slots[0]++;
7231 continue;
7233 if (key.objectid > bytenr + num_bytes)
7234 break;
7236 if (key.objectid == bytenr) {
7237 if (key.offset >= num_bytes) {
7238 num_bytes = 0;
7239 break;
7241 num_bytes -= key.offset;
7242 bytenr += key.offset;
7243 } else if (key.objectid < bytenr) {
7244 if (key.objectid + key.offset >= bytenr + num_bytes) {
7245 num_bytes = 0;
7246 break;
7248 num_bytes = (bytenr + num_bytes) -
7249 (key.objectid + key.offset);
7250 bytenr = key.objectid + key.offset;
7251 } else {
7252 if (key.objectid + key.offset < bytenr + num_bytes) {
7253 u64 new_start = key.objectid + key.offset;
7254 u64 new_bytes = bytenr + num_bytes - new_start;
7257 * Weird case, the extent is in the middle of
7258 * our range, we'll have to search one side
7259 * and then the other. Not sure if this happens
7260 * in real life, but no harm in coding it up
7261 * anyway just in case.
7263 btrfs_release_path(&path);
7264 ret = check_extent_exists(root, new_start,
7265 new_bytes);
7266 if (ret) {
7267 fprintf(stderr, "Right section didn't "
7268 "have a record\n");
7269 break;
7271 num_bytes = key.objectid - bytenr;
7272 goto again;
7274 num_bytes = key.objectid - bytenr;
7276 path.slots[0]++;
7278 ret = 0;
7280 out:
7281 if (num_bytes && !ret) {
7282 fprintf(stderr, "There are no extents for csum range "
7283 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7284 ret = 1;
7287 btrfs_release_path(&path);
7288 return ret;
7291 static int check_csums(struct btrfs_root *root)
7293 struct btrfs_path path;
7294 struct extent_buffer *leaf;
7295 struct btrfs_key key;
7296 u64 offset = 0, num_bytes = 0;
7297 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7298 int errors = 0;
7299 int ret;
7300 u64 data_len;
7301 unsigned long leaf_offset;
7303 root = root->fs_info->csum_root;
7304 if (!extent_buffer_uptodate(root->node)) {
7305 fprintf(stderr, "No valid csum tree found\n");
7306 return -ENOENT;
7309 btrfs_init_path(&path);
7310 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7311 key.type = BTRFS_EXTENT_CSUM_KEY;
7312 key.offset = 0;
7313 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7314 if (ret < 0) {
7315 fprintf(stderr, "Error searching csum tree %d\n", ret);
7316 btrfs_release_path(&path);
7317 return ret;
7320 if (ret > 0 && path.slots[0])
7321 path.slots[0]--;
7322 ret = 0;
7324 while (1) {
7325 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7326 ret = btrfs_next_leaf(root, &path);
7327 if (ret < 0) {
7328 fprintf(stderr, "Error going to next leaf "
7329 "%d\n", ret);
7330 break;
7332 if (ret)
7333 break;
7335 leaf = path.nodes[0];
7337 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7338 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7339 path.slots[0]++;
7340 continue;
7343 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7344 csum_size) * root->sectorsize;
7345 if (!check_data_csum)
7346 goto skip_csum_check;
7347 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7348 ret = check_extent_csums(root, key.offset, data_len,
7349 leaf_offset, leaf);
7350 if (ret)
7351 break;
7352 skip_csum_check:
7353 if (!num_bytes) {
7354 offset = key.offset;
7355 } else if (key.offset != offset + num_bytes) {
7356 ret = check_extent_exists(root, offset, num_bytes);
7357 if (ret) {
7358 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7359 "there is no extent record\n",
7360 offset, offset+num_bytes);
7361 errors++;
7363 offset = key.offset;
7364 num_bytes = 0;
7366 num_bytes += data_len;
7367 path.slots[0]++;
7370 btrfs_release_path(&path);
7371 return errors;
7374 static int is_dropped_key(struct btrfs_key *key,
7375 struct btrfs_key *drop_key) {
7376 if (key->objectid < drop_key->objectid)
7377 return 1;
7378 else if (key->objectid == drop_key->objectid) {
7379 if (key->type < drop_key->type)
7380 return 1;
7381 else if (key->type == drop_key->type) {
7382 if (key->offset < drop_key->offset)
7383 return 1;
7386 return 0;
7390 * Here are the rules for FULL_BACKREF.
7392 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7393 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7394 * FULL_BACKREF set.
7395 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7396 * if it happened after the relocation occurred since we'll have dropped the
7397 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7398 * have no real way to know for sure.
7400 * We process the blocks one root at a time, and we start from the lowest root
7401 * objectid and go to the highest. So we can just lookup the owner backref for
7402 * the record and if we don't find it then we know it doesn't exist and we have
7403 * a FULL BACKREF.
7405 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7406 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7407 * be set or not and then we can check later once we've gathered all the refs.
7409 static int calc_extent_flag(struct btrfs_root *root,
7410 struct cache_tree *extent_cache,
7411 struct extent_buffer *buf,
7412 struct root_item_record *ri,
7413 u64 *flags)
7415 struct extent_record *rec;
7416 struct cache_extent *cache;
7417 struct tree_backref *tback;
7418 u64 owner = 0;
7420 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7421 /* we have added this extent before */
7422 if (!cache)
7423 return -ENOENT;
7425 rec = container_of(cache, struct extent_record, cache);
7428 * Except file/reloc tree, we can not have
7429 * FULL BACKREF MODE
7431 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7432 goto normal;
7434 * root node
7436 if (buf->start == ri->bytenr)
7437 goto normal;
7439 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7440 goto full_backref;
7442 owner = btrfs_header_owner(buf);
7443 if (owner == ri->objectid)
7444 goto normal;
7446 tback = find_tree_backref(rec, 0, owner);
7447 if (!tback)
7448 goto full_backref;
7449 normal:
7450 *flags = 0;
7451 if (rec->flag_block_full_backref != FLAG_UNSET &&
7452 rec->flag_block_full_backref != 0)
7453 rec->bad_full_backref = 1;
7454 return 0;
7455 full_backref:
7456 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7457 if (rec->flag_block_full_backref != FLAG_UNSET &&
7458 rec->flag_block_full_backref != 1)
7459 rec->bad_full_backref = 1;
7460 return 0;
7463 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7465 fprintf(stderr, "Invalid key type(");
7466 print_key_type(stderr, 0, key_type);
7467 fprintf(stderr, ") found in root(");
7468 print_objectid(stderr, rootid, 0);
7469 fprintf(stderr, ")\n");
7473 * Check if the key is valid with its extent buffer.
7475 * This is a early check in case invalid key exists in a extent buffer
7476 * This is not comprehensive yet, but should prevent wrong key/item passed
7477 * further
7479 static int check_type_with_root(u64 rootid, u8 key_type)
7481 switch (key_type) {
7482 /* Only valid in chunk tree */
7483 case BTRFS_DEV_ITEM_KEY:
7484 case BTRFS_CHUNK_ITEM_KEY:
7485 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7486 goto err;
7487 break;
7488 /* valid in csum and log tree */
7489 case BTRFS_CSUM_TREE_OBJECTID:
7490 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7491 is_fstree(rootid)))
7492 goto err;
7493 break;
7494 case BTRFS_EXTENT_ITEM_KEY:
7495 case BTRFS_METADATA_ITEM_KEY:
7496 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7497 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7498 goto err;
7499 break;
7500 case BTRFS_ROOT_ITEM_KEY:
7501 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7502 goto err;
7503 break;
7504 case BTRFS_DEV_EXTENT_KEY:
7505 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7506 goto err;
7507 break;
7509 return 0;
7510 err:
7511 report_mismatch_key_root(key_type, rootid);
7512 return -EINVAL;
7515 static int run_next_block(struct btrfs_root *root,
7516 struct block_info *bits,
7517 int bits_nr,
7518 u64 *last,
7519 struct cache_tree *pending,
7520 struct cache_tree *seen,
7521 struct cache_tree *reada,
7522 struct cache_tree *nodes,
7523 struct cache_tree *extent_cache,
7524 struct cache_tree *chunk_cache,
7525 struct rb_root *dev_cache,
7526 struct block_group_tree *block_group_cache,
7527 struct device_extent_tree *dev_extent_cache,
7528 struct root_item_record *ri)
7530 struct extent_buffer *buf;
7531 struct extent_record *rec = NULL;
7532 u64 bytenr;
7533 u32 size;
7534 u64 parent;
7535 u64 owner;
7536 u64 flags;
7537 u64 ptr;
7538 u64 gen = 0;
7539 int ret = 0;
7540 int i;
7541 int nritems;
7542 struct btrfs_key key;
7543 struct cache_extent *cache;
7544 int reada_bits;
7546 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7547 bits_nr, &reada_bits);
7548 if (nritems == 0)
7549 return 1;
7551 if (!reada_bits) {
7552 for(i = 0; i < nritems; i++) {
7553 ret = add_cache_extent(reada, bits[i].start,
7554 bits[i].size);
7555 if (ret == -EEXIST)
7556 continue;
7558 /* fixme, get the parent transid */
7559 readahead_tree_block(root, bits[i].start,
7560 bits[i].size, 0);
7563 *last = bits[0].start;
7564 bytenr = bits[0].start;
7565 size = bits[0].size;
7567 cache = lookup_cache_extent(pending, bytenr, size);
7568 if (cache) {
7569 remove_cache_extent(pending, cache);
7570 free(cache);
7572 cache = lookup_cache_extent(reada, bytenr, size);
7573 if (cache) {
7574 remove_cache_extent(reada, cache);
7575 free(cache);
7577 cache = lookup_cache_extent(nodes, bytenr, size);
7578 if (cache) {
7579 remove_cache_extent(nodes, cache);
7580 free(cache);
7582 cache = lookup_cache_extent(extent_cache, bytenr, size);
7583 if (cache) {
7584 rec = container_of(cache, struct extent_record, cache);
7585 gen = rec->parent_generation;
7588 /* fixme, get the real parent transid */
7589 buf = read_tree_block(root, bytenr, size, gen);
7590 if (!extent_buffer_uptodate(buf)) {
7591 record_bad_block_io(root->fs_info,
7592 extent_cache, bytenr, size);
7593 goto out;
7596 nritems = btrfs_header_nritems(buf);
7598 flags = 0;
7599 if (!init_extent_tree) {
7600 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7601 btrfs_header_level(buf), 1, NULL,
7602 &flags);
7603 if (ret < 0) {
7604 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7605 if (ret < 0) {
7606 fprintf(stderr, "Couldn't calc extent flags\n");
7607 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7610 } else {
7611 flags = 0;
7612 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7613 if (ret < 0) {
7614 fprintf(stderr, "Couldn't calc extent flags\n");
7615 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7619 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7620 if (ri != NULL &&
7621 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7622 ri->objectid == btrfs_header_owner(buf)) {
7624 * Ok we got to this block from it's original owner and
7625 * we have FULL_BACKREF set. Relocation can leave
7626 * converted blocks over so this is altogether possible,
7627 * however it's not possible if the generation > the
7628 * last snapshot, so check for this case.
7630 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7631 btrfs_header_generation(buf) > ri->last_snapshot) {
7632 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633 rec->bad_full_backref = 1;
7636 } else {
7637 if (ri != NULL &&
7638 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7639 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7640 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641 rec->bad_full_backref = 1;
7645 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7646 rec->flag_block_full_backref = 1;
7647 parent = bytenr;
7648 owner = 0;
7649 } else {
7650 rec->flag_block_full_backref = 0;
7651 parent = 0;
7652 owner = btrfs_header_owner(buf);
7655 ret = check_block(root, extent_cache, buf, flags);
7656 if (ret)
7657 goto out;
7659 if (btrfs_is_leaf(buf)) {
7660 btree_space_waste += btrfs_leaf_free_space(root, buf);
7661 for (i = 0; i < nritems; i++) {
7662 struct btrfs_file_extent_item *fi;
7663 btrfs_item_key_to_cpu(buf, &key, i);
7665 * Check key type against the leaf owner.
7666 * Could filter quite a lot of early error if
7667 * owner is correct
7669 if (check_type_with_root(btrfs_header_owner(buf),
7670 key.type)) {
7671 fprintf(stderr, "ignoring invalid key\n");
7672 continue;
7674 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7675 process_extent_item(root, extent_cache, buf,
7677 continue;
7679 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7680 process_extent_item(root, extent_cache, buf,
7682 continue;
7684 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7685 total_csum_bytes +=
7686 btrfs_item_size_nr(buf, i);
7687 continue;
7689 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7690 process_chunk_item(chunk_cache, &key, buf, i);
7691 continue;
7693 if (key.type == BTRFS_DEV_ITEM_KEY) {
7694 process_device_item(dev_cache, &key, buf, i);
7695 continue;
7697 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7698 process_block_group_item(block_group_cache,
7699 &key, buf, i);
7700 continue;
7702 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7703 process_device_extent_item(dev_extent_cache,
7704 &key, buf, i);
7705 continue;
7708 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7709 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7710 process_extent_ref_v0(extent_cache, buf, i);
7711 #else
7712 BUG();
7713 #endif
7714 continue;
7717 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7718 ret = add_tree_backref(extent_cache,
7719 key.objectid, 0, key.offset, 0);
7720 if (ret < 0)
7721 error("add_tree_backref failed: %s",
7722 strerror(-ret));
7723 continue;
7725 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7726 ret = add_tree_backref(extent_cache,
7727 key.objectid, key.offset, 0, 0);
7728 if (ret < 0)
7729 error("add_tree_backref failed: %s",
7730 strerror(-ret));
7731 continue;
7733 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7734 struct btrfs_extent_data_ref *ref;
7735 ref = btrfs_item_ptr(buf, i,
7736 struct btrfs_extent_data_ref);
7737 add_data_backref(extent_cache,
7738 key.objectid, 0,
7739 btrfs_extent_data_ref_root(buf, ref),
7740 btrfs_extent_data_ref_objectid(buf,
7741 ref),
7742 btrfs_extent_data_ref_offset(buf, ref),
7743 btrfs_extent_data_ref_count(buf, ref),
7744 0, root->sectorsize);
7745 continue;
7747 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7748 struct btrfs_shared_data_ref *ref;
7749 ref = btrfs_item_ptr(buf, i,
7750 struct btrfs_shared_data_ref);
7751 add_data_backref(extent_cache,
7752 key.objectid, key.offset, 0, 0, 0,
7753 btrfs_shared_data_ref_count(buf, ref),
7754 0, root->sectorsize);
7755 continue;
7757 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7758 struct bad_item *bad;
7760 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7761 continue;
7762 if (!owner)
7763 continue;
7764 bad = malloc(sizeof(struct bad_item));
7765 if (!bad)
7766 continue;
7767 INIT_LIST_HEAD(&bad->list);
7768 memcpy(&bad->key, &key,
7769 sizeof(struct btrfs_key));
7770 bad->root_id = owner;
7771 list_add_tail(&bad->list, &delete_items);
7772 continue;
7774 if (key.type != BTRFS_EXTENT_DATA_KEY)
7775 continue;
7776 fi = btrfs_item_ptr(buf, i,
7777 struct btrfs_file_extent_item);
7778 if (btrfs_file_extent_type(buf, fi) ==
7779 BTRFS_FILE_EXTENT_INLINE)
7780 continue;
7781 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7782 continue;
7784 data_bytes_allocated +=
7785 btrfs_file_extent_disk_num_bytes(buf, fi);
7786 if (data_bytes_allocated < root->sectorsize) {
7787 abort();
7789 data_bytes_referenced +=
7790 btrfs_file_extent_num_bytes(buf, fi);
7791 add_data_backref(extent_cache,
7792 btrfs_file_extent_disk_bytenr(buf, fi),
7793 parent, owner, key.objectid, key.offset -
7794 btrfs_file_extent_offset(buf, fi), 1, 1,
7795 btrfs_file_extent_disk_num_bytes(buf, fi));
7797 } else {
7798 int level;
7799 struct btrfs_key first_key;
7801 first_key.objectid = 0;
7803 if (nritems > 0)
7804 btrfs_item_key_to_cpu(buf, &first_key, 0);
7805 level = btrfs_header_level(buf);
7806 for (i = 0; i < nritems; i++) {
7807 struct extent_record tmpl;
7809 ptr = btrfs_node_blockptr(buf, i);
7810 size = root->nodesize;
7811 btrfs_node_key_to_cpu(buf, &key, i);
7812 if (ri != NULL) {
7813 if ((level == ri->drop_level)
7814 && is_dropped_key(&key, &ri->drop_key)) {
7815 continue;
7819 memset(&tmpl, 0, sizeof(tmpl));
7820 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7821 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7822 tmpl.start = ptr;
7823 tmpl.nr = size;
7824 tmpl.refs = 1;
7825 tmpl.metadata = 1;
7826 tmpl.max_size = size;
7827 ret = add_extent_rec(extent_cache, &tmpl);
7828 if (ret < 0)
7829 goto out;
7831 ret = add_tree_backref(extent_cache, ptr, parent,
7832 owner, 1);
7833 if (ret < 0) {
7834 error("add_tree_backref failed: %s",
7835 strerror(-ret));
7836 continue;
7839 if (level > 1) {
7840 add_pending(nodes, seen, ptr, size);
7841 } else {
7842 add_pending(pending, seen, ptr, size);
7845 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7846 nritems) * sizeof(struct btrfs_key_ptr);
7848 total_btree_bytes += buf->len;
7849 if (fs_root_objectid(btrfs_header_owner(buf)))
7850 total_fs_tree_bytes += buf->len;
7851 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7852 total_extent_tree_bytes += buf->len;
7853 if (!found_old_backref &&
7854 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7855 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7856 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7857 found_old_backref = 1;
7858 out:
7859 free_extent_buffer(buf);
7860 return ret;
7863 static int add_root_to_pending(struct extent_buffer *buf,
7864 struct cache_tree *extent_cache,
7865 struct cache_tree *pending,
7866 struct cache_tree *seen,
7867 struct cache_tree *nodes,
7868 u64 objectid)
7870 struct extent_record tmpl;
7871 int ret;
7873 if (btrfs_header_level(buf) > 0)
7874 add_pending(nodes, seen, buf->start, buf->len);
7875 else
7876 add_pending(pending, seen, buf->start, buf->len);
7878 memset(&tmpl, 0, sizeof(tmpl));
7879 tmpl.start = buf->start;
7880 tmpl.nr = buf->len;
7881 tmpl.is_root = 1;
7882 tmpl.refs = 1;
7883 tmpl.metadata = 1;
7884 tmpl.max_size = buf->len;
7885 add_extent_rec(extent_cache, &tmpl);
7887 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7888 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7889 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7890 0, 1);
7891 else
7892 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7894 return ret;
7897 /* as we fix the tree, we might be deleting blocks that
7898 * we're tracking for repair. This hook makes sure we
7899 * remove any backrefs for blocks as we are fixing them.
7901 static int free_extent_hook(struct btrfs_trans_handle *trans,
7902 struct btrfs_root *root,
7903 u64 bytenr, u64 num_bytes, u64 parent,
7904 u64 root_objectid, u64 owner, u64 offset,
7905 int refs_to_drop)
7907 struct extent_record *rec;
7908 struct cache_extent *cache;
7909 int is_data;
7910 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7912 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7913 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7914 if (!cache)
7915 return 0;
7917 rec = container_of(cache, struct extent_record, cache);
7918 if (is_data) {
7919 struct data_backref *back;
7920 back = find_data_backref(rec, parent, root_objectid, owner,
7921 offset, 1, bytenr, num_bytes);
7922 if (!back)
7923 goto out;
7924 if (back->node.found_ref) {
7925 back->found_ref -= refs_to_drop;
7926 if (rec->refs)
7927 rec->refs -= refs_to_drop;
7929 if (back->node.found_extent_tree) {
7930 back->num_refs -= refs_to_drop;
7931 if (rec->extent_item_refs)
7932 rec->extent_item_refs -= refs_to_drop;
7934 if (back->found_ref == 0)
7935 back->node.found_ref = 0;
7936 if (back->num_refs == 0)
7937 back->node.found_extent_tree = 0;
7939 if (!back->node.found_extent_tree && back->node.found_ref) {
7940 list_del(&back->node.list);
7941 free(back);
7943 } else {
7944 struct tree_backref *back;
7945 back = find_tree_backref(rec, parent, root_objectid);
7946 if (!back)
7947 goto out;
7948 if (back->node.found_ref) {
7949 if (rec->refs)
7950 rec->refs--;
7951 back->node.found_ref = 0;
7953 if (back->node.found_extent_tree) {
7954 if (rec->extent_item_refs)
7955 rec->extent_item_refs--;
7956 back->node.found_extent_tree = 0;
7958 if (!back->node.found_extent_tree && back->node.found_ref) {
7959 list_del(&back->node.list);
7960 free(back);
7963 maybe_free_extent_rec(extent_cache, rec);
7964 out:
7965 return 0;
7968 static int delete_extent_records(struct btrfs_trans_handle *trans,
7969 struct btrfs_root *root,
7970 struct btrfs_path *path,
7971 u64 bytenr)
7973 struct btrfs_key key;
7974 struct btrfs_key found_key;
7975 struct extent_buffer *leaf;
7976 int ret;
7977 int slot;
7980 key.objectid = bytenr;
7981 key.type = (u8)-1;
7982 key.offset = (u64)-1;
7984 while(1) {
7985 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7986 &key, path, 0, 1);
7987 if (ret < 0)
7988 break;
7990 if (ret > 0) {
7991 ret = 0;
7992 if (path->slots[0] == 0)
7993 break;
7994 path->slots[0]--;
7996 ret = 0;
7998 leaf = path->nodes[0];
7999 slot = path->slots[0];
8001 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8002 if (found_key.objectid != bytenr)
8003 break;
8005 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8006 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8007 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8008 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8009 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8010 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8011 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8012 btrfs_release_path(path);
8013 if (found_key.type == 0) {
8014 if (found_key.offset == 0)
8015 break;
8016 key.offset = found_key.offset - 1;
8017 key.type = found_key.type;
8019 key.type = found_key.type - 1;
8020 key.offset = (u64)-1;
8021 continue;
8024 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8025 found_key.objectid, found_key.type, found_key.offset);
8027 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8028 if (ret)
8029 break;
8030 btrfs_release_path(path);
8032 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8033 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8034 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8035 found_key.offset : root->nodesize;
8037 ret = btrfs_update_block_group(trans, root, bytenr,
8038 bytes, 0, 0);
8039 if (ret)
8040 break;
8044 btrfs_release_path(path);
8045 return ret;
8049 * for a single backref, this will allocate a new extent
8050 * and add the backref to it.
8052 static int record_extent(struct btrfs_trans_handle *trans,
8053 struct btrfs_fs_info *info,
8054 struct btrfs_path *path,
8055 struct extent_record *rec,
8056 struct extent_backref *back,
8057 int allocated, u64 flags)
8059 int ret = 0;
8060 struct btrfs_root *extent_root = info->extent_root;
8061 struct extent_buffer *leaf;
8062 struct btrfs_key ins_key;
8063 struct btrfs_extent_item *ei;
8064 struct data_backref *dback;
8065 struct btrfs_tree_block_info *bi;
8067 if (!back->is_data)
8068 rec->max_size = max_t(u64, rec->max_size,
8069 info->extent_root->nodesize);
8071 if (!allocated) {
8072 u32 item_size = sizeof(*ei);
8074 if (!back->is_data)
8075 item_size += sizeof(*bi);
8077 ins_key.objectid = rec->start;
8078 ins_key.offset = rec->max_size;
8079 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8081 ret = btrfs_insert_empty_item(trans, extent_root, path,
8082 &ins_key, item_size);
8083 if (ret)
8084 goto fail;
8086 leaf = path->nodes[0];
8087 ei = btrfs_item_ptr(leaf, path->slots[0],
8088 struct btrfs_extent_item);
8090 btrfs_set_extent_refs(leaf, ei, 0);
8091 btrfs_set_extent_generation(leaf, ei, rec->generation);
8093 if (back->is_data) {
8094 btrfs_set_extent_flags(leaf, ei,
8095 BTRFS_EXTENT_FLAG_DATA);
8096 } else {
8097 struct btrfs_disk_key copy_key;;
8099 bi = (struct btrfs_tree_block_info *)(ei + 1);
8100 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8101 sizeof(*bi));
8103 btrfs_set_disk_key_objectid(&copy_key,
8104 rec->info_objectid);
8105 btrfs_set_disk_key_type(&copy_key, 0);
8106 btrfs_set_disk_key_offset(&copy_key, 0);
8108 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8109 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8111 btrfs_set_extent_flags(leaf, ei,
8112 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8115 btrfs_mark_buffer_dirty(leaf);
8116 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8117 rec->max_size, 1, 0);
8118 if (ret)
8119 goto fail;
8120 btrfs_release_path(path);
8123 if (back->is_data) {
8124 u64 parent;
8125 int i;
8127 dback = to_data_backref(back);
8128 if (back->full_backref)
8129 parent = dback->parent;
8130 else
8131 parent = 0;
8133 for (i = 0; i < dback->found_ref; i++) {
8134 /* if parent != 0, we're doing a full backref
8135 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8136 * just makes the backref allocator create a data
8137 * backref
8139 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8140 rec->start, rec->max_size,
8141 parent,
8142 dback->root,
8143 parent ?
8144 BTRFS_FIRST_FREE_OBJECTID :
8145 dback->owner,
8146 dback->offset);
8147 if (ret)
8148 break;
8150 fprintf(stderr, "adding new data backref"
8151 " on %llu %s %llu owner %llu"
8152 " offset %llu found %d\n",
8153 (unsigned long long)rec->start,
8154 back->full_backref ?
8155 "parent" : "root",
8156 back->full_backref ?
8157 (unsigned long long)parent :
8158 (unsigned long long)dback->root,
8159 (unsigned long long)dback->owner,
8160 (unsigned long long)dback->offset,
8161 dback->found_ref);
8162 } else {
8163 u64 parent;
8164 struct tree_backref *tback;
8166 tback = to_tree_backref(back);
8167 if (back->full_backref)
8168 parent = tback->parent;
8169 else
8170 parent = 0;
8172 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8173 rec->start, rec->max_size,
8174 parent, tback->root, 0, 0);
8175 fprintf(stderr, "adding new tree backref on "
8176 "start %llu len %llu parent %llu root %llu\n",
8177 rec->start, rec->max_size, parent, tback->root);
8179 fail:
8180 btrfs_release_path(path);
8181 return ret;
8184 static struct extent_entry *find_entry(struct list_head *entries,
8185 u64 bytenr, u64 bytes)
8187 struct extent_entry *entry = NULL;
8189 list_for_each_entry(entry, entries, list) {
8190 if (entry->bytenr == bytenr && entry->bytes == bytes)
8191 return entry;
8194 return NULL;
8197 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8199 struct extent_entry *entry, *best = NULL, *prev = NULL;
8201 list_for_each_entry(entry, entries, list) {
8203 * If there are as many broken entries as entries then we know
8204 * not to trust this particular entry.
8206 if (entry->broken == entry->count)
8207 continue;
8210 * Special case, when there are only two entries and 'best' is
8211 * the first one
8213 if (!prev) {
8214 best = entry;
8215 prev = entry;
8216 continue;
8220 * If our current entry == best then we can't be sure our best
8221 * is really the best, so we need to keep searching.
8223 if (best && best->count == entry->count) {
8224 prev = entry;
8225 best = NULL;
8226 continue;
8229 /* Prev == entry, not good enough, have to keep searching */
8230 if (!prev->broken && prev->count == entry->count)
8231 continue;
8233 if (!best)
8234 best = (prev->count > entry->count) ? prev : entry;
8235 else if (best->count < entry->count)
8236 best = entry;
8237 prev = entry;
8240 return best;
8243 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8244 struct data_backref *dback, struct extent_entry *entry)
8246 struct btrfs_trans_handle *trans;
8247 struct btrfs_root *root;
8248 struct btrfs_file_extent_item *fi;
8249 struct extent_buffer *leaf;
8250 struct btrfs_key key;
8251 u64 bytenr, bytes;
8252 int ret, err;
8254 key.objectid = dback->root;
8255 key.type = BTRFS_ROOT_ITEM_KEY;
8256 key.offset = (u64)-1;
8257 root = btrfs_read_fs_root(info, &key);
8258 if (IS_ERR(root)) {
8259 fprintf(stderr, "Couldn't find root for our ref\n");
8260 return -EINVAL;
8264 * The backref points to the original offset of the extent if it was
8265 * split, so we need to search down to the offset we have and then walk
8266 * forward until we find the backref we're looking for.
8268 key.objectid = dback->owner;
8269 key.type = BTRFS_EXTENT_DATA_KEY;
8270 key.offset = dback->offset;
8271 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8272 if (ret < 0) {
8273 fprintf(stderr, "Error looking up ref %d\n", ret);
8274 return ret;
8277 while (1) {
8278 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8279 ret = btrfs_next_leaf(root, path);
8280 if (ret) {
8281 fprintf(stderr, "Couldn't find our ref, next\n");
8282 return -EINVAL;
8285 leaf = path->nodes[0];
8286 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8287 if (key.objectid != dback->owner ||
8288 key.type != BTRFS_EXTENT_DATA_KEY) {
8289 fprintf(stderr, "Couldn't find our ref, search\n");
8290 return -EINVAL;
8292 fi = btrfs_item_ptr(leaf, path->slots[0],
8293 struct btrfs_file_extent_item);
8294 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8295 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8297 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8298 break;
8299 path->slots[0]++;
8302 btrfs_release_path(path);
8304 trans = btrfs_start_transaction(root, 1);
8305 if (IS_ERR(trans))
8306 return PTR_ERR(trans);
8309 * Ok we have the key of the file extent we want to fix, now we can cow
8310 * down to the thing and fix it.
8312 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8313 if (ret < 0) {
8314 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8315 key.objectid, key.type, key.offset, ret);
8316 goto out;
8318 if (ret > 0) {
8319 fprintf(stderr, "Well that's odd, we just found this key "
8320 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8321 key.offset);
8322 ret = -EINVAL;
8323 goto out;
8325 leaf = path->nodes[0];
8326 fi = btrfs_item_ptr(leaf, path->slots[0],
8327 struct btrfs_file_extent_item);
8329 if (btrfs_file_extent_compression(leaf, fi) &&
8330 dback->disk_bytenr != entry->bytenr) {
8331 fprintf(stderr, "Ref doesn't match the record start and is "
8332 "compressed, please take a btrfs-image of this file "
8333 "system and send it to a btrfs developer so they can "
8334 "complete this functionality for bytenr %Lu\n",
8335 dback->disk_bytenr);
8336 ret = -EINVAL;
8337 goto out;
8340 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8341 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8342 } else if (dback->disk_bytenr > entry->bytenr) {
8343 u64 off_diff, offset;
8345 off_diff = dback->disk_bytenr - entry->bytenr;
8346 offset = btrfs_file_extent_offset(leaf, fi);
8347 if (dback->disk_bytenr + offset +
8348 btrfs_file_extent_num_bytes(leaf, fi) >
8349 entry->bytenr + entry->bytes) {
8350 fprintf(stderr, "Ref is past the entry end, please "
8351 "take a btrfs-image of this file system and "
8352 "send it to a btrfs developer, ref %Lu\n",
8353 dback->disk_bytenr);
8354 ret = -EINVAL;
8355 goto out;
8357 offset += off_diff;
8358 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8359 btrfs_set_file_extent_offset(leaf, fi, offset);
8360 } else if (dback->disk_bytenr < entry->bytenr) {
8361 u64 offset;
8363 offset = btrfs_file_extent_offset(leaf, fi);
8364 if (dback->disk_bytenr + offset < entry->bytenr) {
8365 fprintf(stderr, "Ref is before the entry start, please"
8366 " take a btrfs-image of this file system and "
8367 "send it to a btrfs developer, ref %Lu\n",
8368 dback->disk_bytenr);
8369 ret = -EINVAL;
8370 goto out;
8373 offset += dback->disk_bytenr;
8374 offset -= entry->bytenr;
8375 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8376 btrfs_set_file_extent_offset(leaf, fi, offset);
8379 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8382 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8383 * only do this if we aren't using compression, otherwise it's a
8384 * trickier case.
8386 if (!btrfs_file_extent_compression(leaf, fi))
8387 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8388 else
8389 printf("ram bytes may be wrong?\n");
8390 btrfs_mark_buffer_dirty(leaf);
8391 out:
8392 err = btrfs_commit_transaction(trans, root);
8393 btrfs_release_path(path);
8394 return ret ? ret : err;
8397 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8398 struct extent_record *rec)
8400 struct extent_backref *back;
8401 struct data_backref *dback;
8402 struct extent_entry *entry, *best = NULL;
8403 LIST_HEAD(entries);
8404 int nr_entries = 0;
8405 int broken_entries = 0;
8406 int ret = 0;
8407 short mismatch = 0;
8410 * Metadata is easy and the backrefs should always agree on bytenr and
8411 * size, if not we've got bigger issues.
8413 if (rec->metadata)
8414 return 0;
8416 list_for_each_entry(back, &rec->backrefs, list) {
8417 if (back->full_backref || !back->is_data)
8418 continue;
8420 dback = to_data_backref(back);
8423 * We only pay attention to backrefs that we found a real
8424 * backref for.
8426 if (dback->found_ref == 0)
8427 continue;
8430 * For now we only catch when the bytes don't match, not the
8431 * bytenr. We can easily do this at the same time, but I want
8432 * to have a fs image to test on before we just add repair
8433 * functionality willy-nilly so we know we won't screw up the
8434 * repair.
8437 entry = find_entry(&entries, dback->disk_bytenr,
8438 dback->bytes);
8439 if (!entry) {
8440 entry = malloc(sizeof(struct extent_entry));
8441 if (!entry) {
8442 ret = -ENOMEM;
8443 goto out;
8445 memset(entry, 0, sizeof(*entry));
8446 entry->bytenr = dback->disk_bytenr;
8447 entry->bytes = dback->bytes;
8448 list_add_tail(&entry->list, &entries);
8449 nr_entries++;
8453 * If we only have on entry we may think the entries agree when
8454 * in reality they don't so we have to do some extra checking.
8456 if (dback->disk_bytenr != rec->start ||
8457 dback->bytes != rec->nr || back->broken)
8458 mismatch = 1;
8460 if (back->broken) {
8461 entry->broken++;
8462 broken_entries++;
8465 entry->count++;
8468 /* Yay all the backrefs agree, carry on good sir */
8469 if (nr_entries <= 1 && !mismatch)
8470 goto out;
8472 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8473 "%Lu\n", rec->start);
8476 * First we want to see if the backrefs can agree amongst themselves who
8477 * is right, so figure out which one of the entries has the highest
8478 * count.
8480 best = find_most_right_entry(&entries);
8483 * Ok so we may have an even split between what the backrefs think, so
8484 * this is where we use the extent ref to see what it thinks.
8486 if (!best) {
8487 entry = find_entry(&entries, rec->start, rec->nr);
8488 if (!entry && (!broken_entries || !rec->found_rec)) {
8489 fprintf(stderr, "Backrefs don't agree with each other "
8490 "and extent record doesn't agree with anybody,"
8491 " so we can't fix bytenr %Lu bytes %Lu\n",
8492 rec->start, rec->nr);
8493 ret = -EINVAL;
8494 goto out;
8495 } else if (!entry) {
8497 * Ok our backrefs were broken, we'll assume this is the
8498 * correct value and add an entry for this range.
8500 entry = malloc(sizeof(struct extent_entry));
8501 if (!entry) {
8502 ret = -ENOMEM;
8503 goto out;
8505 memset(entry, 0, sizeof(*entry));
8506 entry->bytenr = rec->start;
8507 entry->bytes = rec->nr;
8508 list_add_tail(&entry->list, &entries);
8509 nr_entries++;
8511 entry->count++;
8512 best = find_most_right_entry(&entries);
8513 if (!best) {
8514 fprintf(stderr, "Backrefs and extent record evenly "
8515 "split on who is right, this is going to "
8516 "require user input to fix bytenr %Lu bytes "
8517 "%Lu\n", rec->start, rec->nr);
8518 ret = -EINVAL;
8519 goto out;
8524 * I don't think this can happen currently as we'll abort() if we catch
8525 * this case higher up, but in case somebody removes that we still can't
8526 * deal with it properly here yet, so just bail out of that's the case.
8528 if (best->bytenr != rec->start) {
8529 fprintf(stderr, "Extent start and backref starts don't match, "
8530 "please use btrfs-image on this file system and send "
8531 "it to a btrfs developer so they can make fsck fix "
8532 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8533 rec->start, rec->nr);
8534 ret = -EINVAL;
8535 goto out;
8539 * Ok great we all agreed on an extent record, let's go find the real
8540 * references and fix up the ones that don't match.
8542 list_for_each_entry(back, &rec->backrefs, list) {
8543 if (back->full_backref || !back->is_data)
8544 continue;
8546 dback = to_data_backref(back);
8549 * Still ignoring backrefs that don't have a real ref attached
8550 * to them.
8552 if (dback->found_ref == 0)
8553 continue;
8555 if (dback->bytes == best->bytes &&
8556 dback->disk_bytenr == best->bytenr)
8557 continue;
8559 ret = repair_ref(info, path, dback, best);
8560 if (ret)
8561 goto out;
8565 * Ok we messed with the actual refs, which means we need to drop our
8566 * entire cache and go back and rescan. I know this is a huge pain and
8567 * adds a lot of extra work, but it's the only way to be safe. Once all
8568 * the backrefs agree we may not need to do anything to the extent
8569 * record itself.
8571 ret = -EAGAIN;
8572 out:
8573 while (!list_empty(&entries)) {
8574 entry = list_entry(entries.next, struct extent_entry, list);
8575 list_del_init(&entry->list);
8576 free(entry);
8578 return ret;
8581 static int process_duplicates(struct btrfs_root *root,
8582 struct cache_tree *extent_cache,
8583 struct extent_record *rec)
8585 struct extent_record *good, *tmp;
8586 struct cache_extent *cache;
8587 int ret;
8590 * If we found a extent record for this extent then return, or if we
8591 * have more than one duplicate we are likely going to need to delete
8592 * something.
8594 if (rec->found_rec || rec->num_duplicates > 1)
8595 return 0;
8597 /* Shouldn't happen but just in case */
8598 BUG_ON(!rec->num_duplicates);
8601 * So this happens if we end up with a backref that doesn't match the
8602 * actual extent entry. So either the backref is bad or the extent
8603 * entry is bad. Either way we want to have the extent_record actually
8604 * reflect what we found in the extent_tree, so we need to take the
8605 * duplicate out and use that as the extent_record since the only way we
8606 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8608 remove_cache_extent(extent_cache, &rec->cache);
8610 good = to_extent_record(rec->dups.next);
8611 list_del_init(&good->list);
8612 INIT_LIST_HEAD(&good->backrefs);
8613 INIT_LIST_HEAD(&good->dups);
8614 good->cache.start = good->start;
8615 good->cache.size = good->nr;
8616 good->content_checked = 0;
8617 good->owner_ref_checked = 0;
8618 good->num_duplicates = 0;
8619 good->refs = rec->refs;
8620 list_splice_init(&rec->backrefs, &good->backrefs);
8621 while (1) {
8622 cache = lookup_cache_extent(extent_cache, good->start,
8623 good->nr);
8624 if (!cache)
8625 break;
8626 tmp = container_of(cache, struct extent_record, cache);
8629 * If we find another overlapping extent and it's found_rec is
8630 * set then it's a duplicate and we need to try and delete
8631 * something.
8633 if (tmp->found_rec || tmp->num_duplicates > 0) {
8634 if (list_empty(&good->list))
8635 list_add_tail(&good->list,
8636 &duplicate_extents);
8637 good->num_duplicates += tmp->num_duplicates + 1;
8638 list_splice_init(&tmp->dups, &good->dups);
8639 list_del_init(&tmp->list);
8640 list_add_tail(&tmp->list, &good->dups);
8641 remove_cache_extent(extent_cache, &tmp->cache);
8642 continue;
8646 * Ok we have another non extent item backed extent rec, so lets
8647 * just add it to this extent and carry on like we did above.
8649 good->refs += tmp->refs;
8650 list_splice_init(&tmp->backrefs, &good->backrefs);
8651 remove_cache_extent(extent_cache, &tmp->cache);
8652 free(tmp);
8654 ret = insert_cache_extent(extent_cache, &good->cache);
8655 BUG_ON(ret);
8656 free(rec);
8657 return good->num_duplicates ? 0 : 1;
8660 static int delete_duplicate_records(struct btrfs_root *root,
8661 struct extent_record *rec)
8663 struct btrfs_trans_handle *trans;
8664 LIST_HEAD(delete_list);
8665 struct btrfs_path path;
8666 struct extent_record *tmp, *good, *n;
8667 int nr_del = 0;
8668 int ret = 0, err;
8669 struct btrfs_key key;
8671 btrfs_init_path(&path);
8673 good = rec;
8674 /* Find the record that covers all of the duplicates. */
8675 list_for_each_entry(tmp, &rec->dups, list) {
8676 if (good->start < tmp->start)
8677 continue;
8678 if (good->nr > tmp->nr)
8679 continue;
8681 if (tmp->start + tmp->nr < good->start + good->nr) {
8682 fprintf(stderr, "Ok we have overlapping extents that "
8683 "aren't completely covered by each other, this "
8684 "is going to require more careful thought. "
8685 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8686 tmp->start, tmp->nr, good->start, good->nr);
8687 abort();
8689 good = tmp;
8692 if (good != rec)
8693 list_add_tail(&rec->list, &delete_list);
8695 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8696 if (tmp == good)
8697 continue;
8698 list_move_tail(&tmp->list, &delete_list);
8701 root = root->fs_info->extent_root;
8702 trans = btrfs_start_transaction(root, 1);
8703 if (IS_ERR(trans)) {
8704 ret = PTR_ERR(trans);
8705 goto out;
8708 list_for_each_entry(tmp, &delete_list, list) {
8709 if (tmp->found_rec == 0)
8710 continue;
8711 key.objectid = tmp->start;
8712 key.type = BTRFS_EXTENT_ITEM_KEY;
8713 key.offset = tmp->nr;
8715 /* Shouldn't happen but just in case */
8716 if (tmp->metadata) {
8717 fprintf(stderr, "Well this shouldn't happen, extent "
8718 "record overlaps but is metadata? "
8719 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8720 abort();
8723 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8724 if (ret) {
8725 if (ret > 0)
8726 ret = -EINVAL;
8727 break;
8729 ret = btrfs_del_item(trans, root, &path);
8730 if (ret)
8731 break;
8732 btrfs_release_path(&path);
8733 nr_del++;
8735 err = btrfs_commit_transaction(trans, root);
8736 if (err && !ret)
8737 ret = err;
8738 out:
8739 while (!list_empty(&delete_list)) {
8740 tmp = to_extent_record(delete_list.next);
8741 list_del_init(&tmp->list);
8742 if (tmp == rec)
8743 continue;
8744 free(tmp);
8747 while (!list_empty(&rec->dups)) {
8748 tmp = to_extent_record(rec->dups.next);
8749 list_del_init(&tmp->list);
8750 free(tmp);
8753 btrfs_release_path(&path);
8755 if (!ret && !nr_del)
8756 rec->num_duplicates = 0;
8758 return ret ? ret : nr_del;
8761 static int find_possible_backrefs(struct btrfs_fs_info *info,
8762 struct btrfs_path *path,
8763 struct cache_tree *extent_cache,
8764 struct extent_record *rec)
8766 struct btrfs_root *root;
8767 struct extent_backref *back;
8768 struct data_backref *dback;
8769 struct cache_extent *cache;
8770 struct btrfs_file_extent_item *fi;
8771 struct btrfs_key key;
8772 u64 bytenr, bytes;
8773 int ret;
8775 list_for_each_entry(back, &rec->backrefs, list) {
8776 /* Don't care about full backrefs (poor unloved backrefs) */
8777 if (back->full_backref || !back->is_data)
8778 continue;
8780 dback = to_data_backref(back);
8782 /* We found this one, we don't need to do a lookup */
8783 if (dback->found_ref)
8784 continue;
8786 key.objectid = dback->root;
8787 key.type = BTRFS_ROOT_ITEM_KEY;
8788 key.offset = (u64)-1;
8790 root = btrfs_read_fs_root(info, &key);
8792 /* No root, definitely a bad ref, skip */
8793 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8794 continue;
8795 /* Other err, exit */
8796 if (IS_ERR(root))
8797 return PTR_ERR(root);
8799 key.objectid = dback->owner;
8800 key.type = BTRFS_EXTENT_DATA_KEY;
8801 key.offset = dback->offset;
8802 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8803 if (ret) {
8804 btrfs_release_path(path);
8805 if (ret < 0)
8806 return ret;
8807 /* Didn't find it, we can carry on */
8808 ret = 0;
8809 continue;
8812 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8813 struct btrfs_file_extent_item);
8814 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8815 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8816 btrfs_release_path(path);
8817 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8818 if (cache) {
8819 struct extent_record *tmp;
8820 tmp = container_of(cache, struct extent_record, cache);
8823 * If we found an extent record for the bytenr for this
8824 * particular backref then we can't add it to our
8825 * current extent record. We only want to add backrefs
8826 * that don't have a corresponding extent item in the
8827 * extent tree since they likely belong to this record
8828 * and we need to fix it if it doesn't match bytenrs.
8830 if (tmp->found_rec)
8831 continue;
8834 dback->found_ref += 1;
8835 dback->disk_bytenr = bytenr;
8836 dback->bytes = bytes;
8839 * Set this so the verify backref code knows not to trust the
8840 * values in this backref.
8842 back->broken = 1;
8845 return 0;
8849 * Record orphan data ref into corresponding root.
8851 * Return 0 if the extent item contains data ref and recorded.
8852 * Return 1 if the extent item contains no useful data ref
8853 * On that case, it may contains only shared_dataref or metadata backref
8854 * or the file extent exists(this should be handled by the extent bytenr
8855 * recovery routine)
8856 * Return <0 if something goes wrong.
8858 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8859 struct extent_record *rec)
8861 struct btrfs_key key;
8862 struct btrfs_root *dest_root;
8863 struct extent_backref *back;
8864 struct data_backref *dback;
8865 struct orphan_data_extent *orphan;
8866 struct btrfs_path path;
8867 int recorded_data_ref = 0;
8868 int ret = 0;
8870 if (rec->metadata)
8871 return 1;
8872 btrfs_init_path(&path);
8873 list_for_each_entry(back, &rec->backrefs, list) {
8874 if (back->full_backref || !back->is_data ||
8875 !back->found_extent_tree)
8876 continue;
8877 dback = to_data_backref(back);
8878 if (dback->found_ref)
8879 continue;
8880 key.objectid = dback->root;
8881 key.type = BTRFS_ROOT_ITEM_KEY;
8882 key.offset = (u64)-1;
8884 dest_root = btrfs_read_fs_root(fs_info, &key);
8886 /* For non-exist root we just skip it */
8887 if (IS_ERR(dest_root) || !dest_root)
8888 continue;
8890 key.objectid = dback->owner;
8891 key.type = BTRFS_EXTENT_DATA_KEY;
8892 key.offset = dback->offset;
8894 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8895 btrfs_release_path(&path);
8897 * For ret < 0, it's OK since the fs-tree may be corrupted,
8898 * we need to record it for inode/file extent rebuild.
8899 * For ret > 0, we record it only for file extent rebuild.
8900 * For ret == 0, the file extent exists but only bytenr
8901 * mismatch, let the original bytenr fix routine to handle,
8902 * don't record it.
8904 if (ret == 0)
8905 continue;
8906 ret = 0;
8907 orphan = malloc(sizeof(*orphan));
8908 if (!orphan) {
8909 ret = -ENOMEM;
8910 goto out;
8912 INIT_LIST_HEAD(&orphan->list);
8913 orphan->root = dback->root;
8914 orphan->objectid = dback->owner;
8915 orphan->offset = dback->offset;
8916 orphan->disk_bytenr = rec->cache.start;
8917 orphan->disk_len = rec->cache.size;
8918 list_add(&dest_root->orphan_data_extents, &orphan->list);
8919 recorded_data_ref = 1;
8921 out:
8922 btrfs_release_path(&path);
8923 if (!ret)
8924 return !recorded_data_ref;
8925 else
8926 return ret;
8930 * when an incorrect extent item is found, this will delete
8931 * all of the existing entries for it and recreate them
8932 * based on what the tree scan found.
8934 static int fixup_extent_refs(struct btrfs_fs_info *info,
8935 struct cache_tree *extent_cache,
8936 struct extent_record *rec)
8938 struct btrfs_trans_handle *trans = NULL;
8939 int ret;
8940 struct btrfs_path path;
8941 struct list_head *cur = rec->backrefs.next;
8942 struct cache_extent *cache;
8943 struct extent_backref *back;
8944 int allocated = 0;
8945 u64 flags = 0;
8947 if (rec->flag_block_full_backref)
8948 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8950 btrfs_init_path(&path);
8951 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8953 * Sometimes the backrefs themselves are so broken they don't
8954 * get attached to any meaningful rec, so first go back and
8955 * check any of our backrefs that we couldn't find and throw
8956 * them into the list if we find the backref so that
8957 * verify_backrefs can figure out what to do.
8959 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8960 if (ret < 0)
8961 goto out;
8964 /* step one, make sure all of the backrefs agree */
8965 ret = verify_backrefs(info, &path, rec);
8966 if (ret < 0)
8967 goto out;
8969 trans = btrfs_start_transaction(info->extent_root, 1);
8970 if (IS_ERR(trans)) {
8971 ret = PTR_ERR(trans);
8972 goto out;
8975 /* step two, delete all the existing records */
8976 ret = delete_extent_records(trans, info->extent_root, &path,
8977 rec->start);
8979 if (ret < 0)
8980 goto out;
8982 /* was this block corrupt? If so, don't add references to it */
8983 cache = lookup_cache_extent(info->corrupt_blocks,
8984 rec->start, rec->max_size);
8985 if (cache) {
8986 ret = 0;
8987 goto out;
8990 /* step three, recreate all the refs we did find */
8991 while(cur != &rec->backrefs) {
8992 back = to_extent_backref(cur);
8993 cur = cur->next;
8996 * if we didn't find any references, don't create a
8997 * new extent record
8999 if (!back->found_ref)
9000 continue;
9002 rec->bad_full_backref = 0;
9003 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9004 allocated = 1;
9006 if (ret)
9007 goto out;
9009 out:
9010 if (trans) {
9011 int err = btrfs_commit_transaction(trans, info->extent_root);
9012 if (!ret)
9013 ret = err;
9016 if (!ret)
9017 fprintf(stderr, "Repaired extent references for %llu\n",
9018 (unsigned long long)rec->start);
9020 btrfs_release_path(&path);
9021 return ret;
9024 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9025 struct extent_record *rec)
9027 struct btrfs_trans_handle *trans;
9028 struct btrfs_root *root = fs_info->extent_root;
9029 struct btrfs_path path;
9030 struct btrfs_extent_item *ei;
9031 struct btrfs_key key;
9032 u64 flags;
9033 int ret = 0;
9035 key.objectid = rec->start;
9036 if (rec->metadata) {
9037 key.type = BTRFS_METADATA_ITEM_KEY;
9038 key.offset = rec->info_level;
9039 } else {
9040 key.type = BTRFS_EXTENT_ITEM_KEY;
9041 key.offset = rec->max_size;
9044 trans = btrfs_start_transaction(root, 0);
9045 if (IS_ERR(trans))
9046 return PTR_ERR(trans);
9048 btrfs_init_path(&path);
9049 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9050 if (ret < 0) {
9051 btrfs_release_path(&path);
9052 btrfs_commit_transaction(trans, root);
9053 return ret;
9054 } else if (ret) {
9055 fprintf(stderr, "Didn't find extent for %llu\n",
9056 (unsigned long long)rec->start);
9057 btrfs_release_path(&path);
9058 btrfs_commit_transaction(trans, root);
9059 return -ENOENT;
9062 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9063 struct btrfs_extent_item);
9064 flags = btrfs_extent_flags(path.nodes[0], ei);
9065 if (rec->flag_block_full_backref) {
9066 fprintf(stderr, "setting full backref on %llu\n",
9067 (unsigned long long)key.objectid);
9068 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9069 } else {
9070 fprintf(stderr, "clearing full backref on %llu\n",
9071 (unsigned long long)key.objectid);
9072 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9074 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9075 btrfs_mark_buffer_dirty(path.nodes[0]);
9076 btrfs_release_path(&path);
9077 ret = btrfs_commit_transaction(trans, root);
9078 if (!ret)
9079 fprintf(stderr, "Repaired extent flags for %llu\n",
9080 (unsigned long long)rec->start);
9082 return ret;
9085 /* right now we only prune from the extent allocation tree */
9086 static int prune_one_block(struct btrfs_trans_handle *trans,
9087 struct btrfs_fs_info *info,
9088 struct btrfs_corrupt_block *corrupt)
9090 int ret;
9091 struct btrfs_path path;
9092 struct extent_buffer *eb;
9093 u64 found;
9094 int slot;
9095 int nritems;
9096 int level = corrupt->level + 1;
9098 btrfs_init_path(&path);
9099 again:
9100 /* we want to stop at the parent to our busted block */
9101 path.lowest_level = level;
9103 ret = btrfs_search_slot(trans, info->extent_root,
9104 &corrupt->key, &path, -1, 1);
9106 if (ret < 0)
9107 goto out;
9109 eb = path.nodes[level];
9110 if (!eb) {
9111 ret = -ENOENT;
9112 goto out;
9116 * hopefully the search gave us the block we want to prune,
9117 * lets try that first
9119 slot = path.slots[level];
9120 found = btrfs_node_blockptr(eb, slot);
9121 if (found == corrupt->cache.start)
9122 goto del_ptr;
9124 nritems = btrfs_header_nritems(eb);
9126 /* the search failed, lets scan this node and hope we find it */
9127 for (slot = 0; slot < nritems; slot++) {
9128 found = btrfs_node_blockptr(eb, slot);
9129 if (found == corrupt->cache.start)
9130 goto del_ptr;
9133 * we couldn't find the bad block. TODO, search all the nodes for pointers
9134 * to this block
9136 if (eb == info->extent_root->node) {
9137 ret = -ENOENT;
9138 goto out;
9139 } else {
9140 level++;
9141 btrfs_release_path(&path);
9142 goto again;
9145 del_ptr:
9146 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9147 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9149 out:
9150 btrfs_release_path(&path);
9151 return ret;
9154 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9156 struct btrfs_trans_handle *trans = NULL;
9157 struct cache_extent *cache;
9158 struct btrfs_corrupt_block *corrupt;
9160 while (1) {
9161 cache = search_cache_extent(info->corrupt_blocks, 0);
9162 if (!cache)
9163 break;
9164 if (!trans) {
9165 trans = btrfs_start_transaction(info->extent_root, 1);
9166 if (IS_ERR(trans))
9167 return PTR_ERR(trans);
9169 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9170 prune_one_block(trans, info, corrupt);
9171 remove_cache_extent(info->corrupt_blocks, cache);
9173 if (trans)
9174 return btrfs_commit_transaction(trans, info->extent_root);
9175 return 0;
9178 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9180 struct btrfs_block_group_cache *cache;
9181 u64 start, end;
9182 int ret;
9184 while (1) {
9185 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9186 &start, &end, EXTENT_DIRTY);
9187 if (ret)
9188 break;
9189 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9190 GFP_NOFS);
9193 start = 0;
9194 while (1) {
9195 cache = btrfs_lookup_first_block_group(fs_info, start);
9196 if (!cache)
9197 break;
9198 if (cache->cached)
9199 cache->cached = 0;
9200 start = cache->key.objectid + cache->key.offset;
9204 static int check_extent_refs(struct btrfs_root *root,
9205 struct cache_tree *extent_cache)
9207 struct extent_record *rec;
9208 struct cache_extent *cache;
9209 int ret = 0;
9210 int had_dups = 0;
9212 if (repair) {
9214 * if we're doing a repair, we have to make sure
9215 * we don't allocate from the problem extents.
9216 * In the worst case, this will be all the
9217 * extents in the FS
9219 cache = search_cache_extent(extent_cache, 0);
9220 while(cache) {
9221 rec = container_of(cache, struct extent_record, cache);
9222 set_extent_dirty(root->fs_info->excluded_extents,
9223 rec->start,
9224 rec->start + rec->max_size - 1,
9225 GFP_NOFS);
9226 cache = next_cache_extent(cache);
9229 /* pin down all the corrupted blocks too */
9230 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9231 while(cache) {
9232 set_extent_dirty(root->fs_info->excluded_extents,
9233 cache->start,
9234 cache->start + cache->size - 1,
9235 GFP_NOFS);
9236 cache = next_cache_extent(cache);
9238 prune_corrupt_blocks(root->fs_info);
9239 reset_cached_block_groups(root->fs_info);
9242 reset_cached_block_groups(root->fs_info);
9245 * We need to delete any duplicate entries we find first otherwise we
9246 * could mess up the extent tree when we have backrefs that actually
9247 * belong to a different extent item and not the weird duplicate one.
9249 while (repair && !list_empty(&duplicate_extents)) {
9250 rec = to_extent_record(duplicate_extents.next);
9251 list_del_init(&rec->list);
9253 /* Sometimes we can find a backref before we find an actual
9254 * extent, so we need to process it a little bit to see if there
9255 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9256 * if this is a backref screwup. If we need to delete stuff
9257 * process_duplicates() will return 0, otherwise it will return
9258 * 1 and we
9260 if (process_duplicates(root, extent_cache, rec))
9261 continue;
9262 ret = delete_duplicate_records(root, rec);
9263 if (ret < 0)
9264 return ret;
9266 * delete_duplicate_records will return the number of entries
9267 * deleted, so if it's greater than 0 then we know we actually
9268 * did something and we need to remove.
9270 if (ret)
9271 had_dups = 1;
9274 if (had_dups)
9275 return -EAGAIN;
9277 while(1) {
9278 int cur_err = 0;
9279 int fix = 0;
9281 cache = search_cache_extent(extent_cache, 0);
9282 if (!cache)
9283 break;
9284 rec = container_of(cache, struct extent_record, cache);
9285 if (rec->num_duplicates) {
9286 fprintf(stderr, "extent item %llu has multiple extent "
9287 "items\n", (unsigned long long)rec->start);
9288 cur_err = 1;
9291 if (rec->refs != rec->extent_item_refs) {
9292 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9293 (unsigned long long)rec->start,
9294 (unsigned long long)rec->nr);
9295 fprintf(stderr, "extent item %llu, found %llu\n",
9296 (unsigned long long)rec->extent_item_refs,
9297 (unsigned long long)rec->refs);
9298 ret = record_orphan_data_extents(root->fs_info, rec);
9299 if (ret < 0)
9300 goto repair_abort;
9301 fix = ret;
9302 cur_err = 1;
9304 if (all_backpointers_checked(rec, 1)) {
9305 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9306 (unsigned long long)rec->start,
9307 (unsigned long long)rec->nr);
9308 fix = 1;
9309 cur_err = 1;
9311 if (!rec->owner_ref_checked) {
9312 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9313 (unsigned long long)rec->start,
9314 (unsigned long long)rec->nr);
9315 fix = 1;
9316 cur_err = 1;
9319 if (repair && fix) {
9320 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9321 if (ret)
9322 goto repair_abort;
9326 if (rec->bad_full_backref) {
9327 fprintf(stderr, "bad full backref, on [%llu]\n",
9328 (unsigned long long)rec->start);
9329 if (repair) {
9330 ret = fixup_extent_flags(root->fs_info, rec);
9331 if (ret)
9332 goto repair_abort;
9333 fix = 1;
9335 cur_err = 1;
9338 * Although it's not a extent ref's problem, we reuse this
9339 * routine for error reporting.
9340 * No repair function yet.
9342 if (rec->crossing_stripes) {
9343 fprintf(stderr,
9344 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9345 rec->start, rec->start + rec->max_size);
9346 cur_err = 1;
9349 if (rec->wrong_chunk_type) {
9350 fprintf(stderr,
9351 "bad extent [%llu, %llu), type mismatch with chunk\n",
9352 rec->start, rec->start + rec->max_size);
9353 cur_err = 1;
9356 remove_cache_extent(extent_cache, cache);
9357 free_all_extent_backrefs(rec);
9358 if (!init_extent_tree && repair && (!cur_err || fix))
9359 clear_extent_dirty(root->fs_info->excluded_extents,
9360 rec->start,
9361 rec->start + rec->max_size - 1,
9362 GFP_NOFS);
9363 free(rec);
9365 repair_abort:
9366 if (repair) {
9367 if (ret && ret != -EAGAIN) {
9368 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9369 exit(1);
9370 } else if (!ret) {
9371 struct btrfs_trans_handle *trans;
9373 root = root->fs_info->extent_root;
9374 trans = btrfs_start_transaction(root, 1);
9375 if (IS_ERR(trans)) {
9376 ret = PTR_ERR(trans);
9377 goto repair_abort;
9380 btrfs_fix_block_accounting(trans, root);
9381 ret = btrfs_commit_transaction(trans, root);
9382 if (ret)
9383 goto repair_abort;
9385 return ret;
9387 return 0;
9390 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9392 u64 stripe_size;
9394 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9395 stripe_size = length;
9396 stripe_size /= num_stripes;
9397 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9398 stripe_size = length * 2;
9399 stripe_size /= num_stripes;
9400 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9401 stripe_size = length;
9402 stripe_size /= (num_stripes - 1);
9403 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9404 stripe_size = length;
9405 stripe_size /= (num_stripes - 2);
9406 } else {
9407 stripe_size = length;
9409 return stripe_size;
9413 * Check the chunk with its block group/dev list ref:
9414 * Return 0 if all refs seems valid.
9415 * Return 1 if part of refs seems valid, need later check for rebuild ref
9416 * like missing block group and needs to search extent tree to rebuild them.
9417 * Return -1 if essential refs are missing and unable to rebuild.
9419 static int check_chunk_refs(struct chunk_record *chunk_rec,
9420 struct block_group_tree *block_group_cache,
9421 struct device_extent_tree *dev_extent_cache,
9422 int silent)
9424 struct cache_extent *block_group_item;
9425 struct block_group_record *block_group_rec;
9426 struct cache_extent *dev_extent_item;
9427 struct device_extent_record *dev_extent_rec;
9428 u64 devid;
9429 u64 offset;
9430 u64 length;
9431 int metadump_v2 = 0;
9432 int i;
9433 int ret = 0;
9435 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9436 chunk_rec->offset,
9437 chunk_rec->length);
9438 if (block_group_item) {
9439 block_group_rec = container_of(block_group_item,
9440 struct block_group_record,
9441 cache);
9442 if (chunk_rec->length != block_group_rec->offset ||
9443 chunk_rec->offset != block_group_rec->objectid ||
9444 (!metadump_v2 &&
9445 chunk_rec->type_flags != block_group_rec->flags)) {
9446 if (!silent)
9447 fprintf(stderr,
9448 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9449 chunk_rec->objectid,
9450 chunk_rec->type,
9451 chunk_rec->offset,
9452 chunk_rec->length,
9453 chunk_rec->offset,
9454 chunk_rec->type_flags,
9455 block_group_rec->objectid,
9456 block_group_rec->type,
9457 block_group_rec->offset,
9458 block_group_rec->offset,
9459 block_group_rec->objectid,
9460 block_group_rec->flags);
9461 ret = -1;
9462 } else {
9463 list_del_init(&block_group_rec->list);
9464 chunk_rec->bg_rec = block_group_rec;
9466 } else {
9467 if (!silent)
9468 fprintf(stderr,
9469 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9470 chunk_rec->objectid,
9471 chunk_rec->type,
9472 chunk_rec->offset,
9473 chunk_rec->length,
9474 chunk_rec->offset,
9475 chunk_rec->type_flags);
9476 ret = 1;
9479 if (metadump_v2)
9480 return ret;
9482 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9483 chunk_rec->num_stripes);
9484 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9485 devid = chunk_rec->stripes[i].devid;
9486 offset = chunk_rec->stripes[i].offset;
9487 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9488 devid, offset, length);
9489 if (dev_extent_item) {
9490 dev_extent_rec = container_of(dev_extent_item,
9491 struct device_extent_record,
9492 cache);
9493 if (dev_extent_rec->objectid != devid ||
9494 dev_extent_rec->offset != offset ||
9495 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9496 dev_extent_rec->length != length) {
9497 if (!silent)
9498 fprintf(stderr,
9499 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9500 chunk_rec->objectid,
9501 chunk_rec->type,
9502 chunk_rec->offset,
9503 chunk_rec->stripes[i].devid,
9504 chunk_rec->stripes[i].offset,
9505 dev_extent_rec->objectid,
9506 dev_extent_rec->offset,
9507 dev_extent_rec->length);
9508 ret = -1;
9509 } else {
9510 list_move(&dev_extent_rec->chunk_list,
9511 &chunk_rec->dextents);
9513 } else {
9514 if (!silent)
9515 fprintf(stderr,
9516 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9517 chunk_rec->objectid,
9518 chunk_rec->type,
9519 chunk_rec->offset,
9520 chunk_rec->stripes[i].devid,
9521 chunk_rec->stripes[i].offset);
9522 ret = -1;
9525 return ret;
9528 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9529 int check_chunks(struct cache_tree *chunk_cache,
9530 struct block_group_tree *block_group_cache,
9531 struct device_extent_tree *dev_extent_cache,
9532 struct list_head *good, struct list_head *bad,
9533 struct list_head *rebuild, int silent)
9535 struct cache_extent *chunk_item;
9536 struct chunk_record *chunk_rec;
9537 struct block_group_record *bg_rec;
9538 struct device_extent_record *dext_rec;
9539 int err;
9540 int ret = 0;
9542 chunk_item = first_cache_extent(chunk_cache);
9543 while (chunk_item) {
9544 chunk_rec = container_of(chunk_item, struct chunk_record,
9545 cache);
9546 err = check_chunk_refs(chunk_rec, block_group_cache,
9547 dev_extent_cache, silent);
9548 if (err < 0)
9549 ret = err;
9550 if (err == 0 && good)
9551 list_add_tail(&chunk_rec->list, good);
9552 if (err > 0 && rebuild)
9553 list_add_tail(&chunk_rec->list, rebuild);
9554 if (err < 0 && bad)
9555 list_add_tail(&chunk_rec->list, bad);
9556 chunk_item = next_cache_extent(chunk_item);
9559 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9560 if (!silent)
9561 fprintf(stderr,
9562 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9563 bg_rec->objectid,
9564 bg_rec->offset,
9565 bg_rec->flags);
9566 if (!ret)
9567 ret = 1;
9570 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9571 chunk_list) {
9572 if (!silent)
9573 fprintf(stderr,
9574 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9575 dext_rec->objectid,
9576 dext_rec->offset,
9577 dext_rec->length);
9578 if (!ret)
9579 ret = 1;
9581 return ret;
9585 static int check_device_used(struct device_record *dev_rec,
9586 struct device_extent_tree *dext_cache)
9588 struct cache_extent *cache;
9589 struct device_extent_record *dev_extent_rec;
9590 u64 total_byte = 0;
9592 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9593 while (cache) {
9594 dev_extent_rec = container_of(cache,
9595 struct device_extent_record,
9596 cache);
9597 if (dev_extent_rec->objectid != dev_rec->devid)
9598 break;
9600 list_del_init(&dev_extent_rec->device_list);
9601 total_byte += dev_extent_rec->length;
9602 cache = next_cache_extent(cache);
9605 if (total_byte != dev_rec->byte_used) {
9606 fprintf(stderr,
9607 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9608 total_byte, dev_rec->byte_used, dev_rec->objectid,
9609 dev_rec->type, dev_rec->offset);
9610 return -1;
9611 } else {
9612 return 0;
9616 /* check btrfs_dev_item -> btrfs_dev_extent */
9617 static int check_devices(struct rb_root *dev_cache,
9618 struct device_extent_tree *dev_extent_cache)
9620 struct rb_node *dev_node;
9621 struct device_record *dev_rec;
9622 struct device_extent_record *dext_rec;
9623 int err;
9624 int ret = 0;
9626 dev_node = rb_first(dev_cache);
9627 while (dev_node) {
9628 dev_rec = container_of(dev_node, struct device_record, node);
9629 err = check_device_used(dev_rec, dev_extent_cache);
9630 if (err)
9631 ret = err;
9633 dev_node = rb_next(dev_node);
9635 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9636 device_list) {
9637 fprintf(stderr,
9638 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9639 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9640 if (!ret)
9641 ret = 1;
9643 return ret;
9646 static int add_root_item_to_list(struct list_head *head,
9647 u64 objectid, u64 bytenr, u64 last_snapshot,
9648 u8 level, u8 drop_level,
9649 int level_size, struct btrfs_key *drop_key)
9652 struct root_item_record *ri_rec;
9653 ri_rec = malloc(sizeof(*ri_rec));
9654 if (!ri_rec)
9655 return -ENOMEM;
9656 ri_rec->bytenr = bytenr;
9657 ri_rec->objectid = objectid;
9658 ri_rec->level = level;
9659 ri_rec->level_size = level_size;
9660 ri_rec->drop_level = drop_level;
9661 ri_rec->last_snapshot = last_snapshot;
9662 if (drop_key)
9663 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9664 list_add_tail(&ri_rec->list, head);
9666 return 0;
9669 static void free_root_item_list(struct list_head *list)
9671 struct root_item_record *ri_rec;
9673 while (!list_empty(list)) {
9674 ri_rec = list_first_entry(list, struct root_item_record,
9675 list);
9676 list_del_init(&ri_rec->list);
9677 free(ri_rec);
9681 static int deal_root_from_list(struct list_head *list,
9682 struct btrfs_root *root,
9683 struct block_info *bits,
9684 int bits_nr,
9685 struct cache_tree *pending,
9686 struct cache_tree *seen,
9687 struct cache_tree *reada,
9688 struct cache_tree *nodes,
9689 struct cache_tree *extent_cache,
9690 struct cache_tree *chunk_cache,
9691 struct rb_root *dev_cache,
9692 struct block_group_tree *block_group_cache,
9693 struct device_extent_tree *dev_extent_cache)
9695 int ret = 0;
9696 u64 last;
9698 while (!list_empty(list)) {
9699 struct root_item_record *rec;
9700 struct extent_buffer *buf;
9701 rec = list_entry(list->next,
9702 struct root_item_record, list);
9703 last = 0;
9704 buf = read_tree_block(root->fs_info->tree_root,
9705 rec->bytenr, rec->level_size, 0);
9706 if (!extent_buffer_uptodate(buf)) {
9707 free_extent_buffer(buf);
9708 ret = -EIO;
9709 break;
9711 ret = add_root_to_pending(buf, extent_cache, pending,
9712 seen, nodes, rec->objectid);
9713 if (ret < 0)
9714 break;
9716 * To rebuild extent tree, we need deal with snapshot
9717 * one by one, otherwise we deal with node firstly which
9718 * can maximize readahead.
9720 while (1) {
9721 ret = run_next_block(root, bits, bits_nr, &last,
9722 pending, seen, reada, nodes,
9723 extent_cache, chunk_cache,
9724 dev_cache, block_group_cache,
9725 dev_extent_cache, rec);
9726 if (ret != 0)
9727 break;
9729 free_extent_buffer(buf);
9730 list_del(&rec->list);
9731 free(rec);
9732 if (ret < 0)
9733 break;
9735 while (ret >= 0) {
9736 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9737 reada, nodes, extent_cache, chunk_cache,
9738 dev_cache, block_group_cache,
9739 dev_extent_cache, NULL);
9740 if (ret != 0) {
9741 if (ret > 0)
9742 ret = 0;
9743 break;
9746 return ret;
9749 static int check_chunks_and_extents(struct btrfs_root *root)
9751 struct rb_root dev_cache;
9752 struct cache_tree chunk_cache;
9753 struct block_group_tree block_group_cache;
9754 struct device_extent_tree dev_extent_cache;
9755 struct cache_tree extent_cache;
9756 struct cache_tree seen;
9757 struct cache_tree pending;
9758 struct cache_tree reada;
9759 struct cache_tree nodes;
9760 struct extent_io_tree excluded_extents;
9761 struct cache_tree corrupt_blocks;
9762 struct btrfs_path path;
9763 struct btrfs_key key;
9764 struct btrfs_key found_key;
9765 int ret, err = 0;
9766 struct block_info *bits;
9767 int bits_nr;
9768 struct extent_buffer *leaf;
9769 int slot;
9770 struct btrfs_root_item ri;
9771 struct list_head dropping_trees;
9772 struct list_head normal_trees;
9773 struct btrfs_root *root1;
9774 u64 objectid;
9775 u32 level_size;
9776 u8 level;
9778 dev_cache = RB_ROOT;
9779 cache_tree_init(&chunk_cache);
9780 block_group_tree_init(&block_group_cache);
9781 device_extent_tree_init(&dev_extent_cache);
9783 cache_tree_init(&extent_cache);
9784 cache_tree_init(&seen);
9785 cache_tree_init(&pending);
9786 cache_tree_init(&nodes);
9787 cache_tree_init(&reada);
9788 cache_tree_init(&corrupt_blocks);
9789 extent_io_tree_init(&excluded_extents);
9790 INIT_LIST_HEAD(&dropping_trees);
9791 INIT_LIST_HEAD(&normal_trees);
9793 if (repair) {
9794 root->fs_info->excluded_extents = &excluded_extents;
9795 root->fs_info->fsck_extent_cache = &extent_cache;
9796 root->fs_info->free_extent_hook = free_extent_hook;
9797 root->fs_info->corrupt_blocks = &corrupt_blocks;
9800 bits_nr = 1024;
9801 bits = malloc(bits_nr * sizeof(struct block_info));
9802 if (!bits) {
9803 perror("malloc");
9804 exit(1);
9807 if (ctx.progress_enabled) {
9808 ctx.tp = TASK_EXTENTS;
9809 task_start(ctx.info);
9812 again:
9813 root1 = root->fs_info->tree_root;
9814 level = btrfs_header_level(root1->node);
9815 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9816 root1->node->start, 0, level, 0,
9817 root1->nodesize, NULL);
9818 if (ret < 0)
9819 goto out;
9820 root1 = root->fs_info->chunk_root;
9821 level = btrfs_header_level(root1->node);
9822 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9823 root1->node->start, 0, level, 0,
9824 root1->nodesize, NULL);
9825 if (ret < 0)
9826 goto out;
9827 btrfs_init_path(&path);
9828 key.offset = 0;
9829 key.objectid = 0;
9830 key.type = BTRFS_ROOT_ITEM_KEY;
9831 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9832 &key, &path, 0, 0);
9833 if (ret < 0)
9834 goto out;
9835 while(1) {
9836 leaf = path.nodes[0];
9837 slot = path.slots[0];
9838 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9839 ret = btrfs_next_leaf(root, &path);
9840 if (ret != 0)
9841 break;
9842 leaf = path.nodes[0];
9843 slot = path.slots[0];
9845 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9846 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9847 unsigned long offset;
9848 u64 last_snapshot;
9850 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9851 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9852 last_snapshot = btrfs_root_last_snapshot(&ri);
9853 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9854 level = btrfs_root_level(&ri);
9855 level_size = root->nodesize;
9856 ret = add_root_item_to_list(&normal_trees,
9857 found_key.objectid,
9858 btrfs_root_bytenr(&ri),
9859 last_snapshot, level,
9860 0, level_size, NULL);
9861 if (ret < 0)
9862 goto out;
9863 } else {
9864 level = btrfs_root_level(&ri);
9865 level_size = root->nodesize;
9866 objectid = found_key.objectid;
9867 btrfs_disk_key_to_cpu(&found_key,
9868 &ri.drop_progress);
9869 ret = add_root_item_to_list(&dropping_trees,
9870 objectid,
9871 btrfs_root_bytenr(&ri),
9872 last_snapshot, level,
9873 ri.drop_level,
9874 level_size, &found_key);
9875 if (ret < 0)
9876 goto out;
9879 path.slots[0]++;
9881 btrfs_release_path(&path);
9884 * check_block can return -EAGAIN if it fixes something, please keep
9885 * this in mind when dealing with return values from these functions, if
9886 * we get -EAGAIN we want to fall through and restart the loop.
9888 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9889 &seen, &reada, &nodes, &extent_cache,
9890 &chunk_cache, &dev_cache, &block_group_cache,
9891 &dev_extent_cache);
9892 if (ret < 0) {
9893 if (ret == -EAGAIN)
9894 goto loop;
9895 goto out;
9897 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9898 &pending, &seen, &reada, &nodes,
9899 &extent_cache, &chunk_cache, &dev_cache,
9900 &block_group_cache, &dev_extent_cache);
9901 if (ret < 0) {
9902 if (ret == -EAGAIN)
9903 goto loop;
9904 goto out;
9907 ret = check_chunks(&chunk_cache, &block_group_cache,
9908 &dev_extent_cache, NULL, NULL, NULL, 0);
9909 if (ret) {
9910 if (ret == -EAGAIN)
9911 goto loop;
9912 err = ret;
9915 ret = check_extent_refs(root, &extent_cache);
9916 if (ret < 0) {
9917 if (ret == -EAGAIN)
9918 goto loop;
9919 goto out;
9922 ret = check_devices(&dev_cache, &dev_extent_cache);
9923 if (ret && err)
9924 ret = err;
9926 out:
9927 task_stop(ctx.info);
9928 if (repair) {
9929 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9930 extent_io_tree_cleanup(&excluded_extents);
9931 root->fs_info->fsck_extent_cache = NULL;
9932 root->fs_info->free_extent_hook = NULL;
9933 root->fs_info->corrupt_blocks = NULL;
9934 root->fs_info->excluded_extents = NULL;
9936 free(bits);
9937 free_chunk_cache_tree(&chunk_cache);
9938 free_device_cache_tree(&dev_cache);
9939 free_block_group_tree(&block_group_cache);
9940 free_device_extent_tree(&dev_extent_cache);
9941 free_extent_cache_tree(&seen);
9942 free_extent_cache_tree(&pending);
9943 free_extent_cache_tree(&reada);
9944 free_extent_cache_tree(&nodes);
9945 return ret;
9946 loop:
9947 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9948 free_extent_cache_tree(&seen);
9949 free_extent_cache_tree(&pending);
9950 free_extent_cache_tree(&reada);
9951 free_extent_cache_tree(&nodes);
9952 free_chunk_cache_tree(&chunk_cache);
9953 free_block_group_tree(&block_group_cache);
9954 free_device_cache_tree(&dev_cache);
9955 free_device_extent_tree(&dev_extent_cache);
9956 free_extent_record_cache(root->fs_info, &extent_cache);
9957 free_root_item_list(&normal_trees);
9958 free_root_item_list(&dropping_trees);
9959 extent_io_tree_cleanup(&excluded_extents);
9960 goto again;
9964 * Check backrefs of a tree block given by @bytenr or @eb.
9966 * @root: the root containing the @bytenr or @eb
9967 * @eb: tree block extent buffer, can be NULL
9968 * @bytenr: bytenr of the tree block to search
9969 * @level: tree level of the tree block
9970 * @owner: owner of the tree block
9972 * Return >0 for any error found and output error message
9973 * Return 0 for no error found
9975 static int check_tree_block_ref(struct btrfs_root *root,
9976 struct extent_buffer *eb, u64 bytenr,
9977 int level, u64 owner)
9979 struct btrfs_key key;
9980 struct btrfs_root *extent_root = root->fs_info->extent_root;
9981 struct btrfs_path path;
9982 struct btrfs_extent_item *ei;
9983 struct btrfs_extent_inline_ref *iref;
9984 struct extent_buffer *leaf;
9985 unsigned long end;
9986 unsigned long ptr;
9987 int slot;
9988 int skinny_level;
9989 int type;
9990 u32 nodesize = root->nodesize;
9991 u32 item_size;
9992 u64 offset;
9993 int tree_reloc_root = 0;
9994 int found_ref = 0;
9995 int err = 0;
9996 int ret;
9998 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9999 btrfs_header_bytenr(root->node) == bytenr)
10000 tree_reloc_root = 1;
10002 btrfs_init_path(&path);
10003 key.objectid = bytenr;
10004 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10005 key.type = BTRFS_METADATA_ITEM_KEY;
10006 else
10007 key.type = BTRFS_EXTENT_ITEM_KEY;
10008 key.offset = (u64)-1;
10010 /* Search for the backref in extent tree */
10011 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10012 if (ret < 0) {
10013 err |= BACKREF_MISSING;
10014 goto out;
10016 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10017 if (ret) {
10018 err |= BACKREF_MISSING;
10019 goto out;
10022 leaf = path.nodes[0];
10023 slot = path.slots[0];
10024 btrfs_item_key_to_cpu(leaf, &key, slot);
10026 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10028 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10029 skinny_level = (int)key.offset;
10030 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10031 } else {
10032 struct btrfs_tree_block_info *info;
10034 info = (struct btrfs_tree_block_info *)(ei + 1);
10035 skinny_level = btrfs_tree_block_level(leaf, info);
10036 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10039 if (eb) {
10040 u64 header_gen;
10041 u64 extent_gen;
10043 if (!(btrfs_extent_flags(leaf, ei) &
10044 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10045 error(
10046 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10047 key.objectid, nodesize,
10048 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10049 err = BACKREF_MISMATCH;
10051 header_gen = btrfs_header_generation(eb);
10052 extent_gen = btrfs_extent_generation(leaf, ei);
10053 if (header_gen != extent_gen) {
10054 error(
10055 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10056 key.objectid, nodesize, header_gen,
10057 extent_gen);
10058 err = BACKREF_MISMATCH;
10060 if (level != skinny_level) {
10061 error(
10062 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10063 key.objectid, nodesize, level, skinny_level);
10064 err = BACKREF_MISMATCH;
10066 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10067 error(
10068 "extent[%llu %u] is referred by other roots than %llu",
10069 key.objectid, nodesize, root->objectid);
10070 err = BACKREF_MISMATCH;
10075 * Iterate the extent/metadata item to find the exact backref
10077 item_size = btrfs_item_size_nr(leaf, slot);
10078 ptr = (unsigned long)iref;
10079 end = (unsigned long)ei + item_size;
10080 while (ptr < end) {
10081 iref = (struct btrfs_extent_inline_ref *)ptr;
10082 type = btrfs_extent_inline_ref_type(leaf, iref);
10083 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10085 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10086 (offset == root->objectid || offset == owner)) {
10087 found_ref = 1;
10088 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10090 * Backref of tree reloc root points to itself, no need
10091 * to check backref any more.
10093 if (tree_reloc_root)
10094 found_ref = 1;
10095 else
10096 /* Check if the backref points to valid referencer */
10097 found_ref = !check_tree_block_ref(root, NULL,
10098 offset, level + 1, owner);
10101 if (found_ref)
10102 break;
10103 ptr += btrfs_extent_inline_ref_size(type);
10107 * Inlined extent item doesn't have what we need, check
10108 * TREE_BLOCK_REF_KEY
10110 if (!found_ref) {
10111 btrfs_release_path(&path);
10112 key.objectid = bytenr;
10113 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10114 key.offset = root->objectid;
10116 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10117 if (!ret)
10118 found_ref = 1;
10120 if (!found_ref)
10121 err |= BACKREF_MISSING;
10122 out:
10123 btrfs_release_path(&path);
10124 if (eb && (err & BACKREF_MISSING))
10125 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10126 bytenr, nodesize, owner, level);
10127 return err;
10131 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10133 * Return >0 any error found and output error message
10134 * Return 0 for no error found
10136 static int check_extent_data_item(struct btrfs_root *root,
10137 struct extent_buffer *eb, int slot)
10139 struct btrfs_file_extent_item *fi;
10140 struct btrfs_path path;
10141 struct btrfs_root *extent_root = root->fs_info->extent_root;
10142 struct btrfs_key fi_key;
10143 struct btrfs_key dbref_key;
10144 struct extent_buffer *leaf;
10145 struct btrfs_extent_item *ei;
10146 struct btrfs_extent_inline_ref *iref;
10147 struct btrfs_extent_data_ref *dref;
10148 u64 owner;
10149 u64 disk_bytenr;
10150 u64 disk_num_bytes;
10151 u64 extent_num_bytes;
10152 u64 extent_flags;
10153 u32 item_size;
10154 unsigned long end;
10155 unsigned long ptr;
10156 int type;
10157 u64 ref_root;
10158 int found_dbackref = 0;
10159 int err = 0;
10160 int ret;
10162 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10163 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10165 /* Nothing to check for hole and inline data extents */
10166 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10167 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10168 return 0;
10170 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10171 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10172 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10174 /* Check unaligned disk_num_bytes and num_bytes */
10175 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10176 error(
10177 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10178 fi_key.objectid, fi_key.offset, disk_num_bytes,
10179 root->sectorsize);
10180 err |= BYTES_UNALIGNED;
10181 } else {
10182 data_bytes_allocated += disk_num_bytes;
10184 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10185 error(
10186 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10187 fi_key.objectid, fi_key.offset, extent_num_bytes,
10188 root->sectorsize);
10189 err |= BYTES_UNALIGNED;
10190 } else {
10191 data_bytes_referenced += extent_num_bytes;
10193 owner = btrfs_header_owner(eb);
10195 /* Check the extent item of the file extent in extent tree */
10196 btrfs_init_path(&path);
10197 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10198 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10199 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10201 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10202 if (ret) {
10203 err |= BACKREF_MISSING;
10204 goto error;
10207 leaf = path.nodes[0];
10208 slot = path.slots[0];
10209 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10211 extent_flags = btrfs_extent_flags(leaf, ei);
10213 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10214 error(
10215 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10216 disk_bytenr, disk_num_bytes,
10217 BTRFS_EXTENT_FLAG_DATA);
10218 err |= BACKREF_MISMATCH;
10221 /* Check data backref inside that extent item */
10222 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10223 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10224 ptr = (unsigned long)iref;
10225 end = (unsigned long)ei + item_size;
10226 while (ptr < end) {
10227 iref = (struct btrfs_extent_inline_ref *)ptr;
10228 type = btrfs_extent_inline_ref_type(leaf, iref);
10229 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10231 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10232 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10233 if (ref_root == owner || ref_root == root->objectid)
10234 found_dbackref = 1;
10235 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10236 found_dbackref = !check_tree_block_ref(root, NULL,
10237 btrfs_extent_inline_ref_offset(leaf, iref),
10238 0, owner);
10241 if (found_dbackref)
10242 break;
10243 ptr += btrfs_extent_inline_ref_size(type);
10246 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10247 if (!found_dbackref) {
10248 btrfs_release_path(&path);
10250 btrfs_init_path(&path);
10251 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10252 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10253 dbref_key.offset = hash_extent_data_ref(root->objectid,
10254 fi_key.objectid, fi_key.offset);
10256 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10257 &dbref_key, &path, 0, 0);
10258 if (!ret)
10259 found_dbackref = 1;
10262 if (!found_dbackref)
10263 err |= BACKREF_MISSING;
10264 error:
10265 btrfs_release_path(&path);
10266 if (err & BACKREF_MISSING) {
10267 error("data extent[%llu %llu] backref lost",
10268 disk_bytenr, disk_num_bytes);
10270 return err;
10274 * Get real tree block level for the case like shared block
10275 * Return >= 0 as tree level
10276 * Return <0 for error
10278 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10280 struct extent_buffer *eb;
10281 struct btrfs_path path;
10282 struct btrfs_key key;
10283 struct btrfs_extent_item *ei;
10284 u64 flags;
10285 u64 transid;
10286 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10287 u8 backref_level;
10288 u8 header_level;
10289 int ret;
10291 /* Search extent tree for extent generation and level */
10292 key.objectid = bytenr;
10293 key.type = BTRFS_METADATA_ITEM_KEY;
10294 key.offset = (u64)-1;
10296 btrfs_init_path(&path);
10297 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10298 if (ret < 0)
10299 goto release_out;
10300 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10301 if (ret < 0)
10302 goto release_out;
10303 if (ret > 0) {
10304 ret = -ENOENT;
10305 goto release_out;
10308 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10309 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10310 struct btrfs_extent_item);
10311 flags = btrfs_extent_flags(path.nodes[0], ei);
10312 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10313 ret = -ENOENT;
10314 goto release_out;
10317 /* Get transid for later read_tree_block() check */
10318 transid = btrfs_extent_generation(path.nodes[0], ei);
10320 /* Get backref level as one source */
10321 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10322 backref_level = key.offset;
10323 } else {
10324 struct btrfs_tree_block_info *info;
10326 info = (struct btrfs_tree_block_info *)(ei + 1);
10327 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10329 btrfs_release_path(&path);
10331 /* Get level from tree block as an alternative source */
10332 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10333 if (!extent_buffer_uptodate(eb)) {
10334 free_extent_buffer(eb);
10335 return -EIO;
10337 header_level = btrfs_header_level(eb);
10338 free_extent_buffer(eb);
10340 if (header_level != backref_level)
10341 return -EIO;
10342 return header_level;
10344 release_out:
10345 btrfs_release_path(&path);
10346 return ret;
10350 * Check if a tree block backref is valid (points to a valid tree block)
10351 * if level == -1, level will be resolved
10352 * Return >0 for any error found and print error message
10354 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10355 u64 bytenr, int level)
10357 struct btrfs_root *root;
10358 struct btrfs_key key;
10359 struct btrfs_path path;
10360 struct extent_buffer *eb;
10361 struct extent_buffer *node;
10362 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10363 int err = 0;
10364 int ret;
10366 /* Query level for level == -1 special case */
10367 if (level == -1)
10368 level = query_tree_block_level(fs_info, bytenr);
10369 if (level < 0) {
10370 err |= REFERENCER_MISSING;
10371 goto out;
10374 key.objectid = root_id;
10375 key.type = BTRFS_ROOT_ITEM_KEY;
10376 key.offset = (u64)-1;
10378 root = btrfs_read_fs_root(fs_info, &key);
10379 if (IS_ERR(root)) {
10380 err |= REFERENCER_MISSING;
10381 goto out;
10384 /* Read out the tree block to get item/node key */
10385 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10386 if (!extent_buffer_uptodate(eb)) {
10387 err |= REFERENCER_MISSING;
10388 free_extent_buffer(eb);
10389 goto out;
10392 /* Empty tree, no need to check key */
10393 if (!btrfs_header_nritems(eb) && !level) {
10394 free_extent_buffer(eb);
10395 goto out;
10398 if (level)
10399 btrfs_node_key_to_cpu(eb, &key, 0);
10400 else
10401 btrfs_item_key_to_cpu(eb, &key, 0);
10403 free_extent_buffer(eb);
10405 btrfs_init_path(&path);
10406 path.lowest_level = level;
10407 /* Search with the first key, to ensure we can reach it */
10408 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10409 if (ret < 0) {
10410 err |= REFERENCER_MISSING;
10411 goto release_out;
10414 node = path.nodes[level];
10415 if (btrfs_header_bytenr(node) != bytenr) {
10416 error(
10417 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10418 bytenr, nodesize, bytenr,
10419 btrfs_header_bytenr(node));
10420 err |= REFERENCER_MISMATCH;
10422 if (btrfs_header_level(node) != level) {
10423 error(
10424 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10425 bytenr, nodesize, level,
10426 btrfs_header_level(node));
10427 err |= REFERENCER_MISMATCH;
10430 release_out:
10431 btrfs_release_path(&path);
10432 out:
10433 if (err & REFERENCER_MISSING) {
10434 if (level < 0)
10435 error("extent [%llu %d] lost referencer (owner: %llu)",
10436 bytenr, nodesize, root_id);
10437 else
10438 error(
10439 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10440 bytenr, nodesize, root_id, level);
10443 return err;
10447 * Check if tree block @eb is tree reloc root.
10448 * Return 0 if it's not or any problem happens
10449 * Return 1 if it's a tree reloc root
10451 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10452 struct extent_buffer *eb)
10454 struct btrfs_root *tree_reloc_root;
10455 struct btrfs_key key;
10456 u64 bytenr = btrfs_header_bytenr(eb);
10457 u64 owner = btrfs_header_owner(eb);
10458 int ret = 0;
10460 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10461 key.offset = owner;
10462 key.type = BTRFS_ROOT_ITEM_KEY;
10464 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10465 if (IS_ERR(tree_reloc_root))
10466 return 0;
10468 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10469 ret = 1;
10470 btrfs_free_fs_root(tree_reloc_root);
10471 return ret;
10475 * Check referencer for shared block backref
10476 * If level == -1, this function will resolve the level.
10478 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10479 u64 parent, u64 bytenr, int level)
10481 struct extent_buffer *eb;
10482 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10483 u32 nr;
10484 int found_parent = 0;
10485 int i;
10487 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10488 if (!extent_buffer_uptodate(eb))
10489 goto out;
10491 if (level == -1)
10492 level = query_tree_block_level(fs_info, bytenr);
10493 if (level < 0)
10494 goto out;
10496 /* It's possible it's a tree reloc root */
10497 if (parent == bytenr) {
10498 if (is_tree_reloc_root(fs_info, eb))
10499 found_parent = 1;
10500 goto out;
10503 if (level + 1 != btrfs_header_level(eb))
10504 goto out;
10506 nr = btrfs_header_nritems(eb);
10507 for (i = 0; i < nr; i++) {
10508 if (bytenr == btrfs_node_blockptr(eb, i)) {
10509 found_parent = 1;
10510 break;
10513 out:
10514 free_extent_buffer(eb);
10515 if (!found_parent) {
10516 error(
10517 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10518 bytenr, nodesize, parent, level);
10519 return REFERENCER_MISSING;
10521 return 0;
10525 * Check referencer for normal (inlined) data ref
10526 * If len == 0, it will be resolved by searching in extent tree
10528 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10529 u64 root_id, u64 objectid, u64 offset,
10530 u64 bytenr, u64 len, u32 count)
10532 struct btrfs_root *root;
10533 struct btrfs_root *extent_root = fs_info->extent_root;
10534 struct btrfs_key key;
10535 struct btrfs_path path;
10536 struct extent_buffer *leaf;
10537 struct btrfs_file_extent_item *fi;
10538 u32 found_count = 0;
10539 int slot;
10540 int ret = 0;
10542 if (!len) {
10543 key.objectid = bytenr;
10544 key.type = BTRFS_EXTENT_ITEM_KEY;
10545 key.offset = (u64)-1;
10547 btrfs_init_path(&path);
10548 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10549 if (ret < 0)
10550 goto out;
10551 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10552 if (ret)
10553 goto out;
10554 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10555 if (key.objectid != bytenr ||
10556 key.type != BTRFS_EXTENT_ITEM_KEY)
10557 goto out;
10558 len = key.offset;
10559 btrfs_release_path(&path);
10561 key.objectid = root_id;
10562 key.type = BTRFS_ROOT_ITEM_KEY;
10563 key.offset = (u64)-1;
10564 btrfs_init_path(&path);
10566 root = btrfs_read_fs_root(fs_info, &key);
10567 if (IS_ERR(root))
10568 goto out;
10570 key.objectid = objectid;
10571 key.type = BTRFS_EXTENT_DATA_KEY;
10573 * It can be nasty as data backref offset is
10574 * file offset - file extent offset, which is smaller or
10575 * equal to original backref offset. The only special case is
10576 * overflow. So we need to special check and do further search.
10578 key.offset = offset & (1ULL << 63) ? 0 : offset;
10580 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10581 if (ret < 0)
10582 goto out;
10585 * Search afterwards to get correct one
10586 * NOTE: As we must do a comprehensive check on the data backref to
10587 * make sure the dref count also matches, we must iterate all file
10588 * extents for that inode.
10590 while (1) {
10591 leaf = path.nodes[0];
10592 slot = path.slots[0];
10594 btrfs_item_key_to_cpu(leaf, &key, slot);
10595 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10596 break;
10597 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10599 * Except normal disk bytenr and disk num bytes, we still
10600 * need to do extra check on dbackref offset as
10601 * dbackref offset = file_offset - file_extent_offset
10603 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10604 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10605 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10606 offset)
10607 found_count++;
10609 ret = btrfs_next_item(root, &path);
10610 if (ret)
10611 break;
10613 out:
10614 btrfs_release_path(&path);
10615 if (found_count != count) {
10616 error(
10617 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10618 bytenr, len, root_id, objectid, offset, count, found_count);
10619 return REFERENCER_MISSING;
10621 return 0;
10625 * Check if the referencer of a shared data backref exists
10627 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10628 u64 parent, u64 bytenr)
10630 struct extent_buffer *eb;
10631 struct btrfs_key key;
10632 struct btrfs_file_extent_item *fi;
10633 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10634 u32 nr;
10635 int found_parent = 0;
10636 int i;
10638 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10639 if (!extent_buffer_uptodate(eb))
10640 goto out;
10642 nr = btrfs_header_nritems(eb);
10643 for (i = 0; i < nr; i++) {
10644 btrfs_item_key_to_cpu(eb, &key, i);
10645 if (key.type != BTRFS_EXTENT_DATA_KEY)
10646 continue;
10648 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10649 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10650 continue;
10652 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10653 found_parent = 1;
10654 break;
10658 out:
10659 free_extent_buffer(eb);
10660 if (!found_parent) {
10661 error("shared extent %llu referencer lost (parent: %llu)",
10662 bytenr, parent);
10663 return REFERENCER_MISSING;
10665 return 0;
10669 * This function will check a given extent item, including its backref and
10670 * itself (like crossing stripe boundary and type)
10672 * Since we don't use extent_record anymore, introduce new error bit
10674 static int check_extent_item(struct btrfs_fs_info *fs_info,
10675 struct extent_buffer *eb, int slot)
10677 struct btrfs_extent_item *ei;
10678 struct btrfs_extent_inline_ref *iref;
10679 struct btrfs_extent_data_ref *dref;
10680 unsigned long end;
10681 unsigned long ptr;
10682 int type;
10683 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10684 u32 item_size = btrfs_item_size_nr(eb, slot);
10685 u64 flags;
10686 u64 offset;
10687 int metadata = 0;
10688 int level;
10689 struct btrfs_key key;
10690 int ret;
10691 int err = 0;
10693 btrfs_item_key_to_cpu(eb, &key, slot);
10694 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10695 bytes_used += key.offset;
10696 else
10697 bytes_used += nodesize;
10699 if (item_size < sizeof(*ei)) {
10701 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10702 * old thing when on disk format is still un-determined.
10703 * No need to care about it anymore
10705 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10706 return -ENOTTY;
10709 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10710 flags = btrfs_extent_flags(eb, ei);
10712 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10713 metadata = 1;
10714 if (metadata && check_crossing_stripes(global_info, key.objectid,
10715 eb->len)) {
10716 error("bad metadata [%llu, %llu) crossing stripe boundary",
10717 key.objectid, key.objectid + nodesize);
10718 err |= CROSSING_STRIPE_BOUNDARY;
10721 ptr = (unsigned long)(ei + 1);
10723 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10724 /* Old EXTENT_ITEM metadata */
10725 struct btrfs_tree_block_info *info;
10727 info = (struct btrfs_tree_block_info *)ptr;
10728 level = btrfs_tree_block_level(eb, info);
10729 ptr += sizeof(struct btrfs_tree_block_info);
10730 } else {
10731 /* New METADATA_ITEM */
10732 level = key.offset;
10734 end = (unsigned long)ei + item_size;
10736 if (ptr >= end) {
10737 err |= ITEM_SIZE_MISMATCH;
10738 goto out;
10741 /* Now check every backref in this extent item */
10742 next:
10743 iref = (struct btrfs_extent_inline_ref *)ptr;
10744 type = btrfs_extent_inline_ref_type(eb, iref);
10745 offset = btrfs_extent_inline_ref_offset(eb, iref);
10746 switch (type) {
10747 case BTRFS_TREE_BLOCK_REF_KEY:
10748 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10749 level);
10750 err |= ret;
10751 break;
10752 case BTRFS_SHARED_BLOCK_REF_KEY:
10753 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10754 level);
10755 err |= ret;
10756 break;
10757 case BTRFS_EXTENT_DATA_REF_KEY:
10758 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10759 ret = check_extent_data_backref(fs_info,
10760 btrfs_extent_data_ref_root(eb, dref),
10761 btrfs_extent_data_ref_objectid(eb, dref),
10762 btrfs_extent_data_ref_offset(eb, dref),
10763 key.objectid, key.offset,
10764 btrfs_extent_data_ref_count(eb, dref));
10765 err |= ret;
10766 break;
10767 case BTRFS_SHARED_DATA_REF_KEY:
10768 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10769 err |= ret;
10770 break;
10771 default:
10772 error("extent[%llu %d %llu] has unknown ref type: %d",
10773 key.objectid, key.type, key.offset, type);
10774 err |= UNKNOWN_TYPE;
10775 goto out;
10778 ptr += btrfs_extent_inline_ref_size(type);
10779 if (ptr < end)
10780 goto next;
10782 out:
10783 return err;
10787 * Check if a dev extent item is referred correctly by its chunk
10789 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10790 struct extent_buffer *eb, int slot)
10792 struct btrfs_root *chunk_root = fs_info->chunk_root;
10793 struct btrfs_dev_extent *ptr;
10794 struct btrfs_path path;
10795 struct btrfs_key chunk_key;
10796 struct btrfs_key devext_key;
10797 struct btrfs_chunk *chunk;
10798 struct extent_buffer *l;
10799 int num_stripes;
10800 u64 length;
10801 int i;
10802 int found_chunk = 0;
10803 int ret;
10805 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10806 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10807 length = btrfs_dev_extent_length(eb, ptr);
10809 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10810 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10811 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10813 btrfs_init_path(&path);
10814 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10815 if (ret)
10816 goto out;
10818 l = path.nodes[0];
10819 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10820 if (btrfs_chunk_length(l, chunk) != length)
10821 goto out;
10823 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10824 for (i = 0; i < num_stripes; i++) {
10825 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10826 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10828 if (devid == devext_key.objectid &&
10829 offset == devext_key.offset) {
10830 found_chunk = 1;
10831 break;
10834 out:
10835 btrfs_release_path(&path);
10836 if (!found_chunk) {
10837 error(
10838 "device extent[%llu, %llu, %llu] did not find the related chunk",
10839 devext_key.objectid, devext_key.offset, length);
10840 return REFERENCER_MISSING;
10842 return 0;
10846 * Check if the used space is correct with the dev item
10848 static int check_dev_item(struct btrfs_fs_info *fs_info,
10849 struct extent_buffer *eb, int slot)
10851 struct btrfs_root *dev_root = fs_info->dev_root;
10852 struct btrfs_dev_item *dev_item;
10853 struct btrfs_path path;
10854 struct btrfs_key key;
10855 struct btrfs_dev_extent *ptr;
10856 u64 dev_id;
10857 u64 used;
10858 u64 total = 0;
10859 int ret;
10861 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10862 dev_id = btrfs_device_id(eb, dev_item);
10863 used = btrfs_device_bytes_used(eb, dev_item);
10865 key.objectid = dev_id;
10866 key.type = BTRFS_DEV_EXTENT_KEY;
10867 key.offset = 0;
10869 btrfs_init_path(&path);
10870 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10871 if (ret < 0) {
10872 btrfs_item_key_to_cpu(eb, &key, slot);
10873 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10874 key.objectid, key.type, key.offset);
10875 btrfs_release_path(&path);
10876 return REFERENCER_MISSING;
10879 /* Iterate dev_extents to calculate the used space of a device */
10880 while (1) {
10881 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10883 if (key.objectid > dev_id)
10884 break;
10885 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10886 goto next;
10888 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10889 struct btrfs_dev_extent);
10890 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10891 next:
10892 ret = btrfs_next_item(dev_root, &path);
10893 if (ret)
10894 break;
10896 btrfs_release_path(&path);
10898 if (used != total) {
10899 btrfs_item_key_to_cpu(eb, &key, slot);
10900 error(
10901 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10902 total, used, BTRFS_ROOT_TREE_OBJECTID,
10903 BTRFS_DEV_EXTENT_KEY, dev_id);
10904 return ACCOUNTING_MISMATCH;
10906 return 0;
10910 * Check a block group item with its referener (chunk) and its used space
10911 * with extent/metadata item
10913 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10914 struct extent_buffer *eb, int slot)
10916 struct btrfs_root *extent_root = fs_info->extent_root;
10917 struct btrfs_root *chunk_root = fs_info->chunk_root;
10918 struct btrfs_block_group_item *bi;
10919 struct btrfs_block_group_item bg_item;
10920 struct btrfs_path path;
10921 struct btrfs_key bg_key;
10922 struct btrfs_key chunk_key;
10923 struct btrfs_key extent_key;
10924 struct btrfs_chunk *chunk;
10925 struct extent_buffer *leaf;
10926 struct btrfs_extent_item *ei;
10927 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10928 u64 flags;
10929 u64 bg_flags;
10930 u64 used;
10931 u64 total = 0;
10932 int ret;
10933 int err = 0;
10935 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10936 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10937 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10938 used = btrfs_block_group_used(&bg_item);
10939 bg_flags = btrfs_block_group_flags(&bg_item);
10941 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10942 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10943 chunk_key.offset = bg_key.objectid;
10945 btrfs_init_path(&path);
10946 /* Search for the referencer chunk */
10947 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10948 if (ret) {
10949 error(
10950 "block group[%llu %llu] did not find the related chunk item",
10951 bg_key.objectid, bg_key.offset);
10952 err |= REFERENCER_MISSING;
10953 } else {
10954 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10955 struct btrfs_chunk);
10956 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10957 bg_key.offset) {
10958 error(
10959 "block group[%llu %llu] related chunk item length does not match",
10960 bg_key.objectid, bg_key.offset);
10961 err |= REFERENCER_MISMATCH;
10964 btrfs_release_path(&path);
10966 /* Search from the block group bytenr */
10967 extent_key.objectid = bg_key.objectid;
10968 extent_key.type = 0;
10969 extent_key.offset = 0;
10971 btrfs_init_path(&path);
10972 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10973 if (ret < 0)
10974 goto out;
10976 /* Iterate extent tree to account used space */
10977 while (1) {
10978 leaf = path.nodes[0];
10979 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10980 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10981 break;
10983 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10984 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10985 goto next;
10986 if (extent_key.objectid < bg_key.objectid)
10987 goto next;
10989 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10990 total += nodesize;
10991 else
10992 total += extent_key.offset;
10994 ei = btrfs_item_ptr(leaf, path.slots[0],
10995 struct btrfs_extent_item);
10996 flags = btrfs_extent_flags(leaf, ei);
10997 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10998 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10999 error(
11000 "bad extent[%llu, %llu) type mismatch with chunk",
11001 extent_key.objectid,
11002 extent_key.objectid + extent_key.offset);
11003 err |= CHUNK_TYPE_MISMATCH;
11005 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11006 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11007 BTRFS_BLOCK_GROUP_METADATA))) {
11008 error(
11009 "bad extent[%llu, %llu) type mismatch with chunk",
11010 extent_key.objectid,
11011 extent_key.objectid + nodesize);
11012 err |= CHUNK_TYPE_MISMATCH;
11015 next:
11016 ret = btrfs_next_item(extent_root, &path);
11017 if (ret)
11018 break;
11021 out:
11022 btrfs_release_path(&path);
11024 if (total != used) {
11025 error(
11026 "block group[%llu %llu] used %llu but extent items used %llu",
11027 bg_key.objectid, bg_key.offset, used, total);
11028 err |= ACCOUNTING_MISMATCH;
11030 return err;
11034 * Check a chunk item.
11035 * Including checking all referred dev_extents and block group
11037 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11038 struct extent_buffer *eb, int slot)
11040 struct btrfs_root *extent_root = fs_info->extent_root;
11041 struct btrfs_root *dev_root = fs_info->dev_root;
11042 struct btrfs_path path;
11043 struct btrfs_key chunk_key;
11044 struct btrfs_key bg_key;
11045 struct btrfs_key devext_key;
11046 struct btrfs_chunk *chunk;
11047 struct extent_buffer *leaf;
11048 struct btrfs_block_group_item *bi;
11049 struct btrfs_block_group_item bg_item;
11050 struct btrfs_dev_extent *ptr;
11051 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11052 u64 length;
11053 u64 chunk_end;
11054 u64 type;
11055 u64 profile;
11056 int num_stripes;
11057 u64 offset;
11058 u64 objectid;
11059 int i;
11060 int ret;
11061 int err = 0;
11063 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11064 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11065 length = btrfs_chunk_length(eb, chunk);
11066 chunk_end = chunk_key.offset + length;
11067 if (!IS_ALIGNED(length, sectorsize)) {
11068 error("chunk[%llu %llu) not aligned to %u",
11069 chunk_key.offset, chunk_end, sectorsize);
11070 err |= BYTES_UNALIGNED;
11071 goto out;
11074 type = btrfs_chunk_type(eb, chunk);
11075 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11076 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11077 error("chunk[%llu %llu) has no chunk type",
11078 chunk_key.offset, chunk_end);
11079 err |= UNKNOWN_TYPE;
11081 if (profile && (profile & (profile - 1))) {
11082 error("chunk[%llu %llu) multiple profiles detected: %llx",
11083 chunk_key.offset, chunk_end, profile);
11084 err |= UNKNOWN_TYPE;
11087 bg_key.objectid = chunk_key.offset;
11088 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11089 bg_key.offset = length;
11091 btrfs_init_path(&path);
11092 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11093 if (ret) {
11094 error(
11095 "chunk[%llu %llu) did not find the related block group item",
11096 chunk_key.offset, chunk_end);
11097 err |= REFERENCER_MISSING;
11098 } else{
11099 leaf = path.nodes[0];
11100 bi = btrfs_item_ptr(leaf, path.slots[0],
11101 struct btrfs_block_group_item);
11102 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11103 sizeof(bg_item));
11104 if (btrfs_block_group_flags(&bg_item) != type) {
11105 error(
11106 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11107 chunk_key.offset, chunk_end, type,
11108 btrfs_block_group_flags(&bg_item));
11109 err |= REFERENCER_MISSING;
11113 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11114 for (i = 0; i < num_stripes; i++) {
11115 btrfs_release_path(&path);
11116 btrfs_init_path(&path);
11117 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11118 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11119 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11121 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11122 0, 0);
11123 if (ret)
11124 goto not_match_dev;
11126 leaf = path.nodes[0];
11127 ptr = btrfs_item_ptr(leaf, path.slots[0],
11128 struct btrfs_dev_extent);
11129 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11130 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11131 if (objectid != chunk_key.objectid ||
11132 offset != chunk_key.offset ||
11133 btrfs_dev_extent_length(leaf, ptr) != length)
11134 goto not_match_dev;
11135 continue;
11136 not_match_dev:
11137 err |= BACKREF_MISSING;
11138 error(
11139 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11140 chunk_key.objectid, chunk_end, i);
11141 continue;
11143 btrfs_release_path(&path);
11144 out:
11145 return err;
11149 * Main entry function to check known items and update related accounting info
11151 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11153 struct btrfs_fs_info *fs_info = root->fs_info;
11154 struct btrfs_key key;
11155 int slot = 0;
11156 int type;
11157 struct btrfs_extent_data_ref *dref;
11158 int ret;
11159 int err = 0;
11161 next:
11162 btrfs_item_key_to_cpu(eb, &key, slot);
11163 type = key.type;
11165 switch (type) {
11166 case BTRFS_EXTENT_DATA_KEY:
11167 ret = check_extent_data_item(root, eb, slot);
11168 err |= ret;
11169 break;
11170 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11171 ret = check_block_group_item(fs_info, eb, slot);
11172 err |= ret;
11173 break;
11174 case BTRFS_DEV_ITEM_KEY:
11175 ret = check_dev_item(fs_info, eb, slot);
11176 err |= ret;
11177 break;
11178 case BTRFS_CHUNK_ITEM_KEY:
11179 ret = check_chunk_item(fs_info, eb, slot);
11180 err |= ret;
11181 break;
11182 case BTRFS_DEV_EXTENT_KEY:
11183 ret = check_dev_extent_item(fs_info, eb, slot);
11184 err |= ret;
11185 break;
11186 case BTRFS_EXTENT_ITEM_KEY:
11187 case BTRFS_METADATA_ITEM_KEY:
11188 ret = check_extent_item(fs_info, eb, slot);
11189 err |= ret;
11190 break;
11191 case BTRFS_EXTENT_CSUM_KEY:
11192 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11193 break;
11194 case BTRFS_TREE_BLOCK_REF_KEY:
11195 ret = check_tree_block_backref(fs_info, key.offset,
11196 key.objectid, -1);
11197 err |= ret;
11198 break;
11199 case BTRFS_EXTENT_DATA_REF_KEY:
11200 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11201 ret = check_extent_data_backref(fs_info,
11202 btrfs_extent_data_ref_root(eb, dref),
11203 btrfs_extent_data_ref_objectid(eb, dref),
11204 btrfs_extent_data_ref_offset(eb, dref),
11205 key.objectid, 0,
11206 btrfs_extent_data_ref_count(eb, dref));
11207 err |= ret;
11208 break;
11209 case BTRFS_SHARED_BLOCK_REF_KEY:
11210 ret = check_shared_block_backref(fs_info, key.offset,
11211 key.objectid, -1);
11212 err |= ret;
11213 break;
11214 case BTRFS_SHARED_DATA_REF_KEY:
11215 ret = check_shared_data_backref(fs_info, key.offset,
11216 key.objectid);
11217 err |= ret;
11218 break;
11219 default:
11220 break;
11223 if (++slot < btrfs_header_nritems(eb))
11224 goto next;
11226 return err;
11230 * Helper function for later fs/subvol tree check. To determine if a tree
11231 * block should be checked.
11232 * This function will ensure only the direct referencer with lowest rootid to
11233 * check a fs/subvolume tree block.
11235 * Backref check at extent tree would detect errors like missing subvolume
11236 * tree, so we can do aggressive check to reduce duplicated checks.
11238 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11240 struct btrfs_root *extent_root = root->fs_info->extent_root;
11241 struct btrfs_key key;
11242 struct btrfs_path path;
11243 struct extent_buffer *leaf;
11244 int slot;
11245 struct btrfs_extent_item *ei;
11246 unsigned long ptr;
11247 unsigned long end;
11248 int type;
11249 u32 item_size;
11250 u64 offset;
11251 struct btrfs_extent_inline_ref *iref;
11252 int ret;
11254 btrfs_init_path(&path);
11255 key.objectid = btrfs_header_bytenr(eb);
11256 key.type = BTRFS_METADATA_ITEM_KEY;
11257 key.offset = (u64)-1;
11260 * Any failure in backref resolving means we can't determine
11261 * whom the tree block belongs to.
11262 * So in that case, we need to check that tree block
11264 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11265 if (ret < 0)
11266 goto need_check;
11268 ret = btrfs_previous_extent_item(extent_root, &path,
11269 btrfs_header_bytenr(eb));
11270 if (ret)
11271 goto need_check;
11273 leaf = path.nodes[0];
11274 slot = path.slots[0];
11275 btrfs_item_key_to_cpu(leaf, &key, slot);
11276 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11278 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11279 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11280 } else {
11281 struct btrfs_tree_block_info *info;
11283 info = (struct btrfs_tree_block_info *)(ei + 1);
11284 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11287 item_size = btrfs_item_size_nr(leaf, slot);
11288 ptr = (unsigned long)iref;
11289 end = (unsigned long)ei + item_size;
11290 while (ptr < end) {
11291 iref = (struct btrfs_extent_inline_ref *)ptr;
11292 type = btrfs_extent_inline_ref_type(leaf, iref);
11293 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11296 * We only check the tree block if current root is
11297 * the lowest referencer of it.
11299 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11300 offset < root->objectid) {
11301 btrfs_release_path(&path);
11302 return 0;
11305 ptr += btrfs_extent_inline_ref_size(type);
11308 * Normally we should also check keyed tree block ref, but that may be
11309 * very time consuming. Inlined ref should already make us skip a lot
11310 * of refs now. So skip search keyed tree block ref.
11313 need_check:
11314 btrfs_release_path(&path);
11315 return 1;
11319 * Traversal function for tree block. We will do:
11320 * 1) Skip shared fs/subvolume tree blocks
11321 * 2) Update related bytes accounting
11322 * 3) Pre-order traversal
11324 static int traverse_tree_block(struct btrfs_root *root,
11325 struct extent_buffer *node)
11327 struct extent_buffer *eb;
11328 struct btrfs_key key;
11329 struct btrfs_key drop_key;
11330 int level;
11331 u64 nr;
11332 int i;
11333 int err = 0;
11334 int ret;
11337 * Skip shared fs/subvolume tree block, in that case they will
11338 * be checked by referencer with lowest rootid
11340 if (is_fstree(root->objectid) && !should_check(root, node))
11341 return 0;
11343 /* Update bytes accounting */
11344 total_btree_bytes += node->len;
11345 if (fs_root_objectid(btrfs_header_owner(node)))
11346 total_fs_tree_bytes += node->len;
11347 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11348 total_extent_tree_bytes += node->len;
11349 if (!found_old_backref &&
11350 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11351 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11352 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11353 found_old_backref = 1;
11355 /* pre-order tranversal, check itself first */
11356 level = btrfs_header_level(node);
11357 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11358 btrfs_header_level(node),
11359 btrfs_header_owner(node));
11360 err |= ret;
11361 if (err)
11362 error(
11363 "check %s failed root %llu bytenr %llu level %d, force continue check",
11364 level ? "node":"leaf", root->objectid,
11365 btrfs_header_bytenr(node), btrfs_header_level(node));
11367 if (!level) {
11368 btree_space_waste += btrfs_leaf_free_space(root, node);
11369 ret = check_leaf_items(root, node);
11370 err |= ret;
11371 return err;
11374 nr = btrfs_header_nritems(node);
11375 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11376 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11377 sizeof(struct btrfs_key_ptr);
11379 /* Then check all its children */
11380 for (i = 0; i < nr; i++) {
11381 u64 blocknr = btrfs_node_blockptr(node, i);
11383 btrfs_node_key_to_cpu(node, &key, i);
11384 if (level == root->root_item.drop_level &&
11385 is_dropped_key(&key, &drop_key))
11386 continue;
11389 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11390 * to call the function itself.
11392 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11393 if (extent_buffer_uptodate(eb)) {
11394 ret = traverse_tree_block(root, eb);
11395 err |= ret;
11397 free_extent_buffer(eb);
11400 return err;
11404 * Low memory usage version check_chunks_and_extents.
11406 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11408 struct btrfs_path path;
11409 struct btrfs_key key;
11410 struct btrfs_root *root1;
11411 struct btrfs_root *cur_root;
11412 int err = 0;
11413 int ret;
11415 root1 = root->fs_info->chunk_root;
11416 ret = traverse_tree_block(root1, root1->node);
11417 err |= ret;
11419 root1 = root->fs_info->tree_root;
11420 ret = traverse_tree_block(root1, root1->node);
11421 err |= ret;
11423 btrfs_init_path(&path);
11424 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11425 key.offset = 0;
11426 key.type = BTRFS_ROOT_ITEM_KEY;
11428 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11429 if (ret) {
11430 error("cannot find extent treet in tree_root");
11431 goto out;
11434 while (1) {
11435 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11436 if (key.type != BTRFS_ROOT_ITEM_KEY)
11437 goto next;
11438 key.offset = (u64)-1;
11440 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11441 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11442 &key);
11443 else
11444 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11445 if (IS_ERR(cur_root) || !cur_root) {
11446 error("failed to read tree: %lld", key.objectid);
11447 goto next;
11450 ret = traverse_tree_block(cur_root, cur_root->node);
11451 err |= ret;
11453 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11454 btrfs_free_fs_root(cur_root);
11455 next:
11456 ret = btrfs_next_item(root1, &path);
11457 if (ret)
11458 goto out;
11461 out:
11462 btrfs_release_path(&path);
11463 return err;
11466 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11467 struct btrfs_root *root, int overwrite)
11469 struct extent_buffer *c;
11470 struct extent_buffer *old = root->node;
11471 int level;
11472 int ret;
11473 struct btrfs_disk_key disk_key = {0,0,0};
11475 level = 0;
11477 if (overwrite) {
11478 c = old;
11479 extent_buffer_get(c);
11480 goto init;
11482 c = btrfs_alloc_free_block(trans, root,
11483 root->nodesize,
11484 root->root_key.objectid,
11485 &disk_key, level, 0, 0);
11486 if (IS_ERR(c)) {
11487 c = old;
11488 extent_buffer_get(c);
11489 overwrite = 1;
11491 init:
11492 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11493 btrfs_set_header_level(c, level);
11494 btrfs_set_header_bytenr(c, c->start);
11495 btrfs_set_header_generation(c, trans->transid);
11496 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11497 btrfs_set_header_owner(c, root->root_key.objectid);
11499 write_extent_buffer(c, root->fs_info->fsid,
11500 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11502 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11503 btrfs_header_chunk_tree_uuid(c),
11504 BTRFS_UUID_SIZE);
11506 btrfs_mark_buffer_dirty(c);
11508 * this case can happen in the following case:
11510 * 1.overwrite previous root.
11512 * 2.reinit reloc data root, this is because we skip pin
11513 * down reloc data tree before which means we can allocate
11514 * same block bytenr here.
11516 if (old->start == c->start) {
11517 btrfs_set_root_generation(&root->root_item,
11518 trans->transid);
11519 root->root_item.level = btrfs_header_level(root->node);
11520 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11521 &root->root_key, &root->root_item);
11522 if (ret) {
11523 free_extent_buffer(c);
11524 return ret;
11527 free_extent_buffer(old);
11528 root->node = c;
11529 add_root_to_dirty_list(root);
11530 return 0;
11533 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11534 struct extent_buffer *eb, int tree_root)
11536 struct extent_buffer *tmp;
11537 struct btrfs_root_item *ri;
11538 struct btrfs_key key;
11539 u64 bytenr;
11540 u32 nodesize;
11541 int level = btrfs_header_level(eb);
11542 int nritems;
11543 int ret;
11544 int i;
11547 * If we have pinned this block before, don't pin it again.
11548 * This can not only avoid forever loop with broken filesystem
11549 * but also give us some speedups.
11551 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11552 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11553 return 0;
11555 btrfs_pin_extent(fs_info, eb->start, eb->len);
11557 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11558 nritems = btrfs_header_nritems(eb);
11559 for (i = 0; i < nritems; i++) {
11560 if (level == 0) {
11561 btrfs_item_key_to_cpu(eb, &key, i);
11562 if (key.type != BTRFS_ROOT_ITEM_KEY)
11563 continue;
11564 /* Skip the extent root and reloc roots */
11565 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11566 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11567 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11568 continue;
11569 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11570 bytenr = btrfs_disk_root_bytenr(eb, ri);
11573 * If at any point we start needing the real root we
11574 * will have to build a stump root for the root we are
11575 * in, but for now this doesn't actually use the root so
11576 * just pass in extent_root.
11578 tmp = read_tree_block(fs_info->extent_root, bytenr,
11579 nodesize, 0);
11580 if (!extent_buffer_uptodate(tmp)) {
11581 fprintf(stderr, "Error reading root block\n");
11582 return -EIO;
11584 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11585 free_extent_buffer(tmp);
11586 if (ret)
11587 return ret;
11588 } else {
11589 bytenr = btrfs_node_blockptr(eb, i);
11591 /* If we aren't the tree root don't read the block */
11592 if (level == 1 && !tree_root) {
11593 btrfs_pin_extent(fs_info, bytenr, nodesize);
11594 continue;
11597 tmp = read_tree_block(fs_info->extent_root, bytenr,
11598 nodesize, 0);
11599 if (!extent_buffer_uptodate(tmp)) {
11600 fprintf(stderr, "Error reading tree block\n");
11601 return -EIO;
11603 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11604 free_extent_buffer(tmp);
11605 if (ret)
11606 return ret;
11610 return 0;
11613 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11615 int ret;
11617 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11618 if (ret)
11619 return ret;
11621 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11624 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11626 struct btrfs_block_group_cache *cache;
11627 struct btrfs_path path;
11628 struct extent_buffer *leaf;
11629 struct btrfs_chunk *chunk;
11630 struct btrfs_key key;
11631 int ret;
11632 u64 start;
11634 btrfs_init_path(&path);
11635 key.objectid = 0;
11636 key.type = BTRFS_CHUNK_ITEM_KEY;
11637 key.offset = 0;
11638 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11639 if (ret < 0) {
11640 btrfs_release_path(&path);
11641 return ret;
11645 * We do this in case the block groups were screwed up and had alloc
11646 * bits that aren't actually set on the chunks. This happens with
11647 * restored images every time and could happen in real life I guess.
11649 fs_info->avail_data_alloc_bits = 0;
11650 fs_info->avail_metadata_alloc_bits = 0;
11651 fs_info->avail_system_alloc_bits = 0;
11653 /* First we need to create the in-memory block groups */
11654 while (1) {
11655 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11656 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11657 if (ret < 0) {
11658 btrfs_release_path(&path);
11659 return ret;
11661 if (ret) {
11662 ret = 0;
11663 break;
11666 leaf = path.nodes[0];
11667 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11668 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11669 path.slots[0]++;
11670 continue;
11673 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11674 btrfs_add_block_group(fs_info, 0,
11675 btrfs_chunk_type(leaf, chunk),
11676 key.objectid, key.offset,
11677 btrfs_chunk_length(leaf, chunk));
11678 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11679 key.offset + btrfs_chunk_length(leaf, chunk),
11680 GFP_NOFS);
11681 path.slots[0]++;
11683 start = 0;
11684 while (1) {
11685 cache = btrfs_lookup_first_block_group(fs_info, start);
11686 if (!cache)
11687 break;
11688 cache->cached = 1;
11689 start = cache->key.objectid + cache->key.offset;
11692 btrfs_release_path(&path);
11693 return 0;
11696 static int reset_balance(struct btrfs_trans_handle *trans,
11697 struct btrfs_fs_info *fs_info)
11699 struct btrfs_root *root = fs_info->tree_root;
11700 struct btrfs_path path;
11701 struct extent_buffer *leaf;
11702 struct btrfs_key key;
11703 int del_slot, del_nr = 0;
11704 int ret;
11705 int found = 0;
11707 btrfs_init_path(&path);
11708 key.objectid = BTRFS_BALANCE_OBJECTID;
11709 key.type = BTRFS_BALANCE_ITEM_KEY;
11710 key.offset = 0;
11711 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11712 if (ret) {
11713 if (ret > 0)
11714 ret = 0;
11715 if (!ret)
11716 goto reinit_data_reloc;
11717 else
11718 goto out;
11721 ret = btrfs_del_item(trans, root, &path);
11722 if (ret)
11723 goto out;
11724 btrfs_release_path(&path);
11726 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11727 key.type = BTRFS_ROOT_ITEM_KEY;
11728 key.offset = 0;
11729 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11730 if (ret < 0)
11731 goto out;
11732 while (1) {
11733 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11734 if (!found)
11735 break;
11737 if (del_nr) {
11738 ret = btrfs_del_items(trans, root, &path,
11739 del_slot, del_nr);
11740 del_nr = 0;
11741 if (ret)
11742 goto out;
11744 key.offset++;
11745 btrfs_release_path(&path);
11747 found = 0;
11748 ret = btrfs_search_slot(trans, root, &key, &path,
11749 -1, 1);
11750 if (ret < 0)
11751 goto out;
11752 continue;
11754 found = 1;
11755 leaf = path.nodes[0];
11756 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11757 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11758 break;
11759 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11760 path.slots[0]++;
11761 continue;
11763 if (!del_nr) {
11764 del_slot = path.slots[0];
11765 del_nr = 1;
11766 } else {
11767 del_nr++;
11769 path.slots[0]++;
11772 if (del_nr) {
11773 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11774 if (ret)
11775 goto out;
11777 btrfs_release_path(&path);
11779 reinit_data_reloc:
11780 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11781 key.type = BTRFS_ROOT_ITEM_KEY;
11782 key.offset = (u64)-1;
11783 root = btrfs_read_fs_root(fs_info, &key);
11784 if (IS_ERR(root)) {
11785 fprintf(stderr, "Error reading data reloc tree\n");
11786 ret = PTR_ERR(root);
11787 goto out;
11789 record_root_in_trans(trans, root);
11790 ret = btrfs_fsck_reinit_root(trans, root, 0);
11791 if (ret)
11792 goto out;
11793 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11794 out:
11795 btrfs_release_path(&path);
11796 return ret;
11799 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11800 struct btrfs_fs_info *fs_info)
11802 u64 start = 0;
11803 int ret;
11806 * The only reason we don't do this is because right now we're just
11807 * walking the trees we find and pinning down their bytes, we don't look
11808 * at any of the leaves. In order to do mixed groups we'd have to check
11809 * the leaves of any fs roots and pin down the bytes for any file
11810 * extents we find. Not hard but why do it if we don't have to?
11812 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11813 fprintf(stderr, "We don't support re-initing the extent tree "
11814 "for mixed block groups yet, please notify a btrfs "
11815 "developer you want to do this so they can add this "
11816 "functionality.\n");
11817 return -EINVAL;
11821 * first we need to walk all of the trees except the extent tree and pin
11822 * down the bytes that are in use so we don't overwrite any existing
11823 * metadata.
11825 ret = pin_metadata_blocks(fs_info);
11826 if (ret) {
11827 fprintf(stderr, "error pinning down used bytes\n");
11828 return ret;
11832 * Need to drop all the block groups since we're going to recreate all
11833 * of them again.
11835 btrfs_free_block_groups(fs_info);
11836 ret = reset_block_groups(fs_info);
11837 if (ret) {
11838 fprintf(stderr, "error resetting the block groups\n");
11839 return ret;
11842 /* Ok we can allocate now, reinit the extent root */
11843 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11844 if (ret) {
11845 fprintf(stderr, "extent root initialization failed\n");
11847 * When the transaction code is updated we should end the
11848 * transaction, but for now progs only knows about commit so
11849 * just return an error.
11851 return ret;
11855 * Now we have all the in-memory block groups setup so we can make
11856 * allocations properly, and the metadata we care about is safe since we
11857 * pinned all of it above.
11859 while (1) {
11860 struct btrfs_block_group_cache *cache;
11862 cache = btrfs_lookup_first_block_group(fs_info, start);
11863 if (!cache)
11864 break;
11865 start = cache->key.objectid + cache->key.offset;
11866 ret = btrfs_insert_item(trans, fs_info->extent_root,
11867 &cache->key, &cache->item,
11868 sizeof(cache->item));
11869 if (ret) {
11870 fprintf(stderr, "Error adding block group\n");
11871 return ret;
11873 btrfs_extent_post_op(trans, fs_info->extent_root);
11876 ret = reset_balance(trans, fs_info);
11877 if (ret)
11878 fprintf(stderr, "error resetting the pending balance\n");
11880 return ret;
11883 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11885 struct btrfs_path path;
11886 struct btrfs_trans_handle *trans;
11887 struct btrfs_key key;
11888 int ret;
11890 printf("Recowing metadata block %llu\n", eb->start);
11891 key.objectid = btrfs_header_owner(eb);
11892 key.type = BTRFS_ROOT_ITEM_KEY;
11893 key.offset = (u64)-1;
11895 root = btrfs_read_fs_root(root->fs_info, &key);
11896 if (IS_ERR(root)) {
11897 fprintf(stderr, "Couldn't find owner root %llu\n",
11898 key.objectid);
11899 return PTR_ERR(root);
11902 trans = btrfs_start_transaction(root, 1);
11903 if (IS_ERR(trans))
11904 return PTR_ERR(trans);
11906 btrfs_init_path(&path);
11907 path.lowest_level = btrfs_header_level(eb);
11908 if (path.lowest_level)
11909 btrfs_node_key_to_cpu(eb, &key, 0);
11910 else
11911 btrfs_item_key_to_cpu(eb, &key, 0);
11913 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11914 btrfs_commit_transaction(trans, root);
11915 btrfs_release_path(&path);
11916 return ret;
11919 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11921 struct btrfs_path path;
11922 struct btrfs_trans_handle *trans;
11923 struct btrfs_key key;
11924 int ret;
11926 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11927 bad->key.type, bad->key.offset);
11928 key.objectid = bad->root_id;
11929 key.type = BTRFS_ROOT_ITEM_KEY;
11930 key.offset = (u64)-1;
11932 root = btrfs_read_fs_root(root->fs_info, &key);
11933 if (IS_ERR(root)) {
11934 fprintf(stderr, "Couldn't find owner root %llu\n",
11935 key.objectid);
11936 return PTR_ERR(root);
11939 trans = btrfs_start_transaction(root, 1);
11940 if (IS_ERR(trans))
11941 return PTR_ERR(trans);
11943 btrfs_init_path(&path);
11944 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11945 if (ret) {
11946 if (ret > 0)
11947 ret = 0;
11948 goto out;
11950 ret = btrfs_del_item(trans, root, &path);
11951 out:
11952 btrfs_commit_transaction(trans, root);
11953 btrfs_release_path(&path);
11954 return ret;
11957 static int zero_log_tree(struct btrfs_root *root)
11959 struct btrfs_trans_handle *trans;
11960 int ret;
11962 trans = btrfs_start_transaction(root, 1);
11963 if (IS_ERR(trans)) {
11964 ret = PTR_ERR(trans);
11965 return ret;
11967 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11968 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11969 ret = btrfs_commit_transaction(trans, root);
11970 return ret;
11973 static int populate_csum(struct btrfs_trans_handle *trans,
11974 struct btrfs_root *csum_root, char *buf, u64 start,
11975 u64 len)
11977 u64 offset = 0;
11978 u64 sectorsize;
11979 int ret = 0;
11981 while (offset < len) {
11982 sectorsize = csum_root->sectorsize;
11983 ret = read_extent_data(csum_root, buf, start + offset,
11984 &sectorsize, 0);
11985 if (ret)
11986 break;
11987 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11988 start + offset, buf, sectorsize);
11989 if (ret)
11990 break;
11991 offset += sectorsize;
11993 return ret;
11996 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11997 struct btrfs_root *csum_root,
11998 struct btrfs_root *cur_root)
12000 struct btrfs_path path;
12001 struct btrfs_key key;
12002 struct extent_buffer *node;
12003 struct btrfs_file_extent_item *fi;
12004 char *buf = NULL;
12005 u64 start = 0;
12006 u64 len = 0;
12007 int slot = 0;
12008 int ret = 0;
12010 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12011 if (!buf)
12012 return -ENOMEM;
12014 btrfs_init_path(&path);
12015 key.objectid = 0;
12016 key.offset = 0;
12017 key.type = 0;
12018 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12019 if (ret < 0)
12020 goto out;
12021 /* Iterate all regular file extents and fill its csum */
12022 while (1) {
12023 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12025 if (key.type != BTRFS_EXTENT_DATA_KEY)
12026 goto next;
12027 node = path.nodes[0];
12028 slot = path.slots[0];
12029 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12030 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12031 goto next;
12032 start = btrfs_file_extent_disk_bytenr(node, fi);
12033 len = btrfs_file_extent_disk_num_bytes(node, fi);
12035 ret = populate_csum(trans, csum_root, buf, start, len);
12036 if (ret == -EEXIST)
12037 ret = 0;
12038 if (ret < 0)
12039 goto out;
12040 next:
12042 * TODO: if next leaf is corrupted, jump to nearest next valid
12043 * leaf.
12045 ret = btrfs_next_item(cur_root, &path);
12046 if (ret < 0)
12047 goto out;
12048 if (ret > 0) {
12049 ret = 0;
12050 goto out;
12054 out:
12055 btrfs_release_path(&path);
12056 free(buf);
12057 return ret;
12060 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12061 struct btrfs_root *csum_root)
12063 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12064 struct btrfs_path path;
12065 struct btrfs_root *tree_root = fs_info->tree_root;
12066 struct btrfs_root *cur_root;
12067 struct extent_buffer *node;
12068 struct btrfs_key key;
12069 int slot = 0;
12070 int ret = 0;
12072 btrfs_init_path(&path);
12073 key.objectid = BTRFS_FS_TREE_OBJECTID;
12074 key.offset = 0;
12075 key.type = BTRFS_ROOT_ITEM_KEY;
12076 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12077 if (ret < 0)
12078 goto out;
12079 if (ret > 0) {
12080 ret = -ENOENT;
12081 goto out;
12084 while (1) {
12085 node = path.nodes[0];
12086 slot = path.slots[0];
12087 btrfs_item_key_to_cpu(node, &key, slot);
12088 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12089 goto out;
12090 if (key.type != BTRFS_ROOT_ITEM_KEY)
12091 goto next;
12092 if (!is_fstree(key.objectid))
12093 goto next;
12094 key.offset = (u64)-1;
12096 cur_root = btrfs_read_fs_root(fs_info, &key);
12097 if (IS_ERR(cur_root) || !cur_root) {
12098 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12099 key.objectid);
12100 goto out;
12102 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12103 cur_root);
12104 if (ret < 0)
12105 goto out;
12106 next:
12107 ret = btrfs_next_item(tree_root, &path);
12108 if (ret > 0) {
12109 ret = 0;
12110 goto out;
12112 if (ret < 0)
12113 goto out;
12116 out:
12117 btrfs_release_path(&path);
12118 return ret;
12121 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12122 struct btrfs_root *csum_root)
12124 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12125 struct btrfs_path path;
12126 struct btrfs_extent_item *ei;
12127 struct extent_buffer *leaf;
12128 char *buf;
12129 struct btrfs_key key;
12130 int ret;
12132 btrfs_init_path(&path);
12133 key.objectid = 0;
12134 key.type = BTRFS_EXTENT_ITEM_KEY;
12135 key.offset = 0;
12136 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12137 if (ret < 0) {
12138 btrfs_release_path(&path);
12139 return ret;
12142 buf = malloc(csum_root->sectorsize);
12143 if (!buf) {
12144 btrfs_release_path(&path);
12145 return -ENOMEM;
12148 while (1) {
12149 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12150 ret = btrfs_next_leaf(extent_root, &path);
12151 if (ret < 0)
12152 break;
12153 if (ret) {
12154 ret = 0;
12155 break;
12158 leaf = path.nodes[0];
12160 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12161 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12162 path.slots[0]++;
12163 continue;
12166 ei = btrfs_item_ptr(leaf, path.slots[0],
12167 struct btrfs_extent_item);
12168 if (!(btrfs_extent_flags(leaf, ei) &
12169 BTRFS_EXTENT_FLAG_DATA)) {
12170 path.slots[0]++;
12171 continue;
12174 ret = populate_csum(trans, csum_root, buf, key.objectid,
12175 key.offset);
12176 if (ret)
12177 break;
12178 path.slots[0]++;
12181 btrfs_release_path(&path);
12182 free(buf);
12183 return ret;
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied on and the fs trees are used instead: when
 * search_fs_tree is set, the fs/subvol trees are used to init the csum
 * tree, otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12203 static void free_roots_info_cache(void)
12205 if (!roots_info_cache)
12206 return;
12208 while (!cache_tree_empty(roots_info_cache)) {
12209 struct cache_extent *entry;
12210 struct root_item_info *rii;
12212 entry = first_cache_extent(roots_info_cache);
12213 if (!entry)
12214 break;
12215 remove_cache_extent(roots_info_cache, entry);
12216 rii = container_of(entry, struct root_item_info, cache_extent);
12217 free(rii);
12220 free(roots_info_cache);
12221 roots_info_cache = NULL;
12224 static int build_roots_info_cache(struct btrfs_fs_info *info)
12226 int ret = 0;
12227 struct btrfs_key key;
12228 struct extent_buffer *leaf;
12229 struct btrfs_path path;
12231 if (!roots_info_cache) {
12232 roots_info_cache = malloc(sizeof(*roots_info_cache));
12233 if (!roots_info_cache)
12234 return -ENOMEM;
12235 cache_tree_init(roots_info_cache);
12238 btrfs_init_path(&path);
12239 key.objectid = 0;
12240 key.type = BTRFS_EXTENT_ITEM_KEY;
12241 key.offset = 0;
12242 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12243 if (ret < 0)
12244 goto out;
12245 leaf = path.nodes[0];
12247 while (1) {
12248 struct btrfs_key found_key;
12249 struct btrfs_extent_item *ei;
12250 struct btrfs_extent_inline_ref *iref;
12251 int slot = path.slots[0];
12252 int type;
12253 u64 flags;
12254 u64 root_id;
12255 u8 level;
12256 struct cache_extent *entry;
12257 struct root_item_info *rii;
12259 if (slot >= btrfs_header_nritems(leaf)) {
12260 ret = btrfs_next_leaf(info->extent_root, &path);
12261 if (ret < 0) {
12262 break;
12263 } else if (ret) {
12264 ret = 0;
12265 break;
12267 leaf = path.nodes[0];
12268 slot = path.slots[0];
12271 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12273 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12274 found_key.type != BTRFS_METADATA_ITEM_KEY)
12275 goto next;
12277 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12278 flags = btrfs_extent_flags(leaf, ei);
12280 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12281 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12282 goto next;
12284 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12285 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12286 level = found_key.offset;
12287 } else {
12288 struct btrfs_tree_block_info *binfo;
12290 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12291 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12292 level = btrfs_tree_block_level(leaf, binfo);
12296 * For a root extent, it must be of the following type and the
12297 * first (and only one) iref in the item.
12299 type = btrfs_extent_inline_ref_type(leaf, iref);
12300 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12301 goto next;
12303 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12304 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12305 if (!entry) {
12306 rii = malloc(sizeof(struct root_item_info));
12307 if (!rii) {
12308 ret = -ENOMEM;
12309 goto out;
12311 rii->cache_extent.start = root_id;
12312 rii->cache_extent.size = 1;
12313 rii->level = (u8)-1;
12314 entry = &rii->cache_extent;
12315 ret = insert_cache_extent(roots_info_cache, entry);
12316 ASSERT(ret == 0);
12317 } else {
12318 rii = container_of(entry, struct root_item_info,
12319 cache_extent);
12322 ASSERT(rii->cache_extent.start == root_id);
12323 ASSERT(rii->cache_extent.size == 1);
12325 if (level > rii->level || rii->level == (u8)-1) {
12326 rii->level = level;
12327 rii->bytenr = found_key.objectid;
12328 rii->gen = btrfs_extent_generation(leaf, ei);
12329 rii->node_count = 1;
12330 } else if (level == rii->level) {
12331 rii->node_count++;
12333 next:
12334 path.slots[0]++;
12337 out:
12338 btrfs_release_path(&path);
12340 return ret;
12343 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12344 struct btrfs_path *path,
12345 const struct btrfs_key *root_key,
12346 const int read_only_mode)
12348 const u64 root_id = root_key->objectid;
12349 struct cache_extent *entry;
12350 struct root_item_info *rii;
12351 struct btrfs_root_item ri;
12352 unsigned long offset;
12354 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12355 if (!entry) {
12356 fprintf(stderr,
12357 "Error: could not find extent items for root %llu\n",
12358 root_key->objectid);
12359 return -ENOENT;
12362 rii = container_of(entry, struct root_item_info, cache_extent);
12363 ASSERT(rii->cache_extent.start == root_id);
12364 ASSERT(rii->cache_extent.size == 1);
12366 if (rii->node_count != 1) {
12367 fprintf(stderr,
12368 "Error: could not find btree root extent for root %llu\n",
12369 root_id);
12370 return -ENOENT;
12373 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12374 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12376 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12377 btrfs_root_level(&ri) != rii->level ||
12378 btrfs_root_generation(&ri) != rii->gen) {
12381 * If we're in repair mode but our caller told us to not update
12382 * the root item, i.e. just check if it needs to be updated, don't
12383 * print this message, since the caller will call us again shortly
12384 * for the same root item without read only mode (the caller will
12385 * open a transaction first).
12387 if (!(read_only_mode && repair))
12388 fprintf(stderr,
12389 "%sroot item for root %llu,"
12390 " current bytenr %llu, current gen %llu, current level %u,"
12391 " new bytenr %llu, new gen %llu, new level %u\n",
12392 (read_only_mode ? "" : "fixing "),
12393 root_id,
12394 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12395 btrfs_root_level(&ri),
12396 rii->bytenr, rii->gen, rii->level);
12398 if (btrfs_root_generation(&ri) > rii->gen) {
12399 fprintf(stderr,
12400 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12401 root_id, btrfs_root_generation(&ri), rii->gen);
12402 return -EINVAL;
12405 if (!read_only_mode) {
12406 btrfs_set_root_bytenr(&ri, rii->bytenr);
12407 btrfs_set_root_level(&ri, rii->level);
12408 btrfs_set_root_generation(&ri, rii->gen);
12409 write_extent_buffer(path->nodes[0], &ri,
12410 offset, sizeof(ri));
12413 return 1;
12416 return 0;
12420 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12421 * caused read-only snapshots to be corrupted if they were created at a moment
12422 * when the source subvolume/snapshot had orphan items. The issue was that the
12423 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12424 * node instead of the post orphan cleanup root node.
12425 * So this function, and its callees, just detects and fixes those cases. Even
12426 * though the regression was for read-only snapshots, this function applies to
12427 * any snapshot/subvolume root.
12428 * This must be run before any other repair code - not doing it so, makes other
12429 * repair code delete or modify backrefs in the extent tree for example, which
12430 * will result in an inconsistent fs after repairing the root items.
12432 static int repair_root_items(struct btrfs_fs_info *info)
12434 struct btrfs_path path;
12435 struct btrfs_key key;
12436 struct extent_buffer *leaf;
12437 struct btrfs_trans_handle *trans = NULL;
12438 int ret = 0;
12439 int bad_roots = 0;
12440 int need_trans = 0;
12442 btrfs_init_path(&path);
12444 ret = build_roots_info_cache(info);
12445 if (ret)
12446 goto out;
12448 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12449 key.type = BTRFS_ROOT_ITEM_KEY;
12450 key.offset = 0;
12452 again:
12454 * Avoid opening and committing transactions if a leaf doesn't have
12455 * any root items that need to be fixed, so that we avoid rotating
12456 * backup roots unnecessarily.
12458 if (need_trans) {
12459 trans = btrfs_start_transaction(info->tree_root, 1);
12460 if (IS_ERR(trans)) {
12461 ret = PTR_ERR(trans);
12462 goto out;
12466 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12467 0, trans ? 1 : 0);
12468 if (ret < 0)
12469 goto out;
12470 leaf = path.nodes[0];
12472 while (1) {
12473 struct btrfs_key found_key;
12475 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12476 int no_more_keys = find_next_key(&path, &key);
12478 btrfs_release_path(&path);
12479 if (trans) {
12480 ret = btrfs_commit_transaction(trans,
12481 info->tree_root);
12482 trans = NULL;
12483 if (ret < 0)
12484 goto out;
12486 need_trans = 0;
12487 if (no_more_keys)
12488 break;
12489 goto again;
12492 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12494 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12495 goto next;
12496 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12497 goto next;
12499 ret = maybe_repair_root_item(info, &path, &found_key,
12500 trans ? 0 : 1);
12501 if (ret < 0)
12502 goto out;
12503 if (ret) {
12504 if (!trans && repair) {
12505 need_trans = 1;
12506 key = found_key;
12507 btrfs_release_path(&path);
12508 goto again;
12510 bad_roots++;
12512 next:
12513 path.slots[0]++;
12515 ret = 0;
12516 out:
12517 free_roots_info_cache();
12518 btrfs_release_path(&path);
12519 if (trans)
12520 btrfs_commit_transaction(trans, info->tree_root);
12521 if (ret < 0)
12522 return ret;
12524 return bad_roots;
12527 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12529 struct btrfs_trans_handle *trans;
12530 struct btrfs_block_group_cache *bg_cache;
12531 u64 current = 0;
12532 int ret = 0;
12534 /* Clear all free space cache inodes and its extent data */
12535 while (1) {
12536 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12537 if (!bg_cache)
12538 break;
12539 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12540 if (ret < 0)
12541 return ret;
12542 current = bg_cache->key.objectid + bg_cache->key.offset;
12545 /* Don't forget to set cache_generation to -1 */
12546 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12547 if (IS_ERR(trans)) {
12548 error("failed to update super block cache generation");
12549 return PTR_ERR(trans);
12551 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12552 btrfs_commit_transaction(trans, fs_info->tree_root);
12554 return ret;
/*
 * Usage text for "btrfs check": synopsis, short description, long
 * description, an empty separator entry, then one entry per option line.
 * The array is NULL terminated.
 *
 * NOTE(review): column-alignment whitespace inside the option strings may
 * not match the intended layout - confirm against the rendered help output.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
12588 int cmd_check(int argc, char **argv)
12590 struct cache_tree root_cache;
12591 struct btrfs_root *root;
12592 struct btrfs_fs_info *info;
12593 u64 bytenr = 0;
12594 u64 subvolid = 0;
12595 u64 tree_root_bytenr = 0;
12596 u64 chunk_root_bytenr = 0;
12597 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12598 int ret;
12599 int err = 0;
12600 u64 num;
12601 int init_csum_tree = 0;
12602 int readonly = 0;
12603 int clear_space_cache = 0;
12604 int qgroup_report = 0;
12605 int qgroups_repaired = 0;
12606 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12608 while(1) {
12609 int c;
12610 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12611 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12612 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12613 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12614 static const struct option long_options[] = {
12615 { "super", required_argument, NULL, 's' },
12616 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12617 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12618 { "init-csum-tree", no_argument, NULL,
12619 GETOPT_VAL_INIT_CSUM },
12620 { "init-extent-tree", no_argument, NULL,
12621 GETOPT_VAL_INIT_EXTENT },
12622 { "check-data-csum", no_argument, NULL,
12623 GETOPT_VAL_CHECK_CSUM },
12624 { "backup", no_argument, NULL, 'b' },
12625 { "subvol-extents", required_argument, NULL, 'E' },
12626 { "qgroup-report", no_argument, NULL, 'Q' },
12627 { "tree-root", required_argument, NULL, 'r' },
12628 { "chunk-root", required_argument, NULL,
12629 GETOPT_VAL_CHUNK_TREE },
12630 { "progress", no_argument, NULL, 'p' },
12631 { "mode", required_argument, NULL,
12632 GETOPT_VAL_MODE },
12633 { "clear-space-cache", required_argument, NULL,
12634 GETOPT_VAL_CLEAR_SPACE_CACHE},
12635 { NULL, 0, NULL, 0}
12638 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12639 if (c < 0)
12640 break;
12641 switch(c) {
12642 case 'a': /* ignored */ break;
12643 case 'b':
12644 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12645 break;
12646 case 's':
12647 num = arg_strtou64(optarg);
12648 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12649 error(
12650 "super mirror should be less than %d",
12651 BTRFS_SUPER_MIRROR_MAX);
12652 exit(1);
12654 bytenr = btrfs_sb_offset(((int)num));
12655 printf("using SB copy %llu, bytenr %llu\n", num,
12656 (unsigned long long)bytenr);
12657 break;
12658 case 'Q':
12659 qgroup_report = 1;
12660 break;
12661 case 'E':
12662 subvolid = arg_strtou64(optarg);
12663 break;
12664 case 'r':
12665 tree_root_bytenr = arg_strtou64(optarg);
12666 break;
12667 case GETOPT_VAL_CHUNK_TREE:
12668 chunk_root_bytenr = arg_strtou64(optarg);
12669 break;
12670 case 'p':
12671 ctx.progress_enabled = true;
12672 break;
12673 case '?':
12674 case 'h':
12675 usage(cmd_check_usage);
12676 case GETOPT_VAL_REPAIR:
12677 printf("enabling repair mode\n");
12678 repair = 1;
12679 ctree_flags |= OPEN_CTREE_WRITES;
12680 break;
12681 case GETOPT_VAL_READONLY:
12682 readonly = 1;
12683 break;
12684 case GETOPT_VAL_INIT_CSUM:
12685 printf("Creating a new CRC tree\n");
12686 init_csum_tree = 1;
12687 repair = 1;
12688 ctree_flags |= OPEN_CTREE_WRITES;
12689 break;
12690 case GETOPT_VAL_INIT_EXTENT:
12691 init_extent_tree = 1;
12692 ctree_flags |= (OPEN_CTREE_WRITES |
12693 OPEN_CTREE_NO_BLOCK_GROUPS);
12694 repair = 1;
12695 break;
12696 case GETOPT_VAL_CHECK_CSUM:
12697 check_data_csum = 1;
12698 break;
12699 case GETOPT_VAL_MODE:
12700 check_mode = parse_check_mode(optarg);
12701 if (check_mode == CHECK_MODE_UNKNOWN) {
12702 error("unknown mode: %s", optarg);
12703 exit(1);
12705 break;
12706 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12707 if (strcmp(optarg, "v1") == 0) {
12708 clear_space_cache = 1;
12709 } else if (strcmp(optarg, "v2") == 0) {
12710 clear_space_cache = 2;
12711 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12712 } else {
12713 error(
12714 "invalid argument to --clear-space-cache, must be v1 or v2");
12715 exit(1);
12717 ctree_flags |= OPEN_CTREE_WRITES;
12718 break;
12722 if (check_argc_exact(argc - optind, 1))
12723 usage(cmd_check_usage);
12725 if (ctx.progress_enabled) {
12726 ctx.tp = TASK_NOTHING;
12727 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12730 /* This check is the only reason for --readonly to exist */
12731 if (readonly && repair) {
12732 error("repair options are not compatible with --readonly");
12733 exit(1);
12737 * Not supported yet
12739 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12740 error("low memory mode doesn't support repair yet");
12741 exit(1);
12744 radix_tree_init();
12745 cache_tree_init(&root_cache);
12747 if((ret = check_mounted(argv[optind])) < 0) {
12748 error("could not check mount status: %s", strerror(-ret));
12749 err |= !!ret;
12750 goto err_out;
12751 } else if(ret) {
12752 error("%s is currently mounted, aborting", argv[optind]);
12753 ret = -EBUSY;
12754 err |= !!ret;
12755 goto err_out;
12758 /* only allow partial opening under repair mode */
12759 if (repair)
12760 ctree_flags |= OPEN_CTREE_PARTIAL;
12762 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12763 chunk_root_bytenr, ctree_flags);
12764 if (!info) {
12765 error("cannot open file system");
12766 ret = -EIO;
12767 err |= !!ret;
12768 goto err_out;
12771 global_info = info;
12772 root = info->fs_root;
12773 if (clear_space_cache == 1) {
12774 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12775 error(
12776 "free space cache v2 detected, use --clear-space-cache v2");
12777 ret = 1;
12778 goto close_out;
12780 printf("Clearing free space cache\n");
12781 ret = clear_free_space_cache(info);
12782 if (ret) {
12783 error("failed to clear free space cache");
12784 ret = 1;
12785 } else {
12786 printf("Free space cache cleared\n");
12788 goto close_out;
12789 } else if (clear_space_cache == 2) {
12790 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12791 printf("no free space cache v2 to clear\n");
12792 ret = 0;
12793 goto close_out;
12795 printf("Clear free space cache v2\n");
12796 ret = btrfs_clear_free_space_tree(info);
12797 if (ret) {
12798 error("failed to clear free space cache v2: %d", ret);
12799 ret = 1;
12800 } else {
12801 printf("free space cache v2 cleared\n");
12803 goto close_out;
12807 * repair mode will force us to commit transaction which
12808 * will make us fail to load log tree when mounting.
12810 if (repair && btrfs_super_log_root(info->super_copy)) {
12811 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12812 if (!ret) {
12813 ret = 1;
12814 err |= !!ret;
12815 goto close_out;
12817 ret = zero_log_tree(root);
12818 err |= !!ret;
12819 if (ret) {
12820 error("failed to zero log tree: %d", ret);
12821 goto close_out;
12825 uuid_unparse(info->super_copy->fsid, uuidbuf);
12826 if (qgroup_report) {
12827 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12828 uuidbuf);
12829 ret = qgroup_verify_all(info);
12830 err |= !!ret;
12831 if (ret == 0)
12832 report_qgroups(1);
12833 goto close_out;
12835 if (subvolid) {
12836 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12837 subvolid, argv[optind], uuidbuf);
12838 ret = print_extent_state(info, subvolid);
12839 err |= !!ret;
12840 goto close_out;
12842 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12844 if (!extent_buffer_uptodate(info->tree_root->node) ||
12845 !extent_buffer_uptodate(info->dev_root->node) ||
12846 !extent_buffer_uptodate(info->chunk_root->node)) {
12847 error("critical roots corrupted, unable to check the filesystem");
12848 err |= !!ret;
12849 ret = -EIO;
12850 goto close_out;
12853 if (init_extent_tree || init_csum_tree) {
12854 struct btrfs_trans_handle *trans;
12856 trans = btrfs_start_transaction(info->extent_root, 0);
12857 if (IS_ERR(trans)) {
12858 error("error starting transaction");
12859 ret = PTR_ERR(trans);
12860 err |= !!ret;
12861 goto close_out;
12864 if (init_extent_tree) {
12865 printf("Creating a new extent tree\n");
12866 ret = reinit_extent_tree(trans, info);
12867 err |= !!ret;
12868 if (ret)
12869 goto close_out;
12872 if (init_csum_tree) {
12873 printf("Reinitialize checksum tree\n");
12874 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12875 if (ret) {
12876 error("checksum tree initialization failed: %d",
12877 ret);
12878 ret = -EIO;
12879 err |= !!ret;
12880 goto close_out;
12883 ret = fill_csum_tree(trans, info->csum_root,
12884 init_extent_tree);
12885 err |= !!ret;
12886 if (ret) {
12887 error("checksum tree refilling failed: %d", ret);
12888 return -EIO;
12892 * Ok now we commit and run the normal fsck, which will add
12893 * extent entries for all of the items it finds.
12895 ret = btrfs_commit_transaction(trans, info->extent_root);
12896 err |= !!ret;
12897 if (ret)
12898 goto close_out;
12900 if (!extent_buffer_uptodate(info->extent_root->node)) {
12901 error("critical: extent_root, unable to check the filesystem");
12902 ret = -EIO;
12903 err |= !!ret;
12904 goto close_out;
12906 if (!extent_buffer_uptodate(info->csum_root->node)) {
12907 error("critical: csum_root, unable to check the filesystem");
12908 ret = -EIO;
12909 err |= !!ret;
12910 goto close_out;
12913 if (!ctx.progress_enabled)
12914 fprintf(stderr, "checking extents\n");
12915 if (check_mode == CHECK_MODE_LOWMEM)
12916 ret = check_chunks_and_extents_v2(root);
12917 else
12918 ret = check_chunks_and_extents(root);
12919 err |= !!ret;
12920 if (ret)
12921 error(
12922 "errors found in extent allocation tree or chunk allocation");
12924 ret = repair_root_items(info);
12925 err |= !!ret;
12926 if (ret < 0)
12927 goto close_out;
12928 if (repair) {
12929 fprintf(stderr, "Fixed %d roots.\n", ret);
12930 ret = 0;
12931 } else if (ret > 0) {
12932 fprintf(stderr,
12933 "Found %d roots with an outdated root item.\n",
12934 ret);
12935 fprintf(stderr,
12936 "Please run a filesystem check with the option --repair to fix them.\n");
12937 ret = 1;
12938 err |= !!ret;
12939 goto close_out;
12942 if (!ctx.progress_enabled) {
12943 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12944 fprintf(stderr, "checking free space tree\n");
12945 else
12946 fprintf(stderr, "checking free space cache\n");
12948 ret = check_space_cache(root);
12949 err |= !!ret;
12950 if (ret)
12951 goto out;
12954 * We used to have to have these hole extents in between our real
12955 * extents so if we don't have this flag set we need to make sure there
12956 * are no gaps in the file extents for inodes, otherwise we can just
12957 * ignore it when this happens.
12959 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12960 if (!ctx.progress_enabled)
12961 fprintf(stderr, "checking fs roots\n");
12962 if (check_mode == CHECK_MODE_LOWMEM)
12963 ret = check_fs_roots_v2(root->fs_info);
12964 else
12965 ret = check_fs_roots(root, &root_cache);
12966 err |= !!ret;
12967 if (ret)
12968 goto out;
12970 fprintf(stderr, "checking csums\n");
12971 ret = check_csums(root);
12972 err |= !!ret;
12973 if (ret)
12974 goto out;
12976 fprintf(stderr, "checking root refs\n");
12977 /* For low memory mode, check_fs_roots_v2 handles root refs */
12978 if (check_mode != CHECK_MODE_LOWMEM) {
12979 ret = check_root_refs(root, &root_cache);
12980 err |= !!ret;
12981 if (ret)
12982 goto out;
12985 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12986 struct extent_buffer *eb;
12988 eb = list_first_entry(&root->fs_info->recow_ebs,
12989 struct extent_buffer, recow);
12990 list_del_init(&eb->recow);
12991 ret = recow_extent_buffer(root, eb);
12992 err |= !!ret;
12993 if (ret)
12994 break;
12997 while (!list_empty(&delete_items)) {
12998 struct bad_item *bad;
13000 bad = list_first_entry(&delete_items, struct bad_item, list);
13001 list_del_init(&bad->list);
13002 if (repair) {
13003 ret = delete_bad_item(root, bad);
13004 err |= !!ret;
13006 free(bad);
13009 if (info->quota_enabled) {
13010 fprintf(stderr, "checking quota groups\n");
13011 ret = qgroup_verify_all(info);
13012 err |= !!ret;
13013 if (ret)
13014 goto out;
13015 report_qgroups(0);
13016 ret = repair_qgroups(info, &qgroups_repaired);
13017 err |= !!ret;
13018 if (err)
13019 goto out;
13020 ret = 0;
13023 if (!list_empty(&root->fs_info->recow_ebs)) {
13024 error("transid errors in file system");
13025 ret = 1;
13026 err |= !!ret;
13028 out:
13029 if (found_old_backref) { /*
13030 * there was a disk format change when mixed
13031 * backref was in testing tree. The old format
13032 * existed about one week.
13034 printf("\n * Found old mixed backref format. "
13035 "The old format is not supported! *"
13036 "\n * Please mount the FS in readonly mode, "
13037 "backup data and re-format the FS. *\n\n");
13038 err |= 1;
13040 printf("found %llu bytes used err is %d\n",
13041 (unsigned long long)bytes_used, ret);
13042 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13043 printf("total tree bytes: %llu\n",
13044 (unsigned long long)total_btree_bytes);
13045 printf("total fs tree bytes: %llu\n",
13046 (unsigned long long)total_fs_tree_bytes);
13047 printf("total extent tree bytes: %llu\n",
13048 (unsigned long long)total_extent_tree_bytes);
13049 printf("btree space waste bytes: %llu\n",
13050 (unsigned long long)btree_space_waste);
13051 printf("file data blocks allocated: %llu\n referenced %llu\n",
13052 (unsigned long long)data_bytes_allocated,
13053 (unsigned long long)data_bytes_referenced);
13055 free_qgroup_counts();
13056 free_root_recs_tree(&root_cache);
13057 close_out:
13058 close_ctree(root);
13059 err_out:
13060 if (ctx.progress_enabled)
13061 task_deinit(ctx.info);
13063 return err;