btrfs-progs: convert: Introduce function to read out btrfs reserved range
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob5cc8469069c3f2871b413f5f974079be7e5ec097
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/*
 * Phase reported by the progress indicator.  The first three values are used
 * as indexes into the message table in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

54 struct task_ctx {
55 int progress_enabled;
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;

88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
104 union {
105 u64 parent;
106 u64 root;
108 u64 owner;
109 u64 offset;
110 u64 disk_bytenr;
111 u64 bytes;
112 u64 ram_bytes;
113 u32 num_refs;
114 u32 found_ref;
/* Error bits for inode/directory/root-ref checks; each occupies its own bit. */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14) /* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */

135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
148 u64 root;
149 u64 objectid;
150 u64 offset;
151 u64 disk_bytenr;
152 u64 disk_len;
155 struct tree_backref {
156 struct extent_backref node;
157 union {
158 u64 parent;
159 u64 root;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so the value 2 fits alongside 0 and 1.
 */
enum { FLAG_UNSET = 2 };

171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
177 u64 start;
178 u64 max_size;
179 u64 nr;
180 u64 refs;
181 u64 extent_item_refs;
182 u64 generation;
183 u64 parent_generation;
184 u64 info_objectid;
185 u32 num_duplicates;
186 u8 info_level;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
208 u8 filetype;
209 u8 ref_type;
210 int errors;
211 u64 dir;
212 u64 index;
213 u16 namelen;
214 char name[0];
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
224 u64 objectid;
225 u64 bytenr;
226 u64 last_snapshot;
227 u8 level;
228 u8 drop_level;
229 int level_size;
230 struct btrfs_key drop_key;
/* Error bits for inode_backref::errors / root_backref::errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)

247 struct file_extent_hole {
248 struct rb_node node;
249 u64 start;
250 u64 len;
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
263 int errors;
265 u64 ino;
266 u32 nlink;
267 u32 imode;
268 u64 isize;
269 u64 nbytes;
271 u32 found_link;
272 u64 found_size;
273 u64 extent_start;
274 u64 extent_end;
275 struct rb_root holes;
276 struct list_head orphan_extents;
278 u32 refs;
/* Error bits for inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)

297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
304 int errors;
305 u64 ref_root;
306 u64 dir;
307 u64 index;
308 u16 namelen;
309 char name[0];
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
317 struct root_record {
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
321 u64 objectid;
322 u32 found_ref;
325 struct ptr_node {
326 struct cache_extent cache;
327 void *data;
330 struct shared_node {
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
335 u32 refs;
338 struct block_info {
339 u64 start;
340 u32 size;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
346 int active_node;
347 int root_level;
350 struct bad_item {
351 struct btrfs_key key;
352 u64 root_id;
353 struct list_head list;
356 struct extent_entry {
357 u64 bytenr;
358 u64 bytes;
359 int count;
360 int broken;
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
366 u8 level;
367 /* number of nodes at this level, must be 1 for a root */
368 int node_count;
369 u64 bytenr;
370 u64 gen;
371 struct cache_extent cache_extent;
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag must occupy its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit instead, leaving all other values unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */

391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
/*
 * Counterpart of print_status_check(): terminate the '\r'-rewritten status
 * line with a newline and flush stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);

	return 0;
}

425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
/* Forward declaration; the definition is not part of this section. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);

631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868 u64 ino, int mod)
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
873 int ret;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
876 if (cache) {
877 node = container_of(cache, struct ptr_node, cache);
878 rec = node->data;
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
882 return node->data;
883 rec->refs--;
884 rec = node->data;
886 } else if (mod) {
887 rec = calloc(1, sizeof(*rec));
888 if (!rec)
889 return ERR_PTR(-ENOMEM);
890 rec->ino = ino;
891 rec->extent_start = (u64)-1;
892 rec->refs = 1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
898 if (!node) {
899 free(rec);
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
904 node->data = rec;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
907 rec->found_link = 1;
909 ret = insert_cache_extent(inode_cache, &node->cache);
910 if (ret)
911 return ERR_PTR(-EEXIST);
913 return rec;
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
924 free(orphan);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
932 if (--rec->refs > 0)
933 return;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
938 free(backref);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
942 free(rec);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949 return 1;
950 return 0;
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
959 u8 filetype;
961 if (!rec->found_inode_item)
962 return;
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
972 free(backref);
977 if (!rec->checked || rec->merging)
978 return;
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1009 free(node);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1018 int ret;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1022 key.offset = ino;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1027 if (ret > 0)
1028 ret = -ENOENT;
1029 return ret;
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043 return 1;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->nodatasum = 1;
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1056 return 0;
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 const char *name,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 break;
1068 if (backref->dir != dir || backref->namelen != namelen)
1069 continue;
1070 if (memcmp(name, backref->name, namelen))
1071 continue;
1072 return backref;
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1076 if (!backref)
1077 return NULL;
1078 memset(backref, 0, sizeof(*backref));
1079 backref->dir = dir;
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1084 return backref;
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1098 BUG_ON(!backref);
1099 if (errors)
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 rec->found_link++;
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 else
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1132 } else {
1133 BUG_ON(1);
1136 maybe_free_inode_rec(inode_cache, rec);
1137 return 0;
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1144 u32 dir_count = 0;
1145 int ret = 0;
1147 dst->merging = 1;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1156 dir_count++;
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180 if (ret < 0)
1181 return ret;
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1191 } else {
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1196 dst->extent_end,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1213 } else {
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1217 dst->merging = 0;
1219 return 0;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1230 int splice = 0;
1231 int ret;
1233 if (--src_node->refs == 0)
1234 splice = 1;
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1240 again:
1241 cache = search_cache_extent(src, 0);
1242 while (cache) {
1243 node = container_of(cache, struct ptr_node, cache);
1244 rec = node->data;
1245 cache = next_cache_extent(cache);
1247 if (splice) {
1248 remove_cache_extent(src, &node->cache);
1249 ins = node;
1250 } else {
1251 ins = malloc(sizeof(*ins));
1252 BUG_ON(!ins);
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1255 ins->data = rec;
1256 rec->refs++;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1263 if (rec->checked) {
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1270 free(ins);
1271 } else {
1272 BUG_ON(ret);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1279 goto again;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1291 return 0;
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1300 rec = node->data;
1301 free_inode_rec(rec);
1302 free(node);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308 u64 bytenr)
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1314 if (cache) {
1315 node = container_of(cache, struct shared_node, cache);
1316 return node;
1318 return NULL;
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 int ret;
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1327 if (!node)
1328 return -ENOMEM;
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1333 node->refs = refs;
1335 ret = insert_cache_extent(shared, &node->cache);
1337 return ret;
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1345 int ret;
1347 if (level == wc->active_node)
1348 return 0;
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1352 if (!node) {
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 BUG_ON(ret);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1358 return 0;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1367 free(node);
1369 return 1;
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1376 free(node);
1378 return 1;
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1386 int i;
1388 if (level == wc->root_level)
1389 return 0;
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392 if (wc->nodes[i])
1393 break;
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1406 } else {
1407 BUG_ON(node->refs < 2);
1408 node->refs--;
1410 return 0;
1414 * Returns:
1415 * < 0 - on error
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422 u64 child_root_id)
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1427 int has_parent = 0;
1428 int ret;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436 0, 0);
1437 if (ret < 0)
1438 return ret;
1439 btrfs_release_path(&path);
1440 if (!ret)
1441 return 1;
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 key.offset = 0;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447 0, 0);
1448 if (ret < 0)
1449 goto out;
1451 while (1) {
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455 if (ret)
1456 break;
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1463 break;
1465 has_parent = 1;
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1469 return 1;
1472 path.slots[0]++;
1474 out:
1475 btrfs_release_path(&path);
1476 if (ret < 0)
1477 return ret;
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1485 u32 total;
1486 u32 cur = 0;
1487 u32 len;
1488 u32 name_len;
1489 u32 data_len;
1490 int error;
1491 int nritems = 0;
1492 u8 filetype;
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1508 nritems++;
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1516 len = name_len;
1517 error = 0;
1518 } else {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1532 key->type, error);
1533 } else {
1534 fprintf(stderr, "invalid location in dir item %u\n",
1535 location.type);
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1543 cur += len;
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548 return 0;
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1555 u32 total;
1556 u32 cur = 0;
1557 u32 len;
1558 u32 name_len;
1559 u64 index;
1560 int error;
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1573 len = name_len;
1574 error = 0;
1575 } else {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585 cur += len;
1587 return 0;
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1594 u32 total;
1595 u32 cur = 0;
1596 u32 len;
1597 u32 name_len;
1598 u64 index;
1599 u64 parent;
1600 int error;
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1614 len = name_len;
1615 error = 0;
1616 } else {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627 cur += len;
1629 return 0;
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1639 int ret;
1640 size_t size;
1641 *found = 0;
1642 u64 csum_end;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648 key.offset = start;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652 &key, &path, 0, 0);
1653 if (ret < 0)
1654 goto out;
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1660 path.slots[0]--;
1663 while (len > 0) {
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667 if (ret > 0)
1668 break;
1669 else if (ret < 0)
1670 goto out;
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1677 break;
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1681 break;
1683 if (key.offset > start)
1684 start = key.offset;
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1690 len -= size;
1691 start += size;
1692 *found += size;
1695 path.slots[0]++;
1697 out:
1698 btrfs_release_path(&path);
1699 if (ret < 0)
1700 return ret;
1701 return 0;
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1711 u64 num_bytes = 0;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1715 int extent_type;
1716 int ret;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1732 if (ret < 0)
1733 return ret;
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741 if (num_bytes == 0)
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1762 } else {
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1773 * data reloc tree.
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777 u64 found;
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780 else
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784 if (ret < 0)
1785 return ret;
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787 if (found > 0)
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792 if (found > 0)
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1796 return 0;
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1803 u32 nritems;
1804 int i;
1805 int ret = 0;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1811 return 0;
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820 continue;
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822 continue;
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1832 key.objectid, 1);
1833 BUG_ON(IS_ERR(active_node->current));
1835 switch (key.type) {
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1839 break;
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1842 break;
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1845 break;
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1848 break;
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
1851 active_node);
1852 break;
1853 default:
1854 break;
1857 return ret;
1860 struct node_refs {
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1881 u64 cur_bytenr;
1882 u32 nritems;
1883 u64 first_ino = 0;
1884 int root_level = btrfs_header_level(root->node);
1885 int i;
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1895 if (i == 0)
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1899 break;
1901 if (i == nritems) {
1902 path->slots[0] = nritems;
1903 return 0;
1905 path->slots[0] = i;
1907 again:
1908 err |= check_inode_item(root, path, ext_ref);
1910 if (err & LAST_ITEM)
1911 goto out;
1913 /* still have inode items in thie leaf */
1914 if (cur->start == cur_bytenr)
1915 goto again;
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1921 * node or leaf.
1923 for (i = root_level; i >= 0; i--) {
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1925 continue;
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1929 nrefs, i);
1930 if (ret)
1931 goto out;
1933 if (!nrefs->need_check[i]) {
1934 *level += 1;
1935 break;
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
1943 out:
1944 err &= ~LAST_ITEM;
1945 if (err && !ret)
1946 ret = err;
1947 return ret;
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1953 u64 bytenr;
1954 u64 ptr_gen;
1955 u32 nritems;
1956 u32 blocksize;
1957 int i;
1958 int level;
1960 level = btrfs_header_level(node);
1961 if (level != 1)
1962 return;
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1976 * in parent.
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1991 int ret = 0;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1996 else
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000 ret = -EINVAL;
2001 fprintf(stderr,
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007 ret = -EINVAL;
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2014 ret = -EINVAL;
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2019 return ret;
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find its all root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2033 return 1;
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2042 return 0;
2044 return 1;
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2054 int check, ret;
2055 u64 refs;
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2061 if (ret < 0)
2062 return ret;
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2066 if (refs > 1) {
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068 0, &roots);
2069 if (ret)
2070 return -EIO;
2072 check = need_check(root, roots);
2073 ulist_free(roots);
2074 nrefs->need_check[level] = check;
2075 } else {
2076 nrefs->need_check[level] = 1;
2080 return 0;
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2088 u64 bytenr;
2089 u64 ptr_gen;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2092 u32 blocksize;
2093 int ret, err = 0;
2094 u64 refs;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2101 ret = 0;
2102 } else {
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2106 if (ret < 0) {
2107 err = ret;
2108 goto out;
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2114 if (refs > 1) {
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2116 refs, wc, *level);
2117 if (ret > 0) {
2118 err = ret;
2119 goto out;
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2129 WARN_ON(1);
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132 break;
2133 if (*level == 0) {
2134 ret = process_one_leaf(root, cur, wc);
2135 if (ret < 0)
2136 err = ret;
2137 break;
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2145 } else {
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2148 if (ret < 0) {
2149 refs = 0;
2150 } else {
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2156 if (refs > 1) {
2157 ret = enter_shared_node(root, bytenr, refs,
2158 wc, *level - 1);
2159 if (ret > 0) {
2160 path->slots[*level]++;
2161 continue;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2170 ptr_gen);
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2175 &node_key,
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2178 &node_key,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2181 err = -EIO;
2182 goto out;
2186 ret = check_child_node(cur, path->slots[*level], next);
2187 if (ret) {
2188 err = ret;
2189 goto out;
2192 if (btrfs_is_leaf(next))
2193 status = btrfs_check_leaf(root, NULL, next);
2194 else
2195 status = btrfs_check_node(root, NULL, next);
2196 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197 free_extent_buffer(next);
2198 err = -EIO;
2199 goto out;
2202 *level = *level - 1;
2203 free_extent_buffer(path->nodes[*level]);
2204 path->nodes[*level] = next;
2205 path->slots[*level] = 0;
2207 out:
2208 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209 return err;
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213 unsigned int ext_ref);
2216 * Returns >0 Found error, should continue
2217 * Returns <0 Fatal error, must exit the whole check
2218 * Returns 0 No errors found
2220 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2221 int *level, struct node_refs *nrefs, int ext_ref)
2223 enum btrfs_tree_block_status status;
2224 u64 bytenr;
2225 u64 ptr_gen;
2226 struct extent_buffer *next;
2227 struct extent_buffer *cur;
2228 u32 blocksize;
2229 int ret;
2231 WARN_ON(*level < 0);
2232 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234 ret = update_nodes_refs(root, path->nodes[*level]->start,
2235 nrefs, *level);
2236 if (ret < 0)
2237 return ret;
2239 while (*level >= 0) {
2240 WARN_ON(*level < 0);
2241 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2242 cur = path->nodes[*level];
2244 if (btrfs_header_level(cur) != *level)
2245 WARN_ON(1);
2247 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 break;
2249 /* Don't forgot to check leaf/node validation */
2250 if (*level == 0) {
2251 ret = btrfs_check_leaf(root, NULL, cur);
2252 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2253 ret = -EIO;
2254 break;
2256 ret = process_one_leaf_v2(root, path, nrefs,
2257 level, ext_ref);
2258 break;
2259 } else {
2260 ret = btrfs_check_node(root, NULL, cur);
2261 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2262 ret = -EIO;
2263 break;
2266 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2267 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2268 blocksize = root->nodesize;
2270 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2271 if (ret)
2272 break;
2273 if (!nrefs->need_check[*level - 1]) {
2274 path->slots[*level]++;
2275 continue;
2278 next = btrfs_find_tree_block(root, bytenr, blocksize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root, bytenr, blocksize,
2283 ptr_gen);
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2288 &node_key,
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2291 &node_key,
2292 path->nodes[*level]->start,
2293 root->nodesize, *level);
2294 ret = -EIO;
2295 break;
2299 ret = check_child_node(cur, path->slots[*level], next);
2300 if (ret < 0)
2301 break;
2303 if (btrfs_is_leaf(next))
2304 status = btrfs_check_leaf(root, NULL, next);
2305 else
2306 status = btrfs_check_node(root, NULL, next);
2307 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2308 free_extent_buffer(next);
2309 ret = -EIO;
2310 break;
2313 *level = *level - 1;
2314 free_extent_buffer(path->nodes[*level]);
2315 path->nodes[*level] = next;
2316 path->slots[*level] = 0;
2318 return ret;
2321 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2322 struct walk_control *wc, int *level)
2324 int i;
2325 struct extent_buffer *leaf;
2327 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2328 leaf = path->nodes[i];
2329 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2330 path->slots[i]++;
2331 *level = i;
2332 return 0;
2333 } else {
2334 free_extent_buffer(path->nodes[*level]);
2335 path->nodes[*level] = NULL;
2336 BUG_ON(*level > wc->active_node);
2337 if (*level == wc->active_node)
2338 leave_shared_node(root, wc, *level);
2339 *level = i + 1;
2342 return 1;
2345 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2346 int *level)
2348 int i;
2349 struct extent_buffer *leaf;
2351 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2352 leaf = path->nodes[i];
2353 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2354 path->slots[i]++;
2355 *level = i;
2356 return 0;
2357 } else {
2358 free_extent_buffer(path->nodes[*level]);
2359 path->nodes[*level] = NULL;
2360 *level = i + 1;
2363 return 1;
2366 static int check_root_dir(struct inode_record *rec)
2368 struct inode_backref *backref;
2369 int ret = -1;
2371 if (!rec->found_inode_item || rec->errors)
2372 goto out;
2373 if (rec->nlink != 1 || rec->found_link != 0)
2374 goto out;
2375 if (list_empty(&rec->backrefs))
2376 goto out;
2377 backref = to_inode_backref(rec->backrefs.next);
2378 if (!backref->found_inode_ref)
2379 goto out;
2380 if (backref->index != 0 || backref->namelen != 2 ||
2381 memcmp(backref->name, "..", 2))
2382 goto out;
2383 if (backref->found_dir_index || backref->found_dir_item)
2384 goto out;
2385 ret = 0;
2386 out:
2387 return ret;
2390 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2391 struct btrfs_root *root, struct btrfs_path *path,
2392 struct inode_record *rec)
2394 struct btrfs_inode_item *ei;
2395 struct btrfs_key key;
2396 int ret;
2398 key.objectid = rec->ino;
2399 key.type = BTRFS_INODE_ITEM_KEY;
2400 key.offset = (u64)-1;
2402 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2403 if (ret < 0)
2404 goto out;
2405 if (ret) {
2406 if (!path->slots[0]) {
2407 ret = -ENOENT;
2408 goto out;
2410 path->slots[0]--;
2411 ret = 0;
2413 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2414 if (key.objectid != rec->ino) {
2415 ret = -ENOENT;
2416 goto out;
2419 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2420 struct btrfs_inode_item);
2421 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2422 btrfs_mark_buffer_dirty(path->nodes[0]);
2423 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2424 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2425 root->root_key.objectid);
2426 out:
2427 btrfs_release_path(path);
2428 return ret;
2431 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2432 struct btrfs_root *root,
2433 struct btrfs_path *path,
2434 struct inode_record *rec)
2436 int ret;
2438 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2439 btrfs_release_path(path);
2440 if (!ret)
2441 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2442 return ret;
2445 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2446 struct btrfs_root *root,
2447 struct btrfs_path *path,
2448 struct inode_record *rec)
2450 struct btrfs_inode_item *ei;
2451 struct btrfs_key key;
2452 int ret = 0;
2454 key.objectid = rec->ino;
2455 key.type = BTRFS_INODE_ITEM_KEY;
2456 key.offset = 0;
2458 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2459 if (ret) {
2460 if (ret > 0)
2461 ret = -ENOENT;
2462 goto out;
2465 /* Since ret == 0, no need to check anything */
2466 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2467 struct btrfs_inode_item);
2468 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2469 btrfs_mark_buffer_dirty(path->nodes[0]);
2470 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2471 printf("reset nbytes for ino %llu root %llu\n",
2472 rec->ino, root->root_key.objectid);
2473 out:
2474 btrfs_release_path(path);
2475 return ret;
2478 static int add_missing_dir_index(struct btrfs_root *root,
2479 struct cache_tree *inode_cache,
2480 struct inode_record *rec,
2481 struct inode_backref *backref)
2483 struct btrfs_path path;
2484 struct btrfs_trans_handle *trans;
2485 struct btrfs_dir_item *dir_item;
2486 struct extent_buffer *leaf;
2487 struct btrfs_key key;
2488 struct btrfs_disk_key disk_key;
2489 struct inode_record *dir_rec;
2490 unsigned long name_ptr;
2491 u32 data_size = sizeof(*dir_item) + backref->namelen;
2492 int ret;
2494 trans = btrfs_start_transaction(root, 1);
2495 if (IS_ERR(trans))
2496 return PTR_ERR(trans);
2498 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2499 (unsigned long long)rec->ino);
2501 btrfs_init_path(&path);
2502 key.objectid = backref->dir;
2503 key.type = BTRFS_DIR_INDEX_KEY;
2504 key.offset = backref->index;
2505 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2506 BUG_ON(ret);
2508 leaf = path.nodes[0];
2509 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511 disk_key.objectid = cpu_to_le64(rec->ino);
2512 disk_key.type = BTRFS_INODE_ITEM_KEY;
2513 disk_key.offset = 0;
2515 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2516 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2517 btrfs_set_dir_data_len(leaf, dir_item, 0);
2518 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2519 name_ptr = (unsigned long)(dir_item + 1);
2520 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2521 btrfs_mark_buffer_dirty(leaf);
2522 btrfs_release_path(&path);
2523 btrfs_commit_transaction(trans, root);
2525 backref->found_dir_index = 1;
2526 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2527 BUG_ON(IS_ERR(dir_rec));
2528 if (!dir_rec)
2529 return 0;
2530 dir_rec->found_size += backref->namelen;
2531 if (dir_rec->found_size == dir_rec->isize &&
2532 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2533 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2534 if (dir_rec->found_size != dir_rec->isize)
2535 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537 return 0;
2540 static int delete_dir_index(struct btrfs_root *root,
2541 struct inode_backref *backref)
2543 struct btrfs_trans_handle *trans;
2544 struct btrfs_dir_item *di;
2545 struct btrfs_path path;
2546 int ret = 0;
2548 trans = btrfs_start_transaction(root, 1);
2549 if (IS_ERR(trans))
2550 return PTR_ERR(trans);
2552 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2553 (unsigned long long)backref->dir,
2554 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2555 (unsigned long long)root->objectid);
2557 btrfs_init_path(&path);
2558 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2559 backref->name, backref->namelen,
2560 backref->index, -1);
2561 if (IS_ERR(di)) {
2562 ret = PTR_ERR(di);
2563 btrfs_release_path(&path);
2564 btrfs_commit_transaction(trans, root);
2565 if (ret == -ENOENT)
2566 return 0;
2567 return ret;
2570 if (!di)
2571 ret = btrfs_del_item(trans, root, &path);
2572 else
2573 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2574 BUG_ON(ret);
2575 btrfs_release_path(&path);
2576 btrfs_commit_transaction(trans, root);
2577 return ret;
2580 static int create_inode_item(struct btrfs_root *root,
2581 struct inode_record *rec,
2582 int root_dir)
2584 struct btrfs_trans_handle *trans;
2585 struct btrfs_inode_item inode_item;
2586 time_t now = time(NULL);
2587 int ret;
2589 trans = btrfs_start_transaction(root, 1);
2590 if (IS_ERR(trans)) {
2591 ret = PTR_ERR(trans);
2592 return ret;
2595 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2596 "be incomplete, please check permissions and content after "
2597 "the fsck completes.\n", (unsigned long long)root->objectid,
2598 (unsigned long long)rec->ino);
2600 memset(&inode_item, 0, sizeof(inode_item));
2601 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2602 if (root_dir)
2603 btrfs_set_stack_inode_nlink(&inode_item, 1);
2604 else
2605 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2606 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2607 if (rec->found_dir_item) {
2608 if (rec->found_file_extent)
2609 fprintf(stderr, "root %llu inode %llu has both a dir "
2610 "item and extents, unsure if it is a dir or a "
2611 "regular file so setting it as a directory\n",
2612 (unsigned long long)root->objectid,
2613 (unsigned long long)rec->ino);
2614 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2615 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2616 } else if (!rec->found_dir_item) {
2617 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2618 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2621 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2622 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2623 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2624 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2625 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2626 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2627 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2630 BUG_ON(ret);
2631 btrfs_commit_transaction(trans, root);
2632 return 0;
2635 static int repair_inode_backrefs(struct btrfs_root *root,
2636 struct inode_record *rec,
2637 struct cache_tree *inode_cache,
2638 int delete)
2640 struct inode_backref *tmp, *backref;
2641 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2642 int ret = 0;
2643 int repaired = 0;
2645 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2646 if (!delete && rec->ino == root_dirid) {
2647 if (!rec->found_inode_item) {
2648 ret = create_inode_item(root, rec, 1);
2649 if (ret)
2650 break;
2651 repaired++;
2655 /* Index 0 for root dir's are special, don't mess with it */
2656 if (rec->ino == root_dirid && backref->index == 0)
2657 continue;
2659 if (delete &&
2660 ((backref->found_dir_index && !backref->found_inode_ref) ||
2661 (backref->found_dir_index && backref->found_inode_ref &&
2662 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2663 ret = delete_dir_index(root, backref);
2664 if (ret)
2665 break;
2666 repaired++;
2667 list_del(&backref->list);
2668 free(backref);
2671 if (!delete && !backref->found_dir_index &&
2672 backref->found_dir_item && backref->found_inode_ref) {
2673 ret = add_missing_dir_index(root, inode_cache, rec,
2674 backref);
2675 if (ret)
2676 break;
2677 repaired++;
2678 if (backref->found_dir_item &&
2679 backref->found_dir_index &&
2680 backref->found_dir_index) {
2681 if (!backref->errors &&
2682 backref->found_inode_ref) {
2683 list_del(&backref->list);
2684 free(backref);
2689 if (!delete && (!backref->found_dir_index &&
2690 !backref->found_dir_item &&
2691 backref->found_inode_ref)) {
2692 struct btrfs_trans_handle *trans;
2693 struct btrfs_key location;
2695 ret = check_dir_conflict(root, backref->name,
2696 backref->namelen,
2697 backref->dir,
2698 backref->index);
2699 if (ret) {
2701 * let nlink fixing routine to handle it,
2702 * which can do it better.
2704 ret = 0;
2705 break;
2707 location.objectid = rec->ino;
2708 location.type = BTRFS_INODE_ITEM_KEY;
2709 location.offset = 0;
2711 trans = btrfs_start_transaction(root, 1);
2712 if (IS_ERR(trans)) {
2713 ret = PTR_ERR(trans);
2714 break;
2716 fprintf(stderr, "adding missing dir index/item pair "
2717 "for inode %llu\n",
2718 (unsigned long long)rec->ino);
2719 ret = btrfs_insert_dir_item(trans, root, backref->name,
2720 backref->namelen,
2721 backref->dir, &location,
2722 imode_to_type(rec->imode),
2723 backref->index);
2724 BUG_ON(ret);
2725 btrfs_commit_transaction(trans, root);
2726 repaired++;
2729 if (!delete && (backref->found_inode_ref &&
2730 backref->found_dir_index &&
2731 backref->found_dir_item &&
2732 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2733 !rec->found_inode_item)) {
2734 ret = create_inode_item(root, rec, 0);
2735 if (ret)
2736 break;
2737 repaired++;
2741 return ret ? ret : repaired;
2745 * To determine the file type for nlink/inode_item repair
2747 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2748 * Return -ENOENT if file type is not found.
2750 static int find_file_type(struct inode_record *rec, u8 *type)
2752 struct inode_backref *backref;
2754 /* For inode item recovered case */
2755 if (rec->found_inode_item) {
2756 *type = imode_to_type(rec->imode);
2757 return 0;
2760 list_for_each_entry(backref, &rec->backrefs, list) {
2761 if (backref->found_dir_index || backref->found_dir_item) {
2762 *type = backref->filetype;
2763 return 0;
2766 return -ENOENT;
2770 * To determine the file name for nlink repair
2772 * Return 0 if file name is found, set name and namelen.
2773 * Return -ENOENT if file name is not found.
2775 static int find_file_name(struct inode_record *rec,
2776 char *name, int *namelen)
2778 struct inode_backref *backref;
2780 list_for_each_entry(backref, &rec->backrefs, list) {
2781 if (backref->found_dir_index || backref->found_dir_item ||
2782 backref->found_inode_ref) {
2783 memcpy(name, backref->name, backref->namelen);
2784 *namelen = backref->namelen;
2785 return 0;
2788 return -ENOENT;
2791 /* Reset the nlink of the inode to the correct one */
2792 static int reset_nlink(struct btrfs_trans_handle *trans,
2793 struct btrfs_root *root,
2794 struct btrfs_path *path,
2795 struct inode_record *rec)
2797 struct inode_backref *backref;
2798 struct inode_backref *tmp;
2799 struct btrfs_key key;
2800 struct btrfs_inode_item *inode_item;
2801 int ret = 0;
2803 /* We don't believe this either, reset it and iterate backref */
2804 rec->found_link = 0;
2806 /* Remove all backref including the valid ones */
2807 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2808 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2809 backref->index, backref->name,
2810 backref->namelen, 0);
2811 if (ret < 0)
2812 goto out;
2814 /* remove invalid backref, so it won't be added back */
2815 if (!(backref->found_dir_index &&
2816 backref->found_dir_item &&
2817 backref->found_inode_ref)) {
2818 list_del(&backref->list);
2819 free(backref);
2820 } else {
2821 rec->found_link++;
2825 /* Set nlink to 0 */
2826 key.objectid = rec->ino;
2827 key.type = BTRFS_INODE_ITEM_KEY;
2828 key.offset = 0;
2829 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2830 if (ret < 0)
2831 goto out;
2832 if (ret > 0) {
2833 ret = -ENOENT;
2834 goto out;
2836 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2837 struct btrfs_inode_item);
2838 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2839 btrfs_mark_buffer_dirty(path->nodes[0]);
2840 btrfs_release_path(path);
2843 * Add back valid inode_ref/dir_item/dir_index,
2844 * add_link() will handle the nlink inc, so new nlink must be correct
2846 list_for_each_entry(backref, &rec->backrefs, list) {
2847 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2848 backref->name, backref->namelen,
2849 backref->filetype, &backref->index, 1);
2850 if (ret < 0)
2851 goto out;
2853 out:
2854 btrfs_release_path(path);
2855 return ret;
2858 static int get_highest_inode(struct btrfs_trans_handle *trans,
2859 struct btrfs_root *root,
2860 struct btrfs_path *path,
2861 u64 *highest_ino)
2863 struct btrfs_key key, found_key;
2864 int ret;
2866 btrfs_init_path(path);
2867 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2868 key.offset = -1;
2869 key.type = BTRFS_INODE_ITEM_KEY;
2870 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2871 if (ret == 1) {
2872 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2873 path->slots[0] - 1);
2874 *highest_ino = found_key.objectid;
2875 ret = 0;
2877 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2878 ret = -EOVERFLOW;
2879 btrfs_release_path(path);
2880 return ret;
2883 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2884 struct btrfs_root *root,
2885 struct btrfs_path *path,
2886 struct inode_record *rec)
2888 char *dir_name = "lost+found";
2889 char namebuf[BTRFS_NAME_LEN] = {0};
2890 u64 lost_found_ino;
2891 u32 mode = 0700;
2892 u8 type = 0;
2893 int namelen = 0;
2894 int name_recovered = 0;
2895 int type_recovered = 0;
2896 int ret = 0;
2899 * Get file name and type first before these invalid inode ref
2900 * are deleted by remove_all_invalid_backref()
2902 name_recovered = !find_file_name(rec, namebuf, &namelen);
2903 type_recovered = !find_file_type(rec, &type);
2905 if (!name_recovered) {
2906 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2907 rec->ino, rec->ino);
2908 namelen = count_digits(rec->ino);
2909 sprintf(namebuf, "%llu", rec->ino);
2910 name_recovered = 1;
2912 if (!type_recovered) {
2913 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2914 rec->ino);
2915 type = BTRFS_FT_REG_FILE;
2916 type_recovered = 1;
2919 ret = reset_nlink(trans, root, path, rec);
2920 if (ret < 0) {
2921 fprintf(stderr,
2922 "Failed to reset nlink for inode %llu: %s\n",
2923 rec->ino, strerror(-ret));
2924 goto out;
2927 if (rec->found_link == 0) {
2928 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2929 if (ret < 0)
2930 goto out;
2931 lost_found_ino++;
2932 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2933 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2934 mode);
2935 if (ret < 0) {
2936 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2937 dir_name, strerror(-ret));
2938 goto out;
2940 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2941 namebuf, namelen, type, NULL, 1);
2943 * Add ".INO" suffix several times to handle case where
2944 * "FILENAME.INO" is already taken by another file.
2946 while (ret == -EEXIST) {
2948 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2950 if (namelen + count_digits(rec->ino) + 1 >
2951 BTRFS_NAME_LEN) {
2952 ret = -EFBIG;
2953 goto out;
2955 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2956 ".%llu", rec->ino);
2957 namelen += count_digits(rec->ino) + 1;
2958 ret = btrfs_add_link(trans, root, rec->ino,
2959 lost_found_ino, namebuf,
2960 namelen, type, NULL, 1);
2962 if (ret < 0) {
2963 fprintf(stderr,
2964 "Failed to link the inode %llu to %s dir: %s\n",
2965 rec->ino, dir_name, strerror(-ret));
2966 goto out;
2969 * Just increase the found_link, don't actually add the
2970 * backref. This will make things easier and this inode
2971 * record will be freed after the repair is done.
2972 * So fsck will not report problem about this inode.
2974 rec->found_link++;
2975 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2976 namelen, namebuf, dir_name);
2978 printf("Fixed the nlink of inode %llu\n", rec->ino);
2979 out:
2981 * Clear the flag anyway, or we will loop forever for the same inode
2982 * as it will not be removed from the bad inode list and the dead loop
2983 * happens.
2985 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2986 btrfs_release_path(path);
2987 return ret;
2991 * Check if there is any normal(reg or prealloc) file extent for given
2992 * ino.
2993 * This is used to determine the file type when neither its dir_index/item or
2994 * inode_item exists.
2996 * This will *NOT* report error, if any error happens, just consider it does
2997 * not have any normal file extent.
2999 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3001 struct btrfs_path path;
3002 struct btrfs_key key;
3003 struct btrfs_key found_key;
3004 struct btrfs_file_extent_item *fi;
3005 u8 type;
3006 int ret = 0;
3008 btrfs_init_path(&path);
3009 key.objectid = ino;
3010 key.type = BTRFS_EXTENT_DATA_KEY;
3011 key.offset = 0;
3013 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3014 if (ret < 0) {
3015 ret = 0;
3016 goto out;
3018 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3019 ret = btrfs_next_leaf(root, &path);
3020 if (ret) {
3021 ret = 0;
3022 goto out;
3025 while (1) {
3026 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3027 path.slots[0]);
3028 if (found_key.objectid != ino ||
3029 found_key.type != BTRFS_EXTENT_DATA_KEY)
3030 break;
3031 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3032 struct btrfs_file_extent_item);
3033 type = btrfs_file_extent_type(path.nodes[0], fi);
3034 if (type != BTRFS_FILE_EXTENT_INLINE) {
3035 ret = 1;
3036 goto out;
3039 out:
3040 btrfs_release_path(&path);
3041 return ret;
3044 static u32 btrfs_type_to_imode(u8 type)
3046 static u32 imode_by_btrfs_type[] = {
3047 [BTRFS_FT_REG_FILE] = S_IFREG,
3048 [BTRFS_FT_DIR] = S_IFDIR,
3049 [BTRFS_FT_CHRDEV] = S_IFCHR,
3050 [BTRFS_FT_BLKDEV] = S_IFBLK,
3051 [BTRFS_FT_FIFO] = S_IFIFO,
3052 [BTRFS_FT_SOCK] = S_IFSOCK,
3053 [BTRFS_FT_SYMLINK] = S_IFLNK,
3056 return imode_by_btrfs_type[(type)];
3059 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3060 struct btrfs_root *root,
3061 struct btrfs_path *path,
3062 struct inode_record *rec)
3064 u8 filetype;
3065 u32 mode = 0700;
3066 int type_recovered = 0;
3067 int ret = 0;
3069 printf("Trying to rebuild inode:%llu\n", rec->ino);
3071 type_recovered = !find_file_type(rec, &filetype);
3074 * Try to determine inode type if type not found.
3076 * For found regular file extent, it must be FILE.
3077 * For found dir_item/index, it must be DIR.
3079 * For undetermined one, use FILE as fallback.
3081 * TODO:
3082 * 1. If found backref(inode_index/item is already handled) to it,
3083 * it must be DIR.
3084 * Need new inode-inode ref structure to allow search for that.
3086 if (!type_recovered) {
3087 if (rec->found_file_extent &&
3088 find_normal_file_extent(root, rec->ino)) {
3089 type_recovered = 1;
3090 filetype = BTRFS_FT_REG_FILE;
3091 } else if (rec->found_dir_item) {
3092 type_recovered = 1;
3093 filetype = BTRFS_FT_DIR;
3094 } else if (!list_empty(&rec->orphan_extents)) {
3095 type_recovered = 1;
3096 filetype = BTRFS_FT_REG_FILE;
3097 } else{
3098 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3099 rec->ino);
3100 type_recovered = 1;
3101 filetype = BTRFS_FT_REG_FILE;
3105 ret = btrfs_new_inode(trans, root, rec->ino,
3106 mode | btrfs_type_to_imode(filetype));
3107 if (ret < 0)
3108 goto out;
3111 * Here inode rebuild is done, we only rebuild the inode item,
3112 * don't repair the nlink(like move to lost+found).
3113 * That is the job of nlink repair.
3115 * We just fill the record and return
3117 rec->found_dir_item = 1;
3118 rec->imode = mode | btrfs_type_to_imode(filetype);
3119 rec->nlink = 0;
3120 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3121 /* Ensure the inode_nlinks repair function will be called */
3122 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3123 out:
3124 return ret;
3127 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3128 struct btrfs_root *root,
3129 struct btrfs_path *path,
3130 struct inode_record *rec)
3132 struct orphan_data_extent *orphan;
3133 struct orphan_data_extent *tmp;
3134 int ret = 0;
3136 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3138 * Check for conflicting file extents
3140 * Here we don't know whether the extents is compressed or not,
3141 * so we can only assume it not compressed nor data offset,
3142 * and use its disk_len as extent length.
3144 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3145 orphan->offset, orphan->disk_len, 0);
3146 btrfs_release_path(path);
3147 if (ret < 0)
3148 goto out;
3149 if (!ret) {
3150 fprintf(stderr,
3151 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3152 orphan->disk_bytenr, orphan->disk_len);
3153 ret = btrfs_free_extent(trans,
3154 root->fs_info->extent_root,
3155 orphan->disk_bytenr, orphan->disk_len,
3156 0, root->objectid, orphan->objectid,
3157 orphan->offset);
3158 if (ret < 0)
3159 goto out;
3161 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3162 orphan->offset, orphan->disk_bytenr,
3163 orphan->disk_len, orphan->disk_len);
3164 if (ret < 0)
3165 goto out;
3167 /* Update file size info */
3168 rec->found_size += orphan->disk_len;
3169 if (rec->found_size == rec->nbytes)
3170 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3172 /* Update the file extent hole info too */
3173 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3174 orphan->disk_len);
3175 if (ret < 0)
3176 goto out;
3177 if (RB_EMPTY_ROOT(&rec->holes))
3178 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3180 list_del(&orphan->list);
3181 free(orphan);
3183 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3184 out:
3185 return ret;
3188 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3189 struct btrfs_root *root,
3190 struct btrfs_path *path,
3191 struct inode_record *rec)
3193 struct rb_node *node;
3194 struct file_extent_hole *hole;
3195 int found = 0;
3196 int ret = 0;
3198 node = rb_first(&rec->holes);
3200 while (node) {
3201 found = 1;
3202 hole = rb_entry(node, struct file_extent_hole, node);
3203 ret = btrfs_punch_hole(trans, root, rec->ino,
3204 hole->start, hole->len);
3205 if (ret < 0)
3206 goto out;
3207 ret = del_file_extent_hole(&rec->holes, hole->start,
3208 hole->len);
3209 if (ret < 0)
3210 goto out;
3211 if (RB_EMPTY_ROOT(&rec->holes))
3212 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3213 node = rb_first(&rec->holes);
3215 /* special case for a file losing all its file extent */
3216 if (!found) {
3217 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3218 round_up(rec->isize, root->sectorsize));
3219 if (ret < 0)
3220 goto out;
3222 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3223 rec->ino, root->objectid);
3224 out:
3225 return ret;
3228 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3230 struct btrfs_trans_handle *trans;
3231 struct btrfs_path path;
3232 int ret = 0;
3234 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3235 I_ERR_NO_ORPHAN_ITEM |
3236 I_ERR_LINK_COUNT_WRONG |
3237 I_ERR_NO_INODE_ITEM |
3238 I_ERR_FILE_EXTENT_ORPHAN |
3239 I_ERR_FILE_EXTENT_DISCOUNT|
3240 I_ERR_FILE_NBYTES_WRONG)))
3241 return rec->errors;
3244 * For nlink repair, it may create a dir and add link, so
3245 * 2 for parent(256)'s dir_index and dir_item
3246 * 2 for lost+found dir's inode_item and inode_ref
3247 * 1 for the new inode_ref of the file
3248 * 2 for lost+found dir's dir_index and dir_item for the file
3250 trans = btrfs_start_transaction(root, 7);
3251 if (IS_ERR(trans))
3252 return PTR_ERR(trans);
3254 btrfs_init_path(&path);
3255 if (rec->errors & I_ERR_NO_INODE_ITEM)
3256 ret = repair_inode_no_item(trans, root, &path, rec);
3257 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3258 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3259 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3260 ret = repair_inode_discount_extent(trans, root, &path, rec);
3261 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3262 ret = repair_inode_isize(trans, root, &path, rec);
3263 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3264 ret = repair_inode_orphan_item(trans, root, &path, rec);
3265 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3266 ret = repair_inode_nlinks(trans, root, &path, rec);
3267 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3268 ret = repair_inode_nbytes(trans, root, &path, rec);
3269 btrfs_commit_transaction(trans, root);
3270 btrfs_release_path(&path);
3271 return ret;
3274 static int check_inode_recs(struct btrfs_root *root,
3275 struct cache_tree *inode_cache)
3277 struct cache_extent *cache;
3278 struct ptr_node *node;
3279 struct inode_record *rec;
3280 struct inode_backref *backref;
3281 int stage = 0;
3282 int ret = 0;
3283 int err = 0;
3284 u64 error = 0;
3285 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3287 if (btrfs_root_refs(&root->root_item) == 0) {
3288 if (!cache_tree_empty(inode_cache))
3289 fprintf(stderr, "warning line %d\n", __LINE__);
3290 return 0;
3294 * We need to repair backrefs first because we could change some of the
3295 * errors in the inode recs.
3297 * We also need to go through and delete invalid backrefs first and then
3298 * add the correct ones second. We do this because we may get EEXIST
3299 * when adding back the correct index because we hadn't yet deleted the
3300 * invalid index.
3302 * For example, if we were missing a dir index then the directories
3303 * isize would be wrong, so if we fixed the isize to what we thought it
3304 * would be and then fixed the backref we'd still have a invalid fs, so
3305 * we need to add back the dir index and then check to see if the isize
3306 * is still wrong.
3308 while (stage < 3) {
3309 stage++;
3310 if (stage == 3 && !err)
3311 break;
3313 cache = search_cache_extent(inode_cache, 0);
3314 while (repair && cache) {
3315 node = container_of(cache, struct ptr_node, cache);
3316 rec = node->data;
3317 cache = next_cache_extent(cache);
3319 /* Need to free everything up and rescan */
3320 if (stage == 3) {
3321 remove_cache_extent(inode_cache, &node->cache);
3322 free(node);
3323 free_inode_rec(rec);
3324 continue;
3327 if (list_empty(&rec->backrefs))
3328 continue;
3330 ret = repair_inode_backrefs(root, rec, inode_cache,
3331 stage == 1);
3332 if (ret < 0) {
3333 err = ret;
3334 stage = 2;
3335 break;
3336 } if (ret > 0) {
3337 err = -EAGAIN;
3341 if (err)
3342 return err;
3344 rec = get_inode_rec(inode_cache, root_dirid, 0);
3345 BUG_ON(IS_ERR(rec));
3346 if (rec) {
3347 ret = check_root_dir(rec);
3348 if (ret) {
3349 fprintf(stderr, "root %llu root dir %llu error\n",
3350 (unsigned long long)root->root_key.objectid,
3351 (unsigned long long)root_dirid);
3352 print_inode_error(root, rec);
3353 error++;
3355 } else {
3356 if (repair) {
3357 struct btrfs_trans_handle *trans;
3359 trans = btrfs_start_transaction(root, 1);
3360 if (IS_ERR(trans)) {
3361 err = PTR_ERR(trans);
3362 return err;
3365 fprintf(stderr,
3366 "root %llu missing its root dir, recreating\n",
3367 (unsigned long long)root->objectid);
3369 ret = btrfs_make_root_dir(trans, root, root_dirid);
3370 BUG_ON(ret);
3372 btrfs_commit_transaction(trans, root);
3373 return -EAGAIN;
3376 fprintf(stderr, "root %llu root dir %llu not found\n",
3377 (unsigned long long)root->root_key.objectid,
3378 (unsigned long long)root_dirid);
3381 while (1) {
3382 cache = search_cache_extent(inode_cache, 0);
3383 if (!cache)
3384 break;
3385 node = container_of(cache, struct ptr_node, cache);
3386 rec = node->data;
3387 remove_cache_extent(inode_cache, &node->cache);
3388 free(node);
3389 if (rec->ino == root_dirid ||
3390 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3391 free_inode_rec(rec);
3392 continue;
3395 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3396 ret = check_orphan_item(root, rec->ino);
3397 if (ret == 0)
3398 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3399 if (can_free_inode_rec(rec)) {
3400 free_inode_rec(rec);
3401 continue;
3405 if (!rec->found_inode_item)
3406 rec->errors |= I_ERR_NO_INODE_ITEM;
3407 if (rec->found_link != rec->nlink)
3408 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3409 if (repair) {
3410 ret = try_repair_inode(root, rec);
3411 if (ret == 0 && can_free_inode_rec(rec)) {
3412 free_inode_rec(rec);
3413 continue;
3415 ret = 0;
3418 if (!(repair && ret == 0))
3419 error++;
3420 print_inode_error(root, rec);
3421 list_for_each_entry(backref, &rec->backrefs, list) {
3422 if (!backref->found_dir_item)
3423 backref->errors |= REF_ERR_NO_DIR_ITEM;
3424 if (!backref->found_dir_index)
3425 backref->errors |= REF_ERR_NO_DIR_INDEX;
3426 if (!backref->found_inode_ref)
3427 backref->errors |= REF_ERR_NO_INODE_REF;
3428 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3429 " namelen %u name %s filetype %d errors %x",
3430 (unsigned long long)backref->dir,
3431 (unsigned long long)backref->index,
3432 backref->namelen, backref->name,
3433 backref->filetype, backref->errors);
3434 print_ref_error(backref->errors);
3436 free_inode_rec(rec);
3438 return (error > 0) ? -1 : 0;
3441 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3442 u64 objectid)
3444 struct cache_extent *cache;
3445 struct root_record *rec = NULL;
3446 int ret;
3448 cache = lookup_cache_extent(root_cache, objectid, 1);
3449 if (cache) {
3450 rec = container_of(cache, struct root_record, cache);
3451 } else {
3452 rec = calloc(1, sizeof(*rec));
3453 if (!rec)
3454 return ERR_PTR(-ENOMEM);
3455 rec->objectid = objectid;
3456 INIT_LIST_HEAD(&rec->backrefs);
3457 rec->cache.start = objectid;
3458 rec->cache.size = 1;
3460 ret = insert_cache_extent(root_cache, &rec->cache);
3461 if (ret)
3462 return ERR_PTR(-EEXIST);
3464 return rec;
3467 static struct root_backref *get_root_backref(struct root_record *rec,
3468 u64 ref_root, u64 dir, u64 index,
3469 const char *name, int namelen)
3471 struct root_backref *backref;
3473 list_for_each_entry(backref, &rec->backrefs, list) {
3474 if (backref->ref_root != ref_root || backref->dir != dir ||
3475 backref->namelen != namelen)
3476 continue;
3477 if (memcmp(name, backref->name, namelen))
3478 continue;
3479 return backref;
3482 backref = calloc(1, sizeof(*backref) + namelen + 1);
3483 if (!backref)
3484 return NULL;
3485 backref->ref_root = ref_root;
3486 backref->dir = dir;
3487 backref->index = index;
3488 backref->namelen = namelen;
3489 memcpy(backref->name, name, namelen);
3490 backref->name[namelen] = '\0';
3491 list_add_tail(&backref->list, &rec->backrefs);
3492 return backref;
3495 static void free_root_record(struct cache_extent *cache)
3497 struct root_record *rec;
3498 struct root_backref *backref;
3500 rec = container_of(cache, struct root_record, cache);
3501 while (!list_empty(&rec->backrefs)) {
3502 backref = to_root_backref(rec->backrefs.next);
3503 list_del(&backref->list);
3504 free(backref);
3507 free(rec);
3510 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3512 static int add_root_backref(struct cache_tree *root_cache,
3513 u64 root_id, u64 ref_root, u64 dir, u64 index,
3514 const char *name, int namelen,
3515 int item_type, int errors)
3517 struct root_record *rec;
3518 struct root_backref *backref;
3520 rec = get_root_rec(root_cache, root_id);
3521 BUG_ON(IS_ERR(rec));
3522 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3523 BUG_ON(!backref);
3525 backref->errors |= errors;
3527 if (item_type != BTRFS_DIR_ITEM_KEY) {
3528 if (backref->found_dir_index || backref->found_back_ref ||
3529 backref->found_forward_ref) {
3530 if (backref->index != index)
3531 backref->errors |= REF_ERR_INDEX_UNMATCH;
3532 } else {
3533 backref->index = index;
3537 if (item_type == BTRFS_DIR_ITEM_KEY) {
3538 if (backref->found_forward_ref)
3539 rec->found_ref++;
3540 backref->found_dir_item = 1;
3541 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3542 backref->found_dir_index = 1;
3543 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3544 if (backref->found_forward_ref)
3545 backref->errors |= REF_ERR_DUP_ROOT_REF;
3546 else if (backref->found_dir_item)
3547 rec->found_ref++;
3548 backref->found_forward_ref = 1;
3549 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3550 if (backref->found_back_ref)
3551 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3552 backref->found_back_ref = 1;
3553 } else {
3554 BUG_ON(1);
3557 if (backref->found_forward_ref && backref->found_dir_item)
3558 backref->reachable = 1;
3559 return 0;
3562 static int merge_root_recs(struct btrfs_root *root,
3563 struct cache_tree *src_cache,
3564 struct cache_tree *dst_cache)
3566 struct cache_extent *cache;
3567 struct ptr_node *node;
3568 struct inode_record *rec;
3569 struct inode_backref *backref;
3570 int ret = 0;
3572 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3573 free_inode_recs_tree(src_cache);
3574 return 0;
3577 while (1) {
3578 cache = search_cache_extent(src_cache, 0);
3579 if (!cache)
3580 break;
3581 node = container_of(cache, struct ptr_node, cache);
3582 rec = node->data;
3583 remove_cache_extent(src_cache, &node->cache);
3584 free(node);
3586 ret = is_child_root(root, root->objectid, rec->ino);
3587 if (ret < 0)
3588 break;
3589 else if (ret == 0)
3590 goto skip;
3592 list_for_each_entry(backref, &rec->backrefs, list) {
3593 BUG_ON(backref->found_inode_ref);
3594 if (backref->found_dir_item)
3595 add_root_backref(dst_cache, rec->ino,
3596 root->root_key.objectid, backref->dir,
3597 backref->index, backref->name,
3598 backref->namelen, BTRFS_DIR_ITEM_KEY,
3599 backref->errors);
3600 if (backref->found_dir_index)
3601 add_root_backref(dst_cache, rec->ino,
3602 root->root_key.objectid, backref->dir,
3603 backref->index, backref->name,
3604 backref->namelen, BTRFS_DIR_INDEX_KEY,
3605 backref->errors);
3607 skip:
3608 free_inode_rec(rec);
3610 if (ret < 0)
3611 return ret;
3612 return 0;
3615 static int check_root_refs(struct btrfs_root *root,
3616 struct cache_tree *root_cache)
3618 struct root_record *rec;
3619 struct root_record *ref_root;
3620 struct root_backref *backref;
3621 struct cache_extent *cache;
3622 int loop = 1;
3623 int ret;
3624 int error;
3625 int errors = 0;
3627 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3628 BUG_ON(IS_ERR(rec));
3629 rec->found_ref = 1;
3631 /* fixme: this can not detect circular references */
3632 while (loop) {
3633 loop = 0;
3634 cache = search_cache_extent(root_cache, 0);
3635 while (1) {
3636 if (!cache)
3637 break;
3638 rec = container_of(cache, struct root_record, cache);
3639 cache = next_cache_extent(cache);
3641 if (rec->found_ref == 0)
3642 continue;
3644 list_for_each_entry(backref, &rec->backrefs, list) {
3645 if (!backref->reachable)
3646 continue;
3648 ref_root = get_root_rec(root_cache,
3649 backref->ref_root);
3650 BUG_ON(IS_ERR(ref_root));
3651 if (ref_root->found_ref > 0)
3652 continue;
3654 backref->reachable = 0;
3655 rec->found_ref--;
3656 if (rec->found_ref == 0)
3657 loop = 1;
3662 cache = search_cache_extent(root_cache, 0);
3663 while (1) {
3664 if (!cache)
3665 break;
3666 rec = container_of(cache, struct root_record, cache);
3667 cache = next_cache_extent(cache);
3669 if (rec->found_ref == 0 &&
3670 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3671 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3672 ret = check_orphan_item(root->fs_info->tree_root,
3673 rec->objectid);
3674 if (ret == 0)
3675 continue;
3678 * If we don't have a root item then we likely just have
3679 * a dir item in a snapshot for this root but no actual
3680 * ref key or anything so it's meaningless.
3682 if (!rec->found_root_item)
3683 continue;
3684 errors++;
3685 fprintf(stderr, "fs tree %llu not referenced\n",
3686 (unsigned long long)rec->objectid);
3689 error = 0;
3690 if (rec->found_ref > 0 && !rec->found_root_item)
3691 error = 1;
3692 list_for_each_entry(backref, &rec->backrefs, list) {
3693 if (!backref->found_dir_item)
3694 backref->errors |= REF_ERR_NO_DIR_ITEM;
3695 if (!backref->found_dir_index)
3696 backref->errors |= REF_ERR_NO_DIR_INDEX;
3697 if (!backref->found_back_ref)
3698 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3699 if (!backref->found_forward_ref)
3700 backref->errors |= REF_ERR_NO_ROOT_REF;
3701 if (backref->reachable && backref->errors)
3702 error = 1;
3704 if (!error)
3705 continue;
3707 errors++;
3708 fprintf(stderr, "fs tree %llu refs %u %s\n",
3709 (unsigned long long)rec->objectid, rec->found_ref,
3710 rec->found_root_item ? "" : "not found");
3712 list_for_each_entry(backref, &rec->backrefs, list) {
3713 if (!backref->reachable)
3714 continue;
3715 if (!backref->errors && rec->found_root_item)
3716 continue;
3717 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3718 " index %llu namelen %u name %s errors %x\n",
3719 (unsigned long long)backref->ref_root,
3720 (unsigned long long)backref->dir,
3721 (unsigned long long)backref->index,
3722 backref->namelen, backref->name,
3723 backref->errors);
3724 print_ref_error(backref->errors);
3727 return errors > 0 ? 1 : 0;
3730 static int process_root_ref(struct extent_buffer *eb, int slot,
3731 struct btrfs_key *key,
3732 struct cache_tree *root_cache)
3734 u64 dirid;
3735 u64 index;
3736 u32 len;
3737 u32 name_len;
3738 struct btrfs_root_ref *ref;
3739 char namebuf[BTRFS_NAME_LEN];
3740 int error;
3742 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3744 dirid = btrfs_root_ref_dirid(eb, ref);
3745 index = btrfs_root_ref_sequence(eb, ref);
3746 name_len = btrfs_root_ref_name_len(eb, ref);
3748 if (name_len <= BTRFS_NAME_LEN) {
3749 len = name_len;
3750 error = 0;
3751 } else {
3752 len = BTRFS_NAME_LEN;
3753 error = REF_ERR_NAME_TOO_LONG;
3755 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3757 if (key->type == BTRFS_ROOT_REF_KEY) {
3758 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3759 index, namebuf, len, key->type, error);
3760 } else {
3761 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3762 index, namebuf, len, key->type, error);
3764 return 0;
3767 static void free_corrupt_block(struct cache_extent *cache)
3769 struct btrfs_corrupt_block *corrupt;
3771 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3772 free(corrupt);
3775 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3778 * Repair the btree of the given root.
3780 * The fix is to remove the node key in corrupt_blocks cache_tree.
3781 * and rebalance the tree.
3782 * After the fix, the btree should be writeable.
3784 static int repair_btree(struct btrfs_root *root,
3785 struct cache_tree *corrupt_blocks)
3787 struct btrfs_trans_handle *trans;
3788 struct btrfs_path path;
3789 struct btrfs_corrupt_block *corrupt;
3790 struct cache_extent *cache;
3791 struct btrfs_key key;
3792 u64 offset;
3793 int level;
3794 int ret = 0;
3796 if (cache_tree_empty(corrupt_blocks))
3797 return 0;
3799 trans = btrfs_start_transaction(root, 1);
3800 if (IS_ERR(trans)) {
3801 ret = PTR_ERR(trans);
3802 fprintf(stderr, "Error starting transaction: %s\n",
3803 strerror(-ret));
3804 return ret;
3806 btrfs_init_path(&path);
3807 cache = first_cache_extent(corrupt_blocks);
3808 while (cache) {
3809 corrupt = container_of(cache, struct btrfs_corrupt_block,
3810 cache);
3811 level = corrupt->level;
3812 path.lowest_level = level;
3813 key.objectid = corrupt->key.objectid;
3814 key.type = corrupt->key.type;
3815 key.offset = corrupt->key.offset;
3818 * Here we don't want to do any tree balance, since it may
3819 * cause a balance with corrupted brother leaf/node,
3820 * so ins_len set to 0 here.
3821 * Balance will be done after all corrupt node/leaf is deleted.
3823 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3824 if (ret < 0)
3825 goto out;
3826 offset = btrfs_node_blockptr(path.nodes[level],
3827 path.slots[level]);
3829 /* Remove the ptr */
3830 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3831 if (ret < 0)
3832 goto out;
3834 * Remove the corresponding extent
3835 * return value is not concerned.
3837 btrfs_release_path(&path);
3838 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3839 0, root->root_key.objectid,
3840 level - 1, 0);
3841 cache = next_cache_extent(cache);
3844 /* Balance the btree using btrfs_search_slot() */
3845 cache = first_cache_extent(corrupt_blocks);
3846 while (cache) {
3847 corrupt = container_of(cache, struct btrfs_corrupt_block,
3848 cache);
3849 memcpy(&key, &corrupt->key, sizeof(key));
3850 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3851 if (ret < 0)
3852 goto out;
3853 /* return will always >0 since it won't find the item */
3854 ret = 0;
3855 btrfs_release_path(&path);
3856 cache = next_cache_extent(cache);
3858 out:
3859 btrfs_commit_transaction(trans, root);
3860 btrfs_release_path(&path);
3861 return ret;
3864 static int check_fs_root(struct btrfs_root *root,
3865 struct cache_tree *root_cache,
3866 struct walk_control *wc)
3868 int ret = 0;
3869 int err = 0;
3870 int wret;
3871 int level;
3872 struct btrfs_path path;
3873 struct shared_node root_node;
3874 struct root_record *rec;
3875 struct btrfs_root_item *root_item = &root->root_item;
3876 struct cache_tree corrupt_blocks;
3877 struct orphan_data_extent *orphan;
3878 struct orphan_data_extent *tmp;
3879 enum btrfs_tree_block_status status;
3880 struct node_refs nrefs;
3883 * Reuse the corrupt_block cache tree to record corrupted tree block
3885 * Unlike the usage in extent tree check, here we do it in a per
3886 * fs/subvol tree base.
3888 cache_tree_init(&corrupt_blocks);
3889 root->fs_info->corrupt_blocks = &corrupt_blocks;
3891 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3892 rec = get_root_rec(root_cache, root->root_key.objectid);
3893 BUG_ON(IS_ERR(rec));
3894 if (btrfs_root_refs(root_item) > 0)
3895 rec->found_root_item = 1;
3898 btrfs_init_path(&path);
3899 memset(&root_node, 0, sizeof(root_node));
3900 cache_tree_init(&root_node.root_cache);
3901 cache_tree_init(&root_node.inode_cache);
3902 memset(&nrefs, 0, sizeof(nrefs));
3904 /* Move the orphan extent record to corresponding inode_record */
3905 list_for_each_entry_safe(orphan, tmp,
3906 &root->orphan_data_extents, list) {
3907 struct inode_record *inode;
3909 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3911 BUG_ON(IS_ERR(inode));
3912 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3913 list_move(&orphan->list, &inode->orphan_extents);
3916 level = btrfs_header_level(root->node);
3917 memset(wc->nodes, 0, sizeof(wc->nodes));
3918 wc->nodes[level] = &root_node;
3919 wc->active_node = level;
3920 wc->root_level = level;
3922 /* We may not have checked the root block, lets do that now */
3923 if (btrfs_is_leaf(root->node))
3924 status = btrfs_check_leaf(root, NULL, root->node);
3925 else
3926 status = btrfs_check_node(root, NULL, root->node);
3927 if (status != BTRFS_TREE_BLOCK_CLEAN)
3928 return -EIO;
3930 if (btrfs_root_refs(root_item) > 0 ||
3931 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3932 path.nodes[level] = root->node;
3933 extent_buffer_get(root->node);
3934 path.slots[level] = 0;
3935 } else {
3936 struct btrfs_key key;
3937 struct btrfs_disk_key found_key;
3939 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3940 level = root_item->drop_level;
3941 path.lowest_level = level;
3942 if (level > btrfs_header_level(root->node) ||
3943 level >= BTRFS_MAX_LEVEL) {
3944 error("ignoring invalid drop level: %u", level);
3945 goto skip_walking;
3947 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3948 if (wret < 0)
3949 goto skip_walking;
3950 btrfs_node_key(path.nodes[level], &found_key,
3951 path.slots[level]);
3952 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3953 sizeof(found_key)));
3956 while (1) {
3957 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3958 if (wret < 0)
3959 ret = wret;
3960 if (wret != 0)
3961 break;
3963 wret = walk_up_tree(root, &path, wc, &level);
3964 if (wret < 0)
3965 ret = wret;
3966 if (wret != 0)
3967 break;
3969 skip_walking:
3970 btrfs_release_path(&path);
3972 if (!cache_tree_empty(&corrupt_blocks)) {
3973 struct cache_extent *cache;
3974 struct btrfs_corrupt_block *corrupt;
3976 printf("The following tree block(s) is corrupted in tree %llu:\n",
3977 root->root_key.objectid);
3978 cache = first_cache_extent(&corrupt_blocks);
3979 while (cache) {
3980 corrupt = container_of(cache,
3981 struct btrfs_corrupt_block,
3982 cache);
3983 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3984 cache->start, corrupt->level,
3985 corrupt->key.objectid, corrupt->key.type,
3986 corrupt->key.offset);
3987 cache = next_cache_extent(cache);
3989 if (repair) {
3990 printf("Try to repair the btree for root %llu\n",
3991 root->root_key.objectid);
3992 ret = repair_btree(root, &corrupt_blocks);
3993 if (ret < 0)
3994 fprintf(stderr, "Failed to repair btree: %s\n",
3995 strerror(-ret));
3996 if (!ret)
3997 printf("Btree for root %llu is fixed\n",
3998 root->root_key.objectid);
4002 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4003 if (err < 0)
4004 ret = err;
4006 if (root_node.current) {
4007 root_node.current->checked = 1;
4008 maybe_free_inode_rec(&root_node.inode_cache,
4009 root_node.current);
4012 err = check_inode_recs(root, &root_node.inode_cache);
4013 if (!ret)
4014 ret = err;
4016 free_corrupt_blocks_tree(&corrupt_blocks);
4017 root->fs_info->corrupt_blocks = NULL;
4018 free_orphan_data_extents(&root->orphan_data_extents);
4019 return ret;
4022 static int fs_root_objectid(u64 objectid)
4024 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4025 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4026 return 1;
4027 return is_fstree(objectid);
4030 static int check_fs_roots(struct btrfs_root *root,
4031 struct cache_tree *root_cache)
4033 struct btrfs_path path;
4034 struct btrfs_key key;
4035 struct walk_control wc;
4036 struct extent_buffer *leaf, *tree_node;
4037 struct btrfs_root *tmp_root;
4038 struct btrfs_root *tree_root = root->fs_info->tree_root;
4039 int ret;
4040 int err = 0;
4042 if (ctx.progress_enabled) {
4043 ctx.tp = TASK_FS_ROOTS;
4044 task_start(ctx.info);
4048 * Just in case we made any changes to the extent tree that weren't
4049 * reflected into the free space cache yet.
4051 if (repair)
4052 reset_cached_block_groups(root->fs_info);
4053 memset(&wc, 0, sizeof(wc));
4054 cache_tree_init(&wc.shared);
4055 btrfs_init_path(&path);
4057 again:
4058 key.offset = 0;
4059 key.objectid = 0;
4060 key.type = BTRFS_ROOT_ITEM_KEY;
4061 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4062 if (ret < 0) {
4063 err = 1;
4064 goto out;
4066 tree_node = tree_root->node;
4067 while (1) {
4068 if (tree_node != tree_root->node) {
4069 free_root_recs_tree(root_cache);
4070 btrfs_release_path(&path);
4071 goto again;
4073 leaf = path.nodes[0];
4074 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4075 ret = btrfs_next_leaf(tree_root, &path);
4076 if (ret) {
4077 if (ret < 0)
4078 err = 1;
4079 break;
4081 leaf = path.nodes[0];
4083 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4084 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4085 fs_root_objectid(key.objectid)) {
4086 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4087 tmp_root = btrfs_read_fs_root_no_cache(
4088 root->fs_info, &key);
4089 } else {
4090 key.offset = (u64)-1;
4091 tmp_root = btrfs_read_fs_root(
4092 root->fs_info, &key);
4094 if (IS_ERR(tmp_root)) {
4095 err = 1;
4096 goto next;
4098 ret = check_fs_root(tmp_root, root_cache, &wc);
4099 if (ret == -EAGAIN) {
4100 free_root_recs_tree(root_cache);
4101 btrfs_release_path(&path);
4102 goto again;
4104 if (ret)
4105 err = 1;
4106 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4107 btrfs_free_fs_root(tmp_root);
4108 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4109 key.type == BTRFS_ROOT_BACKREF_KEY) {
4110 process_root_ref(leaf, path.slots[0], &key,
4111 root_cache);
4113 next:
4114 path.slots[0]++;
4116 out:
4117 btrfs_release_path(&path);
4118 if (err)
4119 free_extent_cache_tree(&wc.shared);
4120 if (!cache_tree_empty(&wc.shared))
4121 fprintf(stderr, "warning line %d\n", __LINE__);
4123 task_stop(ctx.info);
4125 return err;
4129 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4130 * INODE_REF/INODE_EXTREF match.
4132 * @root: the root of the fs/file tree
4133 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4134 * @key: the key of the DIR_ITEM/DIR_INDEX
4135 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4136 * distinguish root_dir between normal dir/file
4137 * @name: the name in the INODE_REF/INODE_EXTREF
4138 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4139 * @mode: the st_mode of INODE_ITEM
4141 * Return 0 if no error occurred.
4142 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4143 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4144 * dir/file.
4145 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4146 * not match for normal dir/file.
4148 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4149 struct btrfs_key *key, u64 index, char *name,
4150 u32 namelen, u32 mode)
4152 struct btrfs_path path;
4153 struct extent_buffer *node;
4154 struct btrfs_dir_item *di;
4155 struct btrfs_key location;
4156 char namebuf[BTRFS_NAME_LEN] = {0};
4157 u32 total;
4158 u32 cur = 0;
4159 u32 len;
4160 u32 name_len;
4161 u32 data_len;
4162 u8 filetype;
4163 int slot;
4164 int ret;
4166 btrfs_init_path(&path);
4167 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4168 if (ret < 0) {
4169 ret = DIR_ITEM_MISSING;
4170 goto out;
4173 /* Process root dir and goto out*/
4174 if (index == 0) {
4175 if (ret == 0) {
4176 ret = ROOT_DIR_ERROR;
4177 error(
4178 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4179 root->objectid,
4180 ref_key->type == BTRFS_INODE_REF_KEY ?
4181 "REF" : "EXTREF",
4182 ref_key->objectid, ref_key->offset,
4183 key->type == BTRFS_DIR_ITEM_KEY ?
4184 "DIR_ITEM" : "DIR_INDEX");
4185 } else {
4186 ret = 0;
4189 goto out;
4192 /* Process normal file/dir */
4193 if (ret > 0) {
4194 ret = DIR_ITEM_MISSING;
4195 error(
4196 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4197 root->objectid,
4198 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4199 ref_key->objectid, ref_key->offset,
4200 key->type == BTRFS_DIR_ITEM_KEY ?
4201 "DIR_ITEM" : "DIR_INDEX",
4202 key->objectid, key->offset, namelen, name,
4203 imode_to_type(mode));
4204 goto out;
4207 /* Check whether inode_id/filetype/name match */
4208 node = path.nodes[0];
4209 slot = path.slots[0];
4210 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4211 total = btrfs_item_size_nr(node, slot);
4212 while (cur < total) {
4213 ret = DIR_ITEM_MISMATCH;
4214 name_len = btrfs_dir_name_len(node, di);
4215 data_len = btrfs_dir_data_len(node, di);
4217 btrfs_dir_item_key_to_cpu(node, di, &location);
4218 if (location.objectid != ref_key->objectid ||
4219 location.type != BTRFS_INODE_ITEM_KEY ||
4220 location.offset != 0)
4221 goto next;
4223 filetype = btrfs_dir_type(node, di);
4224 if (imode_to_type(mode) != filetype)
4225 goto next;
4227 if (name_len <= BTRFS_NAME_LEN) {
4228 len = name_len;
4229 } else {
4230 len = BTRFS_NAME_LEN;
4231 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4232 root->objectid,
4233 key->type == BTRFS_DIR_ITEM_KEY ?
4234 "DIR_ITEM" : "DIR_INDEX",
4235 key->objectid, key->offset, name_len);
4237 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4238 if (len != namelen || strncmp(namebuf, name, len))
4239 goto next;
4241 ret = 0;
4242 goto out;
4243 next:
4244 len = sizeof(*di) + name_len + data_len;
4245 di = (struct btrfs_dir_item *)((char *)di + len);
4246 cur += len;
4248 if (ret == DIR_ITEM_MISMATCH)
4249 error(
4250 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4251 root->objectid,
4252 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4253 ref_key->objectid, ref_key->offset,
4254 key->type == BTRFS_DIR_ITEM_KEY ?
4255 "DIR_ITEM" : "DIR_INDEX",
4256 key->objectid, key->offset, namelen, name,
4257 imode_to_type(mode));
4258 out:
4259 btrfs_release_path(&path);
4260 return ret;
4264 * Traverse the given INODE_REF and call find_dir_item() to find related
4265 * DIR_ITEM/DIR_INDEX.
4267 * @root: the root of the fs/file tree
4268 * @ref_key: the key of the INODE_REF
4269 * @refs: the count of INODE_REF
4270 * @mode: the st_mode of INODE_ITEM
4272 * Return 0 if no error occurred.
4274 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4275 struct extent_buffer *node, int slot, u64 *refs,
4276 int mode)
4278 struct btrfs_key key;
4279 struct btrfs_inode_ref *ref;
4280 char namebuf[BTRFS_NAME_LEN] = {0};
4281 u32 total;
4282 u32 cur = 0;
4283 u32 len;
4284 u32 name_len;
4285 u64 index;
4286 int ret, err = 0;
4288 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4289 total = btrfs_item_size_nr(node, slot);
4291 next:
4292 /* Update inode ref count */
4293 (*refs)++;
4295 index = btrfs_inode_ref_index(node, ref);
4296 name_len = btrfs_inode_ref_name_len(node, ref);
4297 if (name_len <= BTRFS_NAME_LEN) {
4298 len = name_len;
4299 } else {
4300 len = BTRFS_NAME_LEN;
4301 warning("root %llu INODE_REF[%llu %llu] name too long",
4302 root->objectid, ref_key->objectid, ref_key->offset);
4305 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4307 /* Check root dir ref name */
4308 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4309 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4310 root->objectid, ref_key->objectid, ref_key->offset,
4311 namebuf);
4312 err |= ROOT_DIR_ERROR;
4315 /* Find related DIR_INDEX */
4316 key.objectid = ref_key->offset;
4317 key.type = BTRFS_DIR_INDEX_KEY;
4318 key.offset = index;
4319 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4320 err |= ret;
4322 /* Find related dir_item */
4323 key.objectid = ref_key->offset;
4324 key.type = BTRFS_DIR_ITEM_KEY;
4325 key.offset = btrfs_name_hash(namebuf, len);
4326 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4327 err |= ret;
4329 len = sizeof(*ref) + name_len;
4330 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4331 cur += len;
4332 if (cur < total)
4333 goto next;
4335 return err;
4339 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4340 * DIR_ITEM/DIR_INDEX.
4342 * @root: the root of the fs/file tree
4343 * @ref_key: the key of the INODE_EXTREF
4344 * @refs: the count of INODE_EXTREF
4345 * @mode: the st_mode of INODE_ITEM
4347 * Return 0 if no error occurred.
4349 static int check_inode_extref(struct btrfs_root *root,
4350 struct btrfs_key *ref_key,
4351 struct extent_buffer *node, int slot, u64 *refs,
4352 int mode)
4354 struct btrfs_key key;
4355 struct btrfs_inode_extref *extref;
4356 char namebuf[BTRFS_NAME_LEN] = {0};
4357 u32 total;
4358 u32 cur = 0;
4359 u32 len;
4360 u32 name_len;
4361 u64 index;
4362 u64 parent;
4363 int ret;
4364 int err = 0;
4366 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4367 total = btrfs_item_size_nr(node, slot);
4369 next:
4370 /* update inode ref count */
4371 (*refs)++;
4372 name_len = btrfs_inode_extref_name_len(node, extref);
4373 index = btrfs_inode_extref_index(node, extref);
4374 parent = btrfs_inode_extref_parent(node, extref);
4375 if (name_len <= BTRFS_NAME_LEN) {
4376 len = name_len;
4377 } else {
4378 len = BTRFS_NAME_LEN;
4379 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4380 root->objectid, ref_key->objectid, ref_key->offset);
4382 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4384 /* Check root dir ref name */
4385 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4386 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4387 root->objectid, ref_key->objectid, ref_key->offset,
4388 namebuf);
4389 err |= ROOT_DIR_ERROR;
4392 /* find related dir_index */
4393 key.objectid = parent;
4394 key.type = BTRFS_DIR_INDEX_KEY;
4395 key.offset = index;
4396 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4397 err |= ret;
4399 /* find related dir_item */
4400 key.objectid = parent;
4401 key.type = BTRFS_DIR_ITEM_KEY;
4402 key.offset = btrfs_name_hash(namebuf, len);
4403 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4404 err |= ret;
4406 len = sizeof(*extref) + name_len;
4407 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4408 cur += len;
4410 if (cur < total)
4411 goto next;
4413 return err;
4417 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4418 * DIR_ITEM/DIR_INDEX match.
4420 * @root: the root of the fs/file tree
4421 * @key: the key of the INODE_REF/INODE_EXTREF
4422 * @name: the name in the INODE_REF/INODE_EXTREF
4423 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4424 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4425 * to (u64)-1
4426 * @ext_ref: the EXTENDED_IREF feature
4428 * Return 0 if no error occurred.
4429 * Return >0 for error bitmap
4431 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4432 char *name, int namelen, u64 index,
4433 unsigned int ext_ref)
4435 struct btrfs_path path;
4436 struct btrfs_inode_ref *ref;
4437 struct btrfs_inode_extref *extref;
4438 struct extent_buffer *node;
4439 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4440 u32 total;
4441 u32 cur = 0;
4442 u32 len;
4443 u32 ref_namelen;
4444 u64 ref_index;
4445 u64 parent;
4446 u64 dir_id;
4447 int slot;
4448 int ret;
4450 btrfs_init_path(&path);
4451 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4452 if (ret) {
4453 ret = INODE_REF_MISSING;
4454 goto extref;
4457 node = path.nodes[0];
4458 slot = path.slots[0];
4460 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4461 total = btrfs_item_size_nr(node, slot);
4463 /* Iterate all entry of INODE_REF */
4464 while (cur < total) {
4465 ret = INODE_REF_MISSING;
4467 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4468 ref_index = btrfs_inode_ref_index(node, ref);
4469 if (index != (u64)-1 && index != ref_index)
4470 goto next_ref;
4472 if (ref_namelen <= BTRFS_NAME_LEN) {
4473 len = ref_namelen;
4474 } else {
4475 len = BTRFS_NAME_LEN;
4476 warning("root %llu INODE %s[%llu %llu] name too long",
4477 root->objectid,
4478 key->type == BTRFS_INODE_REF_KEY ?
4479 "REF" : "EXTREF",
4480 key->objectid, key->offset);
4482 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4483 len);
4485 if (len != namelen || strncmp(ref_namebuf, name, len))
4486 goto next_ref;
4488 ret = 0;
4489 goto out;
4490 next_ref:
4491 len = sizeof(*ref) + ref_namelen;
4492 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4493 cur += len;
4496 extref:
4497 /* Skip if not support EXTENDED_IREF feature */
4498 if (!ext_ref)
4499 goto out;
4501 btrfs_release_path(&path);
4502 btrfs_init_path(&path);
4504 dir_id = key->offset;
4505 key->type = BTRFS_INODE_EXTREF_KEY;
4506 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4508 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4509 if (ret) {
4510 ret = INODE_REF_MISSING;
4511 goto out;
4514 node = path.nodes[0];
4515 slot = path.slots[0];
4517 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4518 cur = 0;
4519 total = btrfs_item_size_nr(node, slot);
4521 /* Iterate all entry of INODE_EXTREF */
4522 while (cur < total) {
4523 ret = INODE_REF_MISSING;
4525 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4526 ref_index = btrfs_inode_extref_index(node, extref);
4527 parent = btrfs_inode_extref_parent(node, extref);
4528 if (index != (u64)-1 && index != ref_index)
4529 goto next_extref;
4531 if (parent != dir_id)
4532 goto next_extref;
4534 if (ref_namelen <= BTRFS_NAME_LEN) {
4535 len = ref_namelen;
4536 } else {
4537 len = BTRFS_NAME_LEN;
4538 warning("root %llu INODE %s[%llu %llu] name too long",
4539 root->objectid,
4540 key->type == BTRFS_INODE_REF_KEY ?
4541 "REF" : "EXTREF",
4542 key->objectid, key->offset);
4544 read_extent_buffer(node, ref_namebuf,
4545 (unsigned long)(extref + 1), len);
4547 if (len != namelen || strncmp(ref_namebuf, name, len))
4548 goto next_extref;
4550 ret = 0;
4551 goto out;
4553 next_extref:
4554 len = sizeof(*extref) + ref_namelen;
4555 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4556 cur += len;
4559 out:
4560 btrfs_release_path(&path);
4561 return ret;
4565 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4566 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4568 * @root: the root of the fs/file tree
4569 * @key: the key of the DIR_ITEM/DIR_INDEX
4570 * @size: the st_size of the INODE_ITEM
4571 * @ext_ref: the EXTENDED_IREF feature
4573 * Return 0 if no error occurred.
4575 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4576 struct extent_buffer *node, int slot, u64 *size,
4577 unsigned int ext_ref)
4579 struct btrfs_dir_item *di;
4580 struct btrfs_inode_item *ii;
4581 struct btrfs_path path;
4582 struct btrfs_key location;
4583 char namebuf[BTRFS_NAME_LEN] = {0};
4584 u32 total;
4585 u32 cur = 0;
4586 u32 len;
4587 u32 name_len;
4588 u32 data_len;
4589 u8 filetype;
4590 u32 mode;
4591 u64 index;
4592 int ret;
4593 int err = 0;
4596 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4597 * ignore index check.
4599 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4601 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4602 total = btrfs_item_size_nr(node, slot);
4604 while (cur < total) {
4605 data_len = btrfs_dir_data_len(node, di);
4606 if (data_len)
4607 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4608 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4609 "DIR_ITEM" : "DIR_INDEX",
4610 key->objectid, key->offset, data_len);
4612 name_len = btrfs_dir_name_len(node, di);
4613 if (name_len <= BTRFS_NAME_LEN) {
4614 len = name_len;
4615 } else {
4616 len = BTRFS_NAME_LEN;
4617 warning("root %llu %s[%llu %llu] name too long",
4618 root->objectid,
4619 key->type == BTRFS_DIR_ITEM_KEY ?
4620 "DIR_ITEM" : "DIR_INDEX",
4621 key->objectid, key->offset);
4623 (*size) += name_len;
4625 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4626 filetype = btrfs_dir_type(node, di);
4628 btrfs_init_path(&path);
4629 btrfs_dir_item_key_to_cpu(node, di, &location);
4631 /* Ignore related ROOT_ITEM check */
4632 if (location.type == BTRFS_ROOT_ITEM_KEY)
4633 goto next;
4635 /* Check relative INODE_ITEM(existence/filetype) */
4636 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4637 if (ret) {
4638 err |= INODE_ITEM_MISSING;
4639 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4640 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4641 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4642 key->offset, location.objectid, name_len,
4643 namebuf, filetype);
4644 goto next;
4647 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4648 struct btrfs_inode_item);
4649 mode = btrfs_inode_mode(path.nodes[0], ii);
4651 if (imode_to_type(mode) != filetype) {
4652 err |= INODE_ITEM_MISMATCH;
4653 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4654 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4655 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4656 key->offset, name_len, namebuf, filetype);
4659 /* Check relative INODE_REF/INODE_EXTREF */
4660 location.type = BTRFS_INODE_REF_KEY;
4661 location.offset = key->objectid;
4662 ret = find_inode_ref(root, &location, namebuf, len,
4663 index, ext_ref);
4664 err |= ret;
4665 if (ret & INODE_REF_MISSING)
4666 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4667 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4668 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4669 key->offset, name_len, namebuf, filetype);
4671 next:
4672 btrfs_release_path(&path);
4673 len = sizeof(*di) + name_len + data_len;
4674 di = (struct btrfs_dir_item *)((char *)di + len);
4675 cur += len;
4677 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4678 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4679 root->objectid, key->objectid, key->offset);
4680 break;
4684 return err;
4688 * Check file extent datasum/hole, update the size of the file extents,
4689 * check and update the last offset of the file extent.
4691 * @root: the root of fs/file tree.
4692 * @fkey: the key of the file extent.
4693 * @nodatasum: INODE_NODATASUM feature.
4694 * @size: the sum of all EXTENT_DATA items size for this inode.
4695 * @end: the offset of the last extent.
4697 * Return 0 if no error occurred.
4699 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4700 struct extent_buffer *node, int slot,
4701 unsigned int nodatasum, u64 *size, u64 *end)
4703 struct btrfs_file_extent_item *fi;
4704 u64 disk_bytenr;
4705 u64 disk_num_bytes;
4706 u64 extent_num_bytes;
4707 u64 extent_offset;
4708 u64 csum_found; /* In byte size, sectorsize aligned */
4709 u64 search_start; /* Logical range start we search for csum */
4710 u64 search_len; /* Logical range len we search for csum */
4711 unsigned int extent_type;
4712 unsigned int is_hole;
4713 int compressed = 0;
4714 int ret;
4715 int err = 0;
4717 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4719 /* Check inline extent */
4720 extent_type = btrfs_file_extent_type(node, fi);
4721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4722 struct btrfs_item *e = btrfs_item_nr(slot);
4723 u32 item_inline_len;
4725 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4726 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4727 compressed = btrfs_file_extent_compression(node, fi);
4728 if (extent_num_bytes == 0) {
4729 error(
4730 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4731 root->objectid, fkey->objectid, fkey->offset);
4732 err |= FILE_EXTENT_ERROR;
4734 if (!compressed && extent_num_bytes != item_inline_len) {
4735 error(
4736 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4737 root->objectid, fkey->objectid, fkey->offset,
4738 extent_num_bytes, item_inline_len);
4739 err |= FILE_EXTENT_ERROR;
4741 *size += extent_num_bytes;
4742 return err;
4745 /* Check extent type */
4746 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4747 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4748 err |= FILE_EXTENT_ERROR;
4749 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4750 root->objectid, fkey->objectid, fkey->offset);
4751 return err;
4754 /* Check REG_EXTENT/PREALLOC_EXTENT */
4755 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4756 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4757 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4758 extent_offset = btrfs_file_extent_offset(node, fi);
4759 compressed = btrfs_file_extent_compression(node, fi);
4760 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4763 * Check EXTENT_DATA csum
4765 * For plain (uncompressed) extent, we should only check the range
4766 * we're referring to, as it's possible that part of prealloc extent
4767 * has been written, and has csum:
4769 * |<--- Original large preallocated extent A ---->|
4770 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4771 * No csum Has csum
4773 * For compressed extent, we should check the whole range.
4775 if (!compressed) {
4776 search_start = disk_bytenr + extent_offset;
4777 search_len = extent_num_bytes;
4778 } else {
4779 search_start = disk_bytenr;
4780 search_len = disk_num_bytes;
4782 ret = count_csum_range(root, search_start, search_len, &csum_found);
4783 if (csum_found > 0 && nodatasum) {
4784 err |= ODD_CSUM_ITEM;
4785 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4786 root->objectid, fkey->objectid, fkey->offset);
4787 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4788 !is_hole && (ret < 0 || csum_found < search_len)) {
4789 err |= CSUM_ITEM_MISSING;
4790 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4791 root->objectid, fkey->objectid, fkey->offset,
4792 csum_found, search_len);
4793 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4794 err |= ODD_CSUM_ITEM;
4795 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4796 root->objectid, fkey->objectid, fkey->offset, csum_found);
4799 /* Check EXTENT_DATA hole */
4800 if (no_holes && is_hole) {
4801 err |= FILE_EXTENT_ERROR;
4802 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4803 root->objectid, fkey->objectid, fkey->offset);
4804 } else if (!no_holes && *end != fkey->offset) {
4805 err |= FILE_EXTENT_ERROR;
4806 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4807 root->objectid, fkey->objectid, fkey->offset);
4810 *end += extent_num_bytes;
4811 if (!is_hole)
4812 *size += extent_num_bytes;
4814 return err;
4818 * Check INODE_ITEM and related ITEMs (the same inode number)
4819 * 1. check link count
4820 * 2. check inode ref/extref
4821 * 3. check dir item/index
4823 * @ext_ref: the EXTENDED_IREF feature
4825 * Return 0 if no error occurred.
4826 * Return >0 for error or hit the traversal is done(by error bitmap)
4828 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4829 unsigned int ext_ref)
4831 struct extent_buffer *node;
4832 struct btrfs_inode_item *ii;
4833 struct btrfs_key key;
4834 u64 inode_id;
4835 u32 mode;
4836 u64 nlink;
4837 u64 nbytes;
4838 u64 isize;
4839 u64 size = 0;
4840 u64 refs = 0;
4841 u64 extent_end = 0;
4842 u64 extent_size = 0;
4843 unsigned int dir;
4844 unsigned int nodatasum;
4845 int slot;
4846 int ret;
4847 int err = 0;
4849 node = path->nodes[0];
4850 slot = path->slots[0];
4852 btrfs_item_key_to_cpu(node, &key, slot);
4853 inode_id = key.objectid;
4855 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4856 ret = btrfs_next_item(root, path);
4857 if (ret > 0)
4858 err |= LAST_ITEM;
4859 return err;
4862 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4863 isize = btrfs_inode_size(node, ii);
4864 nbytes = btrfs_inode_nbytes(node, ii);
4865 mode = btrfs_inode_mode(node, ii);
4866 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4867 nlink = btrfs_inode_nlink(node, ii);
4868 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4870 while (1) {
4871 ret = btrfs_next_item(root, path);
4872 if (ret < 0) {
4873 /* out will fill 'err' rusing current statistics */
4874 goto out;
4875 } else if (ret > 0) {
4876 err |= LAST_ITEM;
4877 goto out;
4880 node = path->nodes[0];
4881 slot = path->slots[0];
4882 btrfs_item_key_to_cpu(node, &key, slot);
4883 if (key.objectid != inode_id)
4884 goto out;
4886 switch (key.type) {
4887 case BTRFS_INODE_REF_KEY:
4888 ret = check_inode_ref(root, &key, node, slot, &refs,
4889 mode);
4890 err |= ret;
4891 break;
4892 case BTRFS_INODE_EXTREF_KEY:
4893 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4894 warning("root %llu EXTREF[%llu %llu] isn't supported",
4895 root->objectid, key.objectid,
4896 key.offset);
4897 ret = check_inode_extref(root, &key, node, slot, &refs,
4898 mode);
4899 err |= ret;
4900 break;
4901 case BTRFS_DIR_ITEM_KEY:
4902 case BTRFS_DIR_INDEX_KEY:
4903 if (!dir) {
4904 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4905 root->objectid, inode_id,
4906 imode_to_type(mode), key.objectid,
4907 key.offset);
4909 ret = check_dir_item(root, &key, node, slot, &size,
4910 ext_ref);
4911 err |= ret;
4912 break;
4913 case BTRFS_EXTENT_DATA_KEY:
4914 if (dir) {
4915 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4916 root->objectid, inode_id, key.objectid,
4917 key.offset);
4919 ret = check_file_extent(root, &key, node, slot,
4920 nodatasum, &extent_size,
4921 &extent_end);
4922 err |= ret;
4923 break;
4924 case BTRFS_XATTR_ITEM_KEY:
4925 break;
4926 default:
4927 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4928 key.objectid, key.type, key.offset);
4932 out:
4933 /* verify INODE_ITEM nlink/isize/nbytes */
4934 if (dir) {
4935 if (nlink != 1) {
4936 err |= LINK_COUNT_ERROR;
4937 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4938 root->objectid, inode_id, nlink);
4942 * Just a warning, as dir inode nbytes is just an
4943 * instructive value.
4945 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4946 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4947 root->objectid, inode_id, root->nodesize);
4950 if (isize != size) {
4951 err |= ISIZE_ERROR;
4952 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4953 root->objectid, inode_id, isize, size);
4955 } else {
4956 if (nlink != refs) {
4957 err |= LINK_COUNT_ERROR;
4958 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4959 root->objectid, inode_id, nlink, refs);
4960 } else if (!nlink) {
4961 err |= ORPHAN_ITEM;
4964 if (!nbytes && !no_holes && extent_end < isize) {
4965 err |= NBYTES_ERROR;
4966 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4967 root->objectid, inode_id, isize);
4970 if (nbytes != extent_size) {
4971 err |= NBYTES_ERROR;
4972 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4973 root->objectid, inode_id, nbytes, extent_size);
4977 return err;
4980 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4982 struct btrfs_path path;
4983 struct btrfs_key key;
4984 int err = 0;
4985 int ret;
4987 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4988 key.type = BTRFS_INODE_ITEM_KEY;
4989 key.offset = 0;
4991 /* For root being dropped, we don't need to check first inode */
4992 if (btrfs_root_refs(&root->root_item) == 0 &&
4993 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4994 key.objectid)
4995 return 0;
4997 btrfs_init_path(&path);
4999 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5000 if (ret < 0)
5001 goto out;
5002 if (ret > 0) {
5003 ret = 0;
5004 err |= INODE_ITEM_MISSING;
5005 error("first inode item of root %llu is missing",
5006 root->objectid);
5009 err |= check_inode_item(root, &path, ext_ref);
5010 err &= ~LAST_ITEM;
5011 if (err && !ret)
5012 ret = -EIO;
5013 out:
5014 btrfs_release_path(&path);
5015 return ret;
5019 * Iterate all item on the tree and call check_inode_item() to check.
5021 * @root: the root of the tree to be checked.
5022 * @ext_ref: the EXTENDED_IREF feature
5024 * Return 0 if no error found.
5025 * Return <0 for error.
5027 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5029 struct btrfs_path path;
5030 struct node_refs nrefs;
5031 struct btrfs_root_item *root_item = &root->root_item;
5032 int ret;
5033 int level;
5034 int err = 0;
5037 * We need to manually check the first inode item(256)
5038 * As the following traversal function will only start from
5039 * the first inode item in the leaf, if inode item(256) is missing
5040 * we will just skip it forever.
5042 ret = check_fs_first_inode(root, ext_ref);
5043 if (ret < 0)
5044 return ret;
5046 memset(&nrefs, 0, sizeof(nrefs));
5047 level = btrfs_header_level(root->node);
5048 btrfs_init_path(&path);
5050 if (btrfs_root_refs(root_item) > 0 ||
5051 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5052 path.nodes[level] = root->node;
5053 path.slots[level] = 0;
5054 extent_buffer_get(root->node);
5055 } else {
5056 struct btrfs_key key;
5058 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5059 level = root_item->drop_level;
5060 path.lowest_level = level;
5061 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5062 if (ret < 0)
5063 goto out;
5064 ret = 0;
5067 while (1) {
5068 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5069 err |= !!ret;
5071 /* if ret is negative, walk shall stop */
5072 if (ret < 0) {
5073 ret = err;
5074 break;
5077 ret = walk_up_tree_v2(root, &path, &level);
5078 if (ret != 0) {
5079 /* Normal exit, reset ret to err */
5080 ret = err;
5081 break;
5085 out:
5086 btrfs_release_path(&path);
5087 return ret;
5091 * Find the relative ref for root_ref and root_backref.
5093 * @root: the root of the root tree.
5094 * @ref_key: the key of the root ref.
5096 * Return 0 if no error occurred.
5098 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5099 struct extent_buffer *node, int slot)
5101 struct btrfs_path path;
5102 struct btrfs_key key;
5103 struct btrfs_root_ref *ref;
5104 struct btrfs_root_ref *backref;
5105 char ref_name[BTRFS_NAME_LEN] = {0};
5106 char backref_name[BTRFS_NAME_LEN] = {0};
5107 u64 ref_dirid;
5108 u64 ref_seq;
5109 u32 ref_namelen;
5110 u64 backref_dirid;
5111 u64 backref_seq;
5112 u32 backref_namelen;
5113 u32 len;
5114 int ret;
5115 int err = 0;
5117 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5118 ref_dirid = btrfs_root_ref_dirid(node, ref);
5119 ref_seq = btrfs_root_ref_sequence(node, ref);
5120 ref_namelen = btrfs_root_ref_name_len(node, ref);
5122 if (ref_namelen <= BTRFS_NAME_LEN) {
5123 len = ref_namelen;
5124 } else {
5125 len = BTRFS_NAME_LEN;
5126 warning("%s[%llu %llu] ref_name too long",
5127 ref_key->type == BTRFS_ROOT_REF_KEY ?
5128 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5129 ref_key->offset);
5131 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5133 /* Find relative root_ref */
5134 key.objectid = ref_key->offset;
5135 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5136 key.offset = ref_key->objectid;
5138 btrfs_init_path(&path);
5139 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5140 if (ret) {
5141 err |= ROOT_REF_MISSING;
5142 error("%s[%llu %llu] couldn't find relative ref",
5143 ref_key->type == BTRFS_ROOT_REF_KEY ?
5144 "ROOT_REF" : "ROOT_BACKREF",
5145 ref_key->objectid, ref_key->offset);
5146 goto out;
5149 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5150 struct btrfs_root_ref);
5151 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5152 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5153 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5155 if (backref_namelen <= BTRFS_NAME_LEN) {
5156 len = backref_namelen;
5157 } else {
5158 len = BTRFS_NAME_LEN;
5159 warning("%s[%llu %llu] ref_name too long",
5160 key.type == BTRFS_ROOT_REF_KEY ?
5161 "ROOT_REF" : "ROOT_BACKREF",
5162 key.objectid, key.offset);
5164 read_extent_buffer(path.nodes[0], backref_name,
5165 (unsigned long)(backref + 1), len);
5167 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5168 ref_namelen != backref_namelen ||
5169 strncmp(ref_name, backref_name, len)) {
5170 err |= ROOT_REF_MISMATCH;
5171 error("%s[%llu %llu] mismatch relative ref",
5172 ref_key->type == BTRFS_ROOT_REF_KEY ?
5173 "ROOT_REF" : "ROOT_BACKREF",
5174 ref_key->objectid, ref_key->offset);
5176 out:
5177 btrfs_release_path(&path);
5178 return err;
5182 * Check all fs/file tree in low_memory mode.
5184 * 1. for fs tree root item, call check_fs_root_v2()
5185 * 2. for fs tree root ref/backref, call check_root_ref()
5187 * Return 0 if no error occurred.
5189 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5191 struct btrfs_root *tree_root = fs_info->tree_root;
5192 struct btrfs_root *cur_root = NULL;
5193 struct btrfs_path path;
5194 struct btrfs_key key;
5195 struct extent_buffer *node;
5196 unsigned int ext_ref;
5197 int slot;
5198 int ret;
5199 int err = 0;
5201 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5203 btrfs_init_path(&path);
5204 key.objectid = BTRFS_FS_TREE_OBJECTID;
5205 key.offset = 0;
5206 key.type = BTRFS_ROOT_ITEM_KEY;
5208 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5209 if (ret < 0) {
5210 err = ret;
5211 goto out;
5212 } else if (ret > 0) {
5213 err = -ENOENT;
5214 goto out;
5217 while (1) {
5218 node = path.nodes[0];
5219 slot = path.slots[0];
5220 btrfs_item_key_to_cpu(node, &key, slot);
5221 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5222 goto out;
5223 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5224 fs_root_objectid(key.objectid)) {
5225 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5226 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5227 &key);
5228 } else {
5229 key.offset = (u64)-1;
5230 cur_root = btrfs_read_fs_root(fs_info, &key);
5233 if (IS_ERR(cur_root)) {
5234 error("Fail to read fs/subvol tree: %lld",
5235 key.objectid);
5236 err = -EIO;
5237 goto next;
5240 ret = check_fs_root_v2(cur_root, ext_ref);
5241 err |= ret;
5243 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5244 btrfs_free_fs_root(cur_root);
5245 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5246 key.type == BTRFS_ROOT_BACKREF_KEY) {
5247 ret = check_root_ref(tree_root, &key, node, slot);
5248 err |= ret;
5250 next:
5251 ret = btrfs_next_item(tree_root, &path);
5252 if (ret > 0)
5253 goto out;
5254 if (ret < 0) {
5255 err = ret;
5256 goto out;
5260 out:
5261 btrfs_release_path(&path);
5262 return err;
5265 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5267 struct list_head *cur = rec->backrefs.next;
5268 struct extent_backref *back;
5269 struct tree_backref *tback;
5270 struct data_backref *dback;
5271 u64 found = 0;
5272 int err = 0;
5274 while(cur != &rec->backrefs) {
5275 back = to_extent_backref(cur);
5276 cur = cur->next;
5277 if (!back->found_extent_tree) {
5278 err = 1;
5279 if (!print_errs)
5280 goto out;
5281 if (back->is_data) {
5282 dback = to_data_backref(back);
5283 fprintf(stderr, "Backref %llu %s %llu"
5284 " owner %llu offset %llu num_refs %lu"
5285 " not found in extent tree\n",
5286 (unsigned long long)rec->start,
5287 back->full_backref ?
5288 "parent" : "root",
5289 back->full_backref ?
5290 (unsigned long long)dback->parent:
5291 (unsigned long long)dback->root,
5292 (unsigned long long)dback->owner,
5293 (unsigned long long)dback->offset,
5294 (unsigned long)dback->num_refs);
5295 } else {
5296 tback = to_tree_backref(back);
5297 fprintf(stderr, "Backref %llu parent %llu"
5298 " root %llu not found in extent tree\n",
5299 (unsigned long long)rec->start,
5300 (unsigned long long)tback->parent,
5301 (unsigned long long)tback->root);
5304 if (!back->is_data && !back->found_ref) {
5305 err = 1;
5306 if (!print_errs)
5307 goto out;
5308 tback = to_tree_backref(back);
5309 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5310 (unsigned long long)rec->start,
5311 back->full_backref ? "parent" : "root",
5312 back->full_backref ?
5313 (unsigned long long)tback->parent :
5314 (unsigned long long)tback->root, back);
5316 if (back->is_data) {
5317 dback = to_data_backref(back);
5318 if (dback->found_ref != dback->num_refs) {
5319 err = 1;
5320 if (!print_errs)
5321 goto out;
5322 fprintf(stderr, "Incorrect local backref count"
5323 " on %llu %s %llu owner %llu"
5324 " offset %llu found %u wanted %u back %p\n",
5325 (unsigned long long)rec->start,
5326 back->full_backref ?
5327 "parent" : "root",
5328 back->full_backref ?
5329 (unsigned long long)dback->parent:
5330 (unsigned long long)dback->root,
5331 (unsigned long long)dback->owner,
5332 (unsigned long long)dback->offset,
5333 dback->found_ref, dback->num_refs, back);
5335 if (dback->disk_bytenr != rec->start) {
5336 err = 1;
5337 if (!print_errs)
5338 goto out;
5339 fprintf(stderr, "Backref disk bytenr does not"
5340 " match extent record, bytenr=%llu, "
5341 "ref bytenr=%llu\n",
5342 (unsigned long long)rec->start,
5343 (unsigned long long)dback->disk_bytenr);
5346 if (dback->bytes != rec->nr) {
5347 err = 1;
5348 if (!print_errs)
5349 goto out;
5350 fprintf(stderr, "Backref bytes do not match "
5351 "extent backref, bytenr=%llu, ref "
5352 "bytes=%llu, backref bytes=%llu\n",
5353 (unsigned long long)rec->start,
5354 (unsigned long long)rec->nr,
5355 (unsigned long long)dback->bytes);
5358 if (!back->is_data) {
5359 found += 1;
5360 } else {
5361 dback = to_data_backref(back);
5362 found += dback->found_ref;
5365 if (found != rec->refs) {
5366 err = 1;
5367 if (!print_errs)
5368 goto out;
5369 fprintf(stderr, "Incorrect global backref count "
5370 "on %llu found %llu wanted %llu\n",
5371 (unsigned long long)rec->start,
5372 (unsigned long long)found,
5373 (unsigned long long)rec->refs);
5375 out:
5376 return err;
5379 static int free_all_extent_backrefs(struct extent_record *rec)
5381 struct extent_backref *back;
5382 struct list_head *cur;
5383 while (!list_empty(&rec->backrefs)) {
5384 cur = rec->backrefs.next;
5385 back = to_extent_backref(cur);
5386 list_del(cur);
5387 free(back);
5389 return 0;
5392 static void free_extent_record_cache(struct cache_tree *extent_cache)
5394 struct cache_extent *cache;
5395 struct extent_record *rec;
5397 while (1) {
5398 cache = first_cache_extent(extent_cache);
5399 if (!cache)
5400 break;
5401 rec = container_of(cache, struct extent_record, cache);
5402 remove_cache_extent(extent_cache, cache);
5403 free_all_extent_backrefs(rec);
5404 free(rec);
5408 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5409 struct extent_record *rec)
5411 if (rec->content_checked && rec->owner_ref_checked &&
5412 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5413 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5414 !rec->bad_full_backref && !rec->crossing_stripes &&
5415 !rec->wrong_chunk_type) {
5416 remove_cache_extent(extent_cache, &rec->cache);
5417 free_all_extent_backrefs(rec);
5418 list_del_init(&rec->list);
5419 free(rec);
5421 return 0;
5424 static int check_owner_ref(struct btrfs_root *root,
5425 struct extent_record *rec,
5426 struct extent_buffer *buf)
5428 struct extent_backref *node;
5429 struct tree_backref *back;
5430 struct btrfs_root *ref_root;
5431 struct btrfs_key key;
5432 struct btrfs_path path;
5433 struct extent_buffer *parent;
5434 int level;
5435 int found = 0;
5436 int ret;
5438 list_for_each_entry(node, &rec->backrefs, list) {
5439 if (node->is_data)
5440 continue;
5441 if (!node->found_ref)
5442 continue;
5443 if (node->full_backref)
5444 continue;
5445 back = to_tree_backref(node);
5446 if (btrfs_header_owner(buf) == back->root)
5447 return 0;
5449 BUG_ON(rec->is_root);
5451 /* try to find the block by search corresponding fs tree */
5452 key.objectid = btrfs_header_owner(buf);
5453 key.type = BTRFS_ROOT_ITEM_KEY;
5454 key.offset = (u64)-1;
5456 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5457 if (IS_ERR(ref_root))
5458 return 1;
5460 level = btrfs_header_level(buf);
5461 if (level == 0)
5462 btrfs_item_key_to_cpu(buf, &key, 0);
5463 else
5464 btrfs_node_key_to_cpu(buf, &key, 0);
5466 btrfs_init_path(&path);
5467 path.lowest_level = level + 1;
5468 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5469 if (ret < 0)
5470 return 0;
5472 parent = path.nodes[level + 1];
5473 if (parent && buf->start == btrfs_node_blockptr(parent,
5474 path.slots[level + 1]))
5475 found = 1;
5477 btrfs_release_path(&path);
5478 return found ? 0 : 1;
5481 static int is_extent_tree_record(struct extent_record *rec)
5483 struct list_head *cur = rec->backrefs.next;
5484 struct extent_backref *node;
5485 struct tree_backref *back;
5486 int is_extent = 0;
5488 while(cur != &rec->backrefs) {
5489 node = to_extent_backref(cur);
5490 cur = cur->next;
5491 if (node->is_data)
5492 return 0;
5493 back = to_tree_backref(node);
5494 if (node->full_backref)
5495 return 0;
5496 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5497 is_extent = 1;
5499 return is_extent;
5503 static int record_bad_block_io(struct btrfs_fs_info *info,
5504 struct cache_tree *extent_cache,
5505 u64 start, u64 len)
5507 struct extent_record *rec;
5508 struct cache_extent *cache;
5509 struct btrfs_key key;
5511 cache = lookup_cache_extent(extent_cache, start, len);
5512 if (!cache)
5513 return 0;
5515 rec = container_of(cache, struct extent_record, cache);
5516 if (!is_extent_tree_record(rec))
5517 return 0;
5519 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5520 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5523 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5524 struct extent_buffer *buf, int slot)
5526 if (btrfs_header_level(buf)) {
5527 struct btrfs_key_ptr ptr1, ptr2;
5529 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5530 sizeof(struct btrfs_key_ptr));
5531 read_extent_buffer(buf, &ptr2,
5532 btrfs_node_key_ptr_offset(slot + 1),
5533 sizeof(struct btrfs_key_ptr));
5534 write_extent_buffer(buf, &ptr1,
5535 btrfs_node_key_ptr_offset(slot + 1),
5536 sizeof(struct btrfs_key_ptr));
5537 write_extent_buffer(buf, &ptr2,
5538 btrfs_node_key_ptr_offset(slot),
5539 sizeof(struct btrfs_key_ptr));
5540 if (slot == 0) {
5541 struct btrfs_disk_key key;
5542 btrfs_node_key(buf, &key, 0);
5543 btrfs_fixup_low_keys(root, path, &key,
5544 btrfs_header_level(buf) + 1);
5546 } else {
5547 struct btrfs_item *item1, *item2;
5548 struct btrfs_key k1, k2;
5549 char *item1_data, *item2_data;
5550 u32 item1_offset, item2_offset, item1_size, item2_size;
5552 item1 = btrfs_item_nr(slot);
5553 item2 = btrfs_item_nr(slot + 1);
5554 btrfs_item_key_to_cpu(buf, &k1, slot);
5555 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5556 item1_offset = btrfs_item_offset(buf, item1);
5557 item2_offset = btrfs_item_offset(buf, item2);
5558 item1_size = btrfs_item_size(buf, item1);
5559 item2_size = btrfs_item_size(buf, item2);
5561 item1_data = malloc(item1_size);
5562 if (!item1_data)
5563 return -ENOMEM;
5564 item2_data = malloc(item2_size);
5565 if (!item2_data) {
5566 free(item1_data);
5567 return -ENOMEM;
5570 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5571 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5573 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5574 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5575 free(item1_data);
5576 free(item2_data);
5578 btrfs_set_item_offset(buf, item1, item2_offset);
5579 btrfs_set_item_offset(buf, item2, item1_offset);
5580 btrfs_set_item_size(buf, item1, item2_size);
5581 btrfs_set_item_size(buf, item2, item1_size);
5583 path->slots[0] = slot;
5584 btrfs_set_item_key_unsafe(root, path, &k2);
5585 path->slots[0] = slot + 1;
5586 btrfs_set_item_key_unsafe(root, path, &k1);
5588 return 0;
5591 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5593 struct extent_buffer *buf;
5594 struct btrfs_key k1, k2;
5595 int i;
5596 int level = path->lowest_level;
5597 int ret = -EIO;
5599 buf = path->nodes[level];
5600 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5601 if (level) {
5602 btrfs_node_key_to_cpu(buf, &k1, i);
5603 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5604 } else {
5605 btrfs_item_key_to_cpu(buf, &k1, i);
5606 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5608 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5609 continue;
5610 ret = swap_values(root, path, buf, i);
5611 if (ret)
5612 break;
5613 btrfs_mark_buffer_dirty(buf);
5614 i = 0;
5616 return ret;
5619 static int delete_bogus_item(struct btrfs_root *root,
5620 struct btrfs_path *path,
5621 struct extent_buffer *buf, int slot)
5623 struct btrfs_key key;
5624 int nritems = btrfs_header_nritems(buf);
5626 btrfs_item_key_to_cpu(buf, &key, slot);
5628 /* These are all the keys we can deal with missing. */
5629 if (key.type != BTRFS_DIR_INDEX_KEY &&
5630 key.type != BTRFS_EXTENT_ITEM_KEY &&
5631 key.type != BTRFS_METADATA_ITEM_KEY &&
5632 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5633 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5634 return -1;
5636 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5637 (unsigned long long)key.objectid, key.type,
5638 (unsigned long long)key.offset, slot, buf->start);
5639 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5640 btrfs_item_nr_offset(slot + 1),
5641 sizeof(struct btrfs_item) *
5642 (nritems - slot - 1));
5643 btrfs_set_header_nritems(buf, nritems - 1);
5644 if (slot == 0) {
5645 struct btrfs_disk_key disk_key;
5647 btrfs_item_key(buf, &disk_key, 0);
5648 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5650 btrfs_mark_buffer_dirty(buf);
5651 return 0;
5654 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5656 struct extent_buffer *buf;
5657 int i;
5658 int ret = 0;
5660 /* We should only get this for leaves */
5661 BUG_ON(path->lowest_level);
5662 buf = path->nodes[0];
5663 again:
5664 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5665 unsigned int shift = 0, offset;
5667 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5668 BTRFS_LEAF_DATA_SIZE(root)) {
5669 if (btrfs_item_end_nr(buf, i) >
5670 BTRFS_LEAF_DATA_SIZE(root)) {
5671 ret = delete_bogus_item(root, path, buf, i);
5672 if (!ret)
5673 goto again;
5674 fprintf(stderr, "item is off the end of the "
5675 "leaf, can't fix\n");
5676 ret = -EIO;
5677 break;
5679 shift = BTRFS_LEAF_DATA_SIZE(root) -
5680 btrfs_item_end_nr(buf, i);
5681 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5682 btrfs_item_offset_nr(buf, i - 1)) {
5683 if (btrfs_item_end_nr(buf, i) >
5684 btrfs_item_offset_nr(buf, i - 1)) {
5685 ret = delete_bogus_item(root, path, buf, i);
5686 if (!ret)
5687 goto again;
5688 fprintf(stderr, "items overlap, can't fix\n");
5689 ret = -EIO;
5690 break;
5692 shift = btrfs_item_offset_nr(buf, i - 1) -
5693 btrfs_item_end_nr(buf, i);
5695 if (!shift)
5696 continue;
5698 printf("Shifting item nr %d by %u bytes in block %llu\n",
5699 i, shift, (unsigned long long)buf->start);
5700 offset = btrfs_item_offset_nr(buf, i);
5701 memmove_extent_buffer(buf,
5702 btrfs_leaf_data(buf) + offset + shift,
5703 btrfs_leaf_data(buf) + offset,
5704 btrfs_item_size_nr(buf, i));
5705 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5706 offset + shift);
5707 btrfs_mark_buffer_dirty(buf);
5711 * We may have moved things, in which case we want to exit so we don't
5712 * write those changes out. Once we have proper abort functionality in
5713 * progs this can be changed to something nicer.
5715 BUG_ON(ret);
5716 return ret;
5720 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5721 * then just return -EIO.
5723 static int try_to_fix_bad_block(struct btrfs_root *root,
5724 struct extent_buffer *buf,
5725 enum btrfs_tree_block_status status)
5727 struct btrfs_trans_handle *trans;
5728 struct ulist *roots;
5729 struct ulist_node *node;
5730 struct btrfs_root *search_root;
5731 struct btrfs_path path;
5732 struct ulist_iterator iter;
5733 struct btrfs_key root_key, key;
5734 int ret;
5736 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5737 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5738 return -EIO;
5740 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5741 if (ret)
5742 return -EIO;
5744 btrfs_init_path(&path);
5745 ULIST_ITER_INIT(&iter);
5746 while ((node = ulist_next(roots, &iter))) {
5747 root_key.objectid = node->val;
5748 root_key.type = BTRFS_ROOT_ITEM_KEY;
5749 root_key.offset = (u64)-1;
5751 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5752 if (IS_ERR(root)) {
5753 ret = -EIO;
5754 break;
5758 trans = btrfs_start_transaction(search_root, 0);
5759 if (IS_ERR(trans)) {
5760 ret = PTR_ERR(trans);
5761 break;
5764 path.lowest_level = btrfs_header_level(buf);
5765 path.skip_check_block = 1;
5766 if (path.lowest_level)
5767 btrfs_node_key_to_cpu(buf, &key, 0);
5768 else
5769 btrfs_item_key_to_cpu(buf, &key, 0);
5770 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5771 if (ret) {
5772 ret = -EIO;
5773 btrfs_commit_transaction(trans, search_root);
5774 break;
5776 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5777 ret = fix_key_order(search_root, &path);
5778 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5779 ret = fix_item_offset(search_root, &path);
5780 if (ret) {
5781 btrfs_commit_transaction(trans, search_root);
5782 break;
5784 btrfs_release_path(&path);
5785 btrfs_commit_transaction(trans, search_root);
5787 ulist_free(roots);
5788 btrfs_release_path(&path);
5789 return ret;
5792 static int check_block(struct btrfs_root *root,
5793 struct cache_tree *extent_cache,
5794 struct extent_buffer *buf, u64 flags)
5796 struct extent_record *rec;
5797 struct cache_extent *cache;
5798 struct btrfs_key key;
5799 enum btrfs_tree_block_status status;
5800 int ret = 0;
5801 int level;
5803 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5804 if (!cache)
5805 return 1;
5806 rec = container_of(cache, struct extent_record, cache);
5807 rec->generation = btrfs_header_generation(buf);
5809 level = btrfs_header_level(buf);
5810 if (btrfs_header_nritems(buf) > 0) {
5812 if (level == 0)
5813 btrfs_item_key_to_cpu(buf, &key, 0);
5814 else
5815 btrfs_node_key_to_cpu(buf, &key, 0);
5817 rec->info_objectid = key.objectid;
5819 rec->info_level = level;
5821 if (btrfs_is_leaf(buf))
5822 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5823 else
5824 status = btrfs_check_node(root, &rec->parent_key, buf);
5826 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5827 if (repair)
5828 status = try_to_fix_bad_block(root, buf, status);
5829 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5830 ret = -EIO;
5831 fprintf(stderr, "bad block %llu\n",
5832 (unsigned long long)buf->start);
5833 } else {
5835 * Signal to callers we need to start the scan over
5836 * again since we'll have cowed blocks.
5838 ret = -EAGAIN;
5840 } else {
5841 rec->content_checked = 1;
5842 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5843 rec->owner_ref_checked = 1;
5844 else {
5845 ret = check_owner_ref(root, rec, buf);
5846 if (!ret)
5847 rec->owner_ref_checked = 1;
5850 if (!ret)
5851 maybe_free_extent_rec(extent_cache, rec);
5852 return ret;
5855 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5856 u64 parent, u64 root)
5858 struct list_head *cur = rec->backrefs.next;
5859 struct extent_backref *node;
5860 struct tree_backref *back;
5862 while(cur != &rec->backrefs) {
5863 node = to_extent_backref(cur);
5864 cur = cur->next;
5865 if (node->is_data)
5866 continue;
5867 back = to_tree_backref(node);
5868 if (parent > 0) {
5869 if (!node->full_backref)
5870 continue;
5871 if (parent == back->parent)
5872 return back;
5873 } else {
5874 if (node->full_backref)
5875 continue;
5876 if (back->root == root)
5877 return back;
5880 return NULL;
5883 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5884 u64 parent, u64 root)
5886 struct tree_backref *ref = malloc(sizeof(*ref));
5888 if (!ref)
5889 return NULL;
5890 memset(&ref->node, 0, sizeof(ref->node));
5891 if (parent > 0) {
5892 ref->parent = parent;
5893 ref->node.full_backref = 1;
5894 } else {
5895 ref->root = root;
5896 ref->node.full_backref = 0;
5898 list_add_tail(&ref->node.list, &rec->backrefs);
5900 return ref;
5903 static struct data_backref *find_data_backref(struct extent_record *rec,
5904 u64 parent, u64 root,
5905 u64 owner, u64 offset,
5906 int found_ref,
5907 u64 disk_bytenr, u64 bytes)
5909 struct list_head *cur = rec->backrefs.next;
5910 struct extent_backref *node;
5911 struct data_backref *back;
5913 while(cur != &rec->backrefs) {
5914 node = to_extent_backref(cur);
5915 cur = cur->next;
5916 if (!node->is_data)
5917 continue;
5918 back = to_data_backref(node);
5919 if (parent > 0) {
5920 if (!node->full_backref)
5921 continue;
5922 if (parent == back->parent)
5923 return back;
5924 } else {
5925 if (node->full_backref)
5926 continue;
5927 if (back->root == root && back->owner == owner &&
5928 back->offset == offset) {
5929 if (found_ref && node->found_ref &&
5930 (back->bytes != bytes ||
5931 back->disk_bytenr != disk_bytenr))
5932 continue;
5933 return back;
5937 return NULL;
5940 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5941 u64 parent, u64 root,
5942 u64 owner, u64 offset,
5943 u64 max_size)
5945 struct data_backref *ref = malloc(sizeof(*ref));
5947 if (!ref)
5948 return NULL;
5949 memset(&ref->node, 0, sizeof(ref->node));
5950 ref->node.is_data = 1;
5952 if (parent > 0) {
5953 ref->parent = parent;
5954 ref->owner = 0;
5955 ref->offset = 0;
5956 ref->node.full_backref = 1;
5957 } else {
5958 ref->root = root;
5959 ref->owner = owner;
5960 ref->offset = offset;
5961 ref->node.full_backref = 0;
5963 ref->bytes = max_size;
5964 ref->found_ref = 0;
5965 ref->num_refs = 0;
5966 list_add_tail(&ref->node.list, &rec->backrefs);
5967 if (max_size > rec->max_size)
5968 rec->max_size = max_size;
5969 return ref;
5972 /* Check if the type of extent matches with its chunk */
5973 static void check_extent_type(struct extent_record *rec)
5975 struct btrfs_block_group_cache *bg_cache;
5977 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5978 if (!bg_cache)
5979 return;
5981 /* data extent, check chunk directly*/
5982 if (!rec->metadata) {
5983 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5984 rec->wrong_chunk_type = 1;
5985 return;
5988 /* metadata extent, check the obvious case first */
5989 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5990 BTRFS_BLOCK_GROUP_METADATA))) {
5991 rec->wrong_chunk_type = 1;
5992 return;
5996 * Check SYSTEM extent, as it's also marked as metadata, we can only
5997 * make sure it's a SYSTEM extent by its backref
5999 if (!list_empty(&rec->backrefs)) {
6000 struct extent_backref *node;
6001 struct tree_backref *tback;
6002 u64 bg_type;
6004 node = to_extent_backref(rec->backrefs.next);
6005 if (node->is_data) {
6006 /* tree block shouldn't have data backref */
6007 rec->wrong_chunk_type = 1;
6008 return;
6010 tback = container_of(node, struct tree_backref, node);
6012 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6013 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6014 else
6015 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6016 if (!(bg_cache->flags & bg_type))
6017 rec->wrong_chunk_type = 1;
6022 * Allocate a new extent record, fill default values from @tmpl and insert int
6023 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6024 * the cache, otherwise it fails.
6026 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6027 struct extent_record *tmpl)
6029 struct extent_record *rec;
6030 int ret = 0;
6032 rec = malloc(sizeof(*rec));
6033 if (!rec)
6034 return -ENOMEM;
6035 rec->start = tmpl->start;
6036 rec->max_size = tmpl->max_size;
6037 rec->nr = max(tmpl->nr, tmpl->max_size);
6038 rec->found_rec = tmpl->found_rec;
6039 rec->content_checked = tmpl->content_checked;
6040 rec->owner_ref_checked = tmpl->owner_ref_checked;
6041 rec->num_duplicates = 0;
6042 rec->metadata = tmpl->metadata;
6043 rec->flag_block_full_backref = FLAG_UNSET;
6044 rec->bad_full_backref = 0;
6045 rec->crossing_stripes = 0;
6046 rec->wrong_chunk_type = 0;
6047 rec->is_root = tmpl->is_root;
6048 rec->refs = tmpl->refs;
6049 rec->extent_item_refs = tmpl->extent_item_refs;
6050 rec->parent_generation = tmpl->parent_generation;
6051 INIT_LIST_HEAD(&rec->backrefs);
6052 INIT_LIST_HEAD(&rec->dups);
6053 INIT_LIST_HEAD(&rec->list);
6054 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6055 rec->cache.start = tmpl->start;
6056 rec->cache.size = tmpl->nr;
6057 ret = insert_cache_extent(extent_cache, &rec->cache);
6058 if (ret) {
6059 free(rec);
6060 return ret;
6062 bytes_used += rec->nr;
6064 if (tmpl->metadata)
6065 rec->crossing_stripes = check_crossing_stripes(global_info,
6066 rec->start, global_info->tree_root->nodesize);
6067 check_extent_type(rec);
6068 return ret;
6072 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6073 * some are hints:
6074 * - refs - if found, increase refs
6075 * - is_root - if found, set
6076 * - content_checked - if found, set
6077 * - owner_ref_checked - if found, set
6079 * If not found, create a new one, initialize and insert.
6081 static int add_extent_rec(struct cache_tree *extent_cache,
6082 struct extent_record *tmpl)
6084 struct extent_record *rec;
6085 struct cache_extent *cache;
6086 int ret = 0;
6087 int dup = 0;
6089 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6090 if (cache) {
6091 rec = container_of(cache, struct extent_record, cache);
6092 if (tmpl->refs)
6093 rec->refs++;
6094 if (rec->nr == 1)
6095 rec->nr = max(tmpl->nr, tmpl->max_size);
6098 * We need to make sure to reset nr to whatever the extent
6099 * record says was the real size, this way we can compare it to
6100 * the backrefs.
6102 if (tmpl->found_rec) {
6103 if (tmpl->start != rec->start || rec->found_rec) {
6104 struct extent_record *tmp;
6106 dup = 1;
6107 if (list_empty(&rec->list))
6108 list_add_tail(&rec->list,
6109 &duplicate_extents);
6112 * We have to do this song and dance in case we
6113 * find an extent record that falls inside of
6114 * our current extent record but does not have
6115 * the same objectid.
6117 tmp = malloc(sizeof(*tmp));
6118 if (!tmp)
6119 return -ENOMEM;
6120 tmp->start = tmpl->start;
6121 tmp->max_size = tmpl->max_size;
6122 tmp->nr = tmpl->nr;
6123 tmp->found_rec = 1;
6124 tmp->metadata = tmpl->metadata;
6125 tmp->extent_item_refs = tmpl->extent_item_refs;
6126 INIT_LIST_HEAD(&tmp->list);
6127 list_add_tail(&tmp->list, &rec->dups);
6128 rec->num_duplicates++;
6129 } else {
6130 rec->nr = tmpl->nr;
6131 rec->found_rec = 1;
6135 if (tmpl->extent_item_refs && !dup) {
6136 if (rec->extent_item_refs) {
6137 fprintf(stderr, "block %llu rec "
6138 "extent_item_refs %llu, passed %llu\n",
6139 (unsigned long long)tmpl->start,
6140 (unsigned long long)
6141 rec->extent_item_refs,
6142 (unsigned long long)tmpl->extent_item_refs);
6144 rec->extent_item_refs = tmpl->extent_item_refs;
6146 if (tmpl->is_root)
6147 rec->is_root = 1;
6148 if (tmpl->content_checked)
6149 rec->content_checked = 1;
6150 if (tmpl->owner_ref_checked)
6151 rec->owner_ref_checked = 1;
6152 memcpy(&rec->parent_key, &tmpl->parent_key,
6153 sizeof(tmpl->parent_key));
6154 if (tmpl->parent_generation)
6155 rec->parent_generation = tmpl->parent_generation;
6156 if (rec->max_size < tmpl->max_size)
6157 rec->max_size = tmpl->max_size;
6160 * A metadata extent can't cross stripe_len boundary, otherwise
6161 * kernel scrub won't be able to handle it.
6162 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6163 * it.
6165 if (tmpl->metadata)
6166 rec->crossing_stripes = check_crossing_stripes(
6167 global_info, rec->start,
6168 global_info->tree_root->nodesize);
6169 check_extent_type(rec);
6170 maybe_free_extent_rec(extent_cache, rec);
6171 return ret;
6174 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6176 return ret;
6179 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6180 u64 parent, u64 root, int found_ref)
6182 struct extent_record *rec;
6183 struct tree_backref *back;
6184 struct cache_extent *cache;
6185 int ret;
6187 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6188 if (!cache) {
6189 struct extent_record tmpl;
6191 memset(&tmpl, 0, sizeof(tmpl));
6192 tmpl.start = bytenr;
6193 tmpl.nr = 1;
6194 tmpl.metadata = 1;
6196 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6197 if (ret)
6198 return ret;
6200 /* really a bug in cache_extent implement now */
6201 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6202 if (!cache)
6203 return -ENOENT;
6206 rec = container_of(cache, struct extent_record, cache);
6207 if (rec->start != bytenr) {
6209 * Several cause, from unaligned bytenr to over lapping extents
6211 return -EEXIST;
6214 back = find_tree_backref(rec, parent, root);
6215 if (!back) {
6216 back = alloc_tree_backref(rec, parent, root);
6217 if (!back)
6218 return -ENOMEM;
6221 if (found_ref) {
6222 if (back->node.found_ref) {
6223 fprintf(stderr, "Extent back ref already exists "
6224 "for %llu parent %llu root %llu \n",
6225 (unsigned long long)bytenr,
6226 (unsigned long long)parent,
6227 (unsigned long long)root);
6229 back->node.found_ref = 1;
6230 } else {
6231 if (back->node.found_extent_tree) {
6232 fprintf(stderr, "Extent back ref already exists "
6233 "for %llu parent %llu root %llu \n",
6234 (unsigned long long)bytenr,
6235 (unsigned long long)parent,
6236 (unsigned long long)root);
6238 back->node.found_extent_tree = 1;
6240 check_extent_type(rec);
6241 maybe_free_extent_rec(extent_cache, rec);
6242 return 0;
6245 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6246 u64 parent, u64 root, u64 owner, u64 offset,
6247 u32 num_refs, int found_ref, u64 max_size)
6249 struct extent_record *rec;
6250 struct data_backref *back;
6251 struct cache_extent *cache;
6252 int ret;
6254 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6255 if (!cache) {
6256 struct extent_record tmpl;
6258 memset(&tmpl, 0, sizeof(tmpl));
6259 tmpl.start = bytenr;
6260 tmpl.nr = 1;
6261 tmpl.max_size = max_size;
6263 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6264 if (ret)
6265 return ret;
6267 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6268 if (!cache)
6269 abort();
6272 rec = container_of(cache, struct extent_record, cache);
6273 if (rec->max_size < max_size)
6274 rec->max_size = max_size;
6277 * If found_ref is set then max_size is the real size and must match the
6278 * existing refs. So if we have already found a ref then we need to
6279 * make sure that this ref matches the existing one, otherwise we need
6280 * to add a new backref so we can notice that the backrefs don't match
6281 * and we need to figure out who is telling the truth. This is to
6282 * account for that awful fsync bug I introduced where we'd end up with
6283 * a btrfs_file_extent_item that would have its length include multiple
6284 * prealloc extents or point inside of a prealloc extent.
6286 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6287 bytenr, max_size);
6288 if (!back) {
6289 back = alloc_data_backref(rec, parent, root, owner, offset,
6290 max_size);
6291 BUG_ON(!back);
6294 if (found_ref) {
6295 BUG_ON(num_refs != 1);
6296 if (back->node.found_ref)
6297 BUG_ON(back->bytes != max_size);
6298 back->node.found_ref = 1;
6299 back->found_ref += 1;
6300 back->bytes = max_size;
6301 back->disk_bytenr = bytenr;
6302 rec->refs += 1;
6303 rec->content_checked = 1;
6304 rec->owner_ref_checked = 1;
6305 } else {
6306 if (back->node.found_extent_tree) {
6307 fprintf(stderr, "Extent back ref already exists "
6308 "for %llu parent %llu root %llu "
6309 "owner %llu offset %llu num_refs %lu\n",
6310 (unsigned long long)bytenr,
6311 (unsigned long long)parent,
6312 (unsigned long long)root,
6313 (unsigned long long)owner,
6314 (unsigned long long)offset,
6315 (unsigned long)num_refs);
6317 back->num_refs = num_refs;
6318 back->node.found_extent_tree = 1;
6320 maybe_free_extent_rec(extent_cache, rec);
6321 return 0;
6324 static int add_pending(struct cache_tree *pending,
6325 struct cache_tree *seen, u64 bytenr, u32 size)
6327 int ret;
6328 ret = add_cache_extent(seen, bytenr, size);
6329 if (ret)
6330 return ret;
6331 add_cache_extent(pending, bytenr, size);
6332 return 0;
6335 static int pick_next_pending(struct cache_tree *pending,
6336 struct cache_tree *reada,
6337 struct cache_tree *nodes,
6338 u64 last, struct block_info *bits, int bits_nr,
6339 int *reada_bits)
6341 unsigned long node_start = last;
6342 struct cache_extent *cache;
6343 int ret;
6345 cache = search_cache_extent(reada, 0);
6346 if (cache) {
6347 bits[0].start = cache->start;
6348 bits[0].size = cache->size;
6349 *reada_bits = 1;
6350 return 1;
6352 *reada_bits = 0;
6353 if (node_start > 32768)
6354 node_start -= 32768;
6356 cache = search_cache_extent(nodes, node_start);
6357 if (!cache)
6358 cache = search_cache_extent(nodes, 0);
6360 if (!cache) {
6361 cache = search_cache_extent(pending, 0);
6362 if (!cache)
6363 return 0;
6364 ret = 0;
6365 do {
6366 bits[ret].start = cache->start;
6367 bits[ret].size = cache->size;
6368 cache = next_cache_extent(cache);
6369 ret++;
6370 } while (cache && ret < bits_nr);
6371 return ret;
6374 ret = 0;
6375 do {
6376 bits[ret].start = cache->start;
6377 bits[ret].size = cache->size;
6378 cache = next_cache_extent(cache);
6379 ret++;
6380 } while (cache && ret < bits_nr);
6382 if (bits_nr - ret > 8) {
6383 u64 lookup = bits[0].start + bits[0].size;
6384 struct cache_extent *next;
6385 next = search_cache_extent(pending, lookup);
6386 while(next) {
6387 if (next->start - lookup > 32768)
6388 break;
6389 bits[ret].start = next->start;
6390 bits[ret].size = next->size;
6391 lookup = next->start + next->size;
6392 ret++;
6393 if (ret == bits_nr)
6394 break;
6395 next = next_cache_extent(next);
6396 if (!next)
6397 break;
6400 return ret;
6403 static void free_chunk_record(struct cache_extent *cache)
6405 struct chunk_record *rec;
6407 rec = container_of(cache, struct chunk_record, cache);
6408 list_del_init(&rec->list);
6409 list_del_init(&rec->dextents);
6410 free(rec);
6413 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6415 cache_tree_free_extents(chunk_cache, free_chunk_record);
6418 static void free_device_record(struct rb_node *node)
6420 struct device_record *rec;
6422 rec = container_of(node, struct device_record, node);
6423 free(rec);
6426 FREE_RB_BASED_TREE(device_cache, free_device_record);
6428 int insert_block_group_record(struct block_group_tree *tree,
6429 struct block_group_record *bg_rec)
6431 int ret;
6433 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6434 if (ret)
6435 return ret;
6437 list_add_tail(&bg_rec->list, &tree->block_groups);
6438 return 0;
6441 static void free_block_group_record(struct cache_extent *cache)
6443 struct block_group_record *rec;
6445 rec = container_of(cache, struct block_group_record, cache);
6446 list_del_init(&rec->list);
6447 free(rec);
6450 void free_block_group_tree(struct block_group_tree *tree)
6452 cache_tree_free_extents(&tree->tree, free_block_group_record);
6455 int insert_device_extent_record(struct device_extent_tree *tree,
6456 struct device_extent_record *de_rec)
6458 int ret;
6461 * Device extent is a bit different from the other extents, because
6462 * the extents which belong to the different devices may have the
6463 * same start and size, so we need use the special extent cache
6464 * search/insert functions.
6466 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6467 if (ret)
6468 return ret;
6470 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6471 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6472 return 0;
6475 static void free_device_extent_record(struct cache_extent *cache)
6477 struct device_extent_record *rec;
6479 rec = container_of(cache, struct device_extent_record, cache);
6480 if (!list_empty(&rec->chunk_list))
6481 list_del_init(&rec->chunk_list);
6482 if (!list_empty(&rec->device_list))
6483 list_del_init(&rec->device_list);
6484 free(rec);
6487 void free_device_extent_tree(struct device_extent_tree *tree)
6489 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref: a tree
 * backref when the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID, a data
 * backref otherwise.  key.offset is passed as the parent in both cases.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6513 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6514 struct btrfs_key *key,
6515 int slot)
6517 struct btrfs_chunk *ptr;
6518 struct chunk_record *rec;
6519 int num_stripes, i;
6521 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6522 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6524 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6525 if (!rec) {
6526 fprintf(stderr, "memory allocation failed\n");
6527 exit(-1);
6530 INIT_LIST_HEAD(&rec->list);
6531 INIT_LIST_HEAD(&rec->dextents);
6532 rec->bg_rec = NULL;
6534 rec->cache.start = key->offset;
6535 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6537 rec->generation = btrfs_header_generation(leaf);
6539 rec->objectid = key->objectid;
6540 rec->type = key->type;
6541 rec->offset = key->offset;
6543 rec->length = rec->cache.size;
6544 rec->owner = btrfs_chunk_owner(leaf, ptr);
6545 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6546 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6547 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6548 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6549 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6550 rec->num_stripes = num_stripes;
6551 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6553 for (i = 0; i < rec->num_stripes; ++i) {
6554 rec->stripes[i].devid =
6555 btrfs_stripe_devid_nr(leaf, ptr, i);
6556 rec->stripes[i].offset =
6557 btrfs_stripe_offset_nr(leaf, ptr, i);
6558 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6559 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6560 BTRFS_UUID_SIZE);
6563 return rec;
6566 static int process_chunk_item(struct cache_tree *chunk_cache,
6567 struct btrfs_key *key, struct extent_buffer *eb,
6568 int slot)
6570 struct chunk_record *rec;
6571 struct btrfs_chunk *chunk;
6572 int ret = 0;
6574 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6576 * Do extra check for this chunk item,
6578 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6579 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6580 * and owner<->key_type check.
6582 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6583 key->offset);
6584 if (ret < 0) {
6585 error("chunk(%llu, %llu) is not valid, ignore it",
6586 key->offset, btrfs_chunk_length(eb, chunk));
6587 return 0;
6589 rec = btrfs_new_chunk_record(eb, key, slot);
6590 ret = insert_cache_extent(chunk_cache, &rec->cache);
6591 if (ret) {
6592 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6593 rec->offset, rec->length);
6594 free(rec);
6597 return ret;
6600 static int process_device_item(struct rb_root *dev_cache,
6601 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6603 struct btrfs_dev_item *ptr;
6604 struct device_record *rec;
6605 int ret = 0;
6607 ptr = btrfs_item_ptr(eb,
6608 slot, struct btrfs_dev_item);
6610 rec = malloc(sizeof(*rec));
6611 if (!rec) {
6612 fprintf(stderr, "memory allocation failed\n");
6613 return -ENOMEM;
6616 rec->devid = key->offset;
6617 rec->generation = btrfs_header_generation(eb);
6619 rec->objectid = key->objectid;
6620 rec->type = key->type;
6621 rec->offset = key->offset;
6623 rec->devid = btrfs_device_id(eb, ptr);
6624 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6625 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6627 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6628 if (ret) {
6629 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6630 free(rec);
6633 return ret;
6636 struct block_group_record *
6637 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6638 int slot)
6640 struct btrfs_block_group_item *ptr;
6641 struct block_group_record *rec;
6643 rec = calloc(1, sizeof(*rec));
6644 if (!rec) {
6645 fprintf(stderr, "memory allocation failed\n");
6646 exit(-1);
6649 rec->cache.start = key->objectid;
6650 rec->cache.size = key->offset;
6652 rec->generation = btrfs_header_generation(leaf);
6654 rec->objectid = key->objectid;
6655 rec->type = key->type;
6656 rec->offset = key->offset;
6658 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6659 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6661 INIT_LIST_HEAD(&rec->list);
6663 return rec;
6666 static int process_block_group_item(struct block_group_tree *block_group_cache,
6667 struct btrfs_key *key,
6668 struct extent_buffer *eb, int slot)
6670 struct block_group_record *rec;
6671 int ret = 0;
6673 rec = btrfs_new_block_group_record(eb, key, slot);
6674 ret = insert_block_group_record(block_group_cache, rec);
6675 if (ret) {
6676 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6677 rec->objectid, rec->offset);
6678 free(rec);
6681 return ret;
6684 struct device_extent_record *
6685 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6686 struct btrfs_key *key, int slot)
6688 struct device_extent_record *rec;
6689 struct btrfs_dev_extent *ptr;
6691 rec = calloc(1, sizeof(*rec));
6692 if (!rec) {
6693 fprintf(stderr, "memory allocation failed\n");
6694 exit(-1);
6697 rec->cache.objectid = key->objectid;
6698 rec->cache.start = key->offset;
6700 rec->generation = btrfs_header_generation(leaf);
6702 rec->objectid = key->objectid;
6703 rec->type = key->type;
6704 rec->offset = key->offset;
6706 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6707 rec->chunk_objecteid =
6708 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6709 rec->chunk_offset =
6710 btrfs_dev_extent_chunk_offset(leaf, ptr);
6711 rec->length = btrfs_dev_extent_length(leaf, ptr);
6712 rec->cache.size = rec->length;
6714 INIT_LIST_HEAD(&rec->chunk_list);
6715 INIT_LIST_HEAD(&rec->device_list);
6717 return rec;
6720 static int
6721 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6722 struct btrfs_key *key, struct extent_buffer *eb,
6723 int slot)
6725 struct device_extent_record *rec;
6726 int ret;
6728 rec = btrfs_new_device_extent_record(eb, key, slot);
6729 ret = insert_device_extent_record(dev_extent_cache, rec);
6730 if (ret) {
6731 fprintf(stderr,
6732 "Device extent[%llu, %llu, %llu] existed.\n",
6733 rec->objectid, rec->offset, rec->length);
6734 free(rec);
6737 return ret;
6740 static int process_extent_item(struct btrfs_root *root,
6741 struct cache_tree *extent_cache,
6742 struct extent_buffer *eb, int slot)
6744 struct btrfs_extent_item *ei;
6745 struct btrfs_extent_inline_ref *iref;
6746 struct btrfs_extent_data_ref *dref;
6747 struct btrfs_shared_data_ref *sref;
6748 struct btrfs_key key;
6749 struct extent_record tmpl;
6750 unsigned long end;
6751 unsigned long ptr;
6752 int ret;
6753 int type;
6754 u32 item_size = btrfs_item_size_nr(eb, slot);
6755 u64 refs = 0;
6756 u64 offset;
6757 u64 num_bytes;
6758 int metadata = 0;
6760 btrfs_item_key_to_cpu(eb, &key, slot);
6762 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6763 metadata = 1;
6764 num_bytes = root->nodesize;
6765 } else {
6766 num_bytes = key.offset;
6769 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6770 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6771 key.objectid, root->sectorsize);
6772 return -EIO;
6774 if (item_size < sizeof(*ei)) {
6775 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6776 struct btrfs_extent_item_v0 *ei0;
6777 BUG_ON(item_size != sizeof(*ei0));
6778 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6779 refs = btrfs_extent_refs_v0(eb, ei0);
6780 #else
6781 BUG();
6782 #endif
6783 memset(&tmpl, 0, sizeof(tmpl));
6784 tmpl.start = key.objectid;
6785 tmpl.nr = num_bytes;
6786 tmpl.extent_item_refs = refs;
6787 tmpl.metadata = metadata;
6788 tmpl.found_rec = 1;
6789 tmpl.max_size = num_bytes;
6791 return add_extent_rec(extent_cache, &tmpl);
6794 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6795 refs = btrfs_extent_refs(eb, ei);
6796 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6797 metadata = 1;
6798 else
6799 metadata = 0;
6800 if (metadata && num_bytes != root->nodesize) {
6801 error("ignore invalid metadata extent, length %llu does not equal to %u",
6802 num_bytes, root->nodesize);
6803 return -EIO;
6805 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6806 error("ignore invalid data extent, length %llu is not aligned to %u",
6807 num_bytes, root->sectorsize);
6808 return -EIO;
6811 memset(&tmpl, 0, sizeof(tmpl));
6812 tmpl.start = key.objectid;
6813 tmpl.nr = num_bytes;
6814 tmpl.extent_item_refs = refs;
6815 tmpl.metadata = metadata;
6816 tmpl.found_rec = 1;
6817 tmpl.max_size = num_bytes;
6818 add_extent_rec(extent_cache, &tmpl);
6820 ptr = (unsigned long)(ei + 1);
6821 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6822 key.type == BTRFS_EXTENT_ITEM_KEY)
6823 ptr += sizeof(struct btrfs_tree_block_info);
6825 end = (unsigned long)ei + item_size;
6826 while (ptr < end) {
6827 iref = (struct btrfs_extent_inline_ref *)ptr;
6828 type = btrfs_extent_inline_ref_type(eb, iref);
6829 offset = btrfs_extent_inline_ref_offset(eb, iref);
6830 switch (type) {
6831 case BTRFS_TREE_BLOCK_REF_KEY:
6832 ret = add_tree_backref(extent_cache, key.objectid,
6833 0, offset, 0);
6834 if (ret < 0)
6835 error("add_tree_backref failed: %s",
6836 strerror(-ret));
6837 break;
6838 case BTRFS_SHARED_BLOCK_REF_KEY:
6839 ret = add_tree_backref(extent_cache, key.objectid,
6840 offset, 0, 0);
6841 if (ret < 0)
6842 error("add_tree_backref failed: %s",
6843 strerror(-ret));
6844 break;
6845 case BTRFS_EXTENT_DATA_REF_KEY:
6846 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6847 add_data_backref(extent_cache, key.objectid, 0,
6848 btrfs_extent_data_ref_root(eb, dref),
6849 btrfs_extent_data_ref_objectid(eb,
6850 dref),
6851 btrfs_extent_data_ref_offset(eb, dref),
6852 btrfs_extent_data_ref_count(eb, dref),
6853 0, num_bytes);
6854 break;
6855 case BTRFS_SHARED_DATA_REF_KEY:
6856 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6857 add_data_backref(extent_cache, key.objectid, offset,
6858 0, 0, 0,
6859 btrfs_shared_data_ref_count(eb, sref),
6860 0, num_bytes);
6861 break;
6862 default:
6863 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6864 key.objectid, key.type, num_bytes);
6865 goto out;
6867 ptr += btrfs_extent_inline_ref_size(type);
6869 WARN_ON(ptr > end);
6870 out:
6871 return 0;
6874 static int check_cache_range(struct btrfs_root *root,
6875 struct btrfs_block_group_cache *cache,
6876 u64 offset, u64 bytes)
6878 struct btrfs_free_space *entry;
6879 u64 *logical;
6880 u64 bytenr;
6881 int stripe_len;
6882 int i, nr, ret;
6884 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6885 bytenr = btrfs_sb_offset(i);
6886 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6887 cache->key.objectid, bytenr, 0,
6888 &logical, &nr, &stripe_len);
6889 if (ret)
6890 return ret;
6892 while (nr--) {
6893 if (logical[nr] + stripe_len <= offset)
6894 continue;
6895 if (offset + bytes <= logical[nr])
6896 continue;
6897 if (logical[nr] == offset) {
6898 if (stripe_len >= bytes) {
6899 free(logical);
6900 return 0;
6902 bytes -= stripe_len;
6903 offset += stripe_len;
6904 } else if (logical[nr] < offset) {
6905 if (logical[nr] + stripe_len >=
6906 offset + bytes) {
6907 free(logical);
6908 return 0;
6910 bytes = (offset + bytes) -
6911 (logical[nr] + stripe_len);
6912 offset = logical[nr] + stripe_len;
6913 } else {
6915 * Could be tricky, the super may land in the
6916 * middle of the area we're checking. First
6917 * check the easiest case, it's at the end.
6919 if (logical[nr] + stripe_len >=
6920 bytes + offset) {
6921 bytes = logical[nr] - offset;
6922 continue;
6925 /* Check the left side */
6926 ret = check_cache_range(root, cache,
6927 offset,
6928 logical[nr] - offset);
6929 if (ret) {
6930 free(logical);
6931 return ret;
6934 /* Now we continue with the right side */
6935 bytes = (offset + bytes) -
6936 (logical[nr] + stripe_len);
6937 offset = logical[nr] + stripe_len;
6941 free(logical);
6944 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6945 if (!entry) {
6946 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6947 offset, offset+bytes);
6948 return -EINVAL;
6951 if (entry->offset != offset) {
6952 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6953 entry->offset);
6954 return -EINVAL;
6957 if (entry->bytes != bytes) {
6958 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6959 bytes, entry->bytes, offset);
6960 return -EINVAL;
6963 unlink_free_space(cache->free_space_ctl, entry);
6964 free(entry);
6965 return 0;
6968 static int verify_space_cache(struct btrfs_root *root,
6969 struct btrfs_block_group_cache *cache)
6971 struct btrfs_path path;
6972 struct extent_buffer *leaf;
6973 struct btrfs_key key;
6974 u64 last;
6975 int ret = 0;
6977 root = root->fs_info->extent_root;
6979 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6981 btrfs_init_path(&path);
6982 key.objectid = last;
6983 key.offset = 0;
6984 key.type = BTRFS_EXTENT_ITEM_KEY;
6985 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6986 if (ret < 0)
6987 goto out;
6988 ret = 0;
6989 while (1) {
6990 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6991 ret = btrfs_next_leaf(root, &path);
6992 if (ret < 0)
6993 goto out;
6994 if (ret > 0) {
6995 ret = 0;
6996 break;
6999 leaf = path.nodes[0];
7000 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7001 if (key.objectid >= cache->key.offset + cache->key.objectid)
7002 break;
7003 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7004 key.type != BTRFS_METADATA_ITEM_KEY) {
7005 path.slots[0]++;
7006 continue;
7009 if (last == key.objectid) {
7010 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7011 last = key.objectid + key.offset;
7012 else
7013 last = key.objectid + root->nodesize;
7014 path.slots[0]++;
7015 continue;
7018 ret = check_cache_range(root, cache, last,
7019 key.objectid - last);
7020 if (ret)
7021 break;
7022 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7023 last = key.objectid + key.offset;
7024 else
7025 last = key.objectid + root->nodesize;
7026 path.slots[0]++;
7029 if (last < cache->key.objectid + cache->key.offset)
7030 ret = check_cache_range(root, cache, last,
7031 cache->key.objectid +
7032 cache->key.offset - last);
7034 out:
7035 btrfs_release_path(&path);
7037 if (!ret &&
7038 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7039 fprintf(stderr, "There are still entries left in the space "
7040 "cache\n");
7041 ret = -EINVAL;
7044 return ret;
7047 static int check_space_cache(struct btrfs_root *root)
7049 struct btrfs_block_group_cache *cache;
7050 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7051 int ret;
7052 int error = 0;
7054 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7055 btrfs_super_generation(root->fs_info->super_copy) !=
7056 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7057 printf("cache and super generation don't match, space cache "
7058 "will be invalidated\n");
7059 return 0;
7062 if (ctx.progress_enabled) {
7063 ctx.tp = TASK_FREE_SPACE;
7064 task_start(ctx.info);
7067 while (1) {
7068 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7069 if (!cache)
7070 break;
7072 start = cache->key.objectid + cache->key.offset;
7073 if (!cache->free_space_ctl) {
7074 if (btrfs_init_free_space_ctl(cache,
7075 root->sectorsize)) {
7076 ret = -ENOMEM;
7077 break;
7079 } else {
7080 btrfs_remove_free_space_cache(cache);
7083 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7084 ret = exclude_super_stripes(root, cache);
7085 if (ret) {
7086 fprintf(stderr, "could not exclude super stripes: %s\n",
7087 strerror(-ret));
7088 error++;
7089 continue;
7091 ret = load_free_space_tree(root->fs_info, cache);
7092 free_excluded_extents(root, cache);
7093 if (ret < 0) {
7094 fprintf(stderr, "could not load free space tree: %s\n",
7095 strerror(-ret));
7096 error++;
7097 continue;
7099 error += ret;
7100 } else {
7101 ret = load_free_space_cache(root->fs_info, cache);
7102 if (!ret)
7103 continue;
7106 ret = verify_space_cache(root, cache);
7107 if (ret) {
7108 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7109 cache->key.objectid);
7110 error++;
7114 task_stop(ctx.info);
7116 return error ? -EINVAL : 0;
7119 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7120 u64 num_bytes, unsigned long leaf_offset,
7121 struct extent_buffer *eb) {
7123 u64 offset = 0;
7124 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7125 char *data;
7126 unsigned long csum_offset;
7127 u32 csum;
7128 u32 csum_expected;
7129 u64 read_len;
7130 u64 data_checked = 0;
7131 u64 tmp;
7132 int ret = 0;
7133 int mirror;
7134 int num_copies;
7136 if (num_bytes % root->sectorsize)
7137 return -EINVAL;
7139 data = malloc(num_bytes);
7140 if (!data)
7141 return -ENOMEM;
7143 while (offset < num_bytes) {
7144 mirror = 0;
7145 again:
7146 read_len = num_bytes - offset;
7147 /* read as much space once a time */
7148 ret = read_extent_data(root, data + offset,
7149 bytenr + offset, &read_len, mirror);
7150 if (ret)
7151 goto out;
7152 data_checked = 0;
7153 /* verify every 4k data's checksum */
7154 while (data_checked < read_len) {
7155 csum = ~(u32)0;
7156 tmp = offset + data_checked;
7158 csum = btrfs_csum_data((char *)data + tmp,
7159 csum, root->sectorsize);
7160 btrfs_csum_final(csum, (u8 *)&csum);
7162 csum_offset = leaf_offset +
7163 tmp / root->sectorsize * csum_size;
7164 read_extent_buffer(eb, (char *)&csum_expected,
7165 csum_offset, csum_size);
7166 /* try another mirror */
7167 if (csum != csum_expected) {
7168 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7169 mirror, bytenr + tmp,
7170 csum, csum_expected);
7171 num_copies = btrfs_num_copies(
7172 &root->fs_info->mapping_tree,
7173 bytenr, num_bytes);
7174 if (mirror < num_copies - 1) {
7175 mirror += 1;
7176 goto again;
7179 data_checked += root->sectorsize;
7181 offset += read_len;
7183 out:
7184 free(data);
7185 return ret;
7188 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7189 u64 num_bytes)
7191 struct btrfs_path path;
7192 struct extent_buffer *leaf;
7193 struct btrfs_key key;
7194 int ret;
7196 btrfs_init_path(&path);
7197 key.objectid = bytenr;
7198 key.type = BTRFS_EXTENT_ITEM_KEY;
7199 key.offset = (u64)-1;
7201 again:
7202 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7203 0, 0);
7204 if (ret < 0) {
7205 fprintf(stderr, "Error looking up extent record %d\n", ret);
7206 btrfs_release_path(&path);
7207 return ret;
7208 } else if (ret) {
7209 if (path.slots[0] > 0) {
7210 path.slots[0]--;
7211 } else {
7212 ret = btrfs_prev_leaf(root, &path);
7213 if (ret < 0) {
7214 goto out;
7215 } else if (ret > 0) {
7216 ret = 0;
7217 goto out;
7222 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7225 * Block group items come before extent items if they have the same
7226 * bytenr, so walk back one more just in case. Dear future traveller,
7227 * first congrats on mastering time travel. Now if it's not too much
7228 * trouble could you go back to 2006 and tell Chris to make the
7229 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7230 * EXTENT_ITEM_KEY please?
7232 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7233 if (path.slots[0] > 0) {
7234 path.slots[0]--;
7235 } else {
7236 ret = btrfs_prev_leaf(root, &path);
7237 if (ret < 0) {
7238 goto out;
7239 } else if (ret > 0) {
7240 ret = 0;
7241 goto out;
7244 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7247 while (num_bytes) {
7248 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7249 ret = btrfs_next_leaf(root, &path);
7250 if (ret < 0) {
7251 fprintf(stderr, "Error going to next leaf "
7252 "%d\n", ret);
7253 btrfs_release_path(&path);
7254 return ret;
7255 } else if (ret) {
7256 break;
7259 leaf = path.nodes[0];
7260 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7261 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7262 path.slots[0]++;
7263 continue;
7265 if (key.objectid + key.offset < bytenr) {
7266 path.slots[0]++;
7267 continue;
7269 if (key.objectid > bytenr + num_bytes)
7270 break;
7272 if (key.objectid == bytenr) {
7273 if (key.offset >= num_bytes) {
7274 num_bytes = 0;
7275 break;
7277 num_bytes -= key.offset;
7278 bytenr += key.offset;
7279 } else if (key.objectid < bytenr) {
7280 if (key.objectid + key.offset >= bytenr + num_bytes) {
7281 num_bytes = 0;
7282 break;
7284 num_bytes = (bytenr + num_bytes) -
7285 (key.objectid + key.offset);
7286 bytenr = key.objectid + key.offset;
7287 } else {
7288 if (key.objectid + key.offset < bytenr + num_bytes) {
7289 u64 new_start = key.objectid + key.offset;
7290 u64 new_bytes = bytenr + num_bytes - new_start;
7293 * Weird case, the extent is in the middle of
7294 * our range, we'll have to search one side
7295 * and then the other. Not sure if this happens
7296 * in real life, but no harm in coding it up
7297 * anyway just in case.
7299 btrfs_release_path(&path);
7300 ret = check_extent_exists(root, new_start,
7301 new_bytes);
7302 if (ret) {
7303 fprintf(stderr, "Right section didn't "
7304 "have a record\n");
7305 break;
7307 num_bytes = key.objectid - bytenr;
7308 goto again;
7310 num_bytes = key.objectid - bytenr;
7312 path.slots[0]++;
7314 ret = 0;
7316 out:
7317 if (num_bytes && !ret) {
7318 fprintf(stderr, "There are no extents for csum range "
7319 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7320 ret = 1;
7323 btrfs_release_path(&path);
7324 return ret;
7327 static int check_csums(struct btrfs_root *root)
7329 struct btrfs_path path;
7330 struct extent_buffer *leaf;
7331 struct btrfs_key key;
7332 u64 offset = 0, num_bytes = 0;
7333 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7334 int errors = 0;
7335 int ret;
7336 u64 data_len;
7337 unsigned long leaf_offset;
7339 root = root->fs_info->csum_root;
7340 if (!extent_buffer_uptodate(root->node)) {
7341 fprintf(stderr, "No valid csum tree found\n");
7342 return -ENOENT;
7345 btrfs_init_path(&path);
7346 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7347 key.type = BTRFS_EXTENT_CSUM_KEY;
7348 key.offset = 0;
7349 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7350 if (ret < 0) {
7351 fprintf(stderr, "Error searching csum tree %d\n", ret);
7352 btrfs_release_path(&path);
7353 return ret;
7356 if (ret > 0 && path.slots[0])
7357 path.slots[0]--;
7358 ret = 0;
7360 while (1) {
7361 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7362 ret = btrfs_next_leaf(root, &path);
7363 if (ret < 0) {
7364 fprintf(stderr, "Error going to next leaf "
7365 "%d\n", ret);
7366 break;
7368 if (ret)
7369 break;
7371 leaf = path.nodes[0];
7373 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7374 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7375 path.slots[0]++;
7376 continue;
7379 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7380 csum_size) * root->sectorsize;
7381 if (!check_data_csum)
7382 goto skip_csum_check;
7383 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7384 ret = check_extent_csums(root, key.offset, data_len,
7385 leaf_offset, leaf);
7386 if (ret)
7387 break;
7388 skip_csum_check:
7389 if (!num_bytes) {
7390 offset = key.offset;
7391 } else if (key.offset != offset + num_bytes) {
7392 ret = check_extent_exists(root, offset, num_bytes);
7393 if (ret) {
7394 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7395 "there is no extent record\n",
7396 offset, offset+num_bytes);
7397 errors++;
7399 offset = key.offset;
7400 num_bytes = 0;
7402 num_bytes += data_len;
7403 path.slots[0]++;
7406 btrfs_release_path(&path);
7407 return errors;
7410 static int is_dropped_key(struct btrfs_key *key,
7411 struct btrfs_key *drop_key) {
7412 if (key->objectid < drop_key->objectid)
7413 return 1;
7414 else if (key->objectid == drop_key->objectid) {
7415 if (key->type < drop_key->type)
7416 return 1;
7417 else if (key->type == drop_key->type) {
7418 if (key->offset < drop_key->offset)
7419 return 1;
7422 return 0;
7426 * Here are the rules for FULL_BACKREF.
7428 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7429 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7430 * FULL_BACKREF set.
7431 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7432 * if it happened after the relocation occurred since we'll have dropped the
7433 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7434 * have no real way to know for sure.
7436 * We process the blocks one root at a time, and we start from the lowest root
7437 * objectid and go to the highest. So we can just lookup the owner backref for
7438 * the record and if we don't find it then we know it doesn't exist and we have
7439 * a FULL BACKREF.
7441 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7442 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7443 * be set or not and then we can check later once we've gathered all the refs.
7445 static int calc_extent_flag(struct cache_tree *extent_cache,
7446 struct extent_buffer *buf,
7447 struct root_item_record *ri,
7448 u64 *flags)
7450 struct extent_record *rec;
7451 struct cache_extent *cache;
7452 struct tree_backref *tback;
7453 u64 owner = 0;
7455 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7456 /* we have added this extent before */
7457 if (!cache)
7458 return -ENOENT;
7460 rec = container_of(cache, struct extent_record, cache);
7463 * Except file/reloc tree, we can not have
7464 * FULL BACKREF MODE
7466 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7467 goto normal;
7469 * root node
7471 if (buf->start == ri->bytenr)
7472 goto normal;
7474 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7475 goto full_backref;
7477 owner = btrfs_header_owner(buf);
7478 if (owner == ri->objectid)
7479 goto normal;
7481 tback = find_tree_backref(rec, 0, owner);
7482 if (!tback)
7483 goto full_backref;
7484 normal:
7485 *flags = 0;
7486 if (rec->flag_block_full_backref != FLAG_UNSET &&
7487 rec->flag_block_full_backref != 0)
7488 rec->bad_full_backref = 1;
7489 return 0;
7490 full_backref:
7491 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7492 if (rec->flag_block_full_backref != FLAG_UNSET &&
7493 rec->flag_block_full_backref != 1)
7494 rec->bad_full_backref = 1;
7495 return 0;
7498 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7500 fprintf(stderr, "Invalid key type(");
7501 print_key_type(stderr, 0, key_type);
7502 fprintf(stderr, ") found in root(");
7503 print_objectid(stderr, rootid, 0);
7504 fprintf(stderr, ")\n");
7508 * Check if the key is valid with its extent buffer.
7510 * This is a early check in case invalid key exists in a extent buffer
7511 * This is not comprehensive yet, but should prevent wrong key/item passed
7512 * further
7514 static int check_type_with_root(u64 rootid, u8 key_type)
7516 switch (key_type) {
7517 /* Only valid in chunk tree */
7518 case BTRFS_DEV_ITEM_KEY:
7519 case BTRFS_CHUNK_ITEM_KEY:
7520 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7521 goto err;
7522 break;
7523 /* valid in csum and log tree */
7524 case BTRFS_CSUM_TREE_OBJECTID:
7525 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7526 is_fstree(rootid)))
7527 goto err;
7528 break;
7529 case BTRFS_EXTENT_ITEM_KEY:
7530 case BTRFS_METADATA_ITEM_KEY:
7531 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7532 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7533 goto err;
7534 break;
7535 case BTRFS_ROOT_ITEM_KEY:
7536 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7537 goto err;
7538 break;
7539 case BTRFS_DEV_EXTENT_KEY:
7540 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7541 goto err;
7542 break;
7544 return 0;
7545 err:
7546 report_mismatch_key_root(key_type, rootid);
7547 return -EINVAL;
7550 static int run_next_block(struct btrfs_root *root,
7551 struct block_info *bits,
7552 int bits_nr,
7553 u64 *last,
7554 struct cache_tree *pending,
7555 struct cache_tree *seen,
7556 struct cache_tree *reada,
7557 struct cache_tree *nodes,
7558 struct cache_tree *extent_cache,
7559 struct cache_tree *chunk_cache,
7560 struct rb_root *dev_cache,
7561 struct block_group_tree *block_group_cache,
7562 struct device_extent_tree *dev_extent_cache,
7563 struct root_item_record *ri)
7565 struct extent_buffer *buf;
7566 struct extent_record *rec = NULL;
7567 u64 bytenr;
7568 u32 size;
7569 u64 parent;
7570 u64 owner;
7571 u64 flags;
7572 u64 ptr;
7573 u64 gen = 0;
7574 int ret = 0;
7575 int i;
7576 int nritems;
7577 struct btrfs_key key;
7578 struct cache_extent *cache;
7579 int reada_bits;
7581 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7582 bits_nr, &reada_bits);
7583 if (nritems == 0)
7584 return 1;
7586 if (!reada_bits) {
7587 for(i = 0; i < nritems; i++) {
7588 ret = add_cache_extent(reada, bits[i].start,
7589 bits[i].size);
7590 if (ret == -EEXIST)
7591 continue;
7593 /* fixme, get the parent transid */
7594 readahead_tree_block(root, bits[i].start,
7595 bits[i].size, 0);
7598 *last = bits[0].start;
7599 bytenr = bits[0].start;
7600 size = bits[0].size;
7602 cache = lookup_cache_extent(pending, bytenr, size);
7603 if (cache) {
7604 remove_cache_extent(pending, cache);
7605 free(cache);
7607 cache = lookup_cache_extent(reada, bytenr, size);
7608 if (cache) {
7609 remove_cache_extent(reada, cache);
7610 free(cache);
7612 cache = lookup_cache_extent(nodes, bytenr, size);
7613 if (cache) {
7614 remove_cache_extent(nodes, cache);
7615 free(cache);
7617 cache = lookup_cache_extent(extent_cache, bytenr, size);
7618 if (cache) {
7619 rec = container_of(cache, struct extent_record, cache);
7620 gen = rec->parent_generation;
7623 /* fixme, get the real parent transid */
7624 buf = read_tree_block(root, bytenr, size, gen);
7625 if (!extent_buffer_uptodate(buf)) {
7626 record_bad_block_io(root->fs_info,
7627 extent_cache, bytenr, size);
7628 goto out;
7631 nritems = btrfs_header_nritems(buf);
7633 flags = 0;
7634 if (!init_extent_tree) {
7635 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7636 btrfs_header_level(buf), 1, NULL,
7637 &flags);
7638 if (ret < 0) {
7639 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7640 if (ret < 0) {
7641 fprintf(stderr, "Couldn't calc extent flags\n");
7642 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7645 } else {
7646 flags = 0;
7647 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7648 if (ret < 0) {
7649 fprintf(stderr, "Couldn't calc extent flags\n");
7650 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7654 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7655 if (ri != NULL &&
7656 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7657 ri->objectid == btrfs_header_owner(buf)) {
7659 * Ok we got to this block from it's original owner and
7660 * we have FULL_BACKREF set. Relocation can leave
7661 * converted blocks over so this is altogether possible,
7662 * however it's not possible if the generation > the
7663 * last snapshot, so check for this case.
7665 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7666 btrfs_header_generation(buf) > ri->last_snapshot) {
7667 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7668 rec->bad_full_backref = 1;
7671 } else {
7672 if (ri != NULL &&
7673 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7674 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7675 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7676 rec->bad_full_backref = 1;
7680 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7681 rec->flag_block_full_backref = 1;
7682 parent = bytenr;
7683 owner = 0;
7684 } else {
7685 rec->flag_block_full_backref = 0;
7686 parent = 0;
7687 owner = btrfs_header_owner(buf);
7690 ret = check_block(root, extent_cache, buf, flags);
7691 if (ret)
7692 goto out;
7694 if (btrfs_is_leaf(buf)) {
7695 btree_space_waste += btrfs_leaf_free_space(root, buf);
7696 for (i = 0; i < nritems; i++) {
7697 struct btrfs_file_extent_item *fi;
7698 btrfs_item_key_to_cpu(buf, &key, i);
7700 * Check key type against the leaf owner.
7701 * Could filter quite a lot of early error if
7702 * owner is correct
7704 if (check_type_with_root(btrfs_header_owner(buf),
7705 key.type)) {
7706 fprintf(stderr, "ignoring invalid key\n");
7707 continue;
7709 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7710 process_extent_item(root, extent_cache, buf,
7712 continue;
7714 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7715 process_extent_item(root, extent_cache, buf,
7717 continue;
7719 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7720 total_csum_bytes +=
7721 btrfs_item_size_nr(buf, i);
7722 continue;
7724 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7725 process_chunk_item(chunk_cache, &key, buf, i);
7726 continue;
7728 if (key.type == BTRFS_DEV_ITEM_KEY) {
7729 process_device_item(dev_cache, &key, buf, i);
7730 continue;
7732 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7733 process_block_group_item(block_group_cache,
7734 &key, buf, i);
7735 continue;
7737 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7738 process_device_extent_item(dev_extent_cache,
7739 &key, buf, i);
7740 continue;
7743 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7744 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7745 process_extent_ref_v0(extent_cache, buf, i);
7746 #else
7747 BUG();
7748 #endif
7749 continue;
7752 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7753 ret = add_tree_backref(extent_cache,
7754 key.objectid, 0, key.offset, 0);
7755 if (ret < 0)
7756 error("add_tree_backref failed: %s",
7757 strerror(-ret));
7758 continue;
7760 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7761 ret = add_tree_backref(extent_cache,
7762 key.objectid, key.offset, 0, 0);
7763 if (ret < 0)
7764 error("add_tree_backref failed: %s",
7765 strerror(-ret));
7766 continue;
7768 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7769 struct btrfs_extent_data_ref *ref;
7770 ref = btrfs_item_ptr(buf, i,
7771 struct btrfs_extent_data_ref);
7772 add_data_backref(extent_cache,
7773 key.objectid, 0,
7774 btrfs_extent_data_ref_root(buf, ref),
7775 btrfs_extent_data_ref_objectid(buf,
7776 ref),
7777 btrfs_extent_data_ref_offset(buf, ref),
7778 btrfs_extent_data_ref_count(buf, ref),
7779 0, root->sectorsize);
7780 continue;
7782 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7783 struct btrfs_shared_data_ref *ref;
7784 ref = btrfs_item_ptr(buf, i,
7785 struct btrfs_shared_data_ref);
7786 add_data_backref(extent_cache,
7787 key.objectid, key.offset, 0, 0, 0,
7788 btrfs_shared_data_ref_count(buf, ref),
7789 0, root->sectorsize);
7790 continue;
7792 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7793 struct bad_item *bad;
7795 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7796 continue;
7797 if (!owner)
7798 continue;
7799 bad = malloc(sizeof(struct bad_item));
7800 if (!bad)
7801 continue;
7802 INIT_LIST_HEAD(&bad->list);
7803 memcpy(&bad->key, &key,
7804 sizeof(struct btrfs_key));
7805 bad->root_id = owner;
7806 list_add_tail(&bad->list, &delete_items);
7807 continue;
7809 if (key.type != BTRFS_EXTENT_DATA_KEY)
7810 continue;
7811 fi = btrfs_item_ptr(buf, i,
7812 struct btrfs_file_extent_item);
7813 if (btrfs_file_extent_type(buf, fi) ==
7814 BTRFS_FILE_EXTENT_INLINE)
7815 continue;
7816 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7817 continue;
7819 data_bytes_allocated +=
7820 btrfs_file_extent_disk_num_bytes(buf, fi);
7821 if (data_bytes_allocated < root->sectorsize) {
7822 abort();
7824 data_bytes_referenced +=
7825 btrfs_file_extent_num_bytes(buf, fi);
7826 add_data_backref(extent_cache,
7827 btrfs_file_extent_disk_bytenr(buf, fi),
7828 parent, owner, key.objectid, key.offset -
7829 btrfs_file_extent_offset(buf, fi), 1, 1,
7830 btrfs_file_extent_disk_num_bytes(buf, fi));
7832 } else {
7833 int level;
7834 struct btrfs_key first_key;
7836 first_key.objectid = 0;
7838 if (nritems > 0)
7839 btrfs_item_key_to_cpu(buf, &first_key, 0);
7840 level = btrfs_header_level(buf);
7841 for (i = 0; i < nritems; i++) {
7842 struct extent_record tmpl;
7844 ptr = btrfs_node_blockptr(buf, i);
7845 size = root->nodesize;
7846 btrfs_node_key_to_cpu(buf, &key, i);
7847 if (ri != NULL) {
7848 if ((level == ri->drop_level)
7849 && is_dropped_key(&key, &ri->drop_key)) {
7850 continue;
7854 memset(&tmpl, 0, sizeof(tmpl));
7855 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7856 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7857 tmpl.start = ptr;
7858 tmpl.nr = size;
7859 tmpl.refs = 1;
7860 tmpl.metadata = 1;
7861 tmpl.max_size = size;
7862 ret = add_extent_rec(extent_cache, &tmpl);
7863 if (ret < 0)
7864 goto out;
7866 ret = add_tree_backref(extent_cache, ptr, parent,
7867 owner, 1);
7868 if (ret < 0) {
7869 error("add_tree_backref failed: %s",
7870 strerror(-ret));
7871 continue;
7874 if (level > 1) {
7875 add_pending(nodes, seen, ptr, size);
7876 } else {
7877 add_pending(pending, seen, ptr, size);
7880 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7881 nritems) * sizeof(struct btrfs_key_ptr);
7883 total_btree_bytes += buf->len;
7884 if (fs_root_objectid(btrfs_header_owner(buf)))
7885 total_fs_tree_bytes += buf->len;
7886 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7887 total_extent_tree_bytes += buf->len;
7888 if (!found_old_backref &&
7889 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7890 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7891 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7892 found_old_backref = 1;
7893 out:
7894 free_extent_buffer(buf);
7895 return ret;
7898 static int add_root_to_pending(struct extent_buffer *buf,
7899 struct cache_tree *extent_cache,
7900 struct cache_tree *pending,
7901 struct cache_tree *seen,
7902 struct cache_tree *nodes,
7903 u64 objectid)
7905 struct extent_record tmpl;
7906 int ret;
7908 if (btrfs_header_level(buf) > 0)
7909 add_pending(nodes, seen, buf->start, buf->len);
7910 else
7911 add_pending(pending, seen, buf->start, buf->len);
7913 memset(&tmpl, 0, sizeof(tmpl));
7914 tmpl.start = buf->start;
7915 tmpl.nr = buf->len;
7916 tmpl.is_root = 1;
7917 tmpl.refs = 1;
7918 tmpl.metadata = 1;
7919 tmpl.max_size = buf->len;
7920 add_extent_rec(extent_cache, &tmpl);
7922 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7923 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7924 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7925 0, 1);
7926 else
7927 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7929 return ret;
7932 /* as we fix the tree, we might be deleting blocks that
7933 * we're tracking for repair. This hook makes sure we
7934 * remove any backrefs for blocks as we are fixing them.
7936 static int free_extent_hook(struct btrfs_trans_handle *trans,
7937 struct btrfs_root *root,
7938 u64 bytenr, u64 num_bytes, u64 parent,
7939 u64 root_objectid, u64 owner, u64 offset,
7940 int refs_to_drop)
7942 struct extent_record *rec;
7943 struct cache_extent *cache;
7944 int is_data;
7945 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7947 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7948 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7949 if (!cache)
7950 return 0;
7952 rec = container_of(cache, struct extent_record, cache);
7953 if (is_data) {
7954 struct data_backref *back;
7955 back = find_data_backref(rec, parent, root_objectid, owner,
7956 offset, 1, bytenr, num_bytes);
7957 if (!back)
7958 goto out;
7959 if (back->node.found_ref) {
7960 back->found_ref -= refs_to_drop;
7961 if (rec->refs)
7962 rec->refs -= refs_to_drop;
7964 if (back->node.found_extent_tree) {
7965 back->num_refs -= refs_to_drop;
7966 if (rec->extent_item_refs)
7967 rec->extent_item_refs -= refs_to_drop;
7969 if (back->found_ref == 0)
7970 back->node.found_ref = 0;
7971 if (back->num_refs == 0)
7972 back->node.found_extent_tree = 0;
7974 if (!back->node.found_extent_tree && back->node.found_ref) {
7975 list_del(&back->node.list);
7976 free(back);
7978 } else {
7979 struct tree_backref *back;
7980 back = find_tree_backref(rec, parent, root_objectid);
7981 if (!back)
7982 goto out;
7983 if (back->node.found_ref) {
7984 if (rec->refs)
7985 rec->refs--;
7986 back->node.found_ref = 0;
7988 if (back->node.found_extent_tree) {
7989 if (rec->extent_item_refs)
7990 rec->extent_item_refs--;
7991 back->node.found_extent_tree = 0;
7993 if (!back->node.found_extent_tree && back->node.found_ref) {
7994 list_del(&back->node.list);
7995 free(back);
7998 maybe_free_extent_rec(extent_cache, rec);
7999 out:
8000 return 0;
8003 static int delete_extent_records(struct btrfs_trans_handle *trans,
8004 struct btrfs_root *root,
8005 struct btrfs_path *path,
8006 u64 bytenr)
8008 struct btrfs_key key;
8009 struct btrfs_key found_key;
8010 struct extent_buffer *leaf;
8011 int ret;
8012 int slot;
8015 key.objectid = bytenr;
8016 key.type = (u8)-1;
8017 key.offset = (u64)-1;
8019 while(1) {
8020 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8021 &key, path, 0, 1);
8022 if (ret < 0)
8023 break;
8025 if (ret > 0) {
8026 ret = 0;
8027 if (path->slots[0] == 0)
8028 break;
8029 path->slots[0]--;
8031 ret = 0;
8033 leaf = path->nodes[0];
8034 slot = path->slots[0];
8036 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8037 if (found_key.objectid != bytenr)
8038 break;
8040 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8041 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8042 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8043 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8044 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8045 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8046 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8047 btrfs_release_path(path);
8048 if (found_key.type == 0) {
8049 if (found_key.offset == 0)
8050 break;
8051 key.offset = found_key.offset - 1;
8052 key.type = found_key.type;
8054 key.type = found_key.type - 1;
8055 key.offset = (u64)-1;
8056 continue;
8059 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8060 found_key.objectid, found_key.type, found_key.offset);
8062 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8063 if (ret)
8064 break;
8065 btrfs_release_path(path);
8067 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8068 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8069 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8070 found_key.offset : root->nodesize;
8072 ret = btrfs_update_block_group(trans, root, bytenr,
8073 bytes, 0, 0);
8074 if (ret)
8075 break;
8079 btrfs_release_path(path);
8080 return ret;
8084 * for a single backref, this will allocate a new extent
8085 * and add the backref to it.
8087 static int record_extent(struct btrfs_trans_handle *trans,
8088 struct btrfs_fs_info *info,
8089 struct btrfs_path *path,
8090 struct extent_record *rec,
8091 struct extent_backref *back,
8092 int allocated, u64 flags)
8094 int ret = 0;
8095 struct btrfs_root *extent_root = info->extent_root;
8096 struct extent_buffer *leaf;
8097 struct btrfs_key ins_key;
8098 struct btrfs_extent_item *ei;
8099 struct data_backref *dback;
8100 struct btrfs_tree_block_info *bi;
8102 if (!back->is_data)
8103 rec->max_size = max_t(u64, rec->max_size,
8104 info->extent_root->nodesize);
8106 if (!allocated) {
8107 u32 item_size = sizeof(*ei);
8109 if (!back->is_data)
8110 item_size += sizeof(*bi);
8112 ins_key.objectid = rec->start;
8113 ins_key.offset = rec->max_size;
8114 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8116 ret = btrfs_insert_empty_item(trans, extent_root, path,
8117 &ins_key, item_size);
8118 if (ret)
8119 goto fail;
8121 leaf = path->nodes[0];
8122 ei = btrfs_item_ptr(leaf, path->slots[0],
8123 struct btrfs_extent_item);
8125 btrfs_set_extent_refs(leaf, ei, 0);
8126 btrfs_set_extent_generation(leaf, ei, rec->generation);
8128 if (back->is_data) {
8129 btrfs_set_extent_flags(leaf, ei,
8130 BTRFS_EXTENT_FLAG_DATA);
8131 } else {
8132 struct btrfs_disk_key copy_key;;
8134 bi = (struct btrfs_tree_block_info *)(ei + 1);
8135 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8136 sizeof(*bi));
8138 btrfs_set_disk_key_objectid(&copy_key,
8139 rec->info_objectid);
8140 btrfs_set_disk_key_type(&copy_key, 0);
8141 btrfs_set_disk_key_offset(&copy_key, 0);
8143 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8144 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8146 btrfs_set_extent_flags(leaf, ei,
8147 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8150 btrfs_mark_buffer_dirty(leaf);
8151 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8152 rec->max_size, 1, 0);
8153 if (ret)
8154 goto fail;
8155 btrfs_release_path(path);
8158 if (back->is_data) {
8159 u64 parent;
8160 int i;
8162 dback = to_data_backref(back);
8163 if (back->full_backref)
8164 parent = dback->parent;
8165 else
8166 parent = 0;
8168 for (i = 0; i < dback->found_ref; i++) {
8169 /* if parent != 0, we're doing a full backref
8170 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8171 * just makes the backref allocator create a data
8172 * backref
8174 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8175 rec->start, rec->max_size,
8176 parent,
8177 dback->root,
8178 parent ?
8179 BTRFS_FIRST_FREE_OBJECTID :
8180 dback->owner,
8181 dback->offset);
8182 if (ret)
8183 break;
8185 fprintf(stderr, "adding new data backref"
8186 " on %llu %s %llu owner %llu"
8187 " offset %llu found %d\n",
8188 (unsigned long long)rec->start,
8189 back->full_backref ?
8190 "parent" : "root",
8191 back->full_backref ?
8192 (unsigned long long)parent :
8193 (unsigned long long)dback->root,
8194 (unsigned long long)dback->owner,
8195 (unsigned long long)dback->offset,
8196 dback->found_ref);
8197 } else {
8198 u64 parent;
8199 struct tree_backref *tback;
8201 tback = to_tree_backref(back);
8202 if (back->full_backref)
8203 parent = tback->parent;
8204 else
8205 parent = 0;
8207 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8208 rec->start, rec->max_size,
8209 parent, tback->root, 0, 0);
8210 fprintf(stderr, "adding new tree backref on "
8211 "start %llu len %llu parent %llu root %llu\n",
8212 rec->start, rec->max_size, parent, tback->root);
8214 fail:
8215 btrfs_release_path(path);
8216 return ret;
8219 static struct extent_entry *find_entry(struct list_head *entries,
8220 u64 bytenr, u64 bytes)
8222 struct extent_entry *entry = NULL;
8224 list_for_each_entry(entry, entries, list) {
8225 if (entry->bytenr == bytenr && entry->bytes == bytes)
8226 return entry;
8229 return NULL;
8232 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8234 struct extent_entry *entry, *best = NULL, *prev = NULL;
8236 list_for_each_entry(entry, entries, list) {
8238 * If there are as many broken entries as entries then we know
8239 * not to trust this particular entry.
8241 if (entry->broken == entry->count)
8242 continue;
8245 * Special case, when there are only two entries and 'best' is
8246 * the first one
8248 if (!prev) {
8249 best = entry;
8250 prev = entry;
8251 continue;
8255 * If our current entry == best then we can't be sure our best
8256 * is really the best, so we need to keep searching.
8258 if (best && best->count == entry->count) {
8259 prev = entry;
8260 best = NULL;
8261 continue;
8264 /* Prev == entry, not good enough, have to keep searching */
8265 if (!prev->broken && prev->count == entry->count)
8266 continue;
8268 if (!best)
8269 best = (prev->count > entry->count) ? prev : entry;
8270 else if (best->count < entry->count)
8271 best = entry;
8272 prev = entry;
8275 return best;
8278 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8279 struct data_backref *dback, struct extent_entry *entry)
8281 struct btrfs_trans_handle *trans;
8282 struct btrfs_root *root;
8283 struct btrfs_file_extent_item *fi;
8284 struct extent_buffer *leaf;
8285 struct btrfs_key key;
8286 u64 bytenr, bytes;
8287 int ret, err;
8289 key.objectid = dback->root;
8290 key.type = BTRFS_ROOT_ITEM_KEY;
8291 key.offset = (u64)-1;
8292 root = btrfs_read_fs_root(info, &key);
8293 if (IS_ERR(root)) {
8294 fprintf(stderr, "Couldn't find root for our ref\n");
8295 return -EINVAL;
8299 * The backref points to the original offset of the extent if it was
8300 * split, so we need to search down to the offset we have and then walk
8301 * forward until we find the backref we're looking for.
8303 key.objectid = dback->owner;
8304 key.type = BTRFS_EXTENT_DATA_KEY;
8305 key.offset = dback->offset;
8306 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8307 if (ret < 0) {
8308 fprintf(stderr, "Error looking up ref %d\n", ret);
8309 return ret;
8312 while (1) {
8313 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8314 ret = btrfs_next_leaf(root, path);
8315 if (ret) {
8316 fprintf(stderr, "Couldn't find our ref, next\n");
8317 return -EINVAL;
8320 leaf = path->nodes[0];
8321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8322 if (key.objectid != dback->owner ||
8323 key.type != BTRFS_EXTENT_DATA_KEY) {
8324 fprintf(stderr, "Couldn't find our ref, search\n");
8325 return -EINVAL;
8327 fi = btrfs_item_ptr(leaf, path->slots[0],
8328 struct btrfs_file_extent_item);
8329 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8330 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8332 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8333 break;
8334 path->slots[0]++;
8337 btrfs_release_path(path);
8339 trans = btrfs_start_transaction(root, 1);
8340 if (IS_ERR(trans))
8341 return PTR_ERR(trans);
8344 * Ok we have the key of the file extent we want to fix, now we can cow
8345 * down to the thing and fix it.
8347 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8348 if (ret < 0) {
8349 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8350 key.objectid, key.type, key.offset, ret);
8351 goto out;
8353 if (ret > 0) {
8354 fprintf(stderr, "Well that's odd, we just found this key "
8355 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8356 key.offset);
8357 ret = -EINVAL;
8358 goto out;
8360 leaf = path->nodes[0];
8361 fi = btrfs_item_ptr(leaf, path->slots[0],
8362 struct btrfs_file_extent_item);
8364 if (btrfs_file_extent_compression(leaf, fi) &&
8365 dback->disk_bytenr != entry->bytenr) {
8366 fprintf(stderr, "Ref doesn't match the record start and is "
8367 "compressed, please take a btrfs-image of this file "
8368 "system and send it to a btrfs developer so they can "
8369 "complete this functionality for bytenr %Lu\n",
8370 dback->disk_bytenr);
8371 ret = -EINVAL;
8372 goto out;
8375 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8376 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8377 } else if (dback->disk_bytenr > entry->bytenr) {
8378 u64 off_diff, offset;
8380 off_diff = dback->disk_bytenr - entry->bytenr;
8381 offset = btrfs_file_extent_offset(leaf, fi);
8382 if (dback->disk_bytenr + offset +
8383 btrfs_file_extent_num_bytes(leaf, fi) >
8384 entry->bytenr + entry->bytes) {
8385 fprintf(stderr, "Ref is past the entry end, please "
8386 "take a btrfs-image of this file system and "
8387 "send it to a btrfs developer, ref %Lu\n",
8388 dback->disk_bytenr);
8389 ret = -EINVAL;
8390 goto out;
8392 offset += off_diff;
8393 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8394 btrfs_set_file_extent_offset(leaf, fi, offset);
8395 } else if (dback->disk_bytenr < entry->bytenr) {
8396 u64 offset;
8398 offset = btrfs_file_extent_offset(leaf, fi);
8399 if (dback->disk_bytenr + offset < entry->bytenr) {
8400 fprintf(stderr, "Ref is before the entry start, please"
8401 " take a btrfs-image of this file system and "
8402 "send it to a btrfs developer, ref %Lu\n",
8403 dback->disk_bytenr);
8404 ret = -EINVAL;
8405 goto out;
8408 offset += dback->disk_bytenr;
8409 offset -= entry->bytenr;
8410 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8411 btrfs_set_file_extent_offset(leaf, fi, offset);
8414 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8417 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8418 * only do this if we aren't using compression, otherwise it's a
8419 * trickier case.
8421 if (!btrfs_file_extent_compression(leaf, fi))
8422 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8423 else
8424 printf("ram bytes may be wrong?\n");
8425 btrfs_mark_buffer_dirty(leaf);
8426 out:
8427 err = btrfs_commit_transaction(trans, root);
8428 btrfs_release_path(path);
8429 return ret ? ret : err;
8432 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8433 struct extent_record *rec)
8435 struct extent_backref *back;
8436 struct data_backref *dback;
8437 struct extent_entry *entry, *best = NULL;
8438 LIST_HEAD(entries);
8439 int nr_entries = 0;
8440 int broken_entries = 0;
8441 int ret = 0;
8442 short mismatch = 0;
8445 * Metadata is easy and the backrefs should always agree on bytenr and
8446 * size, if not we've got bigger issues.
8448 if (rec->metadata)
8449 return 0;
8451 list_for_each_entry(back, &rec->backrefs, list) {
8452 if (back->full_backref || !back->is_data)
8453 continue;
8455 dback = to_data_backref(back);
8458 * We only pay attention to backrefs that we found a real
8459 * backref for.
8461 if (dback->found_ref == 0)
8462 continue;
8465 * For now we only catch when the bytes don't match, not the
8466 * bytenr. We can easily do this at the same time, but I want
8467 * to have a fs image to test on before we just add repair
8468 * functionality willy-nilly so we know we won't screw up the
8469 * repair.
8472 entry = find_entry(&entries, dback->disk_bytenr,
8473 dback->bytes);
8474 if (!entry) {
8475 entry = malloc(sizeof(struct extent_entry));
8476 if (!entry) {
8477 ret = -ENOMEM;
8478 goto out;
8480 memset(entry, 0, sizeof(*entry));
8481 entry->bytenr = dback->disk_bytenr;
8482 entry->bytes = dback->bytes;
8483 list_add_tail(&entry->list, &entries);
8484 nr_entries++;
8488 * If we only have on entry we may think the entries agree when
8489 * in reality they don't so we have to do some extra checking.
8491 if (dback->disk_bytenr != rec->start ||
8492 dback->bytes != rec->nr || back->broken)
8493 mismatch = 1;
8495 if (back->broken) {
8496 entry->broken++;
8497 broken_entries++;
8500 entry->count++;
8503 /* Yay all the backrefs agree, carry on good sir */
8504 if (nr_entries <= 1 && !mismatch)
8505 goto out;
8507 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8508 "%Lu\n", rec->start);
8511 * First we want to see if the backrefs can agree amongst themselves who
8512 * is right, so figure out which one of the entries has the highest
8513 * count.
8515 best = find_most_right_entry(&entries);
8518 * Ok so we may have an even split between what the backrefs think, so
8519 * this is where we use the extent ref to see what it thinks.
8521 if (!best) {
8522 entry = find_entry(&entries, rec->start, rec->nr);
8523 if (!entry && (!broken_entries || !rec->found_rec)) {
8524 fprintf(stderr, "Backrefs don't agree with each other "
8525 "and extent record doesn't agree with anybody,"
8526 " so we can't fix bytenr %Lu bytes %Lu\n",
8527 rec->start, rec->nr);
8528 ret = -EINVAL;
8529 goto out;
8530 } else if (!entry) {
8532 * Ok our backrefs were broken, we'll assume this is the
8533 * correct value and add an entry for this range.
8535 entry = malloc(sizeof(struct extent_entry));
8536 if (!entry) {
8537 ret = -ENOMEM;
8538 goto out;
8540 memset(entry, 0, sizeof(*entry));
8541 entry->bytenr = rec->start;
8542 entry->bytes = rec->nr;
8543 list_add_tail(&entry->list, &entries);
8544 nr_entries++;
8546 entry->count++;
8547 best = find_most_right_entry(&entries);
8548 if (!best) {
8549 fprintf(stderr, "Backrefs and extent record evenly "
8550 "split on who is right, this is going to "
8551 "require user input to fix bytenr %Lu bytes "
8552 "%Lu\n", rec->start, rec->nr);
8553 ret = -EINVAL;
8554 goto out;
8559 * I don't think this can happen currently as we'll abort() if we catch
8560 * this case higher up, but in case somebody removes that we still can't
8561 * deal with it properly here yet, so just bail out of that's the case.
8563 if (best->bytenr != rec->start) {
8564 fprintf(stderr, "Extent start and backref starts don't match, "
8565 "please use btrfs-image on this file system and send "
8566 "it to a btrfs developer so they can make fsck fix "
8567 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8568 rec->start, rec->nr);
8569 ret = -EINVAL;
8570 goto out;
8574 * Ok great we all agreed on an extent record, let's go find the real
8575 * references and fix up the ones that don't match.
8577 list_for_each_entry(back, &rec->backrefs, list) {
8578 if (back->full_backref || !back->is_data)
8579 continue;
8581 dback = to_data_backref(back);
8584 * Still ignoring backrefs that don't have a real ref attached
8585 * to them.
8587 if (dback->found_ref == 0)
8588 continue;
8590 if (dback->bytes == best->bytes &&
8591 dback->disk_bytenr == best->bytenr)
8592 continue;
8594 ret = repair_ref(info, path, dback, best);
8595 if (ret)
8596 goto out;
8600 * Ok we messed with the actual refs, which means we need to drop our
8601 * entire cache and go back and rescan. I know this is a huge pain and
8602 * adds a lot of extra work, but it's the only way to be safe. Once all
8603 * the backrefs agree we may not need to do anything to the extent
8604 * record itself.
8606 ret = -EAGAIN;
8607 out:
8608 while (!list_empty(&entries)) {
8609 entry = list_entry(entries.next, struct extent_entry, list);
8610 list_del_init(&entry->list);
8611 free(entry);
8613 return ret;
8616 static int process_duplicates(struct cache_tree *extent_cache,
8617 struct extent_record *rec)
8619 struct extent_record *good, *tmp;
8620 struct cache_extent *cache;
8621 int ret;
8624 * If we found a extent record for this extent then return, or if we
8625 * have more than one duplicate we are likely going to need to delete
8626 * something.
8628 if (rec->found_rec || rec->num_duplicates > 1)
8629 return 0;
8631 /* Shouldn't happen but just in case */
8632 BUG_ON(!rec->num_duplicates);
8635 * So this happens if we end up with a backref that doesn't match the
8636 * actual extent entry. So either the backref is bad or the extent
8637 * entry is bad. Either way we want to have the extent_record actually
8638 * reflect what we found in the extent_tree, so we need to take the
8639 * duplicate out and use that as the extent_record since the only way we
8640 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8642 remove_cache_extent(extent_cache, &rec->cache);
8644 good = to_extent_record(rec->dups.next);
8645 list_del_init(&good->list);
8646 INIT_LIST_HEAD(&good->backrefs);
8647 INIT_LIST_HEAD(&good->dups);
8648 good->cache.start = good->start;
8649 good->cache.size = good->nr;
8650 good->content_checked = 0;
8651 good->owner_ref_checked = 0;
8652 good->num_duplicates = 0;
8653 good->refs = rec->refs;
8654 list_splice_init(&rec->backrefs, &good->backrefs);
8655 while (1) {
8656 cache = lookup_cache_extent(extent_cache, good->start,
8657 good->nr);
8658 if (!cache)
8659 break;
8660 tmp = container_of(cache, struct extent_record, cache);
8663 * If we find another overlapping extent and it's found_rec is
8664 * set then it's a duplicate and we need to try and delete
8665 * something.
8667 if (tmp->found_rec || tmp->num_duplicates > 0) {
8668 if (list_empty(&good->list))
8669 list_add_tail(&good->list,
8670 &duplicate_extents);
8671 good->num_duplicates += tmp->num_duplicates + 1;
8672 list_splice_init(&tmp->dups, &good->dups);
8673 list_del_init(&tmp->list);
8674 list_add_tail(&tmp->list, &good->dups);
8675 remove_cache_extent(extent_cache, &tmp->cache);
8676 continue;
8680 * Ok we have another non extent item backed extent rec, so lets
8681 * just add it to this extent and carry on like we did above.
8683 good->refs += tmp->refs;
8684 list_splice_init(&tmp->backrefs, &good->backrefs);
8685 remove_cache_extent(extent_cache, &tmp->cache);
8686 free(tmp);
8688 ret = insert_cache_extent(extent_cache, &good->cache);
8689 BUG_ON(ret);
8690 free(rec);
8691 return good->num_duplicates ? 0 : 1;
8694 static int delete_duplicate_records(struct btrfs_root *root,
8695 struct extent_record *rec)
8697 struct btrfs_trans_handle *trans;
8698 LIST_HEAD(delete_list);
8699 struct btrfs_path path;
8700 struct extent_record *tmp, *good, *n;
8701 int nr_del = 0;
8702 int ret = 0, err;
8703 struct btrfs_key key;
8705 btrfs_init_path(&path);
8707 good = rec;
8708 /* Find the record that covers all of the duplicates. */
8709 list_for_each_entry(tmp, &rec->dups, list) {
8710 if (good->start < tmp->start)
8711 continue;
8712 if (good->nr > tmp->nr)
8713 continue;
8715 if (tmp->start + tmp->nr < good->start + good->nr) {
8716 fprintf(stderr, "Ok we have overlapping extents that "
8717 "aren't completely covered by each other, this "
8718 "is going to require more careful thought. "
8719 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8720 tmp->start, tmp->nr, good->start, good->nr);
8721 abort();
8723 good = tmp;
8726 if (good != rec)
8727 list_add_tail(&rec->list, &delete_list);
8729 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8730 if (tmp == good)
8731 continue;
8732 list_move_tail(&tmp->list, &delete_list);
8735 root = root->fs_info->extent_root;
8736 trans = btrfs_start_transaction(root, 1);
8737 if (IS_ERR(trans)) {
8738 ret = PTR_ERR(trans);
8739 goto out;
8742 list_for_each_entry(tmp, &delete_list, list) {
8743 if (tmp->found_rec == 0)
8744 continue;
8745 key.objectid = tmp->start;
8746 key.type = BTRFS_EXTENT_ITEM_KEY;
8747 key.offset = tmp->nr;
8749 /* Shouldn't happen but just in case */
8750 if (tmp->metadata) {
8751 fprintf(stderr, "Well this shouldn't happen, extent "
8752 "record overlaps but is metadata? "
8753 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8754 abort();
8757 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8758 if (ret) {
8759 if (ret > 0)
8760 ret = -EINVAL;
8761 break;
8763 ret = btrfs_del_item(trans, root, &path);
8764 if (ret)
8765 break;
8766 btrfs_release_path(&path);
8767 nr_del++;
8769 err = btrfs_commit_transaction(trans, root);
8770 if (err && !ret)
8771 ret = err;
8772 out:
8773 while (!list_empty(&delete_list)) {
8774 tmp = to_extent_record(delete_list.next);
8775 list_del_init(&tmp->list);
8776 if (tmp == rec)
8777 continue;
8778 free(tmp);
8781 while (!list_empty(&rec->dups)) {
8782 tmp = to_extent_record(rec->dups.next);
8783 list_del_init(&tmp->list);
8784 free(tmp);
8787 btrfs_release_path(&path);
8789 if (!ret && !nr_del)
8790 rec->num_duplicates = 0;
8792 return ret ? ret : nr_del;
8795 static int find_possible_backrefs(struct btrfs_fs_info *info,
8796 struct btrfs_path *path,
8797 struct cache_tree *extent_cache,
8798 struct extent_record *rec)
8800 struct btrfs_root *root;
8801 struct extent_backref *back;
8802 struct data_backref *dback;
8803 struct cache_extent *cache;
8804 struct btrfs_file_extent_item *fi;
8805 struct btrfs_key key;
8806 u64 bytenr, bytes;
8807 int ret;
8809 list_for_each_entry(back, &rec->backrefs, list) {
8810 /* Don't care about full backrefs (poor unloved backrefs) */
8811 if (back->full_backref || !back->is_data)
8812 continue;
8814 dback = to_data_backref(back);
8816 /* We found this one, we don't need to do a lookup */
8817 if (dback->found_ref)
8818 continue;
8820 key.objectid = dback->root;
8821 key.type = BTRFS_ROOT_ITEM_KEY;
8822 key.offset = (u64)-1;
8824 root = btrfs_read_fs_root(info, &key);
8826 /* No root, definitely a bad ref, skip */
8827 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8828 continue;
8829 /* Other err, exit */
8830 if (IS_ERR(root))
8831 return PTR_ERR(root);
8833 key.objectid = dback->owner;
8834 key.type = BTRFS_EXTENT_DATA_KEY;
8835 key.offset = dback->offset;
8836 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8837 if (ret) {
8838 btrfs_release_path(path);
8839 if (ret < 0)
8840 return ret;
8841 /* Didn't find it, we can carry on */
8842 ret = 0;
8843 continue;
8846 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8847 struct btrfs_file_extent_item);
8848 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8849 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8850 btrfs_release_path(path);
8851 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8852 if (cache) {
8853 struct extent_record *tmp;
8854 tmp = container_of(cache, struct extent_record, cache);
8857 * If we found an extent record for the bytenr for this
8858 * particular backref then we can't add it to our
8859 * current extent record. We only want to add backrefs
8860 * that don't have a corresponding extent item in the
8861 * extent tree since they likely belong to this record
8862 * and we need to fix it if it doesn't match bytenrs.
8864 if (tmp->found_rec)
8865 continue;
8868 dback->found_ref += 1;
8869 dback->disk_bytenr = bytenr;
8870 dback->bytes = bytes;
8873 * Set this so the verify backref code knows not to trust the
8874 * values in this backref.
8876 back->broken = 1;
8879 return 0;
8883 * Record orphan data ref into corresponding root.
8885 * Return 0 if the extent item contains data ref and recorded.
8886 * Return 1 if the extent item contains no useful data ref
8887 * On that case, it may contains only shared_dataref or metadata backref
8888 * or the file extent exists(this should be handled by the extent bytenr
8889 * recovery routine)
8890 * Return <0 if something goes wrong.
8892 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8893 struct extent_record *rec)
8895 struct btrfs_key key;
8896 struct btrfs_root *dest_root;
8897 struct extent_backref *back;
8898 struct data_backref *dback;
8899 struct orphan_data_extent *orphan;
8900 struct btrfs_path path;
8901 int recorded_data_ref = 0;
8902 int ret = 0;
8904 if (rec->metadata)
8905 return 1;
8906 btrfs_init_path(&path);
8907 list_for_each_entry(back, &rec->backrefs, list) {
8908 if (back->full_backref || !back->is_data ||
8909 !back->found_extent_tree)
8910 continue;
8911 dback = to_data_backref(back);
8912 if (dback->found_ref)
8913 continue;
8914 key.objectid = dback->root;
8915 key.type = BTRFS_ROOT_ITEM_KEY;
8916 key.offset = (u64)-1;
8918 dest_root = btrfs_read_fs_root(fs_info, &key);
8920 /* For non-exist root we just skip it */
8921 if (IS_ERR(dest_root) || !dest_root)
8922 continue;
8924 key.objectid = dback->owner;
8925 key.type = BTRFS_EXTENT_DATA_KEY;
8926 key.offset = dback->offset;
8928 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8929 btrfs_release_path(&path);
8931 * For ret < 0, it's OK since the fs-tree may be corrupted,
8932 * we need to record it for inode/file extent rebuild.
8933 * For ret > 0, we record it only for file extent rebuild.
8934 * For ret == 0, the file extent exists but only bytenr
8935 * mismatch, let the original bytenr fix routine to handle,
8936 * don't record it.
8938 if (ret == 0)
8939 continue;
8940 ret = 0;
8941 orphan = malloc(sizeof(*orphan));
8942 if (!orphan) {
8943 ret = -ENOMEM;
8944 goto out;
8946 INIT_LIST_HEAD(&orphan->list);
8947 orphan->root = dback->root;
8948 orphan->objectid = dback->owner;
8949 orphan->offset = dback->offset;
8950 orphan->disk_bytenr = rec->cache.start;
8951 orphan->disk_len = rec->cache.size;
8952 list_add(&dest_root->orphan_data_extents, &orphan->list);
8953 recorded_data_ref = 1;
8955 out:
8956 btrfs_release_path(&path);
8957 if (!ret)
8958 return !recorded_data_ref;
8959 else
8960 return ret;
8964 * when an incorrect extent item is found, this will delete
8965 * all of the existing entries for it and recreate them
8966 * based on what the tree scan found.
8968 static int fixup_extent_refs(struct btrfs_fs_info *info,
8969 struct cache_tree *extent_cache,
8970 struct extent_record *rec)
8972 struct btrfs_trans_handle *trans = NULL;
8973 int ret;
8974 struct btrfs_path path;
8975 struct list_head *cur = rec->backrefs.next;
8976 struct cache_extent *cache;
8977 struct extent_backref *back;
8978 int allocated = 0;
8979 u64 flags = 0;
8981 if (rec->flag_block_full_backref)
8982 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8984 btrfs_init_path(&path);
8985 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8987 * Sometimes the backrefs themselves are so broken they don't
8988 * get attached to any meaningful rec, so first go back and
8989 * check any of our backrefs that we couldn't find and throw
8990 * them into the list if we find the backref so that
8991 * verify_backrefs can figure out what to do.
8993 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8994 if (ret < 0)
8995 goto out;
8998 /* step one, make sure all of the backrefs agree */
8999 ret = verify_backrefs(info, &path, rec);
9000 if (ret < 0)
9001 goto out;
9003 trans = btrfs_start_transaction(info->extent_root, 1);
9004 if (IS_ERR(trans)) {
9005 ret = PTR_ERR(trans);
9006 goto out;
9009 /* step two, delete all the existing records */
9010 ret = delete_extent_records(trans, info->extent_root, &path,
9011 rec->start);
9013 if (ret < 0)
9014 goto out;
9016 /* was this block corrupt? If so, don't add references to it */
9017 cache = lookup_cache_extent(info->corrupt_blocks,
9018 rec->start, rec->max_size);
9019 if (cache) {
9020 ret = 0;
9021 goto out;
9024 /* step three, recreate all the refs we did find */
9025 while(cur != &rec->backrefs) {
9026 back = to_extent_backref(cur);
9027 cur = cur->next;
9030 * if we didn't find any references, don't create a
9031 * new extent record
9033 if (!back->found_ref)
9034 continue;
9036 rec->bad_full_backref = 0;
9037 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9038 allocated = 1;
9040 if (ret)
9041 goto out;
9043 out:
9044 if (trans) {
9045 int err = btrfs_commit_transaction(trans, info->extent_root);
9046 if (!ret)
9047 ret = err;
9050 if (!ret)
9051 fprintf(stderr, "Repaired extent references for %llu\n",
9052 (unsigned long long)rec->start);
9054 btrfs_release_path(&path);
9055 return ret;
9058 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9059 struct extent_record *rec)
9061 struct btrfs_trans_handle *trans;
9062 struct btrfs_root *root = fs_info->extent_root;
9063 struct btrfs_path path;
9064 struct btrfs_extent_item *ei;
9065 struct btrfs_key key;
9066 u64 flags;
9067 int ret = 0;
9069 key.objectid = rec->start;
9070 if (rec->metadata) {
9071 key.type = BTRFS_METADATA_ITEM_KEY;
9072 key.offset = rec->info_level;
9073 } else {
9074 key.type = BTRFS_EXTENT_ITEM_KEY;
9075 key.offset = rec->max_size;
9078 trans = btrfs_start_transaction(root, 0);
9079 if (IS_ERR(trans))
9080 return PTR_ERR(trans);
9082 btrfs_init_path(&path);
9083 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9084 if (ret < 0) {
9085 btrfs_release_path(&path);
9086 btrfs_commit_transaction(trans, root);
9087 return ret;
9088 } else if (ret) {
9089 fprintf(stderr, "Didn't find extent for %llu\n",
9090 (unsigned long long)rec->start);
9091 btrfs_release_path(&path);
9092 btrfs_commit_transaction(trans, root);
9093 return -ENOENT;
9096 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9097 struct btrfs_extent_item);
9098 flags = btrfs_extent_flags(path.nodes[0], ei);
9099 if (rec->flag_block_full_backref) {
9100 fprintf(stderr, "setting full backref on %llu\n",
9101 (unsigned long long)key.objectid);
9102 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9103 } else {
9104 fprintf(stderr, "clearing full backref on %llu\n",
9105 (unsigned long long)key.objectid);
9106 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9108 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9109 btrfs_mark_buffer_dirty(path.nodes[0]);
9110 btrfs_release_path(&path);
9111 ret = btrfs_commit_transaction(trans, root);
9112 if (!ret)
9113 fprintf(stderr, "Repaired extent flags for %llu\n",
9114 (unsigned long long)rec->start);
9116 return ret;
9119 /* right now we only prune from the extent allocation tree */
9120 static int prune_one_block(struct btrfs_trans_handle *trans,
9121 struct btrfs_fs_info *info,
9122 struct btrfs_corrupt_block *corrupt)
9124 int ret;
9125 struct btrfs_path path;
9126 struct extent_buffer *eb;
9127 u64 found;
9128 int slot;
9129 int nritems;
9130 int level = corrupt->level + 1;
9132 btrfs_init_path(&path);
9133 again:
9134 /* we want to stop at the parent to our busted block */
9135 path.lowest_level = level;
9137 ret = btrfs_search_slot(trans, info->extent_root,
9138 &corrupt->key, &path, -1, 1);
9140 if (ret < 0)
9141 goto out;
9143 eb = path.nodes[level];
9144 if (!eb) {
9145 ret = -ENOENT;
9146 goto out;
9150 * hopefully the search gave us the block we want to prune,
9151 * lets try that first
9153 slot = path.slots[level];
9154 found = btrfs_node_blockptr(eb, slot);
9155 if (found == corrupt->cache.start)
9156 goto del_ptr;
9158 nritems = btrfs_header_nritems(eb);
9160 /* the search failed, lets scan this node and hope we find it */
9161 for (slot = 0; slot < nritems; slot++) {
9162 found = btrfs_node_blockptr(eb, slot);
9163 if (found == corrupt->cache.start)
9164 goto del_ptr;
9167 * we couldn't find the bad block. TODO, search all the nodes for pointers
9168 * to this block
9170 if (eb == info->extent_root->node) {
9171 ret = -ENOENT;
9172 goto out;
9173 } else {
9174 level++;
9175 btrfs_release_path(&path);
9176 goto again;
9179 del_ptr:
9180 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9181 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9183 out:
9184 btrfs_release_path(&path);
9185 return ret;
9188 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9190 struct btrfs_trans_handle *trans = NULL;
9191 struct cache_extent *cache;
9192 struct btrfs_corrupt_block *corrupt;
9194 while (1) {
9195 cache = search_cache_extent(info->corrupt_blocks, 0);
9196 if (!cache)
9197 break;
9198 if (!trans) {
9199 trans = btrfs_start_transaction(info->extent_root, 1);
9200 if (IS_ERR(trans))
9201 return PTR_ERR(trans);
9203 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9204 prune_one_block(trans, info, corrupt);
9205 remove_cache_extent(info->corrupt_blocks, cache);
9207 if (trans)
9208 return btrfs_commit_transaction(trans, info->extent_root);
9209 return 0;
9212 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9214 struct btrfs_block_group_cache *cache;
9215 u64 start, end;
9216 int ret;
9218 while (1) {
9219 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9220 &start, &end, EXTENT_DIRTY);
9221 if (ret)
9222 break;
9223 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9226 start = 0;
9227 while (1) {
9228 cache = btrfs_lookup_first_block_group(fs_info, start);
9229 if (!cache)
9230 break;
9231 if (cache->cached)
9232 cache->cached = 0;
9233 start = cache->key.objectid + cache->key.offset;
9237 static int check_extent_refs(struct btrfs_root *root,
9238 struct cache_tree *extent_cache)
9240 struct extent_record *rec;
9241 struct cache_extent *cache;
9242 int ret = 0;
9243 int had_dups = 0;
9245 if (repair) {
9247 * if we're doing a repair, we have to make sure
9248 * we don't allocate from the problem extents.
9249 * In the worst case, this will be all the
9250 * extents in the FS
9252 cache = search_cache_extent(extent_cache, 0);
9253 while(cache) {
9254 rec = container_of(cache, struct extent_record, cache);
9255 set_extent_dirty(root->fs_info->excluded_extents,
9256 rec->start,
9257 rec->start + rec->max_size - 1);
9258 cache = next_cache_extent(cache);
9261 /* pin down all the corrupted blocks too */
9262 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9263 while(cache) {
9264 set_extent_dirty(root->fs_info->excluded_extents,
9265 cache->start,
9266 cache->start + cache->size - 1);
9267 cache = next_cache_extent(cache);
9269 prune_corrupt_blocks(root->fs_info);
9270 reset_cached_block_groups(root->fs_info);
9273 reset_cached_block_groups(root->fs_info);
9276 * We need to delete any duplicate entries we find first otherwise we
9277 * could mess up the extent tree when we have backrefs that actually
9278 * belong to a different extent item and not the weird duplicate one.
9280 while (repair && !list_empty(&duplicate_extents)) {
9281 rec = to_extent_record(duplicate_extents.next);
9282 list_del_init(&rec->list);
9284 /* Sometimes we can find a backref before we find an actual
9285 * extent, so we need to process it a little bit to see if there
9286 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9287 * if this is a backref screwup. If we need to delete stuff
9288 * process_duplicates() will return 0, otherwise it will return
9289 * 1 and we
9291 if (process_duplicates(extent_cache, rec))
9292 continue;
9293 ret = delete_duplicate_records(root, rec);
9294 if (ret < 0)
9295 return ret;
9297 * delete_duplicate_records will return the number of entries
9298 * deleted, so if it's greater than 0 then we know we actually
9299 * did something and we need to remove.
9301 if (ret)
9302 had_dups = 1;
9305 if (had_dups)
9306 return -EAGAIN;
9308 while(1) {
9309 int cur_err = 0;
9310 int fix = 0;
9312 cache = search_cache_extent(extent_cache, 0);
9313 if (!cache)
9314 break;
9315 rec = container_of(cache, struct extent_record, cache);
9316 if (rec->num_duplicates) {
9317 fprintf(stderr, "extent item %llu has multiple extent "
9318 "items\n", (unsigned long long)rec->start);
9319 cur_err = 1;
9322 if (rec->refs != rec->extent_item_refs) {
9323 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9324 (unsigned long long)rec->start,
9325 (unsigned long long)rec->nr);
9326 fprintf(stderr, "extent item %llu, found %llu\n",
9327 (unsigned long long)rec->extent_item_refs,
9328 (unsigned long long)rec->refs);
9329 ret = record_orphan_data_extents(root->fs_info, rec);
9330 if (ret < 0)
9331 goto repair_abort;
9332 fix = ret;
9333 cur_err = 1;
9335 if (all_backpointers_checked(rec, 1)) {
9336 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9337 (unsigned long long)rec->start,
9338 (unsigned long long)rec->nr);
9339 fix = 1;
9340 cur_err = 1;
9342 if (!rec->owner_ref_checked) {
9343 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9344 (unsigned long long)rec->start,
9345 (unsigned long long)rec->nr);
9346 fix = 1;
9347 cur_err = 1;
9350 if (repair && fix) {
9351 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9352 if (ret)
9353 goto repair_abort;
9357 if (rec->bad_full_backref) {
9358 fprintf(stderr, "bad full backref, on [%llu]\n",
9359 (unsigned long long)rec->start);
9360 if (repair) {
9361 ret = fixup_extent_flags(root->fs_info, rec);
9362 if (ret)
9363 goto repair_abort;
9364 fix = 1;
9366 cur_err = 1;
9369 * Although it's not a extent ref's problem, we reuse this
9370 * routine for error reporting.
9371 * No repair function yet.
9373 if (rec->crossing_stripes) {
9374 fprintf(stderr,
9375 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9376 rec->start, rec->start + rec->max_size);
9377 cur_err = 1;
9380 if (rec->wrong_chunk_type) {
9381 fprintf(stderr,
9382 "bad extent [%llu, %llu), type mismatch with chunk\n",
9383 rec->start, rec->start + rec->max_size);
9384 cur_err = 1;
9387 remove_cache_extent(extent_cache, cache);
9388 free_all_extent_backrefs(rec);
9389 if (!init_extent_tree && repair && (!cur_err || fix))
9390 clear_extent_dirty(root->fs_info->excluded_extents,
9391 rec->start,
9392 rec->start + rec->max_size - 1);
9393 free(rec);
9395 repair_abort:
9396 if (repair) {
9397 if (ret && ret != -EAGAIN) {
9398 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9399 exit(1);
9400 } else if (!ret) {
9401 struct btrfs_trans_handle *trans;
9403 root = root->fs_info->extent_root;
9404 trans = btrfs_start_transaction(root, 1);
9405 if (IS_ERR(trans)) {
9406 ret = PTR_ERR(trans);
9407 goto repair_abort;
9410 btrfs_fix_block_accounting(trans, root);
9411 ret = btrfs_commit_transaction(trans, root);
9412 if (ret)
9413 goto repair_abort;
9415 return ret;
9417 return 0;
9420 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9422 u64 stripe_size;
9424 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9425 stripe_size = length;
9426 stripe_size /= num_stripes;
9427 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9428 stripe_size = length * 2;
9429 stripe_size /= num_stripes;
9430 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9431 stripe_size = length;
9432 stripe_size /= (num_stripes - 1);
9433 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9434 stripe_size = length;
9435 stripe_size /= (num_stripes - 2);
9436 } else {
9437 stripe_size = length;
9439 return stripe_size;
9443 * Check the chunk with its block group/dev list ref:
9444 * Return 0 if all refs seems valid.
9445 * Return 1 if part of refs seems valid, need later check for rebuild ref
9446 * like missing block group and needs to search extent tree to rebuild them.
9447 * Return -1 if essential refs are missing and unable to rebuild.
9449 static int check_chunk_refs(struct chunk_record *chunk_rec,
9450 struct block_group_tree *block_group_cache,
9451 struct device_extent_tree *dev_extent_cache,
9452 int silent)
9454 struct cache_extent *block_group_item;
9455 struct block_group_record *block_group_rec;
9456 struct cache_extent *dev_extent_item;
9457 struct device_extent_record *dev_extent_rec;
9458 u64 devid;
9459 u64 offset;
9460 u64 length;
9461 int metadump_v2 = 0;
9462 int i;
9463 int ret = 0;
9465 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9466 chunk_rec->offset,
9467 chunk_rec->length);
9468 if (block_group_item) {
9469 block_group_rec = container_of(block_group_item,
9470 struct block_group_record,
9471 cache);
9472 if (chunk_rec->length != block_group_rec->offset ||
9473 chunk_rec->offset != block_group_rec->objectid ||
9474 (!metadump_v2 &&
9475 chunk_rec->type_flags != block_group_rec->flags)) {
9476 if (!silent)
9477 fprintf(stderr,
9478 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9479 chunk_rec->objectid,
9480 chunk_rec->type,
9481 chunk_rec->offset,
9482 chunk_rec->length,
9483 chunk_rec->offset,
9484 chunk_rec->type_flags,
9485 block_group_rec->objectid,
9486 block_group_rec->type,
9487 block_group_rec->offset,
9488 block_group_rec->offset,
9489 block_group_rec->objectid,
9490 block_group_rec->flags);
9491 ret = -1;
9492 } else {
9493 list_del_init(&block_group_rec->list);
9494 chunk_rec->bg_rec = block_group_rec;
9496 } else {
9497 if (!silent)
9498 fprintf(stderr,
9499 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9500 chunk_rec->objectid,
9501 chunk_rec->type,
9502 chunk_rec->offset,
9503 chunk_rec->length,
9504 chunk_rec->offset,
9505 chunk_rec->type_flags);
9506 ret = 1;
9509 if (metadump_v2)
9510 return ret;
9512 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9513 chunk_rec->num_stripes);
9514 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9515 devid = chunk_rec->stripes[i].devid;
9516 offset = chunk_rec->stripes[i].offset;
9517 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9518 devid, offset, length);
9519 if (dev_extent_item) {
9520 dev_extent_rec = container_of(dev_extent_item,
9521 struct device_extent_record,
9522 cache);
9523 if (dev_extent_rec->objectid != devid ||
9524 dev_extent_rec->offset != offset ||
9525 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9526 dev_extent_rec->length != length) {
9527 if (!silent)
9528 fprintf(stderr,
9529 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9530 chunk_rec->objectid,
9531 chunk_rec->type,
9532 chunk_rec->offset,
9533 chunk_rec->stripes[i].devid,
9534 chunk_rec->stripes[i].offset,
9535 dev_extent_rec->objectid,
9536 dev_extent_rec->offset,
9537 dev_extent_rec->length);
9538 ret = -1;
9539 } else {
9540 list_move(&dev_extent_rec->chunk_list,
9541 &chunk_rec->dextents);
9543 } else {
9544 if (!silent)
9545 fprintf(stderr,
9546 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9547 chunk_rec->objectid,
9548 chunk_rec->type,
9549 chunk_rec->offset,
9550 chunk_rec->stripes[i].devid,
9551 chunk_rec->stripes[i].offset);
9552 ret = -1;
9555 return ret;
9558 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9559 int check_chunks(struct cache_tree *chunk_cache,
9560 struct block_group_tree *block_group_cache,
9561 struct device_extent_tree *dev_extent_cache,
9562 struct list_head *good, struct list_head *bad,
9563 struct list_head *rebuild, int silent)
9565 struct cache_extent *chunk_item;
9566 struct chunk_record *chunk_rec;
9567 struct block_group_record *bg_rec;
9568 struct device_extent_record *dext_rec;
9569 int err;
9570 int ret = 0;
9572 chunk_item = first_cache_extent(chunk_cache);
9573 while (chunk_item) {
9574 chunk_rec = container_of(chunk_item, struct chunk_record,
9575 cache);
9576 err = check_chunk_refs(chunk_rec, block_group_cache,
9577 dev_extent_cache, silent);
9578 if (err < 0)
9579 ret = err;
9580 if (err == 0 && good)
9581 list_add_tail(&chunk_rec->list, good);
9582 if (err > 0 && rebuild)
9583 list_add_tail(&chunk_rec->list, rebuild);
9584 if (err < 0 && bad)
9585 list_add_tail(&chunk_rec->list, bad);
9586 chunk_item = next_cache_extent(chunk_item);
9589 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9590 if (!silent)
9591 fprintf(stderr,
9592 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9593 bg_rec->objectid,
9594 bg_rec->offset,
9595 bg_rec->flags);
9596 if (!ret)
9597 ret = 1;
9600 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9601 chunk_list) {
9602 if (!silent)
9603 fprintf(stderr,
9604 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9605 dext_rec->objectid,
9606 dext_rec->offset,
9607 dext_rec->length);
9608 if (!ret)
9609 ret = 1;
9611 return ret;
9615 static int check_device_used(struct device_record *dev_rec,
9616 struct device_extent_tree *dext_cache)
9618 struct cache_extent *cache;
9619 struct device_extent_record *dev_extent_rec;
9620 u64 total_byte = 0;
9622 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9623 while (cache) {
9624 dev_extent_rec = container_of(cache,
9625 struct device_extent_record,
9626 cache);
9627 if (dev_extent_rec->objectid != dev_rec->devid)
9628 break;
9630 list_del_init(&dev_extent_rec->device_list);
9631 total_byte += dev_extent_rec->length;
9632 cache = next_cache_extent(cache);
9635 if (total_byte != dev_rec->byte_used) {
9636 fprintf(stderr,
9637 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9638 total_byte, dev_rec->byte_used, dev_rec->objectid,
9639 dev_rec->type, dev_rec->offset);
9640 return -1;
9641 } else {
9642 return 0;
9646 /* check btrfs_dev_item -> btrfs_dev_extent */
9647 static int check_devices(struct rb_root *dev_cache,
9648 struct device_extent_tree *dev_extent_cache)
9650 struct rb_node *dev_node;
9651 struct device_record *dev_rec;
9652 struct device_extent_record *dext_rec;
9653 int err;
9654 int ret = 0;
9656 dev_node = rb_first(dev_cache);
9657 while (dev_node) {
9658 dev_rec = container_of(dev_node, struct device_record, node);
9659 err = check_device_used(dev_rec, dev_extent_cache);
9660 if (err)
9661 ret = err;
9663 dev_node = rb_next(dev_node);
9665 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9666 device_list) {
9667 fprintf(stderr,
9668 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9669 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9670 if (!ret)
9671 ret = 1;
9673 return ret;
9676 static int add_root_item_to_list(struct list_head *head,
9677 u64 objectid, u64 bytenr, u64 last_snapshot,
9678 u8 level, u8 drop_level,
9679 int level_size, struct btrfs_key *drop_key)
9682 struct root_item_record *ri_rec;
9683 ri_rec = malloc(sizeof(*ri_rec));
9684 if (!ri_rec)
9685 return -ENOMEM;
9686 ri_rec->bytenr = bytenr;
9687 ri_rec->objectid = objectid;
9688 ri_rec->level = level;
9689 ri_rec->level_size = level_size;
9690 ri_rec->drop_level = drop_level;
9691 ri_rec->last_snapshot = last_snapshot;
9692 if (drop_key)
9693 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9694 list_add_tail(&ri_rec->list, head);
9696 return 0;
9699 static void free_root_item_list(struct list_head *list)
9701 struct root_item_record *ri_rec;
9703 while (!list_empty(list)) {
9704 ri_rec = list_first_entry(list, struct root_item_record,
9705 list);
9706 list_del_init(&ri_rec->list);
9707 free(ri_rec);
9711 static int deal_root_from_list(struct list_head *list,
9712 struct btrfs_root *root,
9713 struct block_info *bits,
9714 int bits_nr,
9715 struct cache_tree *pending,
9716 struct cache_tree *seen,
9717 struct cache_tree *reada,
9718 struct cache_tree *nodes,
9719 struct cache_tree *extent_cache,
9720 struct cache_tree *chunk_cache,
9721 struct rb_root *dev_cache,
9722 struct block_group_tree *block_group_cache,
9723 struct device_extent_tree *dev_extent_cache)
9725 int ret = 0;
9726 u64 last;
9728 while (!list_empty(list)) {
9729 struct root_item_record *rec;
9730 struct extent_buffer *buf;
9731 rec = list_entry(list->next,
9732 struct root_item_record, list);
9733 last = 0;
9734 buf = read_tree_block(root->fs_info->tree_root,
9735 rec->bytenr, rec->level_size, 0);
9736 if (!extent_buffer_uptodate(buf)) {
9737 free_extent_buffer(buf);
9738 ret = -EIO;
9739 break;
9741 ret = add_root_to_pending(buf, extent_cache, pending,
9742 seen, nodes, rec->objectid);
9743 if (ret < 0)
9744 break;
9746 * To rebuild extent tree, we need deal with snapshot
9747 * one by one, otherwise we deal with node firstly which
9748 * can maximize readahead.
9750 while (1) {
9751 ret = run_next_block(root, bits, bits_nr, &last,
9752 pending, seen, reada, nodes,
9753 extent_cache, chunk_cache,
9754 dev_cache, block_group_cache,
9755 dev_extent_cache, rec);
9756 if (ret != 0)
9757 break;
9759 free_extent_buffer(buf);
9760 list_del(&rec->list);
9761 free(rec);
9762 if (ret < 0)
9763 break;
9765 while (ret >= 0) {
9766 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9767 reada, nodes, extent_cache, chunk_cache,
9768 dev_cache, block_group_cache,
9769 dev_extent_cache, NULL);
9770 if (ret != 0) {
9771 if (ret > 0)
9772 ret = 0;
9773 break;
9776 return ret;
9779 static int check_chunks_and_extents(struct btrfs_root *root)
9781 struct rb_root dev_cache;
9782 struct cache_tree chunk_cache;
9783 struct block_group_tree block_group_cache;
9784 struct device_extent_tree dev_extent_cache;
9785 struct cache_tree extent_cache;
9786 struct cache_tree seen;
9787 struct cache_tree pending;
9788 struct cache_tree reada;
9789 struct cache_tree nodes;
9790 struct extent_io_tree excluded_extents;
9791 struct cache_tree corrupt_blocks;
9792 struct btrfs_path path;
9793 struct btrfs_key key;
9794 struct btrfs_key found_key;
9795 int ret, err = 0;
9796 struct block_info *bits;
9797 int bits_nr;
9798 struct extent_buffer *leaf;
9799 int slot;
9800 struct btrfs_root_item ri;
9801 struct list_head dropping_trees;
9802 struct list_head normal_trees;
9803 struct btrfs_root *root1;
9804 u64 objectid;
9805 u32 level_size;
9806 u8 level;
9808 dev_cache = RB_ROOT;
9809 cache_tree_init(&chunk_cache);
9810 block_group_tree_init(&block_group_cache);
9811 device_extent_tree_init(&dev_extent_cache);
9813 cache_tree_init(&extent_cache);
9814 cache_tree_init(&seen);
9815 cache_tree_init(&pending);
9816 cache_tree_init(&nodes);
9817 cache_tree_init(&reada);
9818 cache_tree_init(&corrupt_blocks);
9819 extent_io_tree_init(&excluded_extents);
9820 INIT_LIST_HEAD(&dropping_trees);
9821 INIT_LIST_HEAD(&normal_trees);
9823 if (repair) {
9824 root->fs_info->excluded_extents = &excluded_extents;
9825 root->fs_info->fsck_extent_cache = &extent_cache;
9826 root->fs_info->free_extent_hook = free_extent_hook;
9827 root->fs_info->corrupt_blocks = &corrupt_blocks;
9830 bits_nr = 1024;
9831 bits = malloc(bits_nr * sizeof(struct block_info));
9832 if (!bits) {
9833 perror("malloc");
9834 exit(1);
9837 if (ctx.progress_enabled) {
9838 ctx.tp = TASK_EXTENTS;
9839 task_start(ctx.info);
9842 again:
9843 root1 = root->fs_info->tree_root;
9844 level = btrfs_header_level(root1->node);
9845 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9846 root1->node->start, 0, level, 0,
9847 root1->nodesize, NULL);
9848 if (ret < 0)
9849 goto out;
9850 root1 = root->fs_info->chunk_root;
9851 level = btrfs_header_level(root1->node);
9852 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9853 root1->node->start, 0, level, 0,
9854 root1->nodesize, NULL);
9855 if (ret < 0)
9856 goto out;
9857 btrfs_init_path(&path);
9858 key.offset = 0;
9859 key.objectid = 0;
9860 key.type = BTRFS_ROOT_ITEM_KEY;
9861 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9862 &key, &path, 0, 0);
9863 if (ret < 0)
9864 goto out;
9865 while(1) {
9866 leaf = path.nodes[0];
9867 slot = path.slots[0];
9868 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9869 ret = btrfs_next_leaf(root, &path);
9870 if (ret != 0)
9871 break;
9872 leaf = path.nodes[0];
9873 slot = path.slots[0];
9875 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9876 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9877 unsigned long offset;
9878 u64 last_snapshot;
9880 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9881 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9882 last_snapshot = btrfs_root_last_snapshot(&ri);
9883 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9884 level = btrfs_root_level(&ri);
9885 level_size = root->nodesize;
9886 ret = add_root_item_to_list(&normal_trees,
9887 found_key.objectid,
9888 btrfs_root_bytenr(&ri),
9889 last_snapshot, level,
9890 0, level_size, NULL);
9891 if (ret < 0)
9892 goto out;
9893 } else {
9894 level = btrfs_root_level(&ri);
9895 level_size = root->nodesize;
9896 objectid = found_key.objectid;
9897 btrfs_disk_key_to_cpu(&found_key,
9898 &ri.drop_progress);
9899 ret = add_root_item_to_list(&dropping_trees,
9900 objectid,
9901 btrfs_root_bytenr(&ri),
9902 last_snapshot, level,
9903 ri.drop_level,
9904 level_size, &found_key);
9905 if (ret < 0)
9906 goto out;
9909 path.slots[0]++;
9911 btrfs_release_path(&path);
9914 * check_block can return -EAGAIN if it fixes something, please keep
9915 * this in mind when dealing with return values from these functions, if
9916 * we get -EAGAIN we want to fall through and restart the loop.
9918 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9919 &seen, &reada, &nodes, &extent_cache,
9920 &chunk_cache, &dev_cache, &block_group_cache,
9921 &dev_extent_cache);
9922 if (ret < 0) {
9923 if (ret == -EAGAIN)
9924 goto loop;
9925 goto out;
9927 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9928 &pending, &seen, &reada, &nodes,
9929 &extent_cache, &chunk_cache, &dev_cache,
9930 &block_group_cache, &dev_extent_cache);
9931 if (ret < 0) {
9932 if (ret == -EAGAIN)
9933 goto loop;
9934 goto out;
9937 ret = check_chunks(&chunk_cache, &block_group_cache,
9938 &dev_extent_cache, NULL, NULL, NULL, 0);
9939 if (ret) {
9940 if (ret == -EAGAIN)
9941 goto loop;
9942 err = ret;
9945 ret = check_extent_refs(root, &extent_cache);
9946 if (ret < 0) {
9947 if (ret == -EAGAIN)
9948 goto loop;
9949 goto out;
9952 ret = check_devices(&dev_cache, &dev_extent_cache);
9953 if (ret && err)
9954 ret = err;
9956 out:
9957 task_stop(ctx.info);
9958 if (repair) {
9959 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9960 extent_io_tree_cleanup(&excluded_extents);
9961 root->fs_info->fsck_extent_cache = NULL;
9962 root->fs_info->free_extent_hook = NULL;
9963 root->fs_info->corrupt_blocks = NULL;
9964 root->fs_info->excluded_extents = NULL;
9966 free(bits);
9967 free_chunk_cache_tree(&chunk_cache);
9968 free_device_cache_tree(&dev_cache);
9969 free_block_group_tree(&block_group_cache);
9970 free_device_extent_tree(&dev_extent_cache);
9971 free_extent_cache_tree(&seen);
9972 free_extent_cache_tree(&pending);
9973 free_extent_cache_tree(&reada);
9974 free_extent_cache_tree(&nodes);
9975 return ret;
9976 loop:
9977 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9978 free_extent_cache_tree(&seen);
9979 free_extent_cache_tree(&pending);
9980 free_extent_cache_tree(&reada);
9981 free_extent_cache_tree(&nodes);
9982 free_chunk_cache_tree(&chunk_cache);
9983 free_block_group_tree(&block_group_cache);
9984 free_device_cache_tree(&dev_cache);
9985 free_device_extent_tree(&dev_extent_cache);
9986 free_extent_record_cache(&extent_cache);
9987 free_root_item_list(&normal_trees);
9988 free_root_item_list(&dropping_trees);
9989 extent_io_tree_cleanup(&excluded_extents);
9990 goto again;
9994 * Check backrefs of a tree block given by @bytenr or @eb.
9996 * @root: the root containing the @bytenr or @eb
9997 * @eb: tree block extent buffer, can be NULL
9998 * @bytenr: bytenr of the tree block to search
9999 * @level: tree level of the tree block
10000 * @owner: owner of the tree block
10002 * Return >0 for any error found and output error message
10003 * Return 0 for no error found
10005 static int check_tree_block_ref(struct btrfs_root *root,
10006 struct extent_buffer *eb, u64 bytenr,
10007 int level, u64 owner)
10009 struct btrfs_key key;
10010 struct btrfs_root *extent_root = root->fs_info->extent_root;
10011 struct btrfs_path path;
10012 struct btrfs_extent_item *ei;
10013 struct btrfs_extent_inline_ref *iref;
10014 struct extent_buffer *leaf;
10015 unsigned long end;
10016 unsigned long ptr;
10017 int slot;
10018 int skinny_level;
10019 int type;
10020 u32 nodesize = root->nodesize;
10021 u32 item_size;
10022 u64 offset;
10023 int tree_reloc_root = 0;
10024 int found_ref = 0;
10025 int err = 0;
10026 int ret;
10028 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10029 btrfs_header_bytenr(root->node) == bytenr)
10030 tree_reloc_root = 1;
10032 btrfs_init_path(&path);
10033 key.objectid = bytenr;
10034 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10035 key.type = BTRFS_METADATA_ITEM_KEY;
10036 else
10037 key.type = BTRFS_EXTENT_ITEM_KEY;
10038 key.offset = (u64)-1;
10040 /* Search for the backref in extent tree */
10041 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10042 if (ret < 0) {
10043 err |= BACKREF_MISSING;
10044 goto out;
10046 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10047 if (ret) {
10048 err |= BACKREF_MISSING;
10049 goto out;
10052 leaf = path.nodes[0];
10053 slot = path.slots[0];
10054 btrfs_item_key_to_cpu(leaf, &key, slot);
10056 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10058 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10059 skinny_level = (int)key.offset;
10060 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10061 } else {
10062 struct btrfs_tree_block_info *info;
10064 info = (struct btrfs_tree_block_info *)(ei + 1);
10065 skinny_level = btrfs_tree_block_level(leaf, info);
10066 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10069 if (eb) {
10070 u64 header_gen;
10071 u64 extent_gen;
10073 if (!(btrfs_extent_flags(leaf, ei) &
10074 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10075 error(
10076 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10077 key.objectid, nodesize,
10078 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10079 err = BACKREF_MISMATCH;
10081 header_gen = btrfs_header_generation(eb);
10082 extent_gen = btrfs_extent_generation(leaf, ei);
10083 if (header_gen != extent_gen) {
10084 error(
10085 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10086 key.objectid, nodesize, header_gen,
10087 extent_gen);
10088 err = BACKREF_MISMATCH;
10090 if (level != skinny_level) {
10091 error(
10092 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10093 key.objectid, nodesize, level, skinny_level);
10094 err = BACKREF_MISMATCH;
10096 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10097 error(
10098 "extent[%llu %u] is referred by other roots than %llu",
10099 key.objectid, nodesize, root->objectid);
10100 err = BACKREF_MISMATCH;
10105 * Iterate the extent/metadata item to find the exact backref
10107 item_size = btrfs_item_size_nr(leaf, slot);
10108 ptr = (unsigned long)iref;
10109 end = (unsigned long)ei + item_size;
10110 while (ptr < end) {
10111 iref = (struct btrfs_extent_inline_ref *)ptr;
10112 type = btrfs_extent_inline_ref_type(leaf, iref);
10113 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10115 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10116 (offset == root->objectid || offset == owner)) {
10117 found_ref = 1;
10118 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10120 * Backref of tree reloc root points to itself, no need
10121 * to check backref any more.
10123 if (tree_reloc_root)
10124 found_ref = 1;
10125 else
10126 /* Check if the backref points to valid referencer */
10127 found_ref = !check_tree_block_ref(root, NULL,
10128 offset, level + 1, owner);
10131 if (found_ref)
10132 break;
10133 ptr += btrfs_extent_inline_ref_size(type);
10137 * Inlined extent item doesn't have what we need, check
10138 * TREE_BLOCK_REF_KEY
10140 if (!found_ref) {
10141 btrfs_release_path(&path);
10142 key.objectid = bytenr;
10143 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10144 key.offset = root->objectid;
10146 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10147 if (!ret)
10148 found_ref = 1;
10150 if (!found_ref)
10151 err |= BACKREF_MISSING;
10152 out:
10153 btrfs_release_path(&path);
10154 if (eb && (err & BACKREF_MISSING))
10155 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10156 bytenr, nodesize, owner, level);
10157 return err;
10161 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10163 * Return >0 any error found and output error message
10164 * Return 0 for no error found
10166 static int check_extent_data_item(struct btrfs_root *root,
10167 struct extent_buffer *eb, int slot)
10169 struct btrfs_file_extent_item *fi;
10170 struct btrfs_path path;
10171 struct btrfs_root *extent_root = root->fs_info->extent_root;
10172 struct btrfs_key fi_key;
10173 struct btrfs_key dbref_key;
10174 struct extent_buffer *leaf;
10175 struct btrfs_extent_item *ei;
10176 struct btrfs_extent_inline_ref *iref;
10177 struct btrfs_extent_data_ref *dref;
10178 u64 owner;
10179 u64 disk_bytenr;
10180 u64 disk_num_bytes;
10181 u64 extent_num_bytes;
10182 u64 extent_flags;
10183 u32 item_size;
10184 unsigned long end;
10185 unsigned long ptr;
10186 int type;
10187 u64 ref_root;
10188 int found_dbackref = 0;
10189 int err = 0;
10190 int ret;
10192 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10193 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10195 /* Nothing to check for hole and inline data extents */
10196 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10197 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10198 return 0;
10200 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10201 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10202 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10204 /* Check unaligned disk_num_bytes and num_bytes */
10205 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10206 error(
10207 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10208 fi_key.objectid, fi_key.offset, disk_num_bytes,
10209 root->sectorsize);
10210 err |= BYTES_UNALIGNED;
10211 } else {
10212 data_bytes_allocated += disk_num_bytes;
10214 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10215 error(
10216 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10217 fi_key.objectid, fi_key.offset, extent_num_bytes,
10218 root->sectorsize);
10219 err |= BYTES_UNALIGNED;
10220 } else {
10221 data_bytes_referenced += extent_num_bytes;
10223 owner = btrfs_header_owner(eb);
10225 /* Check the extent item of the file extent in extent tree */
10226 btrfs_init_path(&path);
10227 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10228 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10229 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10231 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10232 if (ret) {
10233 err |= BACKREF_MISSING;
10234 goto error;
10237 leaf = path.nodes[0];
10238 slot = path.slots[0];
10239 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10241 extent_flags = btrfs_extent_flags(leaf, ei);
10243 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10244 error(
10245 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10246 disk_bytenr, disk_num_bytes,
10247 BTRFS_EXTENT_FLAG_DATA);
10248 err |= BACKREF_MISMATCH;
10251 /* Check data backref inside that extent item */
10252 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10253 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10254 ptr = (unsigned long)iref;
10255 end = (unsigned long)ei + item_size;
10256 while (ptr < end) {
10257 iref = (struct btrfs_extent_inline_ref *)ptr;
10258 type = btrfs_extent_inline_ref_type(leaf, iref);
10259 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10261 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10262 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10263 if (ref_root == owner || ref_root == root->objectid)
10264 found_dbackref = 1;
10265 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10266 found_dbackref = !check_tree_block_ref(root, NULL,
10267 btrfs_extent_inline_ref_offset(leaf, iref),
10268 0, owner);
10271 if (found_dbackref)
10272 break;
10273 ptr += btrfs_extent_inline_ref_size(type);
10276 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10277 if (!found_dbackref) {
10278 btrfs_release_path(&path);
10280 btrfs_init_path(&path);
10281 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10282 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10283 dbref_key.offset = hash_extent_data_ref(root->objectid,
10284 fi_key.objectid, fi_key.offset);
10286 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10287 &dbref_key, &path, 0, 0);
10288 if (!ret)
10289 found_dbackref = 1;
10292 if (!found_dbackref)
10293 err |= BACKREF_MISSING;
10294 error:
10295 btrfs_release_path(&path);
10296 if (err & BACKREF_MISSING) {
10297 error("data extent[%llu %llu] backref lost",
10298 disk_bytenr, disk_num_bytes);
10300 return err;
10304 * Get real tree block level for the case like shared block
10305 * Return >= 0 as tree level
10306 * Return <0 for error
10308 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10310 struct extent_buffer *eb;
10311 struct btrfs_path path;
10312 struct btrfs_key key;
10313 struct btrfs_extent_item *ei;
10314 u64 flags;
10315 u64 transid;
10316 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10317 u8 backref_level;
10318 u8 header_level;
10319 int ret;
10321 /* Search extent tree for extent generation and level */
10322 key.objectid = bytenr;
10323 key.type = BTRFS_METADATA_ITEM_KEY;
10324 key.offset = (u64)-1;
10326 btrfs_init_path(&path);
10327 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10328 if (ret < 0)
10329 goto release_out;
10330 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10331 if (ret < 0)
10332 goto release_out;
10333 if (ret > 0) {
10334 ret = -ENOENT;
10335 goto release_out;
10338 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10339 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10340 struct btrfs_extent_item);
10341 flags = btrfs_extent_flags(path.nodes[0], ei);
10342 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10343 ret = -ENOENT;
10344 goto release_out;
10347 /* Get transid for later read_tree_block() check */
10348 transid = btrfs_extent_generation(path.nodes[0], ei);
10350 /* Get backref level as one source */
10351 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10352 backref_level = key.offset;
10353 } else {
10354 struct btrfs_tree_block_info *info;
10356 info = (struct btrfs_tree_block_info *)(ei + 1);
10357 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10359 btrfs_release_path(&path);
10361 /* Get level from tree block as an alternative source */
10362 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10363 if (!extent_buffer_uptodate(eb)) {
10364 free_extent_buffer(eb);
10365 return -EIO;
10367 header_level = btrfs_header_level(eb);
10368 free_extent_buffer(eb);
10370 if (header_level != backref_level)
10371 return -EIO;
10372 return header_level;
10374 release_out:
10375 btrfs_release_path(&path);
10376 return ret;
10380 * Check if a tree block backref is valid (points to a valid tree block)
10381 * if level == -1, level will be resolved
10382 * Return >0 for any error found and print error message
10384 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10385 u64 bytenr, int level)
10387 struct btrfs_root *root;
10388 struct btrfs_key key;
10389 struct btrfs_path path;
10390 struct extent_buffer *eb;
10391 struct extent_buffer *node;
10392 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10393 int err = 0;
10394 int ret;
10396 /* Query level for level == -1 special case */
10397 if (level == -1)
10398 level = query_tree_block_level(fs_info, bytenr);
10399 if (level < 0) {
10400 err |= REFERENCER_MISSING;
10401 goto out;
10404 key.objectid = root_id;
10405 key.type = BTRFS_ROOT_ITEM_KEY;
10406 key.offset = (u64)-1;
10408 root = btrfs_read_fs_root(fs_info, &key);
10409 if (IS_ERR(root)) {
10410 err |= REFERENCER_MISSING;
10411 goto out;
10414 /* Read out the tree block to get item/node key */
10415 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10416 if (!extent_buffer_uptodate(eb)) {
10417 err |= REFERENCER_MISSING;
10418 free_extent_buffer(eb);
10419 goto out;
10422 /* Empty tree, no need to check key */
10423 if (!btrfs_header_nritems(eb) && !level) {
10424 free_extent_buffer(eb);
10425 goto out;
10428 if (level)
10429 btrfs_node_key_to_cpu(eb, &key, 0);
10430 else
10431 btrfs_item_key_to_cpu(eb, &key, 0);
10433 free_extent_buffer(eb);
10435 btrfs_init_path(&path);
10436 path.lowest_level = level;
10437 /* Search with the first key, to ensure we can reach it */
10438 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10439 if (ret < 0) {
10440 err |= REFERENCER_MISSING;
10441 goto release_out;
10444 node = path.nodes[level];
10445 if (btrfs_header_bytenr(node) != bytenr) {
10446 error(
10447 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10448 bytenr, nodesize, bytenr,
10449 btrfs_header_bytenr(node));
10450 err |= REFERENCER_MISMATCH;
10452 if (btrfs_header_level(node) != level) {
10453 error(
10454 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10455 bytenr, nodesize, level,
10456 btrfs_header_level(node));
10457 err |= REFERENCER_MISMATCH;
10460 release_out:
10461 btrfs_release_path(&path);
10462 out:
10463 if (err & REFERENCER_MISSING) {
10464 if (level < 0)
10465 error("extent [%llu %d] lost referencer (owner: %llu)",
10466 bytenr, nodesize, root_id);
10467 else
10468 error(
10469 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10470 bytenr, nodesize, root_id, level);
10473 return err;
10477 * Check if tree block @eb is tree reloc root.
10478 * Return 0 if it's not or any problem happens
10479 * Return 1 if it's a tree reloc root
10481 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10482 struct extent_buffer *eb)
10484 struct btrfs_root *tree_reloc_root;
10485 struct btrfs_key key;
10486 u64 bytenr = btrfs_header_bytenr(eb);
10487 u64 owner = btrfs_header_owner(eb);
10488 int ret = 0;
10490 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10491 key.offset = owner;
10492 key.type = BTRFS_ROOT_ITEM_KEY;
10494 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10495 if (IS_ERR(tree_reloc_root))
10496 return 0;
10498 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10499 ret = 1;
10500 btrfs_free_fs_root(tree_reloc_root);
10501 return ret;
10505 * Check referencer for shared block backref
10506 * If level == -1, this function will resolve the level.
10508 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10509 u64 parent, u64 bytenr, int level)
10511 struct extent_buffer *eb;
10512 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10513 u32 nr;
10514 int found_parent = 0;
10515 int i;
10517 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10518 if (!extent_buffer_uptodate(eb))
10519 goto out;
10521 if (level == -1)
10522 level = query_tree_block_level(fs_info, bytenr);
10523 if (level < 0)
10524 goto out;
10526 /* It's possible it's a tree reloc root */
10527 if (parent == bytenr) {
10528 if (is_tree_reloc_root(fs_info, eb))
10529 found_parent = 1;
10530 goto out;
10533 if (level + 1 != btrfs_header_level(eb))
10534 goto out;
10536 nr = btrfs_header_nritems(eb);
10537 for (i = 0; i < nr; i++) {
10538 if (bytenr == btrfs_node_blockptr(eb, i)) {
10539 found_parent = 1;
10540 break;
10543 out:
10544 free_extent_buffer(eb);
10545 if (!found_parent) {
10546 error(
10547 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10548 bytenr, nodesize, parent, level);
10549 return REFERENCER_MISSING;
10551 return 0;
10555 * Check referencer for normal (inlined) data ref
10556 * If len == 0, it will be resolved by searching in extent tree
10558 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10559 u64 root_id, u64 objectid, u64 offset,
10560 u64 bytenr, u64 len, u32 count)
10562 struct btrfs_root *root;
10563 struct btrfs_root *extent_root = fs_info->extent_root;
10564 struct btrfs_key key;
10565 struct btrfs_path path;
10566 struct extent_buffer *leaf;
10567 struct btrfs_file_extent_item *fi;
10568 u32 found_count = 0;
10569 int slot;
10570 int ret = 0;
10572 if (!len) {
10573 key.objectid = bytenr;
10574 key.type = BTRFS_EXTENT_ITEM_KEY;
10575 key.offset = (u64)-1;
10577 btrfs_init_path(&path);
10578 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10579 if (ret < 0)
10580 goto out;
10581 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10582 if (ret)
10583 goto out;
10584 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10585 if (key.objectid != bytenr ||
10586 key.type != BTRFS_EXTENT_ITEM_KEY)
10587 goto out;
10588 len = key.offset;
10589 btrfs_release_path(&path);
10591 key.objectid = root_id;
10592 key.type = BTRFS_ROOT_ITEM_KEY;
10593 key.offset = (u64)-1;
10594 btrfs_init_path(&path);
10596 root = btrfs_read_fs_root(fs_info, &key);
10597 if (IS_ERR(root))
10598 goto out;
10600 key.objectid = objectid;
10601 key.type = BTRFS_EXTENT_DATA_KEY;
10603 * It can be nasty as data backref offset is
10604 * file offset - file extent offset, which is smaller or
10605 * equal to original backref offset. The only special case is
10606 * overflow. So we need to special check and do further search.
10608 key.offset = offset & (1ULL << 63) ? 0 : offset;
10610 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10611 if (ret < 0)
10612 goto out;
10615 * Search afterwards to get correct one
10616 * NOTE: As we must do a comprehensive check on the data backref to
10617 * make sure the dref count also matches, we must iterate all file
10618 * extents for that inode.
10620 while (1) {
10621 leaf = path.nodes[0];
10622 slot = path.slots[0];
10624 if (slot >= btrfs_header_nritems(leaf))
10625 goto next;
10626 btrfs_item_key_to_cpu(leaf, &key, slot);
10627 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10628 break;
10629 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10631 * Except normal disk bytenr and disk num bytes, we still
10632 * need to do extra check on dbackref offset as
10633 * dbackref offset = file_offset - file_extent_offset
10635 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10636 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10637 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10638 offset)
10639 found_count++;
10641 next:
10642 ret = btrfs_next_item(root, &path);
10643 if (ret)
10644 break;
10646 out:
10647 btrfs_release_path(&path);
10648 if (found_count != count) {
10649 error(
10650 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10651 bytenr, len, root_id, objectid, offset, count, found_count);
10652 return REFERENCER_MISSING;
10654 return 0;
10658 * Check if the referencer of a shared data backref exists
10660 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10661 u64 parent, u64 bytenr)
10663 struct extent_buffer *eb;
10664 struct btrfs_key key;
10665 struct btrfs_file_extent_item *fi;
10666 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10667 u32 nr;
10668 int found_parent = 0;
10669 int i;
10671 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10672 if (!extent_buffer_uptodate(eb))
10673 goto out;
10675 nr = btrfs_header_nritems(eb);
10676 for (i = 0; i < nr; i++) {
10677 btrfs_item_key_to_cpu(eb, &key, i);
10678 if (key.type != BTRFS_EXTENT_DATA_KEY)
10679 continue;
10681 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10682 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10683 continue;
10685 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10686 found_parent = 1;
10687 break;
10691 out:
10692 free_extent_buffer(eb);
10693 if (!found_parent) {
10694 error("shared extent %llu referencer lost (parent: %llu)",
10695 bytenr, parent);
10696 return REFERENCER_MISSING;
10698 return 0;
10702 * This function will check a given extent item, including its backref and
10703 * itself (like crossing stripe boundary and type)
10705 * Since we don't use extent_record anymore, introduce new error bit
10707 static int check_extent_item(struct btrfs_fs_info *fs_info,
10708 struct extent_buffer *eb, int slot)
10710 struct btrfs_extent_item *ei;
10711 struct btrfs_extent_inline_ref *iref;
10712 struct btrfs_extent_data_ref *dref;
10713 unsigned long end;
10714 unsigned long ptr;
10715 int type;
10716 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10717 u32 item_size = btrfs_item_size_nr(eb, slot);
10718 u64 flags;
10719 u64 offset;
10720 int metadata = 0;
10721 int level;
10722 struct btrfs_key key;
10723 int ret;
10724 int err = 0;
10726 btrfs_item_key_to_cpu(eb, &key, slot);
10727 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10728 bytes_used += key.offset;
10729 else
10730 bytes_used += nodesize;
10732 if (item_size < sizeof(*ei)) {
10734 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10735 * old thing when on disk format is still un-determined.
10736 * No need to care about it anymore
10738 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10739 return -ENOTTY;
10742 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10743 flags = btrfs_extent_flags(eb, ei);
10745 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10746 metadata = 1;
10747 if (metadata && check_crossing_stripes(global_info, key.objectid,
10748 eb->len)) {
10749 error("bad metadata [%llu, %llu) crossing stripe boundary",
10750 key.objectid, key.objectid + nodesize);
10751 err |= CROSSING_STRIPE_BOUNDARY;
10754 ptr = (unsigned long)(ei + 1);
10756 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10757 /* Old EXTENT_ITEM metadata */
10758 struct btrfs_tree_block_info *info;
10760 info = (struct btrfs_tree_block_info *)ptr;
10761 level = btrfs_tree_block_level(eb, info);
10762 ptr += sizeof(struct btrfs_tree_block_info);
10763 } else {
10764 /* New METADATA_ITEM */
10765 level = key.offset;
10767 end = (unsigned long)ei + item_size;
10769 next:
10770 /* Reached extent item end normally */
10771 if (ptr == end)
10772 goto out;
10774 /* Beyond extent item end, wrong item size */
10775 if (ptr > end) {
10776 err |= ITEM_SIZE_MISMATCH;
10777 error("extent item at bytenr %llu slot %d has wrong size",
10778 eb->start, slot);
10779 goto out;
10782 /* Now check every backref in this extent item */
10783 iref = (struct btrfs_extent_inline_ref *)ptr;
10784 type = btrfs_extent_inline_ref_type(eb, iref);
10785 offset = btrfs_extent_inline_ref_offset(eb, iref);
10786 switch (type) {
10787 case BTRFS_TREE_BLOCK_REF_KEY:
10788 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10789 level);
10790 err |= ret;
10791 break;
10792 case BTRFS_SHARED_BLOCK_REF_KEY:
10793 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10794 level);
10795 err |= ret;
10796 break;
10797 case BTRFS_EXTENT_DATA_REF_KEY:
10798 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10799 ret = check_extent_data_backref(fs_info,
10800 btrfs_extent_data_ref_root(eb, dref),
10801 btrfs_extent_data_ref_objectid(eb, dref),
10802 btrfs_extent_data_ref_offset(eb, dref),
10803 key.objectid, key.offset,
10804 btrfs_extent_data_ref_count(eb, dref));
10805 err |= ret;
10806 break;
10807 case BTRFS_SHARED_DATA_REF_KEY:
10808 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10809 err |= ret;
10810 break;
10811 default:
10812 error("extent[%llu %d %llu] has unknown ref type: %d",
10813 key.objectid, key.type, key.offset, type);
10814 err |= UNKNOWN_TYPE;
10815 goto out;
10818 ptr += btrfs_extent_inline_ref_size(type);
10819 goto next;
10821 out:
10822 return err;
10826 * Check if a dev extent item is referred correctly by its chunk
10828 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10829 struct extent_buffer *eb, int slot)
10831 struct btrfs_root *chunk_root = fs_info->chunk_root;
10832 struct btrfs_dev_extent *ptr;
10833 struct btrfs_path path;
10834 struct btrfs_key chunk_key;
10835 struct btrfs_key devext_key;
10836 struct btrfs_chunk *chunk;
10837 struct extent_buffer *l;
10838 int num_stripes;
10839 u64 length;
10840 int i;
10841 int found_chunk = 0;
10842 int ret;
10844 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10845 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10846 length = btrfs_dev_extent_length(eb, ptr);
10848 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10849 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10850 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10852 btrfs_init_path(&path);
10853 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10854 if (ret)
10855 goto out;
10857 l = path.nodes[0];
10858 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10859 if (btrfs_chunk_length(l, chunk) != length)
10860 goto out;
10862 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10863 for (i = 0; i < num_stripes; i++) {
10864 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10865 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10867 if (devid == devext_key.objectid &&
10868 offset == devext_key.offset) {
10869 found_chunk = 1;
10870 break;
10873 out:
10874 btrfs_release_path(&path);
10875 if (!found_chunk) {
10876 error(
10877 "device extent[%llu, %llu, %llu] did not find the related chunk",
10878 devext_key.objectid, devext_key.offset, length);
10879 return REFERENCER_MISSING;
10881 return 0;
10885 * Check if the used space is correct with the dev item
10887 static int check_dev_item(struct btrfs_fs_info *fs_info,
10888 struct extent_buffer *eb, int slot)
10890 struct btrfs_root *dev_root = fs_info->dev_root;
10891 struct btrfs_dev_item *dev_item;
10892 struct btrfs_path path;
10893 struct btrfs_key key;
10894 struct btrfs_dev_extent *ptr;
10895 u64 dev_id;
10896 u64 used;
10897 u64 total = 0;
10898 int ret;
10900 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10901 dev_id = btrfs_device_id(eb, dev_item);
10902 used = btrfs_device_bytes_used(eb, dev_item);
10904 key.objectid = dev_id;
10905 key.type = BTRFS_DEV_EXTENT_KEY;
10906 key.offset = 0;
10908 btrfs_init_path(&path);
10909 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10910 if (ret < 0) {
10911 btrfs_item_key_to_cpu(eb, &key, slot);
10912 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10913 key.objectid, key.type, key.offset);
10914 btrfs_release_path(&path);
10915 return REFERENCER_MISSING;
10918 /* Iterate dev_extents to calculate the used space of a device */
10919 while (1) {
10920 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10921 goto next;
10923 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10924 if (key.objectid > dev_id)
10925 break;
10926 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10927 goto next;
10929 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10930 struct btrfs_dev_extent);
10931 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10932 next:
10933 ret = btrfs_next_item(dev_root, &path);
10934 if (ret)
10935 break;
10937 btrfs_release_path(&path);
10939 if (used != total) {
10940 btrfs_item_key_to_cpu(eb, &key, slot);
10941 error(
10942 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10943 total, used, BTRFS_ROOT_TREE_OBJECTID,
10944 BTRFS_DEV_EXTENT_KEY, dev_id);
10945 return ACCOUNTING_MISMATCH;
10947 return 0;
10951 * Check a block group item with its referener (chunk) and its used space
10952 * with extent/metadata item
10954 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10955 struct extent_buffer *eb, int slot)
10957 struct btrfs_root *extent_root = fs_info->extent_root;
10958 struct btrfs_root *chunk_root = fs_info->chunk_root;
10959 struct btrfs_block_group_item *bi;
10960 struct btrfs_block_group_item bg_item;
10961 struct btrfs_path path;
10962 struct btrfs_key bg_key;
10963 struct btrfs_key chunk_key;
10964 struct btrfs_key extent_key;
10965 struct btrfs_chunk *chunk;
10966 struct extent_buffer *leaf;
10967 struct btrfs_extent_item *ei;
10968 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10969 u64 flags;
10970 u64 bg_flags;
10971 u64 used;
10972 u64 total = 0;
10973 int ret;
10974 int err = 0;
10976 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10977 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10978 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10979 used = btrfs_block_group_used(&bg_item);
10980 bg_flags = btrfs_block_group_flags(&bg_item);
10982 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10983 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10984 chunk_key.offset = bg_key.objectid;
10986 btrfs_init_path(&path);
10987 /* Search for the referencer chunk */
10988 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10989 if (ret) {
10990 error(
10991 "block group[%llu %llu] did not find the related chunk item",
10992 bg_key.objectid, bg_key.offset);
10993 err |= REFERENCER_MISSING;
10994 } else {
10995 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10996 struct btrfs_chunk);
10997 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10998 bg_key.offset) {
10999 error(
11000 "block group[%llu %llu] related chunk item length does not match",
11001 bg_key.objectid, bg_key.offset);
11002 err |= REFERENCER_MISMATCH;
11005 btrfs_release_path(&path);
11007 /* Search from the block group bytenr */
11008 extent_key.objectid = bg_key.objectid;
11009 extent_key.type = 0;
11010 extent_key.offset = 0;
11012 btrfs_init_path(&path);
11013 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11014 if (ret < 0)
11015 goto out;
11017 /* Iterate extent tree to account used space */
11018 while (1) {
11019 leaf = path.nodes[0];
11021 /* Search slot can point to the last item beyond leaf nritems */
11022 if (path.slots[0] >= btrfs_header_nritems(leaf))
11023 goto next;
11025 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11026 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11027 break;
11029 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11030 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11031 goto next;
11032 if (extent_key.objectid < bg_key.objectid)
11033 goto next;
11035 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11036 total += nodesize;
11037 else
11038 total += extent_key.offset;
11040 ei = btrfs_item_ptr(leaf, path.slots[0],
11041 struct btrfs_extent_item);
11042 flags = btrfs_extent_flags(leaf, ei);
11043 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11044 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11045 error(
11046 "bad extent[%llu, %llu) type mismatch with chunk",
11047 extent_key.objectid,
11048 extent_key.objectid + extent_key.offset);
11049 err |= CHUNK_TYPE_MISMATCH;
11051 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11052 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11053 BTRFS_BLOCK_GROUP_METADATA))) {
11054 error(
11055 "bad extent[%llu, %llu) type mismatch with chunk",
11056 extent_key.objectid,
11057 extent_key.objectid + nodesize);
11058 err |= CHUNK_TYPE_MISMATCH;
11061 next:
11062 ret = btrfs_next_item(extent_root, &path);
11063 if (ret)
11064 break;
11067 out:
11068 btrfs_release_path(&path);
11070 if (total != used) {
11071 error(
11072 "block group[%llu %llu] used %llu but extent items used %llu",
11073 bg_key.objectid, bg_key.offset, used, total);
11074 err |= ACCOUNTING_MISMATCH;
11076 return err;
11080 * Check a chunk item.
11081 * Including checking all referred dev_extents and block group
11083 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11084 struct extent_buffer *eb, int slot)
11086 struct btrfs_root *extent_root = fs_info->extent_root;
11087 struct btrfs_root *dev_root = fs_info->dev_root;
11088 struct btrfs_path path;
11089 struct btrfs_key chunk_key;
11090 struct btrfs_key bg_key;
11091 struct btrfs_key devext_key;
11092 struct btrfs_chunk *chunk;
11093 struct extent_buffer *leaf;
11094 struct btrfs_block_group_item *bi;
11095 struct btrfs_block_group_item bg_item;
11096 struct btrfs_dev_extent *ptr;
11097 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11098 u64 length;
11099 u64 chunk_end;
11100 u64 type;
11101 u64 profile;
11102 int num_stripes;
11103 u64 offset;
11104 u64 objectid;
11105 int i;
11106 int ret;
11107 int err = 0;
11109 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11110 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11111 length = btrfs_chunk_length(eb, chunk);
11112 chunk_end = chunk_key.offset + length;
11113 if (!IS_ALIGNED(length, sectorsize)) {
11114 error("chunk[%llu %llu) not aligned to %u",
11115 chunk_key.offset, chunk_end, sectorsize);
11116 err |= BYTES_UNALIGNED;
11117 goto out;
11120 type = btrfs_chunk_type(eb, chunk);
11121 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11122 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11123 error("chunk[%llu %llu) has no chunk type",
11124 chunk_key.offset, chunk_end);
11125 err |= UNKNOWN_TYPE;
11127 if (profile && (profile & (profile - 1))) {
11128 error("chunk[%llu %llu) multiple profiles detected: %llx",
11129 chunk_key.offset, chunk_end, profile);
11130 err |= UNKNOWN_TYPE;
11133 bg_key.objectid = chunk_key.offset;
11134 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11135 bg_key.offset = length;
11137 btrfs_init_path(&path);
11138 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11139 if (ret) {
11140 error(
11141 "chunk[%llu %llu) did not find the related block group item",
11142 chunk_key.offset, chunk_end);
11143 err |= REFERENCER_MISSING;
11144 } else{
11145 leaf = path.nodes[0];
11146 bi = btrfs_item_ptr(leaf, path.slots[0],
11147 struct btrfs_block_group_item);
11148 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11149 sizeof(bg_item));
11150 if (btrfs_block_group_flags(&bg_item) != type) {
11151 error(
11152 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11153 chunk_key.offset, chunk_end, type,
11154 btrfs_block_group_flags(&bg_item));
11155 err |= REFERENCER_MISSING;
11159 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11160 for (i = 0; i < num_stripes; i++) {
11161 btrfs_release_path(&path);
11162 btrfs_init_path(&path);
11163 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11164 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11165 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11167 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11168 0, 0);
11169 if (ret)
11170 goto not_match_dev;
11172 leaf = path.nodes[0];
11173 ptr = btrfs_item_ptr(leaf, path.slots[0],
11174 struct btrfs_dev_extent);
11175 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11176 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11177 if (objectid != chunk_key.objectid ||
11178 offset != chunk_key.offset ||
11179 btrfs_dev_extent_length(leaf, ptr) != length)
11180 goto not_match_dev;
11181 continue;
11182 not_match_dev:
11183 err |= BACKREF_MISSING;
11184 error(
11185 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11186 chunk_key.objectid, chunk_end, i);
11187 continue;
11189 btrfs_release_path(&path);
11190 out:
11191 return err;
11195 * Main entry function to check known items and update related accounting info
11197 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11199 struct btrfs_fs_info *fs_info = root->fs_info;
11200 struct btrfs_key key;
11201 int slot = 0;
11202 int type;
11203 struct btrfs_extent_data_ref *dref;
11204 int ret;
11205 int err = 0;
11207 next:
11208 btrfs_item_key_to_cpu(eb, &key, slot);
11209 type = key.type;
11211 switch (type) {
11212 case BTRFS_EXTENT_DATA_KEY:
11213 ret = check_extent_data_item(root, eb, slot);
11214 err |= ret;
11215 break;
11216 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11217 ret = check_block_group_item(fs_info, eb, slot);
11218 err |= ret;
11219 break;
11220 case BTRFS_DEV_ITEM_KEY:
11221 ret = check_dev_item(fs_info, eb, slot);
11222 err |= ret;
11223 break;
11224 case BTRFS_CHUNK_ITEM_KEY:
11225 ret = check_chunk_item(fs_info, eb, slot);
11226 err |= ret;
11227 break;
11228 case BTRFS_DEV_EXTENT_KEY:
11229 ret = check_dev_extent_item(fs_info, eb, slot);
11230 err |= ret;
11231 break;
11232 case BTRFS_EXTENT_ITEM_KEY:
11233 case BTRFS_METADATA_ITEM_KEY:
11234 ret = check_extent_item(fs_info, eb, slot);
11235 err |= ret;
11236 break;
11237 case BTRFS_EXTENT_CSUM_KEY:
11238 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11239 break;
11240 case BTRFS_TREE_BLOCK_REF_KEY:
11241 ret = check_tree_block_backref(fs_info, key.offset,
11242 key.objectid, -1);
11243 err |= ret;
11244 break;
11245 case BTRFS_EXTENT_DATA_REF_KEY:
11246 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11247 ret = check_extent_data_backref(fs_info,
11248 btrfs_extent_data_ref_root(eb, dref),
11249 btrfs_extent_data_ref_objectid(eb, dref),
11250 btrfs_extent_data_ref_offset(eb, dref),
11251 key.objectid, 0,
11252 btrfs_extent_data_ref_count(eb, dref));
11253 err |= ret;
11254 break;
11255 case BTRFS_SHARED_BLOCK_REF_KEY:
11256 ret = check_shared_block_backref(fs_info, key.offset,
11257 key.objectid, -1);
11258 err |= ret;
11259 break;
11260 case BTRFS_SHARED_DATA_REF_KEY:
11261 ret = check_shared_data_backref(fs_info, key.offset,
11262 key.objectid);
11263 err |= ret;
11264 break;
11265 default:
11266 break;
11269 if (++slot < btrfs_header_nritems(eb))
11270 goto next;
11272 return err;
11276 * Helper function for later fs/subvol tree check. To determine if a tree
11277 * block should be checked.
11278 * This function will ensure only the direct referencer with lowest rootid to
11279 * check a fs/subvolume tree block.
11281 * Backref check at extent tree would detect errors like missing subvolume
11282 * tree, so we can do aggressive check to reduce duplicated checks.
11284 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11286 struct btrfs_root *extent_root = root->fs_info->extent_root;
11287 struct btrfs_key key;
11288 struct btrfs_path path;
11289 struct extent_buffer *leaf;
11290 int slot;
11291 struct btrfs_extent_item *ei;
11292 unsigned long ptr;
11293 unsigned long end;
11294 int type;
11295 u32 item_size;
11296 u64 offset;
11297 struct btrfs_extent_inline_ref *iref;
11298 int ret;
11300 btrfs_init_path(&path);
11301 key.objectid = btrfs_header_bytenr(eb);
11302 key.type = BTRFS_METADATA_ITEM_KEY;
11303 key.offset = (u64)-1;
11306 * Any failure in backref resolving means we can't determine
11307 * whom the tree block belongs to.
11308 * So in that case, we need to check that tree block
11310 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11311 if (ret < 0)
11312 goto need_check;
11314 ret = btrfs_previous_extent_item(extent_root, &path,
11315 btrfs_header_bytenr(eb));
11316 if (ret)
11317 goto need_check;
11319 leaf = path.nodes[0];
11320 slot = path.slots[0];
11321 btrfs_item_key_to_cpu(leaf, &key, slot);
11322 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11324 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11325 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11326 } else {
11327 struct btrfs_tree_block_info *info;
11329 info = (struct btrfs_tree_block_info *)(ei + 1);
11330 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11333 item_size = btrfs_item_size_nr(leaf, slot);
11334 ptr = (unsigned long)iref;
11335 end = (unsigned long)ei + item_size;
11336 while (ptr < end) {
11337 iref = (struct btrfs_extent_inline_ref *)ptr;
11338 type = btrfs_extent_inline_ref_type(leaf, iref);
11339 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11342 * We only check the tree block if current root is
11343 * the lowest referencer of it.
11345 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11346 offset < root->objectid) {
11347 btrfs_release_path(&path);
11348 return 0;
11351 ptr += btrfs_extent_inline_ref_size(type);
11354 * Normally we should also check keyed tree block ref, but that may be
11355 * very time consuming. Inlined ref should already make us skip a lot
11356 * of refs now. So skip search keyed tree block ref.
11359 need_check:
11360 btrfs_release_path(&path);
11361 return 1;
11365 * Traversal function for tree block. We will do:
11366 * 1) Skip shared fs/subvolume tree blocks
11367 * 2) Update related bytes accounting
11368 * 3) Pre-order traversal
11370 static int traverse_tree_block(struct btrfs_root *root,
11371 struct extent_buffer *node)
11373 struct extent_buffer *eb;
11374 struct btrfs_key key;
11375 struct btrfs_key drop_key;
11376 int level;
11377 u64 nr;
11378 int i;
11379 int err = 0;
11380 int ret;
11383 * Skip shared fs/subvolume tree block, in that case they will
11384 * be checked by referencer with lowest rootid
11386 if (is_fstree(root->objectid) && !should_check(root, node))
11387 return 0;
11389 /* Update bytes accounting */
11390 total_btree_bytes += node->len;
11391 if (fs_root_objectid(btrfs_header_owner(node)))
11392 total_fs_tree_bytes += node->len;
11393 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11394 total_extent_tree_bytes += node->len;
11395 if (!found_old_backref &&
11396 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11397 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11398 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11399 found_old_backref = 1;
11401 /* pre-order tranversal, check itself first */
11402 level = btrfs_header_level(node);
11403 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11404 btrfs_header_level(node),
11405 btrfs_header_owner(node));
11406 err |= ret;
11407 if (err)
11408 error(
11409 "check %s failed root %llu bytenr %llu level %d, force continue check",
11410 level ? "node":"leaf", root->objectid,
11411 btrfs_header_bytenr(node), btrfs_header_level(node));
11413 if (!level) {
11414 btree_space_waste += btrfs_leaf_free_space(root, node);
11415 ret = check_leaf_items(root, node);
11416 err |= ret;
11417 return err;
11420 nr = btrfs_header_nritems(node);
11421 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11422 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11423 sizeof(struct btrfs_key_ptr);
11425 /* Then check all its children */
11426 for (i = 0; i < nr; i++) {
11427 u64 blocknr = btrfs_node_blockptr(node, i);
11429 btrfs_node_key_to_cpu(node, &key, i);
11430 if (level == root->root_item.drop_level &&
11431 is_dropped_key(&key, &drop_key))
11432 continue;
11435 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11436 * to call the function itself.
11438 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11439 if (extent_buffer_uptodate(eb)) {
11440 ret = traverse_tree_block(root, eb);
11441 err |= ret;
11443 free_extent_buffer(eb);
11446 return err;
11450 * Low memory usage version check_chunks_and_extents.
11452 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11454 struct btrfs_path path;
11455 struct btrfs_key key;
11456 struct btrfs_root *root1;
11457 struct btrfs_root *cur_root;
11458 int err = 0;
11459 int ret;
11461 root1 = root->fs_info->chunk_root;
11462 ret = traverse_tree_block(root1, root1->node);
11463 err |= ret;
11465 root1 = root->fs_info->tree_root;
11466 ret = traverse_tree_block(root1, root1->node);
11467 err |= ret;
11469 btrfs_init_path(&path);
11470 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11471 key.offset = 0;
11472 key.type = BTRFS_ROOT_ITEM_KEY;
11474 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11475 if (ret) {
11476 error("cannot find extent treet in tree_root");
11477 goto out;
11480 while (1) {
11481 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11482 if (key.type != BTRFS_ROOT_ITEM_KEY)
11483 goto next;
11484 key.offset = (u64)-1;
11486 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11487 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11488 &key);
11489 else
11490 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11491 if (IS_ERR(cur_root) || !cur_root) {
11492 error("failed to read tree: %lld", key.objectid);
11493 goto next;
11496 ret = traverse_tree_block(cur_root, cur_root->node);
11497 err |= ret;
11499 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11500 btrfs_free_fs_root(cur_root);
11501 next:
11502 ret = btrfs_next_item(root1, &path);
11503 if (ret)
11504 goto out;
11507 out:
11508 btrfs_release_path(&path);
11509 return err;
11512 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11513 struct btrfs_root *root, int overwrite)
11515 struct extent_buffer *c;
11516 struct extent_buffer *old = root->node;
11517 int level;
11518 int ret;
11519 struct btrfs_disk_key disk_key = {0,0,0};
11521 level = 0;
11523 if (overwrite) {
11524 c = old;
11525 extent_buffer_get(c);
11526 goto init;
11528 c = btrfs_alloc_free_block(trans, root,
11529 root->nodesize,
11530 root->root_key.objectid,
11531 &disk_key, level, 0, 0);
11532 if (IS_ERR(c)) {
11533 c = old;
11534 extent_buffer_get(c);
11535 overwrite = 1;
11537 init:
11538 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11539 btrfs_set_header_level(c, level);
11540 btrfs_set_header_bytenr(c, c->start);
11541 btrfs_set_header_generation(c, trans->transid);
11542 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11543 btrfs_set_header_owner(c, root->root_key.objectid);
11545 write_extent_buffer(c, root->fs_info->fsid,
11546 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11548 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11549 btrfs_header_chunk_tree_uuid(c),
11550 BTRFS_UUID_SIZE);
11552 btrfs_mark_buffer_dirty(c);
11554 * this case can happen in the following case:
11556 * 1.overwrite previous root.
11558 * 2.reinit reloc data root, this is because we skip pin
11559 * down reloc data tree before which means we can allocate
11560 * same block bytenr here.
11562 if (old->start == c->start) {
11563 btrfs_set_root_generation(&root->root_item,
11564 trans->transid);
11565 root->root_item.level = btrfs_header_level(root->node);
11566 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11567 &root->root_key, &root->root_item);
11568 if (ret) {
11569 free_extent_buffer(c);
11570 return ret;
11573 free_extent_buffer(old);
11574 root->node = c;
11575 add_root_to_dirty_list(root);
11576 return 0;
11579 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11580 struct extent_buffer *eb, int tree_root)
11582 struct extent_buffer *tmp;
11583 struct btrfs_root_item *ri;
11584 struct btrfs_key key;
11585 u64 bytenr;
11586 u32 nodesize;
11587 int level = btrfs_header_level(eb);
11588 int nritems;
11589 int ret;
11590 int i;
11593 * If we have pinned this block before, don't pin it again.
11594 * This can not only avoid forever loop with broken filesystem
11595 * but also give us some speedups.
11597 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11598 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11599 return 0;
11601 btrfs_pin_extent(fs_info, eb->start, eb->len);
11603 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11604 nritems = btrfs_header_nritems(eb);
11605 for (i = 0; i < nritems; i++) {
11606 if (level == 0) {
11607 btrfs_item_key_to_cpu(eb, &key, i);
11608 if (key.type != BTRFS_ROOT_ITEM_KEY)
11609 continue;
11610 /* Skip the extent root and reloc roots */
11611 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11612 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11613 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11614 continue;
11615 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11616 bytenr = btrfs_disk_root_bytenr(eb, ri);
11619 * If at any point we start needing the real root we
11620 * will have to build a stump root for the root we are
11621 * in, but for now this doesn't actually use the root so
11622 * just pass in extent_root.
11624 tmp = read_tree_block(fs_info->extent_root, bytenr,
11625 nodesize, 0);
11626 if (!extent_buffer_uptodate(tmp)) {
11627 fprintf(stderr, "Error reading root block\n");
11628 return -EIO;
11630 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11631 free_extent_buffer(tmp);
11632 if (ret)
11633 return ret;
11634 } else {
11635 bytenr = btrfs_node_blockptr(eb, i);
11637 /* If we aren't the tree root don't read the block */
11638 if (level == 1 && !tree_root) {
11639 btrfs_pin_extent(fs_info, bytenr, nodesize);
11640 continue;
11643 tmp = read_tree_block(fs_info->extent_root, bytenr,
11644 nodesize, 0);
11645 if (!extent_buffer_uptodate(tmp)) {
11646 fprintf(stderr, "Error reading tree block\n");
11647 return -EIO;
11649 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11650 free_extent_buffer(tmp);
11651 if (ret)
11652 return ret;
11656 return 0;
11659 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11661 int ret;
11663 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11664 if (ret)
11665 return ret;
11667 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11670 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11672 struct btrfs_block_group_cache *cache;
11673 struct btrfs_path path;
11674 struct extent_buffer *leaf;
11675 struct btrfs_chunk *chunk;
11676 struct btrfs_key key;
11677 int ret;
11678 u64 start;
11680 btrfs_init_path(&path);
11681 key.objectid = 0;
11682 key.type = BTRFS_CHUNK_ITEM_KEY;
11683 key.offset = 0;
11684 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11685 if (ret < 0) {
11686 btrfs_release_path(&path);
11687 return ret;
11691 * We do this in case the block groups were screwed up and had alloc
11692 * bits that aren't actually set on the chunks. This happens with
11693 * restored images every time and could happen in real life I guess.
11695 fs_info->avail_data_alloc_bits = 0;
11696 fs_info->avail_metadata_alloc_bits = 0;
11697 fs_info->avail_system_alloc_bits = 0;
11699 /* First we need to create the in-memory block groups */
11700 while (1) {
11701 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11702 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11703 if (ret < 0) {
11704 btrfs_release_path(&path);
11705 return ret;
11707 if (ret) {
11708 ret = 0;
11709 break;
11712 leaf = path.nodes[0];
11713 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11714 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11715 path.slots[0]++;
11716 continue;
11719 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11720 btrfs_add_block_group(fs_info, 0,
11721 btrfs_chunk_type(leaf, chunk),
11722 key.objectid, key.offset,
11723 btrfs_chunk_length(leaf, chunk));
11724 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11725 key.offset + btrfs_chunk_length(leaf, chunk));
11726 path.slots[0]++;
11728 start = 0;
11729 while (1) {
11730 cache = btrfs_lookup_first_block_group(fs_info, start);
11731 if (!cache)
11732 break;
11733 cache->cached = 1;
11734 start = cache->key.objectid + cache->key.offset;
11737 btrfs_release_path(&path);
11738 return 0;
11741 static int reset_balance(struct btrfs_trans_handle *trans,
11742 struct btrfs_fs_info *fs_info)
11744 struct btrfs_root *root = fs_info->tree_root;
11745 struct btrfs_path path;
11746 struct extent_buffer *leaf;
11747 struct btrfs_key key;
11748 int del_slot, del_nr = 0;
11749 int ret;
11750 int found = 0;
11752 btrfs_init_path(&path);
11753 key.objectid = BTRFS_BALANCE_OBJECTID;
11754 key.type = BTRFS_BALANCE_ITEM_KEY;
11755 key.offset = 0;
11756 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11757 if (ret) {
11758 if (ret > 0)
11759 ret = 0;
11760 if (!ret)
11761 goto reinit_data_reloc;
11762 else
11763 goto out;
11766 ret = btrfs_del_item(trans, root, &path);
11767 if (ret)
11768 goto out;
11769 btrfs_release_path(&path);
11771 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11772 key.type = BTRFS_ROOT_ITEM_KEY;
11773 key.offset = 0;
11774 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11775 if (ret < 0)
11776 goto out;
11777 while (1) {
11778 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11779 if (!found)
11780 break;
11782 if (del_nr) {
11783 ret = btrfs_del_items(trans, root, &path,
11784 del_slot, del_nr);
11785 del_nr = 0;
11786 if (ret)
11787 goto out;
11789 key.offset++;
11790 btrfs_release_path(&path);
11792 found = 0;
11793 ret = btrfs_search_slot(trans, root, &key, &path,
11794 -1, 1);
11795 if (ret < 0)
11796 goto out;
11797 continue;
11799 found = 1;
11800 leaf = path.nodes[0];
11801 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11802 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11803 break;
11804 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11805 path.slots[0]++;
11806 continue;
11808 if (!del_nr) {
11809 del_slot = path.slots[0];
11810 del_nr = 1;
11811 } else {
11812 del_nr++;
11814 path.slots[0]++;
11817 if (del_nr) {
11818 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11819 if (ret)
11820 goto out;
11822 btrfs_release_path(&path);
11824 reinit_data_reloc:
11825 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11826 key.type = BTRFS_ROOT_ITEM_KEY;
11827 key.offset = (u64)-1;
11828 root = btrfs_read_fs_root(fs_info, &key);
11829 if (IS_ERR(root)) {
11830 fprintf(stderr, "Error reading data reloc tree\n");
11831 ret = PTR_ERR(root);
11832 goto out;
11834 record_root_in_trans(trans, root);
11835 ret = btrfs_fsck_reinit_root(trans, root, 0);
11836 if (ret)
11837 goto out;
11838 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11839 out:
11840 btrfs_release_path(&path);
11841 return ret;
11844 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11845 struct btrfs_fs_info *fs_info)
11847 u64 start = 0;
11848 int ret;
11851 * The only reason we don't do this is because right now we're just
11852 * walking the trees we find and pinning down their bytes, we don't look
11853 * at any of the leaves. In order to do mixed groups we'd have to check
11854 * the leaves of any fs roots and pin down the bytes for any file
11855 * extents we find. Not hard but why do it if we don't have to?
11857 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11858 fprintf(stderr, "We don't support re-initing the extent tree "
11859 "for mixed block groups yet, please notify a btrfs "
11860 "developer you want to do this so they can add this "
11861 "functionality.\n");
11862 return -EINVAL;
11866 * first we need to walk all of the trees except the extent tree and pin
11867 * down the bytes that are in use so we don't overwrite any existing
11868 * metadata.
11870 ret = pin_metadata_blocks(fs_info);
11871 if (ret) {
11872 fprintf(stderr, "error pinning down used bytes\n");
11873 return ret;
11877 * Need to drop all the block groups since we're going to recreate all
11878 * of them again.
11880 btrfs_free_block_groups(fs_info);
11881 ret = reset_block_groups(fs_info);
11882 if (ret) {
11883 fprintf(stderr, "error resetting the block groups\n");
11884 return ret;
11887 /* Ok we can allocate now, reinit the extent root */
11888 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11889 if (ret) {
11890 fprintf(stderr, "extent root initialization failed\n");
11892 * When the transaction code is updated we should end the
11893 * transaction, but for now progs only knows about commit so
11894 * just return an error.
11896 return ret;
11900 * Now we have all the in-memory block groups setup so we can make
11901 * allocations properly, and the metadata we care about is safe since we
11902 * pinned all of it above.
11904 while (1) {
11905 struct btrfs_block_group_cache *cache;
11907 cache = btrfs_lookup_first_block_group(fs_info, start);
11908 if (!cache)
11909 break;
11910 start = cache->key.objectid + cache->key.offset;
11911 ret = btrfs_insert_item(trans, fs_info->extent_root,
11912 &cache->key, &cache->item,
11913 sizeof(cache->item));
11914 if (ret) {
11915 fprintf(stderr, "Error adding block group\n");
11916 return ret;
11918 btrfs_extent_post_op(trans, fs_info->extent_root);
11921 ret = reset_balance(trans, fs_info);
11922 if (ret)
11923 fprintf(stderr, "error resetting the pending balance\n");
11925 return ret;
11928 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11930 struct btrfs_path path;
11931 struct btrfs_trans_handle *trans;
11932 struct btrfs_key key;
11933 int ret;
11935 printf("Recowing metadata block %llu\n", eb->start);
11936 key.objectid = btrfs_header_owner(eb);
11937 key.type = BTRFS_ROOT_ITEM_KEY;
11938 key.offset = (u64)-1;
11940 root = btrfs_read_fs_root(root->fs_info, &key);
11941 if (IS_ERR(root)) {
11942 fprintf(stderr, "Couldn't find owner root %llu\n",
11943 key.objectid);
11944 return PTR_ERR(root);
11947 trans = btrfs_start_transaction(root, 1);
11948 if (IS_ERR(trans))
11949 return PTR_ERR(trans);
11951 btrfs_init_path(&path);
11952 path.lowest_level = btrfs_header_level(eb);
11953 if (path.lowest_level)
11954 btrfs_node_key_to_cpu(eb, &key, 0);
11955 else
11956 btrfs_item_key_to_cpu(eb, &key, 0);
11958 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11959 btrfs_commit_transaction(trans, root);
11960 btrfs_release_path(&path);
11961 return ret;
11964 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11966 struct btrfs_path path;
11967 struct btrfs_trans_handle *trans;
11968 struct btrfs_key key;
11969 int ret;
11971 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11972 bad->key.type, bad->key.offset);
11973 key.objectid = bad->root_id;
11974 key.type = BTRFS_ROOT_ITEM_KEY;
11975 key.offset = (u64)-1;
11977 root = btrfs_read_fs_root(root->fs_info, &key);
11978 if (IS_ERR(root)) {
11979 fprintf(stderr, "Couldn't find owner root %llu\n",
11980 key.objectid);
11981 return PTR_ERR(root);
11984 trans = btrfs_start_transaction(root, 1);
11985 if (IS_ERR(trans))
11986 return PTR_ERR(trans);
11988 btrfs_init_path(&path);
11989 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11990 if (ret) {
11991 if (ret > 0)
11992 ret = 0;
11993 goto out;
11995 ret = btrfs_del_item(trans, root, &path);
11996 out:
11997 btrfs_commit_transaction(trans, root);
11998 btrfs_release_path(&path);
11999 return ret;
12002 static int zero_log_tree(struct btrfs_root *root)
12004 struct btrfs_trans_handle *trans;
12005 int ret;
12007 trans = btrfs_start_transaction(root, 1);
12008 if (IS_ERR(trans)) {
12009 ret = PTR_ERR(trans);
12010 return ret;
12012 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12013 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12014 ret = btrfs_commit_transaction(trans, root);
12015 return ret;
12018 static int populate_csum(struct btrfs_trans_handle *trans,
12019 struct btrfs_root *csum_root, char *buf, u64 start,
12020 u64 len)
12022 u64 offset = 0;
12023 u64 sectorsize;
12024 int ret = 0;
12026 while (offset < len) {
12027 sectorsize = csum_root->sectorsize;
12028 ret = read_extent_data(csum_root, buf, start + offset,
12029 &sectorsize, 0);
12030 if (ret)
12031 break;
12032 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12033 start + offset, buf, sectorsize);
12034 if (ret)
12035 break;
12036 offset += sectorsize;
12038 return ret;
12041 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12042 struct btrfs_root *csum_root,
12043 struct btrfs_root *cur_root)
12045 struct btrfs_path path;
12046 struct btrfs_key key;
12047 struct extent_buffer *node;
12048 struct btrfs_file_extent_item *fi;
12049 char *buf = NULL;
12050 u64 start = 0;
12051 u64 len = 0;
12052 int slot = 0;
12053 int ret = 0;
12055 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12056 if (!buf)
12057 return -ENOMEM;
12059 btrfs_init_path(&path);
12060 key.objectid = 0;
12061 key.offset = 0;
12062 key.type = 0;
12063 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12064 if (ret < 0)
12065 goto out;
12066 /* Iterate all regular file extents and fill its csum */
12067 while (1) {
12068 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12070 if (key.type != BTRFS_EXTENT_DATA_KEY)
12071 goto next;
12072 node = path.nodes[0];
12073 slot = path.slots[0];
12074 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12075 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12076 goto next;
12077 start = btrfs_file_extent_disk_bytenr(node, fi);
12078 len = btrfs_file_extent_disk_num_bytes(node, fi);
12080 ret = populate_csum(trans, csum_root, buf, start, len);
12081 if (ret == -EEXIST)
12082 ret = 0;
12083 if (ret < 0)
12084 goto out;
12085 next:
12087 * TODO: if next leaf is corrupted, jump to nearest next valid
12088 * leaf.
12090 ret = btrfs_next_item(cur_root, &path);
12091 if (ret < 0)
12092 goto out;
12093 if (ret > 0) {
12094 ret = 0;
12095 goto out;
12099 out:
12100 btrfs_release_path(&path);
12101 free(buf);
12102 return ret;
12105 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12106 struct btrfs_root *csum_root)
12108 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12109 struct btrfs_path path;
12110 struct btrfs_root *tree_root = fs_info->tree_root;
12111 struct btrfs_root *cur_root;
12112 struct extent_buffer *node;
12113 struct btrfs_key key;
12114 int slot = 0;
12115 int ret = 0;
12117 btrfs_init_path(&path);
12118 key.objectid = BTRFS_FS_TREE_OBJECTID;
12119 key.offset = 0;
12120 key.type = BTRFS_ROOT_ITEM_KEY;
12121 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12122 if (ret < 0)
12123 goto out;
12124 if (ret > 0) {
12125 ret = -ENOENT;
12126 goto out;
12129 while (1) {
12130 node = path.nodes[0];
12131 slot = path.slots[0];
12132 btrfs_item_key_to_cpu(node, &key, slot);
12133 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12134 goto out;
12135 if (key.type != BTRFS_ROOT_ITEM_KEY)
12136 goto next;
12137 if (!is_fstree(key.objectid))
12138 goto next;
12139 key.offset = (u64)-1;
12141 cur_root = btrfs_read_fs_root(fs_info, &key);
12142 if (IS_ERR(cur_root) || !cur_root) {
12143 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12144 key.objectid);
12145 goto out;
12147 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12148 cur_root);
12149 if (ret < 0)
12150 goto out;
12151 next:
12152 ret = btrfs_next_item(tree_root, &path);
12153 if (ret > 0) {
12154 ret = 0;
12155 goto out;
12157 if (ret < 0)
12158 goto out;
12161 out:
12162 btrfs_release_path(&path);
12163 return ret;
12166 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12167 struct btrfs_root *csum_root)
12169 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12170 struct btrfs_path path;
12171 struct btrfs_extent_item *ei;
12172 struct extent_buffer *leaf;
12173 char *buf;
12174 struct btrfs_key key;
12175 int ret;
12177 btrfs_init_path(&path);
12178 key.objectid = 0;
12179 key.type = BTRFS_EXTENT_ITEM_KEY;
12180 key.offset = 0;
12181 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12182 if (ret < 0) {
12183 btrfs_release_path(&path);
12184 return ret;
12187 buf = malloc(csum_root->sectorsize);
12188 if (!buf) {
12189 btrfs_release_path(&path);
12190 return -ENOMEM;
12193 while (1) {
12194 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12195 ret = btrfs_next_leaf(extent_root, &path);
12196 if (ret < 0)
12197 break;
12198 if (ret) {
12199 ret = 0;
12200 break;
12203 leaf = path.nodes[0];
12205 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12206 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12207 path.slots[0]++;
12208 continue;
12211 ei = btrfs_item_ptr(leaf, path.slots[0],
12212 struct btrfs_extent_item);
12213 if (!(btrfs_extent_flags(leaf, ei) &
12214 BTRFS_EXTENT_FLAG_DATA)) {
12215 path.slots[0]++;
12216 continue;
12219 ret = populate_csum(trans, csum_root, buf, key.objectid,
12220 key.offset);
12221 if (ret)
12222 break;
12223 path.slots[0]++;
12226 btrfs_release_path(&path);
12227 free(buf);
12228 return ret;
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init wipes out all the extent info, so in that case the extent
 * tree cannot be relied upon and the fs trees are used instead.  When
 * search_fs_tree is set, the fs/subvol trees are used to init the csum tree;
 * otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
12248 static void free_roots_info_cache(void)
12250 if (!roots_info_cache)
12251 return;
12253 while (!cache_tree_empty(roots_info_cache)) {
12254 struct cache_extent *entry;
12255 struct root_item_info *rii;
12257 entry = first_cache_extent(roots_info_cache);
12258 if (!entry)
12259 break;
12260 remove_cache_extent(roots_info_cache, entry);
12261 rii = container_of(entry, struct root_item_info, cache_extent);
12262 free(rii);
12265 free(roots_info_cache);
12266 roots_info_cache = NULL;
12269 static int build_roots_info_cache(struct btrfs_fs_info *info)
12271 int ret = 0;
12272 struct btrfs_key key;
12273 struct extent_buffer *leaf;
12274 struct btrfs_path path;
12276 if (!roots_info_cache) {
12277 roots_info_cache = malloc(sizeof(*roots_info_cache));
12278 if (!roots_info_cache)
12279 return -ENOMEM;
12280 cache_tree_init(roots_info_cache);
12283 btrfs_init_path(&path);
12284 key.objectid = 0;
12285 key.type = BTRFS_EXTENT_ITEM_KEY;
12286 key.offset = 0;
12287 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12288 if (ret < 0)
12289 goto out;
12290 leaf = path.nodes[0];
12292 while (1) {
12293 struct btrfs_key found_key;
12294 struct btrfs_extent_item *ei;
12295 struct btrfs_extent_inline_ref *iref;
12296 int slot = path.slots[0];
12297 int type;
12298 u64 flags;
12299 u64 root_id;
12300 u8 level;
12301 struct cache_extent *entry;
12302 struct root_item_info *rii;
12304 if (slot >= btrfs_header_nritems(leaf)) {
12305 ret = btrfs_next_leaf(info->extent_root, &path);
12306 if (ret < 0) {
12307 break;
12308 } else if (ret) {
12309 ret = 0;
12310 break;
12312 leaf = path.nodes[0];
12313 slot = path.slots[0];
12316 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12318 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12319 found_key.type != BTRFS_METADATA_ITEM_KEY)
12320 goto next;
12322 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12323 flags = btrfs_extent_flags(leaf, ei);
12325 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12326 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12327 goto next;
12329 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12330 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12331 level = found_key.offset;
12332 } else {
12333 struct btrfs_tree_block_info *binfo;
12335 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12336 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12337 level = btrfs_tree_block_level(leaf, binfo);
12341 * For a root extent, it must be of the following type and the
12342 * first (and only one) iref in the item.
12344 type = btrfs_extent_inline_ref_type(leaf, iref);
12345 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12346 goto next;
12348 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12349 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12350 if (!entry) {
12351 rii = malloc(sizeof(struct root_item_info));
12352 if (!rii) {
12353 ret = -ENOMEM;
12354 goto out;
12356 rii->cache_extent.start = root_id;
12357 rii->cache_extent.size = 1;
12358 rii->level = (u8)-1;
12359 entry = &rii->cache_extent;
12360 ret = insert_cache_extent(roots_info_cache, entry);
12361 ASSERT(ret == 0);
12362 } else {
12363 rii = container_of(entry, struct root_item_info,
12364 cache_extent);
12367 ASSERT(rii->cache_extent.start == root_id);
12368 ASSERT(rii->cache_extent.size == 1);
12370 if (level > rii->level || rii->level == (u8)-1) {
12371 rii->level = level;
12372 rii->bytenr = found_key.objectid;
12373 rii->gen = btrfs_extent_generation(leaf, ei);
12374 rii->node_count = 1;
12375 } else if (level == rii->level) {
12376 rii->node_count++;
12378 next:
12379 path.slots[0]++;
12382 out:
12383 btrfs_release_path(&path);
12385 return ret;
12388 static int maybe_repair_root_item(struct btrfs_path *path,
12389 const struct btrfs_key *root_key,
12390 const int read_only_mode)
12392 const u64 root_id = root_key->objectid;
12393 struct cache_extent *entry;
12394 struct root_item_info *rii;
12395 struct btrfs_root_item ri;
12396 unsigned long offset;
12398 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12399 if (!entry) {
12400 fprintf(stderr,
12401 "Error: could not find extent items for root %llu\n",
12402 root_key->objectid);
12403 return -ENOENT;
12406 rii = container_of(entry, struct root_item_info, cache_extent);
12407 ASSERT(rii->cache_extent.start == root_id);
12408 ASSERT(rii->cache_extent.size == 1);
12410 if (rii->node_count != 1) {
12411 fprintf(stderr,
12412 "Error: could not find btree root extent for root %llu\n",
12413 root_id);
12414 return -ENOENT;
12417 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12418 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12420 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12421 btrfs_root_level(&ri) != rii->level ||
12422 btrfs_root_generation(&ri) != rii->gen) {
12425 * If we're in repair mode but our caller told us to not update
12426 * the root item, i.e. just check if it needs to be updated, don't
12427 * print this message, since the caller will call us again shortly
12428 * for the same root item without read only mode (the caller will
12429 * open a transaction first).
12431 if (!(read_only_mode && repair))
12432 fprintf(stderr,
12433 "%sroot item for root %llu,"
12434 " current bytenr %llu, current gen %llu, current level %u,"
12435 " new bytenr %llu, new gen %llu, new level %u\n",
12436 (read_only_mode ? "" : "fixing "),
12437 root_id,
12438 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12439 btrfs_root_level(&ri),
12440 rii->bytenr, rii->gen, rii->level);
12442 if (btrfs_root_generation(&ri) > rii->gen) {
12443 fprintf(stderr,
12444 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12445 root_id, btrfs_root_generation(&ri), rii->gen);
12446 return -EINVAL;
12449 if (!read_only_mode) {
12450 btrfs_set_root_bytenr(&ri, rii->bytenr);
12451 btrfs_set_root_level(&ri, rii->level);
12452 btrfs_set_root_generation(&ri, rii->gen);
12453 write_extent_buffer(path->nodes[0], &ri,
12454 offset, sizeof(ri));
12457 return 1;
12460 return 0;
12464 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12465 * caused read-only snapshots to be corrupted if they were created at a moment
12466 * when the source subvolume/snapshot had orphan items. The issue was that the
12467 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12468 * node instead of the post orphan cleanup root node.
12469 * So this function, and its callees, just detects and fixes those cases. Even
12470 * though the regression was for read-only snapshots, this function applies to
12471 * any snapshot/subvolume root.
12472 * This must be run before any other repair code - not doing it so, makes other
12473 * repair code delete or modify backrefs in the extent tree for example, which
12474 * will result in an inconsistent fs after repairing the root items.
12476 static int repair_root_items(struct btrfs_fs_info *info)
12478 struct btrfs_path path;
12479 struct btrfs_key key;
12480 struct extent_buffer *leaf;
12481 struct btrfs_trans_handle *trans = NULL;
12482 int ret = 0;
12483 int bad_roots = 0;
12484 int need_trans = 0;
12486 btrfs_init_path(&path);
12488 ret = build_roots_info_cache(info);
12489 if (ret)
12490 goto out;
12492 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12493 key.type = BTRFS_ROOT_ITEM_KEY;
12494 key.offset = 0;
12496 again:
12498 * Avoid opening and committing transactions if a leaf doesn't have
12499 * any root items that need to be fixed, so that we avoid rotating
12500 * backup roots unnecessarily.
12502 if (need_trans) {
12503 trans = btrfs_start_transaction(info->tree_root, 1);
12504 if (IS_ERR(trans)) {
12505 ret = PTR_ERR(trans);
12506 goto out;
12510 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12511 0, trans ? 1 : 0);
12512 if (ret < 0)
12513 goto out;
12514 leaf = path.nodes[0];
12516 while (1) {
12517 struct btrfs_key found_key;
12519 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12520 int no_more_keys = find_next_key(&path, &key);
12522 btrfs_release_path(&path);
12523 if (trans) {
12524 ret = btrfs_commit_transaction(trans,
12525 info->tree_root);
12526 trans = NULL;
12527 if (ret < 0)
12528 goto out;
12530 need_trans = 0;
12531 if (no_more_keys)
12532 break;
12533 goto again;
12536 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12538 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12539 goto next;
12540 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12541 goto next;
12543 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12544 if (ret < 0)
12545 goto out;
12546 if (ret) {
12547 if (!trans && repair) {
12548 need_trans = 1;
12549 key = found_key;
12550 btrfs_release_path(&path);
12551 goto again;
12553 bad_roots++;
12555 next:
12556 path.slots[0]++;
12558 ret = 0;
12559 out:
12560 free_roots_info_cache();
12561 btrfs_release_path(&path);
12562 if (trans)
12563 btrfs_commit_transaction(trans, info->tree_root);
12564 if (ret < 0)
12565 return ret;
12567 return bad_roots;
12570 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12572 struct btrfs_trans_handle *trans;
12573 struct btrfs_block_group_cache *bg_cache;
12574 u64 current = 0;
12575 int ret = 0;
12577 /* Clear all free space cache inodes and its extent data */
12578 while (1) {
12579 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12580 if (!bg_cache)
12581 break;
12582 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12583 if (ret < 0)
12584 return ret;
12585 current = bg_cache->key.objectid + bg_cache->key.offset;
12588 /* Don't forget to set cache_generation to -1 */
12589 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12590 if (IS_ERR(trans)) {
12591 error("failed to update super block cache generation");
12592 return PTR_ERR(trans);
12594 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12595 btrfs_commit_transaction(trans, fs_info->tree_root);
12597 return ret;
/*
 * Help text for "btrfs check".  The table is handed to usage() by
 * cmd_check() when the command line cannot be parsed or the number of
 * positional arguments is wrong.  The list is terminated by NULL.
 *
 * Every long option listed here has a matching entry in the long_options
 * table inside cmd_check(); keep the two in sync when adding options.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
12631 int cmd_check(int argc, char **argv)
12633 struct cache_tree root_cache;
12634 struct btrfs_root *root;
12635 struct btrfs_fs_info *info;
12636 u64 bytenr = 0;
12637 u64 subvolid = 0;
12638 u64 tree_root_bytenr = 0;
12639 u64 chunk_root_bytenr = 0;
12640 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12641 int ret;
12642 int err = 0;
12643 u64 num;
12644 int init_csum_tree = 0;
12645 int readonly = 0;
12646 int clear_space_cache = 0;
12647 int qgroup_report = 0;
12648 int qgroups_repaired = 0;
12649 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12651 while(1) {
12652 int c;
12653 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12654 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12655 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12656 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12657 static const struct option long_options[] = {
12658 { "super", required_argument, NULL, 's' },
12659 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12660 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12661 { "init-csum-tree", no_argument, NULL,
12662 GETOPT_VAL_INIT_CSUM },
12663 { "init-extent-tree", no_argument, NULL,
12664 GETOPT_VAL_INIT_EXTENT },
12665 { "check-data-csum", no_argument, NULL,
12666 GETOPT_VAL_CHECK_CSUM },
12667 { "backup", no_argument, NULL, 'b' },
12668 { "subvol-extents", required_argument, NULL, 'E' },
12669 { "qgroup-report", no_argument, NULL, 'Q' },
12670 { "tree-root", required_argument, NULL, 'r' },
12671 { "chunk-root", required_argument, NULL,
12672 GETOPT_VAL_CHUNK_TREE },
12673 { "progress", no_argument, NULL, 'p' },
12674 { "mode", required_argument, NULL,
12675 GETOPT_VAL_MODE },
12676 { "clear-space-cache", required_argument, NULL,
12677 GETOPT_VAL_CLEAR_SPACE_CACHE},
12678 { NULL, 0, NULL, 0}
12681 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12682 if (c < 0)
12683 break;
12684 switch(c) {
12685 case 'a': /* ignored */ break;
12686 case 'b':
12687 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12688 break;
12689 case 's':
12690 num = arg_strtou64(optarg);
12691 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12692 error(
12693 "super mirror should be less than %d",
12694 BTRFS_SUPER_MIRROR_MAX);
12695 exit(1);
12697 bytenr = btrfs_sb_offset(((int)num));
12698 printf("using SB copy %llu, bytenr %llu\n", num,
12699 (unsigned long long)bytenr);
12700 break;
12701 case 'Q':
12702 qgroup_report = 1;
12703 break;
12704 case 'E':
12705 subvolid = arg_strtou64(optarg);
12706 break;
12707 case 'r':
12708 tree_root_bytenr = arg_strtou64(optarg);
12709 break;
12710 case GETOPT_VAL_CHUNK_TREE:
12711 chunk_root_bytenr = arg_strtou64(optarg);
12712 break;
12713 case 'p':
12714 ctx.progress_enabled = true;
12715 break;
12716 case '?':
12717 case 'h':
12718 usage(cmd_check_usage);
12719 case GETOPT_VAL_REPAIR:
12720 printf("enabling repair mode\n");
12721 repair = 1;
12722 ctree_flags |= OPEN_CTREE_WRITES;
12723 break;
12724 case GETOPT_VAL_READONLY:
12725 readonly = 1;
12726 break;
12727 case GETOPT_VAL_INIT_CSUM:
12728 printf("Creating a new CRC tree\n");
12729 init_csum_tree = 1;
12730 repair = 1;
12731 ctree_flags |= OPEN_CTREE_WRITES;
12732 break;
12733 case GETOPT_VAL_INIT_EXTENT:
12734 init_extent_tree = 1;
12735 ctree_flags |= (OPEN_CTREE_WRITES |
12736 OPEN_CTREE_NO_BLOCK_GROUPS);
12737 repair = 1;
12738 break;
12739 case GETOPT_VAL_CHECK_CSUM:
12740 check_data_csum = 1;
12741 break;
12742 case GETOPT_VAL_MODE:
12743 check_mode = parse_check_mode(optarg);
12744 if (check_mode == CHECK_MODE_UNKNOWN) {
12745 error("unknown mode: %s", optarg);
12746 exit(1);
12748 break;
12749 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12750 if (strcmp(optarg, "v1") == 0) {
12751 clear_space_cache = 1;
12752 } else if (strcmp(optarg, "v2") == 0) {
12753 clear_space_cache = 2;
12754 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12755 } else {
12756 error(
12757 "invalid argument to --clear-space-cache, must be v1 or v2");
12758 exit(1);
12760 ctree_flags |= OPEN_CTREE_WRITES;
12761 break;
12765 if (check_argc_exact(argc - optind, 1))
12766 usage(cmd_check_usage);
12768 if (ctx.progress_enabled) {
12769 ctx.tp = TASK_NOTHING;
12770 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12773 /* This check is the only reason for --readonly to exist */
12774 if (readonly && repair) {
12775 error("repair options are not compatible with --readonly");
12776 exit(1);
12780 * Not supported yet
12782 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12783 error("low memory mode doesn't support repair yet");
12784 exit(1);
12787 radix_tree_init();
12788 cache_tree_init(&root_cache);
12790 if((ret = check_mounted(argv[optind])) < 0) {
12791 error("could not check mount status: %s", strerror(-ret));
12792 err |= !!ret;
12793 goto err_out;
12794 } else if(ret) {
12795 error("%s is currently mounted, aborting", argv[optind]);
12796 ret = -EBUSY;
12797 err |= !!ret;
12798 goto err_out;
12801 /* only allow partial opening under repair mode */
12802 if (repair)
12803 ctree_flags |= OPEN_CTREE_PARTIAL;
12805 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12806 chunk_root_bytenr, ctree_flags);
12807 if (!info) {
12808 error("cannot open file system");
12809 ret = -EIO;
12810 err |= !!ret;
12811 goto err_out;
12814 global_info = info;
12815 root = info->fs_root;
12816 if (clear_space_cache == 1) {
12817 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12818 error(
12819 "free space cache v2 detected, use --clear-space-cache v2");
12820 ret = 1;
12821 goto close_out;
12823 printf("Clearing free space cache\n");
12824 ret = clear_free_space_cache(info);
12825 if (ret) {
12826 error("failed to clear free space cache");
12827 ret = 1;
12828 } else {
12829 printf("Free space cache cleared\n");
12831 goto close_out;
12832 } else if (clear_space_cache == 2) {
12833 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12834 printf("no free space cache v2 to clear\n");
12835 ret = 0;
12836 goto close_out;
12838 printf("Clear free space cache v2\n");
12839 ret = btrfs_clear_free_space_tree(info);
12840 if (ret) {
12841 error("failed to clear free space cache v2: %d", ret);
12842 ret = 1;
12843 } else {
12844 printf("free space cache v2 cleared\n");
12846 goto close_out;
12850 * repair mode will force us to commit transaction which
12851 * will make us fail to load log tree when mounting.
12853 if (repair && btrfs_super_log_root(info->super_copy)) {
12854 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12855 if (!ret) {
12856 ret = 1;
12857 err |= !!ret;
12858 goto close_out;
12860 ret = zero_log_tree(root);
12861 err |= !!ret;
12862 if (ret) {
12863 error("failed to zero log tree: %d", ret);
12864 goto close_out;
12868 uuid_unparse(info->super_copy->fsid, uuidbuf);
12869 if (qgroup_report) {
12870 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12871 uuidbuf);
12872 ret = qgroup_verify_all(info);
12873 err |= !!ret;
12874 if (ret == 0)
12875 report_qgroups(1);
12876 goto close_out;
12878 if (subvolid) {
12879 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12880 subvolid, argv[optind], uuidbuf);
12881 ret = print_extent_state(info, subvolid);
12882 err |= !!ret;
12883 goto close_out;
12885 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12887 if (!extent_buffer_uptodate(info->tree_root->node) ||
12888 !extent_buffer_uptodate(info->dev_root->node) ||
12889 !extent_buffer_uptodate(info->chunk_root->node)) {
12890 error("critical roots corrupted, unable to check the filesystem");
12891 err |= !!ret;
12892 ret = -EIO;
12893 goto close_out;
12896 if (init_extent_tree || init_csum_tree) {
12897 struct btrfs_trans_handle *trans;
12899 trans = btrfs_start_transaction(info->extent_root, 0);
12900 if (IS_ERR(trans)) {
12901 error("error starting transaction");
12902 ret = PTR_ERR(trans);
12903 err |= !!ret;
12904 goto close_out;
12907 if (init_extent_tree) {
12908 printf("Creating a new extent tree\n");
12909 ret = reinit_extent_tree(trans, info);
12910 err |= !!ret;
12911 if (ret)
12912 goto close_out;
12915 if (init_csum_tree) {
12916 printf("Reinitialize checksum tree\n");
12917 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12918 if (ret) {
12919 error("checksum tree initialization failed: %d",
12920 ret);
12921 ret = -EIO;
12922 err |= !!ret;
12923 goto close_out;
12926 ret = fill_csum_tree(trans, info->csum_root,
12927 init_extent_tree);
12928 err |= !!ret;
12929 if (ret) {
12930 error("checksum tree refilling failed: %d", ret);
12931 return -EIO;
12935 * Ok now we commit and run the normal fsck, which will add
12936 * extent entries for all of the items it finds.
12938 ret = btrfs_commit_transaction(trans, info->extent_root);
12939 err |= !!ret;
12940 if (ret)
12941 goto close_out;
12943 if (!extent_buffer_uptodate(info->extent_root->node)) {
12944 error("critical: extent_root, unable to check the filesystem");
12945 ret = -EIO;
12946 err |= !!ret;
12947 goto close_out;
12949 if (!extent_buffer_uptodate(info->csum_root->node)) {
12950 error("critical: csum_root, unable to check the filesystem");
12951 ret = -EIO;
12952 err |= !!ret;
12953 goto close_out;
12956 if (!ctx.progress_enabled)
12957 fprintf(stderr, "checking extents\n");
12958 if (check_mode == CHECK_MODE_LOWMEM)
12959 ret = check_chunks_and_extents_v2(root);
12960 else
12961 ret = check_chunks_and_extents(root);
12962 err |= !!ret;
12963 if (ret)
12964 error(
12965 "errors found in extent allocation tree or chunk allocation");
12967 ret = repair_root_items(info);
12968 err |= !!ret;
12969 if (ret < 0) {
12970 error("failed to repair root items: %s", strerror(-ret));
12971 goto close_out;
12973 if (repair) {
12974 fprintf(stderr, "Fixed %d roots.\n", ret);
12975 ret = 0;
12976 } else if (ret > 0) {
12977 fprintf(stderr,
12978 "Found %d roots with an outdated root item.\n",
12979 ret);
12980 fprintf(stderr,
12981 "Please run a filesystem check with the option --repair to fix them.\n");
12982 ret = 1;
12983 err |= !!ret;
12984 goto close_out;
12987 if (!ctx.progress_enabled) {
12988 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12989 fprintf(stderr, "checking free space tree\n");
12990 else
12991 fprintf(stderr, "checking free space cache\n");
12993 ret = check_space_cache(root);
12994 err |= !!ret;
12995 if (ret) {
12996 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12997 error("errors found in free space tree");
12998 else
12999 error("errors found in free space cache");
13000 goto out;
13004 * We used to have to have these hole extents in between our real
13005 * extents so if we don't have this flag set we need to make sure there
13006 * are no gaps in the file extents for inodes, otherwise we can just
13007 * ignore it when this happens.
13009 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13010 if (!ctx.progress_enabled)
13011 fprintf(stderr, "checking fs roots\n");
13012 if (check_mode == CHECK_MODE_LOWMEM)
13013 ret = check_fs_roots_v2(root->fs_info);
13014 else
13015 ret = check_fs_roots(root, &root_cache);
13016 err |= !!ret;
13017 if (ret) {
13018 error("errors found in fs roots");
13019 goto out;
13022 fprintf(stderr, "checking csums\n");
13023 ret = check_csums(root);
13024 err |= !!ret;
13025 if (ret) {
13026 error("errors found in csum tree");
13027 goto out;
13030 fprintf(stderr, "checking root refs\n");
13031 /* For low memory mode, check_fs_roots_v2 handles root refs */
13032 if (check_mode != CHECK_MODE_LOWMEM) {
13033 ret = check_root_refs(root, &root_cache);
13034 err |= !!ret;
13035 if (ret) {
13036 error("errors found in root refs");
13037 goto out;
13041 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13042 struct extent_buffer *eb;
13044 eb = list_first_entry(&root->fs_info->recow_ebs,
13045 struct extent_buffer, recow);
13046 list_del_init(&eb->recow);
13047 ret = recow_extent_buffer(root, eb);
13048 err |= !!ret;
13049 if (ret) {
13050 error("fails to fix transid errors");
13051 break;
13055 while (!list_empty(&delete_items)) {
13056 struct bad_item *bad;
13058 bad = list_first_entry(&delete_items, struct bad_item, list);
13059 list_del_init(&bad->list);
13060 if (repair) {
13061 ret = delete_bad_item(root, bad);
13062 err |= !!ret;
13064 free(bad);
13067 if (info->quota_enabled) {
13068 fprintf(stderr, "checking quota groups\n");
13069 ret = qgroup_verify_all(info);
13070 err |= !!ret;
13071 if (ret) {
13072 error("failed to check quota groups");
13073 goto out;
13075 report_qgroups(0);
13076 ret = repair_qgroups(info, &qgroups_repaired);
13077 err |= !!ret;
13078 if (err) {
13079 error("failed to repair quota groups");
13080 goto out;
13082 ret = 0;
13085 if (!list_empty(&root->fs_info->recow_ebs)) {
13086 error("transid errors in file system");
13087 ret = 1;
13088 err |= !!ret;
13090 out:
13091 if (found_old_backref) { /*
13092 * there was a disk format change when mixed
13093 * backref was in testing tree. The old format
13094 * existed about one week.
13096 printf("\n * Found old mixed backref format. "
13097 "The old format is not supported! *"
13098 "\n * Please mount the FS in readonly mode, "
13099 "backup data and re-format the FS. *\n\n");
13100 err |= 1;
13102 printf("found %llu bytes used, ",
13103 (unsigned long long)bytes_used);
13104 if (err)
13105 printf("error(s) found\n");
13106 else
13107 printf("no error found\n");
13108 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13109 printf("total tree bytes: %llu\n",
13110 (unsigned long long)total_btree_bytes);
13111 printf("total fs tree bytes: %llu\n",
13112 (unsigned long long)total_fs_tree_bytes);
13113 printf("total extent tree bytes: %llu\n",
13114 (unsigned long long)total_extent_tree_bytes);
13115 printf("btree space waste bytes: %llu\n",
13116 (unsigned long long)btree_space_waste);
13117 printf("file data blocks allocated: %llu\n referenced %llu\n",
13118 (unsigned long long)data_bytes_allocated,
13119 (unsigned long long)data_bytes_referenced);
13121 free_qgroup_counts();
13122 free_root_recs_tree(&root_cache);
13123 close_out:
13124 close_ctree(root);
13125 err_out:
13126 if (ctx.progress_enabled)
13127 task_deinit(ctx.info);
13129 return err;