btrfs-progs: check: lowmem: Fix several bugs related to afterward search
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob89d34d8c99387765d86766d1e826902c935a9d6c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/*
 * Phase of the check that the progress indicator reports.  The value is
 * used as an index into the message table in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
54 struct task_ctx {
55 int progress_enabled;
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/* Check implementation selected by the user; see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts at the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
104 union {
105 u64 parent;
106 u64 root;
108 u64 owner;
109 u64 offset;
110 u64 disk_bytenr;
111 u64 bytes;
112 u64 ram_bytes;
113 u32 num_refs;
114 u32 found_ref;
/* Per-inode / per-root error bits; each one occupies a distinct bit. */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
148 u64 root;
149 u64 objectid;
150 u64 offset;
151 u64 disk_bytenr;
152 u64 disk_len;
155 struct tree_backref {
156 struct extent_backref node;
157 union {
158 u64 parent;
159 u64 root;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref, which is a 2-bit field.
 */
enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
177 u64 start;
178 u64 max_size;
179 u64 nr;
180 u64 refs;
181 u64 extent_item_refs;
182 u64 generation;
183 u64 parent_generation;
184 u64 info_objectid;
185 u32 num_duplicates;
186 u8 info_level;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
208 u8 filetype;
209 u8 ref_type;
210 int errors;
211 u64 dir;
212 u64 index;
213 u16 namelen;
214 char name[0];
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
224 u64 objectid;
225 u64 bytenr;
226 u64 last_snapshot;
227 u8 level;
228 u8 drop_level;
229 int level_size;
230 struct btrfs_key drop_key;
/* Error bits for inode_backref::errors and root_backref::errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
247 struct file_extent_hole {
248 struct rb_node node;
249 u64 start;
250 u64 len;
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
263 int errors;
265 u64 ino;
266 u32 nlink;
267 u32 imode;
268 u64 isize;
269 u64 nbytes;
271 u32 found_link;
272 u64 found_size;
273 u64 extent_start;
274 u64 extent_end;
275 struct rb_root holes;
276 struct list_head orphan_extents;
278 u32 refs;
/* Error bits for inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
304 int errors;
305 u64 ref_root;
306 u64 dir;
307 u64 index;
308 u16 namelen;
309 char name[0];
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
317 struct root_record {
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
321 u64 objectid;
322 u32 found_ref;
325 struct ptr_node {
326 struct cache_extent cache;
327 void *data;
330 struct shared_node {
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
335 u32 refs;
338 struct block_info {
339 u64 start;
340 u32 size;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
346 int active_node;
347 int root_level;
350 struct bad_item {
351 struct btrfs_key key;
352 u64 root_id;
353 struct list_head list;
356 struct extent_entry {
357 u64 bytenr;
358 u64 bytes;
359 int count;
360 int broken;
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
366 u8 level;
367 /* number of nodes at this level, must be 1 for a root */
368 int node_count;
369 u64 bytenr;
370 u64 gen;
371 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only
 * used internally for error classification.  Every flag must occupy its
 * own bit so that two different problems never alias each other.
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH; it now has a bit of its own.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
/*
 * Terminate the status line written by print_status_check() with a
 * newline and flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868 u64 ino, int mod)
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
873 int ret;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
876 if (cache) {
877 node = container_of(cache, struct ptr_node, cache);
878 rec = node->data;
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
882 return node->data;
883 rec->refs--;
884 rec = node->data;
886 } else if (mod) {
887 rec = calloc(1, sizeof(*rec));
888 if (!rec)
889 return ERR_PTR(-ENOMEM);
890 rec->ino = ino;
891 rec->extent_start = (u64)-1;
892 rec->refs = 1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
898 if (!node) {
899 free(rec);
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
904 node->data = rec;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
907 rec->found_link = 1;
909 ret = insert_cache_extent(inode_cache, &node->cache);
910 if (ret)
911 return ERR_PTR(-EEXIST);
913 return rec;
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
924 free(orphan);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
932 if (--rec->refs > 0)
933 return;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
938 free(backref);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
942 free(rec);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949 return 1;
950 return 0;
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
959 u8 filetype;
961 if (!rec->found_inode_item)
962 return;
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
972 free(backref);
977 if (!rec->checked || rec->merging)
978 return;
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1009 free(node);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1018 int ret;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1022 key.offset = ino;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1027 if (ret > 0)
1028 ret = -ENOENT;
1029 return ret;
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043 return 1;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->nodatasum = 1;
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1056 return 0;
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 const char *name,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 break;
1068 if (backref->dir != dir || backref->namelen != namelen)
1069 continue;
1070 if (memcmp(name, backref->name, namelen))
1071 continue;
1072 return backref;
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1076 if (!backref)
1077 return NULL;
1078 memset(backref, 0, sizeof(*backref));
1079 backref->dir = dir;
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1084 return backref;
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1098 BUG_ON(!backref);
1099 if (errors)
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 rec->found_link++;
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 else
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1132 } else {
1133 BUG_ON(1);
1136 maybe_free_inode_rec(inode_cache, rec);
1137 return 0;
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1144 u32 dir_count = 0;
1145 int ret = 0;
1147 dst->merging = 1;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1156 dir_count++;
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180 if (ret < 0)
1181 return ret;
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1191 } else {
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1196 dst->extent_end,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1213 } else {
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1217 dst->merging = 0;
1219 return 0;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1230 int splice = 0;
1231 int ret;
1233 if (--src_node->refs == 0)
1234 splice = 1;
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1240 again:
1241 cache = search_cache_extent(src, 0);
1242 while (cache) {
1243 node = container_of(cache, struct ptr_node, cache);
1244 rec = node->data;
1245 cache = next_cache_extent(cache);
1247 if (splice) {
1248 remove_cache_extent(src, &node->cache);
1249 ins = node;
1250 } else {
1251 ins = malloc(sizeof(*ins));
1252 BUG_ON(!ins);
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1255 ins->data = rec;
1256 rec->refs++;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1263 if (rec->checked) {
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1270 free(ins);
1271 } else {
1272 BUG_ON(ret);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1279 goto again;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1291 return 0;
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1300 rec = node->data;
1301 free_inode_rec(rec);
1302 free(node);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308 u64 bytenr)
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1314 if (cache) {
1315 node = container_of(cache, struct shared_node, cache);
1316 return node;
1318 return NULL;
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 int ret;
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1327 if (!node)
1328 return -ENOMEM;
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1333 node->refs = refs;
1335 ret = insert_cache_extent(shared, &node->cache);
1337 return ret;
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1345 int ret;
1347 if (level == wc->active_node)
1348 return 0;
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1352 if (!node) {
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 BUG_ON(ret);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1358 return 0;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1367 free(node);
1369 return 1;
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1376 free(node);
1378 return 1;
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1386 int i;
1388 if (level == wc->root_level)
1389 return 0;
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392 if (wc->nodes[i])
1393 break;
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1406 } else {
1407 BUG_ON(node->refs < 2);
1408 node->refs--;
1410 return 0;
1414 * Returns:
1415 * < 0 - on error
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422 u64 child_root_id)
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1427 int has_parent = 0;
1428 int ret;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436 0, 0);
1437 if (ret < 0)
1438 return ret;
1439 btrfs_release_path(&path);
1440 if (!ret)
1441 return 1;
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 key.offset = 0;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447 0, 0);
1448 if (ret < 0)
1449 goto out;
1451 while (1) {
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455 if (ret)
1456 break;
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1463 break;
1465 has_parent = 1;
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1469 return 1;
1472 path.slots[0]++;
1474 out:
1475 btrfs_release_path(&path);
1476 if (ret < 0)
1477 return ret;
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1485 u32 total;
1486 u32 cur = 0;
1487 u32 len;
1488 u32 name_len;
1489 u32 data_len;
1490 int error;
1491 int nritems = 0;
1492 u8 filetype;
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1508 nritems++;
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1516 len = name_len;
1517 error = 0;
1518 } else {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1532 key->type, error);
1533 } else {
1534 fprintf(stderr, "invalid location in dir item %u\n",
1535 location.type);
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1543 cur += len;
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548 return 0;
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1555 u32 total;
1556 u32 cur = 0;
1557 u32 len;
1558 u32 name_len;
1559 u64 index;
1560 int error;
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1573 len = name_len;
1574 error = 0;
1575 } else {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585 cur += len;
1587 return 0;
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1594 u32 total;
1595 u32 cur = 0;
1596 u32 len;
1597 u32 name_len;
1598 u64 index;
1599 u64 parent;
1600 int error;
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1614 len = name_len;
1615 error = 0;
1616 } else {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627 cur += len;
1629 return 0;
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1639 int ret;
1640 size_t size;
1641 *found = 0;
1642 u64 csum_end;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648 key.offset = start;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652 &key, &path, 0, 0);
1653 if (ret < 0)
1654 goto out;
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1660 path.slots[0]--;
1663 while (len > 0) {
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667 if (ret > 0)
1668 break;
1669 else if (ret < 0)
1670 goto out;
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1677 break;
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1681 break;
1683 if (key.offset > start)
1684 start = key.offset;
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1690 len -= size;
1691 start += size;
1692 *found += size;
1695 path.slots[0]++;
1697 out:
1698 btrfs_release_path(&path);
1699 if (ret < 0)
1700 return ret;
1701 return 0;
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1711 u64 num_bytes = 0;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1715 int extent_type;
1716 int ret;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1732 if (ret < 0)
1733 return ret;
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741 if (num_bytes == 0)
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1762 } else {
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1773 * data reloc tree.
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777 u64 found;
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780 else
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784 if (ret < 0)
1785 return ret;
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787 if (found > 0)
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792 if (found > 0)
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1796 return 0;
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1803 u32 nritems;
1804 int i;
1805 int ret = 0;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1811 return 0;
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820 continue;
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822 continue;
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1832 key.objectid, 1);
1833 BUG_ON(IS_ERR(active_node->current));
1835 switch (key.type) {
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1839 break;
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1842 break;
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1845 break;
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1848 break;
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
1851 active_node);
1852 break;
1853 default:
1854 break;
1857 return ret;
1860 struct node_refs {
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872 struct node_refs *nrefs, int *level, int ext_ref)
1874 struct extent_buffer *cur = path->nodes[0];
1875 struct btrfs_key key;
1876 u64 cur_bytenr;
1877 u32 nritems;
1878 u64 first_ino = 0;
1879 int root_level = btrfs_header_level(root->node);
1880 int i;
1881 int ret = 0; /* Final return value */
1882 int err = 0; /* Positive error bitmap */
1884 cur_bytenr = cur->start;
1886 /* skip to first inode item or the first inode number change */
1887 nritems = btrfs_header_nritems(cur);
1888 for (i = 0; i < nritems; i++) {
1889 btrfs_item_key_to_cpu(cur, &key, i);
1890 if (i == 0)
1891 first_ino = key.objectid;
1892 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893 (first_ino && first_ino != key.objectid))
1894 break;
1896 if (i == nritems) {
1897 path->slots[0] = nritems;
1898 return 0;
1900 path->slots[0] = i;
1902 again:
1903 err |= check_inode_item(root, path, ext_ref);
1905 if (err & LAST_ITEM)
1906 goto out;
1908 /* still have inode items in thie leaf */
1909 if (cur->start == cur_bytenr)
1910 goto again;
1913 * we have switched to another leaf, above nodes may
1914 * have changed, here walk down the path, if a node
1915 * or leaf is shared, check whether we can skip this
1916 * node or leaf.
1918 for (i = root_level; i >= 0; i--) {
1919 if (path->nodes[i]->start == nrefs->bytenr[i])
1920 continue;
1922 ret = update_nodes_refs(root,
1923 path->nodes[i]->start,
1924 nrefs, i);
1925 if (ret)
1926 goto out;
1928 if (!nrefs->need_check[i]) {
1929 *level += 1;
1930 break;
1934 for (i = 0; i < *level; i++) {
1935 free_extent_buffer(path->nodes[i]);
1936 path->nodes[i] = NULL;
1938 out:
1939 err &= ~LAST_ITEM;
1941 * Convert any error bitmap to -EIO, as we should avoid
1942 * mixing positive and negative return value to represent
1943 * error
1945 if (err && !ret)
1946 ret = -EIO;
1947 return ret;
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1953 u64 bytenr;
1954 u64 ptr_gen;
1955 u32 nritems;
1956 u32 blocksize;
1957 int i;
1958 int level;
1960 level = btrfs_header_level(node);
1961 if (level != 1)
1962 return;
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1976 * in parent.
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1991 int ret = 0;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1996 else
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000 ret = -EINVAL;
2001 fprintf(stderr,
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007 ret = -EINVAL;
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2014 ret = -EINVAL;
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2019 return ret;
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find its all root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2033 return 1;
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2042 return 0;
2044 return 1;
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2054 int check, ret;
2055 u64 refs;
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2061 if (ret < 0)
2062 return ret;
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2066 if (refs > 1) {
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068 0, &roots);
2069 if (ret)
2070 return -EIO;
2072 check = need_check(root, roots);
2073 ulist_free(roots);
2074 nrefs->need_check[level] = check;
2075 } else {
2076 nrefs->need_check[level] = 1;
2080 return 0;
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2088 u64 bytenr;
2089 u64 ptr_gen;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2092 u32 blocksize;
2093 int ret, err = 0;
2094 u64 refs;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2101 ret = 0;
2102 } else {
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2106 if (ret < 0) {
2107 err = ret;
2108 goto out;
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2114 if (refs > 1) {
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2116 refs, wc, *level);
2117 if (ret > 0) {
2118 err = ret;
2119 goto out;
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2129 WARN_ON(1);
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132 break;
2133 if (*level == 0) {
2134 ret = process_one_leaf(root, cur, wc);
2135 if (ret < 0)
2136 err = ret;
2137 break;
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2145 } else {
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2148 if (ret < 0) {
2149 refs = 0;
2150 } else {
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2156 if (refs > 1) {
2157 ret = enter_shared_node(root, bytenr, refs,
2158 wc, *level - 1);
2159 if (ret > 0) {
2160 path->slots[*level]++;
2161 continue;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2170 ptr_gen);
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2175 &node_key,
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2178 &node_key,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2181 err = -EIO;
2182 goto out;
2186 ret = check_child_node(cur, path->slots[*level], next);
2187 if (ret) {
2188 err = ret;
2189 goto out;
2192 if (btrfs_is_leaf(next))
2193 status = btrfs_check_leaf(root, NULL, next);
2194 else
2195 status = btrfs_check_node(root, NULL, next);
2196 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197 free_extent_buffer(next);
2198 err = -EIO;
2199 goto out;
2202 *level = *level - 1;
2203 free_extent_buffer(path->nodes[*level]);
2204 path->nodes[*level] = next;
2205 path->slots[*level] = 0;
2207 out:
2208 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209 return err;
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213 unsigned int ext_ref);
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216 int *level, struct node_refs *nrefs, int ext_ref)
2218 enum btrfs_tree_block_status status;
2219 u64 bytenr;
2220 u64 ptr_gen;
2221 struct extent_buffer *next;
2222 struct extent_buffer *cur;
2223 u32 blocksize;
2224 int ret;
2226 WARN_ON(*level < 0);
2227 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229 ret = update_nodes_refs(root, path->nodes[*level]->start,
2230 nrefs, *level);
2231 if (ret < 0)
2232 return ret;
2234 while (*level >= 0) {
2235 WARN_ON(*level < 0);
2236 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237 cur = path->nodes[*level];
2239 if (btrfs_header_level(cur) != *level)
2240 WARN_ON(1);
2242 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243 break;
2244 /* Don't forgot to check leaf/node validation */
2245 if (*level == 0) {
2246 ret = btrfs_check_leaf(root, NULL, cur);
2247 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248 ret = -EIO;
2249 break;
2251 ret = process_one_leaf_v2(root, path, nrefs,
2252 level, ext_ref);
2253 break;
2254 } else {
2255 ret = btrfs_check_node(root, NULL, cur);
2256 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = -EIO;
2258 break;
2261 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263 blocksize = root->nodesize;
2265 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266 if (ret)
2267 break;
2268 if (!nrefs->need_check[*level - 1]) {
2269 path->slots[*level]++;
2270 continue;
2273 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275 free_extent_buffer(next);
2276 reada_walk_down(root, cur, path->slots[*level]);
2277 next = read_tree_block(root, bytenr, blocksize,
2278 ptr_gen);
2279 if (!extent_buffer_uptodate(next)) {
2280 struct btrfs_key node_key;
2282 btrfs_node_key_to_cpu(path->nodes[*level],
2283 &node_key,
2284 path->slots[*level]);
2285 btrfs_add_corrupt_extent_record(root->fs_info,
2286 &node_key,
2287 path->nodes[*level]->start,
2288 root->nodesize, *level);
2289 ret = -EIO;
2290 break;
2294 ret = check_child_node(cur, path->slots[*level], next);
2295 if (ret < 0)
2296 break;
2298 if (btrfs_is_leaf(next))
2299 status = btrfs_check_leaf(root, NULL, next);
2300 else
2301 status = btrfs_check_node(root, NULL, next);
2302 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303 free_extent_buffer(next);
2304 ret = -EIO;
2305 break;
2308 *level = *level - 1;
2309 free_extent_buffer(path->nodes[*level]);
2310 path->nodes[*level] = next;
2311 path->slots[*level] = 0;
2313 return ret;
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317 struct walk_control *wc, int *level)
2319 int i;
2320 struct extent_buffer *leaf;
2322 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323 leaf = path->nodes[i];
2324 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325 path->slots[i]++;
2326 *level = i;
2327 return 0;
2328 } else {
2329 free_extent_buffer(path->nodes[*level]);
2330 path->nodes[*level] = NULL;
2331 BUG_ON(*level > wc->active_node);
2332 if (*level == wc->active_node)
2333 leave_shared_node(root, wc, *level);
2334 *level = i + 1;
2337 return 1;
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341 int *level)
2343 int i;
2344 struct extent_buffer *leaf;
2346 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347 leaf = path->nodes[i];
2348 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349 path->slots[i]++;
2350 *level = i;
2351 return 0;
2352 } else {
2353 free_extent_buffer(path->nodes[*level]);
2354 path->nodes[*level] = NULL;
2355 *level = i + 1;
2358 return 1;
2361 static int check_root_dir(struct inode_record *rec)
2363 struct inode_backref *backref;
2364 int ret = -1;
2366 if (!rec->found_inode_item || rec->errors)
2367 goto out;
2368 if (rec->nlink != 1 || rec->found_link != 0)
2369 goto out;
2370 if (list_empty(&rec->backrefs))
2371 goto out;
2372 backref = to_inode_backref(rec->backrefs.next);
2373 if (!backref->found_inode_ref)
2374 goto out;
2375 if (backref->index != 0 || backref->namelen != 2 ||
2376 memcmp(backref->name, "..", 2))
2377 goto out;
2378 if (backref->found_dir_index || backref->found_dir_item)
2379 goto out;
2380 ret = 0;
2381 out:
2382 return ret;
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386 struct btrfs_root *root, struct btrfs_path *path,
2387 struct inode_record *rec)
2389 struct btrfs_inode_item *ei;
2390 struct btrfs_key key;
2391 int ret;
2393 key.objectid = rec->ino;
2394 key.type = BTRFS_INODE_ITEM_KEY;
2395 key.offset = (u64)-1;
2397 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398 if (ret < 0)
2399 goto out;
2400 if (ret) {
2401 if (!path->slots[0]) {
2402 ret = -ENOENT;
2403 goto out;
2405 path->slots[0]--;
2406 ret = 0;
2408 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409 if (key.objectid != rec->ino) {
2410 ret = -ENOENT;
2411 goto out;
2414 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415 struct btrfs_inode_item);
2416 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417 btrfs_mark_buffer_dirty(path->nodes[0]);
2418 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420 root->root_key.objectid);
2421 out:
2422 btrfs_release_path(path);
2423 return ret;
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 struct btrfs_path *path,
2429 struct inode_record *rec)
2431 int ret;
2433 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434 btrfs_release_path(path);
2435 if (!ret)
2436 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437 return ret;
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441 struct btrfs_root *root,
2442 struct btrfs_path *path,
2443 struct inode_record *rec)
2445 struct btrfs_inode_item *ei;
2446 struct btrfs_key key;
2447 int ret = 0;
2449 key.objectid = rec->ino;
2450 key.type = BTRFS_INODE_ITEM_KEY;
2451 key.offset = 0;
2453 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454 if (ret) {
2455 if (ret > 0)
2456 ret = -ENOENT;
2457 goto out;
2460 /* Since ret == 0, no need to check anything */
2461 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462 struct btrfs_inode_item);
2463 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464 btrfs_mark_buffer_dirty(path->nodes[0]);
2465 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466 printf("reset nbytes for ino %llu root %llu\n",
2467 rec->ino, root->root_key.objectid);
2468 out:
2469 btrfs_release_path(path);
2470 return ret;
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474 struct cache_tree *inode_cache,
2475 struct inode_record *rec,
2476 struct inode_backref *backref)
2478 struct btrfs_path path;
2479 struct btrfs_trans_handle *trans;
2480 struct btrfs_dir_item *dir_item;
2481 struct extent_buffer *leaf;
2482 struct btrfs_key key;
2483 struct btrfs_disk_key disk_key;
2484 struct inode_record *dir_rec;
2485 unsigned long name_ptr;
2486 u32 data_size = sizeof(*dir_item) + backref->namelen;
2487 int ret;
2489 trans = btrfs_start_transaction(root, 1);
2490 if (IS_ERR(trans))
2491 return PTR_ERR(trans);
2493 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494 (unsigned long long)rec->ino);
2496 btrfs_init_path(&path);
2497 key.objectid = backref->dir;
2498 key.type = BTRFS_DIR_INDEX_KEY;
2499 key.offset = backref->index;
2500 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501 BUG_ON(ret);
2503 leaf = path.nodes[0];
2504 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506 disk_key.objectid = cpu_to_le64(rec->ino);
2507 disk_key.type = BTRFS_INODE_ITEM_KEY;
2508 disk_key.offset = 0;
2510 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512 btrfs_set_dir_data_len(leaf, dir_item, 0);
2513 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514 name_ptr = (unsigned long)(dir_item + 1);
2515 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516 btrfs_mark_buffer_dirty(leaf);
2517 btrfs_release_path(&path);
2518 btrfs_commit_transaction(trans, root);
2520 backref->found_dir_index = 1;
2521 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522 BUG_ON(IS_ERR(dir_rec));
2523 if (!dir_rec)
2524 return 0;
2525 dir_rec->found_size += backref->namelen;
2526 if (dir_rec->found_size == dir_rec->isize &&
2527 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529 if (dir_rec->found_size != dir_rec->isize)
2530 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532 return 0;
2535 static int delete_dir_index(struct btrfs_root *root,
2536 struct inode_backref *backref)
2538 struct btrfs_trans_handle *trans;
2539 struct btrfs_dir_item *di;
2540 struct btrfs_path path;
2541 int ret = 0;
2543 trans = btrfs_start_transaction(root, 1);
2544 if (IS_ERR(trans))
2545 return PTR_ERR(trans);
2547 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548 (unsigned long long)backref->dir,
2549 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550 (unsigned long long)root->objectid);
2552 btrfs_init_path(&path);
2553 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554 backref->name, backref->namelen,
2555 backref->index, -1);
2556 if (IS_ERR(di)) {
2557 ret = PTR_ERR(di);
2558 btrfs_release_path(&path);
2559 btrfs_commit_transaction(trans, root);
2560 if (ret == -ENOENT)
2561 return 0;
2562 return ret;
2565 if (!di)
2566 ret = btrfs_del_item(trans, root, &path);
2567 else
2568 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569 BUG_ON(ret);
2570 btrfs_release_path(&path);
2571 btrfs_commit_transaction(trans, root);
2572 return ret;
2575 static int create_inode_item(struct btrfs_root *root,
2576 struct inode_record *rec,
2577 int root_dir)
2579 struct btrfs_trans_handle *trans;
2580 struct btrfs_inode_item inode_item;
2581 time_t now = time(NULL);
2582 int ret;
2584 trans = btrfs_start_transaction(root, 1);
2585 if (IS_ERR(trans)) {
2586 ret = PTR_ERR(trans);
2587 return ret;
2590 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591 "be incomplete, please check permissions and content after "
2592 "the fsck completes.\n", (unsigned long long)root->objectid,
2593 (unsigned long long)rec->ino);
2595 memset(&inode_item, 0, sizeof(inode_item));
2596 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597 if (root_dir)
2598 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599 else
2600 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602 if (rec->found_dir_item) {
2603 if (rec->found_file_extent)
2604 fprintf(stderr, "root %llu inode %llu has both a dir "
2605 "item and extents, unsure if it is a dir or a "
2606 "regular file so setting it as a directory\n",
2607 (unsigned long long)root->objectid,
2608 (unsigned long long)rec->ino);
2609 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611 } else if (!rec->found_dir_item) {
2612 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2615 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2624 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625 BUG_ON(ret);
2626 btrfs_commit_transaction(trans, root);
2627 return 0;
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631 struct inode_record *rec,
2632 struct cache_tree *inode_cache,
2633 int delete)
2635 struct inode_backref *tmp, *backref;
2636 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637 int ret = 0;
2638 int repaired = 0;
2640 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641 if (!delete && rec->ino == root_dirid) {
2642 if (!rec->found_inode_item) {
2643 ret = create_inode_item(root, rec, 1);
2644 if (ret)
2645 break;
2646 repaired++;
2650 /* Index 0 for root dir's are special, don't mess with it */
2651 if (rec->ino == root_dirid && backref->index == 0)
2652 continue;
2654 if (delete &&
2655 ((backref->found_dir_index && !backref->found_inode_ref) ||
2656 (backref->found_dir_index && backref->found_inode_ref &&
2657 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658 ret = delete_dir_index(root, backref);
2659 if (ret)
2660 break;
2661 repaired++;
2662 list_del(&backref->list);
2663 free(backref);
2666 if (!delete && !backref->found_dir_index &&
2667 backref->found_dir_item && backref->found_inode_ref) {
2668 ret = add_missing_dir_index(root, inode_cache, rec,
2669 backref);
2670 if (ret)
2671 break;
2672 repaired++;
2673 if (backref->found_dir_item &&
2674 backref->found_dir_index &&
2675 backref->found_dir_index) {
2676 if (!backref->errors &&
2677 backref->found_inode_ref) {
2678 list_del(&backref->list);
2679 free(backref);
2684 if (!delete && (!backref->found_dir_index &&
2685 !backref->found_dir_item &&
2686 backref->found_inode_ref)) {
2687 struct btrfs_trans_handle *trans;
2688 struct btrfs_key location;
2690 ret = check_dir_conflict(root, backref->name,
2691 backref->namelen,
2692 backref->dir,
2693 backref->index);
2694 if (ret) {
2696 * let nlink fixing routine to handle it,
2697 * which can do it better.
2699 ret = 0;
2700 break;
2702 location.objectid = rec->ino;
2703 location.type = BTRFS_INODE_ITEM_KEY;
2704 location.offset = 0;
2706 trans = btrfs_start_transaction(root, 1);
2707 if (IS_ERR(trans)) {
2708 ret = PTR_ERR(trans);
2709 break;
2711 fprintf(stderr, "adding missing dir index/item pair "
2712 "for inode %llu\n",
2713 (unsigned long long)rec->ino);
2714 ret = btrfs_insert_dir_item(trans, root, backref->name,
2715 backref->namelen,
2716 backref->dir, &location,
2717 imode_to_type(rec->imode),
2718 backref->index);
2719 BUG_ON(ret);
2720 btrfs_commit_transaction(trans, root);
2721 repaired++;
2724 if (!delete && (backref->found_inode_ref &&
2725 backref->found_dir_index &&
2726 backref->found_dir_item &&
2727 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728 !rec->found_inode_item)) {
2729 ret = create_inode_item(root, rec, 0);
2730 if (ret)
2731 break;
2732 repaired++;
2736 return ret ? ret : repaired;
2740 * To determine the file type for nlink/inode_item repair
2742 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743 * Return -ENOENT if file type is not found.
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2747 struct inode_backref *backref;
2749 /* For inode item recovered case */
2750 if (rec->found_inode_item) {
2751 *type = imode_to_type(rec->imode);
2752 return 0;
2755 list_for_each_entry(backref, &rec->backrefs, list) {
2756 if (backref->found_dir_index || backref->found_dir_item) {
2757 *type = backref->filetype;
2758 return 0;
2761 return -ENOENT;
2765 * To determine the file name for nlink repair
2767 * Return 0 if file name is found, set name and namelen.
2768 * Return -ENOENT if file name is not found.
2770 static int find_file_name(struct inode_record *rec,
2771 char *name, int *namelen)
2773 struct inode_backref *backref;
2775 list_for_each_entry(backref, &rec->backrefs, list) {
2776 if (backref->found_dir_index || backref->found_dir_item ||
2777 backref->found_inode_ref) {
2778 memcpy(name, backref->name, backref->namelen);
2779 *namelen = backref->namelen;
2780 return 0;
2783 return -ENOENT;
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788 struct btrfs_root *root,
2789 struct btrfs_path *path,
2790 struct inode_record *rec)
2792 struct inode_backref *backref;
2793 struct inode_backref *tmp;
2794 struct btrfs_key key;
2795 struct btrfs_inode_item *inode_item;
2796 int ret = 0;
2798 /* We don't believe this either, reset it and iterate backref */
2799 rec->found_link = 0;
2801 /* Remove all backref including the valid ones */
2802 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804 backref->index, backref->name,
2805 backref->namelen, 0);
2806 if (ret < 0)
2807 goto out;
2809 /* remove invalid backref, so it won't be added back */
2810 if (!(backref->found_dir_index &&
2811 backref->found_dir_item &&
2812 backref->found_inode_ref)) {
2813 list_del(&backref->list);
2814 free(backref);
2815 } else {
2816 rec->found_link++;
2820 /* Set nlink to 0 */
2821 key.objectid = rec->ino;
2822 key.type = BTRFS_INODE_ITEM_KEY;
2823 key.offset = 0;
2824 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825 if (ret < 0)
2826 goto out;
2827 if (ret > 0) {
2828 ret = -ENOENT;
2829 goto out;
2831 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832 struct btrfs_inode_item);
2833 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834 btrfs_mark_buffer_dirty(path->nodes[0]);
2835 btrfs_release_path(path);
2838 * Add back valid inode_ref/dir_item/dir_index,
2839 * add_link() will handle the nlink inc, so new nlink must be correct
2841 list_for_each_entry(backref, &rec->backrefs, list) {
2842 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843 backref->name, backref->namelen,
2844 backref->filetype, &backref->index, 1);
2845 if (ret < 0)
2846 goto out;
2848 out:
2849 btrfs_release_path(path);
2850 return ret;
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854 struct btrfs_root *root,
2855 struct btrfs_path *path,
2856 u64 *highest_ino)
2858 struct btrfs_key key, found_key;
2859 int ret;
2861 btrfs_init_path(path);
2862 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863 key.offset = -1;
2864 key.type = BTRFS_INODE_ITEM_KEY;
2865 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866 if (ret == 1) {
2867 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868 path->slots[0] - 1);
2869 *highest_ino = found_key.objectid;
2870 ret = 0;
2872 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873 ret = -EOVERFLOW;
2874 btrfs_release_path(path);
2875 return ret;
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879 struct btrfs_root *root,
2880 struct btrfs_path *path,
2881 struct inode_record *rec)
2883 char *dir_name = "lost+found";
2884 char namebuf[BTRFS_NAME_LEN] = {0};
2885 u64 lost_found_ino;
2886 u32 mode = 0700;
2887 u8 type = 0;
2888 int namelen = 0;
2889 int name_recovered = 0;
2890 int type_recovered = 0;
2891 int ret = 0;
2894 * Get file name and type first before these invalid inode ref
2895 * are deleted by remove_all_invalid_backref()
2897 name_recovered = !find_file_name(rec, namebuf, &namelen);
2898 type_recovered = !find_file_type(rec, &type);
2900 if (!name_recovered) {
2901 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902 rec->ino, rec->ino);
2903 namelen = count_digits(rec->ino);
2904 sprintf(namebuf, "%llu", rec->ino);
2905 name_recovered = 1;
2907 if (!type_recovered) {
2908 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909 rec->ino);
2910 type = BTRFS_FT_REG_FILE;
2911 type_recovered = 1;
2914 ret = reset_nlink(trans, root, path, rec);
2915 if (ret < 0) {
2916 fprintf(stderr,
2917 "Failed to reset nlink for inode %llu: %s\n",
2918 rec->ino, strerror(-ret));
2919 goto out;
2922 if (rec->found_link == 0) {
2923 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924 if (ret < 0)
2925 goto out;
2926 lost_found_ino++;
2927 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929 mode);
2930 if (ret < 0) {
2931 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932 dir_name, strerror(-ret));
2933 goto out;
2935 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936 namebuf, namelen, type, NULL, 1);
2938 * Add ".INO" suffix several times to handle case where
2939 * "FILENAME.INO" is already taken by another file.
2941 while (ret == -EEXIST) {
2943 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2945 if (namelen + count_digits(rec->ino) + 1 >
2946 BTRFS_NAME_LEN) {
2947 ret = -EFBIG;
2948 goto out;
2950 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951 ".%llu", rec->ino);
2952 namelen += count_digits(rec->ino) + 1;
2953 ret = btrfs_add_link(trans, root, rec->ino,
2954 lost_found_ino, namebuf,
2955 namelen, type, NULL, 1);
2957 if (ret < 0) {
2958 fprintf(stderr,
2959 "Failed to link the inode %llu to %s dir: %s\n",
2960 rec->ino, dir_name, strerror(-ret));
2961 goto out;
2964 * Just increase the found_link, don't actually add the
2965 * backref. This will make things easier and this inode
2966 * record will be freed after the repair is done.
2967 * So fsck will not report problem about this inode.
2969 rec->found_link++;
2970 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971 namelen, namebuf, dir_name);
2973 printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2976 * Clear the flag anyway, or we will loop forever for the same inode
2977 * as it will not be removed from the bad inode list and the dead loop
2978 * happens.
2980 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981 btrfs_release_path(path);
2982 return ret;
2986 * Check if there is any normal(reg or prealloc) file extent for given
2987 * ino.
2988 * This is used to determine the file type when neither its dir_index/item or
2989 * inode_item exists.
2991 * This will *NOT* report error, if any error happens, just consider it does
2992 * not have any normal file extent.
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2996 struct btrfs_path path;
2997 struct btrfs_key key;
2998 struct btrfs_key found_key;
2999 struct btrfs_file_extent_item *fi;
3000 u8 type;
3001 int ret = 0;
3003 btrfs_init_path(&path);
3004 key.objectid = ino;
3005 key.type = BTRFS_EXTENT_DATA_KEY;
3006 key.offset = 0;
3008 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009 if (ret < 0) {
3010 ret = 0;
3011 goto out;
3013 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014 ret = btrfs_next_leaf(root, &path);
3015 if (ret) {
3016 ret = 0;
3017 goto out;
3020 while (1) {
3021 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022 path.slots[0]);
3023 if (found_key.objectid != ino ||
3024 found_key.type != BTRFS_EXTENT_DATA_KEY)
3025 break;
3026 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027 struct btrfs_file_extent_item);
3028 type = btrfs_file_extent_type(path.nodes[0], fi);
3029 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030 ret = 1;
3031 goto out;
3034 out:
3035 btrfs_release_path(&path);
3036 return ret;
3039 static u32 btrfs_type_to_imode(u8 type)
3041 static u32 imode_by_btrfs_type[] = {
3042 [BTRFS_FT_REG_FILE] = S_IFREG,
3043 [BTRFS_FT_DIR] = S_IFDIR,
3044 [BTRFS_FT_CHRDEV] = S_IFCHR,
3045 [BTRFS_FT_BLKDEV] = S_IFBLK,
3046 [BTRFS_FT_FIFO] = S_IFIFO,
3047 [BTRFS_FT_SOCK] = S_IFSOCK,
3048 [BTRFS_FT_SYMLINK] = S_IFLNK,
3051 return imode_by_btrfs_type[(type)];
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055 struct btrfs_root *root,
3056 struct btrfs_path *path,
3057 struct inode_record *rec)
3059 u8 filetype;
3060 u32 mode = 0700;
3061 int type_recovered = 0;
3062 int ret = 0;
3064 printf("Trying to rebuild inode:%llu\n", rec->ino);
3066 type_recovered = !find_file_type(rec, &filetype);
3069 * Try to determine inode type if type not found.
3071 * For found regular file extent, it must be FILE.
3072 * For found dir_item/index, it must be DIR.
3074 * For undetermined one, use FILE as fallback.
3076 * TODO:
3077 * 1. If found backref(inode_index/item is already handled) to it,
3078 * it must be DIR.
3079 * Need new inode-inode ref structure to allow search for that.
3081 if (!type_recovered) {
3082 if (rec->found_file_extent &&
3083 find_normal_file_extent(root, rec->ino)) {
3084 type_recovered = 1;
3085 filetype = BTRFS_FT_REG_FILE;
3086 } else if (rec->found_dir_item) {
3087 type_recovered = 1;
3088 filetype = BTRFS_FT_DIR;
3089 } else if (!list_empty(&rec->orphan_extents)) {
3090 type_recovered = 1;
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else{
3093 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094 rec->ino);
3095 type_recovered = 1;
3096 filetype = BTRFS_FT_REG_FILE;
3100 ret = btrfs_new_inode(trans, root, rec->ino,
3101 mode | btrfs_type_to_imode(filetype));
3102 if (ret < 0)
3103 goto out;
3106 * Here inode rebuild is done, we only rebuild the inode item,
3107 * don't repair the nlink(like move to lost+found).
3108 * That is the job of nlink repair.
3110 * We just fill the record and return
3112 rec->found_dir_item = 1;
3113 rec->imode = mode | btrfs_type_to_imode(filetype);
3114 rec->nlink = 0;
3115 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116 /* Ensure the inode_nlinks repair function will be called */
3117 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119 return ret;
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123 struct btrfs_root *root,
3124 struct btrfs_path *path,
3125 struct inode_record *rec)
3127 struct orphan_data_extent *orphan;
3128 struct orphan_data_extent *tmp;
3129 int ret = 0;
3131 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3133 * Check for conflicting file extents
3135 * Here we don't know whether the extents is compressed or not,
3136 * so we can only assume it not compressed nor data offset,
3137 * and use its disk_len as extent length.
3139 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140 orphan->offset, orphan->disk_len, 0);
3141 btrfs_release_path(path);
3142 if (ret < 0)
3143 goto out;
3144 if (!ret) {
3145 fprintf(stderr,
3146 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147 orphan->disk_bytenr, orphan->disk_len);
3148 ret = btrfs_free_extent(trans,
3149 root->fs_info->extent_root,
3150 orphan->disk_bytenr, orphan->disk_len,
3151 0, root->objectid, orphan->objectid,
3152 orphan->offset);
3153 if (ret < 0)
3154 goto out;
3156 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157 orphan->offset, orphan->disk_bytenr,
3158 orphan->disk_len, orphan->disk_len);
3159 if (ret < 0)
3160 goto out;
3162 /* Update file size info */
3163 rec->found_size += orphan->disk_len;
3164 if (rec->found_size == rec->nbytes)
3165 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3167 /* Update the file extent hole info too */
3168 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169 orphan->disk_len);
3170 if (ret < 0)
3171 goto out;
3172 if (RB_EMPTY_ROOT(&rec->holes))
3173 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3175 list_del(&orphan->list);
3176 free(orphan);
3178 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180 return ret;
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184 struct btrfs_root *root,
3185 struct btrfs_path *path,
3186 struct inode_record *rec)
3188 struct rb_node *node;
3189 struct file_extent_hole *hole;
3190 int found = 0;
3191 int ret = 0;
3193 node = rb_first(&rec->holes);
3195 while (node) {
3196 found = 1;
3197 hole = rb_entry(node, struct file_extent_hole, node);
3198 ret = btrfs_punch_hole(trans, root, rec->ino,
3199 hole->start, hole->len);
3200 if (ret < 0)
3201 goto out;
3202 ret = del_file_extent_hole(&rec->holes, hole->start,
3203 hole->len);
3204 if (ret < 0)
3205 goto out;
3206 if (RB_EMPTY_ROOT(&rec->holes))
3207 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208 node = rb_first(&rec->holes);
3210 /* special case for a file losing all its file extent */
3211 if (!found) {
3212 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213 round_up(rec->isize, root->sectorsize));
3214 if (ret < 0)
3215 goto out;
3217 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218 rec->ino, root->objectid);
3219 out:
3220 return ret;
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3225 struct btrfs_trans_handle *trans;
3226 struct btrfs_path path;
3227 int ret = 0;
3229 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230 I_ERR_NO_ORPHAN_ITEM |
3231 I_ERR_LINK_COUNT_WRONG |
3232 I_ERR_NO_INODE_ITEM |
3233 I_ERR_FILE_EXTENT_ORPHAN |
3234 I_ERR_FILE_EXTENT_DISCOUNT|
3235 I_ERR_FILE_NBYTES_WRONG)))
3236 return rec->errors;
3239 * For nlink repair, it may create a dir and add link, so
3240 * 2 for parent(256)'s dir_index and dir_item
3241 * 2 for lost+found dir's inode_item and inode_ref
3242 * 1 for the new inode_ref of the file
3243 * 2 for lost+found dir's dir_index and dir_item for the file
3245 trans = btrfs_start_transaction(root, 7);
3246 if (IS_ERR(trans))
3247 return PTR_ERR(trans);
3249 btrfs_init_path(&path);
3250 if (rec->errors & I_ERR_NO_INODE_ITEM)
3251 ret = repair_inode_no_item(trans, root, &path, rec);
3252 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257 ret = repair_inode_isize(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261 ret = repair_inode_nlinks(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263 ret = repair_inode_nbytes(trans, root, &path, rec);
3264 btrfs_commit_transaction(trans, root);
3265 btrfs_release_path(&path);
3266 return ret;
3269 static int check_inode_recs(struct btrfs_root *root,
3270 struct cache_tree *inode_cache)
3272 struct cache_extent *cache;
3273 struct ptr_node *node;
3274 struct inode_record *rec;
3275 struct inode_backref *backref;
3276 int stage = 0;
3277 int ret = 0;
3278 int err = 0;
3279 u64 error = 0;
3280 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3282 if (btrfs_root_refs(&root->root_item) == 0) {
3283 if (!cache_tree_empty(inode_cache))
3284 fprintf(stderr, "warning line %d\n", __LINE__);
3285 return 0;
3289 * We need to repair backrefs first because we could change some of the
3290 * errors in the inode recs.
3292 * We also need to go through and delete invalid backrefs first and then
3293 * add the correct ones second. We do this because we may get EEXIST
3294 * when adding back the correct index because we hadn't yet deleted the
3295 * invalid index.
3297 * For example, if we were missing a dir index then the directories
3298 * isize would be wrong, so if we fixed the isize to what we thought it
3299 * would be and then fixed the backref we'd still have a invalid fs, so
3300 * we need to add back the dir index and then check to see if the isize
3301 * is still wrong.
3303 while (stage < 3) {
3304 stage++;
3305 if (stage == 3 && !err)
3306 break;
3308 cache = search_cache_extent(inode_cache, 0);
3309 while (repair && cache) {
3310 node = container_of(cache, struct ptr_node, cache);
3311 rec = node->data;
3312 cache = next_cache_extent(cache);
3314 /* Need to free everything up and rescan */
3315 if (stage == 3) {
3316 remove_cache_extent(inode_cache, &node->cache);
3317 free(node);
3318 free_inode_rec(rec);
3319 continue;
3322 if (list_empty(&rec->backrefs))
3323 continue;
3325 ret = repair_inode_backrefs(root, rec, inode_cache,
3326 stage == 1);
3327 if (ret < 0) {
3328 err = ret;
3329 stage = 2;
3330 break;
3331 } if (ret > 0) {
3332 err = -EAGAIN;
3336 if (err)
3337 return err;
3339 rec = get_inode_rec(inode_cache, root_dirid, 0);
3340 BUG_ON(IS_ERR(rec));
3341 if (rec) {
3342 ret = check_root_dir(rec);
3343 if (ret) {
3344 fprintf(stderr, "root %llu root dir %llu error\n",
3345 (unsigned long long)root->root_key.objectid,
3346 (unsigned long long)root_dirid);
3347 print_inode_error(root, rec);
3348 error++;
3350 } else {
3351 if (repair) {
3352 struct btrfs_trans_handle *trans;
3354 trans = btrfs_start_transaction(root, 1);
3355 if (IS_ERR(trans)) {
3356 err = PTR_ERR(trans);
3357 return err;
3360 fprintf(stderr,
3361 "root %llu missing its root dir, recreating\n",
3362 (unsigned long long)root->objectid);
3364 ret = btrfs_make_root_dir(trans, root, root_dirid);
3365 BUG_ON(ret);
3367 btrfs_commit_transaction(trans, root);
3368 return -EAGAIN;
3371 fprintf(stderr, "root %llu root dir %llu not found\n",
3372 (unsigned long long)root->root_key.objectid,
3373 (unsigned long long)root_dirid);
3376 while (1) {
3377 cache = search_cache_extent(inode_cache, 0);
3378 if (!cache)
3379 break;
3380 node = container_of(cache, struct ptr_node, cache);
3381 rec = node->data;
3382 remove_cache_extent(inode_cache, &node->cache);
3383 free(node);
3384 if (rec->ino == root_dirid ||
3385 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386 free_inode_rec(rec);
3387 continue;
3390 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391 ret = check_orphan_item(root, rec->ino);
3392 if (ret == 0)
3393 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394 if (can_free_inode_rec(rec)) {
3395 free_inode_rec(rec);
3396 continue;
3400 if (!rec->found_inode_item)
3401 rec->errors |= I_ERR_NO_INODE_ITEM;
3402 if (rec->found_link != rec->nlink)
3403 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404 if (repair) {
3405 ret = try_repair_inode(root, rec);
3406 if (ret == 0 && can_free_inode_rec(rec)) {
3407 free_inode_rec(rec);
3408 continue;
3410 ret = 0;
3413 if (!(repair && ret == 0))
3414 error++;
3415 print_inode_error(root, rec);
3416 list_for_each_entry(backref, &rec->backrefs, list) {
3417 if (!backref->found_dir_item)
3418 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419 if (!backref->found_dir_index)
3420 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421 if (!backref->found_inode_ref)
3422 backref->errors |= REF_ERR_NO_INODE_REF;
3423 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424 " namelen %u name %s filetype %d errors %x",
3425 (unsigned long long)backref->dir,
3426 (unsigned long long)backref->index,
3427 backref->namelen, backref->name,
3428 backref->filetype, backref->errors);
3429 print_ref_error(backref->errors);
3431 free_inode_rec(rec);
3433 return (error > 0) ? -1 : 0;
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437 u64 objectid)
3439 struct cache_extent *cache;
3440 struct root_record *rec = NULL;
3441 int ret;
3443 cache = lookup_cache_extent(root_cache, objectid, 1);
3444 if (cache) {
3445 rec = container_of(cache, struct root_record, cache);
3446 } else {
3447 rec = calloc(1, sizeof(*rec));
3448 if (!rec)
3449 return ERR_PTR(-ENOMEM);
3450 rec->objectid = objectid;
3451 INIT_LIST_HEAD(&rec->backrefs);
3452 rec->cache.start = objectid;
3453 rec->cache.size = 1;
3455 ret = insert_cache_extent(root_cache, &rec->cache);
3456 if (ret)
3457 return ERR_PTR(-EEXIST);
3459 return rec;
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463 u64 ref_root, u64 dir, u64 index,
3464 const char *name, int namelen)
3466 struct root_backref *backref;
3468 list_for_each_entry(backref, &rec->backrefs, list) {
3469 if (backref->ref_root != ref_root || backref->dir != dir ||
3470 backref->namelen != namelen)
3471 continue;
3472 if (memcmp(name, backref->name, namelen))
3473 continue;
3474 return backref;
3477 backref = calloc(1, sizeof(*backref) + namelen + 1);
3478 if (!backref)
3479 return NULL;
3480 backref->ref_root = ref_root;
3481 backref->dir = dir;
3482 backref->index = index;
3483 backref->namelen = namelen;
3484 memcpy(backref->name, name, namelen);
3485 backref->name[namelen] = '\0';
3486 list_add_tail(&backref->list, &rec->backrefs);
3487 return backref;
3490 static void free_root_record(struct cache_extent *cache)
3492 struct root_record *rec;
3493 struct root_backref *backref;
3495 rec = container_of(cache, struct root_record, cache);
3496 while (!list_empty(&rec->backrefs)) {
3497 backref = to_root_backref(rec->backrefs.next);
3498 list_del(&backref->list);
3499 free(backref);
3502 free(rec);
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3507 static int add_root_backref(struct cache_tree *root_cache,
3508 u64 root_id, u64 ref_root, u64 dir, u64 index,
3509 const char *name, int namelen,
3510 int item_type, int errors)
3512 struct root_record *rec;
3513 struct root_backref *backref;
3515 rec = get_root_rec(root_cache, root_id);
3516 BUG_ON(IS_ERR(rec));
3517 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518 BUG_ON(!backref);
3520 backref->errors |= errors;
3522 if (item_type != BTRFS_DIR_ITEM_KEY) {
3523 if (backref->found_dir_index || backref->found_back_ref ||
3524 backref->found_forward_ref) {
3525 if (backref->index != index)
3526 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527 } else {
3528 backref->index = index;
3532 if (item_type == BTRFS_DIR_ITEM_KEY) {
3533 if (backref->found_forward_ref)
3534 rec->found_ref++;
3535 backref->found_dir_item = 1;
3536 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537 backref->found_dir_index = 1;
3538 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539 if (backref->found_forward_ref)
3540 backref->errors |= REF_ERR_DUP_ROOT_REF;
3541 else if (backref->found_dir_item)
3542 rec->found_ref++;
3543 backref->found_forward_ref = 1;
3544 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545 if (backref->found_back_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547 backref->found_back_ref = 1;
3548 } else {
3549 BUG_ON(1);
3552 if (backref->found_forward_ref && backref->found_dir_item)
3553 backref->reachable = 1;
3554 return 0;
3557 static int merge_root_recs(struct btrfs_root *root,
3558 struct cache_tree *src_cache,
3559 struct cache_tree *dst_cache)
3561 struct cache_extent *cache;
3562 struct ptr_node *node;
3563 struct inode_record *rec;
3564 struct inode_backref *backref;
3565 int ret = 0;
3567 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568 free_inode_recs_tree(src_cache);
3569 return 0;
3572 while (1) {
3573 cache = search_cache_extent(src_cache, 0);
3574 if (!cache)
3575 break;
3576 node = container_of(cache, struct ptr_node, cache);
3577 rec = node->data;
3578 remove_cache_extent(src_cache, &node->cache);
3579 free(node);
3581 ret = is_child_root(root, root->objectid, rec->ino);
3582 if (ret < 0)
3583 break;
3584 else if (ret == 0)
3585 goto skip;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 BUG_ON(backref->found_inode_ref);
3589 if (backref->found_dir_item)
3590 add_root_backref(dst_cache, rec->ino,
3591 root->root_key.objectid, backref->dir,
3592 backref->index, backref->name,
3593 backref->namelen, BTRFS_DIR_ITEM_KEY,
3594 backref->errors);
3595 if (backref->found_dir_index)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_INDEX_KEY,
3600 backref->errors);
3602 skip:
3603 free_inode_rec(rec);
3605 if (ret < 0)
3606 return ret;
3607 return 0;
3610 static int check_root_refs(struct btrfs_root *root,
3611 struct cache_tree *root_cache)
3613 struct root_record *rec;
3614 struct root_record *ref_root;
3615 struct root_backref *backref;
3616 struct cache_extent *cache;
3617 int loop = 1;
3618 int ret;
3619 int error;
3620 int errors = 0;
3622 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623 BUG_ON(IS_ERR(rec));
3624 rec->found_ref = 1;
3626 /* fixme: this can not detect circular references */
3627 while (loop) {
3628 loop = 0;
3629 cache = search_cache_extent(root_cache, 0);
3630 while (1) {
3631 if (!cache)
3632 break;
3633 rec = container_of(cache, struct root_record, cache);
3634 cache = next_cache_extent(cache);
3636 if (rec->found_ref == 0)
3637 continue;
3639 list_for_each_entry(backref, &rec->backrefs, list) {
3640 if (!backref->reachable)
3641 continue;
3643 ref_root = get_root_rec(root_cache,
3644 backref->ref_root);
3645 BUG_ON(IS_ERR(ref_root));
3646 if (ref_root->found_ref > 0)
3647 continue;
3649 backref->reachable = 0;
3650 rec->found_ref--;
3651 if (rec->found_ref == 0)
3652 loop = 1;
3657 cache = search_cache_extent(root_cache, 0);
3658 while (1) {
3659 if (!cache)
3660 break;
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0 &&
3665 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667 ret = check_orphan_item(root->fs_info->tree_root,
3668 rec->objectid);
3669 if (ret == 0)
3670 continue;
3673 * If we don't have a root item then we likely just have
3674 * a dir item in a snapshot for this root but no actual
3675 * ref key or anything so it's meaningless.
3677 if (!rec->found_root_item)
3678 continue;
3679 errors++;
3680 fprintf(stderr, "fs tree %llu not referenced\n",
3681 (unsigned long long)rec->objectid);
3684 error = 0;
3685 if (rec->found_ref > 0 && !rec->found_root_item)
3686 error = 1;
3687 list_for_each_entry(backref, &rec->backrefs, list) {
3688 if (!backref->found_dir_item)
3689 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690 if (!backref->found_dir_index)
3691 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692 if (!backref->found_back_ref)
3693 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694 if (!backref->found_forward_ref)
3695 backref->errors |= REF_ERR_NO_ROOT_REF;
3696 if (backref->reachable && backref->errors)
3697 error = 1;
3699 if (!error)
3700 continue;
3702 errors++;
3703 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704 (unsigned long long)rec->objectid, rec->found_ref,
3705 rec->found_root_item ? "" : "not found");
3707 list_for_each_entry(backref, &rec->backrefs, list) {
3708 if (!backref->reachable)
3709 continue;
3710 if (!backref->errors && rec->found_root_item)
3711 continue;
3712 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713 " index %llu namelen %u name %s errors %x\n",
3714 (unsigned long long)backref->ref_root,
3715 (unsigned long long)backref->dir,
3716 (unsigned long long)backref->index,
3717 backref->namelen, backref->name,
3718 backref->errors);
3719 print_ref_error(backref->errors);
3722 return errors > 0 ? 1 : 0;
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726 struct btrfs_key *key,
3727 struct cache_tree *root_cache)
3729 u64 dirid;
3730 u64 index;
3731 u32 len;
3732 u32 name_len;
3733 struct btrfs_root_ref *ref;
3734 char namebuf[BTRFS_NAME_LEN];
3735 int error;
3737 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3739 dirid = btrfs_root_ref_dirid(eb, ref);
3740 index = btrfs_root_ref_sequence(eb, ref);
3741 name_len = btrfs_root_ref_name_len(eb, ref);
3743 if (name_len <= BTRFS_NAME_LEN) {
3744 len = name_len;
3745 error = 0;
3746 } else {
3747 len = BTRFS_NAME_LEN;
3748 error = REF_ERR_NAME_TOO_LONG;
3750 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3752 if (key->type == BTRFS_ROOT_REF_KEY) {
3753 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754 index, namebuf, len, key->type, error);
3755 } else {
3756 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757 index, namebuf, len, key->type, error);
3759 return 0;
3762 static void free_corrupt_block(struct cache_extent *cache)
3764 struct btrfs_corrupt_block *corrupt;
3766 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767 free(corrupt);
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3773 * Repair the btree of the given root.
3775 * The fix is to remove the node key in corrupt_blocks cache_tree.
3776 * and rebalance the tree.
3777 * After the fix, the btree should be writeable.
3779 static int repair_btree(struct btrfs_root *root,
3780 struct cache_tree *corrupt_blocks)
3782 struct btrfs_trans_handle *trans;
3783 struct btrfs_path path;
3784 struct btrfs_corrupt_block *corrupt;
3785 struct cache_extent *cache;
3786 struct btrfs_key key;
3787 u64 offset;
3788 int level;
3789 int ret = 0;
3791 if (cache_tree_empty(corrupt_blocks))
3792 return 0;
3794 trans = btrfs_start_transaction(root, 1);
3795 if (IS_ERR(trans)) {
3796 ret = PTR_ERR(trans);
3797 fprintf(stderr, "Error starting transaction: %s\n",
3798 strerror(-ret));
3799 return ret;
3801 btrfs_init_path(&path);
3802 cache = first_cache_extent(corrupt_blocks);
3803 while (cache) {
3804 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805 cache);
3806 level = corrupt->level;
3807 path.lowest_level = level;
3808 key.objectid = corrupt->key.objectid;
3809 key.type = corrupt->key.type;
3810 key.offset = corrupt->key.offset;
3813 * Here we don't want to do any tree balance, since it may
3814 * cause a balance with corrupted brother leaf/node,
3815 * so ins_len set to 0 here.
3816 * Balance will be done after all corrupt node/leaf is deleted.
3818 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819 if (ret < 0)
3820 goto out;
3821 offset = btrfs_node_blockptr(path.nodes[level],
3822 path.slots[level]);
3824 /* Remove the ptr */
3825 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826 if (ret < 0)
3827 goto out;
3829 * Remove the corresponding extent
3830 * return value is not concerned.
3832 btrfs_release_path(&path);
3833 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834 0, root->root_key.objectid,
3835 level - 1, 0);
3836 cache = next_cache_extent(cache);
3839 /* Balance the btree using btrfs_search_slot() */
3840 cache = first_cache_extent(corrupt_blocks);
3841 while (cache) {
3842 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843 cache);
3844 memcpy(&key, &corrupt->key, sizeof(key));
3845 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846 if (ret < 0)
3847 goto out;
3848 /* return will always >0 since it won't find the item */
3849 ret = 0;
3850 btrfs_release_path(&path);
3851 cache = next_cache_extent(cache);
3853 out:
3854 btrfs_commit_transaction(trans, root);
3855 btrfs_release_path(&path);
3856 return ret;
3859 static int check_fs_root(struct btrfs_root *root,
3860 struct cache_tree *root_cache,
3861 struct walk_control *wc)
3863 int ret = 0;
3864 int err = 0;
3865 int wret;
3866 int level;
3867 struct btrfs_path path;
3868 struct shared_node root_node;
3869 struct root_record *rec;
3870 struct btrfs_root_item *root_item = &root->root_item;
3871 struct cache_tree corrupt_blocks;
3872 struct orphan_data_extent *orphan;
3873 struct orphan_data_extent *tmp;
3874 enum btrfs_tree_block_status status;
3875 struct node_refs nrefs;
3878 * Reuse the corrupt_block cache tree to record corrupted tree block
3880 * Unlike the usage in extent tree check, here we do it in a per
3881 * fs/subvol tree base.
3883 cache_tree_init(&corrupt_blocks);
3884 root->fs_info->corrupt_blocks = &corrupt_blocks;
3886 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887 rec = get_root_rec(root_cache, root->root_key.objectid);
3888 BUG_ON(IS_ERR(rec));
3889 if (btrfs_root_refs(root_item) > 0)
3890 rec->found_root_item = 1;
3893 btrfs_init_path(&path);
3894 memset(&root_node, 0, sizeof(root_node));
3895 cache_tree_init(&root_node.root_cache);
3896 cache_tree_init(&root_node.inode_cache);
3897 memset(&nrefs, 0, sizeof(nrefs));
3899 /* Move the orphan extent record to corresponding inode_record */
3900 list_for_each_entry_safe(orphan, tmp,
3901 &root->orphan_data_extents, list) {
3902 struct inode_record *inode;
3904 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3906 BUG_ON(IS_ERR(inode));
3907 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908 list_move(&orphan->list, &inode->orphan_extents);
3911 level = btrfs_header_level(root->node);
3912 memset(wc->nodes, 0, sizeof(wc->nodes));
3913 wc->nodes[level] = &root_node;
3914 wc->active_node = level;
3915 wc->root_level = level;
3917 /* We may not have checked the root block, lets do that now */
3918 if (btrfs_is_leaf(root->node))
3919 status = btrfs_check_leaf(root, NULL, root->node);
3920 else
3921 status = btrfs_check_node(root, NULL, root->node);
3922 if (status != BTRFS_TREE_BLOCK_CLEAN)
3923 return -EIO;
3925 if (btrfs_root_refs(root_item) > 0 ||
3926 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927 path.nodes[level] = root->node;
3928 extent_buffer_get(root->node);
3929 path.slots[level] = 0;
3930 } else {
3931 struct btrfs_key key;
3932 struct btrfs_disk_key found_key;
3934 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935 level = root_item->drop_level;
3936 path.lowest_level = level;
3937 if (level > btrfs_header_level(root->node) ||
3938 level >= BTRFS_MAX_LEVEL) {
3939 error("ignoring invalid drop level: %u", level);
3940 goto skip_walking;
3942 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943 if (wret < 0)
3944 goto skip_walking;
3945 btrfs_node_key(path.nodes[level], &found_key,
3946 path.slots[level]);
3947 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948 sizeof(found_key)));
3951 while (1) {
3952 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953 if (wret < 0)
3954 ret = wret;
3955 if (wret != 0)
3956 break;
3958 wret = walk_up_tree(root, &path, wc, &level);
3959 if (wret < 0)
3960 ret = wret;
3961 if (wret != 0)
3962 break;
3964 skip_walking:
3965 btrfs_release_path(&path);
3967 if (!cache_tree_empty(&corrupt_blocks)) {
3968 struct cache_extent *cache;
3969 struct btrfs_corrupt_block *corrupt;
3971 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972 root->root_key.objectid);
3973 cache = first_cache_extent(&corrupt_blocks);
3974 while (cache) {
3975 corrupt = container_of(cache,
3976 struct btrfs_corrupt_block,
3977 cache);
3978 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979 cache->start, corrupt->level,
3980 corrupt->key.objectid, corrupt->key.type,
3981 corrupt->key.offset);
3982 cache = next_cache_extent(cache);
3984 if (repair) {
3985 printf("Try to repair the btree for root %llu\n",
3986 root->root_key.objectid);
3987 ret = repair_btree(root, &corrupt_blocks);
3988 if (ret < 0)
3989 fprintf(stderr, "Failed to repair btree: %s\n",
3990 strerror(-ret));
3991 if (!ret)
3992 printf("Btree for root %llu is fixed\n",
3993 root->root_key.objectid);
3997 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998 if (err < 0)
3999 ret = err;
4001 if (root_node.current) {
4002 root_node.current->checked = 1;
4003 maybe_free_inode_rec(&root_node.inode_cache,
4004 root_node.current);
4007 err = check_inode_recs(root, &root_node.inode_cache);
4008 if (!ret)
4009 ret = err;
4011 free_corrupt_blocks_tree(&corrupt_blocks);
4012 root->fs_info->corrupt_blocks = NULL;
4013 free_orphan_data_extents(&root->orphan_data_extents);
4014 return ret;
4017 static int fs_root_objectid(u64 objectid)
4019 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021 return 1;
4022 return is_fstree(objectid);
4025 static int check_fs_roots(struct btrfs_root *root,
4026 struct cache_tree *root_cache)
4028 struct btrfs_path path;
4029 struct btrfs_key key;
4030 struct walk_control wc;
4031 struct extent_buffer *leaf, *tree_node;
4032 struct btrfs_root *tmp_root;
4033 struct btrfs_root *tree_root = root->fs_info->tree_root;
4034 int ret;
4035 int err = 0;
4037 if (ctx.progress_enabled) {
4038 ctx.tp = TASK_FS_ROOTS;
4039 task_start(ctx.info);
4043 * Just in case we made any changes to the extent tree that weren't
4044 * reflected into the free space cache yet.
4046 if (repair)
4047 reset_cached_block_groups(root->fs_info);
4048 memset(&wc, 0, sizeof(wc));
4049 cache_tree_init(&wc.shared);
4050 btrfs_init_path(&path);
4052 again:
4053 key.offset = 0;
4054 key.objectid = 0;
4055 key.type = BTRFS_ROOT_ITEM_KEY;
4056 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057 if (ret < 0) {
4058 err = 1;
4059 goto out;
4061 tree_node = tree_root->node;
4062 while (1) {
4063 if (tree_node != tree_root->node) {
4064 free_root_recs_tree(root_cache);
4065 btrfs_release_path(&path);
4066 goto again;
4068 leaf = path.nodes[0];
4069 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070 ret = btrfs_next_leaf(tree_root, &path);
4071 if (ret) {
4072 if (ret < 0)
4073 err = 1;
4074 break;
4076 leaf = path.nodes[0];
4078 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080 fs_root_objectid(key.objectid)) {
4081 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082 tmp_root = btrfs_read_fs_root_no_cache(
4083 root->fs_info, &key);
4084 } else {
4085 key.offset = (u64)-1;
4086 tmp_root = btrfs_read_fs_root(
4087 root->fs_info, &key);
4089 if (IS_ERR(tmp_root)) {
4090 err = 1;
4091 goto next;
4093 ret = check_fs_root(tmp_root, root_cache, &wc);
4094 if (ret == -EAGAIN) {
4095 free_root_recs_tree(root_cache);
4096 btrfs_release_path(&path);
4097 goto again;
4099 if (ret)
4100 err = 1;
4101 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102 btrfs_free_fs_root(tmp_root);
4103 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104 key.type == BTRFS_ROOT_BACKREF_KEY) {
4105 process_root_ref(leaf, path.slots[0], &key,
4106 root_cache);
4108 next:
4109 path.slots[0]++;
4111 out:
4112 btrfs_release_path(&path);
4113 if (err)
4114 free_extent_cache_tree(&wc.shared);
4115 if (!cache_tree_empty(&wc.shared))
4116 fprintf(stderr, "warning line %d\n", __LINE__);
4118 task_stop(ctx.info);
4120 return err;
4124 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125 * INODE_REF/INODE_EXTREF match.
4127 * @root: the root of the fs/file tree
4128 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4129 * @key: the key of the DIR_ITEM/DIR_INDEX
4130 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4131 * distinguish root_dir between normal dir/file
4132 * @name: the name in the INODE_REF/INODE_EXTREF
4133 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4134 * @mode: the st_mode of INODE_ITEM
4136 * Return 0 if no error occurred.
4137 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139 * dir/file.
4140 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141 * not match for normal dir/file.
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144 struct btrfs_key *key, u64 index, char *name,
4145 u32 namelen, u32 mode)
4147 struct btrfs_path path;
4148 struct extent_buffer *node;
4149 struct btrfs_dir_item *di;
4150 struct btrfs_key location;
4151 char namebuf[BTRFS_NAME_LEN] = {0};
4152 u32 total;
4153 u32 cur = 0;
4154 u32 len;
4155 u32 name_len;
4156 u32 data_len;
4157 u8 filetype;
4158 int slot;
4159 int ret;
4161 btrfs_init_path(&path);
4162 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163 if (ret < 0) {
4164 ret = DIR_ITEM_MISSING;
4165 goto out;
4168 /* Process root dir and goto out*/
4169 if (index == 0) {
4170 if (ret == 0) {
4171 ret = ROOT_DIR_ERROR;
4172 error(
4173 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174 root->objectid,
4175 ref_key->type == BTRFS_INODE_REF_KEY ?
4176 "REF" : "EXTREF",
4177 ref_key->objectid, ref_key->offset,
4178 key->type == BTRFS_DIR_ITEM_KEY ?
4179 "DIR_ITEM" : "DIR_INDEX");
4180 } else {
4181 ret = 0;
4184 goto out;
4187 /* Process normal file/dir */
4188 if (ret > 0) {
4189 ret = DIR_ITEM_MISSING;
4190 error(
4191 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192 root->objectid,
4193 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194 ref_key->objectid, ref_key->offset,
4195 key->type == BTRFS_DIR_ITEM_KEY ?
4196 "DIR_ITEM" : "DIR_INDEX",
4197 key->objectid, key->offset, namelen, name,
4198 imode_to_type(mode));
4199 goto out;
4202 /* Check whether inode_id/filetype/name match */
4203 node = path.nodes[0];
4204 slot = path.slots[0];
4205 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206 total = btrfs_item_size_nr(node, slot);
4207 while (cur < total) {
4208 ret = DIR_ITEM_MISMATCH;
4209 name_len = btrfs_dir_name_len(node, di);
4210 data_len = btrfs_dir_data_len(node, di);
4212 btrfs_dir_item_key_to_cpu(node, di, &location);
4213 if (location.objectid != ref_key->objectid ||
4214 location.type != BTRFS_INODE_ITEM_KEY ||
4215 location.offset != 0)
4216 goto next;
4218 filetype = btrfs_dir_type(node, di);
4219 if (imode_to_type(mode) != filetype)
4220 goto next;
4222 if (name_len <= BTRFS_NAME_LEN) {
4223 len = name_len;
4224 } else {
4225 len = BTRFS_NAME_LEN;
4226 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227 root->objectid,
4228 key->type == BTRFS_DIR_ITEM_KEY ?
4229 "DIR_ITEM" : "DIR_INDEX",
4230 key->objectid, key->offset, name_len);
4232 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233 if (len != namelen || strncmp(namebuf, name, len))
4234 goto next;
4236 ret = 0;
4237 goto out;
4238 next:
4239 len = sizeof(*di) + name_len + data_len;
4240 di = (struct btrfs_dir_item *)((char *)di + len);
4241 cur += len;
4243 if (ret == DIR_ITEM_MISMATCH)
4244 error(
4245 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246 root->objectid,
4247 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248 ref_key->objectid, ref_key->offset,
4249 key->type == BTRFS_DIR_ITEM_KEY ?
4250 "DIR_ITEM" : "DIR_INDEX",
4251 key->objectid, key->offset, namelen, name,
4252 imode_to_type(mode));
4253 out:
4254 btrfs_release_path(&path);
4255 return ret;
4259 * Traverse the given INODE_REF and call find_dir_item() to find related
4260 * DIR_ITEM/DIR_INDEX.
4262 * @root: the root of the fs/file tree
4263 * @ref_key: the key of the INODE_REF
4264 * @refs: the count of INODE_REF
4265 * @mode: the st_mode of INODE_ITEM
4267 * Return 0 if no error occurred.
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270 struct extent_buffer *node, int slot, u64 *refs,
4271 int mode)
4273 struct btrfs_key key;
4274 struct btrfs_inode_ref *ref;
4275 char namebuf[BTRFS_NAME_LEN] = {0};
4276 u32 total;
4277 u32 cur = 0;
4278 u32 len;
4279 u32 name_len;
4280 u64 index;
4281 int ret, err = 0;
4283 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284 total = btrfs_item_size_nr(node, slot);
4286 next:
4287 /* Update inode ref count */
4288 (*refs)++;
4290 index = btrfs_inode_ref_index(node, ref);
4291 name_len = btrfs_inode_ref_name_len(node, ref);
4292 if (name_len <= BTRFS_NAME_LEN) {
4293 len = name_len;
4294 } else {
4295 len = BTRFS_NAME_LEN;
4296 warning("root %llu INODE_REF[%llu %llu] name too long",
4297 root->objectid, ref_key->objectid, ref_key->offset);
4300 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4302 /* Check root dir ref name */
4303 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305 root->objectid, ref_key->objectid, ref_key->offset,
4306 namebuf);
4307 err |= ROOT_DIR_ERROR;
4310 /* Find related DIR_INDEX */
4311 key.objectid = ref_key->offset;
4312 key.type = BTRFS_DIR_INDEX_KEY;
4313 key.offset = index;
4314 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315 err |= ret;
4317 /* Find related dir_item */
4318 key.objectid = ref_key->offset;
4319 key.type = BTRFS_DIR_ITEM_KEY;
4320 key.offset = btrfs_name_hash(namebuf, len);
4321 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322 err |= ret;
4324 len = sizeof(*ref) + name_len;
4325 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326 cur += len;
4327 if (cur < total)
4328 goto next;
4330 return err;
4334 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335 * DIR_ITEM/DIR_INDEX.
4337 * @root: the root of the fs/file tree
4338 * @ref_key: the key of the INODE_EXTREF
4339 * @refs: the count of INODE_EXTREF
4340 * @mode: the st_mode of INODE_ITEM
4342 * Return 0 if no error occurred.
4344 static int check_inode_extref(struct btrfs_root *root,
4345 struct btrfs_key *ref_key,
4346 struct extent_buffer *node, int slot, u64 *refs,
4347 int mode)
4349 struct btrfs_key key;
4350 struct btrfs_inode_extref *extref;
4351 char namebuf[BTRFS_NAME_LEN] = {0};
4352 u32 total;
4353 u32 cur = 0;
4354 u32 len;
4355 u32 name_len;
4356 u64 index;
4357 u64 parent;
4358 int ret;
4359 int err = 0;
4361 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362 total = btrfs_item_size_nr(node, slot);
4364 next:
4365 /* update inode ref count */
4366 (*refs)++;
4367 name_len = btrfs_inode_extref_name_len(node, extref);
4368 index = btrfs_inode_extref_index(node, extref);
4369 parent = btrfs_inode_extref_parent(node, extref);
4370 if (name_len <= BTRFS_NAME_LEN) {
4371 len = name_len;
4372 } else {
4373 len = BTRFS_NAME_LEN;
4374 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375 root->objectid, ref_key->objectid, ref_key->offset);
4377 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4379 /* Check root dir ref name */
4380 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382 root->objectid, ref_key->objectid, ref_key->offset,
4383 namebuf);
4384 err |= ROOT_DIR_ERROR;
4387 /* find related dir_index */
4388 key.objectid = parent;
4389 key.type = BTRFS_DIR_INDEX_KEY;
4390 key.offset = index;
4391 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392 err |= ret;
4394 /* find related dir_item */
4395 key.objectid = parent;
4396 key.type = BTRFS_DIR_ITEM_KEY;
4397 key.offset = btrfs_name_hash(namebuf, len);
4398 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399 err |= ret;
4401 len = sizeof(*extref) + name_len;
4402 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403 cur += len;
4405 if (cur < total)
4406 goto next;
4408 return err;
4412 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413 * DIR_ITEM/DIR_INDEX match.
4415 * @root: the root of the fs/file tree
4416 * @key: the key of the INODE_REF/INODE_EXTREF
4417 * @name: the name in the INODE_REF/INODE_EXTREF
4418 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4419 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420 * to (u64)-1
4421 * @ext_ref: the EXTENDED_IREF feature
4423 * Return 0 if no error occurred.
4424 * Return >0 for error bitmap
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427 char *name, int namelen, u64 index,
4428 unsigned int ext_ref)
4430 struct btrfs_path path;
4431 struct btrfs_inode_ref *ref;
4432 struct btrfs_inode_extref *extref;
4433 struct extent_buffer *node;
4434 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435 u32 total;
4436 u32 cur = 0;
4437 u32 len;
4438 u32 ref_namelen;
4439 u64 ref_index;
4440 u64 parent;
4441 u64 dir_id;
4442 int slot;
4443 int ret;
4445 btrfs_init_path(&path);
4446 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447 if (ret) {
4448 ret = INODE_REF_MISSING;
4449 goto extref;
4452 node = path.nodes[0];
4453 slot = path.slots[0];
4455 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456 total = btrfs_item_size_nr(node, slot);
4458 /* Iterate all entry of INODE_REF */
4459 while (cur < total) {
4460 ret = INODE_REF_MISSING;
4462 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463 ref_index = btrfs_inode_ref_index(node, ref);
4464 if (index != (u64)-1 && index != ref_index)
4465 goto next_ref;
4467 if (ref_namelen <= BTRFS_NAME_LEN) {
4468 len = ref_namelen;
4469 } else {
4470 len = BTRFS_NAME_LEN;
4471 warning("root %llu INODE %s[%llu %llu] name too long",
4472 root->objectid,
4473 key->type == BTRFS_INODE_REF_KEY ?
4474 "REF" : "EXTREF",
4475 key->objectid, key->offset);
4477 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478 len);
4480 if (len != namelen || strncmp(ref_namebuf, name, len))
4481 goto next_ref;
4483 ret = 0;
4484 goto out;
4485 next_ref:
4486 len = sizeof(*ref) + ref_namelen;
4487 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488 cur += len;
4491 extref:
4492 /* Skip if not support EXTENDED_IREF feature */
4493 if (!ext_ref)
4494 goto out;
4496 btrfs_release_path(&path);
4497 btrfs_init_path(&path);
4499 dir_id = key->offset;
4500 key->type = BTRFS_INODE_EXTREF_KEY;
4501 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4503 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504 if (ret) {
4505 ret = INODE_REF_MISSING;
4506 goto out;
4509 node = path.nodes[0];
4510 slot = path.slots[0];
4512 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513 cur = 0;
4514 total = btrfs_item_size_nr(node, slot);
4516 /* Iterate all entry of INODE_EXTREF */
4517 while (cur < total) {
4518 ret = INODE_REF_MISSING;
4520 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521 ref_index = btrfs_inode_extref_index(node, extref);
4522 parent = btrfs_inode_extref_parent(node, extref);
4523 if (index != (u64)-1 && index != ref_index)
4524 goto next_extref;
4526 if (parent != dir_id)
4527 goto next_extref;
4529 if (ref_namelen <= BTRFS_NAME_LEN) {
4530 len = ref_namelen;
4531 } else {
4532 len = BTRFS_NAME_LEN;
4533 warning("root %llu INODE %s[%llu %llu] name too long",
4534 root->objectid,
4535 key->type == BTRFS_INODE_REF_KEY ?
4536 "REF" : "EXTREF",
4537 key->objectid, key->offset);
4539 read_extent_buffer(node, ref_namebuf,
4540 (unsigned long)(extref + 1), len);
4542 if (len != namelen || strncmp(ref_namebuf, name, len))
4543 goto next_extref;
4545 ret = 0;
4546 goto out;
4548 next_extref:
4549 len = sizeof(*extref) + ref_namelen;
4550 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551 cur += len;
4554 out:
4555 btrfs_release_path(&path);
4556 return ret;
4560 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4563 * @root: the root of the fs/file tree
4564 * @key: the key of the DIR_ITEM/DIR_INDEX being checked
4565 * @size: the st_size of the INODE_ITEM
4566 * @ext_ref: the EXTENDED_IREF feature
4568 * Return 0 if no error occurred.
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571 struct extent_buffer *node, int slot, u64 *size,
4572 unsigned int ext_ref)
4574 struct btrfs_dir_item *di;
4575 struct btrfs_inode_item *ii;
4576 struct btrfs_path path;
4577 struct btrfs_key location;
4578 char namebuf[BTRFS_NAME_LEN] = {0};
4579 u32 total;
4580 u32 cur = 0;
4581 u32 len;
4582 u32 name_len;
4583 u32 data_len;
4584 u8 filetype;
4585 u32 mode;
4586 u64 index;
4587 int ret;
4588 int err = 0;
4591 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592 * ignore index check.
4594 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4596 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597 total = btrfs_item_size_nr(node, slot);
4599 while (cur < total) {
4600 data_len = btrfs_dir_data_len(node, di);
4601 if (data_len)
4602 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604 "DIR_ITEM" : "DIR_INDEX",
4605 key->objectid, key->offset, data_len);
4607 name_len = btrfs_dir_name_len(node, di);
4608 if (name_len <= BTRFS_NAME_LEN) {
4609 len = name_len;
4610 } else {
4611 len = BTRFS_NAME_LEN;
4612 warning("root %llu %s[%llu %llu] name too long",
4613 root->objectid,
4614 key->type == BTRFS_DIR_ITEM_KEY ?
4615 "DIR_ITEM" : "DIR_INDEX",
4616 key->objectid, key->offset);
4618 (*size) += name_len;
4620 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621 filetype = btrfs_dir_type(node, di);
4623 btrfs_init_path(&path);
4624 btrfs_dir_item_key_to_cpu(node, di, &location);
4626 /* Ignore related ROOT_ITEM check */
4627 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628 goto next;
4630 /* Check relative INODE_ITEM(existence/filetype) */
4631 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632 if (ret) {
4633 err |= INODE_ITEM_MISSING;
4634 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637 key->offset, location.objectid, name_len,
4638 namebuf, filetype);
4639 goto next;
4642 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643 struct btrfs_inode_item);
4644 mode = btrfs_inode_mode(path.nodes[0], ii);
4646 if (imode_to_type(mode) != filetype) {
4647 err |= INODE_ITEM_MISMATCH;
4648 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651 key->offset, name_len, namebuf, filetype);
4654 /* Check relative INODE_REF/INODE_EXTREF */
4655 location.type = BTRFS_INODE_REF_KEY;
4656 location.offset = key->objectid;
4657 ret = find_inode_ref(root, &location, namebuf, len,
4658 index, ext_ref);
4659 err |= ret;
4660 if (ret & INODE_REF_MISSING)
4661 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664 key->offset, name_len, namebuf, filetype);
4666 next:
4667 btrfs_release_path(&path);
4668 len = sizeof(*di) + name_len + data_len;
4669 di = (struct btrfs_dir_item *)((char *)di + len);
4670 cur += len;
4672 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674 root->objectid, key->objectid, key->offset);
4675 break;
4679 return err;
4683 * Check file extent datasum/hole, update the size of the file extents,
4684 * check and update the last offset of the file extent.
4686 * @root: the root of fs/file tree.
4687 * @fkey: the key of the file extent.
4688 * @nodatasum: INODE_NODATASUM feature.
4689 * @size: the sum of all EXTENT_DATA items size for this inode.
4690 * @end: the offset of the last extent.
4692 * Return 0 if no error occurred.
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695 struct extent_buffer *node, int slot,
4696 unsigned int nodatasum, u64 *size, u64 *end)
4698 struct btrfs_file_extent_item *fi;
4699 u64 disk_bytenr;
4700 u64 disk_num_bytes;
4701 u64 extent_num_bytes;
4702 u64 found;
4703 unsigned int extent_type;
4704 unsigned int is_hole;
4705 int ret;
4706 int err = 0;
4708 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4710 extent_type = btrfs_file_extent_type(node, fi);
4711 /* Skip if file extent is inline */
4712 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713 struct btrfs_item *e = btrfs_item_nr(slot);
4714 u32 item_inline_len;
4716 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718 if (extent_num_bytes == 0 ||
4719 extent_num_bytes != item_inline_len)
4720 err |= FILE_EXTENT_ERROR;
4721 *size += extent_num_bytes;
4722 return err;
4725 /* Check extent type */
4726 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728 err |= FILE_EXTENT_ERROR;
4729 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730 root->objectid, fkey->objectid, fkey->offset);
4731 return err;
4734 /* Check REG_EXTENT/PREALLOC_EXTENT */
4735 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4740 /* Check EXTENT_DATA datasum */
4741 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742 if (found > 0 && nodatasum) {
4743 err |= ODD_CSUM_ITEM;
4744 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745 root->objectid, fkey->objectid, fkey->offset);
4746 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747 !is_hole &&
4748 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749 err |= CSUM_ITEM_MISSING;
4750 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751 root->objectid, fkey->objectid, fkey->offset);
4752 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753 err |= ODD_CSUM_ITEM;
4754 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755 root->objectid, fkey->objectid, fkey->offset);
4758 /* Check EXTENT_DATA hole */
4759 if (no_holes && is_hole) {
4760 err |= FILE_EXTENT_ERROR;
4761 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762 root->objectid, fkey->objectid, fkey->offset);
4763 } else if (!no_holes && *end != fkey->offset) {
4764 err |= FILE_EXTENT_ERROR;
4765 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766 root->objectid, fkey->objectid, fkey->offset);
4769 *end += extent_num_bytes;
4770 if (!is_hole)
4771 *size += extent_num_bytes;
4773 return err;
4777 * Check INODE_ITEM and related ITEMs (the same inode number)
4778 * 1. check link count
4779 * 2. check inode ref/extref
4780 * 3. check dir item/index
4782 * @ext_ref: the EXTENDED_IREF feature
4784 * Return 0 if no error occurred.
4785 * Return >0 for error or hit the traversal is done(by error bitmap)
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788 unsigned int ext_ref)
4790 struct extent_buffer *node;
4791 struct btrfs_inode_item *ii;
4792 struct btrfs_key key;
4793 u64 inode_id;
4794 u32 mode;
4795 u64 nlink;
4796 u64 nbytes;
4797 u64 isize;
4798 u64 size = 0;
4799 u64 refs = 0;
4800 u64 extent_end = 0;
4801 u64 extent_size = 0;
4802 unsigned int dir;
4803 unsigned int nodatasum;
4804 int slot;
4805 int ret;
4806 int err = 0;
4808 node = path->nodes[0];
4809 slot = path->slots[0];
4811 btrfs_item_key_to_cpu(node, &key, slot);
4812 inode_id = key.objectid;
4814 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815 ret = btrfs_next_item(root, path);
4816 if (ret > 0)
4817 err |= LAST_ITEM;
4818 return err;
4821 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822 isize = btrfs_inode_size(node, ii);
4823 nbytes = btrfs_inode_nbytes(node, ii);
4824 mode = btrfs_inode_mode(node, ii);
4825 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826 nlink = btrfs_inode_nlink(node, ii);
4827 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4829 while (1) {
4830 ret = btrfs_next_item(root, path);
4831 if (ret < 0) {
4832 /* out will fill 'err' rusing current statistics */
4833 goto out;
4834 } else if (ret > 0) {
4835 err |= LAST_ITEM;
4836 goto out;
4839 node = path->nodes[0];
4840 slot = path->slots[0];
4841 btrfs_item_key_to_cpu(node, &key, slot);
4842 if (key.objectid != inode_id)
4843 goto out;
4845 switch (key.type) {
4846 case BTRFS_INODE_REF_KEY:
4847 ret = check_inode_ref(root, &key, node, slot, &refs,
4848 mode);
4849 err |= ret;
4850 break;
4851 case BTRFS_INODE_EXTREF_KEY:
4852 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854 root->objectid, key.objectid,
4855 key.offset);
4856 ret = check_inode_extref(root, &key, node, slot, &refs,
4857 mode);
4858 err |= ret;
4859 break;
4860 case BTRFS_DIR_ITEM_KEY:
4861 case BTRFS_DIR_INDEX_KEY:
4862 if (!dir) {
4863 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864 root->objectid, inode_id,
4865 imode_to_type(mode), key.objectid,
4866 key.offset);
4868 ret = check_dir_item(root, &key, node, slot, &size,
4869 ext_ref);
4870 err |= ret;
4871 break;
4872 case BTRFS_EXTENT_DATA_KEY:
4873 if (dir) {
4874 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875 root->objectid, inode_id, key.objectid,
4876 key.offset);
4878 ret = check_file_extent(root, &key, node, slot,
4879 nodatasum, &extent_size,
4880 &extent_end);
4881 err |= ret;
4882 break;
4883 case BTRFS_XATTR_ITEM_KEY:
4884 break;
4885 default:
4886 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887 key.objectid, key.type, key.offset);
4891 out:
4892 /* verify INODE_ITEM nlink/isize/nbytes */
4893 if (dir) {
4894 if (nlink != 1) {
4895 err |= LINK_COUNT_ERROR;
4896 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897 root->objectid, inode_id, nlink);
4901 * Just a warning, as dir inode nbytes is just an
4902 * instructive value.
4904 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906 root->objectid, inode_id, root->nodesize);
4909 if (isize != size) {
4910 err |= ISIZE_ERROR;
4911 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912 root->objectid, inode_id, isize, size);
4914 } else {
4915 if (nlink != refs) {
4916 err |= LINK_COUNT_ERROR;
4917 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918 root->objectid, inode_id, nlink, refs);
4919 } else if (!nlink) {
4920 err |= ORPHAN_ITEM;
4923 if (!nbytes && !no_holes && extent_end < isize) {
4924 err |= NBYTES_ERROR;
4925 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926 root->objectid, inode_id, isize);
4929 if (nbytes != extent_size) {
4930 err |= NBYTES_ERROR;
4931 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932 root->objectid, inode_id, nbytes, extent_size);
4936 return err;
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4941 struct btrfs_path path;
4942 struct btrfs_key key;
4943 int err = 0;
4944 int ret;
4946 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947 key.type = BTRFS_INODE_ITEM_KEY;
4948 key.offset = 0;
4950 /* For root being dropped, we don't need to check first inode */
4951 if (btrfs_root_refs(&root->root_item) == 0 &&
4952 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953 key.objectid)
4954 return 0;
4956 btrfs_init_path(&path);
4958 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959 if (ret < 0)
4960 goto out;
4961 if (ret > 0) {
4962 ret = 0;
4963 err |= INODE_ITEM_MISSING;
4966 err |= check_inode_item(root, &path, ext_ref);
4967 err &= ~LAST_ITEM;
4968 if (err && !ret)
4969 ret = -EIO;
4970 out:
4971 btrfs_release_path(&path);
4972 return ret;
4976 * Iterate all item on the tree and call check_inode_item() to check.
4978 * @root: the root of the tree to be checked.
4979 * @ext_ref: the EXTENDED_IREF feature
4981 * Return 0 if no error found.
4982 * Return <0 for error.
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4986 struct btrfs_path path;
4987 struct node_refs nrefs;
4988 struct btrfs_root_item *root_item = &root->root_item;
4989 int ret, wret;
4990 int level;
4993 * We need to manually check the first inode item(256)
4994 * As the following traversal function will only start from
4995 * the first inode item in the leaf, if inode item(256) is missing
4996 * we will just skip it forever.
4998 ret = check_fs_first_inode(root, ext_ref);
4999 if (ret < 0)
5000 return ret;
5002 memset(&nrefs, 0, sizeof(nrefs));
5003 level = btrfs_header_level(root->node);
5004 btrfs_init_path(&path);
5006 if (btrfs_root_refs(root_item) > 0 ||
5007 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008 path.nodes[level] = root->node;
5009 path.slots[level] = 0;
5010 extent_buffer_get(root->node);
5011 } else {
5012 struct btrfs_key key;
5014 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015 level = root_item->drop_level;
5016 path.lowest_level = level;
5017 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018 if (ret < 0)
5019 goto out;
5020 ret = 0;
5023 while (1) {
5024 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025 if (wret < 0)
5026 ret = wret;
5027 if (wret != 0)
5028 break;
5030 wret = walk_up_tree_v2(root, &path, &level);
5031 if (wret < 0)
5032 ret = wret;
5033 if (wret != 0)
5034 break;
5037 out:
5038 btrfs_release_path(&path);
5039 return ret;
5043 * Find the relative ref for root_ref and root_backref.
5045 * @root: the root of the root tree.
5046 * @ref_key: the key of the root ref.
5048 * Return 0 if no error occurred.
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051 struct extent_buffer *node, int slot)
5053 struct btrfs_path path;
5054 struct btrfs_key key;
5055 struct btrfs_root_ref *ref;
5056 struct btrfs_root_ref *backref;
5057 char ref_name[BTRFS_NAME_LEN] = {0};
5058 char backref_name[BTRFS_NAME_LEN] = {0};
5059 u64 ref_dirid;
5060 u64 ref_seq;
5061 u32 ref_namelen;
5062 u64 backref_dirid;
5063 u64 backref_seq;
5064 u32 backref_namelen;
5065 u32 len;
5066 int ret;
5067 int err = 0;
5069 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070 ref_dirid = btrfs_root_ref_dirid(node, ref);
5071 ref_seq = btrfs_root_ref_sequence(node, ref);
5072 ref_namelen = btrfs_root_ref_name_len(node, ref);
5074 if (ref_namelen <= BTRFS_NAME_LEN) {
5075 len = ref_namelen;
5076 } else {
5077 len = BTRFS_NAME_LEN;
5078 warning("%s[%llu %llu] ref_name too long",
5079 ref_key->type == BTRFS_ROOT_REF_KEY ?
5080 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081 ref_key->offset);
5083 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5085 /* Find relative root_ref */
5086 key.objectid = ref_key->offset;
5087 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088 key.offset = ref_key->objectid;
5090 btrfs_init_path(&path);
5091 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092 if (ret) {
5093 err |= ROOT_REF_MISSING;
5094 error("%s[%llu %llu] couldn't find relative ref",
5095 ref_key->type == BTRFS_ROOT_REF_KEY ?
5096 "ROOT_REF" : "ROOT_BACKREF",
5097 ref_key->objectid, ref_key->offset);
5098 goto out;
5101 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102 struct btrfs_root_ref);
5103 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5107 if (backref_namelen <= BTRFS_NAME_LEN) {
5108 len = backref_namelen;
5109 } else {
5110 len = BTRFS_NAME_LEN;
5111 warning("%s[%llu %llu] ref_name too long",
5112 key.type == BTRFS_ROOT_REF_KEY ?
5113 "ROOT_REF" : "ROOT_BACKREF",
5114 key.objectid, key.offset);
5116 read_extent_buffer(path.nodes[0], backref_name,
5117 (unsigned long)(backref + 1), len);
5119 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120 ref_namelen != backref_namelen ||
5121 strncmp(ref_name, backref_name, len)) {
5122 err |= ROOT_REF_MISMATCH;
5123 error("%s[%llu %llu] mismatch relative ref",
5124 ref_key->type == BTRFS_ROOT_REF_KEY ?
5125 "ROOT_REF" : "ROOT_BACKREF",
5126 ref_key->objectid, ref_key->offset);
5128 out:
5129 btrfs_release_path(&path);
5130 return err;
5134 * Check all fs/file tree in low_memory mode.
5136 * 1. for fs tree root item, call check_fs_root_v2()
5137 * 2. for fs tree root ref/backref, call check_root_ref()
5139 * Return 0 if no error occurred.
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5143 struct btrfs_root *tree_root = fs_info->tree_root;
5144 struct btrfs_root *cur_root = NULL;
5145 struct btrfs_path path;
5146 struct btrfs_key key;
5147 struct extent_buffer *node;
5148 unsigned int ext_ref;
5149 int slot;
5150 int ret;
5151 int err = 0;
5153 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5155 btrfs_init_path(&path);
5156 key.objectid = BTRFS_FS_TREE_OBJECTID;
5157 key.offset = 0;
5158 key.type = BTRFS_ROOT_ITEM_KEY;
5160 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161 if (ret < 0) {
5162 err = ret;
5163 goto out;
5164 } else if (ret > 0) {
5165 err = -ENOENT;
5166 goto out;
5169 while (1) {
5170 node = path.nodes[0];
5171 slot = path.slots[0];
5172 btrfs_item_key_to_cpu(node, &key, slot);
5173 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174 goto out;
5175 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176 fs_root_objectid(key.objectid)) {
5177 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179 &key);
5180 } else {
5181 key.offset = (u64)-1;
5182 cur_root = btrfs_read_fs_root(fs_info, &key);
5185 if (IS_ERR(cur_root)) {
5186 error("Fail to read fs/subvol tree: %lld",
5187 key.objectid);
5188 err = -EIO;
5189 goto next;
5192 ret = check_fs_root_v2(cur_root, ext_ref);
5193 err |= ret;
5195 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196 btrfs_free_fs_root(cur_root);
5197 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199 ret = check_root_ref(tree_root, &key, node, slot);
5200 err |= ret;
5202 next:
5203 ret = btrfs_next_item(tree_root, &path);
5204 if (ret > 0)
5205 goto out;
5206 if (ret < 0) {
5207 err = ret;
5208 goto out;
5212 out:
5213 btrfs_release_path(&path);
5214 return err;
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5219 struct list_head *cur = rec->backrefs.next;
5220 struct extent_backref *back;
5221 struct tree_backref *tback;
5222 struct data_backref *dback;
5223 u64 found = 0;
5224 int err = 0;
5226 while(cur != &rec->backrefs) {
5227 back = to_extent_backref(cur);
5228 cur = cur->next;
5229 if (!back->found_extent_tree) {
5230 err = 1;
5231 if (!print_errs)
5232 goto out;
5233 if (back->is_data) {
5234 dback = to_data_backref(back);
5235 fprintf(stderr, "Backref %llu %s %llu"
5236 " owner %llu offset %llu num_refs %lu"
5237 " not found in extent tree\n",
5238 (unsigned long long)rec->start,
5239 back->full_backref ?
5240 "parent" : "root",
5241 back->full_backref ?
5242 (unsigned long long)dback->parent:
5243 (unsigned long long)dback->root,
5244 (unsigned long long)dback->owner,
5245 (unsigned long long)dback->offset,
5246 (unsigned long)dback->num_refs);
5247 } else {
5248 tback = to_tree_backref(back);
5249 fprintf(stderr, "Backref %llu parent %llu"
5250 " root %llu not found in extent tree\n",
5251 (unsigned long long)rec->start,
5252 (unsigned long long)tback->parent,
5253 (unsigned long long)tback->root);
5256 if (!back->is_data && !back->found_ref) {
5257 err = 1;
5258 if (!print_errs)
5259 goto out;
5260 tback = to_tree_backref(back);
5261 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262 (unsigned long long)rec->start,
5263 back->full_backref ? "parent" : "root",
5264 back->full_backref ?
5265 (unsigned long long)tback->parent :
5266 (unsigned long long)tback->root, back);
5268 if (back->is_data) {
5269 dback = to_data_backref(back);
5270 if (dback->found_ref != dback->num_refs) {
5271 err = 1;
5272 if (!print_errs)
5273 goto out;
5274 fprintf(stderr, "Incorrect local backref count"
5275 " on %llu %s %llu owner %llu"
5276 " offset %llu found %u wanted %u back %p\n",
5277 (unsigned long long)rec->start,
5278 back->full_backref ?
5279 "parent" : "root",
5280 back->full_backref ?
5281 (unsigned long long)dback->parent:
5282 (unsigned long long)dback->root,
5283 (unsigned long long)dback->owner,
5284 (unsigned long long)dback->offset,
5285 dback->found_ref, dback->num_refs, back);
5287 if (dback->disk_bytenr != rec->start) {
5288 err = 1;
5289 if (!print_errs)
5290 goto out;
5291 fprintf(stderr, "Backref disk bytenr does not"
5292 " match extent record, bytenr=%llu, "
5293 "ref bytenr=%llu\n",
5294 (unsigned long long)rec->start,
5295 (unsigned long long)dback->disk_bytenr);
5298 if (dback->bytes != rec->nr) {
5299 err = 1;
5300 if (!print_errs)
5301 goto out;
5302 fprintf(stderr, "Backref bytes do not match "
5303 "extent backref, bytenr=%llu, ref "
5304 "bytes=%llu, backref bytes=%llu\n",
5305 (unsigned long long)rec->start,
5306 (unsigned long long)rec->nr,
5307 (unsigned long long)dback->bytes);
5310 if (!back->is_data) {
5311 found += 1;
5312 } else {
5313 dback = to_data_backref(back);
5314 found += dback->found_ref;
5317 if (found != rec->refs) {
5318 err = 1;
5319 if (!print_errs)
5320 goto out;
5321 fprintf(stderr, "Incorrect global backref count "
5322 "on %llu found %llu wanted %llu\n",
5323 (unsigned long long)rec->start,
5324 (unsigned long long)found,
5325 (unsigned long long)rec->refs);
5327 out:
5328 return err;
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5333 struct extent_backref *back;
5334 struct list_head *cur;
5335 while (!list_empty(&rec->backrefs)) {
5336 cur = rec->backrefs.next;
5337 back = to_extent_backref(cur);
5338 list_del(cur);
5339 free(back);
5341 return 0;
5344 static void free_extent_record_cache(struct cache_tree *extent_cache)
5346 struct cache_extent *cache;
5347 struct extent_record *rec;
5349 while (1) {
5350 cache = first_cache_extent(extent_cache);
5351 if (!cache)
5352 break;
5353 rec = container_of(cache, struct extent_record, cache);
5354 remove_cache_extent(extent_cache, cache);
5355 free_all_extent_backrefs(rec);
5356 free(rec);
5360 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5361 struct extent_record *rec)
5363 if (rec->content_checked && rec->owner_ref_checked &&
5364 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5365 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5366 !rec->bad_full_backref && !rec->crossing_stripes &&
5367 !rec->wrong_chunk_type) {
5368 remove_cache_extent(extent_cache, &rec->cache);
5369 free_all_extent_backrefs(rec);
5370 list_del_init(&rec->list);
5371 free(rec);
5373 return 0;
5376 static int check_owner_ref(struct btrfs_root *root,
5377 struct extent_record *rec,
5378 struct extent_buffer *buf)
5380 struct extent_backref *node;
5381 struct tree_backref *back;
5382 struct btrfs_root *ref_root;
5383 struct btrfs_key key;
5384 struct btrfs_path path;
5385 struct extent_buffer *parent;
5386 int level;
5387 int found = 0;
5388 int ret;
5390 list_for_each_entry(node, &rec->backrefs, list) {
5391 if (node->is_data)
5392 continue;
5393 if (!node->found_ref)
5394 continue;
5395 if (node->full_backref)
5396 continue;
5397 back = to_tree_backref(node);
5398 if (btrfs_header_owner(buf) == back->root)
5399 return 0;
5401 BUG_ON(rec->is_root);
5403 /* try to find the block by search corresponding fs tree */
5404 key.objectid = btrfs_header_owner(buf);
5405 key.type = BTRFS_ROOT_ITEM_KEY;
5406 key.offset = (u64)-1;
5408 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5409 if (IS_ERR(ref_root))
5410 return 1;
5412 level = btrfs_header_level(buf);
5413 if (level == 0)
5414 btrfs_item_key_to_cpu(buf, &key, 0);
5415 else
5416 btrfs_node_key_to_cpu(buf, &key, 0);
5418 btrfs_init_path(&path);
5419 path.lowest_level = level + 1;
5420 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5421 if (ret < 0)
5422 return 0;
5424 parent = path.nodes[level + 1];
5425 if (parent && buf->start == btrfs_node_blockptr(parent,
5426 path.slots[level + 1]))
5427 found = 1;
5429 btrfs_release_path(&path);
5430 return found ? 0 : 1;
5433 static int is_extent_tree_record(struct extent_record *rec)
5435 struct list_head *cur = rec->backrefs.next;
5436 struct extent_backref *node;
5437 struct tree_backref *back;
5438 int is_extent = 0;
5440 while(cur != &rec->backrefs) {
5441 node = to_extent_backref(cur);
5442 cur = cur->next;
5443 if (node->is_data)
5444 return 0;
5445 back = to_tree_backref(node);
5446 if (node->full_backref)
5447 return 0;
5448 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5449 is_extent = 1;
5451 return is_extent;
5455 static int record_bad_block_io(struct btrfs_fs_info *info,
5456 struct cache_tree *extent_cache,
5457 u64 start, u64 len)
5459 struct extent_record *rec;
5460 struct cache_extent *cache;
5461 struct btrfs_key key;
5463 cache = lookup_cache_extent(extent_cache, start, len);
5464 if (!cache)
5465 return 0;
5467 rec = container_of(cache, struct extent_record, cache);
5468 if (!is_extent_tree_record(rec))
5469 return 0;
5471 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5472 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5475 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5476 struct extent_buffer *buf, int slot)
5478 if (btrfs_header_level(buf)) {
5479 struct btrfs_key_ptr ptr1, ptr2;
5481 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5482 sizeof(struct btrfs_key_ptr));
5483 read_extent_buffer(buf, &ptr2,
5484 btrfs_node_key_ptr_offset(slot + 1),
5485 sizeof(struct btrfs_key_ptr));
5486 write_extent_buffer(buf, &ptr1,
5487 btrfs_node_key_ptr_offset(slot + 1),
5488 sizeof(struct btrfs_key_ptr));
5489 write_extent_buffer(buf, &ptr2,
5490 btrfs_node_key_ptr_offset(slot),
5491 sizeof(struct btrfs_key_ptr));
5492 if (slot == 0) {
5493 struct btrfs_disk_key key;
5494 btrfs_node_key(buf, &key, 0);
5495 btrfs_fixup_low_keys(root, path, &key,
5496 btrfs_header_level(buf) + 1);
5498 } else {
5499 struct btrfs_item *item1, *item2;
5500 struct btrfs_key k1, k2;
5501 char *item1_data, *item2_data;
5502 u32 item1_offset, item2_offset, item1_size, item2_size;
5504 item1 = btrfs_item_nr(slot);
5505 item2 = btrfs_item_nr(slot + 1);
5506 btrfs_item_key_to_cpu(buf, &k1, slot);
5507 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5508 item1_offset = btrfs_item_offset(buf, item1);
5509 item2_offset = btrfs_item_offset(buf, item2);
5510 item1_size = btrfs_item_size(buf, item1);
5511 item2_size = btrfs_item_size(buf, item2);
5513 item1_data = malloc(item1_size);
5514 if (!item1_data)
5515 return -ENOMEM;
5516 item2_data = malloc(item2_size);
5517 if (!item2_data) {
5518 free(item1_data);
5519 return -ENOMEM;
5522 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5523 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5525 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5526 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5527 free(item1_data);
5528 free(item2_data);
5530 btrfs_set_item_offset(buf, item1, item2_offset);
5531 btrfs_set_item_offset(buf, item2, item1_offset);
5532 btrfs_set_item_size(buf, item1, item2_size);
5533 btrfs_set_item_size(buf, item2, item1_size);
5535 path->slots[0] = slot;
5536 btrfs_set_item_key_unsafe(root, path, &k2);
5537 path->slots[0] = slot + 1;
5538 btrfs_set_item_key_unsafe(root, path, &k1);
5540 return 0;
5543 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5545 struct extent_buffer *buf;
5546 struct btrfs_key k1, k2;
5547 int i;
5548 int level = path->lowest_level;
5549 int ret = -EIO;
5551 buf = path->nodes[level];
5552 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5553 if (level) {
5554 btrfs_node_key_to_cpu(buf, &k1, i);
5555 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5556 } else {
5557 btrfs_item_key_to_cpu(buf, &k1, i);
5558 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5560 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5561 continue;
5562 ret = swap_values(root, path, buf, i);
5563 if (ret)
5564 break;
5565 btrfs_mark_buffer_dirty(buf);
5566 i = 0;
5568 return ret;
5571 static int delete_bogus_item(struct btrfs_root *root,
5572 struct btrfs_path *path,
5573 struct extent_buffer *buf, int slot)
5575 struct btrfs_key key;
5576 int nritems = btrfs_header_nritems(buf);
5578 btrfs_item_key_to_cpu(buf, &key, slot);
5580 /* These are all the keys we can deal with missing. */
5581 if (key.type != BTRFS_DIR_INDEX_KEY &&
5582 key.type != BTRFS_EXTENT_ITEM_KEY &&
5583 key.type != BTRFS_METADATA_ITEM_KEY &&
5584 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5585 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5586 return -1;
5588 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5589 (unsigned long long)key.objectid, key.type,
5590 (unsigned long long)key.offset, slot, buf->start);
5591 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5592 btrfs_item_nr_offset(slot + 1),
5593 sizeof(struct btrfs_item) *
5594 (nritems - slot - 1));
5595 btrfs_set_header_nritems(buf, nritems - 1);
5596 if (slot == 0) {
5597 struct btrfs_disk_key disk_key;
5599 btrfs_item_key(buf, &disk_key, 0);
5600 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5602 btrfs_mark_buffer_dirty(buf);
5603 return 0;
5606 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5608 struct extent_buffer *buf;
5609 int i;
5610 int ret = 0;
5612 /* We should only get this for leaves */
5613 BUG_ON(path->lowest_level);
5614 buf = path->nodes[0];
5615 again:
5616 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5617 unsigned int shift = 0, offset;
5619 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5620 BTRFS_LEAF_DATA_SIZE(root)) {
5621 if (btrfs_item_end_nr(buf, i) >
5622 BTRFS_LEAF_DATA_SIZE(root)) {
5623 ret = delete_bogus_item(root, path, buf, i);
5624 if (!ret)
5625 goto again;
5626 fprintf(stderr, "item is off the end of the "
5627 "leaf, can't fix\n");
5628 ret = -EIO;
5629 break;
5631 shift = BTRFS_LEAF_DATA_SIZE(root) -
5632 btrfs_item_end_nr(buf, i);
5633 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5634 btrfs_item_offset_nr(buf, i - 1)) {
5635 if (btrfs_item_end_nr(buf, i) >
5636 btrfs_item_offset_nr(buf, i - 1)) {
5637 ret = delete_bogus_item(root, path, buf, i);
5638 if (!ret)
5639 goto again;
5640 fprintf(stderr, "items overlap, can't fix\n");
5641 ret = -EIO;
5642 break;
5644 shift = btrfs_item_offset_nr(buf, i - 1) -
5645 btrfs_item_end_nr(buf, i);
5647 if (!shift)
5648 continue;
5650 printf("Shifting item nr %d by %u bytes in block %llu\n",
5651 i, shift, (unsigned long long)buf->start);
5652 offset = btrfs_item_offset_nr(buf, i);
5653 memmove_extent_buffer(buf,
5654 btrfs_leaf_data(buf) + offset + shift,
5655 btrfs_leaf_data(buf) + offset,
5656 btrfs_item_size_nr(buf, i));
5657 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5658 offset + shift);
5659 btrfs_mark_buffer_dirty(buf);
5663 * We may have moved things, in which case we want to exit so we don't
5664 * write those changes out. Once we have proper abort functionality in
5665 * progs this can be changed to something nicer.
5667 BUG_ON(ret);
5668 return ret;
5672 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5673 * then just return -EIO.
5675 static int try_to_fix_bad_block(struct btrfs_root *root,
5676 struct extent_buffer *buf,
5677 enum btrfs_tree_block_status status)
5679 struct btrfs_trans_handle *trans;
5680 struct ulist *roots;
5681 struct ulist_node *node;
5682 struct btrfs_root *search_root;
5683 struct btrfs_path path;
5684 struct ulist_iterator iter;
5685 struct btrfs_key root_key, key;
5686 int ret;
5688 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5689 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5690 return -EIO;
5692 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5693 if (ret)
5694 return -EIO;
5696 btrfs_init_path(&path);
5697 ULIST_ITER_INIT(&iter);
5698 while ((node = ulist_next(roots, &iter))) {
5699 root_key.objectid = node->val;
5700 root_key.type = BTRFS_ROOT_ITEM_KEY;
5701 root_key.offset = (u64)-1;
5703 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5704 if (IS_ERR(root)) {
5705 ret = -EIO;
5706 break;
5710 trans = btrfs_start_transaction(search_root, 0);
5711 if (IS_ERR(trans)) {
5712 ret = PTR_ERR(trans);
5713 break;
5716 path.lowest_level = btrfs_header_level(buf);
5717 path.skip_check_block = 1;
5718 if (path.lowest_level)
5719 btrfs_node_key_to_cpu(buf, &key, 0);
5720 else
5721 btrfs_item_key_to_cpu(buf, &key, 0);
5722 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5723 if (ret) {
5724 ret = -EIO;
5725 btrfs_commit_transaction(trans, search_root);
5726 break;
5728 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5729 ret = fix_key_order(search_root, &path);
5730 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5731 ret = fix_item_offset(search_root, &path);
5732 if (ret) {
5733 btrfs_commit_transaction(trans, search_root);
5734 break;
5736 btrfs_release_path(&path);
5737 btrfs_commit_transaction(trans, search_root);
5739 ulist_free(roots);
5740 btrfs_release_path(&path);
5741 return ret;
5744 static int check_block(struct btrfs_root *root,
5745 struct cache_tree *extent_cache,
5746 struct extent_buffer *buf, u64 flags)
5748 struct extent_record *rec;
5749 struct cache_extent *cache;
5750 struct btrfs_key key;
5751 enum btrfs_tree_block_status status;
5752 int ret = 0;
5753 int level;
5755 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5756 if (!cache)
5757 return 1;
5758 rec = container_of(cache, struct extent_record, cache);
5759 rec->generation = btrfs_header_generation(buf);
5761 level = btrfs_header_level(buf);
5762 if (btrfs_header_nritems(buf) > 0) {
5764 if (level == 0)
5765 btrfs_item_key_to_cpu(buf, &key, 0);
5766 else
5767 btrfs_node_key_to_cpu(buf, &key, 0);
5769 rec->info_objectid = key.objectid;
5771 rec->info_level = level;
5773 if (btrfs_is_leaf(buf))
5774 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5775 else
5776 status = btrfs_check_node(root, &rec->parent_key, buf);
5778 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5779 if (repair)
5780 status = try_to_fix_bad_block(root, buf, status);
5781 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5782 ret = -EIO;
5783 fprintf(stderr, "bad block %llu\n",
5784 (unsigned long long)buf->start);
5785 } else {
5787 * Signal to callers we need to start the scan over
5788 * again since we'll have cowed blocks.
5790 ret = -EAGAIN;
5792 } else {
5793 rec->content_checked = 1;
5794 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5795 rec->owner_ref_checked = 1;
5796 else {
5797 ret = check_owner_ref(root, rec, buf);
5798 if (!ret)
5799 rec->owner_ref_checked = 1;
5802 if (!ret)
5803 maybe_free_extent_rec(extent_cache, rec);
5804 return ret;
5807 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5808 u64 parent, u64 root)
5810 struct list_head *cur = rec->backrefs.next;
5811 struct extent_backref *node;
5812 struct tree_backref *back;
5814 while(cur != &rec->backrefs) {
5815 node = to_extent_backref(cur);
5816 cur = cur->next;
5817 if (node->is_data)
5818 continue;
5819 back = to_tree_backref(node);
5820 if (parent > 0) {
5821 if (!node->full_backref)
5822 continue;
5823 if (parent == back->parent)
5824 return back;
5825 } else {
5826 if (node->full_backref)
5827 continue;
5828 if (back->root == root)
5829 return back;
5832 return NULL;
5835 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5836 u64 parent, u64 root)
5838 struct tree_backref *ref = malloc(sizeof(*ref));
5840 if (!ref)
5841 return NULL;
5842 memset(&ref->node, 0, sizeof(ref->node));
5843 if (parent > 0) {
5844 ref->parent = parent;
5845 ref->node.full_backref = 1;
5846 } else {
5847 ref->root = root;
5848 ref->node.full_backref = 0;
5850 list_add_tail(&ref->node.list, &rec->backrefs);
5852 return ref;
5855 static struct data_backref *find_data_backref(struct extent_record *rec,
5856 u64 parent, u64 root,
5857 u64 owner, u64 offset,
5858 int found_ref,
5859 u64 disk_bytenr, u64 bytes)
5861 struct list_head *cur = rec->backrefs.next;
5862 struct extent_backref *node;
5863 struct data_backref *back;
5865 while(cur != &rec->backrefs) {
5866 node = to_extent_backref(cur);
5867 cur = cur->next;
5868 if (!node->is_data)
5869 continue;
5870 back = to_data_backref(node);
5871 if (parent > 0) {
5872 if (!node->full_backref)
5873 continue;
5874 if (parent == back->parent)
5875 return back;
5876 } else {
5877 if (node->full_backref)
5878 continue;
5879 if (back->root == root && back->owner == owner &&
5880 back->offset == offset) {
5881 if (found_ref && node->found_ref &&
5882 (back->bytes != bytes ||
5883 back->disk_bytenr != disk_bytenr))
5884 continue;
5885 return back;
5889 return NULL;
5892 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5893 u64 parent, u64 root,
5894 u64 owner, u64 offset,
5895 u64 max_size)
5897 struct data_backref *ref = malloc(sizeof(*ref));
5899 if (!ref)
5900 return NULL;
5901 memset(&ref->node, 0, sizeof(ref->node));
5902 ref->node.is_data = 1;
5904 if (parent > 0) {
5905 ref->parent = parent;
5906 ref->owner = 0;
5907 ref->offset = 0;
5908 ref->node.full_backref = 1;
5909 } else {
5910 ref->root = root;
5911 ref->owner = owner;
5912 ref->offset = offset;
5913 ref->node.full_backref = 0;
5915 ref->bytes = max_size;
5916 ref->found_ref = 0;
5917 ref->num_refs = 0;
5918 list_add_tail(&ref->node.list, &rec->backrefs);
5919 if (max_size > rec->max_size)
5920 rec->max_size = max_size;
5921 return ref;
5924 /* Check if the type of extent matches with its chunk */
5925 static void check_extent_type(struct extent_record *rec)
5927 struct btrfs_block_group_cache *bg_cache;
5929 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5930 if (!bg_cache)
5931 return;
5933 /* data extent, check chunk directly*/
5934 if (!rec->metadata) {
5935 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5936 rec->wrong_chunk_type = 1;
5937 return;
5940 /* metadata extent, check the obvious case first */
5941 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5942 BTRFS_BLOCK_GROUP_METADATA))) {
5943 rec->wrong_chunk_type = 1;
5944 return;
5948 * Check SYSTEM extent, as it's also marked as metadata, we can only
5949 * make sure it's a SYSTEM extent by its backref
5951 if (!list_empty(&rec->backrefs)) {
5952 struct extent_backref *node;
5953 struct tree_backref *tback;
5954 u64 bg_type;
5956 node = to_extent_backref(rec->backrefs.next);
5957 if (node->is_data) {
5958 /* tree block shouldn't have data backref */
5959 rec->wrong_chunk_type = 1;
5960 return;
5962 tback = container_of(node, struct tree_backref, node);
5964 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5965 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5966 else
5967 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5968 if (!(bg_cache->flags & bg_type))
5969 rec->wrong_chunk_type = 1;
5974 * Allocate a new extent record, fill default values from @tmpl and insert int
5975 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5976 * the cache, otherwise it fails.
5978 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5979 struct extent_record *tmpl)
5981 struct extent_record *rec;
5982 int ret = 0;
5984 rec = malloc(sizeof(*rec));
5985 if (!rec)
5986 return -ENOMEM;
5987 rec->start = tmpl->start;
5988 rec->max_size = tmpl->max_size;
5989 rec->nr = max(tmpl->nr, tmpl->max_size);
5990 rec->found_rec = tmpl->found_rec;
5991 rec->content_checked = tmpl->content_checked;
5992 rec->owner_ref_checked = tmpl->owner_ref_checked;
5993 rec->num_duplicates = 0;
5994 rec->metadata = tmpl->metadata;
5995 rec->flag_block_full_backref = FLAG_UNSET;
5996 rec->bad_full_backref = 0;
5997 rec->crossing_stripes = 0;
5998 rec->wrong_chunk_type = 0;
5999 rec->is_root = tmpl->is_root;
6000 rec->refs = tmpl->refs;
6001 rec->extent_item_refs = tmpl->extent_item_refs;
6002 rec->parent_generation = tmpl->parent_generation;
6003 INIT_LIST_HEAD(&rec->backrefs);
6004 INIT_LIST_HEAD(&rec->dups);
6005 INIT_LIST_HEAD(&rec->list);
6006 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6007 rec->cache.start = tmpl->start;
6008 rec->cache.size = tmpl->nr;
6009 ret = insert_cache_extent(extent_cache, &rec->cache);
6010 if (ret) {
6011 free(rec);
6012 return ret;
6014 bytes_used += rec->nr;
6016 if (tmpl->metadata)
6017 rec->crossing_stripes = check_crossing_stripes(global_info,
6018 rec->start, global_info->tree_root->nodesize);
6019 check_extent_type(rec);
6020 return ret;
6024 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6025 * some are hints:
6026 * - refs - if found, increase refs
6027 * - is_root - if found, set
6028 * - content_checked - if found, set
6029 * - owner_ref_checked - if found, set
6031 * If not found, create a new one, initialize and insert.
6033 static int add_extent_rec(struct cache_tree *extent_cache,
6034 struct extent_record *tmpl)
6036 struct extent_record *rec;
6037 struct cache_extent *cache;
6038 int ret = 0;
6039 int dup = 0;
6041 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6042 if (cache) {
6043 rec = container_of(cache, struct extent_record, cache);
6044 if (tmpl->refs)
6045 rec->refs++;
6046 if (rec->nr == 1)
6047 rec->nr = max(tmpl->nr, tmpl->max_size);
6050 * We need to make sure to reset nr to whatever the extent
6051 * record says was the real size, this way we can compare it to
6052 * the backrefs.
6054 if (tmpl->found_rec) {
6055 if (tmpl->start != rec->start || rec->found_rec) {
6056 struct extent_record *tmp;
6058 dup = 1;
6059 if (list_empty(&rec->list))
6060 list_add_tail(&rec->list,
6061 &duplicate_extents);
6064 * We have to do this song and dance in case we
6065 * find an extent record that falls inside of
6066 * our current extent record but does not have
6067 * the same objectid.
6069 tmp = malloc(sizeof(*tmp));
6070 if (!tmp)
6071 return -ENOMEM;
6072 tmp->start = tmpl->start;
6073 tmp->max_size = tmpl->max_size;
6074 tmp->nr = tmpl->nr;
6075 tmp->found_rec = 1;
6076 tmp->metadata = tmpl->metadata;
6077 tmp->extent_item_refs = tmpl->extent_item_refs;
6078 INIT_LIST_HEAD(&tmp->list);
6079 list_add_tail(&tmp->list, &rec->dups);
6080 rec->num_duplicates++;
6081 } else {
6082 rec->nr = tmpl->nr;
6083 rec->found_rec = 1;
6087 if (tmpl->extent_item_refs && !dup) {
6088 if (rec->extent_item_refs) {
6089 fprintf(stderr, "block %llu rec "
6090 "extent_item_refs %llu, passed %llu\n",
6091 (unsigned long long)tmpl->start,
6092 (unsigned long long)
6093 rec->extent_item_refs,
6094 (unsigned long long)tmpl->extent_item_refs);
6096 rec->extent_item_refs = tmpl->extent_item_refs;
6098 if (tmpl->is_root)
6099 rec->is_root = 1;
6100 if (tmpl->content_checked)
6101 rec->content_checked = 1;
6102 if (tmpl->owner_ref_checked)
6103 rec->owner_ref_checked = 1;
6104 memcpy(&rec->parent_key, &tmpl->parent_key,
6105 sizeof(tmpl->parent_key));
6106 if (tmpl->parent_generation)
6107 rec->parent_generation = tmpl->parent_generation;
6108 if (rec->max_size < tmpl->max_size)
6109 rec->max_size = tmpl->max_size;
6112 * A metadata extent can't cross stripe_len boundary, otherwise
6113 * kernel scrub won't be able to handle it.
6114 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6115 * it.
6117 if (tmpl->metadata)
6118 rec->crossing_stripes = check_crossing_stripes(
6119 global_info, rec->start,
6120 global_info->tree_root->nodesize);
6121 check_extent_type(rec);
6122 maybe_free_extent_rec(extent_cache, rec);
6123 return ret;
6126 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6128 return ret;
6131 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6132 u64 parent, u64 root, int found_ref)
6134 struct extent_record *rec;
6135 struct tree_backref *back;
6136 struct cache_extent *cache;
6137 int ret;
6139 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6140 if (!cache) {
6141 struct extent_record tmpl;
6143 memset(&tmpl, 0, sizeof(tmpl));
6144 tmpl.start = bytenr;
6145 tmpl.nr = 1;
6146 tmpl.metadata = 1;
6148 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6149 if (ret)
6150 return ret;
6152 /* really a bug in cache_extent implement now */
6153 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6154 if (!cache)
6155 return -ENOENT;
6158 rec = container_of(cache, struct extent_record, cache);
6159 if (rec->start != bytenr) {
6161 * Several cause, from unaligned bytenr to over lapping extents
6163 return -EEXIST;
6166 back = find_tree_backref(rec, parent, root);
6167 if (!back) {
6168 back = alloc_tree_backref(rec, parent, root);
6169 if (!back)
6170 return -ENOMEM;
6173 if (found_ref) {
6174 if (back->node.found_ref) {
6175 fprintf(stderr, "Extent back ref already exists "
6176 "for %llu parent %llu root %llu \n",
6177 (unsigned long long)bytenr,
6178 (unsigned long long)parent,
6179 (unsigned long long)root);
6181 back->node.found_ref = 1;
6182 } else {
6183 if (back->node.found_extent_tree) {
6184 fprintf(stderr, "Extent back ref already exists "
6185 "for %llu parent %llu root %llu \n",
6186 (unsigned long long)bytenr,
6187 (unsigned long long)parent,
6188 (unsigned long long)root);
6190 back->node.found_extent_tree = 1;
6192 check_extent_type(rec);
6193 maybe_free_extent_rec(extent_cache, rec);
6194 return 0;
6197 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6198 u64 parent, u64 root, u64 owner, u64 offset,
6199 u32 num_refs, int found_ref, u64 max_size)
6201 struct extent_record *rec;
6202 struct data_backref *back;
6203 struct cache_extent *cache;
6204 int ret;
6206 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6207 if (!cache) {
6208 struct extent_record tmpl;
6210 memset(&tmpl, 0, sizeof(tmpl));
6211 tmpl.start = bytenr;
6212 tmpl.nr = 1;
6213 tmpl.max_size = max_size;
6215 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6216 if (ret)
6217 return ret;
6219 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6220 if (!cache)
6221 abort();
6224 rec = container_of(cache, struct extent_record, cache);
6225 if (rec->max_size < max_size)
6226 rec->max_size = max_size;
6229 * If found_ref is set then max_size is the real size and must match the
6230 * existing refs. So if we have already found a ref then we need to
6231 * make sure that this ref matches the existing one, otherwise we need
6232 * to add a new backref so we can notice that the backrefs don't match
6233 * and we need to figure out who is telling the truth. This is to
6234 * account for that awful fsync bug I introduced where we'd end up with
6235 * a btrfs_file_extent_item that would have its length include multiple
6236 * prealloc extents or point inside of a prealloc extent.
6238 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6239 bytenr, max_size);
6240 if (!back) {
6241 back = alloc_data_backref(rec, parent, root, owner, offset,
6242 max_size);
6243 BUG_ON(!back);
6246 if (found_ref) {
6247 BUG_ON(num_refs != 1);
6248 if (back->node.found_ref)
6249 BUG_ON(back->bytes != max_size);
6250 back->node.found_ref = 1;
6251 back->found_ref += 1;
6252 back->bytes = max_size;
6253 back->disk_bytenr = bytenr;
6254 rec->refs += 1;
6255 rec->content_checked = 1;
6256 rec->owner_ref_checked = 1;
6257 } else {
6258 if (back->node.found_extent_tree) {
6259 fprintf(stderr, "Extent back ref already exists "
6260 "for %llu parent %llu root %llu "
6261 "owner %llu offset %llu num_refs %lu\n",
6262 (unsigned long long)bytenr,
6263 (unsigned long long)parent,
6264 (unsigned long long)root,
6265 (unsigned long long)owner,
6266 (unsigned long long)offset,
6267 (unsigned long)num_refs);
6269 back->num_refs = num_refs;
6270 back->node.found_extent_tree = 1;
6272 maybe_free_extent_rec(extent_cache, rec);
6273 return 0;
6276 static int add_pending(struct cache_tree *pending,
6277 struct cache_tree *seen, u64 bytenr, u32 size)
6279 int ret;
6280 ret = add_cache_extent(seen, bytenr, size);
6281 if (ret)
6282 return ret;
6283 add_cache_extent(pending, bytenr, size);
6284 return 0;
6287 static int pick_next_pending(struct cache_tree *pending,
6288 struct cache_tree *reada,
6289 struct cache_tree *nodes,
6290 u64 last, struct block_info *bits, int bits_nr,
6291 int *reada_bits)
6293 unsigned long node_start = last;
6294 struct cache_extent *cache;
6295 int ret;
6297 cache = search_cache_extent(reada, 0);
6298 if (cache) {
6299 bits[0].start = cache->start;
6300 bits[0].size = cache->size;
6301 *reada_bits = 1;
6302 return 1;
6304 *reada_bits = 0;
6305 if (node_start > 32768)
6306 node_start -= 32768;
6308 cache = search_cache_extent(nodes, node_start);
6309 if (!cache)
6310 cache = search_cache_extent(nodes, 0);
6312 if (!cache) {
6313 cache = search_cache_extent(pending, 0);
6314 if (!cache)
6315 return 0;
6316 ret = 0;
6317 do {
6318 bits[ret].start = cache->start;
6319 bits[ret].size = cache->size;
6320 cache = next_cache_extent(cache);
6321 ret++;
6322 } while (cache && ret < bits_nr);
6323 return ret;
6326 ret = 0;
6327 do {
6328 bits[ret].start = cache->start;
6329 bits[ret].size = cache->size;
6330 cache = next_cache_extent(cache);
6331 ret++;
6332 } while (cache && ret < bits_nr);
6334 if (bits_nr - ret > 8) {
6335 u64 lookup = bits[0].start + bits[0].size;
6336 struct cache_extent *next;
6337 next = search_cache_extent(pending, lookup);
6338 while(next) {
6339 if (next->start - lookup > 32768)
6340 break;
6341 bits[ret].start = next->start;
6342 bits[ret].size = next->size;
6343 lookup = next->start + next->size;
6344 ret++;
6345 if (ret == bits_nr)
6346 break;
6347 next = next_cache_extent(next);
6348 if (!next)
6349 break;
6352 return ret;
6355 static void free_chunk_record(struct cache_extent *cache)
6357 struct chunk_record *rec;
6359 rec = container_of(cache, struct chunk_record, cache);
6360 list_del_init(&rec->list);
6361 list_del_init(&rec->dextents);
6362 free(rec);
6365 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6367 cache_tree_free_extents(chunk_cache, free_chunk_record);
6370 static void free_device_record(struct rb_node *node)
6372 struct device_record *rec;
6374 rec = container_of(node, struct device_record, node);
6375 free(rec);
6378 FREE_RB_BASED_TREE(device_cache, free_device_record);
6380 int insert_block_group_record(struct block_group_tree *tree,
6381 struct block_group_record *bg_rec)
6383 int ret;
6385 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6386 if (ret)
6387 return ret;
6389 list_add_tail(&bg_rec->list, &tree->block_groups);
6390 return 0;
6393 static void free_block_group_record(struct cache_extent *cache)
6395 struct block_group_record *rec;
6397 rec = container_of(cache, struct block_group_record, cache);
6398 list_del_init(&rec->list);
6399 free(rec);
6402 void free_block_group_tree(struct block_group_tree *tree)
6404 cache_tree_free_extents(&tree->tree, free_block_group_record);
6407 int insert_device_extent_record(struct device_extent_tree *tree,
6408 struct device_extent_record *de_rec)
6410 int ret;
6413 * Device extent is a bit different from the other extents, because
6414 * the extents which belong to the different devices may have the
6415 * same start and size, so we need use the special extent cache
6416 * search/insert functions.
6418 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6419 if (ret)
6420 return ret;
6422 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6423 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6424 return 0;
6427 static void free_device_extent_record(struct cache_extent *cache)
6429 struct device_extent_record *rec;
6431 rec = container_of(cache, struct device_extent_record, cache);
6432 if (!list_empty(&rec->chunk_list))
6433 list_del_init(&rec->chunk_list);
6434 if (!list_empty(&rec->device_list))
6435 list_del_init(&rec->device_list);
6436 free(rec);
6439 void free_device_extent_tree(struct device_extent_tree *tree)
6441 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.  In
 * both cases key.objectid is the bytenr and key.offset the parent.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6465 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6466 struct btrfs_key *key,
6467 int slot)
6469 struct btrfs_chunk *ptr;
6470 struct chunk_record *rec;
6471 int num_stripes, i;
6473 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6474 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6476 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6477 if (!rec) {
6478 fprintf(stderr, "memory allocation failed\n");
6479 exit(-1);
6482 INIT_LIST_HEAD(&rec->list);
6483 INIT_LIST_HEAD(&rec->dextents);
6484 rec->bg_rec = NULL;
6486 rec->cache.start = key->offset;
6487 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6489 rec->generation = btrfs_header_generation(leaf);
6491 rec->objectid = key->objectid;
6492 rec->type = key->type;
6493 rec->offset = key->offset;
6495 rec->length = rec->cache.size;
6496 rec->owner = btrfs_chunk_owner(leaf, ptr);
6497 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6498 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6499 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6500 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6501 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6502 rec->num_stripes = num_stripes;
6503 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6505 for (i = 0; i < rec->num_stripes; ++i) {
6506 rec->stripes[i].devid =
6507 btrfs_stripe_devid_nr(leaf, ptr, i);
6508 rec->stripes[i].offset =
6509 btrfs_stripe_offset_nr(leaf, ptr, i);
6510 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6511 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6512 BTRFS_UUID_SIZE);
6515 return rec;
6518 static int process_chunk_item(struct cache_tree *chunk_cache,
6519 struct btrfs_key *key, struct extent_buffer *eb,
6520 int slot)
6522 struct chunk_record *rec;
6523 struct btrfs_chunk *chunk;
6524 int ret = 0;
6526 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6528 * Do extra check for this chunk item,
6530 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6531 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6532 * and owner<->key_type check.
6534 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6535 key->offset);
6536 if (ret < 0) {
6537 error("chunk(%llu, %llu) is not valid, ignore it",
6538 key->offset, btrfs_chunk_length(eb, chunk));
6539 return 0;
6541 rec = btrfs_new_chunk_record(eb, key, slot);
6542 ret = insert_cache_extent(chunk_cache, &rec->cache);
6543 if (ret) {
6544 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6545 rec->offset, rec->length);
6546 free(rec);
6549 return ret;
6552 static int process_device_item(struct rb_root *dev_cache,
6553 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6555 struct btrfs_dev_item *ptr;
6556 struct device_record *rec;
6557 int ret = 0;
6559 ptr = btrfs_item_ptr(eb,
6560 slot, struct btrfs_dev_item);
6562 rec = malloc(sizeof(*rec));
6563 if (!rec) {
6564 fprintf(stderr, "memory allocation failed\n");
6565 return -ENOMEM;
6568 rec->devid = key->offset;
6569 rec->generation = btrfs_header_generation(eb);
6571 rec->objectid = key->objectid;
6572 rec->type = key->type;
6573 rec->offset = key->offset;
6575 rec->devid = btrfs_device_id(eb, ptr);
6576 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6577 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6579 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6580 if (ret) {
6581 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6582 free(rec);
6585 return ret;
6588 struct block_group_record *
6589 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6590 int slot)
6592 struct btrfs_block_group_item *ptr;
6593 struct block_group_record *rec;
6595 rec = calloc(1, sizeof(*rec));
6596 if (!rec) {
6597 fprintf(stderr, "memory allocation failed\n");
6598 exit(-1);
6601 rec->cache.start = key->objectid;
6602 rec->cache.size = key->offset;
6604 rec->generation = btrfs_header_generation(leaf);
6606 rec->objectid = key->objectid;
6607 rec->type = key->type;
6608 rec->offset = key->offset;
6610 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6611 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6613 INIT_LIST_HEAD(&rec->list);
6615 return rec;
6618 static int process_block_group_item(struct block_group_tree *block_group_cache,
6619 struct btrfs_key *key,
6620 struct extent_buffer *eb, int slot)
6622 struct block_group_record *rec;
6623 int ret = 0;
6625 rec = btrfs_new_block_group_record(eb, key, slot);
6626 ret = insert_block_group_record(block_group_cache, rec);
6627 if (ret) {
6628 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6629 rec->objectid, rec->offset);
6630 free(rec);
6633 return ret;
6636 struct device_extent_record *
6637 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6638 struct btrfs_key *key, int slot)
6640 struct device_extent_record *rec;
6641 struct btrfs_dev_extent *ptr;
6643 rec = calloc(1, sizeof(*rec));
6644 if (!rec) {
6645 fprintf(stderr, "memory allocation failed\n");
6646 exit(-1);
6649 rec->cache.objectid = key->objectid;
6650 rec->cache.start = key->offset;
6652 rec->generation = btrfs_header_generation(leaf);
6654 rec->objectid = key->objectid;
6655 rec->type = key->type;
6656 rec->offset = key->offset;
6658 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6659 rec->chunk_objecteid =
6660 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6661 rec->chunk_offset =
6662 btrfs_dev_extent_chunk_offset(leaf, ptr);
6663 rec->length = btrfs_dev_extent_length(leaf, ptr);
6664 rec->cache.size = rec->length;
6666 INIT_LIST_HEAD(&rec->chunk_list);
6667 INIT_LIST_HEAD(&rec->device_list);
6669 return rec;
6672 static int
6673 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6674 struct btrfs_key *key, struct extent_buffer *eb,
6675 int slot)
6677 struct device_extent_record *rec;
6678 int ret;
6680 rec = btrfs_new_device_extent_record(eb, key, slot);
6681 ret = insert_device_extent_record(dev_extent_cache, rec);
6682 if (ret) {
6683 fprintf(stderr,
6684 "Device extent[%llu, %llu, %llu] existed.\n",
6685 rec->objectid, rec->offset, rec->length);
6686 free(rec);
6689 return ret;
6692 static int process_extent_item(struct btrfs_root *root,
6693 struct cache_tree *extent_cache,
6694 struct extent_buffer *eb, int slot)
6696 struct btrfs_extent_item *ei;
6697 struct btrfs_extent_inline_ref *iref;
6698 struct btrfs_extent_data_ref *dref;
6699 struct btrfs_shared_data_ref *sref;
6700 struct btrfs_key key;
6701 struct extent_record tmpl;
6702 unsigned long end;
6703 unsigned long ptr;
6704 int ret;
6705 int type;
6706 u32 item_size = btrfs_item_size_nr(eb, slot);
6707 u64 refs = 0;
6708 u64 offset;
6709 u64 num_bytes;
6710 int metadata = 0;
6712 btrfs_item_key_to_cpu(eb, &key, slot);
6714 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6715 metadata = 1;
6716 num_bytes = root->nodesize;
6717 } else {
6718 num_bytes = key.offset;
6721 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6722 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6723 key.objectid, root->sectorsize);
6724 return -EIO;
6726 if (item_size < sizeof(*ei)) {
6727 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6728 struct btrfs_extent_item_v0 *ei0;
6729 BUG_ON(item_size != sizeof(*ei0));
6730 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6731 refs = btrfs_extent_refs_v0(eb, ei0);
6732 #else
6733 BUG();
6734 #endif
6735 memset(&tmpl, 0, sizeof(tmpl));
6736 tmpl.start = key.objectid;
6737 tmpl.nr = num_bytes;
6738 tmpl.extent_item_refs = refs;
6739 tmpl.metadata = metadata;
6740 tmpl.found_rec = 1;
6741 tmpl.max_size = num_bytes;
6743 return add_extent_rec(extent_cache, &tmpl);
6746 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6747 refs = btrfs_extent_refs(eb, ei);
6748 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6749 metadata = 1;
6750 else
6751 metadata = 0;
6752 if (metadata && num_bytes != root->nodesize) {
6753 error("ignore invalid metadata extent, length %llu does not equal to %u",
6754 num_bytes, root->nodesize);
6755 return -EIO;
6757 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6758 error("ignore invalid data extent, length %llu is not aligned to %u",
6759 num_bytes, root->sectorsize);
6760 return -EIO;
6763 memset(&tmpl, 0, sizeof(tmpl));
6764 tmpl.start = key.objectid;
6765 tmpl.nr = num_bytes;
6766 tmpl.extent_item_refs = refs;
6767 tmpl.metadata = metadata;
6768 tmpl.found_rec = 1;
6769 tmpl.max_size = num_bytes;
6770 add_extent_rec(extent_cache, &tmpl);
6772 ptr = (unsigned long)(ei + 1);
6773 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6774 key.type == BTRFS_EXTENT_ITEM_KEY)
6775 ptr += sizeof(struct btrfs_tree_block_info);
6777 end = (unsigned long)ei + item_size;
6778 while (ptr < end) {
6779 iref = (struct btrfs_extent_inline_ref *)ptr;
6780 type = btrfs_extent_inline_ref_type(eb, iref);
6781 offset = btrfs_extent_inline_ref_offset(eb, iref);
6782 switch (type) {
6783 case BTRFS_TREE_BLOCK_REF_KEY:
6784 ret = add_tree_backref(extent_cache, key.objectid,
6785 0, offset, 0);
6786 if (ret < 0)
6787 error("add_tree_backref failed: %s",
6788 strerror(-ret));
6789 break;
6790 case BTRFS_SHARED_BLOCK_REF_KEY:
6791 ret = add_tree_backref(extent_cache, key.objectid,
6792 offset, 0, 0);
6793 if (ret < 0)
6794 error("add_tree_backref failed: %s",
6795 strerror(-ret));
6796 break;
6797 case BTRFS_EXTENT_DATA_REF_KEY:
6798 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6799 add_data_backref(extent_cache, key.objectid, 0,
6800 btrfs_extent_data_ref_root(eb, dref),
6801 btrfs_extent_data_ref_objectid(eb,
6802 dref),
6803 btrfs_extent_data_ref_offset(eb, dref),
6804 btrfs_extent_data_ref_count(eb, dref),
6805 0, num_bytes);
6806 break;
6807 case BTRFS_SHARED_DATA_REF_KEY:
6808 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6809 add_data_backref(extent_cache, key.objectid, offset,
6810 0, 0, 0,
6811 btrfs_shared_data_ref_count(eb, sref),
6812 0, num_bytes);
6813 break;
6814 default:
6815 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6816 key.objectid, key.type, num_bytes);
6817 goto out;
6819 ptr += btrfs_extent_inline_ref_size(type);
6821 WARN_ON(ptr > end);
6822 out:
6823 return 0;
6826 static int check_cache_range(struct btrfs_root *root,
6827 struct btrfs_block_group_cache *cache,
6828 u64 offset, u64 bytes)
6830 struct btrfs_free_space *entry;
6831 u64 *logical;
6832 u64 bytenr;
6833 int stripe_len;
6834 int i, nr, ret;
6836 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6837 bytenr = btrfs_sb_offset(i);
6838 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6839 cache->key.objectid, bytenr, 0,
6840 &logical, &nr, &stripe_len);
6841 if (ret)
6842 return ret;
6844 while (nr--) {
6845 if (logical[nr] + stripe_len <= offset)
6846 continue;
6847 if (offset + bytes <= logical[nr])
6848 continue;
6849 if (logical[nr] == offset) {
6850 if (stripe_len >= bytes) {
6851 free(logical);
6852 return 0;
6854 bytes -= stripe_len;
6855 offset += stripe_len;
6856 } else if (logical[nr] < offset) {
6857 if (logical[nr] + stripe_len >=
6858 offset + bytes) {
6859 free(logical);
6860 return 0;
6862 bytes = (offset + bytes) -
6863 (logical[nr] + stripe_len);
6864 offset = logical[nr] + stripe_len;
6865 } else {
6867 * Could be tricky, the super may land in the
6868 * middle of the area we're checking. First
6869 * check the easiest case, it's at the end.
6871 if (logical[nr] + stripe_len >=
6872 bytes + offset) {
6873 bytes = logical[nr] - offset;
6874 continue;
6877 /* Check the left side */
6878 ret = check_cache_range(root, cache,
6879 offset,
6880 logical[nr] - offset);
6881 if (ret) {
6882 free(logical);
6883 return ret;
6886 /* Now we continue with the right side */
6887 bytes = (offset + bytes) -
6888 (logical[nr] + stripe_len);
6889 offset = logical[nr] + stripe_len;
6893 free(logical);
6896 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6897 if (!entry) {
6898 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6899 offset, offset+bytes);
6900 return -EINVAL;
6903 if (entry->offset != offset) {
6904 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6905 entry->offset);
6906 return -EINVAL;
6909 if (entry->bytes != bytes) {
6910 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6911 bytes, entry->bytes, offset);
6912 return -EINVAL;
6915 unlink_free_space(cache->free_space_ctl, entry);
6916 free(entry);
6917 return 0;
6920 static int verify_space_cache(struct btrfs_root *root,
6921 struct btrfs_block_group_cache *cache)
6923 struct btrfs_path path;
6924 struct extent_buffer *leaf;
6925 struct btrfs_key key;
6926 u64 last;
6927 int ret = 0;
6929 root = root->fs_info->extent_root;
6931 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6933 btrfs_init_path(&path);
6934 key.objectid = last;
6935 key.offset = 0;
6936 key.type = BTRFS_EXTENT_ITEM_KEY;
6937 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6938 if (ret < 0)
6939 goto out;
6940 ret = 0;
6941 while (1) {
6942 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6943 ret = btrfs_next_leaf(root, &path);
6944 if (ret < 0)
6945 goto out;
6946 if (ret > 0) {
6947 ret = 0;
6948 break;
6951 leaf = path.nodes[0];
6952 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6953 if (key.objectid >= cache->key.offset + cache->key.objectid)
6954 break;
6955 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6956 key.type != BTRFS_METADATA_ITEM_KEY) {
6957 path.slots[0]++;
6958 continue;
6961 if (last == key.objectid) {
6962 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6963 last = key.objectid + key.offset;
6964 else
6965 last = key.objectid + root->nodesize;
6966 path.slots[0]++;
6967 continue;
6970 ret = check_cache_range(root, cache, last,
6971 key.objectid - last);
6972 if (ret)
6973 break;
6974 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6975 last = key.objectid + key.offset;
6976 else
6977 last = key.objectid + root->nodesize;
6978 path.slots[0]++;
6981 if (last < cache->key.objectid + cache->key.offset)
6982 ret = check_cache_range(root, cache, last,
6983 cache->key.objectid +
6984 cache->key.offset - last);
6986 out:
6987 btrfs_release_path(&path);
6989 if (!ret &&
6990 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6991 fprintf(stderr, "There are still entries left in the space "
6992 "cache\n");
6993 ret = -EINVAL;
6996 return ret;
6999 static int check_space_cache(struct btrfs_root *root)
7001 struct btrfs_block_group_cache *cache;
7002 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7003 int ret;
7004 int error = 0;
7006 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7007 btrfs_super_generation(root->fs_info->super_copy) !=
7008 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7009 printf("cache and super generation don't match, space cache "
7010 "will be invalidated\n");
7011 return 0;
7014 if (ctx.progress_enabled) {
7015 ctx.tp = TASK_FREE_SPACE;
7016 task_start(ctx.info);
7019 while (1) {
7020 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7021 if (!cache)
7022 break;
7024 start = cache->key.objectid + cache->key.offset;
7025 if (!cache->free_space_ctl) {
7026 if (btrfs_init_free_space_ctl(cache,
7027 root->sectorsize)) {
7028 ret = -ENOMEM;
7029 break;
7031 } else {
7032 btrfs_remove_free_space_cache(cache);
7035 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7036 ret = exclude_super_stripes(root, cache);
7037 if (ret) {
7038 fprintf(stderr, "could not exclude super stripes: %s\n",
7039 strerror(-ret));
7040 error++;
7041 continue;
7043 ret = load_free_space_tree(root->fs_info, cache);
7044 free_excluded_extents(root, cache);
7045 if (ret < 0) {
7046 fprintf(stderr, "could not load free space tree: %s\n",
7047 strerror(-ret));
7048 error++;
7049 continue;
7051 error += ret;
7052 } else {
7053 ret = load_free_space_cache(root->fs_info, cache);
7054 if (!ret)
7055 continue;
7058 ret = verify_space_cache(root, cache);
7059 if (ret) {
7060 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7061 cache->key.objectid);
7062 error++;
7066 task_stop(ctx.info);
7068 return error ? -EINVAL : 0;
7071 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7072 u64 num_bytes, unsigned long leaf_offset,
7073 struct extent_buffer *eb) {
7075 u64 offset = 0;
7076 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7077 char *data;
7078 unsigned long csum_offset;
7079 u32 csum;
7080 u32 csum_expected;
7081 u64 read_len;
7082 u64 data_checked = 0;
7083 u64 tmp;
7084 int ret = 0;
7085 int mirror;
7086 int num_copies;
7088 if (num_bytes % root->sectorsize)
7089 return -EINVAL;
7091 data = malloc(num_bytes);
7092 if (!data)
7093 return -ENOMEM;
7095 while (offset < num_bytes) {
7096 mirror = 0;
7097 again:
7098 read_len = num_bytes - offset;
7099 /* read as much space once a time */
7100 ret = read_extent_data(root, data + offset,
7101 bytenr + offset, &read_len, mirror);
7102 if (ret)
7103 goto out;
7104 data_checked = 0;
7105 /* verify every 4k data's checksum */
7106 while (data_checked < read_len) {
7107 csum = ~(u32)0;
7108 tmp = offset + data_checked;
7110 csum = btrfs_csum_data((char *)data + tmp,
7111 csum, root->sectorsize);
7112 btrfs_csum_final(csum, (u8 *)&csum);
7114 csum_offset = leaf_offset +
7115 tmp / root->sectorsize * csum_size;
7116 read_extent_buffer(eb, (char *)&csum_expected,
7117 csum_offset, csum_size);
7118 /* try another mirror */
7119 if (csum != csum_expected) {
7120 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7121 mirror, bytenr + tmp,
7122 csum, csum_expected);
7123 num_copies = btrfs_num_copies(
7124 &root->fs_info->mapping_tree,
7125 bytenr, num_bytes);
7126 if (mirror < num_copies - 1) {
7127 mirror += 1;
7128 goto again;
7131 data_checked += root->sectorsize;
7133 offset += read_len;
7135 out:
7136 free(data);
7137 return ret;
7140 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7141 u64 num_bytes)
7143 struct btrfs_path path;
7144 struct extent_buffer *leaf;
7145 struct btrfs_key key;
7146 int ret;
7148 btrfs_init_path(&path);
7149 key.objectid = bytenr;
7150 key.type = BTRFS_EXTENT_ITEM_KEY;
7151 key.offset = (u64)-1;
7153 again:
7154 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7155 0, 0);
7156 if (ret < 0) {
7157 fprintf(stderr, "Error looking up extent record %d\n", ret);
7158 btrfs_release_path(&path);
7159 return ret;
7160 } else if (ret) {
7161 if (path.slots[0] > 0) {
7162 path.slots[0]--;
7163 } else {
7164 ret = btrfs_prev_leaf(root, &path);
7165 if (ret < 0) {
7166 goto out;
7167 } else if (ret > 0) {
7168 ret = 0;
7169 goto out;
7174 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7177 * Block group items come before extent items if they have the same
7178 * bytenr, so walk back one more just in case. Dear future traveller,
7179 * first congrats on mastering time travel. Now if it's not too much
7180 * trouble could you go back to 2006 and tell Chris to make the
7181 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7182 * EXTENT_ITEM_KEY please?
7184 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7185 if (path.slots[0] > 0) {
7186 path.slots[0]--;
7187 } else {
7188 ret = btrfs_prev_leaf(root, &path);
7189 if (ret < 0) {
7190 goto out;
7191 } else if (ret > 0) {
7192 ret = 0;
7193 goto out;
7196 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7199 while (num_bytes) {
7200 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7201 ret = btrfs_next_leaf(root, &path);
7202 if (ret < 0) {
7203 fprintf(stderr, "Error going to next leaf "
7204 "%d\n", ret);
7205 btrfs_release_path(&path);
7206 return ret;
7207 } else if (ret) {
7208 break;
7211 leaf = path.nodes[0];
7212 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7213 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7214 path.slots[0]++;
7215 continue;
7217 if (key.objectid + key.offset < bytenr) {
7218 path.slots[0]++;
7219 continue;
7221 if (key.objectid > bytenr + num_bytes)
7222 break;
7224 if (key.objectid == bytenr) {
7225 if (key.offset >= num_bytes) {
7226 num_bytes = 0;
7227 break;
7229 num_bytes -= key.offset;
7230 bytenr += key.offset;
7231 } else if (key.objectid < bytenr) {
7232 if (key.objectid + key.offset >= bytenr + num_bytes) {
7233 num_bytes = 0;
7234 break;
7236 num_bytes = (bytenr + num_bytes) -
7237 (key.objectid + key.offset);
7238 bytenr = key.objectid + key.offset;
7239 } else {
7240 if (key.objectid + key.offset < bytenr + num_bytes) {
7241 u64 new_start = key.objectid + key.offset;
7242 u64 new_bytes = bytenr + num_bytes - new_start;
7245 * Weird case, the extent is in the middle of
7246 * our range, we'll have to search one side
7247 * and then the other. Not sure if this happens
7248 * in real life, but no harm in coding it up
7249 * anyway just in case.
7251 btrfs_release_path(&path);
7252 ret = check_extent_exists(root, new_start,
7253 new_bytes);
7254 if (ret) {
7255 fprintf(stderr, "Right section didn't "
7256 "have a record\n");
7257 break;
7259 num_bytes = key.objectid - bytenr;
7260 goto again;
7262 num_bytes = key.objectid - bytenr;
7264 path.slots[0]++;
7266 ret = 0;
7268 out:
7269 if (num_bytes && !ret) {
7270 fprintf(stderr, "There are no extents for csum range "
7271 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7272 ret = 1;
7275 btrfs_release_path(&path);
7276 return ret;
7279 static int check_csums(struct btrfs_root *root)
7281 struct btrfs_path path;
7282 struct extent_buffer *leaf;
7283 struct btrfs_key key;
7284 u64 offset = 0, num_bytes = 0;
7285 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7286 int errors = 0;
7287 int ret;
7288 u64 data_len;
7289 unsigned long leaf_offset;
7291 root = root->fs_info->csum_root;
7292 if (!extent_buffer_uptodate(root->node)) {
7293 fprintf(stderr, "No valid csum tree found\n");
7294 return -ENOENT;
7297 btrfs_init_path(&path);
7298 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7299 key.type = BTRFS_EXTENT_CSUM_KEY;
7300 key.offset = 0;
7301 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7302 if (ret < 0) {
7303 fprintf(stderr, "Error searching csum tree %d\n", ret);
7304 btrfs_release_path(&path);
7305 return ret;
7308 if (ret > 0 && path.slots[0])
7309 path.slots[0]--;
7310 ret = 0;
7312 while (1) {
7313 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7314 ret = btrfs_next_leaf(root, &path);
7315 if (ret < 0) {
7316 fprintf(stderr, "Error going to next leaf "
7317 "%d\n", ret);
7318 break;
7320 if (ret)
7321 break;
7323 leaf = path.nodes[0];
7325 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7326 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7327 path.slots[0]++;
7328 continue;
7331 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7332 csum_size) * root->sectorsize;
7333 if (!check_data_csum)
7334 goto skip_csum_check;
7335 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7336 ret = check_extent_csums(root, key.offset, data_len,
7337 leaf_offset, leaf);
7338 if (ret)
7339 break;
7340 skip_csum_check:
7341 if (!num_bytes) {
7342 offset = key.offset;
7343 } else if (key.offset != offset + num_bytes) {
7344 ret = check_extent_exists(root, offset, num_bytes);
7345 if (ret) {
7346 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7347 "there is no extent record\n",
7348 offset, offset+num_bytes);
7349 errors++;
7351 offset = key.offset;
7352 num_bytes = 0;
7354 num_bytes += data_len;
7355 path.slots[0]++;
7358 btrfs_release_path(&path);
7359 return errors;
7362 static int is_dropped_key(struct btrfs_key *key,
7363 struct btrfs_key *drop_key) {
7364 if (key->objectid < drop_key->objectid)
7365 return 1;
7366 else if (key->objectid == drop_key->objectid) {
7367 if (key->type < drop_key->type)
7368 return 1;
7369 else if (key->type == drop_key->type) {
7370 if (key->offset < drop_key->offset)
7371 return 1;
7374 return 0;
7378 * Here are the rules for FULL_BACKREF.
7380 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7381 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7382 * FULL_BACKREF set.
7383 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7384 * if it happened after the relocation occurred since we'll have dropped the
7385 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7386 * have no real way to know for sure.
7388 * We process the blocks one root at a time, and we start from the lowest root
7389 * objectid and go to the highest. So we can just lookup the owner backref for
7390 * the record and if we don't find it then we know it doesn't exist and we have
7391 * a FULL BACKREF.
7393 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7394 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7395 * be set or not and then we can check later once we've gathered all the refs.
7397 static int calc_extent_flag(struct cache_tree *extent_cache,
7398 struct extent_buffer *buf,
7399 struct root_item_record *ri,
7400 u64 *flags)
7402 struct extent_record *rec;
7403 struct cache_extent *cache;
7404 struct tree_backref *tback;
7405 u64 owner = 0;
7407 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7408 /* we have added this extent before */
7409 if (!cache)
7410 return -ENOENT;
7412 rec = container_of(cache, struct extent_record, cache);
7415 * Except file/reloc tree, we can not have
7416 * FULL BACKREF MODE
7418 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7419 goto normal;
7421 * root node
7423 if (buf->start == ri->bytenr)
7424 goto normal;
7426 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7427 goto full_backref;
7429 owner = btrfs_header_owner(buf);
7430 if (owner == ri->objectid)
7431 goto normal;
7433 tback = find_tree_backref(rec, 0, owner);
7434 if (!tback)
7435 goto full_backref;
7436 normal:
7437 *flags = 0;
7438 if (rec->flag_block_full_backref != FLAG_UNSET &&
7439 rec->flag_block_full_backref != 0)
7440 rec->bad_full_backref = 1;
7441 return 0;
7442 full_backref:
7443 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7444 if (rec->flag_block_full_backref != FLAG_UNSET &&
7445 rec->flag_block_full_backref != 1)
7446 rec->bad_full_backref = 1;
7447 return 0;
7450 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7452 fprintf(stderr, "Invalid key type(");
7453 print_key_type(stderr, 0, key_type);
7454 fprintf(stderr, ") found in root(");
7455 print_objectid(stderr, rootid, 0);
7456 fprintf(stderr, ")\n");
7460 * Check if the key is valid with its extent buffer.
7462 * This is a early check in case invalid key exists in a extent buffer
7463 * This is not comprehensive yet, but should prevent wrong key/item passed
7464 * further
7466 static int check_type_with_root(u64 rootid, u8 key_type)
7468 switch (key_type) {
7469 /* Only valid in chunk tree */
7470 case BTRFS_DEV_ITEM_KEY:
7471 case BTRFS_CHUNK_ITEM_KEY:
7472 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7473 goto err;
7474 break;
7475 /* valid in csum and log tree */
7476 case BTRFS_CSUM_TREE_OBJECTID:
7477 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7478 is_fstree(rootid)))
7479 goto err;
7480 break;
7481 case BTRFS_EXTENT_ITEM_KEY:
7482 case BTRFS_METADATA_ITEM_KEY:
7483 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7484 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7485 goto err;
7486 break;
7487 case BTRFS_ROOT_ITEM_KEY:
7488 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7489 goto err;
7490 break;
7491 case BTRFS_DEV_EXTENT_KEY:
7492 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7493 goto err;
7494 break;
7496 return 0;
7497 err:
7498 report_mismatch_key_root(key_type, rootid);
7499 return -EINVAL;
7502 static int run_next_block(struct btrfs_root *root,
7503 struct block_info *bits,
7504 int bits_nr,
7505 u64 *last,
7506 struct cache_tree *pending,
7507 struct cache_tree *seen,
7508 struct cache_tree *reada,
7509 struct cache_tree *nodes,
7510 struct cache_tree *extent_cache,
7511 struct cache_tree *chunk_cache,
7512 struct rb_root *dev_cache,
7513 struct block_group_tree *block_group_cache,
7514 struct device_extent_tree *dev_extent_cache,
7515 struct root_item_record *ri)
7517 struct extent_buffer *buf;
7518 struct extent_record *rec = NULL;
7519 u64 bytenr;
7520 u32 size;
7521 u64 parent;
7522 u64 owner;
7523 u64 flags;
7524 u64 ptr;
7525 u64 gen = 0;
7526 int ret = 0;
7527 int i;
7528 int nritems;
7529 struct btrfs_key key;
7530 struct cache_extent *cache;
7531 int reada_bits;
7533 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7534 bits_nr, &reada_bits);
7535 if (nritems == 0)
7536 return 1;
7538 if (!reada_bits) {
7539 for(i = 0; i < nritems; i++) {
7540 ret = add_cache_extent(reada, bits[i].start,
7541 bits[i].size);
7542 if (ret == -EEXIST)
7543 continue;
7545 /* fixme, get the parent transid */
7546 readahead_tree_block(root, bits[i].start,
7547 bits[i].size, 0);
7550 *last = bits[0].start;
7551 bytenr = bits[0].start;
7552 size = bits[0].size;
7554 cache = lookup_cache_extent(pending, bytenr, size);
7555 if (cache) {
7556 remove_cache_extent(pending, cache);
7557 free(cache);
7559 cache = lookup_cache_extent(reada, bytenr, size);
7560 if (cache) {
7561 remove_cache_extent(reada, cache);
7562 free(cache);
7564 cache = lookup_cache_extent(nodes, bytenr, size);
7565 if (cache) {
7566 remove_cache_extent(nodes, cache);
7567 free(cache);
7569 cache = lookup_cache_extent(extent_cache, bytenr, size);
7570 if (cache) {
7571 rec = container_of(cache, struct extent_record, cache);
7572 gen = rec->parent_generation;
7575 /* fixme, get the real parent transid */
7576 buf = read_tree_block(root, bytenr, size, gen);
7577 if (!extent_buffer_uptodate(buf)) {
7578 record_bad_block_io(root->fs_info,
7579 extent_cache, bytenr, size);
7580 goto out;
7583 nritems = btrfs_header_nritems(buf);
7585 flags = 0;
7586 if (!init_extent_tree) {
7587 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7588 btrfs_header_level(buf), 1, NULL,
7589 &flags);
7590 if (ret < 0) {
7591 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7592 if (ret < 0) {
7593 fprintf(stderr, "Couldn't calc extent flags\n");
7594 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7597 } else {
7598 flags = 0;
7599 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7600 if (ret < 0) {
7601 fprintf(stderr, "Couldn't calc extent flags\n");
7602 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7606 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7607 if (ri != NULL &&
7608 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7609 ri->objectid == btrfs_header_owner(buf)) {
7611 * Ok we got to this block from it's original owner and
7612 * we have FULL_BACKREF set. Relocation can leave
7613 * converted blocks over so this is altogether possible,
7614 * however it's not possible if the generation > the
7615 * last snapshot, so check for this case.
7617 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7618 btrfs_header_generation(buf) > ri->last_snapshot) {
7619 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7620 rec->bad_full_backref = 1;
7623 } else {
7624 if (ri != NULL &&
7625 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7626 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7627 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7628 rec->bad_full_backref = 1;
7632 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7633 rec->flag_block_full_backref = 1;
7634 parent = bytenr;
7635 owner = 0;
7636 } else {
7637 rec->flag_block_full_backref = 0;
7638 parent = 0;
7639 owner = btrfs_header_owner(buf);
7642 ret = check_block(root, extent_cache, buf, flags);
7643 if (ret)
7644 goto out;
7646 if (btrfs_is_leaf(buf)) {
7647 btree_space_waste += btrfs_leaf_free_space(root, buf);
7648 for (i = 0; i < nritems; i++) {
7649 struct btrfs_file_extent_item *fi;
7650 btrfs_item_key_to_cpu(buf, &key, i);
7652 * Check key type against the leaf owner.
7653 * Could filter quite a lot of early error if
7654 * owner is correct
7656 if (check_type_with_root(btrfs_header_owner(buf),
7657 key.type)) {
7658 fprintf(stderr, "ignoring invalid key\n");
7659 continue;
7661 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7662 process_extent_item(root, extent_cache, buf,
7664 continue;
7666 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7667 process_extent_item(root, extent_cache, buf,
7669 continue;
7671 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7672 total_csum_bytes +=
7673 btrfs_item_size_nr(buf, i);
7674 continue;
7676 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7677 process_chunk_item(chunk_cache, &key, buf, i);
7678 continue;
7680 if (key.type == BTRFS_DEV_ITEM_KEY) {
7681 process_device_item(dev_cache, &key, buf, i);
7682 continue;
7684 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7685 process_block_group_item(block_group_cache,
7686 &key, buf, i);
7687 continue;
7689 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7690 process_device_extent_item(dev_extent_cache,
7691 &key, buf, i);
7692 continue;
7695 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7696 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7697 process_extent_ref_v0(extent_cache, buf, i);
7698 #else
7699 BUG();
7700 #endif
7701 continue;
7704 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7705 ret = add_tree_backref(extent_cache,
7706 key.objectid, 0, key.offset, 0);
7707 if (ret < 0)
7708 error("add_tree_backref failed: %s",
7709 strerror(-ret));
7710 continue;
7712 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7713 ret = add_tree_backref(extent_cache,
7714 key.objectid, key.offset, 0, 0);
7715 if (ret < 0)
7716 error("add_tree_backref failed: %s",
7717 strerror(-ret));
7718 continue;
7720 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7721 struct btrfs_extent_data_ref *ref;
7722 ref = btrfs_item_ptr(buf, i,
7723 struct btrfs_extent_data_ref);
7724 add_data_backref(extent_cache,
7725 key.objectid, 0,
7726 btrfs_extent_data_ref_root(buf, ref),
7727 btrfs_extent_data_ref_objectid(buf,
7728 ref),
7729 btrfs_extent_data_ref_offset(buf, ref),
7730 btrfs_extent_data_ref_count(buf, ref),
7731 0, root->sectorsize);
7732 continue;
7734 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7735 struct btrfs_shared_data_ref *ref;
7736 ref = btrfs_item_ptr(buf, i,
7737 struct btrfs_shared_data_ref);
7738 add_data_backref(extent_cache,
7739 key.objectid, key.offset, 0, 0, 0,
7740 btrfs_shared_data_ref_count(buf, ref),
7741 0, root->sectorsize);
7742 continue;
7744 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7745 struct bad_item *bad;
7747 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7748 continue;
7749 if (!owner)
7750 continue;
7751 bad = malloc(sizeof(struct bad_item));
7752 if (!bad)
7753 continue;
7754 INIT_LIST_HEAD(&bad->list);
7755 memcpy(&bad->key, &key,
7756 sizeof(struct btrfs_key));
7757 bad->root_id = owner;
7758 list_add_tail(&bad->list, &delete_items);
7759 continue;
7761 if (key.type != BTRFS_EXTENT_DATA_KEY)
7762 continue;
7763 fi = btrfs_item_ptr(buf, i,
7764 struct btrfs_file_extent_item);
7765 if (btrfs_file_extent_type(buf, fi) ==
7766 BTRFS_FILE_EXTENT_INLINE)
7767 continue;
7768 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7769 continue;
7771 data_bytes_allocated +=
7772 btrfs_file_extent_disk_num_bytes(buf, fi);
7773 if (data_bytes_allocated < root->sectorsize) {
7774 abort();
7776 data_bytes_referenced +=
7777 btrfs_file_extent_num_bytes(buf, fi);
7778 add_data_backref(extent_cache,
7779 btrfs_file_extent_disk_bytenr(buf, fi),
7780 parent, owner, key.objectid, key.offset -
7781 btrfs_file_extent_offset(buf, fi), 1, 1,
7782 btrfs_file_extent_disk_num_bytes(buf, fi));
7784 } else {
7785 int level;
7786 struct btrfs_key first_key;
7788 first_key.objectid = 0;
7790 if (nritems > 0)
7791 btrfs_item_key_to_cpu(buf, &first_key, 0);
7792 level = btrfs_header_level(buf);
7793 for (i = 0; i < nritems; i++) {
7794 struct extent_record tmpl;
7796 ptr = btrfs_node_blockptr(buf, i);
7797 size = root->nodesize;
7798 btrfs_node_key_to_cpu(buf, &key, i);
7799 if (ri != NULL) {
7800 if ((level == ri->drop_level)
7801 && is_dropped_key(&key, &ri->drop_key)) {
7802 continue;
7806 memset(&tmpl, 0, sizeof(tmpl));
7807 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7808 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7809 tmpl.start = ptr;
7810 tmpl.nr = size;
7811 tmpl.refs = 1;
7812 tmpl.metadata = 1;
7813 tmpl.max_size = size;
7814 ret = add_extent_rec(extent_cache, &tmpl);
7815 if (ret < 0)
7816 goto out;
7818 ret = add_tree_backref(extent_cache, ptr, parent,
7819 owner, 1);
7820 if (ret < 0) {
7821 error("add_tree_backref failed: %s",
7822 strerror(-ret));
7823 continue;
7826 if (level > 1) {
7827 add_pending(nodes, seen, ptr, size);
7828 } else {
7829 add_pending(pending, seen, ptr, size);
7832 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7833 nritems) * sizeof(struct btrfs_key_ptr);
7835 total_btree_bytes += buf->len;
7836 if (fs_root_objectid(btrfs_header_owner(buf)))
7837 total_fs_tree_bytes += buf->len;
7838 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7839 total_extent_tree_bytes += buf->len;
7840 if (!found_old_backref &&
7841 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7842 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7843 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7844 found_old_backref = 1;
7845 out:
7846 free_extent_buffer(buf);
7847 return ret;
7850 static int add_root_to_pending(struct extent_buffer *buf,
7851 struct cache_tree *extent_cache,
7852 struct cache_tree *pending,
7853 struct cache_tree *seen,
7854 struct cache_tree *nodes,
7855 u64 objectid)
7857 struct extent_record tmpl;
7858 int ret;
7860 if (btrfs_header_level(buf) > 0)
7861 add_pending(nodes, seen, buf->start, buf->len);
7862 else
7863 add_pending(pending, seen, buf->start, buf->len);
7865 memset(&tmpl, 0, sizeof(tmpl));
7866 tmpl.start = buf->start;
7867 tmpl.nr = buf->len;
7868 tmpl.is_root = 1;
7869 tmpl.refs = 1;
7870 tmpl.metadata = 1;
7871 tmpl.max_size = buf->len;
7872 add_extent_rec(extent_cache, &tmpl);
7874 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7875 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7876 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7877 0, 1);
7878 else
7879 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7881 return ret;
7884 /* as we fix the tree, we might be deleting blocks that
7885 * we're tracking for repair. This hook makes sure we
7886 * remove any backrefs for blocks as we are fixing them.
7888 static int free_extent_hook(struct btrfs_trans_handle *trans,
7889 struct btrfs_root *root,
7890 u64 bytenr, u64 num_bytes, u64 parent,
7891 u64 root_objectid, u64 owner, u64 offset,
7892 int refs_to_drop)
7894 struct extent_record *rec;
7895 struct cache_extent *cache;
7896 int is_data;
7897 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7899 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7900 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7901 if (!cache)
7902 return 0;
7904 rec = container_of(cache, struct extent_record, cache);
7905 if (is_data) {
7906 struct data_backref *back;
7907 back = find_data_backref(rec, parent, root_objectid, owner,
7908 offset, 1, bytenr, num_bytes);
7909 if (!back)
7910 goto out;
7911 if (back->node.found_ref) {
7912 back->found_ref -= refs_to_drop;
7913 if (rec->refs)
7914 rec->refs -= refs_to_drop;
7916 if (back->node.found_extent_tree) {
7917 back->num_refs -= refs_to_drop;
7918 if (rec->extent_item_refs)
7919 rec->extent_item_refs -= refs_to_drop;
7921 if (back->found_ref == 0)
7922 back->node.found_ref = 0;
7923 if (back->num_refs == 0)
7924 back->node.found_extent_tree = 0;
7926 if (!back->node.found_extent_tree && back->node.found_ref) {
7927 list_del(&back->node.list);
7928 free(back);
7930 } else {
7931 struct tree_backref *back;
7932 back = find_tree_backref(rec, parent, root_objectid);
7933 if (!back)
7934 goto out;
7935 if (back->node.found_ref) {
7936 if (rec->refs)
7937 rec->refs--;
7938 back->node.found_ref = 0;
7940 if (back->node.found_extent_tree) {
7941 if (rec->extent_item_refs)
7942 rec->extent_item_refs--;
7943 back->node.found_extent_tree = 0;
7945 if (!back->node.found_extent_tree && back->node.found_ref) {
7946 list_del(&back->node.list);
7947 free(back);
7950 maybe_free_extent_rec(extent_cache, rec);
7951 out:
7952 return 0;
7955 static int delete_extent_records(struct btrfs_trans_handle *trans,
7956 struct btrfs_root *root,
7957 struct btrfs_path *path,
7958 u64 bytenr)
7960 struct btrfs_key key;
7961 struct btrfs_key found_key;
7962 struct extent_buffer *leaf;
7963 int ret;
7964 int slot;
7967 key.objectid = bytenr;
7968 key.type = (u8)-1;
7969 key.offset = (u64)-1;
7971 while(1) {
7972 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7973 &key, path, 0, 1);
7974 if (ret < 0)
7975 break;
7977 if (ret > 0) {
7978 ret = 0;
7979 if (path->slots[0] == 0)
7980 break;
7981 path->slots[0]--;
7983 ret = 0;
7985 leaf = path->nodes[0];
7986 slot = path->slots[0];
7988 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7989 if (found_key.objectid != bytenr)
7990 break;
7992 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7993 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7994 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7995 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7996 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7997 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7998 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7999 btrfs_release_path(path);
8000 if (found_key.type == 0) {
8001 if (found_key.offset == 0)
8002 break;
8003 key.offset = found_key.offset - 1;
8004 key.type = found_key.type;
8006 key.type = found_key.type - 1;
8007 key.offset = (u64)-1;
8008 continue;
8011 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8012 found_key.objectid, found_key.type, found_key.offset);
8014 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8015 if (ret)
8016 break;
8017 btrfs_release_path(path);
8019 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8020 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8021 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8022 found_key.offset : root->nodesize;
8024 ret = btrfs_update_block_group(trans, root, bytenr,
8025 bytes, 0, 0);
8026 if (ret)
8027 break;
8031 btrfs_release_path(path);
8032 return ret;
8036 * for a single backref, this will allocate a new extent
8037 * and add the backref to it.
8039 static int record_extent(struct btrfs_trans_handle *trans,
8040 struct btrfs_fs_info *info,
8041 struct btrfs_path *path,
8042 struct extent_record *rec,
8043 struct extent_backref *back,
8044 int allocated, u64 flags)
8046 int ret = 0;
8047 struct btrfs_root *extent_root = info->extent_root;
8048 struct extent_buffer *leaf;
8049 struct btrfs_key ins_key;
8050 struct btrfs_extent_item *ei;
8051 struct data_backref *dback;
8052 struct btrfs_tree_block_info *bi;
8054 if (!back->is_data)
8055 rec->max_size = max_t(u64, rec->max_size,
8056 info->extent_root->nodesize);
8058 if (!allocated) {
8059 u32 item_size = sizeof(*ei);
8061 if (!back->is_data)
8062 item_size += sizeof(*bi);
8064 ins_key.objectid = rec->start;
8065 ins_key.offset = rec->max_size;
8066 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8068 ret = btrfs_insert_empty_item(trans, extent_root, path,
8069 &ins_key, item_size);
8070 if (ret)
8071 goto fail;
8073 leaf = path->nodes[0];
8074 ei = btrfs_item_ptr(leaf, path->slots[0],
8075 struct btrfs_extent_item);
8077 btrfs_set_extent_refs(leaf, ei, 0);
8078 btrfs_set_extent_generation(leaf, ei, rec->generation);
8080 if (back->is_data) {
8081 btrfs_set_extent_flags(leaf, ei,
8082 BTRFS_EXTENT_FLAG_DATA);
8083 } else {
8084 struct btrfs_disk_key copy_key;;
8086 bi = (struct btrfs_tree_block_info *)(ei + 1);
8087 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8088 sizeof(*bi));
8090 btrfs_set_disk_key_objectid(&copy_key,
8091 rec->info_objectid);
8092 btrfs_set_disk_key_type(&copy_key, 0);
8093 btrfs_set_disk_key_offset(&copy_key, 0);
8095 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8096 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8098 btrfs_set_extent_flags(leaf, ei,
8099 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8102 btrfs_mark_buffer_dirty(leaf);
8103 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8104 rec->max_size, 1, 0);
8105 if (ret)
8106 goto fail;
8107 btrfs_release_path(path);
8110 if (back->is_data) {
8111 u64 parent;
8112 int i;
8114 dback = to_data_backref(back);
8115 if (back->full_backref)
8116 parent = dback->parent;
8117 else
8118 parent = 0;
8120 for (i = 0; i < dback->found_ref; i++) {
8121 /* if parent != 0, we're doing a full backref
8122 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8123 * just makes the backref allocator create a data
8124 * backref
8126 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8127 rec->start, rec->max_size,
8128 parent,
8129 dback->root,
8130 parent ?
8131 BTRFS_FIRST_FREE_OBJECTID :
8132 dback->owner,
8133 dback->offset);
8134 if (ret)
8135 break;
8137 fprintf(stderr, "adding new data backref"
8138 " on %llu %s %llu owner %llu"
8139 " offset %llu found %d\n",
8140 (unsigned long long)rec->start,
8141 back->full_backref ?
8142 "parent" : "root",
8143 back->full_backref ?
8144 (unsigned long long)parent :
8145 (unsigned long long)dback->root,
8146 (unsigned long long)dback->owner,
8147 (unsigned long long)dback->offset,
8148 dback->found_ref);
8149 } else {
8150 u64 parent;
8151 struct tree_backref *tback;
8153 tback = to_tree_backref(back);
8154 if (back->full_backref)
8155 parent = tback->parent;
8156 else
8157 parent = 0;
8159 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8160 rec->start, rec->max_size,
8161 parent, tback->root, 0, 0);
8162 fprintf(stderr, "adding new tree backref on "
8163 "start %llu len %llu parent %llu root %llu\n",
8164 rec->start, rec->max_size, parent, tback->root);
8166 fail:
8167 btrfs_release_path(path);
8168 return ret;
8171 static struct extent_entry *find_entry(struct list_head *entries,
8172 u64 bytenr, u64 bytes)
8174 struct extent_entry *entry = NULL;
8176 list_for_each_entry(entry, entries, list) {
8177 if (entry->bytenr == bytenr && entry->bytes == bytes)
8178 return entry;
8181 return NULL;
8184 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8186 struct extent_entry *entry, *best = NULL, *prev = NULL;
8188 list_for_each_entry(entry, entries, list) {
8190 * If there are as many broken entries as entries then we know
8191 * not to trust this particular entry.
8193 if (entry->broken == entry->count)
8194 continue;
8197 * Special case, when there are only two entries and 'best' is
8198 * the first one
8200 if (!prev) {
8201 best = entry;
8202 prev = entry;
8203 continue;
8207 * If our current entry == best then we can't be sure our best
8208 * is really the best, so we need to keep searching.
8210 if (best && best->count == entry->count) {
8211 prev = entry;
8212 best = NULL;
8213 continue;
8216 /* Prev == entry, not good enough, have to keep searching */
8217 if (!prev->broken && prev->count == entry->count)
8218 continue;
8220 if (!best)
8221 best = (prev->count > entry->count) ? prev : entry;
8222 else if (best->count < entry->count)
8223 best = entry;
8224 prev = entry;
8227 return best;
8230 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8231 struct data_backref *dback, struct extent_entry *entry)
8233 struct btrfs_trans_handle *trans;
8234 struct btrfs_root *root;
8235 struct btrfs_file_extent_item *fi;
8236 struct extent_buffer *leaf;
8237 struct btrfs_key key;
8238 u64 bytenr, bytes;
8239 int ret, err;
8241 key.objectid = dback->root;
8242 key.type = BTRFS_ROOT_ITEM_KEY;
8243 key.offset = (u64)-1;
8244 root = btrfs_read_fs_root(info, &key);
8245 if (IS_ERR(root)) {
8246 fprintf(stderr, "Couldn't find root for our ref\n");
8247 return -EINVAL;
8251 * The backref points to the original offset of the extent if it was
8252 * split, so we need to search down to the offset we have and then walk
8253 * forward until we find the backref we're looking for.
8255 key.objectid = dback->owner;
8256 key.type = BTRFS_EXTENT_DATA_KEY;
8257 key.offset = dback->offset;
8258 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8259 if (ret < 0) {
8260 fprintf(stderr, "Error looking up ref %d\n", ret);
8261 return ret;
8264 while (1) {
8265 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8266 ret = btrfs_next_leaf(root, path);
8267 if (ret) {
8268 fprintf(stderr, "Couldn't find our ref, next\n");
8269 return -EINVAL;
8272 leaf = path->nodes[0];
8273 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8274 if (key.objectid != dback->owner ||
8275 key.type != BTRFS_EXTENT_DATA_KEY) {
8276 fprintf(stderr, "Couldn't find our ref, search\n");
8277 return -EINVAL;
8279 fi = btrfs_item_ptr(leaf, path->slots[0],
8280 struct btrfs_file_extent_item);
8281 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8282 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8284 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8285 break;
8286 path->slots[0]++;
8289 btrfs_release_path(path);
8291 trans = btrfs_start_transaction(root, 1);
8292 if (IS_ERR(trans))
8293 return PTR_ERR(trans);
8296 * Ok we have the key of the file extent we want to fix, now we can cow
8297 * down to the thing and fix it.
8299 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8300 if (ret < 0) {
8301 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8302 key.objectid, key.type, key.offset, ret);
8303 goto out;
8305 if (ret > 0) {
8306 fprintf(stderr, "Well that's odd, we just found this key "
8307 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8308 key.offset);
8309 ret = -EINVAL;
8310 goto out;
8312 leaf = path->nodes[0];
8313 fi = btrfs_item_ptr(leaf, path->slots[0],
8314 struct btrfs_file_extent_item);
8316 if (btrfs_file_extent_compression(leaf, fi) &&
8317 dback->disk_bytenr != entry->bytenr) {
8318 fprintf(stderr, "Ref doesn't match the record start and is "
8319 "compressed, please take a btrfs-image of this file "
8320 "system and send it to a btrfs developer so they can "
8321 "complete this functionality for bytenr %Lu\n",
8322 dback->disk_bytenr);
8323 ret = -EINVAL;
8324 goto out;
8327 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8328 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8329 } else if (dback->disk_bytenr > entry->bytenr) {
8330 u64 off_diff, offset;
8332 off_diff = dback->disk_bytenr - entry->bytenr;
8333 offset = btrfs_file_extent_offset(leaf, fi);
8334 if (dback->disk_bytenr + offset +
8335 btrfs_file_extent_num_bytes(leaf, fi) >
8336 entry->bytenr + entry->bytes) {
8337 fprintf(stderr, "Ref is past the entry end, please "
8338 "take a btrfs-image of this file system and "
8339 "send it to a btrfs developer, ref %Lu\n",
8340 dback->disk_bytenr);
8341 ret = -EINVAL;
8342 goto out;
8344 offset += off_diff;
8345 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8346 btrfs_set_file_extent_offset(leaf, fi, offset);
8347 } else if (dback->disk_bytenr < entry->bytenr) {
8348 u64 offset;
8350 offset = btrfs_file_extent_offset(leaf, fi);
8351 if (dback->disk_bytenr + offset < entry->bytenr) {
8352 fprintf(stderr, "Ref is before the entry start, please"
8353 " take a btrfs-image of this file system and "
8354 "send it to a btrfs developer, ref %Lu\n",
8355 dback->disk_bytenr);
8356 ret = -EINVAL;
8357 goto out;
8360 offset += dback->disk_bytenr;
8361 offset -= entry->bytenr;
8362 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8363 btrfs_set_file_extent_offset(leaf, fi, offset);
8366 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8369 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8370 * only do this if we aren't using compression, otherwise it's a
8371 * trickier case.
8373 if (!btrfs_file_extent_compression(leaf, fi))
8374 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8375 else
8376 printf("ram bytes may be wrong?\n");
8377 btrfs_mark_buffer_dirty(leaf);
8378 out:
8379 err = btrfs_commit_transaction(trans, root);
8380 btrfs_release_path(path);
8381 return ret ? ret : err;
8384 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8385 struct extent_record *rec)
8387 struct extent_backref *back;
8388 struct data_backref *dback;
8389 struct extent_entry *entry, *best = NULL;
8390 LIST_HEAD(entries);
8391 int nr_entries = 0;
8392 int broken_entries = 0;
8393 int ret = 0;
8394 short mismatch = 0;
8397 * Metadata is easy and the backrefs should always agree on bytenr and
8398 * size, if not we've got bigger issues.
8400 if (rec->metadata)
8401 return 0;
8403 list_for_each_entry(back, &rec->backrefs, list) {
8404 if (back->full_backref || !back->is_data)
8405 continue;
8407 dback = to_data_backref(back);
8410 * We only pay attention to backrefs that we found a real
8411 * backref for.
8413 if (dback->found_ref == 0)
8414 continue;
8417 * For now we only catch when the bytes don't match, not the
8418 * bytenr. We can easily do this at the same time, but I want
8419 * to have a fs image to test on before we just add repair
8420 * functionality willy-nilly so we know we won't screw up the
8421 * repair.
8424 entry = find_entry(&entries, dback->disk_bytenr,
8425 dback->bytes);
8426 if (!entry) {
8427 entry = malloc(sizeof(struct extent_entry));
8428 if (!entry) {
8429 ret = -ENOMEM;
8430 goto out;
8432 memset(entry, 0, sizeof(*entry));
8433 entry->bytenr = dback->disk_bytenr;
8434 entry->bytes = dback->bytes;
8435 list_add_tail(&entry->list, &entries);
8436 nr_entries++;
8440 * If we only have on entry we may think the entries agree when
8441 * in reality they don't so we have to do some extra checking.
8443 if (dback->disk_bytenr != rec->start ||
8444 dback->bytes != rec->nr || back->broken)
8445 mismatch = 1;
8447 if (back->broken) {
8448 entry->broken++;
8449 broken_entries++;
8452 entry->count++;
8455 /* Yay all the backrefs agree, carry on good sir */
8456 if (nr_entries <= 1 && !mismatch)
8457 goto out;
8459 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8460 "%Lu\n", rec->start);
8463 * First we want to see if the backrefs can agree amongst themselves who
8464 * is right, so figure out which one of the entries has the highest
8465 * count.
8467 best = find_most_right_entry(&entries);
8470 * Ok so we may have an even split between what the backrefs think, so
8471 * this is where we use the extent ref to see what it thinks.
8473 if (!best) {
8474 entry = find_entry(&entries, rec->start, rec->nr);
8475 if (!entry && (!broken_entries || !rec->found_rec)) {
8476 fprintf(stderr, "Backrefs don't agree with each other "
8477 "and extent record doesn't agree with anybody,"
8478 " so we can't fix bytenr %Lu bytes %Lu\n",
8479 rec->start, rec->nr);
8480 ret = -EINVAL;
8481 goto out;
8482 } else if (!entry) {
8484 * Ok our backrefs were broken, we'll assume this is the
8485 * correct value and add an entry for this range.
8487 entry = malloc(sizeof(struct extent_entry));
8488 if (!entry) {
8489 ret = -ENOMEM;
8490 goto out;
8492 memset(entry, 0, sizeof(*entry));
8493 entry->bytenr = rec->start;
8494 entry->bytes = rec->nr;
8495 list_add_tail(&entry->list, &entries);
8496 nr_entries++;
8498 entry->count++;
8499 best = find_most_right_entry(&entries);
8500 if (!best) {
8501 fprintf(stderr, "Backrefs and extent record evenly "
8502 "split on who is right, this is going to "
8503 "require user input to fix bytenr %Lu bytes "
8504 "%Lu\n", rec->start, rec->nr);
8505 ret = -EINVAL;
8506 goto out;
8511 * I don't think this can happen currently as we'll abort() if we catch
8512 * this case higher up, but in case somebody removes that we still can't
8513 * deal with it properly here yet, so just bail out of that's the case.
8515 if (best->bytenr != rec->start) {
8516 fprintf(stderr, "Extent start and backref starts don't match, "
8517 "please use btrfs-image on this file system and send "
8518 "it to a btrfs developer so they can make fsck fix "
8519 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8520 rec->start, rec->nr);
8521 ret = -EINVAL;
8522 goto out;
8526 * Ok great we all agreed on an extent record, let's go find the real
8527 * references and fix up the ones that don't match.
8529 list_for_each_entry(back, &rec->backrefs, list) {
8530 if (back->full_backref || !back->is_data)
8531 continue;
8533 dback = to_data_backref(back);
8536 * Still ignoring backrefs that don't have a real ref attached
8537 * to them.
8539 if (dback->found_ref == 0)
8540 continue;
8542 if (dback->bytes == best->bytes &&
8543 dback->disk_bytenr == best->bytenr)
8544 continue;
8546 ret = repair_ref(info, path, dback, best);
8547 if (ret)
8548 goto out;
8552 * Ok we messed with the actual refs, which means we need to drop our
8553 * entire cache and go back and rescan. I know this is a huge pain and
8554 * adds a lot of extra work, but it's the only way to be safe. Once all
8555 * the backrefs agree we may not need to do anything to the extent
8556 * record itself.
8558 ret = -EAGAIN;
8559 out:
8560 while (!list_empty(&entries)) {
8561 entry = list_entry(entries.next, struct extent_entry, list);
8562 list_del_init(&entry->list);
8563 free(entry);
8565 return ret;
8568 static int process_duplicates(struct cache_tree *extent_cache,
8569 struct extent_record *rec)
8571 struct extent_record *good, *tmp;
8572 struct cache_extent *cache;
8573 int ret;
8576 * If we found a extent record for this extent then return, or if we
8577 * have more than one duplicate we are likely going to need to delete
8578 * something.
8580 if (rec->found_rec || rec->num_duplicates > 1)
8581 return 0;
8583 /* Shouldn't happen but just in case */
8584 BUG_ON(!rec->num_duplicates);
8587 * So this happens if we end up with a backref that doesn't match the
8588 * actual extent entry. So either the backref is bad or the extent
8589 * entry is bad. Either way we want to have the extent_record actually
8590 * reflect what we found in the extent_tree, so we need to take the
8591 * duplicate out and use that as the extent_record since the only way we
8592 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8594 remove_cache_extent(extent_cache, &rec->cache);
8596 good = to_extent_record(rec->dups.next);
8597 list_del_init(&good->list);
8598 INIT_LIST_HEAD(&good->backrefs);
8599 INIT_LIST_HEAD(&good->dups);
8600 good->cache.start = good->start;
8601 good->cache.size = good->nr;
8602 good->content_checked = 0;
8603 good->owner_ref_checked = 0;
8604 good->num_duplicates = 0;
8605 good->refs = rec->refs;
8606 list_splice_init(&rec->backrefs, &good->backrefs);
8607 while (1) {
8608 cache = lookup_cache_extent(extent_cache, good->start,
8609 good->nr);
8610 if (!cache)
8611 break;
8612 tmp = container_of(cache, struct extent_record, cache);
8615 * If we find another overlapping extent and it's found_rec is
8616 * set then it's a duplicate and we need to try and delete
8617 * something.
8619 if (tmp->found_rec || tmp->num_duplicates > 0) {
8620 if (list_empty(&good->list))
8621 list_add_tail(&good->list,
8622 &duplicate_extents);
8623 good->num_duplicates += tmp->num_duplicates + 1;
8624 list_splice_init(&tmp->dups, &good->dups);
8625 list_del_init(&tmp->list);
8626 list_add_tail(&tmp->list, &good->dups);
8627 remove_cache_extent(extent_cache, &tmp->cache);
8628 continue;
8632 * Ok we have another non extent item backed extent rec, so lets
8633 * just add it to this extent and carry on like we did above.
8635 good->refs += tmp->refs;
8636 list_splice_init(&tmp->backrefs, &good->backrefs);
8637 remove_cache_extent(extent_cache, &tmp->cache);
8638 free(tmp);
8640 ret = insert_cache_extent(extent_cache, &good->cache);
8641 BUG_ON(ret);
8642 free(rec);
8643 return good->num_duplicates ? 0 : 1;
8646 static int delete_duplicate_records(struct btrfs_root *root,
8647 struct extent_record *rec)
8649 struct btrfs_trans_handle *trans;
8650 LIST_HEAD(delete_list);
8651 struct btrfs_path path;
8652 struct extent_record *tmp, *good, *n;
8653 int nr_del = 0;
8654 int ret = 0, err;
8655 struct btrfs_key key;
8657 btrfs_init_path(&path);
8659 good = rec;
8660 /* Find the record that covers all of the duplicates. */
8661 list_for_each_entry(tmp, &rec->dups, list) {
8662 if (good->start < tmp->start)
8663 continue;
8664 if (good->nr > tmp->nr)
8665 continue;
8667 if (tmp->start + tmp->nr < good->start + good->nr) {
8668 fprintf(stderr, "Ok we have overlapping extents that "
8669 "aren't completely covered by each other, this "
8670 "is going to require more careful thought. "
8671 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8672 tmp->start, tmp->nr, good->start, good->nr);
8673 abort();
8675 good = tmp;
8678 if (good != rec)
8679 list_add_tail(&rec->list, &delete_list);
8681 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8682 if (tmp == good)
8683 continue;
8684 list_move_tail(&tmp->list, &delete_list);
8687 root = root->fs_info->extent_root;
8688 trans = btrfs_start_transaction(root, 1);
8689 if (IS_ERR(trans)) {
8690 ret = PTR_ERR(trans);
8691 goto out;
8694 list_for_each_entry(tmp, &delete_list, list) {
8695 if (tmp->found_rec == 0)
8696 continue;
8697 key.objectid = tmp->start;
8698 key.type = BTRFS_EXTENT_ITEM_KEY;
8699 key.offset = tmp->nr;
8701 /* Shouldn't happen but just in case */
8702 if (tmp->metadata) {
8703 fprintf(stderr, "Well this shouldn't happen, extent "
8704 "record overlaps but is metadata? "
8705 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8706 abort();
8709 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8710 if (ret) {
8711 if (ret > 0)
8712 ret = -EINVAL;
8713 break;
8715 ret = btrfs_del_item(trans, root, &path);
8716 if (ret)
8717 break;
8718 btrfs_release_path(&path);
8719 nr_del++;
8721 err = btrfs_commit_transaction(trans, root);
8722 if (err && !ret)
8723 ret = err;
8724 out:
8725 while (!list_empty(&delete_list)) {
8726 tmp = to_extent_record(delete_list.next);
8727 list_del_init(&tmp->list);
8728 if (tmp == rec)
8729 continue;
8730 free(tmp);
8733 while (!list_empty(&rec->dups)) {
8734 tmp = to_extent_record(rec->dups.next);
8735 list_del_init(&tmp->list);
8736 free(tmp);
8739 btrfs_release_path(&path);
8741 if (!ret && !nr_del)
8742 rec->num_duplicates = 0;
8744 return ret ? ret : nr_del;
8747 static int find_possible_backrefs(struct btrfs_fs_info *info,
8748 struct btrfs_path *path,
8749 struct cache_tree *extent_cache,
8750 struct extent_record *rec)
8752 struct btrfs_root *root;
8753 struct extent_backref *back;
8754 struct data_backref *dback;
8755 struct cache_extent *cache;
8756 struct btrfs_file_extent_item *fi;
8757 struct btrfs_key key;
8758 u64 bytenr, bytes;
8759 int ret;
8761 list_for_each_entry(back, &rec->backrefs, list) {
8762 /* Don't care about full backrefs (poor unloved backrefs) */
8763 if (back->full_backref || !back->is_data)
8764 continue;
8766 dback = to_data_backref(back);
8768 /* We found this one, we don't need to do a lookup */
8769 if (dback->found_ref)
8770 continue;
8772 key.objectid = dback->root;
8773 key.type = BTRFS_ROOT_ITEM_KEY;
8774 key.offset = (u64)-1;
8776 root = btrfs_read_fs_root(info, &key);
8778 /* No root, definitely a bad ref, skip */
8779 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8780 continue;
8781 /* Other err, exit */
8782 if (IS_ERR(root))
8783 return PTR_ERR(root);
8785 key.objectid = dback->owner;
8786 key.type = BTRFS_EXTENT_DATA_KEY;
8787 key.offset = dback->offset;
8788 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8789 if (ret) {
8790 btrfs_release_path(path);
8791 if (ret < 0)
8792 return ret;
8793 /* Didn't find it, we can carry on */
8794 ret = 0;
8795 continue;
8798 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8799 struct btrfs_file_extent_item);
8800 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8801 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8802 btrfs_release_path(path);
8803 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8804 if (cache) {
8805 struct extent_record *tmp;
8806 tmp = container_of(cache, struct extent_record, cache);
8809 * If we found an extent record for the bytenr for this
8810 * particular backref then we can't add it to our
8811 * current extent record. We only want to add backrefs
8812 * that don't have a corresponding extent item in the
8813 * extent tree since they likely belong to this record
8814 * and we need to fix it if it doesn't match bytenrs.
8816 if (tmp->found_rec)
8817 continue;
8820 dback->found_ref += 1;
8821 dback->disk_bytenr = bytenr;
8822 dback->bytes = bytes;
8825 * Set this so the verify backref code knows not to trust the
8826 * values in this backref.
8828 back->broken = 1;
8831 return 0;
8835 * Record orphan data ref into corresponding root.
8837 * Return 0 if the extent item contains data ref and recorded.
8838 * Return 1 if the extent item contains no useful data ref
8839 * On that case, it may contains only shared_dataref or metadata backref
8840 * or the file extent exists(this should be handled by the extent bytenr
8841 * recovery routine)
8842 * Return <0 if something goes wrong.
8844 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8845 struct extent_record *rec)
8847 struct btrfs_key key;
8848 struct btrfs_root *dest_root;
8849 struct extent_backref *back;
8850 struct data_backref *dback;
8851 struct orphan_data_extent *orphan;
8852 struct btrfs_path path;
8853 int recorded_data_ref = 0;
8854 int ret = 0;
8856 if (rec->metadata)
8857 return 1;
8858 btrfs_init_path(&path);
8859 list_for_each_entry(back, &rec->backrefs, list) {
8860 if (back->full_backref || !back->is_data ||
8861 !back->found_extent_tree)
8862 continue;
8863 dback = to_data_backref(back);
8864 if (dback->found_ref)
8865 continue;
8866 key.objectid = dback->root;
8867 key.type = BTRFS_ROOT_ITEM_KEY;
8868 key.offset = (u64)-1;
8870 dest_root = btrfs_read_fs_root(fs_info, &key);
8872 /* For non-exist root we just skip it */
8873 if (IS_ERR(dest_root) || !dest_root)
8874 continue;
8876 key.objectid = dback->owner;
8877 key.type = BTRFS_EXTENT_DATA_KEY;
8878 key.offset = dback->offset;
8880 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8881 btrfs_release_path(&path);
8883 * For ret < 0, it's OK since the fs-tree may be corrupted,
8884 * we need to record it for inode/file extent rebuild.
8885 * For ret > 0, we record it only for file extent rebuild.
8886 * For ret == 0, the file extent exists but only bytenr
8887 * mismatch, let the original bytenr fix routine to handle,
8888 * don't record it.
8890 if (ret == 0)
8891 continue;
8892 ret = 0;
8893 orphan = malloc(sizeof(*orphan));
8894 if (!orphan) {
8895 ret = -ENOMEM;
8896 goto out;
8898 INIT_LIST_HEAD(&orphan->list);
8899 orphan->root = dback->root;
8900 orphan->objectid = dback->owner;
8901 orphan->offset = dback->offset;
8902 orphan->disk_bytenr = rec->cache.start;
8903 orphan->disk_len = rec->cache.size;
8904 list_add(&dest_root->orphan_data_extents, &orphan->list);
8905 recorded_data_ref = 1;
8907 out:
8908 btrfs_release_path(&path);
8909 if (!ret)
8910 return !recorded_data_ref;
8911 else
8912 return ret;
8916 * when an incorrect extent item is found, this will delete
8917 * all of the existing entries for it and recreate them
8918 * based on what the tree scan found.
8920 static int fixup_extent_refs(struct btrfs_fs_info *info,
8921 struct cache_tree *extent_cache,
8922 struct extent_record *rec)
8924 struct btrfs_trans_handle *trans = NULL;
8925 int ret;
8926 struct btrfs_path path;
8927 struct list_head *cur = rec->backrefs.next;
8928 struct cache_extent *cache;
8929 struct extent_backref *back;
8930 int allocated = 0;
8931 u64 flags = 0;
8933 if (rec->flag_block_full_backref)
8934 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8936 btrfs_init_path(&path);
8937 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8939 * Sometimes the backrefs themselves are so broken they don't
8940 * get attached to any meaningful rec, so first go back and
8941 * check any of our backrefs that we couldn't find and throw
8942 * them into the list if we find the backref so that
8943 * verify_backrefs can figure out what to do.
8945 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8946 if (ret < 0)
8947 goto out;
8950 /* step one, make sure all of the backrefs agree */
8951 ret = verify_backrefs(info, &path, rec);
8952 if (ret < 0)
8953 goto out;
8955 trans = btrfs_start_transaction(info->extent_root, 1);
8956 if (IS_ERR(trans)) {
8957 ret = PTR_ERR(trans);
8958 goto out;
8961 /* step two, delete all the existing records */
8962 ret = delete_extent_records(trans, info->extent_root, &path,
8963 rec->start);
8965 if (ret < 0)
8966 goto out;
8968 /* was this block corrupt? If so, don't add references to it */
8969 cache = lookup_cache_extent(info->corrupt_blocks,
8970 rec->start, rec->max_size);
8971 if (cache) {
8972 ret = 0;
8973 goto out;
8976 /* step three, recreate all the refs we did find */
8977 while(cur != &rec->backrefs) {
8978 back = to_extent_backref(cur);
8979 cur = cur->next;
8982 * if we didn't find any references, don't create a
8983 * new extent record
8985 if (!back->found_ref)
8986 continue;
8988 rec->bad_full_backref = 0;
8989 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8990 allocated = 1;
8992 if (ret)
8993 goto out;
8995 out:
8996 if (trans) {
8997 int err = btrfs_commit_transaction(trans, info->extent_root);
8998 if (!ret)
8999 ret = err;
9002 if (!ret)
9003 fprintf(stderr, "Repaired extent references for %llu\n",
9004 (unsigned long long)rec->start);
9006 btrfs_release_path(&path);
9007 return ret;
9010 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9011 struct extent_record *rec)
9013 struct btrfs_trans_handle *trans;
9014 struct btrfs_root *root = fs_info->extent_root;
9015 struct btrfs_path path;
9016 struct btrfs_extent_item *ei;
9017 struct btrfs_key key;
9018 u64 flags;
9019 int ret = 0;
9021 key.objectid = rec->start;
9022 if (rec->metadata) {
9023 key.type = BTRFS_METADATA_ITEM_KEY;
9024 key.offset = rec->info_level;
9025 } else {
9026 key.type = BTRFS_EXTENT_ITEM_KEY;
9027 key.offset = rec->max_size;
9030 trans = btrfs_start_transaction(root, 0);
9031 if (IS_ERR(trans))
9032 return PTR_ERR(trans);
9034 btrfs_init_path(&path);
9035 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9036 if (ret < 0) {
9037 btrfs_release_path(&path);
9038 btrfs_commit_transaction(trans, root);
9039 return ret;
9040 } else if (ret) {
9041 fprintf(stderr, "Didn't find extent for %llu\n",
9042 (unsigned long long)rec->start);
9043 btrfs_release_path(&path);
9044 btrfs_commit_transaction(trans, root);
9045 return -ENOENT;
9048 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9049 struct btrfs_extent_item);
9050 flags = btrfs_extent_flags(path.nodes[0], ei);
9051 if (rec->flag_block_full_backref) {
9052 fprintf(stderr, "setting full backref on %llu\n",
9053 (unsigned long long)key.objectid);
9054 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9055 } else {
9056 fprintf(stderr, "clearing full backref on %llu\n",
9057 (unsigned long long)key.objectid);
9058 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9060 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9061 btrfs_mark_buffer_dirty(path.nodes[0]);
9062 btrfs_release_path(&path);
9063 ret = btrfs_commit_transaction(trans, root);
9064 if (!ret)
9065 fprintf(stderr, "Repaired extent flags for %llu\n",
9066 (unsigned long long)rec->start);
9068 return ret;
9071 /* right now we only prune from the extent allocation tree */
9072 static int prune_one_block(struct btrfs_trans_handle *trans,
9073 struct btrfs_fs_info *info,
9074 struct btrfs_corrupt_block *corrupt)
9076 int ret;
9077 struct btrfs_path path;
9078 struct extent_buffer *eb;
9079 u64 found;
9080 int slot;
9081 int nritems;
9082 int level = corrupt->level + 1;
9084 btrfs_init_path(&path);
9085 again:
9086 /* we want to stop at the parent to our busted block */
9087 path.lowest_level = level;
9089 ret = btrfs_search_slot(trans, info->extent_root,
9090 &corrupt->key, &path, -1, 1);
9092 if (ret < 0)
9093 goto out;
9095 eb = path.nodes[level];
9096 if (!eb) {
9097 ret = -ENOENT;
9098 goto out;
9102 * hopefully the search gave us the block we want to prune,
9103 * lets try that first
9105 slot = path.slots[level];
9106 found = btrfs_node_blockptr(eb, slot);
9107 if (found == corrupt->cache.start)
9108 goto del_ptr;
9110 nritems = btrfs_header_nritems(eb);
9112 /* the search failed, lets scan this node and hope we find it */
9113 for (slot = 0; slot < nritems; slot++) {
9114 found = btrfs_node_blockptr(eb, slot);
9115 if (found == corrupt->cache.start)
9116 goto del_ptr;
9119 * we couldn't find the bad block. TODO, search all the nodes for pointers
9120 * to this block
9122 if (eb == info->extent_root->node) {
9123 ret = -ENOENT;
9124 goto out;
9125 } else {
9126 level++;
9127 btrfs_release_path(&path);
9128 goto again;
9131 del_ptr:
9132 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9133 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9135 out:
9136 btrfs_release_path(&path);
9137 return ret;
9140 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9142 struct btrfs_trans_handle *trans = NULL;
9143 struct cache_extent *cache;
9144 struct btrfs_corrupt_block *corrupt;
9146 while (1) {
9147 cache = search_cache_extent(info->corrupt_blocks, 0);
9148 if (!cache)
9149 break;
9150 if (!trans) {
9151 trans = btrfs_start_transaction(info->extent_root, 1);
9152 if (IS_ERR(trans))
9153 return PTR_ERR(trans);
9155 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9156 prune_one_block(trans, info, corrupt);
9157 remove_cache_extent(info->corrupt_blocks, cache);
9159 if (trans)
9160 return btrfs_commit_transaction(trans, info->extent_root);
9161 return 0;
9164 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9166 struct btrfs_block_group_cache *cache;
9167 u64 start, end;
9168 int ret;
9170 while (1) {
9171 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9172 &start, &end, EXTENT_DIRTY);
9173 if (ret)
9174 break;
9175 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9178 start = 0;
9179 while (1) {
9180 cache = btrfs_lookup_first_block_group(fs_info, start);
9181 if (!cache)
9182 break;
9183 if (cache->cached)
9184 cache->cached = 0;
9185 start = cache->key.objectid + cache->key.offset;
9189 static int check_extent_refs(struct btrfs_root *root,
9190 struct cache_tree *extent_cache)
9192 struct extent_record *rec;
9193 struct cache_extent *cache;
9194 int ret = 0;
9195 int had_dups = 0;
9197 if (repair) {
9199 * if we're doing a repair, we have to make sure
9200 * we don't allocate from the problem extents.
9201 * In the worst case, this will be all the
9202 * extents in the FS
9204 cache = search_cache_extent(extent_cache, 0);
9205 while(cache) {
9206 rec = container_of(cache, struct extent_record, cache);
9207 set_extent_dirty(root->fs_info->excluded_extents,
9208 rec->start,
9209 rec->start + rec->max_size - 1);
9210 cache = next_cache_extent(cache);
9213 /* pin down all the corrupted blocks too */
9214 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9215 while(cache) {
9216 set_extent_dirty(root->fs_info->excluded_extents,
9217 cache->start,
9218 cache->start + cache->size - 1);
9219 cache = next_cache_extent(cache);
9221 prune_corrupt_blocks(root->fs_info);
9222 reset_cached_block_groups(root->fs_info);
9225 reset_cached_block_groups(root->fs_info);
9228 * We need to delete any duplicate entries we find first otherwise we
9229 * could mess up the extent tree when we have backrefs that actually
9230 * belong to a different extent item and not the weird duplicate one.
9232 while (repair && !list_empty(&duplicate_extents)) {
9233 rec = to_extent_record(duplicate_extents.next);
9234 list_del_init(&rec->list);
9236 /* Sometimes we can find a backref before we find an actual
9237 * extent, so we need to process it a little bit to see if there
9238 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9239 * if this is a backref screwup. If we need to delete stuff
9240 * process_duplicates() will return 0, otherwise it will return
9241 * 1 and we
9243 if (process_duplicates(extent_cache, rec))
9244 continue;
9245 ret = delete_duplicate_records(root, rec);
9246 if (ret < 0)
9247 return ret;
9249 * delete_duplicate_records will return the number of entries
9250 * deleted, so if it's greater than 0 then we know we actually
9251 * did something and we need to remove.
9253 if (ret)
9254 had_dups = 1;
9257 if (had_dups)
9258 return -EAGAIN;
9260 while(1) {
9261 int cur_err = 0;
9262 int fix = 0;
9264 cache = search_cache_extent(extent_cache, 0);
9265 if (!cache)
9266 break;
9267 rec = container_of(cache, struct extent_record, cache);
9268 if (rec->num_duplicates) {
9269 fprintf(stderr, "extent item %llu has multiple extent "
9270 "items\n", (unsigned long long)rec->start);
9271 cur_err = 1;
9274 if (rec->refs != rec->extent_item_refs) {
9275 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9276 (unsigned long long)rec->start,
9277 (unsigned long long)rec->nr);
9278 fprintf(stderr, "extent item %llu, found %llu\n",
9279 (unsigned long long)rec->extent_item_refs,
9280 (unsigned long long)rec->refs);
9281 ret = record_orphan_data_extents(root->fs_info, rec);
9282 if (ret < 0)
9283 goto repair_abort;
9284 fix = ret;
9285 cur_err = 1;
9287 if (all_backpointers_checked(rec, 1)) {
9288 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9289 (unsigned long long)rec->start,
9290 (unsigned long long)rec->nr);
9291 fix = 1;
9292 cur_err = 1;
9294 if (!rec->owner_ref_checked) {
9295 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9296 (unsigned long long)rec->start,
9297 (unsigned long long)rec->nr);
9298 fix = 1;
9299 cur_err = 1;
9302 if (repair && fix) {
9303 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9304 if (ret)
9305 goto repair_abort;
9309 if (rec->bad_full_backref) {
9310 fprintf(stderr, "bad full backref, on [%llu]\n",
9311 (unsigned long long)rec->start);
9312 if (repair) {
9313 ret = fixup_extent_flags(root->fs_info, rec);
9314 if (ret)
9315 goto repair_abort;
9316 fix = 1;
9318 cur_err = 1;
9321 * Although it's not a extent ref's problem, we reuse this
9322 * routine for error reporting.
9323 * No repair function yet.
9325 if (rec->crossing_stripes) {
9326 fprintf(stderr,
9327 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9328 rec->start, rec->start + rec->max_size);
9329 cur_err = 1;
9332 if (rec->wrong_chunk_type) {
9333 fprintf(stderr,
9334 "bad extent [%llu, %llu), type mismatch with chunk\n",
9335 rec->start, rec->start + rec->max_size);
9336 cur_err = 1;
9339 remove_cache_extent(extent_cache, cache);
9340 free_all_extent_backrefs(rec);
9341 if (!init_extent_tree && repair && (!cur_err || fix))
9342 clear_extent_dirty(root->fs_info->excluded_extents,
9343 rec->start,
9344 rec->start + rec->max_size - 1);
9345 free(rec);
9347 repair_abort:
9348 if (repair) {
9349 if (ret && ret != -EAGAIN) {
9350 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9351 exit(1);
9352 } else if (!ret) {
9353 struct btrfs_trans_handle *trans;
9355 root = root->fs_info->extent_root;
9356 trans = btrfs_start_transaction(root, 1);
9357 if (IS_ERR(trans)) {
9358 ret = PTR_ERR(trans);
9359 goto repair_abort;
9362 btrfs_fix_block_accounting(trans, root);
9363 ret = btrfs_commit_transaction(trans, root);
9364 if (ret)
9365 goto repair_abort;
9367 return ret;
9369 return 0;
9372 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9374 u64 stripe_size;
9376 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9377 stripe_size = length;
9378 stripe_size /= num_stripes;
9379 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9380 stripe_size = length * 2;
9381 stripe_size /= num_stripes;
9382 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9383 stripe_size = length;
9384 stripe_size /= (num_stripes - 1);
9385 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9386 stripe_size = length;
9387 stripe_size /= (num_stripes - 2);
9388 } else {
9389 stripe_size = length;
9391 return stripe_size;
9395 * Check the chunk with its block group/dev list ref:
9396 * Return 0 if all refs seems valid.
9397 * Return 1 if part of refs seems valid, need later check for rebuild ref
9398 * like missing block group and needs to search extent tree to rebuild them.
9399 * Return -1 if essential refs are missing and unable to rebuild.
9401 static int check_chunk_refs(struct chunk_record *chunk_rec,
9402 struct block_group_tree *block_group_cache,
9403 struct device_extent_tree *dev_extent_cache,
9404 int silent)
9406 struct cache_extent *block_group_item;
9407 struct block_group_record *block_group_rec;
9408 struct cache_extent *dev_extent_item;
9409 struct device_extent_record *dev_extent_rec;
9410 u64 devid;
9411 u64 offset;
9412 u64 length;
9413 int metadump_v2 = 0;
9414 int i;
9415 int ret = 0;
9417 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9418 chunk_rec->offset,
9419 chunk_rec->length);
9420 if (block_group_item) {
9421 block_group_rec = container_of(block_group_item,
9422 struct block_group_record,
9423 cache);
9424 if (chunk_rec->length != block_group_rec->offset ||
9425 chunk_rec->offset != block_group_rec->objectid ||
9426 (!metadump_v2 &&
9427 chunk_rec->type_flags != block_group_rec->flags)) {
9428 if (!silent)
9429 fprintf(stderr,
9430 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9431 chunk_rec->objectid,
9432 chunk_rec->type,
9433 chunk_rec->offset,
9434 chunk_rec->length,
9435 chunk_rec->offset,
9436 chunk_rec->type_flags,
9437 block_group_rec->objectid,
9438 block_group_rec->type,
9439 block_group_rec->offset,
9440 block_group_rec->offset,
9441 block_group_rec->objectid,
9442 block_group_rec->flags);
9443 ret = -1;
9444 } else {
9445 list_del_init(&block_group_rec->list);
9446 chunk_rec->bg_rec = block_group_rec;
9448 } else {
9449 if (!silent)
9450 fprintf(stderr,
9451 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9452 chunk_rec->objectid,
9453 chunk_rec->type,
9454 chunk_rec->offset,
9455 chunk_rec->length,
9456 chunk_rec->offset,
9457 chunk_rec->type_flags);
9458 ret = 1;
9461 if (metadump_v2)
9462 return ret;
9464 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9465 chunk_rec->num_stripes);
9466 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9467 devid = chunk_rec->stripes[i].devid;
9468 offset = chunk_rec->stripes[i].offset;
9469 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9470 devid, offset, length);
9471 if (dev_extent_item) {
9472 dev_extent_rec = container_of(dev_extent_item,
9473 struct device_extent_record,
9474 cache);
9475 if (dev_extent_rec->objectid != devid ||
9476 dev_extent_rec->offset != offset ||
9477 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9478 dev_extent_rec->length != length) {
9479 if (!silent)
9480 fprintf(stderr,
9481 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9482 chunk_rec->objectid,
9483 chunk_rec->type,
9484 chunk_rec->offset,
9485 chunk_rec->stripes[i].devid,
9486 chunk_rec->stripes[i].offset,
9487 dev_extent_rec->objectid,
9488 dev_extent_rec->offset,
9489 dev_extent_rec->length);
9490 ret = -1;
9491 } else {
9492 list_move(&dev_extent_rec->chunk_list,
9493 &chunk_rec->dextents);
9495 } else {
9496 if (!silent)
9497 fprintf(stderr,
9498 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9499 chunk_rec->objectid,
9500 chunk_rec->type,
9501 chunk_rec->offset,
9502 chunk_rec->stripes[i].devid,
9503 chunk_rec->stripes[i].offset);
9504 ret = -1;
9507 return ret;
9510 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9511 int check_chunks(struct cache_tree *chunk_cache,
9512 struct block_group_tree *block_group_cache,
9513 struct device_extent_tree *dev_extent_cache,
9514 struct list_head *good, struct list_head *bad,
9515 struct list_head *rebuild, int silent)
9517 struct cache_extent *chunk_item;
9518 struct chunk_record *chunk_rec;
9519 struct block_group_record *bg_rec;
9520 struct device_extent_record *dext_rec;
9521 int err;
9522 int ret = 0;
9524 chunk_item = first_cache_extent(chunk_cache);
9525 while (chunk_item) {
9526 chunk_rec = container_of(chunk_item, struct chunk_record,
9527 cache);
9528 err = check_chunk_refs(chunk_rec, block_group_cache,
9529 dev_extent_cache, silent);
9530 if (err < 0)
9531 ret = err;
9532 if (err == 0 && good)
9533 list_add_tail(&chunk_rec->list, good);
9534 if (err > 0 && rebuild)
9535 list_add_tail(&chunk_rec->list, rebuild);
9536 if (err < 0 && bad)
9537 list_add_tail(&chunk_rec->list, bad);
9538 chunk_item = next_cache_extent(chunk_item);
9541 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9542 if (!silent)
9543 fprintf(stderr,
9544 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9545 bg_rec->objectid,
9546 bg_rec->offset,
9547 bg_rec->flags);
9548 if (!ret)
9549 ret = 1;
9552 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9553 chunk_list) {
9554 if (!silent)
9555 fprintf(stderr,
9556 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9557 dext_rec->objectid,
9558 dext_rec->offset,
9559 dext_rec->length);
9560 if (!ret)
9561 ret = 1;
9563 return ret;
9567 static int check_device_used(struct device_record *dev_rec,
9568 struct device_extent_tree *dext_cache)
9570 struct cache_extent *cache;
9571 struct device_extent_record *dev_extent_rec;
9572 u64 total_byte = 0;
9574 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9575 while (cache) {
9576 dev_extent_rec = container_of(cache,
9577 struct device_extent_record,
9578 cache);
9579 if (dev_extent_rec->objectid != dev_rec->devid)
9580 break;
9582 list_del_init(&dev_extent_rec->device_list);
9583 total_byte += dev_extent_rec->length;
9584 cache = next_cache_extent(cache);
9587 if (total_byte != dev_rec->byte_used) {
9588 fprintf(stderr,
9589 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9590 total_byte, dev_rec->byte_used, dev_rec->objectid,
9591 dev_rec->type, dev_rec->offset);
9592 return -1;
9593 } else {
9594 return 0;
9598 /* check btrfs_dev_item -> btrfs_dev_extent */
9599 static int check_devices(struct rb_root *dev_cache,
9600 struct device_extent_tree *dev_extent_cache)
9602 struct rb_node *dev_node;
9603 struct device_record *dev_rec;
9604 struct device_extent_record *dext_rec;
9605 int err;
9606 int ret = 0;
9608 dev_node = rb_first(dev_cache);
9609 while (dev_node) {
9610 dev_rec = container_of(dev_node, struct device_record, node);
9611 err = check_device_used(dev_rec, dev_extent_cache);
9612 if (err)
9613 ret = err;
9615 dev_node = rb_next(dev_node);
9617 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9618 device_list) {
9619 fprintf(stderr,
9620 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9621 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9622 if (!ret)
9623 ret = 1;
9625 return ret;
9628 static int add_root_item_to_list(struct list_head *head,
9629 u64 objectid, u64 bytenr, u64 last_snapshot,
9630 u8 level, u8 drop_level,
9631 int level_size, struct btrfs_key *drop_key)
9634 struct root_item_record *ri_rec;
9635 ri_rec = malloc(sizeof(*ri_rec));
9636 if (!ri_rec)
9637 return -ENOMEM;
9638 ri_rec->bytenr = bytenr;
9639 ri_rec->objectid = objectid;
9640 ri_rec->level = level;
9641 ri_rec->level_size = level_size;
9642 ri_rec->drop_level = drop_level;
9643 ri_rec->last_snapshot = last_snapshot;
9644 if (drop_key)
9645 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9646 list_add_tail(&ri_rec->list, head);
9648 return 0;
9651 static void free_root_item_list(struct list_head *list)
9653 struct root_item_record *ri_rec;
9655 while (!list_empty(list)) {
9656 ri_rec = list_first_entry(list, struct root_item_record,
9657 list);
9658 list_del_init(&ri_rec->list);
9659 free(ri_rec);
9663 static int deal_root_from_list(struct list_head *list,
9664 struct btrfs_root *root,
9665 struct block_info *bits,
9666 int bits_nr,
9667 struct cache_tree *pending,
9668 struct cache_tree *seen,
9669 struct cache_tree *reada,
9670 struct cache_tree *nodes,
9671 struct cache_tree *extent_cache,
9672 struct cache_tree *chunk_cache,
9673 struct rb_root *dev_cache,
9674 struct block_group_tree *block_group_cache,
9675 struct device_extent_tree *dev_extent_cache)
9677 int ret = 0;
9678 u64 last;
9680 while (!list_empty(list)) {
9681 struct root_item_record *rec;
9682 struct extent_buffer *buf;
9683 rec = list_entry(list->next,
9684 struct root_item_record, list);
9685 last = 0;
9686 buf = read_tree_block(root->fs_info->tree_root,
9687 rec->bytenr, rec->level_size, 0);
9688 if (!extent_buffer_uptodate(buf)) {
9689 free_extent_buffer(buf);
9690 ret = -EIO;
9691 break;
9693 ret = add_root_to_pending(buf, extent_cache, pending,
9694 seen, nodes, rec->objectid);
9695 if (ret < 0)
9696 break;
9698 * To rebuild extent tree, we need deal with snapshot
9699 * one by one, otherwise we deal with node firstly which
9700 * can maximize readahead.
9702 while (1) {
9703 ret = run_next_block(root, bits, bits_nr, &last,
9704 pending, seen, reada, nodes,
9705 extent_cache, chunk_cache,
9706 dev_cache, block_group_cache,
9707 dev_extent_cache, rec);
9708 if (ret != 0)
9709 break;
9711 free_extent_buffer(buf);
9712 list_del(&rec->list);
9713 free(rec);
9714 if (ret < 0)
9715 break;
9717 while (ret >= 0) {
9718 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9719 reada, nodes, extent_cache, chunk_cache,
9720 dev_cache, block_group_cache,
9721 dev_extent_cache, NULL);
9722 if (ret != 0) {
9723 if (ret > 0)
9724 ret = 0;
9725 break;
9728 return ret;
9731 static int check_chunks_and_extents(struct btrfs_root *root)
9733 struct rb_root dev_cache;
9734 struct cache_tree chunk_cache;
9735 struct block_group_tree block_group_cache;
9736 struct device_extent_tree dev_extent_cache;
9737 struct cache_tree extent_cache;
9738 struct cache_tree seen;
9739 struct cache_tree pending;
9740 struct cache_tree reada;
9741 struct cache_tree nodes;
9742 struct extent_io_tree excluded_extents;
9743 struct cache_tree corrupt_blocks;
9744 struct btrfs_path path;
9745 struct btrfs_key key;
9746 struct btrfs_key found_key;
9747 int ret, err = 0;
9748 struct block_info *bits;
9749 int bits_nr;
9750 struct extent_buffer *leaf;
9751 int slot;
9752 struct btrfs_root_item ri;
9753 struct list_head dropping_trees;
9754 struct list_head normal_trees;
9755 struct btrfs_root *root1;
9756 u64 objectid;
9757 u32 level_size;
9758 u8 level;
9760 dev_cache = RB_ROOT;
9761 cache_tree_init(&chunk_cache);
9762 block_group_tree_init(&block_group_cache);
9763 device_extent_tree_init(&dev_extent_cache);
9765 cache_tree_init(&extent_cache);
9766 cache_tree_init(&seen);
9767 cache_tree_init(&pending);
9768 cache_tree_init(&nodes);
9769 cache_tree_init(&reada);
9770 cache_tree_init(&corrupt_blocks);
9771 extent_io_tree_init(&excluded_extents);
9772 INIT_LIST_HEAD(&dropping_trees);
9773 INIT_LIST_HEAD(&normal_trees);
9775 if (repair) {
9776 root->fs_info->excluded_extents = &excluded_extents;
9777 root->fs_info->fsck_extent_cache = &extent_cache;
9778 root->fs_info->free_extent_hook = free_extent_hook;
9779 root->fs_info->corrupt_blocks = &corrupt_blocks;
9782 bits_nr = 1024;
9783 bits = malloc(bits_nr * sizeof(struct block_info));
9784 if (!bits) {
9785 perror("malloc");
9786 exit(1);
9789 if (ctx.progress_enabled) {
9790 ctx.tp = TASK_EXTENTS;
9791 task_start(ctx.info);
9794 again:
9795 root1 = root->fs_info->tree_root;
9796 level = btrfs_header_level(root1->node);
9797 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9798 root1->node->start, 0, level, 0,
9799 root1->nodesize, NULL);
9800 if (ret < 0)
9801 goto out;
9802 root1 = root->fs_info->chunk_root;
9803 level = btrfs_header_level(root1->node);
9804 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9805 root1->node->start, 0, level, 0,
9806 root1->nodesize, NULL);
9807 if (ret < 0)
9808 goto out;
9809 btrfs_init_path(&path);
9810 key.offset = 0;
9811 key.objectid = 0;
9812 key.type = BTRFS_ROOT_ITEM_KEY;
9813 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9814 &key, &path, 0, 0);
9815 if (ret < 0)
9816 goto out;
9817 while(1) {
9818 leaf = path.nodes[0];
9819 slot = path.slots[0];
9820 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9821 ret = btrfs_next_leaf(root, &path);
9822 if (ret != 0)
9823 break;
9824 leaf = path.nodes[0];
9825 slot = path.slots[0];
9827 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9828 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9829 unsigned long offset;
9830 u64 last_snapshot;
9832 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9833 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9834 last_snapshot = btrfs_root_last_snapshot(&ri);
9835 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9836 level = btrfs_root_level(&ri);
9837 level_size = root->nodesize;
9838 ret = add_root_item_to_list(&normal_trees,
9839 found_key.objectid,
9840 btrfs_root_bytenr(&ri),
9841 last_snapshot, level,
9842 0, level_size, NULL);
9843 if (ret < 0)
9844 goto out;
9845 } else {
9846 level = btrfs_root_level(&ri);
9847 level_size = root->nodesize;
9848 objectid = found_key.objectid;
9849 btrfs_disk_key_to_cpu(&found_key,
9850 &ri.drop_progress);
9851 ret = add_root_item_to_list(&dropping_trees,
9852 objectid,
9853 btrfs_root_bytenr(&ri),
9854 last_snapshot, level,
9855 ri.drop_level,
9856 level_size, &found_key);
9857 if (ret < 0)
9858 goto out;
9861 path.slots[0]++;
9863 btrfs_release_path(&path);
9866 * check_block can return -EAGAIN if it fixes something, please keep
9867 * this in mind when dealing with return values from these functions, if
9868 * we get -EAGAIN we want to fall through and restart the loop.
9870 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9871 &seen, &reada, &nodes, &extent_cache,
9872 &chunk_cache, &dev_cache, &block_group_cache,
9873 &dev_extent_cache);
9874 if (ret < 0) {
9875 if (ret == -EAGAIN)
9876 goto loop;
9877 goto out;
9879 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9880 &pending, &seen, &reada, &nodes,
9881 &extent_cache, &chunk_cache, &dev_cache,
9882 &block_group_cache, &dev_extent_cache);
9883 if (ret < 0) {
9884 if (ret == -EAGAIN)
9885 goto loop;
9886 goto out;
9889 ret = check_chunks(&chunk_cache, &block_group_cache,
9890 &dev_extent_cache, NULL, NULL, NULL, 0);
9891 if (ret) {
9892 if (ret == -EAGAIN)
9893 goto loop;
9894 err = ret;
9897 ret = check_extent_refs(root, &extent_cache);
9898 if (ret < 0) {
9899 if (ret == -EAGAIN)
9900 goto loop;
9901 goto out;
9904 ret = check_devices(&dev_cache, &dev_extent_cache);
9905 if (ret && err)
9906 ret = err;
9908 out:
9909 task_stop(ctx.info);
9910 if (repair) {
9911 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9912 extent_io_tree_cleanup(&excluded_extents);
9913 root->fs_info->fsck_extent_cache = NULL;
9914 root->fs_info->free_extent_hook = NULL;
9915 root->fs_info->corrupt_blocks = NULL;
9916 root->fs_info->excluded_extents = NULL;
9918 free(bits);
9919 free_chunk_cache_tree(&chunk_cache);
9920 free_device_cache_tree(&dev_cache);
9921 free_block_group_tree(&block_group_cache);
9922 free_device_extent_tree(&dev_extent_cache);
9923 free_extent_cache_tree(&seen);
9924 free_extent_cache_tree(&pending);
9925 free_extent_cache_tree(&reada);
9926 free_extent_cache_tree(&nodes);
9927 return ret;
9928 loop:
9929 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9930 free_extent_cache_tree(&seen);
9931 free_extent_cache_tree(&pending);
9932 free_extent_cache_tree(&reada);
9933 free_extent_cache_tree(&nodes);
9934 free_chunk_cache_tree(&chunk_cache);
9935 free_block_group_tree(&block_group_cache);
9936 free_device_cache_tree(&dev_cache);
9937 free_device_extent_tree(&dev_extent_cache);
9938 free_extent_record_cache(&extent_cache);
9939 free_root_item_list(&normal_trees);
9940 free_root_item_list(&dropping_trees);
9941 extent_io_tree_cleanup(&excluded_extents);
9942 goto again;
9946 * Check backrefs of a tree block given by @bytenr or @eb.
9948 * @root: the root containing the @bytenr or @eb
9949 * @eb: tree block extent buffer, can be NULL
9950 * @bytenr: bytenr of the tree block to search
9951 * @level: tree level of the tree block
9952 * @owner: owner of the tree block
9954 * Return >0 for any error found and output error message
9955 * Return 0 for no error found
9957 static int check_tree_block_ref(struct btrfs_root *root,
9958 struct extent_buffer *eb, u64 bytenr,
9959 int level, u64 owner)
9961 struct btrfs_key key;
9962 struct btrfs_root *extent_root = root->fs_info->extent_root;
9963 struct btrfs_path path;
9964 struct btrfs_extent_item *ei;
9965 struct btrfs_extent_inline_ref *iref;
9966 struct extent_buffer *leaf;
9967 unsigned long end;
9968 unsigned long ptr;
9969 int slot;
9970 int skinny_level;
9971 int type;
9972 u32 nodesize = root->nodesize;
9973 u32 item_size;
9974 u64 offset;
9975 int tree_reloc_root = 0;
9976 int found_ref = 0;
9977 int err = 0;
9978 int ret;
9980 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9981 btrfs_header_bytenr(root->node) == bytenr)
9982 tree_reloc_root = 1;
9984 btrfs_init_path(&path);
9985 key.objectid = bytenr;
9986 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9987 key.type = BTRFS_METADATA_ITEM_KEY;
9988 else
9989 key.type = BTRFS_EXTENT_ITEM_KEY;
9990 key.offset = (u64)-1;
9992 /* Search for the backref in extent tree */
9993 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9994 if (ret < 0) {
9995 err |= BACKREF_MISSING;
9996 goto out;
9998 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9999 if (ret) {
10000 err |= BACKREF_MISSING;
10001 goto out;
10004 leaf = path.nodes[0];
10005 slot = path.slots[0];
10006 btrfs_item_key_to_cpu(leaf, &key, slot);
10008 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10010 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10011 skinny_level = (int)key.offset;
10012 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10013 } else {
10014 struct btrfs_tree_block_info *info;
10016 info = (struct btrfs_tree_block_info *)(ei + 1);
10017 skinny_level = btrfs_tree_block_level(leaf, info);
10018 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10021 if (eb) {
10022 u64 header_gen;
10023 u64 extent_gen;
10025 if (!(btrfs_extent_flags(leaf, ei) &
10026 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10027 error(
10028 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10029 key.objectid, nodesize,
10030 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10031 err = BACKREF_MISMATCH;
10033 header_gen = btrfs_header_generation(eb);
10034 extent_gen = btrfs_extent_generation(leaf, ei);
10035 if (header_gen != extent_gen) {
10036 error(
10037 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10038 key.objectid, nodesize, header_gen,
10039 extent_gen);
10040 err = BACKREF_MISMATCH;
10042 if (level != skinny_level) {
10043 error(
10044 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10045 key.objectid, nodesize, level, skinny_level);
10046 err = BACKREF_MISMATCH;
10048 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10049 error(
10050 "extent[%llu %u] is referred by other roots than %llu",
10051 key.objectid, nodesize, root->objectid);
10052 err = BACKREF_MISMATCH;
10057 * Iterate the extent/metadata item to find the exact backref
10059 item_size = btrfs_item_size_nr(leaf, slot);
10060 ptr = (unsigned long)iref;
10061 end = (unsigned long)ei + item_size;
10062 while (ptr < end) {
10063 iref = (struct btrfs_extent_inline_ref *)ptr;
10064 type = btrfs_extent_inline_ref_type(leaf, iref);
10065 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10067 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10068 (offset == root->objectid || offset == owner)) {
10069 found_ref = 1;
10070 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10072 * Backref of tree reloc root points to itself, no need
10073 * to check backref any more.
10075 if (tree_reloc_root)
10076 found_ref = 1;
10077 else
10078 /* Check if the backref points to valid referencer */
10079 found_ref = !check_tree_block_ref(root, NULL,
10080 offset, level + 1, owner);
10083 if (found_ref)
10084 break;
10085 ptr += btrfs_extent_inline_ref_size(type);
10089 * Inlined extent item doesn't have what we need, check
10090 * TREE_BLOCK_REF_KEY
10092 if (!found_ref) {
10093 btrfs_release_path(&path);
10094 key.objectid = bytenr;
10095 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10096 key.offset = root->objectid;
10098 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10099 if (!ret)
10100 found_ref = 1;
10102 if (!found_ref)
10103 err |= BACKREF_MISSING;
10104 out:
10105 btrfs_release_path(&path);
10106 if (eb && (err & BACKREF_MISSING))
10107 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10108 bytenr, nodesize, owner, level);
10109 return err;
10113 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10115 * Return >0 any error found and output error message
10116 * Return 0 for no error found
10118 static int check_extent_data_item(struct btrfs_root *root,
10119 struct extent_buffer *eb, int slot)
10121 struct btrfs_file_extent_item *fi;
10122 struct btrfs_path path;
10123 struct btrfs_root *extent_root = root->fs_info->extent_root;
10124 struct btrfs_key fi_key;
10125 struct btrfs_key dbref_key;
10126 struct extent_buffer *leaf;
10127 struct btrfs_extent_item *ei;
10128 struct btrfs_extent_inline_ref *iref;
10129 struct btrfs_extent_data_ref *dref;
10130 u64 owner;
10131 u64 disk_bytenr;
10132 u64 disk_num_bytes;
10133 u64 extent_num_bytes;
10134 u64 extent_flags;
10135 u32 item_size;
10136 unsigned long end;
10137 unsigned long ptr;
10138 int type;
10139 u64 ref_root;
10140 int found_dbackref = 0;
10141 int err = 0;
10142 int ret;
10144 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10145 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10147 /* Nothing to check for hole and inline data extents */
10148 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10149 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10150 return 0;
10152 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10153 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10154 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10156 /* Check unaligned disk_num_bytes and num_bytes */
10157 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10158 error(
10159 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10160 fi_key.objectid, fi_key.offset, disk_num_bytes,
10161 root->sectorsize);
10162 err |= BYTES_UNALIGNED;
10163 } else {
10164 data_bytes_allocated += disk_num_bytes;
10166 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10167 error(
10168 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10169 fi_key.objectid, fi_key.offset, extent_num_bytes,
10170 root->sectorsize);
10171 err |= BYTES_UNALIGNED;
10172 } else {
10173 data_bytes_referenced += extent_num_bytes;
10175 owner = btrfs_header_owner(eb);
10177 /* Check the extent item of the file extent in extent tree */
10178 btrfs_init_path(&path);
10179 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10180 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10181 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10183 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10184 if (ret) {
10185 err |= BACKREF_MISSING;
10186 goto error;
10189 leaf = path.nodes[0];
10190 slot = path.slots[0];
10191 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10193 extent_flags = btrfs_extent_flags(leaf, ei);
10195 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10196 error(
10197 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10198 disk_bytenr, disk_num_bytes,
10199 BTRFS_EXTENT_FLAG_DATA);
10200 err |= BACKREF_MISMATCH;
10203 /* Check data backref inside that extent item */
10204 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10205 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10206 ptr = (unsigned long)iref;
10207 end = (unsigned long)ei + item_size;
10208 while (ptr < end) {
10209 iref = (struct btrfs_extent_inline_ref *)ptr;
10210 type = btrfs_extent_inline_ref_type(leaf, iref);
10211 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10213 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10214 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10215 if (ref_root == owner || ref_root == root->objectid)
10216 found_dbackref = 1;
10217 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10218 found_dbackref = !check_tree_block_ref(root, NULL,
10219 btrfs_extent_inline_ref_offset(leaf, iref),
10220 0, owner);
10223 if (found_dbackref)
10224 break;
10225 ptr += btrfs_extent_inline_ref_size(type);
10228 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10229 if (!found_dbackref) {
10230 btrfs_release_path(&path);
10232 btrfs_init_path(&path);
10233 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10234 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10235 dbref_key.offset = hash_extent_data_ref(root->objectid,
10236 fi_key.objectid, fi_key.offset);
10238 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10239 &dbref_key, &path, 0, 0);
10240 if (!ret)
10241 found_dbackref = 1;
10244 if (!found_dbackref)
10245 err |= BACKREF_MISSING;
10246 error:
10247 btrfs_release_path(&path);
10248 if (err & BACKREF_MISSING) {
10249 error("data extent[%llu %llu] backref lost",
10250 disk_bytenr, disk_num_bytes);
10252 return err;
10256 * Get real tree block level for the case like shared block
10257 * Return >= 0 as tree level
10258 * Return <0 for error
10260 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10262 struct extent_buffer *eb;
10263 struct btrfs_path path;
10264 struct btrfs_key key;
10265 struct btrfs_extent_item *ei;
10266 u64 flags;
10267 u64 transid;
10268 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10269 u8 backref_level;
10270 u8 header_level;
10271 int ret;
10273 /* Search extent tree for extent generation and level */
10274 key.objectid = bytenr;
10275 key.type = BTRFS_METADATA_ITEM_KEY;
10276 key.offset = (u64)-1;
10278 btrfs_init_path(&path);
10279 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10280 if (ret < 0)
10281 goto release_out;
10282 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10283 if (ret < 0)
10284 goto release_out;
10285 if (ret > 0) {
10286 ret = -ENOENT;
10287 goto release_out;
10290 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10291 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10292 struct btrfs_extent_item);
10293 flags = btrfs_extent_flags(path.nodes[0], ei);
10294 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10295 ret = -ENOENT;
10296 goto release_out;
10299 /* Get transid for later read_tree_block() check */
10300 transid = btrfs_extent_generation(path.nodes[0], ei);
10302 /* Get backref level as one source */
10303 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10304 backref_level = key.offset;
10305 } else {
10306 struct btrfs_tree_block_info *info;
10308 info = (struct btrfs_tree_block_info *)(ei + 1);
10309 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10311 btrfs_release_path(&path);
10313 /* Get level from tree block as an alternative source */
10314 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10315 if (!extent_buffer_uptodate(eb)) {
10316 free_extent_buffer(eb);
10317 return -EIO;
10319 header_level = btrfs_header_level(eb);
10320 free_extent_buffer(eb);
10322 if (header_level != backref_level)
10323 return -EIO;
10324 return header_level;
10326 release_out:
10327 btrfs_release_path(&path);
10328 return ret;
10332 * Check if a tree block backref is valid (points to a valid tree block)
10333 * if level == -1, level will be resolved
10334 * Return >0 for any error found and print error message
10336 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10337 u64 bytenr, int level)
10339 struct btrfs_root *root;
10340 struct btrfs_key key;
10341 struct btrfs_path path;
10342 struct extent_buffer *eb;
10343 struct extent_buffer *node;
10344 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10345 int err = 0;
10346 int ret;
10348 /* Query level for level == -1 special case */
10349 if (level == -1)
10350 level = query_tree_block_level(fs_info, bytenr);
10351 if (level < 0) {
10352 err |= REFERENCER_MISSING;
10353 goto out;
10356 key.objectid = root_id;
10357 key.type = BTRFS_ROOT_ITEM_KEY;
10358 key.offset = (u64)-1;
10360 root = btrfs_read_fs_root(fs_info, &key);
10361 if (IS_ERR(root)) {
10362 err |= REFERENCER_MISSING;
10363 goto out;
10366 /* Read out the tree block to get item/node key */
10367 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10368 if (!extent_buffer_uptodate(eb)) {
10369 err |= REFERENCER_MISSING;
10370 free_extent_buffer(eb);
10371 goto out;
10374 /* Empty tree, no need to check key */
10375 if (!btrfs_header_nritems(eb) && !level) {
10376 free_extent_buffer(eb);
10377 goto out;
10380 if (level)
10381 btrfs_node_key_to_cpu(eb, &key, 0);
10382 else
10383 btrfs_item_key_to_cpu(eb, &key, 0);
10385 free_extent_buffer(eb);
10387 btrfs_init_path(&path);
10388 path.lowest_level = level;
10389 /* Search with the first key, to ensure we can reach it */
10390 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10391 if (ret < 0) {
10392 err |= REFERENCER_MISSING;
10393 goto release_out;
10396 node = path.nodes[level];
10397 if (btrfs_header_bytenr(node) != bytenr) {
10398 error(
10399 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10400 bytenr, nodesize, bytenr,
10401 btrfs_header_bytenr(node));
10402 err |= REFERENCER_MISMATCH;
10404 if (btrfs_header_level(node) != level) {
10405 error(
10406 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10407 bytenr, nodesize, level,
10408 btrfs_header_level(node));
10409 err |= REFERENCER_MISMATCH;
10412 release_out:
10413 btrfs_release_path(&path);
10414 out:
10415 if (err & REFERENCER_MISSING) {
10416 if (level < 0)
10417 error("extent [%llu %d] lost referencer (owner: %llu)",
10418 bytenr, nodesize, root_id);
10419 else
10420 error(
10421 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10422 bytenr, nodesize, root_id, level);
10425 return err;
10429 * Check if tree block @eb is tree reloc root.
10430 * Return 0 if it's not or any problem happens
10431 * Return 1 if it's a tree reloc root
10433 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10434 struct extent_buffer *eb)
10436 struct btrfs_root *tree_reloc_root;
10437 struct btrfs_key key;
10438 u64 bytenr = btrfs_header_bytenr(eb);
10439 u64 owner = btrfs_header_owner(eb);
10440 int ret = 0;
10442 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10443 key.offset = owner;
10444 key.type = BTRFS_ROOT_ITEM_KEY;
10446 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10447 if (IS_ERR(tree_reloc_root))
10448 return 0;
10450 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10451 ret = 1;
10452 btrfs_free_fs_root(tree_reloc_root);
10453 return ret;
10457 * Check referencer for shared block backref
10458 * If level == -1, this function will resolve the level.
10460 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10461 u64 parent, u64 bytenr, int level)
10463 struct extent_buffer *eb;
10464 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10465 u32 nr;
10466 int found_parent = 0;
10467 int i;
10469 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10470 if (!extent_buffer_uptodate(eb))
10471 goto out;
10473 if (level == -1)
10474 level = query_tree_block_level(fs_info, bytenr);
10475 if (level < 0)
10476 goto out;
10478 /* It's possible it's a tree reloc root */
10479 if (parent == bytenr) {
10480 if (is_tree_reloc_root(fs_info, eb))
10481 found_parent = 1;
10482 goto out;
10485 if (level + 1 != btrfs_header_level(eb))
10486 goto out;
10488 nr = btrfs_header_nritems(eb);
10489 for (i = 0; i < nr; i++) {
10490 if (bytenr == btrfs_node_blockptr(eb, i)) {
10491 found_parent = 1;
10492 break;
10495 out:
10496 free_extent_buffer(eb);
10497 if (!found_parent) {
10498 error(
10499 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10500 bytenr, nodesize, parent, level);
10501 return REFERENCER_MISSING;
10503 return 0;
10507 * Check referencer for normal (inlined) data ref
10508 * If len == 0, it will be resolved by searching in extent tree
10510 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10511 u64 root_id, u64 objectid, u64 offset,
10512 u64 bytenr, u64 len, u32 count)
10514 struct btrfs_root *root;
10515 struct btrfs_root *extent_root = fs_info->extent_root;
10516 struct btrfs_key key;
10517 struct btrfs_path path;
10518 struct extent_buffer *leaf;
10519 struct btrfs_file_extent_item *fi;
10520 u32 found_count = 0;
10521 int slot;
10522 int ret = 0;
10524 if (!len) {
10525 key.objectid = bytenr;
10526 key.type = BTRFS_EXTENT_ITEM_KEY;
10527 key.offset = (u64)-1;
10529 btrfs_init_path(&path);
10530 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10531 if (ret < 0)
10532 goto out;
10533 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10534 if (ret)
10535 goto out;
10536 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10537 if (key.objectid != bytenr ||
10538 key.type != BTRFS_EXTENT_ITEM_KEY)
10539 goto out;
10540 len = key.offset;
10541 btrfs_release_path(&path);
10543 key.objectid = root_id;
10544 key.type = BTRFS_ROOT_ITEM_KEY;
10545 key.offset = (u64)-1;
10546 btrfs_init_path(&path);
10548 root = btrfs_read_fs_root(fs_info, &key);
10549 if (IS_ERR(root))
10550 goto out;
10552 key.objectid = objectid;
10553 key.type = BTRFS_EXTENT_DATA_KEY;
10555 * It can be nasty as data backref offset is
10556 * file offset - file extent offset, which is smaller or
10557 * equal to original backref offset. The only special case is
10558 * overflow. So we need to special check and do further search.
10560 key.offset = offset & (1ULL << 63) ? 0 : offset;
10562 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10563 if (ret < 0)
10564 goto out;
10567 * Search afterwards to get correct one
10568 * NOTE: As we must do a comprehensive check on the data backref to
10569 * make sure the dref count also matches, we must iterate all file
10570 * extents for that inode.
10572 while (1) {
10573 leaf = path.nodes[0];
10574 slot = path.slots[0];
10576 if (slot >= btrfs_header_nritems(leaf))
10577 goto next;
10578 btrfs_item_key_to_cpu(leaf, &key, slot);
10579 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10580 break;
10581 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10583 * Except normal disk bytenr and disk num bytes, we still
10584 * need to do extra check on dbackref offset as
10585 * dbackref offset = file_offset - file_extent_offset
10587 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10588 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10589 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10590 offset)
10591 found_count++;
10593 next:
10594 ret = btrfs_next_item(root, &path);
10595 if (ret)
10596 break;
10598 out:
10599 btrfs_release_path(&path);
10600 if (found_count != count) {
10601 error(
10602 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10603 bytenr, len, root_id, objectid, offset, count, found_count);
10604 return REFERENCER_MISSING;
10606 return 0;
10610 * Check if the referencer of a shared data backref exists
10612 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10613 u64 parent, u64 bytenr)
10615 struct extent_buffer *eb;
10616 struct btrfs_key key;
10617 struct btrfs_file_extent_item *fi;
10618 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10619 u32 nr;
10620 int found_parent = 0;
10621 int i;
10623 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10624 if (!extent_buffer_uptodate(eb))
10625 goto out;
10627 nr = btrfs_header_nritems(eb);
10628 for (i = 0; i < nr; i++) {
10629 btrfs_item_key_to_cpu(eb, &key, i);
10630 if (key.type != BTRFS_EXTENT_DATA_KEY)
10631 continue;
10633 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10634 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10635 continue;
10637 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10638 found_parent = 1;
10639 break;
10643 out:
10644 free_extent_buffer(eb);
10645 if (!found_parent) {
10646 error("shared extent %llu referencer lost (parent: %llu)",
10647 bytenr, parent);
10648 return REFERENCER_MISSING;
10650 return 0;
10654 * This function will check a given extent item, including its backref and
10655 * itself (like crossing stripe boundary and type)
10657 * Since we don't use extent_record anymore, introduce new error bit
10659 static int check_extent_item(struct btrfs_fs_info *fs_info,
10660 struct extent_buffer *eb, int slot)
10662 struct btrfs_extent_item *ei;
10663 struct btrfs_extent_inline_ref *iref;
10664 struct btrfs_extent_data_ref *dref;
10665 unsigned long end;
10666 unsigned long ptr;
10667 int type;
10668 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10669 u32 item_size = btrfs_item_size_nr(eb, slot);
10670 u64 flags;
10671 u64 offset;
10672 int metadata = 0;
10673 int level;
10674 struct btrfs_key key;
10675 int ret;
10676 int err = 0;
10678 btrfs_item_key_to_cpu(eb, &key, slot);
10679 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10680 bytes_used += key.offset;
10681 else
10682 bytes_used += nodesize;
10684 if (item_size < sizeof(*ei)) {
10686 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10687 * old thing when on disk format is still un-determined.
10688 * No need to care about it anymore
10690 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10691 return -ENOTTY;
10694 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10695 flags = btrfs_extent_flags(eb, ei);
10697 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10698 metadata = 1;
10699 if (metadata && check_crossing_stripes(global_info, key.objectid,
10700 eb->len)) {
10701 error("bad metadata [%llu, %llu) crossing stripe boundary",
10702 key.objectid, key.objectid + nodesize);
10703 err |= CROSSING_STRIPE_BOUNDARY;
10706 ptr = (unsigned long)(ei + 1);
10708 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10709 /* Old EXTENT_ITEM metadata */
10710 struct btrfs_tree_block_info *info;
10712 info = (struct btrfs_tree_block_info *)ptr;
10713 level = btrfs_tree_block_level(eb, info);
10714 ptr += sizeof(struct btrfs_tree_block_info);
10715 } else {
10716 /* New METADATA_ITEM */
10717 level = key.offset;
10719 end = (unsigned long)ei + item_size;
10721 if (ptr >= end) {
10722 err |= ITEM_SIZE_MISMATCH;
10723 goto out;
10726 /* Now check every backref in this extent item */
10727 next:
10728 iref = (struct btrfs_extent_inline_ref *)ptr;
10729 type = btrfs_extent_inline_ref_type(eb, iref);
10730 offset = btrfs_extent_inline_ref_offset(eb, iref);
10731 switch (type) {
10732 case BTRFS_TREE_BLOCK_REF_KEY:
10733 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10734 level);
10735 err |= ret;
10736 break;
10737 case BTRFS_SHARED_BLOCK_REF_KEY:
10738 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10739 level);
10740 err |= ret;
10741 break;
10742 case BTRFS_EXTENT_DATA_REF_KEY:
10743 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10744 ret = check_extent_data_backref(fs_info,
10745 btrfs_extent_data_ref_root(eb, dref),
10746 btrfs_extent_data_ref_objectid(eb, dref),
10747 btrfs_extent_data_ref_offset(eb, dref),
10748 key.objectid, key.offset,
10749 btrfs_extent_data_ref_count(eb, dref));
10750 err |= ret;
10751 break;
10752 case BTRFS_SHARED_DATA_REF_KEY:
10753 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10754 err |= ret;
10755 break;
10756 default:
10757 error("extent[%llu %d %llu] has unknown ref type: %d",
10758 key.objectid, key.type, key.offset, type);
10759 err |= UNKNOWN_TYPE;
10760 goto out;
10763 ptr += btrfs_extent_inline_ref_size(type);
10764 if (ptr < end)
10765 goto next;
10767 out:
10768 return err;
10772 * Check if a dev extent item is referred correctly by its chunk
10774 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10775 struct extent_buffer *eb, int slot)
10777 struct btrfs_root *chunk_root = fs_info->chunk_root;
10778 struct btrfs_dev_extent *ptr;
10779 struct btrfs_path path;
10780 struct btrfs_key chunk_key;
10781 struct btrfs_key devext_key;
10782 struct btrfs_chunk *chunk;
10783 struct extent_buffer *l;
10784 int num_stripes;
10785 u64 length;
10786 int i;
10787 int found_chunk = 0;
10788 int ret;
10790 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10791 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10792 length = btrfs_dev_extent_length(eb, ptr);
10794 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10795 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10796 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10798 btrfs_init_path(&path);
10799 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10800 if (ret)
10801 goto out;
10803 l = path.nodes[0];
10804 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10805 if (btrfs_chunk_length(l, chunk) != length)
10806 goto out;
10808 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10809 for (i = 0; i < num_stripes; i++) {
10810 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10811 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10813 if (devid == devext_key.objectid &&
10814 offset == devext_key.offset) {
10815 found_chunk = 1;
10816 break;
10819 out:
10820 btrfs_release_path(&path);
10821 if (!found_chunk) {
10822 error(
10823 "device extent[%llu, %llu, %llu] did not find the related chunk",
10824 devext_key.objectid, devext_key.offset, length);
10825 return REFERENCER_MISSING;
10827 return 0;
10831 * Check if the used space is correct with the dev item
10833 static int check_dev_item(struct btrfs_fs_info *fs_info,
10834 struct extent_buffer *eb, int slot)
10836 struct btrfs_root *dev_root = fs_info->dev_root;
10837 struct btrfs_dev_item *dev_item;
10838 struct btrfs_path path;
10839 struct btrfs_key key;
10840 struct btrfs_dev_extent *ptr;
10841 u64 dev_id;
10842 u64 used;
10843 u64 total = 0;
10844 int ret;
10846 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10847 dev_id = btrfs_device_id(eb, dev_item);
10848 used = btrfs_device_bytes_used(eb, dev_item);
10850 key.objectid = dev_id;
10851 key.type = BTRFS_DEV_EXTENT_KEY;
10852 key.offset = 0;
10854 btrfs_init_path(&path);
10855 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10856 if (ret < 0) {
10857 btrfs_item_key_to_cpu(eb, &key, slot);
10858 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10859 key.objectid, key.type, key.offset);
10860 btrfs_release_path(&path);
10861 return REFERENCER_MISSING;
10864 /* Iterate dev_extents to calculate the used space of a device */
10865 while (1) {
10866 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10867 goto next;
10869 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10870 if (key.objectid > dev_id)
10871 break;
10872 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10873 goto next;
10875 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10876 struct btrfs_dev_extent);
10877 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10878 next:
10879 ret = btrfs_next_item(dev_root, &path);
10880 if (ret)
10881 break;
10883 btrfs_release_path(&path);
10885 if (used != total) {
10886 btrfs_item_key_to_cpu(eb, &key, slot);
10887 error(
10888 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10889 total, used, BTRFS_ROOT_TREE_OBJECTID,
10890 BTRFS_DEV_EXTENT_KEY, dev_id);
10891 return ACCOUNTING_MISMATCH;
10893 return 0;
10897 * Check a block group item with its referener (chunk) and its used space
10898 * with extent/metadata item
10900 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10901 struct extent_buffer *eb, int slot)
10903 struct btrfs_root *extent_root = fs_info->extent_root;
10904 struct btrfs_root *chunk_root = fs_info->chunk_root;
10905 struct btrfs_block_group_item *bi;
10906 struct btrfs_block_group_item bg_item;
10907 struct btrfs_path path;
10908 struct btrfs_key bg_key;
10909 struct btrfs_key chunk_key;
10910 struct btrfs_key extent_key;
10911 struct btrfs_chunk *chunk;
10912 struct extent_buffer *leaf;
10913 struct btrfs_extent_item *ei;
10914 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10915 u64 flags;
10916 u64 bg_flags;
10917 u64 used;
10918 u64 total = 0;
10919 int ret;
10920 int err = 0;
10922 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10923 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10924 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10925 used = btrfs_block_group_used(&bg_item);
10926 bg_flags = btrfs_block_group_flags(&bg_item);
10928 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10929 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10930 chunk_key.offset = bg_key.objectid;
10932 btrfs_init_path(&path);
10933 /* Search for the referencer chunk */
10934 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10935 if (ret) {
10936 error(
10937 "block group[%llu %llu] did not find the related chunk item",
10938 bg_key.objectid, bg_key.offset);
10939 err |= REFERENCER_MISSING;
10940 } else {
10941 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10942 struct btrfs_chunk);
10943 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10944 bg_key.offset) {
10945 error(
10946 "block group[%llu %llu] related chunk item length does not match",
10947 bg_key.objectid, bg_key.offset);
10948 err |= REFERENCER_MISMATCH;
10951 btrfs_release_path(&path);
10953 /* Search from the block group bytenr */
10954 extent_key.objectid = bg_key.objectid;
10955 extent_key.type = 0;
10956 extent_key.offset = 0;
10958 btrfs_init_path(&path);
10959 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10960 if (ret < 0)
10961 goto out;
10963 /* Iterate extent tree to account used space */
10964 while (1) {
10965 leaf = path.nodes[0];
10967 /* Search slot can point to the last item beyond leaf nritems */
10968 if (path.slots[0] >= btrfs_header_nritems(leaf))
10969 goto next;
10971 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10972 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10973 break;
10975 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10976 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10977 goto next;
10978 if (extent_key.objectid < bg_key.objectid)
10979 goto next;
10981 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10982 total += nodesize;
10983 else
10984 total += extent_key.offset;
10986 ei = btrfs_item_ptr(leaf, path.slots[0],
10987 struct btrfs_extent_item);
10988 flags = btrfs_extent_flags(leaf, ei);
10989 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10990 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10991 error(
10992 "bad extent[%llu, %llu) type mismatch with chunk",
10993 extent_key.objectid,
10994 extent_key.objectid + extent_key.offset);
10995 err |= CHUNK_TYPE_MISMATCH;
10997 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10998 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10999 BTRFS_BLOCK_GROUP_METADATA))) {
11000 error(
11001 "bad extent[%llu, %llu) type mismatch with chunk",
11002 extent_key.objectid,
11003 extent_key.objectid + nodesize);
11004 err |= CHUNK_TYPE_MISMATCH;
11007 next:
11008 ret = btrfs_next_item(extent_root, &path);
11009 if (ret)
11010 break;
11013 out:
11014 btrfs_release_path(&path);
11016 if (total != used) {
11017 error(
11018 "block group[%llu %llu] used %llu but extent items used %llu",
11019 bg_key.objectid, bg_key.offset, used, total);
11020 err |= ACCOUNTING_MISMATCH;
11022 return err;
11026 * Check a chunk item.
11027 * Including checking all referred dev_extents and block group
11029 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11030 struct extent_buffer *eb, int slot)
11032 struct btrfs_root *extent_root = fs_info->extent_root;
11033 struct btrfs_root *dev_root = fs_info->dev_root;
11034 struct btrfs_path path;
11035 struct btrfs_key chunk_key;
11036 struct btrfs_key bg_key;
11037 struct btrfs_key devext_key;
11038 struct btrfs_chunk *chunk;
11039 struct extent_buffer *leaf;
11040 struct btrfs_block_group_item *bi;
11041 struct btrfs_block_group_item bg_item;
11042 struct btrfs_dev_extent *ptr;
11043 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11044 u64 length;
11045 u64 chunk_end;
11046 u64 type;
11047 u64 profile;
11048 int num_stripes;
11049 u64 offset;
11050 u64 objectid;
11051 int i;
11052 int ret;
11053 int err = 0;
11055 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11056 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11057 length = btrfs_chunk_length(eb, chunk);
11058 chunk_end = chunk_key.offset + length;
11059 if (!IS_ALIGNED(length, sectorsize)) {
11060 error("chunk[%llu %llu) not aligned to %u",
11061 chunk_key.offset, chunk_end, sectorsize);
11062 err |= BYTES_UNALIGNED;
11063 goto out;
11066 type = btrfs_chunk_type(eb, chunk);
11067 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11068 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11069 error("chunk[%llu %llu) has no chunk type",
11070 chunk_key.offset, chunk_end);
11071 err |= UNKNOWN_TYPE;
11073 if (profile && (profile & (profile - 1))) {
11074 error("chunk[%llu %llu) multiple profiles detected: %llx",
11075 chunk_key.offset, chunk_end, profile);
11076 err |= UNKNOWN_TYPE;
11079 bg_key.objectid = chunk_key.offset;
11080 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11081 bg_key.offset = length;
11083 btrfs_init_path(&path);
11084 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11085 if (ret) {
11086 error(
11087 "chunk[%llu %llu) did not find the related block group item",
11088 chunk_key.offset, chunk_end);
11089 err |= REFERENCER_MISSING;
11090 } else{
11091 leaf = path.nodes[0];
11092 bi = btrfs_item_ptr(leaf, path.slots[0],
11093 struct btrfs_block_group_item);
11094 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11095 sizeof(bg_item));
11096 if (btrfs_block_group_flags(&bg_item) != type) {
11097 error(
11098 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11099 chunk_key.offset, chunk_end, type,
11100 btrfs_block_group_flags(&bg_item));
11101 err |= REFERENCER_MISSING;
11105 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11106 for (i = 0; i < num_stripes; i++) {
11107 btrfs_release_path(&path);
11108 btrfs_init_path(&path);
11109 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11110 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11111 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11113 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11114 0, 0);
11115 if (ret)
11116 goto not_match_dev;
11118 leaf = path.nodes[0];
11119 ptr = btrfs_item_ptr(leaf, path.slots[0],
11120 struct btrfs_dev_extent);
11121 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11122 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11123 if (objectid != chunk_key.objectid ||
11124 offset != chunk_key.offset ||
11125 btrfs_dev_extent_length(leaf, ptr) != length)
11126 goto not_match_dev;
11127 continue;
11128 not_match_dev:
11129 err |= BACKREF_MISSING;
11130 error(
11131 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11132 chunk_key.objectid, chunk_end, i);
11133 continue;
11135 btrfs_release_path(&path);
11136 out:
11137 return err;
11141 * Main entry function to check known items and update related accounting info
11143 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11145 struct btrfs_fs_info *fs_info = root->fs_info;
11146 struct btrfs_key key;
11147 int slot = 0;
11148 int type;
11149 struct btrfs_extent_data_ref *dref;
11150 int ret;
11151 int err = 0;
11153 next:
11154 btrfs_item_key_to_cpu(eb, &key, slot);
11155 type = key.type;
11157 switch (type) {
11158 case BTRFS_EXTENT_DATA_KEY:
11159 ret = check_extent_data_item(root, eb, slot);
11160 err |= ret;
11161 break;
11162 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11163 ret = check_block_group_item(fs_info, eb, slot);
11164 err |= ret;
11165 break;
11166 case BTRFS_DEV_ITEM_KEY:
11167 ret = check_dev_item(fs_info, eb, slot);
11168 err |= ret;
11169 break;
11170 case BTRFS_CHUNK_ITEM_KEY:
11171 ret = check_chunk_item(fs_info, eb, slot);
11172 err |= ret;
11173 break;
11174 case BTRFS_DEV_EXTENT_KEY:
11175 ret = check_dev_extent_item(fs_info, eb, slot);
11176 err |= ret;
11177 break;
11178 case BTRFS_EXTENT_ITEM_KEY:
11179 case BTRFS_METADATA_ITEM_KEY:
11180 ret = check_extent_item(fs_info, eb, slot);
11181 err |= ret;
11182 break;
11183 case BTRFS_EXTENT_CSUM_KEY:
11184 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11185 break;
11186 case BTRFS_TREE_BLOCK_REF_KEY:
11187 ret = check_tree_block_backref(fs_info, key.offset,
11188 key.objectid, -1);
11189 err |= ret;
11190 break;
11191 case BTRFS_EXTENT_DATA_REF_KEY:
11192 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11193 ret = check_extent_data_backref(fs_info,
11194 btrfs_extent_data_ref_root(eb, dref),
11195 btrfs_extent_data_ref_objectid(eb, dref),
11196 btrfs_extent_data_ref_offset(eb, dref),
11197 key.objectid, 0,
11198 btrfs_extent_data_ref_count(eb, dref));
11199 err |= ret;
11200 break;
11201 case BTRFS_SHARED_BLOCK_REF_KEY:
11202 ret = check_shared_block_backref(fs_info, key.offset,
11203 key.objectid, -1);
11204 err |= ret;
11205 break;
11206 case BTRFS_SHARED_DATA_REF_KEY:
11207 ret = check_shared_data_backref(fs_info, key.offset,
11208 key.objectid);
11209 err |= ret;
11210 break;
11211 default:
11212 break;
11215 if (++slot < btrfs_header_nritems(eb))
11216 goto next;
11218 return err;
11222 * Helper function for later fs/subvol tree check. To determine if a tree
11223 * block should be checked.
11224 * This function will ensure only the direct referencer with lowest rootid to
11225 * check a fs/subvolume tree block.
11227 * Backref check at extent tree would detect errors like missing subvolume
11228 * tree, so we can do aggressive check to reduce duplicated checks.
11230 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11232 struct btrfs_root *extent_root = root->fs_info->extent_root;
11233 struct btrfs_key key;
11234 struct btrfs_path path;
11235 struct extent_buffer *leaf;
11236 int slot;
11237 struct btrfs_extent_item *ei;
11238 unsigned long ptr;
11239 unsigned long end;
11240 int type;
11241 u32 item_size;
11242 u64 offset;
11243 struct btrfs_extent_inline_ref *iref;
11244 int ret;
11246 btrfs_init_path(&path);
11247 key.objectid = btrfs_header_bytenr(eb);
11248 key.type = BTRFS_METADATA_ITEM_KEY;
11249 key.offset = (u64)-1;
11252 * Any failure in backref resolving means we can't determine
11253 * whom the tree block belongs to.
11254 * So in that case, we need to check that tree block
11256 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11257 if (ret < 0)
11258 goto need_check;
11260 ret = btrfs_previous_extent_item(extent_root, &path,
11261 btrfs_header_bytenr(eb));
11262 if (ret)
11263 goto need_check;
11265 leaf = path.nodes[0];
11266 slot = path.slots[0];
11267 btrfs_item_key_to_cpu(leaf, &key, slot);
11268 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11270 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11271 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11272 } else {
11273 struct btrfs_tree_block_info *info;
11275 info = (struct btrfs_tree_block_info *)(ei + 1);
11276 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11279 item_size = btrfs_item_size_nr(leaf, slot);
11280 ptr = (unsigned long)iref;
11281 end = (unsigned long)ei + item_size;
11282 while (ptr < end) {
11283 iref = (struct btrfs_extent_inline_ref *)ptr;
11284 type = btrfs_extent_inline_ref_type(leaf, iref);
11285 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11288 * We only check the tree block if current root is
11289 * the lowest referencer of it.
11291 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11292 offset < root->objectid) {
11293 btrfs_release_path(&path);
11294 return 0;
11297 ptr += btrfs_extent_inline_ref_size(type);
11300 * Normally we should also check keyed tree block ref, but that may be
11301 * very time consuming. Inlined ref should already make us skip a lot
11302 * of refs now. So skip search keyed tree block ref.
11305 need_check:
11306 btrfs_release_path(&path);
11307 return 1;
11311 * Traversal function for tree block. We will do:
11312 * 1) Skip shared fs/subvolume tree blocks
11313 * 2) Update related bytes accounting
11314 * 3) Pre-order traversal
11316 static int traverse_tree_block(struct btrfs_root *root,
11317 struct extent_buffer *node)
11319 struct extent_buffer *eb;
11320 struct btrfs_key key;
11321 struct btrfs_key drop_key;
11322 int level;
11323 u64 nr;
11324 int i;
11325 int err = 0;
11326 int ret;
11329 * Skip shared fs/subvolume tree block, in that case they will
11330 * be checked by referencer with lowest rootid
11332 if (is_fstree(root->objectid) && !should_check(root, node))
11333 return 0;
11335 /* Update bytes accounting */
11336 total_btree_bytes += node->len;
11337 if (fs_root_objectid(btrfs_header_owner(node)))
11338 total_fs_tree_bytes += node->len;
11339 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11340 total_extent_tree_bytes += node->len;
11341 if (!found_old_backref &&
11342 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11343 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11344 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11345 found_old_backref = 1;
11347 /* pre-order tranversal, check itself first */
11348 level = btrfs_header_level(node);
11349 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11350 btrfs_header_level(node),
11351 btrfs_header_owner(node));
11352 err |= ret;
11353 if (err)
11354 error(
11355 "check %s failed root %llu bytenr %llu level %d, force continue check",
11356 level ? "node":"leaf", root->objectid,
11357 btrfs_header_bytenr(node), btrfs_header_level(node));
11359 if (!level) {
11360 btree_space_waste += btrfs_leaf_free_space(root, node);
11361 ret = check_leaf_items(root, node);
11362 err |= ret;
11363 return err;
11366 nr = btrfs_header_nritems(node);
11367 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11368 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11369 sizeof(struct btrfs_key_ptr);
11371 /* Then check all its children */
11372 for (i = 0; i < nr; i++) {
11373 u64 blocknr = btrfs_node_blockptr(node, i);
11375 btrfs_node_key_to_cpu(node, &key, i);
11376 if (level == root->root_item.drop_level &&
11377 is_dropped_key(&key, &drop_key))
11378 continue;
11381 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11382 * to call the function itself.
11384 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11385 if (extent_buffer_uptodate(eb)) {
11386 ret = traverse_tree_block(root, eb);
11387 err |= ret;
11389 free_extent_buffer(eb);
11392 return err;
11396 * Low memory usage version check_chunks_and_extents.
11398 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11400 struct btrfs_path path;
11401 struct btrfs_key key;
11402 struct btrfs_root *root1;
11403 struct btrfs_root *cur_root;
11404 int err = 0;
11405 int ret;
11407 root1 = root->fs_info->chunk_root;
11408 ret = traverse_tree_block(root1, root1->node);
11409 err |= ret;
11411 root1 = root->fs_info->tree_root;
11412 ret = traverse_tree_block(root1, root1->node);
11413 err |= ret;
11415 btrfs_init_path(&path);
11416 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11417 key.offset = 0;
11418 key.type = BTRFS_ROOT_ITEM_KEY;
11420 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11421 if (ret) {
11422 error("cannot find extent treet in tree_root");
11423 goto out;
11426 while (1) {
11427 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11428 if (key.type != BTRFS_ROOT_ITEM_KEY)
11429 goto next;
11430 key.offset = (u64)-1;
11432 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11433 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11434 &key);
11435 else
11436 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11437 if (IS_ERR(cur_root) || !cur_root) {
11438 error("failed to read tree: %lld", key.objectid);
11439 goto next;
11442 ret = traverse_tree_block(cur_root, cur_root->node);
11443 err |= ret;
11445 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11446 btrfs_free_fs_root(cur_root);
11447 next:
11448 ret = btrfs_next_item(root1, &path);
11449 if (ret)
11450 goto out;
11453 out:
11454 btrfs_release_path(&path);
11455 return err;
11458 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11459 struct btrfs_root *root, int overwrite)
11461 struct extent_buffer *c;
11462 struct extent_buffer *old = root->node;
11463 int level;
11464 int ret;
11465 struct btrfs_disk_key disk_key = {0,0,0};
11467 level = 0;
11469 if (overwrite) {
11470 c = old;
11471 extent_buffer_get(c);
11472 goto init;
11474 c = btrfs_alloc_free_block(trans, root,
11475 root->nodesize,
11476 root->root_key.objectid,
11477 &disk_key, level, 0, 0);
11478 if (IS_ERR(c)) {
11479 c = old;
11480 extent_buffer_get(c);
11481 overwrite = 1;
11483 init:
11484 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11485 btrfs_set_header_level(c, level);
11486 btrfs_set_header_bytenr(c, c->start);
11487 btrfs_set_header_generation(c, trans->transid);
11488 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11489 btrfs_set_header_owner(c, root->root_key.objectid);
11491 write_extent_buffer(c, root->fs_info->fsid,
11492 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11494 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11495 btrfs_header_chunk_tree_uuid(c),
11496 BTRFS_UUID_SIZE);
11498 btrfs_mark_buffer_dirty(c);
11500 * this case can happen in the following case:
11502 * 1.overwrite previous root.
11504 * 2.reinit reloc data root, this is because we skip pin
11505 * down reloc data tree before which means we can allocate
11506 * same block bytenr here.
11508 if (old->start == c->start) {
11509 btrfs_set_root_generation(&root->root_item,
11510 trans->transid);
11511 root->root_item.level = btrfs_header_level(root->node);
11512 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11513 &root->root_key, &root->root_item);
11514 if (ret) {
11515 free_extent_buffer(c);
11516 return ret;
11519 free_extent_buffer(old);
11520 root->node = c;
11521 add_root_to_dirty_list(root);
11522 return 0;
11525 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11526 struct extent_buffer *eb, int tree_root)
11528 struct extent_buffer *tmp;
11529 struct btrfs_root_item *ri;
11530 struct btrfs_key key;
11531 u64 bytenr;
11532 u32 nodesize;
11533 int level = btrfs_header_level(eb);
11534 int nritems;
11535 int ret;
11536 int i;
11539 * If we have pinned this block before, don't pin it again.
11540 * This can not only avoid forever loop with broken filesystem
11541 * but also give us some speedups.
11543 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11544 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11545 return 0;
11547 btrfs_pin_extent(fs_info, eb->start, eb->len);
11549 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11550 nritems = btrfs_header_nritems(eb);
11551 for (i = 0; i < nritems; i++) {
11552 if (level == 0) {
11553 btrfs_item_key_to_cpu(eb, &key, i);
11554 if (key.type != BTRFS_ROOT_ITEM_KEY)
11555 continue;
11556 /* Skip the extent root and reloc roots */
11557 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11558 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11559 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11560 continue;
11561 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11562 bytenr = btrfs_disk_root_bytenr(eb, ri);
11565 * If at any point we start needing the real root we
11566 * will have to build a stump root for the root we are
11567 * in, but for now this doesn't actually use the root so
11568 * just pass in extent_root.
11570 tmp = read_tree_block(fs_info->extent_root, bytenr,
11571 nodesize, 0);
11572 if (!extent_buffer_uptodate(tmp)) {
11573 fprintf(stderr, "Error reading root block\n");
11574 return -EIO;
11576 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11577 free_extent_buffer(tmp);
11578 if (ret)
11579 return ret;
11580 } else {
11581 bytenr = btrfs_node_blockptr(eb, i);
11583 /* If we aren't the tree root don't read the block */
11584 if (level == 1 && !tree_root) {
11585 btrfs_pin_extent(fs_info, bytenr, nodesize);
11586 continue;
11589 tmp = read_tree_block(fs_info->extent_root, bytenr,
11590 nodesize, 0);
11591 if (!extent_buffer_uptodate(tmp)) {
11592 fprintf(stderr, "Error reading tree block\n");
11593 return -EIO;
11595 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11596 free_extent_buffer(tmp);
11597 if (ret)
11598 return ret;
11602 return 0;
11605 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11607 int ret;
11609 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11610 if (ret)
11611 return ret;
11613 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11616 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11618 struct btrfs_block_group_cache *cache;
11619 struct btrfs_path path;
11620 struct extent_buffer *leaf;
11621 struct btrfs_chunk *chunk;
11622 struct btrfs_key key;
11623 int ret;
11624 u64 start;
11626 btrfs_init_path(&path);
11627 key.objectid = 0;
11628 key.type = BTRFS_CHUNK_ITEM_KEY;
11629 key.offset = 0;
11630 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11631 if (ret < 0) {
11632 btrfs_release_path(&path);
11633 return ret;
11637 * We do this in case the block groups were screwed up and had alloc
11638 * bits that aren't actually set on the chunks. This happens with
11639 * restored images every time and could happen in real life I guess.
11641 fs_info->avail_data_alloc_bits = 0;
11642 fs_info->avail_metadata_alloc_bits = 0;
11643 fs_info->avail_system_alloc_bits = 0;
11645 /* First we need to create the in-memory block groups */
11646 while (1) {
11647 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11648 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11649 if (ret < 0) {
11650 btrfs_release_path(&path);
11651 return ret;
11653 if (ret) {
11654 ret = 0;
11655 break;
11658 leaf = path.nodes[0];
11659 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11660 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11661 path.slots[0]++;
11662 continue;
11665 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11666 btrfs_add_block_group(fs_info, 0,
11667 btrfs_chunk_type(leaf, chunk),
11668 key.objectid, key.offset,
11669 btrfs_chunk_length(leaf, chunk));
11670 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11671 key.offset + btrfs_chunk_length(leaf, chunk));
11672 path.slots[0]++;
11674 start = 0;
11675 while (1) {
11676 cache = btrfs_lookup_first_block_group(fs_info, start);
11677 if (!cache)
11678 break;
11679 cache->cached = 1;
11680 start = cache->key.objectid + cache->key.offset;
11683 btrfs_release_path(&path);
11684 return 0;
11687 static int reset_balance(struct btrfs_trans_handle *trans,
11688 struct btrfs_fs_info *fs_info)
11690 struct btrfs_root *root = fs_info->tree_root;
11691 struct btrfs_path path;
11692 struct extent_buffer *leaf;
11693 struct btrfs_key key;
11694 int del_slot, del_nr = 0;
11695 int ret;
11696 int found = 0;
11698 btrfs_init_path(&path);
11699 key.objectid = BTRFS_BALANCE_OBJECTID;
11700 key.type = BTRFS_BALANCE_ITEM_KEY;
11701 key.offset = 0;
11702 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11703 if (ret) {
11704 if (ret > 0)
11705 ret = 0;
11706 if (!ret)
11707 goto reinit_data_reloc;
11708 else
11709 goto out;
11712 ret = btrfs_del_item(trans, root, &path);
11713 if (ret)
11714 goto out;
11715 btrfs_release_path(&path);
11717 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11718 key.type = BTRFS_ROOT_ITEM_KEY;
11719 key.offset = 0;
11720 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11721 if (ret < 0)
11722 goto out;
11723 while (1) {
11724 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11725 if (!found)
11726 break;
11728 if (del_nr) {
11729 ret = btrfs_del_items(trans, root, &path,
11730 del_slot, del_nr);
11731 del_nr = 0;
11732 if (ret)
11733 goto out;
11735 key.offset++;
11736 btrfs_release_path(&path);
11738 found = 0;
11739 ret = btrfs_search_slot(trans, root, &key, &path,
11740 -1, 1);
11741 if (ret < 0)
11742 goto out;
11743 continue;
11745 found = 1;
11746 leaf = path.nodes[0];
11747 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11748 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11749 break;
11750 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11751 path.slots[0]++;
11752 continue;
11754 if (!del_nr) {
11755 del_slot = path.slots[0];
11756 del_nr = 1;
11757 } else {
11758 del_nr++;
11760 path.slots[0]++;
11763 if (del_nr) {
11764 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11765 if (ret)
11766 goto out;
11768 btrfs_release_path(&path);
11770 reinit_data_reloc:
11771 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11772 key.type = BTRFS_ROOT_ITEM_KEY;
11773 key.offset = (u64)-1;
11774 root = btrfs_read_fs_root(fs_info, &key);
11775 if (IS_ERR(root)) {
11776 fprintf(stderr, "Error reading data reloc tree\n");
11777 ret = PTR_ERR(root);
11778 goto out;
11780 record_root_in_trans(trans, root);
11781 ret = btrfs_fsck_reinit_root(trans, root, 0);
11782 if (ret)
11783 goto out;
11784 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11785 out:
11786 btrfs_release_path(&path);
11787 return ret;
11790 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11791 struct btrfs_fs_info *fs_info)
11793 u64 start = 0;
11794 int ret;
11797 * The only reason we don't do this is because right now we're just
11798 * walking the trees we find and pinning down their bytes, we don't look
11799 * at any of the leaves. In order to do mixed groups we'd have to check
11800 * the leaves of any fs roots and pin down the bytes for any file
11801 * extents we find. Not hard but why do it if we don't have to?
11803 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11804 fprintf(stderr, "We don't support re-initing the extent tree "
11805 "for mixed block groups yet, please notify a btrfs "
11806 "developer you want to do this so they can add this "
11807 "functionality.\n");
11808 return -EINVAL;
11812 * first we need to walk all of the trees except the extent tree and pin
11813 * down the bytes that are in use so we don't overwrite any existing
11814 * metadata.
11816 ret = pin_metadata_blocks(fs_info);
11817 if (ret) {
11818 fprintf(stderr, "error pinning down used bytes\n");
11819 return ret;
11823 * Need to drop all the block groups since we're going to recreate all
11824 * of them again.
11826 btrfs_free_block_groups(fs_info);
11827 ret = reset_block_groups(fs_info);
11828 if (ret) {
11829 fprintf(stderr, "error resetting the block groups\n");
11830 return ret;
11833 /* Ok we can allocate now, reinit the extent root */
11834 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11835 if (ret) {
11836 fprintf(stderr, "extent root initialization failed\n");
11838 * When the transaction code is updated we should end the
11839 * transaction, but for now progs only knows about commit so
11840 * just return an error.
11842 return ret;
11846 * Now we have all the in-memory block groups setup so we can make
11847 * allocations properly, and the metadata we care about is safe since we
11848 * pinned all of it above.
11850 while (1) {
11851 struct btrfs_block_group_cache *cache;
11853 cache = btrfs_lookup_first_block_group(fs_info, start);
11854 if (!cache)
11855 break;
11856 start = cache->key.objectid + cache->key.offset;
11857 ret = btrfs_insert_item(trans, fs_info->extent_root,
11858 &cache->key, &cache->item,
11859 sizeof(cache->item));
11860 if (ret) {
11861 fprintf(stderr, "Error adding block group\n");
11862 return ret;
11864 btrfs_extent_post_op(trans, fs_info->extent_root);
11867 ret = reset_balance(trans, fs_info);
11868 if (ret)
11869 fprintf(stderr, "error resetting the pending balance\n");
11871 return ret;
11874 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11876 struct btrfs_path path;
11877 struct btrfs_trans_handle *trans;
11878 struct btrfs_key key;
11879 int ret;
11881 printf("Recowing metadata block %llu\n", eb->start);
11882 key.objectid = btrfs_header_owner(eb);
11883 key.type = BTRFS_ROOT_ITEM_KEY;
11884 key.offset = (u64)-1;
11886 root = btrfs_read_fs_root(root->fs_info, &key);
11887 if (IS_ERR(root)) {
11888 fprintf(stderr, "Couldn't find owner root %llu\n",
11889 key.objectid);
11890 return PTR_ERR(root);
11893 trans = btrfs_start_transaction(root, 1);
11894 if (IS_ERR(trans))
11895 return PTR_ERR(trans);
11897 btrfs_init_path(&path);
11898 path.lowest_level = btrfs_header_level(eb);
11899 if (path.lowest_level)
11900 btrfs_node_key_to_cpu(eb, &key, 0);
11901 else
11902 btrfs_item_key_to_cpu(eb, &key, 0);
11904 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11905 btrfs_commit_transaction(trans, root);
11906 btrfs_release_path(&path);
11907 return ret;
11910 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11912 struct btrfs_path path;
11913 struct btrfs_trans_handle *trans;
11914 struct btrfs_key key;
11915 int ret;
11917 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11918 bad->key.type, bad->key.offset);
11919 key.objectid = bad->root_id;
11920 key.type = BTRFS_ROOT_ITEM_KEY;
11921 key.offset = (u64)-1;
11923 root = btrfs_read_fs_root(root->fs_info, &key);
11924 if (IS_ERR(root)) {
11925 fprintf(stderr, "Couldn't find owner root %llu\n",
11926 key.objectid);
11927 return PTR_ERR(root);
11930 trans = btrfs_start_transaction(root, 1);
11931 if (IS_ERR(trans))
11932 return PTR_ERR(trans);
11934 btrfs_init_path(&path);
11935 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11936 if (ret) {
11937 if (ret > 0)
11938 ret = 0;
11939 goto out;
11941 ret = btrfs_del_item(trans, root, &path);
11942 out:
11943 btrfs_commit_transaction(trans, root);
11944 btrfs_release_path(&path);
11945 return ret;
11948 static int zero_log_tree(struct btrfs_root *root)
11950 struct btrfs_trans_handle *trans;
11951 int ret;
11953 trans = btrfs_start_transaction(root, 1);
11954 if (IS_ERR(trans)) {
11955 ret = PTR_ERR(trans);
11956 return ret;
11958 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11959 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11960 ret = btrfs_commit_transaction(trans, root);
11961 return ret;
11964 static int populate_csum(struct btrfs_trans_handle *trans,
11965 struct btrfs_root *csum_root, char *buf, u64 start,
11966 u64 len)
11968 u64 offset = 0;
11969 u64 sectorsize;
11970 int ret = 0;
11972 while (offset < len) {
11973 sectorsize = csum_root->sectorsize;
11974 ret = read_extent_data(csum_root, buf, start + offset,
11975 &sectorsize, 0);
11976 if (ret)
11977 break;
11978 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11979 start + offset, buf, sectorsize);
11980 if (ret)
11981 break;
11982 offset += sectorsize;
11984 return ret;
11987 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11988 struct btrfs_root *csum_root,
11989 struct btrfs_root *cur_root)
11991 struct btrfs_path path;
11992 struct btrfs_key key;
11993 struct extent_buffer *node;
11994 struct btrfs_file_extent_item *fi;
11995 char *buf = NULL;
11996 u64 start = 0;
11997 u64 len = 0;
11998 int slot = 0;
11999 int ret = 0;
12001 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12002 if (!buf)
12003 return -ENOMEM;
12005 btrfs_init_path(&path);
12006 key.objectid = 0;
12007 key.offset = 0;
12008 key.type = 0;
12009 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12010 if (ret < 0)
12011 goto out;
12012 /* Iterate all regular file extents and fill its csum */
12013 while (1) {
12014 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12016 if (key.type != BTRFS_EXTENT_DATA_KEY)
12017 goto next;
12018 node = path.nodes[0];
12019 slot = path.slots[0];
12020 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12021 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12022 goto next;
12023 start = btrfs_file_extent_disk_bytenr(node, fi);
12024 len = btrfs_file_extent_disk_num_bytes(node, fi);
12026 ret = populate_csum(trans, csum_root, buf, start, len);
12027 if (ret == -EEXIST)
12028 ret = 0;
12029 if (ret < 0)
12030 goto out;
12031 next:
12033 * TODO: if next leaf is corrupted, jump to nearest next valid
12034 * leaf.
12036 ret = btrfs_next_item(cur_root, &path);
12037 if (ret < 0)
12038 goto out;
12039 if (ret > 0) {
12040 ret = 0;
12041 goto out;
12045 out:
12046 btrfs_release_path(&path);
12047 free(buf);
12048 return ret;
12051 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12052 struct btrfs_root *csum_root)
12054 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12055 struct btrfs_path path;
12056 struct btrfs_root *tree_root = fs_info->tree_root;
12057 struct btrfs_root *cur_root;
12058 struct extent_buffer *node;
12059 struct btrfs_key key;
12060 int slot = 0;
12061 int ret = 0;
12063 btrfs_init_path(&path);
12064 key.objectid = BTRFS_FS_TREE_OBJECTID;
12065 key.offset = 0;
12066 key.type = BTRFS_ROOT_ITEM_KEY;
12067 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12068 if (ret < 0)
12069 goto out;
12070 if (ret > 0) {
12071 ret = -ENOENT;
12072 goto out;
12075 while (1) {
12076 node = path.nodes[0];
12077 slot = path.slots[0];
12078 btrfs_item_key_to_cpu(node, &key, slot);
12079 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12080 goto out;
12081 if (key.type != BTRFS_ROOT_ITEM_KEY)
12082 goto next;
12083 if (!is_fstree(key.objectid))
12084 goto next;
12085 key.offset = (u64)-1;
12087 cur_root = btrfs_read_fs_root(fs_info, &key);
12088 if (IS_ERR(cur_root) || !cur_root) {
12089 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12090 key.objectid);
12091 goto out;
12093 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12094 cur_root);
12095 if (ret < 0)
12096 goto out;
12097 next:
12098 ret = btrfs_next_item(tree_root, &path);
12099 if (ret > 0) {
12100 ret = 0;
12101 goto out;
12103 if (ret < 0)
12104 goto out;
12107 out:
12108 btrfs_release_path(&path);
12109 return ret;
12112 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12113 struct btrfs_root *csum_root)
12115 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12116 struct btrfs_path path;
12117 struct btrfs_extent_item *ei;
12118 struct extent_buffer *leaf;
12119 char *buf;
12120 struct btrfs_key key;
12121 int ret;
12123 btrfs_init_path(&path);
12124 key.objectid = 0;
12125 key.type = BTRFS_EXTENT_ITEM_KEY;
12126 key.offset = 0;
12127 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12128 if (ret < 0) {
12129 btrfs_release_path(&path);
12130 return ret;
12133 buf = malloc(csum_root->sectorsize);
12134 if (!buf) {
12135 btrfs_release_path(&path);
12136 return -ENOMEM;
12139 while (1) {
12140 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12141 ret = btrfs_next_leaf(extent_root, &path);
12142 if (ret < 0)
12143 break;
12144 if (ret) {
12145 ret = 0;
12146 break;
12149 leaf = path.nodes[0];
12151 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12152 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12153 path.slots[0]++;
12154 continue;
12157 ei = btrfs_item_ptr(leaf, path.slots[0],
12158 struct btrfs_extent_item);
12159 if (!(btrfs_extent_flags(leaf, ei) &
12160 BTRFS_EXTENT_FLAG_DATA)) {
12161 path.slots[0]++;
12162 continue;
12165 ret = populate_csum(trans, csum_root, buf, key.objectid,
12166 key.offset);
12167 if (ret)
12168 break;
12169 path.slots[0]++;
12172 btrfs_release_path(&path);
12173 free(buf);
12174 return ret;
/*
 * Recalculate all checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes the extent information, so in that
 * case the extent tree cannot be the data source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are scanned instead of the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12194 static void free_roots_info_cache(void)
12196 if (!roots_info_cache)
12197 return;
12199 while (!cache_tree_empty(roots_info_cache)) {
12200 struct cache_extent *entry;
12201 struct root_item_info *rii;
12203 entry = first_cache_extent(roots_info_cache);
12204 if (!entry)
12205 break;
12206 remove_cache_extent(roots_info_cache, entry);
12207 rii = container_of(entry, struct root_item_info, cache_extent);
12208 free(rii);
12211 free(roots_info_cache);
12212 roots_info_cache = NULL;
12215 static int build_roots_info_cache(struct btrfs_fs_info *info)
12217 int ret = 0;
12218 struct btrfs_key key;
12219 struct extent_buffer *leaf;
12220 struct btrfs_path path;
12222 if (!roots_info_cache) {
12223 roots_info_cache = malloc(sizeof(*roots_info_cache));
12224 if (!roots_info_cache)
12225 return -ENOMEM;
12226 cache_tree_init(roots_info_cache);
12229 btrfs_init_path(&path);
12230 key.objectid = 0;
12231 key.type = BTRFS_EXTENT_ITEM_KEY;
12232 key.offset = 0;
12233 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12234 if (ret < 0)
12235 goto out;
12236 leaf = path.nodes[0];
12238 while (1) {
12239 struct btrfs_key found_key;
12240 struct btrfs_extent_item *ei;
12241 struct btrfs_extent_inline_ref *iref;
12242 int slot = path.slots[0];
12243 int type;
12244 u64 flags;
12245 u64 root_id;
12246 u8 level;
12247 struct cache_extent *entry;
12248 struct root_item_info *rii;
12250 if (slot >= btrfs_header_nritems(leaf)) {
12251 ret = btrfs_next_leaf(info->extent_root, &path);
12252 if (ret < 0) {
12253 break;
12254 } else if (ret) {
12255 ret = 0;
12256 break;
12258 leaf = path.nodes[0];
12259 slot = path.slots[0];
12262 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12264 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12265 found_key.type != BTRFS_METADATA_ITEM_KEY)
12266 goto next;
12268 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12269 flags = btrfs_extent_flags(leaf, ei);
12271 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12272 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12273 goto next;
12275 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12276 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12277 level = found_key.offset;
12278 } else {
12279 struct btrfs_tree_block_info *binfo;
12281 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12282 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12283 level = btrfs_tree_block_level(leaf, binfo);
12287 * For a root extent, it must be of the following type and the
12288 * first (and only one) iref in the item.
12290 type = btrfs_extent_inline_ref_type(leaf, iref);
12291 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12292 goto next;
12294 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12295 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12296 if (!entry) {
12297 rii = malloc(sizeof(struct root_item_info));
12298 if (!rii) {
12299 ret = -ENOMEM;
12300 goto out;
12302 rii->cache_extent.start = root_id;
12303 rii->cache_extent.size = 1;
12304 rii->level = (u8)-1;
12305 entry = &rii->cache_extent;
12306 ret = insert_cache_extent(roots_info_cache, entry);
12307 ASSERT(ret == 0);
12308 } else {
12309 rii = container_of(entry, struct root_item_info,
12310 cache_extent);
12313 ASSERT(rii->cache_extent.start == root_id);
12314 ASSERT(rii->cache_extent.size == 1);
12316 if (level > rii->level || rii->level == (u8)-1) {
12317 rii->level = level;
12318 rii->bytenr = found_key.objectid;
12319 rii->gen = btrfs_extent_generation(leaf, ei);
12320 rii->node_count = 1;
12321 } else if (level == rii->level) {
12322 rii->node_count++;
12324 next:
12325 path.slots[0]++;
12328 out:
12329 btrfs_release_path(&path);
12331 return ret;
12334 static int maybe_repair_root_item(struct btrfs_path *path,
12335 const struct btrfs_key *root_key,
12336 const int read_only_mode)
12338 const u64 root_id = root_key->objectid;
12339 struct cache_extent *entry;
12340 struct root_item_info *rii;
12341 struct btrfs_root_item ri;
12342 unsigned long offset;
12344 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12345 if (!entry) {
12346 fprintf(stderr,
12347 "Error: could not find extent items for root %llu\n",
12348 root_key->objectid);
12349 return -ENOENT;
12352 rii = container_of(entry, struct root_item_info, cache_extent);
12353 ASSERT(rii->cache_extent.start == root_id);
12354 ASSERT(rii->cache_extent.size == 1);
12356 if (rii->node_count != 1) {
12357 fprintf(stderr,
12358 "Error: could not find btree root extent for root %llu\n",
12359 root_id);
12360 return -ENOENT;
12363 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12364 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12366 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12367 btrfs_root_level(&ri) != rii->level ||
12368 btrfs_root_generation(&ri) != rii->gen) {
12371 * If we're in repair mode but our caller told us to not update
12372 * the root item, i.e. just check if it needs to be updated, don't
12373 * print this message, since the caller will call us again shortly
12374 * for the same root item without read only mode (the caller will
12375 * open a transaction first).
12377 if (!(read_only_mode && repair))
12378 fprintf(stderr,
12379 "%sroot item for root %llu,"
12380 " current bytenr %llu, current gen %llu, current level %u,"
12381 " new bytenr %llu, new gen %llu, new level %u\n",
12382 (read_only_mode ? "" : "fixing "),
12383 root_id,
12384 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12385 btrfs_root_level(&ri),
12386 rii->bytenr, rii->gen, rii->level);
12388 if (btrfs_root_generation(&ri) > rii->gen) {
12389 fprintf(stderr,
12390 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12391 root_id, btrfs_root_generation(&ri), rii->gen);
12392 return -EINVAL;
12395 if (!read_only_mode) {
12396 btrfs_set_root_bytenr(&ri, rii->bytenr);
12397 btrfs_set_root_level(&ri, rii->level);
12398 btrfs_set_root_generation(&ri, rii->gen);
12399 write_extent_buffer(path->nodes[0], &ri,
12400 offset, sizeof(ri));
12403 return 1;
12406 return 0;
12410 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12411 * caused read-only snapshots to be corrupted if they were created at a moment
12412 * when the source subvolume/snapshot had orphan items. The issue was that the
12413 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12414 * node instead of the post orphan cleanup root node.
12415 * So this function, and its callees, just detects and fixes those cases. Even
12416 * though the regression was for read-only snapshots, this function applies to
12417 * any snapshot/subvolume root.
12418 * This must be run before any other repair code - not doing it so, makes other
12419 * repair code delete or modify backrefs in the extent tree for example, which
12420 * will result in an inconsistent fs after repairing the root items.
12422 static int repair_root_items(struct btrfs_fs_info *info)
12424 struct btrfs_path path;
12425 struct btrfs_key key;
12426 struct extent_buffer *leaf;
12427 struct btrfs_trans_handle *trans = NULL;
12428 int ret = 0;
12429 int bad_roots = 0;
12430 int need_trans = 0;
12432 btrfs_init_path(&path);
12434 ret = build_roots_info_cache(info);
12435 if (ret)
12436 goto out;
12438 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12439 key.type = BTRFS_ROOT_ITEM_KEY;
12440 key.offset = 0;
12442 again:
12444 * Avoid opening and committing transactions if a leaf doesn't have
12445 * any root items that need to be fixed, so that we avoid rotating
12446 * backup roots unnecessarily.
12448 if (need_trans) {
12449 trans = btrfs_start_transaction(info->tree_root, 1);
12450 if (IS_ERR(trans)) {
12451 ret = PTR_ERR(trans);
12452 goto out;
12456 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12457 0, trans ? 1 : 0);
12458 if (ret < 0)
12459 goto out;
12460 leaf = path.nodes[0];
12462 while (1) {
12463 struct btrfs_key found_key;
12465 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12466 int no_more_keys = find_next_key(&path, &key);
12468 btrfs_release_path(&path);
12469 if (trans) {
12470 ret = btrfs_commit_transaction(trans,
12471 info->tree_root);
12472 trans = NULL;
12473 if (ret < 0)
12474 goto out;
12476 need_trans = 0;
12477 if (no_more_keys)
12478 break;
12479 goto again;
12482 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12484 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12485 goto next;
12486 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12487 goto next;
12489 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12490 if (ret < 0)
12491 goto out;
12492 if (ret) {
12493 if (!trans && repair) {
12494 need_trans = 1;
12495 key = found_key;
12496 btrfs_release_path(&path);
12497 goto again;
12499 bad_roots++;
12501 next:
12502 path.slots[0]++;
12504 ret = 0;
12505 out:
12506 free_roots_info_cache();
12507 btrfs_release_path(&path);
12508 if (trans)
12509 btrfs_commit_transaction(trans, info->tree_root);
12510 if (ret < 0)
12511 return ret;
12513 return bad_roots;
12516 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12518 struct btrfs_trans_handle *trans;
12519 struct btrfs_block_group_cache *bg_cache;
12520 u64 current = 0;
12521 int ret = 0;
12523 /* Clear all free space cache inodes and its extent data */
12524 while (1) {
12525 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12526 if (!bg_cache)
12527 break;
12528 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12529 if (ret < 0)
12530 return ret;
12531 current = bg_cache->key.objectid + bg_cache->key.offset;
12534 /* Don't forget to set cache_generation to -1 */
12535 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12536 if (IS_ERR(trans)) {
12537 error("failed to update super block cache generation");
12538 return PTR_ERR(trans);
12540 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12541 btrfs_commit_transaction(trans, fs_info->tree_root);
12543 return ret;
/*
 * Usage/help text for "btrfs check", as a NULL-terminated array of strings:
 * synopsis first, then description lines, then one entry per option line.
 *
 * NOTE(review): this listing skips one original line between the "WARNING"
 * entry and the "-s|--super" entry, and runs of spaces inside the strings
 * look collapsed.  Presumably both are extraction artifacts -- confirm
 * against the upstream file before relying on the exact text or layout.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
12577 int cmd_check(int argc, char **argv)
12579 struct cache_tree root_cache;
12580 struct btrfs_root *root;
12581 struct btrfs_fs_info *info;
12582 u64 bytenr = 0;
12583 u64 subvolid = 0;
12584 u64 tree_root_bytenr = 0;
12585 u64 chunk_root_bytenr = 0;
12586 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12587 int ret;
12588 int err = 0;
12589 u64 num;
12590 int init_csum_tree = 0;
12591 int readonly = 0;
12592 int clear_space_cache = 0;
12593 int qgroup_report = 0;
12594 int qgroups_repaired = 0;
12595 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12597 while(1) {
12598 int c;
12599 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12600 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12601 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12602 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12603 static const struct option long_options[] = {
12604 { "super", required_argument, NULL, 's' },
12605 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12606 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12607 { "init-csum-tree", no_argument, NULL,
12608 GETOPT_VAL_INIT_CSUM },
12609 { "init-extent-tree", no_argument, NULL,
12610 GETOPT_VAL_INIT_EXTENT },
12611 { "check-data-csum", no_argument, NULL,
12612 GETOPT_VAL_CHECK_CSUM },
12613 { "backup", no_argument, NULL, 'b' },
12614 { "subvol-extents", required_argument, NULL, 'E' },
12615 { "qgroup-report", no_argument, NULL, 'Q' },
12616 { "tree-root", required_argument, NULL, 'r' },
12617 { "chunk-root", required_argument, NULL,
12618 GETOPT_VAL_CHUNK_TREE },
12619 { "progress", no_argument, NULL, 'p' },
12620 { "mode", required_argument, NULL,
12621 GETOPT_VAL_MODE },
12622 { "clear-space-cache", required_argument, NULL,
12623 GETOPT_VAL_CLEAR_SPACE_CACHE},
12624 { NULL, 0, NULL, 0}
12627 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12628 if (c < 0)
12629 break;
12630 switch(c) {
12631 case 'a': /* ignored */ break;
12632 case 'b':
12633 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12634 break;
12635 case 's':
12636 num = arg_strtou64(optarg);
12637 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12638 error(
12639 "super mirror should be less than %d",
12640 BTRFS_SUPER_MIRROR_MAX);
12641 exit(1);
12643 bytenr = btrfs_sb_offset(((int)num));
12644 printf("using SB copy %llu, bytenr %llu\n", num,
12645 (unsigned long long)bytenr);
12646 break;
12647 case 'Q':
12648 qgroup_report = 1;
12649 break;
12650 case 'E':
12651 subvolid = arg_strtou64(optarg);
12652 break;
12653 case 'r':
12654 tree_root_bytenr = arg_strtou64(optarg);
12655 break;
12656 case GETOPT_VAL_CHUNK_TREE:
12657 chunk_root_bytenr = arg_strtou64(optarg);
12658 break;
12659 case 'p':
12660 ctx.progress_enabled = true;
12661 break;
12662 case '?':
12663 case 'h':
12664 usage(cmd_check_usage);
12665 case GETOPT_VAL_REPAIR:
12666 printf("enabling repair mode\n");
12667 repair = 1;
12668 ctree_flags |= OPEN_CTREE_WRITES;
12669 break;
12670 case GETOPT_VAL_READONLY:
12671 readonly = 1;
12672 break;
12673 case GETOPT_VAL_INIT_CSUM:
12674 printf("Creating a new CRC tree\n");
12675 init_csum_tree = 1;
12676 repair = 1;
12677 ctree_flags |= OPEN_CTREE_WRITES;
12678 break;
12679 case GETOPT_VAL_INIT_EXTENT:
12680 init_extent_tree = 1;
12681 ctree_flags |= (OPEN_CTREE_WRITES |
12682 OPEN_CTREE_NO_BLOCK_GROUPS);
12683 repair = 1;
12684 break;
12685 case GETOPT_VAL_CHECK_CSUM:
12686 check_data_csum = 1;
12687 break;
12688 case GETOPT_VAL_MODE:
12689 check_mode = parse_check_mode(optarg);
12690 if (check_mode == CHECK_MODE_UNKNOWN) {
12691 error("unknown mode: %s", optarg);
12692 exit(1);
12694 break;
12695 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12696 if (strcmp(optarg, "v1") == 0) {
12697 clear_space_cache = 1;
12698 } else if (strcmp(optarg, "v2") == 0) {
12699 clear_space_cache = 2;
12700 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12701 } else {
12702 error(
12703 "invalid argument to --clear-space-cache, must be v1 or v2");
12704 exit(1);
12706 ctree_flags |= OPEN_CTREE_WRITES;
12707 break;
12711 if (check_argc_exact(argc - optind, 1))
12712 usage(cmd_check_usage);
12714 if (ctx.progress_enabled) {
12715 ctx.tp = TASK_NOTHING;
12716 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12719 /* This check is the only reason for --readonly to exist */
12720 if (readonly && repair) {
12721 error("repair options are not compatible with --readonly");
12722 exit(1);
12726 * Not supported yet
12728 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12729 error("low memory mode doesn't support repair yet");
12730 exit(1);
12733 radix_tree_init();
12734 cache_tree_init(&root_cache);
12736 if((ret = check_mounted(argv[optind])) < 0) {
12737 error("could not check mount status: %s", strerror(-ret));
12738 err |= !!ret;
12739 goto err_out;
12740 } else if(ret) {
12741 error("%s is currently mounted, aborting", argv[optind]);
12742 ret = -EBUSY;
12743 err |= !!ret;
12744 goto err_out;
12747 /* only allow partial opening under repair mode */
12748 if (repair)
12749 ctree_flags |= OPEN_CTREE_PARTIAL;
12751 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12752 chunk_root_bytenr, ctree_flags);
12753 if (!info) {
12754 error("cannot open file system");
12755 ret = -EIO;
12756 err |= !!ret;
12757 goto err_out;
12760 global_info = info;
12761 root = info->fs_root;
12762 if (clear_space_cache == 1) {
12763 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12764 error(
12765 "free space cache v2 detected, use --clear-space-cache v2");
12766 ret = 1;
12767 goto close_out;
12769 printf("Clearing free space cache\n");
12770 ret = clear_free_space_cache(info);
12771 if (ret) {
12772 error("failed to clear free space cache");
12773 ret = 1;
12774 } else {
12775 printf("Free space cache cleared\n");
12777 goto close_out;
12778 } else if (clear_space_cache == 2) {
12779 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12780 printf("no free space cache v2 to clear\n");
12781 ret = 0;
12782 goto close_out;
12784 printf("Clear free space cache v2\n");
12785 ret = btrfs_clear_free_space_tree(info);
12786 if (ret) {
12787 error("failed to clear free space cache v2: %d", ret);
12788 ret = 1;
12789 } else {
12790 printf("free space cache v2 cleared\n");
12792 goto close_out;
12796 * repair mode will force us to commit transaction which
12797 * will make us fail to load log tree when mounting.
12799 if (repair && btrfs_super_log_root(info->super_copy)) {
12800 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12801 if (!ret) {
12802 ret = 1;
12803 err |= !!ret;
12804 goto close_out;
12806 ret = zero_log_tree(root);
12807 err |= !!ret;
12808 if (ret) {
12809 error("failed to zero log tree: %d", ret);
12810 goto close_out;
12814 uuid_unparse(info->super_copy->fsid, uuidbuf);
12815 if (qgroup_report) {
12816 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12817 uuidbuf);
12818 ret = qgroup_verify_all(info);
12819 err |= !!ret;
12820 if (ret == 0)
12821 report_qgroups(1);
12822 goto close_out;
12824 if (subvolid) {
12825 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12826 subvolid, argv[optind], uuidbuf);
12827 ret = print_extent_state(info, subvolid);
12828 err |= !!ret;
12829 goto close_out;
12831 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12833 if (!extent_buffer_uptodate(info->tree_root->node) ||
12834 !extent_buffer_uptodate(info->dev_root->node) ||
12835 !extent_buffer_uptodate(info->chunk_root->node)) {
12836 error("critical roots corrupted, unable to check the filesystem");
12837 err |= !!ret;
12838 ret = -EIO;
12839 goto close_out;
12842 if (init_extent_tree || init_csum_tree) {
12843 struct btrfs_trans_handle *trans;
12845 trans = btrfs_start_transaction(info->extent_root, 0);
12846 if (IS_ERR(trans)) {
12847 error("error starting transaction");
12848 ret = PTR_ERR(trans);
12849 err |= !!ret;
12850 goto close_out;
12853 if (init_extent_tree) {
12854 printf("Creating a new extent tree\n");
12855 ret = reinit_extent_tree(trans, info);
12856 err |= !!ret;
12857 if (ret)
12858 goto close_out;
12861 if (init_csum_tree) {
12862 printf("Reinitialize checksum tree\n");
12863 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12864 if (ret) {
12865 error("checksum tree initialization failed: %d",
12866 ret);
12867 ret = -EIO;
12868 err |= !!ret;
12869 goto close_out;
12872 ret = fill_csum_tree(trans, info->csum_root,
12873 init_extent_tree);
12874 err |= !!ret;
12875 if (ret) {
12876 error("checksum tree refilling failed: %d", ret);
12877 return -EIO;
12881 * Ok now we commit and run the normal fsck, which will add
12882 * extent entries for all of the items it finds.
12884 ret = btrfs_commit_transaction(trans, info->extent_root);
12885 err |= !!ret;
12886 if (ret)
12887 goto close_out;
12889 if (!extent_buffer_uptodate(info->extent_root->node)) {
12890 error("critical: extent_root, unable to check the filesystem");
12891 ret = -EIO;
12892 err |= !!ret;
12893 goto close_out;
12895 if (!extent_buffer_uptodate(info->csum_root->node)) {
12896 error("critical: csum_root, unable to check the filesystem");
12897 ret = -EIO;
12898 err |= !!ret;
12899 goto close_out;
12902 if (!ctx.progress_enabled)
12903 fprintf(stderr, "checking extents\n");
12904 if (check_mode == CHECK_MODE_LOWMEM)
12905 ret = check_chunks_and_extents_v2(root);
12906 else
12907 ret = check_chunks_and_extents(root);
12908 err |= !!ret;
12909 if (ret)
12910 error(
12911 "errors found in extent allocation tree or chunk allocation");
12913 ret = repair_root_items(info);
12914 err |= !!ret;
12915 if (ret < 0)
12916 goto close_out;
12917 if (repair) {
12918 fprintf(stderr, "Fixed %d roots.\n", ret);
12919 ret = 0;
12920 } else if (ret > 0) {
12921 fprintf(stderr,
12922 "Found %d roots with an outdated root item.\n",
12923 ret);
12924 fprintf(stderr,
12925 "Please run a filesystem check with the option --repair to fix them.\n");
12926 ret = 1;
12927 err |= !!ret;
12928 goto close_out;
12931 if (!ctx.progress_enabled) {
12932 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12933 fprintf(stderr, "checking free space tree\n");
12934 else
12935 fprintf(stderr, "checking free space cache\n");
12937 ret = check_space_cache(root);
12938 err |= !!ret;
12939 if (ret)
12940 goto out;
12943 * We used to have to have these hole extents in between our real
12944 * extents so if we don't have this flag set we need to make sure there
12945 * are no gaps in the file extents for inodes, otherwise we can just
12946 * ignore it when this happens.
12948 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12949 if (!ctx.progress_enabled)
12950 fprintf(stderr, "checking fs roots\n");
12951 if (check_mode == CHECK_MODE_LOWMEM)
12952 ret = check_fs_roots_v2(root->fs_info);
12953 else
12954 ret = check_fs_roots(root, &root_cache);
12955 err |= !!ret;
12956 if (ret)
12957 goto out;
12959 fprintf(stderr, "checking csums\n");
12960 ret = check_csums(root);
12961 err |= !!ret;
12962 if (ret)
12963 goto out;
12965 fprintf(stderr, "checking root refs\n");
12966 /* For low memory mode, check_fs_roots_v2 handles root refs */
12967 if (check_mode != CHECK_MODE_LOWMEM) {
12968 ret = check_root_refs(root, &root_cache);
12969 err |= !!ret;
12970 if (ret)
12971 goto out;
12974 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12975 struct extent_buffer *eb;
12977 eb = list_first_entry(&root->fs_info->recow_ebs,
12978 struct extent_buffer, recow);
12979 list_del_init(&eb->recow);
12980 ret = recow_extent_buffer(root, eb);
12981 err |= !!ret;
12982 if (ret)
12983 break;
12986 while (!list_empty(&delete_items)) {
12987 struct bad_item *bad;
12989 bad = list_first_entry(&delete_items, struct bad_item, list);
12990 list_del_init(&bad->list);
12991 if (repair) {
12992 ret = delete_bad_item(root, bad);
12993 err |= !!ret;
12995 free(bad);
12998 if (info->quota_enabled) {
12999 fprintf(stderr, "checking quota groups\n");
13000 ret = qgroup_verify_all(info);
13001 err |= !!ret;
13002 if (ret)
13003 goto out;
13004 report_qgroups(0);
13005 ret = repair_qgroups(info, &qgroups_repaired);
13006 err |= !!ret;
13007 if (err)
13008 goto out;
13009 ret = 0;
13012 if (!list_empty(&root->fs_info->recow_ebs)) {
13013 error("transid errors in file system");
13014 ret = 1;
13015 err |= !!ret;
13017 out:
13018 if (found_old_backref) { /*
13019 * there was a disk format change when mixed
13020 * backref was in testing tree. The old format
13021 * existed about one week.
13023 printf("\n * Found old mixed backref format. "
13024 "The old format is not supported! *"
13025 "\n * Please mount the FS in readonly mode, "
13026 "backup data and re-format the FS. *\n\n");
13027 err |= 1;
13029 printf("found %llu bytes used err is %d\n",
13030 (unsigned long long)bytes_used, ret);
13031 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13032 printf("total tree bytes: %llu\n",
13033 (unsigned long long)total_btree_bytes);
13034 printf("total fs tree bytes: %llu\n",
13035 (unsigned long long)total_fs_tree_bytes);
13036 printf("total extent tree bytes: %llu\n",
13037 (unsigned long long)total_extent_tree_bytes);
13038 printf("btree space waste bytes: %llu\n",
13039 (unsigned long long)btree_space_waste);
13040 printf("file data blocks allocated: %llu\n referenced %llu\n",
13041 (unsigned long long)data_bytes_allocated,
13042 (unsigned long long)data_bytes_referenced);
13044 free_qgroup_counts();
13045 free_root_recs_tree(&root_cache);
13046 close_out:
13047 close_ctree(root);
13048 err_out:
13049 if (ctx.progress_enabled)
13050 task_deinit(ctx.info);
13052 return err;