btrfs-progs: check: lowmem: Fix silent error if first inode item missing
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobc3914531f43254a78c4024b949db8be17161d2c3
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/* Which phase of the check the progress indicator is currently reporting. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Must stay the last element. */
	TASK_NOTHING,
};

/* State shared with the progress-indicator task. */
struct task_ctx {
	int progress_enabled;
	/* Current phase; used to pick the status string to print. */
	enum task_position tp;

	struct task_info *info;
};
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/* Check implementation selected by the user (see parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned for a mode string that is not recognized. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Active check mode; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
104 union {
105 u64 parent;
106 u64 root;
108 u64 owner;
109 u64 offset;
110 u64 disk_bytenr;
111 u64 bytes;
112 u64 ram_bytes;
113 u32 num_refs;
114 u32 found_ref;
/*
 * Error bits for inode, directory and root reference checks.
 * Bit 0 is not assigned in this list.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
148 u64 root;
149 u64 objectid;
150 u64 offset;
151 u64 disk_bytenr;
152 u64 disk_len;
155 struct tree_backref {
156 struct extent_backref node;
157 union {
158 u64 parent;
159 u64 root;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
177 u64 start;
178 u64 max_size;
179 u64 nr;
180 u64 refs;
181 u64 extent_item_refs;
182 u64 generation;
183 u64 parent_generation;
184 u64 info_objectid;
185 u32 num_duplicates;
186 u8 info_level;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
208 u8 filetype;
209 u8 ref_type;
210 int errors;
211 u64 dir;
212 u64 index;
213 u16 namelen;
214 char name[0];
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
224 u64 objectid;
225 u64 bytenr;
226 u64 last_snapshot;
227 u8 level;
228 u8 drop_level;
229 int level_size;
230 struct btrfs_key drop_key;
/* Error bits stored in inode_backref::errors and root_backref::errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
247 struct file_extent_hole {
248 struct rb_node node;
249 u64 start;
250 u64 len;
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
263 int errors;
265 u64 ino;
266 u32 nlink;
267 u32 imode;
268 u64 isize;
269 u64 nbytes;
271 u32 found_link;
272 u64 found_size;
273 u64 extent_start;
274 u64 extent_end;
275 struct rb_root holes;
276 struct list_head orphan_extents;
278 u32 refs;
/* Error bits stored in inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
304 int errors;
305 u64 ref_root;
306 u64 dir;
307 u64 index;
308 u16 namelen;
309 char name[0];
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
317 struct root_record {
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
321 u64 objectid;
322 u32 found_ref;
325 struct ptr_node {
326 struct cache_extent cache;
327 void *data;
330 struct shared_node {
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
335 u32 refs;
338 struct block_info {
339 u64 start;
340 u32 size;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
346 int active_node;
347 int root_level;
350 struct bad_item {
351 struct btrfs_key key;
352 u64 root_id;
353 struct list_head list;
356 struct extent_entry {
357 u64 bytenr;
358 u64 bytes;
359 int count;
360 int broken;
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
366 u8 level;
367 /* number of nodes at this level, must be 1 for a root */
368 int node_count;
369 u64 bytenr;
370 u64 gen;
371 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every bit must be unique.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now uses the first free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868 u64 ino, int mod)
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
873 int ret;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
876 if (cache) {
877 node = container_of(cache, struct ptr_node, cache);
878 rec = node->data;
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
882 return node->data;
883 rec->refs--;
884 rec = node->data;
886 } else if (mod) {
887 rec = calloc(1, sizeof(*rec));
888 if (!rec)
889 return ERR_PTR(-ENOMEM);
890 rec->ino = ino;
891 rec->extent_start = (u64)-1;
892 rec->refs = 1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
898 if (!node) {
899 free(rec);
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
904 node->data = rec;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
907 rec->found_link = 1;
909 ret = insert_cache_extent(inode_cache, &node->cache);
910 if (ret)
911 return ERR_PTR(-EEXIST);
913 return rec;
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
924 free(orphan);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
932 if (--rec->refs > 0)
933 return;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
938 free(backref);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
942 free(rec);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949 return 1;
950 return 0;
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
959 u8 filetype;
961 if (!rec->found_inode_item)
962 return;
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
972 free(backref);
977 if (!rec->checked || rec->merging)
978 return;
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1009 free(node);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1018 int ret;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1022 key.offset = ino;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1027 if (ret > 0)
1028 ret = -ENOENT;
1029 return ret;
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043 return 1;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->nodatasum = 1;
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1056 return 0;
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 const char *name,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 break;
1068 if (backref->dir != dir || backref->namelen != namelen)
1069 continue;
1070 if (memcmp(name, backref->name, namelen))
1071 continue;
1072 return backref;
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1076 if (!backref)
1077 return NULL;
1078 memset(backref, 0, sizeof(*backref));
1079 backref->dir = dir;
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1084 return backref;
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1098 BUG_ON(!backref);
1099 if (errors)
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 rec->found_link++;
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 else
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1132 } else {
1133 BUG_ON(1);
1136 maybe_free_inode_rec(inode_cache, rec);
1137 return 0;
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1144 u32 dir_count = 0;
1145 int ret = 0;
1147 dst->merging = 1;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1156 dir_count++;
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180 if (ret < 0)
1181 return ret;
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1191 } else {
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1196 dst->extent_end,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1213 } else {
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1217 dst->merging = 0;
1219 return 0;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1230 int splice = 0;
1231 int ret;
1233 if (--src_node->refs == 0)
1234 splice = 1;
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1240 again:
1241 cache = search_cache_extent(src, 0);
1242 while (cache) {
1243 node = container_of(cache, struct ptr_node, cache);
1244 rec = node->data;
1245 cache = next_cache_extent(cache);
1247 if (splice) {
1248 remove_cache_extent(src, &node->cache);
1249 ins = node;
1250 } else {
1251 ins = malloc(sizeof(*ins));
1252 BUG_ON(!ins);
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1255 ins->data = rec;
1256 rec->refs++;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1263 if (rec->checked) {
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1270 free(ins);
1271 } else {
1272 BUG_ON(ret);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1279 goto again;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1291 return 0;
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1300 rec = node->data;
1301 free_inode_rec(rec);
1302 free(node);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308 u64 bytenr)
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1314 if (cache) {
1315 node = container_of(cache, struct shared_node, cache);
1316 return node;
1318 return NULL;
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 int ret;
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1327 if (!node)
1328 return -ENOMEM;
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1333 node->refs = refs;
1335 ret = insert_cache_extent(shared, &node->cache);
1337 return ret;
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1345 int ret;
1347 if (level == wc->active_node)
1348 return 0;
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1352 if (!node) {
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 BUG_ON(ret);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1358 return 0;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1367 free(node);
1369 return 1;
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1376 free(node);
1378 return 1;
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1386 int i;
1388 if (level == wc->root_level)
1389 return 0;
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392 if (wc->nodes[i])
1393 break;
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1406 } else {
1407 BUG_ON(node->refs < 2);
1408 node->refs--;
1410 return 0;
1414 * Returns:
1415 * < 0 - on error
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422 u64 child_root_id)
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1427 int has_parent = 0;
1428 int ret;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436 0, 0);
1437 if (ret < 0)
1438 return ret;
1439 btrfs_release_path(&path);
1440 if (!ret)
1441 return 1;
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 key.offset = 0;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447 0, 0);
1448 if (ret < 0)
1449 goto out;
1451 while (1) {
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455 if (ret)
1456 break;
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1463 break;
1465 has_parent = 1;
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1469 return 1;
1472 path.slots[0]++;
1474 out:
1475 btrfs_release_path(&path);
1476 if (ret < 0)
1477 return ret;
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1485 u32 total;
1486 u32 cur = 0;
1487 u32 len;
1488 u32 name_len;
1489 u32 data_len;
1490 int error;
1491 int nritems = 0;
1492 u8 filetype;
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1508 nritems++;
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1516 len = name_len;
1517 error = 0;
1518 } else {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1532 key->type, error);
1533 } else {
1534 fprintf(stderr, "invalid location in dir item %u\n",
1535 location.type);
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1543 cur += len;
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548 return 0;
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1555 u32 total;
1556 u32 cur = 0;
1557 u32 len;
1558 u32 name_len;
1559 u64 index;
1560 int error;
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1573 len = name_len;
1574 error = 0;
1575 } else {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585 cur += len;
1587 return 0;
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1594 u32 total;
1595 u32 cur = 0;
1596 u32 len;
1597 u32 name_len;
1598 u64 index;
1599 u64 parent;
1600 int error;
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1614 len = name_len;
1615 error = 0;
1616 } else {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627 cur += len;
1629 return 0;
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1639 int ret;
1640 size_t size;
1641 *found = 0;
1642 u64 csum_end;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648 key.offset = start;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652 &key, &path, 0, 0);
1653 if (ret < 0)
1654 goto out;
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1660 path.slots[0]--;
1663 while (len > 0) {
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667 if (ret > 0)
1668 break;
1669 else if (ret < 0)
1670 goto out;
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1677 break;
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1681 break;
1683 if (key.offset > start)
1684 start = key.offset;
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1690 len -= size;
1691 start += size;
1692 *found += size;
1695 path.slots[0]++;
1697 out:
1698 btrfs_release_path(&path);
1699 if (ret < 0)
1700 return ret;
1701 return 0;
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1711 u64 num_bytes = 0;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1715 int extent_type;
1716 int ret;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1732 if (ret < 0)
1733 return ret;
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741 if (num_bytes == 0)
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1762 } else {
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1773 * data reloc tree.
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777 u64 found;
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780 else
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784 if (ret < 0)
1785 return ret;
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787 if (found > 0)
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792 if (found > 0)
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1796 return 0;
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1803 u32 nritems;
1804 int i;
1805 int ret = 0;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1811 return 0;
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820 continue;
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822 continue;
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1832 key.objectid, 1);
1833 BUG_ON(IS_ERR(active_node->current));
1835 switch (key.type) {
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1839 break;
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1842 break;
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1845 break;
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1848 break;
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
1851 active_node);
1852 break;
1853 default:
1854 break;
1857 return ret;
1860 struct node_refs {
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872 struct node_refs *nrefs, int *level, int ext_ref)
1874 struct extent_buffer *cur = path->nodes[0];
1875 struct btrfs_key key;
1876 u64 cur_bytenr;
1877 u32 nritems;
1878 u64 first_ino = 0;
1879 int root_level = btrfs_header_level(root->node);
1880 int i;
1881 int ret = 0; /* Final return value */
1882 int err = 0; /* Positive error bitmap */
1884 cur_bytenr = cur->start;
1886 /* skip to first inode item or the first inode number change */
1887 nritems = btrfs_header_nritems(cur);
1888 for (i = 0; i < nritems; i++) {
1889 btrfs_item_key_to_cpu(cur, &key, i);
1890 if (i == 0)
1891 first_ino = key.objectid;
1892 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893 (first_ino && first_ino != key.objectid))
1894 break;
1896 if (i == nritems) {
1897 path->slots[0] = nritems;
1898 return 0;
1900 path->slots[0] = i;
1902 again:
1903 err |= check_inode_item(root, path, ext_ref);
1905 if (err & LAST_ITEM)
1906 goto out;
1908 /* still have inode items in thie leaf */
1909 if (cur->start == cur_bytenr)
1910 goto again;
1913 * we have switched to another leaf, above nodes may
1914 * have changed, here walk down the path, if a node
1915 * or leaf is shared, check whether we can skip this
1916 * node or leaf.
1918 for (i = root_level; i >= 0; i--) {
1919 if (path->nodes[i]->start == nrefs->bytenr[i])
1920 continue;
1922 ret = update_nodes_refs(root,
1923 path->nodes[i]->start,
1924 nrefs, i);
1925 if (ret)
1926 goto out;
1928 if (!nrefs->need_check[i]) {
1929 *level += 1;
1930 break;
1934 for (i = 0; i < *level; i++) {
1935 free_extent_buffer(path->nodes[i]);
1936 path->nodes[i] = NULL;
1938 out:
1939 err &= ~LAST_ITEM;
1941 * Convert any error bitmap to -EIO, as we should avoid
1942 * mixing positive and negative return value to represent
1943 * error
1945 if (err && !ret)
1946 ret = -EIO;
1947 return ret;
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1953 u64 bytenr;
1954 u64 ptr_gen;
1955 u32 nritems;
1956 u32 blocksize;
1957 int i;
1958 int level;
1960 level = btrfs_header_level(node);
1961 if (level != 1)
1962 return;
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1976 * in parent.
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1991 int ret = 0;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1996 else
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000 ret = -EINVAL;
2001 fprintf(stderr,
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007 ret = -EINVAL;
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2014 ret = -EINVAL;
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2019 return ret;
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find its all root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2033 return 1;
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2042 return 0;
2044 return 1;
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2054 int check, ret;
2055 u64 refs;
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2061 if (ret < 0)
2062 return ret;
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2066 if (refs > 1) {
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068 0, &roots);
2069 if (ret)
2070 return -EIO;
2072 check = need_check(root, roots);
2073 ulist_free(roots);
2074 nrefs->need_check[level] = check;
2075 } else {
2076 nrefs->need_check[level] = 1;
2080 return 0;
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2088 u64 bytenr;
2089 u64 ptr_gen;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2092 u32 blocksize;
2093 int ret, err = 0;
2094 u64 refs;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2101 ret = 0;
2102 } else {
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2106 if (ret < 0) {
2107 err = ret;
2108 goto out;
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2114 if (refs > 1) {
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2116 refs, wc, *level);
2117 if (ret > 0) {
2118 err = ret;
2119 goto out;
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2129 WARN_ON(1);
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132 break;
2133 if (*level == 0) {
2134 ret = process_one_leaf(root, cur, wc);
2135 if (ret < 0)
2136 err = ret;
2137 break;
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2145 } else {
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2148 if (ret < 0) {
2149 refs = 0;
2150 } else {
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2156 if (refs > 1) {
2157 ret = enter_shared_node(root, bytenr, refs,
2158 wc, *level - 1);
2159 if (ret > 0) {
2160 path->slots[*level]++;
2161 continue;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2170 ptr_gen);
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2175 &node_key,
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2178 &node_key,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2181 err = -EIO;
2182 goto out;
2186 ret = check_child_node(cur, path->slots[*level], next);
2187 if (ret) {
2188 err = ret;
2189 goto out;
2192 if (btrfs_is_leaf(next))
2193 status = btrfs_check_leaf(root, NULL, next);
2194 else
2195 status = btrfs_check_node(root, NULL, next);
2196 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197 free_extent_buffer(next);
2198 err = -EIO;
2199 goto out;
2202 *level = *level - 1;
2203 free_extent_buffer(path->nodes[*level]);
2204 path->nodes[*level] = next;
2205 path->slots[*level] = 0;
2207 out:
2208 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209 return err;
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213 unsigned int ext_ref);
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216 int *level, struct node_refs *nrefs, int ext_ref)
2218 enum btrfs_tree_block_status status;
2219 u64 bytenr;
2220 u64 ptr_gen;
2221 struct extent_buffer *next;
2222 struct extent_buffer *cur;
2223 u32 blocksize;
2224 int ret;
2226 WARN_ON(*level < 0);
2227 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229 ret = update_nodes_refs(root, path->nodes[*level]->start,
2230 nrefs, *level);
2231 if (ret < 0)
2232 return ret;
2234 while (*level >= 0) {
2235 WARN_ON(*level < 0);
2236 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237 cur = path->nodes[*level];
2239 if (btrfs_header_level(cur) != *level)
2240 WARN_ON(1);
2242 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243 break;
2244 /* Don't forgot to check leaf/node validation */
2245 if (*level == 0) {
2246 ret = btrfs_check_leaf(root, NULL, cur);
2247 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248 ret = -EIO;
2249 break;
2251 ret = process_one_leaf_v2(root, path, nrefs,
2252 level, ext_ref);
2253 break;
2254 } else {
2255 ret = btrfs_check_node(root, NULL, cur);
2256 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = -EIO;
2258 break;
2261 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263 blocksize = root->nodesize;
2265 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266 if (ret)
2267 break;
2268 if (!nrefs->need_check[*level - 1]) {
2269 path->slots[*level]++;
2270 continue;
2273 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275 free_extent_buffer(next);
2276 reada_walk_down(root, cur, path->slots[*level]);
2277 next = read_tree_block(root, bytenr, blocksize,
2278 ptr_gen);
2279 if (!extent_buffer_uptodate(next)) {
2280 struct btrfs_key node_key;
2282 btrfs_node_key_to_cpu(path->nodes[*level],
2283 &node_key,
2284 path->slots[*level]);
2285 btrfs_add_corrupt_extent_record(root->fs_info,
2286 &node_key,
2287 path->nodes[*level]->start,
2288 root->nodesize, *level);
2289 ret = -EIO;
2290 break;
2294 ret = check_child_node(cur, path->slots[*level], next);
2295 if (ret < 0)
2296 break;
2298 if (btrfs_is_leaf(next))
2299 status = btrfs_check_leaf(root, NULL, next);
2300 else
2301 status = btrfs_check_node(root, NULL, next);
2302 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303 free_extent_buffer(next);
2304 ret = -EIO;
2305 break;
2308 *level = *level - 1;
2309 free_extent_buffer(path->nodes[*level]);
2310 path->nodes[*level] = next;
2311 path->slots[*level] = 0;
2313 return ret;
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317 struct walk_control *wc, int *level)
2319 int i;
2320 struct extent_buffer *leaf;
2322 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323 leaf = path->nodes[i];
2324 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325 path->slots[i]++;
2326 *level = i;
2327 return 0;
2328 } else {
2329 free_extent_buffer(path->nodes[*level]);
2330 path->nodes[*level] = NULL;
2331 BUG_ON(*level > wc->active_node);
2332 if (*level == wc->active_node)
2333 leave_shared_node(root, wc, *level);
2334 *level = i + 1;
2337 return 1;
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341 int *level)
2343 int i;
2344 struct extent_buffer *leaf;
2346 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347 leaf = path->nodes[i];
2348 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349 path->slots[i]++;
2350 *level = i;
2351 return 0;
2352 } else {
2353 free_extent_buffer(path->nodes[*level]);
2354 path->nodes[*level] = NULL;
2355 *level = i + 1;
2358 return 1;
2361 static int check_root_dir(struct inode_record *rec)
2363 struct inode_backref *backref;
2364 int ret = -1;
2366 if (!rec->found_inode_item || rec->errors)
2367 goto out;
2368 if (rec->nlink != 1 || rec->found_link != 0)
2369 goto out;
2370 if (list_empty(&rec->backrefs))
2371 goto out;
2372 backref = to_inode_backref(rec->backrefs.next);
2373 if (!backref->found_inode_ref)
2374 goto out;
2375 if (backref->index != 0 || backref->namelen != 2 ||
2376 memcmp(backref->name, "..", 2))
2377 goto out;
2378 if (backref->found_dir_index || backref->found_dir_item)
2379 goto out;
2380 ret = 0;
2381 out:
2382 return ret;
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386 struct btrfs_root *root, struct btrfs_path *path,
2387 struct inode_record *rec)
2389 struct btrfs_inode_item *ei;
2390 struct btrfs_key key;
2391 int ret;
2393 key.objectid = rec->ino;
2394 key.type = BTRFS_INODE_ITEM_KEY;
2395 key.offset = (u64)-1;
2397 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398 if (ret < 0)
2399 goto out;
2400 if (ret) {
2401 if (!path->slots[0]) {
2402 ret = -ENOENT;
2403 goto out;
2405 path->slots[0]--;
2406 ret = 0;
2408 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409 if (key.objectid != rec->ino) {
2410 ret = -ENOENT;
2411 goto out;
2414 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415 struct btrfs_inode_item);
2416 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417 btrfs_mark_buffer_dirty(path->nodes[0]);
2418 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420 root->root_key.objectid);
2421 out:
2422 btrfs_release_path(path);
2423 return ret;
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 struct btrfs_path *path,
2429 struct inode_record *rec)
2431 int ret;
2433 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434 btrfs_release_path(path);
2435 if (!ret)
2436 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437 return ret;
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441 struct btrfs_root *root,
2442 struct btrfs_path *path,
2443 struct inode_record *rec)
2445 struct btrfs_inode_item *ei;
2446 struct btrfs_key key;
2447 int ret = 0;
2449 key.objectid = rec->ino;
2450 key.type = BTRFS_INODE_ITEM_KEY;
2451 key.offset = 0;
2453 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454 if (ret) {
2455 if (ret > 0)
2456 ret = -ENOENT;
2457 goto out;
2460 /* Since ret == 0, no need to check anything */
2461 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462 struct btrfs_inode_item);
2463 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464 btrfs_mark_buffer_dirty(path->nodes[0]);
2465 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466 printf("reset nbytes for ino %llu root %llu\n",
2467 rec->ino, root->root_key.objectid);
2468 out:
2469 btrfs_release_path(path);
2470 return ret;
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474 struct cache_tree *inode_cache,
2475 struct inode_record *rec,
2476 struct inode_backref *backref)
2478 struct btrfs_path path;
2479 struct btrfs_trans_handle *trans;
2480 struct btrfs_dir_item *dir_item;
2481 struct extent_buffer *leaf;
2482 struct btrfs_key key;
2483 struct btrfs_disk_key disk_key;
2484 struct inode_record *dir_rec;
2485 unsigned long name_ptr;
2486 u32 data_size = sizeof(*dir_item) + backref->namelen;
2487 int ret;
2489 trans = btrfs_start_transaction(root, 1);
2490 if (IS_ERR(trans))
2491 return PTR_ERR(trans);
2493 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494 (unsigned long long)rec->ino);
2496 btrfs_init_path(&path);
2497 key.objectid = backref->dir;
2498 key.type = BTRFS_DIR_INDEX_KEY;
2499 key.offset = backref->index;
2500 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501 BUG_ON(ret);
2503 leaf = path.nodes[0];
2504 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506 disk_key.objectid = cpu_to_le64(rec->ino);
2507 disk_key.type = BTRFS_INODE_ITEM_KEY;
2508 disk_key.offset = 0;
2510 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512 btrfs_set_dir_data_len(leaf, dir_item, 0);
2513 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514 name_ptr = (unsigned long)(dir_item + 1);
2515 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516 btrfs_mark_buffer_dirty(leaf);
2517 btrfs_release_path(&path);
2518 btrfs_commit_transaction(trans, root);
2520 backref->found_dir_index = 1;
2521 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522 BUG_ON(IS_ERR(dir_rec));
2523 if (!dir_rec)
2524 return 0;
2525 dir_rec->found_size += backref->namelen;
2526 if (dir_rec->found_size == dir_rec->isize &&
2527 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529 if (dir_rec->found_size != dir_rec->isize)
2530 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532 return 0;
2535 static int delete_dir_index(struct btrfs_root *root,
2536 struct inode_backref *backref)
2538 struct btrfs_trans_handle *trans;
2539 struct btrfs_dir_item *di;
2540 struct btrfs_path path;
2541 int ret = 0;
2543 trans = btrfs_start_transaction(root, 1);
2544 if (IS_ERR(trans))
2545 return PTR_ERR(trans);
2547 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548 (unsigned long long)backref->dir,
2549 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550 (unsigned long long)root->objectid);
2552 btrfs_init_path(&path);
2553 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554 backref->name, backref->namelen,
2555 backref->index, -1);
2556 if (IS_ERR(di)) {
2557 ret = PTR_ERR(di);
2558 btrfs_release_path(&path);
2559 btrfs_commit_transaction(trans, root);
2560 if (ret == -ENOENT)
2561 return 0;
2562 return ret;
2565 if (!di)
2566 ret = btrfs_del_item(trans, root, &path);
2567 else
2568 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569 BUG_ON(ret);
2570 btrfs_release_path(&path);
2571 btrfs_commit_transaction(trans, root);
2572 return ret;
2575 static int create_inode_item(struct btrfs_root *root,
2576 struct inode_record *rec,
2577 int root_dir)
2579 struct btrfs_trans_handle *trans;
2580 struct btrfs_inode_item inode_item;
2581 time_t now = time(NULL);
2582 int ret;
2584 trans = btrfs_start_transaction(root, 1);
2585 if (IS_ERR(trans)) {
2586 ret = PTR_ERR(trans);
2587 return ret;
2590 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591 "be incomplete, please check permissions and content after "
2592 "the fsck completes.\n", (unsigned long long)root->objectid,
2593 (unsigned long long)rec->ino);
2595 memset(&inode_item, 0, sizeof(inode_item));
2596 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597 if (root_dir)
2598 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599 else
2600 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602 if (rec->found_dir_item) {
2603 if (rec->found_file_extent)
2604 fprintf(stderr, "root %llu inode %llu has both a dir "
2605 "item and extents, unsure if it is a dir or a "
2606 "regular file so setting it as a directory\n",
2607 (unsigned long long)root->objectid,
2608 (unsigned long long)rec->ino);
2609 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611 } else if (!rec->found_dir_item) {
2612 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2615 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2624 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625 BUG_ON(ret);
2626 btrfs_commit_transaction(trans, root);
2627 return 0;
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631 struct inode_record *rec,
2632 struct cache_tree *inode_cache,
2633 int delete)
2635 struct inode_backref *tmp, *backref;
2636 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637 int ret = 0;
2638 int repaired = 0;
2640 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641 if (!delete && rec->ino == root_dirid) {
2642 if (!rec->found_inode_item) {
2643 ret = create_inode_item(root, rec, 1);
2644 if (ret)
2645 break;
2646 repaired++;
2650 /* Index 0 for root dir's are special, don't mess with it */
2651 if (rec->ino == root_dirid && backref->index == 0)
2652 continue;
2654 if (delete &&
2655 ((backref->found_dir_index && !backref->found_inode_ref) ||
2656 (backref->found_dir_index && backref->found_inode_ref &&
2657 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658 ret = delete_dir_index(root, backref);
2659 if (ret)
2660 break;
2661 repaired++;
2662 list_del(&backref->list);
2663 free(backref);
2666 if (!delete && !backref->found_dir_index &&
2667 backref->found_dir_item && backref->found_inode_ref) {
2668 ret = add_missing_dir_index(root, inode_cache, rec,
2669 backref);
2670 if (ret)
2671 break;
2672 repaired++;
2673 if (backref->found_dir_item &&
2674 backref->found_dir_index &&
2675 backref->found_dir_index) {
2676 if (!backref->errors &&
2677 backref->found_inode_ref) {
2678 list_del(&backref->list);
2679 free(backref);
2684 if (!delete && (!backref->found_dir_index &&
2685 !backref->found_dir_item &&
2686 backref->found_inode_ref)) {
2687 struct btrfs_trans_handle *trans;
2688 struct btrfs_key location;
2690 ret = check_dir_conflict(root, backref->name,
2691 backref->namelen,
2692 backref->dir,
2693 backref->index);
2694 if (ret) {
2696 * let nlink fixing routine to handle it,
2697 * which can do it better.
2699 ret = 0;
2700 break;
2702 location.objectid = rec->ino;
2703 location.type = BTRFS_INODE_ITEM_KEY;
2704 location.offset = 0;
2706 trans = btrfs_start_transaction(root, 1);
2707 if (IS_ERR(trans)) {
2708 ret = PTR_ERR(trans);
2709 break;
2711 fprintf(stderr, "adding missing dir index/item pair "
2712 "for inode %llu\n",
2713 (unsigned long long)rec->ino);
2714 ret = btrfs_insert_dir_item(trans, root, backref->name,
2715 backref->namelen,
2716 backref->dir, &location,
2717 imode_to_type(rec->imode),
2718 backref->index);
2719 BUG_ON(ret);
2720 btrfs_commit_transaction(trans, root);
2721 repaired++;
2724 if (!delete && (backref->found_inode_ref &&
2725 backref->found_dir_index &&
2726 backref->found_dir_item &&
2727 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728 !rec->found_inode_item)) {
2729 ret = create_inode_item(root, rec, 0);
2730 if (ret)
2731 break;
2732 repaired++;
2736 return ret ? ret : repaired;
2740 * To determine the file type for nlink/inode_item repair
2742 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743 * Return -ENOENT if file type is not found.
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2747 struct inode_backref *backref;
2749 /* For inode item recovered case */
2750 if (rec->found_inode_item) {
2751 *type = imode_to_type(rec->imode);
2752 return 0;
2755 list_for_each_entry(backref, &rec->backrefs, list) {
2756 if (backref->found_dir_index || backref->found_dir_item) {
2757 *type = backref->filetype;
2758 return 0;
2761 return -ENOENT;
2765 * To determine the file name for nlink repair
2767 * Return 0 if file name is found, set name and namelen.
2768 * Return -ENOENT if file name is not found.
2770 static int find_file_name(struct inode_record *rec,
2771 char *name, int *namelen)
2773 struct inode_backref *backref;
2775 list_for_each_entry(backref, &rec->backrefs, list) {
2776 if (backref->found_dir_index || backref->found_dir_item ||
2777 backref->found_inode_ref) {
2778 memcpy(name, backref->name, backref->namelen);
2779 *namelen = backref->namelen;
2780 return 0;
2783 return -ENOENT;
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788 struct btrfs_root *root,
2789 struct btrfs_path *path,
2790 struct inode_record *rec)
2792 struct inode_backref *backref;
2793 struct inode_backref *tmp;
2794 struct btrfs_key key;
2795 struct btrfs_inode_item *inode_item;
2796 int ret = 0;
2798 /* We don't believe this either, reset it and iterate backref */
2799 rec->found_link = 0;
2801 /* Remove all backref including the valid ones */
2802 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804 backref->index, backref->name,
2805 backref->namelen, 0);
2806 if (ret < 0)
2807 goto out;
2809 /* remove invalid backref, so it won't be added back */
2810 if (!(backref->found_dir_index &&
2811 backref->found_dir_item &&
2812 backref->found_inode_ref)) {
2813 list_del(&backref->list);
2814 free(backref);
2815 } else {
2816 rec->found_link++;
2820 /* Set nlink to 0 */
2821 key.objectid = rec->ino;
2822 key.type = BTRFS_INODE_ITEM_KEY;
2823 key.offset = 0;
2824 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825 if (ret < 0)
2826 goto out;
2827 if (ret > 0) {
2828 ret = -ENOENT;
2829 goto out;
2831 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832 struct btrfs_inode_item);
2833 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834 btrfs_mark_buffer_dirty(path->nodes[0]);
2835 btrfs_release_path(path);
2838 * Add back valid inode_ref/dir_item/dir_index,
2839 * add_link() will handle the nlink inc, so new nlink must be correct
2841 list_for_each_entry(backref, &rec->backrefs, list) {
2842 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843 backref->name, backref->namelen,
2844 backref->filetype, &backref->index, 1);
2845 if (ret < 0)
2846 goto out;
2848 out:
2849 btrfs_release_path(path);
2850 return ret;
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854 struct btrfs_root *root,
2855 struct btrfs_path *path,
2856 u64 *highest_ino)
2858 struct btrfs_key key, found_key;
2859 int ret;
2861 btrfs_init_path(path);
2862 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863 key.offset = -1;
2864 key.type = BTRFS_INODE_ITEM_KEY;
2865 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866 if (ret == 1) {
2867 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868 path->slots[0] - 1);
2869 *highest_ino = found_key.objectid;
2870 ret = 0;
2872 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873 ret = -EOVERFLOW;
2874 btrfs_release_path(path);
2875 return ret;
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879 struct btrfs_root *root,
2880 struct btrfs_path *path,
2881 struct inode_record *rec)
2883 char *dir_name = "lost+found";
2884 char namebuf[BTRFS_NAME_LEN] = {0};
2885 u64 lost_found_ino;
2886 u32 mode = 0700;
2887 u8 type = 0;
2888 int namelen = 0;
2889 int name_recovered = 0;
2890 int type_recovered = 0;
2891 int ret = 0;
2894 * Get file name and type first before these invalid inode ref
2895 * are deleted by remove_all_invalid_backref()
2897 name_recovered = !find_file_name(rec, namebuf, &namelen);
2898 type_recovered = !find_file_type(rec, &type);
2900 if (!name_recovered) {
2901 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902 rec->ino, rec->ino);
2903 namelen = count_digits(rec->ino);
2904 sprintf(namebuf, "%llu", rec->ino);
2905 name_recovered = 1;
2907 if (!type_recovered) {
2908 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909 rec->ino);
2910 type = BTRFS_FT_REG_FILE;
2911 type_recovered = 1;
2914 ret = reset_nlink(trans, root, path, rec);
2915 if (ret < 0) {
2916 fprintf(stderr,
2917 "Failed to reset nlink for inode %llu: %s\n",
2918 rec->ino, strerror(-ret));
2919 goto out;
2922 if (rec->found_link == 0) {
2923 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924 if (ret < 0)
2925 goto out;
2926 lost_found_ino++;
2927 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929 mode);
2930 if (ret < 0) {
2931 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932 dir_name, strerror(-ret));
2933 goto out;
2935 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936 namebuf, namelen, type, NULL, 1);
2938 * Add ".INO" suffix several times to handle case where
2939 * "FILENAME.INO" is already taken by another file.
2941 while (ret == -EEXIST) {
2943 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2945 if (namelen + count_digits(rec->ino) + 1 >
2946 BTRFS_NAME_LEN) {
2947 ret = -EFBIG;
2948 goto out;
2950 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951 ".%llu", rec->ino);
2952 namelen += count_digits(rec->ino) + 1;
2953 ret = btrfs_add_link(trans, root, rec->ino,
2954 lost_found_ino, namebuf,
2955 namelen, type, NULL, 1);
2957 if (ret < 0) {
2958 fprintf(stderr,
2959 "Failed to link the inode %llu to %s dir: %s\n",
2960 rec->ino, dir_name, strerror(-ret));
2961 goto out;
2964 * Just increase the found_link, don't actually add the
2965 * backref. This will make things easier and this inode
2966 * record will be freed after the repair is done.
2967 * So fsck will not report problem about this inode.
2969 rec->found_link++;
2970 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971 namelen, namebuf, dir_name);
2973 printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2976 * Clear the flag anyway, or we will loop forever for the same inode
2977 * as it will not be removed from the bad inode list and the dead loop
2978 * happens.
2980 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981 btrfs_release_path(path);
2982 return ret;
2986 * Check if there is any normal(reg or prealloc) file extent for given
2987 * ino.
2988 * This is used to determine the file type when neither its dir_index/item or
2989 * inode_item exists.
2991 * This will *NOT* report error, if any error happens, just consider it does
2992 * not have any normal file extent.
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2996 struct btrfs_path path;
2997 struct btrfs_key key;
2998 struct btrfs_key found_key;
2999 struct btrfs_file_extent_item *fi;
3000 u8 type;
3001 int ret = 0;
3003 btrfs_init_path(&path);
3004 key.objectid = ino;
3005 key.type = BTRFS_EXTENT_DATA_KEY;
3006 key.offset = 0;
3008 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009 if (ret < 0) {
3010 ret = 0;
3011 goto out;
3013 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014 ret = btrfs_next_leaf(root, &path);
3015 if (ret) {
3016 ret = 0;
3017 goto out;
3020 while (1) {
3021 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022 path.slots[0]);
3023 if (found_key.objectid != ino ||
3024 found_key.type != BTRFS_EXTENT_DATA_KEY)
3025 break;
3026 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027 struct btrfs_file_extent_item);
3028 type = btrfs_file_extent_type(path.nodes[0], fi);
3029 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030 ret = 1;
3031 goto out;
3034 out:
3035 btrfs_release_path(&path);
3036 return ret;
3039 static u32 btrfs_type_to_imode(u8 type)
3041 static u32 imode_by_btrfs_type[] = {
3042 [BTRFS_FT_REG_FILE] = S_IFREG,
3043 [BTRFS_FT_DIR] = S_IFDIR,
3044 [BTRFS_FT_CHRDEV] = S_IFCHR,
3045 [BTRFS_FT_BLKDEV] = S_IFBLK,
3046 [BTRFS_FT_FIFO] = S_IFIFO,
3047 [BTRFS_FT_SOCK] = S_IFSOCK,
3048 [BTRFS_FT_SYMLINK] = S_IFLNK,
3051 return imode_by_btrfs_type[(type)];
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055 struct btrfs_root *root,
3056 struct btrfs_path *path,
3057 struct inode_record *rec)
3059 u8 filetype;
3060 u32 mode = 0700;
3061 int type_recovered = 0;
3062 int ret = 0;
3064 printf("Trying to rebuild inode:%llu\n", rec->ino);
3066 type_recovered = !find_file_type(rec, &filetype);
3069 * Try to determine inode type if type not found.
3071 * For found regular file extent, it must be FILE.
3072 * For found dir_item/index, it must be DIR.
3074 * For undetermined one, use FILE as fallback.
3076 * TODO:
3077 * 1. If found backref(inode_index/item is already handled) to it,
3078 * it must be DIR.
3079 * Need new inode-inode ref structure to allow search for that.
3081 if (!type_recovered) {
3082 if (rec->found_file_extent &&
3083 find_normal_file_extent(root, rec->ino)) {
3084 type_recovered = 1;
3085 filetype = BTRFS_FT_REG_FILE;
3086 } else if (rec->found_dir_item) {
3087 type_recovered = 1;
3088 filetype = BTRFS_FT_DIR;
3089 } else if (!list_empty(&rec->orphan_extents)) {
3090 type_recovered = 1;
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else{
3093 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094 rec->ino);
3095 type_recovered = 1;
3096 filetype = BTRFS_FT_REG_FILE;
3100 ret = btrfs_new_inode(trans, root, rec->ino,
3101 mode | btrfs_type_to_imode(filetype));
3102 if (ret < 0)
3103 goto out;
3106 * Here inode rebuild is done, we only rebuild the inode item,
3107 * don't repair the nlink(like move to lost+found).
3108 * That is the job of nlink repair.
3110 * We just fill the record and return
3112 rec->found_dir_item = 1;
3113 rec->imode = mode | btrfs_type_to_imode(filetype);
3114 rec->nlink = 0;
3115 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116 /* Ensure the inode_nlinks repair function will be called */
3117 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119 return ret;
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123 struct btrfs_root *root,
3124 struct btrfs_path *path,
3125 struct inode_record *rec)
3127 struct orphan_data_extent *orphan;
3128 struct orphan_data_extent *tmp;
3129 int ret = 0;
3131 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3133 * Check for conflicting file extents
3135 * Here we don't know whether the extents is compressed or not,
3136 * so we can only assume it not compressed nor data offset,
3137 * and use its disk_len as extent length.
3139 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140 orphan->offset, orphan->disk_len, 0);
3141 btrfs_release_path(path);
3142 if (ret < 0)
3143 goto out;
3144 if (!ret) {
3145 fprintf(stderr,
3146 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147 orphan->disk_bytenr, orphan->disk_len);
3148 ret = btrfs_free_extent(trans,
3149 root->fs_info->extent_root,
3150 orphan->disk_bytenr, orphan->disk_len,
3151 0, root->objectid, orphan->objectid,
3152 orphan->offset);
3153 if (ret < 0)
3154 goto out;
3156 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157 orphan->offset, orphan->disk_bytenr,
3158 orphan->disk_len, orphan->disk_len);
3159 if (ret < 0)
3160 goto out;
3162 /* Update file size info */
3163 rec->found_size += orphan->disk_len;
3164 if (rec->found_size == rec->nbytes)
3165 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3167 /* Update the file extent hole info too */
3168 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169 orphan->disk_len);
3170 if (ret < 0)
3171 goto out;
3172 if (RB_EMPTY_ROOT(&rec->holes))
3173 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3175 list_del(&orphan->list);
3176 free(orphan);
3178 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180 return ret;
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184 struct btrfs_root *root,
3185 struct btrfs_path *path,
3186 struct inode_record *rec)
3188 struct rb_node *node;
3189 struct file_extent_hole *hole;
3190 int found = 0;
3191 int ret = 0;
3193 node = rb_first(&rec->holes);
3195 while (node) {
3196 found = 1;
3197 hole = rb_entry(node, struct file_extent_hole, node);
3198 ret = btrfs_punch_hole(trans, root, rec->ino,
3199 hole->start, hole->len);
3200 if (ret < 0)
3201 goto out;
3202 ret = del_file_extent_hole(&rec->holes, hole->start,
3203 hole->len);
3204 if (ret < 0)
3205 goto out;
3206 if (RB_EMPTY_ROOT(&rec->holes))
3207 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208 node = rb_first(&rec->holes);
3210 /* special case for a file losing all its file extent */
3211 if (!found) {
3212 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213 round_up(rec->isize, root->sectorsize));
3214 if (ret < 0)
3215 goto out;
3217 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218 rec->ino, root->objectid);
3219 out:
3220 return ret;
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3225 struct btrfs_trans_handle *trans;
3226 struct btrfs_path path;
3227 int ret = 0;
3229 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230 I_ERR_NO_ORPHAN_ITEM |
3231 I_ERR_LINK_COUNT_WRONG |
3232 I_ERR_NO_INODE_ITEM |
3233 I_ERR_FILE_EXTENT_ORPHAN |
3234 I_ERR_FILE_EXTENT_DISCOUNT|
3235 I_ERR_FILE_NBYTES_WRONG)))
3236 return rec->errors;
3239 * For nlink repair, it may create a dir and add link, so
3240 * 2 for parent(256)'s dir_index and dir_item
3241 * 2 for lost+found dir's inode_item and inode_ref
3242 * 1 for the new inode_ref of the file
3243 * 2 for lost+found dir's dir_index and dir_item for the file
3245 trans = btrfs_start_transaction(root, 7);
3246 if (IS_ERR(trans))
3247 return PTR_ERR(trans);
3249 btrfs_init_path(&path);
3250 if (rec->errors & I_ERR_NO_INODE_ITEM)
3251 ret = repair_inode_no_item(trans, root, &path, rec);
3252 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257 ret = repair_inode_isize(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261 ret = repair_inode_nlinks(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263 ret = repair_inode_nbytes(trans, root, &path, rec);
3264 btrfs_commit_transaction(trans, root);
3265 btrfs_release_path(&path);
3266 return ret;
3269 static int check_inode_recs(struct btrfs_root *root,
3270 struct cache_tree *inode_cache)
3272 struct cache_extent *cache;
3273 struct ptr_node *node;
3274 struct inode_record *rec;
3275 struct inode_backref *backref;
3276 int stage = 0;
3277 int ret = 0;
3278 int err = 0;
3279 u64 error = 0;
3280 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3282 if (btrfs_root_refs(&root->root_item) == 0) {
3283 if (!cache_tree_empty(inode_cache))
3284 fprintf(stderr, "warning line %d\n", __LINE__);
3285 return 0;
3289 * We need to repair backrefs first because we could change some of the
3290 * errors in the inode recs.
3292 * We also need to go through and delete invalid backrefs first and then
3293 * add the correct ones second. We do this because we may get EEXIST
3294 * when adding back the correct index because we hadn't yet deleted the
3295 * invalid index.
3297 * For example, if we were missing a dir index then the directories
3298 * isize would be wrong, so if we fixed the isize to what we thought it
3299 * would be and then fixed the backref we'd still have a invalid fs, so
3300 * we need to add back the dir index and then check to see if the isize
3301 * is still wrong.
3303 while (stage < 3) {
3304 stage++;
3305 if (stage == 3 && !err)
3306 break;
3308 cache = search_cache_extent(inode_cache, 0);
3309 while (repair && cache) {
3310 node = container_of(cache, struct ptr_node, cache);
3311 rec = node->data;
3312 cache = next_cache_extent(cache);
3314 /* Need to free everything up and rescan */
3315 if (stage == 3) {
3316 remove_cache_extent(inode_cache, &node->cache);
3317 free(node);
3318 free_inode_rec(rec);
3319 continue;
3322 if (list_empty(&rec->backrefs))
3323 continue;
3325 ret = repair_inode_backrefs(root, rec, inode_cache,
3326 stage == 1);
3327 if (ret < 0) {
3328 err = ret;
3329 stage = 2;
3330 break;
3331 } if (ret > 0) {
3332 err = -EAGAIN;
3336 if (err)
3337 return err;
3339 rec = get_inode_rec(inode_cache, root_dirid, 0);
3340 BUG_ON(IS_ERR(rec));
3341 if (rec) {
3342 ret = check_root_dir(rec);
3343 if (ret) {
3344 fprintf(stderr, "root %llu root dir %llu error\n",
3345 (unsigned long long)root->root_key.objectid,
3346 (unsigned long long)root_dirid);
3347 print_inode_error(root, rec);
3348 error++;
3350 } else {
3351 if (repair) {
3352 struct btrfs_trans_handle *trans;
3354 trans = btrfs_start_transaction(root, 1);
3355 if (IS_ERR(trans)) {
3356 err = PTR_ERR(trans);
3357 return err;
3360 fprintf(stderr,
3361 "root %llu missing its root dir, recreating\n",
3362 (unsigned long long)root->objectid);
3364 ret = btrfs_make_root_dir(trans, root, root_dirid);
3365 BUG_ON(ret);
3367 btrfs_commit_transaction(trans, root);
3368 return -EAGAIN;
3371 fprintf(stderr, "root %llu root dir %llu not found\n",
3372 (unsigned long long)root->root_key.objectid,
3373 (unsigned long long)root_dirid);
3376 while (1) {
3377 cache = search_cache_extent(inode_cache, 0);
3378 if (!cache)
3379 break;
3380 node = container_of(cache, struct ptr_node, cache);
3381 rec = node->data;
3382 remove_cache_extent(inode_cache, &node->cache);
3383 free(node);
3384 if (rec->ino == root_dirid ||
3385 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386 free_inode_rec(rec);
3387 continue;
3390 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391 ret = check_orphan_item(root, rec->ino);
3392 if (ret == 0)
3393 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394 if (can_free_inode_rec(rec)) {
3395 free_inode_rec(rec);
3396 continue;
3400 if (!rec->found_inode_item)
3401 rec->errors |= I_ERR_NO_INODE_ITEM;
3402 if (rec->found_link != rec->nlink)
3403 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404 if (repair) {
3405 ret = try_repair_inode(root, rec);
3406 if (ret == 0 && can_free_inode_rec(rec)) {
3407 free_inode_rec(rec);
3408 continue;
3410 ret = 0;
3413 if (!(repair && ret == 0))
3414 error++;
3415 print_inode_error(root, rec);
3416 list_for_each_entry(backref, &rec->backrefs, list) {
3417 if (!backref->found_dir_item)
3418 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419 if (!backref->found_dir_index)
3420 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421 if (!backref->found_inode_ref)
3422 backref->errors |= REF_ERR_NO_INODE_REF;
3423 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424 " namelen %u name %s filetype %d errors %x",
3425 (unsigned long long)backref->dir,
3426 (unsigned long long)backref->index,
3427 backref->namelen, backref->name,
3428 backref->filetype, backref->errors);
3429 print_ref_error(backref->errors);
3431 free_inode_rec(rec);
3433 return (error > 0) ? -1 : 0;
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437 u64 objectid)
3439 struct cache_extent *cache;
3440 struct root_record *rec = NULL;
3441 int ret;
3443 cache = lookup_cache_extent(root_cache, objectid, 1);
3444 if (cache) {
3445 rec = container_of(cache, struct root_record, cache);
3446 } else {
3447 rec = calloc(1, sizeof(*rec));
3448 if (!rec)
3449 return ERR_PTR(-ENOMEM);
3450 rec->objectid = objectid;
3451 INIT_LIST_HEAD(&rec->backrefs);
3452 rec->cache.start = objectid;
3453 rec->cache.size = 1;
3455 ret = insert_cache_extent(root_cache, &rec->cache);
3456 if (ret)
3457 return ERR_PTR(-EEXIST);
3459 return rec;
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463 u64 ref_root, u64 dir, u64 index,
3464 const char *name, int namelen)
3466 struct root_backref *backref;
3468 list_for_each_entry(backref, &rec->backrefs, list) {
3469 if (backref->ref_root != ref_root || backref->dir != dir ||
3470 backref->namelen != namelen)
3471 continue;
3472 if (memcmp(name, backref->name, namelen))
3473 continue;
3474 return backref;
3477 backref = calloc(1, sizeof(*backref) + namelen + 1);
3478 if (!backref)
3479 return NULL;
3480 backref->ref_root = ref_root;
3481 backref->dir = dir;
3482 backref->index = index;
3483 backref->namelen = namelen;
3484 memcpy(backref->name, name, namelen);
3485 backref->name[namelen] = '\0';
3486 list_add_tail(&backref->list, &rec->backrefs);
3487 return backref;
3490 static void free_root_record(struct cache_extent *cache)
3492 struct root_record *rec;
3493 struct root_backref *backref;
3495 rec = container_of(cache, struct root_record, cache);
3496 while (!list_empty(&rec->backrefs)) {
3497 backref = to_root_backref(rec->backrefs.next);
3498 list_del(&backref->list);
3499 free(backref);
3502 free(rec);
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3507 static int add_root_backref(struct cache_tree *root_cache,
3508 u64 root_id, u64 ref_root, u64 dir, u64 index,
3509 const char *name, int namelen,
3510 int item_type, int errors)
3512 struct root_record *rec;
3513 struct root_backref *backref;
3515 rec = get_root_rec(root_cache, root_id);
3516 BUG_ON(IS_ERR(rec));
3517 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518 BUG_ON(!backref);
3520 backref->errors |= errors;
3522 if (item_type != BTRFS_DIR_ITEM_KEY) {
3523 if (backref->found_dir_index || backref->found_back_ref ||
3524 backref->found_forward_ref) {
3525 if (backref->index != index)
3526 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527 } else {
3528 backref->index = index;
3532 if (item_type == BTRFS_DIR_ITEM_KEY) {
3533 if (backref->found_forward_ref)
3534 rec->found_ref++;
3535 backref->found_dir_item = 1;
3536 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537 backref->found_dir_index = 1;
3538 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539 if (backref->found_forward_ref)
3540 backref->errors |= REF_ERR_DUP_ROOT_REF;
3541 else if (backref->found_dir_item)
3542 rec->found_ref++;
3543 backref->found_forward_ref = 1;
3544 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545 if (backref->found_back_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547 backref->found_back_ref = 1;
3548 } else {
3549 BUG_ON(1);
3552 if (backref->found_forward_ref && backref->found_dir_item)
3553 backref->reachable = 1;
3554 return 0;
3557 static int merge_root_recs(struct btrfs_root *root,
3558 struct cache_tree *src_cache,
3559 struct cache_tree *dst_cache)
3561 struct cache_extent *cache;
3562 struct ptr_node *node;
3563 struct inode_record *rec;
3564 struct inode_backref *backref;
3565 int ret = 0;
3567 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568 free_inode_recs_tree(src_cache);
3569 return 0;
3572 while (1) {
3573 cache = search_cache_extent(src_cache, 0);
3574 if (!cache)
3575 break;
3576 node = container_of(cache, struct ptr_node, cache);
3577 rec = node->data;
3578 remove_cache_extent(src_cache, &node->cache);
3579 free(node);
3581 ret = is_child_root(root, root->objectid, rec->ino);
3582 if (ret < 0)
3583 break;
3584 else if (ret == 0)
3585 goto skip;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 BUG_ON(backref->found_inode_ref);
3589 if (backref->found_dir_item)
3590 add_root_backref(dst_cache, rec->ino,
3591 root->root_key.objectid, backref->dir,
3592 backref->index, backref->name,
3593 backref->namelen, BTRFS_DIR_ITEM_KEY,
3594 backref->errors);
3595 if (backref->found_dir_index)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_INDEX_KEY,
3600 backref->errors);
3602 skip:
3603 free_inode_rec(rec);
3605 if (ret < 0)
3606 return ret;
3607 return 0;
3610 static int check_root_refs(struct btrfs_root *root,
3611 struct cache_tree *root_cache)
3613 struct root_record *rec;
3614 struct root_record *ref_root;
3615 struct root_backref *backref;
3616 struct cache_extent *cache;
3617 int loop = 1;
3618 int ret;
3619 int error;
3620 int errors = 0;
3622 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623 BUG_ON(IS_ERR(rec));
3624 rec->found_ref = 1;
3626 /* fixme: this can not detect circular references */
3627 while (loop) {
3628 loop = 0;
3629 cache = search_cache_extent(root_cache, 0);
3630 while (1) {
3631 if (!cache)
3632 break;
3633 rec = container_of(cache, struct root_record, cache);
3634 cache = next_cache_extent(cache);
3636 if (rec->found_ref == 0)
3637 continue;
3639 list_for_each_entry(backref, &rec->backrefs, list) {
3640 if (!backref->reachable)
3641 continue;
3643 ref_root = get_root_rec(root_cache,
3644 backref->ref_root);
3645 BUG_ON(IS_ERR(ref_root));
3646 if (ref_root->found_ref > 0)
3647 continue;
3649 backref->reachable = 0;
3650 rec->found_ref--;
3651 if (rec->found_ref == 0)
3652 loop = 1;
3657 cache = search_cache_extent(root_cache, 0);
3658 while (1) {
3659 if (!cache)
3660 break;
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0 &&
3665 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667 ret = check_orphan_item(root->fs_info->tree_root,
3668 rec->objectid);
3669 if (ret == 0)
3670 continue;
3673 * If we don't have a root item then we likely just have
3674 * a dir item in a snapshot for this root but no actual
3675 * ref key or anything so it's meaningless.
3677 if (!rec->found_root_item)
3678 continue;
3679 errors++;
3680 fprintf(stderr, "fs tree %llu not referenced\n",
3681 (unsigned long long)rec->objectid);
3684 error = 0;
3685 if (rec->found_ref > 0 && !rec->found_root_item)
3686 error = 1;
3687 list_for_each_entry(backref, &rec->backrefs, list) {
3688 if (!backref->found_dir_item)
3689 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690 if (!backref->found_dir_index)
3691 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692 if (!backref->found_back_ref)
3693 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694 if (!backref->found_forward_ref)
3695 backref->errors |= REF_ERR_NO_ROOT_REF;
3696 if (backref->reachable && backref->errors)
3697 error = 1;
3699 if (!error)
3700 continue;
3702 errors++;
3703 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704 (unsigned long long)rec->objectid, rec->found_ref,
3705 rec->found_root_item ? "" : "not found");
3707 list_for_each_entry(backref, &rec->backrefs, list) {
3708 if (!backref->reachable)
3709 continue;
3710 if (!backref->errors && rec->found_root_item)
3711 continue;
3712 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713 " index %llu namelen %u name %s errors %x\n",
3714 (unsigned long long)backref->ref_root,
3715 (unsigned long long)backref->dir,
3716 (unsigned long long)backref->index,
3717 backref->namelen, backref->name,
3718 backref->errors);
3719 print_ref_error(backref->errors);
3722 return errors > 0 ? 1 : 0;
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726 struct btrfs_key *key,
3727 struct cache_tree *root_cache)
3729 u64 dirid;
3730 u64 index;
3731 u32 len;
3732 u32 name_len;
3733 struct btrfs_root_ref *ref;
3734 char namebuf[BTRFS_NAME_LEN];
3735 int error;
3737 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3739 dirid = btrfs_root_ref_dirid(eb, ref);
3740 index = btrfs_root_ref_sequence(eb, ref);
3741 name_len = btrfs_root_ref_name_len(eb, ref);
3743 if (name_len <= BTRFS_NAME_LEN) {
3744 len = name_len;
3745 error = 0;
3746 } else {
3747 len = BTRFS_NAME_LEN;
3748 error = REF_ERR_NAME_TOO_LONG;
3750 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3752 if (key->type == BTRFS_ROOT_REF_KEY) {
3753 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754 index, namebuf, len, key->type, error);
3755 } else {
3756 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757 index, namebuf, len, key->type, error);
3759 return 0;
3762 static void free_corrupt_block(struct cache_extent *cache)
3764 struct btrfs_corrupt_block *corrupt;
3766 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767 free(corrupt);
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3773 * Repair the btree of the given root.
3775 * The fix is to remove the node key in corrupt_blocks cache_tree.
3776 * and rebalance the tree.
3777 * After the fix, the btree should be writeable.
3779 static int repair_btree(struct btrfs_root *root,
3780 struct cache_tree *corrupt_blocks)
3782 struct btrfs_trans_handle *trans;
3783 struct btrfs_path path;
3784 struct btrfs_corrupt_block *corrupt;
3785 struct cache_extent *cache;
3786 struct btrfs_key key;
3787 u64 offset;
3788 int level;
3789 int ret = 0;
3791 if (cache_tree_empty(corrupt_blocks))
3792 return 0;
3794 trans = btrfs_start_transaction(root, 1);
3795 if (IS_ERR(trans)) {
3796 ret = PTR_ERR(trans);
3797 fprintf(stderr, "Error starting transaction: %s\n",
3798 strerror(-ret));
3799 return ret;
3801 btrfs_init_path(&path);
3802 cache = first_cache_extent(corrupt_blocks);
3803 while (cache) {
3804 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805 cache);
3806 level = corrupt->level;
3807 path.lowest_level = level;
3808 key.objectid = corrupt->key.objectid;
3809 key.type = corrupt->key.type;
3810 key.offset = corrupt->key.offset;
3813 * Here we don't want to do any tree balance, since it may
3814 * cause a balance with corrupted brother leaf/node,
3815 * so ins_len set to 0 here.
3816 * Balance will be done after all corrupt node/leaf is deleted.
3818 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819 if (ret < 0)
3820 goto out;
3821 offset = btrfs_node_blockptr(path.nodes[level],
3822 path.slots[level]);
3824 /* Remove the ptr */
3825 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826 if (ret < 0)
3827 goto out;
3829 * Remove the corresponding extent
3830 * return value is not concerned.
3832 btrfs_release_path(&path);
3833 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834 0, root->root_key.objectid,
3835 level - 1, 0);
3836 cache = next_cache_extent(cache);
3839 /* Balance the btree using btrfs_search_slot() */
3840 cache = first_cache_extent(corrupt_blocks);
3841 while (cache) {
3842 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843 cache);
3844 memcpy(&key, &corrupt->key, sizeof(key));
3845 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846 if (ret < 0)
3847 goto out;
3848 /* return will always >0 since it won't find the item */
3849 ret = 0;
3850 btrfs_release_path(&path);
3851 cache = next_cache_extent(cache);
3853 out:
3854 btrfs_commit_transaction(trans, root);
3855 btrfs_release_path(&path);
3856 return ret;
3859 static int check_fs_root(struct btrfs_root *root,
3860 struct cache_tree *root_cache,
3861 struct walk_control *wc)
3863 int ret = 0;
3864 int err = 0;
3865 int wret;
3866 int level;
3867 struct btrfs_path path;
3868 struct shared_node root_node;
3869 struct root_record *rec;
3870 struct btrfs_root_item *root_item = &root->root_item;
3871 struct cache_tree corrupt_blocks;
3872 struct orphan_data_extent *orphan;
3873 struct orphan_data_extent *tmp;
3874 enum btrfs_tree_block_status status;
3875 struct node_refs nrefs;
3878 * Reuse the corrupt_block cache tree to record corrupted tree block
3880 * Unlike the usage in extent tree check, here we do it in a per
3881 * fs/subvol tree base.
3883 cache_tree_init(&corrupt_blocks);
3884 root->fs_info->corrupt_blocks = &corrupt_blocks;
3886 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887 rec = get_root_rec(root_cache, root->root_key.objectid);
3888 BUG_ON(IS_ERR(rec));
3889 if (btrfs_root_refs(root_item) > 0)
3890 rec->found_root_item = 1;
3893 btrfs_init_path(&path);
3894 memset(&root_node, 0, sizeof(root_node));
3895 cache_tree_init(&root_node.root_cache);
3896 cache_tree_init(&root_node.inode_cache);
3897 memset(&nrefs, 0, sizeof(nrefs));
3899 /* Move the orphan extent record to corresponding inode_record */
3900 list_for_each_entry_safe(orphan, tmp,
3901 &root->orphan_data_extents, list) {
3902 struct inode_record *inode;
3904 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3906 BUG_ON(IS_ERR(inode));
3907 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908 list_move(&orphan->list, &inode->orphan_extents);
3911 level = btrfs_header_level(root->node);
3912 memset(wc->nodes, 0, sizeof(wc->nodes));
3913 wc->nodes[level] = &root_node;
3914 wc->active_node = level;
3915 wc->root_level = level;
3917 /* We may not have checked the root block, lets do that now */
3918 if (btrfs_is_leaf(root->node))
3919 status = btrfs_check_leaf(root, NULL, root->node);
3920 else
3921 status = btrfs_check_node(root, NULL, root->node);
3922 if (status != BTRFS_TREE_BLOCK_CLEAN)
3923 return -EIO;
3925 if (btrfs_root_refs(root_item) > 0 ||
3926 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927 path.nodes[level] = root->node;
3928 extent_buffer_get(root->node);
3929 path.slots[level] = 0;
3930 } else {
3931 struct btrfs_key key;
3932 struct btrfs_disk_key found_key;
3934 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935 level = root_item->drop_level;
3936 path.lowest_level = level;
3937 if (level > btrfs_header_level(root->node) ||
3938 level >= BTRFS_MAX_LEVEL) {
3939 error("ignoring invalid drop level: %u", level);
3940 goto skip_walking;
3942 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943 if (wret < 0)
3944 goto skip_walking;
3945 btrfs_node_key(path.nodes[level], &found_key,
3946 path.slots[level]);
3947 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948 sizeof(found_key)));
3951 while (1) {
3952 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953 if (wret < 0)
3954 ret = wret;
3955 if (wret != 0)
3956 break;
3958 wret = walk_up_tree(root, &path, wc, &level);
3959 if (wret < 0)
3960 ret = wret;
3961 if (wret != 0)
3962 break;
3964 skip_walking:
3965 btrfs_release_path(&path);
3967 if (!cache_tree_empty(&corrupt_blocks)) {
3968 struct cache_extent *cache;
3969 struct btrfs_corrupt_block *corrupt;
3971 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972 root->root_key.objectid);
3973 cache = first_cache_extent(&corrupt_blocks);
3974 while (cache) {
3975 corrupt = container_of(cache,
3976 struct btrfs_corrupt_block,
3977 cache);
3978 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979 cache->start, corrupt->level,
3980 corrupt->key.objectid, corrupt->key.type,
3981 corrupt->key.offset);
3982 cache = next_cache_extent(cache);
3984 if (repair) {
3985 printf("Try to repair the btree for root %llu\n",
3986 root->root_key.objectid);
3987 ret = repair_btree(root, &corrupt_blocks);
3988 if (ret < 0)
3989 fprintf(stderr, "Failed to repair btree: %s\n",
3990 strerror(-ret));
3991 if (!ret)
3992 printf("Btree for root %llu is fixed\n",
3993 root->root_key.objectid);
3997 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998 if (err < 0)
3999 ret = err;
4001 if (root_node.current) {
4002 root_node.current->checked = 1;
4003 maybe_free_inode_rec(&root_node.inode_cache,
4004 root_node.current);
4007 err = check_inode_recs(root, &root_node.inode_cache);
4008 if (!ret)
4009 ret = err;
4011 free_corrupt_blocks_tree(&corrupt_blocks);
4012 root->fs_info->corrupt_blocks = NULL;
4013 free_orphan_data_extents(&root->orphan_data_extents);
4014 return ret;
4017 static int fs_root_objectid(u64 objectid)
4019 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021 return 1;
4022 return is_fstree(objectid);
4025 static int check_fs_roots(struct btrfs_root *root,
4026 struct cache_tree *root_cache)
4028 struct btrfs_path path;
4029 struct btrfs_key key;
4030 struct walk_control wc;
4031 struct extent_buffer *leaf, *tree_node;
4032 struct btrfs_root *tmp_root;
4033 struct btrfs_root *tree_root = root->fs_info->tree_root;
4034 int ret;
4035 int err = 0;
4037 if (ctx.progress_enabled) {
4038 ctx.tp = TASK_FS_ROOTS;
4039 task_start(ctx.info);
4043 * Just in case we made any changes to the extent tree that weren't
4044 * reflected into the free space cache yet.
4046 if (repair)
4047 reset_cached_block_groups(root->fs_info);
4048 memset(&wc, 0, sizeof(wc));
4049 cache_tree_init(&wc.shared);
4050 btrfs_init_path(&path);
4052 again:
4053 key.offset = 0;
4054 key.objectid = 0;
4055 key.type = BTRFS_ROOT_ITEM_KEY;
4056 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057 if (ret < 0) {
4058 err = 1;
4059 goto out;
4061 tree_node = tree_root->node;
4062 while (1) {
4063 if (tree_node != tree_root->node) {
4064 free_root_recs_tree(root_cache);
4065 btrfs_release_path(&path);
4066 goto again;
4068 leaf = path.nodes[0];
4069 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070 ret = btrfs_next_leaf(tree_root, &path);
4071 if (ret) {
4072 if (ret < 0)
4073 err = 1;
4074 break;
4076 leaf = path.nodes[0];
4078 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080 fs_root_objectid(key.objectid)) {
4081 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082 tmp_root = btrfs_read_fs_root_no_cache(
4083 root->fs_info, &key);
4084 } else {
4085 key.offset = (u64)-1;
4086 tmp_root = btrfs_read_fs_root(
4087 root->fs_info, &key);
4089 if (IS_ERR(tmp_root)) {
4090 err = 1;
4091 goto next;
4093 ret = check_fs_root(tmp_root, root_cache, &wc);
4094 if (ret == -EAGAIN) {
4095 free_root_recs_tree(root_cache);
4096 btrfs_release_path(&path);
4097 goto again;
4099 if (ret)
4100 err = 1;
4101 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102 btrfs_free_fs_root(tmp_root);
4103 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104 key.type == BTRFS_ROOT_BACKREF_KEY) {
4105 process_root_ref(leaf, path.slots[0], &key,
4106 root_cache);
4108 next:
4109 path.slots[0]++;
4111 out:
4112 btrfs_release_path(&path);
4113 if (err)
4114 free_extent_cache_tree(&wc.shared);
4115 if (!cache_tree_empty(&wc.shared))
4116 fprintf(stderr, "warning line %d\n", __LINE__);
4118 task_stop(ctx.info);
4120 return err;
4124 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125 * INODE_REF/INODE_EXTREF match.
4127 * @root: the root of the fs/file tree
4128 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4129 * @key: the key of the DIR_ITEM/DIR_INDEX
4130 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4131 * distinguish root_dir between normal dir/file
4132 * @name: the name in the INODE_REF/INODE_EXTREF
4133 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4134 * @mode: the st_mode of INODE_ITEM
4136 * Return 0 if no error occurred.
4137 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139 * dir/file.
4140 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141 * not match for normal dir/file.
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144 struct btrfs_key *key, u64 index, char *name,
4145 u32 namelen, u32 mode)
4147 struct btrfs_path path;
4148 struct extent_buffer *node;
4149 struct btrfs_dir_item *di;
4150 struct btrfs_key location;
4151 char namebuf[BTRFS_NAME_LEN] = {0};
4152 u32 total;
4153 u32 cur = 0;
4154 u32 len;
4155 u32 name_len;
4156 u32 data_len;
4157 u8 filetype;
4158 int slot;
4159 int ret;
4161 btrfs_init_path(&path);
4162 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163 if (ret < 0) {
4164 ret = DIR_ITEM_MISSING;
4165 goto out;
4168 /* Process root dir and goto out*/
4169 if (index == 0) {
4170 if (ret == 0) {
4171 ret = ROOT_DIR_ERROR;
4172 error(
4173 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174 root->objectid,
4175 ref_key->type == BTRFS_INODE_REF_KEY ?
4176 "REF" : "EXTREF",
4177 ref_key->objectid, ref_key->offset,
4178 key->type == BTRFS_DIR_ITEM_KEY ?
4179 "DIR_ITEM" : "DIR_INDEX");
4180 } else {
4181 ret = 0;
4184 goto out;
4187 /* Process normal file/dir */
4188 if (ret > 0) {
4189 ret = DIR_ITEM_MISSING;
4190 error(
4191 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192 root->objectid,
4193 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194 ref_key->objectid, ref_key->offset,
4195 key->type == BTRFS_DIR_ITEM_KEY ?
4196 "DIR_ITEM" : "DIR_INDEX",
4197 key->objectid, key->offset, namelen, name,
4198 imode_to_type(mode));
4199 goto out;
4202 /* Check whether inode_id/filetype/name match */
4203 node = path.nodes[0];
4204 slot = path.slots[0];
4205 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206 total = btrfs_item_size_nr(node, slot);
4207 while (cur < total) {
4208 ret = DIR_ITEM_MISMATCH;
4209 name_len = btrfs_dir_name_len(node, di);
4210 data_len = btrfs_dir_data_len(node, di);
4212 btrfs_dir_item_key_to_cpu(node, di, &location);
4213 if (location.objectid != ref_key->objectid ||
4214 location.type != BTRFS_INODE_ITEM_KEY ||
4215 location.offset != 0)
4216 goto next;
4218 filetype = btrfs_dir_type(node, di);
4219 if (imode_to_type(mode) != filetype)
4220 goto next;
4222 if (name_len <= BTRFS_NAME_LEN) {
4223 len = name_len;
4224 } else {
4225 len = BTRFS_NAME_LEN;
4226 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227 root->objectid,
4228 key->type == BTRFS_DIR_ITEM_KEY ?
4229 "DIR_ITEM" : "DIR_INDEX",
4230 key->objectid, key->offset, name_len);
4232 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233 if (len != namelen || strncmp(namebuf, name, len))
4234 goto next;
4236 ret = 0;
4237 goto out;
4238 next:
4239 len = sizeof(*di) + name_len + data_len;
4240 di = (struct btrfs_dir_item *)((char *)di + len);
4241 cur += len;
4243 if (ret == DIR_ITEM_MISMATCH)
4244 error(
4245 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246 root->objectid,
4247 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248 ref_key->objectid, ref_key->offset,
4249 key->type == BTRFS_DIR_ITEM_KEY ?
4250 "DIR_ITEM" : "DIR_INDEX",
4251 key->objectid, key->offset, namelen, name,
4252 imode_to_type(mode));
4253 out:
4254 btrfs_release_path(&path);
4255 return ret;
4259 * Traverse the given INODE_REF and call find_dir_item() to find related
4260 * DIR_ITEM/DIR_INDEX.
4262 * @root: the root of the fs/file tree
4263 * @ref_key: the key of the INODE_REF
4264 * @refs: the count of INODE_REF
4265 * @mode: the st_mode of INODE_ITEM
4267 * Return 0 if no error occurred.
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270 struct extent_buffer *node, int slot, u64 *refs,
4271 int mode)
4273 struct btrfs_key key;
4274 struct btrfs_inode_ref *ref;
4275 char namebuf[BTRFS_NAME_LEN] = {0};
4276 u32 total;
4277 u32 cur = 0;
4278 u32 len;
4279 u32 name_len;
4280 u64 index;
4281 int ret, err = 0;
4283 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284 total = btrfs_item_size_nr(node, slot);
4286 next:
4287 /* Update inode ref count */
4288 (*refs)++;
4290 index = btrfs_inode_ref_index(node, ref);
4291 name_len = btrfs_inode_ref_name_len(node, ref);
4292 if (name_len <= BTRFS_NAME_LEN) {
4293 len = name_len;
4294 } else {
4295 len = BTRFS_NAME_LEN;
4296 warning("root %llu INODE_REF[%llu %llu] name too long",
4297 root->objectid, ref_key->objectid, ref_key->offset);
4300 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4302 /* Check root dir ref name */
4303 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305 root->objectid, ref_key->objectid, ref_key->offset,
4306 namebuf);
4307 err |= ROOT_DIR_ERROR;
4310 /* Find related DIR_INDEX */
4311 key.objectid = ref_key->offset;
4312 key.type = BTRFS_DIR_INDEX_KEY;
4313 key.offset = index;
4314 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315 err |= ret;
4317 /* Find related dir_item */
4318 key.objectid = ref_key->offset;
4319 key.type = BTRFS_DIR_ITEM_KEY;
4320 key.offset = btrfs_name_hash(namebuf, len);
4321 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322 err |= ret;
4324 len = sizeof(*ref) + name_len;
4325 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326 cur += len;
4327 if (cur < total)
4328 goto next;
4330 return err;
4334 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335 * DIR_ITEM/DIR_INDEX.
4337 * @root: the root of the fs/file tree
4338 * @ref_key: the key of the INODE_EXTREF
4339 * @refs: the count of INODE_EXTREF
4340 * @mode: the st_mode of INODE_ITEM
4342 * Return 0 if no error occurred.
4344 static int check_inode_extref(struct btrfs_root *root,
4345 struct btrfs_key *ref_key,
4346 struct extent_buffer *node, int slot, u64 *refs,
4347 int mode)
4349 struct btrfs_key key;
4350 struct btrfs_inode_extref *extref;
4351 char namebuf[BTRFS_NAME_LEN] = {0};
4352 u32 total;
4353 u32 cur = 0;
4354 u32 len;
4355 u32 name_len;
4356 u64 index;
4357 u64 parent;
4358 int ret;
4359 int err = 0;
4361 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362 total = btrfs_item_size_nr(node, slot);
4364 next:
4365 /* update inode ref count */
4366 (*refs)++;
4367 name_len = btrfs_inode_extref_name_len(node, extref);
4368 index = btrfs_inode_extref_index(node, extref);
4369 parent = btrfs_inode_extref_parent(node, extref);
4370 if (name_len <= BTRFS_NAME_LEN) {
4371 len = name_len;
4372 } else {
4373 len = BTRFS_NAME_LEN;
4374 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375 root->objectid, ref_key->objectid, ref_key->offset);
4377 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4379 /* Check root dir ref name */
4380 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382 root->objectid, ref_key->objectid, ref_key->offset,
4383 namebuf);
4384 err |= ROOT_DIR_ERROR;
4387 /* find related dir_index */
4388 key.objectid = parent;
4389 key.type = BTRFS_DIR_INDEX_KEY;
4390 key.offset = index;
4391 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392 err |= ret;
4394 /* find related dir_item */
4395 key.objectid = parent;
4396 key.type = BTRFS_DIR_ITEM_KEY;
4397 key.offset = btrfs_name_hash(namebuf, len);
4398 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399 err |= ret;
4401 len = sizeof(*extref) + name_len;
4402 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403 cur += len;
4405 if (cur < total)
4406 goto next;
4408 return err;
4412 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413 * DIR_ITEM/DIR_INDEX match.
4415 * @root: the root of the fs/file tree
4416 * @key: the key of the INODE_REF/INODE_EXTREF
4417 * @name: the name in the INODE_REF/INODE_EXTREF
4418 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4419 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420 * to (u64)-1
4421 * @ext_ref: the EXTENDED_IREF feature
4423 * Return 0 if no error occurred.
4424 * Return >0 for error bitmap
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427 char *name, int namelen, u64 index,
4428 unsigned int ext_ref)
4430 struct btrfs_path path;
4431 struct btrfs_inode_ref *ref;
4432 struct btrfs_inode_extref *extref;
4433 struct extent_buffer *node;
4434 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435 u32 total;
4436 u32 cur = 0;
4437 u32 len;
4438 u32 ref_namelen;
4439 u64 ref_index;
4440 u64 parent;
4441 u64 dir_id;
4442 int slot;
4443 int ret;
4445 btrfs_init_path(&path);
4446 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447 if (ret) {
4448 ret = INODE_REF_MISSING;
4449 goto extref;
4452 node = path.nodes[0];
4453 slot = path.slots[0];
4455 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456 total = btrfs_item_size_nr(node, slot);
4458 /* Iterate all entry of INODE_REF */
4459 while (cur < total) {
4460 ret = INODE_REF_MISSING;
4462 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463 ref_index = btrfs_inode_ref_index(node, ref);
4464 if (index != (u64)-1 && index != ref_index)
4465 goto next_ref;
4467 if (ref_namelen <= BTRFS_NAME_LEN) {
4468 len = ref_namelen;
4469 } else {
4470 len = BTRFS_NAME_LEN;
4471 warning("root %llu INODE %s[%llu %llu] name too long",
4472 root->objectid,
4473 key->type == BTRFS_INODE_REF_KEY ?
4474 "REF" : "EXTREF",
4475 key->objectid, key->offset);
4477 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478 len);
4480 if (len != namelen || strncmp(ref_namebuf, name, len))
4481 goto next_ref;
4483 ret = 0;
4484 goto out;
4485 next_ref:
4486 len = sizeof(*ref) + ref_namelen;
4487 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488 cur += len;
4491 extref:
4492 /* Skip if not support EXTENDED_IREF feature */
4493 if (!ext_ref)
4494 goto out;
4496 btrfs_release_path(&path);
4497 btrfs_init_path(&path);
4499 dir_id = key->offset;
4500 key->type = BTRFS_INODE_EXTREF_KEY;
4501 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4503 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504 if (ret) {
4505 ret = INODE_REF_MISSING;
4506 goto out;
4509 node = path.nodes[0];
4510 slot = path.slots[0];
4512 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513 cur = 0;
4514 total = btrfs_item_size_nr(node, slot);
4516 /* Iterate all entry of INODE_EXTREF */
4517 while (cur < total) {
4518 ret = INODE_REF_MISSING;
4520 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521 ref_index = btrfs_inode_extref_index(node, extref);
4522 parent = btrfs_inode_extref_parent(node, extref);
4523 if (index != (u64)-1 && index != ref_index)
4524 goto next_extref;
4526 if (parent != dir_id)
4527 goto next_extref;
4529 if (ref_namelen <= BTRFS_NAME_LEN) {
4530 len = ref_namelen;
4531 } else {
4532 len = BTRFS_NAME_LEN;
4533 warning("root %llu INODE %s[%llu %llu] name too long",
4534 root->objectid,
4535 key->type == BTRFS_INODE_REF_KEY ?
4536 "REF" : "EXTREF",
4537 key->objectid, key->offset);
4539 read_extent_buffer(node, ref_namebuf,
4540 (unsigned long)(extref + 1), len);
4542 if (len != namelen || strncmp(ref_namebuf, name, len))
4543 goto next_extref;
4545 ret = 0;
4546 goto out;
4548 next_extref:
4549 len = sizeof(*extref) + ref_namelen;
4550 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551 cur += len;
4554 out:
4555 btrfs_release_path(&path);
4556 return ret;
4560 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4563 * @root: the root of the fs/file tree
4564 * @key: the key of the INODE_REF/INODE_EXTREF
4565 * @size: the st_size of the INODE_ITEM
4566 * @ext_ref: the EXTENDED_IREF feature
4568 * Return 0 if no error occurred.
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571 struct extent_buffer *node, int slot, u64 *size,
4572 unsigned int ext_ref)
4574 struct btrfs_dir_item *di;
4575 struct btrfs_inode_item *ii;
4576 struct btrfs_path path;
4577 struct btrfs_key location;
4578 char namebuf[BTRFS_NAME_LEN] = {0};
4579 u32 total;
4580 u32 cur = 0;
4581 u32 len;
4582 u32 name_len;
4583 u32 data_len;
4584 u8 filetype;
4585 u32 mode;
4586 u64 index;
4587 int ret;
4588 int err = 0;
4591 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592 * ignore index check.
4594 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4596 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597 total = btrfs_item_size_nr(node, slot);
4599 while (cur < total) {
4600 data_len = btrfs_dir_data_len(node, di);
4601 if (data_len)
4602 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604 "DIR_ITEM" : "DIR_INDEX",
4605 key->objectid, key->offset, data_len);
4607 name_len = btrfs_dir_name_len(node, di);
4608 if (name_len <= BTRFS_NAME_LEN) {
4609 len = name_len;
4610 } else {
4611 len = BTRFS_NAME_LEN;
4612 warning("root %llu %s[%llu %llu] name too long",
4613 root->objectid,
4614 key->type == BTRFS_DIR_ITEM_KEY ?
4615 "DIR_ITEM" : "DIR_INDEX",
4616 key->objectid, key->offset);
4618 (*size) += name_len;
4620 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621 filetype = btrfs_dir_type(node, di);
4623 btrfs_init_path(&path);
4624 btrfs_dir_item_key_to_cpu(node, di, &location);
4626 /* Ignore related ROOT_ITEM check */
4627 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628 goto next;
4630 /* Check relative INODE_ITEM(existence/filetype) */
4631 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632 if (ret) {
4633 err |= INODE_ITEM_MISSING;
4634 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637 key->offset, location.objectid, name_len,
4638 namebuf, filetype);
4639 goto next;
4642 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643 struct btrfs_inode_item);
4644 mode = btrfs_inode_mode(path.nodes[0], ii);
4646 if (imode_to_type(mode) != filetype) {
4647 err |= INODE_ITEM_MISMATCH;
4648 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651 key->offset, name_len, namebuf, filetype);
4654 /* Check relative INODE_REF/INODE_EXTREF */
4655 location.type = BTRFS_INODE_REF_KEY;
4656 location.offset = key->objectid;
4657 ret = find_inode_ref(root, &location, namebuf, len,
4658 index, ext_ref);
4659 err |= ret;
4660 if (ret & INODE_REF_MISSING)
4661 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664 key->offset, name_len, namebuf, filetype);
4666 next:
4667 btrfs_release_path(&path);
4668 len = sizeof(*di) + name_len + data_len;
4669 di = (struct btrfs_dir_item *)((char *)di + len);
4670 cur += len;
4672 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674 root->objectid, key->objectid, key->offset);
4675 break;
4679 return err;
4683 * Check file extent datasum/hole, update the size of the file extents,
4684 * check and update the last offset of the file extent.
4686 * @root: the root of fs/file tree.
4687 * @fkey: the key of the file extent.
4688 * @nodatasum: INODE_NODATASUM feature.
4689 * @size: the sum of all EXTENT_DATA items size for this inode.
4690 * @end: the offset of the last extent.
4692 * Return 0 if no error occurred.
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695 struct extent_buffer *node, int slot,
4696 unsigned int nodatasum, u64 *size, u64 *end)
4698 struct btrfs_file_extent_item *fi;
4699 u64 disk_bytenr;
4700 u64 disk_num_bytes;
4701 u64 extent_num_bytes;
4702 u64 extent_offset;
4703 u64 csum_found; /* In byte size, sectorsize aligned */
4704 u64 search_start; /* Logical range start we search for csum */
4705 u64 search_len; /* Logical range len we search for csum */
4706 unsigned int extent_type;
4707 unsigned int is_hole;
4708 int compressed = 0;
4709 int ret;
4710 int err = 0;
4712 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4714 /* Check inline extent */
4715 extent_type = btrfs_file_extent_type(node, fi);
4716 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717 struct btrfs_item *e = btrfs_item_nr(slot);
4718 u32 item_inline_len;
4720 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722 compressed = btrfs_file_extent_compression(node, fi);
4723 if (extent_num_bytes == 0) {
4724 error(
4725 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4726 root->objectid, fkey->objectid, fkey->offset);
4727 err |= FILE_EXTENT_ERROR;
4729 if (!compressed && extent_num_bytes != item_inline_len) {
4730 error(
4731 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4732 root->objectid, fkey->objectid, fkey->offset,
4733 extent_num_bytes, item_inline_len);
4734 err |= FILE_EXTENT_ERROR;
4736 *size += extent_num_bytes;
4737 return err;
4740 /* Check extent type */
4741 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4742 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4743 err |= FILE_EXTENT_ERROR;
4744 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4745 root->objectid, fkey->objectid, fkey->offset);
4746 return err;
4749 /* Check REG_EXTENT/PREALLOC_EXTENT */
4750 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4751 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4752 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4753 extent_offset = btrfs_file_extent_offset(node, fi);
4754 compressed = btrfs_file_extent_compression(node, fi);
4755 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4758 * Check EXTENT_DATA csum
4760 * For plain (uncompressed) extent, we should only check the range
4761 * we're referring to, as it's possible that part of prealloc extent
4762 * has been written, and has csum:
4764 * |<--- Original large preallocated extent A ---->|
4765 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4766 * No csum Has csum
4768 * For compressed extent, we should check the whole range.
4770 if (!compressed) {
4771 search_start = disk_bytenr + extent_offset;
4772 search_len = extent_num_bytes;
4773 } else {
4774 search_start = disk_bytenr;
4775 search_len = disk_num_bytes;
4777 ret = count_csum_range(root, search_start, search_len, &csum_found);
4778 if (csum_found > 0 && nodatasum) {
4779 err |= ODD_CSUM_ITEM;
4780 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4781 root->objectid, fkey->objectid, fkey->offset);
4782 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4783 !is_hole && (ret < 0 || csum_found < search_len)) {
4784 err |= CSUM_ITEM_MISSING;
4785 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4786 root->objectid, fkey->objectid, fkey->offset,
4787 csum_found, search_len);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4789 err |= ODD_CSUM_ITEM;
4790 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4791 root->objectid, fkey->objectid, fkey->offset, csum_found);
4794 /* Check EXTENT_DATA hole */
4795 if (no_holes && is_hole) {
4796 err |= FILE_EXTENT_ERROR;
4797 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4798 root->objectid, fkey->objectid, fkey->offset);
4799 } else if (!no_holes && *end != fkey->offset) {
4800 err |= FILE_EXTENT_ERROR;
4801 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4802 root->objectid, fkey->objectid, fkey->offset);
4805 *end += extent_num_bytes;
4806 if (!is_hole)
4807 *size += extent_num_bytes;
4809 return err;
4813 * Check INODE_ITEM and related ITEMs (the same inode number)
4814 * 1. check link count
4815 * 2. check inode ref/extref
4816 * 3. check dir item/index
4818 * @ext_ref: the EXTENDED_IREF feature
4820 * Return 0 if no error occurred.
4821 * Return >0 for error or hit the traversal is done(by error bitmap)
4823 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4824 unsigned int ext_ref)
4826 struct extent_buffer *node;
4827 struct btrfs_inode_item *ii;
4828 struct btrfs_key key;
4829 u64 inode_id;
4830 u32 mode;
4831 u64 nlink;
4832 u64 nbytes;
4833 u64 isize;
4834 u64 size = 0;
4835 u64 refs = 0;
4836 u64 extent_end = 0;
4837 u64 extent_size = 0;
4838 unsigned int dir;
4839 unsigned int nodatasum;
4840 int slot;
4841 int ret;
4842 int err = 0;
4844 node = path->nodes[0];
4845 slot = path->slots[0];
4847 btrfs_item_key_to_cpu(node, &key, slot);
4848 inode_id = key.objectid;
4850 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4851 ret = btrfs_next_item(root, path);
4852 if (ret > 0)
4853 err |= LAST_ITEM;
4854 return err;
4857 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4858 isize = btrfs_inode_size(node, ii);
4859 nbytes = btrfs_inode_nbytes(node, ii);
4860 mode = btrfs_inode_mode(node, ii);
4861 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4862 nlink = btrfs_inode_nlink(node, ii);
4863 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4865 while (1) {
4866 ret = btrfs_next_item(root, path);
4867 if (ret < 0) {
4868 /* out will fill 'err' rusing current statistics */
4869 goto out;
4870 } else if (ret > 0) {
4871 err |= LAST_ITEM;
4872 goto out;
4875 node = path->nodes[0];
4876 slot = path->slots[0];
4877 btrfs_item_key_to_cpu(node, &key, slot);
4878 if (key.objectid != inode_id)
4879 goto out;
4881 switch (key.type) {
4882 case BTRFS_INODE_REF_KEY:
4883 ret = check_inode_ref(root, &key, node, slot, &refs,
4884 mode);
4885 err |= ret;
4886 break;
4887 case BTRFS_INODE_EXTREF_KEY:
4888 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4889 warning("root %llu EXTREF[%llu %llu] isn't supported",
4890 root->objectid, key.objectid,
4891 key.offset);
4892 ret = check_inode_extref(root, &key, node, slot, &refs,
4893 mode);
4894 err |= ret;
4895 break;
4896 case BTRFS_DIR_ITEM_KEY:
4897 case BTRFS_DIR_INDEX_KEY:
4898 if (!dir) {
4899 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4900 root->objectid, inode_id,
4901 imode_to_type(mode), key.objectid,
4902 key.offset);
4904 ret = check_dir_item(root, &key, node, slot, &size,
4905 ext_ref);
4906 err |= ret;
4907 break;
4908 case BTRFS_EXTENT_DATA_KEY:
4909 if (dir) {
4910 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4911 root->objectid, inode_id, key.objectid,
4912 key.offset);
4914 ret = check_file_extent(root, &key, node, slot,
4915 nodatasum, &extent_size,
4916 &extent_end);
4917 err |= ret;
4918 break;
4919 case BTRFS_XATTR_ITEM_KEY:
4920 break;
4921 default:
4922 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4923 key.objectid, key.type, key.offset);
4927 out:
4928 /* verify INODE_ITEM nlink/isize/nbytes */
4929 if (dir) {
4930 if (nlink != 1) {
4931 err |= LINK_COUNT_ERROR;
4932 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4933 root->objectid, inode_id, nlink);
4937 * Just a warning, as dir inode nbytes is just an
4938 * instructive value.
4940 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4941 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4942 root->objectid, inode_id, root->nodesize);
4945 if (isize != size) {
4946 err |= ISIZE_ERROR;
4947 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4948 root->objectid, inode_id, isize, size);
4950 } else {
4951 if (nlink != refs) {
4952 err |= LINK_COUNT_ERROR;
4953 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4954 root->objectid, inode_id, nlink, refs);
4955 } else if (!nlink) {
4956 err |= ORPHAN_ITEM;
4959 if (!nbytes && !no_holes && extent_end < isize) {
4960 err |= NBYTES_ERROR;
4961 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4962 root->objectid, inode_id, isize);
4965 if (nbytes != extent_size) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4968 root->objectid, inode_id, nbytes, extent_size);
4972 return err;
4975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4977 struct btrfs_path path;
4978 struct btrfs_key key;
4979 int err = 0;
4980 int ret;
4982 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4983 key.type = BTRFS_INODE_ITEM_KEY;
4984 key.offset = 0;
4986 /* For root being dropped, we don't need to check first inode */
4987 if (btrfs_root_refs(&root->root_item) == 0 &&
4988 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4989 key.objectid)
4990 return 0;
4992 btrfs_init_path(&path);
4994 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4995 if (ret < 0)
4996 goto out;
4997 if (ret > 0) {
4998 ret = 0;
4999 err |= INODE_ITEM_MISSING;
5000 error("first inode item of root %llu is missing",
5001 root->objectid);
5004 err |= check_inode_item(root, &path, ext_ref);
5005 err &= ~LAST_ITEM;
5006 if (err && !ret)
5007 ret = -EIO;
5008 out:
5009 btrfs_release_path(&path);
5010 return ret;
5014 * Iterate all item on the tree and call check_inode_item() to check.
5016 * @root: the root of the tree to be checked.
5017 * @ext_ref: the EXTENDED_IREF feature
5019 * Return 0 if no error found.
5020 * Return <0 for error.
5022 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5024 struct btrfs_path path;
5025 struct node_refs nrefs;
5026 struct btrfs_root_item *root_item = &root->root_item;
5027 int ret, wret;
5028 int level;
5031 * We need to manually check the first inode item(256)
5032 * As the following traversal function will only start from
5033 * the first inode item in the leaf, if inode item(256) is missing
5034 * we will just skip it forever.
5036 ret = check_fs_first_inode(root, ext_ref);
5037 if (ret < 0)
5038 return ret;
5040 memset(&nrefs, 0, sizeof(nrefs));
5041 level = btrfs_header_level(root->node);
5042 btrfs_init_path(&path);
5044 if (btrfs_root_refs(root_item) > 0 ||
5045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5046 path.nodes[level] = root->node;
5047 path.slots[level] = 0;
5048 extent_buffer_get(root->node);
5049 } else {
5050 struct btrfs_key key;
5052 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5053 level = root_item->drop_level;
5054 path.lowest_level = level;
5055 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5056 if (ret < 0)
5057 goto out;
5058 ret = 0;
5061 while (1) {
5062 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5063 if (wret < 0)
5064 ret = wret;
5065 if (wret != 0)
5066 break;
5068 wret = walk_up_tree_v2(root, &path, &level);
5069 if (wret < 0)
5070 ret = wret;
5071 if (wret != 0)
5072 break;
5075 out:
5076 btrfs_release_path(&path);
5077 return ret;
5081 * Find the relative ref for root_ref and root_backref.
5083 * @root: the root of the root tree.
5084 * @ref_key: the key of the root ref.
5086 * Return 0 if no error occurred.
5088 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5089 struct extent_buffer *node, int slot)
5091 struct btrfs_path path;
5092 struct btrfs_key key;
5093 struct btrfs_root_ref *ref;
5094 struct btrfs_root_ref *backref;
5095 char ref_name[BTRFS_NAME_LEN] = {0};
5096 char backref_name[BTRFS_NAME_LEN] = {0};
5097 u64 ref_dirid;
5098 u64 ref_seq;
5099 u32 ref_namelen;
5100 u64 backref_dirid;
5101 u64 backref_seq;
5102 u32 backref_namelen;
5103 u32 len;
5104 int ret;
5105 int err = 0;
5107 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5108 ref_dirid = btrfs_root_ref_dirid(node, ref);
5109 ref_seq = btrfs_root_ref_sequence(node, ref);
5110 ref_namelen = btrfs_root_ref_name_len(node, ref);
5112 if (ref_namelen <= BTRFS_NAME_LEN) {
5113 len = ref_namelen;
5114 } else {
5115 len = BTRFS_NAME_LEN;
5116 warning("%s[%llu %llu] ref_name too long",
5117 ref_key->type == BTRFS_ROOT_REF_KEY ?
5118 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5119 ref_key->offset);
5121 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5123 /* Find relative root_ref */
5124 key.objectid = ref_key->offset;
5125 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5126 key.offset = ref_key->objectid;
5128 btrfs_init_path(&path);
5129 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5130 if (ret) {
5131 err |= ROOT_REF_MISSING;
5132 error("%s[%llu %llu] couldn't find relative ref",
5133 ref_key->type == BTRFS_ROOT_REF_KEY ?
5134 "ROOT_REF" : "ROOT_BACKREF",
5135 ref_key->objectid, ref_key->offset);
5136 goto out;
5139 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5140 struct btrfs_root_ref);
5141 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5142 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5143 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5145 if (backref_namelen <= BTRFS_NAME_LEN) {
5146 len = backref_namelen;
5147 } else {
5148 len = BTRFS_NAME_LEN;
5149 warning("%s[%llu %llu] ref_name too long",
5150 key.type == BTRFS_ROOT_REF_KEY ?
5151 "ROOT_REF" : "ROOT_BACKREF",
5152 key.objectid, key.offset);
5154 read_extent_buffer(path.nodes[0], backref_name,
5155 (unsigned long)(backref + 1), len);
5157 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5158 ref_namelen != backref_namelen ||
5159 strncmp(ref_name, backref_name, len)) {
5160 err |= ROOT_REF_MISMATCH;
5161 error("%s[%llu %llu] mismatch relative ref",
5162 ref_key->type == BTRFS_ROOT_REF_KEY ?
5163 "ROOT_REF" : "ROOT_BACKREF",
5164 ref_key->objectid, ref_key->offset);
5166 out:
5167 btrfs_release_path(&path);
5168 return err;
5172 * Check all fs/file tree in low_memory mode.
5174 * 1. for fs tree root item, call check_fs_root_v2()
5175 * 2. for fs tree root ref/backref, call check_root_ref()
5177 * Return 0 if no error occurred.
5179 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5181 struct btrfs_root *tree_root = fs_info->tree_root;
5182 struct btrfs_root *cur_root = NULL;
5183 struct btrfs_path path;
5184 struct btrfs_key key;
5185 struct extent_buffer *node;
5186 unsigned int ext_ref;
5187 int slot;
5188 int ret;
5189 int err = 0;
5191 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5193 btrfs_init_path(&path);
5194 key.objectid = BTRFS_FS_TREE_OBJECTID;
5195 key.offset = 0;
5196 key.type = BTRFS_ROOT_ITEM_KEY;
5198 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5199 if (ret < 0) {
5200 err = ret;
5201 goto out;
5202 } else if (ret > 0) {
5203 err = -ENOENT;
5204 goto out;
5207 while (1) {
5208 node = path.nodes[0];
5209 slot = path.slots[0];
5210 btrfs_item_key_to_cpu(node, &key, slot);
5211 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5212 goto out;
5213 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5214 fs_root_objectid(key.objectid)) {
5215 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5216 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5217 &key);
5218 } else {
5219 key.offset = (u64)-1;
5220 cur_root = btrfs_read_fs_root(fs_info, &key);
5223 if (IS_ERR(cur_root)) {
5224 error("Fail to read fs/subvol tree: %lld",
5225 key.objectid);
5226 err = -EIO;
5227 goto next;
5230 ret = check_fs_root_v2(cur_root, ext_ref);
5231 err |= ret;
5233 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5234 btrfs_free_fs_root(cur_root);
5235 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5236 key.type == BTRFS_ROOT_BACKREF_KEY) {
5237 ret = check_root_ref(tree_root, &key, node, slot);
5238 err |= ret;
5240 next:
5241 ret = btrfs_next_item(tree_root, &path);
5242 if (ret > 0)
5243 goto out;
5244 if (ret < 0) {
5245 err = ret;
5246 goto out;
5250 out:
5251 btrfs_release_path(&path);
5252 return err;
5255 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5257 struct list_head *cur = rec->backrefs.next;
5258 struct extent_backref *back;
5259 struct tree_backref *tback;
5260 struct data_backref *dback;
5261 u64 found = 0;
5262 int err = 0;
5264 while(cur != &rec->backrefs) {
5265 back = to_extent_backref(cur);
5266 cur = cur->next;
5267 if (!back->found_extent_tree) {
5268 err = 1;
5269 if (!print_errs)
5270 goto out;
5271 if (back->is_data) {
5272 dback = to_data_backref(back);
5273 fprintf(stderr, "Backref %llu %s %llu"
5274 " owner %llu offset %llu num_refs %lu"
5275 " not found in extent tree\n",
5276 (unsigned long long)rec->start,
5277 back->full_backref ?
5278 "parent" : "root",
5279 back->full_backref ?
5280 (unsigned long long)dback->parent:
5281 (unsigned long long)dback->root,
5282 (unsigned long long)dback->owner,
5283 (unsigned long long)dback->offset,
5284 (unsigned long)dback->num_refs);
5285 } else {
5286 tback = to_tree_backref(back);
5287 fprintf(stderr, "Backref %llu parent %llu"
5288 " root %llu not found in extent tree\n",
5289 (unsigned long long)rec->start,
5290 (unsigned long long)tback->parent,
5291 (unsigned long long)tback->root);
5294 if (!back->is_data && !back->found_ref) {
5295 err = 1;
5296 if (!print_errs)
5297 goto out;
5298 tback = to_tree_backref(back);
5299 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5300 (unsigned long long)rec->start,
5301 back->full_backref ? "parent" : "root",
5302 back->full_backref ?
5303 (unsigned long long)tback->parent :
5304 (unsigned long long)tback->root, back);
5306 if (back->is_data) {
5307 dback = to_data_backref(back);
5308 if (dback->found_ref != dback->num_refs) {
5309 err = 1;
5310 if (!print_errs)
5311 goto out;
5312 fprintf(stderr, "Incorrect local backref count"
5313 " on %llu %s %llu owner %llu"
5314 " offset %llu found %u wanted %u back %p\n",
5315 (unsigned long long)rec->start,
5316 back->full_backref ?
5317 "parent" : "root",
5318 back->full_backref ?
5319 (unsigned long long)dback->parent:
5320 (unsigned long long)dback->root,
5321 (unsigned long long)dback->owner,
5322 (unsigned long long)dback->offset,
5323 dback->found_ref, dback->num_refs, back);
5325 if (dback->disk_bytenr != rec->start) {
5326 err = 1;
5327 if (!print_errs)
5328 goto out;
5329 fprintf(stderr, "Backref disk bytenr does not"
5330 " match extent record, bytenr=%llu, "
5331 "ref bytenr=%llu\n",
5332 (unsigned long long)rec->start,
5333 (unsigned long long)dback->disk_bytenr);
5336 if (dback->bytes != rec->nr) {
5337 err = 1;
5338 if (!print_errs)
5339 goto out;
5340 fprintf(stderr, "Backref bytes do not match "
5341 "extent backref, bytenr=%llu, ref "
5342 "bytes=%llu, backref bytes=%llu\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)rec->nr,
5345 (unsigned long long)dback->bytes);
5348 if (!back->is_data) {
5349 found += 1;
5350 } else {
5351 dback = to_data_backref(back);
5352 found += dback->found_ref;
5355 if (found != rec->refs) {
5356 err = 1;
5357 if (!print_errs)
5358 goto out;
5359 fprintf(stderr, "Incorrect global backref count "
5360 "on %llu found %llu wanted %llu\n",
5361 (unsigned long long)rec->start,
5362 (unsigned long long)found,
5363 (unsigned long long)rec->refs);
5365 out:
5366 return err;
5369 static int free_all_extent_backrefs(struct extent_record *rec)
5371 struct extent_backref *back;
5372 struct list_head *cur;
5373 while (!list_empty(&rec->backrefs)) {
5374 cur = rec->backrefs.next;
5375 back = to_extent_backref(cur);
5376 list_del(cur);
5377 free(back);
5379 return 0;
5382 static void free_extent_record_cache(struct cache_tree *extent_cache)
5384 struct cache_extent *cache;
5385 struct extent_record *rec;
5387 while (1) {
5388 cache = first_cache_extent(extent_cache);
5389 if (!cache)
5390 break;
5391 rec = container_of(cache, struct extent_record, cache);
5392 remove_cache_extent(extent_cache, cache);
5393 free_all_extent_backrefs(rec);
5394 free(rec);
5398 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5399 struct extent_record *rec)
5401 if (rec->content_checked && rec->owner_ref_checked &&
5402 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5403 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5404 !rec->bad_full_backref && !rec->crossing_stripes &&
5405 !rec->wrong_chunk_type) {
5406 remove_cache_extent(extent_cache, &rec->cache);
5407 free_all_extent_backrefs(rec);
5408 list_del_init(&rec->list);
5409 free(rec);
5411 return 0;
5414 static int check_owner_ref(struct btrfs_root *root,
5415 struct extent_record *rec,
5416 struct extent_buffer *buf)
5418 struct extent_backref *node;
5419 struct tree_backref *back;
5420 struct btrfs_root *ref_root;
5421 struct btrfs_key key;
5422 struct btrfs_path path;
5423 struct extent_buffer *parent;
5424 int level;
5425 int found = 0;
5426 int ret;
5428 list_for_each_entry(node, &rec->backrefs, list) {
5429 if (node->is_data)
5430 continue;
5431 if (!node->found_ref)
5432 continue;
5433 if (node->full_backref)
5434 continue;
5435 back = to_tree_backref(node);
5436 if (btrfs_header_owner(buf) == back->root)
5437 return 0;
5439 BUG_ON(rec->is_root);
5441 /* try to find the block by search corresponding fs tree */
5442 key.objectid = btrfs_header_owner(buf);
5443 key.type = BTRFS_ROOT_ITEM_KEY;
5444 key.offset = (u64)-1;
5446 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5447 if (IS_ERR(ref_root))
5448 return 1;
5450 level = btrfs_header_level(buf);
5451 if (level == 0)
5452 btrfs_item_key_to_cpu(buf, &key, 0);
5453 else
5454 btrfs_node_key_to_cpu(buf, &key, 0);
5456 btrfs_init_path(&path);
5457 path.lowest_level = level + 1;
5458 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5459 if (ret < 0)
5460 return 0;
5462 parent = path.nodes[level + 1];
5463 if (parent && buf->start == btrfs_node_blockptr(parent,
5464 path.slots[level + 1]))
5465 found = 1;
5467 btrfs_release_path(&path);
5468 return found ? 0 : 1;
5471 static int is_extent_tree_record(struct extent_record *rec)
5473 struct list_head *cur = rec->backrefs.next;
5474 struct extent_backref *node;
5475 struct tree_backref *back;
5476 int is_extent = 0;
5478 while(cur != &rec->backrefs) {
5479 node = to_extent_backref(cur);
5480 cur = cur->next;
5481 if (node->is_data)
5482 return 0;
5483 back = to_tree_backref(node);
5484 if (node->full_backref)
5485 return 0;
5486 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5487 is_extent = 1;
5489 return is_extent;
5493 static int record_bad_block_io(struct btrfs_fs_info *info,
5494 struct cache_tree *extent_cache,
5495 u64 start, u64 len)
5497 struct extent_record *rec;
5498 struct cache_extent *cache;
5499 struct btrfs_key key;
5501 cache = lookup_cache_extent(extent_cache, start, len);
5502 if (!cache)
5503 return 0;
5505 rec = container_of(cache, struct extent_record, cache);
5506 if (!is_extent_tree_record(rec))
5507 return 0;
5509 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5510 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5513 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5514 struct extent_buffer *buf, int slot)
5516 if (btrfs_header_level(buf)) {
5517 struct btrfs_key_ptr ptr1, ptr2;
5519 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5520 sizeof(struct btrfs_key_ptr));
5521 read_extent_buffer(buf, &ptr2,
5522 btrfs_node_key_ptr_offset(slot + 1),
5523 sizeof(struct btrfs_key_ptr));
5524 write_extent_buffer(buf, &ptr1,
5525 btrfs_node_key_ptr_offset(slot + 1),
5526 sizeof(struct btrfs_key_ptr));
5527 write_extent_buffer(buf, &ptr2,
5528 btrfs_node_key_ptr_offset(slot),
5529 sizeof(struct btrfs_key_ptr));
5530 if (slot == 0) {
5531 struct btrfs_disk_key key;
5532 btrfs_node_key(buf, &key, 0);
5533 btrfs_fixup_low_keys(root, path, &key,
5534 btrfs_header_level(buf) + 1);
5536 } else {
5537 struct btrfs_item *item1, *item2;
5538 struct btrfs_key k1, k2;
5539 char *item1_data, *item2_data;
5540 u32 item1_offset, item2_offset, item1_size, item2_size;
5542 item1 = btrfs_item_nr(slot);
5543 item2 = btrfs_item_nr(slot + 1);
5544 btrfs_item_key_to_cpu(buf, &k1, slot);
5545 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5546 item1_offset = btrfs_item_offset(buf, item1);
5547 item2_offset = btrfs_item_offset(buf, item2);
5548 item1_size = btrfs_item_size(buf, item1);
5549 item2_size = btrfs_item_size(buf, item2);
5551 item1_data = malloc(item1_size);
5552 if (!item1_data)
5553 return -ENOMEM;
5554 item2_data = malloc(item2_size);
5555 if (!item2_data) {
5556 free(item1_data);
5557 return -ENOMEM;
5560 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5561 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5563 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5564 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5565 free(item1_data);
5566 free(item2_data);
5568 btrfs_set_item_offset(buf, item1, item2_offset);
5569 btrfs_set_item_offset(buf, item2, item1_offset);
5570 btrfs_set_item_size(buf, item1, item2_size);
5571 btrfs_set_item_size(buf, item2, item1_size);
5573 path->slots[0] = slot;
5574 btrfs_set_item_key_unsafe(root, path, &k2);
5575 path->slots[0] = slot + 1;
5576 btrfs_set_item_key_unsafe(root, path, &k1);
5578 return 0;
5581 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5583 struct extent_buffer *buf;
5584 struct btrfs_key k1, k2;
5585 int i;
5586 int level = path->lowest_level;
5587 int ret = -EIO;
5589 buf = path->nodes[level];
5590 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5591 if (level) {
5592 btrfs_node_key_to_cpu(buf, &k1, i);
5593 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5594 } else {
5595 btrfs_item_key_to_cpu(buf, &k1, i);
5596 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5598 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5599 continue;
5600 ret = swap_values(root, path, buf, i);
5601 if (ret)
5602 break;
5603 btrfs_mark_buffer_dirty(buf);
5604 i = 0;
5606 return ret;
5609 static int delete_bogus_item(struct btrfs_root *root,
5610 struct btrfs_path *path,
5611 struct extent_buffer *buf, int slot)
5613 struct btrfs_key key;
5614 int nritems = btrfs_header_nritems(buf);
5616 btrfs_item_key_to_cpu(buf, &key, slot);
5618 /* These are all the keys we can deal with missing. */
5619 if (key.type != BTRFS_DIR_INDEX_KEY &&
5620 key.type != BTRFS_EXTENT_ITEM_KEY &&
5621 key.type != BTRFS_METADATA_ITEM_KEY &&
5622 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5623 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5624 return -1;
5626 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5627 (unsigned long long)key.objectid, key.type,
5628 (unsigned long long)key.offset, slot, buf->start);
5629 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5630 btrfs_item_nr_offset(slot + 1),
5631 sizeof(struct btrfs_item) *
5632 (nritems - slot - 1));
5633 btrfs_set_header_nritems(buf, nritems - 1);
5634 if (slot == 0) {
5635 struct btrfs_disk_key disk_key;
5637 btrfs_item_key(buf, &disk_key, 0);
5638 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5640 btrfs_mark_buffer_dirty(buf);
5641 return 0;
5644 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5646 struct extent_buffer *buf;
5647 int i;
5648 int ret = 0;
5650 /* We should only get this for leaves */
5651 BUG_ON(path->lowest_level);
5652 buf = path->nodes[0];
5653 again:
5654 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5655 unsigned int shift = 0, offset;
5657 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5658 BTRFS_LEAF_DATA_SIZE(root)) {
5659 if (btrfs_item_end_nr(buf, i) >
5660 BTRFS_LEAF_DATA_SIZE(root)) {
5661 ret = delete_bogus_item(root, path, buf, i);
5662 if (!ret)
5663 goto again;
5664 fprintf(stderr, "item is off the end of the "
5665 "leaf, can't fix\n");
5666 ret = -EIO;
5667 break;
5669 shift = BTRFS_LEAF_DATA_SIZE(root) -
5670 btrfs_item_end_nr(buf, i);
5671 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5672 btrfs_item_offset_nr(buf, i - 1)) {
5673 if (btrfs_item_end_nr(buf, i) >
5674 btrfs_item_offset_nr(buf, i - 1)) {
5675 ret = delete_bogus_item(root, path, buf, i);
5676 if (!ret)
5677 goto again;
5678 fprintf(stderr, "items overlap, can't fix\n");
5679 ret = -EIO;
5680 break;
5682 shift = btrfs_item_offset_nr(buf, i - 1) -
5683 btrfs_item_end_nr(buf, i);
5685 if (!shift)
5686 continue;
5688 printf("Shifting item nr %d by %u bytes in block %llu\n",
5689 i, shift, (unsigned long long)buf->start);
5690 offset = btrfs_item_offset_nr(buf, i);
5691 memmove_extent_buffer(buf,
5692 btrfs_leaf_data(buf) + offset + shift,
5693 btrfs_leaf_data(buf) + offset,
5694 btrfs_item_size_nr(buf, i));
5695 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5696 offset + shift);
5697 btrfs_mark_buffer_dirty(buf);
5701 * We may have moved things, in which case we want to exit so we don't
5702 * write those changes out. Once we have proper abort functionality in
5703 * progs this can be changed to something nicer.
5705 BUG_ON(ret);
5706 return ret;
5710 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5711 * then just return -EIO.
5713 static int try_to_fix_bad_block(struct btrfs_root *root,
5714 struct extent_buffer *buf,
5715 enum btrfs_tree_block_status status)
5717 struct btrfs_trans_handle *trans;
5718 struct ulist *roots;
5719 struct ulist_node *node;
5720 struct btrfs_root *search_root;
5721 struct btrfs_path path;
5722 struct ulist_iterator iter;
5723 struct btrfs_key root_key, key;
5724 int ret;
5726 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5727 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5728 return -EIO;
5730 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5731 if (ret)
5732 return -EIO;
5734 btrfs_init_path(&path);
5735 ULIST_ITER_INIT(&iter);
5736 while ((node = ulist_next(roots, &iter))) {
5737 root_key.objectid = node->val;
5738 root_key.type = BTRFS_ROOT_ITEM_KEY;
5739 root_key.offset = (u64)-1;
5741 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5742 if (IS_ERR(root)) {
5743 ret = -EIO;
5744 break;
5748 trans = btrfs_start_transaction(search_root, 0);
5749 if (IS_ERR(trans)) {
5750 ret = PTR_ERR(trans);
5751 break;
5754 path.lowest_level = btrfs_header_level(buf);
5755 path.skip_check_block = 1;
5756 if (path.lowest_level)
5757 btrfs_node_key_to_cpu(buf, &key, 0);
5758 else
5759 btrfs_item_key_to_cpu(buf, &key, 0);
5760 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5761 if (ret) {
5762 ret = -EIO;
5763 btrfs_commit_transaction(trans, search_root);
5764 break;
5766 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5767 ret = fix_key_order(search_root, &path);
5768 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5769 ret = fix_item_offset(search_root, &path);
5770 if (ret) {
5771 btrfs_commit_transaction(trans, search_root);
5772 break;
5774 btrfs_release_path(&path);
5775 btrfs_commit_transaction(trans, search_root);
5777 ulist_free(roots);
5778 btrfs_release_path(&path);
5779 return ret;
5782 static int check_block(struct btrfs_root *root,
5783 struct cache_tree *extent_cache,
5784 struct extent_buffer *buf, u64 flags)
5786 struct extent_record *rec;
5787 struct cache_extent *cache;
5788 struct btrfs_key key;
5789 enum btrfs_tree_block_status status;
5790 int ret = 0;
5791 int level;
5793 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5794 if (!cache)
5795 return 1;
5796 rec = container_of(cache, struct extent_record, cache);
5797 rec->generation = btrfs_header_generation(buf);
5799 level = btrfs_header_level(buf);
5800 if (btrfs_header_nritems(buf) > 0) {
5802 if (level == 0)
5803 btrfs_item_key_to_cpu(buf, &key, 0);
5804 else
5805 btrfs_node_key_to_cpu(buf, &key, 0);
5807 rec->info_objectid = key.objectid;
5809 rec->info_level = level;
5811 if (btrfs_is_leaf(buf))
5812 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5813 else
5814 status = btrfs_check_node(root, &rec->parent_key, buf);
5816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5817 if (repair)
5818 status = try_to_fix_bad_block(root, buf, status);
5819 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5820 ret = -EIO;
5821 fprintf(stderr, "bad block %llu\n",
5822 (unsigned long long)buf->start);
5823 } else {
5825 * Signal to callers we need to start the scan over
5826 * again since we'll have cowed blocks.
5828 ret = -EAGAIN;
5830 } else {
5831 rec->content_checked = 1;
5832 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5833 rec->owner_ref_checked = 1;
5834 else {
5835 ret = check_owner_ref(root, rec, buf);
5836 if (!ret)
5837 rec->owner_ref_checked = 1;
5840 if (!ret)
5841 maybe_free_extent_rec(extent_cache, rec);
5842 return ret;
5845 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5846 u64 parent, u64 root)
5848 struct list_head *cur = rec->backrefs.next;
5849 struct extent_backref *node;
5850 struct tree_backref *back;
5852 while(cur != &rec->backrefs) {
5853 node = to_extent_backref(cur);
5854 cur = cur->next;
5855 if (node->is_data)
5856 continue;
5857 back = to_tree_backref(node);
5858 if (parent > 0) {
5859 if (!node->full_backref)
5860 continue;
5861 if (parent == back->parent)
5862 return back;
5863 } else {
5864 if (node->full_backref)
5865 continue;
5866 if (back->root == root)
5867 return back;
5870 return NULL;
5873 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5874 u64 parent, u64 root)
5876 struct tree_backref *ref = malloc(sizeof(*ref));
5878 if (!ref)
5879 return NULL;
5880 memset(&ref->node, 0, sizeof(ref->node));
5881 if (parent > 0) {
5882 ref->parent = parent;
5883 ref->node.full_backref = 1;
5884 } else {
5885 ref->root = root;
5886 ref->node.full_backref = 0;
5888 list_add_tail(&ref->node.list, &rec->backrefs);
5890 return ref;
5893 static struct data_backref *find_data_backref(struct extent_record *rec,
5894 u64 parent, u64 root,
5895 u64 owner, u64 offset,
5896 int found_ref,
5897 u64 disk_bytenr, u64 bytes)
5899 struct list_head *cur = rec->backrefs.next;
5900 struct extent_backref *node;
5901 struct data_backref *back;
5903 while(cur != &rec->backrefs) {
5904 node = to_extent_backref(cur);
5905 cur = cur->next;
5906 if (!node->is_data)
5907 continue;
5908 back = to_data_backref(node);
5909 if (parent > 0) {
5910 if (!node->full_backref)
5911 continue;
5912 if (parent == back->parent)
5913 return back;
5914 } else {
5915 if (node->full_backref)
5916 continue;
5917 if (back->root == root && back->owner == owner &&
5918 back->offset == offset) {
5919 if (found_ref && node->found_ref &&
5920 (back->bytes != bytes ||
5921 back->disk_bytenr != disk_bytenr))
5922 continue;
5923 return back;
5927 return NULL;
5930 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5931 u64 parent, u64 root,
5932 u64 owner, u64 offset,
5933 u64 max_size)
5935 struct data_backref *ref = malloc(sizeof(*ref));
5937 if (!ref)
5938 return NULL;
5939 memset(&ref->node, 0, sizeof(ref->node));
5940 ref->node.is_data = 1;
5942 if (parent > 0) {
5943 ref->parent = parent;
5944 ref->owner = 0;
5945 ref->offset = 0;
5946 ref->node.full_backref = 1;
5947 } else {
5948 ref->root = root;
5949 ref->owner = owner;
5950 ref->offset = offset;
5951 ref->node.full_backref = 0;
5953 ref->bytes = max_size;
5954 ref->found_ref = 0;
5955 ref->num_refs = 0;
5956 list_add_tail(&ref->node.list, &rec->backrefs);
5957 if (max_size > rec->max_size)
5958 rec->max_size = max_size;
5959 return ref;
5962 /* Check if the type of extent matches with its chunk */
5963 static void check_extent_type(struct extent_record *rec)
5965 struct btrfs_block_group_cache *bg_cache;
5967 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5968 if (!bg_cache)
5969 return;
5971 /* data extent, check chunk directly*/
5972 if (!rec->metadata) {
5973 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5974 rec->wrong_chunk_type = 1;
5975 return;
5978 /* metadata extent, check the obvious case first */
5979 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5980 BTRFS_BLOCK_GROUP_METADATA))) {
5981 rec->wrong_chunk_type = 1;
5982 return;
5986 * Check SYSTEM extent, as it's also marked as metadata, we can only
5987 * make sure it's a SYSTEM extent by its backref
5989 if (!list_empty(&rec->backrefs)) {
5990 struct extent_backref *node;
5991 struct tree_backref *tback;
5992 u64 bg_type;
5994 node = to_extent_backref(rec->backrefs.next);
5995 if (node->is_data) {
5996 /* tree block shouldn't have data backref */
5997 rec->wrong_chunk_type = 1;
5998 return;
6000 tback = container_of(node, struct tree_backref, node);
6002 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6003 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6004 else
6005 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6006 if (!(bg_cache->flags & bg_type))
6007 rec->wrong_chunk_type = 1;
6012 * Allocate a new extent record, fill default values from @tmpl and insert int
6013 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6014 * the cache, otherwise it fails.
6016 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6017 struct extent_record *tmpl)
6019 struct extent_record *rec;
6020 int ret = 0;
6022 rec = malloc(sizeof(*rec));
6023 if (!rec)
6024 return -ENOMEM;
6025 rec->start = tmpl->start;
6026 rec->max_size = tmpl->max_size;
6027 rec->nr = max(tmpl->nr, tmpl->max_size);
6028 rec->found_rec = tmpl->found_rec;
6029 rec->content_checked = tmpl->content_checked;
6030 rec->owner_ref_checked = tmpl->owner_ref_checked;
6031 rec->num_duplicates = 0;
6032 rec->metadata = tmpl->metadata;
6033 rec->flag_block_full_backref = FLAG_UNSET;
6034 rec->bad_full_backref = 0;
6035 rec->crossing_stripes = 0;
6036 rec->wrong_chunk_type = 0;
6037 rec->is_root = tmpl->is_root;
6038 rec->refs = tmpl->refs;
6039 rec->extent_item_refs = tmpl->extent_item_refs;
6040 rec->parent_generation = tmpl->parent_generation;
6041 INIT_LIST_HEAD(&rec->backrefs);
6042 INIT_LIST_HEAD(&rec->dups);
6043 INIT_LIST_HEAD(&rec->list);
6044 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6045 rec->cache.start = tmpl->start;
6046 rec->cache.size = tmpl->nr;
6047 ret = insert_cache_extent(extent_cache, &rec->cache);
6048 if (ret) {
6049 free(rec);
6050 return ret;
6052 bytes_used += rec->nr;
6054 if (tmpl->metadata)
6055 rec->crossing_stripes = check_crossing_stripes(global_info,
6056 rec->start, global_info->tree_root->nodesize);
6057 check_extent_type(rec);
6058 return ret;
6062 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6063 * some are hints:
6064 * - refs - if found, increase refs
6065 * - is_root - if found, set
6066 * - content_checked - if found, set
6067 * - owner_ref_checked - if found, set
6069 * If not found, create a new one, initialize and insert.
6071 static int add_extent_rec(struct cache_tree *extent_cache,
6072 struct extent_record *tmpl)
6074 struct extent_record *rec;
6075 struct cache_extent *cache;
6076 int ret = 0;
6077 int dup = 0;
6079 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6080 if (cache) {
6081 rec = container_of(cache, struct extent_record, cache);
6082 if (tmpl->refs)
6083 rec->refs++;
6084 if (rec->nr == 1)
6085 rec->nr = max(tmpl->nr, tmpl->max_size);
6088 * We need to make sure to reset nr to whatever the extent
6089 * record says was the real size, this way we can compare it to
6090 * the backrefs.
6092 if (tmpl->found_rec) {
6093 if (tmpl->start != rec->start || rec->found_rec) {
6094 struct extent_record *tmp;
6096 dup = 1;
6097 if (list_empty(&rec->list))
6098 list_add_tail(&rec->list,
6099 &duplicate_extents);
6102 * We have to do this song and dance in case we
6103 * find an extent record that falls inside of
6104 * our current extent record but does not have
6105 * the same objectid.
6107 tmp = malloc(sizeof(*tmp));
6108 if (!tmp)
6109 return -ENOMEM;
6110 tmp->start = tmpl->start;
6111 tmp->max_size = tmpl->max_size;
6112 tmp->nr = tmpl->nr;
6113 tmp->found_rec = 1;
6114 tmp->metadata = tmpl->metadata;
6115 tmp->extent_item_refs = tmpl->extent_item_refs;
6116 INIT_LIST_HEAD(&tmp->list);
6117 list_add_tail(&tmp->list, &rec->dups);
6118 rec->num_duplicates++;
6119 } else {
6120 rec->nr = tmpl->nr;
6121 rec->found_rec = 1;
6125 if (tmpl->extent_item_refs && !dup) {
6126 if (rec->extent_item_refs) {
6127 fprintf(stderr, "block %llu rec "
6128 "extent_item_refs %llu, passed %llu\n",
6129 (unsigned long long)tmpl->start,
6130 (unsigned long long)
6131 rec->extent_item_refs,
6132 (unsigned long long)tmpl->extent_item_refs);
6134 rec->extent_item_refs = tmpl->extent_item_refs;
6136 if (tmpl->is_root)
6137 rec->is_root = 1;
6138 if (tmpl->content_checked)
6139 rec->content_checked = 1;
6140 if (tmpl->owner_ref_checked)
6141 rec->owner_ref_checked = 1;
6142 memcpy(&rec->parent_key, &tmpl->parent_key,
6143 sizeof(tmpl->parent_key));
6144 if (tmpl->parent_generation)
6145 rec->parent_generation = tmpl->parent_generation;
6146 if (rec->max_size < tmpl->max_size)
6147 rec->max_size = tmpl->max_size;
6150 * A metadata extent can't cross stripe_len boundary, otherwise
6151 * kernel scrub won't be able to handle it.
6152 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6153 * it.
6155 if (tmpl->metadata)
6156 rec->crossing_stripes = check_crossing_stripes(
6157 global_info, rec->start,
6158 global_info->tree_root->nodesize);
6159 check_extent_type(rec);
6160 maybe_free_extent_rec(extent_cache, rec);
6161 return ret;
6164 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6166 return ret;
6169 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6170 u64 parent, u64 root, int found_ref)
6172 struct extent_record *rec;
6173 struct tree_backref *back;
6174 struct cache_extent *cache;
6175 int ret;
6177 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6178 if (!cache) {
6179 struct extent_record tmpl;
6181 memset(&tmpl, 0, sizeof(tmpl));
6182 tmpl.start = bytenr;
6183 tmpl.nr = 1;
6184 tmpl.metadata = 1;
6186 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6187 if (ret)
6188 return ret;
6190 /* really a bug in cache_extent implement now */
6191 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6192 if (!cache)
6193 return -ENOENT;
6196 rec = container_of(cache, struct extent_record, cache);
6197 if (rec->start != bytenr) {
6199 * Several cause, from unaligned bytenr to over lapping extents
6201 return -EEXIST;
6204 back = find_tree_backref(rec, parent, root);
6205 if (!back) {
6206 back = alloc_tree_backref(rec, parent, root);
6207 if (!back)
6208 return -ENOMEM;
6211 if (found_ref) {
6212 if (back->node.found_ref) {
6213 fprintf(stderr, "Extent back ref already exists "
6214 "for %llu parent %llu root %llu \n",
6215 (unsigned long long)bytenr,
6216 (unsigned long long)parent,
6217 (unsigned long long)root);
6219 back->node.found_ref = 1;
6220 } else {
6221 if (back->node.found_extent_tree) {
6222 fprintf(stderr, "Extent back ref already exists "
6223 "for %llu parent %llu root %llu \n",
6224 (unsigned long long)bytenr,
6225 (unsigned long long)parent,
6226 (unsigned long long)root);
6228 back->node.found_extent_tree = 1;
6230 check_extent_type(rec);
6231 maybe_free_extent_rec(extent_cache, rec);
6232 return 0;
6235 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6236 u64 parent, u64 root, u64 owner, u64 offset,
6237 u32 num_refs, int found_ref, u64 max_size)
6239 struct extent_record *rec;
6240 struct data_backref *back;
6241 struct cache_extent *cache;
6242 int ret;
6244 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6245 if (!cache) {
6246 struct extent_record tmpl;
6248 memset(&tmpl, 0, sizeof(tmpl));
6249 tmpl.start = bytenr;
6250 tmpl.nr = 1;
6251 tmpl.max_size = max_size;
6253 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6254 if (ret)
6255 return ret;
6257 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6258 if (!cache)
6259 abort();
6262 rec = container_of(cache, struct extent_record, cache);
6263 if (rec->max_size < max_size)
6264 rec->max_size = max_size;
6267 * If found_ref is set then max_size is the real size and must match the
6268 * existing refs. So if we have already found a ref then we need to
6269 * make sure that this ref matches the existing one, otherwise we need
6270 * to add a new backref so we can notice that the backrefs don't match
6271 * and we need to figure out who is telling the truth. This is to
6272 * account for that awful fsync bug I introduced where we'd end up with
6273 * a btrfs_file_extent_item that would have its length include multiple
6274 * prealloc extents or point inside of a prealloc extent.
6276 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6277 bytenr, max_size);
6278 if (!back) {
6279 back = alloc_data_backref(rec, parent, root, owner, offset,
6280 max_size);
6281 BUG_ON(!back);
6284 if (found_ref) {
6285 BUG_ON(num_refs != 1);
6286 if (back->node.found_ref)
6287 BUG_ON(back->bytes != max_size);
6288 back->node.found_ref = 1;
6289 back->found_ref += 1;
6290 back->bytes = max_size;
6291 back->disk_bytenr = bytenr;
6292 rec->refs += 1;
6293 rec->content_checked = 1;
6294 rec->owner_ref_checked = 1;
6295 } else {
6296 if (back->node.found_extent_tree) {
6297 fprintf(stderr, "Extent back ref already exists "
6298 "for %llu parent %llu root %llu "
6299 "owner %llu offset %llu num_refs %lu\n",
6300 (unsigned long long)bytenr,
6301 (unsigned long long)parent,
6302 (unsigned long long)root,
6303 (unsigned long long)owner,
6304 (unsigned long long)offset,
6305 (unsigned long)num_refs);
6307 back->num_refs = num_refs;
6308 back->node.found_extent_tree = 1;
6310 maybe_free_extent_rec(extent_cache, rec);
6311 return 0;
6314 static int add_pending(struct cache_tree *pending,
6315 struct cache_tree *seen, u64 bytenr, u32 size)
6317 int ret;
6318 ret = add_cache_extent(seen, bytenr, size);
6319 if (ret)
6320 return ret;
6321 add_cache_extent(pending, bytenr, size);
6322 return 0;
6325 static int pick_next_pending(struct cache_tree *pending,
6326 struct cache_tree *reada,
6327 struct cache_tree *nodes,
6328 u64 last, struct block_info *bits, int bits_nr,
6329 int *reada_bits)
6331 unsigned long node_start = last;
6332 struct cache_extent *cache;
6333 int ret;
6335 cache = search_cache_extent(reada, 0);
6336 if (cache) {
6337 bits[0].start = cache->start;
6338 bits[0].size = cache->size;
6339 *reada_bits = 1;
6340 return 1;
6342 *reada_bits = 0;
6343 if (node_start > 32768)
6344 node_start -= 32768;
6346 cache = search_cache_extent(nodes, node_start);
6347 if (!cache)
6348 cache = search_cache_extent(nodes, 0);
6350 if (!cache) {
6351 cache = search_cache_extent(pending, 0);
6352 if (!cache)
6353 return 0;
6354 ret = 0;
6355 do {
6356 bits[ret].start = cache->start;
6357 bits[ret].size = cache->size;
6358 cache = next_cache_extent(cache);
6359 ret++;
6360 } while (cache && ret < bits_nr);
6361 return ret;
6364 ret = 0;
6365 do {
6366 bits[ret].start = cache->start;
6367 bits[ret].size = cache->size;
6368 cache = next_cache_extent(cache);
6369 ret++;
6370 } while (cache && ret < bits_nr);
6372 if (bits_nr - ret > 8) {
6373 u64 lookup = bits[0].start + bits[0].size;
6374 struct cache_extent *next;
6375 next = search_cache_extent(pending, lookup);
6376 while(next) {
6377 if (next->start - lookup > 32768)
6378 break;
6379 bits[ret].start = next->start;
6380 bits[ret].size = next->size;
6381 lookup = next->start + next->size;
6382 ret++;
6383 if (ret == bits_nr)
6384 break;
6385 next = next_cache_extent(next);
6386 if (!next)
6387 break;
6390 return ret;
6393 static void free_chunk_record(struct cache_extent *cache)
6395 struct chunk_record *rec;
6397 rec = container_of(cache, struct chunk_record, cache);
6398 list_del_init(&rec->list);
6399 list_del_init(&rec->dextents);
6400 free(rec);
6403 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6405 cache_tree_free_extents(chunk_cache, free_chunk_record);
6408 static void free_device_record(struct rb_node *node)
6410 struct device_record *rec;
6412 rec = container_of(node, struct device_record, node);
6413 free(rec);
6416 FREE_RB_BASED_TREE(device_cache, free_device_record);
6418 int insert_block_group_record(struct block_group_tree *tree,
6419 struct block_group_record *bg_rec)
6421 int ret;
6423 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6424 if (ret)
6425 return ret;
6427 list_add_tail(&bg_rec->list, &tree->block_groups);
6428 return 0;
6431 static void free_block_group_record(struct cache_extent *cache)
6433 struct block_group_record *rec;
6435 rec = container_of(cache, struct block_group_record, cache);
6436 list_del_init(&rec->list);
6437 free(rec);
6440 void free_block_group_tree(struct block_group_tree *tree)
6442 cache_tree_free_extents(&tree->tree, free_block_group_record);
6445 int insert_device_extent_record(struct device_extent_tree *tree,
6446 struct device_extent_record *de_rec)
6448 int ret;
6451 * Device extent is a bit different from the other extents, because
6452 * the extents which belong to the different devices may have the
6453 * same start and size, so we need use the special extent cache
6454 * search/insert functions.
6456 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6457 if (ret)
6458 return ret;
6460 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6461 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6462 return 0;
6465 static void free_device_extent_record(struct cache_extent *cache)
6467 struct device_extent_record *rec;
6469 rec = container_of(cache, struct device_extent_record, cache);
6470 if (!list_empty(&rec->chunk_list))
6471 list_del_init(&rec->chunk_list);
6472 if (!list_empty(&rec->device_list))
6473 list_del_init(&rec->device_list);
6474 free(rec);
6477 void free_device_extent_tree(struct device_extent_tree *tree)
6479 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases the key's objectid is the extent bytenr and the
 * key's offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6503 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6504 struct btrfs_key *key,
6505 int slot)
6507 struct btrfs_chunk *ptr;
6508 struct chunk_record *rec;
6509 int num_stripes, i;
6511 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6512 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6514 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6515 if (!rec) {
6516 fprintf(stderr, "memory allocation failed\n");
6517 exit(-1);
6520 INIT_LIST_HEAD(&rec->list);
6521 INIT_LIST_HEAD(&rec->dextents);
6522 rec->bg_rec = NULL;
6524 rec->cache.start = key->offset;
6525 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6527 rec->generation = btrfs_header_generation(leaf);
6529 rec->objectid = key->objectid;
6530 rec->type = key->type;
6531 rec->offset = key->offset;
6533 rec->length = rec->cache.size;
6534 rec->owner = btrfs_chunk_owner(leaf, ptr);
6535 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6536 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6537 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6538 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6539 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6540 rec->num_stripes = num_stripes;
6541 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6543 for (i = 0; i < rec->num_stripes; ++i) {
6544 rec->stripes[i].devid =
6545 btrfs_stripe_devid_nr(leaf, ptr, i);
6546 rec->stripes[i].offset =
6547 btrfs_stripe_offset_nr(leaf, ptr, i);
6548 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6549 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6550 BTRFS_UUID_SIZE);
6553 return rec;
6556 static int process_chunk_item(struct cache_tree *chunk_cache,
6557 struct btrfs_key *key, struct extent_buffer *eb,
6558 int slot)
6560 struct chunk_record *rec;
6561 struct btrfs_chunk *chunk;
6562 int ret = 0;
6564 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6566 * Do extra check for this chunk item,
6568 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6569 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6570 * and owner<->key_type check.
6572 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6573 key->offset);
6574 if (ret < 0) {
6575 error("chunk(%llu, %llu) is not valid, ignore it",
6576 key->offset, btrfs_chunk_length(eb, chunk));
6577 return 0;
6579 rec = btrfs_new_chunk_record(eb, key, slot);
6580 ret = insert_cache_extent(chunk_cache, &rec->cache);
6581 if (ret) {
6582 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6583 rec->offset, rec->length);
6584 free(rec);
6587 return ret;
6590 static int process_device_item(struct rb_root *dev_cache,
6591 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6593 struct btrfs_dev_item *ptr;
6594 struct device_record *rec;
6595 int ret = 0;
6597 ptr = btrfs_item_ptr(eb,
6598 slot, struct btrfs_dev_item);
6600 rec = malloc(sizeof(*rec));
6601 if (!rec) {
6602 fprintf(stderr, "memory allocation failed\n");
6603 return -ENOMEM;
6606 rec->devid = key->offset;
6607 rec->generation = btrfs_header_generation(eb);
6609 rec->objectid = key->objectid;
6610 rec->type = key->type;
6611 rec->offset = key->offset;
6613 rec->devid = btrfs_device_id(eb, ptr);
6614 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6615 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6617 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6618 if (ret) {
6619 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6620 free(rec);
6623 return ret;
6626 struct block_group_record *
6627 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6628 int slot)
6630 struct btrfs_block_group_item *ptr;
6631 struct block_group_record *rec;
6633 rec = calloc(1, sizeof(*rec));
6634 if (!rec) {
6635 fprintf(stderr, "memory allocation failed\n");
6636 exit(-1);
6639 rec->cache.start = key->objectid;
6640 rec->cache.size = key->offset;
6642 rec->generation = btrfs_header_generation(leaf);
6644 rec->objectid = key->objectid;
6645 rec->type = key->type;
6646 rec->offset = key->offset;
6648 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6649 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6651 INIT_LIST_HEAD(&rec->list);
6653 return rec;
6656 static int process_block_group_item(struct block_group_tree *block_group_cache,
6657 struct btrfs_key *key,
6658 struct extent_buffer *eb, int slot)
6660 struct block_group_record *rec;
6661 int ret = 0;
6663 rec = btrfs_new_block_group_record(eb, key, slot);
6664 ret = insert_block_group_record(block_group_cache, rec);
6665 if (ret) {
6666 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6667 rec->objectid, rec->offset);
6668 free(rec);
6671 return ret;
6674 struct device_extent_record *
6675 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6676 struct btrfs_key *key, int slot)
6678 struct device_extent_record *rec;
6679 struct btrfs_dev_extent *ptr;
6681 rec = calloc(1, sizeof(*rec));
6682 if (!rec) {
6683 fprintf(stderr, "memory allocation failed\n");
6684 exit(-1);
6687 rec->cache.objectid = key->objectid;
6688 rec->cache.start = key->offset;
6690 rec->generation = btrfs_header_generation(leaf);
6692 rec->objectid = key->objectid;
6693 rec->type = key->type;
6694 rec->offset = key->offset;
6696 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6697 rec->chunk_objecteid =
6698 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6699 rec->chunk_offset =
6700 btrfs_dev_extent_chunk_offset(leaf, ptr);
6701 rec->length = btrfs_dev_extent_length(leaf, ptr);
6702 rec->cache.size = rec->length;
6704 INIT_LIST_HEAD(&rec->chunk_list);
6705 INIT_LIST_HEAD(&rec->device_list);
6707 return rec;
6710 static int
6711 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6712 struct btrfs_key *key, struct extent_buffer *eb,
6713 int slot)
6715 struct device_extent_record *rec;
6716 int ret;
6718 rec = btrfs_new_device_extent_record(eb, key, slot);
6719 ret = insert_device_extent_record(dev_extent_cache, rec);
6720 if (ret) {
6721 fprintf(stderr,
6722 "Device extent[%llu, %llu, %llu] existed.\n",
6723 rec->objectid, rec->offset, rec->length);
6724 free(rec);
6727 return ret;
6730 static int process_extent_item(struct btrfs_root *root,
6731 struct cache_tree *extent_cache,
6732 struct extent_buffer *eb, int slot)
6734 struct btrfs_extent_item *ei;
6735 struct btrfs_extent_inline_ref *iref;
6736 struct btrfs_extent_data_ref *dref;
6737 struct btrfs_shared_data_ref *sref;
6738 struct btrfs_key key;
6739 struct extent_record tmpl;
6740 unsigned long end;
6741 unsigned long ptr;
6742 int ret;
6743 int type;
6744 u32 item_size = btrfs_item_size_nr(eb, slot);
6745 u64 refs = 0;
6746 u64 offset;
6747 u64 num_bytes;
6748 int metadata = 0;
6750 btrfs_item_key_to_cpu(eb, &key, slot);
6752 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6753 metadata = 1;
6754 num_bytes = root->nodesize;
6755 } else {
6756 num_bytes = key.offset;
6759 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6760 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6761 key.objectid, root->sectorsize);
6762 return -EIO;
6764 if (item_size < sizeof(*ei)) {
6765 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6766 struct btrfs_extent_item_v0 *ei0;
6767 BUG_ON(item_size != sizeof(*ei0));
6768 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6769 refs = btrfs_extent_refs_v0(eb, ei0);
6770 #else
6771 BUG();
6772 #endif
6773 memset(&tmpl, 0, sizeof(tmpl));
6774 tmpl.start = key.objectid;
6775 tmpl.nr = num_bytes;
6776 tmpl.extent_item_refs = refs;
6777 tmpl.metadata = metadata;
6778 tmpl.found_rec = 1;
6779 tmpl.max_size = num_bytes;
6781 return add_extent_rec(extent_cache, &tmpl);
6784 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6785 refs = btrfs_extent_refs(eb, ei);
6786 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6787 metadata = 1;
6788 else
6789 metadata = 0;
6790 if (metadata && num_bytes != root->nodesize) {
6791 error("ignore invalid metadata extent, length %llu does not equal to %u",
6792 num_bytes, root->nodesize);
6793 return -EIO;
6795 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6796 error("ignore invalid data extent, length %llu is not aligned to %u",
6797 num_bytes, root->sectorsize);
6798 return -EIO;
6801 memset(&tmpl, 0, sizeof(tmpl));
6802 tmpl.start = key.objectid;
6803 tmpl.nr = num_bytes;
6804 tmpl.extent_item_refs = refs;
6805 tmpl.metadata = metadata;
6806 tmpl.found_rec = 1;
6807 tmpl.max_size = num_bytes;
6808 add_extent_rec(extent_cache, &tmpl);
6810 ptr = (unsigned long)(ei + 1);
6811 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6812 key.type == BTRFS_EXTENT_ITEM_KEY)
6813 ptr += sizeof(struct btrfs_tree_block_info);
6815 end = (unsigned long)ei + item_size;
6816 while (ptr < end) {
6817 iref = (struct btrfs_extent_inline_ref *)ptr;
6818 type = btrfs_extent_inline_ref_type(eb, iref);
6819 offset = btrfs_extent_inline_ref_offset(eb, iref);
6820 switch (type) {
6821 case BTRFS_TREE_BLOCK_REF_KEY:
6822 ret = add_tree_backref(extent_cache, key.objectid,
6823 0, offset, 0);
6824 if (ret < 0)
6825 error("add_tree_backref failed: %s",
6826 strerror(-ret));
6827 break;
6828 case BTRFS_SHARED_BLOCK_REF_KEY:
6829 ret = add_tree_backref(extent_cache, key.objectid,
6830 offset, 0, 0);
6831 if (ret < 0)
6832 error("add_tree_backref failed: %s",
6833 strerror(-ret));
6834 break;
6835 case BTRFS_EXTENT_DATA_REF_KEY:
6836 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6837 add_data_backref(extent_cache, key.objectid, 0,
6838 btrfs_extent_data_ref_root(eb, dref),
6839 btrfs_extent_data_ref_objectid(eb,
6840 dref),
6841 btrfs_extent_data_ref_offset(eb, dref),
6842 btrfs_extent_data_ref_count(eb, dref),
6843 0, num_bytes);
6844 break;
6845 case BTRFS_SHARED_DATA_REF_KEY:
6846 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6847 add_data_backref(extent_cache, key.objectid, offset,
6848 0, 0, 0,
6849 btrfs_shared_data_ref_count(eb, sref),
6850 0, num_bytes);
6851 break;
6852 default:
6853 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6854 key.objectid, key.type, num_bytes);
6855 goto out;
6857 ptr += btrfs_extent_inline_ref_size(type);
6859 WARN_ON(ptr > end);
6860 out:
6861 return 0;
6864 static int check_cache_range(struct btrfs_root *root,
6865 struct btrfs_block_group_cache *cache,
6866 u64 offset, u64 bytes)
6868 struct btrfs_free_space *entry;
6869 u64 *logical;
6870 u64 bytenr;
6871 int stripe_len;
6872 int i, nr, ret;
6874 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6875 bytenr = btrfs_sb_offset(i);
6876 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6877 cache->key.objectid, bytenr, 0,
6878 &logical, &nr, &stripe_len);
6879 if (ret)
6880 return ret;
6882 while (nr--) {
6883 if (logical[nr] + stripe_len <= offset)
6884 continue;
6885 if (offset + bytes <= logical[nr])
6886 continue;
6887 if (logical[nr] == offset) {
6888 if (stripe_len >= bytes) {
6889 free(logical);
6890 return 0;
6892 bytes -= stripe_len;
6893 offset += stripe_len;
6894 } else if (logical[nr] < offset) {
6895 if (logical[nr] + stripe_len >=
6896 offset + bytes) {
6897 free(logical);
6898 return 0;
6900 bytes = (offset + bytes) -
6901 (logical[nr] + stripe_len);
6902 offset = logical[nr] + stripe_len;
6903 } else {
6905 * Could be tricky, the super may land in the
6906 * middle of the area we're checking. First
6907 * check the easiest case, it's at the end.
6909 if (logical[nr] + stripe_len >=
6910 bytes + offset) {
6911 bytes = logical[nr] - offset;
6912 continue;
6915 /* Check the left side */
6916 ret = check_cache_range(root, cache,
6917 offset,
6918 logical[nr] - offset);
6919 if (ret) {
6920 free(logical);
6921 return ret;
6924 /* Now we continue with the right side */
6925 bytes = (offset + bytes) -
6926 (logical[nr] + stripe_len);
6927 offset = logical[nr] + stripe_len;
6931 free(logical);
6934 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6935 if (!entry) {
6936 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6937 offset, offset+bytes);
6938 return -EINVAL;
6941 if (entry->offset != offset) {
6942 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6943 entry->offset);
6944 return -EINVAL;
6947 if (entry->bytes != bytes) {
6948 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6949 bytes, entry->bytes, offset);
6950 return -EINVAL;
6953 unlink_free_space(cache->free_space_ctl, entry);
6954 free(entry);
6955 return 0;
6958 static int verify_space_cache(struct btrfs_root *root,
6959 struct btrfs_block_group_cache *cache)
6961 struct btrfs_path path;
6962 struct extent_buffer *leaf;
6963 struct btrfs_key key;
6964 u64 last;
6965 int ret = 0;
6967 root = root->fs_info->extent_root;
6969 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6971 btrfs_init_path(&path);
6972 key.objectid = last;
6973 key.offset = 0;
6974 key.type = BTRFS_EXTENT_ITEM_KEY;
6975 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6976 if (ret < 0)
6977 goto out;
6978 ret = 0;
6979 while (1) {
6980 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6981 ret = btrfs_next_leaf(root, &path);
6982 if (ret < 0)
6983 goto out;
6984 if (ret > 0) {
6985 ret = 0;
6986 break;
6989 leaf = path.nodes[0];
6990 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6991 if (key.objectid >= cache->key.offset + cache->key.objectid)
6992 break;
6993 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6994 key.type != BTRFS_METADATA_ITEM_KEY) {
6995 path.slots[0]++;
6996 continue;
6999 if (last == key.objectid) {
7000 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7001 last = key.objectid + key.offset;
7002 else
7003 last = key.objectid + root->nodesize;
7004 path.slots[0]++;
7005 continue;
7008 ret = check_cache_range(root, cache, last,
7009 key.objectid - last);
7010 if (ret)
7011 break;
7012 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7013 last = key.objectid + key.offset;
7014 else
7015 last = key.objectid + root->nodesize;
7016 path.slots[0]++;
7019 if (last < cache->key.objectid + cache->key.offset)
7020 ret = check_cache_range(root, cache, last,
7021 cache->key.objectid +
7022 cache->key.offset - last);
7024 out:
7025 btrfs_release_path(&path);
7027 if (!ret &&
7028 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7029 fprintf(stderr, "There are still entries left in the space "
7030 "cache\n");
7031 ret = -EINVAL;
7034 return ret;
7037 static int check_space_cache(struct btrfs_root *root)
7039 struct btrfs_block_group_cache *cache;
7040 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7041 int ret;
7042 int error = 0;
7044 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7045 btrfs_super_generation(root->fs_info->super_copy) !=
7046 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7047 printf("cache and super generation don't match, space cache "
7048 "will be invalidated\n");
7049 return 0;
7052 if (ctx.progress_enabled) {
7053 ctx.tp = TASK_FREE_SPACE;
7054 task_start(ctx.info);
7057 while (1) {
7058 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7059 if (!cache)
7060 break;
7062 start = cache->key.objectid + cache->key.offset;
7063 if (!cache->free_space_ctl) {
7064 if (btrfs_init_free_space_ctl(cache,
7065 root->sectorsize)) {
7066 ret = -ENOMEM;
7067 break;
7069 } else {
7070 btrfs_remove_free_space_cache(cache);
7073 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7074 ret = exclude_super_stripes(root, cache);
7075 if (ret) {
7076 fprintf(stderr, "could not exclude super stripes: %s\n",
7077 strerror(-ret));
7078 error++;
7079 continue;
7081 ret = load_free_space_tree(root->fs_info, cache);
7082 free_excluded_extents(root, cache);
7083 if (ret < 0) {
7084 fprintf(stderr, "could not load free space tree: %s\n",
7085 strerror(-ret));
7086 error++;
7087 continue;
7089 error += ret;
7090 } else {
7091 ret = load_free_space_cache(root->fs_info, cache);
7092 if (!ret)
7093 continue;
7096 ret = verify_space_cache(root, cache);
7097 if (ret) {
7098 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7099 cache->key.objectid);
7100 error++;
7104 task_stop(ctx.info);
7106 return error ? -EINVAL : 0;
7109 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7110 u64 num_bytes, unsigned long leaf_offset,
7111 struct extent_buffer *eb) {
7113 u64 offset = 0;
7114 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7115 char *data;
7116 unsigned long csum_offset;
7117 u32 csum;
7118 u32 csum_expected;
7119 u64 read_len;
7120 u64 data_checked = 0;
7121 u64 tmp;
7122 int ret = 0;
7123 int mirror;
7124 int num_copies;
7126 if (num_bytes % root->sectorsize)
7127 return -EINVAL;
7129 data = malloc(num_bytes);
7130 if (!data)
7131 return -ENOMEM;
7133 while (offset < num_bytes) {
7134 mirror = 0;
7135 again:
7136 read_len = num_bytes - offset;
7137 /* read as much space once a time */
7138 ret = read_extent_data(root, data + offset,
7139 bytenr + offset, &read_len, mirror);
7140 if (ret)
7141 goto out;
7142 data_checked = 0;
7143 /* verify every 4k data's checksum */
7144 while (data_checked < read_len) {
7145 csum = ~(u32)0;
7146 tmp = offset + data_checked;
7148 csum = btrfs_csum_data((char *)data + tmp,
7149 csum, root->sectorsize);
7150 btrfs_csum_final(csum, (u8 *)&csum);
7152 csum_offset = leaf_offset +
7153 tmp / root->sectorsize * csum_size;
7154 read_extent_buffer(eb, (char *)&csum_expected,
7155 csum_offset, csum_size);
7156 /* try another mirror */
7157 if (csum != csum_expected) {
7158 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7159 mirror, bytenr + tmp,
7160 csum, csum_expected);
7161 num_copies = btrfs_num_copies(
7162 &root->fs_info->mapping_tree,
7163 bytenr, num_bytes);
7164 if (mirror < num_copies - 1) {
7165 mirror += 1;
7166 goto again;
7169 data_checked += root->sectorsize;
7171 offset += read_len;
7173 out:
7174 free(data);
7175 return ret;
7178 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7179 u64 num_bytes)
7181 struct btrfs_path path;
7182 struct extent_buffer *leaf;
7183 struct btrfs_key key;
7184 int ret;
7186 btrfs_init_path(&path);
7187 key.objectid = bytenr;
7188 key.type = BTRFS_EXTENT_ITEM_KEY;
7189 key.offset = (u64)-1;
7191 again:
7192 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7193 0, 0);
7194 if (ret < 0) {
7195 fprintf(stderr, "Error looking up extent record %d\n", ret);
7196 btrfs_release_path(&path);
7197 return ret;
7198 } else if (ret) {
7199 if (path.slots[0] > 0) {
7200 path.slots[0]--;
7201 } else {
7202 ret = btrfs_prev_leaf(root, &path);
7203 if (ret < 0) {
7204 goto out;
7205 } else if (ret > 0) {
7206 ret = 0;
7207 goto out;
7212 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7215 * Block group items come before extent items if they have the same
7216 * bytenr, so walk back one more just in case. Dear future traveller,
7217 * first congrats on mastering time travel. Now if it's not too much
7218 * trouble could you go back to 2006 and tell Chris to make the
7219 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7220 * EXTENT_ITEM_KEY please?
7222 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7223 if (path.slots[0] > 0) {
7224 path.slots[0]--;
7225 } else {
7226 ret = btrfs_prev_leaf(root, &path);
7227 if (ret < 0) {
7228 goto out;
7229 } else if (ret > 0) {
7230 ret = 0;
7231 goto out;
7234 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7237 while (num_bytes) {
7238 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7239 ret = btrfs_next_leaf(root, &path);
7240 if (ret < 0) {
7241 fprintf(stderr, "Error going to next leaf "
7242 "%d\n", ret);
7243 btrfs_release_path(&path);
7244 return ret;
7245 } else if (ret) {
7246 break;
7249 leaf = path.nodes[0];
7250 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7251 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7252 path.slots[0]++;
7253 continue;
7255 if (key.objectid + key.offset < bytenr) {
7256 path.slots[0]++;
7257 continue;
7259 if (key.objectid > bytenr + num_bytes)
7260 break;
7262 if (key.objectid == bytenr) {
7263 if (key.offset >= num_bytes) {
7264 num_bytes = 0;
7265 break;
7267 num_bytes -= key.offset;
7268 bytenr += key.offset;
7269 } else if (key.objectid < bytenr) {
7270 if (key.objectid + key.offset >= bytenr + num_bytes) {
7271 num_bytes = 0;
7272 break;
7274 num_bytes = (bytenr + num_bytes) -
7275 (key.objectid + key.offset);
7276 bytenr = key.objectid + key.offset;
7277 } else {
7278 if (key.objectid + key.offset < bytenr + num_bytes) {
7279 u64 new_start = key.objectid + key.offset;
7280 u64 new_bytes = bytenr + num_bytes - new_start;
7283 * Weird case, the extent is in the middle of
7284 * our range, we'll have to search one side
7285 * and then the other. Not sure if this happens
7286 * in real life, but no harm in coding it up
7287 * anyway just in case.
7289 btrfs_release_path(&path);
7290 ret = check_extent_exists(root, new_start,
7291 new_bytes);
7292 if (ret) {
7293 fprintf(stderr, "Right section didn't "
7294 "have a record\n");
7295 break;
7297 num_bytes = key.objectid - bytenr;
7298 goto again;
7300 num_bytes = key.objectid - bytenr;
7302 path.slots[0]++;
7304 ret = 0;
7306 out:
7307 if (num_bytes && !ret) {
7308 fprintf(stderr, "There are no extents for csum range "
7309 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7310 ret = 1;
7313 btrfs_release_path(&path);
7314 return ret;
7317 static int check_csums(struct btrfs_root *root)
7319 struct btrfs_path path;
7320 struct extent_buffer *leaf;
7321 struct btrfs_key key;
7322 u64 offset = 0, num_bytes = 0;
7323 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7324 int errors = 0;
7325 int ret;
7326 u64 data_len;
7327 unsigned long leaf_offset;
7329 root = root->fs_info->csum_root;
7330 if (!extent_buffer_uptodate(root->node)) {
7331 fprintf(stderr, "No valid csum tree found\n");
7332 return -ENOENT;
7335 btrfs_init_path(&path);
7336 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7337 key.type = BTRFS_EXTENT_CSUM_KEY;
7338 key.offset = 0;
7339 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7340 if (ret < 0) {
7341 fprintf(stderr, "Error searching csum tree %d\n", ret);
7342 btrfs_release_path(&path);
7343 return ret;
7346 if (ret > 0 && path.slots[0])
7347 path.slots[0]--;
7348 ret = 0;
7350 while (1) {
7351 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7352 ret = btrfs_next_leaf(root, &path);
7353 if (ret < 0) {
7354 fprintf(stderr, "Error going to next leaf "
7355 "%d\n", ret);
7356 break;
7358 if (ret)
7359 break;
7361 leaf = path.nodes[0];
7363 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7364 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7365 path.slots[0]++;
7366 continue;
7369 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7370 csum_size) * root->sectorsize;
7371 if (!check_data_csum)
7372 goto skip_csum_check;
7373 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7374 ret = check_extent_csums(root, key.offset, data_len,
7375 leaf_offset, leaf);
7376 if (ret)
7377 break;
7378 skip_csum_check:
7379 if (!num_bytes) {
7380 offset = key.offset;
7381 } else if (key.offset != offset + num_bytes) {
7382 ret = check_extent_exists(root, offset, num_bytes);
7383 if (ret) {
7384 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7385 "there is no extent record\n",
7386 offset, offset+num_bytes);
7387 errors++;
7389 offset = key.offset;
7390 num_bytes = 0;
7392 num_bytes += data_len;
7393 path.slots[0]++;
7396 btrfs_release_path(&path);
7397 return errors;
7400 static int is_dropped_key(struct btrfs_key *key,
7401 struct btrfs_key *drop_key) {
7402 if (key->objectid < drop_key->objectid)
7403 return 1;
7404 else if (key->objectid == drop_key->objectid) {
7405 if (key->type < drop_key->type)
7406 return 1;
7407 else if (key->type == drop_key->type) {
7408 if (key->offset < drop_key->offset)
7409 return 1;
7412 return 0;
7416 * Here are the rules for FULL_BACKREF.
7418 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7419 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7420 * FULL_BACKREF set.
7421 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7422 * if it happened after the relocation occurred since we'll have dropped the
7423 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7424 * have no real way to know for sure.
7426 * We process the blocks one root at a time, and we start from the lowest root
7427 * objectid and go to the highest. So we can just lookup the owner backref for
7428 * the record and if we don't find it then we know it doesn't exist and we have
7429 * a FULL BACKREF.
7431 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7432 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7433 * be set or not and then we can check later once we've gathered all the refs.
7435 static int calc_extent_flag(struct cache_tree *extent_cache,
7436 struct extent_buffer *buf,
7437 struct root_item_record *ri,
7438 u64 *flags)
7440 struct extent_record *rec;
7441 struct cache_extent *cache;
7442 struct tree_backref *tback;
7443 u64 owner = 0;
7445 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7446 /* we have added this extent before */
7447 if (!cache)
7448 return -ENOENT;
7450 rec = container_of(cache, struct extent_record, cache);
7453 * Except file/reloc tree, we can not have
7454 * FULL BACKREF MODE
7456 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7457 goto normal;
7459 * root node
7461 if (buf->start == ri->bytenr)
7462 goto normal;
7464 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7465 goto full_backref;
7467 owner = btrfs_header_owner(buf);
7468 if (owner == ri->objectid)
7469 goto normal;
7471 tback = find_tree_backref(rec, 0, owner);
7472 if (!tback)
7473 goto full_backref;
7474 normal:
7475 *flags = 0;
7476 if (rec->flag_block_full_backref != FLAG_UNSET &&
7477 rec->flag_block_full_backref != 0)
7478 rec->bad_full_backref = 1;
7479 return 0;
7480 full_backref:
7481 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7482 if (rec->flag_block_full_backref != FLAG_UNSET &&
7483 rec->flag_block_full_backref != 1)
7484 rec->bad_full_backref = 1;
7485 return 0;
7488 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7490 fprintf(stderr, "Invalid key type(");
7491 print_key_type(stderr, 0, key_type);
7492 fprintf(stderr, ") found in root(");
7493 print_objectid(stderr, rootid, 0);
7494 fprintf(stderr, ")\n");
7498 * Check if the key is valid with its extent buffer.
7500 * This is a early check in case invalid key exists in a extent buffer
7501 * This is not comprehensive yet, but should prevent wrong key/item passed
7502 * further
7504 static int check_type_with_root(u64 rootid, u8 key_type)
7506 switch (key_type) {
7507 /* Only valid in chunk tree */
7508 case BTRFS_DEV_ITEM_KEY:
7509 case BTRFS_CHUNK_ITEM_KEY:
7510 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7511 goto err;
7512 break;
7513 /* valid in csum and log tree */
7514 case BTRFS_CSUM_TREE_OBJECTID:
7515 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7516 is_fstree(rootid)))
7517 goto err;
7518 break;
7519 case BTRFS_EXTENT_ITEM_KEY:
7520 case BTRFS_METADATA_ITEM_KEY:
7521 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7522 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7523 goto err;
7524 break;
7525 case BTRFS_ROOT_ITEM_KEY:
7526 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7527 goto err;
7528 break;
7529 case BTRFS_DEV_EXTENT_KEY:
7530 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7531 goto err;
7532 break;
7534 return 0;
7535 err:
7536 report_mismatch_key_root(key_type, rootid);
7537 return -EINVAL;
7540 static int run_next_block(struct btrfs_root *root,
7541 struct block_info *bits,
7542 int bits_nr,
7543 u64 *last,
7544 struct cache_tree *pending,
7545 struct cache_tree *seen,
7546 struct cache_tree *reada,
7547 struct cache_tree *nodes,
7548 struct cache_tree *extent_cache,
7549 struct cache_tree *chunk_cache,
7550 struct rb_root *dev_cache,
7551 struct block_group_tree *block_group_cache,
7552 struct device_extent_tree *dev_extent_cache,
7553 struct root_item_record *ri)
7555 struct extent_buffer *buf;
7556 struct extent_record *rec = NULL;
7557 u64 bytenr;
7558 u32 size;
7559 u64 parent;
7560 u64 owner;
7561 u64 flags;
7562 u64 ptr;
7563 u64 gen = 0;
7564 int ret = 0;
7565 int i;
7566 int nritems;
7567 struct btrfs_key key;
7568 struct cache_extent *cache;
7569 int reada_bits;
7571 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7572 bits_nr, &reada_bits);
7573 if (nritems == 0)
7574 return 1;
7576 if (!reada_bits) {
7577 for(i = 0; i < nritems; i++) {
7578 ret = add_cache_extent(reada, bits[i].start,
7579 bits[i].size);
7580 if (ret == -EEXIST)
7581 continue;
7583 /* fixme, get the parent transid */
7584 readahead_tree_block(root, bits[i].start,
7585 bits[i].size, 0);
7588 *last = bits[0].start;
7589 bytenr = bits[0].start;
7590 size = bits[0].size;
7592 cache = lookup_cache_extent(pending, bytenr, size);
7593 if (cache) {
7594 remove_cache_extent(pending, cache);
7595 free(cache);
7597 cache = lookup_cache_extent(reada, bytenr, size);
7598 if (cache) {
7599 remove_cache_extent(reada, cache);
7600 free(cache);
7602 cache = lookup_cache_extent(nodes, bytenr, size);
7603 if (cache) {
7604 remove_cache_extent(nodes, cache);
7605 free(cache);
7607 cache = lookup_cache_extent(extent_cache, bytenr, size);
7608 if (cache) {
7609 rec = container_of(cache, struct extent_record, cache);
7610 gen = rec->parent_generation;
7613 /* fixme, get the real parent transid */
7614 buf = read_tree_block(root, bytenr, size, gen);
7615 if (!extent_buffer_uptodate(buf)) {
7616 record_bad_block_io(root->fs_info,
7617 extent_cache, bytenr, size);
7618 goto out;
7621 nritems = btrfs_header_nritems(buf);
7623 flags = 0;
7624 if (!init_extent_tree) {
7625 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7626 btrfs_header_level(buf), 1, NULL,
7627 &flags);
7628 if (ret < 0) {
7629 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7630 if (ret < 0) {
7631 fprintf(stderr, "Couldn't calc extent flags\n");
7632 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7635 } else {
7636 flags = 0;
7637 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7638 if (ret < 0) {
7639 fprintf(stderr, "Couldn't calc extent flags\n");
7640 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7644 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7645 if (ri != NULL &&
7646 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7647 ri->objectid == btrfs_header_owner(buf)) {
7649 * Ok we got to this block from it's original owner and
7650 * we have FULL_BACKREF set. Relocation can leave
7651 * converted blocks over so this is altogether possible,
7652 * however it's not possible if the generation > the
7653 * last snapshot, so check for this case.
7655 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7656 btrfs_header_generation(buf) > ri->last_snapshot) {
7657 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7658 rec->bad_full_backref = 1;
7661 } else {
7662 if (ri != NULL &&
7663 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7664 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7665 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7666 rec->bad_full_backref = 1;
7670 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7671 rec->flag_block_full_backref = 1;
7672 parent = bytenr;
7673 owner = 0;
7674 } else {
7675 rec->flag_block_full_backref = 0;
7676 parent = 0;
7677 owner = btrfs_header_owner(buf);
7680 ret = check_block(root, extent_cache, buf, flags);
7681 if (ret)
7682 goto out;
7684 if (btrfs_is_leaf(buf)) {
7685 btree_space_waste += btrfs_leaf_free_space(root, buf);
7686 for (i = 0; i < nritems; i++) {
7687 struct btrfs_file_extent_item *fi;
7688 btrfs_item_key_to_cpu(buf, &key, i);
7690 * Check key type against the leaf owner.
7691 * Could filter quite a lot of early error if
7692 * owner is correct
7694 if (check_type_with_root(btrfs_header_owner(buf),
7695 key.type)) {
7696 fprintf(stderr, "ignoring invalid key\n");
7697 continue;
7699 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7700 process_extent_item(root, extent_cache, buf,
7702 continue;
7704 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7705 process_extent_item(root, extent_cache, buf,
7707 continue;
7709 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7710 total_csum_bytes +=
7711 btrfs_item_size_nr(buf, i);
7712 continue;
7714 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7715 process_chunk_item(chunk_cache, &key, buf, i);
7716 continue;
7718 if (key.type == BTRFS_DEV_ITEM_KEY) {
7719 process_device_item(dev_cache, &key, buf, i);
7720 continue;
7722 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7723 process_block_group_item(block_group_cache,
7724 &key, buf, i);
7725 continue;
7727 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7728 process_device_extent_item(dev_extent_cache,
7729 &key, buf, i);
7730 continue;
7733 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7734 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7735 process_extent_ref_v0(extent_cache, buf, i);
7736 #else
7737 BUG();
7738 #endif
7739 continue;
7742 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7743 ret = add_tree_backref(extent_cache,
7744 key.objectid, 0, key.offset, 0);
7745 if (ret < 0)
7746 error("add_tree_backref failed: %s",
7747 strerror(-ret));
7748 continue;
7750 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7751 ret = add_tree_backref(extent_cache,
7752 key.objectid, key.offset, 0, 0);
7753 if (ret < 0)
7754 error("add_tree_backref failed: %s",
7755 strerror(-ret));
7756 continue;
7758 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7759 struct btrfs_extent_data_ref *ref;
7760 ref = btrfs_item_ptr(buf, i,
7761 struct btrfs_extent_data_ref);
7762 add_data_backref(extent_cache,
7763 key.objectid, 0,
7764 btrfs_extent_data_ref_root(buf, ref),
7765 btrfs_extent_data_ref_objectid(buf,
7766 ref),
7767 btrfs_extent_data_ref_offset(buf, ref),
7768 btrfs_extent_data_ref_count(buf, ref),
7769 0, root->sectorsize);
7770 continue;
7772 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7773 struct btrfs_shared_data_ref *ref;
7774 ref = btrfs_item_ptr(buf, i,
7775 struct btrfs_shared_data_ref);
7776 add_data_backref(extent_cache,
7777 key.objectid, key.offset, 0, 0, 0,
7778 btrfs_shared_data_ref_count(buf, ref),
7779 0, root->sectorsize);
7780 continue;
7782 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7783 struct bad_item *bad;
7785 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7786 continue;
7787 if (!owner)
7788 continue;
7789 bad = malloc(sizeof(struct bad_item));
7790 if (!bad)
7791 continue;
7792 INIT_LIST_HEAD(&bad->list);
7793 memcpy(&bad->key, &key,
7794 sizeof(struct btrfs_key));
7795 bad->root_id = owner;
7796 list_add_tail(&bad->list, &delete_items);
7797 continue;
7799 if (key.type != BTRFS_EXTENT_DATA_KEY)
7800 continue;
7801 fi = btrfs_item_ptr(buf, i,
7802 struct btrfs_file_extent_item);
7803 if (btrfs_file_extent_type(buf, fi) ==
7804 BTRFS_FILE_EXTENT_INLINE)
7805 continue;
7806 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7807 continue;
7809 data_bytes_allocated +=
7810 btrfs_file_extent_disk_num_bytes(buf, fi);
7811 if (data_bytes_allocated < root->sectorsize) {
7812 abort();
7814 data_bytes_referenced +=
7815 btrfs_file_extent_num_bytes(buf, fi);
7816 add_data_backref(extent_cache,
7817 btrfs_file_extent_disk_bytenr(buf, fi),
7818 parent, owner, key.objectid, key.offset -
7819 btrfs_file_extent_offset(buf, fi), 1, 1,
7820 btrfs_file_extent_disk_num_bytes(buf, fi));
7822 } else {
7823 int level;
7824 struct btrfs_key first_key;
7826 first_key.objectid = 0;
7828 if (nritems > 0)
7829 btrfs_item_key_to_cpu(buf, &first_key, 0);
7830 level = btrfs_header_level(buf);
7831 for (i = 0; i < nritems; i++) {
7832 struct extent_record tmpl;
7834 ptr = btrfs_node_blockptr(buf, i);
7835 size = root->nodesize;
7836 btrfs_node_key_to_cpu(buf, &key, i);
7837 if (ri != NULL) {
7838 if ((level == ri->drop_level)
7839 && is_dropped_key(&key, &ri->drop_key)) {
7840 continue;
7844 memset(&tmpl, 0, sizeof(tmpl));
7845 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7846 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7847 tmpl.start = ptr;
7848 tmpl.nr = size;
7849 tmpl.refs = 1;
7850 tmpl.metadata = 1;
7851 tmpl.max_size = size;
7852 ret = add_extent_rec(extent_cache, &tmpl);
7853 if (ret < 0)
7854 goto out;
7856 ret = add_tree_backref(extent_cache, ptr, parent,
7857 owner, 1);
7858 if (ret < 0) {
7859 error("add_tree_backref failed: %s",
7860 strerror(-ret));
7861 continue;
7864 if (level > 1) {
7865 add_pending(nodes, seen, ptr, size);
7866 } else {
7867 add_pending(pending, seen, ptr, size);
7870 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7871 nritems) * sizeof(struct btrfs_key_ptr);
7873 total_btree_bytes += buf->len;
7874 if (fs_root_objectid(btrfs_header_owner(buf)))
7875 total_fs_tree_bytes += buf->len;
7876 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7877 total_extent_tree_bytes += buf->len;
7878 if (!found_old_backref &&
7879 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7880 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7881 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7882 found_old_backref = 1;
7883 out:
7884 free_extent_buffer(buf);
7885 return ret;
7888 static int add_root_to_pending(struct extent_buffer *buf,
7889 struct cache_tree *extent_cache,
7890 struct cache_tree *pending,
7891 struct cache_tree *seen,
7892 struct cache_tree *nodes,
7893 u64 objectid)
7895 struct extent_record tmpl;
7896 int ret;
7898 if (btrfs_header_level(buf) > 0)
7899 add_pending(nodes, seen, buf->start, buf->len);
7900 else
7901 add_pending(pending, seen, buf->start, buf->len);
7903 memset(&tmpl, 0, sizeof(tmpl));
7904 tmpl.start = buf->start;
7905 tmpl.nr = buf->len;
7906 tmpl.is_root = 1;
7907 tmpl.refs = 1;
7908 tmpl.metadata = 1;
7909 tmpl.max_size = buf->len;
7910 add_extent_rec(extent_cache, &tmpl);
7912 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7913 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7914 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7915 0, 1);
7916 else
7917 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7919 return ret;
7922 /* as we fix the tree, we might be deleting blocks that
7923 * we're tracking for repair. This hook makes sure we
7924 * remove any backrefs for blocks as we are fixing them.
7926 static int free_extent_hook(struct btrfs_trans_handle *trans,
7927 struct btrfs_root *root,
7928 u64 bytenr, u64 num_bytes, u64 parent,
7929 u64 root_objectid, u64 owner, u64 offset,
7930 int refs_to_drop)
7932 struct extent_record *rec;
7933 struct cache_extent *cache;
7934 int is_data;
7935 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7937 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7938 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7939 if (!cache)
7940 return 0;
7942 rec = container_of(cache, struct extent_record, cache);
7943 if (is_data) {
7944 struct data_backref *back;
7945 back = find_data_backref(rec, parent, root_objectid, owner,
7946 offset, 1, bytenr, num_bytes);
7947 if (!back)
7948 goto out;
7949 if (back->node.found_ref) {
7950 back->found_ref -= refs_to_drop;
7951 if (rec->refs)
7952 rec->refs -= refs_to_drop;
7954 if (back->node.found_extent_tree) {
7955 back->num_refs -= refs_to_drop;
7956 if (rec->extent_item_refs)
7957 rec->extent_item_refs -= refs_to_drop;
7959 if (back->found_ref == 0)
7960 back->node.found_ref = 0;
7961 if (back->num_refs == 0)
7962 back->node.found_extent_tree = 0;
7964 if (!back->node.found_extent_tree && back->node.found_ref) {
7965 list_del(&back->node.list);
7966 free(back);
7968 } else {
7969 struct tree_backref *back;
7970 back = find_tree_backref(rec, parent, root_objectid);
7971 if (!back)
7972 goto out;
7973 if (back->node.found_ref) {
7974 if (rec->refs)
7975 rec->refs--;
7976 back->node.found_ref = 0;
7978 if (back->node.found_extent_tree) {
7979 if (rec->extent_item_refs)
7980 rec->extent_item_refs--;
7981 back->node.found_extent_tree = 0;
7983 if (!back->node.found_extent_tree && back->node.found_ref) {
7984 list_del(&back->node.list);
7985 free(back);
7988 maybe_free_extent_rec(extent_cache, rec);
7989 out:
7990 return 0;
7993 static int delete_extent_records(struct btrfs_trans_handle *trans,
7994 struct btrfs_root *root,
7995 struct btrfs_path *path,
7996 u64 bytenr)
7998 struct btrfs_key key;
7999 struct btrfs_key found_key;
8000 struct extent_buffer *leaf;
8001 int ret;
8002 int slot;
8005 key.objectid = bytenr;
8006 key.type = (u8)-1;
8007 key.offset = (u64)-1;
8009 while(1) {
8010 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8011 &key, path, 0, 1);
8012 if (ret < 0)
8013 break;
8015 if (ret > 0) {
8016 ret = 0;
8017 if (path->slots[0] == 0)
8018 break;
8019 path->slots[0]--;
8021 ret = 0;
8023 leaf = path->nodes[0];
8024 slot = path->slots[0];
8026 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8027 if (found_key.objectid != bytenr)
8028 break;
8030 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8031 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8032 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8033 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8034 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8035 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8036 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8037 btrfs_release_path(path);
8038 if (found_key.type == 0) {
8039 if (found_key.offset == 0)
8040 break;
8041 key.offset = found_key.offset - 1;
8042 key.type = found_key.type;
8044 key.type = found_key.type - 1;
8045 key.offset = (u64)-1;
8046 continue;
8049 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8050 found_key.objectid, found_key.type, found_key.offset);
8052 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8053 if (ret)
8054 break;
8055 btrfs_release_path(path);
8057 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8058 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8059 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8060 found_key.offset : root->nodesize;
8062 ret = btrfs_update_block_group(trans, root, bytenr,
8063 bytes, 0, 0);
8064 if (ret)
8065 break;
8069 btrfs_release_path(path);
8070 return ret;
8074 * for a single backref, this will allocate a new extent
8075 * and add the backref to it.
8077 static int record_extent(struct btrfs_trans_handle *trans,
8078 struct btrfs_fs_info *info,
8079 struct btrfs_path *path,
8080 struct extent_record *rec,
8081 struct extent_backref *back,
8082 int allocated, u64 flags)
8084 int ret = 0;
8085 struct btrfs_root *extent_root = info->extent_root;
8086 struct extent_buffer *leaf;
8087 struct btrfs_key ins_key;
8088 struct btrfs_extent_item *ei;
8089 struct data_backref *dback;
8090 struct btrfs_tree_block_info *bi;
8092 if (!back->is_data)
8093 rec->max_size = max_t(u64, rec->max_size,
8094 info->extent_root->nodesize);
8096 if (!allocated) {
8097 u32 item_size = sizeof(*ei);
8099 if (!back->is_data)
8100 item_size += sizeof(*bi);
8102 ins_key.objectid = rec->start;
8103 ins_key.offset = rec->max_size;
8104 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8106 ret = btrfs_insert_empty_item(trans, extent_root, path,
8107 &ins_key, item_size);
8108 if (ret)
8109 goto fail;
8111 leaf = path->nodes[0];
8112 ei = btrfs_item_ptr(leaf, path->slots[0],
8113 struct btrfs_extent_item);
8115 btrfs_set_extent_refs(leaf, ei, 0);
8116 btrfs_set_extent_generation(leaf, ei, rec->generation);
8118 if (back->is_data) {
8119 btrfs_set_extent_flags(leaf, ei,
8120 BTRFS_EXTENT_FLAG_DATA);
8121 } else {
8122 struct btrfs_disk_key copy_key;;
8124 bi = (struct btrfs_tree_block_info *)(ei + 1);
8125 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8126 sizeof(*bi));
8128 btrfs_set_disk_key_objectid(&copy_key,
8129 rec->info_objectid);
8130 btrfs_set_disk_key_type(&copy_key, 0);
8131 btrfs_set_disk_key_offset(&copy_key, 0);
8133 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8134 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8136 btrfs_set_extent_flags(leaf, ei,
8137 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8140 btrfs_mark_buffer_dirty(leaf);
8141 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8142 rec->max_size, 1, 0);
8143 if (ret)
8144 goto fail;
8145 btrfs_release_path(path);
8148 if (back->is_data) {
8149 u64 parent;
8150 int i;
8152 dback = to_data_backref(back);
8153 if (back->full_backref)
8154 parent = dback->parent;
8155 else
8156 parent = 0;
8158 for (i = 0; i < dback->found_ref; i++) {
8159 /* if parent != 0, we're doing a full backref
8160 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8161 * just makes the backref allocator create a data
8162 * backref
8164 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8165 rec->start, rec->max_size,
8166 parent,
8167 dback->root,
8168 parent ?
8169 BTRFS_FIRST_FREE_OBJECTID :
8170 dback->owner,
8171 dback->offset);
8172 if (ret)
8173 break;
8175 fprintf(stderr, "adding new data backref"
8176 " on %llu %s %llu owner %llu"
8177 " offset %llu found %d\n",
8178 (unsigned long long)rec->start,
8179 back->full_backref ?
8180 "parent" : "root",
8181 back->full_backref ?
8182 (unsigned long long)parent :
8183 (unsigned long long)dback->root,
8184 (unsigned long long)dback->owner,
8185 (unsigned long long)dback->offset,
8186 dback->found_ref);
8187 } else {
8188 u64 parent;
8189 struct tree_backref *tback;
8191 tback = to_tree_backref(back);
8192 if (back->full_backref)
8193 parent = tback->parent;
8194 else
8195 parent = 0;
8197 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8198 rec->start, rec->max_size,
8199 parent, tback->root, 0, 0);
8200 fprintf(stderr, "adding new tree backref on "
8201 "start %llu len %llu parent %llu root %llu\n",
8202 rec->start, rec->max_size, parent, tback->root);
8204 fail:
8205 btrfs_release_path(path);
8206 return ret;
8209 static struct extent_entry *find_entry(struct list_head *entries,
8210 u64 bytenr, u64 bytes)
8212 struct extent_entry *entry = NULL;
8214 list_for_each_entry(entry, entries, list) {
8215 if (entry->bytenr == bytenr && entry->bytes == bytes)
8216 return entry;
8219 return NULL;
8222 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8224 struct extent_entry *entry, *best = NULL, *prev = NULL;
8226 list_for_each_entry(entry, entries, list) {
8228 * If there are as many broken entries as entries then we know
8229 * not to trust this particular entry.
8231 if (entry->broken == entry->count)
8232 continue;
8235 * Special case, when there are only two entries and 'best' is
8236 * the first one
8238 if (!prev) {
8239 best = entry;
8240 prev = entry;
8241 continue;
8245 * If our current entry == best then we can't be sure our best
8246 * is really the best, so we need to keep searching.
8248 if (best && best->count == entry->count) {
8249 prev = entry;
8250 best = NULL;
8251 continue;
8254 /* Prev == entry, not good enough, have to keep searching */
8255 if (!prev->broken && prev->count == entry->count)
8256 continue;
8258 if (!best)
8259 best = (prev->count > entry->count) ? prev : entry;
8260 else if (best->count < entry->count)
8261 best = entry;
8262 prev = entry;
8265 return best;
8268 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8269 struct data_backref *dback, struct extent_entry *entry)
8271 struct btrfs_trans_handle *trans;
8272 struct btrfs_root *root;
8273 struct btrfs_file_extent_item *fi;
8274 struct extent_buffer *leaf;
8275 struct btrfs_key key;
8276 u64 bytenr, bytes;
8277 int ret, err;
8279 key.objectid = dback->root;
8280 key.type = BTRFS_ROOT_ITEM_KEY;
8281 key.offset = (u64)-1;
8282 root = btrfs_read_fs_root(info, &key);
8283 if (IS_ERR(root)) {
8284 fprintf(stderr, "Couldn't find root for our ref\n");
8285 return -EINVAL;
8289 * The backref points to the original offset of the extent if it was
8290 * split, so we need to search down to the offset we have and then walk
8291 * forward until we find the backref we're looking for.
8293 key.objectid = dback->owner;
8294 key.type = BTRFS_EXTENT_DATA_KEY;
8295 key.offset = dback->offset;
8296 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8297 if (ret < 0) {
8298 fprintf(stderr, "Error looking up ref %d\n", ret);
8299 return ret;
8302 while (1) {
8303 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8304 ret = btrfs_next_leaf(root, path);
8305 if (ret) {
8306 fprintf(stderr, "Couldn't find our ref, next\n");
8307 return -EINVAL;
8310 leaf = path->nodes[0];
8311 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8312 if (key.objectid != dback->owner ||
8313 key.type != BTRFS_EXTENT_DATA_KEY) {
8314 fprintf(stderr, "Couldn't find our ref, search\n");
8315 return -EINVAL;
8317 fi = btrfs_item_ptr(leaf, path->slots[0],
8318 struct btrfs_file_extent_item);
8319 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8320 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8322 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8323 break;
8324 path->slots[0]++;
8327 btrfs_release_path(path);
8329 trans = btrfs_start_transaction(root, 1);
8330 if (IS_ERR(trans))
8331 return PTR_ERR(trans);
8334 * Ok we have the key of the file extent we want to fix, now we can cow
8335 * down to the thing and fix it.
8337 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8338 if (ret < 0) {
8339 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8340 key.objectid, key.type, key.offset, ret);
8341 goto out;
8343 if (ret > 0) {
8344 fprintf(stderr, "Well that's odd, we just found this key "
8345 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8346 key.offset);
8347 ret = -EINVAL;
8348 goto out;
8350 leaf = path->nodes[0];
8351 fi = btrfs_item_ptr(leaf, path->slots[0],
8352 struct btrfs_file_extent_item);
8354 if (btrfs_file_extent_compression(leaf, fi) &&
8355 dback->disk_bytenr != entry->bytenr) {
8356 fprintf(stderr, "Ref doesn't match the record start and is "
8357 "compressed, please take a btrfs-image of this file "
8358 "system and send it to a btrfs developer so they can "
8359 "complete this functionality for bytenr %Lu\n",
8360 dback->disk_bytenr);
8361 ret = -EINVAL;
8362 goto out;
8365 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8366 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8367 } else if (dback->disk_bytenr > entry->bytenr) {
8368 u64 off_diff, offset;
8370 off_diff = dback->disk_bytenr - entry->bytenr;
8371 offset = btrfs_file_extent_offset(leaf, fi);
8372 if (dback->disk_bytenr + offset +
8373 btrfs_file_extent_num_bytes(leaf, fi) >
8374 entry->bytenr + entry->bytes) {
8375 fprintf(stderr, "Ref is past the entry end, please "
8376 "take a btrfs-image of this file system and "
8377 "send it to a btrfs developer, ref %Lu\n",
8378 dback->disk_bytenr);
8379 ret = -EINVAL;
8380 goto out;
8382 offset += off_diff;
8383 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8384 btrfs_set_file_extent_offset(leaf, fi, offset);
8385 } else if (dback->disk_bytenr < entry->bytenr) {
8386 u64 offset;
8388 offset = btrfs_file_extent_offset(leaf, fi);
8389 if (dback->disk_bytenr + offset < entry->bytenr) {
8390 fprintf(stderr, "Ref is before the entry start, please"
8391 " take a btrfs-image of this file system and "
8392 "send it to a btrfs developer, ref %Lu\n",
8393 dback->disk_bytenr);
8394 ret = -EINVAL;
8395 goto out;
8398 offset += dback->disk_bytenr;
8399 offset -= entry->bytenr;
8400 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8401 btrfs_set_file_extent_offset(leaf, fi, offset);
8404 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8407 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8408 * only do this if we aren't using compression, otherwise it's a
8409 * trickier case.
8411 if (!btrfs_file_extent_compression(leaf, fi))
8412 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8413 else
8414 printf("ram bytes may be wrong?\n");
8415 btrfs_mark_buffer_dirty(leaf);
8416 out:
8417 err = btrfs_commit_transaction(trans, root);
8418 btrfs_release_path(path);
8419 return ret ? ret : err;
8422 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8423 struct extent_record *rec)
8425 struct extent_backref *back;
8426 struct data_backref *dback;
8427 struct extent_entry *entry, *best = NULL;
8428 LIST_HEAD(entries);
8429 int nr_entries = 0;
8430 int broken_entries = 0;
8431 int ret = 0;
8432 short mismatch = 0;
8435 * Metadata is easy and the backrefs should always agree on bytenr and
8436 * size, if not we've got bigger issues.
8438 if (rec->metadata)
8439 return 0;
8441 list_for_each_entry(back, &rec->backrefs, list) {
8442 if (back->full_backref || !back->is_data)
8443 continue;
8445 dback = to_data_backref(back);
8448 * We only pay attention to backrefs that we found a real
8449 * backref for.
8451 if (dback->found_ref == 0)
8452 continue;
8455 * For now we only catch when the bytes don't match, not the
8456 * bytenr. We can easily do this at the same time, but I want
8457 * to have a fs image to test on before we just add repair
8458 * functionality willy-nilly so we know we won't screw up the
8459 * repair.
8462 entry = find_entry(&entries, dback->disk_bytenr,
8463 dback->bytes);
8464 if (!entry) {
8465 entry = malloc(sizeof(struct extent_entry));
8466 if (!entry) {
8467 ret = -ENOMEM;
8468 goto out;
8470 memset(entry, 0, sizeof(*entry));
8471 entry->bytenr = dback->disk_bytenr;
8472 entry->bytes = dback->bytes;
8473 list_add_tail(&entry->list, &entries);
8474 nr_entries++;
8478 * If we only have on entry we may think the entries agree when
8479 * in reality they don't so we have to do some extra checking.
8481 if (dback->disk_bytenr != rec->start ||
8482 dback->bytes != rec->nr || back->broken)
8483 mismatch = 1;
8485 if (back->broken) {
8486 entry->broken++;
8487 broken_entries++;
8490 entry->count++;
8493 /* Yay all the backrefs agree, carry on good sir */
8494 if (nr_entries <= 1 && !mismatch)
8495 goto out;
8497 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8498 "%Lu\n", rec->start);
8501 * First we want to see if the backrefs can agree amongst themselves who
8502 * is right, so figure out which one of the entries has the highest
8503 * count.
8505 best = find_most_right_entry(&entries);
8508 * Ok so we may have an even split between what the backrefs think, so
8509 * this is where we use the extent ref to see what it thinks.
8511 if (!best) {
8512 entry = find_entry(&entries, rec->start, rec->nr);
8513 if (!entry && (!broken_entries || !rec->found_rec)) {
8514 fprintf(stderr, "Backrefs don't agree with each other "
8515 "and extent record doesn't agree with anybody,"
8516 " so we can't fix bytenr %Lu bytes %Lu\n",
8517 rec->start, rec->nr);
8518 ret = -EINVAL;
8519 goto out;
8520 } else if (!entry) {
8522 * Ok our backrefs were broken, we'll assume this is the
8523 * correct value and add an entry for this range.
8525 entry = malloc(sizeof(struct extent_entry));
8526 if (!entry) {
8527 ret = -ENOMEM;
8528 goto out;
8530 memset(entry, 0, sizeof(*entry));
8531 entry->bytenr = rec->start;
8532 entry->bytes = rec->nr;
8533 list_add_tail(&entry->list, &entries);
8534 nr_entries++;
8536 entry->count++;
8537 best = find_most_right_entry(&entries);
8538 if (!best) {
8539 fprintf(stderr, "Backrefs and extent record evenly "
8540 "split on who is right, this is going to "
8541 "require user input to fix bytenr %Lu bytes "
8542 "%Lu\n", rec->start, rec->nr);
8543 ret = -EINVAL;
8544 goto out;
8549 * I don't think this can happen currently as we'll abort() if we catch
8550 * this case higher up, but in case somebody removes that we still can't
8551 * deal with it properly here yet, so just bail out of that's the case.
8553 if (best->bytenr != rec->start) {
8554 fprintf(stderr, "Extent start and backref starts don't match, "
8555 "please use btrfs-image on this file system and send "
8556 "it to a btrfs developer so they can make fsck fix "
8557 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8558 rec->start, rec->nr);
8559 ret = -EINVAL;
8560 goto out;
8564 * Ok great we all agreed on an extent record, let's go find the real
8565 * references and fix up the ones that don't match.
8567 list_for_each_entry(back, &rec->backrefs, list) {
8568 if (back->full_backref || !back->is_data)
8569 continue;
8571 dback = to_data_backref(back);
8574 * Still ignoring backrefs that don't have a real ref attached
8575 * to them.
8577 if (dback->found_ref == 0)
8578 continue;
8580 if (dback->bytes == best->bytes &&
8581 dback->disk_bytenr == best->bytenr)
8582 continue;
8584 ret = repair_ref(info, path, dback, best);
8585 if (ret)
8586 goto out;
8590 * Ok we messed with the actual refs, which means we need to drop our
8591 * entire cache and go back and rescan. I know this is a huge pain and
8592 * adds a lot of extra work, but it's the only way to be safe. Once all
8593 * the backrefs agree we may not need to do anything to the extent
8594 * record itself.
8596 ret = -EAGAIN;
8597 out:
8598 while (!list_empty(&entries)) {
8599 entry = list_entry(entries.next, struct extent_entry, list);
8600 list_del_init(&entry->list);
8601 free(entry);
8603 return ret;
8606 static int process_duplicates(struct cache_tree *extent_cache,
8607 struct extent_record *rec)
8609 struct extent_record *good, *tmp;
8610 struct cache_extent *cache;
8611 int ret;
8614 * If we found a extent record for this extent then return, or if we
8615 * have more than one duplicate we are likely going to need to delete
8616 * something.
8618 if (rec->found_rec || rec->num_duplicates > 1)
8619 return 0;
8621 /* Shouldn't happen but just in case */
8622 BUG_ON(!rec->num_duplicates);
8625 * So this happens if we end up with a backref that doesn't match the
8626 * actual extent entry. So either the backref is bad or the extent
8627 * entry is bad. Either way we want to have the extent_record actually
8628 * reflect what we found in the extent_tree, so we need to take the
8629 * duplicate out and use that as the extent_record since the only way we
8630 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8632 remove_cache_extent(extent_cache, &rec->cache);
8634 good = to_extent_record(rec->dups.next);
8635 list_del_init(&good->list);
8636 INIT_LIST_HEAD(&good->backrefs);
8637 INIT_LIST_HEAD(&good->dups);
8638 good->cache.start = good->start;
8639 good->cache.size = good->nr;
8640 good->content_checked = 0;
8641 good->owner_ref_checked = 0;
8642 good->num_duplicates = 0;
8643 good->refs = rec->refs;
8644 list_splice_init(&rec->backrefs, &good->backrefs);
8645 while (1) {
8646 cache = lookup_cache_extent(extent_cache, good->start,
8647 good->nr);
8648 if (!cache)
8649 break;
8650 tmp = container_of(cache, struct extent_record, cache);
8653 * If we find another overlapping extent and it's found_rec is
8654 * set then it's a duplicate and we need to try and delete
8655 * something.
8657 if (tmp->found_rec || tmp->num_duplicates > 0) {
8658 if (list_empty(&good->list))
8659 list_add_tail(&good->list,
8660 &duplicate_extents);
8661 good->num_duplicates += tmp->num_duplicates + 1;
8662 list_splice_init(&tmp->dups, &good->dups);
8663 list_del_init(&tmp->list);
8664 list_add_tail(&tmp->list, &good->dups);
8665 remove_cache_extent(extent_cache, &tmp->cache);
8666 continue;
8670 * Ok we have another non extent item backed extent rec, so lets
8671 * just add it to this extent and carry on like we did above.
8673 good->refs += tmp->refs;
8674 list_splice_init(&tmp->backrefs, &good->backrefs);
8675 remove_cache_extent(extent_cache, &tmp->cache);
8676 free(tmp);
8678 ret = insert_cache_extent(extent_cache, &good->cache);
8679 BUG_ON(ret);
8680 free(rec);
8681 return good->num_duplicates ? 0 : 1;
8684 static int delete_duplicate_records(struct btrfs_root *root,
8685 struct extent_record *rec)
8687 struct btrfs_trans_handle *trans;
8688 LIST_HEAD(delete_list);
8689 struct btrfs_path path;
8690 struct extent_record *tmp, *good, *n;
8691 int nr_del = 0;
8692 int ret = 0, err;
8693 struct btrfs_key key;
8695 btrfs_init_path(&path);
8697 good = rec;
8698 /* Find the record that covers all of the duplicates. */
8699 list_for_each_entry(tmp, &rec->dups, list) {
8700 if (good->start < tmp->start)
8701 continue;
8702 if (good->nr > tmp->nr)
8703 continue;
8705 if (tmp->start + tmp->nr < good->start + good->nr) {
8706 fprintf(stderr, "Ok we have overlapping extents that "
8707 "aren't completely covered by each other, this "
8708 "is going to require more careful thought. "
8709 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8710 tmp->start, tmp->nr, good->start, good->nr);
8711 abort();
8713 good = tmp;
8716 if (good != rec)
8717 list_add_tail(&rec->list, &delete_list);
8719 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8720 if (tmp == good)
8721 continue;
8722 list_move_tail(&tmp->list, &delete_list);
8725 root = root->fs_info->extent_root;
8726 trans = btrfs_start_transaction(root, 1);
8727 if (IS_ERR(trans)) {
8728 ret = PTR_ERR(trans);
8729 goto out;
8732 list_for_each_entry(tmp, &delete_list, list) {
8733 if (tmp->found_rec == 0)
8734 continue;
8735 key.objectid = tmp->start;
8736 key.type = BTRFS_EXTENT_ITEM_KEY;
8737 key.offset = tmp->nr;
8739 /* Shouldn't happen but just in case */
8740 if (tmp->metadata) {
8741 fprintf(stderr, "Well this shouldn't happen, extent "
8742 "record overlaps but is metadata? "
8743 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8744 abort();
8747 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8748 if (ret) {
8749 if (ret > 0)
8750 ret = -EINVAL;
8751 break;
8753 ret = btrfs_del_item(trans, root, &path);
8754 if (ret)
8755 break;
8756 btrfs_release_path(&path);
8757 nr_del++;
8759 err = btrfs_commit_transaction(trans, root);
8760 if (err && !ret)
8761 ret = err;
8762 out:
8763 while (!list_empty(&delete_list)) {
8764 tmp = to_extent_record(delete_list.next);
8765 list_del_init(&tmp->list);
8766 if (tmp == rec)
8767 continue;
8768 free(tmp);
8771 while (!list_empty(&rec->dups)) {
8772 tmp = to_extent_record(rec->dups.next);
8773 list_del_init(&tmp->list);
8774 free(tmp);
8777 btrfs_release_path(&path);
8779 if (!ret && !nr_del)
8780 rec->num_duplicates = 0;
8782 return ret ? ret : nr_del;
8785 static int find_possible_backrefs(struct btrfs_fs_info *info,
8786 struct btrfs_path *path,
8787 struct cache_tree *extent_cache,
8788 struct extent_record *rec)
8790 struct btrfs_root *root;
8791 struct extent_backref *back;
8792 struct data_backref *dback;
8793 struct cache_extent *cache;
8794 struct btrfs_file_extent_item *fi;
8795 struct btrfs_key key;
8796 u64 bytenr, bytes;
8797 int ret;
8799 list_for_each_entry(back, &rec->backrefs, list) {
8800 /* Don't care about full backrefs (poor unloved backrefs) */
8801 if (back->full_backref || !back->is_data)
8802 continue;
8804 dback = to_data_backref(back);
8806 /* We found this one, we don't need to do a lookup */
8807 if (dback->found_ref)
8808 continue;
8810 key.objectid = dback->root;
8811 key.type = BTRFS_ROOT_ITEM_KEY;
8812 key.offset = (u64)-1;
8814 root = btrfs_read_fs_root(info, &key);
8816 /* No root, definitely a bad ref, skip */
8817 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8818 continue;
8819 /* Other err, exit */
8820 if (IS_ERR(root))
8821 return PTR_ERR(root);
8823 key.objectid = dback->owner;
8824 key.type = BTRFS_EXTENT_DATA_KEY;
8825 key.offset = dback->offset;
8826 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8827 if (ret) {
8828 btrfs_release_path(path);
8829 if (ret < 0)
8830 return ret;
8831 /* Didn't find it, we can carry on */
8832 ret = 0;
8833 continue;
8836 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8837 struct btrfs_file_extent_item);
8838 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8839 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8840 btrfs_release_path(path);
8841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8842 if (cache) {
8843 struct extent_record *tmp;
8844 tmp = container_of(cache, struct extent_record, cache);
8847 * If we found an extent record for the bytenr for this
8848 * particular backref then we can't add it to our
8849 * current extent record. We only want to add backrefs
8850 * that don't have a corresponding extent item in the
8851 * extent tree since they likely belong to this record
8852 * and we need to fix it if it doesn't match bytenrs.
8854 if (tmp->found_rec)
8855 continue;
8858 dback->found_ref += 1;
8859 dback->disk_bytenr = bytenr;
8860 dback->bytes = bytes;
8863 * Set this so the verify backref code knows not to trust the
8864 * values in this backref.
8866 back->broken = 1;
8869 return 0;
8873 * Record orphan data ref into corresponding root.
8875 * Return 0 if the extent item contains data ref and recorded.
8876 * Return 1 if the extent item contains no useful data ref
8877 * On that case, it may contains only shared_dataref or metadata backref
8878 * or the file extent exists(this should be handled by the extent bytenr
8879 * recovery routine)
8880 * Return <0 if something goes wrong.
8882 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8883 struct extent_record *rec)
8885 struct btrfs_key key;
8886 struct btrfs_root *dest_root;
8887 struct extent_backref *back;
8888 struct data_backref *dback;
8889 struct orphan_data_extent *orphan;
8890 struct btrfs_path path;
8891 int recorded_data_ref = 0;
8892 int ret = 0;
8894 if (rec->metadata)
8895 return 1;
8896 btrfs_init_path(&path);
8897 list_for_each_entry(back, &rec->backrefs, list) {
8898 if (back->full_backref || !back->is_data ||
8899 !back->found_extent_tree)
8900 continue;
8901 dback = to_data_backref(back);
8902 if (dback->found_ref)
8903 continue;
8904 key.objectid = dback->root;
8905 key.type = BTRFS_ROOT_ITEM_KEY;
8906 key.offset = (u64)-1;
8908 dest_root = btrfs_read_fs_root(fs_info, &key);
8910 /* For non-exist root we just skip it */
8911 if (IS_ERR(dest_root) || !dest_root)
8912 continue;
8914 key.objectid = dback->owner;
8915 key.type = BTRFS_EXTENT_DATA_KEY;
8916 key.offset = dback->offset;
8918 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8919 btrfs_release_path(&path);
8921 * For ret < 0, it's OK since the fs-tree may be corrupted,
8922 * we need to record it for inode/file extent rebuild.
8923 * For ret > 0, we record it only for file extent rebuild.
8924 * For ret == 0, the file extent exists but only bytenr
8925 * mismatch, let the original bytenr fix routine to handle,
8926 * don't record it.
8928 if (ret == 0)
8929 continue;
8930 ret = 0;
8931 orphan = malloc(sizeof(*orphan));
8932 if (!orphan) {
8933 ret = -ENOMEM;
8934 goto out;
8936 INIT_LIST_HEAD(&orphan->list);
8937 orphan->root = dback->root;
8938 orphan->objectid = dback->owner;
8939 orphan->offset = dback->offset;
8940 orphan->disk_bytenr = rec->cache.start;
8941 orphan->disk_len = rec->cache.size;
8942 list_add(&dest_root->orphan_data_extents, &orphan->list);
8943 recorded_data_ref = 1;
8945 out:
8946 btrfs_release_path(&path);
8947 if (!ret)
8948 return !recorded_data_ref;
8949 else
8950 return ret;
8954 * when an incorrect extent item is found, this will delete
8955 * all of the existing entries for it and recreate them
8956 * based on what the tree scan found.
8958 static int fixup_extent_refs(struct btrfs_fs_info *info,
8959 struct cache_tree *extent_cache,
8960 struct extent_record *rec)
8962 struct btrfs_trans_handle *trans = NULL;
8963 int ret;
8964 struct btrfs_path path;
8965 struct list_head *cur = rec->backrefs.next;
8966 struct cache_extent *cache;
8967 struct extent_backref *back;
8968 int allocated = 0;
8969 u64 flags = 0;
8971 if (rec->flag_block_full_backref)
8972 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8974 btrfs_init_path(&path);
8975 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8977 * Sometimes the backrefs themselves are so broken they don't
8978 * get attached to any meaningful rec, so first go back and
8979 * check any of our backrefs that we couldn't find and throw
8980 * them into the list if we find the backref so that
8981 * verify_backrefs can figure out what to do.
8983 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8984 if (ret < 0)
8985 goto out;
8988 /* step one, make sure all of the backrefs agree */
8989 ret = verify_backrefs(info, &path, rec);
8990 if (ret < 0)
8991 goto out;
8993 trans = btrfs_start_transaction(info->extent_root, 1);
8994 if (IS_ERR(trans)) {
8995 ret = PTR_ERR(trans);
8996 goto out;
8999 /* step two, delete all the existing records */
9000 ret = delete_extent_records(trans, info->extent_root, &path,
9001 rec->start);
9003 if (ret < 0)
9004 goto out;
9006 /* was this block corrupt? If so, don't add references to it */
9007 cache = lookup_cache_extent(info->corrupt_blocks,
9008 rec->start, rec->max_size);
9009 if (cache) {
9010 ret = 0;
9011 goto out;
9014 /* step three, recreate all the refs we did find */
9015 while(cur != &rec->backrefs) {
9016 back = to_extent_backref(cur);
9017 cur = cur->next;
9020 * if we didn't find any references, don't create a
9021 * new extent record
9023 if (!back->found_ref)
9024 continue;
9026 rec->bad_full_backref = 0;
9027 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9028 allocated = 1;
9030 if (ret)
9031 goto out;
9033 out:
9034 if (trans) {
9035 int err = btrfs_commit_transaction(trans, info->extent_root);
9036 if (!ret)
9037 ret = err;
9040 if (!ret)
9041 fprintf(stderr, "Repaired extent references for %llu\n",
9042 (unsigned long long)rec->start);
9044 btrfs_release_path(&path);
9045 return ret;
9048 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9049 struct extent_record *rec)
9051 struct btrfs_trans_handle *trans;
9052 struct btrfs_root *root = fs_info->extent_root;
9053 struct btrfs_path path;
9054 struct btrfs_extent_item *ei;
9055 struct btrfs_key key;
9056 u64 flags;
9057 int ret = 0;
9059 key.objectid = rec->start;
9060 if (rec->metadata) {
9061 key.type = BTRFS_METADATA_ITEM_KEY;
9062 key.offset = rec->info_level;
9063 } else {
9064 key.type = BTRFS_EXTENT_ITEM_KEY;
9065 key.offset = rec->max_size;
9068 trans = btrfs_start_transaction(root, 0);
9069 if (IS_ERR(trans))
9070 return PTR_ERR(trans);
9072 btrfs_init_path(&path);
9073 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9074 if (ret < 0) {
9075 btrfs_release_path(&path);
9076 btrfs_commit_transaction(trans, root);
9077 return ret;
9078 } else if (ret) {
9079 fprintf(stderr, "Didn't find extent for %llu\n",
9080 (unsigned long long)rec->start);
9081 btrfs_release_path(&path);
9082 btrfs_commit_transaction(trans, root);
9083 return -ENOENT;
9086 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9087 struct btrfs_extent_item);
9088 flags = btrfs_extent_flags(path.nodes[0], ei);
9089 if (rec->flag_block_full_backref) {
9090 fprintf(stderr, "setting full backref on %llu\n",
9091 (unsigned long long)key.objectid);
9092 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9093 } else {
9094 fprintf(stderr, "clearing full backref on %llu\n",
9095 (unsigned long long)key.objectid);
9096 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9098 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9099 btrfs_mark_buffer_dirty(path.nodes[0]);
9100 btrfs_release_path(&path);
9101 ret = btrfs_commit_transaction(trans, root);
9102 if (!ret)
9103 fprintf(stderr, "Repaired extent flags for %llu\n",
9104 (unsigned long long)rec->start);
9106 return ret;
9109 /* right now we only prune from the extent allocation tree */
9110 static int prune_one_block(struct btrfs_trans_handle *trans,
9111 struct btrfs_fs_info *info,
9112 struct btrfs_corrupt_block *corrupt)
9114 int ret;
9115 struct btrfs_path path;
9116 struct extent_buffer *eb;
9117 u64 found;
9118 int slot;
9119 int nritems;
9120 int level = corrupt->level + 1;
9122 btrfs_init_path(&path);
9123 again:
9124 /* we want to stop at the parent to our busted block */
9125 path.lowest_level = level;
9127 ret = btrfs_search_slot(trans, info->extent_root,
9128 &corrupt->key, &path, -1, 1);
9130 if (ret < 0)
9131 goto out;
9133 eb = path.nodes[level];
9134 if (!eb) {
9135 ret = -ENOENT;
9136 goto out;
9140 * hopefully the search gave us the block we want to prune,
9141 * lets try that first
9143 slot = path.slots[level];
9144 found = btrfs_node_blockptr(eb, slot);
9145 if (found == corrupt->cache.start)
9146 goto del_ptr;
9148 nritems = btrfs_header_nritems(eb);
9150 /* the search failed, lets scan this node and hope we find it */
9151 for (slot = 0; slot < nritems; slot++) {
9152 found = btrfs_node_blockptr(eb, slot);
9153 if (found == corrupt->cache.start)
9154 goto del_ptr;
9157 * we couldn't find the bad block. TODO, search all the nodes for pointers
9158 * to this block
9160 if (eb == info->extent_root->node) {
9161 ret = -ENOENT;
9162 goto out;
9163 } else {
9164 level++;
9165 btrfs_release_path(&path);
9166 goto again;
9169 del_ptr:
9170 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9171 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9173 out:
9174 btrfs_release_path(&path);
9175 return ret;
9178 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9180 struct btrfs_trans_handle *trans = NULL;
9181 struct cache_extent *cache;
9182 struct btrfs_corrupt_block *corrupt;
9184 while (1) {
9185 cache = search_cache_extent(info->corrupt_blocks, 0);
9186 if (!cache)
9187 break;
9188 if (!trans) {
9189 trans = btrfs_start_transaction(info->extent_root, 1);
9190 if (IS_ERR(trans))
9191 return PTR_ERR(trans);
9193 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9194 prune_one_block(trans, info, corrupt);
9195 remove_cache_extent(info->corrupt_blocks, cache);
9197 if (trans)
9198 return btrfs_commit_transaction(trans, info->extent_root);
9199 return 0;
9202 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9204 struct btrfs_block_group_cache *cache;
9205 u64 start, end;
9206 int ret;
9208 while (1) {
9209 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9210 &start, &end, EXTENT_DIRTY);
9211 if (ret)
9212 break;
9213 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9216 start = 0;
9217 while (1) {
9218 cache = btrfs_lookup_first_block_group(fs_info, start);
9219 if (!cache)
9220 break;
9221 if (cache->cached)
9222 cache->cached = 0;
9223 start = cache->key.objectid + cache->key.offset;
9227 static int check_extent_refs(struct btrfs_root *root,
9228 struct cache_tree *extent_cache)
9230 struct extent_record *rec;
9231 struct cache_extent *cache;
9232 int ret = 0;
9233 int had_dups = 0;
9235 if (repair) {
9237 * if we're doing a repair, we have to make sure
9238 * we don't allocate from the problem extents.
9239 * In the worst case, this will be all the
9240 * extents in the FS
9242 cache = search_cache_extent(extent_cache, 0);
9243 while(cache) {
9244 rec = container_of(cache, struct extent_record, cache);
9245 set_extent_dirty(root->fs_info->excluded_extents,
9246 rec->start,
9247 rec->start + rec->max_size - 1);
9248 cache = next_cache_extent(cache);
9251 /* pin down all the corrupted blocks too */
9252 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9253 while(cache) {
9254 set_extent_dirty(root->fs_info->excluded_extents,
9255 cache->start,
9256 cache->start + cache->size - 1);
9257 cache = next_cache_extent(cache);
9259 prune_corrupt_blocks(root->fs_info);
9260 reset_cached_block_groups(root->fs_info);
9263 reset_cached_block_groups(root->fs_info);
9266 * We need to delete any duplicate entries we find first otherwise we
9267 * could mess up the extent tree when we have backrefs that actually
9268 * belong to a different extent item and not the weird duplicate one.
9270 while (repair && !list_empty(&duplicate_extents)) {
9271 rec = to_extent_record(duplicate_extents.next);
9272 list_del_init(&rec->list);
9274 /* Sometimes we can find a backref before we find an actual
9275 * extent, so we need to process it a little bit to see if there
9276 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9277 * if this is a backref screwup. If we need to delete stuff
9278 * process_duplicates() will return 0, otherwise it will return
9279 * 1 and we
9281 if (process_duplicates(extent_cache, rec))
9282 continue;
9283 ret = delete_duplicate_records(root, rec);
9284 if (ret < 0)
9285 return ret;
9287 * delete_duplicate_records will return the number of entries
9288 * deleted, so if it's greater than 0 then we know we actually
9289 * did something and we need to remove.
9291 if (ret)
9292 had_dups = 1;
9295 if (had_dups)
9296 return -EAGAIN;
9298 while(1) {
9299 int cur_err = 0;
9300 int fix = 0;
9302 cache = search_cache_extent(extent_cache, 0);
9303 if (!cache)
9304 break;
9305 rec = container_of(cache, struct extent_record, cache);
9306 if (rec->num_duplicates) {
9307 fprintf(stderr, "extent item %llu has multiple extent "
9308 "items\n", (unsigned long long)rec->start);
9309 cur_err = 1;
9312 if (rec->refs != rec->extent_item_refs) {
9313 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9314 (unsigned long long)rec->start,
9315 (unsigned long long)rec->nr);
9316 fprintf(stderr, "extent item %llu, found %llu\n",
9317 (unsigned long long)rec->extent_item_refs,
9318 (unsigned long long)rec->refs);
9319 ret = record_orphan_data_extents(root->fs_info, rec);
9320 if (ret < 0)
9321 goto repair_abort;
9322 fix = ret;
9323 cur_err = 1;
9325 if (all_backpointers_checked(rec, 1)) {
9326 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9327 (unsigned long long)rec->start,
9328 (unsigned long long)rec->nr);
9329 fix = 1;
9330 cur_err = 1;
9332 if (!rec->owner_ref_checked) {
9333 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9334 (unsigned long long)rec->start,
9335 (unsigned long long)rec->nr);
9336 fix = 1;
9337 cur_err = 1;
9340 if (repair && fix) {
9341 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9342 if (ret)
9343 goto repair_abort;
9347 if (rec->bad_full_backref) {
9348 fprintf(stderr, "bad full backref, on [%llu]\n",
9349 (unsigned long long)rec->start);
9350 if (repair) {
9351 ret = fixup_extent_flags(root->fs_info, rec);
9352 if (ret)
9353 goto repair_abort;
9354 fix = 1;
9356 cur_err = 1;
9359 * Although it's not a extent ref's problem, we reuse this
9360 * routine for error reporting.
9361 * No repair function yet.
9363 if (rec->crossing_stripes) {
9364 fprintf(stderr,
9365 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9366 rec->start, rec->start + rec->max_size);
9367 cur_err = 1;
9370 if (rec->wrong_chunk_type) {
9371 fprintf(stderr,
9372 "bad extent [%llu, %llu), type mismatch with chunk\n",
9373 rec->start, rec->start + rec->max_size);
9374 cur_err = 1;
9377 remove_cache_extent(extent_cache, cache);
9378 free_all_extent_backrefs(rec);
9379 if (!init_extent_tree && repair && (!cur_err || fix))
9380 clear_extent_dirty(root->fs_info->excluded_extents,
9381 rec->start,
9382 rec->start + rec->max_size - 1);
9383 free(rec);
9385 repair_abort:
9386 if (repair) {
9387 if (ret && ret != -EAGAIN) {
9388 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9389 exit(1);
9390 } else if (!ret) {
9391 struct btrfs_trans_handle *trans;
9393 root = root->fs_info->extent_root;
9394 trans = btrfs_start_transaction(root, 1);
9395 if (IS_ERR(trans)) {
9396 ret = PTR_ERR(trans);
9397 goto repair_abort;
9400 btrfs_fix_block_accounting(trans, root);
9401 ret = btrfs_commit_transaction(trans, root);
9402 if (ret)
9403 goto repair_abort;
9405 return ret;
9407 return 0;
9410 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9412 u64 stripe_size;
9414 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9415 stripe_size = length;
9416 stripe_size /= num_stripes;
9417 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9418 stripe_size = length * 2;
9419 stripe_size /= num_stripes;
9420 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9421 stripe_size = length;
9422 stripe_size /= (num_stripes - 1);
9423 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9424 stripe_size = length;
9425 stripe_size /= (num_stripes - 2);
9426 } else {
9427 stripe_size = length;
9429 return stripe_size;
9433 * Check the chunk with its block group/dev list ref:
9434 * Return 0 if all refs seems valid.
9435 * Return 1 if part of refs seems valid, need later check for rebuild ref
9436 * like missing block group and needs to search extent tree to rebuild them.
9437 * Return -1 if essential refs are missing and unable to rebuild.
9439 static int check_chunk_refs(struct chunk_record *chunk_rec,
9440 struct block_group_tree *block_group_cache,
9441 struct device_extent_tree *dev_extent_cache,
9442 int silent)
9444 struct cache_extent *block_group_item;
9445 struct block_group_record *block_group_rec;
9446 struct cache_extent *dev_extent_item;
9447 struct device_extent_record *dev_extent_rec;
9448 u64 devid;
9449 u64 offset;
9450 u64 length;
9451 int metadump_v2 = 0;
9452 int i;
9453 int ret = 0;
9455 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9456 chunk_rec->offset,
9457 chunk_rec->length);
9458 if (block_group_item) {
9459 block_group_rec = container_of(block_group_item,
9460 struct block_group_record,
9461 cache);
9462 if (chunk_rec->length != block_group_rec->offset ||
9463 chunk_rec->offset != block_group_rec->objectid ||
9464 (!metadump_v2 &&
9465 chunk_rec->type_flags != block_group_rec->flags)) {
9466 if (!silent)
9467 fprintf(stderr,
9468 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9469 chunk_rec->objectid,
9470 chunk_rec->type,
9471 chunk_rec->offset,
9472 chunk_rec->length,
9473 chunk_rec->offset,
9474 chunk_rec->type_flags,
9475 block_group_rec->objectid,
9476 block_group_rec->type,
9477 block_group_rec->offset,
9478 block_group_rec->offset,
9479 block_group_rec->objectid,
9480 block_group_rec->flags);
9481 ret = -1;
9482 } else {
9483 list_del_init(&block_group_rec->list);
9484 chunk_rec->bg_rec = block_group_rec;
9486 } else {
9487 if (!silent)
9488 fprintf(stderr,
9489 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9490 chunk_rec->objectid,
9491 chunk_rec->type,
9492 chunk_rec->offset,
9493 chunk_rec->length,
9494 chunk_rec->offset,
9495 chunk_rec->type_flags);
9496 ret = 1;
9499 if (metadump_v2)
9500 return ret;
9502 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9503 chunk_rec->num_stripes);
9504 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9505 devid = chunk_rec->stripes[i].devid;
9506 offset = chunk_rec->stripes[i].offset;
9507 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9508 devid, offset, length);
9509 if (dev_extent_item) {
9510 dev_extent_rec = container_of(dev_extent_item,
9511 struct device_extent_record,
9512 cache);
9513 if (dev_extent_rec->objectid != devid ||
9514 dev_extent_rec->offset != offset ||
9515 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9516 dev_extent_rec->length != length) {
9517 if (!silent)
9518 fprintf(stderr,
9519 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9520 chunk_rec->objectid,
9521 chunk_rec->type,
9522 chunk_rec->offset,
9523 chunk_rec->stripes[i].devid,
9524 chunk_rec->stripes[i].offset,
9525 dev_extent_rec->objectid,
9526 dev_extent_rec->offset,
9527 dev_extent_rec->length);
9528 ret = -1;
9529 } else {
9530 list_move(&dev_extent_rec->chunk_list,
9531 &chunk_rec->dextents);
9533 } else {
9534 if (!silent)
9535 fprintf(stderr,
9536 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9537 chunk_rec->objectid,
9538 chunk_rec->type,
9539 chunk_rec->offset,
9540 chunk_rec->stripes[i].devid,
9541 chunk_rec->stripes[i].offset);
9542 ret = -1;
9545 return ret;
9548 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9549 int check_chunks(struct cache_tree *chunk_cache,
9550 struct block_group_tree *block_group_cache,
9551 struct device_extent_tree *dev_extent_cache,
9552 struct list_head *good, struct list_head *bad,
9553 struct list_head *rebuild, int silent)
9555 struct cache_extent *chunk_item;
9556 struct chunk_record *chunk_rec;
9557 struct block_group_record *bg_rec;
9558 struct device_extent_record *dext_rec;
9559 int err;
9560 int ret = 0;
9562 chunk_item = first_cache_extent(chunk_cache);
9563 while (chunk_item) {
9564 chunk_rec = container_of(chunk_item, struct chunk_record,
9565 cache);
9566 err = check_chunk_refs(chunk_rec, block_group_cache,
9567 dev_extent_cache, silent);
9568 if (err < 0)
9569 ret = err;
9570 if (err == 0 && good)
9571 list_add_tail(&chunk_rec->list, good);
9572 if (err > 0 && rebuild)
9573 list_add_tail(&chunk_rec->list, rebuild);
9574 if (err < 0 && bad)
9575 list_add_tail(&chunk_rec->list, bad);
9576 chunk_item = next_cache_extent(chunk_item);
9579 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9580 if (!silent)
9581 fprintf(stderr,
9582 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9583 bg_rec->objectid,
9584 bg_rec->offset,
9585 bg_rec->flags);
9586 if (!ret)
9587 ret = 1;
9590 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9591 chunk_list) {
9592 if (!silent)
9593 fprintf(stderr,
9594 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9595 dext_rec->objectid,
9596 dext_rec->offset,
9597 dext_rec->length);
9598 if (!ret)
9599 ret = 1;
9601 return ret;
9605 static int check_device_used(struct device_record *dev_rec,
9606 struct device_extent_tree *dext_cache)
9608 struct cache_extent *cache;
9609 struct device_extent_record *dev_extent_rec;
9610 u64 total_byte = 0;
9612 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9613 while (cache) {
9614 dev_extent_rec = container_of(cache,
9615 struct device_extent_record,
9616 cache);
9617 if (dev_extent_rec->objectid != dev_rec->devid)
9618 break;
9620 list_del_init(&dev_extent_rec->device_list);
9621 total_byte += dev_extent_rec->length;
9622 cache = next_cache_extent(cache);
9625 if (total_byte != dev_rec->byte_used) {
9626 fprintf(stderr,
9627 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9628 total_byte, dev_rec->byte_used, dev_rec->objectid,
9629 dev_rec->type, dev_rec->offset);
9630 return -1;
9631 } else {
9632 return 0;
9636 /* check btrfs_dev_item -> btrfs_dev_extent */
9637 static int check_devices(struct rb_root *dev_cache,
9638 struct device_extent_tree *dev_extent_cache)
9640 struct rb_node *dev_node;
9641 struct device_record *dev_rec;
9642 struct device_extent_record *dext_rec;
9643 int err;
9644 int ret = 0;
9646 dev_node = rb_first(dev_cache);
9647 while (dev_node) {
9648 dev_rec = container_of(dev_node, struct device_record, node);
9649 err = check_device_used(dev_rec, dev_extent_cache);
9650 if (err)
9651 ret = err;
9653 dev_node = rb_next(dev_node);
9655 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9656 device_list) {
9657 fprintf(stderr,
9658 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9659 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9660 if (!ret)
9661 ret = 1;
9663 return ret;
9666 static int add_root_item_to_list(struct list_head *head,
9667 u64 objectid, u64 bytenr, u64 last_snapshot,
9668 u8 level, u8 drop_level,
9669 int level_size, struct btrfs_key *drop_key)
9672 struct root_item_record *ri_rec;
9673 ri_rec = malloc(sizeof(*ri_rec));
9674 if (!ri_rec)
9675 return -ENOMEM;
9676 ri_rec->bytenr = bytenr;
9677 ri_rec->objectid = objectid;
9678 ri_rec->level = level;
9679 ri_rec->level_size = level_size;
9680 ri_rec->drop_level = drop_level;
9681 ri_rec->last_snapshot = last_snapshot;
9682 if (drop_key)
9683 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9684 list_add_tail(&ri_rec->list, head);
9686 return 0;
9689 static void free_root_item_list(struct list_head *list)
9691 struct root_item_record *ri_rec;
9693 while (!list_empty(list)) {
9694 ri_rec = list_first_entry(list, struct root_item_record,
9695 list);
9696 list_del_init(&ri_rec->list);
9697 free(ri_rec);
9701 static int deal_root_from_list(struct list_head *list,
9702 struct btrfs_root *root,
9703 struct block_info *bits,
9704 int bits_nr,
9705 struct cache_tree *pending,
9706 struct cache_tree *seen,
9707 struct cache_tree *reada,
9708 struct cache_tree *nodes,
9709 struct cache_tree *extent_cache,
9710 struct cache_tree *chunk_cache,
9711 struct rb_root *dev_cache,
9712 struct block_group_tree *block_group_cache,
9713 struct device_extent_tree *dev_extent_cache)
9715 int ret = 0;
9716 u64 last;
9718 while (!list_empty(list)) {
9719 struct root_item_record *rec;
9720 struct extent_buffer *buf;
9721 rec = list_entry(list->next,
9722 struct root_item_record, list);
9723 last = 0;
9724 buf = read_tree_block(root->fs_info->tree_root,
9725 rec->bytenr, rec->level_size, 0);
9726 if (!extent_buffer_uptodate(buf)) {
9727 free_extent_buffer(buf);
9728 ret = -EIO;
9729 break;
9731 ret = add_root_to_pending(buf, extent_cache, pending,
9732 seen, nodes, rec->objectid);
9733 if (ret < 0)
9734 break;
9736 * To rebuild extent tree, we need deal with snapshot
9737 * one by one, otherwise we deal with node firstly which
9738 * can maximize readahead.
9740 while (1) {
9741 ret = run_next_block(root, bits, bits_nr, &last,
9742 pending, seen, reada, nodes,
9743 extent_cache, chunk_cache,
9744 dev_cache, block_group_cache,
9745 dev_extent_cache, rec);
9746 if (ret != 0)
9747 break;
9749 free_extent_buffer(buf);
9750 list_del(&rec->list);
9751 free(rec);
9752 if (ret < 0)
9753 break;
9755 while (ret >= 0) {
9756 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9757 reada, nodes, extent_cache, chunk_cache,
9758 dev_cache, block_group_cache,
9759 dev_extent_cache, NULL);
9760 if (ret != 0) {
9761 if (ret > 0)
9762 ret = 0;
9763 break;
9766 return ret;
9769 static int check_chunks_and_extents(struct btrfs_root *root)
9771 struct rb_root dev_cache;
9772 struct cache_tree chunk_cache;
9773 struct block_group_tree block_group_cache;
9774 struct device_extent_tree dev_extent_cache;
9775 struct cache_tree extent_cache;
9776 struct cache_tree seen;
9777 struct cache_tree pending;
9778 struct cache_tree reada;
9779 struct cache_tree nodes;
9780 struct extent_io_tree excluded_extents;
9781 struct cache_tree corrupt_blocks;
9782 struct btrfs_path path;
9783 struct btrfs_key key;
9784 struct btrfs_key found_key;
9785 int ret, err = 0;
9786 struct block_info *bits;
9787 int bits_nr;
9788 struct extent_buffer *leaf;
9789 int slot;
9790 struct btrfs_root_item ri;
9791 struct list_head dropping_trees;
9792 struct list_head normal_trees;
9793 struct btrfs_root *root1;
9794 u64 objectid;
9795 u32 level_size;
9796 u8 level;
9798 dev_cache = RB_ROOT;
9799 cache_tree_init(&chunk_cache);
9800 block_group_tree_init(&block_group_cache);
9801 device_extent_tree_init(&dev_extent_cache);
9803 cache_tree_init(&extent_cache);
9804 cache_tree_init(&seen);
9805 cache_tree_init(&pending);
9806 cache_tree_init(&nodes);
9807 cache_tree_init(&reada);
9808 cache_tree_init(&corrupt_blocks);
9809 extent_io_tree_init(&excluded_extents);
9810 INIT_LIST_HEAD(&dropping_trees);
9811 INIT_LIST_HEAD(&normal_trees);
9813 if (repair) {
9814 root->fs_info->excluded_extents = &excluded_extents;
9815 root->fs_info->fsck_extent_cache = &extent_cache;
9816 root->fs_info->free_extent_hook = free_extent_hook;
9817 root->fs_info->corrupt_blocks = &corrupt_blocks;
9820 bits_nr = 1024;
9821 bits = malloc(bits_nr * sizeof(struct block_info));
9822 if (!bits) {
9823 perror("malloc");
9824 exit(1);
9827 if (ctx.progress_enabled) {
9828 ctx.tp = TASK_EXTENTS;
9829 task_start(ctx.info);
9832 again:
9833 root1 = root->fs_info->tree_root;
9834 level = btrfs_header_level(root1->node);
9835 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9836 root1->node->start, 0, level, 0,
9837 root1->nodesize, NULL);
9838 if (ret < 0)
9839 goto out;
9840 root1 = root->fs_info->chunk_root;
9841 level = btrfs_header_level(root1->node);
9842 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9843 root1->node->start, 0, level, 0,
9844 root1->nodesize, NULL);
9845 if (ret < 0)
9846 goto out;
9847 btrfs_init_path(&path);
9848 key.offset = 0;
9849 key.objectid = 0;
9850 key.type = BTRFS_ROOT_ITEM_KEY;
9851 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9852 &key, &path, 0, 0);
9853 if (ret < 0)
9854 goto out;
9855 while(1) {
9856 leaf = path.nodes[0];
9857 slot = path.slots[0];
9858 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9859 ret = btrfs_next_leaf(root, &path);
9860 if (ret != 0)
9861 break;
9862 leaf = path.nodes[0];
9863 slot = path.slots[0];
9865 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9866 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9867 unsigned long offset;
9868 u64 last_snapshot;
9870 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9871 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9872 last_snapshot = btrfs_root_last_snapshot(&ri);
9873 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9874 level = btrfs_root_level(&ri);
9875 level_size = root->nodesize;
9876 ret = add_root_item_to_list(&normal_trees,
9877 found_key.objectid,
9878 btrfs_root_bytenr(&ri),
9879 last_snapshot, level,
9880 0, level_size, NULL);
9881 if (ret < 0)
9882 goto out;
9883 } else {
9884 level = btrfs_root_level(&ri);
9885 level_size = root->nodesize;
9886 objectid = found_key.objectid;
9887 btrfs_disk_key_to_cpu(&found_key,
9888 &ri.drop_progress);
9889 ret = add_root_item_to_list(&dropping_trees,
9890 objectid,
9891 btrfs_root_bytenr(&ri),
9892 last_snapshot, level,
9893 ri.drop_level,
9894 level_size, &found_key);
9895 if (ret < 0)
9896 goto out;
9899 path.slots[0]++;
9901 btrfs_release_path(&path);
9904 * check_block can return -EAGAIN if it fixes something, please keep
9905 * this in mind when dealing with return values from these functions, if
9906 * we get -EAGAIN we want to fall through and restart the loop.
9908 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9909 &seen, &reada, &nodes, &extent_cache,
9910 &chunk_cache, &dev_cache, &block_group_cache,
9911 &dev_extent_cache);
9912 if (ret < 0) {
9913 if (ret == -EAGAIN)
9914 goto loop;
9915 goto out;
9917 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9918 &pending, &seen, &reada, &nodes,
9919 &extent_cache, &chunk_cache, &dev_cache,
9920 &block_group_cache, &dev_extent_cache);
9921 if (ret < 0) {
9922 if (ret == -EAGAIN)
9923 goto loop;
9924 goto out;
9927 ret = check_chunks(&chunk_cache, &block_group_cache,
9928 &dev_extent_cache, NULL, NULL, NULL, 0);
9929 if (ret) {
9930 if (ret == -EAGAIN)
9931 goto loop;
9932 err = ret;
9935 ret = check_extent_refs(root, &extent_cache);
9936 if (ret < 0) {
9937 if (ret == -EAGAIN)
9938 goto loop;
9939 goto out;
9942 ret = check_devices(&dev_cache, &dev_extent_cache);
9943 if (ret && err)
9944 ret = err;
9946 out:
9947 task_stop(ctx.info);
9948 if (repair) {
9949 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9950 extent_io_tree_cleanup(&excluded_extents);
9951 root->fs_info->fsck_extent_cache = NULL;
9952 root->fs_info->free_extent_hook = NULL;
9953 root->fs_info->corrupt_blocks = NULL;
9954 root->fs_info->excluded_extents = NULL;
9956 free(bits);
9957 free_chunk_cache_tree(&chunk_cache);
9958 free_device_cache_tree(&dev_cache);
9959 free_block_group_tree(&block_group_cache);
9960 free_device_extent_tree(&dev_extent_cache);
9961 free_extent_cache_tree(&seen);
9962 free_extent_cache_tree(&pending);
9963 free_extent_cache_tree(&reada);
9964 free_extent_cache_tree(&nodes);
9965 return ret;
9966 loop:
9967 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9968 free_extent_cache_tree(&seen);
9969 free_extent_cache_tree(&pending);
9970 free_extent_cache_tree(&reada);
9971 free_extent_cache_tree(&nodes);
9972 free_chunk_cache_tree(&chunk_cache);
9973 free_block_group_tree(&block_group_cache);
9974 free_device_cache_tree(&dev_cache);
9975 free_device_extent_tree(&dev_extent_cache);
9976 free_extent_record_cache(&extent_cache);
9977 free_root_item_list(&normal_trees);
9978 free_root_item_list(&dropping_trees);
9979 extent_io_tree_cleanup(&excluded_extents);
9980 goto again;
9984 * Check backrefs of a tree block given by @bytenr or @eb.
9986 * @root: the root containing the @bytenr or @eb
9987 * @eb: tree block extent buffer, can be NULL
9988 * @bytenr: bytenr of the tree block to search
9989 * @level: tree level of the tree block
9990 * @owner: owner of the tree block
9992 * Return >0 for any error found and output error message
9993 * Return 0 for no error found
9995 static int check_tree_block_ref(struct btrfs_root *root,
9996 struct extent_buffer *eb, u64 bytenr,
9997 int level, u64 owner)
9999 struct btrfs_key key;
10000 struct btrfs_root *extent_root = root->fs_info->extent_root;
10001 struct btrfs_path path;
10002 struct btrfs_extent_item *ei;
10003 struct btrfs_extent_inline_ref *iref;
10004 struct extent_buffer *leaf;
10005 unsigned long end;
10006 unsigned long ptr;
10007 int slot;
10008 int skinny_level;
10009 int type;
10010 u32 nodesize = root->nodesize;
10011 u32 item_size;
10012 u64 offset;
10013 int tree_reloc_root = 0;
10014 int found_ref = 0;
10015 int err = 0;
10016 int ret;
10018 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10019 btrfs_header_bytenr(root->node) == bytenr)
10020 tree_reloc_root = 1;
10022 btrfs_init_path(&path);
10023 key.objectid = bytenr;
10024 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10025 key.type = BTRFS_METADATA_ITEM_KEY;
10026 else
10027 key.type = BTRFS_EXTENT_ITEM_KEY;
10028 key.offset = (u64)-1;
10030 /* Search for the backref in extent tree */
10031 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10032 if (ret < 0) {
10033 err |= BACKREF_MISSING;
10034 goto out;
10036 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10037 if (ret) {
10038 err |= BACKREF_MISSING;
10039 goto out;
10042 leaf = path.nodes[0];
10043 slot = path.slots[0];
10044 btrfs_item_key_to_cpu(leaf, &key, slot);
10046 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10048 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10049 skinny_level = (int)key.offset;
10050 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10051 } else {
10052 struct btrfs_tree_block_info *info;
10054 info = (struct btrfs_tree_block_info *)(ei + 1);
10055 skinny_level = btrfs_tree_block_level(leaf, info);
10056 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10059 if (eb) {
10060 u64 header_gen;
10061 u64 extent_gen;
10063 if (!(btrfs_extent_flags(leaf, ei) &
10064 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10065 error(
10066 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10067 key.objectid, nodesize,
10068 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10069 err = BACKREF_MISMATCH;
10071 header_gen = btrfs_header_generation(eb);
10072 extent_gen = btrfs_extent_generation(leaf, ei);
10073 if (header_gen != extent_gen) {
10074 error(
10075 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10076 key.objectid, nodesize, header_gen,
10077 extent_gen);
10078 err = BACKREF_MISMATCH;
10080 if (level != skinny_level) {
10081 error(
10082 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10083 key.objectid, nodesize, level, skinny_level);
10084 err = BACKREF_MISMATCH;
10086 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10087 error(
10088 "extent[%llu %u] is referred by other roots than %llu",
10089 key.objectid, nodesize, root->objectid);
10090 err = BACKREF_MISMATCH;
10095 * Iterate the extent/metadata item to find the exact backref
10097 item_size = btrfs_item_size_nr(leaf, slot);
10098 ptr = (unsigned long)iref;
10099 end = (unsigned long)ei + item_size;
10100 while (ptr < end) {
10101 iref = (struct btrfs_extent_inline_ref *)ptr;
10102 type = btrfs_extent_inline_ref_type(leaf, iref);
10103 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10105 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10106 (offset == root->objectid || offset == owner)) {
10107 found_ref = 1;
10108 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10110 * Backref of tree reloc root points to itself, no need
10111 * to check backref any more.
10113 if (tree_reloc_root)
10114 found_ref = 1;
10115 else
10116 /* Check if the backref points to valid referencer */
10117 found_ref = !check_tree_block_ref(root, NULL,
10118 offset, level + 1, owner);
10121 if (found_ref)
10122 break;
10123 ptr += btrfs_extent_inline_ref_size(type);
10127 * Inlined extent item doesn't have what we need, check
10128 * TREE_BLOCK_REF_KEY
10130 if (!found_ref) {
10131 btrfs_release_path(&path);
10132 key.objectid = bytenr;
10133 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10134 key.offset = root->objectid;
10136 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10137 if (!ret)
10138 found_ref = 1;
10140 if (!found_ref)
10141 err |= BACKREF_MISSING;
10142 out:
10143 btrfs_release_path(&path);
10144 if (eb && (err & BACKREF_MISSING))
10145 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10146 bytenr, nodesize, owner, level);
10147 return err;
10151 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10153 * Return >0 any error found and output error message
10154 * Return 0 for no error found
10156 static int check_extent_data_item(struct btrfs_root *root,
10157 struct extent_buffer *eb, int slot)
10159 struct btrfs_file_extent_item *fi;
10160 struct btrfs_path path;
10161 struct btrfs_root *extent_root = root->fs_info->extent_root;
10162 struct btrfs_key fi_key;
10163 struct btrfs_key dbref_key;
10164 struct extent_buffer *leaf;
10165 struct btrfs_extent_item *ei;
10166 struct btrfs_extent_inline_ref *iref;
10167 struct btrfs_extent_data_ref *dref;
10168 u64 owner;
10169 u64 disk_bytenr;
10170 u64 disk_num_bytes;
10171 u64 extent_num_bytes;
10172 u64 extent_flags;
10173 u32 item_size;
10174 unsigned long end;
10175 unsigned long ptr;
10176 int type;
10177 u64 ref_root;
10178 int found_dbackref = 0;
10179 int err = 0;
10180 int ret;
10182 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10183 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10185 /* Nothing to check for hole and inline data extents */
10186 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10187 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10188 return 0;
10190 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10191 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10192 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10194 /* Check unaligned disk_num_bytes and num_bytes */
10195 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10196 error(
10197 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10198 fi_key.objectid, fi_key.offset, disk_num_bytes,
10199 root->sectorsize);
10200 err |= BYTES_UNALIGNED;
10201 } else {
10202 data_bytes_allocated += disk_num_bytes;
10204 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10205 error(
10206 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10207 fi_key.objectid, fi_key.offset, extent_num_bytes,
10208 root->sectorsize);
10209 err |= BYTES_UNALIGNED;
10210 } else {
10211 data_bytes_referenced += extent_num_bytes;
10213 owner = btrfs_header_owner(eb);
10215 /* Check the extent item of the file extent in extent tree */
10216 btrfs_init_path(&path);
10217 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10218 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10219 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10221 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10222 if (ret) {
10223 err |= BACKREF_MISSING;
10224 goto error;
10227 leaf = path.nodes[0];
10228 slot = path.slots[0];
10229 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10231 extent_flags = btrfs_extent_flags(leaf, ei);
10233 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10234 error(
10235 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10236 disk_bytenr, disk_num_bytes,
10237 BTRFS_EXTENT_FLAG_DATA);
10238 err |= BACKREF_MISMATCH;
10241 /* Check data backref inside that extent item */
10242 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10243 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10244 ptr = (unsigned long)iref;
10245 end = (unsigned long)ei + item_size;
10246 while (ptr < end) {
10247 iref = (struct btrfs_extent_inline_ref *)ptr;
10248 type = btrfs_extent_inline_ref_type(leaf, iref);
10249 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10251 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10252 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10253 if (ref_root == owner || ref_root == root->objectid)
10254 found_dbackref = 1;
10255 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10256 found_dbackref = !check_tree_block_ref(root, NULL,
10257 btrfs_extent_inline_ref_offset(leaf, iref),
10258 0, owner);
10261 if (found_dbackref)
10262 break;
10263 ptr += btrfs_extent_inline_ref_size(type);
10266 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10267 if (!found_dbackref) {
10268 btrfs_release_path(&path);
10270 btrfs_init_path(&path);
10271 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10272 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10273 dbref_key.offset = hash_extent_data_ref(root->objectid,
10274 fi_key.objectid, fi_key.offset);
10276 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10277 &dbref_key, &path, 0, 0);
10278 if (!ret)
10279 found_dbackref = 1;
10282 if (!found_dbackref)
10283 err |= BACKREF_MISSING;
10284 error:
10285 btrfs_release_path(&path);
10286 if (err & BACKREF_MISSING) {
10287 error("data extent[%llu %llu] backref lost",
10288 disk_bytenr, disk_num_bytes);
10290 return err;
10294 * Get real tree block level for the case like shared block
10295 * Return >= 0 as tree level
10296 * Return <0 for error
10298 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10300 struct extent_buffer *eb;
10301 struct btrfs_path path;
10302 struct btrfs_key key;
10303 struct btrfs_extent_item *ei;
10304 u64 flags;
10305 u64 transid;
10306 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10307 u8 backref_level;
10308 u8 header_level;
10309 int ret;
10311 /* Search extent tree for extent generation and level */
10312 key.objectid = bytenr;
10313 key.type = BTRFS_METADATA_ITEM_KEY;
10314 key.offset = (u64)-1;
10316 btrfs_init_path(&path);
10317 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10318 if (ret < 0)
10319 goto release_out;
10320 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10321 if (ret < 0)
10322 goto release_out;
10323 if (ret > 0) {
10324 ret = -ENOENT;
10325 goto release_out;
10328 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10329 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10330 struct btrfs_extent_item);
10331 flags = btrfs_extent_flags(path.nodes[0], ei);
10332 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10333 ret = -ENOENT;
10334 goto release_out;
10337 /* Get transid for later read_tree_block() check */
10338 transid = btrfs_extent_generation(path.nodes[0], ei);
10340 /* Get backref level as one source */
10341 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10342 backref_level = key.offset;
10343 } else {
10344 struct btrfs_tree_block_info *info;
10346 info = (struct btrfs_tree_block_info *)(ei + 1);
10347 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10349 btrfs_release_path(&path);
10351 /* Get level from tree block as an alternative source */
10352 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10353 if (!extent_buffer_uptodate(eb)) {
10354 free_extent_buffer(eb);
10355 return -EIO;
10357 header_level = btrfs_header_level(eb);
10358 free_extent_buffer(eb);
10360 if (header_level != backref_level)
10361 return -EIO;
10362 return header_level;
10364 release_out:
10365 btrfs_release_path(&path);
10366 return ret;
10370 * Check if a tree block backref is valid (points to a valid tree block)
10371 * if level == -1, level will be resolved
10372 * Return >0 for any error found and print error message
10374 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10375 u64 bytenr, int level)
10377 struct btrfs_root *root;
10378 struct btrfs_key key;
10379 struct btrfs_path path;
10380 struct extent_buffer *eb;
10381 struct extent_buffer *node;
10382 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10383 int err = 0;
10384 int ret;
10386 /* Query level for level == -1 special case */
10387 if (level == -1)
10388 level = query_tree_block_level(fs_info, bytenr);
10389 if (level < 0) {
10390 err |= REFERENCER_MISSING;
10391 goto out;
10394 key.objectid = root_id;
10395 key.type = BTRFS_ROOT_ITEM_KEY;
10396 key.offset = (u64)-1;
10398 root = btrfs_read_fs_root(fs_info, &key);
10399 if (IS_ERR(root)) {
10400 err |= REFERENCER_MISSING;
10401 goto out;
10404 /* Read out the tree block to get item/node key */
10405 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10406 if (!extent_buffer_uptodate(eb)) {
10407 err |= REFERENCER_MISSING;
10408 free_extent_buffer(eb);
10409 goto out;
10412 /* Empty tree, no need to check key */
10413 if (!btrfs_header_nritems(eb) && !level) {
10414 free_extent_buffer(eb);
10415 goto out;
10418 if (level)
10419 btrfs_node_key_to_cpu(eb, &key, 0);
10420 else
10421 btrfs_item_key_to_cpu(eb, &key, 0);
10423 free_extent_buffer(eb);
10425 btrfs_init_path(&path);
10426 path.lowest_level = level;
10427 /* Search with the first key, to ensure we can reach it */
10428 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10429 if (ret < 0) {
10430 err |= REFERENCER_MISSING;
10431 goto release_out;
10434 node = path.nodes[level];
10435 if (btrfs_header_bytenr(node) != bytenr) {
10436 error(
10437 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10438 bytenr, nodesize, bytenr,
10439 btrfs_header_bytenr(node));
10440 err |= REFERENCER_MISMATCH;
10442 if (btrfs_header_level(node) != level) {
10443 error(
10444 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10445 bytenr, nodesize, level,
10446 btrfs_header_level(node));
10447 err |= REFERENCER_MISMATCH;
10450 release_out:
10451 btrfs_release_path(&path);
10452 out:
10453 if (err & REFERENCER_MISSING) {
10454 if (level < 0)
10455 error("extent [%llu %d] lost referencer (owner: %llu)",
10456 bytenr, nodesize, root_id);
10457 else
10458 error(
10459 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10460 bytenr, nodesize, root_id, level);
10463 return err;
10467 * Check if tree block @eb is tree reloc root.
10468 * Return 0 if it's not or any problem happens
10469 * Return 1 if it's a tree reloc root
10471 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10472 struct extent_buffer *eb)
10474 struct btrfs_root *tree_reloc_root;
10475 struct btrfs_key key;
10476 u64 bytenr = btrfs_header_bytenr(eb);
10477 u64 owner = btrfs_header_owner(eb);
10478 int ret = 0;
10480 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10481 key.offset = owner;
10482 key.type = BTRFS_ROOT_ITEM_KEY;
10484 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10485 if (IS_ERR(tree_reloc_root))
10486 return 0;
10488 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10489 ret = 1;
10490 btrfs_free_fs_root(tree_reloc_root);
10491 return ret;
10495 * Check referencer for shared block backref
10496 * If level == -1, this function will resolve the level.
10498 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10499 u64 parent, u64 bytenr, int level)
10501 struct extent_buffer *eb;
10502 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10503 u32 nr;
10504 int found_parent = 0;
10505 int i;
10507 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10508 if (!extent_buffer_uptodate(eb))
10509 goto out;
10511 if (level == -1)
10512 level = query_tree_block_level(fs_info, bytenr);
10513 if (level < 0)
10514 goto out;
10516 /* It's possible it's a tree reloc root */
10517 if (parent == bytenr) {
10518 if (is_tree_reloc_root(fs_info, eb))
10519 found_parent = 1;
10520 goto out;
10523 if (level + 1 != btrfs_header_level(eb))
10524 goto out;
10526 nr = btrfs_header_nritems(eb);
10527 for (i = 0; i < nr; i++) {
10528 if (bytenr == btrfs_node_blockptr(eb, i)) {
10529 found_parent = 1;
10530 break;
10533 out:
10534 free_extent_buffer(eb);
10535 if (!found_parent) {
10536 error(
10537 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10538 bytenr, nodesize, parent, level);
10539 return REFERENCER_MISSING;
10541 return 0;
10545 * Check referencer for normal (inlined) data ref
10546 * If len == 0, it will be resolved by searching in extent tree
10548 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10549 u64 root_id, u64 objectid, u64 offset,
10550 u64 bytenr, u64 len, u32 count)
10552 struct btrfs_root *root;
10553 struct btrfs_root *extent_root = fs_info->extent_root;
10554 struct btrfs_key key;
10555 struct btrfs_path path;
10556 struct extent_buffer *leaf;
10557 struct btrfs_file_extent_item *fi;
10558 u32 found_count = 0;
10559 int slot;
10560 int ret = 0;
10562 if (!len) {
10563 key.objectid = bytenr;
10564 key.type = BTRFS_EXTENT_ITEM_KEY;
10565 key.offset = (u64)-1;
10567 btrfs_init_path(&path);
10568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10569 if (ret < 0)
10570 goto out;
10571 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10572 if (ret)
10573 goto out;
10574 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10575 if (key.objectid != bytenr ||
10576 key.type != BTRFS_EXTENT_ITEM_KEY)
10577 goto out;
10578 len = key.offset;
10579 btrfs_release_path(&path);
10581 key.objectid = root_id;
10582 key.type = BTRFS_ROOT_ITEM_KEY;
10583 key.offset = (u64)-1;
10584 btrfs_init_path(&path);
10586 root = btrfs_read_fs_root(fs_info, &key);
10587 if (IS_ERR(root))
10588 goto out;
10590 key.objectid = objectid;
10591 key.type = BTRFS_EXTENT_DATA_KEY;
10593 * It can be nasty as data backref offset is
10594 * file offset - file extent offset, which is smaller or
10595 * equal to original backref offset. The only special case is
10596 * overflow. So we need to special check and do further search.
10598 key.offset = offset & (1ULL << 63) ? 0 : offset;
10600 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10601 if (ret < 0)
10602 goto out;
10605 * Search afterwards to get correct one
10606 * NOTE: As we must do a comprehensive check on the data backref to
10607 * make sure the dref count also matches, we must iterate all file
10608 * extents for that inode.
10610 while (1) {
10611 leaf = path.nodes[0];
10612 slot = path.slots[0];
10614 if (slot >= btrfs_header_nritems(leaf))
10615 goto next;
10616 btrfs_item_key_to_cpu(leaf, &key, slot);
10617 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10618 break;
10619 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10621 * Except normal disk bytenr and disk num bytes, we still
10622 * need to do extra check on dbackref offset as
10623 * dbackref offset = file_offset - file_extent_offset
10625 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10626 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10627 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10628 offset)
10629 found_count++;
10631 next:
10632 ret = btrfs_next_item(root, &path);
10633 if (ret)
10634 break;
10636 out:
10637 btrfs_release_path(&path);
10638 if (found_count != count) {
10639 error(
10640 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10641 bytenr, len, root_id, objectid, offset, count, found_count);
10642 return REFERENCER_MISSING;
10644 return 0;
10648 * Check if the referencer of a shared data backref exists
10650 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10651 u64 parent, u64 bytenr)
10653 struct extent_buffer *eb;
10654 struct btrfs_key key;
10655 struct btrfs_file_extent_item *fi;
10656 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10657 u32 nr;
10658 int found_parent = 0;
10659 int i;
10661 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10662 if (!extent_buffer_uptodate(eb))
10663 goto out;
10665 nr = btrfs_header_nritems(eb);
10666 for (i = 0; i < nr; i++) {
10667 btrfs_item_key_to_cpu(eb, &key, i);
10668 if (key.type != BTRFS_EXTENT_DATA_KEY)
10669 continue;
10671 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10672 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10673 continue;
10675 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10676 found_parent = 1;
10677 break;
10681 out:
10682 free_extent_buffer(eb);
10683 if (!found_parent) {
10684 error("shared extent %llu referencer lost (parent: %llu)",
10685 bytenr, parent);
10686 return REFERENCER_MISSING;
10688 return 0;
10692 * This function will check a given extent item, including its backref and
10693 * itself (like crossing stripe boundary and type)
10695 * Since we don't use extent_record anymore, introduce new error bit
10697 static int check_extent_item(struct btrfs_fs_info *fs_info,
10698 struct extent_buffer *eb, int slot)
10700 struct btrfs_extent_item *ei;
10701 struct btrfs_extent_inline_ref *iref;
10702 struct btrfs_extent_data_ref *dref;
10703 unsigned long end;
10704 unsigned long ptr;
10705 int type;
10706 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10707 u32 item_size = btrfs_item_size_nr(eb, slot);
10708 u64 flags;
10709 u64 offset;
10710 int metadata = 0;
10711 int level;
10712 struct btrfs_key key;
10713 int ret;
10714 int err = 0;
10716 btrfs_item_key_to_cpu(eb, &key, slot);
10717 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10718 bytes_used += key.offset;
10719 else
10720 bytes_used += nodesize;
10722 if (item_size < sizeof(*ei)) {
10724 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10725 * old thing when on disk format is still un-determined.
10726 * No need to care about it anymore
10728 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10729 return -ENOTTY;
10732 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10733 flags = btrfs_extent_flags(eb, ei);
10735 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10736 metadata = 1;
10737 if (metadata && check_crossing_stripes(global_info, key.objectid,
10738 eb->len)) {
10739 error("bad metadata [%llu, %llu) crossing stripe boundary",
10740 key.objectid, key.objectid + nodesize);
10741 err |= CROSSING_STRIPE_BOUNDARY;
10744 ptr = (unsigned long)(ei + 1);
10746 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10747 /* Old EXTENT_ITEM metadata */
10748 struct btrfs_tree_block_info *info;
10750 info = (struct btrfs_tree_block_info *)ptr;
10751 level = btrfs_tree_block_level(eb, info);
10752 ptr += sizeof(struct btrfs_tree_block_info);
10753 } else {
10754 /* New METADATA_ITEM */
10755 level = key.offset;
10757 end = (unsigned long)ei + item_size;
10759 next:
10760 /* Reached extent item end normally */
10761 if (ptr == end)
10762 goto out;
10764 /* Beyond extent item end, wrong item size */
10765 if (ptr > end) {
10766 err |= ITEM_SIZE_MISMATCH;
10767 error("extent item at bytenr %llu slot %d has wrong size",
10768 eb->start, slot);
10769 goto out;
10772 /* Now check every backref in this extent item */
10773 iref = (struct btrfs_extent_inline_ref *)ptr;
10774 type = btrfs_extent_inline_ref_type(eb, iref);
10775 offset = btrfs_extent_inline_ref_offset(eb, iref);
10776 switch (type) {
10777 case BTRFS_TREE_BLOCK_REF_KEY:
10778 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10779 level);
10780 err |= ret;
10781 break;
10782 case BTRFS_SHARED_BLOCK_REF_KEY:
10783 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10784 level);
10785 err |= ret;
10786 break;
10787 case BTRFS_EXTENT_DATA_REF_KEY:
10788 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10789 ret = check_extent_data_backref(fs_info,
10790 btrfs_extent_data_ref_root(eb, dref),
10791 btrfs_extent_data_ref_objectid(eb, dref),
10792 btrfs_extent_data_ref_offset(eb, dref),
10793 key.objectid, key.offset,
10794 btrfs_extent_data_ref_count(eb, dref));
10795 err |= ret;
10796 break;
10797 case BTRFS_SHARED_DATA_REF_KEY:
10798 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10799 err |= ret;
10800 break;
10801 default:
10802 error("extent[%llu %d %llu] has unknown ref type: %d",
10803 key.objectid, key.type, key.offset, type);
10804 err |= UNKNOWN_TYPE;
10805 goto out;
10808 ptr += btrfs_extent_inline_ref_size(type);
10809 goto next;
10811 out:
10812 return err;
10816 * Check if a dev extent item is referred correctly by its chunk
10818 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10819 struct extent_buffer *eb, int slot)
10821 struct btrfs_root *chunk_root = fs_info->chunk_root;
10822 struct btrfs_dev_extent *ptr;
10823 struct btrfs_path path;
10824 struct btrfs_key chunk_key;
10825 struct btrfs_key devext_key;
10826 struct btrfs_chunk *chunk;
10827 struct extent_buffer *l;
10828 int num_stripes;
10829 u64 length;
10830 int i;
10831 int found_chunk = 0;
10832 int ret;
10834 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10835 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10836 length = btrfs_dev_extent_length(eb, ptr);
10838 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10839 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10840 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10842 btrfs_init_path(&path);
10843 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10844 if (ret)
10845 goto out;
10847 l = path.nodes[0];
10848 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10849 if (btrfs_chunk_length(l, chunk) != length)
10850 goto out;
10852 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10853 for (i = 0; i < num_stripes; i++) {
10854 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10855 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10857 if (devid == devext_key.objectid &&
10858 offset == devext_key.offset) {
10859 found_chunk = 1;
10860 break;
10863 out:
10864 btrfs_release_path(&path);
10865 if (!found_chunk) {
10866 error(
10867 "device extent[%llu, %llu, %llu] did not find the related chunk",
10868 devext_key.objectid, devext_key.offset, length);
10869 return REFERENCER_MISSING;
10871 return 0;
10875 * Check if the used space is correct with the dev item
10877 static int check_dev_item(struct btrfs_fs_info *fs_info,
10878 struct extent_buffer *eb, int slot)
10880 struct btrfs_root *dev_root = fs_info->dev_root;
10881 struct btrfs_dev_item *dev_item;
10882 struct btrfs_path path;
10883 struct btrfs_key key;
10884 struct btrfs_dev_extent *ptr;
10885 u64 dev_id;
10886 u64 used;
10887 u64 total = 0;
10888 int ret;
10890 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10891 dev_id = btrfs_device_id(eb, dev_item);
10892 used = btrfs_device_bytes_used(eb, dev_item);
10894 key.objectid = dev_id;
10895 key.type = BTRFS_DEV_EXTENT_KEY;
10896 key.offset = 0;
10898 btrfs_init_path(&path);
10899 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10900 if (ret < 0) {
10901 btrfs_item_key_to_cpu(eb, &key, slot);
10902 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10903 key.objectid, key.type, key.offset);
10904 btrfs_release_path(&path);
10905 return REFERENCER_MISSING;
10908 /* Iterate dev_extents to calculate the used space of a device */
10909 while (1) {
10910 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10911 goto next;
10913 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10914 if (key.objectid > dev_id)
10915 break;
10916 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10917 goto next;
10919 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10920 struct btrfs_dev_extent);
10921 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10922 next:
10923 ret = btrfs_next_item(dev_root, &path);
10924 if (ret)
10925 break;
10927 btrfs_release_path(&path);
10929 if (used != total) {
10930 btrfs_item_key_to_cpu(eb, &key, slot);
10931 error(
10932 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10933 total, used, BTRFS_ROOT_TREE_OBJECTID,
10934 BTRFS_DEV_EXTENT_KEY, dev_id);
10935 return ACCOUNTING_MISMATCH;
10937 return 0;
10941 * Check a block group item with its referener (chunk) and its used space
10942 * with extent/metadata item
10944 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10945 struct extent_buffer *eb, int slot)
10947 struct btrfs_root *extent_root = fs_info->extent_root;
10948 struct btrfs_root *chunk_root = fs_info->chunk_root;
10949 struct btrfs_block_group_item *bi;
10950 struct btrfs_block_group_item bg_item;
10951 struct btrfs_path path;
10952 struct btrfs_key bg_key;
10953 struct btrfs_key chunk_key;
10954 struct btrfs_key extent_key;
10955 struct btrfs_chunk *chunk;
10956 struct extent_buffer *leaf;
10957 struct btrfs_extent_item *ei;
10958 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10959 u64 flags;
10960 u64 bg_flags;
10961 u64 used;
10962 u64 total = 0;
10963 int ret;
10964 int err = 0;
10966 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10967 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10968 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10969 used = btrfs_block_group_used(&bg_item);
10970 bg_flags = btrfs_block_group_flags(&bg_item);
10972 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10973 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10974 chunk_key.offset = bg_key.objectid;
10976 btrfs_init_path(&path);
10977 /* Search for the referencer chunk */
10978 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10979 if (ret) {
10980 error(
10981 "block group[%llu %llu] did not find the related chunk item",
10982 bg_key.objectid, bg_key.offset);
10983 err |= REFERENCER_MISSING;
10984 } else {
10985 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10986 struct btrfs_chunk);
10987 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10988 bg_key.offset) {
10989 error(
10990 "block group[%llu %llu] related chunk item length does not match",
10991 bg_key.objectid, bg_key.offset);
10992 err |= REFERENCER_MISMATCH;
10995 btrfs_release_path(&path);
10997 /* Search from the block group bytenr */
10998 extent_key.objectid = bg_key.objectid;
10999 extent_key.type = 0;
11000 extent_key.offset = 0;
11002 btrfs_init_path(&path);
11003 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11004 if (ret < 0)
11005 goto out;
11007 /* Iterate extent tree to account used space */
11008 while (1) {
11009 leaf = path.nodes[0];
11011 /* Search slot can point to the last item beyond leaf nritems */
11012 if (path.slots[0] >= btrfs_header_nritems(leaf))
11013 goto next;
11015 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11016 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11017 break;
11019 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11020 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11021 goto next;
11022 if (extent_key.objectid < bg_key.objectid)
11023 goto next;
11025 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11026 total += nodesize;
11027 else
11028 total += extent_key.offset;
11030 ei = btrfs_item_ptr(leaf, path.slots[0],
11031 struct btrfs_extent_item);
11032 flags = btrfs_extent_flags(leaf, ei);
11033 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11034 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11035 error(
11036 "bad extent[%llu, %llu) type mismatch with chunk",
11037 extent_key.objectid,
11038 extent_key.objectid + extent_key.offset);
11039 err |= CHUNK_TYPE_MISMATCH;
11041 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11042 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11043 BTRFS_BLOCK_GROUP_METADATA))) {
11044 error(
11045 "bad extent[%llu, %llu) type mismatch with chunk",
11046 extent_key.objectid,
11047 extent_key.objectid + nodesize);
11048 err |= CHUNK_TYPE_MISMATCH;
11051 next:
11052 ret = btrfs_next_item(extent_root, &path);
11053 if (ret)
11054 break;
11057 out:
11058 btrfs_release_path(&path);
11060 if (total != used) {
11061 error(
11062 "block group[%llu %llu] used %llu but extent items used %llu",
11063 bg_key.objectid, bg_key.offset, used, total);
11064 err |= ACCOUNTING_MISMATCH;
11066 return err;
11070 * Check a chunk item.
11071 * Including checking all referred dev_extents and block group
11073 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11074 struct extent_buffer *eb, int slot)
11076 struct btrfs_root *extent_root = fs_info->extent_root;
11077 struct btrfs_root *dev_root = fs_info->dev_root;
11078 struct btrfs_path path;
11079 struct btrfs_key chunk_key;
11080 struct btrfs_key bg_key;
11081 struct btrfs_key devext_key;
11082 struct btrfs_chunk *chunk;
11083 struct extent_buffer *leaf;
11084 struct btrfs_block_group_item *bi;
11085 struct btrfs_block_group_item bg_item;
11086 struct btrfs_dev_extent *ptr;
11087 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11088 u64 length;
11089 u64 chunk_end;
11090 u64 type;
11091 u64 profile;
11092 int num_stripes;
11093 u64 offset;
11094 u64 objectid;
11095 int i;
11096 int ret;
11097 int err = 0;
11099 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11100 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11101 length = btrfs_chunk_length(eb, chunk);
11102 chunk_end = chunk_key.offset + length;
11103 if (!IS_ALIGNED(length, sectorsize)) {
11104 error("chunk[%llu %llu) not aligned to %u",
11105 chunk_key.offset, chunk_end, sectorsize);
11106 err |= BYTES_UNALIGNED;
11107 goto out;
11110 type = btrfs_chunk_type(eb, chunk);
11111 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11112 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11113 error("chunk[%llu %llu) has no chunk type",
11114 chunk_key.offset, chunk_end);
11115 err |= UNKNOWN_TYPE;
11117 if (profile && (profile & (profile - 1))) {
11118 error("chunk[%llu %llu) multiple profiles detected: %llx",
11119 chunk_key.offset, chunk_end, profile);
11120 err |= UNKNOWN_TYPE;
11123 bg_key.objectid = chunk_key.offset;
11124 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11125 bg_key.offset = length;
11127 btrfs_init_path(&path);
11128 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11129 if (ret) {
11130 error(
11131 "chunk[%llu %llu) did not find the related block group item",
11132 chunk_key.offset, chunk_end);
11133 err |= REFERENCER_MISSING;
11134 } else{
11135 leaf = path.nodes[0];
11136 bi = btrfs_item_ptr(leaf, path.slots[0],
11137 struct btrfs_block_group_item);
11138 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11139 sizeof(bg_item));
11140 if (btrfs_block_group_flags(&bg_item) != type) {
11141 error(
11142 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11143 chunk_key.offset, chunk_end, type,
11144 btrfs_block_group_flags(&bg_item));
11145 err |= REFERENCER_MISSING;
11149 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11150 for (i = 0; i < num_stripes; i++) {
11151 btrfs_release_path(&path);
11152 btrfs_init_path(&path);
11153 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11154 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11155 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11157 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11158 0, 0);
11159 if (ret)
11160 goto not_match_dev;
11162 leaf = path.nodes[0];
11163 ptr = btrfs_item_ptr(leaf, path.slots[0],
11164 struct btrfs_dev_extent);
11165 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11166 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11167 if (objectid != chunk_key.objectid ||
11168 offset != chunk_key.offset ||
11169 btrfs_dev_extent_length(leaf, ptr) != length)
11170 goto not_match_dev;
11171 continue;
11172 not_match_dev:
11173 err |= BACKREF_MISSING;
11174 error(
11175 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11176 chunk_key.objectid, chunk_end, i);
11177 continue;
11179 btrfs_release_path(&path);
11180 out:
11181 return err;
11185 * Main entry function to check known items and update related accounting info
11187 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11189 struct btrfs_fs_info *fs_info = root->fs_info;
11190 struct btrfs_key key;
11191 int slot = 0;
11192 int type;
11193 struct btrfs_extent_data_ref *dref;
11194 int ret;
11195 int err = 0;
11197 next:
11198 btrfs_item_key_to_cpu(eb, &key, slot);
11199 type = key.type;
11201 switch (type) {
11202 case BTRFS_EXTENT_DATA_KEY:
11203 ret = check_extent_data_item(root, eb, slot);
11204 err |= ret;
11205 break;
11206 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11207 ret = check_block_group_item(fs_info, eb, slot);
11208 err |= ret;
11209 break;
11210 case BTRFS_DEV_ITEM_KEY:
11211 ret = check_dev_item(fs_info, eb, slot);
11212 err |= ret;
11213 break;
11214 case BTRFS_CHUNK_ITEM_KEY:
11215 ret = check_chunk_item(fs_info, eb, slot);
11216 err |= ret;
11217 break;
11218 case BTRFS_DEV_EXTENT_KEY:
11219 ret = check_dev_extent_item(fs_info, eb, slot);
11220 err |= ret;
11221 break;
11222 case BTRFS_EXTENT_ITEM_KEY:
11223 case BTRFS_METADATA_ITEM_KEY:
11224 ret = check_extent_item(fs_info, eb, slot);
11225 err |= ret;
11226 break;
11227 case BTRFS_EXTENT_CSUM_KEY:
11228 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11229 break;
11230 case BTRFS_TREE_BLOCK_REF_KEY:
11231 ret = check_tree_block_backref(fs_info, key.offset,
11232 key.objectid, -1);
11233 err |= ret;
11234 break;
11235 case BTRFS_EXTENT_DATA_REF_KEY:
11236 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11237 ret = check_extent_data_backref(fs_info,
11238 btrfs_extent_data_ref_root(eb, dref),
11239 btrfs_extent_data_ref_objectid(eb, dref),
11240 btrfs_extent_data_ref_offset(eb, dref),
11241 key.objectid, 0,
11242 btrfs_extent_data_ref_count(eb, dref));
11243 err |= ret;
11244 break;
11245 case BTRFS_SHARED_BLOCK_REF_KEY:
11246 ret = check_shared_block_backref(fs_info, key.offset,
11247 key.objectid, -1);
11248 err |= ret;
11249 break;
11250 case BTRFS_SHARED_DATA_REF_KEY:
11251 ret = check_shared_data_backref(fs_info, key.offset,
11252 key.objectid);
11253 err |= ret;
11254 break;
11255 default:
11256 break;
11259 if (++slot < btrfs_header_nritems(eb))
11260 goto next;
11262 return err;
11266 * Helper function for later fs/subvol tree check. To determine if a tree
11267 * block should be checked.
11268 * This function will ensure only the direct referencer with lowest rootid to
11269 * check a fs/subvolume tree block.
11271 * Backref check at extent tree would detect errors like missing subvolume
11272 * tree, so we can do aggressive check to reduce duplicated checks.
11274 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11276 struct btrfs_root *extent_root = root->fs_info->extent_root;
11277 struct btrfs_key key;
11278 struct btrfs_path path;
11279 struct extent_buffer *leaf;
11280 int slot;
11281 struct btrfs_extent_item *ei;
11282 unsigned long ptr;
11283 unsigned long end;
11284 int type;
11285 u32 item_size;
11286 u64 offset;
11287 struct btrfs_extent_inline_ref *iref;
11288 int ret;
11290 btrfs_init_path(&path);
11291 key.objectid = btrfs_header_bytenr(eb);
11292 key.type = BTRFS_METADATA_ITEM_KEY;
11293 key.offset = (u64)-1;
11296 * Any failure in backref resolving means we can't determine
11297 * whom the tree block belongs to.
11298 * So in that case, we need to check that tree block
11300 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11301 if (ret < 0)
11302 goto need_check;
11304 ret = btrfs_previous_extent_item(extent_root, &path,
11305 btrfs_header_bytenr(eb));
11306 if (ret)
11307 goto need_check;
11309 leaf = path.nodes[0];
11310 slot = path.slots[0];
11311 btrfs_item_key_to_cpu(leaf, &key, slot);
11312 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11314 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11315 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11316 } else {
11317 struct btrfs_tree_block_info *info;
11319 info = (struct btrfs_tree_block_info *)(ei + 1);
11320 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11323 item_size = btrfs_item_size_nr(leaf, slot);
11324 ptr = (unsigned long)iref;
11325 end = (unsigned long)ei + item_size;
11326 while (ptr < end) {
11327 iref = (struct btrfs_extent_inline_ref *)ptr;
11328 type = btrfs_extent_inline_ref_type(leaf, iref);
11329 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11332 * We only check the tree block if current root is
11333 * the lowest referencer of it.
11335 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11336 offset < root->objectid) {
11337 btrfs_release_path(&path);
11338 return 0;
11341 ptr += btrfs_extent_inline_ref_size(type);
11344 * Normally we should also check keyed tree block ref, but that may be
11345 * very time consuming. Inlined ref should already make us skip a lot
11346 * of refs now. So skip search keyed tree block ref.
11349 need_check:
11350 btrfs_release_path(&path);
11351 return 1;
11355 * Traversal function for tree block. We will do:
11356 * 1) Skip shared fs/subvolume tree blocks
11357 * 2) Update related bytes accounting
11358 * 3) Pre-order traversal
11360 static int traverse_tree_block(struct btrfs_root *root,
11361 struct extent_buffer *node)
11363 struct extent_buffer *eb;
11364 struct btrfs_key key;
11365 struct btrfs_key drop_key;
11366 int level;
11367 u64 nr;
11368 int i;
11369 int err = 0;
11370 int ret;
11373 * Skip shared fs/subvolume tree block, in that case they will
11374 * be checked by referencer with lowest rootid
11376 if (is_fstree(root->objectid) && !should_check(root, node))
11377 return 0;
11379 /* Update bytes accounting */
11380 total_btree_bytes += node->len;
11381 if (fs_root_objectid(btrfs_header_owner(node)))
11382 total_fs_tree_bytes += node->len;
11383 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11384 total_extent_tree_bytes += node->len;
11385 if (!found_old_backref &&
11386 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11387 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11388 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11389 found_old_backref = 1;
11391 /* pre-order tranversal, check itself first */
11392 level = btrfs_header_level(node);
11393 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11394 btrfs_header_level(node),
11395 btrfs_header_owner(node));
11396 err |= ret;
11397 if (err)
11398 error(
11399 "check %s failed root %llu bytenr %llu level %d, force continue check",
11400 level ? "node":"leaf", root->objectid,
11401 btrfs_header_bytenr(node), btrfs_header_level(node));
11403 if (!level) {
11404 btree_space_waste += btrfs_leaf_free_space(root, node);
11405 ret = check_leaf_items(root, node);
11406 err |= ret;
11407 return err;
11410 nr = btrfs_header_nritems(node);
11411 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11412 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11413 sizeof(struct btrfs_key_ptr);
11415 /* Then check all its children */
11416 for (i = 0; i < nr; i++) {
11417 u64 blocknr = btrfs_node_blockptr(node, i);
11419 btrfs_node_key_to_cpu(node, &key, i);
11420 if (level == root->root_item.drop_level &&
11421 is_dropped_key(&key, &drop_key))
11422 continue;
11425 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11426 * to call the function itself.
11428 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11429 if (extent_buffer_uptodate(eb)) {
11430 ret = traverse_tree_block(root, eb);
11431 err |= ret;
11433 free_extent_buffer(eb);
11436 return err;
11440 * Low memory usage version check_chunks_and_extents.
11442 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11444 struct btrfs_path path;
11445 struct btrfs_key key;
11446 struct btrfs_root *root1;
11447 struct btrfs_root *cur_root;
11448 int err = 0;
11449 int ret;
11451 root1 = root->fs_info->chunk_root;
11452 ret = traverse_tree_block(root1, root1->node);
11453 err |= ret;
11455 root1 = root->fs_info->tree_root;
11456 ret = traverse_tree_block(root1, root1->node);
11457 err |= ret;
11459 btrfs_init_path(&path);
11460 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11461 key.offset = 0;
11462 key.type = BTRFS_ROOT_ITEM_KEY;
11464 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11465 if (ret) {
11466 error("cannot find extent treet in tree_root");
11467 goto out;
11470 while (1) {
11471 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11472 if (key.type != BTRFS_ROOT_ITEM_KEY)
11473 goto next;
11474 key.offset = (u64)-1;
11476 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11477 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11478 &key);
11479 else
11480 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11481 if (IS_ERR(cur_root) || !cur_root) {
11482 error("failed to read tree: %lld", key.objectid);
11483 goto next;
11486 ret = traverse_tree_block(cur_root, cur_root->node);
11487 err |= ret;
11489 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11490 btrfs_free_fs_root(cur_root);
11491 next:
11492 ret = btrfs_next_item(root1, &path);
11493 if (ret)
11494 goto out;
11497 out:
11498 btrfs_release_path(&path);
11499 return err;
11502 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11503 struct btrfs_root *root, int overwrite)
11505 struct extent_buffer *c;
11506 struct extent_buffer *old = root->node;
11507 int level;
11508 int ret;
11509 struct btrfs_disk_key disk_key = {0,0,0};
11511 level = 0;
11513 if (overwrite) {
11514 c = old;
11515 extent_buffer_get(c);
11516 goto init;
11518 c = btrfs_alloc_free_block(trans, root,
11519 root->nodesize,
11520 root->root_key.objectid,
11521 &disk_key, level, 0, 0);
11522 if (IS_ERR(c)) {
11523 c = old;
11524 extent_buffer_get(c);
11525 overwrite = 1;
11527 init:
11528 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11529 btrfs_set_header_level(c, level);
11530 btrfs_set_header_bytenr(c, c->start);
11531 btrfs_set_header_generation(c, trans->transid);
11532 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11533 btrfs_set_header_owner(c, root->root_key.objectid);
11535 write_extent_buffer(c, root->fs_info->fsid,
11536 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11538 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11539 btrfs_header_chunk_tree_uuid(c),
11540 BTRFS_UUID_SIZE);
11542 btrfs_mark_buffer_dirty(c);
11544 * this case can happen in the following case:
11546 * 1.overwrite previous root.
11548 * 2.reinit reloc data root, this is because we skip pin
11549 * down reloc data tree before which means we can allocate
11550 * same block bytenr here.
11552 if (old->start == c->start) {
11553 btrfs_set_root_generation(&root->root_item,
11554 trans->transid);
11555 root->root_item.level = btrfs_header_level(root->node);
11556 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11557 &root->root_key, &root->root_item);
11558 if (ret) {
11559 free_extent_buffer(c);
11560 return ret;
11563 free_extent_buffer(old);
11564 root->node = c;
11565 add_root_to_dirty_list(root);
11566 return 0;
11569 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11570 struct extent_buffer *eb, int tree_root)
11572 struct extent_buffer *tmp;
11573 struct btrfs_root_item *ri;
11574 struct btrfs_key key;
11575 u64 bytenr;
11576 u32 nodesize;
11577 int level = btrfs_header_level(eb);
11578 int nritems;
11579 int ret;
11580 int i;
11583 * If we have pinned this block before, don't pin it again.
11584 * This can not only avoid forever loop with broken filesystem
11585 * but also give us some speedups.
11587 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11588 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11589 return 0;
11591 btrfs_pin_extent(fs_info, eb->start, eb->len);
11593 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11594 nritems = btrfs_header_nritems(eb);
11595 for (i = 0; i < nritems; i++) {
11596 if (level == 0) {
11597 btrfs_item_key_to_cpu(eb, &key, i);
11598 if (key.type != BTRFS_ROOT_ITEM_KEY)
11599 continue;
11600 /* Skip the extent root and reloc roots */
11601 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11602 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11603 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11604 continue;
11605 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11606 bytenr = btrfs_disk_root_bytenr(eb, ri);
11609 * If at any point we start needing the real root we
11610 * will have to build a stump root for the root we are
11611 * in, but for now this doesn't actually use the root so
11612 * just pass in extent_root.
11614 tmp = read_tree_block(fs_info->extent_root, bytenr,
11615 nodesize, 0);
11616 if (!extent_buffer_uptodate(tmp)) {
11617 fprintf(stderr, "Error reading root block\n");
11618 return -EIO;
11620 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11621 free_extent_buffer(tmp);
11622 if (ret)
11623 return ret;
11624 } else {
11625 bytenr = btrfs_node_blockptr(eb, i);
11627 /* If we aren't the tree root don't read the block */
11628 if (level == 1 && !tree_root) {
11629 btrfs_pin_extent(fs_info, bytenr, nodesize);
11630 continue;
11633 tmp = read_tree_block(fs_info->extent_root, bytenr,
11634 nodesize, 0);
11635 if (!extent_buffer_uptodate(tmp)) {
11636 fprintf(stderr, "Error reading tree block\n");
11637 return -EIO;
11639 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11640 free_extent_buffer(tmp);
11641 if (ret)
11642 return ret;
11646 return 0;
11649 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11651 int ret;
11653 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11654 if (ret)
11655 return ret;
11657 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11660 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11662 struct btrfs_block_group_cache *cache;
11663 struct btrfs_path path;
11664 struct extent_buffer *leaf;
11665 struct btrfs_chunk *chunk;
11666 struct btrfs_key key;
11667 int ret;
11668 u64 start;
11670 btrfs_init_path(&path);
11671 key.objectid = 0;
11672 key.type = BTRFS_CHUNK_ITEM_KEY;
11673 key.offset = 0;
11674 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11675 if (ret < 0) {
11676 btrfs_release_path(&path);
11677 return ret;
11681 * We do this in case the block groups were screwed up and had alloc
11682 * bits that aren't actually set on the chunks. This happens with
11683 * restored images every time and could happen in real life I guess.
11685 fs_info->avail_data_alloc_bits = 0;
11686 fs_info->avail_metadata_alloc_bits = 0;
11687 fs_info->avail_system_alloc_bits = 0;
11689 /* First we need to create the in-memory block groups */
11690 while (1) {
11691 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11692 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11693 if (ret < 0) {
11694 btrfs_release_path(&path);
11695 return ret;
11697 if (ret) {
11698 ret = 0;
11699 break;
11702 leaf = path.nodes[0];
11703 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11704 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11705 path.slots[0]++;
11706 continue;
11709 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11710 btrfs_add_block_group(fs_info, 0,
11711 btrfs_chunk_type(leaf, chunk),
11712 key.objectid, key.offset,
11713 btrfs_chunk_length(leaf, chunk));
11714 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11715 key.offset + btrfs_chunk_length(leaf, chunk));
11716 path.slots[0]++;
11718 start = 0;
11719 while (1) {
11720 cache = btrfs_lookup_first_block_group(fs_info, start);
11721 if (!cache)
11722 break;
11723 cache->cached = 1;
11724 start = cache->key.objectid + cache->key.offset;
11727 btrfs_release_path(&path);
11728 return 0;
11731 static int reset_balance(struct btrfs_trans_handle *trans,
11732 struct btrfs_fs_info *fs_info)
11734 struct btrfs_root *root = fs_info->tree_root;
11735 struct btrfs_path path;
11736 struct extent_buffer *leaf;
11737 struct btrfs_key key;
11738 int del_slot, del_nr = 0;
11739 int ret;
11740 int found = 0;
11742 btrfs_init_path(&path);
11743 key.objectid = BTRFS_BALANCE_OBJECTID;
11744 key.type = BTRFS_BALANCE_ITEM_KEY;
11745 key.offset = 0;
11746 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11747 if (ret) {
11748 if (ret > 0)
11749 ret = 0;
11750 if (!ret)
11751 goto reinit_data_reloc;
11752 else
11753 goto out;
11756 ret = btrfs_del_item(trans, root, &path);
11757 if (ret)
11758 goto out;
11759 btrfs_release_path(&path);
11761 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11762 key.type = BTRFS_ROOT_ITEM_KEY;
11763 key.offset = 0;
11764 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11765 if (ret < 0)
11766 goto out;
11767 while (1) {
11768 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11769 if (!found)
11770 break;
11772 if (del_nr) {
11773 ret = btrfs_del_items(trans, root, &path,
11774 del_slot, del_nr);
11775 del_nr = 0;
11776 if (ret)
11777 goto out;
11779 key.offset++;
11780 btrfs_release_path(&path);
11782 found = 0;
11783 ret = btrfs_search_slot(trans, root, &key, &path,
11784 -1, 1);
11785 if (ret < 0)
11786 goto out;
11787 continue;
11789 found = 1;
11790 leaf = path.nodes[0];
11791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11792 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11793 break;
11794 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11795 path.slots[0]++;
11796 continue;
11798 if (!del_nr) {
11799 del_slot = path.slots[0];
11800 del_nr = 1;
11801 } else {
11802 del_nr++;
11804 path.slots[0]++;
11807 if (del_nr) {
11808 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11809 if (ret)
11810 goto out;
11812 btrfs_release_path(&path);
11814 reinit_data_reloc:
11815 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11816 key.type = BTRFS_ROOT_ITEM_KEY;
11817 key.offset = (u64)-1;
11818 root = btrfs_read_fs_root(fs_info, &key);
11819 if (IS_ERR(root)) {
11820 fprintf(stderr, "Error reading data reloc tree\n");
11821 ret = PTR_ERR(root);
11822 goto out;
11824 record_root_in_trans(trans, root);
11825 ret = btrfs_fsck_reinit_root(trans, root, 0);
11826 if (ret)
11827 goto out;
11828 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11829 out:
11830 btrfs_release_path(&path);
11831 return ret;
11834 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11835 struct btrfs_fs_info *fs_info)
11837 u64 start = 0;
11838 int ret;
11841 * The only reason we don't do this is because right now we're just
11842 * walking the trees we find and pinning down their bytes, we don't look
11843 * at any of the leaves. In order to do mixed groups we'd have to check
11844 * the leaves of any fs roots and pin down the bytes for any file
11845 * extents we find. Not hard but why do it if we don't have to?
11847 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11848 fprintf(stderr, "We don't support re-initing the extent tree "
11849 "for mixed block groups yet, please notify a btrfs "
11850 "developer you want to do this so they can add this "
11851 "functionality.\n");
11852 return -EINVAL;
11856 * first we need to walk all of the trees except the extent tree and pin
11857 * down the bytes that are in use so we don't overwrite any existing
11858 * metadata.
11860 ret = pin_metadata_blocks(fs_info);
11861 if (ret) {
11862 fprintf(stderr, "error pinning down used bytes\n");
11863 return ret;
11867 * Need to drop all the block groups since we're going to recreate all
11868 * of them again.
11870 btrfs_free_block_groups(fs_info);
11871 ret = reset_block_groups(fs_info);
11872 if (ret) {
11873 fprintf(stderr, "error resetting the block groups\n");
11874 return ret;
11877 /* Ok we can allocate now, reinit the extent root */
11878 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11879 if (ret) {
11880 fprintf(stderr, "extent root initialization failed\n");
11882 * When the transaction code is updated we should end the
11883 * transaction, but for now progs only knows about commit so
11884 * just return an error.
11886 return ret;
11890 * Now we have all the in-memory block groups setup so we can make
11891 * allocations properly, and the metadata we care about is safe since we
11892 * pinned all of it above.
11894 while (1) {
11895 struct btrfs_block_group_cache *cache;
11897 cache = btrfs_lookup_first_block_group(fs_info, start);
11898 if (!cache)
11899 break;
11900 start = cache->key.objectid + cache->key.offset;
11901 ret = btrfs_insert_item(trans, fs_info->extent_root,
11902 &cache->key, &cache->item,
11903 sizeof(cache->item));
11904 if (ret) {
11905 fprintf(stderr, "Error adding block group\n");
11906 return ret;
11908 btrfs_extent_post_op(trans, fs_info->extent_root);
11911 ret = reset_balance(trans, fs_info);
11912 if (ret)
11913 fprintf(stderr, "error resetting the pending balance\n");
11915 return ret;
11918 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11920 struct btrfs_path path;
11921 struct btrfs_trans_handle *trans;
11922 struct btrfs_key key;
11923 int ret;
11925 printf("Recowing metadata block %llu\n", eb->start);
11926 key.objectid = btrfs_header_owner(eb);
11927 key.type = BTRFS_ROOT_ITEM_KEY;
11928 key.offset = (u64)-1;
11930 root = btrfs_read_fs_root(root->fs_info, &key);
11931 if (IS_ERR(root)) {
11932 fprintf(stderr, "Couldn't find owner root %llu\n",
11933 key.objectid);
11934 return PTR_ERR(root);
11937 trans = btrfs_start_transaction(root, 1);
11938 if (IS_ERR(trans))
11939 return PTR_ERR(trans);
11941 btrfs_init_path(&path);
11942 path.lowest_level = btrfs_header_level(eb);
11943 if (path.lowest_level)
11944 btrfs_node_key_to_cpu(eb, &key, 0);
11945 else
11946 btrfs_item_key_to_cpu(eb, &key, 0);
11948 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11949 btrfs_commit_transaction(trans, root);
11950 btrfs_release_path(&path);
11951 return ret;
11954 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11956 struct btrfs_path path;
11957 struct btrfs_trans_handle *trans;
11958 struct btrfs_key key;
11959 int ret;
11961 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11962 bad->key.type, bad->key.offset);
11963 key.objectid = bad->root_id;
11964 key.type = BTRFS_ROOT_ITEM_KEY;
11965 key.offset = (u64)-1;
11967 root = btrfs_read_fs_root(root->fs_info, &key);
11968 if (IS_ERR(root)) {
11969 fprintf(stderr, "Couldn't find owner root %llu\n",
11970 key.objectid);
11971 return PTR_ERR(root);
11974 trans = btrfs_start_transaction(root, 1);
11975 if (IS_ERR(trans))
11976 return PTR_ERR(trans);
11978 btrfs_init_path(&path);
11979 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11980 if (ret) {
11981 if (ret > 0)
11982 ret = 0;
11983 goto out;
11985 ret = btrfs_del_item(trans, root, &path);
11986 out:
11987 btrfs_commit_transaction(trans, root);
11988 btrfs_release_path(&path);
11989 return ret;
11992 static int zero_log_tree(struct btrfs_root *root)
11994 struct btrfs_trans_handle *trans;
11995 int ret;
11997 trans = btrfs_start_transaction(root, 1);
11998 if (IS_ERR(trans)) {
11999 ret = PTR_ERR(trans);
12000 return ret;
12002 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12003 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12004 ret = btrfs_commit_transaction(trans, root);
12005 return ret;
12008 static int populate_csum(struct btrfs_trans_handle *trans,
12009 struct btrfs_root *csum_root, char *buf, u64 start,
12010 u64 len)
12012 u64 offset = 0;
12013 u64 sectorsize;
12014 int ret = 0;
12016 while (offset < len) {
12017 sectorsize = csum_root->sectorsize;
12018 ret = read_extent_data(csum_root, buf, start + offset,
12019 &sectorsize, 0);
12020 if (ret)
12021 break;
12022 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12023 start + offset, buf, sectorsize);
12024 if (ret)
12025 break;
12026 offset += sectorsize;
12028 return ret;
12031 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12032 struct btrfs_root *csum_root,
12033 struct btrfs_root *cur_root)
12035 struct btrfs_path path;
12036 struct btrfs_key key;
12037 struct extent_buffer *node;
12038 struct btrfs_file_extent_item *fi;
12039 char *buf = NULL;
12040 u64 start = 0;
12041 u64 len = 0;
12042 int slot = 0;
12043 int ret = 0;
12045 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12046 if (!buf)
12047 return -ENOMEM;
12049 btrfs_init_path(&path);
12050 key.objectid = 0;
12051 key.offset = 0;
12052 key.type = 0;
12053 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12054 if (ret < 0)
12055 goto out;
12056 /* Iterate all regular file extents and fill its csum */
12057 while (1) {
12058 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12060 if (key.type != BTRFS_EXTENT_DATA_KEY)
12061 goto next;
12062 node = path.nodes[0];
12063 slot = path.slots[0];
12064 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12065 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12066 goto next;
12067 start = btrfs_file_extent_disk_bytenr(node, fi);
12068 len = btrfs_file_extent_disk_num_bytes(node, fi);
12070 ret = populate_csum(trans, csum_root, buf, start, len);
12071 if (ret == -EEXIST)
12072 ret = 0;
12073 if (ret < 0)
12074 goto out;
12075 next:
12077 * TODO: if next leaf is corrupted, jump to nearest next valid
12078 * leaf.
12080 ret = btrfs_next_item(cur_root, &path);
12081 if (ret < 0)
12082 goto out;
12083 if (ret > 0) {
12084 ret = 0;
12085 goto out;
12089 out:
12090 btrfs_release_path(&path);
12091 free(buf);
12092 return ret;
12095 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12096 struct btrfs_root *csum_root)
12098 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12099 struct btrfs_path path;
12100 struct btrfs_root *tree_root = fs_info->tree_root;
12101 struct btrfs_root *cur_root;
12102 struct extent_buffer *node;
12103 struct btrfs_key key;
12104 int slot = 0;
12105 int ret = 0;
12107 btrfs_init_path(&path);
12108 key.objectid = BTRFS_FS_TREE_OBJECTID;
12109 key.offset = 0;
12110 key.type = BTRFS_ROOT_ITEM_KEY;
12111 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12112 if (ret < 0)
12113 goto out;
12114 if (ret > 0) {
12115 ret = -ENOENT;
12116 goto out;
12119 while (1) {
12120 node = path.nodes[0];
12121 slot = path.slots[0];
12122 btrfs_item_key_to_cpu(node, &key, slot);
12123 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12124 goto out;
12125 if (key.type != BTRFS_ROOT_ITEM_KEY)
12126 goto next;
12127 if (!is_fstree(key.objectid))
12128 goto next;
12129 key.offset = (u64)-1;
12131 cur_root = btrfs_read_fs_root(fs_info, &key);
12132 if (IS_ERR(cur_root) || !cur_root) {
12133 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12134 key.objectid);
12135 goto out;
12137 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12138 cur_root);
12139 if (ret < 0)
12140 goto out;
12141 next:
12142 ret = btrfs_next_item(tree_root, &path);
12143 if (ret > 0) {
12144 ret = 0;
12145 goto out;
12147 if (ret < 0)
12148 goto out;
12151 out:
12152 btrfs_release_path(&path);
12153 return ret;
12156 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12157 struct btrfs_root *csum_root)
12159 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12160 struct btrfs_path path;
12161 struct btrfs_extent_item *ei;
12162 struct extent_buffer *leaf;
12163 char *buf;
12164 struct btrfs_key key;
12165 int ret;
12167 btrfs_init_path(&path);
12168 key.objectid = 0;
12169 key.type = BTRFS_EXTENT_ITEM_KEY;
12170 key.offset = 0;
12171 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12172 if (ret < 0) {
12173 btrfs_release_path(&path);
12174 return ret;
12177 buf = malloc(csum_root->sectorsize);
12178 if (!buf) {
12179 btrfs_release_path(&path);
12180 return -ENOMEM;
12183 while (1) {
12184 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12185 ret = btrfs_next_leaf(extent_root, &path);
12186 if (ret < 0)
12187 break;
12188 if (ret) {
12189 ret = 0;
12190 break;
12193 leaf = path.nodes[0];
12195 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12196 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12197 path.slots[0]++;
12198 continue;
12201 ei = btrfs_item_ptr(leaf, path.slots[0],
12202 struct btrfs_extent_item);
12203 if (!(btrfs_extent_flags(leaf, ei) &
12204 BTRFS_EXTENT_FLAG_DATA)) {
12205 path.slots[0]++;
12206 continue;
12209 ret = populate_csum(trans, csum_root, buf, key.objectid,
12210 key.offset);
12211 if (ret)
12212 break;
12213 path.slots[0]++;
12216 btrfs_release_path(&path);
12217 free(buf);
12218 return ret;
/*
 * Recalculate the checksums and put them into the csum tree.
 *
 * Initializing the extent tree wipes out all the extent info, so in that
 * case the extent tree cannot be relied on and the fs trees are used
 * instead.  If @search_fs_tree is set, the fs/subvol trees are the source
 * for the csum tree, otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12238 static void free_roots_info_cache(void)
12240 if (!roots_info_cache)
12241 return;
12243 while (!cache_tree_empty(roots_info_cache)) {
12244 struct cache_extent *entry;
12245 struct root_item_info *rii;
12247 entry = first_cache_extent(roots_info_cache);
12248 if (!entry)
12249 break;
12250 remove_cache_extent(roots_info_cache, entry);
12251 rii = container_of(entry, struct root_item_info, cache_extent);
12252 free(rii);
12255 free(roots_info_cache);
12256 roots_info_cache = NULL;
12259 static int build_roots_info_cache(struct btrfs_fs_info *info)
12261 int ret = 0;
12262 struct btrfs_key key;
12263 struct extent_buffer *leaf;
12264 struct btrfs_path path;
12266 if (!roots_info_cache) {
12267 roots_info_cache = malloc(sizeof(*roots_info_cache));
12268 if (!roots_info_cache)
12269 return -ENOMEM;
12270 cache_tree_init(roots_info_cache);
12273 btrfs_init_path(&path);
12274 key.objectid = 0;
12275 key.type = BTRFS_EXTENT_ITEM_KEY;
12276 key.offset = 0;
12277 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12278 if (ret < 0)
12279 goto out;
12280 leaf = path.nodes[0];
12282 while (1) {
12283 struct btrfs_key found_key;
12284 struct btrfs_extent_item *ei;
12285 struct btrfs_extent_inline_ref *iref;
12286 int slot = path.slots[0];
12287 int type;
12288 u64 flags;
12289 u64 root_id;
12290 u8 level;
12291 struct cache_extent *entry;
12292 struct root_item_info *rii;
12294 if (slot >= btrfs_header_nritems(leaf)) {
12295 ret = btrfs_next_leaf(info->extent_root, &path);
12296 if (ret < 0) {
12297 break;
12298 } else if (ret) {
12299 ret = 0;
12300 break;
12302 leaf = path.nodes[0];
12303 slot = path.slots[0];
12306 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12308 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12309 found_key.type != BTRFS_METADATA_ITEM_KEY)
12310 goto next;
12312 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12313 flags = btrfs_extent_flags(leaf, ei);
12315 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12316 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12317 goto next;
12319 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12320 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12321 level = found_key.offset;
12322 } else {
12323 struct btrfs_tree_block_info *binfo;
12325 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12326 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12327 level = btrfs_tree_block_level(leaf, binfo);
12331 * For a root extent, it must be of the following type and the
12332 * first (and only one) iref in the item.
12334 type = btrfs_extent_inline_ref_type(leaf, iref);
12335 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12336 goto next;
12338 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12339 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12340 if (!entry) {
12341 rii = malloc(sizeof(struct root_item_info));
12342 if (!rii) {
12343 ret = -ENOMEM;
12344 goto out;
12346 rii->cache_extent.start = root_id;
12347 rii->cache_extent.size = 1;
12348 rii->level = (u8)-1;
12349 entry = &rii->cache_extent;
12350 ret = insert_cache_extent(roots_info_cache, entry);
12351 ASSERT(ret == 0);
12352 } else {
12353 rii = container_of(entry, struct root_item_info,
12354 cache_extent);
12357 ASSERT(rii->cache_extent.start == root_id);
12358 ASSERT(rii->cache_extent.size == 1);
12360 if (level > rii->level || rii->level == (u8)-1) {
12361 rii->level = level;
12362 rii->bytenr = found_key.objectid;
12363 rii->gen = btrfs_extent_generation(leaf, ei);
12364 rii->node_count = 1;
12365 } else if (level == rii->level) {
12366 rii->node_count++;
12368 next:
12369 path.slots[0]++;
12372 out:
12373 btrfs_release_path(&path);
12375 return ret;
12378 static int maybe_repair_root_item(struct btrfs_path *path,
12379 const struct btrfs_key *root_key,
12380 const int read_only_mode)
12382 const u64 root_id = root_key->objectid;
12383 struct cache_extent *entry;
12384 struct root_item_info *rii;
12385 struct btrfs_root_item ri;
12386 unsigned long offset;
12388 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12389 if (!entry) {
12390 fprintf(stderr,
12391 "Error: could not find extent items for root %llu\n",
12392 root_key->objectid);
12393 return -ENOENT;
12396 rii = container_of(entry, struct root_item_info, cache_extent);
12397 ASSERT(rii->cache_extent.start == root_id);
12398 ASSERT(rii->cache_extent.size == 1);
12400 if (rii->node_count != 1) {
12401 fprintf(stderr,
12402 "Error: could not find btree root extent for root %llu\n",
12403 root_id);
12404 return -ENOENT;
12407 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12408 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12410 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12411 btrfs_root_level(&ri) != rii->level ||
12412 btrfs_root_generation(&ri) != rii->gen) {
12415 * If we're in repair mode but our caller told us to not update
12416 * the root item, i.e. just check if it needs to be updated, don't
12417 * print this message, since the caller will call us again shortly
12418 * for the same root item without read only mode (the caller will
12419 * open a transaction first).
12421 if (!(read_only_mode && repair))
12422 fprintf(stderr,
12423 "%sroot item for root %llu,"
12424 " current bytenr %llu, current gen %llu, current level %u,"
12425 " new bytenr %llu, new gen %llu, new level %u\n",
12426 (read_only_mode ? "" : "fixing "),
12427 root_id,
12428 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12429 btrfs_root_level(&ri),
12430 rii->bytenr, rii->gen, rii->level);
12432 if (btrfs_root_generation(&ri) > rii->gen) {
12433 fprintf(stderr,
12434 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12435 root_id, btrfs_root_generation(&ri), rii->gen);
12436 return -EINVAL;
12439 if (!read_only_mode) {
12440 btrfs_set_root_bytenr(&ri, rii->bytenr);
12441 btrfs_set_root_level(&ri, rii->level);
12442 btrfs_set_root_generation(&ri, rii->gen);
12443 write_extent_buffer(path->nodes[0], &ri,
12444 offset, sizeof(ri));
12447 return 1;
12450 return 0;
12454 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12455 * caused read-only snapshots to be corrupted if they were created at a moment
12456 * when the source subvolume/snapshot had orphan items. The issue was that the
12457 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12458 * node instead of the post orphan cleanup root node.
12459 * So this function, and its callees, just detects and fixes those cases. Even
12460 * though the regression was for read-only snapshots, this function applies to
12461 * any snapshot/subvolume root.
12462 * This must be run before any other repair code - not doing it so, makes other
12463 * repair code delete or modify backrefs in the extent tree for example, which
12464 * will result in an inconsistent fs after repairing the root items.
12466 static int repair_root_items(struct btrfs_fs_info *info)
12468 struct btrfs_path path;
12469 struct btrfs_key key;
12470 struct extent_buffer *leaf;
12471 struct btrfs_trans_handle *trans = NULL;
12472 int ret = 0;
12473 int bad_roots = 0;
12474 int need_trans = 0;
12476 btrfs_init_path(&path);
12478 ret = build_roots_info_cache(info);
12479 if (ret)
12480 goto out;
12482 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12483 key.type = BTRFS_ROOT_ITEM_KEY;
12484 key.offset = 0;
12486 again:
12488 * Avoid opening and committing transactions if a leaf doesn't have
12489 * any root items that need to be fixed, so that we avoid rotating
12490 * backup roots unnecessarily.
12492 if (need_trans) {
12493 trans = btrfs_start_transaction(info->tree_root, 1);
12494 if (IS_ERR(trans)) {
12495 ret = PTR_ERR(trans);
12496 goto out;
12500 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12501 0, trans ? 1 : 0);
12502 if (ret < 0)
12503 goto out;
12504 leaf = path.nodes[0];
12506 while (1) {
12507 struct btrfs_key found_key;
12509 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12510 int no_more_keys = find_next_key(&path, &key);
12512 btrfs_release_path(&path);
12513 if (trans) {
12514 ret = btrfs_commit_transaction(trans,
12515 info->tree_root);
12516 trans = NULL;
12517 if (ret < 0)
12518 goto out;
12520 need_trans = 0;
12521 if (no_more_keys)
12522 break;
12523 goto again;
12526 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12528 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12529 goto next;
12530 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12531 goto next;
12533 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12534 if (ret < 0)
12535 goto out;
12536 if (ret) {
12537 if (!trans && repair) {
12538 need_trans = 1;
12539 key = found_key;
12540 btrfs_release_path(&path);
12541 goto again;
12543 bad_roots++;
12545 next:
12546 path.slots[0]++;
12548 ret = 0;
12549 out:
12550 free_roots_info_cache();
12551 btrfs_release_path(&path);
12552 if (trans)
12553 btrfs_commit_transaction(trans, info->tree_root);
12554 if (ret < 0)
12555 return ret;
12557 return bad_roots;
12560 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12562 struct btrfs_trans_handle *trans;
12563 struct btrfs_block_group_cache *bg_cache;
12564 u64 current = 0;
12565 int ret = 0;
12567 /* Clear all free space cache inodes and its extent data */
12568 while (1) {
12569 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12570 if (!bg_cache)
12571 break;
12572 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12573 if (ret < 0)
12574 return ret;
12575 current = bg_cache->key.objectid + bg_cache->key.offset;
12578 /* Don't forget to set cache_generation to -1 */
12579 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12580 if (IS_ERR(trans)) {
12581 error("failed to update super block cache generation");
12582 return PTR_ERR(trans);
12584 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12585 btrfs_commit_transaction(trans, fs_info->tree_root);
12587 return ret;
/*
 * Usage text of "btrfs check": synopsis, one-line summary, long
 * description, then an empty string followed by the option list, terminated
 * by NULL.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and short description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* options */
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
12621 int cmd_check(int argc, char **argv)
12623 struct cache_tree root_cache;
12624 struct btrfs_root *root;
12625 struct btrfs_fs_info *info;
12626 u64 bytenr = 0;
12627 u64 subvolid = 0;
12628 u64 tree_root_bytenr = 0;
12629 u64 chunk_root_bytenr = 0;
12630 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12631 int ret;
12632 int err = 0;
12633 u64 num;
12634 int init_csum_tree = 0;
12635 int readonly = 0;
12636 int clear_space_cache = 0;
12637 int qgroup_report = 0;
12638 int qgroups_repaired = 0;
12639 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12641 while(1) {
12642 int c;
12643 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12644 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12645 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12646 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12647 static const struct option long_options[] = {
12648 { "super", required_argument, NULL, 's' },
12649 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12650 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12651 { "init-csum-tree", no_argument, NULL,
12652 GETOPT_VAL_INIT_CSUM },
12653 { "init-extent-tree", no_argument, NULL,
12654 GETOPT_VAL_INIT_EXTENT },
12655 { "check-data-csum", no_argument, NULL,
12656 GETOPT_VAL_CHECK_CSUM },
12657 { "backup", no_argument, NULL, 'b' },
12658 { "subvol-extents", required_argument, NULL, 'E' },
12659 { "qgroup-report", no_argument, NULL, 'Q' },
12660 { "tree-root", required_argument, NULL, 'r' },
12661 { "chunk-root", required_argument, NULL,
12662 GETOPT_VAL_CHUNK_TREE },
12663 { "progress", no_argument, NULL, 'p' },
12664 { "mode", required_argument, NULL,
12665 GETOPT_VAL_MODE },
12666 { "clear-space-cache", required_argument, NULL,
12667 GETOPT_VAL_CLEAR_SPACE_CACHE},
12668 { NULL, 0, NULL, 0}
12671 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12672 if (c < 0)
12673 break;
12674 switch(c) {
12675 case 'a': /* ignored */ break;
12676 case 'b':
12677 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12678 break;
12679 case 's':
12680 num = arg_strtou64(optarg);
12681 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12682 error(
12683 "super mirror should be less than %d",
12684 BTRFS_SUPER_MIRROR_MAX);
12685 exit(1);
12687 bytenr = btrfs_sb_offset(((int)num));
12688 printf("using SB copy %llu, bytenr %llu\n", num,
12689 (unsigned long long)bytenr);
12690 break;
12691 case 'Q':
12692 qgroup_report = 1;
12693 break;
12694 case 'E':
12695 subvolid = arg_strtou64(optarg);
12696 break;
12697 case 'r':
12698 tree_root_bytenr = arg_strtou64(optarg);
12699 break;
12700 case GETOPT_VAL_CHUNK_TREE:
12701 chunk_root_bytenr = arg_strtou64(optarg);
12702 break;
12703 case 'p':
12704 ctx.progress_enabled = true;
12705 break;
12706 case '?':
12707 case 'h':
12708 usage(cmd_check_usage);
12709 case GETOPT_VAL_REPAIR:
12710 printf("enabling repair mode\n");
12711 repair = 1;
12712 ctree_flags |= OPEN_CTREE_WRITES;
12713 break;
12714 case GETOPT_VAL_READONLY:
12715 readonly = 1;
12716 break;
12717 case GETOPT_VAL_INIT_CSUM:
12718 printf("Creating a new CRC tree\n");
12719 init_csum_tree = 1;
12720 repair = 1;
12721 ctree_flags |= OPEN_CTREE_WRITES;
12722 break;
12723 case GETOPT_VAL_INIT_EXTENT:
12724 init_extent_tree = 1;
12725 ctree_flags |= (OPEN_CTREE_WRITES |
12726 OPEN_CTREE_NO_BLOCK_GROUPS);
12727 repair = 1;
12728 break;
12729 case GETOPT_VAL_CHECK_CSUM:
12730 check_data_csum = 1;
12731 break;
12732 case GETOPT_VAL_MODE:
12733 check_mode = parse_check_mode(optarg);
12734 if (check_mode == CHECK_MODE_UNKNOWN) {
12735 error("unknown mode: %s", optarg);
12736 exit(1);
12738 break;
12739 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12740 if (strcmp(optarg, "v1") == 0) {
12741 clear_space_cache = 1;
12742 } else if (strcmp(optarg, "v2") == 0) {
12743 clear_space_cache = 2;
12744 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12745 } else {
12746 error(
12747 "invalid argument to --clear-space-cache, must be v1 or v2");
12748 exit(1);
12750 ctree_flags |= OPEN_CTREE_WRITES;
12751 break;
12755 if (check_argc_exact(argc - optind, 1))
12756 usage(cmd_check_usage);
12758 if (ctx.progress_enabled) {
12759 ctx.tp = TASK_NOTHING;
12760 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12763 /* This check is the only reason for --readonly to exist */
12764 if (readonly && repair) {
12765 error("repair options are not compatible with --readonly");
12766 exit(1);
12770 * Not supported yet
12772 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12773 error("low memory mode doesn't support repair yet");
12774 exit(1);
12777 radix_tree_init();
12778 cache_tree_init(&root_cache);
12780 if((ret = check_mounted(argv[optind])) < 0) {
12781 error("could not check mount status: %s", strerror(-ret));
12782 err |= !!ret;
12783 goto err_out;
12784 } else if(ret) {
12785 error("%s is currently mounted, aborting", argv[optind]);
12786 ret = -EBUSY;
12787 err |= !!ret;
12788 goto err_out;
12791 /* only allow partial opening under repair mode */
12792 if (repair)
12793 ctree_flags |= OPEN_CTREE_PARTIAL;
12795 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12796 chunk_root_bytenr, ctree_flags);
12797 if (!info) {
12798 error("cannot open file system");
12799 ret = -EIO;
12800 err |= !!ret;
12801 goto err_out;
12804 global_info = info;
12805 root = info->fs_root;
12806 if (clear_space_cache == 1) {
12807 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12808 error(
12809 "free space cache v2 detected, use --clear-space-cache v2");
12810 ret = 1;
12811 goto close_out;
12813 printf("Clearing free space cache\n");
12814 ret = clear_free_space_cache(info);
12815 if (ret) {
12816 error("failed to clear free space cache");
12817 ret = 1;
12818 } else {
12819 printf("Free space cache cleared\n");
12821 goto close_out;
12822 } else if (clear_space_cache == 2) {
12823 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12824 printf("no free space cache v2 to clear\n");
12825 ret = 0;
12826 goto close_out;
12828 printf("Clear free space cache v2\n");
12829 ret = btrfs_clear_free_space_tree(info);
12830 if (ret) {
12831 error("failed to clear free space cache v2: %d", ret);
12832 ret = 1;
12833 } else {
12834 printf("free space cache v2 cleared\n");
12836 goto close_out;
12840 * repair mode will force us to commit transaction which
12841 * will make us fail to load log tree when mounting.
12843 if (repair && btrfs_super_log_root(info->super_copy)) {
12844 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12845 if (!ret) {
12846 ret = 1;
12847 err |= !!ret;
12848 goto close_out;
12850 ret = zero_log_tree(root);
12851 err |= !!ret;
12852 if (ret) {
12853 error("failed to zero log tree: %d", ret);
12854 goto close_out;
12858 uuid_unparse(info->super_copy->fsid, uuidbuf);
12859 if (qgroup_report) {
12860 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12861 uuidbuf);
12862 ret = qgroup_verify_all(info);
12863 err |= !!ret;
12864 if (ret == 0)
12865 report_qgroups(1);
12866 goto close_out;
12868 if (subvolid) {
12869 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12870 subvolid, argv[optind], uuidbuf);
12871 ret = print_extent_state(info, subvolid);
12872 err |= !!ret;
12873 goto close_out;
12875 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12877 if (!extent_buffer_uptodate(info->tree_root->node) ||
12878 !extent_buffer_uptodate(info->dev_root->node) ||
12879 !extent_buffer_uptodate(info->chunk_root->node)) {
12880 error("critical roots corrupted, unable to check the filesystem");
12881 err |= !!ret;
12882 ret = -EIO;
12883 goto close_out;
12886 if (init_extent_tree || init_csum_tree) {
12887 struct btrfs_trans_handle *trans;
12889 trans = btrfs_start_transaction(info->extent_root, 0);
12890 if (IS_ERR(trans)) {
12891 error("error starting transaction");
12892 ret = PTR_ERR(trans);
12893 err |= !!ret;
12894 goto close_out;
12897 if (init_extent_tree) {
12898 printf("Creating a new extent tree\n");
12899 ret = reinit_extent_tree(trans, info);
12900 err |= !!ret;
12901 if (ret)
12902 goto close_out;
12905 if (init_csum_tree) {
12906 printf("Reinitialize checksum tree\n");
12907 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12908 if (ret) {
12909 error("checksum tree initialization failed: %d",
12910 ret);
12911 ret = -EIO;
12912 err |= !!ret;
12913 goto close_out;
12916 ret = fill_csum_tree(trans, info->csum_root,
12917 init_extent_tree);
12918 err |= !!ret;
12919 if (ret) {
12920 error("checksum tree refilling failed: %d", ret);
12921 return -EIO;
12925 * Ok now we commit and run the normal fsck, which will add
12926 * extent entries for all of the items it finds.
12928 ret = btrfs_commit_transaction(trans, info->extent_root);
12929 err |= !!ret;
12930 if (ret)
12931 goto close_out;
12933 if (!extent_buffer_uptodate(info->extent_root->node)) {
12934 error("critical: extent_root, unable to check the filesystem");
12935 ret = -EIO;
12936 err |= !!ret;
12937 goto close_out;
12939 if (!extent_buffer_uptodate(info->csum_root->node)) {
12940 error("critical: csum_root, unable to check the filesystem");
12941 ret = -EIO;
12942 err |= !!ret;
12943 goto close_out;
12946 if (!ctx.progress_enabled)
12947 fprintf(stderr, "checking extents\n");
12948 if (check_mode == CHECK_MODE_LOWMEM)
12949 ret = check_chunks_and_extents_v2(root);
12950 else
12951 ret = check_chunks_and_extents(root);
12952 err |= !!ret;
12953 if (ret)
12954 error(
12955 "errors found in extent allocation tree or chunk allocation");
12957 ret = repair_root_items(info);
12958 err |= !!ret;
12959 if (ret < 0) {
12960 error("failed to repair root items: %s", strerror(-ret));
12961 goto close_out;
12963 if (repair) {
12964 fprintf(stderr, "Fixed %d roots.\n", ret);
12965 ret = 0;
12966 } else if (ret > 0) {
12967 fprintf(stderr,
12968 "Found %d roots with an outdated root item.\n",
12969 ret);
12970 fprintf(stderr,
12971 "Please run a filesystem check with the option --repair to fix them.\n");
12972 ret = 1;
12973 err |= !!ret;
12974 goto close_out;
12977 if (!ctx.progress_enabled) {
12978 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12979 fprintf(stderr, "checking free space tree\n");
12980 else
12981 fprintf(stderr, "checking free space cache\n");
12983 ret = check_space_cache(root);
12984 err |= !!ret;
12985 if (ret) {
12986 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12987 error("errors found in free space tree");
12988 else
12989 error("errors found in free space cache");
12990 goto out;
12994 * We used to have to have these hole extents in between our real
12995 * extents so if we don't have this flag set we need to make sure there
12996 * are no gaps in the file extents for inodes, otherwise we can just
12997 * ignore it when this happens.
12999 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13000 if (!ctx.progress_enabled)
13001 fprintf(stderr, "checking fs roots\n");
13002 if (check_mode == CHECK_MODE_LOWMEM)
13003 ret = check_fs_roots_v2(root->fs_info);
13004 else
13005 ret = check_fs_roots(root, &root_cache);
13006 err |= !!ret;
13007 if (ret) {
13008 error("errors found in fs roots");
13009 goto out;
13012 fprintf(stderr, "checking csums\n");
13013 ret = check_csums(root);
13014 err |= !!ret;
13015 if (ret) {
13016 error("errors found in csum tree");
13017 goto out;
13020 fprintf(stderr, "checking root refs\n");
13021 /* For low memory mode, check_fs_roots_v2 handles root refs */
13022 if (check_mode != CHECK_MODE_LOWMEM) {
13023 ret = check_root_refs(root, &root_cache);
13024 err |= !!ret;
13025 if (ret) {
13026 error("errors found in root refs");
13027 goto out;
13031 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13032 struct extent_buffer *eb;
13034 eb = list_first_entry(&root->fs_info->recow_ebs,
13035 struct extent_buffer, recow);
13036 list_del_init(&eb->recow);
13037 ret = recow_extent_buffer(root, eb);
13038 err |= !!ret;
13039 if (ret) {
13040 error("fails to fix transid errors");
13041 break;
13045 while (!list_empty(&delete_items)) {
13046 struct bad_item *bad;
13048 bad = list_first_entry(&delete_items, struct bad_item, list);
13049 list_del_init(&bad->list);
13050 if (repair) {
13051 ret = delete_bad_item(root, bad);
13052 err |= !!ret;
13054 free(bad);
13057 if (info->quota_enabled) {
13058 fprintf(stderr, "checking quota groups\n");
13059 ret = qgroup_verify_all(info);
13060 err |= !!ret;
13061 if (ret) {
13062 error("failed to check quota groups");
13063 goto out;
13065 report_qgroups(0);
13066 ret = repair_qgroups(info, &qgroups_repaired);
13067 err |= !!ret;
13068 if (err) {
13069 error("failed to repair quota groups");
13070 goto out;
13072 ret = 0;
13075 if (!list_empty(&root->fs_info->recow_ebs)) {
13076 error("transid errors in file system");
13077 ret = 1;
13078 err |= !!ret;
13080 out:
13081 if (found_old_backref) { /*
13082 * there was a disk format change when mixed
13083 * backref was in testing tree. The old format
13084 * existed about one week.
13086 printf("\n * Found old mixed backref format. "
13087 "The old format is not supported! *"
13088 "\n * Please mount the FS in readonly mode, "
13089 "backup data and re-format the FS. *\n\n");
13090 err |= 1;
13092 printf("found %llu bytes used, ",
13093 (unsigned long long)bytes_used);
13094 if (err)
13095 printf("error(s) found\n");
13096 else
13097 printf("no error found\n");
13098 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13099 printf("total tree bytes: %llu\n",
13100 (unsigned long long)total_btree_bytes);
13101 printf("total fs tree bytes: %llu\n",
13102 (unsigned long long)total_fs_tree_bytes);
13103 printf("total extent tree bytes: %llu\n",
13104 (unsigned long long)total_extent_tree_bytes);
13105 printf("btree space waste bytes: %llu\n",
13106 (unsigned long long)btree_space_waste);
13107 printf("file data blocks allocated: %llu\n referenced %llu\n",
13108 (unsigned long long)data_bytes_allocated,
13109 (unsigned long long)data_bytes_referenced);
13111 free_qgroup_counts();
13112 free_root_recs_tree(&root_cache);
13113 close_out:
13114 close_ctree(root);
13115 err_out:
13116 if (ctx.progress_enabled)
13117 task_deinit(ctx.info);
13119 return err;