btrfs-progs: check: lowmem: Fix false alert on inline compressed extent
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob3d0b12313c745c6390f1cb305f56bab10fbc8095
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
47 enum task_position {
48 TASK_EXTENTS,
49 TASK_FREE_SPACE,
50 TASK_FS_ROOTS,
51 TASK_NOTHING, /* have to be the last element */
54 struct task_ctx {
55 int progress_enabled;
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
80 CHECK_MODE_ORIGINAL,
81 CHECK_MODE_LOWMEM,
82 CHECK_MODE_UNKNOWN,
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
104 union {
105 u64 parent;
106 u64 root;
108 u64 owner;
109 u64 offset;
110 u64 disk_bytenr;
111 u64 bytes;
112 u64 ram_bytes;
113 u32 num_refs;
114 u32 found_ref;
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
148 u64 root;
149 u64 objectid;
150 u64 offset;
151 u64 disk_bytenr;
152 u64 disk_len;
155 struct tree_backref {
156 struct extent_backref node;
157 union {
158 u64 parent;
159 u64 root;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
177 u64 start;
178 u64 max_size;
179 u64 nr;
180 u64 refs;
181 u64 extent_item_refs;
182 u64 generation;
183 u64 parent_generation;
184 u64 info_objectid;
185 u32 num_duplicates;
186 u8 info_level;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
208 u8 filetype;
209 u8 ref_type;
210 int errors;
211 u64 dir;
212 u64 index;
213 u16 namelen;
214 char name[0];
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
224 u64 objectid;
225 u64 bytenr;
226 u64 last_snapshot;
227 u8 level;
228 u8 drop_level;
229 int level_size;
230 struct btrfs_key drop_key;
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
248 struct rb_node node;
249 u64 start;
250 u64 len;
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
263 int errors;
265 u64 ino;
266 u32 nlink;
267 u32 imode;
268 u64 isize;
269 u64 nbytes;
271 u32 found_link;
272 u64 found_size;
273 u64 extent_start;
274 u64 extent_end;
275 struct rb_root holes;
276 struct list_head orphan_extents;
278 u32 refs;
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
304 int errors;
305 u64 ref_root;
306 u64 dir;
307 u64 index;
308 u16 namelen;
309 char name[0];
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
317 struct root_record {
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
321 u64 objectid;
322 u32 found_ref;
325 struct ptr_node {
326 struct cache_extent cache;
327 void *data;
330 struct shared_node {
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
335 u32 refs;
338 struct block_info {
339 u64 start;
340 u32 size;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
346 int active_node;
347 int root_level;
350 struct bad_item {
351 struct btrfs_key key;
352 u64 root_id;
353 struct list_head list;
356 struct extent_entry {
357 u64 bytenr;
358 u64 bytes;
359 int count;
360 int broken;
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
366 u8 level;
367 /* number of nodes at this level, must be 1 for a root */
368 int node_count;
369 u64 bytenr;
370 u64 gen;
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
378 * classification.
380 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
381 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
382 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
383 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
384 #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
385 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
386 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
387 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
388 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
389 #define CHUNK_TYPE_MISMATCH (1 << 8)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
417 static int print_status_return(void *p)
419 printf("\n");
420 fflush(stdout);
422 return 0;
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868 u64 ino, int mod)
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
873 int ret;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
876 if (cache) {
877 node = container_of(cache, struct ptr_node, cache);
878 rec = node->data;
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
882 return node->data;
883 rec->refs--;
884 rec = node->data;
886 } else if (mod) {
887 rec = calloc(1, sizeof(*rec));
888 if (!rec)
889 return ERR_PTR(-ENOMEM);
890 rec->ino = ino;
891 rec->extent_start = (u64)-1;
892 rec->refs = 1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
898 if (!node) {
899 free(rec);
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
904 node->data = rec;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
907 rec->found_link = 1;
909 ret = insert_cache_extent(inode_cache, &node->cache);
910 if (ret)
911 return ERR_PTR(-EEXIST);
913 return rec;
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
924 free(orphan);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
932 if (--rec->refs > 0)
933 return;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
938 free(backref);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
942 free(rec);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949 return 1;
950 return 0;
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
959 u8 filetype;
961 if (!rec->found_inode_item)
962 return;
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
972 free(backref);
977 if (!rec->checked || rec->merging)
978 return;
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1009 free(node);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1018 int ret;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1022 key.offset = ino;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1027 if (ret > 0)
1028 ret = -ENOENT;
1029 return ret;
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043 return 1;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->nodatasum = 1;
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1056 return 0;
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 const char *name,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 break;
1068 if (backref->dir != dir || backref->namelen != namelen)
1069 continue;
1070 if (memcmp(name, backref->name, namelen))
1071 continue;
1072 return backref;
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1076 if (!backref)
1077 return NULL;
1078 memset(backref, 0, sizeof(*backref));
1079 backref->dir = dir;
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1084 return backref;
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1098 BUG_ON(!backref);
1099 if (errors)
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 rec->found_link++;
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 else
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1132 } else {
1133 BUG_ON(1);
1136 maybe_free_inode_rec(inode_cache, rec);
1137 return 0;
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1144 u32 dir_count = 0;
1145 int ret = 0;
1147 dst->merging = 1;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1156 dir_count++;
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180 if (ret < 0)
1181 return ret;
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1191 } else {
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1196 dst->extent_end,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1213 } else {
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1217 dst->merging = 0;
1219 return 0;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1230 int splice = 0;
1231 int ret;
1233 if (--src_node->refs == 0)
1234 splice = 1;
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1240 again:
1241 cache = search_cache_extent(src, 0);
1242 while (cache) {
1243 node = container_of(cache, struct ptr_node, cache);
1244 rec = node->data;
1245 cache = next_cache_extent(cache);
1247 if (splice) {
1248 remove_cache_extent(src, &node->cache);
1249 ins = node;
1250 } else {
1251 ins = malloc(sizeof(*ins));
1252 BUG_ON(!ins);
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1255 ins->data = rec;
1256 rec->refs++;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1263 if (rec->checked) {
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1270 free(ins);
1271 } else {
1272 BUG_ON(ret);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1279 goto again;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1291 return 0;
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1300 rec = node->data;
1301 free_inode_rec(rec);
1302 free(node);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308 u64 bytenr)
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1314 if (cache) {
1315 node = container_of(cache, struct shared_node, cache);
1316 return node;
1318 return NULL;
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 int ret;
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1327 if (!node)
1328 return -ENOMEM;
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1333 node->refs = refs;
1335 ret = insert_cache_extent(shared, &node->cache);
1337 return ret;
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1345 int ret;
1347 if (level == wc->active_node)
1348 return 0;
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1352 if (!node) {
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 BUG_ON(ret);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1358 return 0;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1367 free(node);
1369 return 1;
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
1376 free(node);
1378 return 1;
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1386 int i;
1388 if (level == wc->root_level)
1389 return 0;
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392 if (wc->nodes[i])
1393 break;
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1406 } else {
1407 BUG_ON(node->refs < 2);
1408 node->refs--;
1410 return 0;
1414 * Returns:
1415 * < 0 - on error
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422 u64 child_root_id)
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1427 int has_parent = 0;
1428 int ret;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436 0, 0);
1437 if (ret < 0)
1438 return ret;
1439 btrfs_release_path(&path);
1440 if (!ret)
1441 return 1;
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 key.offset = 0;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447 0, 0);
1448 if (ret < 0)
1449 goto out;
1451 while (1) {
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455 if (ret)
1456 break;
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1463 break;
1465 has_parent = 1;
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1469 return 1;
1472 path.slots[0]++;
1474 out:
1475 btrfs_release_path(&path);
1476 if (ret < 0)
1477 return ret;
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1485 u32 total;
1486 u32 cur = 0;
1487 u32 len;
1488 u32 name_len;
1489 u32 data_len;
1490 int error;
1491 int nritems = 0;
1492 u8 filetype;
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1508 nritems++;
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1516 len = name_len;
1517 error = 0;
1518 } else {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1532 key->type, error);
1533 } else {
1534 fprintf(stderr, "invalid location in dir item %u\n",
1535 location.type);
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1543 cur += len;
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548 return 0;
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1555 u32 total;
1556 u32 cur = 0;
1557 u32 len;
1558 u32 name_len;
1559 u64 index;
1560 int error;
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1573 len = name_len;
1574 error = 0;
1575 } else {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585 cur += len;
1587 return 0;
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1594 u32 total;
1595 u32 cur = 0;
1596 u32 len;
1597 u32 name_len;
1598 u64 index;
1599 u64 parent;
1600 int error;
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1614 len = name_len;
1615 error = 0;
1616 } else {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627 cur += len;
1629 return 0;
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1639 int ret;
1640 size_t size;
1641 *found = 0;
1642 u64 csum_end;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648 key.offset = start;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652 &key, &path, 0, 0);
1653 if (ret < 0)
1654 goto out;
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1660 path.slots[0]--;
1663 while (len > 0) {
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667 if (ret > 0)
1668 break;
1669 else if (ret < 0)
1670 goto out;
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1677 break;
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1681 break;
1683 if (key.offset > start)
1684 start = key.offset;
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1690 len -= size;
1691 start += size;
1692 *found += size;
1695 path.slots[0]++;
1697 out:
1698 btrfs_release_path(&path);
1699 if (ret < 0)
1700 return ret;
1701 return 0;
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1711 u64 num_bytes = 0;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1715 int extent_type;
1716 int ret;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1732 if (ret < 0)
1733 return ret;
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741 if (num_bytes == 0)
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1762 } else {
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1773 * data reloc tree.
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777 u64 found;
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780 else
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784 if (ret < 0)
1785 return ret;
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787 if (found > 0)
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792 if (found > 0)
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1796 return 0;
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1803 u32 nritems;
1804 int i;
1805 int ret = 0;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1811 return 0;
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820 continue;
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822 continue;
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1832 key.objectid, 1);
1833 BUG_ON(IS_ERR(active_node->current));
1835 switch (key.type) {
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1839 break;
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1842 break;
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1845 break;
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1848 break;
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
1851 active_node);
1852 break;
1853 default:
1854 break;
1857 return ret;
1860 struct node_refs {
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872 struct node_refs *nrefs, int *level, int ext_ref)
1874 struct extent_buffer *cur = path->nodes[0];
1875 struct btrfs_key key;
1876 u64 cur_bytenr;
1877 u32 nritems;
1878 u64 first_ino = 0;
1879 int root_level = btrfs_header_level(root->node);
1880 int i;
1881 int ret = 0; /* Final return value */
1882 int err = 0; /* Positive error bitmap */
1884 cur_bytenr = cur->start;
1886 /* skip to first inode item or the first inode number change */
1887 nritems = btrfs_header_nritems(cur);
1888 for (i = 0; i < nritems; i++) {
1889 btrfs_item_key_to_cpu(cur, &key, i);
1890 if (i == 0)
1891 first_ino = key.objectid;
1892 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893 (first_ino && first_ino != key.objectid))
1894 break;
1896 if (i == nritems) {
1897 path->slots[0] = nritems;
1898 return 0;
1900 path->slots[0] = i;
1902 again:
1903 err |= check_inode_item(root, path, ext_ref);
1905 if (err & LAST_ITEM)
1906 goto out;
1908 /* still have inode items in thie leaf */
1909 if (cur->start == cur_bytenr)
1910 goto again;
1913 * we have switched to another leaf, above nodes may
1914 * have changed, here walk down the path, if a node
1915 * or leaf is shared, check whether we can skip this
1916 * node or leaf.
1918 for (i = root_level; i >= 0; i--) {
1919 if (path->nodes[i]->start == nrefs->bytenr[i])
1920 continue;
1922 ret = update_nodes_refs(root,
1923 path->nodes[i]->start,
1924 nrefs, i);
1925 if (ret)
1926 goto out;
1928 if (!nrefs->need_check[i]) {
1929 *level += 1;
1930 break;
1934 for (i = 0; i < *level; i++) {
1935 free_extent_buffer(path->nodes[i]);
1936 path->nodes[i] = NULL;
1938 out:
1939 err &= ~LAST_ITEM;
1941 * Convert any error bitmap to -EIO, as we should avoid
1942 * mixing positive and negative return value to represent
1943 * error
1945 if (err && !ret)
1946 ret = -EIO;
1947 return ret;
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1953 u64 bytenr;
1954 u64 ptr_gen;
1955 u32 nritems;
1956 u32 blocksize;
1957 int i;
1958 int level;
1960 level = btrfs_header_level(node);
1961 if (level != 1)
1962 return;
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1976 * in parent.
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1991 int ret = 0;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1996 else
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000 ret = -EINVAL;
2001 fprintf(stderr,
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007 ret = -EINVAL;
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2014 ret = -EINVAL;
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2019 return ret;
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find its all root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2033 return 1;
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2042 return 0;
2044 return 1;
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2054 int check, ret;
2055 u64 refs;
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2061 if (ret < 0)
2062 return ret;
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2066 if (refs > 1) {
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068 0, &roots);
2069 if (ret)
2070 return -EIO;
2072 check = need_check(root, roots);
2073 ulist_free(roots);
2074 nrefs->need_check[level] = check;
2075 } else {
2076 nrefs->need_check[level] = 1;
2080 return 0;
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2088 u64 bytenr;
2089 u64 ptr_gen;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2092 u32 blocksize;
2093 int ret, err = 0;
2094 u64 refs;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2101 ret = 0;
2102 } else {
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2106 if (ret < 0) {
2107 err = ret;
2108 goto out;
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2114 if (refs > 1) {
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2116 refs, wc, *level);
2117 if (ret > 0) {
2118 err = ret;
2119 goto out;
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2129 WARN_ON(1);
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132 break;
2133 if (*level == 0) {
2134 ret = process_one_leaf(root, cur, wc);
2135 if (ret < 0)
2136 err = ret;
2137 break;
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2145 } else {
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2148 if (ret < 0) {
2149 refs = 0;
2150 } else {
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2156 if (refs > 1) {
2157 ret = enter_shared_node(root, bytenr, refs,
2158 wc, *level - 1);
2159 if (ret > 0) {
2160 path->slots[*level]++;
2161 continue;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2170 ptr_gen);
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2175 &node_key,
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2178 &node_key,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2181 err = -EIO;
2182 goto out;
2186 ret = check_child_node(cur, path->slots[*level], next);
2187 if (ret) {
2188 err = ret;
2189 goto out;
2192 if (btrfs_is_leaf(next))
2193 status = btrfs_check_leaf(root, NULL, next);
2194 else
2195 status = btrfs_check_node(root, NULL, next);
2196 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197 free_extent_buffer(next);
2198 err = -EIO;
2199 goto out;
2202 *level = *level - 1;
2203 free_extent_buffer(path->nodes[*level]);
2204 path->nodes[*level] = next;
2205 path->slots[*level] = 0;
2207 out:
2208 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209 return err;
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213 unsigned int ext_ref);
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216 int *level, struct node_refs *nrefs, int ext_ref)
2218 enum btrfs_tree_block_status status;
2219 u64 bytenr;
2220 u64 ptr_gen;
2221 struct extent_buffer *next;
2222 struct extent_buffer *cur;
2223 u32 blocksize;
2224 int ret;
2226 WARN_ON(*level < 0);
2227 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229 ret = update_nodes_refs(root, path->nodes[*level]->start,
2230 nrefs, *level);
2231 if (ret < 0)
2232 return ret;
2234 while (*level >= 0) {
2235 WARN_ON(*level < 0);
2236 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237 cur = path->nodes[*level];
2239 if (btrfs_header_level(cur) != *level)
2240 WARN_ON(1);
2242 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243 break;
2244 /* Don't forgot to check leaf/node validation */
2245 if (*level == 0) {
2246 ret = btrfs_check_leaf(root, NULL, cur);
2247 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248 ret = -EIO;
2249 break;
2251 ret = process_one_leaf_v2(root, path, nrefs,
2252 level, ext_ref);
2253 break;
2254 } else {
2255 ret = btrfs_check_node(root, NULL, cur);
2256 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = -EIO;
2258 break;
2261 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263 blocksize = root->nodesize;
2265 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266 if (ret)
2267 break;
2268 if (!nrefs->need_check[*level - 1]) {
2269 path->slots[*level]++;
2270 continue;
2273 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275 free_extent_buffer(next);
2276 reada_walk_down(root, cur, path->slots[*level]);
2277 next = read_tree_block(root, bytenr, blocksize,
2278 ptr_gen);
2279 if (!extent_buffer_uptodate(next)) {
2280 struct btrfs_key node_key;
2282 btrfs_node_key_to_cpu(path->nodes[*level],
2283 &node_key,
2284 path->slots[*level]);
2285 btrfs_add_corrupt_extent_record(root->fs_info,
2286 &node_key,
2287 path->nodes[*level]->start,
2288 root->nodesize, *level);
2289 ret = -EIO;
2290 break;
2294 ret = check_child_node(cur, path->slots[*level], next);
2295 if (ret < 0)
2296 break;
2298 if (btrfs_is_leaf(next))
2299 status = btrfs_check_leaf(root, NULL, next);
2300 else
2301 status = btrfs_check_node(root, NULL, next);
2302 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303 free_extent_buffer(next);
2304 ret = -EIO;
2305 break;
2308 *level = *level - 1;
2309 free_extent_buffer(path->nodes[*level]);
2310 path->nodes[*level] = next;
2311 path->slots[*level] = 0;
2313 return ret;
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317 struct walk_control *wc, int *level)
2319 int i;
2320 struct extent_buffer *leaf;
2322 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323 leaf = path->nodes[i];
2324 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325 path->slots[i]++;
2326 *level = i;
2327 return 0;
2328 } else {
2329 free_extent_buffer(path->nodes[*level]);
2330 path->nodes[*level] = NULL;
2331 BUG_ON(*level > wc->active_node);
2332 if (*level == wc->active_node)
2333 leave_shared_node(root, wc, *level);
2334 *level = i + 1;
2337 return 1;
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341 int *level)
2343 int i;
2344 struct extent_buffer *leaf;
2346 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347 leaf = path->nodes[i];
2348 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349 path->slots[i]++;
2350 *level = i;
2351 return 0;
2352 } else {
2353 free_extent_buffer(path->nodes[*level]);
2354 path->nodes[*level] = NULL;
2355 *level = i + 1;
2358 return 1;
2361 static int check_root_dir(struct inode_record *rec)
2363 struct inode_backref *backref;
2364 int ret = -1;
2366 if (!rec->found_inode_item || rec->errors)
2367 goto out;
2368 if (rec->nlink != 1 || rec->found_link != 0)
2369 goto out;
2370 if (list_empty(&rec->backrefs))
2371 goto out;
2372 backref = to_inode_backref(rec->backrefs.next);
2373 if (!backref->found_inode_ref)
2374 goto out;
2375 if (backref->index != 0 || backref->namelen != 2 ||
2376 memcmp(backref->name, "..", 2))
2377 goto out;
2378 if (backref->found_dir_index || backref->found_dir_item)
2379 goto out;
2380 ret = 0;
2381 out:
2382 return ret;
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386 struct btrfs_root *root, struct btrfs_path *path,
2387 struct inode_record *rec)
2389 struct btrfs_inode_item *ei;
2390 struct btrfs_key key;
2391 int ret;
2393 key.objectid = rec->ino;
2394 key.type = BTRFS_INODE_ITEM_KEY;
2395 key.offset = (u64)-1;
2397 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398 if (ret < 0)
2399 goto out;
2400 if (ret) {
2401 if (!path->slots[0]) {
2402 ret = -ENOENT;
2403 goto out;
2405 path->slots[0]--;
2406 ret = 0;
2408 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409 if (key.objectid != rec->ino) {
2410 ret = -ENOENT;
2411 goto out;
2414 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415 struct btrfs_inode_item);
2416 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417 btrfs_mark_buffer_dirty(path->nodes[0]);
2418 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420 root->root_key.objectid);
2421 out:
2422 btrfs_release_path(path);
2423 return ret;
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 struct btrfs_path *path,
2429 struct inode_record *rec)
2431 int ret;
2433 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434 btrfs_release_path(path);
2435 if (!ret)
2436 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437 return ret;
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441 struct btrfs_root *root,
2442 struct btrfs_path *path,
2443 struct inode_record *rec)
2445 struct btrfs_inode_item *ei;
2446 struct btrfs_key key;
2447 int ret = 0;
2449 key.objectid = rec->ino;
2450 key.type = BTRFS_INODE_ITEM_KEY;
2451 key.offset = 0;
2453 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454 if (ret) {
2455 if (ret > 0)
2456 ret = -ENOENT;
2457 goto out;
2460 /* Since ret == 0, no need to check anything */
2461 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462 struct btrfs_inode_item);
2463 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464 btrfs_mark_buffer_dirty(path->nodes[0]);
2465 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466 printf("reset nbytes for ino %llu root %llu\n",
2467 rec->ino, root->root_key.objectid);
2468 out:
2469 btrfs_release_path(path);
2470 return ret;
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474 struct cache_tree *inode_cache,
2475 struct inode_record *rec,
2476 struct inode_backref *backref)
2478 struct btrfs_path path;
2479 struct btrfs_trans_handle *trans;
2480 struct btrfs_dir_item *dir_item;
2481 struct extent_buffer *leaf;
2482 struct btrfs_key key;
2483 struct btrfs_disk_key disk_key;
2484 struct inode_record *dir_rec;
2485 unsigned long name_ptr;
2486 u32 data_size = sizeof(*dir_item) + backref->namelen;
2487 int ret;
2489 trans = btrfs_start_transaction(root, 1);
2490 if (IS_ERR(trans))
2491 return PTR_ERR(trans);
2493 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494 (unsigned long long)rec->ino);
2496 btrfs_init_path(&path);
2497 key.objectid = backref->dir;
2498 key.type = BTRFS_DIR_INDEX_KEY;
2499 key.offset = backref->index;
2500 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501 BUG_ON(ret);
2503 leaf = path.nodes[0];
2504 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506 disk_key.objectid = cpu_to_le64(rec->ino);
2507 disk_key.type = BTRFS_INODE_ITEM_KEY;
2508 disk_key.offset = 0;
2510 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512 btrfs_set_dir_data_len(leaf, dir_item, 0);
2513 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514 name_ptr = (unsigned long)(dir_item + 1);
2515 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516 btrfs_mark_buffer_dirty(leaf);
2517 btrfs_release_path(&path);
2518 btrfs_commit_transaction(trans, root);
2520 backref->found_dir_index = 1;
2521 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522 BUG_ON(IS_ERR(dir_rec));
2523 if (!dir_rec)
2524 return 0;
2525 dir_rec->found_size += backref->namelen;
2526 if (dir_rec->found_size == dir_rec->isize &&
2527 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529 if (dir_rec->found_size != dir_rec->isize)
2530 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532 return 0;
2535 static int delete_dir_index(struct btrfs_root *root,
2536 struct inode_backref *backref)
2538 struct btrfs_trans_handle *trans;
2539 struct btrfs_dir_item *di;
2540 struct btrfs_path path;
2541 int ret = 0;
2543 trans = btrfs_start_transaction(root, 1);
2544 if (IS_ERR(trans))
2545 return PTR_ERR(trans);
2547 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548 (unsigned long long)backref->dir,
2549 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550 (unsigned long long)root->objectid);
2552 btrfs_init_path(&path);
2553 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554 backref->name, backref->namelen,
2555 backref->index, -1);
2556 if (IS_ERR(di)) {
2557 ret = PTR_ERR(di);
2558 btrfs_release_path(&path);
2559 btrfs_commit_transaction(trans, root);
2560 if (ret == -ENOENT)
2561 return 0;
2562 return ret;
2565 if (!di)
2566 ret = btrfs_del_item(trans, root, &path);
2567 else
2568 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569 BUG_ON(ret);
2570 btrfs_release_path(&path);
2571 btrfs_commit_transaction(trans, root);
2572 return ret;
2575 static int create_inode_item(struct btrfs_root *root,
2576 struct inode_record *rec,
2577 int root_dir)
2579 struct btrfs_trans_handle *trans;
2580 struct btrfs_inode_item inode_item;
2581 time_t now = time(NULL);
2582 int ret;
2584 trans = btrfs_start_transaction(root, 1);
2585 if (IS_ERR(trans)) {
2586 ret = PTR_ERR(trans);
2587 return ret;
2590 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591 "be incomplete, please check permissions and content after "
2592 "the fsck completes.\n", (unsigned long long)root->objectid,
2593 (unsigned long long)rec->ino);
2595 memset(&inode_item, 0, sizeof(inode_item));
2596 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597 if (root_dir)
2598 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599 else
2600 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602 if (rec->found_dir_item) {
2603 if (rec->found_file_extent)
2604 fprintf(stderr, "root %llu inode %llu has both a dir "
2605 "item and extents, unsure if it is a dir or a "
2606 "regular file so setting it as a directory\n",
2607 (unsigned long long)root->objectid,
2608 (unsigned long long)rec->ino);
2609 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611 } else if (!rec->found_dir_item) {
2612 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2615 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2624 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625 BUG_ON(ret);
2626 btrfs_commit_transaction(trans, root);
2627 return 0;
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631 struct inode_record *rec,
2632 struct cache_tree *inode_cache,
2633 int delete)
2635 struct inode_backref *tmp, *backref;
2636 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637 int ret = 0;
2638 int repaired = 0;
2640 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641 if (!delete && rec->ino == root_dirid) {
2642 if (!rec->found_inode_item) {
2643 ret = create_inode_item(root, rec, 1);
2644 if (ret)
2645 break;
2646 repaired++;
2650 /* Index 0 for root dir's are special, don't mess with it */
2651 if (rec->ino == root_dirid && backref->index == 0)
2652 continue;
2654 if (delete &&
2655 ((backref->found_dir_index && !backref->found_inode_ref) ||
2656 (backref->found_dir_index && backref->found_inode_ref &&
2657 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658 ret = delete_dir_index(root, backref);
2659 if (ret)
2660 break;
2661 repaired++;
2662 list_del(&backref->list);
2663 free(backref);
2666 if (!delete && !backref->found_dir_index &&
2667 backref->found_dir_item && backref->found_inode_ref) {
2668 ret = add_missing_dir_index(root, inode_cache, rec,
2669 backref);
2670 if (ret)
2671 break;
2672 repaired++;
2673 if (backref->found_dir_item &&
2674 backref->found_dir_index &&
2675 backref->found_dir_index) {
2676 if (!backref->errors &&
2677 backref->found_inode_ref) {
2678 list_del(&backref->list);
2679 free(backref);
2684 if (!delete && (!backref->found_dir_index &&
2685 !backref->found_dir_item &&
2686 backref->found_inode_ref)) {
2687 struct btrfs_trans_handle *trans;
2688 struct btrfs_key location;
2690 ret = check_dir_conflict(root, backref->name,
2691 backref->namelen,
2692 backref->dir,
2693 backref->index);
2694 if (ret) {
2696 * let nlink fixing routine to handle it,
2697 * which can do it better.
2699 ret = 0;
2700 break;
2702 location.objectid = rec->ino;
2703 location.type = BTRFS_INODE_ITEM_KEY;
2704 location.offset = 0;
2706 trans = btrfs_start_transaction(root, 1);
2707 if (IS_ERR(trans)) {
2708 ret = PTR_ERR(trans);
2709 break;
2711 fprintf(stderr, "adding missing dir index/item pair "
2712 "for inode %llu\n",
2713 (unsigned long long)rec->ino);
2714 ret = btrfs_insert_dir_item(trans, root, backref->name,
2715 backref->namelen,
2716 backref->dir, &location,
2717 imode_to_type(rec->imode),
2718 backref->index);
2719 BUG_ON(ret);
2720 btrfs_commit_transaction(trans, root);
2721 repaired++;
2724 if (!delete && (backref->found_inode_ref &&
2725 backref->found_dir_index &&
2726 backref->found_dir_item &&
2727 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728 !rec->found_inode_item)) {
2729 ret = create_inode_item(root, rec, 0);
2730 if (ret)
2731 break;
2732 repaired++;
2736 return ret ? ret : repaired;
2740 * To determine the file type for nlink/inode_item repair
2742 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743 * Return -ENOENT if file type is not found.
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2747 struct inode_backref *backref;
2749 /* For inode item recovered case */
2750 if (rec->found_inode_item) {
2751 *type = imode_to_type(rec->imode);
2752 return 0;
2755 list_for_each_entry(backref, &rec->backrefs, list) {
2756 if (backref->found_dir_index || backref->found_dir_item) {
2757 *type = backref->filetype;
2758 return 0;
2761 return -ENOENT;
2765 * To determine the file name for nlink repair
2767 * Return 0 if file name is found, set name and namelen.
2768 * Return -ENOENT if file name is not found.
2770 static int find_file_name(struct inode_record *rec,
2771 char *name, int *namelen)
2773 struct inode_backref *backref;
2775 list_for_each_entry(backref, &rec->backrefs, list) {
2776 if (backref->found_dir_index || backref->found_dir_item ||
2777 backref->found_inode_ref) {
2778 memcpy(name, backref->name, backref->namelen);
2779 *namelen = backref->namelen;
2780 return 0;
2783 return -ENOENT;
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788 struct btrfs_root *root,
2789 struct btrfs_path *path,
2790 struct inode_record *rec)
2792 struct inode_backref *backref;
2793 struct inode_backref *tmp;
2794 struct btrfs_key key;
2795 struct btrfs_inode_item *inode_item;
2796 int ret = 0;
2798 /* We don't believe this either, reset it and iterate backref */
2799 rec->found_link = 0;
2801 /* Remove all backref including the valid ones */
2802 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804 backref->index, backref->name,
2805 backref->namelen, 0);
2806 if (ret < 0)
2807 goto out;
2809 /* remove invalid backref, so it won't be added back */
2810 if (!(backref->found_dir_index &&
2811 backref->found_dir_item &&
2812 backref->found_inode_ref)) {
2813 list_del(&backref->list);
2814 free(backref);
2815 } else {
2816 rec->found_link++;
2820 /* Set nlink to 0 */
2821 key.objectid = rec->ino;
2822 key.type = BTRFS_INODE_ITEM_KEY;
2823 key.offset = 0;
2824 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825 if (ret < 0)
2826 goto out;
2827 if (ret > 0) {
2828 ret = -ENOENT;
2829 goto out;
2831 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832 struct btrfs_inode_item);
2833 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834 btrfs_mark_buffer_dirty(path->nodes[0]);
2835 btrfs_release_path(path);
2838 * Add back valid inode_ref/dir_item/dir_index,
2839 * add_link() will handle the nlink inc, so new nlink must be correct
2841 list_for_each_entry(backref, &rec->backrefs, list) {
2842 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843 backref->name, backref->namelen,
2844 backref->filetype, &backref->index, 1);
2845 if (ret < 0)
2846 goto out;
2848 out:
2849 btrfs_release_path(path);
2850 return ret;
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854 struct btrfs_root *root,
2855 struct btrfs_path *path,
2856 u64 *highest_ino)
2858 struct btrfs_key key, found_key;
2859 int ret;
2861 btrfs_init_path(path);
2862 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863 key.offset = -1;
2864 key.type = BTRFS_INODE_ITEM_KEY;
2865 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866 if (ret == 1) {
2867 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868 path->slots[0] - 1);
2869 *highest_ino = found_key.objectid;
2870 ret = 0;
2872 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873 ret = -EOVERFLOW;
2874 btrfs_release_path(path);
2875 return ret;
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879 struct btrfs_root *root,
2880 struct btrfs_path *path,
2881 struct inode_record *rec)
2883 char *dir_name = "lost+found";
2884 char namebuf[BTRFS_NAME_LEN] = {0};
2885 u64 lost_found_ino;
2886 u32 mode = 0700;
2887 u8 type = 0;
2888 int namelen = 0;
2889 int name_recovered = 0;
2890 int type_recovered = 0;
2891 int ret = 0;
2894 * Get file name and type first before these invalid inode ref
2895 * are deleted by remove_all_invalid_backref()
2897 name_recovered = !find_file_name(rec, namebuf, &namelen);
2898 type_recovered = !find_file_type(rec, &type);
2900 if (!name_recovered) {
2901 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902 rec->ino, rec->ino);
2903 namelen = count_digits(rec->ino);
2904 sprintf(namebuf, "%llu", rec->ino);
2905 name_recovered = 1;
2907 if (!type_recovered) {
2908 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909 rec->ino);
2910 type = BTRFS_FT_REG_FILE;
2911 type_recovered = 1;
2914 ret = reset_nlink(trans, root, path, rec);
2915 if (ret < 0) {
2916 fprintf(stderr,
2917 "Failed to reset nlink for inode %llu: %s\n",
2918 rec->ino, strerror(-ret));
2919 goto out;
2922 if (rec->found_link == 0) {
2923 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924 if (ret < 0)
2925 goto out;
2926 lost_found_ino++;
2927 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929 mode);
2930 if (ret < 0) {
2931 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932 dir_name, strerror(-ret));
2933 goto out;
2935 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936 namebuf, namelen, type, NULL, 1);
2938 * Add ".INO" suffix several times to handle case where
2939 * "FILENAME.INO" is already taken by another file.
2941 while (ret == -EEXIST) {
2943 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2945 if (namelen + count_digits(rec->ino) + 1 >
2946 BTRFS_NAME_LEN) {
2947 ret = -EFBIG;
2948 goto out;
2950 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951 ".%llu", rec->ino);
2952 namelen += count_digits(rec->ino) + 1;
2953 ret = btrfs_add_link(trans, root, rec->ino,
2954 lost_found_ino, namebuf,
2955 namelen, type, NULL, 1);
2957 if (ret < 0) {
2958 fprintf(stderr,
2959 "Failed to link the inode %llu to %s dir: %s\n",
2960 rec->ino, dir_name, strerror(-ret));
2961 goto out;
2964 * Just increase the found_link, don't actually add the
2965 * backref. This will make things easier and this inode
2966 * record will be freed after the repair is done.
2967 * So fsck will not report problem about this inode.
2969 rec->found_link++;
2970 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971 namelen, namebuf, dir_name);
2973 printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2976 * Clear the flag anyway, or we will loop forever for the same inode
2977 * as it will not be removed from the bad inode list and the dead loop
2978 * happens.
2980 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981 btrfs_release_path(path);
2982 return ret;
2986 * Check if there is any normal(reg or prealloc) file extent for given
2987 * ino.
2988 * This is used to determine the file type when neither its dir_index/item or
2989 * inode_item exists.
2991 * This will *NOT* report error, if any error happens, just consider it does
2992 * not have any normal file extent.
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2996 struct btrfs_path path;
2997 struct btrfs_key key;
2998 struct btrfs_key found_key;
2999 struct btrfs_file_extent_item *fi;
3000 u8 type;
3001 int ret = 0;
3003 btrfs_init_path(&path);
3004 key.objectid = ino;
3005 key.type = BTRFS_EXTENT_DATA_KEY;
3006 key.offset = 0;
3008 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009 if (ret < 0) {
3010 ret = 0;
3011 goto out;
3013 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014 ret = btrfs_next_leaf(root, &path);
3015 if (ret) {
3016 ret = 0;
3017 goto out;
3020 while (1) {
3021 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022 path.slots[0]);
3023 if (found_key.objectid != ino ||
3024 found_key.type != BTRFS_EXTENT_DATA_KEY)
3025 break;
3026 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027 struct btrfs_file_extent_item);
3028 type = btrfs_file_extent_type(path.nodes[0], fi);
3029 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030 ret = 1;
3031 goto out;
3034 out:
3035 btrfs_release_path(&path);
3036 return ret;
3039 static u32 btrfs_type_to_imode(u8 type)
3041 static u32 imode_by_btrfs_type[] = {
3042 [BTRFS_FT_REG_FILE] = S_IFREG,
3043 [BTRFS_FT_DIR] = S_IFDIR,
3044 [BTRFS_FT_CHRDEV] = S_IFCHR,
3045 [BTRFS_FT_BLKDEV] = S_IFBLK,
3046 [BTRFS_FT_FIFO] = S_IFIFO,
3047 [BTRFS_FT_SOCK] = S_IFSOCK,
3048 [BTRFS_FT_SYMLINK] = S_IFLNK,
3051 return imode_by_btrfs_type[(type)];
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055 struct btrfs_root *root,
3056 struct btrfs_path *path,
3057 struct inode_record *rec)
3059 u8 filetype;
3060 u32 mode = 0700;
3061 int type_recovered = 0;
3062 int ret = 0;
3064 printf("Trying to rebuild inode:%llu\n", rec->ino);
3066 type_recovered = !find_file_type(rec, &filetype);
3069 * Try to determine inode type if type not found.
3071 * For found regular file extent, it must be FILE.
3072 * For found dir_item/index, it must be DIR.
3074 * For undetermined one, use FILE as fallback.
3076 * TODO:
3077 * 1. If found backref(inode_index/item is already handled) to it,
3078 * it must be DIR.
3079 * Need new inode-inode ref structure to allow search for that.
3081 if (!type_recovered) {
3082 if (rec->found_file_extent &&
3083 find_normal_file_extent(root, rec->ino)) {
3084 type_recovered = 1;
3085 filetype = BTRFS_FT_REG_FILE;
3086 } else if (rec->found_dir_item) {
3087 type_recovered = 1;
3088 filetype = BTRFS_FT_DIR;
3089 } else if (!list_empty(&rec->orphan_extents)) {
3090 type_recovered = 1;
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else{
3093 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094 rec->ino);
3095 type_recovered = 1;
3096 filetype = BTRFS_FT_REG_FILE;
3100 ret = btrfs_new_inode(trans, root, rec->ino,
3101 mode | btrfs_type_to_imode(filetype));
3102 if (ret < 0)
3103 goto out;
3106 * Here inode rebuild is done, we only rebuild the inode item,
3107 * don't repair the nlink(like move to lost+found).
3108 * That is the job of nlink repair.
3110 * We just fill the record and return
3112 rec->found_dir_item = 1;
3113 rec->imode = mode | btrfs_type_to_imode(filetype);
3114 rec->nlink = 0;
3115 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116 /* Ensure the inode_nlinks repair function will be called */
3117 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119 return ret;
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123 struct btrfs_root *root,
3124 struct btrfs_path *path,
3125 struct inode_record *rec)
3127 struct orphan_data_extent *orphan;
3128 struct orphan_data_extent *tmp;
3129 int ret = 0;
3131 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3133 * Check for conflicting file extents
3135 * Here we don't know whether the extents is compressed or not,
3136 * so we can only assume it not compressed nor data offset,
3137 * and use its disk_len as extent length.
3139 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140 orphan->offset, orphan->disk_len, 0);
3141 btrfs_release_path(path);
3142 if (ret < 0)
3143 goto out;
3144 if (!ret) {
3145 fprintf(stderr,
3146 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147 orphan->disk_bytenr, orphan->disk_len);
3148 ret = btrfs_free_extent(trans,
3149 root->fs_info->extent_root,
3150 orphan->disk_bytenr, orphan->disk_len,
3151 0, root->objectid, orphan->objectid,
3152 orphan->offset);
3153 if (ret < 0)
3154 goto out;
3156 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157 orphan->offset, orphan->disk_bytenr,
3158 orphan->disk_len, orphan->disk_len);
3159 if (ret < 0)
3160 goto out;
3162 /* Update file size info */
3163 rec->found_size += orphan->disk_len;
3164 if (rec->found_size == rec->nbytes)
3165 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3167 /* Update the file extent hole info too */
3168 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169 orphan->disk_len);
3170 if (ret < 0)
3171 goto out;
3172 if (RB_EMPTY_ROOT(&rec->holes))
3173 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3175 list_del(&orphan->list);
3176 free(orphan);
3178 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180 return ret;
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184 struct btrfs_root *root,
3185 struct btrfs_path *path,
3186 struct inode_record *rec)
3188 struct rb_node *node;
3189 struct file_extent_hole *hole;
3190 int found = 0;
3191 int ret = 0;
3193 node = rb_first(&rec->holes);
3195 while (node) {
3196 found = 1;
3197 hole = rb_entry(node, struct file_extent_hole, node);
3198 ret = btrfs_punch_hole(trans, root, rec->ino,
3199 hole->start, hole->len);
3200 if (ret < 0)
3201 goto out;
3202 ret = del_file_extent_hole(&rec->holes, hole->start,
3203 hole->len);
3204 if (ret < 0)
3205 goto out;
3206 if (RB_EMPTY_ROOT(&rec->holes))
3207 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208 node = rb_first(&rec->holes);
3210 /* special case for a file losing all its file extent */
3211 if (!found) {
3212 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213 round_up(rec->isize, root->sectorsize));
3214 if (ret < 0)
3215 goto out;
3217 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218 rec->ino, root->objectid);
3219 out:
3220 return ret;
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3225 struct btrfs_trans_handle *trans;
3226 struct btrfs_path path;
3227 int ret = 0;
3229 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230 I_ERR_NO_ORPHAN_ITEM |
3231 I_ERR_LINK_COUNT_WRONG |
3232 I_ERR_NO_INODE_ITEM |
3233 I_ERR_FILE_EXTENT_ORPHAN |
3234 I_ERR_FILE_EXTENT_DISCOUNT|
3235 I_ERR_FILE_NBYTES_WRONG)))
3236 return rec->errors;
3239 * For nlink repair, it may create a dir and add link, so
3240 * 2 for parent(256)'s dir_index and dir_item
3241 * 2 for lost+found dir's inode_item and inode_ref
3242 * 1 for the new inode_ref of the file
3243 * 2 for lost+found dir's dir_index and dir_item for the file
3245 trans = btrfs_start_transaction(root, 7);
3246 if (IS_ERR(trans))
3247 return PTR_ERR(trans);
3249 btrfs_init_path(&path);
3250 if (rec->errors & I_ERR_NO_INODE_ITEM)
3251 ret = repair_inode_no_item(trans, root, &path, rec);
3252 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257 ret = repair_inode_isize(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261 ret = repair_inode_nlinks(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263 ret = repair_inode_nbytes(trans, root, &path, rec);
3264 btrfs_commit_transaction(trans, root);
3265 btrfs_release_path(&path);
3266 return ret;
3269 static int check_inode_recs(struct btrfs_root *root,
3270 struct cache_tree *inode_cache)
3272 struct cache_extent *cache;
3273 struct ptr_node *node;
3274 struct inode_record *rec;
3275 struct inode_backref *backref;
3276 int stage = 0;
3277 int ret = 0;
3278 int err = 0;
3279 u64 error = 0;
3280 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3282 if (btrfs_root_refs(&root->root_item) == 0) {
3283 if (!cache_tree_empty(inode_cache))
3284 fprintf(stderr, "warning line %d\n", __LINE__);
3285 return 0;
3289 * We need to repair backrefs first because we could change some of the
3290 * errors in the inode recs.
3292 * We also need to go through and delete invalid backrefs first and then
3293 * add the correct ones second. We do this because we may get EEXIST
3294 * when adding back the correct index because we hadn't yet deleted the
3295 * invalid index.
3297 * For example, if we were missing a dir index then the directories
3298 * isize would be wrong, so if we fixed the isize to what we thought it
3299 * would be and then fixed the backref we'd still have a invalid fs, so
3300 * we need to add back the dir index and then check to see if the isize
3301 * is still wrong.
3303 while (stage < 3) {
3304 stage++;
3305 if (stage == 3 && !err)
3306 break;
3308 cache = search_cache_extent(inode_cache, 0);
3309 while (repair && cache) {
3310 node = container_of(cache, struct ptr_node, cache);
3311 rec = node->data;
3312 cache = next_cache_extent(cache);
3314 /* Need to free everything up and rescan */
3315 if (stage == 3) {
3316 remove_cache_extent(inode_cache, &node->cache);
3317 free(node);
3318 free_inode_rec(rec);
3319 continue;
3322 if (list_empty(&rec->backrefs))
3323 continue;
3325 ret = repair_inode_backrefs(root, rec, inode_cache,
3326 stage == 1);
3327 if (ret < 0) {
3328 err = ret;
3329 stage = 2;
3330 break;
3331 } if (ret > 0) {
3332 err = -EAGAIN;
3336 if (err)
3337 return err;
3339 rec = get_inode_rec(inode_cache, root_dirid, 0);
3340 BUG_ON(IS_ERR(rec));
3341 if (rec) {
3342 ret = check_root_dir(rec);
3343 if (ret) {
3344 fprintf(stderr, "root %llu root dir %llu error\n",
3345 (unsigned long long)root->root_key.objectid,
3346 (unsigned long long)root_dirid);
3347 print_inode_error(root, rec);
3348 error++;
3350 } else {
3351 if (repair) {
3352 struct btrfs_trans_handle *trans;
3354 trans = btrfs_start_transaction(root, 1);
3355 if (IS_ERR(trans)) {
3356 err = PTR_ERR(trans);
3357 return err;
3360 fprintf(stderr,
3361 "root %llu missing its root dir, recreating\n",
3362 (unsigned long long)root->objectid);
3364 ret = btrfs_make_root_dir(trans, root, root_dirid);
3365 BUG_ON(ret);
3367 btrfs_commit_transaction(trans, root);
3368 return -EAGAIN;
3371 fprintf(stderr, "root %llu root dir %llu not found\n",
3372 (unsigned long long)root->root_key.objectid,
3373 (unsigned long long)root_dirid);
3376 while (1) {
3377 cache = search_cache_extent(inode_cache, 0);
3378 if (!cache)
3379 break;
3380 node = container_of(cache, struct ptr_node, cache);
3381 rec = node->data;
3382 remove_cache_extent(inode_cache, &node->cache);
3383 free(node);
3384 if (rec->ino == root_dirid ||
3385 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386 free_inode_rec(rec);
3387 continue;
3390 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391 ret = check_orphan_item(root, rec->ino);
3392 if (ret == 0)
3393 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394 if (can_free_inode_rec(rec)) {
3395 free_inode_rec(rec);
3396 continue;
3400 if (!rec->found_inode_item)
3401 rec->errors |= I_ERR_NO_INODE_ITEM;
3402 if (rec->found_link != rec->nlink)
3403 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404 if (repair) {
3405 ret = try_repair_inode(root, rec);
3406 if (ret == 0 && can_free_inode_rec(rec)) {
3407 free_inode_rec(rec);
3408 continue;
3410 ret = 0;
3413 if (!(repair && ret == 0))
3414 error++;
3415 print_inode_error(root, rec);
3416 list_for_each_entry(backref, &rec->backrefs, list) {
3417 if (!backref->found_dir_item)
3418 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419 if (!backref->found_dir_index)
3420 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421 if (!backref->found_inode_ref)
3422 backref->errors |= REF_ERR_NO_INODE_REF;
3423 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424 " namelen %u name %s filetype %d errors %x",
3425 (unsigned long long)backref->dir,
3426 (unsigned long long)backref->index,
3427 backref->namelen, backref->name,
3428 backref->filetype, backref->errors);
3429 print_ref_error(backref->errors);
3431 free_inode_rec(rec);
3433 return (error > 0) ? -1 : 0;
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437 u64 objectid)
3439 struct cache_extent *cache;
3440 struct root_record *rec = NULL;
3441 int ret;
3443 cache = lookup_cache_extent(root_cache, objectid, 1);
3444 if (cache) {
3445 rec = container_of(cache, struct root_record, cache);
3446 } else {
3447 rec = calloc(1, sizeof(*rec));
3448 if (!rec)
3449 return ERR_PTR(-ENOMEM);
3450 rec->objectid = objectid;
3451 INIT_LIST_HEAD(&rec->backrefs);
3452 rec->cache.start = objectid;
3453 rec->cache.size = 1;
3455 ret = insert_cache_extent(root_cache, &rec->cache);
3456 if (ret)
3457 return ERR_PTR(-EEXIST);
3459 return rec;
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463 u64 ref_root, u64 dir, u64 index,
3464 const char *name, int namelen)
3466 struct root_backref *backref;
3468 list_for_each_entry(backref, &rec->backrefs, list) {
3469 if (backref->ref_root != ref_root || backref->dir != dir ||
3470 backref->namelen != namelen)
3471 continue;
3472 if (memcmp(name, backref->name, namelen))
3473 continue;
3474 return backref;
3477 backref = calloc(1, sizeof(*backref) + namelen + 1);
3478 if (!backref)
3479 return NULL;
3480 backref->ref_root = ref_root;
3481 backref->dir = dir;
3482 backref->index = index;
3483 backref->namelen = namelen;
3484 memcpy(backref->name, name, namelen);
3485 backref->name[namelen] = '\0';
3486 list_add_tail(&backref->list, &rec->backrefs);
3487 return backref;
3490 static void free_root_record(struct cache_extent *cache)
3492 struct root_record *rec;
3493 struct root_backref *backref;
3495 rec = container_of(cache, struct root_record, cache);
3496 while (!list_empty(&rec->backrefs)) {
3497 backref = to_root_backref(rec->backrefs.next);
3498 list_del(&backref->list);
3499 free(backref);
3502 free(rec);
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3507 static int add_root_backref(struct cache_tree *root_cache,
3508 u64 root_id, u64 ref_root, u64 dir, u64 index,
3509 const char *name, int namelen,
3510 int item_type, int errors)
3512 struct root_record *rec;
3513 struct root_backref *backref;
3515 rec = get_root_rec(root_cache, root_id);
3516 BUG_ON(IS_ERR(rec));
3517 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518 BUG_ON(!backref);
3520 backref->errors |= errors;
3522 if (item_type != BTRFS_DIR_ITEM_KEY) {
3523 if (backref->found_dir_index || backref->found_back_ref ||
3524 backref->found_forward_ref) {
3525 if (backref->index != index)
3526 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527 } else {
3528 backref->index = index;
3532 if (item_type == BTRFS_DIR_ITEM_KEY) {
3533 if (backref->found_forward_ref)
3534 rec->found_ref++;
3535 backref->found_dir_item = 1;
3536 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537 backref->found_dir_index = 1;
3538 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539 if (backref->found_forward_ref)
3540 backref->errors |= REF_ERR_DUP_ROOT_REF;
3541 else if (backref->found_dir_item)
3542 rec->found_ref++;
3543 backref->found_forward_ref = 1;
3544 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545 if (backref->found_back_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547 backref->found_back_ref = 1;
3548 } else {
3549 BUG_ON(1);
3552 if (backref->found_forward_ref && backref->found_dir_item)
3553 backref->reachable = 1;
3554 return 0;
3557 static int merge_root_recs(struct btrfs_root *root,
3558 struct cache_tree *src_cache,
3559 struct cache_tree *dst_cache)
3561 struct cache_extent *cache;
3562 struct ptr_node *node;
3563 struct inode_record *rec;
3564 struct inode_backref *backref;
3565 int ret = 0;
3567 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568 free_inode_recs_tree(src_cache);
3569 return 0;
3572 while (1) {
3573 cache = search_cache_extent(src_cache, 0);
3574 if (!cache)
3575 break;
3576 node = container_of(cache, struct ptr_node, cache);
3577 rec = node->data;
3578 remove_cache_extent(src_cache, &node->cache);
3579 free(node);
3581 ret = is_child_root(root, root->objectid, rec->ino);
3582 if (ret < 0)
3583 break;
3584 else if (ret == 0)
3585 goto skip;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 BUG_ON(backref->found_inode_ref);
3589 if (backref->found_dir_item)
3590 add_root_backref(dst_cache, rec->ino,
3591 root->root_key.objectid, backref->dir,
3592 backref->index, backref->name,
3593 backref->namelen, BTRFS_DIR_ITEM_KEY,
3594 backref->errors);
3595 if (backref->found_dir_index)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_INDEX_KEY,
3600 backref->errors);
3602 skip:
3603 free_inode_rec(rec);
3605 if (ret < 0)
3606 return ret;
3607 return 0;
3610 static int check_root_refs(struct btrfs_root *root,
3611 struct cache_tree *root_cache)
3613 struct root_record *rec;
3614 struct root_record *ref_root;
3615 struct root_backref *backref;
3616 struct cache_extent *cache;
3617 int loop = 1;
3618 int ret;
3619 int error;
3620 int errors = 0;
3622 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623 BUG_ON(IS_ERR(rec));
3624 rec->found_ref = 1;
3626 /* fixme: this can not detect circular references */
3627 while (loop) {
3628 loop = 0;
3629 cache = search_cache_extent(root_cache, 0);
3630 while (1) {
3631 if (!cache)
3632 break;
3633 rec = container_of(cache, struct root_record, cache);
3634 cache = next_cache_extent(cache);
3636 if (rec->found_ref == 0)
3637 continue;
3639 list_for_each_entry(backref, &rec->backrefs, list) {
3640 if (!backref->reachable)
3641 continue;
3643 ref_root = get_root_rec(root_cache,
3644 backref->ref_root);
3645 BUG_ON(IS_ERR(ref_root));
3646 if (ref_root->found_ref > 0)
3647 continue;
3649 backref->reachable = 0;
3650 rec->found_ref--;
3651 if (rec->found_ref == 0)
3652 loop = 1;
3657 cache = search_cache_extent(root_cache, 0);
3658 while (1) {
3659 if (!cache)
3660 break;
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0 &&
3665 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667 ret = check_orphan_item(root->fs_info->tree_root,
3668 rec->objectid);
3669 if (ret == 0)
3670 continue;
3673 * If we don't have a root item then we likely just have
3674 * a dir item in a snapshot for this root but no actual
3675 * ref key or anything so it's meaningless.
3677 if (!rec->found_root_item)
3678 continue;
3679 errors++;
3680 fprintf(stderr, "fs tree %llu not referenced\n",
3681 (unsigned long long)rec->objectid);
3684 error = 0;
3685 if (rec->found_ref > 0 && !rec->found_root_item)
3686 error = 1;
3687 list_for_each_entry(backref, &rec->backrefs, list) {
3688 if (!backref->found_dir_item)
3689 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690 if (!backref->found_dir_index)
3691 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692 if (!backref->found_back_ref)
3693 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694 if (!backref->found_forward_ref)
3695 backref->errors |= REF_ERR_NO_ROOT_REF;
3696 if (backref->reachable && backref->errors)
3697 error = 1;
3699 if (!error)
3700 continue;
3702 errors++;
3703 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704 (unsigned long long)rec->objectid, rec->found_ref,
3705 rec->found_root_item ? "" : "not found");
3707 list_for_each_entry(backref, &rec->backrefs, list) {
3708 if (!backref->reachable)
3709 continue;
3710 if (!backref->errors && rec->found_root_item)
3711 continue;
3712 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713 " index %llu namelen %u name %s errors %x\n",
3714 (unsigned long long)backref->ref_root,
3715 (unsigned long long)backref->dir,
3716 (unsigned long long)backref->index,
3717 backref->namelen, backref->name,
3718 backref->errors);
3719 print_ref_error(backref->errors);
3722 return errors > 0 ? 1 : 0;
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726 struct btrfs_key *key,
3727 struct cache_tree *root_cache)
3729 u64 dirid;
3730 u64 index;
3731 u32 len;
3732 u32 name_len;
3733 struct btrfs_root_ref *ref;
3734 char namebuf[BTRFS_NAME_LEN];
3735 int error;
3737 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3739 dirid = btrfs_root_ref_dirid(eb, ref);
3740 index = btrfs_root_ref_sequence(eb, ref);
3741 name_len = btrfs_root_ref_name_len(eb, ref);
3743 if (name_len <= BTRFS_NAME_LEN) {
3744 len = name_len;
3745 error = 0;
3746 } else {
3747 len = BTRFS_NAME_LEN;
3748 error = REF_ERR_NAME_TOO_LONG;
3750 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3752 if (key->type == BTRFS_ROOT_REF_KEY) {
3753 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754 index, namebuf, len, key->type, error);
3755 } else {
3756 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757 index, namebuf, len, key->type, error);
3759 return 0;
3762 static void free_corrupt_block(struct cache_extent *cache)
3764 struct btrfs_corrupt_block *corrupt;
3766 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767 free(corrupt);
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3773 * Repair the btree of the given root.
3775 * The fix is to remove the node key in corrupt_blocks cache_tree.
3776 * and rebalance the tree.
3777 * After the fix, the btree should be writeable.
3779 static int repair_btree(struct btrfs_root *root,
3780 struct cache_tree *corrupt_blocks)
3782 struct btrfs_trans_handle *trans;
3783 struct btrfs_path path;
3784 struct btrfs_corrupt_block *corrupt;
3785 struct cache_extent *cache;
3786 struct btrfs_key key;
3787 u64 offset;
3788 int level;
3789 int ret = 0;
3791 if (cache_tree_empty(corrupt_blocks))
3792 return 0;
3794 trans = btrfs_start_transaction(root, 1);
3795 if (IS_ERR(trans)) {
3796 ret = PTR_ERR(trans);
3797 fprintf(stderr, "Error starting transaction: %s\n",
3798 strerror(-ret));
3799 return ret;
3801 btrfs_init_path(&path);
3802 cache = first_cache_extent(corrupt_blocks);
3803 while (cache) {
3804 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805 cache);
3806 level = corrupt->level;
3807 path.lowest_level = level;
3808 key.objectid = corrupt->key.objectid;
3809 key.type = corrupt->key.type;
3810 key.offset = corrupt->key.offset;
3813 * Here we don't want to do any tree balance, since it may
3814 * cause a balance with corrupted brother leaf/node,
3815 * so ins_len set to 0 here.
3816 * Balance will be done after all corrupt node/leaf is deleted.
3818 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819 if (ret < 0)
3820 goto out;
3821 offset = btrfs_node_blockptr(path.nodes[level],
3822 path.slots[level]);
3824 /* Remove the ptr */
3825 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826 if (ret < 0)
3827 goto out;
3829 * Remove the corresponding extent
3830 * return value is not concerned.
3832 btrfs_release_path(&path);
3833 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834 0, root->root_key.objectid,
3835 level - 1, 0);
3836 cache = next_cache_extent(cache);
3839 /* Balance the btree using btrfs_search_slot() */
3840 cache = first_cache_extent(corrupt_blocks);
3841 while (cache) {
3842 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843 cache);
3844 memcpy(&key, &corrupt->key, sizeof(key));
3845 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846 if (ret < 0)
3847 goto out;
3848 /* return will always >0 since it won't find the item */
3849 ret = 0;
3850 btrfs_release_path(&path);
3851 cache = next_cache_extent(cache);
3853 out:
3854 btrfs_commit_transaction(trans, root);
3855 btrfs_release_path(&path);
3856 return ret;
3859 static int check_fs_root(struct btrfs_root *root,
3860 struct cache_tree *root_cache,
3861 struct walk_control *wc)
3863 int ret = 0;
3864 int err = 0;
3865 int wret;
3866 int level;
3867 struct btrfs_path path;
3868 struct shared_node root_node;
3869 struct root_record *rec;
3870 struct btrfs_root_item *root_item = &root->root_item;
3871 struct cache_tree corrupt_blocks;
3872 struct orphan_data_extent *orphan;
3873 struct orphan_data_extent *tmp;
3874 enum btrfs_tree_block_status status;
3875 struct node_refs nrefs;
3878 * Reuse the corrupt_block cache tree to record corrupted tree block
3880 * Unlike the usage in extent tree check, here we do it in a per
3881 * fs/subvol tree base.
3883 cache_tree_init(&corrupt_blocks);
3884 root->fs_info->corrupt_blocks = &corrupt_blocks;
3886 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887 rec = get_root_rec(root_cache, root->root_key.objectid);
3888 BUG_ON(IS_ERR(rec));
3889 if (btrfs_root_refs(root_item) > 0)
3890 rec->found_root_item = 1;
3893 btrfs_init_path(&path);
3894 memset(&root_node, 0, sizeof(root_node));
3895 cache_tree_init(&root_node.root_cache);
3896 cache_tree_init(&root_node.inode_cache);
3897 memset(&nrefs, 0, sizeof(nrefs));
3899 /* Move the orphan extent record to corresponding inode_record */
3900 list_for_each_entry_safe(orphan, tmp,
3901 &root->orphan_data_extents, list) {
3902 struct inode_record *inode;
3904 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3906 BUG_ON(IS_ERR(inode));
3907 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908 list_move(&orphan->list, &inode->orphan_extents);
3911 level = btrfs_header_level(root->node);
3912 memset(wc->nodes, 0, sizeof(wc->nodes));
3913 wc->nodes[level] = &root_node;
3914 wc->active_node = level;
3915 wc->root_level = level;
3917 /* We may not have checked the root block, lets do that now */
3918 if (btrfs_is_leaf(root->node))
3919 status = btrfs_check_leaf(root, NULL, root->node);
3920 else
3921 status = btrfs_check_node(root, NULL, root->node);
3922 if (status != BTRFS_TREE_BLOCK_CLEAN)
3923 return -EIO;
3925 if (btrfs_root_refs(root_item) > 0 ||
3926 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927 path.nodes[level] = root->node;
3928 extent_buffer_get(root->node);
3929 path.slots[level] = 0;
3930 } else {
3931 struct btrfs_key key;
3932 struct btrfs_disk_key found_key;
3934 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935 level = root_item->drop_level;
3936 path.lowest_level = level;
3937 if (level > btrfs_header_level(root->node) ||
3938 level >= BTRFS_MAX_LEVEL) {
3939 error("ignoring invalid drop level: %u", level);
3940 goto skip_walking;
3942 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943 if (wret < 0)
3944 goto skip_walking;
3945 btrfs_node_key(path.nodes[level], &found_key,
3946 path.slots[level]);
3947 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948 sizeof(found_key)));
3951 while (1) {
3952 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953 if (wret < 0)
3954 ret = wret;
3955 if (wret != 0)
3956 break;
3958 wret = walk_up_tree(root, &path, wc, &level);
3959 if (wret < 0)
3960 ret = wret;
3961 if (wret != 0)
3962 break;
3964 skip_walking:
3965 btrfs_release_path(&path);
3967 if (!cache_tree_empty(&corrupt_blocks)) {
3968 struct cache_extent *cache;
3969 struct btrfs_corrupt_block *corrupt;
3971 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972 root->root_key.objectid);
3973 cache = first_cache_extent(&corrupt_blocks);
3974 while (cache) {
3975 corrupt = container_of(cache,
3976 struct btrfs_corrupt_block,
3977 cache);
3978 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979 cache->start, corrupt->level,
3980 corrupt->key.objectid, corrupt->key.type,
3981 corrupt->key.offset);
3982 cache = next_cache_extent(cache);
3984 if (repair) {
3985 printf("Try to repair the btree for root %llu\n",
3986 root->root_key.objectid);
3987 ret = repair_btree(root, &corrupt_blocks);
3988 if (ret < 0)
3989 fprintf(stderr, "Failed to repair btree: %s\n",
3990 strerror(-ret));
3991 if (!ret)
3992 printf("Btree for root %llu is fixed\n",
3993 root->root_key.objectid);
3997 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998 if (err < 0)
3999 ret = err;
4001 if (root_node.current) {
4002 root_node.current->checked = 1;
4003 maybe_free_inode_rec(&root_node.inode_cache,
4004 root_node.current);
4007 err = check_inode_recs(root, &root_node.inode_cache);
4008 if (!ret)
4009 ret = err;
4011 free_corrupt_blocks_tree(&corrupt_blocks);
4012 root->fs_info->corrupt_blocks = NULL;
4013 free_orphan_data_extents(&root->orphan_data_extents);
4014 return ret;
4017 static int fs_root_objectid(u64 objectid)
4019 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021 return 1;
4022 return is_fstree(objectid);
4025 static int check_fs_roots(struct btrfs_root *root,
4026 struct cache_tree *root_cache)
4028 struct btrfs_path path;
4029 struct btrfs_key key;
4030 struct walk_control wc;
4031 struct extent_buffer *leaf, *tree_node;
4032 struct btrfs_root *tmp_root;
4033 struct btrfs_root *tree_root = root->fs_info->tree_root;
4034 int ret;
4035 int err = 0;
4037 if (ctx.progress_enabled) {
4038 ctx.tp = TASK_FS_ROOTS;
4039 task_start(ctx.info);
4043 * Just in case we made any changes to the extent tree that weren't
4044 * reflected into the free space cache yet.
4046 if (repair)
4047 reset_cached_block_groups(root->fs_info);
4048 memset(&wc, 0, sizeof(wc));
4049 cache_tree_init(&wc.shared);
4050 btrfs_init_path(&path);
4052 again:
4053 key.offset = 0;
4054 key.objectid = 0;
4055 key.type = BTRFS_ROOT_ITEM_KEY;
4056 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057 if (ret < 0) {
4058 err = 1;
4059 goto out;
4061 tree_node = tree_root->node;
4062 while (1) {
4063 if (tree_node != tree_root->node) {
4064 free_root_recs_tree(root_cache);
4065 btrfs_release_path(&path);
4066 goto again;
4068 leaf = path.nodes[0];
4069 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070 ret = btrfs_next_leaf(tree_root, &path);
4071 if (ret) {
4072 if (ret < 0)
4073 err = 1;
4074 break;
4076 leaf = path.nodes[0];
4078 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080 fs_root_objectid(key.objectid)) {
4081 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082 tmp_root = btrfs_read_fs_root_no_cache(
4083 root->fs_info, &key);
4084 } else {
4085 key.offset = (u64)-1;
4086 tmp_root = btrfs_read_fs_root(
4087 root->fs_info, &key);
4089 if (IS_ERR(tmp_root)) {
4090 err = 1;
4091 goto next;
4093 ret = check_fs_root(tmp_root, root_cache, &wc);
4094 if (ret == -EAGAIN) {
4095 free_root_recs_tree(root_cache);
4096 btrfs_release_path(&path);
4097 goto again;
4099 if (ret)
4100 err = 1;
4101 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102 btrfs_free_fs_root(tmp_root);
4103 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104 key.type == BTRFS_ROOT_BACKREF_KEY) {
4105 process_root_ref(leaf, path.slots[0], &key,
4106 root_cache);
4108 next:
4109 path.slots[0]++;
4111 out:
4112 btrfs_release_path(&path);
4113 if (err)
4114 free_extent_cache_tree(&wc.shared);
4115 if (!cache_tree_empty(&wc.shared))
4116 fprintf(stderr, "warning line %d\n", __LINE__);
4118 task_stop(ctx.info);
4120 return err;
4124 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125 * INODE_REF/INODE_EXTREF match.
4127 * @root: the root of the fs/file tree
4128 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4129 * @key: the key of the DIR_ITEM/DIR_INDEX
4130 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4131 * distinguish root_dir between normal dir/file
4132 * @name: the name in the INODE_REF/INODE_EXTREF
4133 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4134 * @mode: the st_mode of INODE_ITEM
4136 * Return 0 if no error occurred.
4137 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139 * dir/file.
4140 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141 * not match for normal dir/file.
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144 struct btrfs_key *key, u64 index, char *name,
4145 u32 namelen, u32 mode)
4147 struct btrfs_path path;
4148 struct extent_buffer *node;
4149 struct btrfs_dir_item *di;
4150 struct btrfs_key location;
4151 char namebuf[BTRFS_NAME_LEN] = {0};
4152 u32 total;
4153 u32 cur = 0;
4154 u32 len;
4155 u32 name_len;
4156 u32 data_len;
4157 u8 filetype;
4158 int slot;
4159 int ret;
4161 btrfs_init_path(&path);
4162 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163 if (ret < 0) {
4164 ret = DIR_ITEM_MISSING;
4165 goto out;
4168 /* Process root dir and goto out*/
4169 if (index == 0) {
4170 if (ret == 0) {
4171 ret = ROOT_DIR_ERROR;
4172 error(
4173 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174 root->objectid,
4175 ref_key->type == BTRFS_INODE_REF_KEY ?
4176 "REF" : "EXTREF",
4177 ref_key->objectid, ref_key->offset,
4178 key->type == BTRFS_DIR_ITEM_KEY ?
4179 "DIR_ITEM" : "DIR_INDEX");
4180 } else {
4181 ret = 0;
4184 goto out;
4187 /* Process normal file/dir */
4188 if (ret > 0) {
4189 ret = DIR_ITEM_MISSING;
4190 error(
4191 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192 root->objectid,
4193 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194 ref_key->objectid, ref_key->offset,
4195 key->type == BTRFS_DIR_ITEM_KEY ?
4196 "DIR_ITEM" : "DIR_INDEX",
4197 key->objectid, key->offset, namelen, name,
4198 imode_to_type(mode));
4199 goto out;
4202 /* Check whether inode_id/filetype/name match */
4203 node = path.nodes[0];
4204 slot = path.slots[0];
4205 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206 total = btrfs_item_size_nr(node, slot);
4207 while (cur < total) {
4208 ret = DIR_ITEM_MISMATCH;
4209 name_len = btrfs_dir_name_len(node, di);
4210 data_len = btrfs_dir_data_len(node, di);
4212 btrfs_dir_item_key_to_cpu(node, di, &location);
4213 if (location.objectid != ref_key->objectid ||
4214 location.type != BTRFS_INODE_ITEM_KEY ||
4215 location.offset != 0)
4216 goto next;
4218 filetype = btrfs_dir_type(node, di);
4219 if (imode_to_type(mode) != filetype)
4220 goto next;
4222 if (name_len <= BTRFS_NAME_LEN) {
4223 len = name_len;
4224 } else {
4225 len = BTRFS_NAME_LEN;
4226 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227 root->objectid,
4228 key->type == BTRFS_DIR_ITEM_KEY ?
4229 "DIR_ITEM" : "DIR_INDEX",
4230 key->objectid, key->offset, name_len);
4232 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233 if (len != namelen || strncmp(namebuf, name, len))
4234 goto next;
4236 ret = 0;
4237 goto out;
4238 next:
4239 len = sizeof(*di) + name_len + data_len;
4240 di = (struct btrfs_dir_item *)((char *)di + len);
4241 cur += len;
4243 if (ret == DIR_ITEM_MISMATCH)
4244 error(
4245 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246 root->objectid,
4247 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248 ref_key->objectid, ref_key->offset,
4249 key->type == BTRFS_DIR_ITEM_KEY ?
4250 "DIR_ITEM" : "DIR_INDEX",
4251 key->objectid, key->offset, namelen, name,
4252 imode_to_type(mode));
4253 out:
4254 btrfs_release_path(&path);
4255 return ret;
4259 * Traverse the given INODE_REF and call find_dir_item() to find related
4260 * DIR_ITEM/DIR_INDEX.
4262 * @root: the root of the fs/file tree
4263 * @ref_key: the key of the INODE_REF
4264 * @refs: the count of INODE_REF
4265 * @mode: the st_mode of INODE_ITEM
4267 * Return 0 if no error occurred.
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270 struct extent_buffer *node, int slot, u64 *refs,
4271 int mode)
4273 struct btrfs_key key;
4274 struct btrfs_inode_ref *ref;
4275 char namebuf[BTRFS_NAME_LEN] = {0};
4276 u32 total;
4277 u32 cur = 0;
4278 u32 len;
4279 u32 name_len;
4280 u64 index;
4281 int ret, err = 0;
4283 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284 total = btrfs_item_size_nr(node, slot);
4286 next:
4287 /* Update inode ref count */
4288 (*refs)++;
4290 index = btrfs_inode_ref_index(node, ref);
4291 name_len = btrfs_inode_ref_name_len(node, ref);
4292 if (name_len <= BTRFS_NAME_LEN) {
4293 len = name_len;
4294 } else {
4295 len = BTRFS_NAME_LEN;
4296 warning("root %llu INODE_REF[%llu %llu] name too long",
4297 root->objectid, ref_key->objectid, ref_key->offset);
4300 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4302 /* Check root dir ref name */
4303 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305 root->objectid, ref_key->objectid, ref_key->offset,
4306 namebuf);
4307 err |= ROOT_DIR_ERROR;
4310 /* Find related DIR_INDEX */
4311 key.objectid = ref_key->offset;
4312 key.type = BTRFS_DIR_INDEX_KEY;
4313 key.offset = index;
4314 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315 err |= ret;
4317 /* Find related dir_item */
4318 key.objectid = ref_key->offset;
4319 key.type = BTRFS_DIR_ITEM_KEY;
4320 key.offset = btrfs_name_hash(namebuf, len);
4321 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322 err |= ret;
4324 len = sizeof(*ref) + name_len;
4325 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326 cur += len;
4327 if (cur < total)
4328 goto next;
4330 return err;
4334 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335 * DIR_ITEM/DIR_INDEX.
4337 * @root: the root of the fs/file tree
4338 * @ref_key: the key of the INODE_EXTREF
4339 * @refs: the count of INODE_EXTREF
4340 * @mode: the st_mode of INODE_ITEM
4342 * Return 0 if no error occurred.
4344 static int check_inode_extref(struct btrfs_root *root,
4345 struct btrfs_key *ref_key,
4346 struct extent_buffer *node, int slot, u64 *refs,
4347 int mode)
4349 struct btrfs_key key;
4350 struct btrfs_inode_extref *extref;
4351 char namebuf[BTRFS_NAME_LEN] = {0};
4352 u32 total;
4353 u32 cur = 0;
4354 u32 len;
4355 u32 name_len;
4356 u64 index;
4357 u64 parent;
4358 int ret;
4359 int err = 0;
4361 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362 total = btrfs_item_size_nr(node, slot);
4364 next:
4365 /* update inode ref count */
4366 (*refs)++;
4367 name_len = btrfs_inode_extref_name_len(node, extref);
4368 index = btrfs_inode_extref_index(node, extref);
4369 parent = btrfs_inode_extref_parent(node, extref);
4370 if (name_len <= BTRFS_NAME_LEN) {
4371 len = name_len;
4372 } else {
4373 len = BTRFS_NAME_LEN;
4374 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375 root->objectid, ref_key->objectid, ref_key->offset);
4377 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4379 /* Check root dir ref name */
4380 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382 root->objectid, ref_key->objectid, ref_key->offset,
4383 namebuf);
4384 err |= ROOT_DIR_ERROR;
4387 /* find related dir_index */
4388 key.objectid = parent;
4389 key.type = BTRFS_DIR_INDEX_KEY;
4390 key.offset = index;
4391 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392 err |= ret;
4394 /* find related dir_item */
4395 key.objectid = parent;
4396 key.type = BTRFS_DIR_ITEM_KEY;
4397 key.offset = btrfs_name_hash(namebuf, len);
4398 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399 err |= ret;
4401 len = sizeof(*extref) + name_len;
4402 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403 cur += len;
4405 if (cur < total)
4406 goto next;
4408 return err;
4412 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413 * DIR_ITEM/DIR_INDEX match.
4415 * @root: the root of the fs/file tree
4416 * @key: the key of the INODE_REF/INODE_EXTREF
4417 * @name: the name in the INODE_REF/INODE_EXTREF
4418 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4419 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420 * to (u64)-1
4421 * @ext_ref: the EXTENDED_IREF feature
4423 * Return 0 if no error occurred.
4424 * Return >0 for error bitmap
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427 char *name, int namelen, u64 index,
4428 unsigned int ext_ref)
4430 struct btrfs_path path;
4431 struct btrfs_inode_ref *ref;
4432 struct btrfs_inode_extref *extref;
4433 struct extent_buffer *node;
4434 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435 u32 total;
4436 u32 cur = 0;
4437 u32 len;
4438 u32 ref_namelen;
4439 u64 ref_index;
4440 u64 parent;
4441 u64 dir_id;
4442 int slot;
4443 int ret;
4445 btrfs_init_path(&path);
4446 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447 if (ret) {
4448 ret = INODE_REF_MISSING;
4449 goto extref;
4452 node = path.nodes[0];
4453 slot = path.slots[0];
4455 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456 total = btrfs_item_size_nr(node, slot);
4458 /* Iterate all entry of INODE_REF */
4459 while (cur < total) {
4460 ret = INODE_REF_MISSING;
4462 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463 ref_index = btrfs_inode_ref_index(node, ref);
4464 if (index != (u64)-1 && index != ref_index)
4465 goto next_ref;
4467 if (ref_namelen <= BTRFS_NAME_LEN) {
4468 len = ref_namelen;
4469 } else {
4470 len = BTRFS_NAME_LEN;
4471 warning("root %llu INODE %s[%llu %llu] name too long",
4472 root->objectid,
4473 key->type == BTRFS_INODE_REF_KEY ?
4474 "REF" : "EXTREF",
4475 key->objectid, key->offset);
4477 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478 len);
4480 if (len != namelen || strncmp(ref_namebuf, name, len))
4481 goto next_ref;
4483 ret = 0;
4484 goto out;
4485 next_ref:
4486 len = sizeof(*ref) + ref_namelen;
4487 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488 cur += len;
4491 extref:
4492 /* Skip if not support EXTENDED_IREF feature */
4493 if (!ext_ref)
4494 goto out;
4496 btrfs_release_path(&path);
4497 btrfs_init_path(&path);
4499 dir_id = key->offset;
4500 key->type = BTRFS_INODE_EXTREF_KEY;
4501 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4503 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504 if (ret) {
4505 ret = INODE_REF_MISSING;
4506 goto out;
4509 node = path.nodes[0];
4510 slot = path.slots[0];
4512 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513 cur = 0;
4514 total = btrfs_item_size_nr(node, slot);
4516 /* Iterate all entry of INODE_EXTREF */
4517 while (cur < total) {
4518 ret = INODE_REF_MISSING;
4520 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521 ref_index = btrfs_inode_extref_index(node, extref);
4522 parent = btrfs_inode_extref_parent(node, extref);
4523 if (index != (u64)-1 && index != ref_index)
4524 goto next_extref;
4526 if (parent != dir_id)
4527 goto next_extref;
4529 if (ref_namelen <= BTRFS_NAME_LEN) {
4530 len = ref_namelen;
4531 } else {
4532 len = BTRFS_NAME_LEN;
4533 warning("root %llu INODE %s[%llu %llu] name too long",
4534 root->objectid,
4535 key->type == BTRFS_INODE_REF_KEY ?
4536 "REF" : "EXTREF",
4537 key->objectid, key->offset);
4539 read_extent_buffer(node, ref_namebuf,
4540 (unsigned long)(extref + 1), len);
4542 if (len != namelen || strncmp(ref_namebuf, name, len))
4543 goto next_extref;
4545 ret = 0;
4546 goto out;
4548 next_extref:
4549 len = sizeof(*extref) + ref_namelen;
4550 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551 cur += len;
4554 out:
4555 btrfs_release_path(&path);
4556 return ret;
4560 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4563 * @root: the root of the fs/file tree
4564 * @key: the key of the DIR_ITEM/DIR_INDEX
4565 * @size: the st_size of the INODE_ITEM
4566 * @ext_ref: the EXTENDED_IREF feature
4568 * Return 0 if no error occurred.
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571 struct extent_buffer *node, int slot, u64 *size,
4572 unsigned int ext_ref)
4574 struct btrfs_dir_item *di;
4575 struct btrfs_inode_item *ii;
4576 struct btrfs_path path;
4577 struct btrfs_key location;
4578 char namebuf[BTRFS_NAME_LEN] = {0};
4579 u32 total;
4580 u32 cur = 0;
4581 u32 len;
4582 u32 name_len;
4583 u32 data_len;
4584 u8 filetype;
4585 u32 mode;
4586 u64 index;
4587 int ret;
4588 int err = 0;
4591 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592 * ignore index check.
4594 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4596 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597 total = btrfs_item_size_nr(node, slot);
4599 while (cur < total) {
4600 data_len = btrfs_dir_data_len(node, di);
4601 if (data_len)
4602 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604 "DIR_ITEM" : "DIR_INDEX",
4605 key->objectid, key->offset, data_len);
4607 name_len = btrfs_dir_name_len(node, di);
4608 if (name_len <= BTRFS_NAME_LEN) {
4609 len = name_len;
4610 } else {
4611 len = BTRFS_NAME_LEN;
4612 warning("root %llu %s[%llu %llu] name too long",
4613 root->objectid,
4614 key->type == BTRFS_DIR_ITEM_KEY ?
4615 "DIR_ITEM" : "DIR_INDEX",
4616 key->objectid, key->offset);
4618 (*size) += name_len;
4620 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621 filetype = btrfs_dir_type(node, di);
4623 btrfs_init_path(&path);
4624 btrfs_dir_item_key_to_cpu(node, di, &location);
4626 /* Ignore related ROOT_ITEM check */
4627 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628 goto next;
4630 /* Check relative INODE_ITEM(existence/filetype) */
4631 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632 if (ret) {
4633 err |= INODE_ITEM_MISSING;
4634 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637 key->offset, location.objectid, name_len,
4638 namebuf, filetype);
4639 goto next;
4642 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643 struct btrfs_inode_item);
4644 mode = btrfs_inode_mode(path.nodes[0], ii);
4646 if (imode_to_type(mode) != filetype) {
4647 err |= INODE_ITEM_MISMATCH;
4648 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651 key->offset, name_len, namebuf, filetype);
4654 /* Check relative INODE_REF/INODE_EXTREF */
4655 location.type = BTRFS_INODE_REF_KEY;
4656 location.offset = key->objectid;
4657 ret = find_inode_ref(root, &location, namebuf, len,
4658 index, ext_ref);
4659 err |= ret;
4660 if (ret & INODE_REF_MISSING)
4661 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664 key->offset, name_len, namebuf, filetype);
4666 next:
4667 btrfs_release_path(&path);
4668 len = sizeof(*di) + name_len + data_len;
4669 di = (struct btrfs_dir_item *)((char *)di + len);
4670 cur += len;
4672 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674 root->objectid, key->objectid, key->offset);
4675 break;
4679 return err;
4683 * Check file extent datasum/hole, update the size of the file extents,
4684 * check and update the last offset of the file extent.
4686 * @root: the root of fs/file tree.
4687 * @fkey: the key of the file extent.
4688 * @nodatasum: INODE_NODATASUM feature.
4689 * @size: the sum of all EXTENT_DATA items size for this inode.
4690 * @end: the offset of the last extent.
4692 * Return 0 if no error occurred.
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695 struct extent_buffer *node, int slot,
4696 unsigned int nodatasum, u64 *size, u64 *end)
4698 struct btrfs_file_extent_item *fi;
4699 u64 disk_bytenr;
4700 u64 disk_num_bytes;
4701 u64 extent_num_bytes;
4702 u64 extent_offset;
4703 u64 csum_found; /* In byte size, sectorsize aligned */
4704 u64 search_start; /* Logical range start we search for csum */
4705 u64 search_len; /* Logical range len we search for csum */
4706 unsigned int extent_type;
4707 unsigned int is_hole;
4708 int compressed = 0;
4709 int ret;
4710 int err = 0;
4712 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4714 /* Check inline extent */
4715 extent_type = btrfs_file_extent_type(node, fi);
4716 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717 struct btrfs_item *e = btrfs_item_nr(slot);
4718 u32 item_inline_len;
4720 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722 compressed = btrfs_file_extent_compression(node, fi);
4723 if (extent_num_bytes == 0) {
4724 error(
4725 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4726 root->objectid, fkey->objectid, fkey->offset);
4727 err |= FILE_EXTENT_ERROR;
4729 if (!compressed && extent_num_bytes != item_inline_len) {
4730 error(
4731 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4732 root->objectid, fkey->objectid, fkey->offset,
4733 extent_num_bytes, item_inline_len);
4734 err |= FILE_EXTENT_ERROR;
4736 *size += extent_num_bytes;
4737 return err;
4740 /* Check extent type */
4741 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4742 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4743 err |= FILE_EXTENT_ERROR;
4744 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4745 root->objectid, fkey->objectid, fkey->offset);
4746 return err;
4749 /* Check REG_EXTENT/PREALLOC_EXTENT */
4750 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4751 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4752 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4753 extent_offset = btrfs_file_extent_offset(node, fi);
4754 compressed = btrfs_file_extent_compression(node, fi);
4755 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4758 * Check EXTENT_DATA csum
4760 * For plain (uncompressed) extent, we should only check the range
4761 * we're referring to, as it's possible that part of prealloc extent
4762 * has been written, and has csum:
4764 * |<--- Original large preallocated extent A ---->|
4765 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4766 * No csum Has csum
4768 * For compressed extent, we should check the whole range.
4770 if (!compressed) {
4771 search_start = disk_bytenr + extent_offset;
4772 search_len = extent_num_bytes;
4773 } else {
4774 search_start = disk_bytenr;
4775 search_len = disk_num_bytes;
4777 ret = count_csum_range(root, search_start, search_len, &csum_found);
4778 if (csum_found > 0 && nodatasum) {
4779 err |= ODD_CSUM_ITEM;
4780 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4781 root->objectid, fkey->objectid, fkey->offset);
4782 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4783 !is_hole && (ret < 0 || csum_found < search_len)) {
4784 err |= CSUM_ITEM_MISSING;
4785 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4786 root->objectid, fkey->objectid, fkey->offset,
4787 csum_found, search_len);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4789 err |= ODD_CSUM_ITEM;
4790 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4791 root->objectid, fkey->objectid, fkey->offset, csum_found);
4794 /* Check EXTENT_DATA hole */
4795 if (no_holes && is_hole) {
4796 err |= FILE_EXTENT_ERROR;
4797 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4798 root->objectid, fkey->objectid, fkey->offset);
4799 } else if (!no_holes && *end != fkey->offset) {
4800 err |= FILE_EXTENT_ERROR;
4801 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4802 root->objectid, fkey->objectid, fkey->offset);
4805 *end += extent_num_bytes;
4806 if (!is_hole)
4807 *size += extent_num_bytes;
4809 return err;
4813 * Check INODE_ITEM and related ITEMs (the same inode number)
4814 * 1. check link count
4815 * 2. check inode ref/extref
4816 * 3. check dir item/index
4818 * @ext_ref: the EXTENDED_IREF feature
4820 * Return 0 if no error occurred.
4821 * Return >0 for error or hit the traversal is done(by error bitmap)
4823 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4824 unsigned int ext_ref)
4826 struct extent_buffer *node;
4827 struct btrfs_inode_item *ii;
4828 struct btrfs_key key;
4829 u64 inode_id;
4830 u32 mode;
4831 u64 nlink;
4832 u64 nbytes;
4833 u64 isize;
4834 u64 size = 0;
4835 u64 refs = 0;
4836 u64 extent_end = 0;
4837 u64 extent_size = 0;
4838 unsigned int dir;
4839 unsigned int nodatasum;
4840 int slot;
4841 int ret;
4842 int err = 0;
4844 node = path->nodes[0];
4845 slot = path->slots[0];
4847 btrfs_item_key_to_cpu(node, &key, slot);
4848 inode_id = key.objectid;
4850 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4851 ret = btrfs_next_item(root, path);
4852 if (ret > 0)
4853 err |= LAST_ITEM;
4854 return err;
4857 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4858 isize = btrfs_inode_size(node, ii);
4859 nbytes = btrfs_inode_nbytes(node, ii);
4860 mode = btrfs_inode_mode(node, ii);
4861 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4862 nlink = btrfs_inode_nlink(node, ii);
4863 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4865 while (1) {
4866 ret = btrfs_next_item(root, path);
4867 if (ret < 0) {
4868 /* out will fill 'err' rusing current statistics */
4869 goto out;
4870 } else if (ret > 0) {
4871 err |= LAST_ITEM;
4872 goto out;
4875 node = path->nodes[0];
4876 slot = path->slots[0];
4877 btrfs_item_key_to_cpu(node, &key, slot);
4878 if (key.objectid != inode_id)
4879 goto out;
4881 switch (key.type) {
4882 case BTRFS_INODE_REF_KEY:
4883 ret = check_inode_ref(root, &key, node, slot, &refs,
4884 mode);
4885 err |= ret;
4886 break;
4887 case BTRFS_INODE_EXTREF_KEY:
4888 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4889 warning("root %llu EXTREF[%llu %llu] isn't supported",
4890 root->objectid, key.objectid,
4891 key.offset);
4892 ret = check_inode_extref(root, &key, node, slot, &refs,
4893 mode);
4894 err |= ret;
4895 break;
4896 case BTRFS_DIR_ITEM_KEY:
4897 case BTRFS_DIR_INDEX_KEY:
4898 if (!dir) {
4899 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4900 root->objectid, inode_id,
4901 imode_to_type(mode), key.objectid,
4902 key.offset);
4904 ret = check_dir_item(root, &key, node, slot, &size,
4905 ext_ref);
4906 err |= ret;
4907 break;
4908 case BTRFS_EXTENT_DATA_KEY:
4909 if (dir) {
4910 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4911 root->objectid, inode_id, key.objectid,
4912 key.offset);
4914 ret = check_file_extent(root, &key, node, slot,
4915 nodatasum, &extent_size,
4916 &extent_end);
4917 err |= ret;
4918 break;
4919 case BTRFS_XATTR_ITEM_KEY:
4920 break;
4921 default:
4922 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4923 key.objectid, key.type, key.offset);
4927 out:
4928 /* verify INODE_ITEM nlink/isize/nbytes */
4929 if (dir) {
4930 if (nlink != 1) {
4931 err |= LINK_COUNT_ERROR;
4932 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4933 root->objectid, inode_id, nlink);
4937 * Just a warning, as dir inode nbytes is just an
4938 * instructive value.
4940 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4941 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4942 root->objectid, inode_id, root->nodesize);
4945 if (isize != size) {
4946 err |= ISIZE_ERROR;
4947 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4948 root->objectid, inode_id, isize, size);
4950 } else {
4951 if (nlink != refs) {
4952 err |= LINK_COUNT_ERROR;
4953 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4954 root->objectid, inode_id, nlink, refs);
4955 } else if (!nlink) {
4956 err |= ORPHAN_ITEM;
4959 if (!nbytes && !no_holes && extent_end < isize) {
4960 err |= NBYTES_ERROR;
4961 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4962 root->objectid, inode_id, isize);
4965 if (nbytes != extent_size) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4968 root->objectid, inode_id, nbytes, extent_size);
4972 return err;
4975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4977 struct btrfs_path path;
4978 struct btrfs_key key;
4979 int err = 0;
4980 int ret;
4982 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4983 key.type = BTRFS_INODE_ITEM_KEY;
4984 key.offset = 0;
4986 /* For root being dropped, we don't need to check first inode */
4987 if (btrfs_root_refs(&root->root_item) == 0 &&
4988 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4989 key.objectid)
4990 return 0;
4992 btrfs_init_path(&path);
4994 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4995 if (ret < 0)
4996 goto out;
4997 if (ret > 0) {
4998 ret = 0;
4999 err |= INODE_ITEM_MISSING;
5002 err |= check_inode_item(root, &path, ext_ref);
5003 err &= ~LAST_ITEM;
5004 if (err && !ret)
5005 ret = -EIO;
5006 out:
5007 btrfs_release_path(&path);
5008 return ret;
5012 * Iterate all item on the tree and call check_inode_item() to check.
5014 * @root: the root of the tree to be checked.
5015 * @ext_ref: the EXTENDED_IREF feature
5017 * Return 0 if no error found.
5018 * Return <0 for error.
5020 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5022 struct btrfs_path path;
5023 struct node_refs nrefs;
5024 struct btrfs_root_item *root_item = &root->root_item;
5025 int ret, wret;
5026 int level;
5029 * We need to manually check the first inode item(256)
5030 * As the following traversal function will only start from
5031 * the first inode item in the leaf, if inode item(256) is missing
5032 * we will just skip it forever.
5034 ret = check_fs_first_inode(root, ext_ref);
5035 if (ret < 0)
5036 return ret;
5038 memset(&nrefs, 0, sizeof(nrefs));
5039 level = btrfs_header_level(root->node);
5040 btrfs_init_path(&path);
5042 if (btrfs_root_refs(root_item) > 0 ||
5043 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5044 path.nodes[level] = root->node;
5045 path.slots[level] = 0;
5046 extent_buffer_get(root->node);
5047 } else {
5048 struct btrfs_key key;
5050 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5051 level = root_item->drop_level;
5052 path.lowest_level = level;
5053 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5054 if (ret < 0)
5055 goto out;
5056 ret = 0;
5059 while (1) {
5060 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5061 if (wret < 0)
5062 ret = wret;
5063 if (wret != 0)
5064 break;
5066 wret = walk_up_tree_v2(root, &path, &level);
5067 if (wret < 0)
5068 ret = wret;
5069 if (wret != 0)
5070 break;
5073 out:
5074 btrfs_release_path(&path);
5075 return ret;
5079 * Find the relative ref for root_ref and root_backref.
5081 * @root: the root of the root tree.
5082 * @ref_key: the key of the root ref.
5084 * Return 0 if no error occurred.
5086 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5087 struct extent_buffer *node, int slot)
5089 struct btrfs_path path;
5090 struct btrfs_key key;
5091 struct btrfs_root_ref *ref;
5092 struct btrfs_root_ref *backref;
5093 char ref_name[BTRFS_NAME_LEN] = {0};
5094 char backref_name[BTRFS_NAME_LEN] = {0};
5095 u64 ref_dirid;
5096 u64 ref_seq;
5097 u32 ref_namelen;
5098 u64 backref_dirid;
5099 u64 backref_seq;
5100 u32 backref_namelen;
5101 u32 len;
5102 int ret;
5103 int err = 0;
5105 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5106 ref_dirid = btrfs_root_ref_dirid(node, ref);
5107 ref_seq = btrfs_root_ref_sequence(node, ref);
5108 ref_namelen = btrfs_root_ref_name_len(node, ref);
5110 if (ref_namelen <= BTRFS_NAME_LEN) {
5111 len = ref_namelen;
5112 } else {
5113 len = BTRFS_NAME_LEN;
5114 warning("%s[%llu %llu] ref_name too long",
5115 ref_key->type == BTRFS_ROOT_REF_KEY ?
5116 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5117 ref_key->offset);
5119 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5121 /* Find relative root_ref */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5124 key.offset = ref_key->objectid;
5126 btrfs_init_path(&path);
5127 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5128 if (ret) {
5129 err |= ROOT_REF_MISSING;
5130 error("%s[%llu %llu] couldn't find relative ref",
5131 ref_key->type == BTRFS_ROOT_REF_KEY ?
5132 "ROOT_REF" : "ROOT_BACKREF",
5133 ref_key->objectid, ref_key->offset);
5134 goto out;
5137 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5138 struct btrfs_root_ref);
5139 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5140 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5141 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5143 if (backref_namelen <= BTRFS_NAME_LEN) {
5144 len = backref_namelen;
5145 } else {
5146 len = BTRFS_NAME_LEN;
5147 warning("%s[%llu %llu] ref_name too long",
5148 key.type == BTRFS_ROOT_REF_KEY ?
5149 "ROOT_REF" : "ROOT_BACKREF",
5150 key.objectid, key.offset);
5152 read_extent_buffer(path.nodes[0], backref_name,
5153 (unsigned long)(backref + 1), len);
5155 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5156 ref_namelen != backref_namelen ||
5157 strncmp(ref_name, backref_name, len)) {
5158 err |= ROOT_REF_MISMATCH;
5159 error("%s[%llu %llu] mismatch relative ref",
5160 ref_key->type == BTRFS_ROOT_REF_KEY ?
5161 "ROOT_REF" : "ROOT_BACKREF",
5162 ref_key->objectid, ref_key->offset);
5164 out:
5165 btrfs_release_path(&path);
5166 return err;
5170 * Check all fs/file tree in low_memory mode.
5172 * 1. for fs tree root item, call check_fs_root_v2()
5173 * 2. for fs tree root ref/backref, call check_root_ref()
5175 * Return 0 if no error occurred.
5177 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5179 struct btrfs_root *tree_root = fs_info->tree_root;
5180 struct btrfs_root *cur_root = NULL;
5181 struct btrfs_path path;
5182 struct btrfs_key key;
5183 struct extent_buffer *node;
5184 unsigned int ext_ref;
5185 int slot;
5186 int ret;
5187 int err = 0;
5189 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5191 btrfs_init_path(&path);
5192 key.objectid = BTRFS_FS_TREE_OBJECTID;
5193 key.offset = 0;
5194 key.type = BTRFS_ROOT_ITEM_KEY;
5196 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5197 if (ret < 0) {
5198 err = ret;
5199 goto out;
5200 } else if (ret > 0) {
5201 err = -ENOENT;
5202 goto out;
5205 while (1) {
5206 node = path.nodes[0];
5207 slot = path.slots[0];
5208 btrfs_item_key_to_cpu(node, &key, slot);
5209 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5210 goto out;
5211 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5212 fs_root_objectid(key.objectid)) {
5213 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5214 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5215 &key);
5216 } else {
5217 key.offset = (u64)-1;
5218 cur_root = btrfs_read_fs_root(fs_info, &key);
5221 if (IS_ERR(cur_root)) {
5222 error("Fail to read fs/subvol tree: %lld",
5223 key.objectid);
5224 err = -EIO;
5225 goto next;
5228 ret = check_fs_root_v2(cur_root, ext_ref);
5229 err |= ret;
5231 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5232 btrfs_free_fs_root(cur_root);
5233 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5234 key.type == BTRFS_ROOT_BACKREF_KEY) {
5235 ret = check_root_ref(tree_root, &key, node, slot);
5236 err |= ret;
5238 next:
5239 ret = btrfs_next_item(tree_root, &path);
5240 if (ret > 0)
5241 goto out;
5242 if (ret < 0) {
5243 err = ret;
5244 goto out;
5248 out:
5249 btrfs_release_path(&path);
5250 return err;
5253 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5255 struct list_head *cur = rec->backrefs.next;
5256 struct extent_backref *back;
5257 struct tree_backref *tback;
5258 struct data_backref *dback;
5259 u64 found = 0;
5260 int err = 0;
5262 while(cur != &rec->backrefs) {
5263 back = to_extent_backref(cur);
5264 cur = cur->next;
5265 if (!back->found_extent_tree) {
5266 err = 1;
5267 if (!print_errs)
5268 goto out;
5269 if (back->is_data) {
5270 dback = to_data_backref(back);
5271 fprintf(stderr, "Backref %llu %s %llu"
5272 " owner %llu offset %llu num_refs %lu"
5273 " not found in extent tree\n",
5274 (unsigned long long)rec->start,
5275 back->full_backref ?
5276 "parent" : "root",
5277 back->full_backref ?
5278 (unsigned long long)dback->parent:
5279 (unsigned long long)dback->root,
5280 (unsigned long long)dback->owner,
5281 (unsigned long long)dback->offset,
5282 (unsigned long)dback->num_refs);
5283 } else {
5284 tback = to_tree_backref(back);
5285 fprintf(stderr, "Backref %llu parent %llu"
5286 " root %llu not found in extent tree\n",
5287 (unsigned long long)rec->start,
5288 (unsigned long long)tback->parent,
5289 (unsigned long long)tback->root);
5292 if (!back->is_data && !back->found_ref) {
5293 err = 1;
5294 if (!print_errs)
5295 goto out;
5296 tback = to_tree_backref(back);
5297 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5298 (unsigned long long)rec->start,
5299 back->full_backref ? "parent" : "root",
5300 back->full_backref ?
5301 (unsigned long long)tback->parent :
5302 (unsigned long long)tback->root, back);
5304 if (back->is_data) {
5305 dback = to_data_backref(back);
5306 if (dback->found_ref != dback->num_refs) {
5307 err = 1;
5308 if (!print_errs)
5309 goto out;
5310 fprintf(stderr, "Incorrect local backref count"
5311 " on %llu %s %llu owner %llu"
5312 " offset %llu found %u wanted %u back %p\n",
5313 (unsigned long long)rec->start,
5314 back->full_backref ?
5315 "parent" : "root",
5316 back->full_backref ?
5317 (unsigned long long)dback->parent:
5318 (unsigned long long)dback->root,
5319 (unsigned long long)dback->owner,
5320 (unsigned long long)dback->offset,
5321 dback->found_ref, dback->num_refs, back);
5323 if (dback->disk_bytenr != rec->start) {
5324 err = 1;
5325 if (!print_errs)
5326 goto out;
5327 fprintf(stderr, "Backref disk bytenr does not"
5328 " match extent record, bytenr=%llu, "
5329 "ref bytenr=%llu\n",
5330 (unsigned long long)rec->start,
5331 (unsigned long long)dback->disk_bytenr);
5334 if (dback->bytes != rec->nr) {
5335 err = 1;
5336 if (!print_errs)
5337 goto out;
5338 fprintf(stderr, "Backref bytes do not match "
5339 "extent backref, bytenr=%llu, ref "
5340 "bytes=%llu, backref bytes=%llu\n",
5341 (unsigned long long)rec->start,
5342 (unsigned long long)rec->nr,
5343 (unsigned long long)dback->bytes);
5346 if (!back->is_data) {
5347 found += 1;
5348 } else {
5349 dback = to_data_backref(back);
5350 found += dback->found_ref;
5353 if (found != rec->refs) {
5354 err = 1;
5355 if (!print_errs)
5356 goto out;
5357 fprintf(stderr, "Incorrect global backref count "
5358 "on %llu found %llu wanted %llu\n",
5359 (unsigned long long)rec->start,
5360 (unsigned long long)found,
5361 (unsigned long long)rec->refs);
5363 out:
5364 return err;
5367 static int free_all_extent_backrefs(struct extent_record *rec)
5369 struct extent_backref *back;
5370 struct list_head *cur;
5371 while (!list_empty(&rec->backrefs)) {
5372 cur = rec->backrefs.next;
5373 back = to_extent_backref(cur);
5374 list_del(cur);
5375 free(back);
5377 return 0;
5380 static void free_extent_record_cache(struct cache_tree *extent_cache)
5382 struct cache_extent *cache;
5383 struct extent_record *rec;
5385 while (1) {
5386 cache = first_cache_extent(extent_cache);
5387 if (!cache)
5388 break;
5389 rec = container_of(cache, struct extent_record, cache);
5390 remove_cache_extent(extent_cache, cache);
5391 free_all_extent_backrefs(rec);
5392 free(rec);
5396 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5397 struct extent_record *rec)
5399 if (rec->content_checked && rec->owner_ref_checked &&
5400 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5401 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5402 !rec->bad_full_backref && !rec->crossing_stripes &&
5403 !rec->wrong_chunk_type) {
5404 remove_cache_extent(extent_cache, &rec->cache);
5405 free_all_extent_backrefs(rec);
5406 list_del_init(&rec->list);
5407 free(rec);
5409 return 0;
5412 static int check_owner_ref(struct btrfs_root *root,
5413 struct extent_record *rec,
5414 struct extent_buffer *buf)
5416 struct extent_backref *node;
5417 struct tree_backref *back;
5418 struct btrfs_root *ref_root;
5419 struct btrfs_key key;
5420 struct btrfs_path path;
5421 struct extent_buffer *parent;
5422 int level;
5423 int found = 0;
5424 int ret;
5426 list_for_each_entry(node, &rec->backrefs, list) {
5427 if (node->is_data)
5428 continue;
5429 if (!node->found_ref)
5430 continue;
5431 if (node->full_backref)
5432 continue;
5433 back = to_tree_backref(node);
5434 if (btrfs_header_owner(buf) == back->root)
5435 return 0;
5437 BUG_ON(rec->is_root);
5439 /* try to find the block by search corresponding fs tree */
5440 key.objectid = btrfs_header_owner(buf);
5441 key.type = BTRFS_ROOT_ITEM_KEY;
5442 key.offset = (u64)-1;
5444 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5445 if (IS_ERR(ref_root))
5446 return 1;
5448 level = btrfs_header_level(buf);
5449 if (level == 0)
5450 btrfs_item_key_to_cpu(buf, &key, 0);
5451 else
5452 btrfs_node_key_to_cpu(buf, &key, 0);
5454 btrfs_init_path(&path);
5455 path.lowest_level = level + 1;
5456 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5457 if (ret < 0)
5458 return 0;
5460 parent = path.nodes[level + 1];
5461 if (parent && buf->start == btrfs_node_blockptr(parent,
5462 path.slots[level + 1]))
5463 found = 1;
5465 btrfs_release_path(&path);
5466 return found ? 0 : 1;
5469 static int is_extent_tree_record(struct extent_record *rec)
5471 struct list_head *cur = rec->backrefs.next;
5472 struct extent_backref *node;
5473 struct tree_backref *back;
5474 int is_extent = 0;
5476 while(cur != &rec->backrefs) {
5477 node = to_extent_backref(cur);
5478 cur = cur->next;
5479 if (node->is_data)
5480 return 0;
5481 back = to_tree_backref(node);
5482 if (node->full_backref)
5483 return 0;
5484 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5485 is_extent = 1;
5487 return is_extent;
5491 static int record_bad_block_io(struct btrfs_fs_info *info,
5492 struct cache_tree *extent_cache,
5493 u64 start, u64 len)
5495 struct extent_record *rec;
5496 struct cache_extent *cache;
5497 struct btrfs_key key;
5499 cache = lookup_cache_extent(extent_cache, start, len);
5500 if (!cache)
5501 return 0;
5503 rec = container_of(cache, struct extent_record, cache);
5504 if (!is_extent_tree_record(rec))
5505 return 0;
5507 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5508 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5511 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5512 struct extent_buffer *buf, int slot)
5514 if (btrfs_header_level(buf)) {
5515 struct btrfs_key_ptr ptr1, ptr2;
5517 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5518 sizeof(struct btrfs_key_ptr));
5519 read_extent_buffer(buf, &ptr2,
5520 btrfs_node_key_ptr_offset(slot + 1),
5521 sizeof(struct btrfs_key_ptr));
5522 write_extent_buffer(buf, &ptr1,
5523 btrfs_node_key_ptr_offset(slot + 1),
5524 sizeof(struct btrfs_key_ptr));
5525 write_extent_buffer(buf, &ptr2,
5526 btrfs_node_key_ptr_offset(slot),
5527 sizeof(struct btrfs_key_ptr));
5528 if (slot == 0) {
5529 struct btrfs_disk_key key;
5530 btrfs_node_key(buf, &key, 0);
5531 btrfs_fixup_low_keys(root, path, &key,
5532 btrfs_header_level(buf) + 1);
5534 } else {
5535 struct btrfs_item *item1, *item2;
5536 struct btrfs_key k1, k2;
5537 char *item1_data, *item2_data;
5538 u32 item1_offset, item2_offset, item1_size, item2_size;
5540 item1 = btrfs_item_nr(slot);
5541 item2 = btrfs_item_nr(slot + 1);
5542 btrfs_item_key_to_cpu(buf, &k1, slot);
5543 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5544 item1_offset = btrfs_item_offset(buf, item1);
5545 item2_offset = btrfs_item_offset(buf, item2);
5546 item1_size = btrfs_item_size(buf, item1);
5547 item2_size = btrfs_item_size(buf, item2);
5549 item1_data = malloc(item1_size);
5550 if (!item1_data)
5551 return -ENOMEM;
5552 item2_data = malloc(item2_size);
5553 if (!item2_data) {
5554 free(item1_data);
5555 return -ENOMEM;
5558 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5559 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5561 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5562 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5563 free(item1_data);
5564 free(item2_data);
5566 btrfs_set_item_offset(buf, item1, item2_offset);
5567 btrfs_set_item_offset(buf, item2, item1_offset);
5568 btrfs_set_item_size(buf, item1, item2_size);
5569 btrfs_set_item_size(buf, item2, item1_size);
5571 path->slots[0] = slot;
5572 btrfs_set_item_key_unsafe(root, path, &k2);
5573 path->slots[0] = slot + 1;
5574 btrfs_set_item_key_unsafe(root, path, &k1);
5576 return 0;
5579 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5581 struct extent_buffer *buf;
5582 struct btrfs_key k1, k2;
5583 int i;
5584 int level = path->lowest_level;
5585 int ret = -EIO;
5587 buf = path->nodes[level];
5588 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5589 if (level) {
5590 btrfs_node_key_to_cpu(buf, &k1, i);
5591 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5592 } else {
5593 btrfs_item_key_to_cpu(buf, &k1, i);
5594 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5596 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5597 continue;
5598 ret = swap_values(root, path, buf, i);
5599 if (ret)
5600 break;
5601 btrfs_mark_buffer_dirty(buf);
5602 i = 0;
5604 return ret;
5607 static int delete_bogus_item(struct btrfs_root *root,
5608 struct btrfs_path *path,
5609 struct extent_buffer *buf, int slot)
5611 struct btrfs_key key;
5612 int nritems = btrfs_header_nritems(buf);
5614 btrfs_item_key_to_cpu(buf, &key, slot);
5616 /* These are all the keys we can deal with missing. */
5617 if (key.type != BTRFS_DIR_INDEX_KEY &&
5618 key.type != BTRFS_EXTENT_ITEM_KEY &&
5619 key.type != BTRFS_METADATA_ITEM_KEY &&
5620 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5621 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5622 return -1;
5624 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5625 (unsigned long long)key.objectid, key.type,
5626 (unsigned long long)key.offset, slot, buf->start);
5627 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5628 btrfs_item_nr_offset(slot + 1),
5629 sizeof(struct btrfs_item) *
5630 (nritems - slot - 1));
5631 btrfs_set_header_nritems(buf, nritems - 1);
5632 if (slot == 0) {
5633 struct btrfs_disk_key disk_key;
5635 btrfs_item_key(buf, &disk_key, 0);
5636 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5638 btrfs_mark_buffer_dirty(buf);
5639 return 0;
5642 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5644 struct extent_buffer *buf;
5645 int i;
5646 int ret = 0;
5648 /* We should only get this for leaves */
5649 BUG_ON(path->lowest_level);
5650 buf = path->nodes[0];
5651 again:
5652 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5653 unsigned int shift = 0, offset;
5655 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5656 BTRFS_LEAF_DATA_SIZE(root)) {
5657 if (btrfs_item_end_nr(buf, i) >
5658 BTRFS_LEAF_DATA_SIZE(root)) {
5659 ret = delete_bogus_item(root, path, buf, i);
5660 if (!ret)
5661 goto again;
5662 fprintf(stderr, "item is off the end of the "
5663 "leaf, can't fix\n");
5664 ret = -EIO;
5665 break;
5667 shift = BTRFS_LEAF_DATA_SIZE(root) -
5668 btrfs_item_end_nr(buf, i);
5669 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5670 btrfs_item_offset_nr(buf, i - 1)) {
5671 if (btrfs_item_end_nr(buf, i) >
5672 btrfs_item_offset_nr(buf, i - 1)) {
5673 ret = delete_bogus_item(root, path, buf, i);
5674 if (!ret)
5675 goto again;
5676 fprintf(stderr, "items overlap, can't fix\n");
5677 ret = -EIO;
5678 break;
5680 shift = btrfs_item_offset_nr(buf, i - 1) -
5681 btrfs_item_end_nr(buf, i);
5683 if (!shift)
5684 continue;
5686 printf("Shifting item nr %d by %u bytes in block %llu\n",
5687 i, shift, (unsigned long long)buf->start);
5688 offset = btrfs_item_offset_nr(buf, i);
5689 memmove_extent_buffer(buf,
5690 btrfs_leaf_data(buf) + offset + shift,
5691 btrfs_leaf_data(buf) + offset,
5692 btrfs_item_size_nr(buf, i));
5693 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5694 offset + shift);
5695 btrfs_mark_buffer_dirty(buf);
5699 * We may have moved things, in which case we want to exit so we don't
5700 * write those changes out. Once we have proper abort functionality in
5701 * progs this can be changed to something nicer.
5703 BUG_ON(ret);
5704 return ret;
5708 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5709 * then just return -EIO.
5711 static int try_to_fix_bad_block(struct btrfs_root *root,
5712 struct extent_buffer *buf,
5713 enum btrfs_tree_block_status status)
5715 struct btrfs_trans_handle *trans;
5716 struct ulist *roots;
5717 struct ulist_node *node;
5718 struct btrfs_root *search_root;
5719 struct btrfs_path path;
5720 struct ulist_iterator iter;
5721 struct btrfs_key root_key, key;
5722 int ret;
5724 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5725 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5726 return -EIO;
5728 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5729 if (ret)
5730 return -EIO;
5732 btrfs_init_path(&path);
5733 ULIST_ITER_INIT(&iter);
5734 while ((node = ulist_next(roots, &iter))) {
5735 root_key.objectid = node->val;
5736 root_key.type = BTRFS_ROOT_ITEM_KEY;
5737 root_key.offset = (u64)-1;
5739 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5740 if (IS_ERR(root)) {
5741 ret = -EIO;
5742 break;
5746 trans = btrfs_start_transaction(search_root, 0);
5747 if (IS_ERR(trans)) {
5748 ret = PTR_ERR(trans);
5749 break;
5752 path.lowest_level = btrfs_header_level(buf);
5753 path.skip_check_block = 1;
5754 if (path.lowest_level)
5755 btrfs_node_key_to_cpu(buf, &key, 0);
5756 else
5757 btrfs_item_key_to_cpu(buf, &key, 0);
5758 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5759 if (ret) {
5760 ret = -EIO;
5761 btrfs_commit_transaction(trans, search_root);
5762 break;
5764 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5765 ret = fix_key_order(search_root, &path);
5766 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5767 ret = fix_item_offset(search_root, &path);
5768 if (ret) {
5769 btrfs_commit_transaction(trans, search_root);
5770 break;
5772 btrfs_release_path(&path);
5773 btrfs_commit_transaction(trans, search_root);
5775 ulist_free(roots);
5776 btrfs_release_path(&path);
5777 return ret;
5780 static int check_block(struct btrfs_root *root,
5781 struct cache_tree *extent_cache,
5782 struct extent_buffer *buf, u64 flags)
5784 struct extent_record *rec;
5785 struct cache_extent *cache;
5786 struct btrfs_key key;
5787 enum btrfs_tree_block_status status;
5788 int ret = 0;
5789 int level;
5791 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5792 if (!cache)
5793 return 1;
5794 rec = container_of(cache, struct extent_record, cache);
5795 rec->generation = btrfs_header_generation(buf);
5797 level = btrfs_header_level(buf);
5798 if (btrfs_header_nritems(buf) > 0) {
5800 if (level == 0)
5801 btrfs_item_key_to_cpu(buf, &key, 0);
5802 else
5803 btrfs_node_key_to_cpu(buf, &key, 0);
5805 rec->info_objectid = key.objectid;
5807 rec->info_level = level;
5809 if (btrfs_is_leaf(buf))
5810 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5811 else
5812 status = btrfs_check_node(root, &rec->parent_key, buf);
5814 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5815 if (repair)
5816 status = try_to_fix_bad_block(root, buf, status);
5817 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5818 ret = -EIO;
5819 fprintf(stderr, "bad block %llu\n",
5820 (unsigned long long)buf->start);
5821 } else {
5823 * Signal to callers we need to start the scan over
5824 * again since we'll have cowed blocks.
5826 ret = -EAGAIN;
5828 } else {
5829 rec->content_checked = 1;
5830 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5831 rec->owner_ref_checked = 1;
5832 else {
5833 ret = check_owner_ref(root, rec, buf);
5834 if (!ret)
5835 rec->owner_ref_checked = 1;
5838 if (!ret)
5839 maybe_free_extent_rec(extent_cache, rec);
5840 return ret;
5843 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5844 u64 parent, u64 root)
5846 struct list_head *cur = rec->backrefs.next;
5847 struct extent_backref *node;
5848 struct tree_backref *back;
5850 while(cur != &rec->backrefs) {
5851 node = to_extent_backref(cur);
5852 cur = cur->next;
5853 if (node->is_data)
5854 continue;
5855 back = to_tree_backref(node);
5856 if (parent > 0) {
5857 if (!node->full_backref)
5858 continue;
5859 if (parent == back->parent)
5860 return back;
5861 } else {
5862 if (node->full_backref)
5863 continue;
5864 if (back->root == root)
5865 return back;
5868 return NULL;
5871 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5872 u64 parent, u64 root)
5874 struct tree_backref *ref = malloc(sizeof(*ref));
5876 if (!ref)
5877 return NULL;
5878 memset(&ref->node, 0, sizeof(ref->node));
5879 if (parent > 0) {
5880 ref->parent = parent;
5881 ref->node.full_backref = 1;
5882 } else {
5883 ref->root = root;
5884 ref->node.full_backref = 0;
5886 list_add_tail(&ref->node.list, &rec->backrefs);
5888 return ref;
5891 static struct data_backref *find_data_backref(struct extent_record *rec,
5892 u64 parent, u64 root,
5893 u64 owner, u64 offset,
5894 int found_ref,
5895 u64 disk_bytenr, u64 bytes)
5897 struct list_head *cur = rec->backrefs.next;
5898 struct extent_backref *node;
5899 struct data_backref *back;
5901 while(cur != &rec->backrefs) {
5902 node = to_extent_backref(cur);
5903 cur = cur->next;
5904 if (!node->is_data)
5905 continue;
5906 back = to_data_backref(node);
5907 if (parent > 0) {
5908 if (!node->full_backref)
5909 continue;
5910 if (parent == back->parent)
5911 return back;
5912 } else {
5913 if (node->full_backref)
5914 continue;
5915 if (back->root == root && back->owner == owner &&
5916 back->offset == offset) {
5917 if (found_ref && node->found_ref &&
5918 (back->bytes != bytes ||
5919 back->disk_bytenr != disk_bytenr))
5920 continue;
5921 return back;
5925 return NULL;
5928 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5929 u64 parent, u64 root,
5930 u64 owner, u64 offset,
5931 u64 max_size)
5933 struct data_backref *ref = malloc(sizeof(*ref));
5935 if (!ref)
5936 return NULL;
5937 memset(&ref->node, 0, sizeof(ref->node));
5938 ref->node.is_data = 1;
5940 if (parent > 0) {
5941 ref->parent = parent;
5942 ref->owner = 0;
5943 ref->offset = 0;
5944 ref->node.full_backref = 1;
5945 } else {
5946 ref->root = root;
5947 ref->owner = owner;
5948 ref->offset = offset;
5949 ref->node.full_backref = 0;
5951 ref->bytes = max_size;
5952 ref->found_ref = 0;
5953 ref->num_refs = 0;
5954 list_add_tail(&ref->node.list, &rec->backrefs);
5955 if (max_size > rec->max_size)
5956 rec->max_size = max_size;
5957 return ref;
5960 /* Check if the type of extent matches with its chunk */
5961 static void check_extent_type(struct extent_record *rec)
5963 struct btrfs_block_group_cache *bg_cache;
5965 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5966 if (!bg_cache)
5967 return;
5969 /* data extent, check chunk directly*/
5970 if (!rec->metadata) {
5971 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5972 rec->wrong_chunk_type = 1;
5973 return;
5976 /* metadata extent, check the obvious case first */
5977 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5978 BTRFS_BLOCK_GROUP_METADATA))) {
5979 rec->wrong_chunk_type = 1;
5980 return;
5984 * Check SYSTEM extent, as it's also marked as metadata, we can only
5985 * make sure it's a SYSTEM extent by its backref
5987 if (!list_empty(&rec->backrefs)) {
5988 struct extent_backref *node;
5989 struct tree_backref *tback;
5990 u64 bg_type;
5992 node = to_extent_backref(rec->backrefs.next);
5993 if (node->is_data) {
5994 /* tree block shouldn't have data backref */
5995 rec->wrong_chunk_type = 1;
5996 return;
5998 tback = container_of(node, struct tree_backref, node);
6000 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6001 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6002 else
6003 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6004 if (!(bg_cache->flags & bg_type))
6005 rec->wrong_chunk_type = 1;
6010 * Allocate a new extent record, fill default values from @tmpl and insert int
6011 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6012 * the cache, otherwise it fails.
6014 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6015 struct extent_record *tmpl)
6017 struct extent_record *rec;
6018 int ret = 0;
6020 rec = malloc(sizeof(*rec));
6021 if (!rec)
6022 return -ENOMEM;
6023 rec->start = tmpl->start;
6024 rec->max_size = tmpl->max_size;
6025 rec->nr = max(tmpl->nr, tmpl->max_size);
6026 rec->found_rec = tmpl->found_rec;
6027 rec->content_checked = tmpl->content_checked;
6028 rec->owner_ref_checked = tmpl->owner_ref_checked;
6029 rec->num_duplicates = 0;
6030 rec->metadata = tmpl->metadata;
6031 rec->flag_block_full_backref = FLAG_UNSET;
6032 rec->bad_full_backref = 0;
6033 rec->crossing_stripes = 0;
6034 rec->wrong_chunk_type = 0;
6035 rec->is_root = tmpl->is_root;
6036 rec->refs = tmpl->refs;
6037 rec->extent_item_refs = tmpl->extent_item_refs;
6038 rec->parent_generation = tmpl->parent_generation;
6039 INIT_LIST_HEAD(&rec->backrefs);
6040 INIT_LIST_HEAD(&rec->dups);
6041 INIT_LIST_HEAD(&rec->list);
6042 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6043 rec->cache.start = tmpl->start;
6044 rec->cache.size = tmpl->nr;
6045 ret = insert_cache_extent(extent_cache, &rec->cache);
6046 if (ret) {
6047 free(rec);
6048 return ret;
6050 bytes_used += rec->nr;
6052 if (tmpl->metadata)
6053 rec->crossing_stripes = check_crossing_stripes(global_info,
6054 rec->start, global_info->tree_root->nodesize);
6055 check_extent_type(rec);
6056 return ret;
6060 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6061 * some are hints:
6062 * - refs - if found, increase refs
6063 * - is_root - if found, set
6064 * - content_checked - if found, set
6065 * - owner_ref_checked - if found, set
6067 * If not found, create a new one, initialize and insert.
6069 static int add_extent_rec(struct cache_tree *extent_cache,
6070 struct extent_record *tmpl)
6072 struct extent_record *rec;
6073 struct cache_extent *cache;
6074 int ret = 0;
6075 int dup = 0;
6077 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6078 if (cache) {
6079 rec = container_of(cache, struct extent_record, cache);
6080 if (tmpl->refs)
6081 rec->refs++;
6082 if (rec->nr == 1)
6083 rec->nr = max(tmpl->nr, tmpl->max_size);
6086 * We need to make sure to reset nr to whatever the extent
6087 * record says was the real size, this way we can compare it to
6088 * the backrefs.
6090 if (tmpl->found_rec) {
6091 if (tmpl->start != rec->start || rec->found_rec) {
6092 struct extent_record *tmp;
6094 dup = 1;
6095 if (list_empty(&rec->list))
6096 list_add_tail(&rec->list,
6097 &duplicate_extents);
6100 * We have to do this song and dance in case we
6101 * find an extent record that falls inside of
6102 * our current extent record but does not have
6103 * the same objectid.
6105 tmp = malloc(sizeof(*tmp));
6106 if (!tmp)
6107 return -ENOMEM;
6108 tmp->start = tmpl->start;
6109 tmp->max_size = tmpl->max_size;
6110 tmp->nr = tmpl->nr;
6111 tmp->found_rec = 1;
6112 tmp->metadata = tmpl->metadata;
6113 tmp->extent_item_refs = tmpl->extent_item_refs;
6114 INIT_LIST_HEAD(&tmp->list);
6115 list_add_tail(&tmp->list, &rec->dups);
6116 rec->num_duplicates++;
6117 } else {
6118 rec->nr = tmpl->nr;
6119 rec->found_rec = 1;
6123 if (tmpl->extent_item_refs && !dup) {
6124 if (rec->extent_item_refs) {
6125 fprintf(stderr, "block %llu rec "
6126 "extent_item_refs %llu, passed %llu\n",
6127 (unsigned long long)tmpl->start,
6128 (unsigned long long)
6129 rec->extent_item_refs,
6130 (unsigned long long)tmpl->extent_item_refs);
6132 rec->extent_item_refs = tmpl->extent_item_refs;
6134 if (tmpl->is_root)
6135 rec->is_root = 1;
6136 if (tmpl->content_checked)
6137 rec->content_checked = 1;
6138 if (tmpl->owner_ref_checked)
6139 rec->owner_ref_checked = 1;
6140 memcpy(&rec->parent_key, &tmpl->parent_key,
6141 sizeof(tmpl->parent_key));
6142 if (tmpl->parent_generation)
6143 rec->parent_generation = tmpl->parent_generation;
6144 if (rec->max_size < tmpl->max_size)
6145 rec->max_size = tmpl->max_size;
6148 * A metadata extent can't cross stripe_len boundary, otherwise
6149 * kernel scrub won't be able to handle it.
6150 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6151 * it.
6153 if (tmpl->metadata)
6154 rec->crossing_stripes = check_crossing_stripes(
6155 global_info, rec->start,
6156 global_info->tree_root->nodesize);
6157 check_extent_type(rec);
6158 maybe_free_extent_rec(extent_cache, rec);
6159 return ret;
6162 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6164 return ret;
6167 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6168 u64 parent, u64 root, int found_ref)
6170 struct extent_record *rec;
6171 struct tree_backref *back;
6172 struct cache_extent *cache;
6173 int ret;
6175 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6176 if (!cache) {
6177 struct extent_record tmpl;
6179 memset(&tmpl, 0, sizeof(tmpl));
6180 tmpl.start = bytenr;
6181 tmpl.nr = 1;
6182 tmpl.metadata = 1;
6184 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6185 if (ret)
6186 return ret;
6188 /* really a bug in cache_extent implement now */
6189 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190 if (!cache)
6191 return -ENOENT;
6194 rec = container_of(cache, struct extent_record, cache);
6195 if (rec->start != bytenr) {
6197 * Several cause, from unaligned bytenr to over lapping extents
6199 return -EEXIST;
6202 back = find_tree_backref(rec, parent, root);
6203 if (!back) {
6204 back = alloc_tree_backref(rec, parent, root);
6205 if (!back)
6206 return -ENOMEM;
6209 if (found_ref) {
6210 if (back->node.found_ref) {
6211 fprintf(stderr, "Extent back ref already exists "
6212 "for %llu parent %llu root %llu \n",
6213 (unsigned long long)bytenr,
6214 (unsigned long long)parent,
6215 (unsigned long long)root);
6217 back->node.found_ref = 1;
6218 } else {
6219 if (back->node.found_extent_tree) {
6220 fprintf(stderr, "Extent back ref already exists "
6221 "for %llu parent %llu root %llu \n",
6222 (unsigned long long)bytenr,
6223 (unsigned long long)parent,
6224 (unsigned long long)root);
6226 back->node.found_extent_tree = 1;
6228 check_extent_type(rec);
6229 maybe_free_extent_rec(extent_cache, rec);
6230 return 0;
6233 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6234 u64 parent, u64 root, u64 owner, u64 offset,
6235 u32 num_refs, int found_ref, u64 max_size)
6237 struct extent_record *rec;
6238 struct data_backref *back;
6239 struct cache_extent *cache;
6240 int ret;
6242 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243 if (!cache) {
6244 struct extent_record tmpl;
6246 memset(&tmpl, 0, sizeof(tmpl));
6247 tmpl.start = bytenr;
6248 tmpl.nr = 1;
6249 tmpl.max_size = max_size;
6251 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6252 if (ret)
6253 return ret;
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256 if (!cache)
6257 abort();
6260 rec = container_of(cache, struct extent_record, cache);
6261 if (rec->max_size < max_size)
6262 rec->max_size = max_size;
6265 * If found_ref is set then max_size is the real size and must match the
6266 * existing refs. So if we have already found a ref then we need to
6267 * make sure that this ref matches the existing one, otherwise we need
6268 * to add a new backref so we can notice that the backrefs don't match
6269 * and we need to figure out who is telling the truth. This is to
6270 * account for that awful fsync bug I introduced where we'd end up with
6271 * a btrfs_file_extent_item that would have its length include multiple
6272 * prealloc extents or point inside of a prealloc extent.
6274 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6275 bytenr, max_size);
6276 if (!back) {
6277 back = alloc_data_backref(rec, parent, root, owner, offset,
6278 max_size);
6279 BUG_ON(!back);
6282 if (found_ref) {
6283 BUG_ON(num_refs != 1);
6284 if (back->node.found_ref)
6285 BUG_ON(back->bytes != max_size);
6286 back->node.found_ref = 1;
6287 back->found_ref += 1;
6288 back->bytes = max_size;
6289 back->disk_bytenr = bytenr;
6290 rec->refs += 1;
6291 rec->content_checked = 1;
6292 rec->owner_ref_checked = 1;
6293 } else {
6294 if (back->node.found_extent_tree) {
6295 fprintf(stderr, "Extent back ref already exists "
6296 "for %llu parent %llu root %llu "
6297 "owner %llu offset %llu num_refs %lu\n",
6298 (unsigned long long)bytenr,
6299 (unsigned long long)parent,
6300 (unsigned long long)root,
6301 (unsigned long long)owner,
6302 (unsigned long long)offset,
6303 (unsigned long)num_refs);
6305 back->num_refs = num_refs;
6306 back->node.found_extent_tree = 1;
6308 maybe_free_extent_rec(extent_cache, rec);
6309 return 0;
6312 static int add_pending(struct cache_tree *pending,
6313 struct cache_tree *seen, u64 bytenr, u32 size)
6315 int ret;
6316 ret = add_cache_extent(seen, bytenr, size);
6317 if (ret)
6318 return ret;
6319 add_cache_extent(pending, bytenr, size);
6320 return 0;
6323 static int pick_next_pending(struct cache_tree *pending,
6324 struct cache_tree *reada,
6325 struct cache_tree *nodes,
6326 u64 last, struct block_info *bits, int bits_nr,
6327 int *reada_bits)
6329 unsigned long node_start = last;
6330 struct cache_extent *cache;
6331 int ret;
6333 cache = search_cache_extent(reada, 0);
6334 if (cache) {
6335 bits[0].start = cache->start;
6336 bits[0].size = cache->size;
6337 *reada_bits = 1;
6338 return 1;
6340 *reada_bits = 0;
6341 if (node_start > 32768)
6342 node_start -= 32768;
6344 cache = search_cache_extent(nodes, node_start);
6345 if (!cache)
6346 cache = search_cache_extent(nodes, 0);
6348 if (!cache) {
6349 cache = search_cache_extent(pending, 0);
6350 if (!cache)
6351 return 0;
6352 ret = 0;
6353 do {
6354 bits[ret].start = cache->start;
6355 bits[ret].size = cache->size;
6356 cache = next_cache_extent(cache);
6357 ret++;
6358 } while (cache && ret < bits_nr);
6359 return ret;
6362 ret = 0;
6363 do {
6364 bits[ret].start = cache->start;
6365 bits[ret].size = cache->size;
6366 cache = next_cache_extent(cache);
6367 ret++;
6368 } while (cache && ret < bits_nr);
6370 if (bits_nr - ret > 8) {
6371 u64 lookup = bits[0].start + bits[0].size;
6372 struct cache_extent *next;
6373 next = search_cache_extent(pending, lookup);
6374 while(next) {
6375 if (next->start - lookup > 32768)
6376 break;
6377 bits[ret].start = next->start;
6378 bits[ret].size = next->size;
6379 lookup = next->start + next->size;
6380 ret++;
6381 if (ret == bits_nr)
6382 break;
6383 next = next_cache_extent(next);
6384 if (!next)
6385 break;
6388 return ret;
6391 static void free_chunk_record(struct cache_extent *cache)
6393 struct chunk_record *rec;
6395 rec = container_of(cache, struct chunk_record, cache);
6396 list_del_init(&rec->list);
6397 list_del_init(&rec->dextents);
6398 free(rec);
6401 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6403 cache_tree_free_extents(chunk_cache, free_chunk_record);
6406 static void free_device_record(struct rb_node *node)
6408 struct device_record *rec;
6410 rec = container_of(node, struct device_record, node);
6411 free(rec);
6414 FREE_RB_BASED_TREE(device_cache, free_device_record);
6416 int insert_block_group_record(struct block_group_tree *tree,
6417 struct block_group_record *bg_rec)
6419 int ret;
6421 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6422 if (ret)
6423 return ret;
6425 list_add_tail(&bg_rec->list, &tree->block_groups);
6426 return 0;
6429 static void free_block_group_record(struct cache_extent *cache)
6431 struct block_group_record *rec;
6433 rec = container_of(cache, struct block_group_record, cache);
6434 list_del_init(&rec->list);
6435 free(rec);
6438 void free_block_group_tree(struct block_group_tree *tree)
6440 cache_tree_free_extents(&tree->tree, free_block_group_record);
6443 int insert_device_extent_record(struct device_extent_tree *tree,
6444 struct device_extent_record *de_rec)
6446 int ret;
6449 * Device extent is a bit different from the other extents, because
6450 * the extents which belong to the different devices may have the
6451 * same start and size, so we need use the special extent cache
6452 * search/insert functions.
6454 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6455 if (ret)
6456 return ret;
6458 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6459 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6460 return 0;
6463 static void free_device_extent_record(struct cache_extent *cache)
6465 struct device_extent_record *rec;
6467 rec = container_of(cache, struct device_extent_record, cache);
6468 if (!list_empty(&rec->chunk_list))
6469 list_del_init(&rec->chunk_list);
6470 if (!list_empty(&rec->device_list))
6471 list_del_init(&rec->device_list);
6472 free(rec);
6475 void free_device_extent_tree(struct device_extent_tree *tree)
6477 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID become tree
 * backrefs, all others become data backrefs carrying the v0 ref count.  The
 * key's offset is passed as the parent in both cases.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6501 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6502 struct btrfs_key *key,
6503 int slot)
6505 struct btrfs_chunk *ptr;
6506 struct chunk_record *rec;
6507 int num_stripes, i;
6509 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6510 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6512 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6513 if (!rec) {
6514 fprintf(stderr, "memory allocation failed\n");
6515 exit(-1);
6518 INIT_LIST_HEAD(&rec->list);
6519 INIT_LIST_HEAD(&rec->dextents);
6520 rec->bg_rec = NULL;
6522 rec->cache.start = key->offset;
6523 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6525 rec->generation = btrfs_header_generation(leaf);
6527 rec->objectid = key->objectid;
6528 rec->type = key->type;
6529 rec->offset = key->offset;
6531 rec->length = rec->cache.size;
6532 rec->owner = btrfs_chunk_owner(leaf, ptr);
6533 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6534 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6535 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6536 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6537 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6538 rec->num_stripes = num_stripes;
6539 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6541 for (i = 0; i < rec->num_stripes; ++i) {
6542 rec->stripes[i].devid =
6543 btrfs_stripe_devid_nr(leaf, ptr, i);
6544 rec->stripes[i].offset =
6545 btrfs_stripe_offset_nr(leaf, ptr, i);
6546 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6547 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6548 BTRFS_UUID_SIZE);
6551 return rec;
6554 static int process_chunk_item(struct cache_tree *chunk_cache,
6555 struct btrfs_key *key, struct extent_buffer *eb,
6556 int slot)
6558 struct chunk_record *rec;
6559 struct btrfs_chunk *chunk;
6560 int ret = 0;
6562 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6564 * Do extra check for this chunk item,
6566 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6567 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6568 * and owner<->key_type check.
6570 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6571 key->offset);
6572 if (ret < 0) {
6573 error("chunk(%llu, %llu) is not valid, ignore it",
6574 key->offset, btrfs_chunk_length(eb, chunk));
6575 return 0;
6577 rec = btrfs_new_chunk_record(eb, key, slot);
6578 ret = insert_cache_extent(chunk_cache, &rec->cache);
6579 if (ret) {
6580 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6581 rec->offset, rec->length);
6582 free(rec);
6585 return ret;
6588 static int process_device_item(struct rb_root *dev_cache,
6589 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6591 struct btrfs_dev_item *ptr;
6592 struct device_record *rec;
6593 int ret = 0;
6595 ptr = btrfs_item_ptr(eb,
6596 slot, struct btrfs_dev_item);
6598 rec = malloc(sizeof(*rec));
6599 if (!rec) {
6600 fprintf(stderr, "memory allocation failed\n");
6601 return -ENOMEM;
6604 rec->devid = key->offset;
6605 rec->generation = btrfs_header_generation(eb);
6607 rec->objectid = key->objectid;
6608 rec->type = key->type;
6609 rec->offset = key->offset;
6611 rec->devid = btrfs_device_id(eb, ptr);
6612 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6613 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6615 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6616 if (ret) {
6617 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6618 free(rec);
6621 return ret;
6624 struct block_group_record *
6625 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6626 int slot)
6628 struct btrfs_block_group_item *ptr;
6629 struct block_group_record *rec;
6631 rec = calloc(1, sizeof(*rec));
6632 if (!rec) {
6633 fprintf(stderr, "memory allocation failed\n");
6634 exit(-1);
6637 rec->cache.start = key->objectid;
6638 rec->cache.size = key->offset;
6640 rec->generation = btrfs_header_generation(leaf);
6642 rec->objectid = key->objectid;
6643 rec->type = key->type;
6644 rec->offset = key->offset;
6646 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6647 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6649 INIT_LIST_HEAD(&rec->list);
6651 return rec;
6654 static int process_block_group_item(struct block_group_tree *block_group_cache,
6655 struct btrfs_key *key,
6656 struct extent_buffer *eb, int slot)
6658 struct block_group_record *rec;
6659 int ret = 0;
6661 rec = btrfs_new_block_group_record(eb, key, slot);
6662 ret = insert_block_group_record(block_group_cache, rec);
6663 if (ret) {
6664 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6665 rec->objectid, rec->offset);
6666 free(rec);
6669 return ret;
6672 struct device_extent_record *
6673 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6674 struct btrfs_key *key, int slot)
6676 struct device_extent_record *rec;
6677 struct btrfs_dev_extent *ptr;
6679 rec = calloc(1, sizeof(*rec));
6680 if (!rec) {
6681 fprintf(stderr, "memory allocation failed\n");
6682 exit(-1);
6685 rec->cache.objectid = key->objectid;
6686 rec->cache.start = key->offset;
6688 rec->generation = btrfs_header_generation(leaf);
6690 rec->objectid = key->objectid;
6691 rec->type = key->type;
6692 rec->offset = key->offset;
6694 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6695 rec->chunk_objecteid =
6696 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6697 rec->chunk_offset =
6698 btrfs_dev_extent_chunk_offset(leaf, ptr);
6699 rec->length = btrfs_dev_extent_length(leaf, ptr);
6700 rec->cache.size = rec->length;
6702 INIT_LIST_HEAD(&rec->chunk_list);
6703 INIT_LIST_HEAD(&rec->device_list);
6705 return rec;
6708 static int
6709 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6710 struct btrfs_key *key, struct extent_buffer *eb,
6711 int slot)
6713 struct device_extent_record *rec;
6714 int ret;
6716 rec = btrfs_new_device_extent_record(eb, key, slot);
6717 ret = insert_device_extent_record(dev_extent_cache, rec);
6718 if (ret) {
6719 fprintf(stderr,
6720 "Device extent[%llu, %llu, %llu] existed.\n",
6721 rec->objectid, rec->offset, rec->length);
6722 free(rec);
6725 return ret;
6728 static int process_extent_item(struct btrfs_root *root,
6729 struct cache_tree *extent_cache,
6730 struct extent_buffer *eb, int slot)
6732 struct btrfs_extent_item *ei;
6733 struct btrfs_extent_inline_ref *iref;
6734 struct btrfs_extent_data_ref *dref;
6735 struct btrfs_shared_data_ref *sref;
6736 struct btrfs_key key;
6737 struct extent_record tmpl;
6738 unsigned long end;
6739 unsigned long ptr;
6740 int ret;
6741 int type;
6742 u32 item_size = btrfs_item_size_nr(eb, slot);
6743 u64 refs = 0;
6744 u64 offset;
6745 u64 num_bytes;
6746 int metadata = 0;
6748 btrfs_item_key_to_cpu(eb, &key, slot);
6750 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6751 metadata = 1;
6752 num_bytes = root->nodesize;
6753 } else {
6754 num_bytes = key.offset;
6757 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6758 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6759 key.objectid, root->sectorsize);
6760 return -EIO;
6762 if (item_size < sizeof(*ei)) {
6763 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6764 struct btrfs_extent_item_v0 *ei0;
6765 BUG_ON(item_size != sizeof(*ei0));
6766 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6767 refs = btrfs_extent_refs_v0(eb, ei0);
6768 #else
6769 BUG();
6770 #endif
6771 memset(&tmpl, 0, sizeof(tmpl));
6772 tmpl.start = key.objectid;
6773 tmpl.nr = num_bytes;
6774 tmpl.extent_item_refs = refs;
6775 tmpl.metadata = metadata;
6776 tmpl.found_rec = 1;
6777 tmpl.max_size = num_bytes;
6779 return add_extent_rec(extent_cache, &tmpl);
6782 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6783 refs = btrfs_extent_refs(eb, ei);
6784 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6785 metadata = 1;
6786 else
6787 metadata = 0;
6788 if (metadata && num_bytes != root->nodesize) {
6789 error("ignore invalid metadata extent, length %llu does not equal to %u",
6790 num_bytes, root->nodesize);
6791 return -EIO;
6793 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6794 error("ignore invalid data extent, length %llu is not aligned to %u",
6795 num_bytes, root->sectorsize);
6796 return -EIO;
6799 memset(&tmpl, 0, sizeof(tmpl));
6800 tmpl.start = key.objectid;
6801 tmpl.nr = num_bytes;
6802 tmpl.extent_item_refs = refs;
6803 tmpl.metadata = metadata;
6804 tmpl.found_rec = 1;
6805 tmpl.max_size = num_bytes;
6806 add_extent_rec(extent_cache, &tmpl);
6808 ptr = (unsigned long)(ei + 1);
6809 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6810 key.type == BTRFS_EXTENT_ITEM_KEY)
6811 ptr += sizeof(struct btrfs_tree_block_info);
6813 end = (unsigned long)ei + item_size;
6814 while (ptr < end) {
6815 iref = (struct btrfs_extent_inline_ref *)ptr;
6816 type = btrfs_extent_inline_ref_type(eb, iref);
6817 offset = btrfs_extent_inline_ref_offset(eb, iref);
6818 switch (type) {
6819 case BTRFS_TREE_BLOCK_REF_KEY:
6820 ret = add_tree_backref(extent_cache, key.objectid,
6821 0, offset, 0);
6822 if (ret < 0)
6823 error("add_tree_backref failed: %s",
6824 strerror(-ret));
6825 break;
6826 case BTRFS_SHARED_BLOCK_REF_KEY:
6827 ret = add_tree_backref(extent_cache, key.objectid,
6828 offset, 0, 0);
6829 if (ret < 0)
6830 error("add_tree_backref failed: %s",
6831 strerror(-ret));
6832 break;
6833 case BTRFS_EXTENT_DATA_REF_KEY:
6834 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6835 add_data_backref(extent_cache, key.objectid, 0,
6836 btrfs_extent_data_ref_root(eb, dref),
6837 btrfs_extent_data_ref_objectid(eb,
6838 dref),
6839 btrfs_extent_data_ref_offset(eb, dref),
6840 btrfs_extent_data_ref_count(eb, dref),
6841 0, num_bytes);
6842 break;
6843 case BTRFS_SHARED_DATA_REF_KEY:
6844 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6845 add_data_backref(extent_cache, key.objectid, offset,
6846 0, 0, 0,
6847 btrfs_shared_data_ref_count(eb, sref),
6848 0, num_bytes);
6849 break;
6850 default:
6851 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6852 key.objectid, key.type, num_bytes);
6853 goto out;
6855 ptr += btrfs_extent_inline_ref_size(type);
6857 WARN_ON(ptr > end);
6858 out:
6859 return 0;
/*
 * Check that the range [offset, offset + bytes) of block group 'cache', with
 * every superblock mirror stripe that maps into the block group carved out,
 * is described by one free space entry in cache->free_space_ctl whose offset
 * and length match exactly.
 *
 * On a match the entry is unlinked from the free space ctl and freed.
 *
 * Returns 0 on a match (or when a stripe covers the whole remaining range),
 * -EINVAL when no entry is found or its offset/length differ, or the error
 * returned by btrfs_rmap_block() or by the recursive call.
 */
6862 static int check_cache_range(struct btrfs_root *root,
6863 struct btrfs_block_group_cache *cache,
6864 u64 offset, u64 bytes)
6866 struct btrfs_free_space *entry;
6867 u64 *logical;
6868 u64 bytenr;
6869 int stripe_len;
6870 int i, nr, ret;
/* Trim the range around each superblock mirror that maps into this group. */
6872 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6873 bytenr = btrfs_sb_offset(i);
6874 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6875 cache->key.objectid, bytenr, 0,
6876 &logical, &nr, &stripe_len);
6877 if (ret)
6878 return ret;
/* Walk the nr logical addresses reported for this mirror, last to first. */
6880 while (nr--) {
/* Stripe ends at or before the range start: no overlap. */
6881 if (logical[nr] + stripe_len <= offset)
6882 continue;
/* Stripe starts at or after the range end: no overlap. */
6883 if (offset + bytes <= logical[nr])
6884 continue;
/* Stripe begins exactly at the range start: drop it from the front. */
6885 if (logical[nr] == offset) {
6886 if (stripe_len >= bytes) {
6887 free(logical);
6888 return 0;
6890 bytes -= stripe_len;
6891 offset += stripe_len;
/* Stripe begins before the range: keep only what lies past the stripe end. */
6892 } else if (logical[nr] < offset) {
6893 if (logical[nr] + stripe_len >=
6894 offset + bytes) {
6895 free(logical);
6896 return 0;
6898 bytes = (offset + bytes) -
6899 (logical[nr] + stripe_len);
6900 offset = logical[nr] + stripe_len;
6901 } else {
6903 * Could be tricky, the super may land in the
6904 * middle of the area we're checking. First
6905 * check the easiest case, it's at the end.
6907 if (logical[nr] + stripe_len >=
6908 bytes + offset) {
6909 bytes = logical[nr] - offset;
6910 continue;
6913 /* Check the left side */
6914 ret = check_cache_range(root, cache,
6915 offset,
6916 logical[nr] - offset);
6917 if (ret) {
6918 free(logical);
6919 return ret;
6922 /* Now we continue with the right side */
6923 bytes = (offset + bytes) -
6924 (logical[nr] + stripe_len);
6925 offset = logical[nr] + stripe_len;
/* Release the address array filled in by btrfs_rmap_block() for this mirror. */
6929 free(logical);
/* Whatever is left of the range must match one free space entry exactly. */
6932 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6933 if (!entry) {
6934 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6935 offset, offset+bytes);
6936 return -EINVAL;
6939 if (entry->offset != offset) {
6940 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6941 entry->offset);
6942 return -EINVAL;
6945 if (entry->bytes != bytes) {
6946 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6947 bytes, entry->bytes, offset);
6948 return -EINVAL;
/* Consume the verified entry so leftover entries can be detected by the caller. */
6951 unlink_free_space(cache->free_space_ctl, entry);
6952 free(entry);
6953 return 0;
6956 static int verify_space_cache(struct btrfs_root *root,
6957 struct btrfs_block_group_cache *cache)
6959 struct btrfs_path path;
6960 struct extent_buffer *leaf;
6961 struct btrfs_key key;
6962 u64 last;
6963 int ret = 0;
6965 root = root->fs_info->extent_root;
6967 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6969 btrfs_init_path(&path);
6970 key.objectid = last;
6971 key.offset = 0;
6972 key.type = BTRFS_EXTENT_ITEM_KEY;
6973 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6974 if (ret < 0)
6975 goto out;
6976 ret = 0;
6977 while (1) {
6978 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6979 ret = btrfs_next_leaf(root, &path);
6980 if (ret < 0)
6981 goto out;
6982 if (ret > 0) {
6983 ret = 0;
6984 break;
6987 leaf = path.nodes[0];
6988 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6989 if (key.objectid >= cache->key.offset + cache->key.objectid)
6990 break;
6991 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6992 key.type != BTRFS_METADATA_ITEM_KEY) {
6993 path.slots[0]++;
6994 continue;
6997 if (last == key.objectid) {
6998 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6999 last = key.objectid + key.offset;
7000 else
7001 last = key.objectid + root->nodesize;
7002 path.slots[0]++;
7003 continue;
7006 ret = check_cache_range(root, cache, last,
7007 key.objectid - last);
7008 if (ret)
7009 break;
7010 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7011 last = key.objectid + key.offset;
7012 else
7013 last = key.objectid + root->nodesize;
7014 path.slots[0]++;
7017 if (last < cache->key.objectid + cache->key.offset)
7018 ret = check_cache_range(root, cache, last,
7019 cache->key.objectid +
7020 cache->key.offset - last);
7022 out:
7023 btrfs_release_path(&path);
7025 if (!ret &&
7026 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7027 fprintf(stderr, "There are still entries left in the space "
7028 "cache\n");
7029 ret = -EINVAL;
7032 return ret;
7035 static int check_space_cache(struct btrfs_root *root)
7037 struct btrfs_block_group_cache *cache;
7038 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7039 int ret;
7040 int error = 0;
7042 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7043 btrfs_super_generation(root->fs_info->super_copy) !=
7044 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7045 printf("cache and super generation don't match, space cache "
7046 "will be invalidated\n");
7047 return 0;
7050 if (ctx.progress_enabled) {
7051 ctx.tp = TASK_FREE_SPACE;
7052 task_start(ctx.info);
7055 while (1) {
7056 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7057 if (!cache)
7058 break;
7060 start = cache->key.objectid + cache->key.offset;
7061 if (!cache->free_space_ctl) {
7062 if (btrfs_init_free_space_ctl(cache,
7063 root->sectorsize)) {
7064 ret = -ENOMEM;
7065 break;
7067 } else {
7068 btrfs_remove_free_space_cache(cache);
7071 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7072 ret = exclude_super_stripes(root, cache);
7073 if (ret) {
7074 fprintf(stderr, "could not exclude super stripes: %s\n",
7075 strerror(-ret));
7076 error++;
7077 continue;
7079 ret = load_free_space_tree(root->fs_info, cache);
7080 free_excluded_extents(root, cache);
7081 if (ret < 0) {
7082 fprintf(stderr, "could not load free space tree: %s\n",
7083 strerror(-ret));
7084 error++;
7085 continue;
7087 error += ret;
7088 } else {
7089 ret = load_free_space_cache(root->fs_info, cache);
7090 if (!ret)
7091 continue;
7094 ret = verify_space_cache(root, cache);
7095 if (ret) {
7096 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7097 cache->key.objectid);
7098 error++;
7102 task_stop(ctx.info);
7104 return error ? -EINVAL : 0;
/*
 * Read num_bytes of data starting at logical address bytenr and compare the
 * checksum of every sectorsize block with the checksums stored in leaf 'eb',
 * beginning at byte offset leaf_offset inside the leaf.
 *
 * A mismatch is reported on stderr and, while more mirrors are available,
 * the read is retried from the current offset on the next mirror.  A
 * mismatch by itself does not change the return value.
 *
 * Returns -EINVAL if num_bytes is not a multiple of the sector size, -ENOMEM
 * if the data buffer cannot be allocated, the error of read_extent_data() if
 * a read fails, and 0 otherwise.
 */
7107 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7108 u64 num_bytes, unsigned long leaf_offset,
7109 struct extent_buffer *eb) {
7111 u64 offset = 0;
7112 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7113 char *data;
7114 unsigned long csum_offset;
7115 u32 csum;
7116 u32 csum_expected;
7117 u64 read_len;
7118 u64 data_checked = 0;
7119 u64 tmp;
7120 int ret = 0;
7121 int mirror;
7122 int num_copies;
7124 if (num_bytes % root->sectorsize)
7125 return -EINVAL;
7127 data = malloc(num_bytes);
7128 if (!data)
7129 return -ENOMEM;
7131 while (offset < num_bytes) {
/* Each new chunk starts with mirror 0; 'again' re-reads it from another mirror. */
7132 mirror = 0;
7133 again:
7134 read_len = num_bytes - offset;
7135 /* read as much space once a time */
/* read_extent_data() updates read_len to the amount actually read. */
7136 ret = read_extent_data(root, data + offset,
7137 bytenr + offset, &read_len, mirror);
7138 if (ret)
7139 goto out;
7140 data_checked = 0;
7141 /* verify every 4k data's checksum */
7142 while (data_checked < read_len) {
7143 csum = ~(u32)0;
7144 tmp = offset + data_checked;
7146 csum = btrfs_csum_data((char *)data + tmp,
7147 csum, root->sectorsize);
7148 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksum for sector number tmp / sectorsize of this csum item. */
7150 csum_offset = leaf_offset +
7151 tmp / root->sectorsize * csum_size;
/* NOTE(review): csum_size bytes are copied into a u32; assumes csum_size <= 4 - confirm. */
7152 read_extent_buffer(eb, (char *)&csum_expected,
7153 csum_offset, csum_size);
7154 /* try another mirror */
7155 if (csum != csum_expected) {
7156 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7157 mirror, bytenr + tmp,
7158 csum, csum_expected);
7159 num_copies = btrfs_num_copies(
7160 &root->fs_info->mapping_tree,
7161 bytenr, num_bytes);
/* NOTE(review): with this bound the retry stops at mirror num_copies - 1 - confirm intended. */
7162 if (mirror < num_copies - 1) {
7163 mirror += 1;
7164 goto again;
7167 data_checked += root->sectorsize;
7169 offset += read_len;
7171 out:
7172 free(data);
7173 return ret;
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 *
 * The search starts at the last item at or before (bytenr, EXTENT_ITEM, -1)
 * and walks forward, shrinking the range by every overlapping extent item.
 * Only BTRFS_EXTENT_ITEM_KEY items are considered; other key types are
 * skipped.
 *
 * Returns 0 when the whole range is covered, 1 (after printing a message)
 * when part of it is left uncovered, and a negative value when a tree
 * search or leaf step fails.
 */
7176 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7177 u64 num_bytes)
7179 struct btrfs_path path;
7180 struct extent_buffer *leaf;
7181 struct btrfs_key key;
7182 int ret;
7184 btrfs_init_path(&path);
7185 key.objectid = bytenr;
7186 key.type = BTRFS_EXTENT_ITEM_KEY;
7187 key.offset = (u64)-1;
/* Re-entered after the recursive check of the right-hand part of a split range. */
7189 again:
7190 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7191 0, 0);
7192 if (ret < 0) {
7193 fprintf(stderr, "Error looking up extent record %d\n", ret);
7194 btrfs_release_path(&path);
7195 return ret;
7196 } else if (ret) {
/* No exact match: step back to the item just before the search key. */
7197 if (path.slots[0] > 0) {
7198 path.slots[0]--;
7199 } else {
7200 ret = btrfs_prev_leaf(root, &path);
7201 if (ret < 0) {
7202 goto out;
7203 } else if (ret > 0) {
7204 ret = 0;
7205 goto out;
7210 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7213 * Block group items come before extent items if they have the same
7214 * bytenr, so walk back one more just in case. Dear future traveller,
7215 * first congrats on mastering time travel. Now if it's not too much
7216 * trouble could you go back to 2006 and tell Chris to make the
7217 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7218 * EXTENT_ITEM_KEY please?
7220 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7221 if (path.slots[0] > 0) {
7222 path.slots[0]--;
7223 } else {
7224 ret = btrfs_prev_leaf(root, &path);
7225 if (ret < 0) {
7226 goto out;
7227 } else if (ret > 0) {
7228 ret = 0;
7229 goto out;
7232 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Walk forward and subtract every overlapping extent from the range. */
7235 while (num_bytes) {
7236 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7237 ret = btrfs_next_leaf(root, &path);
7238 if (ret < 0) {
7239 fprintf(stderr, "Error going to next leaf "
7240 "%d\n", ret);
7241 btrfs_release_path(&path);
7242 return ret;
7243 } else if (ret) {
7244 break;
7247 leaf = path.nodes[0];
7248 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7249 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7250 path.slots[0]++;
7251 continue;
/* Extent ends before the range starts: not relevant. */
7253 if (key.objectid + key.offset < bytenr) {
7254 path.slots[0]++;
7255 continue;
/* Extent starts beyond the range end: nothing further can overlap. */
7257 if (key.objectid > bytenr + num_bytes)
7258 break;
/* Extent starts exactly at the range start. */
7260 if (key.objectid == bytenr) {
7261 if (key.offset >= num_bytes) {
7262 num_bytes = 0;
7263 break;
7265 num_bytes -= key.offset;
7266 bytenr += key.offset;
/* Extent starts before the range and overlaps its front. */
7267 } else if (key.objectid < bytenr) {
7268 if (key.objectid + key.offset >= bytenr + num_bytes) {
7269 num_bytes = 0;
7270 break;
7272 num_bytes = (bytenr + num_bytes) -
7273 (key.objectid + key.offset);
7274 bytenr = key.objectid + key.offset;
7275 } else {
7276 if (key.objectid + key.offset < bytenr + num_bytes) {
7277 u64 new_start = key.objectid + key.offset;
7278 u64 new_bytes = bytenr + num_bytes - new_start;
7281 * Weird case, the extent is in the middle of
7282 * our range, we'll have to search one side
7283 * and then the other. Not sure if this happens
7284 * in real life, but no harm in coding it up
7285 * anyway just in case.
7287 btrfs_release_path(&path);
7288 ret = check_extent_exists(root, new_start,
7289 new_bytes);
7290 if (ret) {
7291 fprintf(stderr, "Right section didn't "
7292 "have a record\n");
7293 break;
/* NOTE(review): 'key' now holds the found item's key and is reused as the search key at 'again' - confirm intended. */
7295 num_bytes = key.objectid - bytenr;
7296 goto again;
/* Extent covers the tail of the range: only the part before it remains. */
7298 num_bytes = key.objectid - bytenr;
7300 path.slots[0]++;
7302 ret = 0;
7304 out:
/* Anything still uncovered at this point has no extent record. */
7305 if (num_bytes && !ret) {
7306 fprintf(stderr, "There are no extents for csum range "
7307 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7308 ret = 1;
7311 btrfs_release_path(&path);
7312 return ret;
7315 static int check_csums(struct btrfs_root *root)
7317 struct btrfs_path path;
7318 struct extent_buffer *leaf;
7319 struct btrfs_key key;
7320 u64 offset = 0, num_bytes = 0;
7321 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7322 int errors = 0;
7323 int ret;
7324 u64 data_len;
7325 unsigned long leaf_offset;
7327 root = root->fs_info->csum_root;
7328 if (!extent_buffer_uptodate(root->node)) {
7329 fprintf(stderr, "No valid csum tree found\n");
7330 return -ENOENT;
7333 btrfs_init_path(&path);
7334 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7335 key.type = BTRFS_EXTENT_CSUM_KEY;
7336 key.offset = 0;
7337 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7338 if (ret < 0) {
7339 fprintf(stderr, "Error searching csum tree %d\n", ret);
7340 btrfs_release_path(&path);
7341 return ret;
7344 if (ret > 0 && path.slots[0])
7345 path.slots[0]--;
7346 ret = 0;
7348 while (1) {
7349 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7350 ret = btrfs_next_leaf(root, &path);
7351 if (ret < 0) {
7352 fprintf(stderr, "Error going to next leaf "
7353 "%d\n", ret);
7354 break;
7356 if (ret)
7357 break;
7359 leaf = path.nodes[0];
7361 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7362 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7363 path.slots[0]++;
7364 continue;
7367 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7368 csum_size) * root->sectorsize;
7369 if (!check_data_csum)
7370 goto skip_csum_check;
7371 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7372 ret = check_extent_csums(root, key.offset, data_len,
7373 leaf_offset, leaf);
7374 if (ret)
7375 break;
7376 skip_csum_check:
7377 if (!num_bytes) {
7378 offset = key.offset;
7379 } else if (key.offset != offset + num_bytes) {
7380 ret = check_extent_exists(root, offset, num_bytes);
7381 if (ret) {
7382 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7383 "there is no extent record\n",
7384 offset, offset+num_bytes);
7385 errors++;
7387 offset = key.offset;
7388 num_bytes = 0;
7390 num_bytes += data_len;
7391 path.slots[0]++;
7394 btrfs_release_path(&path);
7395 return errors;
7398 static int is_dropped_key(struct btrfs_key *key,
7399 struct btrfs_key *drop_key) {
7400 if (key->objectid < drop_key->objectid)
7401 return 1;
7402 else if (key->objectid == drop_key->objectid) {
7403 if (key->type < drop_key->type)
7404 return 1;
7405 else if (key->type == drop_key->type) {
7406 if (key->offset < drop_key->offset)
7407 return 1;
7410 return 0;
7414 * Here are the rules for FULL_BACKREF.
7416 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7417 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7418 * FULL_BACKREF set.
7419 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7420 * if it happened after the relocation occurred since we'll have dropped the
7421 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7422 * have no real way to know for sure.
7424 * We process the blocks one root at a time, and we start from the lowest root
7425 * objectid and go to the highest. So we can just lookup the owner backref for
7426 * the record and if we don't find it then we know it doesn't exist and we have
7427 * a FULL BACKREF.
7429 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7430 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7431 * be set or not and then we can check later once we've gathered all the refs.
7433 static int calc_extent_flag(struct cache_tree *extent_cache,
7434 struct extent_buffer *buf,
7435 struct root_item_record *ri,
7436 u64 *flags)
7438 struct extent_record *rec;
7439 struct cache_extent *cache;
7440 struct tree_backref *tback;
7441 u64 owner = 0;
7443 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7444 /* we have added this extent before */
7445 if (!cache)
7446 return -ENOENT;
7448 rec = container_of(cache, struct extent_record, cache);
7451 * Except file/reloc tree, we can not have
7452 * FULL BACKREF MODE
7454 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7455 goto normal;
7457 * root node
7459 if (buf->start == ri->bytenr)
7460 goto normal;
7462 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7463 goto full_backref;
7465 owner = btrfs_header_owner(buf);
7466 if (owner == ri->objectid)
7467 goto normal;
7469 tback = find_tree_backref(rec, 0, owner);
7470 if (!tback)
7471 goto full_backref;
7472 normal:
7473 *flags = 0;
7474 if (rec->flag_block_full_backref != FLAG_UNSET &&
7475 rec->flag_block_full_backref != 0)
7476 rec->bad_full_backref = 1;
7477 return 0;
7478 full_backref:
7479 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7480 if (rec->flag_block_full_backref != FLAG_UNSET &&
7481 rec->flag_block_full_backref != 1)
7482 rec->bad_full_backref = 1;
7483 return 0;
7486 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7488 fprintf(stderr, "Invalid key type(");
7489 print_key_type(stderr, 0, key_type);
7490 fprintf(stderr, ") found in root(");
7491 print_objectid(stderr, rootid, 0);
7492 fprintf(stderr, ")\n");
7496 * Check if the key is valid with its extent buffer.
7498 * This is a early check in case invalid key exists in a extent buffer
7499 * This is not comprehensive yet, but should prevent wrong key/item passed
7500 * further
7502 static int check_type_with_root(u64 rootid, u8 key_type)
7504 switch (key_type) {
7505 /* Only valid in chunk tree */
7506 case BTRFS_DEV_ITEM_KEY:
7507 case BTRFS_CHUNK_ITEM_KEY:
7508 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7509 goto err;
7510 break;
7511 /* valid in csum and log tree */
7512 case BTRFS_CSUM_TREE_OBJECTID:
7513 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7514 is_fstree(rootid)))
7515 goto err;
7516 break;
7517 case BTRFS_EXTENT_ITEM_KEY:
7518 case BTRFS_METADATA_ITEM_KEY:
7519 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7520 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7521 goto err;
7522 break;
7523 case BTRFS_ROOT_ITEM_KEY:
7524 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7525 goto err;
7526 break;
7527 case BTRFS_DEV_EXTENT_KEY:
7528 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7529 goto err;
7530 break;
7532 return 0;
7533 err:
7534 report_mismatch_key_root(key_type, rootid);
7535 return -EINVAL;
/*
 * Process the next pending tree block of the extent scan.
 *
 * pick_next_pending() fills 'bits' with a batch of candidate blocks; when it
 * returns 0 there is nothing left and this function returns 1.  Unless the
 * batch came from the readahead set, readahead is issued for every block in
 * it.  Only bits[0] is processed per call: it is removed from the pending,
 * reada and nodes caches, read from disk, its FULL_BACKREF state is
 * determined and it is validated with check_block().
 *
 * For a leaf, every item is dispatched by key type to fill the extent,
 * chunk, device, block group and device extent caches and the backref
 * records.  For a node, every child pointer gets an extent record and a tree
 * backref and is queued in 'nodes' (level > 1) or 'pending' (level 1).
 *
 * Returns 1 when no block is pending, otherwise the value left in 'ret' by
 * the last step executed before reaching 'out'.
 *
 * NOTE(review): 'rec' stays NULL when bytenr has no entry in extent_cache,
 * yet rec->bad_full_backref / rec->flag_block_full_backref are written
 * unconditionally below - confirm a record always exists at this point.
 */
7538 static int run_next_block(struct btrfs_root *root,
7539 struct block_info *bits,
7540 int bits_nr,
7541 u64 *last,
7542 struct cache_tree *pending,
7543 struct cache_tree *seen,
7544 struct cache_tree *reada,
7545 struct cache_tree *nodes,
7546 struct cache_tree *extent_cache,
7547 struct cache_tree *chunk_cache,
7548 struct rb_root *dev_cache,
7549 struct block_group_tree *block_group_cache,
7550 struct device_extent_tree *dev_extent_cache,
7551 struct root_item_record *ri)
7553 struct extent_buffer *buf;
7554 struct extent_record *rec = NULL;
7555 u64 bytenr;
7556 u32 size;
7557 u64 parent;
7558 u64 owner;
7559 u64 flags;
7560 u64 ptr;
7561 u64 gen = 0;
7562 int ret = 0;
7563 int i;
7564 int nritems;
7565 struct btrfs_key key;
7566 struct cache_extent *cache;
7567 int reada_bits;
7569 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7570 bits_nr, &reada_bits);
7571 if (nritems == 0)
7572 return 1;
/* Batch did not come from the readahead set: record it there and prefetch. */
7574 if (!reada_bits) {
7575 for(i = 0; i < nritems; i++) {
7576 ret = add_cache_extent(reada, bits[i].start,
7577 bits[i].size);
7578 if (ret == -EEXIST)
7579 continue;
7581 /* fixme, get the parent transid */
7582 readahead_tree_block(root, bits[i].start,
7583 bits[i].size, 0);
/* Only the first block of the batch is processed in this call. */
7586 *last = bits[0].start;
7587 bytenr = bits[0].start;
7588 size = bits[0].size;
/* Drop the block from every work queue that may still reference it. */
7590 cache = lookup_cache_extent(pending, bytenr, size);
7591 if (cache) {
7592 remove_cache_extent(pending, cache);
7593 free(cache);
7595 cache = lookup_cache_extent(reada, bytenr, size);
7596 if (cache) {
7597 remove_cache_extent(reada, cache);
7598 free(cache);
7600 cache = lookup_cache_extent(nodes, bytenr, size);
7601 if (cache) {
7602 remove_cache_extent(nodes, cache);
7603 free(cache);
/* Use the generation recorded by the parent pointer, if a record exists. */
7605 cache = lookup_cache_extent(extent_cache, bytenr, size);
7606 if (cache) {
7607 rec = container_of(cache, struct extent_record, cache);
7608 gen = rec->parent_generation;
7611 /* fixme, get the real parent transid */
7612 buf = read_tree_block(root, bytenr, size, gen);
7613 if (!extent_buffer_uptodate(buf)) {
7614 record_bad_block_io(root->fs_info,
7615 extent_cache, bytenr, size);
7616 goto out;
7619 nritems = btrfs_header_nritems(buf);
/*
 * Determine the extent flags: from the extent tree when it is trusted,
 * falling back to calc_extent_flag() when the lookup fails or when the
 * extent tree is being re-initialized.
 */
7621 flags = 0;
7622 if (!init_extent_tree) {
7623 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7624 btrfs_header_level(buf), 1, NULL,
7625 &flags);
7626 if (ret < 0) {
7627 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7628 if (ret < 0) {
7629 fprintf(stderr, "Couldn't calc extent flags\n");
7630 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633 } else {
7634 flags = 0;
7635 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7636 if (ret < 0) {
7637 fprintf(stderr, "Couldn't calc extent flags\n");
7638 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check the flag against the root we arrived from and the RELOC header flag. */
7642 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7643 if (ri != NULL &&
7644 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7645 ri->objectid == btrfs_header_owner(buf)) {
7647 * Ok we got to this block from it's original owner and
7648 * we have FULL_BACKREF set. Relocation can leave
7649 * converted blocks over so this is altogether possible,
7650 * however it's not possible if the generation > the
7651 * last snapshot, so check for this case.
7653 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7654 btrfs_header_generation(buf) > ri->last_snapshot) {
7655 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7656 rec->bad_full_backref = 1;
7659 } else {
7660 if (ri != NULL &&
7661 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7662 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7663 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7664 rec->bad_full_backref = 1;
/* Backrefs added below are keyed by parent (full backref) or by owner root. */
7668 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7669 rec->flag_block_full_backref = 1;
7670 parent = bytenr;
7671 owner = 0;
7672 } else {
7673 rec->flag_block_full_backref = 0;
7674 parent = 0;
7675 owner = btrfs_header_owner(buf);
7678 ret = check_block(root, extent_cache, buf, flags);
7679 if (ret)
7680 goto out;
/* Leaf: dispatch every item by key type. */
7682 if (btrfs_is_leaf(buf)) {
7683 btree_space_waste += btrfs_leaf_free_space(root, buf);
7684 for (i = 0; i < nritems; i++) {
7685 struct btrfs_file_extent_item *fi;
7686 btrfs_item_key_to_cpu(buf, &key, i);
7688 * Check key type against the leaf owner.
7689 * Could filter quite a lot of early error if
7690 * owner is correct
7692 if (check_type_with_root(btrfs_header_owner(buf),
7693 key.type)) {
7694 fprintf(stderr, "ignoring invalid key\n");
7695 continue;
7697 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7698 process_extent_item(root, extent_cache, buf,
7700 continue;
7702 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7703 process_extent_item(root, extent_cache, buf,
7705 continue;
7707 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7708 total_csum_bytes +=
7709 btrfs_item_size_nr(buf, i);
7710 continue;
7712 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7713 process_chunk_item(chunk_cache, &key, buf, i);
7714 continue;
7716 if (key.type == BTRFS_DEV_ITEM_KEY) {
7717 process_device_item(dev_cache, &key, buf, i);
7718 continue;
7720 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7721 process_block_group_item(block_group_cache,
7722 &key, buf, i);
7723 continue;
7725 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7726 process_device_extent_item(dev_extent_cache,
7727 &key, buf, i);
7728 continue;
7731 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7732 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7733 process_extent_ref_v0(extent_cache, buf, i);
7734 #else
7735 BUG();
7736 #endif
7737 continue;
/* Keyed (non-inline) backref items found in the extent tree. */
7740 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7741 ret = add_tree_backref(extent_cache,
7742 key.objectid, 0, key.offset, 0);
7743 if (ret < 0)
7744 error("add_tree_backref failed: %s",
7745 strerror(-ret));
7746 continue;
7748 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7749 ret = add_tree_backref(extent_cache,
7750 key.objectid, key.offset, 0, 0);
7751 if (ret < 0)
7752 error("add_tree_backref failed: %s",
7753 strerror(-ret));
7754 continue;
7756 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7757 struct btrfs_extent_data_ref *ref;
7758 ref = btrfs_item_ptr(buf, i,
7759 struct btrfs_extent_data_ref);
7760 add_data_backref(extent_cache,
7761 key.objectid, 0,
7762 btrfs_extent_data_ref_root(buf, ref),
7763 btrfs_extent_data_ref_objectid(buf,
7764 ref),
7765 btrfs_extent_data_ref_offset(buf, ref),
7766 btrfs_extent_data_ref_count(buf, ref),
7767 0, root->sectorsize);
7768 continue;
7770 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7771 struct btrfs_shared_data_ref *ref;
7772 ref = btrfs_item_ptr(buf, i,
7773 struct btrfs_shared_data_ref);
7774 add_data_backref(extent_cache,
7775 key.objectid, key.offset, 0, 0, 0,
7776 btrfs_shared_data_ref_count(buf, ref),
7777 0, root->sectorsize);
7778 continue;
/* Orphan items outside BTRFS_ORPHAN_OBJECTID are queued on delete_items. */
7780 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7781 struct bad_item *bad;
7783 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7784 continue;
7785 if (!owner)
7786 continue;
7787 bad = malloc(sizeof(struct bad_item));
7788 if (!bad)
7789 continue;
7790 INIT_LIST_HEAD(&bad->list);
7791 memcpy(&bad->key, &key,
7792 sizeof(struct btrfs_key));
7793 bad->root_id = owner;
7794 list_add_tail(&bad->list, &delete_items);
7795 continue;
/* Everything left that matters is a regular/prealloc file extent. */
7797 if (key.type != BTRFS_EXTENT_DATA_KEY)
7798 continue;
7799 fi = btrfs_item_ptr(buf, i,
7800 struct btrfs_file_extent_item);
7801 if (btrfs_file_extent_type(buf, fi) ==
7802 BTRFS_FILE_EXTENT_INLINE)
7803 continue;
/* disk_bytenr 0 is skipped: there is no on-disk extent to account for. */
7804 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7805 continue;
7807 data_bytes_allocated +=
7808 btrfs_file_extent_disk_num_bytes(buf, fi);
7809 if (data_bytes_allocated < root->sectorsize) {
7810 abort();
7812 data_bytes_referenced +=
7813 btrfs_file_extent_num_bytes(buf, fi);
7814 add_data_backref(extent_cache,
7815 btrfs_file_extent_disk_bytenr(buf, fi),
7816 parent, owner, key.objectid, key.offset -
7817 btrfs_file_extent_offset(buf, fi), 1, 1,
7818 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block. */
7820 } else {
7821 int level;
7822 struct btrfs_key first_key;
7824 first_key.objectid = 0;
7826 if (nritems > 0)
7827 btrfs_item_key_to_cpu(buf, &first_key, 0);
7828 level = btrfs_header_level(buf);
7829 for (i = 0; i < nritems; i++) {
7830 struct extent_record tmpl;
7832 ptr = btrfs_node_blockptr(buf, i);
7833 size = root->nodesize;
7834 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children that sort before the root's drop_key at the drop level. */
7835 if (ri != NULL) {
7836 if ((level == ri->drop_level)
7837 && is_dropped_key(&key, &ri->drop_key)) {
7838 continue;
7842 memset(&tmpl, 0, sizeof(tmpl));
7843 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7844 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7845 tmpl.start = ptr;
7846 tmpl.nr = size;
7847 tmpl.refs = 1;
7848 tmpl.metadata = 1;
7849 tmpl.max_size = size;
7850 ret = add_extent_rec(extent_cache, &tmpl);
7851 if (ret < 0)
7852 goto out;
7854 ret = add_tree_backref(extent_cache, ptr, parent,
7855 owner, 1);
7856 if (ret < 0) {
7857 error("add_tree_backref failed: %s",
7858 strerror(-ret));
7859 continue;
/* Children of level > 1 nodes are nodes themselves; level 1 children are leaves. */
7862 if (level > 1) {
7863 add_pending(nodes, seen, ptr, size);
7864 } else {
7865 add_pending(pending, seen, ptr, size);
7868 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7869 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics about the tree blocks seen so far. */
7871 total_btree_bytes += buf->len;
7872 if (fs_root_objectid(btrfs_header_owner(buf)))
7873 total_fs_tree_bytes += buf->len;
7874 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7875 total_extent_tree_bytes += buf->len;
7876 if (!found_old_backref &&
7877 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7878 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7879 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7880 found_old_backref = 1;
7881 out:
7882 free_extent_buffer(buf);
7883 return ret;
7886 static int add_root_to_pending(struct extent_buffer *buf,
7887 struct cache_tree *extent_cache,
7888 struct cache_tree *pending,
7889 struct cache_tree *seen,
7890 struct cache_tree *nodes,
7891 u64 objectid)
7893 struct extent_record tmpl;
7894 int ret;
7896 if (btrfs_header_level(buf) > 0)
7897 add_pending(nodes, seen, buf->start, buf->len);
7898 else
7899 add_pending(pending, seen, buf->start, buf->len);
7901 memset(&tmpl, 0, sizeof(tmpl));
7902 tmpl.start = buf->start;
7903 tmpl.nr = buf->len;
7904 tmpl.is_root = 1;
7905 tmpl.refs = 1;
7906 tmpl.metadata = 1;
7907 tmpl.max_size = buf->len;
7908 add_extent_rec(extent_cache, &tmpl);
7910 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7911 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7912 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7913 0, 1);
7914 else
7915 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7917 return ret;
7920 /* as we fix the tree, we might be deleting blocks that
7921 * we're tracking for repair. This hook makes sure we
7922 * remove any backrefs for blocks as we are fixing them.
7924 static int free_extent_hook(struct btrfs_trans_handle *trans,
7925 struct btrfs_root *root,
7926 u64 bytenr, u64 num_bytes, u64 parent,
7927 u64 root_objectid, u64 owner, u64 offset,
7928 int refs_to_drop)
7930 struct extent_record *rec;
7931 struct cache_extent *cache;
7932 int is_data;
7933 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7935 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7936 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7937 if (!cache)
7938 return 0;
7940 rec = container_of(cache, struct extent_record, cache);
7941 if (is_data) {
7942 struct data_backref *back;
7943 back = find_data_backref(rec, parent, root_objectid, owner,
7944 offset, 1, bytenr, num_bytes);
7945 if (!back)
7946 goto out;
7947 if (back->node.found_ref) {
7948 back->found_ref -= refs_to_drop;
7949 if (rec->refs)
7950 rec->refs -= refs_to_drop;
7952 if (back->node.found_extent_tree) {
7953 back->num_refs -= refs_to_drop;
7954 if (rec->extent_item_refs)
7955 rec->extent_item_refs -= refs_to_drop;
7957 if (back->found_ref == 0)
7958 back->node.found_ref = 0;
7959 if (back->num_refs == 0)
7960 back->node.found_extent_tree = 0;
7962 if (!back->node.found_extent_tree && back->node.found_ref) {
7963 list_del(&back->node.list);
7964 free(back);
7966 } else {
7967 struct tree_backref *back;
7968 back = find_tree_backref(rec, parent, root_objectid);
7969 if (!back)
7970 goto out;
7971 if (back->node.found_ref) {
7972 if (rec->refs)
7973 rec->refs--;
7974 back->node.found_ref = 0;
7976 if (back->node.found_extent_tree) {
7977 if (rec->extent_item_refs)
7978 rec->extent_item_refs--;
7979 back->node.found_extent_tree = 0;
7981 if (!back->node.found_extent_tree && back->node.found_ref) {
7982 list_del(&back->node.list);
7983 free(back);
7986 maybe_free_extent_rec(extent_cache, rec);
7987 out:
7988 return 0;
7991 static int delete_extent_records(struct btrfs_trans_handle *trans,
7992 struct btrfs_root *root,
7993 struct btrfs_path *path,
7994 u64 bytenr)
7996 struct btrfs_key key;
7997 struct btrfs_key found_key;
7998 struct extent_buffer *leaf;
7999 int ret;
8000 int slot;
8003 key.objectid = bytenr;
8004 key.type = (u8)-1;
8005 key.offset = (u64)-1;
8007 while(1) {
8008 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8009 &key, path, 0, 1);
8010 if (ret < 0)
8011 break;
8013 if (ret > 0) {
8014 ret = 0;
8015 if (path->slots[0] == 0)
8016 break;
8017 path->slots[0]--;
8019 ret = 0;
8021 leaf = path->nodes[0];
8022 slot = path->slots[0];
8024 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8025 if (found_key.objectid != bytenr)
8026 break;
8028 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8029 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8030 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8031 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8032 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8033 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8034 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8035 btrfs_release_path(path);
8036 if (found_key.type == 0) {
8037 if (found_key.offset == 0)
8038 break;
8039 key.offset = found_key.offset - 1;
8040 key.type = found_key.type;
8042 key.type = found_key.type - 1;
8043 key.offset = (u64)-1;
8044 continue;
8047 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8048 found_key.objectid, found_key.type, found_key.offset);
8050 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8051 if (ret)
8052 break;
8053 btrfs_release_path(path);
8055 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8056 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8057 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8058 found_key.offset : root->nodesize;
8060 ret = btrfs_update_block_group(trans, root, bytenr,
8061 bytes, 0, 0);
8062 if (ret)
8063 break;
8067 btrfs_release_path(path);
8068 return ret;
8072 * for a single backref, this will allocate a new extent
8073 * and add the backref to it.
8075 static int record_extent(struct btrfs_trans_handle *trans,
8076 struct btrfs_fs_info *info,
8077 struct btrfs_path *path,
8078 struct extent_record *rec,
8079 struct extent_backref *back,
8080 int allocated, u64 flags)
8082 int ret = 0;
8083 struct btrfs_root *extent_root = info->extent_root;
8084 struct extent_buffer *leaf;
8085 struct btrfs_key ins_key;
8086 struct btrfs_extent_item *ei;
8087 struct data_backref *dback;
8088 struct btrfs_tree_block_info *bi;
8090 if (!back->is_data)
8091 rec->max_size = max_t(u64, rec->max_size,
8092 info->extent_root->nodesize);
8094 if (!allocated) {
8095 u32 item_size = sizeof(*ei);
8097 if (!back->is_data)
8098 item_size += sizeof(*bi);
8100 ins_key.objectid = rec->start;
8101 ins_key.offset = rec->max_size;
8102 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8104 ret = btrfs_insert_empty_item(trans, extent_root, path,
8105 &ins_key, item_size);
8106 if (ret)
8107 goto fail;
8109 leaf = path->nodes[0];
8110 ei = btrfs_item_ptr(leaf, path->slots[0],
8111 struct btrfs_extent_item);
8113 btrfs_set_extent_refs(leaf, ei, 0);
8114 btrfs_set_extent_generation(leaf, ei, rec->generation);
8116 if (back->is_data) {
8117 btrfs_set_extent_flags(leaf, ei,
8118 BTRFS_EXTENT_FLAG_DATA);
8119 } else {
8120 struct btrfs_disk_key copy_key;;
8122 bi = (struct btrfs_tree_block_info *)(ei + 1);
8123 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8124 sizeof(*bi));
8126 btrfs_set_disk_key_objectid(&copy_key,
8127 rec->info_objectid);
8128 btrfs_set_disk_key_type(&copy_key, 0);
8129 btrfs_set_disk_key_offset(&copy_key, 0);
8131 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8132 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8134 btrfs_set_extent_flags(leaf, ei,
8135 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8138 btrfs_mark_buffer_dirty(leaf);
8139 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8140 rec->max_size, 1, 0);
8141 if (ret)
8142 goto fail;
8143 btrfs_release_path(path);
8146 if (back->is_data) {
8147 u64 parent;
8148 int i;
8150 dback = to_data_backref(back);
8151 if (back->full_backref)
8152 parent = dback->parent;
8153 else
8154 parent = 0;
8156 for (i = 0; i < dback->found_ref; i++) {
8157 /* if parent != 0, we're doing a full backref
8158 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8159 * just makes the backref allocator create a data
8160 * backref
8162 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8163 rec->start, rec->max_size,
8164 parent,
8165 dback->root,
8166 parent ?
8167 BTRFS_FIRST_FREE_OBJECTID :
8168 dback->owner,
8169 dback->offset);
8170 if (ret)
8171 break;
8173 fprintf(stderr, "adding new data backref"
8174 " on %llu %s %llu owner %llu"
8175 " offset %llu found %d\n",
8176 (unsigned long long)rec->start,
8177 back->full_backref ?
8178 "parent" : "root",
8179 back->full_backref ?
8180 (unsigned long long)parent :
8181 (unsigned long long)dback->root,
8182 (unsigned long long)dback->owner,
8183 (unsigned long long)dback->offset,
8184 dback->found_ref);
8185 } else {
8186 u64 parent;
8187 struct tree_backref *tback;
8189 tback = to_tree_backref(back);
8190 if (back->full_backref)
8191 parent = tback->parent;
8192 else
8193 parent = 0;
8195 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8196 rec->start, rec->max_size,
8197 parent, tback->root, 0, 0);
8198 fprintf(stderr, "adding new tree backref on "
8199 "start %llu len %llu parent %llu root %llu\n",
8200 rec->start, rec->max_size, parent, tback->root);
8202 fail:
8203 btrfs_release_path(path);
8204 return ret;
8207 static struct extent_entry *find_entry(struct list_head *entries,
8208 u64 bytenr, u64 bytes)
8210 struct extent_entry *entry = NULL;
8212 list_for_each_entry(entry, entries, list) {
8213 if (entry->bytenr == bytenr && entry->bytes == bytes)
8214 return entry;
8217 return NULL;
8220 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8222 struct extent_entry *entry, *best = NULL, *prev = NULL;
8224 list_for_each_entry(entry, entries, list) {
8226 * If there are as many broken entries as entries then we know
8227 * not to trust this particular entry.
8229 if (entry->broken == entry->count)
8230 continue;
8233 * Special case, when there are only two entries and 'best' is
8234 * the first one
8236 if (!prev) {
8237 best = entry;
8238 prev = entry;
8239 continue;
8243 * If our current entry == best then we can't be sure our best
8244 * is really the best, so we need to keep searching.
8246 if (best && best->count == entry->count) {
8247 prev = entry;
8248 best = NULL;
8249 continue;
8252 /* Prev == entry, not good enough, have to keep searching */
8253 if (!prev->broken && prev->count == entry->count)
8254 continue;
8256 if (!best)
8257 best = (prev->count > entry->count) ? prev : entry;
8258 else if (best->count < entry->count)
8259 best = entry;
8260 prev = entry;
8263 return best;
8266 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8267 struct data_backref *dback, struct extent_entry *entry)
8269 struct btrfs_trans_handle *trans;
8270 struct btrfs_root *root;
8271 struct btrfs_file_extent_item *fi;
8272 struct extent_buffer *leaf;
8273 struct btrfs_key key;
8274 u64 bytenr, bytes;
8275 int ret, err;
8277 key.objectid = dback->root;
8278 key.type = BTRFS_ROOT_ITEM_KEY;
8279 key.offset = (u64)-1;
8280 root = btrfs_read_fs_root(info, &key);
8281 if (IS_ERR(root)) {
8282 fprintf(stderr, "Couldn't find root for our ref\n");
8283 return -EINVAL;
8287 * The backref points to the original offset of the extent if it was
8288 * split, so we need to search down to the offset we have and then walk
8289 * forward until we find the backref we're looking for.
8291 key.objectid = dback->owner;
8292 key.type = BTRFS_EXTENT_DATA_KEY;
8293 key.offset = dback->offset;
8294 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8295 if (ret < 0) {
8296 fprintf(stderr, "Error looking up ref %d\n", ret);
8297 return ret;
8300 while (1) {
8301 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8302 ret = btrfs_next_leaf(root, path);
8303 if (ret) {
8304 fprintf(stderr, "Couldn't find our ref, next\n");
8305 return -EINVAL;
8308 leaf = path->nodes[0];
8309 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8310 if (key.objectid != dback->owner ||
8311 key.type != BTRFS_EXTENT_DATA_KEY) {
8312 fprintf(stderr, "Couldn't find our ref, search\n");
8313 return -EINVAL;
8315 fi = btrfs_item_ptr(leaf, path->slots[0],
8316 struct btrfs_file_extent_item);
8317 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8318 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8320 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8321 break;
8322 path->slots[0]++;
8325 btrfs_release_path(path);
8327 trans = btrfs_start_transaction(root, 1);
8328 if (IS_ERR(trans))
8329 return PTR_ERR(trans);
8332 * Ok we have the key of the file extent we want to fix, now we can cow
8333 * down to the thing and fix it.
8335 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8336 if (ret < 0) {
8337 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8338 key.objectid, key.type, key.offset, ret);
8339 goto out;
8341 if (ret > 0) {
8342 fprintf(stderr, "Well that's odd, we just found this key "
8343 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8344 key.offset);
8345 ret = -EINVAL;
8346 goto out;
8348 leaf = path->nodes[0];
8349 fi = btrfs_item_ptr(leaf, path->slots[0],
8350 struct btrfs_file_extent_item);
8352 if (btrfs_file_extent_compression(leaf, fi) &&
8353 dback->disk_bytenr != entry->bytenr) {
8354 fprintf(stderr, "Ref doesn't match the record start and is "
8355 "compressed, please take a btrfs-image of this file "
8356 "system and send it to a btrfs developer so they can "
8357 "complete this functionality for bytenr %Lu\n",
8358 dback->disk_bytenr);
8359 ret = -EINVAL;
8360 goto out;
8363 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8364 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8365 } else if (dback->disk_bytenr > entry->bytenr) {
8366 u64 off_diff, offset;
8368 off_diff = dback->disk_bytenr - entry->bytenr;
8369 offset = btrfs_file_extent_offset(leaf, fi);
8370 if (dback->disk_bytenr + offset +
8371 btrfs_file_extent_num_bytes(leaf, fi) >
8372 entry->bytenr + entry->bytes) {
8373 fprintf(stderr, "Ref is past the entry end, please "
8374 "take a btrfs-image of this file system and "
8375 "send it to a btrfs developer, ref %Lu\n",
8376 dback->disk_bytenr);
8377 ret = -EINVAL;
8378 goto out;
8380 offset += off_diff;
8381 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8382 btrfs_set_file_extent_offset(leaf, fi, offset);
8383 } else if (dback->disk_bytenr < entry->bytenr) {
8384 u64 offset;
8386 offset = btrfs_file_extent_offset(leaf, fi);
8387 if (dback->disk_bytenr + offset < entry->bytenr) {
8388 fprintf(stderr, "Ref is before the entry start, please"
8389 " take a btrfs-image of this file system and "
8390 "send it to a btrfs developer, ref %Lu\n",
8391 dback->disk_bytenr);
8392 ret = -EINVAL;
8393 goto out;
8396 offset += dback->disk_bytenr;
8397 offset -= entry->bytenr;
8398 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8399 btrfs_set_file_extent_offset(leaf, fi, offset);
8402 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8405 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8406 * only do this if we aren't using compression, otherwise it's a
8407 * trickier case.
8409 if (!btrfs_file_extent_compression(leaf, fi))
8410 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8411 else
8412 printf("ram bytes may be wrong?\n");
8413 btrfs_mark_buffer_dirty(leaf);
8414 out:
8415 err = btrfs_commit_transaction(trans, root);
8416 btrfs_release_path(path);
8417 return ret ? ret : err;
8420 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8421 struct extent_record *rec)
8423 struct extent_backref *back;
8424 struct data_backref *dback;
8425 struct extent_entry *entry, *best = NULL;
8426 LIST_HEAD(entries);
8427 int nr_entries = 0;
8428 int broken_entries = 0;
8429 int ret = 0;
8430 short mismatch = 0;
8433 * Metadata is easy and the backrefs should always agree on bytenr and
8434 * size, if not we've got bigger issues.
8436 if (rec->metadata)
8437 return 0;
8439 list_for_each_entry(back, &rec->backrefs, list) {
8440 if (back->full_backref || !back->is_data)
8441 continue;
8443 dback = to_data_backref(back);
8446 * We only pay attention to backrefs that we found a real
8447 * backref for.
8449 if (dback->found_ref == 0)
8450 continue;
8453 * For now we only catch when the bytes don't match, not the
8454 * bytenr. We can easily do this at the same time, but I want
8455 * to have a fs image to test on before we just add repair
8456 * functionality willy-nilly so we know we won't screw up the
8457 * repair.
8460 entry = find_entry(&entries, dback->disk_bytenr,
8461 dback->bytes);
8462 if (!entry) {
8463 entry = malloc(sizeof(struct extent_entry));
8464 if (!entry) {
8465 ret = -ENOMEM;
8466 goto out;
8468 memset(entry, 0, sizeof(*entry));
8469 entry->bytenr = dback->disk_bytenr;
8470 entry->bytes = dback->bytes;
8471 list_add_tail(&entry->list, &entries);
8472 nr_entries++;
8476 * If we only have on entry we may think the entries agree when
8477 * in reality they don't so we have to do some extra checking.
8479 if (dback->disk_bytenr != rec->start ||
8480 dback->bytes != rec->nr || back->broken)
8481 mismatch = 1;
8483 if (back->broken) {
8484 entry->broken++;
8485 broken_entries++;
8488 entry->count++;
8491 /* Yay all the backrefs agree, carry on good sir */
8492 if (nr_entries <= 1 && !mismatch)
8493 goto out;
8495 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8496 "%Lu\n", rec->start);
8499 * First we want to see if the backrefs can agree amongst themselves who
8500 * is right, so figure out which one of the entries has the highest
8501 * count.
8503 best = find_most_right_entry(&entries);
8506 * Ok so we may have an even split between what the backrefs think, so
8507 * this is where we use the extent ref to see what it thinks.
8509 if (!best) {
8510 entry = find_entry(&entries, rec->start, rec->nr);
8511 if (!entry && (!broken_entries || !rec->found_rec)) {
8512 fprintf(stderr, "Backrefs don't agree with each other "
8513 "and extent record doesn't agree with anybody,"
8514 " so we can't fix bytenr %Lu bytes %Lu\n",
8515 rec->start, rec->nr);
8516 ret = -EINVAL;
8517 goto out;
8518 } else if (!entry) {
8520 * Ok our backrefs were broken, we'll assume this is the
8521 * correct value and add an entry for this range.
8523 entry = malloc(sizeof(struct extent_entry));
8524 if (!entry) {
8525 ret = -ENOMEM;
8526 goto out;
8528 memset(entry, 0, sizeof(*entry));
8529 entry->bytenr = rec->start;
8530 entry->bytes = rec->nr;
8531 list_add_tail(&entry->list, &entries);
8532 nr_entries++;
8534 entry->count++;
8535 best = find_most_right_entry(&entries);
8536 if (!best) {
8537 fprintf(stderr, "Backrefs and extent record evenly "
8538 "split on who is right, this is going to "
8539 "require user input to fix bytenr %Lu bytes "
8540 "%Lu\n", rec->start, rec->nr);
8541 ret = -EINVAL;
8542 goto out;
8547 * I don't think this can happen currently as we'll abort() if we catch
8548 * this case higher up, but in case somebody removes that we still can't
8549 * deal with it properly here yet, so just bail out of that's the case.
8551 if (best->bytenr != rec->start) {
8552 fprintf(stderr, "Extent start and backref starts don't match, "
8553 "please use btrfs-image on this file system and send "
8554 "it to a btrfs developer so they can make fsck fix "
8555 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8556 rec->start, rec->nr);
8557 ret = -EINVAL;
8558 goto out;
8562 * Ok great we all agreed on an extent record, let's go find the real
8563 * references and fix up the ones that don't match.
8565 list_for_each_entry(back, &rec->backrefs, list) {
8566 if (back->full_backref || !back->is_data)
8567 continue;
8569 dback = to_data_backref(back);
8572 * Still ignoring backrefs that don't have a real ref attached
8573 * to them.
8575 if (dback->found_ref == 0)
8576 continue;
8578 if (dback->bytes == best->bytes &&
8579 dback->disk_bytenr == best->bytenr)
8580 continue;
8582 ret = repair_ref(info, path, dback, best);
8583 if (ret)
8584 goto out;
8588 * Ok we messed with the actual refs, which means we need to drop our
8589 * entire cache and go back and rescan. I know this is a huge pain and
8590 * adds a lot of extra work, but it's the only way to be safe. Once all
8591 * the backrefs agree we may not need to do anything to the extent
8592 * record itself.
8594 ret = -EAGAIN;
8595 out:
8596 while (!list_empty(&entries)) {
8597 entry = list_entry(entries.next, struct extent_entry, list);
8598 list_del_init(&entry->list);
8599 free(entry);
8601 return ret;
8604 static int process_duplicates(struct cache_tree *extent_cache,
8605 struct extent_record *rec)
8607 struct extent_record *good, *tmp;
8608 struct cache_extent *cache;
8609 int ret;
8612 * If we found a extent record for this extent then return, or if we
8613 * have more than one duplicate we are likely going to need to delete
8614 * something.
8616 if (rec->found_rec || rec->num_duplicates > 1)
8617 return 0;
8619 /* Shouldn't happen but just in case */
8620 BUG_ON(!rec->num_duplicates);
8623 * So this happens if we end up with a backref that doesn't match the
8624 * actual extent entry. So either the backref is bad or the extent
8625 * entry is bad. Either way we want to have the extent_record actually
8626 * reflect what we found in the extent_tree, so we need to take the
8627 * duplicate out and use that as the extent_record since the only way we
8628 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8630 remove_cache_extent(extent_cache, &rec->cache);
8632 good = to_extent_record(rec->dups.next);
8633 list_del_init(&good->list);
8634 INIT_LIST_HEAD(&good->backrefs);
8635 INIT_LIST_HEAD(&good->dups);
8636 good->cache.start = good->start;
8637 good->cache.size = good->nr;
8638 good->content_checked = 0;
8639 good->owner_ref_checked = 0;
8640 good->num_duplicates = 0;
8641 good->refs = rec->refs;
8642 list_splice_init(&rec->backrefs, &good->backrefs);
8643 while (1) {
8644 cache = lookup_cache_extent(extent_cache, good->start,
8645 good->nr);
8646 if (!cache)
8647 break;
8648 tmp = container_of(cache, struct extent_record, cache);
8651 * If we find another overlapping extent and it's found_rec is
8652 * set then it's a duplicate and we need to try and delete
8653 * something.
8655 if (tmp->found_rec || tmp->num_duplicates > 0) {
8656 if (list_empty(&good->list))
8657 list_add_tail(&good->list,
8658 &duplicate_extents);
8659 good->num_duplicates += tmp->num_duplicates + 1;
8660 list_splice_init(&tmp->dups, &good->dups);
8661 list_del_init(&tmp->list);
8662 list_add_tail(&tmp->list, &good->dups);
8663 remove_cache_extent(extent_cache, &tmp->cache);
8664 continue;
8668 * Ok we have another non extent item backed extent rec, so lets
8669 * just add it to this extent and carry on like we did above.
8671 good->refs += tmp->refs;
8672 list_splice_init(&tmp->backrefs, &good->backrefs);
8673 remove_cache_extent(extent_cache, &tmp->cache);
8674 free(tmp);
8676 ret = insert_cache_extent(extent_cache, &good->cache);
8677 BUG_ON(ret);
8678 free(rec);
8679 return good->num_duplicates ? 0 : 1;
8682 static int delete_duplicate_records(struct btrfs_root *root,
8683 struct extent_record *rec)
8685 struct btrfs_trans_handle *trans;
8686 LIST_HEAD(delete_list);
8687 struct btrfs_path path;
8688 struct extent_record *tmp, *good, *n;
8689 int nr_del = 0;
8690 int ret = 0, err;
8691 struct btrfs_key key;
8693 btrfs_init_path(&path);
8695 good = rec;
8696 /* Find the record that covers all of the duplicates. */
8697 list_for_each_entry(tmp, &rec->dups, list) {
8698 if (good->start < tmp->start)
8699 continue;
8700 if (good->nr > tmp->nr)
8701 continue;
8703 if (tmp->start + tmp->nr < good->start + good->nr) {
8704 fprintf(stderr, "Ok we have overlapping extents that "
8705 "aren't completely covered by each other, this "
8706 "is going to require more careful thought. "
8707 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8708 tmp->start, tmp->nr, good->start, good->nr);
8709 abort();
8711 good = tmp;
8714 if (good != rec)
8715 list_add_tail(&rec->list, &delete_list);
8717 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8718 if (tmp == good)
8719 continue;
8720 list_move_tail(&tmp->list, &delete_list);
8723 root = root->fs_info->extent_root;
8724 trans = btrfs_start_transaction(root, 1);
8725 if (IS_ERR(trans)) {
8726 ret = PTR_ERR(trans);
8727 goto out;
8730 list_for_each_entry(tmp, &delete_list, list) {
8731 if (tmp->found_rec == 0)
8732 continue;
8733 key.objectid = tmp->start;
8734 key.type = BTRFS_EXTENT_ITEM_KEY;
8735 key.offset = tmp->nr;
8737 /* Shouldn't happen but just in case */
8738 if (tmp->metadata) {
8739 fprintf(stderr, "Well this shouldn't happen, extent "
8740 "record overlaps but is metadata? "
8741 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8742 abort();
8745 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8746 if (ret) {
8747 if (ret > 0)
8748 ret = -EINVAL;
8749 break;
8751 ret = btrfs_del_item(trans, root, &path);
8752 if (ret)
8753 break;
8754 btrfs_release_path(&path);
8755 nr_del++;
8757 err = btrfs_commit_transaction(trans, root);
8758 if (err && !ret)
8759 ret = err;
8760 out:
8761 while (!list_empty(&delete_list)) {
8762 tmp = to_extent_record(delete_list.next);
8763 list_del_init(&tmp->list);
8764 if (tmp == rec)
8765 continue;
8766 free(tmp);
8769 while (!list_empty(&rec->dups)) {
8770 tmp = to_extent_record(rec->dups.next);
8771 list_del_init(&tmp->list);
8772 free(tmp);
8775 btrfs_release_path(&path);
8777 if (!ret && !nr_del)
8778 rec->num_duplicates = 0;
8780 return ret ? ret : nr_del;
8783 static int find_possible_backrefs(struct btrfs_fs_info *info,
8784 struct btrfs_path *path,
8785 struct cache_tree *extent_cache,
8786 struct extent_record *rec)
8788 struct btrfs_root *root;
8789 struct extent_backref *back;
8790 struct data_backref *dback;
8791 struct cache_extent *cache;
8792 struct btrfs_file_extent_item *fi;
8793 struct btrfs_key key;
8794 u64 bytenr, bytes;
8795 int ret;
8797 list_for_each_entry(back, &rec->backrefs, list) {
8798 /* Don't care about full backrefs (poor unloved backrefs) */
8799 if (back->full_backref || !back->is_data)
8800 continue;
8802 dback = to_data_backref(back);
8804 /* We found this one, we don't need to do a lookup */
8805 if (dback->found_ref)
8806 continue;
8808 key.objectid = dback->root;
8809 key.type = BTRFS_ROOT_ITEM_KEY;
8810 key.offset = (u64)-1;
8812 root = btrfs_read_fs_root(info, &key);
8814 /* No root, definitely a bad ref, skip */
8815 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8816 continue;
8817 /* Other err, exit */
8818 if (IS_ERR(root))
8819 return PTR_ERR(root);
8821 key.objectid = dback->owner;
8822 key.type = BTRFS_EXTENT_DATA_KEY;
8823 key.offset = dback->offset;
8824 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8825 if (ret) {
8826 btrfs_release_path(path);
8827 if (ret < 0)
8828 return ret;
8829 /* Didn't find it, we can carry on */
8830 ret = 0;
8831 continue;
8834 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8835 struct btrfs_file_extent_item);
8836 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8837 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8838 btrfs_release_path(path);
8839 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8840 if (cache) {
8841 struct extent_record *tmp;
8842 tmp = container_of(cache, struct extent_record, cache);
8845 * If we found an extent record for the bytenr for this
8846 * particular backref then we can't add it to our
8847 * current extent record. We only want to add backrefs
8848 * that don't have a corresponding extent item in the
8849 * extent tree since they likely belong to this record
8850 * and we need to fix it if it doesn't match bytenrs.
8852 if (tmp->found_rec)
8853 continue;
8856 dback->found_ref += 1;
8857 dback->disk_bytenr = bytenr;
8858 dback->bytes = bytes;
8861 * Set this so the verify backref code knows not to trust the
8862 * values in this backref.
8864 back->broken = 1;
8867 return 0;
8871 * Record orphan data ref into corresponding root.
8873 * Return 0 if the extent item contains data ref and recorded.
8874 * Return 1 if the extent item contains no useful data ref
8875 * On that case, it may contains only shared_dataref or metadata backref
8876 * or the file extent exists(this should be handled by the extent bytenr
8877 * recovery routine)
8878 * Return <0 if something goes wrong.
8880 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8881 struct extent_record *rec)
8883 struct btrfs_key key;
8884 struct btrfs_root *dest_root;
8885 struct extent_backref *back;
8886 struct data_backref *dback;
8887 struct orphan_data_extent *orphan;
8888 struct btrfs_path path;
8889 int recorded_data_ref = 0;
8890 int ret = 0;
8892 if (rec->metadata)
8893 return 1;
8894 btrfs_init_path(&path);
8895 list_for_each_entry(back, &rec->backrefs, list) {
8896 if (back->full_backref || !back->is_data ||
8897 !back->found_extent_tree)
8898 continue;
8899 dback = to_data_backref(back);
8900 if (dback->found_ref)
8901 continue;
8902 key.objectid = dback->root;
8903 key.type = BTRFS_ROOT_ITEM_KEY;
8904 key.offset = (u64)-1;
8906 dest_root = btrfs_read_fs_root(fs_info, &key);
8908 /* For non-exist root we just skip it */
8909 if (IS_ERR(dest_root) || !dest_root)
8910 continue;
8912 key.objectid = dback->owner;
8913 key.type = BTRFS_EXTENT_DATA_KEY;
8914 key.offset = dback->offset;
8916 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8917 btrfs_release_path(&path);
8919 * For ret < 0, it's OK since the fs-tree may be corrupted,
8920 * we need to record it for inode/file extent rebuild.
8921 * For ret > 0, we record it only for file extent rebuild.
8922 * For ret == 0, the file extent exists but only bytenr
8923 * mismatch, let the original bytenr fix routine to handle,
8924 * don't record it.
8926 if (ret == 0)
8927 continue;
8928 ret = 0;
8929 orphan = malloc(sizeof(*orphan));
8930 if (!orphan) {
8931 ret = -ENOMEM;
8932 goto out;
8934 INIT_LIST_HEAD(&orphan->list);
8935 orphan->root = dback->root;
8936 orphan->objectid = dback->owner;
8937 orphan->offset = dback->offset;
8938 orphan->disk_bytenr = rec->cache.start;
8939 orphan->disk_len = rec->cache.size;
8940 list_add(&dest_root->orphan_data_extents, &orphan->list);
8941 recorded_data_ref = 1;
8943 out:
8944 btrfs_release_path(&path);
8945 if (!ret)
8946 return !recorded_data_ref;
8947 else
8948 return ret;
8952 * when an incorrect extent item is found, this will delete
8953 * all of the existing entries for it and recreate them
8954 * based on what the tree scan found.
8956 static int fixup_extent_refs(struct btrfs_fs_info *info,
8957 struct cache_tree *extent_cache,
8958 struct extent_record *rec)
8960 struct btrfs_trans_handle *trans = NULL;
8961 int ret;
8962 struct btrfs_path path;
8963 struct list_head *cur = rec->backrefs.next;
8964 struct cache_extent *cache;
8965 struct extent_backref *back;
8966 int allocated = 0;
8967 u64 flags = 0;
8969 if (rec->flag_block_full_backref)
8970 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8972 btrfs_init_path(&path);
8973 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8975 * Sometimes the backrefs themselves are so broken they don't
8976 * get attached to any meaningful rec, so first go back and
8977 * check any of our backrefs that we couldn't find and throw
8978 * them into the list if we find the backref so that
8979 * verify_backrefs can figure out what to do.
8981 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8982 if (ret < 0)
8983 goto out;
8986 /* step one, make sure all of the backrefs agree */
8987 ret = verify_backrefs(info, &path, rec);
8988 if (ret < 0)
8989 goto out;
8991 trans = btrfs_start_transaction(info->extent_root, 1);
8992 if (IS_ERR(trans)) {
8993 ret = PTR_ERR(trans);
8994 goto out;
8997 /* step two, delete all the existing records */
8998 ret = delete_extent_records(trans, info->extent_root, &path,
8999 rec->start);
9001 if (ret < 0)
9002 goto out;
9004 /* was this block corrupt? If so, don't add references to it */
9005 cache = lookup_cache_extent(info->corrupt_blocks,
9006 rec->start, rec->max_size);
9007 if (cache) {
9008 ret = 0;
9009 goto out;
9012 /* step three, recreate all the refs we did find */
9013 while(cur != &rec->backrefs) {
9014 back = to_extent_backref(cur);
9015 cur = cur->next;
9018 * if we didn't find any references, don't create a
9019 * new extent record
9021 if (!back->found_ref)
9022 continue;
9024 rec->bad_full_backref = 0;
9025 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9026 allocated = 1;
9028 if (ret)
9029 goto out;
9031 out:
9032 if (trans) {
9033 int err = btrfs_commit_transaction(trans, info->extent_root);
9034 if (!ret)
9035 ret = err;
9038 if (!ret)
9039 fprintf(stderr, "Repaired extent references for %llu\n",
9040 (unsigned long long)rec->start);
9042 btrfs_release_path(&path);
9043 return ret;
9046 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9047 struct extent_record *rec)
9049 struct btrfs_trans_handle *trans;
9050 struct btrfs_root *root = fs_info->extent_root;
9051 struct btrfs_path path;
9052 struct btrfs_extent_item *ei;
9053 struct btrfs_key key;
9054 u64 flags;
9055 int ret = 0;
9057 key.objectid = rec->start;
9058 if (rec->metadata) {
9059 key.type = BTRFS_METADATA_ITEM_KEY;
9060 key.offset = rec->info_level;
9061 } else {
9062 key.type = BTRFS_EXTENT_ITEM_KEY;
9063 key.offset = rec->max_size;
9066 trans = btrfs_start_transaction(root, 0);
9067 if (IS_ERR(trans))
9068 return PTR_ERR(trans);
9070 btrfs_init_path(&path);
9071 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9072 if (ret < 0) {
9073 btrfs_release_path(&path);
9074 btrfs_commit_transaction(trans, root);
9075 return ret;
9076 } else if (ret) {
9077 fprintf(stderr, "Didn't find extent for %llu\n",
9078 (unsigned long long)rec->start);
9079 btrfs_release_path(&path);
9080 btrfs_commit_transaction(trans, root);
9081 return -ENOENT;
9084 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9085 struct btrfs_extent_item);
9086 flags = btrfs_extent_flags(path.nodes[0], ei);
9087 if (rec->flag_block_full_backref) {
9088 fprintf(stderr, "setting full backref on %llu\n",
9089 (unsigned long long)key.objectid);
9090 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9091 } else {
9092 fprintf(stderr, "clearing full backref on %llu\n",
9093 (unsigned long long)key.objectid);
9094 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9096 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9097 btrfs_mark_buffer_dirty(path.nodes[0]);
9098 btrfs_release_path(&path);
9099 ret = btrfs_commit_transaction(trans, root);
9100 if (!ret)
9101 fprintf(stderr, "Repaired extent flags for %llu\n",
9102 (unsigned long long)rec->start);
9104 return ret;
9107 /* right now we only prune from the extent allocation tree */
9108 static int prune_one_block(struct btrfs_trans_handle *trans,
9109 struct btrfs_fs_info *info,
9110 struct btrfs_corrupt_block *corrupt)
9112 int ret;
9113 struct btrfs_path path;
9114 struct extent_buffer *eb;
9115 u64 found;
9116 int slot;
9117 int nritems;
9118 int level = corrupt->level + 1;
9120 btrfs_init_path(&path);
9121 again:
9122 /* we want to stop at the parent to our busted block */
9123 path.lowest_level = level;
9125 ret = btrfs_search_slot(trans, info->extent_root,
9126 &corrupt->key, &path, -1, 1);
9128 if (ret < 0)
9129 goto out;
9131 eb = path.nodes[level];
9132 if (!eb) {
9133 ret = -ENOENT;
9134 goto out;
9138 * hopefully the search gave us the block we want to prune,
9139 * lets try that first
9141 slot = path.slots[level];
9142 found = btrfs_node_blockptr(eb, slot);
9143 if (found == corrupt->cache.start)
9144 goto del_ptr;
9146 nritems = btrfs_header_nritems(eb);
9148 /* the search failed, lets scan this node and hope we find it */
9149 for (slot = 0; slot < nritems; slot++) {
9150 found = btrfs_node_blockptr(eb, slot);
9151 if (found == corrupt->cache.start)
9152 goto del_ptr;
9155 * we couldn't find the bad block. TODO, search all the nodes for pointers
9156 * to this block
9158 if (eb == info->extent_root->node) {
9159 ret = -ENOENT;
9160 goto out;
9161 } else {
9162 level++;
9163 btrfs_release_path(&path);
9164 goto again;
9167 del_ptr:
9168 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9169 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9171 out:
9172 btrfs_release_path(&path);
9173 return ret;
9176 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9178 struct btrfs_trans_handle *trans = NULL;
9179 struct cache_extent *cache;
9180 struct btrfs_corrupt_block *corrupt;
9182 while (1) {
9183 cache = search_cache_extent(info->corrupt_blocks, 0);
9184 if (!cache)
9185 break;
9186 if (!trans) {
9187 trans = btrfs_start_transaction(info->extent_root, 1);
9188 if (IS_ERR(trans))
9189 return PTR_ERR(trans);
9191 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9192 prune_one_block(trans, info, corrupt);
9193 remove_cache_extent(info->corrupt_blocks, cache);
9195 if (trans)
9196 return btrfs_commit_transaction(trans, info->extent_root);
9197 return 0;
9200 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9202 struct btrfs_block_group_cache *cache;
9203 u64 start, end;
9204 int ret;
9206 while (1) {
9207 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9208 &start, &end, EXTENT_DIRTY);
9209 if (ret)
9210 break;
9211 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9214 start = 0;
9215 while (1) {
9216 cache = btrfs_lookup_first_block_group(fs_info, start);
9217 if (!cache)
9218 break;
9219 if (cache->cached)
9220 cache->cached = 0;
9221 start = cache->key.objectid + cache->key.offset;
9225 static int check_extent_refs(struct btrfs_root *root,
9226 struct cache_tree *extent_cache)
9228 struct extent_record *rec;
9229 struct cache_extent *cache;
9230 int ret = 0;
9231 int had_dups = 0;
9233 if (repair) {
9235 * if we're doing a repair, we have to make sure
9236 * we don't allocate from the problem extents.
9237 * In the worst case, this will be all the
9238 * extents in the FS
9240 cache = search_cache_extent(extent_cache, 0);
9241 while(cache) {
9242 rec = container_of(cache, struct extent_record, cache);
9243 set_extent_dirty(root->fs_info->excluded_extents,
9244 rec->start,
9245 rec->start + rec->max_size - 1);
9246 cache = next_cache_extent(cache);
9249 /* pin down all the corrupted blocks too */
9250 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9251 while(cache) {
9252 set_extent_dirty(root->fs_info->excluded_extents,
9253 cache->start,
9254 cache->start + cache->size - 1);
9255 cache = next_cache_extent(cache);
9257 prune_corrupt_blocks(root->fs_info);
9258 reset_cached_block_groups(root->fs_info);
9261 reset_cached_block_groups(root->fs_info);
9264 * We need to delete any duplicate entries we find first otherwise we
9265 * could mess up the extent tree when we have backrefs that actually
9266 * belong to a different extent item and not the weird duplicate one.
9268 while (repair && !list_empty(&duplicate_extents)) {
9269 rec = to_extent_record(duplicate_extents.next);
9270 list_del_init(&rec->list);
9272 /* Sometimes we can find a backref before we find an actual
9273 * extent, so we need to process it a little bit to see if there
9274 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9275 * if this is a backref screwup. If we need to delete stuff
9276 * process_duplicates() will return 0, otherwise it will return
9277 * 1 and we
9279 if (process_duplicates(extent_cache, rec))
9280 continue;
9281 ret = delete_duplicate_records(root, rec);
9282 if (ret < 0)
9283 return ret;
9285 * delete_duplicate_records will return the number of entries
9286 * deleted, so if it's greater than 0 then we know we actually
9287 * did something and we need to remove.
9289 if (ret)
9290 had_dups = 1;
9293 if (had_dups)
9294 return -EAGAIN;
9296 while(1) {
9297 int cur_err = 0;
9298 int fix = 0;
9300 cache = search_cache_extent(extent_cache, 0);
9301 if (!cache)
9302 break;
9303 rec = container_of(cache, struct extent_record, cache);
9304 if (rec->num_duplicates) {
9305 fprintf(stderr, "extent item %llu has multiple extent "
9306 "items\n", (unsigned long long)rec->start);
9307 cur_err = 1;
9310 if (rec->refs != rec->extent_item_refs) {
9311 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9312 (unsigned long long)rec->start,
9313 (unsigned long long)rec->nr);
9314 fprintf(stderr, "extent item %llu, found %llu\n",
9315 (unsigned long long)rec->extent_item_refs,
9316 (unsigned long long)rec->refs);
9317 ret = record_orphan_data_extents(root->fs_info, rec);
9318 if (ret < 0)
9319 goto repair_abort;
9320 fix = ret;
9321 cur_err = 1;
9323 if (all_backpointers_checked(rec, 1)) {
9324 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9325 (unsigned long long)rec->start,
9326 (unsigned long long)rec->nr);
9327 fix = 1;
9328 cur_err = 1;
9330 if (!rec->owner_ref_checked) {
9331 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9332 (unsigned long long)rec->start,
9333 (unsigned long long)rec->nr);
9334 fix = 1;
9335 cur_err = 1;
9338 if (repair && fix) {
9339 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9340 if (ret)
9341 goto repair_abort;
9345 if (rec->bad_full_backref) {
9346 fprintf(stderr, "bad full backref, on [%llu]\n",
9347 (unsigned long long)rec->start);
9348 if (repair) {
9349 ret = fixup_extent_flags(root->fs_info, rec);
9350 if (ret)
9351 goto repair_abort;
9352 fix = 1;
9354 cur_err = 1;
9357 * Although it's not a extent ref's problem, we reuse this
9358 * routine for error reporting.
9359 * No repair function yet.
9361 if (rec->crossing_stripes) {
9362 fprintf(stderr,
9363 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9364 rec->start, rec->start + rec->max_size);
9365 cur_err = 1;
9368 if (rec->wrong_chunk_type) {
9369 fprintf(stderr,
9370 "bad extent [%llu, %llu), type mismatch with chunk\n",
9371 rec->start, rec->start + rec->max_size);
9372 cur_err = 1;
9375 remove_cache_extent(extent_cache, cache);
9376 free_all_extent_backrefs(rec);
9377 if (!init_extent_tree && repair && (!cur_err || fix))
9378 clear_extent_dirty(root->fs_info->excluded_extents,
9379 rec->start,
9380 rec->start + rec->max_size - 1);
9381 free(rec);
9383 repair_abort:
9384 if (repair) {
9385 if (ret && ret != -EAGAIN) {
9386 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9387 exit(1);
9388 } else if (!ret) {
9389 struct btrfs_trans_handle *trans;
9391 root = root->fs_info->extent_root;
9392 trans = btrfs_start_transaction(root, 1);
9393 if (IS_ERR(trans)) {
9394 ret = PTR_ERR(trans);
9395 goto repair_abort;
9398 btrfs_fix_block_accounting(trans, root);
9399 ret = btrfs_commit_transaction(trans, root);
9400 if (ret)
9401 goto repair_abort;
9403 return ret;
9405 return 0;
9408 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9410 u64 stripe_size;
9412 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9413 stripe_size = length;
9414 stripe_size /= num_stripes;
9415 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9416 stripe_size = length * 2;
9417 stripe_size /= num_stripes;
9418 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9419 stripe_size = length;
9420 stripe_size /= (num_stripes - 1);
9421 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9422 stripe_size = length;
9423 stripe_size /= (num_stripes - 2);
9424 } else {
9425 stripe_size = length;
9427 return stripe_size;
9431 * Check the chunk with its block group/dev list ref:
9432 * Return 0 if all refs seems valid.
9433 * Return 1 if part of refs seems valid, need later check for rebuild ref
9434 * like missing block group and needs to search extent tree to rebuild them.
9435 * Return -1 if essential refs are missing and unable to rebuild.
9437 static int check_chunk_refs(struct chunk_record *chunk_rec,
9438 struct block_group_tree *block_group_cache,
9439 struct device_extent_tree *dev_extent_cache,
9440 int silent)
9442 struct cache_extent *block_group_item;
9443 struct block_group_record *block_group_rec;
9444 struct cache_extent *dev_extent_item;
9445 struct device_extent_record *dev_extent_rec;
9446 u64 devid;
9447 u64 offset;
9448 u64 length;
9449 int metadump_v2 = 0;
9450 int i;
9451 int ret = 0;
9453 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9454 chunk_rec->offset,
9455 chunk_rec->length);
9456 if (block_group_item) {
9457 block_group_rec = container_of(block_group_item,
9458 struct block_group_record,
9459 cache);
9460 if (chunk_rec->length != block_group_rec->offset ||
9461 chunk_rec->offset != block_group_rec->objectid ||
9462 (!metadump_v2 &&
9463 chunk_rec->type_flags != block_group_rec->flags)) {
9464 if (!silent)
9465 fprintf(stderr,
9466 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9467 chunk_rec->objectid,
9468 chunk_rec->type,
9469 chunk_rec->offset,
9470 chunk_rec->length,
9471 chunk_rec->offset,
9472 chunk_rec->type_flags,
9473 block_group_rec->objectid,
9474 block_group_rec->type,
9475 block_group_rec->offset,
9476 block_group_rec->offset,
9477 block_group_rec->objectid,
9478 block_group_rec->flags);
9479 ret = -1;
9480 } else {
9481 list_del_init(&block_group_rec->list);
9482 chunk_rec->bg_rec = block_group_rec;
9484 } else {
9485 if (!silent)
9486 fprintf(stderr,
9487 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9488 chunk_rec->objectid,
9489 chunk_rec->type,
9490 chunk_rec->offset,
9491 chunk_rec->length,
9492 chunk_rec->offset,
9493 chunk_rec->type_flags);
9494 ret = 1;
9497 if (metadump_v2)
9498 return ret;
9500 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9501 chunk_rec->num_stripes);
9502 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9503 devid = chunk_rec->stripes[i].devid;
9504 offset = chunk_rec->stripes[i].offset;
9505 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9506 devid, offset, length);
9507 if (dev_extent_item) {
9508 dev_extent_rec = container_of(dev_extent_item,
9509 struct device_extent_record,
9510 cache);
9511 if (dev_extent_rec->objectid != devid ||
9512 dev_extent_rec->offset != offset ||
9513 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9514 dev_extent_rec->length != length) {
9515 if (!silent)
9516 fprintf(stderr,
9517 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9518 chunk_rec->objectid,
9519 chunk_rec->type,
9520 chunk_rec->offset,
9521 chunk_rec->stripes[i].devid,
9522 chunk_rec->stripes[i].offset,
9523 dev_extent_rec->objectid,
9524 dev_extent_rec->offset,
9525 dev_extent_rec->length);
9526 ret = -1;
9527 } else {
9528 list_move(&dev_extent_rec->chunk_list,
9529 &chunk_rec->dextents);
9531 } else {
9532 if (!silent)
9533 fprintf(stderr,
9534 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9535 chunk_rec->objectid,
9536 chunk_rec->type,
9537 chunk_rec->offset,
9538 chunk_rec->stripes[i].devid,
9539 chunk_rec->stripes[i].offset);
9540 ret = -1;
9543 return ret;
9546 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9547 int check_chunks(struct cache_tree *chunk_cache,
9548 struct block_group_tree *block_group_cache,
9549 struct device_extent_tree *dev_extent_cache,
9550 struct list_head *good, struct list_head *bad,
9551 struct list_head *rebuild, int silent)
9553 struct cache_extent *chunk_item;
9554 struct chunk_record *chunk_rec;
9555 struct block_group_record *bg_rec;
9556 struct device_extent_record *dext_rec;
9557 int err;
9558 int ret = 0;
9560 chunk_item = first_cache_extent(chunk_cache);
9561 while (chunk_item) {
9562 chunk_rec = container_of(chunk_item, struct chunk_record,
9563 cache);
9564 err = check_chunk_refs(chunk_rec, block_group_cache,
9565 dev_extent_cache, silent);
9566 if (err < 0)
9567 ret = err;
9568 if (err == 0 && good)
9569 list_add_tail(&chunk_rec->list, good);
9570 if (err > 0 && rebuild)
9571 list_add_tail(&chunk_rec->list, rebuild);
9572 if (err < 0 && bad)
9573 list_add_tail(&chunk_rec->list, bad);
9574 chunk_item = next_cache_extent(chunk_item);
9577 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9578 if (!silent)
9579 fprintf(stderr,
9580 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9581 bg_rec->objectid,
9582 bg_rec->offset,
9583 bg_rec->flags);
9584 if (!ret)
9585 ret = 1;
9588 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9589 chunk_list) {
9590 if (!silent)
9591 fprintf(stderr,
9592 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9593 dext_rec->objectid,
9594 dext_rec->offset,
9595 dext_rec->length);
9596 if (!ret)
9597 ret = 1;
9599 return ret;
9603 static int check_device_used(struct device_record *dev_rec,
9604 struct device_extent_tree *dext_cache)
9606 struct cache_extent *cache;
9607 struct device_extent_record *dev_extent_rec;
9608 u64 total_byte = 0;
9610 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9611 while (cache) {
9612 dev_extent_rec = container_of(cache,
9613 struct device_extent_record,
9614 cache);
9615 if (dev_extent_rec->objectid != dev_rec->devid)
9616 break;
9618 list_del_init(&dev_extent_rec->device_list);
9619 total_byte += dev_extent_rec->length;
9620 cache = next_cache_extent(cache);
9623 if (total_byte != dev_rec->byte_used) {
9624 fprintf(stderr,
9625 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9626 total_byte, dev_rec->byte_used, dev_rec->objectid,
9627 dev_rec->type, dev_rec->offset);
9628 return -1;
9629 } else {
9630 return 0;
9634 /* check btrfs_dev_item -> btrfs_dev_extent */
9635 static int check_devices(struct rb_root *dev_cache,
9636 struct device_extent_tree *dev_extent_cache)
9638 struct rb_node *dev_node;
9639 struct device_record *dev_rec;
9640 struct device_extent_record *dext_rec;
9641 int err;
9642 int ret = 0;
9644 dev_node = rb_first(dev_cache);
9645 while (dev_node) {
9646 dev_rec = container_of(dev_node, struct device_record, node);
9647 err = check_device_used(dev_rec, dev_extent_cache);
9648 if (err)
9649 ret = err;
9651 dev_node = rb_next(dev_node);
9653 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9654 device_list) {
9655 fprintf(stderr,
9656 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9657 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9658 if (!ret)
9659 ret = 1;
9661 return ret;
9664 static int add_root_item_to_list(struct list_head *head,
9665 u64 objectid, u64 bytenr, u64 last_snapshot,
9666 u8 level, u8 drop_level,
9667 int level_size, struct btrfs_key *drop_key)
9670 struct root_item_record *ri_rec;
9671 ri_rec = malloc(sizeof(*ri_rec));
9672 if (!ri_rec)
9673 return -ENOMEM;
9674 ri_rec->bytenr = bytenr;
9675 ri_rec->objectid = objectid;
9676 ri_rec->level = level;
9677 ri_rec->level_size = level_size;
9678 ri_rec->drop_level = drop_level;
9679 ri_rec->last_snapshot = last_snapshot;
9680 if (drop_key)
9681 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9682 list_add_tail(&ri_rec->list, head);
9684 return 0;
9687 static void free_root_item_list(struct list_head *list)
9689 struct root_item_record *ri_rec;
9691 while (!list_empty(list)) {
9692 ri_rec = list_first_entry(list, struct root_item_record,
9693 list);
9694 list_del_init(&ri_rec->list);
9695 free(ri_rec);
9699 static int deal_root_from_list(struct list_head *list,
9700 struct btrfs_root *root,
9701 struct block_info *bits,
9702 int bits_nr,
9703 struct cache_tree *pending,
9704 struct cache_tree *seen,
9705 struct cache_tree *reada,
9706 struct cache_tree *nodes,
9707 struct cache_tree *extent_cache,
9708 struct cache_tree *chunk_cache,
9709 struct rb_root *dev_cache,
9710 struct block_group_tree *block_group_cache,
9711 struct device_extent_tree *dev_extent_cache)
9713 int ret = 0;
9714 u64 last;
9716 while (!list_empty(list)) {
9717 struct root_item_record *rec;
9718 struct extent_buffer *buf;
9719 rec = list_entry(list->next,
9720 struct root_item_record, list);
9721 last = 0;
9722 buf = read_tree_block(root->fs_info->tree_root,
9723 rec->bytenr, rec->level_size, 0);
9724 if (!extent_buffer_uptodate(buf)) {
9725 free_extent_buffer(buf);
9726 ret = -EIO;
9727 break;
9729 ret = add_root_to_pending(buf, extent_cache, pending,
9730 seen, nodes, rec->objectid);
9731 if (ret < 0)
9732 break;
9734 * To rebuild extent tree, we need deal with snapshot
9735 * one by one, otherwise we deal with node firstly which
9736 * can maximize readahead.
9738 while (1) {
9739 ret = run_next_block(root, bits, bits_nr, &last,
9740 pending, seen, reada, nodes,
9741 extent_cache, chunk_cache,
9742 dev_cache, block_group_cache,
9743 dev_extent_cache, rec);
9744 if (ret != 0)
9745 break;
9747 free_extent_buffer(buf);
9748 list_del(&rec->list);
9749 free(rec);
9750 if (ret < 0)
9751 break;
9753 while (ret >= 0) {
9754 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9755 reada, nodes, extent_cache, chunk_cache,
9756 dev_cache, block_group_cache,
9757 dev_extent_cache, NULL);
9758 if (ret != 0) {
9759 if (ret > 0)
9760 ret = 0;
9761 break;
9764 return ret;
9767 static int check_chunks_and_extents(struct btrfs_root *root)
9769 struct rb_root dev_cache;
9770 struct cache_tree chunk_cache;
9771 struct block_group_tree block_group_cache;
9772 struct device_extent_tree dev_extent_cache;
9773 struct cache_tree extent_cache;
9774 struct cache_tree seen;
9775 struct cache_tree pending;
9776 struct cache_tree reada;
9777 struct cache_tree nodes;
9778 struct extent_io_tree excluded_extents;
9779 struct cache_tree corrupt_blocks;
9780 struct btrfs_path path;
9781 struct btrfs_key key;
9782 struct btrfs_key found_key;
9783 int ret, err = 0;
9784 struct block_info *bits;
9785 int bits_nr;
9786 struct extent_buffer *leaf;
9787 int slot;
9788 struct btrfs_root_item ri;
9789 struct list_head dropping_trees;
9790 struct list_head normal_trees;
9791 struct btrfs_root *root1;
9792 u64 objectid;
9793 u32 level_size;
9794 u8 level;
9796 dev_cache = RB_ROOT;
9797 cache_tree_init(&chunk_cache);
9798 block_group_tree_init(&block_group_cache);
9799 device_extent_tree_init(&dev_extent_cache);
9801 cache_tree_init(&extent_cache);
9802 cache_tree_init(&seen);
9803 cache_tree_init(&pending);
9804 cache_tree_init(&nodes);
9805 cache_tree_init(&reada);
9806 cache_tree_init(&corrupt_blocks);
9807 extent_io_tree_init(&excluded_extents);
9808 INIT_LIST_HEAD(&dropping_trees);
9809 INIT_LIST_HEAD(&normal_trees);
9811 if (repair) {
9812 root->fs_info->excluded_extents = &excluded_extents;
9813 root->fs_info->fsck_extent_cache = &extent_cache;
9814 root->fs_info->free_extent_hook = free_extent_hook;
9815 root->fs_info->corrupt_blocks = &corrupt_blocks;
9818 bits_nr = 1024;
9819 bits = malloc(bits_nr * sizeof(struct block_info));
9820 if (!bits) {
9821 perror("malloc");
9822 exit(1);
9825 if (ctx.progress_enabled) {
9826 ctx.tp = TASK_EXTENTS;
9827 task_start(ctx.info);
9830 again:
9831 root1 = root->fs_info->tree_root;
9832 level = btrfs_header_level(root1->node);
9833 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9834 root1->node->start, 0, level, 0,
9835 root1->nodesize, NULL);
9836 if (ret < 0)
9837 goto out;
9838 root1 = root->fs_info->chunk_root;
9839 level = btrfs_header_level(root1->node);
9840 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9841 root1->node->start, 0, level, 0,
9842 root1->nodesize, NULL);
9843 if (ret < 0)
9844 goto out;
9845 btrfs_init_path(&path);
9846 key.offset = 0;
9847 key.objectid = 0;
9848 key.type = BTRFS_ROOT_ITEM_KEY;
9849 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9850 &key, &path, 0, 0);
9851 if (ret < 0)
9852 goto out;
9853 while(1) {
9854 leaf = path.nodes[0];
9855 slot = path.slots[0];
9856 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9857 ret = btrfs_next_leaf(root, &path);
9858 if (ret != 0)
9859 break;
9860 leaf = path.nodes[0];
9861 slot = path.slots[0];
9863 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9864 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9865 unsigned long offset;
9866 u64 last_snapshot;
9868 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9869 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9870 last_snapshot = btrfs_root_last_snapshot(&ri);
9871 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9872 level = btrfs_root_level(&ri);
9873 level_size = root->nodesize;
9874 ret = add_root_item_to_list(&normal_trees,
9875 found_key.objectid,
9876 btrfs_root_bytenr(&ri),
9877 last_snapshot, level,
9878 0, level_size, NULL);
9879 if (ret < 0)
9880 goto out;
9881 } else {
9882 level = btrfs_root_level(&ri);
9883 level_size = root->nodesize;
9884 objectid = found_key.objectid;
9885 btrfs_disk_key_to_cpu(&found_key,
9886 &ri.drop_progress);
9887 ret = add_root_item_to_list(&dropping_trees,
9888 objectid,
9889 btrfs_root_bytenr(&ri),
9890 last_snapshot, level,
9891 ri.drop_level,
9892 level_size, &found_key);
9893 if (ret < 0)
9894 goto out;
9897 path.slots[0]++;
9899 btrfs_release_path(&path);
9902 * check_block can return -EAGAIN if it fixes something, please keep
9903 * this in mind when dealing with return values from these functions, if
9904 * we get -EAGAIN we want to fall through and restart the loop.
9906 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9907 &seen, &reada, &nodes, &extent_cache,
9908 &chunk_cache, &dev_cache, &block_group_cache,
9909 &dev_extent_cache);
9910 if (ret < 0) {
9911 if (ret == -EAGAIN)
9912 goto loop;
9913 goto out;
9915 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9916 &pending, &seen, &reada, &nodes,
9917 &extent_cache, &chunk_cache, &dev_cache,
9918 &block_group_cache, &dev_extent_cache);
9919 if (ret < 0) {
9920 if (ret == -EAGAIN)
9921 goto loop;
9922 goto out;
9925 ret = check_chunks(&chunk_cache, &block_group_cache,
9926 &dev_extent_cache, NULL, NULL, NULL, 0);
9927 if (ret) {
9928 if (ret == -EAGAIN)
9929 goto loop;
9930 err = ret;
9933 ret = check_extent_refs(root, &extent_cache);
9934 if (ret < 0) {
9935 if (ret == -EAGAIN)
9936 goto loop;
9937 goto out;
9940 ret = check_devices(&dev_cache, &dev_extent_cache);
9941 if (ret && err)
9942 ret = err;
9944 out:
9945 task_stop(ctx.info);
9946 if (repair) {
9947 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9948 extent_io_tree_cleanup(&excluded_extents);
9949 root->fs_info->fsck_extent_cache = NULL;
9950 root->fs_info->free_extent_hook = NULL;
9951 root->fs_info->corrupt_blocks = NULL;
9952 root->fs_info->excluded_extents = NULL;
9954 free(bits);
9955 free_chunk_cache_tree(&chunk_cache);
9956 free_device_cache_tree(&dev_cache);
9957 free_block_group_tree(&block_group_cache);
9958 free_device_extent_tree(&dev_extent_cache);
9959 free_extent_cache_tree(&seen);
9960 free_extent_cache_tree(&pending);
9961 free_extent_cache_tree(&reada);
9962 free_extent_cache_tree(&nodes);
9963 return ret;
9964 loop:
9965 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9966 free_extent_cache_tree(&seen);
9967 free_extent_cache_tree(&pending);
9968 free_extent_cache_tree(&reada);
9969 free_extent_cache_tree(&nodes);
9970 free_chunk_cache_tree(&chunk_cache);
9971 free_block_group_tree(&block_group_cache);
9972 free_device_cache_tree(&dev_cache);
9973 free_device_extent_tree(&dev_extent_cache);
9974 free_extent_record_cache(&extent_cache);
9975 free_root_item_list(&normal_trees);
9976 free_root_item_list(&dropping_trees);
9977 extent_io_tree_cleanup(&excluded_extents);
9978 goto again;
9982 * Check backrefs of a tree block given by @bytenr or @eb.
9984 * @root: the root containing the @bytenr or @eb
9985 * @eb: tree block extent buffer, can be NULL
9986 * @bytenr: bytenr of the tree block to search
9987 * @level: tree level of the tree block
9988 * @owner: owner of the tree block
9990 * Return >0 for any error found and output error message
9991 * Return 0 for no error found
9993 static int check_tree_block_ref(struct btrfs_root *root,
9994 struct extent_buffer *eb, u64 bytenr,
9995 int level, u64 owner)
9997 struct btrfs_key key;
9998 struct btrfs_root *extent_root = root->fs_info->extent_root;
9999 struct btrfs_path path;
10000 struct btrfs_extent_item *ei;
10001 struct btrfs_extent_inline_ref *iref;
10002 struct extent_buffer *leaf;
10003 unsigned long end;
10004 unsigned long ptr;
10005 int slot;
10006 int skinny_level;
10007 int type;
10008 u32 nodesize = root->nodesize;
10009 u32 item_size;
10010 u64 offset;
10011 int tree_reloc_root = 0;
10012 int found_ref = 0;
10013 int err = 0;
10014 int ret;
10016 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10017 btrfs_header_bytenr(root->node) == bytenr)
10018 tree_reloc_root = 1;
10020 btrfs_init_path(&path);
10021 key.objectid = bytenr;
10022 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10023 key.type = BTRFS_METADATA_ITEM_KEY;
10024 else
10025 key.type = BTRFS_EXTENT_ITEM_KEY;
10026 key.offset = (u64)-1;
10028 /* Search for the backref in extent tree */
10029 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10030 if (ret < 0) {
10031 err |= BACKREF_MISSING;
10032 goto out;
10034 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10035 if (ret) {
10036 err |= BACKREF_MISSING;
10037 goto out;
10040 leaf = path.nodes[0];
10041 slot = path.slots[0];
10042 btrfs_item_key_to_cpu(leaf, &key, slot);
10044 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10046 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10047 skinny_level = (int)key.offset;
10048 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10049 } else {
10050 struct btrfs_tree_block_info *info;
10052 info = (struct btrfs_tree_block_info *)(ei + 1);
10053 skinny_level = btrfs_tree_block_level(leaf, info);
10054 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10057 if (eb) {
10058 u64 header_gen;
10059 u64 extent_gen;
10061 if (!(btrfs_extent_flags(leaf, ei) &
10062 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10063 error(
10064 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10065 key.objectid, nodesize,
10066 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10067 err = BACKREF_MISMATCH;
10069 header_gen = btrfs_header_generation(eb);
10070 extent_gen = btrfs_extent_generation(leaf, ei);
10071 if (header_gen != extent_gen) {
10072 error(
10073 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10074 key.objectid, nodesize, header_gen,
10075 extent_gen);
10076 err = BACKREF_MISMATCH;
10078 if (level != skinny_level) {
10079 error(
10080 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10081 key.objectid, nodesize, level, skinny_level);
10082 err = BACKREF_MISMATCH;
10084 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10085 error(
10086 "extent[%llu %u] is referred by other roots than %llu",
10087 key.objectid, nodesize, root->objectid);
10088 err = BACKREF_MISMATCH;
10093 * Iterate the extent/metadata item to find the exact backref
10095 item_size = btrfs_item_size_nr(leaf, slot);
10096 ptr = (unsigned long)iref;
10097 end = (unsigned long)ei + item_size;
10098 while (ptr < end) {
10099 iref = (struct btrfs_extent_inline_ref *)ptr;
10100 type = btrfs_extent_inline_ref_type(leaf, iref);
10101 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10103 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10104 (offset == root->objectid || offset == owner)) {
10105 found_ref = 1;
10106 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10108 * Backref of tree reloc root points to itself, no need
10109 * to check backref any more.
10111 if (tree_reloc_root)
10112 found_ref = 1;
10113 else
10114 /* Check if the backref points to valid referencer */
10115 found_ref = !check_tree_block_ref(root, NULL,
10116 offset, level + 1, owner);
10119 if (found_ref)
10120 break;
10121 ptr += btrfs_extent_inline_ref_size(type);
10125 * Inlined extent item doesn't have what we need, check
10126 * TREE_BLOCK_REF_KEY
10128 if (!found_ref) {
10129 btrfs_release_path(&path);
10130 key.objectid = bytenr;
10131 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10132 key.offset = root->objectid;
10134 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10135 if (!ret)
10136 found_ref = 1;
10138 if (!found_ref)
10139 err |= BACKREF_MISSING;
10140 out:
10141 btrfs_release_path(&path);
10142 if (eb && (err & BACKREF_MISSING))
10143 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10144 bytenr, nodesize, owner, level);
10145 return err;
10149 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10151 * Return >0 any error found and output error message
10152 * Return 0 for no error found
10154 static int check_extent_data_item(struct btrfs_root *root,
10155 struct extent_buffer *eb, int slot)
10157 struct btrfs_file_extent_item *fi;
10158 struct btrfs_path path;
10159 struct btrfs_root *extent_root = root->fs_info->extent_root;
10160 struct btrfs_key fi_key;
10161 struct btrfs_key dbref_key;
10162 struct extent_buffer *leaf;
10163 struct btrfs_extent_item *ei;
10164 struct btrfs_extent_inline_ref *iref;
10165 struct btrfs_extent_data_ref *dref;
10166 u64 owner;
10167 u64 disk_bytenr;
10168 u64 disk_num_bytes;
10169 u64 extent_num_bytes;
10170 u64 extent_flags;
10171 u32 item_size;
10172 unsigned long end;
10173 unsigned long ptr;
10174 int type;
10175 u64 ref_root;
10176 int found_dbackref = 0;
10177 int err = 0;
10178 int ret;
10180 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10181 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10183 /* Nothing to check for hole and inline data extents */
10184 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10185 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10186 return 0;
10188 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10189 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10190 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10192 /* Check unaligned disk_num_bytes and num_bytes */
10193 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10194 error(
10195 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10196 fi_key.objectid, fi_key.offset, disk_num_bytes,
10197 root->sectorsize);
10198 err |= BYTES_UNALIGNED;
10199 } else {
10200 data_bytes_allocated += disk_num_bytes;
10202 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10203 error(
10204 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10205 fi_key.objectid, fi_key.offset, extent_num_bytes,
10206 root->sectorsize);
10207 err |= BYTES_UNALIGNED;
10208 } else {
10209 data_bytes_referenced += extent_num_bytes;
10211 owner = btrfs_header_owner(eb);
10213 /* Check the extent item of the file extent in extent tree */
10214 btrfs_init_path(&path);
10215 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10216 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10217 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10219 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10220 if (ret) {
10221 err |= BACKREF_MISSING;
10222 goto error;
10225 leaf = path.nodes[0];
10226 slot = path.slots[0];
10227 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10229 extent_flags = btrfs_extent_flags(leaf, ei);
10231 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10232 error(
10233 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10234 disk_bytenr, disk_num_bytes,
10235 BTRFS_EXTENT_FLAG_DATA);
10236 err |= BACKREF_MISMATCH;
10239 /* Check data backref inside that extent item */
10240 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10241 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10242 ptr = (unsigned long)iref;
10243 end = (unsigned long)ei + item_size;
10244 while (ptr < end) {
10245 iref = (struct btrfs_extent_inline_ref *)ptr;
10246 type = btrfs_extent_inline_ref_type(leaf, iref);
10247 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10249 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10250 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10251 if (ref_root == owner || ref_root == root->objectid)
10252 found_dbackref = 1;
10253 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10254 found_dbackref = !check_tree_block_ref(root, NULL,
10255 btrfs_extent_inline_ref_offset(leaf, iref),
10256 0, owner);
10259 if (found_dbackref)
10260 break;
10261 ptr += btrfs_extent_inline_ref_size(type);
10264 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10265 if (!found_dbackref) {
10266 btrfs_release_path(&path);
10268 btrfs_init_path(&path);
10269 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10270 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10271 dbref_key.offset = hash_extent_data_ref(root->objectid,
10272 fi_key.objectid, fi_key.offset);
10274 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10275 &dbref_key, &path, 0, 0);
10276 if (!ret)
10277 found_dbackref = 1;
10280 if (!found_dbackref)
10281 err |= BACKREF_MISSING;
10282 error:
10283 btrfs_release_path(&path);
10284 if (err & BACKREF_MISSING) {
10285 error("data extent[%llu %llu] backref lost",
10286 disk_bytenr, disk_num_bytes);
10288 return err;
10292 * Get real tree block level for the case like shared block
10293 * Return >= 0 as tree level
10294 * Return <0 for error
10296 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10298 struct extent_buffer *eb;
10299 struct btrfs_path path;
10300 struct btrfs_key key;
10301 struct btrfs_extent_item *ei;
10302 u64 flags;
10303 u64 transid;
10304 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10305 u8 backref_level;
10306 u8 header_level;
10307 int ret;
10309 /* Search extent tree for extent generation and level */
10310 key.objectid = bytenr;
10311 key.type = BTRFS_METADATA_ITEM_KEY;
10312 key.offset = (u64)-1;
10314 btrfs_init_path(&path);
10315 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10316 if (ret < 0)
10317 goto release_out;
10318 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10319 if (ret < 0)
10320 goto release_out;
10321 if (ret > 0) {
10322 ret = -ENOENT;
10323 goto release_out;
10326 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10327 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10328 struct btrfs_extent_item);
10329 flags = btrfs_extent_flags(path.nodes[0], ei);
10330 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10331 ret = -ENOENT;
10332 goto release_out;
10335 /* Get transid for later read_tree_block() check */
10336 transid = btrfs_extent_generation(path.nodes[0], ei);
10338 /* Get backref level as one source */
10339 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10340 backref_level = key.offset;
10341 } else {
10342 struct btrfs_tree_block_info *info;
10344 info = (struct btrfs_tree_block_info *)(ei + 1);
10345 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10347 btrfs_release_path(&path);
10349 /* Get level from tree block as an alternative source */
10350 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10351 if (!extent_buffer_uptodate(eb)) {
10352 free_extent_buffer(eb);
10353 return -EIO;
10355 header_level = btrfs_header_level(eb);
10356 free_extent_buffer(eb);
10358 if (header_level != backref_level)
10359 return -EIO;
10360 return header_level;
10362 release_out:
10363 btrfs_release_path(&path);
10364 return ret;
10368 * Check if a tree block backref is valid (points to a valid tree block)
10369 * if level == -1, level will be resolved
10370 * Return >0 for any error found and print error message
10372 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10373 u64 bytenr, int level)
10375 struct btrfs_root *root;
10376 struct btrfs_key key;
10377 struct btrfs_path path;
10378 struct extent_buffer *eb;
10379 struct extent_buffer *node;
10380 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10381 int err = 0;
10382 int ret;
10384 /* Query level for level == -1 special case */
10385 if (level == -1)
10386 level = query_tree_block_level(fs_info, bytenr);
10387 if (level < 0) {
10388 err |= REFERENCER_MISSING;
10389 goto out;
10392 key.objectid = root_id;
10393 key.type = BTRFS_ROOT_ITEM_KEY;
10394 key.offset = (u64)-1;
10396 root = btrfs_read_fs_root(fs_info, &key);
10397 if (IS_ERR(root)) {
10398 err |= REFERENCER_MISSING;
10399 goto out;
10402 /* Read out the tree block to get item/node key */
10403 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10404 if (!extent_buffer_uptodate(eb)) {
10405 err |= REFERENCER_MISSING;
10406 free_extent_buffer(eb);
10407 goto out;
10410 /* Empty tree, no need to check key */
10411 if (!btrfs_header_nritems(eb) && !level) {
10412 free_extent_buffer(eb);
10413 goto out;
10416 if (level)
10417 btrfs_node_key_to_cpu(eb, &key, 0);
10418 else
10419 btrfs_item_key_to_cpu(eb, &key, 0);
10421 free_extent_buffer(eb);
10423 btrfs_init_path(&path);
10424 path.lowest_level = level;
10425 /* Search with the first key, to ensure we can reach it */
10426 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10427 if (ret < 0) {
10428 err |= REFERENCER_MISSING;
10429 goto release_out;
10432 node = path.nodes[level];
10433 if (btrfs_header_bytenr(node) != bytenr) {
10434 error(
10435 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10436 bytenr, nodesize, bytenr,
10437 btrfs_header_bytenr(node));
10438 err |= REFERENCER_MISMATCH;
10440 if (btrfs_header_level(node) != level) {
10441 error(
10442 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10443 bytenr, nodesize, level,
10444 btrfs_header_level(node));
10445 err |= REFERENCER_MISMATCH;
10448 release_out:
10449 btrfs_release_path(&path);
10450 out:
10451 if (err & REFERENCER_MISSING) {
10452 if (level < 0)
10453 error("extent [%llu %d] lost referencer (owner: %llu)",
10454 bytenr, nodesize, root_id);
10455 else
10456 error(
10457 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10458 bytenr, nodesize, root_id, level);
10461 return err;
10465 * Check if tree block @eb is tree reloc root.
10466 * Return 0 if it's not or any problem happens
10467 * Return 1 if it's a tree reloc root
10469 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10470 struct extent_buffer *eb)
10472 struct btrfs_root *tree_reloc_root;
10473 struct btrfs_key key;
10474 u64 bytenr = btrfs_header_bytenr(eb);
10475 u64 owner = btrfs_header_owner(eb);
10476 int ret = 0;
10478 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10479 key.offset = owner;
10480 key.type = BTRFS_ROOT_ITEM_KEY;
10482 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10483 if (IS_ERR(tree_reloc_root))
10484 return 0;
10486 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10487 ret = 1;
10488 btrfs_free_fs_root(tree_reloc_root);
10489 return ret;
10493 * Check referencer for shared block backref
10494 * If level == -1, this function will resolve the level.
10496 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10497 u64 parent, u64 bytenr, int level)
10499 struct extent_buffer *eb;
10500 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10501 u32 nr;
10502 int found_parent = 0;
10503 int i;
10505 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10506 if (!extent_buffer_uptodate(eb))
10507 goto out;
10509 if (level == -1)
10510 level = query_tree_block_level(fs_info, bytenr);
10511 if (level < 0)
10512 goto out;
10514 /* It's possible it's a tree reloc root */
10515 if (parent == bytenr) {
10516 if (is_tree_reloc_root(fs_info, eb))
10517 found_parent = 1;
10518 goto out;
10521 if (level + 1 != btrfs_header_level(eb))
10522 goto out;
10524 nr = btrfs_header_nritems(eb);
10525 for (i = 0; i < nr; i++) {
10526 if (bytenr == btrfs_node_blockptr(eb, i)) {
10527 found_parent = 1;
10528 break;
10531 out:
10532 free_extent_buffer(eb);
10533 if (!found_parent) {
10534 error(
10535 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10536 bytenr, nodesize, parent, level);
10537 return REFERENCER_MISSING;
10539 return 0;
10543 * Check referencer for normal (inlined) data ref
10544 * If len == 0, it will be resolved by searching in extent tree
10546 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10547 u64 root_id, u64 objectid, u64 offset,
10548 u64 bytenr, u64 len, u32 count)
10550 struct btrfs_root *root;
10551 struct btrfs_root *extent_root = fs_info->extent_root;
10552 struct btrfs_key key;
10553 struct btrfs_path path;
10554 struct extent_buffer *leaf;
10555 struct btrfs_file_extent_item *fi;
10556 u32 found_count = 0;
10557 int slot;
10558 int ret = 0;
10560 if (!len) {
10561 key.objectid = bytenr;
10562 key.type = BTRFS_EXTENT_ITEM_KEY;
10563 key.offset = (u64)-1;
10565 btrfs_init_path(&path);
10566 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10567 if (ret < 0)
10568 goto out;
10569 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10570 if (ret)
10571 goto out;
10572 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10573 if (key.objectid != bytenr ||
10574 key.type != BTRFS_EXTENT_ITEM_KEY)
10575 goto out;
10576 len = key.offset;
10577 btrfs_release_path(&path);
10579 key.objectid = root_id;
10580 key.type = BTRFS_ROOT_ITEM_KEY;
10581 key.offset = (u64)-1;
10582 btrfs_init_path(&path);
10584 root = btrfs_read_fs_root(fs_info, &key);
10585 if (IS_ERR(root))
10586 goto out;
10588 key.objectid = objectid;
10589 key.type = BTRFS_EXTENT_DATA_KEY;
10591 * It can be nasty as data backref offset is
10592 * file offset - file extent offset, which is smaller or
10593 * equal to original backref offset. The only special case is
10594 * overflow. So we need to special check and do further search.
10596 key.offset = offset & (1ULL << 63) ? 0 : offset;
10598 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10599 if (ret < 0)
10600 goto out;
10603 * Search afterwards to get correct one
10604 * NOTE: As we must do a comprehensive check on the data backref to
10605 * make sure the dref count also matches, we must iterate all file
10606 * extents for that inode.
10608 while (1) {
10609 leaf = path.nodes[0];
10610 slot = path.slots[0];
10612 if (slot >= btrfs_header_nritems(leaf))
10613 goto next;
10614 btrfs_item_key_to_cpu(leaf, &key, slot);
10615 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10616 break;
10617 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10619 * Except normal disk bytenr and disk num bytes, we still
10620 * need to do extra check on dbackref offset as
10621 * dbackref offset = file_offset - file_extent_offset
10623 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10624 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10625 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10626 offset)
10627 found_count++;
10629 next:
10630 ret = btrfs_next_item(root, &path);
10631 if (ret)
10632 break;
10634 out:
10635 btrfs_release_path(&path);
10636 if (found_count != count) {
10637 error(
10638 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10639 bytenr, len, root_id, objectid, offset, count, found_count);
10640 return REFERENCER_MISSING;
10642 return 0;
10646 * Check if the referencer of a shared data backref exists
10648 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10649 u64 parent, u64 bytenr)
10651 struct extent_buffer *eb;
10652 struct btrfs_key key;
10653 struct btrfs_file_extent_item *fi;
10654 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10655 u32 nr;
10656 int found_parent = 0;
10657 int i;
10659 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10660 if (!extent_buffer_uptodate(eb))
10661 goto out;
10663 nr = btrfs_header_nritems(eb);
10664 for (i = 0; i < nr; i++) {
10665 btrfs_item_key_to_cpu(eb, &key, i);
10666 if (key.type != BTRFS_EXTENT_DATA_KEY)
10667 continue;
10669 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10670 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10671 continue;
10673 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10674 found_parent = 1;
10675 break;
10679 out:
10680 free_extent_buffer(eb);
10681 if (!found_parent) {
10682 error("shared extent %llu referencer lost (parent: %llu)",
10683 bytenr, parent);
10684 return REFERENCER_MISSING;
10686 return 0;
10690 * This function will check a given extent item, including its backref and
10691 * itself (like crossing stripe boundary and type)
10693 * Since we don't use extent_record anymore, introduce new error bit
10695 static int check_extent_item(struct btrfs_fs_info *fs_info,
10696 struct extent_buffer *eb, int slot)
10698 struct btrfs_extent_item *ei;
10699 struct btrfs_extent_inline_ref *iref;
10700 struct btrfs_extent_data_ref *dref;
10701 unsigned long end;
10702 unsigned long ptr;
10703 int type;
10704 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10705 u32 item_size = btrfs_item_size_nr(eb, slot);
10706 u64 flags;
10707 u64 offset;
10708 int metadata = 0;
10709 int level;
10710 struct btrfs_key key;
10711 int ret;
10712 int err = 0;
10714 btrfs_item_key_to_cpu(eb, &key, slot);
10715 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10716 bytes_used += key.offset;
10717 else
10718 bytes_used += nodesize;
10720 if (item_size < sizeof(*ei)) {
10722 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10723 * old thing when on disk format is still un-determined.
10724 * No need to care about it anymore
10726 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10727 return -ENOTTY;
10730 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10731 flags = btrfs_extent_flags(eb, ei);
10733 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10734 metadata = 1;
10735 if (metadata && check_crossing_stripes(global_info, key.objectid,
10736 eb->len)) {
10737 error("bad metadata [%llu, %llu) crossing stripe boundary",
10738 key.objectid, key.objectid + nodesize);
10739 err |= CROSSING_STRIPE_BOUNDARY;
10742 ptr = (unsigned long)(ei + 1);
10744 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10745 /* Old EXTENT_ITEM metadata */
10746 struct btrfs_tree_block_info *info;
10748 info = (struct btrfs_tree_block_info *)ptr;
10749 level = btrfs_tree_block_level(eb, info);
10750 ptr += sizeof(struct btrfs_tree_block_info);
10751 } else {
10752 /* New METADATA_ITEM */
10753 level = key.offset;
10755 end = (unsigned long)ei + item_size;
10757 next:
10758 /* Reached extent item end normally */
10759 if (ptr == end)
10760 goto out;
10762 /* Beyond extent item end, wrong item size */
10763 if (ptr > end) {
10764 err |= ITEM_SIZE_MISMATCH;
10765 error("extent item at bytenr %llu slot %d has wrong size",
10766 eb->start, slot);
10767 goto out;
10770 /* Now check every backref in this extent item */
10771 iref = (struct btrfs_extent_inline_ref *)ptr;
10772 type = btrfs_extent_inline_ref_type(eb, iref);
10773 offset = btrfs_extent_inline_ref_offset(eb, iref);
10774 switch (type) {
10775 case BTRFS_TREE_BLOCK_REF_KEY:
10776 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10777 level);
10778 err |= ret;
10779 break;
10780 case BTRFS_SHARED_BLOCK_REF_KEY:
10781 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10782 level);
10783 err |= ret;
10784 break;
10785 case BTRFS_EXTENT_DATA_REF_KEY:
10786 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10787 ret = check_extent_data_backref(fs_info,
10788 btrfs_extent_data_ref_root(eb, dref),
10789 btrfs_extent_data_ref_objectid(eb, dref),
10790 btrfs_extent_data_ref_offset(eb, dref),
10791 key.objectid, key.offset,
10792 btrfs_extent_data_ref_count(eb, dref));
10793 err |= ret;
10794 break;
10795 case BTRFS_SHARED_DATA_REF_KEY:
10796 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10797 err |= ret;
10798 break;
10799 default:
10800 error("extent[%llu %d %llu] has unknown ref type: %d",
10801 key.objectid, key.type, key.offset, type);
10802 err |= UNKNOWN_TYPE;
10803 goto out;
10806 ptr += btrfs_extent_inline_ref_size(type);
10807 goto next;
10809 out:
10810 return err;
10814 * Check if a dev extent item is referred correctly by its chunk
10816 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10817 struct extent_buffer *eb, int slot)
10819 struct btrfs_root *chunk_root = fs_info->chunk_root;
10820 struct btrfs_dev_extent *ptr;
10821 struct btrfs_path path;
10822 struct btrfs_key chunk_key;
10823 struct btrfs_key devext_key;
10824 struct btrfs_chunk *chunk;
10825 struct extent_buffer *l;
10826 int num_stripes;
10827 u64 length;
10828 int i;
10829 int found_chunk = 0;
10830 int ret;
10832 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10833 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10834 length = btrfs_dev_extent_length(eb, ptr);
10836 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10837 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10838 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10840 btrfs_init_path(&path);
10841 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10842 if (ret)
10843 goto out;
10845 l = path.nodes[0];
10846 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10847 if (btrfs_chunk_length(l, chunk) != length)
10848 goto out;
10850 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10851 for (i = 0; i < num_stripes; i++) {
10852 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10853 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10855 if (devid == devext_key.objectid &&
10856 offset == devext_key.offset) {
10857 found_chunk = 1;
10858 break;
10861 out:
10862 btrfs_release_path(&path);
10863 if (!found_chunk) {
10864 error(
10865 "device extent[%llu, %llu, %llu] did not find the related chunk",
10866 devext_key.objectid, devext_key.offset, length);
10867 return REFERENCER_MISSING;
10869 return 0;
10873 * Check if the used space is correct with the dev item
10875 static int check_dev_item(struct btrfs_fs_info *fs_info,
10876 struct extent_buffer *eb, int slot)
10878 struct btrfs_root *dev_root = fs_info->dev_root;
10879 struct btrfs_dev_item *dev_item;
10880 struct btrfs_path path;
10881 struct btrfs_key key;
10882 struct btrfs_dev_extent *ptr;
10883 u64 dev_id;
10884 u64 used;
10885 u64 total = 0;
10886 int ret;
10888 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10889 dev_id = btrfs_device_id(eb, dev_item);
10890 used = btrfs_device_bytes_used(eb, dev_item);
10892 key.objectid = dev_id;
10893 key.type = BTRFS_DEV_EXTENT_KEY;
10894 key.offset = 0;
10896 btrfs_init_path(&path);
10897 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10898 if (ret < 0) {
10899 btrfs_item_key_to_cpu(eb, &key, slot);
10900 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10901 key.objectid, key.type, key.offset);
10902 btrfs_release_path(&path);
10903 return REFERENCER_MISSING;
10906 /* Iterate dev_extents to calculate the used space of a device */
10907 while (1) {
10908 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10909 goto next;
10911 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10912 if (key.objectid > dev_id)
10913 break;
10914 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10915 goto next;
10917 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10918 struct btrfs_dev_extent);
10919 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10920 next:
10921 ret = btrfs_next_item(dev_root, &path);
10922 if (ret)
10923 break;
10925 btrfs_release_path(&path);
10927 if (used != total) {
10928 btrfs_item_key_to_cpu(eb, &key, slot);
10929 error(
10930 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10931 total, used, BTRFS_ROOT_TREE_OBJECTID,
10932 BTRFS_DEV_EXTENT_KEY, dev_id);
10933 return ACCOUNTING_MISMATCH;
10935 return 0;
10939 * Check a block group item with its referener (chunk) and its used space
10940 * with extent/metadata item
10942 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10943 struct extent_buffer *eb, int slot)
10945 struct btrfs_root *extent_root = fs_info->extent_root;
10946 struct btrfs_root *chunk_root = fs_info->chunk_root;
10947 struct btrfs_block_group_item *bi;
10948 struct btrfs_block_group_item bg_item;
10949 struct btrfs_path path;
10950 struct btrfs_key bg_key;
10951 struct btrfs_key chunk_key;
10952 struct btrfs_key extent_key;
10953 struct btrfs_chunk *chunk;
10954 struct extent_buffer *leaf;
10955 struct btrfs_extent_item *ei;
10956 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10957 u64 flags;
10958 u64 bg_flags;
10959 u64 used;
10960 u64 total = 0;
10961 int ret;
10962 int err = 0;
10964 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10965 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10966 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10967 used = btrfs_block_group_used(&bg_item);
10968 bg_flags = btrfs_block_group_flags(&bg_item);
10970 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10971 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10972 chunk_key.offset = bg_key.objectid;
10974 btrfs_init_path(&path);
10975 /* Search for the referencer chunk */
10976 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10977 if (ret) {
10978 error(
10979 "block group[%llu %llu] did not find the related chunk item",
10980 bg_key.objectid, bg_key.offset);
10981 err |= REFERENCER_MISSING;
10982 } else {
10983 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10984 struct btrfs_chunk);
10985 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10986 bg_key.offset) {
10987 error(
10988 "block group[%llu %llu] related chunk item length does not match",
10989 bg_key.objectid, bg_key.offset);
10990 err |= REFERENCER_MISMATCH;
10993 btrfs_release_path(&path);
10995 /* Search from the block group bytenr */
10996 extent_key.objectid = bg_key.objectid;
10997 extent_key.type = 0;
10998 extent_key.offset = 0;
11000 btrfs_init_path(&path);
11001 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11002 if (ret < 0)
11003 goto out;
11005 /* Iterate extent tree to account used space */
11006 while (1) {
11007 leaf = path.nodes[0];
11009 /* Search slot can point to the last item beyond leaf nritems */
11010 if (path.slots[0] >= btrfs_header_nritems(leaf))
11011 goto next;
11013 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11014 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11015 break;
11017 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11018 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11019 goto next;
11020 if (extent_key.objectid < bg_key.objectid)
11021 goto next;
11023 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11024 total += nodesize;
11025 else
11026 total += extent_key.offset;
11028 ei = btrfs_item_ptr(leaf, path.slots[0],
11029 struct btrfs_extent_item);
11030 flags = btrfs_extent_flags(leaf, ei);
11031 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11032 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11033 error(
11034 "bad extent[%llu, %llu) type mismatch with chunk",
11035 extent_key.objectid,
11036 extent_key.objectid + extent_key.offset);
11037 err |= CHUNK_TYPE_MISMATCH;
11039 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11040 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11041 BTRFS_BLOCK_GROUP_METADATA))) {
11042 error(
11043 "bad extent[%llu, %llu) type mismatch with chunk",
11044 extent_key.objectid,
11045 extent_key.objectid + nodesize);
11046 err |= CHUNK_TYPE_MISMATCH;
11049 next:
11050 ret = btrfs_next_item(extent_root, &path);
11051 if (ret)
11052 break;
11055 out:
11056 btrfs_release_path(&path);
11058 if (total != used) {
11059 error(
11060 "block group[%llu %llu] used %llu but extent items used %llu",
11061 bg_key.objectid, bg_key.offset, used, total);
11062 err |= ACCOUNTING_MISMATCH;
11064 return err;
11068 * Check a chunk item.
11069 * Including checking all referred dev_extents and block group
11071 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11072 struct extent_buffer *eb, int slot)
11074 struct btrfs_root *extent_root = fs_info->extent_root;
11075 struct btrfs_root *dev_root = fs_info->dev_root;
11076 struct btrfs_path path;
11077 struct btrfs_key chunk_key;
11078 struct btrfs_key bg_key;
11079 struct btrfs_key devext_key;
11080 struct btrfs_chunk *chunk;
11081 struct extent_buffer *leaf;
11082 struct btrfs_block_group_item *bi;
11083 struct btrfs_block_group_item bg_item;
11084 struct btrfs_dev_extent *ptr;
11085 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11086 u64 length;
11087 u64 chunk_end;
11088 u64 type;
11089 u64 profile;
11090 int num_stripes;
11091 u64 offset;
11092 u64 objectid;
11093 int i;
11094 int ret;
11095 int err = 0;
11097 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11098 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11099 length = btrfs_chunk_length(eb, chunk);
11100 chunk_end = chunk_key.offset + length;
11101 if (!IS_ALIGNED(length, sectorsize)) {
11102 error("chunk[%llu %llu) not aligned to %u",
11103 chunk_key.offset, chunk_end, sectorsize);
11104 err |= BYTES_UNALIGNED;
11105 goto out;
11108 type = btrfs_chunk_type(eb, chunk);
11109 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11110 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11111 error("chunk[%llu %llu) has no chunk type",
11112 chunk_key.offset, chunk_end);
11113 err |= UNKNOWN_TYPE;
11115 if (profile && (profile & (profile - 1))) {
11116 error("chunk[%llu %llu) multiple profiles detected: %llx",
11117 chunk_key.offset, chunk_end, profile);
11118 err |= UNKNOWN_TYPE;
11121 bg_key.objectid = chunk_key.offset;
11122 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11123 bg_key.offset = length;
11125 btrfs_init_path(&path);
11126 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11127 if (ret) {
11128 error(
11129 "chunk[%llu %llu) did not find the related block group item",
11130 chunk_key.offset, chunk_end);
11131 err |= REFERENCER_MISSING;
11132 } else{
11133 leaf = path.nodes[0];
11134 bi = btrfs_item_ptr(leaf, path.slots[0],
11135 struct btrfs_block_group_item);
11136 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11137 sizeof(bg_item));
11138 if (btrfs_block_group_flags(&bg_item) != type) {
11139 error(
11140 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11141 chunk_key.offset, chunk_end, type,
11142 btrfs_block_group_flags(&bg_item));
11143 err |= REFERENCER_MISSING;
11147 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11148 for (i = 0; i < num_stripes; i++) {
11149 btrfs_release_path(&path);
11150 btrfs_init_path(&path);
11151 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11152 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11153 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11155 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11156 0, 0);
11157 if (ret)
11158 goto not_match_dev;
11160 leaf = path.nodes[0];
11161 ptr = btrfs_item_ptr(leaf, path.slots[0],
11162 struct btrfs_dev_extent);
11163 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11164 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11165 if (objectid != chunk_key.objectid ||
11166 offset != chunk_key.offset ||
11167 btrfs_dev_extent_length(leaf, ptr) != length)
11168 goto not_match_dev;
11169 continue;
11170 not_match_dev:
11171 err |= BACKREF_MISSING;
11172 error(
11173 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11174 chunk_key.objectid, chunk_end, i);
11175 continue;
11177 btrfs_release_path(&path);
11178 out:
11179 return err;
11183 * Main entry function to check known items and update related accounting info
11185 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11187 struct btrfs_fs_info *fs_info = root->fs_info;
11188 struct btrfs_key key;
11189 int slot = 0;
11190 int type;
11191 struct btrfs_extent_data_ref *dref;
11192 int ret;
11193 int err = 0;
11195 next:
11196 btrfs_item_key_to_cpu(eb, &key, slot);
11197 type = key.type;
11199 switch (type) {
11200 case BTRFS_EXTENT_DATA_KEY:
11201 ret = check_extent_data_item(root, eb, slot);
11202 err |= ret;
11203 break;
11204 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11205 ret = check_block_group_item(fs_info, eb, slot);
11206 err |= ret;
11207 break;
11208 case BTRFS_DEV_ITEM_KEY:
11209 ret = check_dev_item(fs_info, eb, slot);
11210 err |= ret;
11211 break;
11212 case BTRFS_CHUNK_ITEM_KEY:
11213 ret = check_chunk_item(fs_info, eb, slot);
11214 err |= ret;
11215 break;
11216 case BTRFS_DEV_EXTENT_KEY:
11217 ret = check_dev_extent_item(fs_info, eb, slot);
11218 err |= ret;
11219 break;
11220 case BTRFS_EXTENT_ITEM_KEY:
11221 case BTRFS_METADATA_ITEM_KEY:
11222 ret = check_extent_item(fs_info, eb, slot);
11223 err |= ret;
11224 break;
11225 case BTRFS_EXTENT_CSUM_KEY:
11226 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11227 break;
11228 case BTRFS_TREE_BLOCK_REF_KEY:
11229 ret = check_tree_block_backref(fs_info, key.offset,
11230 key.objectid, -1);
11231 err |= ret;
11232 break;
11233 case BTRFS_EXTENT_DATA_REF_KEY:
11234 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11235 ret = check_extent_data_backref(fs_info,
11236 btrfs_extent_data_ref_root(eb, dref),
11237 btrfs_extent_data_ref_objectid(eb, dref),
11238 btrfs_extent_data_ref_offset(eb, dref),
11239 key.objectid, 0,
11240 btrfs_extent_data_ref_count(eb, dref));
11241 err |= ret;
11242 break;
11243 case BTRFS_SHARED_BLOCK_REF_KEY:
11244 ret = check_shared_block_backref(fs_info, key.offset,
11245 key.objectid, -1);
11246 err |= ret;
11247 break;
11248 case BTRFS_SHARED_DATA_REF_KEY:
11249 ret = check_shared_data_backref(fs_info, key.offset,
11250 key.objectid);
11251 err |= ret;
11252 break;
11253 default:
11254 break;
11257 if (++slot < btrfs_header_nritems(eb))
11258 goto next;
11260 return err;
11264 * Helper function for later fs/subvol tree check. To determine if a tree
11265 * block should be checked.
11266 * This function will ensure only the direct referencer with lowest rootid to
11267 * check a fs/subvolume tree block.
11269 * Backref check at extent tree would detect errors like missing subvolume
11270 * tree, so we can do aggressive check to reduce duplicated checks.
11272 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11274 struct btrfs_root *extent_root = root->fs_info->extent_root;
11275 struct btrfs_key key;
11276 struct btrfs_path path;
11277 struct extent_buffer *leaf;
11278 int slot;
11279 struct btrfs_extent_item *ei;
11280 unsigned long ptr;
11281 unsigned long end;
11282 int type;
11283 u32 item_size;
11284 u64 offset;
11285 struct btrfs_extent_inline_ref *iref;
11286 int ret;
11288 btrfs_init_path(&path);
11289 key.objectid = btrfs_header_bytenr(eb);
11290 key.type = BTRFS_METADATA_ITEM_KEY;
11291 key.offset = (u64)-1;
11294 * Any failure in backref resolving means we can't determine
11295 * whom the tree block belongs to.
11296 * So in that case, we need to check that tree block
11298 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11299 if (ret < 0)
11300 goto need_check;
11302 ret = btrfs_previous_extent_item(extent_root, &path,
11303 btrfs_header_bytenr(eb));
11304 if (ret)
11305 goto need_check;
11307 leaf = path.nodes[0];
11308 slot = path.slots[0];
11309 btrfs_item_key_to_cpu(leaf, &key, slot);
11310 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11312 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11313 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11314 } else {
11315 struct btrfs_tree_block_info *info;
11317 info = (struct btrfs_tree_block_info *)(ei + 1);
11318 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11321 item_size = btrfs_item_size_nr(leaf, slot);
11322 ptr = (unsigned long)iref;
11323 end = (unsigned long)ei + item_size;
11324 while (ptr < end) {
11325 iref = (struct btrfs_extent_inline_ref *)ptr;
11326 type = btrfs_extent_inline_ref_type(leaf, iref);
11327 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11330 * We only check the tree block if current root is
11331 * the lowest referencer of it.
11333 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11334 offset < root->objectid) {
11335 btrfs_release_path(&path);
11336 return 0;
11339 ptr += btrfs_extent_inline_ref_size(type);
11342 * Normally we should also check keyed tree block ref, but that may be
11343 * very time consuming. Inlined ref should already make us skip a lot
11344 * of refs now. So skip search keyed tree block ref.
11347 need_check:
11348 btrfs_release_path(&path);
11349 return 1;
11353 * Traversal function for tree block. We will do:
11354 * 1) Skip shared fs/subvolume tree blocks
11355 * 2) Update related bytes accounting
11356 * 3) Pre-order traversal
11358 static int traverse_tree_block(struct btrfs_root *root,
11359 struct extent_buffer *node)
11361 struct extent_buffer *eb;
11362 struct btrfs_key key;
11363 struct btrfs_key drop_key;
11364 int level;
11365 u64 nr;
11366 int i;
11367 int err = 0;
11368 int ret;
11371 * Skip shared fs/subvolume tree block, in that case they will
11372 * be checked by referencer with lowest rootid
11374 if (is_fstree(root->objectid) && !should_check(root, node))
11375 return 0;
11377 /* Update bytes accounting */
11378 total_btree_bytes += node->len;
11379 if (fs_root_objectid(btrfs_header_owner(node)))
11380 total_fs_tree_bytes += node->len;
11381 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11382 total_extent_tree_bytes += node->len;
11383 if (!found_old_backref &&
11384 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11385 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11386 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11387 found_old_backref = 1;
11389 /* pre-order tranversal, check itself first */
11390 level = btrfs_header_level(node);
11391 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11392 btrfs_header_level(node),
11393 btrfs_header_owner(node));
11394 err |= ret;
11395 if (err)
11396 error(
11397 "check %s failed root %llu bytenr %llu level %d, force continue check",
11398 level ? "node":"leaf", root->objectid,
11399 btrfs_header_bytenr(node), btrfs_header_level(node));
11401 if (!level) {
11402 btree_space_waste += btrfs_leaf_free_space(root, node);
11403 ret = check_leaf_items(root, node);
11404 err |= ret;
11405 return err;
11408 nr = btrfs_header_nritems(node);
11409 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11410 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11411 sizeof(struct btrfs_key_ptr);
11413 /* Then check all its children */
11414 for (i = 0; i < nr; i++) {
11415 u64 blocknr = btrfs_node_blockptr(node, i);
11417 btrfs_node_key_to_cpu(node, &key, i);
11418 if (level == root->root_item.drop_level &&
11419 is_dropped_key(&key, &drop_key))
11420 continue;
11423 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11424 * to call the function itself.
11426 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11427 if (extent_buffer_uptodate(eb)) {
11428 ret = traverse_tree_block(root, eb);
11429 err |= ret;
11431 free_extent_buffer(eb);
11434 return err;
11438 * Low memory usage version check_chunks_and_extents.
11440 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11442 struct btrfs_path path;
11443 struct btrfs_key key;
11444 struct btrfs_root *root1;
11445 struct btrfs_root *cur_root;
11446 int err = 0;
11447 int ret;
11449 root1 = root->fs_info->chunk_root;
11450 ret = traverse_tree_block(root1, root1->node);
11451 err |= ret;
11453 root1 = root->fs_info->tree_root;
11454 ret = traverse_tree_block(root1, root1->node);
11455 err |= ret;
11457 btrfs_init_path(&path);
11458 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11459 key.offset = 0;
11460 key.type = BTRFS_ROOT_ITEM_KEY;
11462 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11463 if (ret) {
11464 error("cannot find extent treet in tree_root");
11465 goto out;
11468 while (1) {
11469 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11470 if (key.type != BTRFS_ROOT_ITEM_KEY)
11471 goto next;
11472 key.offset = (u64)-1;
11474 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11475 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11476 &key);
11477 else
11478 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11479 if (IS_ERR(cur_root) || !cur_root) {
11480 error("failed to read tree: %lld", key.objectid);
11481 goto next;
11484 ret = traverse_tree_block(cur_root, cur_root->node);
11485 err |= ret;
11487 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11488 btrfs_free_fs_root(cur_root);
11489 next:
11490 ret = btrfs_next_item(root1, &path);
11491 if (ret)
11492 goto out;
11495 out:
11496 btrfs_release_path(&path);
11497 return err;
11500 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11501 struct btrfs_root *root, int overwrite)
11503 struct extent_buffer *c;
11504 struct extent_buffer *old = root->node;
11505 int level;
11506 int ret;
11507 struct btrfs_disk_key disk_key = {0,0,0};
11509 level = 0;
11511 if (overwrite) {
11512 c = old;
11513 extent_buffer_get(c);
11514 goto init;
11516 c = btrfs_alloc_free_block(trans, root,
11517 root->nodesize,
11518 root->root_key.objectid,
11519 &disk_key, level, 0, 0);
11520 if (IS_ERR(c)) {
11521 c = old;
11522 extent_buffer_get(c);
11523 overwrite = 1;
11525 init:
11526 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11527 btrfs_set_header_level(c, level);
11528 btrfs_set_header_bytenr(c, c->start);
11529 btrfs_set_header_generation(c, trans->transid);
11530 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11531 btrfs_set_header_owner(c, root->root_key.objectid);
11533 write_extent_buffer(c, root->fs_info->fsid,
11534 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11536 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11537 btrfs_header_chunk_tree_uuid(c),
11538 BTRFS_UUID_SIZE);
11540 btrfs_mark_buffer_dirty(c);
11542 * this case can happen in the following case:
11544 * 1.overwrite previous root.
11546 * 2.reinit reloc data root, this is because we skip pin
11547 * down reloc data tree before which means we can allocate
11548 * same block bytenr here.
11550 if (old->start == c->start) {
11551 btrfs_set_root_generation(&root->root_item,
11552 trans->transid);
11553 root->root_item.level = btrfs_header_level(root->node);
11554 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11555 &root->root_key, &root->root_item);
11556 if (ret) {
11557 free_extent_buffer(c);
11558 return ret;
11561 free_extent_buffer(old);
11562 root->node = c;
11563 add_root_to_dirty_list(root);
11564 return 0;
11567 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11568 struct extent_buffer *eb, int tree_root)
11570 struct extent_buffer *tmp;
11571 struct btrfs_root_item *ri;
11572 struct btrfs_key key;
11573 u64 bytenr;
11574 u32 nodesize;
11575 int level = btrfs_header_level(eb);
11576 int nritems;
11577 int ret;
11578 int i;
11581 * If we have pinned this block before, don't pin it again.
11582 * This can not only avoid forever loop with broken filesystem
11583 * but also give us some speedups.
11585 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11586 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11587 return 0;
11589 btrfs_pin_extent(fs_info, eb->start, eb->len);
11591 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11592 nritems = btrfs_header_nritems(eb);
11593 for (i = 0; i < nritems; i++) {
11594 if (level == 0) {
11595 btrfs_item_key_to_cpu(eb, &key, i);
11596 if (key.type != BTRFS_ROOT_ITEM_KEY)
11597 continue;
11598 /* Skip the extent root and reloc roots */
11599 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11600 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11601 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11602 continue;
11603 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11604 bytenr = btrfs_disk_root_bytenr(eb, ri);
11607 * If at any point we start needing the real root we
11608 * will have to build a stump root for the root we are
11609 * in, but for now this doesn't actually use the root so
11610 * just pass in extent_root.
11612 tmp = read_tree_block(fs_info->extent_root, bytenr,
11613 nodesize, 0);
11614 if (!extent_buffer_uptodate(tmp)) {
11615 fprintf(stderr, "Error reading root block\n");
11616 return -EIO;
11618 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11619 free_extent_buffer(tmp);
11620 if (ret)
11621 return ret;
11622 } else {
11623 bytenr = btrfs_node_blockptr(eb, i);
11625 /* If we aren't the tree root don't read the block */
11626 if (level == 1 && !tree_root) {
11627 btrfs_pin_extent(fs_info, bytenr, nodesize);
11628 continue;
11631 tmp = read_tree_block(fs_info->extent_root, bytenr,
11632 nodesize, 0);
11633 if (!extent_buffer_uptodate(tmp)) {
11634 fprintf(stderr, "Error reading tree block\n");
11635 return -EIO;
11637 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11638 free_extent_buffer(tmp);
11639 if (ret)
11640 return ret;
11644 return 0;
11647 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11649 int ret;
11651 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11652 if (ret)
11653 return ret;
11655 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11658 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11660 struct btrfs_block_group_cache *cache;
11661 struct btrfs_path path;
11662 struct extent_buffer *leaf;
11663 struct btrfs_chunk *chunk;
11664 struct btrfs_key key;
11665 int ret;
11666 u64 start;
11668 btrfs_init_path(&path);
11669 key.objectid = 0;
11670 key.type = BTRFS_CHUNK_ITEM_KEY;
11671 key.offset = 0;
11672 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11673 if (ret < 0) {
11674 btrfs_release_path(&path);
11675 return ret;
11679 * We do this in case the block groups were screwed up and had alloc
11680 * bits that aren't actually set on the chunks. This happens with
11681 * restored images every time and could happen in real life I guess.
11683 fs_info->avail_data_alloc_bits = 0;
11684 fs_info->avail_metadata_alloc_bits = 0;
11685 fs_info->avail_system_alloc_bits = 0;
11687 /* First we need to create the in-memory block groups */
11688 while (1) {
11689 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11690 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11691 if (ret < 0) {
11692 btrfs_release_path(&path);
11693 return ret;
11695 if (ret) {
11696 ret = 0;
11697 break;
11700 leaf = path.nodes[0];
11701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11702 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11703 path.slots[0]++;
11704 continue;
11707 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11708 btrfs_add_block_group(fs_info, 0,
11709 btrfs_chunk_type(leaf, chunk),
11710 key.objectid, key.offset,
11711 btrfs_chunk_length(leaf, chunk));
11712 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11713 key.offset + btrfs_chunk_length(leaf, chunk));
11714 path.slots[0]++;
11716 start = 0;
11717 while (1) {
11718 cache = btrfs_lookup_first_block_group(fs_info, start);
11719 if (!cache)
11720 break;
11721 cache->cached = 1;
11722 start = cache->key.objectid + cache->key.offset;
11725 btrfs_release_path(&path);
11726 return 0;
11729 static int reset_balance(struct btrfs_trans_handle *trans,
11730 struct btrfs_fs_info *fs_info)
11732 struct btrfs_root *root = fs_info->tree_root;
11733 struct btrfs_path path;
11734 struct extent_buffer *leaf;
11735 struct btrfs_key key;
11736 int del_slot, del_nr = 0;
11737 int ret;
11738 int found = 0;
11740 btrfs_init_path(&path);
11741 key.objectid = BTRFS_BALANCE_OBJECTID;
11742 key.type = BTRFS_BALANCE_ITEM_KEY;
11743 key.offset = 0;
11744 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11745 if (ret) {
11746 if (ret > 0)
11747 ret = 0;
11748 if (!ret)
11749 goto reinit_data_reloc;
11750 else
11751 goto out;
11754 ret = btrfs_del_item(trans, root, &path);
11755 if (ret)
11756 goto out;
11757 btrfs_release_path(&path);
11759 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11760 key.type = BTRFS_ROOT_ITEM_KEY;
11761 key.offset = 0;
11762 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11763 if (ret < 0)
11764 goto out;
11765 while (1) {
11766 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11767 if (!found)
11768 break;
11770 if (del_nr) {
11771 ret = btrfs_del_items(trans, root, &path,
11772 del_slot, del_nr);
11773 del_nr = 0;
11774 if (ret)
11775 goto out;
11777 key.offset++;
11778 btrfs_release_path(&path);
11780 found = 0;
11781 ret = btrfs_search_slot(trans, root, &key, &path,
11782 -1, 1);
11783 if (ret < 0)
11784 goto out;
11785 continue;
11787 found = 1;
11788 leaf = path.nodes[0];
11789 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11790 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11791 break;
11792 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11793 path.slots[0]++;
11794 continue;
11796 if (!del_nr) {
11797 del_slot = path.slots[0];
11798 del_nr = 1;
11799 } else {
11800 del_nr++;
11802 path.slots[0]++;
11805 if (del_nr) {
11806 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11807 if (ret)
11808 goto out;
11810 btrfs_release_path(&path);
11812 reinit_data_reloc:
11813 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11814 key.type = BTRFS_ROOT_ITEM_KEY;
11815 key.offset = (u64)-1;
11816 root = btrfs_read_fs_root(fs_info, &key);
11817 if (IS_ERR(root)) {
11818 fprintf(stderr, "Error reading data reloc tree\n");
11819 ret = PTR_ERR(root);
11820 goto out;
11822 record_root_in_trans(trans, root);
11823 ret = btrfs_fsck_reinit_root(trans, root, 0);
11824 if (ret)
11825 goto out;
11826 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11827 out:
11828 btrfs_release_path(&path);
11829 return ret;
11832 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11833 struct btrfs_fs_info *fs_info)
11835 u64 start = 0;
11836 int ret;
11839 * The only reason we don't do this is because right now we're just
11840 * walking the trees we find and pinning down their bytes, we don't look
11841 * at any of the leaves. In order to do mixed groups we'd have to check
11842 * the leaves of any fs roots and pin down the bytes for any file
11843 * extents we find. Not hard but why do it if we don't have to?
11845 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11846 fprintf(stderr, "We don't support re-initing the extent tree "
11847 "for mixed block groups yet, please notify a btrfs "
11848 "developer you want to do this so they can add this "
11849 "functionality.\n");
11850 return -EINVAL;
11854 * first we need to walk all of the trees except the extent tree and pin
11855 * down the bytes that are in use so we don't overwrite any existing
11856 * metadata.
11858 ret = pin_metadata_blocks(fs_info);
11859 if (ret) {
11860 fprintf(stderr, "error pinning down used bytes\n");
11861 return ret;
11865 * Need to drop all the block groups since we're going to recreate all
11866 * of them again.
11868 btrfs_free_block_groups(fs_info);
11869 ret = reset_block_groups(fs_info);
11870 if (ret) {
11871 fprintf(stderr, "error resetting the block groups\n");
11872 return ret;
11875 /* Ok we can allocate now, reinit the extent root */
11876 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11877 if (ret) {
11878 fprintf(stderr, "extent root initialization failed\n");
11880 * When the transaction code is updated we should end the
11881 * transaction, but for now progs only knows about commit so
11882 * just return an error.
11884 return ret;
11888 * Now we have all the in-memory block groups setup so we can make
11889 * allocations properly, and the metadata we care about is safe since we
11890 * pinned all of it above.
11892 while (1) {
11893 struct btrfs_block_group_cache *cache;
11895 cache = btrfs_lookup_first_block_group(fs_info, start);
11896 if (!cache)
11897 break;
11898 start = cache->key.objectid + cache->key.offset;
11899 ret = btrfs_insert_item(trans, fs_info->extent_root,
11900 &cache->key, &cache->item,
11901 sizeof(cache->item));
11902 if (ret) {
11903 fprintf(stderr, "Error adding block group\n");
11904 return ret;
11906 btrfs_extent_post_op(trans, fs_info->extent_root);
11909 ret = reset_balance(trans, fs_info);
11910 if (ret)
11911 fprintf(stderr, "error resetting the pending balance\n");
11913 return ret;
11916 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11918 struct btrfs_path path;
11919 struct btrfs_trans_handle *trans;
11920 struct btrfs_key key;
11921 int ret;
11923 printf("Recowing metadata block %llu\n", eb->start);
11924 key.objectid = btrfs_header_owner(eb);
11925 key.type = BTRFS_ROOT_ITEM_KEY;
11926 key.offset = (u64)-1;
11928 root = btrfs_read_fs_root(root->fs_info, &key);
11929 if (IS_ERR(root)) {
11930 fprintf(stderr, "Couldn't find owner root %llu\n",
11931 key.objectid);
11932 return PTR_ERR(root);
11935 trans = btrfs_start_transaction(root, 1);
11936 if (IS_ERR(trans))
11937 return PTR_ERR(trans);
11939 btrfs_init_path(&path);
11940 path.lowest_level = btrfs_header_level(eb);
11941 if (path.lowest_level)
11942 btrfs_node_key_to_cpu(eb, &key, 0);
11943 else
11944 btrfs_item_key_to_cpu(eb, &key, 0);
11946 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11947 btrfs_commit_transaction(trans, root);
11948 btrfs_release_path(&path);
11949 return ret;
11952 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11954 struct btrfs_path path;
11955 struct btrfs_trans_handle *trans;
11956 struct btrfs_key key;
11957 int ret;
11959 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11960 bad->key.type, bad->key.offset);
11961 key.objectid = bad->root_id;
11962 key.type = BTRFS_ROOT_ITEM_KEY;
11963 key.offset = (u64)-1;
11965 root = btrfs_read_fs_root(root->fs_info, &key);
11966 if (IS_ERR(root)) {
11967 fprintf(stderr, "Couldn't find owner root %llu\n",
11968 key.objectid);
11969 return PTR_ERR(root);
11972 trans = btrfs_start_transaction(root, 1);
11973 if (IS_ERR(trans))
11974 return PTR_ERR(trans);
11976 btrfs_init_path(&path);
11977 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11978 if (ret) {
11979 if (ret > 0)
11980 ret = 0;
11981 goto out;
11983 ret = btrfs_del_item(trans, root, &path);
11984 out:
11985 btrfs_commit_transaction(trans, root);
11986 btrfs_release_path(&path);
11987 return ret;
11990 static int zero_log_tree(struct btrfs_root *root)
11992 struct btrfs_trans_handle *trans;
11993 int ret;
11995 trans = btrfs_start_transaction(root, 1);
11996 if (IS_ERR(trans)) {
11997 ret = PTR_ERR(trans);
11998 return ret;
12000 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12001 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12002 ret = btrfs_commit_transaction(trans, root);
12003 return ret;
12006 static int populate_csum(struct btrfs_trans_handle *trans,
12007 struct btrfs_root *csum_root, char *buf, u64 start,
12008 u64 len)
12010 u64 offset = 0;
12011 u64 sectorsize;
12012 int ret = 0;
12014 while (offset < len) {
12015 sectorsize = csum_root->sectorsize;
12016 ret = read_extent_data(csum_root, buf, start + offset,
12017 &sectorsize, 0);
12018 if (ret)
12019 break;
12020 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12021 start + offset, buf, sectorsize);
12022 if (ret)
12023 break;
12024 offset += sectorsize;
12026 return ret;
12029 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12030 struct btrfs_root *csum_root,
12031 struct btrfs_root *cur_root)
12033 struct btrfs_path path;
12034 struct btrfs_key key;
12035 struct extent_buffer *node;
12036 struct btrfs_file_extent_item *fi;
12037 char *buf = NULL;
12038 u64 start = 0;
12039 u64 len = 0;
12040 int slot = 0;
12041 int ret = 0;
12043 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12044 if (!buf)
12045 return -ENOMEM;
12047 btrfs_init_path(&path);
12048 key.objectid = 0;
12049 key.offset = 0;
12050 key.type = 0;
12051 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12052 if (ret < 0)
12053 goto out;
12054 /* Iterate all regular file extents and fill its csum */
12055 while (1) {
12056 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12058 if (key.type != BTRFS_EXTENT_DATA_KEY)
12059 goto next;
12060 node = path.nodes[0];
12061 slot = path.slots[0];
12062 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12063 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12064 goto next;
12065 start = btrfs_file_extent_disk_bytenr(node, fi);
12066 len = btrfs_file_extent_disk_num_bytes(node, fi);
12068 ret = populate_csum(trans, csum_root, buf, start, len);
12069 if (ret == -EEXIST)
12070 ret = 0;
12071 if (ret < 0)
12072 goto out;
12073 next:
12075 * TODO: if next leaf is corrupted, jump to nearest next valid
12076 * leaf.
12078 ret = btrfs_next_item(cur_root, &path);
12079 if (ret < 0)
12080 goto out;
12081 if (ret > 0) {
12082 ret = 0;
12083 goto out;
12087 out:
12088 btrfs_release_path(&path);
12089 free(buf);
12090 return ret;
12093 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12094 struct btrfs_root *csum_root)
12096 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12097 struct btrfs_path path;
12098 struct btrfs_root *tree_root = fs_info->tree_root;
12099 struct btrfs_root *cur_root;
12100 struct extent_buffer *node;
12101 struct btrfs_key key;
12102 int slot = 0;
12103 int ret = 0;
12105 btrfs_init_path(&path);
12106 key.objectid = BTRFS_FS_TREE_OBJECTID;
12107 key.offset = 0;
12108 key.type = BTRFS_ROOT_ITEM_KEY;
12109 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12110 if (ret < 0)
12111 goto out;
12112 if (ret > 0) {
12113 ret = -ENOENT;
12114 goto out;
12117 while (1) {
12118 node = path.nodes[0];
12119 slot = path.slots[0];
12120 btrfs_item_key_to_cpu(node, &key, slot);
12121 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12122 goto out;
12123 if (key.type != BTRFS_ROOT_ITEM_KEY)
12124 goto next;
12125 if (!is_fstree(key.objectid))
12126 goto next;
12127 key.offset = (u64)-1;
12129 cur_root = btrfs_read_fs_root(fs_info, &key);
12130 if (IS_ERR(cur_root) || !cur_root) {
12131 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12132 key.objectid);
12133 goto out;
12135 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12136 cur_root);
12137 if (ret < 0)
12138 goto out;
12139 next:
12140 ret = btrfs_next_item(tree_root, &path);
12141 if (ret > 0) {
12142 ret = 0;
12143 goto out;
12145 if (ret < 0)
12146 goto out;
12149 out:
12150 btrfs_release_path(&path);
12151 return ret;
12154 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12155 struct btrfs_root *csum_root)
12157 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12158 struct btrfs_path path;
12159 struct btrfs_extent_item *ei;
12160 struct extent_buffer *leaf;
12161 char *buf;
12162 struct btrfs_key key;
12163 int ret;
12165 btrfs_init_path(&path);
12166 key.objectid = 0;
12167 key.type = BTRFS_EXTENT_ITEM_KEY;
12168 key.offset = 0;
12169 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12170 if (ret < 0) {
12171 btrfs_release_path(&path);
12172 return ret;
12175 buf = malloc(csum_root->sectorsize);
12176 if (!buf) {
12177 btrfs_release_path(&path);
12178 return -ENOMEM;
12181 while (1) {
12182 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12183 ret = btrfs_next_leaf(extent_root, &path);
12184 if (ret < 0)
12185 break;
12186 if (ret) {
12187 ret = 0;
12188 break;
12191 leaf = path.nodes[0];
12193 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12194 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12195 path.slots[0]++;
12196 continue;
12199 ei = btrfs_item_ptr(leaf, path.slots[0],
12200 struct btrfs_extent_item);
12201 if (!(btrfs_extent_flags(leaf, ei) &
12202 BTRFS_EXTENT_FLAG_DATA)) {
12203 path.slots[0]++;
12204 continue;
12207 ret = populate_csum(trans, csum_root, buf, key.objectid,
12208 key.offset);
12209 if (ret)
12210 break;
12211 path.slots[0]++;
12214 btrfs_release_path(&path);
12215 free(buf);
12216 return ret;
/*
 * Recalculate the csums and put them into the csum tree.
 *
 * Extent tree init wipes out all the extent info, so in that case we
 * cannot depend on the extent tree and must use the fs trees instead.  If
 * @search_fs_tree is set, the fs/subvol trees are used to init the csum
 * tree; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12236 static void free_roots_info_cache(void)
12238 if (!roots_info_cache)
12239 return;
12241 while (!cache_tree_empty(roots_info_cache)) {
12242 struct cache_extent *entry;
12243 struct root_item_info *rii;
12245 entry = first_cache_extent(roots_info_cache);
12246 if (!entry)
12247 break;
12248 remove_cache_extent(roots_info_cache, entry);
12249 rii = container_of(entry, struct root_item_info, cache_extent);
12250 free(rii);
12253 free(roots_info_cache);
12254 roots_info_cache = NULL;
12257 static int build_roots_info_cache(struct btrfs_fs_info *info)
12259 int ret = 0;
12260 struct btrfs_key key;
12261 struct extent_buffer *leaf;
12262 struct btrfs_path path;
12264 if (!roots_info_cache) {
12265 roots_info_cache = malloc(sizeof(*roots_info_cache));
12266 if (!roots_info_cache)
12267 return -ENOMEM;
12268 cache_tree_init(roots_info_cache);
12271 btrfs_init_path(&path);
12272 key.objectid = 0;
12273 key.type = BTRFS_EXTENT_ITEM_KEY;
12274 key.offset = 0;
12275 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12276 if (ret < 0)
12277 goto out;
12278 leaf = path.nodes[0];
12280 while (1) {
12281 struct btrfs_key found_key;
12282 struct btrfs_extent_item *ei;
12283 struct btrfs_extent_inline_ref *iref;
12284 int slot = path.slots[0];
12285 int type;
12286 u64 flags;
12287 u64 root_id;
12288 u8 level;
12289 struct cache_extent *entry;
12290 struct root_item_info *rii;
12292 if (slot >= btrfs_header_nritems(leaf)) {
12293 ret = btrfs_next_leaf(info->extent_root, &path);
12294 if (ret < 0) {
12295 break;
12296 } else if (ret) {
12297 ret = 0;
12298 break;
12300 leaf = path.nodes[0];
12301 slot = path.slots[0];
12304 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12306 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12307 found_key.type != BTRFS_METADATA_ITEM_KEY)
12308 goto next;
12310 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12311 flags = btrfs_extent_flags(leaf, ei);
12313 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12314 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12315 goto next;
12317 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12318 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12319 level = found_key.offset;
12320 } else {
12321 struct btrfs_tree_block_info *binfo;
12323 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12324 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12325 level = btrfs_tree_block_level(leaf, binfo);
12329 * For a root extent, it must be of the following type and the
12330 * first (and only one) iref in the item.
12332 type = btrfs_extent_inline_ref_type(leaf, iref);
12333 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12334 goto next;
12336 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12337 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12338 if (!entry) {
12339 rii = malloc(sizeof(struct root_item_info));
12340 if (!rii) {
12341 ret = -ENOMEM;
12342 goto out;
12344 rii->cache_extent.start = root_id;
12345 rii->cache_extent.size = 1;
12346 rii->level = (u8)-1;
12347 entry = &rii->cache_extent;
12348 ret = insert_cache_extent(roots_info_cache, entry);
12349 ASSERT(ret == 0);
12350 } else {
12351 rii = container_of(entry, struct root_item_info,
12352 cache_extent);
12355 ASSERT(rii->cache_extent.start == root_id);
12356 ASSERT(rii->cache_extent.size == 1);
12358 if (level > rii->level || rii->level == (u8)-1) {
12359 rii->level = level;
12360 rii->bytenr = found_key.objectid;
12361 rii->gen = btrfs_extent_generation(leaf, ei);
12362 rii->node_count = 1;
12363 } else if (level == rii->level) {
12364 rii->node_count++;
12366 next:
12367 path.slots[0]++;
12370 out:
12371 btrfs_release_path(&path);
12373 return ret;
12376 static int maybe_repair_root_item(struct btrfs_path *path,
12377 const struct btrfs_key *root_key,
12378 const int read_only_mode)
12380 const u64 root_id = root_key->objectid;
12381 struct cache_extent *entry;
12382 struct root_item_info *rii;
12383 struct btrfs_root_item ri;
12384 unsigned long offset;
12386 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12387 if (!entry) {
12388 fprintf(stderr,
12389 "Error: could not find extent items for root %llu\n",
12390 root_key->objectid);
12391 return -ENOENT;
12394 rii = container_of(entry, struct root_item_info, cache_extent);
12395 ASSERT(rii->cache_extent.start == root_id);
12396 ASSERT(rii->cache_extent.size == 1);
12398 if (rii->node_count != 1) {
12399 fprintf(stderr,
12400 "Error: could not find btree root extent for root %llu\n",
12401 root_id);
12402 return -ENOENT;
12405 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12406 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12408 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12409 btrfs_root_level(&ri) != rii->level ||
12410 btrfs_root_generation(&ri) != rii->gen) {
12413 * If we're in repair mode but our caller told us to not update
12414 * the root item, i.e. just check if it needs to be updated, don't
12415 * print this message, since the caller will call us again shortly
12416 * for the same root item without read only mode (the caller will
12417 * open a transaction first).
12419 if (!(read_only_mode && repair))
12420 fprintf(stderr,
12421 "%sroot item for root %llu,"
12422 " current bytenr %llu, current gen %llu, current level %u,"
12423 " new bytenr %llu, new gen %llu, new level %u\n",
12424 (read_only_mode ? "" : "fixing "),
12425 root_id,
12426 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12427 btrfs_root_level(&ri),
12428 rii->bytenr, rii->gen, rii->level);
12430 if (btrfs_root_generation(&ri) > rii->gen) {
12431 fprintf(stderr,
12432 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12433 root_id, btrfs_root_generation(&ri), rii->gen);
12434 return -EINVAL;
12437 if (!read_only_mode) {
12438 btrfs_set_root_bytenr(&ri, rii->bytenr);
12439 btrfs_set_root_level(&ri, rii->level);
12440 btrfs_set_root_generation(&ri, rii->gen);
12441 write_extent_buffer(path->nodes[0], &ri,
12442 offset, sizeof(ri));
12445 return 1;
12448 return 0;
12452 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12453 * caused read-only snapshots to be corrupted if they were created at a moment
12454 * when the source subvolume/snapshot had orphan items. The issue was that the
12455 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12456 * node instead of the post orphan cleanup root node.
12457 * So this function, and its callees, just detects and fixes those cases. Even
12458 * though the regression was for read-only snapshots, this function applies to
12459 * any snapshot/subvolume root.
12460 * This must be run before any other repair code - not doing it so, makes other
12461 * repair code delete or modify backrefs in the extent tree for example, which
12462 * will result in an inconsistent fs after repairing the root items.
12464 static int repair_root_items(struct btrfs_fs_info *info)
12466 struct btrfs_path path;
12467 struct btrfs_key key;
12468 struct extent_buffer *leaf;
12469 struct btrfs_trans_handle *trans = NULL;
12470 int ret = 0;
12471 int bad_roots = 0;
12472 int need_trans = 0;
12474 btrfs_init_path(&path);
12476 ret = build_roots_info_cache(info);
12477 if (ret)
12478 goto out;
12480 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12481 key.type = BTRFS_ROOT_ITEM_KEY;
12482 key.offset = 0;
12484 again:
12486 * Avoid opening and committing transactions if a leaf doesn't have
12487 * any root items that need to be fixed, so that we avoid rotating
12488 * backup roots unnecessarily.
12490 if (need_trans) {
12491 trans = btrfs_start_transaction(info->tree_root, 1);
12492 if (IS_ERR(trans)) {
12493 ret = PTR_ERR(trans);
12494 goto out;
12498 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12499 0, trans ? 1 : 0);
12500 if (ret < 0)
12501 goto out;
12502 leaf = path.nodes[0];
12504 while (1) {
12505 struct btrfs_key found_key;
12507 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12508 int no_more_keys = find_next_key(&path, &key);
12510 btrfs_release_path(&path);
12511 if (trans) {
12512 ret = btrfs_commit_transaction(trans,
12513 info->tree_root);
12514 trans = NULL;
12515 if (ret < 0)
12516 goto out;
12518 need_trans = 0;
12519 if (no_more_keys)
12520 break;
12521 goto again;
12524 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12526 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12527 goto next;
12528 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12529 goto next;
12531 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12532 if (ret < 0)
12533 goto out;
12534 if (ret) {
12535 if (!trans && repair) {
12536 need_trans = 1;
12537 key = found_key;
12538 btrfs_release_path(&path);
12539 goto again;
12541 bad_roots++;
12543 next:
12544 path.slots[0]++;
12546 ret = 0;
12547 out:
12548 free_roots_info_cache();
12549 btrfs_release_path(&path);
12550 if (trans)
12551 btrfs_commit_transaction(trans, info->tree_root);
12552 if (ret < 0)
12553 return ret;
12555 return bad_roots;
12558 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12560 struct btrfs_trans_handle *trans;
12561 struct btrfs_block_group_cache *bg_cache;
12562 u64 current = 0;
12563 int ret = 0;
12565 /* Clear all free space cache inodes and its extent data */
12566 while (1) {
12567 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12568 if (!bg_cache)
12569 break;
12570 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12571 if (ret < 0)
12572 return ret;
12573 current = bg_cache->key.objectid + bg_cache->key.offset;
12576 /* Don't forget to set cache_generation to -1 */
12577 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12578 if (IS_ERR(trans)) {
12579 error("failed to update super block cache generation");
12580 return PTR_ERR(trans);
12582 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12583 btrfs_commit_transaction(trans, fs_info->tree_root);
12585 return ret;
/*
 * Usage text for "btrfs check": a NULL-terminated array of strings holding
 * the synopsis, a short and a long description, and one or more lines per
 * command line option.
 */
12588 const char * const cmd_check_usage[] = {
12589 "btrfs check [options] <device>",
12590 "Check structural integrity of a filesystem (unmounted).",
12591 "Check structural integrity of an unmounted filesystem. Verify internal",
12592 "trees' consistency and item connectivity. In the repair mode try to",
12593 "fix the problems found. ",
12594 "WARNING: the repair mode is considered dangerous",
12596 "-s|--super <superblock> use this superblock copy",
12597 "-b|--backup use the first valid backup root copy",
12598 "--repair try to repair the filesystem",
12599 "--readonly run in read-only mode (default)",
12600 "--init-csum-tree create a new CRC tree",
12601 "--init-extent-tree create a new extent tree",
12602 "--mode <MODE> allows choice of memory/IO trade-offs",
12603 " where MODE is one of:",
12604 " original - read inodes and extents to memory (requires",
12605 " more memory, does less IO)",
12606 " lowmem - try to use less memory but read blocks again",
12607 " when needed",
12608 "--check-data-csum verify checksums of data blocks",
12609 "-Q|--qgroup-report print a report on qgroup consistency",
12610 "-E|--subvol-extents <subvolid>",
12611 " print subvolume extents and sharing state",
12612 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12613 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12614 "-p|--progress indicate progress",
12615 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12616 NULL
12619 int cmd_check(int argc, char **argv)
12621 struct cache_tree root_cache;
12622 struct btrfs_root *root;
12623 struct btrfs_fs_info *info;
12624 u64 bytenr = 0;
12625 u64 subvolid = 0;
12626 u64 tree_root_bytenr = 0;
12627 u64 chunk_root_bytenr = 0;
12628 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12629 int ret;
12630 int err = 0;
12631 u64 num;
12632 int init_csum_tree = 0;
12633 int readonly = 0;
12634 int clear_space_cache = 0;
12635 int qgroup_report = 0;
12636 int qgroups_repaired = 0;
12637 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12639 while(1) {
12640 int c;
12641 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12642 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12643 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12644 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12645 static const struct option long_options[] = {
12646 { "super", required_argument, NULL, 's' },
12647 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12648 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12649 { "init-csum-tree", no_argument, NULL,
12650 GETOPT_VAL_INIT_CSUM },
12651 { "init-extent-tree", no_argument, NULL,
12652 GETOPT_VAL_INIT_EXTENT },
12653 { "check-data-csum", no_argument, NULL,
12654 GETOPT_VAL_CHECK_CSUM },
12655 { "backup", no_argument, NULL, 'b' },
12656 { "subvol-extents", required_argument, NULL, 'E' },
12657 { "qgroup-report", no_argument, NULL, 'Q' },
12658 { "tree-root", required_argument, NULL, 'r' },
12659 { "chunk-root", required_argument, NULL,
12660 GETOPT_VAL_CHUNK_TREE },
12661 { "progress", no_argument, NULL, 'p' },
12662 { "mode", required_argument, NULL,
12663 GETOPT_VAL_MODE },
12664 { "clear-space-cache", required_argument, NULL,
12665 GETOPT_VAL_CLEAR_SPACE_CACHE},
12666 { NULL, 0, NULL, 0}
12669 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12670 if (c < 0)
12671 break;
12672 switch(c) {
12673 case 'a': /* ignored */ break;
12674 case 'b':
12675 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12676 break;
12677 case 's':
12678 num = arg_strtou64(optarg);
12679 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12680 error(
12681 "super mirror should be less than %d",
12682 BTRFS_SUPER_MIRROR_MAX);
12683 exit(1);
12685 bytenr = btrfs_sb_offset(((int)num));
12686 printf("using SB copy %llu, bytenr %llu\n", num,
12687 (unsigned long long)bytenr);
12688 break;
12689 case 'Q':
12690 qgroup_report = 1;
12691 break;
12692 case 'E':
12693 subvolid = arg_strtou64(optarg);
12694 break;
12695 case 'r':
12696 tree_root_bytenr = arg_strtou64(optarg);
12697 break;
12698 case GETOPT_VAL_CHUNK_TREE:
12699 chunk_root_bytenr = arg_strtou64(optarg);
12700 break;
12701 case 'p':
12702 ctx.progress_enabled = true;
12703 break;
12704 case '?':
12705 case 'h':
12706 usage(cmd_check_usage);
12707 case GETOPT_VAL_REPAIR:
12708 printf("enabling repair mode\n");
12709 repair = 1;
12710 ctree_flags |= OPEN_CTREE_WRITES;
12711 break;
12712 case GETOPT_VAL_READONLY:
12713 readonly = 1;
12714 break;
12715 case GETOPT_VAL_INIT_CSUM:
12716 printf("Creating a new CRC tree\n");
12717 init_csum_tree = 1;
12718 repair = 1;
12719 ctree_flags |= OPEN_CTREE_WRITES;
12720 break;
12721 case GETOPT_VAL_INIT_EXTENT:
12722 init_extent_tree = 1;
12723 ctree_flags |= (OPEN_CTREE_WRITES |
12724 OPEN_CTREE_NO_BLOCK_GROUPS);
12725 repair = 1;
12726 break;
12727 case GETOPT_VAL_CHECK_CSUM:
12728 check_data_csum = 1;
12729 break;
12730 case GETOPT_VAL_MODE:
12731 check_mode = parse_check_mode(optarg);
12732 if (check_mode == CHECK_MODE_UNKNOWN) {
12733 error("unknown mode: %s", optarg);
12734 exit(1);
12736 break;
12737 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12738 if (strcmp(optarg, "v1") == 0) {
12739 clear_space_cache = 1;
12740 } else if (strcmp(optarg, "v2") == 0) {
12741 clear_space_cache = 2;
12742 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12743 } else {
12744 error(
12745 "invalid argument to --clear-space-cache, must be v1 or v2");
12746 exit(1);
12748 ctree_flags |= OPEN_CTREE_WRITES;
12749 break;
12753 if (check_argc_exact(argc - optind, 1))
12754 usage(cmd_check_usage);
12756 if (ctx.progress_enabled) {
12757 ctx.tp = TASK_NOTHING;
12758 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12761 /* This check is the only reason for --readonly to exist */
12762 if (readonly && repair) {
12763 error("repair options are not compatible with --readonly");
12764 exit(1);
12768 * Not supported yet
12770 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12771 error("low memory mode doesn't support repair yet");
12772 exit(1);
12775 radix_tree_init();
12776 cache_tree_init(&root_cache);
12778 if((ret = check_mounted(argv[optind])) < 0) {
12779 error("could not check mount status: %s", strerror(-ret));
12780 err |= !!ret;
12781 goto err_out;
12782 } else if(ret) {
12783 error("%s is currently mounted, aborting", argv[optind]);
12784 ret = -EBUSY;
12785 err |= !!ret;
12786 goto err_out;
12789 /* only allow partial opening under repair mode */
12790 if (repair)
12791 ctree_flags |= OPEN_CTREE_PARTIAL;
12793 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12794 chunk_root_bytenr, ctree_flags);
12795 if (!info) {
12796 error("cannot open file system");
12797 ret = -EIO;
12798 err |= !!ret;
12799 goto err_out;
12802 global_info = info;
12803 root = info->fs_root;
12804 if (clear_space_cache == 1) {
12805 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12806 error(
12807 "free space cache v2 detected, use --clear-space-cache v2");
12808 ret = 1;
12809 goto close_out;
12811 printf("Clearing free space cache\n");
12812 ret = clear_free_space_cache(info);
12813 if (ret) {
12814 error("failed to clear free space cache");
12815 ret = 1;
12816 } else {
12817 printf("Free space cache cleared\n");
12819 goto close_out;
12820 } else if (clear_space_cache == 2) {
12821 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12822 printf("no free space cache v2 to clear\n");
12823 ret = 0;
12824 goto close_out;
12826 printf("Clear free space cache v2\n");
12827 ret = btrfs_clear_free_space_tree(info);
12828 if (ret) {
12829 error("failed to clear free space cache v2: %d", ret);
12830 ret = 1;
12831 } else {
12832 printf("free space cache v2 cleared\n");
12834 goto close_out;
12838 * repair mode will force us to commit transaction which
12839 * will make us fail to load log tree when mounting.
12841 if (repair && btrfs_super_log_root(info->super_copy)) {
12842 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12843 if (!ret) {
12844 ret = 1;
12845 err |= !!ret;
12846 goto close_out;
12848 ret = zero_log_tree(root);
12849 err |= !!ret;
12850 if (ret) {
12851 error("failed to zero log tree: %d", ret);
12852 goto close_out;
12856 uuid_unparse(info->super_copy->fsid, uuidbuf);
12857 if (qgroup_report) {
12858 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12859 uuidbuf);
12860 ret = qgroup_verify_all(info);
12861 err |= !!ret;
12862 if (ret == 0)
12863 report_qgroups(1);
12864 goto close_out;
12866 if (subvolid) {
12867 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12868 subvolid, argv[optind], uuidbuf);
12869 ret = print_extent_state(info, subvolid);
12870 err |= !!ret;
12871 goto close_out;
12873 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12875 if (!extent_buffer_uptodate(info->tree_root->node) ||
12876 !extent_buffer_uptodate(info->dev_root->node) ||
12877 !extent_buffer_uptodate(info->chunk_root->node)) {
12878 error("critical roots corrupted, unable to check the filesystem");
12879 err |= !!ret;
12880 ret = -EIO;
12881 goto close_out;
12884 if (init_extent_tree || init_csum_tree) {
12885 struct btrfs_trans_handle *trans;
12887 trans = btrfs_start_transaction(info->extent_root, 0);
12888 if (IS_ERR(trans)) {
12889 error("error starting transaction");
12890 ret = PTR_ERR(trans);
12891 err |= !!ret;
12892 goto close_out;
12895 if (init_extent_tree) {
12896 printf("Creating a new extent tree\n");
12897 ret = reinit_extent_tree(trans, info);
12898 err |= !!ret;
12899 if (ret)
12900 goto close_out;
12903 if (init_csum_tree) {
12904 printf("Reinitialize checksum tree\n");
12905 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12906 if (ret) {
12907 error("checksum tree initialization failed: %d",
12908 ret);
12909 ret = -EIO;
12910 err |= !!ret;
12911 goto close_out;
12914 ret = fill_csum_tree(trans, info->csum_root,
12915 init_extent_tree);
12916 err |= !!ret;
12917 if (ret) {
12918 error("checksum tree refilling failed: %d", ret);
12919 return -EIO;
12923 * Ok now we commit and run the normal fsck, which will add
12924 * extent entries for all of the items it finds.
12926 ret = btrfs_commit_transaction(trans, info->extent_root);
12927 err |= !!ret;
12928 if (ret)
12929 goto close_out;
12931 if (!extent_buffer_uptodate(info->extent_root->node)) {
12932 error("critical: extent_root, unable to check the filesystem");
12933 ret = -EIO;
12934 err |= !!ret;
12935 goto close_out;
12937 if (!extent_buffer_uptodate(info->csum_root->node)) {
12938 error("critical: csum_root, unable to check the filesystem");
12939 ret = -EIO;
12940 err |= !!ret;
12941 goto close_out;
12944 if (!ctx.progress_enabled)
12945 fprintf(stderr, "checking extents\n");
12946 if (check_mode == CHECK_MODE_LOWMEM)
12947 ret = check_chunks_and_extents_v2(root);
12948 else
12949 ret = check_chunks_and_extents(root);
12950 err |= !!ret;
12951 if (ret)
12952 error(
12953 "errors found in extent allocation tree or chunk allocation");
12955 ret = repair_root_items(info);
12956 err |= !!ret;
12957 if (ret < 0) {
12958 error("failed to repair root items: %s", strerror(-ret));
12959 goto close_out;
12961 if (repair) {
12962 fprintf(stderr, "Fixed %d roots.\n", ret);
12963 ret = 0;
12964 } else if (ret > 0) {
12965 fprintf(stderr,
12966 "Found %d roots with an outdated root item.\n",
12967 ret);
12968 fprintf(stderr,
12969 "Please run a filesystem check with the option --repair to fix them.\n");
12970 ret = 1;
12971 err |= !!ret;
12972 goto close_out;
12975 if (!ctx.progress_enabled) {
12976 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12977 fprintf(stderr, "checking free space tree\n");
12978 else
12979 fprintf(stderr, "checking free space cache\n");
12981 ret = check_space_cache(root);
12982 err |= !!ret;
12983 if (ret) {
12984 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12985 error("errors found in free space tree");
12986 else
12987 error("errors found in free space cache");
12988 goto out;
12992 * We used to have to have these hole extents in between our real
12993 * extents so if we don't have this flag set we need to make sure there
12994 * are no gaps in the file extents for inodes, otherwise we can just
12995 * ignore it when this happens.
12997 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12998 if (!ctx.progress_enabled)
12999 fprintf(stderr, "checking fs roots\n");
13000 if (check_mode == CHECK_MODE_LOWMEM)
13001 ret = check_fs_roots_v2(root->fs_info);
13002 else
13003 ret = check_fs_roots(root, &root_cache);
13004 err |= !!ret;
13005 if (ret) {
13006 error("errors found in fs roots");
13007 goto out;
13010 fprintf(stderr, "checking csums\n");
13011 ret = check_csums(root);
13012 err |= !!ret;
13013 if (ret) {
13014 error("errors found in csum tree");
13015 goto out;
13018 fprintf(stderr, "checking root refs\n");
13019 /* For low memory mode, check_fs_roots_v2 handles root refs */
13020 if (check_mode != CHECK_MODE_LOWMEM) {
13021 ret = check_root_refs(root, &root_cache);
13022 err |= !!ret;
13023 if (ret) {
13024 error("errors found in root refs");
13025 goto out;
13029 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13030 struct extent_buffer *eb;
13032 eb = list_first_entry(&root->fs_info->recow_ebs,
13033 struct extent_buffer, recow);
13034 list_del_init(&eb->recow);
13035 ret = recow_extent_buffer(root, eb);
13036 err |= !!ret;
13037 if (ret) {
13038 error("fails to fix transid errors");
13039 break;
13043 while (!list_empty(&delete_items)) {
13044 struct bad_item *bad;
13046 bad = list_first_entry(&delete_items, struct bad_item, list);
13047 list_del_init(&bad->list);
13048 if (repair) {
13049 ret = delete_bad_item(root, bad);
13050 err |= !!ret;
13052 free(bad);
13055 if (info->quota_enabled) {
13056 fprintf(stderr, "checking quota groups\n");
13057 ret = qgroup_verify_all(info);
13058 err |= !!ret;
13059 if (ret) {
13060 error("failed to check quota groups");
13061 goto out;
13063 report_qgroups(0);
13064 ret = repair_qgroups(info, &qgroups_repaired);
13065 err |= !!ret;
13066 if (err) {
13067 error("failed to repair quota groups");
13068 goto out;
13070 ret = 0;
13073 if (!list_empty(&root->fs_info->recow_ebs)) {
13074 error("transid errors in file system");
13075 ret = 1;
13076 err |= !!ret;
13078 out:
13079 if (found_old_backref) { /*
13080 * there was a disk format change when mixed
13081 * backref was in testing tree. The old format
13082 * existed about one week.
13084 printf("\n * Found old mixed backref format. "
13085 "The old format is not supported! *"
13086 "\n * Please mount the FS in readonly mode, "
13087 "backup data and re-format the FS. *\n\n");
13088 err |= 1;
13090 printf("found %llu bytes used, ",
13091 (unsigned long long)bytes_used);
13092 if (err)
13093 printf("error(s) found\n");
13094 else
13095 printf("no error found\n");
13096 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13097 printf("total tree bytes: %llu\n",
13098 (unsigned long long)total_btree_bytes);
13099 printf("total fs tree bytes: %llu\n",
13100 (unsigned long long)total_fs_tree_bytes);
13101 printf("total extent tree bytes: %llu\n",
13102 (unsigned long long)total_extent_tree_bytes);
13103 printf("btree space waste bytes: %llu\n",
13104 (unsigned long long)btree_space_waste);
13105 printf("file data blocks allocated: %llu\n referenced %llu\n",
13106 (unsigned long long)data_bytes_allocated,
13107 (unsigned long long)data_bytes_referenced);
13109 free_qgroup_counts();
13110 free_root_recs_tree(&root_cache);
13111 close_out:
13112 close_ctree(root);
13113 err_out:
13114 if (ctx.progress_enabled)
13115 task_deinit(ctx.info);
13117 return err;