Btrfs-progs: Initialize stripesize to the value of sectorsize
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob: ec0bbfd0744c51ec973e8c148a224f3a33d4456b
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
/* Which phase of the check the progress-indicator thread reports. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* State shared with the progress-indicator task. */
struct task_ctx {
	int progress_enabled;
	enum task_position tp;

	struct task_info *info;
};

/* Global counters accumulated while walking the filesystem trees. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int repair = 0;			/* running with --repair */
static int no_holes = 0;		/* fs uses the NO_HOLES feature */
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
/* Common head embedded in data/tree backrefs found during the extent scan. */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};

/* Backref of a data extent, keyed either by owning root or parent block. */
struct data_backref {
	struct extent_backref node;
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	u32 found_ref;
};

/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};

/* Backref of a tree block, keyed either by owning root or parent block. */
struct tree_backref {
	struct extent_backref node;
	union {
		u64 parent;
		u64 root;
	};
};
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

/* Everything known about one extent, merged from item and backref scans. */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	unsigned int flag_block_full_backref:2;	/* FLAG_UNSET until decided */
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};

/* One name linking an inode into a directory (dir item/index/inode ref). */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	unsigned int filetype:8;
	int errors;		/* REF_ERR_* bits */
	unsigned int ref_type;
	u64 dir;
	u64 index;
	u16 namelen;
	char name[0];		/* trailing name, NUL terminated */
};

/* Work item describing a tree root (or drop snapshot) still to be scanned. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};

#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) // 100
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
/* One gap in a file's extent coverage: [start, start + len). */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};

/* Accumulated state for one inode while its fs tree is scanned. */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	int errors;		/* I_ERR_* bits */

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;		/* links discovered via dir items */
	u64 found_size;		/* bytes covered by found file extents */
	u64 extent_start;
	u64 extent_end;
	struct rb_root holes;	/* tree of file_extent_hole */
	struct list_head orphan_extents;

	u32 refs;		/* sharing count across shared_nodes */
};

#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
/* One name referencing a subvolume root from a directory. */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	int errors;		/* REF_ERR_* bits */
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	char name[0];		/* trailing name, NUL terminated */
};

/* Accumulated state for one subvolume root. */
struct root_record {
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};

/* Generic cache_extent -> pointer mapping node. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};

/* Per-tree-block state shared between roots referencing the same block. */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};

struct block_info {
	u64 start;
	u32 size;
};

/* State carried while walking a tree from the root down to the leaves. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};

/* A corrupted item scheduled for deletion during repair. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};

struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};

struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
316 static void *print_status_check(void *p)
318 struct task_ctx *priv = p;
319 const char work_indicator[] = { '.', 'o', 'O', 'o' };
320 uint32_t count = 0;
321 static char *task_position_string[] = {
322 "checking extents",
323 "checking free space cache",
324 "checking fs roots",
327 task_period_start(priv->info, 1000 /* 1s */);
329 if (priv->tp == TASK_NOTHING)
330 return NULL;
332 while (1) {
333 printf("%s [%c]\r", task_position_string[priv->tp],
334 work_indicator[count % 4]);
335 count++;
336 fflush(stdout);
337 task_period_wait(priv->info);
339 return NULL;
/* Task-framework exit hook: finish the progress line and flush stdout. */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);

	return 0;
}
350 /* Compatible function to allow reuse of old codes */
351 static u64 first_extent_gap(struct rb_root *holes)
353 struct file_extent_hole *hole;
355 if (RB_EMPTY_ROOT(holes))
356 return (u64)-1;
358 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
359 return hole->start;
362 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
364 struct file_extent_hole *hole1;
365 struct file_extent_hole *hole2;
367 hole1 = rb_entry(node1, struct file_extent_hole, node);
368 hole2 = rb_entry(node2, struct file_extent_hole, node);
370 if (hole1->start > hole2->start)
371 return -1;
372 if (hole1->start < hole2->start)
373 return 1;
374 /* Now hole1->start == hole2->start */
375 if (hole1->len >= hole2->len)
377 * Hole 1 will be merge center
378 * Same hole will be merged later
380 return -1;
381 /* Hole 2 will be merge center */
382 return 1;
386 * Add a hole to the record
388 * This will do hole merge for copy_file_extent_holes(),
389 * which will ensure there won't be continuous holes.
391 static int add_file_extent_hole(struct rb_root *holes,
392 u64 start, u64 len)
394 struct file_extent_hole *hole;
395 struct file_extent_hole *prev = NULL;
396 struct file_extent_hole *next = NULL;
398 hole = malloc(sizeof(*hole));
399 if (!hole)
400 return -ENOMEM;
401 hole->start = start;
402 hole->len = len;
403 /* Since compare will not return 0, no -EEXIST will happen */
404 rb_insert(holes, &hole->node, compare_hole);
406 /* simple merge with previous hole */
407 if (rb_prev(&hole->node))
408 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
409 node);
410 if (prev && prev->start + prev->len >= hole->start) {
411 hole->len = hole->start + hole->len - prev->start;
412 hole->start = prev->start;
413 rb_erase(&prev->node, holes);
414 free(prev);
415 prev = NULL;
418 /* iterate merge with next holes */
419 while (1) {
420 if (!rb_next(&hole->node))
421 break;
422 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
423 node);
424 if (hole->start + hole->len >= next->start) {
425 if (hole->start + hole->len <= next->start + next->len)
426 hole->len = next->start + next->len -
427 hole->start;
428 rb_erase(&next->node, holes);
429 free(next);
430 next = NULL;
431 } else
432 break;
434 return 0;
437 static int compare_hole_range(struct rb_node *node, void *data)
439 struct file_extent_hole *hole;
440 u64 start;
442 hole = (struct file_extent_hole *)data;
443 start = hole->start;
445 hole = rb_entry(node, struct file_extent_hole, node);
446 if (start < hole->start)
447 return -1;
448 if (start >= hole->start && start < hole->start + hole->len)
449 return 0;
450 return 1;
454 * Delete a hole in the record
456 * This will do the hole split and is much restrict than add.
458 static int del_file_extent_hole(struct rb_root *holes,
459 u64 start, u64 len)
461 struct file_extent_hole *hole;
462 struct file_extent_hole tmp;
463 u64 prev_start = 0;
464 u64 prev_len = 0;
465 u64 next_start = 0;
466 u64 next_len = 0;
467 struct rb_node *node;
468 int have_prev = 0;
469 int have_next = 0;
470 int ret = 0;
472 tmp.start = start;
473 tmp.len = len;
474 node = rb_search(holes, &tmp, compare_hole_range, NULL);
475 if (!node)
476 return -EEXIST;
477 hole = rb_entry(node, struct file_extent_hole, node);
478 if (start + len > hole->start + hole->len)
479 return -EEXIST;
482 * Now there will be no overlap, delete the hole and re-add the
483 * split(s) if they exists.
485 if (start > hole->start) {
486 prev_start = hole->start;
487 prev_len = start - hole->start;
488 have_prev = 1;
490 if (hole->start + hole->len > start + len) {
491 next_start = start + len;
492 next_len = hole->start + hole->len - start - len;
493 have_next = 1;
495 rb_erase(node, holes);
496 free(hole);
497 if (have_prev) {
498 ret = add_file_extent_hole(holes, prev_start, prev_len);
499 if (ret < 0)
500 return ret;
502 if (have_next) {
503 ret = add_file_extent_hole(holes, next_start, next_len);
504 if (ret < 0)
505 return ret;
507 return 0;
510 static int copy_file_extent_holes(struct rb_root *dst,
511 struct rb_root *src)
513 struct file_extent_hole *hole;
514 struct rb_node *node;
515 int ret = 0;
517 node = rb_first(src);
518 while (node) {
519 hole = rb_entry(node, struct file_extent_hole, node);
520 ret = add_file_extent_hole(dst, hole->start, hole->len);
521 if (ret)
522 break;
523 node = rb_next(node);
525 return ret;
528 static void free_file_extent_holes(struct rb_root *holes)
530 struct rb_node *node;
531 struct file_extent_hole *hole;
533 node = rb_first(holes);
534 while (node) {
535 hole = rb_entry(node, struct file_extent_hole, node);
536 rb_erase(node, holes);
537 free(hole);
538 node = rb_first(holes);
542 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
544 static void record_root_in_trans(struct btrfs_trans_handle *trans,
545 struct btrfs_root *root)
547 if (root->last_trans != trans->transid) {
548 root->track_dirty = 1;
549 root->last_trans = trans->transid;
550 root->commit_root = root->node;
551 extent_buffer_get(root->node);
555 static u8 imode_to_type(u32 imode)
557 #define S_SHIFT 12
558 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
559 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
560 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
561 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
562 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
563 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
564 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
565 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
568 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
569 #undef S_SHIFT
572 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
574 struct device_record *rec1;
575 struct device_record *rec2;
577 rec1 = rb_entry(node1, struct device_record, node);
578 rec2 = rb_entry(node2, struct device_record, node);
579 if (rec1->devid > rec2->devid)
580 return -1;
581 else if (rec1->devid < rec2->devid)
582 return 1;
583 else
584 return 0;
587 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
589 struct inode_record *rec;
590 struct inode_backref *backref;
591 struct inode_backref *orig;
592 struct inode_backref *tmp;
593 struct orphan_data_extent *src_orphan;
594 struct orphan_data_extent *dst_orphan;
595 size_t size;
596 int ret;
598 rec = malloc(sizeof(*rec));
599 if (!rec)
600 return ERR_PTR(-ENOMEM);
601 memcpy(rec, orig_rec, sizeof(*rec));
602 rec->refs = 1;
603 INIT_LIST_HEAD(&rec->backrefs);
604 INIT_LIST_HEAD(&rec->orphan_extents);
605 rec->holes = RB_ROOT;
607 list_for_each_entry(orig, &orig_rec->backrefs, list) {
608 size = sizeof(*orig) + orig->namelen + 1;
609 backref = malloc(size);
610 if (!backref) {
611 ret = -ENOMEM;
612 goto cleanup;
614 memcpy(backref, orig, size);
615 list_add_tail(&backref->list, &rec->backrefs);
617 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
618 dst_orphan = malloc(sizeof(*dst_orphan));
619 if (!dst_orphan) {
620 ret = -ENOMEM;
621 goto cleanup;
623 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
624 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
626 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
627 BUG_ON(ret < 0);
629 return rec;
631 cleanup:
632 if (!list_empty(&rec->backrefs))
633 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
634 list_del(&orig->list);
635 free(orig);
638 if (!list_empty(&rec->orphan_extents))
639 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
640 list_del(&orig->list);
641 free(orig);
644 free(rec);
646 return ERR_PTR(ret);
649 static void print_orphan_data_extents(struct list_head *orphan_extents,
650 u64 objectid)
652 struct orphan_data_extent *orphan;
654 if (list_empty(orphan_extents))
655 return;
656 printf("The following data extent is lost in tree %llu:\n",
657 objectid);
658 list_for_each_entry(orphan, orphan_extents, list) {
659 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
660 orphan->objectid, orphan->offset, orphan->disk_bytenr,
661 orphan->disk_len);
665 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
667 u64 root_objectid = root->root_key.objectid;
668 int errors = rec->errors;
670 if (!errors)
671 return;
672 /* reloc root errors, we print its corresponding fs root objectid*/
673 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
674 root_objectid = root->root_key.offset;
675 fprintf(stderr, "reloc");
677 fprintf(stderr, "root %llu inode %llu errors %x",
678 (unsigned long long) root_objectid,
679 (unsigned long long) rec->ino, rec->errors);
681 if (errors & I_ERR_NO_INODE_ITEM)
682 fprintf(stderr, ", no inode item");
683 if (errors & I_ERR_NO_ORPHAN_ITEM)
684 fprintf(stderr, ", no orphan item");
685 if (errors & I_ERR_DUP_INODE_ITEM)
686 fprintf(stderr, ", dup inode item");
687 if (errors & I_ERR_DUP_DIR_INDEX)
688 fprintf(stderr, ", dup dir index");
689 if (errors & I_ERR_ODD_DIR_ITEM)
690 fprintf(stderr, ", odd dir item");
691 if (errors & I_ERR_ODD_FILE_EXTENT)
692 fprintf(stderr, ", odd file extent");
693 if (errors & I_ERR_BAD_FILE_EXTENT)
694 fprintf(stderr, ", bad file extent");
695 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
696 fprintf(stderr, ", file extent overlap");
697 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
698 fprintf(stderr, ", file extent discount");
699 if (errors & I_ERR_DIR_ISIZE_WRONG)
700 fprintf(stderr, ", dir isize wrong");
701 if (errors & I_ERR_FILE_NBYTES_WRONG)
702 fprintf(stderr, ", nbytes wrong");
703 if (errors & I_ERR_ODD_CSUM_ITEM)
704 fprintf(stderr, ", odd csum item");
705 if (errors & I_ERR_SOME_CSUM_MISSING)
706 fprintf(stderr, ", some csum missing");
707 if (errors & I_ERR_LINK_COUNT_WRONG)
708 fprintf(stderr, ", link count wrong");
709 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
710 fprintf(stderr, ", orphan file extent");
711 fprintf(stderr, "\n");
712 /* Print the orphan extents if needed */
713 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
714 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
716 /* Print the holes if needed */
717 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
718 struct file_extent_hole *hole;
719 struct rb_node *node;
720 int found = 0;
722 node = rb_first(&rec->holes);
723 fprintf(stderr, "Found file extent holes:\n");
724 while (node) {
725 found = 1;
726 hole = rb_entry(node, struct file_extent_hole, node);
727 fprintf(stderr, "\tstart: %llu, len: %llu\n",
728 hole->start, hole->len);
729 node = rb_next(node);
731 if (!found)
732 fprintf(stderr, "\tstart: 0, len: %llu\n",
733 round_up(rec->isize, root->sectorsize));
737 static void print_ref_error(int errors)
739 if (errors & REF_ERR_NO_DIR_ITEM)
740 fprintf(stderr, ", no dir item");
741 if (errors & REF_ERR_NO_DIR_INDEX)
742 fprintf(stderr, ", no dir index");
743 if (errors & REF_ERR_NO_INODE_REF)
744 fprintf(stderr, ", no inode ref");
745 if (errors & REF_ERR_DUP_DIR_ITEM)
746 fprintf(stderr, ", dup dir item");
747 if (errors & REF_ERR_DUP_DIR_INDEX)
748 fprintf(stderr, ", dup dir index");
749 if (errors & REF_ERR_DUP_INODE_REF)
750 fprintf(stderr, ", dup inode ref");
751 if (errors & REF_ERR_INDEX_UNMATCH)
752 fprintf(stderr, ", index mismatch");
753 if (errors & REF_ERR_FILETYPE_UNMATCH)
754 fprintf(stderr, ", filetype mismatch");
755 if (errors & REF_ERR_NAME_TOO_LONG)
756 fprintf(stderr, ", name too long");
757 if (errors & REF_ERR_NO_ROOT_REF)
758 fprintf(stderr, ", no root ref");
759 if (errors & REF_ERR_NO_ROOT_BACKREF)
760 fprintf(stderr, ", no root backref");
761 if (errors & REF_ERR_DUP_ROOT_REF)
762 fprintf(stderr, ", dup root ref");
763 if (errors & REF_ERR_DUP_ROOT_BACKREF)
764 fprintf(stderr, ", dup root backref");
765 fprintf(stderr, "\n");
768 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
769 u64 ino, int mod)
771 struct ptr_node *node;
772 struct cache_extent *cache;
773 struct inode_record *rec = NULL;
774 int ret;
776 cache = lookup_cache_extent(inode_cache, ino, 1);
777 if (cache) {
778 node = container_of(cache, struct ptr_node, cache);
779 rec = node->data;
780 if (mod && rec->refs > 1) {
781 node->data = clone_inode_rec(rec);
782 if (IS_ERR(node->data))
783 return node->data;
784 rec->refs--;
785 rec = node->data;
787 } else if (mod) {
788 rec = calloc(1, sizeof(*rec));
789 if (!rec)
790 return ERR_PTR(-ENOMEM);
791 rec->ino = ino;
792 rec->extent_start = (u64)-1;
793 rec->refs = 1;
794 INIT_LIST_HEAD(&rec->backrefs);
795 INIT_LIST_HEAD(&rec->orphan_extents);
796 rec->holes = RB_ROOT;
798 node = malloc(sizeof(*node));
799 if (!node) {
800 free(rec);
801 return ERR_PTR(-ENOMEM);
803 node->cache.start = ino;
804 node->cache.size = 1;
805 node->data = rec;
807 if (ino == BTRFS_FREE_INO_OBJECTID)
808 rec->found_link = 1;
810 ret = insert_cache_extent(inode_cache, &node->cache);
811 if (ret)
812 return ERR_PTR(-EEXIST);
814 return rec;
817 static void free_orphan_data_extents(struct list_head *orphan_extents)
819 struct orphan_data_extent *orphan;
821 while (!list_empty(orphan_extents)) {
822 orphan = list_entry(orphan_extents->next,
823 struct orphan_data_extent, list);
824 list_del(&orphan->list);
825 free(orphan);
829 static void free_inode_rec(struct inode_record *rec)
831 struct inode_backref *backref;
833 if (--rec->refs > 0)
834 return;
836 while (!list_empty(&rec->backrefs)) {
837 backref = list_entry(rec->backrefs.next,
838 struct inode_backref, list);
839 list_del(&backref->list);
840 free(backref);
842 free_orphan_data_extents(&rec->orphan_extents);
843 free_file_extent_holes(&rec->holes);
844 free(rec);
847 static int can_free_inode_rec(struct inode_record *rec)
849 if (!rec->errors && rec->checked && rec->found_inode_item &&
850 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
851 return 1;
852 return 0;
855 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
856 struct inode_record *rec)
858 struct cache_extent *cache;
859 struct inode_backref *tmp, *backref;
860 struct ptr_node *node;
861 unsigned char filetype;
863 if (!rec->found_inode_item)
864 return;
866 filetype = imode_to_type(rec->imode);
867 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
868 if (backref->found_dir_item && backref->found_dir_index) {
869 if (backref->filetype != filetype)
870 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
871 if (!backref->errors && backref->found_inode_ref &&
872 rec->nlink == rec->found_link) {
873 list_del(&backref->list);
874 free(backref);
879 if (!rec->checked || rec->merging)
880 return;
882 if (S_ISDIR(rec->imode)) {
883 if (rec->found_size != rec->isize)
884 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
885 if (rec->found_file_extent)
886 rec->errors |= I_ERR_ODD_FILE_EXTENT;
887 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
888 if (rec->found_dir_item)
889 rec->errors |= I_ERR_ODD_DIR_ITEM;
890 if (rec->found_size != rec->nbytes)
891 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
892 if (rec->nlink > 0 && !no_holes &&
893 (rec->extent_end < rec->isize ||
894 first_extent_gap(&rec->holes) < rec->isize))
895 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
898 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
899 if (rec->found_csum_item && rec->nodatasum)
900 rec->errors |= I_ERR_ODD_CSUM_ITEM;
901 if (rec->some_csum_missing && !rec->nodatasum)
902 rec->errors |= I_ERR_SOME_CSUM_MISSING;
905 BUG_ON(rec->refs != 1);
906 if (can_free_inode_rec(rec)) {
907 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
908 node = container_of(cache, struct ptr_node, cache);
909 BUG_ON(node->data != rec);
910 remove_cache_extent(inode_cache, &node->cache);
911 free(node);
912 free_inode_rec(rec);
916 static int check_orphan_item(struct btrfs_root *root, u64 ino)
918 struct btrfs_path path;
919 struct btrfs_key key;
920 int ret;
922 key.objectid = BTRFS_ORPHAN_OBJECTID;
923 key.type = BTRFS_ORPHAN_ITEM_KEY;
924 key.offset = ino;
926 btrfs_init_path(&path);
927 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
928 btrfs_release_path(&path);
929 if (ret > 0)
930 ret = -ENOENT;
931 return ret;
934 static int process_inode_item(struct extent_buffer *eb,
935 int slot, struct btrfs_key *key,
936 struct shared_node *active_node)
938 struct inode_record *rec;
939 struct btrfs_inode_item *item;
941 rec = active_node->current;
942 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
943 if (rec->found_inode_item) {
944 rec->errors |= I_ERR_DUP_INODE_ITEM;
945 return 1;
947 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
948 rec->nlink = btrfs_inode_nlink(eb, item);
949 rec->isize = btrfs_inode_size(eb, item);
950 rec->nbytes = btrfs_inode_nbytes(eb, item);
951 rec->imode = btrfs_inode_mode(eb, item);
952 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
953 rec->nodatasum = 1;
954 rec->found_inode_item = 1;
955 if (rec->nlink == 0)
956 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
957 maybe_free_inode_rec(&active_node->inode_cache, rec);
958 return 0;
961 static struct inode_backref *get_inode_backref(struct inode_record *rec,
962 const char *name,
963 int namelen, u64 dir)
965 struct inode_backref *backref;
967 list_for_each_entry(backref, &rec->backrefs, list) {
968 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
969 break;
970 if (backref->dir != dir || backref->namelen != namelen)
971 continue;
972 if (memcmp(name, backref->name, namelen))
973 continue;
974 return backref;
977 backref = malloc(sizeof(*backref) + namelen + 1);
978 if (!backref)
979 return NULL;
980 memset(backref, 0, sizeof(*backref));
981 backref->dir = dir;
982 backref->namelen = namelen;
983 memcpy(backref->name, name, namelen);
984 backref->name[namelen] = '\0';
985 list_add_tail(&backref->list, &rec->backrefs);
986 return backref;
989 static int add_inode_backref(struct cache_tree *inode_cache,
990 u64 ino, u64 dir, u64 index,
991 const char *name, int namelen,
992 int filetype, int itemtype, int errors)
994 struct inode_record *rec;
995 struct inode_backref *backref;
997 rec = get_inode_rec(inode_cache, ino, 1);
998 BUG_ON(IS_ERR(rec));
999 backref = get_inode_backref(rec, name, namelen, dir);
1000 BUG_ON(!backref);
1001 if (errors)
1002 backref->errors |= errors;
1003 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1004 if (backref->found_dir_index)
1005 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1006 if (backref->found_inode_ref && backref->index != index)
1007 backref->errors |= REF_ERR_INDEX_UNMATCH;
1008 if (backref->found_dir_item && backref->filetype != filetype)
1009 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1011 backref->index = index;
1012 backref->filetype = filetype;
1013 backref->found_dir_index = 1;
1014 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1015 rec->found_link++;
1016 if (backref->found_dir_item)
1017 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1018 if (backref->found_dir_index && backref->filetype != filetype)
1019 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1021 backref->filetype = filetype;
1022 backref->found_dir_item = 1;
1023 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1024 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1025 if (backref->found_inode_ref)
1026 backref->errors |= REF_ERR_DUP_INODE_REF;
1027 if (backref->found_dir_index && backref->index != index)
1028 backref->errors |= REF_ERR_INDEX_UNMATCH;
1029 else
1030 backref->index = index;
1032 backref->ref_type = itemtype;
1033 backref->found_inode_ref = 1;
1034 } else {
1035 BUG_ON(1);
1038 maybe_free_inode_rec(inode_cache, rec);
1039 return 0;
1042 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1043 struct cache_tree *dst_cache)
1045 struct inode_backref *backref;
1046 u32 dir_count = 0;
1047 int ret = 0;
1049 dst->merging = 1;
1050 list_for_each_entry(backref, &src->backrefs, list) {
1051 if (backref->found_dir_index) {
1052 add_inode_backref(dst_cache, dst->ino, backref->dir,
1053 backref->index, backref->name,
1054 backref->namelen, backref->filetype,
1055 BTRFS_DIR_INDEX_KEY, backref->errors);
1057 if (backref->found_dir_item) {
1058 dir_count++;
1059 add_inode_backref(dst_cache, dst->ino,
1060 backref->dir, 0, backref->name,
1061 backref->namelen, backref->filetype,
1062 BTRFS_DIR_ITEM_KEY, backref->errors);
1064 if (backref->found_inode_ref) {
1065 add_inode_backref(dst_cache, dst->ino,
1066 backref->dir, backref->index,
1067 backref->name, backref->namelen, 0,
1068 backref->ref_type, backref->errors);
1072 if (src->found_dir_item)
1073 dst->found_dir_item = 1;
1074 if (src->found_file_extent)
1075 dst->found_file_extent = 1;
1076 if (src->found_csum_item)
1077 dst->found_csum_item = 1;
1078 if (src->some_csum_missing)
1079 dst->some_csum_missing = 1;
1080 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1081 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1082 if (ret < 0)
1083 return ret;
1086 BUG_ON(src->found_link < dir_count);
1087 dst->found_link += src->found_link - dir_count;
1088 dst->found_size += src->found_size;
1089 if (src->extent_start != (u64)-1) {
1090 if (dst->extent_start == (u64)-1) {
1091 dst->extent_start = src->extent_start;
1092 dst->extent_end = src->extent_end;
1093 } else {
1094 if (dst->extent_end > src->extent_start)
1095 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1096 else if (dst->extent_end < src->extent_start) {
1097 ret = add_file_extent_hole(&dst->holes,
1098 dst->extent_end,
1099 src->extent_start - dst->extent_end);
1101 if (dst->extent_end < src->extent_end)
1102 dst->extent_end = src->extent_end;
1106 dst->errors |= src->errors;
1107 if (src->found_inode_item) {
1108 if (!dst->found_inode_item) {
1109 dst->nlink = src->nlink;
1110 dst->isize = src->isize;
1111 dst->nbytes = src->nbytes;
1112 dst->imode = src->imode;
1113 dst->nodatasum = src->nodatasum;
1114 dst->found_inode_item = 1;
1115 } else {
1116 dst->errors |= I_ERR_DUP_INODE_ITEM;
1119 dst->merging = 0;
1121 return 0;
/*
 * Merge (or move) everything collected under @src_node into @dst_node.
 *
 * Drops one reference on @src_node; when it was the last one, the records
 * are spliced (moved without copying), otherwise they are duplicated with
 * an extra refcount so both nodes keep a view.  Conflicting inode records
 * already present in the destination are merged via merge_inode_recs().
 * Processes the root_cache first, then jumps back (via "again") for the
 * inode_cache.  Always returns 0.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference: we may steal the nodes instead of copying them. */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		/* Advance before the node is removed/reused below. */
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* Same ino already tracked in dst: merge into it. */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	/* First pass handled root_cache; repeat for inode_cache. */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	/* Keep dst_node->current pointing at the highest in-flight inode. */
	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
1196 static void free_inode_ptr(struct cache_extent *cache)
1198 struct ptr_node *node;
1199 struct inode_record *rec;
1201 node = container_of(cache, struct ptr_node, cache);
1202 rec = node->data;
1203 free_inode_rec(rec);
1204 free(node);
/* Generates free_inode_recs_tree(): tears down a cache_tree of ptr_nodes. */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1209 static struct shared_node *find_shared_node(struct cache_tree *shared,
1210 u64 bytenr)
1212 struct cache_extent *cache;
1213 struct shared_node *node;
1215 cache = lookup_cache_extent(shared, bytenr, 1);
1216 if (cache) {
1217 node = container_of(cache, struct shared_node, cache);
1218 return node;
1220 return NULL;
1223 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1225 int ret;
1226 struct shared_node *node;
1228 node = calloc(1, sizeof(*node));
1229 if (!node)
1230 return -ENOMEM;
1231 node->cache.start = bytenr;
1232 node->cache.size = 1;
1233 cache_tree_init(&node->root_cache);
1234 cache_tree_init(&node->inode_cache);
1235 node->refs = refs;
1237 ret = insert_cache_extent(shared, &node->cache);
1239 return ret;
/*
 * Begin (or short-circuit) the walk of a shared tree block at @bytenr.
 *
 * Returns 0 when the caller should descend into the block: either we are
 * already inside it, or it is seen for the first time and becomes the
 * active node at @level.  Returns 1 when the subtree was walked before —
 * its collected records are merged into the current active node (or just
 * dropped for a dead root) and the caller skips it.
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int ret;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		/* First encounter: start tracking and walk into it. */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		BUG_ON(ret);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	/*
	 * A dead root (refs == 0 in the root item) contributes nothing;
	 * drop our reference and free the bookkeeping with the last one.
	 */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	/* Seen before: merge its records into the current active node. */
	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}
/*
 * Finish walking the shared node at @level and hand its collected records
 * up to the nearest tracked ancestor, which becomes the new active node.
 * For a dead root past the root level, only the reference is dropped.
 * Always returns 0.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	/* Find the next tracked node above us; it becomes the active one. */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		/* Dead root: just release our reference. */
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
1316 * Returns:
1317 * < 0 - on error
1318 * 1 - if the root with id child_root_id is a child of root parent_root_id
1319 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1320 * has other root(s) as parent(s)
1321 * 2 - if the root child_root_id doesn't have any parent roots
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	/* Fast path: a direct ROOT_REF item from parent to child. */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	/* Slow path: scan every ROOT_BACKREF item of the child. */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		/* At least one parent exists, though maybe not the one asked. */
		has_parent = 1;

		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
/*
 * Record one DIR_ITEM/DIR_INDEX item on the current inode record and add
 * a backref for every directory entry packed inside it.  Entries pointing
 * at subvolume roots go into the root_cache; invalid location types are
 * reported and filed under BTRFS_MULTIPLE_OBJECTIDS.  Always returns 0.
 */
static int process_dir_item(struct btrfs_root *root,
			    struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	int filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	/* A single item may hold several entries back to back. */
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			/* Truncate over-long names but remember the error. */
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			/* Entry points at a subvolume root, not an inode. */
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr, "invalid location in dir item %u\n",
				location.type);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	/* A DIR_INDEX key must contain exactly one entry. */
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
1454 static int process_inode_ref(struct extent_buffer *eb,
1455 int slot, struct btrfs_key *key,
1456 struct shared_node *active_node)
1458 u32 total;
1459 u32 cur = 0;
1460 u32 len;
1461 u32 name_len;
1462 u64 index;
1463 int error;
1464 struct cache_tree *inode_cache;
1465 struct btrfs_inode_ref *ref;
1466 char namebuf[BTRFS_NAME_LEN];
1468 inode_cache = &active_node->inode_cache;
1470 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1471 total = btrfs_item_size_nr(eb, slot);
1472 while (cur < total) {
1473 name_len = btrfs_inode_ref_name_len(eb, ref);
1474 index = btrfs_inode_ref_index(eb, ref);
1475 if (name_len <= BTRFS_NAME_LEN) {
1476 len = name_len;
1477 error = 0;
1478 } else {
1479 len = BTRFS_NAME_LEN;
1480 error = REF_ERR_NAME_TOO_LONG;
1482 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1483 add_inode_backref(inode_cache, key->objectid, key->offset,
1484 index, namebuf, len, 0, key->type, error);
1486 len = sizeof(*ref) + name_len;
1487 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1488 cur += len;
1490 return 0;
1493 static int process_inode_extref(struct extent_buffer *eb,
1494 int slot, struct btrfs_key *key,
1495 struct shared_node *active_node)
1497 u32 total;
1498 u32 cur = 0;
1499 u32 len;
1500 u32 name_len;
1501 u64 index;
1502 u64 parent;
1503 int error;
1504 struct cache_tree *inode_cache;
1505 struct btrfs_inode_extref *extref;
1506 char namebuf[BTRFS_NAME_LEN];
1508 inode_cache = &active_node->inode_cache;
1510 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1511 total = btrfs_item_size_nr(eb, slot);
1512 while (cur < total) {
1513 name_len = btrfs_inode_extref_name_len(eb, extref);
1514 index = btrfs_inode_extref_index(eb, extref);
1515 parent = btrfs_inode_extref_parent(eb, extref);
1516 if (name_len <= BTRFS_NAME_LEN) {
1517 len = name_len;
1518 error = 0;
1519 } else {
1520 len = BTRFS_NAME_LEN;
1521 error = REF_ERR_NAME_TOO_LONG;
1523 read_extent_buffer(eb, namebuf,
1524 (unsigned long)(extref + 1), len);
1525 add_inode_backref(inode_cache, key->objectid, parent,
1526 index, namebuf, len, 0, key->type, error);
1528 len = sizeof(*extref) + name_len;
1529 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1530 cur += len;
1532 return 0;
1536 static int count_csum_range(struct btrfs_root *root, u64 start,
1537 u64 len, u64 *found)
1539 struct btrfs_key key;
1540 struct btrfs_path path;
1541 struct extent_buffer *leaf;
1542 int ret;
1543 size_t size;
1544 *found = 0;
1545 u64 csum_end;
1546 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1548 btrfs_init_path(&path);
1550 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1551 key.offset = start;
1552 key.type = BTRFS_EXTENT_CSUM_KEY;
1554 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1555 &key, &path, 0, 0);
1556 if (ret < 0)
1557 goto out;
1558 if (ret > 0 && path.slots[0] > 0) {
1559 leaf = path.nodes[0];
1560 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1561 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1562 key.type == BTRFS_EXTENT_CSUM_KEY)
1563 path.slots[0]--;
1566 while (len > 0) {
1567 leaf = path.nodes[0];
1568 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1569 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1570 if (ret > 0)
1571 break;
1572 else if (ret < 0)
1573 goto out;
1574 leaf = path.nodes[0];
1577 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1578 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1579 key.type != BTRFS_EXTENT_CSUM_KEY)
1580 break;
1582 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1583 if (key.offset >= start + len)
1584 break;
1586 if (key.offset > start)
1587 start = key.offset;
1589 size = btrfs_item_size_nr(leaf, path.slots[0]);
1590 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1591 if (csum_end > start) {
1592 size = min(csum_end - start, len);
1593 len -= size;
1594 start += size;
1595 *found += size;
1598 path.slots[0]++;
1600 out:
1601 btrfs_release_path(&path);
1602 if (ret < 0)
1603 return ret;
1604 return 0;
/*
 * Validate one EXTENT_DATA item against the current inode record: track
 * coverage (overlaps and holes), basic field sanity, accumulated size,
 * and whether csums exist where they should (and don't where they
 * shouldn't, e.g. prealloc).  Returns 0 or a negative errno from the
 * hole bookkeeping / csum lookup.
 */
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->sectorsize - 1;	/* sector-alignment mask */
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	/* First extent of this inode: start coverage tracking here. */
	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		/* Gap between the previous extent and this one. */
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		rec->found_size += num_bytes;
		/* Round up so extent_end stays sector aligned. */
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents must not be compressed/encoded. */
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 means a hole extent: no data on disk. */
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;
		/* Compressed extents are csummed over the on-disk bytes. */
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* Prealloc was never written; csums are bogus. */
			if (found > 0)
				rec->errors |= I_ERR_ODD_CSUM_ITEM;
		}
	}
	return 0;
}
/*
 * Scan every item of a leaf and dispatch it to the per-type processor,
 * keeping active_node->current pointed at the inode record the items
 * belong to (items of one inode are contiguous and sorted by objectid).
 * Returns the result of the last processed item.
 */
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	/* Nothing to collect for a dead root. */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		/* Crossed into a new inode: finish the old one first. */
		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
			BUG_ON(IS_ERR(active_node->current));
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(root, eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		}
	}
	return ret;
}
1763 static void reada_walk_down(struct btrfs_root *root,
1764 struct extent_buffer *node, int slot)
1766 u64 bytenr;
1767 u64 ptr_gen;
1768 u32 nritems;
1769 u32 blocksize;
1770 int i;
1771 int level;
1773 level = btrfs_header_level(node);
1774 if (level != 1)
1775 return;
1777 nritems = btrfs_header_nritems(node);
1778 blocksize = root->nodesize;
1779 for (i = slot; i < nritems; i++) {
1780 bytenr = btrfs_node_blockptr(node, i);
1781 ptr_gen = btrfs_node_ptr_generation(node, i);
1782 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1787 * Check the child node/leaf by the following condition:
1788 * 1. the first item key of the node/leaf should be the same with the one
1789 * in parent.
1790 * 2. block in parent node should match the child node/leaf.
1791 * 3. generation of parent node and child's header should be consistent.
1793 * Or the child node/leaf pointed by the key in parent is not valid.
1795 * We hope to check leaf owner too, but since subvol may share leaves,
1796 * which makes leaf owner check not so strong, key check should be
1797 * sufficient enough for that case.
1799 static int check_child_node(struct btrfs_root *root,
1800 struct extent_buffer *parent, int slot,
1801 struct extent_buffer *child)
1803 struct btrfs_key parent_key;
1804 struct btrfs_key child_key;
1805 int ret = 0;
1807 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1808 if (btrfs_header_level(child) == 0)
1809 btrfs_item_key_to_cpu(child, &child_key, 0);
1810 else
1811 btrfs_node_key_to_cpu(child, &child_key, 0);
1813 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1814 ret = -EINVAL;
1815 fprintf(stderr,
1816 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1817 parent_key.objectid, parent_key.type, parent_key.offset,
1818 child_key.objectid, child_key.type, child_key.offset);
1820 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1821 ret = -EINVAL;
1822 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1823 btrfs_node_blockptr(parent, slot),
1824 btrfs_header_bytenr(child));
1826 if (btrfs_node_ptr_generation(parent, slot) !=
1827 btrfs_header_generation(child)) {
1828 ret = -EINVAL;
1829 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1830 btrfs_header_generation(child),
1831 btrfs_node_ptr_generation(parent, slot));
1833 return ret;
/*
 * Descend from the current path position toward a leaf, processing it,
 * while tracking shared subtrees (refs > 1) so each is walked only once.
 * Corrupt or unreadable blocks are recorded and abort the descent with a
 * negative error; *level tracks the depth reached.  On exit the slot at
 * *level is pushed past the end so walk_up_tree() advances correctly.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	u32 blocksize;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	ret = btrfs_lookup_extent_info(NULL, root,
				       path->nodes[*level]->start,
				       *level, 1, &refs, NULL);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (refs > 1) {
		/* ret > 0: subtree already walked, nothing more to do. */
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0) {
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = root->nodesize;
		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
					       1, &refs, NULL);
		if (ret < 0)
			refs = 0;	/* treat lookup failure as not shared */

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				/* Already walked: skip to the next slot. */
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			/* Prefetch siblings before the synchronous read. */
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->nodesize, *level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(root, cur, path->slots[*level], next);
		if (ret) {
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Step down one level into the child we just validated. */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
/*
 * Pop finished levels off the path: advance to the next slot in the
 * lowest ancestor that still has unvisited items.  Returns 0 with
 * *level updated when such an ancestor exists, 1 when the whole tree
 * has been consumed.
 */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
{
	int i;
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
			/*
			 * *level == i at this point (initially by the loop
			 * start, afterwards because each iteration sets
			 * *level = i + 1), so this releases the node we
			 * just finished before moving up.
			 */
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			BUG_ON(*level > wc->active_node);
			if (*level == wc->active_node)
				leave_shared_node(root, wc, *level);
			*level = i + 1;
		}
	}
	return 1;
}
1971 static int check_root_dir(struct inode_record *rec)
1973 struct inode_backref *backref;
1974 int ret = -1;
1976 if (!rec->found_inode_item || rec->errors)
1977 goto out;
1978 if (rec->nlink != 1 || rec->found_link != 0)
1979 goto out;
1980 if (list_empty(&rec->backrefs))
1981 goto out;
1982 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1983 if (!backref->found_inode_ref)
1984 goto out;
1985 if (backref->index != 0 || backref->namelen != 2 ||
1986 memcmp(backref->name, "..", 2))
1987 goto out;
1988 if (backref->found_dir_index || backref->found_dir_item)
1989 goto out;
1990 ret = 0;
1991 out:
1992 return ret;
1995 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1996 struct btrfs_root *root, struct btrfs_path *path,
1997 struct inode_record *rec)
1999 struct btrfs_inode_item *ei;
2000 struct btrfs_key key;
2001 int ret;
2003 key.objectid = rec->ino;
2004 key.type = BTRFS_INODE_ITEM_KEY;
2005 key.offset = (u64)-1;
2007 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2008 if (ret < 0)
2009 goto out;
2010 if (ret) {
2011 if (!path->slots[0]) {
2012 ret = -ENOENT;
2013 goto out;
2015 path->slots[0]--;
2016 ret = 0;
2018 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2019 if (key.objectid != rec->ino) {
2020 ret = -ENOENT;
2021 goto out;
2024 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2025 struct btrfs_inode_item);
2026 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2027 btrfs_mark_buffer_dirty(path->nodes[0]);
2028 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2029 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2030 root->root_key.objectid);
2031 out:
2032 btrfs_release_path(path);
2033 return ret;
2036 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2037 struct btrfs_root *root,
2038 struct btrfs_path *path,
2039 struct inode_record *rec)
2041 int ret;
2043 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2044 btrfs_release_path(path);
2045 if (!ret)
2046 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2047 return ret;
2050 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2051 struct btrfs_root *root,
2052 struct btrfs_path *path,
2053 struct inode_record *rec)
2055 struct btrfs_inode_item *ei;
2056 struct btrfs_key key;
2057 int ret = 0;
2059 key.objectid = rec->ino;
2060 key.type = BTRFS_INODE_ITEM_KEY;
2061 key.offset = 0;
2063 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2064 if (ret) {
2065 if (ret > 0)
2066 ret = -ENOENT;
2067 goto out;
2070 /* Since ret == 0, no need to check anything */
2071 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2072 struct btrfs_inode_item);
2073 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2074 btrfs_mark_buffer_dirty(path->nodes[0]);
2075 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2076 printf("reset nbytes for ino %llu root %llu\n",
2077 rec->ino, root->root_key.objectid);
2078 out:
2079 btrfs_release_path(path);
2080 return ret;
2083 static int add_missing_dir_index(struct btrfs_root *root,
2084 struct cache_tree *inode_cache,
2085 struct inode_record *rec,
2086 struct inode_backref *backref)
2088 struct btrfs_path *path;
2089 struct btrfs_trans_handle *trans;
2090 struct btrfs_dir_item *dir_item;
2091 struct extent_buffer *leaf;
2092 struct btrfs_key key;
2093 struct btrfs_disk_key disk_key;
2094 struct inode_record *dir_rec;
2095 unsigned long name_ptr;
2096 u32 data_size = sizeof(*dir_item) + backref->namelen;
2097 int ret;
2099 path = btrfs_alloc_path();
2100 if (!path)
2101 return -ENOMEM;
2103 trans = btrfs_start_transaction(root, 1);
2104 if (IS_ERR(trans)) {
2105 btrfs_free_path(path);
2106 return PTR_ERR(trans);
2109 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2110 (unsigned long long)rec->ino);
2111 key.objectid = backref->dir;
2112 key.type = BTRFS_DIR_INDEX_KEY;
2113 key.offset = backref->index;
2115 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2116 BUG_ON(ret);
2118 leaf = path->nodes[0];
2119 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2121 disk_key.objectid = cpu_to_le64(rec->ino);
2122 disk_key.type = BTRFS_INODE_ITEM_KEY;
2123 disk_key.offset = 0;
2125 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2126 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2127 btrfs_set_dir_data_len(leaf, dir_item, 0);
2128 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2129 name_ptr = (unsigned long)(dir_item + 1);
2130 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2131 btrfs_mark_buffer_dirty(leaf);
2132 btrfs_free_path(path);
2133 btrfs_commit_transaction(trans, root);
2135 backref->found_dir_index = 1;
2136 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2137 BUG_ON(IS_ERR(dir_rec));
2138 if (!dir_rec)
2139 return 0;
2140 dir_rec->found_size += backref->namelen;
2141 if (dir_rec->found_size == dir_rec->isize &&
2142 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2143 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2144 if (dir_rec->found_size != dir_rec->isize)
2145 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2147 return 0;
/*
 * Remove a bad DIR_INDEX entry described by @backref.  A lookup miss
 * (-ENOENT) is treated as success since the entry is already gone.
 */
static int delete_dir_index(struct btrfs_root *root,
			    struct cache_tree *inode_cache,
			    struct inode_record *rec,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_free_path(path);
		btrfs_commit_transaction(trans, root);
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	/*
	 * NOTE(review): presumably a NULL return means the index item
	 * exists but holds no matching name (delete the whole item),
	 * while non-NULL points at the one name to remove — confirm
	 * against btrfs_lookup_dir_index().
	 */
	if (!di)
		ret = btrfs_del_item(trans, root, path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, path, di);
	BUG_ON(ret);
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
2198 static int create_inode_item(struct btrfs_root *root,
2199 struct inode_record *rec,
2200 struct inode_backref *backref, int root_dir)
2202 struct btrfs_trans_handle *trans;
2203 struct btrfs_inode_item inode_item;
2204 time_t now = time(NULL);
2205 int ret;
2207 trans = btrfs_start_transaction(root, 1);
2208 if (IS_ERR(trans)) {
2209 ret = PTR_ERR(trans);
2210 return ret;
2213 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2214 "be incomplete, please check permissions and content after "
2215 "the fsck completes.\n", (unsigned long long)root->objectid,
2216 (unsigned long long)rec->ino);
2218 memset(&inode_item, 0, sizeof(inode_item));
2219 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2220 if (root_dir)
2221 btrfs_set_stack_inode_nlink(&inode_item, 1);
2222 else
2223 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2224 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2225 if (rec->found_dir_item) {
2226 if (rec->found_file_extent)
2227 fprintf(stderr, "root %llu inode %llu has both a dir "
2228 "item and extents, unsure if it is a dir or a "
2229 "regular file so setting it as a directory\n",
2230 (unsigned long long)root->objectid,
2231 (unsigned long long)rec->ino);
2232 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2233 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2234 } else if (!rec->found_dir_item) {
2235 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2236 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2238 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2239 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2240 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2241 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2242 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2243 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2244 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2245 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2247 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2248 BUG_ON(ret);
2249 btrfs_commit_transaction(trans, root);
2250 return 0;
/*
 * Walk the backrefs of @rec and repair what can be repaired: recreate a
 * missing root-dir inode item, delete bad dir indexes (@delete pass),
 * insert missing DIR_INDEX items, insert missing index/item pairs for
 * lone inode refs, and recreate a missing inode item when a fully
 * consistent backref exists.  Returns a negative errno on failure or the
 * number of repairs performed.
 */
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, backref, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		/*
		 * Delete pass: an index with no inode ref, or one whose
		 * index number disagrees with the ref, is bogus.
		 */
		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, inode_cache, rec, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
		}

		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
			/*
			 * NOTE(review): found_dir_index appears twice in
			 * this condition; the second occurrence is
			 * redundant and may have been meant as another
			 * flag — confirm intent before changing.
			 */
			if (backref->found_dir_item &&
			    backref->found_dir_index &&
			    backref->found_dir_index) {
				if (!backref->errors &&
				    backref->found_inode_ref) {
					list_del(&backref->list);
					free(backref);
				}
			}
		}

		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		/* A fully consistent backref can seed a new inode item. */
		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, backref, 0);
			if (ret)
				break;
			repaired++;
		}
	}
	return ret ? ret : repaired;
}
2363 * To determine the file type for nlink/inode_item repair
2365 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2366 * Return -ENOENT if file type is not found.
2368 static int find_file_type(struct inode_record *rec, u8 *type)
2370 struct inode_backref *backref;
2372 /* For inode item recovered case */
2373 if (rec->found_inode_item) {
2374 *type = imode_to_type(rec->imode);
2375 return 0;
2378 list_for_each_entry(backref, &rec->backrefs, list) {
2379 if (backref->found_dir_index || backref->found_dir_item) {
2380 *type = backref->filetype;
2381 return 0;
2384 return -ENOENT;
2388 * To determine the file name for nlink repair
2390 * Return 0 if file name is found, set name and namelen.
2391 * Return -ENOENT if file name is not found.
2393 static int find_file_name(struct inode_record *rec,
2394 char *name, int *namelen)
2396 struct inode_backref *backref;
2398 list_for_each_entry(backref, &rec->backrefs, list) {
2399 if (backref->found_dir_index || backref->found_dir_item ||
2400 backref->found_inode_ref) {
2401 memcpy(name, backref->name, backref->namelen);
2402 *namelen = backref->namelen;
2403 return 0;
2406 return -ENOENT;
2409 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild rec->found_link and the on-disk nlink from the backref list:
 *  1) unlink every backref and drop invalid ones from the in-memory list;
 *  2) force nlink to 0 in the inode item;
 *  3) re-add the surviving (fully consistent) backrefs - btrfs_add_link()
 *     increments nlink once per link, so the final count is correct.
 * Returns 0 on success, <0 on error; @path is always released.
 */
2410 static int reset_nlink(struct btrfs_trans_handle *trans,
2411 struct btrfs_root *root,
2412 struct btrfs_path *path,
2413 struct inode_record *rec)
2415 struct inode_backref *backref;
2416 struct inode_backref *tmp;
2417 struct btrfs_key key;
2418 struct btrfs_inode_item *inode_item;
2419 int ret = 0;
2421 /* We don't believe this either, reset it and iterate backref */
2422 rec->found_link = 0;
2424 /* Remove all backref including the valid ones */
2425 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2426 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2427 backref->index, backref->name,
2428 backref->namelen, 0);
2429 if (ret < 0)
2430 goto out;
2432 /* remove invalid backref, so it won't be added back */
/* valid means the full dir_index + dir_item + inode_ref triple was seen */
2433 if (!(backref->found_dir_index &&
2434 backref->found_dir_item &&
2435 backref->found_inode_ref)) {
2436 list_del(&backref->list);
2437 free(backref);
2438 } else {
2439 rec->found_link++;
2443 /* Set nlink to 0 */
2444 key.objectid = rec->ino;
2445 key.type = BTRFS_INODE_ITEM_KEY;
2446 key.offset = 0;
2447 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2448 if (ret < 0)
2449 goto out;
/* ret > 0 means the inode item itself is missing */
2450 if (ret > 0) {
2451 ret = -ENOENT;
2452 goto out;
2454 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2455 struct btrfs_inode_item);
2456 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2457 btrfs_mark_buffer_dirty(path->nodes[0]);
2458 btrfs_release_path(path);
2461 * Add back valid inode_ref/dir_item/dir_index,
2462 * add_link() will handle the nlink inc, so new nlink must be correct
2464 list_for_each_entry(backref, &rec->backrefs, list) {
2465 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2466 backref->name, backref->namelen,
2467 backref->filetype, &backref->index, 1);
2468 if (ret < 0)
2469 goto out;
2471 out:
2472 btrfs_release_path(path);
2473 return ret;
/*
 * Repair I_ERR_LINK_COUNT_WRONG for @rec.
 *
 * First recover a usable file name/type (falling back to "<ino>" and a
 * regular file), then rebuild nlink via reset_nlink().  If no valid link
 * survives, create (or reuse) a "lost+found" directory under the fs root
 * and link the inode there, appending ".<ino>" while the name collides.
 * The error bit is cleared even on failure so the caller cannot loop
 * forever on the same record (see the note before "out:").
 */
2476 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2477 struct btrfs_root *root,
2478 struct btrfs_path *path,
2479 struct inode_record *rec)
2481 char *dir_name = "lost+found";
2482 char namebuf[BTRFS_NAME_LEN] = {0};
2483 u64 lost_found_ino;
2484 u32 mode = 0700;
2485 u8 type = 0;
2486 int namelen = 0;
2487 int name_recovered = 0;
2488 int type_recovered = 0;
2489 int ret = 0;
2492 * Get file name and type first before these invalid inode ref
2493 * are deleted by remove_all_invalid_backref()
2495 name_recovered = !find_file_name(rec, namebuf, &namelen);
2496 type_recovered = !find_file_type(rec, &type);
2498 if (!name_recovered) {
2499 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2500 rec->ino, rec->ino);
2501 namelen = count_digits(rec->ino);
2502 sprintf(namebuf, "%llu", rec->ino);
2503 name_recovered = 1;
2505 if (!type_recovered) {
2506 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2507 rec->ino);
2508 type = BTRFS_FT_REG_FILE;
2509 type_recovered = 1;
2512 ret = reset_nlink(trans, root, path, rec);
2513 if (ret < 0) {
2514 fprintf(stderr,
2515 "Failed to reset nlink for inode %llu: %s\n",
2516 rec->ino, strerror(-ret));
2517 goto out;
/* No valid backref left at all: move the inode into lost+found */
2520 if (rec->found_link == 0) {
/* highest_inode was recorded in check_inode_recs(); next ino is free */
2521 lost_found_ino = root->highest_inode;
2522 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2523 ret = -EOVERFLOW;
2524 goto out;
2526 lost_found_ino++;
2527 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2528 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2529 mode);
2530 if (ret < 0) {
2531 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2532 dir_name, strerror(-ret));
2533 goto out;
2535 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2536 namebuf, namelen, type, NULL, 1);
2538 * Add ".INO" suffix several times to handle case where
2539 * "FILENAME.INO" is already taken by another file.
2541 while (ret == -EEXIST) {
2543 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2545 if (namelen + count_digits(rec->ino) + 1 >
2546 BTRFS_NAME_LEN) {
2547 ret = -EFBIG;
2548 goto out;
2550 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2551 ".%llu", rec->ino);
2552 namelen += count_digits(rec->ino) + 1;
2553 ret = btrfs_add_link(trans, root, rec->ino,
2554 lost_found_ino, namebuf,
2555 namelen, type, NULL, 1);
2557 if (ret < 0) {
2558 fprintf(stderr,
2559 "Failed to link the inode %llu to %s dir: %s\n",
2560 rec->ino, dir_name, strerror(-ret));
2561 goto out;
2564 * Just increase the found_link, don't actually add the
2565 * backref. This will make things easier and this inode
2566 * record will be freed after the repair is done.
2567 * So fsck will not report problem about this inode.
2569 rec->found_link++;
2570 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2571 namelen, namebuf, dir_name);
2573 printf("Fixed the nlink of inode %llu\n", rec->ino);
2574 out:
2576 * Clear the flag anyway, or we will loop forever for the same inode
2577 * as it will not be removed from the bad inode list and the dead loop
2578 * happens.
2580 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2581 btrfs_release_path(path);
2582 return ret;
2586 * Check if there is any normal(reg or prealloc) file extent for given
2587 * ino.
2588 * This is used to determine the file type when neither its dir_index/item or
2589 * inode_item exists.
2591 * This will *NOT* report error, if any error happens, just consider it does
2592 * not have any normal file extent.
2594 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2596 struct btrfs_path *path;
2597 struct btrfs_key key;
2598 struct btrfs_key found_key;
2599 struct btrfs_file_extent_item *fi;
2600 u8 type;
2601 int ret = 0;
2603 path = btrfs_alloc_path();
2604 if (!path)
2605 goto out;
2606 key.objectid = ino;
2607 key.type = BTRFS_EXTENT_DATA_KEY;
2608 key.offset = 0;
2610 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2611 if (ret < 0) {
2612 ret = 0;
2613 goto out;
2615 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2616 ret = btrfs_next_leaf(root, path);
2617 if (ret) {
2618 ret = 0;
2619 goto out;
2622 while (1) {
2623 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2624 path->slots[0]);
2625 if (found_key.objectid != ino ||
2626 found_key.type != BTRFS_EXTENT_DATA_KEY)
2627 break;
2628 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2629 struct btrfs_file_extent_item);
2630 type = btrfs_file_extent_type(path->nodes[0], fi);
2631 if (type != BTRFS_FILE_EXTENT_INLINE) {
2632 ret = 1;
2633 goto out;
2636 out:
2637 btrfs_free_path(path);
2638 return ret;
2641 static u32 btrfs_type_to_imode(u8 type)
2643 static u32 imode_by_btrfs_type[] = {
2644 [BTRFS_FT_REG_FILE] = S_IFREG,
2645 [BTRFS_FT_DIR] = S_IFDIR,
2646 [BTRFS_FT_CHRDEV] = S_IFCHR,
2647 [BTRFS_FT_BLKDEV] = S_IFBLK,
2648 [BTRFS_FT_FIFO] = S_IFIFO,
2649 [BTRFS_FT_SOCK] = S_IFSOCK,
2650 [BTRFS_FT_SYMLINK] = S_IFLNK,
2653 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item (I_ERR_NO_INODE_ITEM) for @rec.
 * The file type is recovered from dir entries, regular file extents or
 * orphan extents, falling back to a regular file.  Only the inode item
 * itself is recreated here; link repair is delegated by setting
 * I_ERR_LINK_COUNT_WRONG so repair_inode_nlinks() runs afterwards.
 */
2656 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2657 struct btrfs_root *root,
2658 struct btrfs_path *path,
2659 struct inode_record *rec)
2661 u8 filetype;
2662 u32 mode = 0700;
2663 int type_recovered = 0;
2664 int ret = 0;
2666 printf("Trying to rebuild inode:%llu\n", rec->ino);
2668 type_recovered = !find_file_type(rec, &filetype);
2671 * Try to determine inode type if type not found.
2673 * For found regular file extent, it must be FILE.
2674 * For found dir_item/index, it must be DIR.
2676 * For undetermined one, use FILE as fallback.
2678 * TODO:
2679 * 1. If found backref(inode_index/item is already handled) to it,
2680 * it must be DIR.
2681 * Need new inode-inode ref structure to allow search for that.
2683 if (!type_recovered) {
2684 if (rec->found_file_extent &&
2685 find_normal_file_extent(root, rec->ino)) {
2686 type_recovered = 1;
2687 filetype = BTRFS_FT_REG_FILE;
2688 } else if (rec->found_dir_item) {
2689 type_recovered = 1;
2690 filetype = BTRFS_FT_DIR;
2691 } else if (!list_empty(&rec->orphan_extents)) {
/* orphan data extents imply file data, hence a regular file */
2692 type_recovered = 1;
2693 filetype = BTRFS_FT_REG_FILE;
2694 } else{
2695 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2696 rec->ino);
2697 type_recovered = 1;
2698 filetype = BTRFS_FT_REG_FILE;
2702 ret = btrfs_new_inode(trans, root, rec->ino,
2703 mode | btrfs_type_to_imode(filetype));
2704 if (ret < 0)
2705 goto out;
2708 * Here inode rebuild is done, we only rebuild the inode item,
2709 * don't repair the nlink(like move to lost+found).
2710 * That is the job of nlink repair.
2712 * We just fill the record and return
2714 rec->found_dir_item = 1;
2715 rec->imode = mode | btrfs_type_to_imode(filetype);
2716 rec->nlink = 0;
2717 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2718 /* Ensure the inode_nlinks repair function will be called */
2719 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2720 out:
2721 return ret;
/*
 * Re-link orphan data extents (I_ERR_FILE_EXTENT_ORPHAN) back into the
 * file.  Each orphan extent is first checked for conflicts; a conflicting
 * orphan gets its disk extent freed instead of being re-inserted.  The
 * inserted extent is assumed uncompressed with no data offset (see the
 * comment below).  File-size and hole bookkeeping is updated as we go,
 * and the related error bits are cleared when they become consistent.
 */
2724 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2725 struct btrfs_root *root,
2726 struct btrfs_path *path,
2727 struct inode_record *rec)
2729 struct orphan_data_extent *orphan;
2730 struct orphan_data_extent *tmp;
2731 int ret = 0;
2733 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2735 * Check for conflicting file extents
2737 * Here we don't know whether the extents is compressed or not,
2738 * so we can only assume it not compressed nor data offset,
2739 * and use its disk_len as extent length.
2741 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2742 orphan->offset, orphan->disk_len, 0)
/*
 * Repair I_ERR_FILE_EXTENT_DISCOUNT by punching a hole extent over every
 * recorded gap in the file's extent layout.  A file that lost all its
 * extents (empty hole tree) gets a single hole covering
 * [0, round_up(isize, sectorsize)).
 */
2785 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2786 struct btrfs_root *root,
2787 struct btrfs_path *path,
2788 struct inode_record *rec)
2790 struct rb_node *node;
2791 struct file_extent_hole *hole;
2792 int found = 0;
2793 int ret = 0;
2795 node = rb_first(&rec->holes);
2797 while (node) {
2798 found = 1;
2799 hole = rb_entry(node, struct file_extent_hole, node);
2800 ret = btrfs_punch_hole(trans, root, rec->ino,
2801 hole->start, hole->len);
2802 if (ret < 0)
2803 goto out;
2804 ret = del_file_extent_hole(&rec->holes, hole->start,
2805 hole->len);
2806 if (ret < 0)
2807 goto out;
2808 if (RB_EMPTY_ROOT(&rec->holes))
2809 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* re-read the first node: del_file_extent_hole() modified the tree */
2810 node = rb_first(&rec->holes);
2812 /* special case for a file losing all its file extent */
2813 if (!found) {
2814 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2815 round_up(rec->isize, root->sectorsize));
2816 if (ret < 0)
2817 goto out;
2819 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2820 rec->ino, root->objectid);
2821 out:
2822 return ret;
/*
 * Dispatch the individual repair routines for every inode error bit this
 * code knows how to fix.  Returns rec->errors unchanged when none of the
 * repairable bits is set, otherwise 0 on success or a negative errno.
 * Repair order matters: the inode item must exist before the extent,
 * isize and nlink repairs can run.
 */
2825 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2827 struct btrfs_trans_handle *trans;
2828 struct btrfs_path *path;
2829 int ret = 0;
2831 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2832 I_ERR_NO_ORPHAN_ITEM |
2833 I_ERR_LINK_COUNT_WRONG |
2834 I_ERR_NO_INODE_ITEM |
2835 I_ERR_FILE_EXTENT_ORPHAN |
2836 I_ERR_FILE_EXTENT_DISCOUNT|
2837 I_ERR_FILE_NBYTES_WRONG)))
2838 return rec->errors;
2840 path = btrfs_alloc_path();
2841 if (!path)
2842 return -ENOMEM;
2845 * For nlink repair, it may create a dir and add link, so
2846 * 2 for parent(256)'s dir_index and dir_item
2847 * 2 for lost+found dir's inode_item and inode_ref
2848 * 1 for the new inode_ref of the file
2849 * 2 for lost+found dir's dir_index and dir_item for the file
2851 trans = btrfs_start_transaction(root, 7);
2852 if (IS_ERR(trans)) {
2853 btrfs_free_path(path);
2854 return PTR_ERR(trans);
/* Each repair only runs while no earlier one has failed (!ret) */
2857 if (rec->errors & I_ERR_NO_INODE_ITEM)
2858 ret = repair_inode_no_item(trans, root, path, rec);
2859 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2860 ret = repair_inode_orphan_extent(trans, root, path, rec);
2861 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2862 ret = repair_inode_discount_extent(trans, root, path, rec);
2863 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2864 ret = repair_inode_isize(trans, root, path, rec);
2865 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2866 ret = repair_inode_orphan_item(trans, root, path, rec);
2867 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2868 ret = repair_inode_nlinks(trans, root, path, rec);
2869 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2870 ret = repair_inode_nbytes(trans, root, path, rec);
2871 btrfs_commit_transaction(trans, root);
2872 btrfs_free_path(path);
2873 return ret;
/*
 * Check (and, with --repair, fix) every inode record collected for @root.
 * When repairing, backrefs are handled first in stages (delete bad ones,
 * then re-add good ones - see the long comment below), the root dir is
 * recreated if missing, and try_repair_inode() handles per-inode errors.
 * Remaining problems are printed; returns -1 if any unrepaired error
 * remains, 0 otherwise (or -EAGAIN/<0 from the repair path).
 */
2876 static int check_inode_recs(struct btrfs_root *root,
2877 struct cache_tree *inode_cache)
2879 struct cache_extent *cache;
2880 struct ptr_node *node;
2881 struct inode_record *rec;
2882 struct inode_backref *backref;
2883 int stage = 0;
2884 int ret = 0;
2885 int err = 0;
2886 u64 error = 0;
2887 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2889 if (btrfs_root_refs(&root->root_item) == 0) {
2890 if (!cache_tree_empty(inode_cache))
2891 fprintf(stderr, "warning line %d\n", __LINE__);
2892 return 0;
2896 * We need to record the highest inode number for later 'lost+found'
2897 * dir creation.
2898 * We must select an ino not used/referred by any existing inode, or
2899 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2900 * this may cause 'lost+found' dir has wrong nlinks.
2902 cache = last_cache_extent(inode_cache);
2903 if (cache) {
2904 node = container_of(cache, struct ptr_node, cache);
2905 rec = node->data;
2906 if (rec->ino > root->highest_inode)
2907 root->highest_inode = rec->ino;
2911 * We need to repair backrefs first because we could change some of the
2912 * errors in the inode recs.
2914 * We also need to go through and delete invalid backrefs first and then
2915 * add the correct ones second. We do this because we may get EEXIST
2916 * when adding back the correct index because we hadn't yet deleted the
2917 * invalid index.
2919 * For example, if we were missing a dir index then the directories
2920 * isize would be wrong, so if we fixed the isize to what we thought it
2921 * would be and then fixed the backref we'd still have a invalid fs, so
2922 * we need to add back the dir index and then check to see if the isize
2923 * is still wrong.
/* stage 1: delete invalid backrefs; stage 2: add missing ones;
 * stage 3: free everything so the caller can rescan */
2925 while (stage < 3) {
2926 stage++;
2927 if (stage == 3 && !err)
2928 break;
2930 cache = search_cache_extent(inode_cache, 0);
2931 while (repair && cache) {
2932 node = container_of(cache, struct ptr_node, cache);
2933 rec = node->data;
2934 cache = next_cache_extent(cache);
2936 /* Need to free everything up and rescan */
2937 if (stage == 3) {
2938 remove_cache_extent(inode_cache, &node->cache);
2939 free(node);
2940 free_inode_rec(rec);
2941 continue;
2944 if (list_empty(&rec->backrefs))
2945 continue;
2947 ret = repair_inode_backrefs(root, rec, inode_cache,
2948 stage == 1);
2949 if (ret < 0) {
2950 err = ret;
2951 stage = 2;
2952 break;
/* NOTE(review): "} if" reads like a missing "else"; ret > 0 marks that
 * something was repaired, so -EAGAIN requests a full rescan */
2953 } if (ret > 0) {
2954 err = -EAGAIN;
2958 if (err)
2959 return err;
2961 rec = get_inode_rec(inode_cache, root_dirid, 0);
2962 BUG_ON(IS_ERR(rec));
2963 if (rec) {
2964 ret = check_root_dir(rec);
2965 if (ret) {
2966 fprintf(stderr, "root %llu root dir %llu error\n",
2967 (unsigned long long)root->root_key.objectid,
2968 (unsigned long long)root_dirid);
2969 print_inode_error(root, rec);
2970 error++;
2972 } else {
2973 if (repair) {
2974 struct btrfs_trans_handle *trans;
2976 trans = btrfs_start_transaction(root, 1);
2977 if (IS_ERR(trans)) {
2978 err = PTR_ERR(trans);
2979 return err;
2982 fprintf(stderr,
2983 "root %llu missing its root dir, recreating\n",
2984 (unsigned long long)root->objectid);
2986 ret = btrfs_make_root_dir(trans, root, root_dirid);
2987 BUG_ON(ret);
2989 btrfs_commit_transaction(trans, root);
2990 return -EAGAIN;
2993 fprintf(stderr, "root %llu root dir %llu not found\n",
2994 (unsigned long long)root->root_key.objectid,
2995 (unsigned long long)root_dirid);
/* Drain the cache, checking/repairing each remaining inode record */
2998 while (1) {
2999 cache = search_cache_extent(inode_cache, 0);
3000 if (!cache)
3001 break;
3002 node = container_of(cache, struct ptr_node, cache);
3003 rec = node->data;
3004 remove_cache_extent(inode_cache, &node->cache);
3005 free(node);
3006 if (rec->ino == root_dirid ||
3007 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3008 free_inode_rec(rec);
3009 continue;
3012 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3013 ret = check_orphan_item(root, rec->ino);
3014 if (ret == 0)
3015 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3016 if (can_free_inode_rec(rec)) {
3017 free_inode_rec(rec);
3018 continue;
3022 if (!rec->found_inode_item)
3023 rec->errors |= I_ERR_NO_INODE_ITEM;
3024 if (rec->found_link != rec->nlink)
3025 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3026 if (repair) {
3027 ret = try_repair_inode(root, rec);
3028 if (ret == 0 && can_free_inode_rec(rec)) {
3029 free_inode_rec(rec);
3030 continue;
3032 ret = 0;
3035 if (!(repair && ret == 0))
3036 error++;
3037 print_inode_error(root, rec);
3038 list_for_each_entry(backref, &rec->backrefs, list) {
3039 if (!backref->found_dir_item)
3040 backref->errors |= REF_ERR_NO_DIR_ITEM;
3041 if (!backref->found_dir_index)
3042 backref->errors |= REF_ERR_NO_DIR_INDEX;
3043 if (!backref->found_inode_ref)
3044 backref->errors |= REF_ERR_NO_INODE_REF;
3045 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3046 " namelen %u name %s filetype %d errors %x",
3047 (unsigned long long)backref->dir,
3048 (unsigned long long)backref->index,
3049 backref->namelen, backref->name,
3050 backref->filetype, backref->errors);
3051 print_ref_error(backref->errors);
3053 free_inode_rec(rec);
3055 return (error > 0) ? -1 : 0;
3058 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3059 u64 objectid)
3061 struct cache_extent *cache;
3062 struct root_record *rec = NULL;
3063 int ret;
3065 cache = lookup_cache_extent(root_cache, objectid, 1);
3066 if (cache) {
3067 rec = container_of(cache, struct root_record, cache);
3068 } else {
3069 rec = calloc(1, sizeof(*rec));
3070 if (!rec)
3071 return ERR_PTR(-ENOMEM);
3072 rec->objectid = objectid;
3073 INIT_LIST_HEAD(&rec->backrefs);
3074 rec->cache.start = objectid;
3075 rec->cache.size = 1;
3077 ret = insert_cache_extent(root_cache, &rec->cache);
3078 if (ret)
3079 return ERR_PTR(-EEXIST);
3081 return rec;
3084 static struct root_backref *get_root_backref(struct root_record *rec,
3085 u64 ref_root, u64 dir, u64 index,
3086 const char *name, int namelen)
3088 struct root_backref *backref;
3090 list_for_each_entry(backref, &rec->backrefs, list) {
3091 if (backref->ref_root != ref_root || backref->dir != dir ||
3092 backref->namelen != namelen)
3093 continue;
3094 if (memcmp(name, backref->name, namelen))
3095 continue;
3096 return backref;
3099 backref = calloc(1, sizeof(*backref) + namelen + 1);
3100 if (!backref)
3101 return NULL;
3102 backref->ref_root = ref_root;
3103 backref->dir = dir;
3104 backref->index = index;
3105 backref->namelen = namelen;
3106 memcpy(backref->name, name, namelen);
3107 backref->name[namelen] = '\0';
3108 list_add_tail(&backref->list, &rec->backrefs);
3109 return backref;
3112 static void free_root_record(struct cache_extent *cache)
3114 struct root_record *rec;
3115 struct root_backref *backref;
3117 rec = container_of(cache, struct root_record, cache);
3118 while (!list_empty(&rec->backrefs)) {
3119 backref = list_entry(rec->backrefs.next,
3120 struct root_backref, list);
3121 list_del(&backref->list);
3122 free(backref);
3125 kfree(rec);
3128 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item, dir index, root ref or root
 * backref) about the link between root @root_id and its parent.
 * rec->found_ref is incremented once both a forward ref and a dir item
 * have been seen for the same backref; an index mismatch between items
 * is flagged with REF_ERR_INDEX_UNMATCH.
 */
3130 static int add_root_backref(struct cache_tree *root_cache,
3131 u64 root_id, u64 ref_root, u64 dir, u64 index,
3132 const char *name, int namelen,
3133 int item_type, int errors)
3135 struct root_record *rec;
3136 struct root_backref *backref;
3138 rec = get_root_rec(root_cache, root_id);
3139 BUG_ON(IS_ERR(rec));
3140 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3141 BUG_ON(!backref);
3143 backref->errors |= errors;
/* DIR_ITEM carries no index; for all other items cross-check it */
3145 if (item_type != BTRFS_DIR_ITEM_KEY) {
3146 if (backref->found_dir_index || backref->found_back_ref ||
3147 backref->found_forward_ref) {
3148 if (backref->index != index)
3149 backref->errors |= REF_ERR_INDEX_UNMATCH;
3150 } else {
3151 backref->index = index;
3155 if (item_type == BTRFS_DIR_ITEM_KEY) {
3156 if (backref->found_forward_ref)
3157 rec->found_ref++;
3158 backref->found_dir_item = 1;
3159 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3160 backref->found_dir_index = 1;
3161 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3162 if (backref->found_forward_ref)
3163 backref->errors |= REF_ERR_DUP_ROOT_REF;
3164 else if (backref->found_dir_item)
3165 rec->found_ref++;
3166 backref->found_forward_ref = 1;
3167 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3168 if (backref->found_back_ref)
3169 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3170 backref->found_back_ref = 1;
3171 } else {
3172 BUG_ON(1);
3175 if (backref->found_forward_ref && backref->found_dir_item)
3176 backref->reachable = 1;
3177 return 0;
/*
 * Move subvolume-directory records collected in @src_cache (the per-tree
 * inode cache) into @dst_cache as root backrefs.  Records whose ino is
 * not a child root of @root are dropped; reloc trees are skipped
 * wholesale.  @src_cache is drained either way.
 */
3180 static int merge_root_recs(struct btrfs_root *root,
3181 struct cache_tree *src_cache,
3182 struct cache_tree *dst_cache)
3184 struct cache_extent *cache;
3185 struct ptr_node *node;
3186 struct inode_record *rec;
3187 struct inode_backref *backref;
3188 int ret = 0;
3190 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3191 free_inode_recs_tree(src_cache);
3192 return 0;
3195 while (1) {
3196 cache = search_cache_extent(src_cache, 0);
3197 if (!cache)
3198 break;
3199 node = container_of(cache, struct ptr_node, cache);
3200 rec = node->data;
3201 remove_cache_extent(src_cache, &node->cache);
3202 free(node);
3204 ret = is_child_root(root, root->objectid, rec->ino);
3205 if (ret < 0)
3206 break;
3207 else if (ret == 0)
3208 goto skip;
/* each dir_item/dir_index found becomes a root backref in dst */
3210 list_for_each_entry(backref, &rec->backrefs, list) {
3211 BUG_ON(backref->found_inode_ref);
3212 if (backref->found_dir_item)
3213 add_root_backref(dst_cache, rec->ino,
3214 root->root_key.objectid, backref->dir,
3215 backref->index, backref->name,
3216 backref->namelen, BTRFS_DIR_ITEM_KEY,
3217 backref->errors);
3218 if (backref->found_dir_index)
3219 add_root_backref(dst_cache, rec->ino,
3220 root->root_key.objectid, backref->dir,
3221 backref->index, backref->name,
3222 backref->namelen, BTRFS_DIR_INDEX_KEY,
3223 backref->errors);
3225 skip:
3226 free_inode_rec(rec);
3228 if (ret < 0)
3229 return ret;
3230 return 0;
/*
 * Verify reachability and reference consistency of every fs tree root.
 * Starting from FS_TREE (always considered referenced), iteratively drop
 * backrefs whose referencing root itself lost all references, then report
 * each unreferenced tree or inconsistent backref.
 * Returns 1 if any error was found, 0 otherwise.
 */
3233 static int check_root_refs(struct btrfs_root *root,
3234 struct cache_tree *root_cache)
3236 struct root_record *rec;
3237 struct root_record *ref_root;
3238 struct root_backref *backref;
3239 struct cache_extent *cache;
3240 int loop = 1;
3241 int ret;
3242 int error;
3243 int errors = 0;
3245 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3246 BUG_ON(IS_ERR(rec));
3247 rec->found_ref = 1;
3249 /* fixme: this can not detect circular references */
3250 while (loop) {
3251 loop = 0;
3252 cache = search_cache_extent(root_cache, 0);
3253 while (1) {
3254 if (!cache)
3255 break;
3256 rec = container_of(cache, struct root_record, cache);
3257 cache = next_cache_extent(cache);
3259 if (rec->found_ref == 0)
3260 continue;
3262 list_for_each_entry(backref, &rec->backrefs, list) {
3263 if (!backref->reachable)
3264 continue;
3266 ref_root = get_root_rec(root_cache,
3267 backref->ref_root);
3268 BUG_ON(IS_ERR(ref_root));
3269 if (ref_root->found_ref > 0)
3270 continue;
/* referencing root is itself unreachable: invalidate this backref
 * and iterate again if this root just lost its last reference */
3272 backref->reachable = 0;
3273 rec->found_ref--;
3274 if (rec->found_ref == 0)
3275 loop = 1;
/* Reporting pass over all roots */
3280 cache = search_cache_extent(root_cache, 0);
3281 while (1) {
3282 if (!cache)
3283 break;
3284 rec = container_of(cache, struct root_record, cache);
3285 cache = next_cache_extent(cache);
3287 if (rec->found_ref == 0 &&
3288 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3289 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* an orphan item makes an unreferenced subvolume legitimate */
3290 ret = check_orphan_item(root->fs_info->tree_root,
3291 rec->objectid);
3292 if (ret == 0)
3293 continue;
3296 * If we don't have a root item then we likely just have
3297 * a dir item in a snapshot for this root but no actual
3298 * ref key or anything so it's meaningless.
3300 if (!rec->found_root_item)
3301 continue;
3302 errors++;
3303 fprintf(stderr, "fs tree %llu not referenced\n",
3304 (unsigned long long)rec->objectid);
3307 error = 0;
3308 if (rec->found_ref > 0 && !rec->found_root_item)
3309 error = 1;
3310 list_for_each_entry(backref, &rec->backrefs, list) {
3311 if (!backref->found_dir_item)
3312 backref->errors |= REF_ERR_NO_DIR_ITEM;
3313 if (!backref->found_dir_index)
3314 backref->errors |= REF_ERR_NO_DIR_INDEX;
3315 if (!backref->found_back_ref)
3316 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3317 if (!backref->found_forward_ref)
3318 backref->errors |= REF_ERR_NO_ROOT_REF;
3319 if (backref->reachable && backref->errors)
3320 error = 1;
3322 if (!error)
3323 continue;
3325 errors++;
3326 fprintf(stderr, "fs tree %llu refs %u %s\n",
3327 (unsigned long long)rec->objectid, rec->found_ref,
3328 rec->found_root_item ? "" : "not found");
3330 list_for_each_entry(backref, &rec->backrefs, list) {
3331 if (!backref->reachable)
3332 continue;
3333 if (!backref->errors && rec->found_root_item)
3334 continue;
3335 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3336 " index %llu namelen %u name %s errors %x\n",
3337 (unsigned long long)backref->ref_root,
3338 (unsigned long long)backref->dir,
3339 (unsigned long long)backref->index,
3340 backref->namelen, backref->name,
3341 backref->errors);
3342 print_ref_error(backref->errors);
3345 return errors > 0 ? 1 : 0;
3348 static int process_root_ref(struct extent_buffer *eb, int slot,
3349 struct btrfs_key *key,
3350 struct cache_tree *root_cache)
3352 u64 dirid;
3353 u64 index;
3354 u32 len;
3355 u32 name_len;
3356 struct btrfs_root_ref *ref;
3357 char namebuf[BTRFS_NAME_LEN];
3358 int error;
3360 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3362 dirid = btrfs_root_ref_dirid(eb, ref);
3363 index = btrfs_root_ref_sequence(eb, ref);
3364 name_len = btrfs_root_ref_name_len(eb, ref);
3366 if (name_len <= BTRFS_NAME_LEN) {
3367 len = name_len;
3368 error = 0;
3369 } else {
3370 len = BTRFS_NAME_LEN;
3371 error = REF_ERR_NAME_TOO_LONG;
3373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3375 if (key->type == BTRFS_ROOT_REF_KEY) {
3376 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3377 index, namebuf, len, key->type, error);
3378 } else {
3379 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3380 index, namebuf, len, key->type, error);
3382 return 0;
3385 static void free_corrupt_block(struct cache_extent *cache)
3387 struct btrfs_corrupt_block *corrupt;
3389 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3390 free(corrupt);
3393 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3396 * Repair the btree of the given root.
3398 * The fix is to remove the node key in corrupt_blocks cache_tree.
3399 * and rebalance the tree.
3400 * After the fix, the btree should be writeable.
/*
 * Returns 0 on success or a negative errno; commits one transaction
 * covering both the deletion pass and the rebalance pass.
 */
3402 static int repair_btree(struct btrfs_root *root,
3403 struct cache_tree *corrupt_blocks)
3405 struct btrfs_trans_handle *trans;
3406 struct btrfs_path *path;
3407 struct btrfs_corrupt_block *corrupt;
3408 struct cache_extent *cache;
3409 struct btrfs_key key;
3410 u64 offset;
3411 int level;
3412 int ret = 0;
3414 if (cache_tree_empty(corrupt_blocks))
3415 return 0;
3417 path = btrfs_alloc_path();
3418 if (!path)
3419 return -ENOMEM;
3421 trans = btrfs_start_transaction(root, 1);
3422 if (IS_ERR(trans)) {
3423 ret = PTR_ERR(trans);
3424 fprintf(stderr, "Error starting transaction: %s\n",
3425 strerror(-ret));
3426 goto out_free_path;
/* Pass 1: delete the pointer to each corrupt block and free its extent */
3428 cache = first_cache_extent(corrupt_blocks);
3429 while (cache) {
3430 corrupt = container_of(cache, struct btrfs_corrupt_block,
3431 cache);
3432 level = corrupt->level;
3433 path->lowest_level = level;
3434 key.objectid = corrupt->key.objectid;
3435 key.type = corrupt->key.type;
3436 key.offset = corrupt->key.offset;
3439 * Here we don't want to do any tree balance, since it may
3440 * cause a balance with corrupted brother leaf/node,
3441 * so ins_len set to 0 here.
3442 * Balance will be done after all corrupt node/leaf is deleted.
3444 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3445 if (ret < 0)
3446 goto out;
3447 offset = btrfs_node_blockptr(path->nodes[level],
3448 path->slots[level]);
3450 /* Remove the ptr */
3451 ret = btrfs_del_ptr(trans, root, path, level,
3452 path->slots[level]);
3453 if (ret < 0)
3454 goto out;
3456 * Remove the corresponding extent
3457 * return value is not concerned.
3459 btrfs_release_path(path);
3460 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3461 0, root->root_key.objectid,
3462 level - 1, 0);
3463 cache = next_cache_extent(cache);
/* Pass 2: now that the corrupt blocks are gone, let search_slot with
 * ins_len == -1 rebalance the tree around each deleted key */
3466 /* Balance the btree using btrfs_search_slot() */
3467 cache = first_cache_extent(corrupt_blocks);
3468 while (cache) {
3469 corrupt = container_of(cache, struct btrfs_corrupt_block,
3470 cache);
3471 memcpy(&key, &corrupt->key, sizeof(key));
3472 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3473 if (ret < 0)
3474 goto out;
3475 /* return will always >0 since it won't find the item */
3476 ret = 0;
3477 btrfs_release_path(path);
3478 cache = next_cache_extent(cache);
3480 out:
3481 btrfs_commit_transaction(trans, root);
3482 out_free_path:
3483 btrfs_free_path(path);
3484 return ret;
/*
 * Check a single fs/subvolume tree.
 *
 * Walks every block of @root, merging the inode records and cross-subvolume
 * root refs it finds into @root_cache, and records corrupted tree blocks in
 * a per-root cache tree.  With --repair enabled, repair_btree() is invoked
 * on the corrupted blocks afterwards.
 *
 * Returns 0 on success, a negative errno on failure; -EIO if the root block
 * itself fails btrfs_check_leaf()/btrfs_check_node().
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
{
	int ret = 0;
	int err = 0;
	int wret;
	int level;
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
	 *
	 * Unlike the usage in extent tree check, here we do it on a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	/* Reloc trees have no root item of their own to account for. */
	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	}

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);

	/* Move the orphan extent records to the corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
				      1);
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	}

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
	else
		status = btrfs_check_node(root, NULL, root->node);
	/*
	 * NOTE(review): this early return skips the cleanup at the bottom
	 * (fs_info->corrupt_blocks stays set) -- confirm against upstream.
	 */
	if (status != BTRFS_TREE_BLOCK_CLEAN)
		return -EIO;

	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
	} else {
		/* Partially dropped snapshot: resume from drop_progress. */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		if (wret < 0)
			goto skip_walking;
		btrfs_node_key(path.nodes[level], &found_key,
			       path.slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}

	/* Depth-first walk of the whole tree, one down/up round per leaf. */
	while (1) {
		wret = walk_down_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;

		wret = walk_up_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}
skip_walking:
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
		while (cache) {
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
					       cache);
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		}
		if (repair) {
			printf("Try to repair the btree for root %llu\n",
			       root->root_key.objectid);
			ret = repair_btree(root, &corrupt_blocks);
			if (ret < 0)
				fprintf(stderr, "Failed to repair btree: %s\n",
					strerror(-ret));
			if (!ret)
				printf("Btree for root %llu is fixed\n",
				       root->root_key.objectid);
		}
	}

	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (err < 0)
		ret = err;

	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
				     root_node.current);
	}

	err = check_inode_recs(root, &root_node.inode_cache);
	if (!ret)
		ret = err;

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
	return ret;
}
3638 static int fs_root_objectid(u64 objectid)
3640 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3641 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3642 return 1;
3643 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree referenced from the tree of tree roots.
 *
 * Iterates the ROOT_ITEMs in the tree root, running check_fs_root() on each
 * fs tree and collecting ROOT_REF/ROOT_BACKREF items into @root_cache.  If
 * repairs COW the tree root (detected via the tree_node pointer changing, or
 * check_fs_root() returning -EAGAIN), the accumulated root records are
 * thrown away and the scan restarts from the beginning.
 *
 * Returns 0 on success, 1 on any error.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	int ret;
	int err = 0;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);
	}

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	/* Snapshot the root node pointer so we can detect a COW mid-scan. */
	tree_node = tree_root->node;
	while (1) {
		if (tree_node != tree_root->node) {
			/* Tree root was COWed by a repair: start over. */
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* Reloc roots are not cached in fs_info. */
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				/* Repairs invalidated our state: restart. */
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret)
				err = 1;
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	/* A non-empty shared cache here means unresolved shared nodes. */
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);

	task_stop(ctx.info);

	return err;
}
/*
 * Cross-check every backref of @rec against what was found in the extent
 * tree and in the referencing trees, and check that the per-backref counts
 * sum to the extent item's refcount.
 *
 * When @print_errs is zero the function short-circuits on the first
 * inconsistency; otherwise it prints a diagnostic for every problem found.
 *
 * Returns 0 when everything is consistent, 1 otherwise.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		cur = cur->next;
		/* Backref seen via a reference but missing in extent tree. */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = (struct data_backref *)back;
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = (struct tree_backref *)back;
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* Tree backref present in extent tree but never referenced. */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = (struct tree_backref *)back;
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = (struct data_backref *)back;
			/* Per-backref refcount must match references found. */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			}
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/* Tree backrefs count once, data backrefs by found_ref. */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = (struct data_backref *)back;
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
3858 static int free_all_extent_backrefs(struct extent_record *rec)
3860 struct extent_backref *back;
3861 struct list_head *cur;
3862 while (!list_empty(&rec->backrefs)) {
3863 cur = rec->backrefs.next;
3864 back = list_entry(cur, struct extent_backref, list);
3865 list_del(cur);
3866 free(back);
3868 return 0;
3871 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3872 struct cache_tree *extent_cache)
3874 struct cache_extent *cache;
3875 struct extent_record *rec;
3877 while (1) {
3878 cache = first_cache_extent(extent_cache);
3879 if (!cache)
3880 break;
3881 rec = container_of(cache, struct extent_record, cache);
3882 remove_cache_extent(extent_cache, cache);
3883 free_all_extent_backrefs(rec);
3884 free(rec);
3888 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3889 struct extent_record *rec)
3891 if (rec->content_checked && rec->owner_ref_checked &&
3892 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3893 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3894 !rec->bad_full_backref && !rec->crossing_stripes &&
3895 !rec->wrong_chunk_type) {
3896 remove_cache_extent(extent_cache, &rec->cache);
3897 free_all_extent_backrefs(rec);
3898 list_del_init(&rec->list);
3899 free(rec);
3901 return 0;
3904 static int check_owner_ref(struct btrfs_root *root,
3905 struct extent_record *rec,
3906 struct extent_buffer *buf)
3908 struct extent_backref *node;
3909 struct tree_backref *back;
3910 struct btrfs_root *ref_root;
3911 struct btrfs_key key;
3912 struct btrfs_path path;
3913 struct extent_buffer *parent;
3914 int level;
3915 int found = 0;
3916 int ret;
3918 list_for_each_entry(node, &rec->backrefs, list) {
3919 if (node->is_data)
3920 continue;
3921 if (!node->found_ref)
3922 continue;
3923 if (node->full_backref)
3924 continue;
3925 back = (struct tree_backref *)node;
3926 if (btrfs_header_owner(buf) == back->root)
3927 return 0;
3929 BUG_ON(rec->is_root);
3931 /* try to find the block by search corresponding fs tree */
3932 key.objectid = btrfs_header_owner(buf);
3933 key.type = BTRFS_ROOT_ITEM_KEY;
3934 key.offset = (u64)-1;
3936 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3937 if (IS_ERR(ref_root))
3938 return 1;
3940 level = btrfs_header_level(buf);
3941 if (level == 0)
3942 btrfs_item_key_to_cpu(buf, &key, 0);
3943 else
3944 btrfs_node_key_to_cpu(buf, &key, 0);
3946 btrfs_init_path(&path);
3947 path.lowest_level = level + 1;
3948 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3949 if (ret < 0)
3950 return 0;
3952 parent = path.nodes[level + 1];
3953 if (parent && buf->start == btrfs_node_blockptr(parent,
3954 path.slots[level + 1]))
3955 found = 1;
3957 btrfs_release_path(&path);
3958 return found ? 0 : 1;
3961 static int is_extent_tree_record(struct extent_record *rec)
3963 struct list_head *cur = rec->backrefs.next;
3964 struct extent_backref *node;
3965 struct tree_backref *back;
3966 int is_extent = 0;
3968 while(cur != &rec->backrefs) {
3969 node = list_entry(cur, struct extent_backref, list);
3970 cur = cur->next;
3971 if (node->is_data)
3972 return 0;
3973 back = (struct tree_backref *)node;
3974 if (node->full_backref)
3975 return 0;
3976 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3977 is_extent = 1;
3979 return is_extent;
3983 static int record_bad_block_io(struct btrfs_fs_info *info,
3984 struct cache_tree *extent_cache,
3985 u64 start, u64 len)
3987 struct extent_record *rec;
3988 struct cache_extent *cache;
3989 struct btrfs_key key;
3991 cache = lookup_cache_extent(extent_cache, start, len);
3992 if (!cache)
3993 return 0;
3995 rec = container_of(cache, struct extent_record, cache);
3996 if (!is_extent_tree_record(rec))
3997 return 0;
3999 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4000 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf to restore key order.
 *
 * For internal nodes the two key/blockptr pairs are exchanged.  For leaves
 * the item data, the item headers' offset/size fields and the item keys are
 * all swapped.  If slot 0 is touched, the new lowest key is propagated up
 * to the parents.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Node: swap the two btrfs_key_ptr entries wholesale. */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			/* Lowest key changed: fix the keys above us. */
			struct btrfs_disk_key key;
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data holds item1_size bytes but is
		 * written with item2_size bytes (and vice versa) -- this
		 * over-reads the temp buffers whenever the two items differ
		 * in size.  Confirm against upstream before relying on it
		 * for differently sized items.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Swap the headers' data offsets and sizes to match. */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Finally exchange the item keys (fixing low keys as needed). */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
4071 static int fix_key_order(struct btrfs_trans_handle *trans,
4072 struct btrfs_root *root,
4073 struct btrfs_path *path)
4075 struct extent_buffer *buf;
4076 struct btrfs_key k1, k2;
4077 int i;
4078 int level = path->lowest_level;
4079 int ret = -EIO;
4081 buf = path->nodes[level];
4082 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4083 if (level) {
4084 btrfs_node_key_to_cpu(buf, &k1, i);
4085 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4086 } else {
4087 btrfs_item_key_to_cpu(buf, &k1, i);
4088 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4090 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4091 continue;
4092 ret = swap_values(root, path, buf, i);
4093 if (ret)
4094 break;
4095 btrfs_mark_buffer_dirty(buf);
4096 i = 0;
4098 return ret;
/*
 * Delete a bogus item from a leaf by sliding the following item headers
 * over it and shrinking nritems.  Only item types whose loss can be
 * detected and repaired by later check passes are allowed to go.
 *
 * Returns 0 on success, -1 when the key type is not safe to delete.
 * Note: @trans is unused; the buffer is only marked dirty.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Shift the item headers after @slot down by one position. */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		struct btrfs_disk_key disk_key;

		/* First key changed: propagate it to the parent nodes. */
		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair a leaf whose item data offsets are inconsistent.
 *
 * Leaf data grows from the end of the block toward the headers, so item 0
 * must end exactly at BTRFS_LEAF_DATA_SIZE and each later item must end
 * exactly where its predecessor's data begins.  Items that extend past
 * those bounds are deleted (when delete_bogus_item() allows it); gaps are
 * closed by shifting the item data up and adjusting its offset.
 *
 * Returns 0 on success; BUG()s on failure (see comment at the bottom).
 * Note: @trans is unused; buffers are only marked dirty.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root)) {
			/* Item 0 must end at the end of the data area. */
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root)) {
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "item is off the end of the "
					"leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			/* Item i must end where item i-1's data starts. */
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;

		/* Close the gap by moving the data up by @shift bytes. */
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out.  Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
4207 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4208 * then just return -EIO.
4210 static int try_to_fix_bad_block(struct btrfs_root *root,
4211 struct extent_buffer *buf,
4212 enum btrfs_tree_block_status status)
4214 struct btrfs_trans_handle *trans;
4215 struct ulist *roots;
4216 struct ulist_node *node;
4217 struct btrfs_root *search_root;
4218 struct btrfs_path *path;
4219 struct ulist_iterator iter;
4220 struct btrfs_key root_key, key;
4221 int ret;
4223 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4224 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4225 return -EIO;
4227 path = btrfs_alloc_path();
4228 if (!path)
4229 return -EIO;
4231 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4232 0, &roots);
4233 if (ret) {
4234 btrfs_free_path(path);
4235 return -EIO;
4238 ULIST_ITER_INIT(&iter);
4239 while ((node = ulist_next(roots, &iter))) {
4240 root_key.objectid = node->val;
4241 root_key.type = BTRFS_ROOT_ITEM_KEY;
4242 root_key.offset = (u64)-1;
4244 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4245 if (IS_ERR(root)) {
4246 ret = -EIO;
4247 break;
4251 trans = btrfs_start_transaction(search_root, 0);
4252 if (IS_ERR(trans)) {
4253 ret = PTR_ERR(trans);
4254 break;
4257 path->lowest_level = btrfs_header_level(buf);
4258 path->skip_check_block = 1;
4259 if (path->lowest_level)
4260 btrfs_node_key_to_cpu(buf, &key, 0);
4261 else
4262 btrfs_item_key_to_cpu(buf, &key, 0);
4263 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4264 if (ret) {
4265 ret = -EIO;
4266 btrfs_commit_transaction(trans, search_root);
4267 break;
4269 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4270 ret = fix_key_order(trans, search_root, path);
4271 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4272 ret = fix_item_offset(trans, search_root, path);
4273 if (ret) {
4274 btrfs_commit_transaction(trans, search_root);
4275 break;
4277 btrfs_release_path(path);
4278 btrfs_commit_transaction(trans, search_root);
4280 ulist_free(roots);
4281 btrfs_free_path(path);
4282 return ret;
/*
 * Validate one tree block against its extent record.
 *
 * Records the block's generation, first key and level into the matching
 * extent record, sanity-checks the block with btrfs_check_leaf()/
 * btrfs_check_node() (optionally repairing it), then verifies the owner
 * for non-full-backref blocks.
 *
 * Returns 0 on success, 1 when no extent record covers the block, -EIO on
 * an unrepairable block, or -EAGAIN when a repair COWed blocks and the
 * caller must restart its scan.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		/* Remember the block's first key for later diagnostics. */
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cowed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		/* Full-backref blocks carry no meaningful owner to verify. */
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
4348 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4349 u64 parent, u64 root)
4351 struct list_head *cur = rec->backrefs.next;
4352 struct extent_backref *node;
4353 struct tree_backref *back;
4355 while(cur != &rec->backrefs) {
4356 node = list_entry(cur, struct extent_backref, list);
4357 cur = cur->next;
4358 if (node->is_data)
4359 continue;
4360 back = (struct tree_backref *)node;
4361 if (parent > 0) {
4362 if (!node->full_backref)
4363 continue;
4364 if (parent == back->parent)
4365 return back;
4366 } else {
4367 if (node->full_backref)
4368 continue;
4369 if (back->root == root)
4370 return back;
4373 return NULL;
4376 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4377 u64 parent, u64 root)
4379 struct tree_backref *ref = malloc(sizeof(*ref));
4381 if (!ref)
4382 return NULL;
4383 memset(&ref->node, 0, sizeof(ref->node));
4384 if (parent > 0) {
4385 ref->parent = parent;
4386 ref->node.full_backref = 1;
4387 } else {
4388 ref->root = root;
4389 ref->node.full_backref = 0;
4391 list_add_tail(&ref->node.list, &rec->backrefs);
4393 return ref;
4396 static struct data_backref *find_data_backref(struct extent_record *rec,
4397 u64 parent, u64 root,
4398 u64 owner, u64 offset,
4399 int found_ref,
4400 u64 disk_bytenr, u64 bytes)
4402 struct list_head *cur = rec->backrefs.next;
4403 struct extent_backref *node;
4404 struct data_backref *back;
4406 while(cur != &rec->backrefs) {
4407 node = list_entry(cur, struct extent_backref, list);
4408 cur = cur->next;
4409 if (!node->is_data)
4410 continue;
4411 back = (struct data_backref *)node;
4412 if (parent > 0) {
4413 if (!node->full_backref)
4414 continue;
4415 if (parent == back->parent)
4416 return back;
4417 } else {
4418 if (node->full_backref)
4419 continue;
4420 if (back->root == root && back->owner == owner &&
4421 back->offset == offset) {
4422 if (found_ref && node->found_ref &&
4423 (back->bytes != bytes ||
4424 back->disk_bytenr != disk_bytenr))
4425 continue;
4426 return back;
4430 return NULL;
4433 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4434 u64 parent, u64 root,
4435 u64 owner, u64 offset,
4436 u64 max_size)
4438 struct data_backref *ref = malloc(sizeof(*ref));
4440 if (!ref)
4441 return NULL;
4442 memset(&ref->node, 0, sizeof(ref->node));
4443 ref->node.is_data = 1;
4445 if (parent > 0) {
4446 ref->parent = parent;
4447 ref->owner = 0;
4448 ref->offset = 0;
4449 ref->node.full_backref = 1;
4450 } else {
4451 ref->root = root;
4452 ref->owner = owner;
4453 ref->offset = offset;
4454 ref->node.full_backref = 0;
4456 ref->bytes = max_size;
4457 ref->found_ref = 0;
4458 ref->num_refs = 0;
4459 list_add_tail(&ref->node.list, &rec->backrefs);
4460 if (max_size > rec->max_size)
4461 rec->max_size = max_size;
4462 return ref;
/*
 * Check if the type of extent matches with its chunk.
 *
 * Sets rec->wrong_chunk_type when a data extent sits in a non-DATA block
 * group, or a metadata extent sits in a block group that is neither
 * METADATA nor SYSTEM (with the SYSTEM case disambiguated via the first
 * backref).  No-op when no block group covers rec->start.
 */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly */
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extent, as it's also marked as metadata, we can only
	 * make sure it's a SYSTEM extent by its backref
	 */
	if (!list_empty(&rec->backrefs)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		/* Only the first backref is consulted here. */
		node = list_entry(rec->backrefs.next, struct extent_backref,
				  list);
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
/*
 * Allocate a new extent record, fill default values from @tmpl and insert
 * into @extent_cache.  Caller is supposed to make sure [start, nr) is not
 * in the cache, otherwise it fails (the insert BUG()s on overlap).
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
				   struct extent_record *tmpl)
{
	struct extent_record *rec;
	int ret = 0;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return -ENOMEM;
	rec->start = tmpl->start;
	rec->max_size = tmpl->max_size;
	/* nr covers at least max_size so backref comparisons line up. */
	rec->nr = max(tmpl->nr, tmpl->max_size);
	rec->found_rec = tmpl->found_rec;
	rec->content_checked = tmpl->content_checked;
	rec->owner_ref_checked = tmpl->owner_ref_checked;
	rec->num_duplicates = 0;
	rec->metadata = tmpl->metadata;
	rec->flag_block_full_backref = FLAG_UNSET;
	rec->bad_full_backref = 0;
	rec->crossing_stripes = 0;
	rec->wrong_chunk_type = 0;
	rec->is_root = tmpl->is_root;
	rec->refs = tmpl->refs;
	rec->extent_item_refs = tmpl->extent_item_refs;
	rec->parent_generation = tmpl->parent_generation;
	/*
	 * NOTE(review): rec->generation is not copied from @tmpl here --
	 * presumably it is filled in later (check_block() assigns it);
	 * confirm no path reads it before then.
	 */
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->dups);
	INIT_LIST_HEAD(&rec->list);
	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
	rec->cache.start = tmpl->start;
	rec->cache.size = tmpl->nr;
	ret = insert_cache_extent(extent_cache, &rec->cache);
	BUG_ON(ret);
	bytes_used += rec->nr;

	/* Metadata extents must not cross a fixed stripe boundary. */
	if (tmpl->metadata)
		rec->crossing_stripes = check_crossing_stripes(rec->start,
				global_info->tree_root->nodesize);
	check_extent_type(rec);
	return ret;
}
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted
 * verbatim, some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct extent_record *tmpl)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		if (tmpl->refs)
			rec->refs++;
		/* nr == 1 is the placeholder size from a backref-only add. */
		if (rec->nr == 1)
			rec->nr = max(tmpl->nr, tmpl->max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backrefs.
		 */
		if (tmpl->found_rec) {
			if (tmpl->start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				/* A second extent item overlaps this one. */
				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				tmp->start = tmpl->start;
				tmp->max_size = tmpl->max_size;
				tmp->nr = tmpl->nr;
				tmp->found_rec = 1;
				tmp->metadata = tmpl->metadata;
				tmp->extent_item_refs = tmpl->extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = tmpl->nr;
				rec->found_rec = 1;
			}
		}

		if (tmpl->extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr, "block %llu rec "
					"extent_item_refs %llu, passed %llu\n",
					(unsigned long long)tmpl->start,
					(unsigned long long)
							rec->extent_item_refs,
					(unsigned long long)tmpl->extent_item_refs);
			}
			rec->extent_item_refs = tmpl->extent_item_refs;
		}
		if (tmpl->is_root)
			rec->is_root = 1;
		if (tmpl->content_checked)
			rec->content_checked = 1;
		if (tmpl->owner_ref_checked)
			rec->owner_ref_checked = 1;
		memcpy(&rec->parent_key, &tmpl->parent_key,
		       sizeof(tmpl->parent_key));
		if (tmpl->parent_generation)
			rec->parent_generation = tmpl->parent_generation;
		if (rec->max_size < tmpl->max_size)
			rec->max_size = tmpl->max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (tmpl->metadata)
			rec->crossing_stripes = check_crossing_stripes(
					rec->start, global_info->tree_root->nodesize);
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	ret = add_extent_rec_nolookup(extent_cache, tmpl);

	return ret;
}
4669 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4670 u64 parent, u64 root, int found_ref)
4672 struct extent_record *rec;
4673 struct tree_backref *back;
4674 struct cache_extent *cache;
4676 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4677 if (!cache) {
4678 struct extent_record tmpl;
4680 memset(&tmpl, 0, sizeof(tmpl));
4681 tmpl.start = bytenr;
4682 tmpl.nr = 1;
4683 tmpl.metadata = 1;
4685 add_extent_rec_nolookup(extent_cache, &tmpl);
4687 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4688 if (!cache)
4689 abort();
4692 rec = container_of(cache, struct extent_record, cache);
4693 if (rec->start != bytenr) {
4694 abort();
4697 back = find_tree_backref(rec, parent, root);
4698 if (!back) {
4699 back = alloc_tree_backref(rec, parent, root);
4700 BUG_ON(!back);
4703 if (found_ref) {
4704 if (back->node.found_ref) {
4705 fprintf(stderr, "Extent back ref already exists "
4706 "for %llu parent %llu root %llu \n",
4707 (unsigned long long)bytenr,
4708 (unsigned long long)parent,
4709 (unsigned long long)root);
4711 back->node.found_ref = 1;
4712 } else {
4713 if (back->node.found_extent_tree) {
4714 fprintf(stderr, "Extent back ref already exists "
4715 "for %llu parent %llu root %llu \n",
4716 (unsigned long long)bytenr,
4717 (unsigned long long)parent,
4718 (unsigned long long)root);
4720 back->node.found_extent_tree = 1;
4722 check_extent_type(rec);
4723 maybe_free_extent_rec(extent_cache, rec);
4724 return 0;
4727 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4728 u64 parent, u64 root, u64 owner, u64 offset,
4729 u32 num_refs, int found_ref, u64 max_size)
4731 struct extent_record *rec;
4732 struct data_backref *back;
4733 struct cache_extent *cache;
4735 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4736 if (!cache) {
4737 struct extent_record tmpl;
4739 memset(&tmpl, 0, sizeof(tmpl));
4740 tmpl.start = bytenr;
4741 tmpl.nr = 1;
4742 tmpl.max_size = max_size;
4744 add_extent_rec_nolookup(extent_cache, &tmpl);
4746 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4747 if (!cache)
4748 abort();
4751 rec = container_of(cache, struct extent_record, cache);
4752 if (rec->max_size < max_size)
4753 rec->max_size = max_size;
4756 * If found_ref is set then max_size is the real size and must match the
4757 * existing refs. So if we have already found a ref then we need to
4758 * make sure that this ref matches the existing one, otherwise we need
4759 * to add a new backref so we can notice that the backrefs don't match
4760 * and we need to figure out who is telling the truth. This is to
4761 * account for that awful fsync bug I introduced where we'd end up with
4762 * a btrfs_file_extent_item that would have its length include multiple
4763 * prealloc extents or point inside of a prealloc extent.
4765 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4766 bytenr, max_size);
4767 if (!back) {
4768 back = alloc_data_backref(rec, parent, root, owner, offset,
4769 max_size);
4770 BUG_ON(!back);
4773 if (found_ref) {
4774 BUG_ON(num_refs != 1);
4775 if (back->node.found_ref)
4776 BUG_ON(back->bytes != max_size);
4777 back->node.found_ref = 1;
4778 back->found_ref += 1;
4779 back->bytes = max_size;
4780 back->disk_bytenr = bytenr;
4781 rec->refs += 1;
4782 rec->content_checked = 1;
4783 rec->owner_ref_checked = 1;
4784 } else {
4785 if (back->node.found_extent_tree) {
4786 fprintf(stderr, "Extent back ref already exists "
4787 "for %llu parent %llu root %llu "
4788 "owner %llu offset %llu num_refs %lu\n",
4789 (unsigned long long)bytenr,
4790 (unsigned long long)parent,
4791 (unsigned long long)root,
4792 (unsigned long long)owner,
4793 (unsigned long long)offset,
4794 (unsigned long)num_refs);
4796 back->num_refs = num_refs;
4797 back->node.found_extent_tree = 1;
4799 maybe_free_extent_rec(extent_cache, rec);
4800 return 0;
4803 static int add_pending(struct cache_tree *pending,
4804 struct cache_tree *seen, u64 bytenr, u32 size)
4806 int ret;
4807 ret = add_cache_extent(seen, bytenr, size);
4808 if (ret)
4809 return ret;
4810 add_cache_extent(pending, bytenr, size);
4811 return 0;
4814 static int pick_next_pending(struct cache_tree *pending,
4815 struct cache_tree *reada,
4816 struct cache_tree *nodes,
4817 u64 last, struct block_info *bits, int bits_nr,
4818 int *reada_bits)
4820 unsigned long node_start = last;
4821 struct cache_extent *cache;
4822 int ret;
4824 cache = search_cache_extent(reada, 0);
4825 if (cache) {
4826 bits[0].start = cache->start;
4827 bits[0].size = cache->size;
4828 *reada_bits = 1;
4829 return 1;
4831 *reada_bits = 0;
4832 if (node_start > 32768)
4833 node_start -= 32768;
4835 cache = search_cache_extent(nodes, node_start);
4836 if (!cache)
4837 cache = search_cache_extent(nodes, 0);
4839 if (!cache) {
4840 cache = search_cache_extent(pending, 0);
4841 if (!cache)
4842 return 0;
4843 ret = 0;
4844 do {
4845 bits[ret].start = cache->start;
4846 bits[ret].size = cache->size;
4847 cache = next_cache_extent(cache);
4848 ret++;
4849 } while (cache && ret < bits_nr);
4850 return ret;
4853 ret = 0;
4854 do {
4855 bits[ret].start = cache->start;
4856 bits[ret].size = cache->size;
4857 cache = next_cache_extent(cache);
4858 ret++;
4859 } while (cache && ret < bits_nr);
4861 if (bits_nr - ret > 8) {
4862 u64 lookup = bits[0].start + bits[0].size;
4863 struct cache_extent *next;
4864 next = search_cache_extent(pending, lookup);
4865 while(next) {
4866 if (next->start - lookup > 32768)
4867 break;
4868 bits[ret].start = next->start;
4869 bits[ret].size = next->size;
4870 lookup = next->start + next->size;
4871 ret++;
4872 if (ret == bits_nr)
4873 break;
4874 next = next_cache_extent(next);
4875 if (!next)
4876 break;
4879 return ret;
4882 static void free_chunk_record(struct cache_extent *cache)
4884 struct chunk_record *rec;
4886 rec = container_of(cache, struct chunk_record, cache);
4887 list_del_init(&rec->list);
4888 list_del_init(&rec->dextents);
4889 free(rec);
4892 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4894 cache_tree_free_extents(chunk_cache, free_chunk_record);
4897 static void free_device_record(struct rb_node *node)
4899 struct device_record *rec;
4901 rec = container_of(node, struct device_record, node);
4902 free(rec);
4905 FREE_RB_BASED_TREE(device_cache, free_device_record);
4907 int insert_block_group_record(struct block_group_tree *tree,
4908 struct block_group_record *bg_rec)
4910 int ret;
4912 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4913 if (ret)
4914 return ret;
4916 list_add_tail(&bg_rec->list, &tree->block_groups);
4917 return 0;
4920 static void free_block_group_record(struct cache_extent *cache)
4922 struct block_group_record *rec;
4924 rec = container_of(cache, struct block_group_record, cache);
4925 list_del_init(&rec->list);
4926 free(rec);
4929 void free_block_group_tree(struct block_group_tree *tree)
4931 cache_tree_free_extents(&tree->tree, free_block_group_record);
4934 int insert_device_extent_record(struct device_extent_tree *tree,
4935 struct device_extent_record *de_rec)
4937 int ret;
4940 * Device extent is a bit different from the other extents, because
4941 * the extents which belong to the different devices may have the
4942 * same start and size, so we need use the special extent cache
4943 * search/insert functions.
4945 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4946 if (ret)
4947 return ret;
4949 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4950 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4951 return 0;
4954 static void free_device_extent_record(struct cache_extent *cache)
4956 struct device_extent_record *rec;
4958 rec = container_of(cache, struct device_extent_record, cache);
4959 if (!list_empty(&rec->chunk_list))
4960 list_del_init(&rec->chunk_list);
4961 if (!list_empty(&rec->device_list))
4962 list_del_init(&rec->device_list);
4963 free(rec);
4966 void free_device_extent_tree(struct device_extent_tree *tree)
4968 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into an in-memory backref record.
 * Objectids below BTRFS_FIRST_FREE_OBJECTID belong to tree roots; anything
 * else refers to file data.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	else
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	return 0;
}
#endif
4990 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4991 struct btrfs_key *key,
4992 int slot)
4994 struct btrfs_chunk *ptr;
4995 struct chunk_record *rec;
4996 int num_stripes, i;
4998 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4999 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5001 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5002 if (!rec) {
5003 fprintf(stderr, "memory allocation failed\n");
5004 exit(-1);
5007 INIT_LIST_HEAD(&rec->list);
5008 INIT_LIST_HEAD(&rec->dextents);
5009 rec->bg_rec = NULL;
5011 rec->cache.start = key->offset;
5012 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5014 rec->generation = btrfs_header_generation(leaf);
5016 rec->objectid = key->objectid;
5017 rec->type = key->type;
5018 rec->offset = key->offset;
5020 rec->length = rec->cache.size;
5021 rec->owner = btrfs_chunk_owner(leaf, ptr);
5022 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5023 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5024 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5025 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5026 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5027 rec->num_stripes = num_stripes;
5028 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5030 for (i = 0; i < rec->num_stripes; ++i) {
5031 rec->stripes[i].devid =
5032 btrfs_stripe_devid_nr(leaf, ptr, i);
5033 rec->stripes[i].offset =
5034 btrfs_stripe_offset_nr(leaf, ptr, i);
5035 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5036 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5037 BTRFS_UUID_SIZE);
5040 return rec;
5043 static int process_chunk_item(struct cache_tree *chunk_cache,
5044 struct btrfs_key *key, struct extent_buffer *eb,
5045 int slot)
5047 struct chunk_record *rec;
5048 int ret = 0;
5050 rec = btrfs_new_chunk_record(eb, key, slot);
5051 ret = insert_cache_extent(chunk_cache, &rec->cache);
5052 if (ret) {
5053 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5054 rec->offset, rec->length);
5055 free(rec);
5058 return ret;
5061 static int process_device_item(struct rb_root *dev_cache,
5062 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5064 struct btrfs_dev_item *ptr;
5065 struct device_record *rec;
5066 int ret = 0;
5068 ptr = btrfs_item_ptr(eb,
5069 slot, struct btrfs_dev_item);
5071 rec = malloc(sizeof(*rec));
5072 if (!rec) {
5073 fprintf(stderr, "memory allocation failed\n");
5074 return -ENOMEM;
5077 rec->devid = key->offset;
5078 rec->generation = btrfs_header_generation(eb);
5080 rec->objectid = key->objectid;
5081 rec->type = key->type;
5082 rec->offset = key->offset;
5084 rec->devid = btrfs_device_id(eb, ptr);
5085 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5086 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5088 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5089 if (ret) {
5090 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5091 free(rec);
5094 return ret;
5097 struct block_group_record *
5098 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5099 int slot)
5101 struct btrfs_block_group_item *ptr;
5102 struct block_group_record *rec;
5104 rec = calloc(1, sizeof(*rec));
5105 if (!rec) {
5106 fprintf(stderr, "memory allocation failed\n");
5107 exit(-1);
5110 rec->cache.start = key->objectid;
5111 rec->cache.size = key->offset;
5113 rec->generation = btrfs_header_generation(leaf);
5115 rec->objectid = key->objectid;
5116 rec->type = key->type;
5117 rec->offset = key->offset;
5119 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5120 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5122 INIT_LIST_HEAD(&rec->list);
5124 return rec;
5127 static int process_block_group_item(struct block_group_tree *block_group_cache,
5128 struct btrfs_key *key,
5129 struct extent_buffer *eb, int slot)
5131 struct block_group_record *rec;
5132 int ret = 0;
5134 rec = btrfs_new_block_group_record(eb, key, slot);
5135 ret = insert_block_group_record(block_group_cache, rec);
5136 if (ret) {
5137 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5138 rec->objectid, rec->offset);
5139 free(rec);
5142 return ret;
5145 struct device_extent_record *
5146 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5147 struct btrfs_key *key, int slot)
5149 struct device_extent_record *rec;
5150 struct btrfs_dev_extent *ptr;
5152 rec = calloc(1, sizeof(*rec));
5153 if (!rec) {
5154 fprintf(stderr, "memory allocation failed\n");
5155 exit(-1);
5158 rec->cache.objectid = key->objectid;
5159 rec->cache.start = key->offset;
5161 rec->generation = btrfs_header_generation(leaf);
5163 rec->objectid = key->objectid;
5164 rec->type = key->type;
5165 rec->offset = key->offset;
5167 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5168 rec->chunk_objecteid =
5169 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5170 rec->chunk_offset =
5171 btrfs_dev_extent_chunk_offset(leaf, ptr);
5172 rec->length = btrfs_dev_extent_length(leaf, ptr);
5173 rec->cache.size = rec->length;
5175 INIT_LIST_HEAD(&rec->chunk_list);
5176 INIT_LIST_HEAD(&rec->device_list);
5178 return rec;
5181 static int
5182 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5183 struct btrfs_key *key, struct extent_buffer *eb,
5184 int slot)
5186 struct device_extent_record *rec;
5187 int ret;
5189 rec = btrfs_new_device_extent_record(eb, key, slot);
5190 ret = insert_device_extent_record(dev_extent_cache, rec);
5191 if (ret) {
5192 fprintf(stderr,
5193 "Device extent[%llu, %llu, %llu] existed.\n",
5194 rec->objectid, rec->offset, rec->length);
5195 free(rec);
5198 return ret;
5201 static int process_extent_item(struct btrfs_root *root,
5202 struct cache_tree *extent_cache,
5203 struct extent_buffer *eb, int slot)
5205 struct btrfs_extent_item *ei;
5206 struct btrfs_extent_inline_ref *iref;
5207 struct btrfs_extent_data_ref *dref;
5208 struct btrfs_shared_data_ref *sref;
5209 struct btrfs_key key;
5210 struct extent_record tmpl;
5211 unsigned long end;
5212 unsigned long ptr;
5213 int type;
5214 u32 item_size = btrfs_item_size_nr(eb, slot);
5215 u64 refs = 0;
5216 u64 offset;
5217 u64 num_bytes;
5218 int metadata = 0;
5220 btrfs_item_key_to_cpu(eb, &key, slot);
5222 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5223 metadata = 1;
5224 num_bytes = root->nodesize;
5225 } else {
5226 num_bytes = key.offset;
5229 if (item_size < sizeof(*ei)) {
5230 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5231 struct btrfs_extent_item_v0 *ei0;
5232 BUG_ON(item_size != sizeof(*ei0));
5233 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5234 refs = btrfs_extent_refs_v0(eb, ei0);
5235 #else
5236 BUG();
5237 #endif
5238 memset(&tmpl, 0, sizeof(tmpl));
5239 tmpl.start = key.objectid;
5240 tmpl.nr = num_bytes;
5241 tmpl.extent_item_refs = refs;
5242 tmpl.metadata = metadata;
5243 tmpl.found_rec = 1;
5244 tmpl.max_size = num_bytes;
5246 return add_extent_rec(extent_cache, &tmpl);
5249 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5250 refs = btrfs_extent_refs(eb, ei);
5251 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5252 metadata = 1;
5253 else
5254 metadata = 0;
5256 memset(&tmpl, 0, sizeof(tmpl));
5257 tmpl.start = key.objectid;
5258 tmpl.nr = num_bytes;
5259 tmpl.extent_item_refs = refs;
5260 tmpl.metadata = metadata;
5261 tmpl.found_rec = 1;
5262 tmpl.max_size = num_bytes;
5263 add_extent_rec(extent_cache, &tmpl);
5265 ptr = (unsigned long)(ei + 1);
5266 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5267 key.type == BTRFS_EXTENT_ITEM_KEY)
5268 ptr += sizeof(struct btrfs_tree_block_info);
5270 end = (unsigned long)ei + item_size;
5271 while (ptr < end) {
5272 iref = (struct btrfs_extent_inline_ref *)ptr;
5273 type = btrfs_extent_inline_ref_type(eb, iref);
5274 offset = btrfs_extent_inline_ref_offset(eb, iref);
5275 switch (type) {
5276 case BTRFS_TREE_BLOCK_REF_KEY:
5277 add_tree_backref(extent_cache, key.objectid,
5278 0, offset, 0);
5279 break;
5280 case BTRFS_SHARED_BLOCK_REF_KEY:
5281 add_tree_backref(extent_cache, key.objectid,
5282 offset, 0, 0);
5283 break;
5284 case BTRFS_EXTENT_DATA_REF_KEY:
5285 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5286 add_data_backref(extent_cache, key.objectid, 0,
5287 btrfs_extent_data_ref_root(eb, dref),
5288 btrfs_extent_data_ref_objectid(eb,
5289 dref),
5290 btrfs_extent_data_ref_offset(eb, dref),
5291 btrfs_extent_data_ref_count(eb, dref),
5292 0, num_bytes);
5293 break;
5294 case BTRFS_SHARED_DATA_REF_KEY:
5295 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5296 add_data_backref(extent_cache, key.objectid, offset,
5297 0, 0, 0,
5298 btrfs_shared_data_ref_count(eb, sref),
5299 0, num_bytes);
5300 break;
5301 default:
5302 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5303 key.objectid, key.type, num_bytes);
5304 goto out;
5306 ptr += btrfs_extent_inline_ref_size(type);
5308 WARN_ON(ptr > end);
5309 out:
5310 return 0;
5313 static int check_cache_range(struct btrfs_root *root,
5314 struct btrfs_block_group_cache *cache,
5315 u64 offset, u64 bytes)
5317 struct btrfs_free_space *entry;
5318 u64 *logical;
5319 u64 bytenr;
5320 int stripe_len;
5321 int i, nr, ret;
5323 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5324 bytenr = btrfs_sb_offset(i);
5325 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5326 cache->key.objectid, bytenr, 0,
5327 &logical, &nr, &stripe_len);
5328 if (ret)
5329 return ret;
5331 while (nr--) {
5332 if (logical[nr] + stripe_len <= offset)
5333 continue;
5334 if (offset + bytes <= logical[nr])
5335 continue;
5336 if (logical[nr] == offset) {
5337 if (stripe_len >= bytes) {
5338 kfree(logical);
5339 return 0;
5341 bytes -= stripe_len;
5342 offset += stripe_len;
5343 } else if (logical[nr] < offset) {
5344 if (logical[nr] + stripe_len >=
5345 offset + bytes) {
5346 kfree(logical);
5347 return 0;
5349 bytes = (offset + bytes) -
5350 (logical[nr] + stripe_len);
5351 offset = logical[nr] + stripe_len;
5352 } else {
5354 * Could be tricky, the super may land in the
5355 * middle of the area we're checking. First
5356 * check the easiest case, it's at the end.
5358 if (logical[nr] + stripe_len >=
5359 bytes + offset) {
5360 bytes = logical[nr] - offset;
5361 continue;
5364 /* Check the left side */
5365 ret = check_cache_range(root, cache,
5366 offset,
5367 logical[nr] - offset);
5368 if (ret) {
5369 kfree(logical);
5370 return ret;
5373 /* Now we continue with the right side */
5374 bytes = (offset + bytes) -
5375 (logical[nr] + stripe_len);
5376 offset = logical[nr] + stripe_len;
5380 kfree(logical);
5383 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5384 if (!entry) {
5385 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5386 offset, offset+bytes);
5387 return -EINVAL;
5390 if (entry->offset != offset) {
5391 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5392 entry->offset);
5393 return -EINVAL;
5396 if (entry->bytes != bytes) {
5397 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5398 bytes, entry->bytes, offset);
5399 return -EINVAL;
5402 unlink_free_space(cache->free_space_ctl, entry);
5403 free(entry);
5404 return 0;
5407 static int verify_space_cache(struct btrfs_root *root,
5408 struct btrfs_block_group_cache *cache)
5410 struct btrfs_path *path;
5411 struct extent_buffer *leaf;
5412 struct btrfs_key key;
5413 u64 last;
5414 int ret = 0;
5416 path = btrfs_alloc_path();
5417 if (!path)
5418 return -ENOMEM;
5420 root = root->fs_info->extent_root;
5422 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5424 key.objectid = last;
5425 key.offset = 0;
5426 key.type = BTRFS_EXTENT_ITEM_KEY;
5428 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5429 if (ret < 0)
5430 goto out;
5431 ret = 0;
5432 while (1) {
5433 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5434 ret = btrfs_next_leaf(root, path);
5435 if (ret < 0)
5436 goto out;
5437 if (ret > 0) {
5438 ret = 0;
5439 break;
5442 leaf = path->nodes[0];
5443 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5444 if (key.objectid >= cache->key.offset + cache->key.objectid)
5445 break;
5446 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5447 key.type != BTRFS_METADATA_ITEM_KEY) {
5448 path->slots[0]++;
5449 continue;
5452 if (last == key.objectid) {
5453 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5454 last = key.objectid + key.offset;
5455 else
5456 last = key.objectid + root->nodesize;
5457 path->slots[0]++;
5458 continue;
5461 ret = check_cache_range(root, cache, last,
5462 key.objectid - last);
5463 if (ret)
5464 break;
5465 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5466 last = key.objectid + key.offset;
5467 else
5468 last = key.objectid + root->nodesize;
5469 path->slots[0]++;
5472 if (last < cache->key.objectid + cache->key.offset)
5473 ret = check_cache_range(root, cache, last,
5474 cache->key.objectid +
5475 cache->key.offset - last);
5477 out:
5478 btrfs_free_path(path);
5480 if (!ret &&
5481 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5482 fprintf(stderr, "There are still entries left in the space "
5483 "cache\n");
5484 ret = -EINVAL;
5487 return ret;
5490 static int check_space_cache(struct btrfs_root *root)
5492 struct btrfs_block_group_cache *cache;
5493 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5494 int ret;
5495 int error = 0;
5497 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5498 btrfs_super_generation(root->fs_info->super_copy) !=
5499 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5500 printf("cache and super generation don't match, space cache "
5501 "will be invalidated\n");
5502 return 0;
5505 if (ctx.progress_enabled) {
5506 ctx.tp = TASK_FREE_SPACE;
5507 task_start(ctx.info);
5510 while (1) {
5511 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5512 if (!cache)
5513 break;
5515 start = cache->key.objectid + cache->key.offset;
5516 if (!cache->free_space_ctl) {
5517 if (btrfs_init_free_space_ctl(cache,
5518 root->sectorsize)) {
5519 ret = -ENOMEM;
5520 break;
5522 } else {
5523 btrfs_remove_free_space_cache(cache);
5526 if (btrfs_fs_compat_ro(root->fs_info,
5527 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5528 ret = exclude_super_stripes(root, cache);
5529 if (ret) {
5530 fprintf(stderr, "could not exclude super stripes: %s\n",
5531 strerror(-ret));
5532 error++;
5533 continue;
5535 ret = load_free_space_tree(root->fs_info, cache);
5536 free_excluded_extents(root, cache);
5537 if (ret < 0) {
5538 fprintf(stderr, "could not load free space tree: %s\n",
5539 strerror(-ret));
5540 error++;
5541 continue;
5543 error += ret;
5544 } else {
5545 ret = load_free_space_cache(root->fs_info, cache);
5546 if (!ret)
5547 continue;
5550 ret = verify_space_cache(root, cache);
5551 if (ret) {
5552 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5553 cache->key.objectid);
5554 error++;
5558 task_stop(ctx.info);
5560 return error ? -EINVAL : 0;
5563 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5564 u64 num_bytes, unsigned long leaf_offset,
5565 struct extent_buffer *eb) {
5567 u64 offset = 0;
5568 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5569 char *data;
5570 unsigned long csum_offset;
5571 u32 csum;
5572 u32 csum_expected;
5573 u64 read_len;
5574 u64 data_checked = 0;
5575 u64 tmp;
5576 int ret = 0;
5577 int mirror;
5578 int num_copies;
5580 if (num_bytes % root->sectorsize)
5581 return -EINVAL;
5583 data = malloc(num_bytes);
5584 if (!data)
5585 return -ENOMEM;
5587 while (offset < num_bytes) {
5588 mirror = 0;
5589 again:
5590 read_len = num_bytes - offset;
5591 /* read as much space once a time */
5592 ret = read_extent_data(root, data + offset,
5593 bytenr + offset, &read_len, mirror);
5594 if (ret)
5595 goto out;
5596 data_checked = 0;
5597 /* verify every 4k data's checksum */
5598 while (data_checked < read_len) {
5599 csum = ~(u32)0;
5600 tmp = offset + data_checked;
5602 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5603 csum, root->sectorsize);
5604 btrfs_csum_final(csum, (char *)&csum);
5606 csum_offset = leaf_offset +
5607 tmp / root->sectorsize * csum_size;
5608 read_extent_buffer(eb, (char *)&csum_expected,
5609 csum_offset, csum_size);
5610 /* try another mirror */
5611 if (csum != csum_expected) {
5612 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5613 mirror, bytenr + tmp,
5614 csum, csum_expected);
5615 num_copies = btrfs_num_copies(
5616 &root->fs_info->mapping_tree,
5617 bytenr, num_bytes);
5618 if (mirror < num_copies - 1) {
5619 mirror += 1;
5620 goto again;
5623 data_checked += root->sectorsize;
5625 offset += read_len;
5627 out:
5628 free(data);
5629 return ret;
5632 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5633 u64 num_bytes)
5635 struct btrfs_path *path;
5636 struct extent_buffer *leaf;
5637 struct btrfs_key key;
5638 int ret;
5640 path = btrfs_alloc_path();
5641 if (!path) {
5642 fprintf(stderr, "Error allocating path\n");
5643 return -ENOMEM;
5646 key.objectid = bytenr;
5647 key.type = BTRFS_EXTENT_ITEM_KEY;
5648 key.offset = (u64)-1;
5650 again:
5651 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5652 0, 0);
5653 if (ret < 0) {
5654 fprintf(stderr, "Error looking up extent record %d\n", ret);
5655 btrfs_free_path(path);
5656 return ret;
5657 } else if (ret) {
5658 if (path->slots[0] > 0) {
5659 path->slots[0]--;
5660 } else {
5661 ret = btrfs_prev_leaf(root, path);
5662 if (ret < 0) {
5663 goto out;
5664 } else if (ret > 0) {
5665 ret = 0;
5666 goto out;
5671 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5674 * Block group items come before extent items if they have the same
5675 * bytenr, so walk back one more just in case. Dear future traveller,
5676 * first congrats on mastering time travel. Now if it's not too much
5677 * trouble could you go back to 2006 and tell Chris to make the
5678 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5679 * EXTENT_ITEM_KEY please?
5681 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5682 if (path->slots[0] > 0) {
5683 path->slots[0]--;
5684 } else {
5685 ret = btrfs_prev_leaf(root, path);
5686 if (ret < 0) {
5687 goto out;
5688 } else if (ret > 0) {
5689 ret = 0;
5690 goto out;
5693 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5696 while (num_bytes) {
5697 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5698 ret = btrfs_next_leaf(root, path);
5699 if (ret < 0) {
5700 fprintf(stderr, "Error going to next leaf "
5701 "%d\n", ret);
5702 btrfs_free_path(path);
5703 return ret;
5704 } else if (ret) {
5705 break;
5708 leaf = path->nodes[0];
5709 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5710 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5711 path->slots[0]++;
5712 continue;
5714 if (key.objectid + key.offset < bytenr) {
5715 path->slots[0]++;
5716 continue;
5718 if (key.objectid > bytenr + num_bytes)
5719 break;
5721 if (key.objectid == bytenr) {
5722 if (key.offset >= num_bytes) {
5723 num_bytes = 0;
5724 break;
5726 num_bytes -= key.offset;
5727 bytenr += key.offset;
5728 } else if (key.objectid < bytenr) {
5729 if (key.objectid + key.offset >= bytenr + num_bytes) {
5730 num_bytes = 0;
5731 break;
5733 num_bytes = (bytenr + num_bytes) -
5734 (key.objectid + key.offset);
5735 bytenr = key.objectid + key.offset;
5736 } else {
5737 if (key.objectid + key.offset < bytenr + num_bytes) {
5738 u64 new_start = key.objectid + key.offset;
5739 u64 new_bytes = bytenr + num_bytes - new_start;
5742 * Weird case, the extent is in the middle of
5743 * our range, we'll have to search one side
5744 * and then the other. Not sure if this happens
5745 * in real life, but no harm in coding it up
5746 * anyway just in case.
5748 btrfs_release_path(path);
5749 ret = check_extent_exists(root, new_start,
5750 new_bytes);
5751 if (ret) {
5752 fprintf(stderr, "Right section didn't "
5753 "have a record\n");
5754 break;
5756 num_bytes = key.objectid - bytenr;
5757 goto again;
5759 num_bytes = key.objectid - bytenr;
5761 path->slots[0]++;
5763 ret = 0;
5765 out:
5766 if (num_bytes && !ret) {
5767 fprintf(stderr, "There are no extents for csum range "
5768 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5769 ret = 1;
5772 btrfs_free_path(path);
5773 return ret;
5776 static int check_csums(struct btrfs_root *root)
5778 struct btrfs_path *path;
5779 struct extent_buffer *leaf;
5780 struct btrfs_key key;
5781 u64 offset = 0, num_bytes = 0;
5782 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5783 int errors = 0;
5784 int ret;
5785 u64 data_len;
5786 unsigned long leaf_offset;
5788 root = root->fs_info->csum_root;
5789 if (!extent_buffer_uptodate(root->node)) {
5790 fprintf(stderr, "No valid csum tree found\n");
5791 return -ENOENT;
5794 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5795 key.type = BTRFS_EXTENT_CSUM_KEY;
5796 key.offset = 0;
5798 path = btrfs_alloc_path();
5799 if (!path)
5800 return -ENOMEM;
5802 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5803 if (ret < 0) {
5804 fprintf(stderr, "Error searching csum tree %d\n", ret);
5805 btrfs_free_path(path);
5806 return ret;
5809 if (ret > 0 && path->slots[0])
5810 path->slots[0]--;
5811 ret = 0;
5813 while (1) {
5814 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5815 ret = btrfs_next_leaf(root, path);
5816 if (ret < 0) {
5817 fprintf(stderr, "Error going to next leaf "
5818 "%d\n", ret);
5819 break;
5821 if (ret)
5822 break;
5824 leaf = path->nodes[0];
5826 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5827 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5828 path->slots[0]++;
5829 continue;
5832 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5833 csum_size) * root->sectorsize;
5834 if (!check_data_csum)
5835 goto skip_csum_check;
5836 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5837 ret = check_extent_csums(root, key.offset, data_len,
5838 leaf_offset, leaf);
5839 if (ret)
5840 break;
5841 skip_csum_check:
5842 if (!num_bytes) {
5843 offset = key.offset;
5844 } else if (key.offset != offset + num_bytes) {
5845 ret = check_extent_exists(root, offset, num_bytes);
5846 if (ret) {
5847 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5848 "there is no extent record\n",
5849 offset, offset+num_bytes);
5850 errors++;
5852 offset = key.offset;
5853 num_bytes = 0;
5855 num_bytes += data_len;
5856 path->slots[0]++;
5859 btrfs_free_path(path);
5860 return errors;
5863 static int is_dropped_key(struct btrfs_key *key,
5864 struct btrfs_key *drop_key) {
5865 if (key->objectid < drop_key->objectid)
5866 return 1;
5867 else if (key->objectid == drop_key->objectid) {
5868 if (key->type < drop_key->type)
5869 return 1;
5870 else if (key->type == drop_key->type) {
5871 if (key->offset < drop_key->offset)
5872 return 1;
5875 return 0;
/*
 * Here are the rules for FULL_BACKREF.
 *
 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 *    FULL_BACKREF set.
 * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
 *    if it happened after the relocation occurred since we'll have dropped the
 *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
 *    have no real way to know for sure.
 *
 * We process the blocks one root at a time, and we start from the lowest root
 * objectid and go to the highest.  So we can just lookup the owner backref for
 * the record and if we don't find it then we know it doesn't exist and we have
 * a FULL BACKREF.
 *
 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
 * be set or not and then we can check later once we've gathered all the refs.
 */
static int calc_extent_flag(struct btrfs_root *root,
			    struct cache_tree *extent_cache,
			    struct extent_buffer *buf,
			    struct root_item_record *ri,
			    u64 *flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct tree_backref *tback;
	u64 owner = 0;

	/*
	 * NOTE(review): @ri is dereferenced unconditionally below, while the
	 * caller (run_next_block) checks ri != NULL elsewhere — confirm ri
	 * can never be NULL on this path.
	 */
	cache = lookup_cache_extent(extent_cache, buf->start, 1);
	/* we have added this extent before */
	BUG_ON(!cache);
	rec = container_of(cache, struct extent_record, cache);

	/*
	 * Except file/reloc tree, we can not have
	 * FULL BACKREF MODE
	 */
	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
		goto normal;
	/*
	 * root node
	 */
	if (buf->start == ri->bytenr)
		goto normal;
	/* Rule 1: block written by relocation carries full backrefs. */
	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		goto full_backref;
	owner = btrfs_header_owner(buf);
	if (owner == ri->objectid)
		goto normal;
	/* Rule 2: no owner backref recorded means the owner moved on. */
	tback = find_tree_backref(rec, 0, owner);
	if (!tback)
		goto full_backref;
normal:
	*flags = 0;
	/* Record a conflict if the extent tree claimed full backref mode. */
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 0)
		rec->bad_full_backref = 1;
	return 0;
full_backref:
	/* Only ORed in: callers pass *flags pre-initialized. */
	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 1)
		rec->bad_full_backref = 1;
	return 0;
}
/*
 * Process the next tree block from the traversal queues.
 *
 * Picks a batch of block addresses from @pending/@reada/@nodes (issuing
 * readahead for newly seen ones), reads the first block, resolves its
 * backref mode (full backref vs owner-keyed), sanity-checks it with
 * check_block(), then walks its items: leaf items feed the extent, chunk,
 * device, block-group and csum accounting caches, while node pointers are
 * recorded and queued for later processing.
 *
 * Returns 1 when nothing is left to process, 0 on success, or a negative
 * error propagated from check_block()/helpers.
 */
static int run_next_block(struct btrfs_root *root,
			  struct block_info *bits,
			  int bits_nr,
			  u64 *last,
			  struct cache_tree *pending,
			  struct cache_tree *seen,
			  struct cache_tree *reada,
			  struct cache_tree *nodes,
			  struct cache_tree *extent_cache,
			  struct cache_tree *chunk_cache,
			  struct rb_root *dev_cache,
			  struct block_group_tree *block_group_cache,
			  struct device_extent_tree *dev_extent_cache,
			  struct root_item_record *ri)
{
	struct extent_buffer *buf;
	struct extent_record *rec = NULL;
	u64 bytenr;
	u32 size;
	u64 parent;
	u64 owner;
	u64 flags;
	u64 ptr;
	u64 gen = 0;
	int ret = 0;
	int i;
	int nritems;
	struct btrfs_key key;
	struct cache_extent *cache;
	int reada_bits;

	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
				    bits_nr, &reada_bits);
	if (nritems == 0)
		return 1;

	/* Queue readahead for blocks we have not seen in the reada tree yet. */
	if (!reada_bits) {
		for(i = 0; i < nritems; i++) {
			ret = add_cache_extent(reada, bits[i].start,
					       bits[i].size);
			if (ret == -EEXIST)
				continue;

			/* fixme, get the parent transid */
			readahead_tree_block(root, bits[i].start,
					     bits[i].size, 0);
		}
	}
	/* Only the first picked block is actually processed this call. */
	*last = bits[0].start;
	bytenr = bits[0].start;
	size = bits[0].size;

	/* Drop this block from every queue it may still be sitting on. */
	cache = lookup_cache_extent(pending, bytenr, size);
	if (cache) {
		remove_cache_extent(pending, cache);
		free(cache);
	}
	cache = lookup_cache_extent(reada, bytenr, size);
	if (cache) {
		remove_cache_extent(reada, cache);
		free(cache);
	}
	cache = lookup_cache_extent(nodes, bytenr, size);
	if (cache) {
		remove_cache_extent(nodes, cache);
		free(cache);
	}
	cache = lookup_cache_extent(extent_cache, bytenr, size);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		gen = rec->parent_generation;
	}

	/* fixme, get the real parent transid */
	buf = read_tree_block(root, bytenr, size, gen);
	if (!extent_buffer_uptodate(buf)) {
		record_bad_block_io(root->fs_info,
				    extent_cache, bytenr, size);
		goto out;
	}

	nritems = btrfs_header_nritems(buf);

	/*
	 * Determine the extent flags: ask the extent tree first, and fall
	 * back to computing them from the backrefs we have gathered.
	 */
	flags = 0;
	if (!init_extent_tree) {
		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
				       btrfs_header_level(buf), 1, NULL,
				       &flags);
		if (ret < 0) {
			ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
			if (ret < 0) {
				fprintf(stderr, "Couldn't calc extent flags\n");
				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			}
		}
	} else {
		flags = 0;
		ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
		if (ret < 0) {
			fprintf(stderr, "Couldn't calc extent flags\n");
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		if (ri != NULL &&
		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    ri->objectid == btrfs_header_owner(buf)) {
			/*
			 * Ok we got to this block from it's original owner and
			 * we have FULL_BACKREF set.  Relocation can leave
			 * converted blocks over so this is altogether possible,
			 * however it's not possible if the generation > the
			 * last snapshot, so check for this case.
			 */
			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
			    btrfs_header_generation(buf) > ri->last_snapshot) {
				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
				rec->bad_full_backref = 1;
			}
		}
	} else {
		/* Blocks in a reloc tree must carry full backrefs. */
		if (ri != NULL &&
		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			rec->bad_full_backref = 1;
		}
	}

	/*
	 * Full backref: child refs key on this block's bytenr (parent).
	 * Otherwise refs key on the owning root's objectid.
	 */
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		rec->flag_block_full_backref = 1;
		parent = bytenr;
		owner = 0;
	} else {
		rec->flag_block_full_backref = 0;
		parent = 0;
		owner = btrfs_header_owner(buf);
	}

	ret = check_block(root, extent_cache, buf, flags);
	if (ret)
		goto out;

	if (btrfs_is_leaf(buf)) {
		btree_space_waste += btrfs_leaf_free_space(root, buf);
		for (i = 0; i < nritems; i++) {
			struct btrfs_file_extent_item *fi;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
				total_csum_bytes +=
					btrfs_item_size_nr(buf, i);
				continue;
			}
			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
				process_chunk_item(chunk_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_ITEM_KEY) {
				process_device_item(dev_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
				process_block_group_item(block_group_cache,
					&key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_EXTENT_KEY) {
				process_device_extent_item(dev_extent_cache,
					&key, buf, i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
				process_extent_ref_v0(extent_cache, buf, i);
#else
				BUG();
#endif
				continue;
			}

			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
				add_tree_backref(extent_cache, key.objectid, 0,
						 key.offset, 0);
				continue;
			}
			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
				add_tree_backref(extent_cache, key.objectid,
						 key.offset, 0, 0);
				continue;
			}
			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
				struct btrfs_extent_data_ref *ref;
				ref = btrfs_item_ptr(buf, i,
						struct btrfs_extent_data_ref);
				add_data_backref(extent_cache,
					key.objectid, 0,
					btrfs_extent_data_ref_root(buf, ref),
					btrfs_extent_data_ref_objectid(buf,
								       ref),
					btrfs_extent_data_ref_offset(buf, ref),
					btrfs_extent_data_ref_count(buf, ref),
					0, root->sectorsize);
				continue;
			}
			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
				struct btrfs_shared_data_ref *ref;
				ref = btrfs_item_ptr(buf, i,
						struct btrfs_shared_data_ref);
				add_data_backref(extent_cache,
					key.objectid, key.offset, 0, 0, 0,
					btrfs_shared_data_ref_count(buf, ref),
					0, root->sectorsize);
				continue;
			}
			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
				struct bad_item *bad;

				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
					continue;
				if (!owner)
					continue;
				bad = malloc(sizeof(struct bad_item));
				/* best-effort: skip recording on OOM */
				if (!bad)
					continue;
				INIT_LIST_HEAD(&bad->list);
				memcpy(&bad->key, &key,
				       sizeof(struct btrfs_key));
				bad->root_id = owner;
				list_add_tail(&bad->list, &delete_items);
				continue;
			}
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			/* Holes have no on-disk extent to track. */
			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
				continue;

			data_bytes_allocated +=
				btrfs_file_extent_disk_num_bytes(buf, fi);
			if (data_bytes_allocated < root->sectorsize) {
				abort();
			}
			data_bytes_referenced +=
				btrfs_file_extent_num_bytes(buf, fi);
			add_data_backref(extent_cache,
				btrfs_file_extent_disk_bytenr(buf, fi),
				parent, owner, key.objectid, key.offset -
				btrfs_file_extent_offset(buf, fi), 1, 1,
				btrfs_file_extent_disk_num_bytes(buf, fi));
		}
	} else {
		int level;
		struct btrfs_key first_key;

		/*
		 * NOTE(review): first_key is computed here but never read
		 * later in this function — looks like leftover scaffolding;
		 * confirm before removing.
		 */
		first_key.objectid = 0;

		if (nritems > 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		level = btrfs_header_level(buf);
		for (i = 0; i < nritems; i++) {
			struct extent_record tmpl;

			ptr = btrfs_node_blockptr(buf, i);
			size = root->nodesize;
			btrfs_node_key_to_cpu(buf, &key, i);
			/* Skip keys below the drop key of a dropping root. */
			if (ri != NULL) {
				if ((level == ri->drop_level)
				    && is_dropped_key(&key, &ri->drop_key)) {
					continue;
				}
			}

			memset(&tmpl, 0, sizeof(tmpl));
			btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
			tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
			tmpl.start = ptr;
			tmpl.nr = size;
			tmpl.refs = 1;
			tmpl.metadata = 1;
			tmpl.max_size = size;
			ret = add_extent_rec(extent_cache, &tmpl);
			BUG_ON(ret);

			add_tree_backref(extent_cache, ptr, parent, owner, 1);

			/* Children at level > 0 are nodes, queue accordingly. */
			if (level > 1) {
				add_pending(nodes, seen, ptr, size);
			} else {
				add_pending(pending, seen, ptr, size);
			}
		}
		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
				      nritems) * sizeof(struct btrfs_key_ptr);
	}
	total_btree_bytes += buf->len;
	if (fs_root_objectid(btrfs_header_owner(buf)))
		total_fs_tree_bytes += buf->len;
	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += buf->len;
	if (!found_old_backref &&
	    btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		found_old_backref = 1;
out:
	free_extent_buffer(buf);
	return ret;
}
6275 static int add_root_to_pending(struct extent_buffer *buf,
6276 struct cache_tree *extent_cache,
6277 struct cache_tree *pending,
6278 struct cache_tree *seen,
6279 struct cache_tree *nodes,
6280 u64 objectid)
6282 struct extent_record tmpl;
6284 if (btrfs_header_level(buf) > 0)
6285 add_pending(nodes, seen, buf->start, buf->len);
6286 else
6287 add_pending(pending, seen, buf->start, buf->len);
6289 memset(&tmpl, 0, sizeof(tmpl));
6290 tmpl.start = buf->start;
6291 tmpl.nr = buf->len;
6292 tmpl.is_root = 1;
6293 tmpl.refs = 1;
6294 tmpl.metadata = 1;
6295 tmpl.max_size = buf->len;
6296 add_extent_rec(extent_cache, &tmpl);
6298 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6299 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6300 add_tree_backref(extent_cache, buf->start, buf->start,
6301 0, 1);
6302 else
6303 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6304 return 0;
/* as we fix the tree, we might be deleting blocks that
 * we're tracking for repair.  This hook makes sure we
 * remove any backrefs for blocks as we are fixing them.
 */
static int free_extent_hook(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 bytenr, u64 num_bytes, u64 parent,
			    u64 root_objectid, u64 owner, u64 offset,
			    int refs_to_drop)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int is_data;
	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;

	/* Data extents are owned by inode objectids, metadata by tree ids. */
	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (is_data) {
		struct data_backref *back;
		back = find_data_backref(rec, parent, root_objectid, owner,
					 offset, 1, bytenr, num_bytes);
		if (!back)
			goto out;
		/* Deduct the dropped refs from both ref-tracking counters. */
		if (back->node.found_ref) {
			back->found_ref -= refs_to_drop;
			if (rec->refs)
				rec->refs -= refs_to_drop;
		}
		if (back->node.found_extent_tree) {
			back->num_refs -= refs_to_drop;
			if (rec->extent_item_refs)
				rec->extent_item_refs -= refs_to_drop;
		}
		if (back->found_ref == 0)
			back->node.found_ref = 0;
		if (back->num_refs == 0)
			back->node.found_extent_tree = 0;

		/*
		 * NOTE(review): freeing when found_ref is still set looks
		 * inverted — "!... && !back->node.found_ref" (free only when
		 * nothing references the backref anymore) seems intended.
		 * Confirm before changing.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			list_del(&back->node.list);
			free(back);
		}
	} else {
		struct tree_backref *back;
		back = find_tree_backref(rec, parent, root_objectid);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			if (rec->refs)
				rec->refs--;
			back->node.found_ref = 0;
		}
		if (back->node.found_extent_tree) {
			if (rec->extent_item_refs)
				rec->extent_item_refs--;
			back->node.found_extent_tree = 0;
		}
		/*
		 * NOTE(review): found_ref was just cleared above, so this
		 * condition can never be true here — dead code, same
		 * suspected inversion as in the data branch.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			list_del(&back->node.list);
			free(back);
		}
	}
	maybe_free_extent_rec(extent_cache, rec);
out:
	return 0;
}
/*
 * Delete every extent-tree item keyed at @bytenr (extent items, metadata
 * items, and all backref item types), updating block-group accounting for
 * each removed extent/metadata item.  @new_len is currently unused here —
 * presumably reserved for partial truncation; confirm with callers.
 * Returns 0 on success or a negative error.
 */
static int delete_extent_records(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 new_len)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;
	int slot;

	/* Start past all item types/offsets for this bytenr and walk back. */
	key.objectid = bytenr;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while(1) {
		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
					&key, path, 0, 1);
		if (ret < 0)
			break;

		/* No exact match: step back to the previous item. */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		ret = 0;

		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != bytenr)
			break;

		/* Skip item types we do not delete by decrementing the key. */
		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
			btrfs_release_path(path);
			if (found_key.type == 0) {
				/* Can't go below type 0 / offset 0. */
				if (found_key.offset == 0)
					break;
				key.offset = found_key.offset - 1;
				key.type = found_key.type;
			} else {
				key.type = found_key.type - 1;
				key.offset = (u64)-1;
			}
			continue;
		}

		fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
			found_key.objectid, found_key.type, found_key.offset);

		ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
		if (ret)
			break;
		btrfs_release_path(path);

		/* Give back the space to the block group accounting. */
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
				found_key.offset : root->nodesize;

			ret = btrfs_update_block_group(trans, root, bytenr,
						       bytes, 0, 0);
			if (ret)
				break;
		}
	}

	btrfs_release_path(path);
	return ret;
}
6459 * for a single backref, this will allocate a new extent
6460 * and add the backref to it.
6462 static int record_extent(struct btrfs_trans_handle *trans,
6463 struct btrfs_fs_info *info,
6464 struct btrfs_path *path,
6465 struct extent_record *rec,
6466 struct extent_backref *back,
6467 int allocated, u64 flags)
6469 int ret;
6470 struct btrfs_root *extent_root = info->extent_root;
6471 struct extent_buffer *leaf;
6472 struct btrfs_key ins_key;
6473 struct btrfs_extent_item *ei;
6474 struct tree_backref *tback;
6475 struct data_backref *dback;
6476 struct btrfs_tree_block_info *bi;
6478 if (!back->is_data)
6479 rec->max_size = max_t(u64, rec->max_size,
6480 info->extent_root->nodesize);
6482 if (!allocated) {
6483 u32 item_size = sizeof(*ei);
6485 if (!back->is_data)
6486 item_size += sizeof(*bi);
6488 ins_key.objectid = rec->start;
6489 ins_key.offset = rec->max_size;
6490 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6492 ret = btrfs_insert_empty_item(trans, extent_root, path,
6493 &ins_key, item_size);
6494 if (ret)
6495 goto fail;
6497 leaf = path->nodes[0];
6498 ei = btrfs_item_ptr(leaf, path->slots[0],
6499 struct btrfs_extent_item);
6501 btrfs_set_extent_refs(leaf, ei, 0);
6502 btrfs_set_extent_generation(leaf, ei, rec->generation);
6504 if (back->is_data) {
6505 btrfs_set_extent_flags(leaf, ei,
6506 BTRFS_EXTENT_FLAG_DATA);
6507 } else {
6508 struct btrfs_disk_key copy_key;;
6510 tback = (struct tree_backref *)back;
6511 bi = (struct btrfs_tree_block_info *)(ei + 1);
6512 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6513 sizeof(*bi));
6515 btrfs_set_disk_key_objectid(&copy_key,
6516 rec->info_objectid);
6517 btrfs_set_disk_key_type(&copy_key, 0);
6518 btrfs_set_disk_key_offset(&copy_key, 0);
6520 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6521 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6523 btrfs_set_extent_flags(leaf, ei,
6524 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6527 btrfs_mark_buffer_dirty(leaf);
6528 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6529 rec->max_size, 1, 0);
6530 if (ret)
6531 goto fail;
6532 btrfs_release_path(path);
6535 if (back->is_data) {
6536 u64 parent;
6537 int i;
6539 dback = (struct data_backref *)back;
6540 if (back->full_backref)
6541 parent = dback->parent;
6542 else
6543 parent = 0;
6545 for (i = 0; i < dback->found_ref; i++) {
6546 /* if parent != 0, we're doing a full backref
6547 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6548 * just makes the backref allocator create a data
6549 * backref
6551 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6552 rec->start, rec->max_size,
6553 parent,
6554 dback->root,
6555 parent ?
6556 BTRFS_FIRST_FREE_OBJECTID :
6557 dback->owner,
6558 dback->offset);
6559 if (ret)
6560 break;
6562 fprintf(stderr, "adding new data backref"
6563 " on %llu %s %llu owner %llu"
6564 " offset %llu found %d\n",
6565 (unsigned long long)rec->start,
6566 back->full_backref ?
6567 "parent" : "root",
6568 back->full_backref ?
6569 (unsigned long long)parent :
6570 (unsigned long long)dback->root,
6571 (unsigned long long)dback->owner,
6572 (unsigned long long)dback->offset,
6573 dback->found_ref);
6574 } else {
6575 u64 parent;
6577 tback = (struct tree_backref *)back;
6578 if (back->full_backref)
6579 parent = tback->parent;
6580 else
6581 parent = 0;
6583 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6584 rec->start, rec->max_size,
6585 parent, tback->root, 0, 0);
6586 fprintf(stderr, "adding new tree backref on "
6587 "start %llu len %llu parent %llu root %llu\n",
6588 rec->start, rec->max_size, parent, tback->root);
6590 fail:
6591 btrfs_release_path(path);
6592 return ret;
6595 static struct extent_entry *find_entry(struct list_head *entries,
6596 u64 bytenr, u64 bytes)
6598 struct extent_entry *entry = NULL;
6600 list_for_each_entry(entry, entries, list) {
6601 if (entry->bytenr == bytenr && entry->bytes == bytes)
6602 return entry;
6605 return NULL;
/*
 * Majority vote among candidate extent entries: return the entry with the
 * strictly highest (count) of agreeing backrefs, or NULL when the vote is
 * tied (or everything is broken) and no single winner can be trusted.
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
	struct extent_entry *entry, *best = NULL, *prev = NULL;

	list_for_each_entry(entry, entries, list) {
		if (!prev) {
			prev = entry;
			continue;
		}

		/*
		 * If there are as many broken entries as entries then we know
		 * not to trust this particular entry.
		 */
		if (entry->broken == entry->count)
			continue;

		/*
		 * If our current entry == best then we can't be sure our best
		 * is really the best, so we need to keep searching.
		 */
		if (best && best->count == entry->count) {
			prev = entry;
			best = NULL;
			continue;
		}

		/* Prev == entry, not good enough, have to keep searching */
		if (!prev->broken && prev->count == entry->count)
			continue;

		if (!best)
			best = (prev->count > entry->count) ? prev : entry;
		else if (best->count < entry->count)
			best = entry;
		prev = entry;
	}

	return best;
}
/*
 * Rewrite a mismatched file-extent reference so it agrees with the agreed
 * "best" extent @entry: locate the file extent @dback points at in its fs
 * tree, then inside a transaction adjust disk_bytenr / offset /
 * disk_num_bytes (and ram_bytes when uncompressed) to match @entry.
 * Returns 0 on success or a negative error.
 */
static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
		      struct data_backref *dback, struct extent_entry *entry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret, err;

	key.objectid = dback->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find root for our ref\n");
		return -EINVAL;
	}

	/*
	 * The backref points to the original offset of the extent if it was
	 * split, so we need to search down to the offset we have and then walk
	 * forward until we find the backref we're looking for.
	 */
	key.objectid = dback->owner;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = dback->offset;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up ref %d\n", ret);
		return ret;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				fprintf(stderr, "Couldn't find our ref, next\n");
				return -EINVAL;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != dback->owner ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			fprintf(stderr, "Couldn't find our ref, search\n");
			return -EINVAL;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

		/* Found the exact file extent this backref describes. */
		if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
			break;
		path->slots[0]++;
	}

	btrfs_release_path(path);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Ok we have the key of the file extent we want to fix, now we can cow
	 * down to the thing and fix it.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
			key.objectid, key.type, key.offset, ret);
		goto out;
	}
	if (ret > 0) {
		fprintf(stderr, "Well that's odd, we just found this key "
			"[%Lu, %u, %Lu]\n", key.objectid, key.type,
			key.offset);
		ret = -EINVAL;
		goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	/* Moving a compressed extent's start is not implemented. */
	if (btrfs_file_extent_compression(leaf, fi) &&
	    dback->disk_bytenr != entry->bytenr) {
		fprintf(stderr, "Ref doesn't match the record start and is "
			"compressed, please take a btrfs-image of this file "
			"system and send it to a btrfs developer so they can "
			"complete this functionality for bytenr %Lu\n",
			dback->disk_bytenr);
		ret = -EINVAL;
		goto out;
	}

	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
		/* Broken backref: just repoint it at the agreed bytenr. */
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
	} else if (dback->disk_bytenr > entry->bytenr) {
		/* Ref starts inside the entry: shift the file offset right. */
		u64 off_diff, offset;

		off_diff = dback->disk_bytenr - entry->bytenr;
		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset +
		    btrfs_file_extent_num_bytes(leaf, fi) >
		    entry->bytenr + entry->bytes) {
			fprintf(stderr, "Ref is past the entry end, please "
				"take a btrfs-image of this file system and "
				"send it to a btrfs developer, ref %Lu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += off_diff;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	} else if (dback->disk_bytenr < entry->bytenr) {
		/* Ref starts before the entry: shrink the file offset. */
		u64 offset;

		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset < entry->bytenr) {
			fprintf(stderr, "Ref is before the entry start, please"
				" take a btrfs-image of this file system and "
				"send it to a btrfs developer, ref %Lu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += dback->disk_bytenr;
		offset -= entry->bytenr;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	}

	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);

	/*
	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
	 * only do this if we aren't using compression, otherwise it's a
	 * trickier case.
	 */
	if (!btrfs_file_extent_compression(leaf, fi))
		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
	else
		printf("ram bytes may be wrong?\n");
	btrfs_mark_buffer_dirty(leaf);
out:
	/* Commit even on error paths so the transaction is not leaked. */
	err = btrfs_commit_transaction(trans, root);
	btrfs_release_path(path);
	return ret ? ret : err;
}
/*
 * Check that all data backrefs of @rec agree on the extent's bytenr/bytes.
 * Builds one extent_entry per distinct (bytenr, bytes) pair, takes a
 * majority vote (falling back to the extent record as tie-breaker), then
 * repairs every backref that disagrees with the winner via repair_ref().
 * Returns 0 when nothing needed fixing, -EAGAIN after repairs (caller must
 * rescan), or a negative error.
 */
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
			   struct extent_record *rec)
{
	struct extent_backref *back;
	struct data_backref *dback;
	struct extent_entry *entry, *best = NULL;
	LIST_HEAD(entries);
	int nr_entries = 0;
	int broken_entries = 0;
	int ret = 0;
	short mismatch = 0;

	/*
	 * Metadata is easy and the backrefs should always agree on bytenr and
	 * size, if not we've got bigger issues.
	 */
	if (rec->metadata)
		return 0;

	list_for_each_entry(back, &rec->backrefs, list) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/*
		 * We only pay attention to backrefs that we found a real
		 * backref for.
		 */
		if (dback->found_ref == 0)
			continue;

		/*
		 * For now we only catch when the bytes don't match, not the
		 * bytenr.  We can easily do this at the same time, but I want
		 * to have a fs image to test on before we just add repair
		 * functionality willy-nilly so we know we won't screw up the
		 * repair.
		 */
		entry = find_entry(&entries, dback->disk_bytenr,
				   dback->bytes);
		if (!entry) {
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = dback->disk_bytenr;
			entry->bytes = dback->bytes;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}

		/*
		 * If we only have one entry we may think the entries agree
		 * when in reality they don't so we have to do some extra
		 * checking.
		 */
		if (dback->disk_bytenr != rec->start ||
		    dback->bytes != rec->nr || back->broken)
			mismatch = 1;

		if (back->broken) {
			entry->broken++;
			broken_entries++;
		}

		entry->count++;
	}

	/* Yay all the backrefs agree, carry on good sir */
	if (nr_entries <= 1 && !mismatch)
		goto out;

	fprintf(stderr, "attempting to repair backref discrepency for bytenr "
		"%Lu\n", rec->start);

	/*
	 * First we want to see if the backrefs can agree amongst themselves who
	 * is right, so figure out which one of the entries has the highest
	 * count.
	 */
	best = find_most_right_entry(&entries);

	/*
	 * Ok so we may have an even split between what the backrefs think, so
	 * this is where we use the extent ref to see what it thinks.
	 */
	if (!best) {
		entry = find_entry(&entries, rec->start, rec->nr);
		if (!entry && (!broken_entries || !rec->found_rec)) {
			fprintf(stderr, "Backrefs don't agree with each other "
				"and extent record doesn't agree with anybody,"
				" so we can't fix bytenr %Lu bytes %Lu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		} else if (!entry) {
			/*
			 * Ok our backrefs were broken, we'll assume this is the
			 * correct value and add an entry for this range.
			 */
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = rec->start;
			entry->bytes = rec->nr;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}
		/* Let the extent record cast its vote and retry. */
		entry->count++;
		best = find_most_right_entry(&entries);
		if (!best) {
			fprintf(stderr, "Backrefs and extent record evenly "
				"split on who is right, this is going to "
				"require user input to fix bytenr %Lu bytes "
				"%Lu\n", rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * I don't think this can happen currently as we'll abort() if we catch
	 * this case higher up, but in case somebody removes that we still can't
	 * deal with it properly here yet, so just bail out if that's the case.
	 */
	if (best->bytenr != rec->start) {
		fprintf(stderr, "Extent start and backref starts don't match, "
			"please use btrfs-image on this file system and send "
			"it to a btrfs developer so they can make fsck fix "
			"this particular case.  bytenr is %Lu, bytes is %Lu\n",
			rec->start, rec->nr);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Ok great we all agreed on an extent record, let's go find the real
	 * references and fix up the ones that don't match.
	 */
	list_for_each_entry(back, &rec->backrefs, list) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/*
		 * Still ignoring backrefs that don't have a real ref attached
		 * to them.
		 */
		if (dback->found_ref == 0)
			continue;

		if (dback->bytes == best->bytes &&
		    dback->disk_bytenr == best->bytenr)
			continue;

		ret = repair_ref(info, path, dback, best);
		if (ret)
			goto out;
	}

	/*
	 * Ok we messed with the actual refs, which means we need to drop our
	 * entire cache and go back and rescan.  I know this is a huge pain and
	 * adds a lot of extra work, but it's the only way to be safe.  Once all
	 * the backrefs agree we may not need to do anything to the extent
	 * record itself.
	 */
	ret = -EAGAIN;
out:
	/* Free the temporary voting entries on every exit path. */
	while (!list_empty(&entries)) {
		entry = list_entry(entries.next, struct extent_entry, list);
		list_del_init(&entry->list);
		free(entry);
	}
	return ret;
}
/*
 * process_duplicates - collapse duplicate extent records for one range.
 *
 * Returns 0 when duplicates remain (caller must call
 * delete_duplicate_records), 1 when the merged record ends up with no
 * duplicates.  The record taken from rec->dups replaces @rec in
 * @extent_cache, since a duplicate only exists when a real
 * BTRFS_EXTENT_ITEM_KEY was found on disk.
 */
6987 static int process_duplicates(struct btrfs_root *root,
6988 struct cache_tree *extent_cache,
6989 struct extent_record *rec)
6991 struct extent_record *good, *tmp;
6992 struct cache_extent *cache;
6993 int ret;
6996 * If we found a extent record for this extent then return, or if we
6997 * have more than one duplicate we are likely going to need to delete
6998 * something.
7000 if (rec->found_rec || rec->num_duplicates > 1)
7001 return 0;
7003 /* Shouldn't happen but just in case */
7004 BUG_ON(!rec->num_duplicates);
7007 * So this happens if we end up with a backref that doesn't match the
7008 * actual extent entry. So either the backref is bad or the extent
7009 * entry is bad. Either way we want to have the extent_record actually
7010 * reflect what we found in the extent_tree, so we need to take the
7011 * duplicate out and use that as the extent_record since the only way we
7012 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7014 remove_cache_extent(extent_cache, &rec->cache);
/* Adopt the first duplicate as the authoritative record. */
7016 good = list_entry(rec->dups.next, struct extent_record, list);
7017 list_del_init(&good->list);
7018 INIT_LIST_HEAD(&good->backrefs);
7019 INIT_LIST_HEAD(&good->dups);
7020 good->cache.start = good->start;
7021 good->cache.size = good->nr;
7022 good->content_checked = 0;
7023 good->owner_ref_checked = 0;
7024 good->num_duplicates = 0;
7025 good->refs = rec->refs;
7026 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep up any other cached records overlapping the adopted range. */
7027 while (1) {
7028 cache = lookup_cache_extent(extent_cache, good->start,
7029 good->nr);
7030 if (!cache)
7031 break;
7032 tmp = container_of(cache, struct extent_record, cache);
7035 * If we find another overlapping extent and it's found_rec is
7036 * set then it's a duplicate and we need to try and delete
7037 * something.
7039 if (tmp->found_rec || tmp->num_duplicates > 0) {
7040 if (list_empty(&good->list))
7041 list_add_tail(&good->list,
7042 &duplicate_extents);
7043 good->num_duplicates += tmp->num_duplicates + 1;
7044 list_splice_init(&tmp->dups, &good->dups);
7045 list_del_init(&tmp->list);
7046 list_add_tail(&tmp->list, &good->dups);
7047 remove_cache_extent(extent_cache, &tmp->cache);
7048 continue;
7052 * Ok we have another non extent item backed extent rec, so lets
7053 * just add it to this extent and carry on like we did above.
7055 good->refs += tmp->refs;
7056 list_splice_init(&tmp->backrefs, &good->backrefs);
7057 remove_cache_extent(extent_cache, &tmp->cache);
7058 free(tmp);
7060 ret = insert_cache_extent(extent_cache, &good->cache);
7061 BUG_ON(ret);
/* @rec's lists were spliced away above, so only the struct is freed. */
7062 free(rec);
7063 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - drop redundant EXTENT_ITEMs for one range.
 *
 * Picks the record on rec->dups (or @rec itself) that covers all the
 * others, deletes the extent items of the rest from the extent tree, and
 * frees every record except @rec.  Returns the number of deleted items,
 * or a negative errno.
 */
7066 static int delete_duplicate_records(struct btrfs_root *root,
7067 struct extent_record *rec)
7069 struct btrfs_trans_handle *trans;
7070 LIST_HEAD(delete_list);
7071 struct btrfs_path *path;
7072 struct extent_record *tmp, *good, *n;
7073 int nr_del = 0;
7074 int ret = 0, err;
7075 struct btrfs_key key;
7077 path = btrfs_alloc_path();
7078 if (!path) {
7079 ret = -ENOMEM;
7080 goto out;
7083 good = rec;
7084 /* Find the record that covers all of the duplicates. */
7085 list_for_each_entry(tmp, &rec->dups, list) {
7086 if (good->start < tmp->start)
7087 continue;
7088 if (good->nr > tmp->nr)
7089 continue;
/* Partial overlap cannot be repaired automatically; bail hard. */
7091 if (tmp->start + tmp->nr < good->start + good->nr) {
7092 fprintf(stderr, "Ok we have overlapping extents that "
7093 "aren't completely covered by each other, this "
7094 "is going to require more careful thought. "
7095 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7096 tmp->start, tmp->nr, good->start, good->nr);
7097 abort();
7099 good = tmp;
7102 if (good != rec)
7103 list_add_tail(&rec->list, &delete_list);
7105 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7106 if (tmp == good)
7107 continue;
7108 list_move_tail(&tmp->list, &delete_list);
7111 root = root->fs_info->extent_root;
7112 trans = btrfs_start_transaction(root, 1);
7113 if (IS_ERR(trans)) {
7114 ret = PTR_ERR(trans);
7115 goto out;
/* Delete the on-disk extent item of every record on delete_list. */
7118 list_for_each_entry(tmp, &delete_list, list) {
7119 if (tmp->found_rec == 0)
7120 continue;
7121 key.objectid = tmp->start;
7122 key.type = BTRFS_EXTENT_ITEM_KEY;
7123 key.offset = tmp->nr;
7125 /* Shouldn't happen but just in case */
7126 if (tmp->metadata) {
7127 fprintf(stderr, "Well this shouldn't happen, extent "
7128 "record overlaps but is metadata? "
7129 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7130 abort();
7133 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7134 if (ret) {
7135 if (ret > 0)
7136 ret = -EINVAL;
7137 break;
7139 ret = btrfs_del_item(trans, root, path);
7140 if (ret)
7141 break;
7142 btrfs_release_path(path);
7143 nr_del++;
7145 err = btrfs_commit_transaction(trans, root);
7146 if (err && !ret)
7147 ret = err;
7148 out:
/* Free everything except @rec, which the caller still owns. */
7149 while (!list_empty(&delete_list)) {
7150 tmp = list_entry(delete_list.next, struct extent_record, list);
7151 list_del_init(&tmp->list);
7152 if (tmp == rec)
7153 continue;
7154 free(tmp);
7157 while (!list_empty(&rec->dups)) {
7158 tmp = list_entry(rec->dups.next, struct extent_record, list);
7159 list_del_init(&tmp->list);
7160 free(tmp);
7163 btrfs_free_path(path);
7165 if (!ret && !nr_del)
7166 rec->num_duplicates = 0;
7168 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - resolve unfound data backrefs via the fs trees.
 *
 * For every data backref of @rec that was never matched to a file extent,
 * look the file extent up in its owning subvolume and, if its on-disk
 * bytenr has no extent record of its own, adopt those disk bytenr/bytes
 * values and mark the backref broken so verify_backrefs() distrusts it.
 * Returns 0 on success or a negative errno.
 */
7171 static int find_possible_backrefs(struct btrfs_fs_info *info,
7172 struct btrfs_path *path,
7173 struct cache_tree *extent_cache,
7174 struct extent_record *rec)
7176 struct btrfs_root *root;
7177 struct extent_backref *back;
7178 struct data_backref *dback;
7179 struct cache_extent *cache;
7180 struct btrfs_file_extent_item *fi;
7181 struct btrfs_key key;
7182 u64 bytenr, bytes;
7183 int ret;
7185 list_for_each_entry(back, &rec->backrefs, list) {
7186 /* Don't care about full backrefs (poor unloved backrefs) */
7187 if (back->full_backref || !back->is_data)
7188 continue;
7190 dback = (struct data_backref *)back;
7192 /* We found this one, we don't need to do a lookup */
7193 if (dback->found_ref)
7194 continue;
7196 key.objectid = dback->root;
7197 key.type = BTRFS_ROOT_ITEM_KEY;
7198 key.offset = (u64)-1;
7200 root = btrfs_read_fs_root(info, &key);
7202 /* No root, definitely a bad ref, skip */
7203 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7204 continue;
7205 /* Other err, exit */
7206 if (IS_ERR(root))
7207 return PTR_ERR(root);
7209 key.objectid = dback->owner;
7210 key.type = BTRFS_EXTENT_DATA_KEY;
7211 key.offset = dback->offset;
7212 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7213 if (ret) {
7214 btrfs_release_path(path);
7215 if (ret < 0)
7216 return ret;
7217 /* Didn't find it, we can carry on */
7218 ret = 0;
7219 continue;
7222 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7223 struct btrfs_file_extent_item);
7224 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7225 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7226 btrfs_release_path(path);
7227 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7228 if (cache) {
7229 struct extent_record *tmp;
7230 tmp = container_of(cache, struct extent_record, cache);
7233 * If we found an extent record for the bytenr for this
7234 * particular backref then we can't add it to our
7235 * current extent record. We only want to add backrefs
7236 * that don't have a corresponding extent item in the
7237 * extent tree since they likely belong to this record
7238 * and we need to fix it if it doesn't match bytenrs.
7240 if (tmp->found_rec)
7241 continue;
7244 dback->found_ref += 1;
7245 dback->disk_bytenr = bytenr;
7246 dback->bytes = bytes;
7249 * Set this so the verify backref code knows not to trust the
7250 * values in this backref.
7252 back->broken = 1;
7255 return 0;
7259 * Record orphan data ref into corresponding root.
7261 * Return 0 if the extent item contains data ref and recorded.
7262 * Return 1 if the extent item contains no useful data ref
7263 * On that case, it may contains only shared_dataref or metadata backref
7264 * or the file extent exists(this should be handled by the extent bytenr
7265 * recovery routine)
7266 * Return <0 if something goes wrong.
7268 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7269 struct extent_record *rec)
7271 struct btrfs_key key;
7272 struct btrfs_root *dest_root;
7273 struct extent_backref *back;
7274 struct data_backref *dback;
7275 struct orphan_data_extent *orphan;
7276 struct btrfs_path *path;
7277 int recorded_data_ref = 0;
7278 int ret = 0;
7280 if (rec->metadata)
7281 return 1;
7282 path = btrfs_alloc_path();
7283 if (!path)
7284 return -ENOMEM;
/* Only data backrefs that exist in the extent tree but were never
 * matched to a file extent are candidates for orphan recording. */
7285 list_for_each_entry(back, &rec->backrefs, list) {
7286 if (back->full_backref || !back->is_data ||
7287 !back->found_extent_tree)
7288 continue;
7289 dback = (struct data_backref *)back;
7290 if (dback->found_ref)
7291 continue;
7292 key.objectid = dback->root;
7293 key.type = BTRFS_ROOT_ITEM_KEY;
7294 key.offset = (u64)-1;
7296 dest_root = btrfs_read_fs_root(fs_info, &key);
7298 /* For non-exist root we just skip it */
7299 if (IS_ERR(dest_root) || !dest_root)
7300 continue;
7302 key.objectid = dback->owner;
7303 key.type = BTRFS_EXTENT_DATA_KEY;
7304 key.offset = dback->offset;
7306 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7308 * For ret < 0, it's OK since the fs-tree may be corrupted,
7309 * we need to record it for inode/file extent rebuild.
7310 * For ret > 0, we record it only for file extent rebuild.
7311 * For ret == 0, the file extent exists but only bytenr
7312 * mismatch, let the original bytenr fix routine to handle,
7313 * don't record it.
7315 if (ret == 0)
7316 continue;
7317 ret = 0;
/* Ownership of @orphan passes to dest_root->orphan_data_extents. */
7318 orphan = malloc(sizeof(*orphan));
7319 if (!orphan) {
7320 ret = -ENOMEM;
7321 goto out;
7323 INIT_LIST_HEAD(&orphan->list);
7324 orphan->root = dback->root;
7325 orphan->objectid = dback->owner;
7326 orphan->offset = dback->offset;
7327 orphan->disk_bytenr = rec->cache.start;
7328 orphan->disk_len = rec->cache.size;
7329 list_add(&dest_root->orphan_data_extents, &orphan->list);
7330 recorded_data_ref = 1;
7332 out:
7333 btrfs_free_path(path);
7334 if (!ret)
7335 return !recorded_data_ref;
7336 else
7337 return ret;
7341 * when an incorrect extent item is found, this will delete
7342 * all of the existing entries for it and recreate them
7343 * based on what the tree scan found.
7345 static int fixup_extent_refs(struct btrfs_fs_info *info,
7346 struct cache_tree *extent_cache,
7347 struct extent_record *rec)
7349 struct btrfs_trans_handle *trans = NULL;
7350 int ret;
7351 struct btrfs_path *path;
7352 struct list_head *cur = rec->backrefs.next;
7353 struct cache_extent *cache;
7354 struct extent_backref *back;
7355 int allocated = 0;
7356 u64 flags = 0;
7358 if (rec->flag_block_full_backref)
7359 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7361 path = btrfs_alloc_path();
7362 if (!path)
7363 return -ENOMEM;
7365 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7367 * Sometimes the backrefs themselves are so broken they don't
7368 * get attached to any meaningful rec, so first go back and
7369 * check any of our backrefs that we couldn't find and throw
7370 * them into the list if we find the backref so that
7371 * verify_backrefs can figure out what to do.
7373 ret = find_possible_backrefs(info, path, extent_cache, rec);
7374 if (ret < 0)
7375 goto out;
7378 /* step one, make sure all of the backrefs agree */
7379 ret = verify_backrefs(info, path, rec);
7380 if (ret < 0)
7381 goto out;
7383 trans = btrfs_start_transaction(info->extent_root, 1);
7384 if (IS_ERR(trans)) {
7385 ret = PTR_ERR(trans);
7386 goto out;
7389 /* step two, delete all the existing records */
7390 ret = delete_extent_records(trans, info->extent_root, path,
7391 rec->start, rec->max_size);
7393 if (ret < 0)
7394 goto out;
7396 /* was this block corrupt? If so, don't add references to it */
7397 cache = lookup_cache_extent(info->corrupt_blocks,
7398 rec->start, rec->max_size);
7399 if (cache) {
7400 ret = 0;
7401 goto out;
7404 /* step three, recreate all the refs we did find */
7405 while(cur != &rec->backrefs) {
7406 back = list_entry(cur, struct extent_backref, list);
7407 cur = cur->next;
7410 * if we didn't find any references, don't create a
7411 * new extent record
7413 if (!back->found_ref)
7414 continue;
7416 rec->bad_full_backref = 0;
/* 'allocated' tells record_extent whether the extent item itself
 * still needs to be inserted (first iteration only). */
7417 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7418 allocated = 1;
7420 if (ret)
7421 goto out;
7423 out:
7424 if (trans) {
7425 int err = btrfs_commit_transaction(trans, info->extent_root);
7426 if (!ret)
7427 ret = err;
7430 btrfs_free_path(path);
7431 return ret;
7434 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7435 struct extent_record *rec)
7437 struct btrfs_trans_handle *trans;
7438 struct btrfs_root *root = fs_info->extent_root;
7439 struct btrfs_path *path;
7440 struct btrfs_extent_item *ei;
7441 struct btrfs_key key;
7442 u64 flags;
7443 int ret = 0;
7445 key.objectid = rec->start;
7446 if (rec->metadata) {
7447 key.type = BTRFS_METADATA_ITEM_KEY;
7448 key.offset = rec->info_level;
7449 } else {
7450 key.type = BTRFS_EXTENT_ITEM_KEY;
7451 key.offset = rec->max_size;
7454 path = btrfs_alloc_path();
7455 if (!path)
7456 return -ENOMEM;
7458 trans = btrfs_start_transaction(root, 0);
7459 if (IS_ERR(trans)) {
7460 btrfs_free_path(path);
7461 return PTR_ERR(trans);
7464 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7465 if (ret < 0) {
7466 btrfs_free_path(path);
7467 btrfs_commit_transaction(trans, root);
7468 return ret;
7469 } else if (ret) {
7470 fprintf(stderr, "Didn't find extent for %llu\n",
7471 (unsigned long long)rec->start);
7472 btrfs_free_path(path);
7473 btrfs_commit_transaction(trans, root);
7474 return -ENOENT;
7477 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7478 struct btrfs_extent_item);
7479 flags = btrfs_extent_flags(path->nodes[0], ei);
7480 if (rec->flag_block_full_backref) {
7481 fprintf(stderr, "setting full backref on %llu\n",
7482 (unsigned long long)key.objectid);
7483 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7484 } else {
7485 fprintf(stderr, "clearing full backref on %llu\n",
7486 (unsigned long long)key.objectid);
7487 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7489 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7490 btrfs_mark_buffer_dirty(path->nodes[0]);
7491 btrfs_free_path(path);
7492 return btrfs_commit_transaction(trans, root);
7495 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the parent pointer to one corrupt block.
 *
 * Searches the extent tree down to corrupt->level + 1 and deletes the
 * node pointer referencing corrupt->cache.start.  If the pointer is not
 * in the expected slot the whole parent node is scanned; if still not
 * found, the search is retried one level higher until the root is hit.
 * Returns 0 on success, -ENOENT if the pointer was never found, or a
 * negative errno from the tree operations.
 */
7496 static int prune_one_block(struct btrfs_trans_handle *trans,
7497 struct btrfs_fs_info *info,
7498 struct btrfs_corrupt_block *corrupt)
7500 int ret;
7501 struct btrfs_path path;
7502 struct extent_buffer *eb;
7503 u64 found;
7504 int slot;
7505 int nritems;
7506 int level = corrupt->level + 1;
7508 btrfs_init_path(&path);
7509 again:
7510 /* we want to stop at the parent to our busted block */
7511 path.lowest_level = level;
7513 ret = btrfs_search_slot(trans, info->extent_root,
7514 &corrupt->key, &path, -1, 1);
7516 if (ret < 0)
7517 goto out;
7519 eb = path.nodes[level];
7520 if (!eb) {
7521 ret = -ENOENT;
7522 goto out;
7526 * hopefully the search gave us the block we want to prune,
7527 * lets try that first
7529 slot = path.slots[level];
7530 found = btrfs_node_blockptr(eb, slot);
7531 if (found == corrupt->cache.start)
7532 goto del_ptr;
7534 nritems = btrfs_header_nritems(eb);
7536 /* the search failed, lets scan this node and hope we find it */
7537 for (slot = 0; slot < nritems; slot++) {
7538 found = btrfs_node_blockptr(eb, slot);
7539 if (found == corrupt->cache.start)
7540 goto del_ptr;
7543 * we couldn't find the bad block. TODO, search all the nodes for pointers
7544 * to this block
7546 if (eb == info->extent_root->node) {
7547 ret = -ENOENT;
7548 goto out;
7549 } else {
7550 level++;
7551 btrfs_release_path(&path);
7552 goto again;
7555 del_ptr:
7556 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7557 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7559 out:
7560 btrfs_release_path(&path);
7561 return ret;
7564 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7566 struct btrfs_trans_handle *trans = NULL;
7567 struct cache_extent *cache;
7568 struct btrfs_corrupt_block *corrupt;
7570 while (1) {
7571 cache = search_cache_extent(info->corrupt_blocks, 0);
7572 if (!cache)
7573 break;
7574 if (!trans) {
7575 trans = btrfs_start_transaction(info->extent_root, 1);
7576 if (IS_ERR(trans))
7577 return PTR_ERR(trans);
7579 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7580 prune_one_block(trans, info, corrupt);
7581 remove_cache_extent(info->corrupt_blocks, cache);
7583 if (trans)
7584 return btrfs_commit_transaction(trans, info->extent_root);
7585 return 0;
7588 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7590 struct btrfs_block_group_cache *cache;
7591 u64 start, end;
7592 int ret;
7594 while (1) {
7595 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7596 &start, &end, EXTENT_DIRTY);
7597 if (ret)
7598 break;
7599 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7600 GFP_NOFS);
7603 start = 0;
7604 while (1) {
7605 cache = btrfs_lookup_first_block_group(fs_info, start);
7606 if (!cache)
7607 break;
7608 if (cache->cached)
7609 cache->cached = 0;
7610 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - verify (and optionally repair) every extent record.
 *
 * Drains @extent_cache, reporting mismatched ref counts, bad backpointers,
 * failed owner refs, bad full-backref flags, stripe-crossing metadata and
 * chunk type mismatches.  With 'repair' set, problem extents are first
 * excluded from allocation, duplicates are deleted (returning -EAGAIN so
 * the caller rescans), and fixable records are repaired in place.
 * Returns 0 when clean, 1 when errors were found (non-repair), or a
 * negative errno / -EAGAIN in repair mode.
 */
7614 static int check_extent_refs(struct btrfs_root *root,
7615 struct cache_tree *extent_cache)
7617 struct extent_record *rec;
7618 struct cache_extent *cache;
7619 int err = 0;
7620 int ret = 0;
7621 int fixed = 0;
7622 int had_dups = 0;
7623 int recorded = 0;
7625 if (repair) {
7627 * if we're doing a repair, we have to make sure
7628 * we don't allocate from the problem extents.
7629 * In the worst case, this will be all the
7630 * extents in the FS
7632 cache = search_cache_extent(extent_cache, 0);
7633 while(cache) {
7634 rec = container_of(cache, struct extent_record, cache);
7635 set_extent_dirty(root->fs_info->excluded_extents,
7636 rec->start,
7637 rec->start + rec->max_size - 1,
7638 GFP_NOFS);
7639 cache = next_cache_extent(cache);
7642 /* pin down all the corrupted blocks too */
7643 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7644 while(cache) {
7645 set_extent_dirty(root->fs_info->excluded_extents,
7646 cache->start,
7647 cache->start + cache->size - 1,
7648 GFP_NOFS);
7649 cache = next_cache_extent(cache);
7651 prune_corrupt_blocks(root->fs_info);
7652 reset_cached_block_groups(root->fs_info);
7655 reset_cached_block_groups(root->fs_info);
7658 * We need to delete any duplicate entries we find first otherwise we
7659 * could mess up the extent tree when we have backrefs that actually
7660 * belong to a different extent item and not the weird duplicate one.
7662 while (repair && !list_empty(&duplicate_extents)) {
7663 rec = list_entry(duplicate_extents.next, struct extent_record,
7664 list);
7665 list_del_init(&rec->list);
7667 /* Sometimes we can find a backref before we find an actual
7668 * extent, so we need to process it a little bit to see if there
7669 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7670 * if this is a backref screwup. If we need to delete stuff
7671 * process_duplicates() will return 0, otherwise it will return
7672 * 1 and we
7674 if (process_duplicates(root, extent_cache, rec))
7675 continue;
7676 ret = delete_duplicate_records(root, rec);
7677 if (ret < 0)
7678 return ret;
7680 * delete_duplicate_records will return the number of entries
7681 * deleted, so if it's greater than 0 then we know we actually
7682 * did something and we need to remove.
7684 if (ret)
7685 had_dups = 1;
7688 if (had_dups)
7689 return -EAGAIN;
/* Main pass: pop records off the cache one at a time and check them. */
7691 while(1) {
7692 int cur_err = 0;
7694 fixed = 0;
7695 recorded = 0;
7696 cache = search_cache_extent(extent_cache, 0);
7697 if (!cache)
7698 break;
7699 rec = container_of(cache, struct extent_record, cache);
7700 if (rec->num_duplicates) {
7701 fprintf(stderr, "extent item %llu has multiple extent "
7702 "items\n", (unsigned long long)rec->start);
7703 err = 1;
7704 cur_err = 1;
7707 if (rec->refs != rec->extent_item_refs) {
7708 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7709 (unsigned long long)rec->start,
7710 (unsigned long long)rec->nr);
7711 fprintf(stderr, "extent item %llu, found %llu\n",
7712 (unsigned long long)rec->extent_item_refs,
7713 (unsigned long long)rec->refs);
7714 ret = record_orphan_data_extents(root->fs_info, rec);
7715 if (ret < 0)
7716 goto repair_abort;
7717 if (ret == 0) {
7718 recorded = 1;
7719 } else {
7721 * we can't use the extent to repair file
7722 * extent, let the fallback method handle it.
7724 if (!fixed && repair) {
7725 ret = fixup_extent_refs(
7726 root->fs_info,
7727 extent_cache, rec);
7728 if (ret)
7729 goto repair_abort;
7730 fixed = 1;
7733 err = 1;
7734 cur_err = 1;
7736 if (all_backpointers_checked(rec, 1)) {
7737 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7738 (unsigned long long)rec->start,
7739 (unsigned long long)rec->nr);
7741 if (!fixed && !recorded && repair) {
7742 ret = fixup_extent_refs(root->fs_info,
7743 extent_cache, rec);
7744 if (ret)
7745 goto repair_abort;
7746 fixed = 1;
7748 cur_err = 1;
7749 err = 1;
7751 if (!rec->owner_ref_checked) {
7752 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7753 (unsigned long long)rec->start,
7754 (unsigned long long)rec->nr);
7755 if (!fixed && !recorded && repair) {
7756 ret = fixup_extent_refs(root->fs_info,
7757 extent_cache, rec);
7758 if (ret)
7759 goto repair_abort;
7760 fixed = 1;
7762 err = 1;
7763 cur_err = 1;
7765 if (rec->bad_full_backref) {
7766 fprintf(stderr, "bad full backref, on [%llu]\n",
7767 (unsigned long long)rec->start);
7768 if (repair) {
7769 ret = fixup_extent_flags(root->fs_info, rec);
7770 if (ret)
7771 goto repair_abort;
7772 fixed = 1;
7774 err = 1;
7775 cur_err = 1;
7778 * Although it's not a extent ref's problem, we reuse this
7779 * routine for error reporting.
7780 * No repair function yet.
7782 if (rec->crossing_stripes) {
7783 fprintf(stderr,
7784 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7785 rec->start, rec->start + rec->max_size);
7786 err = 1;
7787 cur_err = 1;
7790 if (rec->wrong_chunk_type) {
7791 fprintf(stderr,
7792 "bad extent [%llu, %llu), type mismatch with chunk\n",
7793 rec->start, rec->start + rec->max_size);
7794 err = 1;
7795 cur_err = 1;
/* Record fully processed: release it and un-exclude its range when it
 * was clean or has been repaired. */
7798 remove_cache_extent(extent_cache, cache);
7799 free_all_extent_backrefs(rec);
7800 if (!init_extent_tree && repair && (!cur_err || fixed))
7801 clear_extent_dirty(root->fs_info->excluded_extents,
7802 rec->start,
7803 rec->start + rec->max_size - 1,
7804 GFP_NOFS);
7805 free(rec);
7807 repair_abort:
7808 if (repair) {
7809 if (ret && ret != -EAGAIN) {
7810 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7811 exit(1);
7812 } else if (!ret) {
7813 struct btrfs_trans_handle *trans;
7815 root = root->fs_info->extent_root;
7816 trans = btrfs_start_transaction(root, 1);
7817 if (IS_ERR(trans)) {
7818 ret = PTR_ERR(trans);
7819 goto repair_abort;
7822 btrfs_fix_block_accounting(trans, root);
7823 ret = btrfs_commit_transaction(trans, root);
7824 if (ret)
7825 goto repair_abort;
7827 if (err)
7828 fprintf(stderr, "repaired damaged extent references\n");
7829 return ret;
7831 return err;
7834 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7836 u64 stripe_size;
7838 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7839 stripe_size = length;
7840 stripe_size /= num_stripes;
7841 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7842 stripe_size = length * 2;
7843 stripe_size /= num_stripes;
7844 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7845 stripe_size = length;
7846 stripe_size /= (num_stripes - 1);
7847 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7848 stripe_size = length;
7849 stripe_size /= (num_stripes - 2);
7850 } else {
7851 stripe_size = length;
7853 return stripe_size;
7857 * Check the chunk with its block group/dev list ref:
7858 * Return 0 if all refs seems valid.
7859 * Return 1 if part of refs seems valid, need later check for rebuild ref
7860 * like missing block group and needs to search extent tree to rebuild them.
7861 * Return -1 if essential refs are missing and unable to rebuild.
7863 static int check_chunk_refs(struct chunk_record *chunk_rec,
7864 struct block_group_tree *block_group_cache,
7865 struct device_extent_tree *dev_extent_cache,
7866 int silent)
7868 struct cache_extent *block_group_item;
7869 struct block_group_record *block_group_rec;
7870 struct cache_extent *dev_extent_item;
7871 struct device_extent_record *dev_extent_rec;
7872 u64 devid;
7873 u64 offset;
7874 u64 length;
/* NOTE(review): metadump_v2 is hard-coded 0 here, so the metadump-only
 * relaxations below are currently dead code in this function. */
7875 int metadump_v2 = 0;
7876 int i;
7877 int ret = 0;
/* Step 1: the chunk must be mirrored by a matching block group item. */
7879 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7880 chunk_rec->offset,
7881 chunk_rec->length);
7882 if (block_group_item) {
7883 block_group_rec = container_of(block_group_item,
7884 struct block_group_record,
7885 cache);
7886 if (chunk_rec->length != block_group_rec->offset ||
7887 chunk_rec->offset != block_group_rec->objectid ||
7888 (!metadump_v2 &&
7889 chunk_rec->type_flags != block_group_rec->flags)) {
7890 if (!silent)
7891 fprintf(stderr,
7892 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7893 chunk_rec->objectid,
7894 chunk_rec->type,
7895 chunk_rec->offset,
7896 chunk_rec->length,
7897 chunk_rec->offset,
7898 chunk_rec->type_flags,
7899 block_group_rec->objectid,
7900 block_group_rec->type,
7901 block_group_rec->offset,
7902 block_group_rec->offset,
7903 block_group_rec->objectid,
7904 block_group_rec->flags);
7905 ret = -1;
7906 } else {
7907 list_del_init(&block_group_rec->list);
7908 chunk_rec->bg_rec = block_group_rec;
7910 } else {
7911 if (!silent)
7912 fprintf(stderr,
7913 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7914 chunk_rec->objectid,
7915 chunk_rec->type,
7916 chunk_rec->offset,
7917 chunk_rec->length,
7918 chunk_rec->offset,
7919 chunk_rec->type_flags);
7920 ret = 1;
7923 if (metadump_v2)
7924 return ret;
/* Step 2: every stripe must be backed by a matching device extent. */
7926 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7927 chunk_rec->num_stripes);
7928 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7929 devid = chunk_rec->stripes[i].devid;
7930 offset = chunk_rec->stripes[i].offset;
7931 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7932 devid, offset, length);
7933 if (dev_extent_item) {
7934 dev_extent_rec = container_of(dev_extent_item,
7935 struct device_extent_record,
7936 cache);
7937 if (dev_extent_rec->objectid != devid ||
7938 dev_extent_rec->offset != offset ||
7939 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7940 dev_extent_rec->length != length) {
7941 if (!silent)
7942 fprintf(stderr,
7943 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7944 chunk_rec->objectid,
7945 chunk_rec->type,
7946 chunk_rec->offset,
7947 chunk_rec->stripes[i].devid,
7948 chunk_rec->stripes[i].offset,
7949 dev_extent_rec->objectid,
7950 dev_extent_rec->offset,
7951 dev_extent_rec->length);
7952 ret = -1;
7953 } else {
7954 list_move(&dev_extent_rec->chunk_list,
7955 &chunk_rec->dextents);
7957 } else {
7958 if (!silent)
7959 fprintf(stderr,
7960 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7961 chunk_rec->objectid,
7962 chunk_rec->type,
7963 chunk_rec->offset,
7964 chunk_rec->stripes[i].devid,
7965 chunk_rec->stripes[i].offset);
7966 ret = -1;
7969 return ret;
7972 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7973 int check_chunks(struct cache_tree *chunk_cache,
7974 struct block_group_tree *block_group_cache,
7975 struct device_extent_tree *dev_extent_cache,
7976 struct list_head *good, struct list_head *bad,
7977 struct list_head *rebuild, int silent)
7979 struct cache_extent *chunk_item;
7980 struct chunk_record *chunk_rec;
7981 struct block_group_record *bg_rec;
7982 struct device_extent_record *dext_rec;
7983 int err;
7984 int ret = 0;
7986 chunk_item = first_cache_extent(chunk_cache);
7987 while (chunk_item) {
7988 chunk_rec = container_of(chunk_item, struct chunk_record,
7989 cache);
7990 err = check_chunk_refs(chunk_rec, block_group_cache,
7991 dev_extent_cache, silent);
7992 if (err < 0)
7993 ret = err;
7994 if (err == 0 && good)
7995 list_add_tail(&chunk_rec->list, good);
7996 if (err > 0 && rebuild)
7997 list_add_tail(&chunk_rec->list, rebuild);
7998 if (err < 0 && bad)
7999 list_add_tail(&chunk_rec->list, bad);
8000 chunk_item = next_cache_extent(chunk_item);
8003 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8004 if (!silent)
8005 fprintf(stderr,
8006 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8007 bg_rec->objectid,
8008 bg_rec->offset,
8009 bg_rec->flags);
8010 if (!ret)
8011 ret = 1;
8014 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8015 chunk_list) {
8016 if (!silent)
8017 fprintf(stderr,
8018 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8019 dext_rec->objectid,
8020 dext_rec->offset,
8021 dext_rec->length);
8022 if (!ret)
8023 ret = 1;
8025 return ret;
8029 static int check_device_used(struct device_record *dev_rec,
8030 struct device_extent_tree *dext_cache)
8032 struct cache_extent *cache;
8033 struct device_extent_record *dev_extent_rec;
8034 u64 total_byte = 0;
8036 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8037 while (cache) {
8038 dev_extent_rec = container_of(cache,
8039 struct device_extent_record,
8040 cache);
8041 if (dev_extent_rec->objectid != dev_rec->devid)
8042 break;
8044 list_del_init(&dev_extent_rec->device_list);
8045 total_byte += dev_extent_rec->length;
8046 cache = next_cache_extent(cache);
8049 if (total_byte != dev_rec->byte_used) {
8050 fprintf(stderr,
8051 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8052 total_byte, dev_rec->byte_used, dev_rec->objectid,
8053 dev_rec->type, dev_rec->offset);
8054 return -1;
8055 } else {
8056 return 0;
8060 /* check btrfs_dev_item -> btrfs_dev_extent */
8061 static int check_devices(struct rb_root *dev_cache,
8062 struct device_extent_tree *dev_extent_cache)
8064 struct rb_node *dev_node;
8065 struct device_record *dev_rec;
8066 struct device_extent_record *dext_rec;
8067 int err;
8068 int ret = 0;
8070 dev_node = rb_first(dev_cache);
8071 while (dev_node) {
8072 dev_rec = container_of(dev_node, struct device_record, node);
8073 err = check_device_used(dev_rec, dev_extent_cache);
8074 if (err)
8075 ret = err;
8077 dev_node = rb_next(dev_node);
8079 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8080 device_list) {
8081 fprintf(stderr,
8082 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8083 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8084 if (!ret)
8085 ret = 1;
8087 return ret;
8090 static int add_root_item_to_list(struct list_head *head,
8091 u64 objectid, u64 bytenr, u64 last_snapshot,
8092 u8 level, u8 drop_level,
8093 int level_size, struct btrfs_key *drop_key)
8096 struct root_item_record *ri_rec;
8097 ri_rec = malloc(sizeof(*ri_rec));
8098 if (!ri_rec)
8099 return -ENOMEM;
8100 ri_rec->bytenr = bytenr;
8101 ri_rec->objectid = objectid;
8102 ri_rec->level = level;
8103 ri_rec->level_size = level_size;
8104 ri_rec->drop_level = drop_level;
8105 ri_rec->last_snapshot = last_snapshot;
8106 if (drop_key)
8107 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8108 list_add_tail(&ri_rec->list, head);
8110 return 0;
8113 static void free_root_item_list(struct list_head *list)
8115 struct root_item_record *ri_rec;
8117 while (!list_empty(list)) {
8118 ri_rec = list_first_entry(list, struct root_item_record,
8119 list);
8120 list_del_init(&ri_rec->list);
8121 free(ri_rec);
/*
 * deal_root_from_list - scan every queued tree root block-by-block.
 *
 * For each root_item_record on @list, read its root node, seed the
 * pending/seen/nodes caches via add_root_to_pending(), and drain the scan
 * with run_next_block() until it signals completion.  Snapshot roots are
 * processed one at a time (see comment below); a final drain pass mops up
 * anything still pending without a root context.  Returns 0 or the first
 * negative error encountered.
 */
8125 static int deal_root_from_list(struct list_head *list,
8126 struct btrfs_root *root,
8127 struct block_info *bits,
8128 int bits_nr,
8129 struct cache_tree *pending,
8130 struct cache_tree *seen,
8131 struct cache_tree *reada,
8132 struct cache_tree *nodes,
8133 struct cache_tree *extent_cache,
8134 struct cache_tree *chunk_cache,
8135 struct rb_root *dev_cache,
8136 struct block_group_tree *block_group_cache,
8137 struct device_extent_tree *dev_extent_cache)
8139 int ret = 0;
8140 u64 last;
8142 while (!list_empty(list)) {
8143 struct root_item_record *rec;
8144 struct extent_buffer *buf;
8145 rec = list_entry(list->next,
8146 struct root_item_record, list);
8147 last = 0;
8148 buf = read_tree_block(root->fs_info->tree_root,
8149 rec->bytenr, rec->level_size, 0);
8150 if (!extent_buffer_uptodate(buf)) {
8151 free_extent_buffer(buf);
8152 ret = -EIO;
8153 break;
8155 add_root_to_pending(buf, extent_cache, pending,
8156 seen, nodes, rec->objectid);
8158 * To rebuild extent tree, we need deal with snapshot
8159 * one by one, otherwise we deal with node firstly which
8160 * can maximize readahead.
8162 while (1) {
8163 ret = run_next_block(root, bits, bits_nr, &last,
8164 pending, seen, reada, nodes,
8165 extent_cache, chunk_cache,
8166 dev_cache, block_group_cache,
8167 dev_extent_cache, rec);
8168 if (ret != 0)
8169 break;
8171 free_extent_buffer(buf);
8172 list_del(&rec->list);
8173 free(rec);
8174 if (ret < 0)
8175 break;
/* Final drain: process leftover blocks with no root context. */
8177 while (ret >= 0) {
8178 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8179 reada, nodes, extent_cache, chunk_cache,
8180 dev_cache, block_group_cache,
8181 dev_extent_cache, NULL);
8182 if (ret != 0) {
8183 if (ret > 0)
8184 ret = 0;
8185 break;
8188 return ret;
/*
 * Build the extent/chunk/device/block-group caches by walking the tree of
 * tree roots, the chunk root and every fs/snapshot root (fully linked trees
 * go on normal_trees, partially-dropped snapshots on dropping_trees), then
 * cross-check them via check_chunks(), check_extent_refs() and
 * check_devices().
 *
 * Any -EAGAIN from the sub-checks means something got repaired: all caches
 * are torn down at "loop" and the whole pass restarts at "again".
 */
static int check_chunks_and_extents(struct btrfs_root *root)
{
	struct rb_root dev_cache;
	struct cache_tree chunk_cache;
	struct block_group_tree block_group_cache;
	struct device_extent_tree dev_extent_cache;
	struct cache_tree extent_cache;
	struct cache_tree seen;
	struct cache_tree pending;
	struct cache_tree reada;
	struct cache_tree nodes;
	struct extent_io_tree excluded_extents;
	struct cache_tree corrupt_blocks;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret, err = 0;
	struct block_info *bits;
	int bits_nr;
	struct extent_buffer *leaf;
	int slot;
	struct btrfs_root_item ri;
	struct list_head dropping_trees;
	struct list_head normal_trees;
	struct btrfs_root *root1;
	u64 objectid;
	u32 level_size;
	u8 level;

	dev_cache = RB_ROOT;
	cache_tree_init(&chunk_cache);
	block_group_tree_init(&block_group_cache);
	device_extent_tree_init(&dev_extent_cache);

	cache_tree_init(&extent_cache);
	cache_tree_init(&seen);
	cache_tree_init(&pending);
	cache_tree_init(&nodes);
	cache_tree_init(&reada);
	cache_tree_init(&corrupt_blocks);
	extent_io_tree_init(&excluded_extents);
	INIT_LIST_HEAD(&dropping_trees);
	INIT_LIST_HEAD(&normal_trees);

	/* In repair mode hook our caches into fs_info so fixups can see them */
	if (repair) {
		root->fs_info->excluded_extents = &excluded_extents;
		root->fs_info->fsck_extent_cache = &extent_cache;
		root->fs_info->free_extent_hook = free_extent_hook;
		root->fs_info->corrupt_blocks = &corrupt_blocks;
	}

	bits_nr = 1024;
	bits = malloc(bits_nr * sizeof(struct block_info));
	if (!bits) {
		perror("malloc");
		exit(1);
	}

	if (ctx.progress_enabled) {
		ctx.tp = TASK_EXTENTS;
		task_start(ctx.info);
	}

again:
	/* The tree root and chunk root are always fully linked */
	root1 = root->fs_info->tree_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	if (ret < 0)
		goto out;
	root1 = root->fs_info->chunk_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	if (ret < 0)
		goto out;
	btrfs_init_path(&path);
	key.offset = 0;
	key.objectid = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
				&key, &path, 0, 0);
	if (ret < 0)
		goto out;
	/* Queue every root item found in the tree of tree roots */
	while(1) {
		leaf = path.nodes[0];
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret != 0)
				break;
			leaf = path.nodes[0];
			slot = path.slots[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
		if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
			unsigned long offset;
			u64 last_snapshot;

			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
			last_snapshot = btrfs_root_last_snapshot(&ri);
			/*
			 * A zero drop_progress objectid means the tree is
			 * fully linked; otherwise it is a snapshot that was
			 * being dropped and must be walked from drop_progress.
			 */
			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
				level = btrfs_root_level(&ri);
				level_size = root->nodesize;
				ret = add_root_item_to_list(&normal_trees,
						found_key.objectid,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						0, level_size, NULL);
				if (ret < 0)
					goto out;
			} else {
				level = btrfs_root_level(&ri);
				level_size = root->nodesize;
				objectid = found_key.objectid;
				btrfs_disk_key_to_cpu(&found_key,
						      &ri.drop_progress);
				ret = add_root_item_to_list(&dropping_trees,
						objectid,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						ri.drop_level,
						level_size, &found_key);
				if (ret < 0)
					goto out;
			}
		}
		path.slots[0]++;
	}
	btrfs_release_path(&path);

	/*
	 * check_block can return -EAGAIN if it fixes something, please keep
	 * this in mind when dealing with return values from these functions, if
	 * we get -EAGAIN we want to fall through and restart the loop.
	 */
	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
				  &seen, &reada, &nodes, &extent_cache,
				  &chunk_cache, &dev_cache, &block_group_cache,
				  &dev_extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}
	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
				  &pending, &seen, &reada, &nodes,
				  &extent_cache, &chunk_cache, &dev_cache,
				  &block_group_cache, &dev_extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}

	ret = check_chunks(&chunk_cache, &block_group_cache,
			   &dev_extent_cache, NULL, NULL, NULL, 0);
	if (ret) {
		if (ret == -EAGAIN)
			goto loop;
		/* chunk errors are remembered but don't stop the other checks */
		err = ret;
	}

	ret = check_extent_refs(root, &extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}

	ret = check_devices(&dev_cache, &dev_extent_cache);
	if (ret && err)
		ret = err;

out:
	task_stop(ctx.info);
	if (repair) {
		free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
		extent_io_tree_cleanup(&excluded_extents);
		root->fs_info->fsck_extent_cache = NULL;
		root->fs_info->free_extent_hook = NULL;
		root->fs_info->corrupt_blocks = NULL;
		root->fs_info->excluded_extents = NULL;
	}
	free(bits);
	free_chunk_cache_tree(&chunk_cache);
	free_device_cache_tree(&dev_cache);
	free_block_group_tree(&block_group_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	return ret;
loop:
	/* Something was repaired: drop every cache and rescan from scratch */
	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	free_chunk_cache_tree(&chunk_cache);
	free_block_group_tree(&block_group_cache);
	free_device_cache_tree(&dev_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_record_cache(root->fs_info, &extent_cache);
	free_root_item_list(&normal_trees);
	free_root_item_list(&dropping_trees);
	extent_io_tree_cleanup(&excluded_extents);
	goto again;
}
/*
 * Give @root a freshly initialized (empty, level 0) root node.  When
 * @overwrite is set, or when allocating a new block fails, the existing
 * node is reinitialized in place instead.  The root is then added to the
 * dirty list so it gets written out with the transaction.
 *
 * Returns 0 on success, negative errno if the root item update fails.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, int overwrite)
{
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	int level;
	int ret;
	struct btrfs_disk_key disk_key = {0,0,0};

	level = 0;

	if (overwrite) {
		c = old;
		extent_buffer_get(c);
		goto init;
	}
	c = btrfs_alloc_free_block(trans, root,
				   root->nodesize,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	if (IS_ERR(c)) {
		/* Allocation failed: fall back to reusing the old node */
		c = old;
		extent_buffer_get(c);
		overwrite = 1;
	}
init:
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 *
	 * 1.overwrite previous root.
	 *
	 * 2.reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
					  trans->transid);
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
		if (ret) {
			free_extent_buffer(c);
			return ret;
		}
	}
	free_extent_buffer(old);
	root->node = c;
	add_root_to_dirty_list(root);
	return 0;
}
8472 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8473 struct extent_buffer *eb, int tree_root)
8475 struct extent_buffer *tmp;
8476 struct btrfs_root_item *ri;
8477 struct btrfs_key key;
8478 u64 bytenr;
8479 u32 nodesize;
8480 int level = btrfs_header_level(eb);
8481 int nritems;
8482 int ret;
8483 int i;
8486 * If we have pinned this block before, don't pin it again.
8487 * This can not only avoid forever loop with broken filesystem
8488 * but also give us some speedups.
8490 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8491 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8492 return 0;
8494 btrfs_pin_extent(fs_info, eb->start, eb->len);
8496 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8497 nritems = btrfs_header_nritems(eb);
8498 for (i = 0; i < nritems; i++) {
8499 if (level == 0) {
8500 btrfs_item_key_to_cpu(eb, &key, i);
8501 if (key.type != BTRFS_ROOT_ITEM_KEY)
8502 continue;
8503 /* Skip the extent root and reloc roots */
8504 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8505 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8506 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8507 continue;
8508 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8509 bytenr = btrfs_disk_root_bytenr(eb, ri);
8512 * If at any point we start needing the real root we
8513 * will have to build a stump root for the root we are
8514 * in, but for now this doesn't actually use the root so
8515 * just pass in extent_root.
8517 tmp = read_tree_block(fs_info->extent_root, bytenr,
8518 nodesize, 0);
8519 if (!extent_buffer_uptodate(tmp)) {
8520 fprintf(stderr, "Error reading root block\n");
8521 return -EIO;
8523 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8524 free_extent_buffer(tmp);
8525 if (ret)
8526 return ret;
8527 } else {
8528 bytenr = btrfs_node_blockptr(eb, i);
8530 /* If we aren't the tree root don't read the block */
8531 if (level == 1 && !tree_root) {
8532 btrfs_pin_extent(fs_info, bytenr, nodesize);
8533 continue;
8536 tmp = read_tree_block(fs_info->extent_root, bytenr,
8537 nodesize, 0);
8538 if (!extent_buffer_uptodate(tmp)) {
8539 fprintf(stderr, "Error reading tree block\n");
8540 return -EIO;
8542 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8543 free_extent_buffer(tmp);
8544 if (ret)
8545 return ret;
8549 return 0;
8552 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8554 int ret;
8556 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8557 if (ret)
8558 return ret;
8560 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group information purely from the chunk
 * tree: clear the avail_*_alloc_bits, create one block group per chunk
 * item, mark each chunk's range dirty in the free space cache, then flag
 * every block group as cached.  Used when the on-disk block group items
 * can't be trusted (e.g. before re-initing the extent tree).
 *
 * Returns 0 on success, negative errno on failure.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	int ret;
	u64 start;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
	if (ret < 0) {
		btrfs_free_path(path);
		return ret;
	}

	/*
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks.  This happens with
	 * restored images every time and could happen in real life I guess.
	 */
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;

	/* First we need to create the in-memory block groups */
	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(fs_info->chunk_root, path);
			if (ret < 0) {
				btrfs_free_path(path);
				return ret;
			}
			if (ret) {
				ret = 0;
				break;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
			path->slots[0]++;
			continue;
		}

		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		/* key.offset is the chunk's logical start address */
		btrfs_add_block_group(fs_info, 0,
				      btrfs_chunk_type(leaf, chunk),
				      key.objectid, key.offset,
				      btrfs_chunk_length(leaf, chunk));
		set_extent_dirty(&fs_info->free_space_cache, key.offset,
				 key.offset + btrfs_chunk_length(leaf, chunk),
				 GFP_NOFS);
		path->slots[0]++;
	}
	/* Mark every block group as cached so allocation can use them */
	start = 0;
	while (1) {
		cache = btrfs_lookup_first_block_group(fs_info, start);
		if (!cache)
			break;
		cache->cached = 1;
		start = cache->key.objectid + cache->key.offset;
	}

	btrfs_free_path(path);
	return 0;
}
/*
 * Remove any pending balance state from the tree of tree roots: delete
 * the balance item (if present), delete every tree-reloc root item, and
 * reinitialize the data reloc tree.  When no balance item exists we skip
 * straight to reinitializing the data reloc tree.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;
	int ret;
	int found = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		/* ret > 0: no balance item, nothing to delete */
		if (ret > 0)
			ret = 0;
		if (!ret)
			goto reinit_data_reloc;
		else
			goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;
	btrfs_release_path(path);

	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	/*
	 * Batch contiguous reloc root items into [del_slot, del_slot+del_nr)
	 * and delete them per leaf, re-searching after each deletion.
	 */
	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			if (!found)
				break;

			if (del_nr) {
				ret = btrfs_del_items(trans, root, path,
						      del_slot, del_nr);
				del_nr = 0;
				if (ret)
					goto out;
			}
			key.offset++;
			btrfs_release_path(path);

			found = 0;
			ret = btrfs_search_slot(trans, root, &key, path,
						-1, 1);
			if (ret < 0)
				goto out;
			continue;
		}
		found = 1;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
			break;
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			path->slots[0]++;
			continue;
		}
		if (!del_nr) {
			del_slot = path->slots[0];
			del_nr = 1;
		} else {
			del_nr++;
		}
		path->slots[0]++;
	}

	if (del_nr) {
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			goto out;
	}
	btrfs_release_path(path);

reinit_data_reloc:
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
		goto out;
	}
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	if (ret)
		goto out;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
	btrfs_free_path(path);
	return ret;
}
/*
 * Throw away and rebuild the extent tree: pin all metadata currently in
 * use, recreate the in-memory block groups from the chunk tree, reinit
 * the extent root, re-insert the block group items and clear any pending
 * balance state.  Refuses to run on mixed-block-group filesystems.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int reinit_extent_tree(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info)
{
	u64 start = 0;
	int ret;

	/*
	 * The only reason we don't do this is because right now we're just
	 * walking the trees we find and pinning down their bytes, we don't look
	 * at any of the leaves.  In order to do mixed groups we'd have to check
	 * the leaves of any fs roots and pin down the bytes for any file
	 * extents we find.  Not hard but why do it if we don't have to?
	 */
	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
		fprintf(stderr, "We don't support re-initing the extent tree "
			"for mixed block groups yet, please notify a btrfs "
			"developer you want to do this so they can add this "
			"functionality.\n");
		return -EINVAL;
	}

	/*
	 * first we need to walk all of the trees except the extent tree and pin
	 * down the bytes that are in use so we don't overwrite any existing
	 * metadata.
	 */
	ret = pin_metadata_blocks(fs_info);
	if (ret) {
		fprintf(stderr, "error pinning down used bytes\n");
		return ret;
	}

	/*
	 * Need to drop all the block groups since we're going to recreate all
	 * of them again.
	 */
	btrfs_free_block_groups(fs_info);
	ret = reset_block_groups(fs_info);
	if (ret) {
		fprintf(stderr, "error resetting the block groups\n");
		return ret;
	}

	/* Ok we can allocate now, reinit the extent root */
	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
	if (ret) {
		fprintf(stderr, "extent root initialization failed\n");
		/*
		 * When the transaction code is updated we should end the
		 * transaction, but for now progs only knows about commit so
		 * just return an error.
		 */
		return ret;
	}

	/*
	 * Now we have all the in-memory block groups setup so we can make
	 * allocations properly, and the metadata we care about is safe since we
	 * pinned all of it above.
	 */
	while (1) {
		struct btrfs_block_group_cache *cache;

		cache = btrfs_lookup_first_block_group(fs_info, start);
		if (!cache)
			break;
		start = cache->key.objectid + cache->key.offset;
		ret = btrfs_insert_item(trans, fs_info->extent_root,
					&cache->key, &cache->item,
					sizeof(cache->item));
		if (ret) {
			fprintf(stderr, "Error adding block group\n");
			return ret;
		}
		btrfs_extent_post_op(trans, fs_info->extent_root);
	}

	ret = reset_balance(trans, fs_info);
	if (ret)
		fprintf(stderr, "error resetting the pending balance\n");

	return ret;
}
/*
 * Force @eb to be rewritten (COWed): look up its owning root via the
 * header owner, then do a write search with path->lowest_level set to the
 * block's level so the search descends to @eb itself, and commit the
 * transaction.
 *
 * Returns the btrfs_search_slot() result (0 on success), negative errno
 * on setup failure.
 */
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	int ret;

	printf("Recowing metadata block %llu\n", eb->start);
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
			key.objectid);
		return PTR_ERR(root);
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	/* Search for @eb's own first key so the walk ends at this block */
	path->lowest_level = btrfs_header_level(eb);
	if (path->lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
	else
		btrfs_item_key_to_cpu(eb, &key, 0);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
	return ret;
}
/*
 * Delete the item described by @bad from its owning root: read the root
 * given by bad->root_id, search for bad->key inside a transaction and
 * delete it.  An item that no longer exists counts as success.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	int ret;

	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
	       bad->key.type, bad->key.offset);
	key.objectid = bad->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
			key.objectid);
		return PTR_ERR(root);
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
	if (ret) {
		/* ret > 0: item already gone, treat as success */
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
	return ret;
}
8916 static int zero_log_tree(struct btrfs_root *root)
8918 struct btrfs_trans_handle *trans;
8919 int ret;
8921 trans = btrfs_start_transaction(root, 1);
8922 if (IS_ERR(trans)) {
8923 ret = PTR_ERR(trans);
8924 return ret;
8926 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8927 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8928 ret = btrfs_commit_transaction(trans, root);
8929 return ret;
8932 static int populate_csum(struct btrfs_trans_handle *trans,
8933 struct btrfs_root *csum_root, char *buf, u64 start,
8934 u64 len)
8936 u64 offset = 0;
8937 u64 sectorsize;
8938 int ret = 0;
8940 while (offset < len) {
8941 sectorsize = csum_root->sectorsize;
8942 ret = read_extent_data(csum_root, buf, start + offset,
8943 &sectorsize, 0);
8944 if (ret)
8945 break;
8946 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8947 start + offset, buf, sectorsize);
8948 if (ret)
8949 break;
8950 offset += sectorsize;
8952 return ret;
/*
 * Walk every item of @cur_root and, for each BTRFS_FILE_EXTENT_REG file
 * extent, recompute the checksums of its on-disk extent via
 * populate_csum().  -EEXIST from populate_csum() is tolerated (the csum
 * already exists).
 *
 * Returns 0 on success, negative errno on failure.
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root,
				      struct btrfs_root *cur_root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;
	char *buf = NULL;
	u64 start = 0;
	u64 len = 0;
	int slot = 0;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* One sector worth of scratch space for populate_csum() */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	/* Start from the very first key of the subvolume */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* Iterate all regular file extents and fill its csum */
	while (1) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			goto next;
		node = path->nodes[0];
		slot = path->slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
			goto next;
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);

		ret = populate_csum(trans, csum_root, buf, start, len);
		if (ret == -EEXIST)
			ret = 0;
		if (ret < 0)
			goto out;
next:
		/*
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		 * leaf.
		 */
		ret = btrfs_next_item(cur_root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = 0;
			goto out;
		}
	}

out:
	btrfs_free_path(path);
	free(buf);
	return ret;
}
9024 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9025 struct btrfs_root *csum_root)
9027 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9028 struct btrfs_path *path;
9029 struct btrfs_root *tree_root = fs_info->tree_root;
9030 struct btrfs_root *cur_root;
9031 struct extent_buffer *node;
9032 struct btrfs_key key;
9033 int slot = 0;
9034 int ret = 0;
9036 path = btrfs_alloc_path();
9037 if (!path)
9038 return -ENOMEM;
9040 key.objectid = BTRFS_FS_TREE_OBJECTID;
9041 key.offset = 0;
9042 key.type = BTRFS_ROOT_ITEM_KEY;
9044 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9045 if (ret < 0)
9046 goto out;
9047 if (ret > 0) {
9048 ret = -ENOENT;
9049 goto out;
9052 while (1) {
9053 node = path->nodes[0];
9054 slot = path->slots[0];
9055 btrfs_item_key_to_cpu(node, &key, slot);
9056 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9057 goto out;
9058 if (key.type != BTRFS_ROOT_ITEM_KEY)
9059 goto next;
9060 if (!is_fstree(key.objectid))
9061 goto next;
9062 key.offset = (u64)-1;
9064 cur_root = btrfs_read_fs_root(fs_info, &key);
9065 if (IS_ERR(cur_root) || !cur_root) {
9066 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9067 key.objectid);
9068 goto out;
9070 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9071 cur_root);
9072 if (ret < 0)
9073 goto out;
9074 next:
9075 ret = btrfs_next_item(tree_root, path);
9076 if (ret > 0) {
9077 ret = 0;
9078 goto out;
9080 if (ret < 0)
9081 goto out;
9084 out:
9085 btrfs_free_path(path);
9086 return ret;
/*
 * Recompute data checksums by walking the extent tree: for every extent
 * item flagged BTRFS_EXTENT_FLAG_DATA (key.objectid = bytenr,
 * key.offset = length), run populate_csum() over its range.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
{
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	char *buf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0) {
		btrfs_free_path(path);
		return ret;
	}

	/* One sector worth of scratch space for populate_csum() */
	buf = malloc(csum_root->sectorsize);
	if (!buf) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				break;
			if (ret) {
				ret = 0;
				break;
			}
		}
		leaf = path->nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path->slots[0]++;
			continue;
		}

		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);
		if (!(btrfs_extent_flags(leaf, ei) &
		      BTRFS_EXTENT_FLAG_DATA)) {
			/* skip metadata extents */
			path->slots[0]++;
			continue;
		}

		ret = populate_csum(trans, csum_root, buf, key.objectid,
				    key.offset);
		if (ret)
			break;
		path->slots[0]++;
	}

	btrfs_free_path(path);
	free(buf);
	return ret;
}
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Extent tree re-initialization wipes out all extent info, so in that
 * case the extent tree can't be relied on.  @search_fs_tree selects the
 * source: nonzero walks the fs/subvol trees, zero walks the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9175 static void free_roots_info_cache(void)
9177 if (!roots_info_cache)
9178 return;
9180 while (!cache_tree_empty(roots_info_cache)) {
9181 struct cache_extent *entry;
9182 struct root_item_info *rii;
9184 entry = first_cache_extent(roots_info_cache);
9185 if (!entry)
9186 break;
9187 remove_cache_extent(roots_info_cache, entry);
9188 rii = container_of(entry, struct root_item_info, cache_extent);
9189 free(rii);
9192 free(roots_info_cache);
9193 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every tree root (tree-block
 * extents whose first inline ref is a TREE_BLOCK_REF), the highest-level
 * node seen for that root id: its bytenr, generation, level and how many
 * nodes exist at that level.  Results go into the global
 * roots_info_cache, later consumed by maybe_repair_root_item().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;

	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
			return -ENOMEM;
		cache_tree_init(roots_info_cache);
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		int type;
		u64 flags;
		u64 root_id;
		u8 level;
		struct cache_extent *entry;
		struct root_item_info *rii;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			if (ret < 0) {
				break;
			} else if (ret) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			slot = path->slots[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)
			goto next;

		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);

		/* Only tree blocks are interesting; skip data extents */
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
			goto next;

		/*
		 * METADATA_ITEM keys carry the level in the offset; EXTENT_ITEM
		 * keys have a btrfs_tree_block_info before the inline refs.
		 */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
		} else {
			struct btrfs_tree_block_info *binfo;

			binfo = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
			level = btrfs_tree_block_level(leaf, binfo);
		}

		/*
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		 */
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)
			goto next;

		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		if (!entry) {
			rii = malloc(sizeof(struct root_item_info));
			if (!rii) {
				ret = -ENOMEM;
				goto out;
			}
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;	/* no level seen yet */
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			ASSERT(ret == 0);
		} else {
			rii = container_of(entry, struct root_item_info,
					   cache_extent);
		}

		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);

		/* Track the highest level and count the nodes at that level */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->level = level;
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {
			rii->node_count++;
		}
next:
		path->slots[0]++;
	}

out:
	btrfs_free_path(path);

	return ret;
}
/*
 * Compare the root item at @path against what build_roots_info_cache()
 * derived from the extent tree.  If bytenr/level/generation disagree and
 * @read_only_mode is zero, rewrite the root item in place with the cached
 * values.
 *
 * Returns 0 when the root item matches, 1 when it is (or was) out of
 * date, -ENOENT when the cache has no usable entry, -EINVAL when the root
 * item's generation is newer than the found root node's.
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
{
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;

	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
	if (!entry) {
		fprintf(stderr,
			"Error: could not find extent items for root %llu\n",
			root_key->objectid);
		return -ENOENT;
	}

	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);

	/* More than one node at the top level: no unambiguous root block */
	if (rii->node_count != 1) {
		fprintf(stderr,
			"Error: could not find btree root extent for root %llu\n",
			root_id);
		return -ENOENT;
	}

	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));

	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {

		/*
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		 */
		if (!(read_only_mode && repair))
			fprintf(stderr,
				"%sroot item for root %llu,"
				" current bytenr %llu, current gen %llu, current level %u,"
				" new bytenr %llu, new gen %llu, new level %u\n",
				(read_only_mode ? "" : "fixing "),
				root_id,
				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
				btrfs_root_level(&ri),
				rii->bytenr, rii->gen, rii->level);

		/* Never roll a root item back to an older generation */
		if (btrfs_root_generation(&ri) > rii->gen) {
			fprintf(stderr,
				"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);
			return -EINVAL;
		}

		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
		}

		return 1;
	}

	return 0;
}
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative errno on failure, otherwise the number of root items
 * found outdated (which, in repair mode, were rewritten in place).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	/* Scan the extent tree once to learn each root's real root node. */
	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Walk all subvolume/snapshot root items in the tree of tree roots. */
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* COW the path (cow=1) only when we hold a transaction. */
	ret = btrfs_search_slot(trans, info->tree_root, &key, path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			/* Leaf exhausted: remember the next key, then either
			 * finish or restart the search from that key. */
			int no_more_keys = find_next_key(path, &key);

			btrfs_release_path(path);
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		/* Detect-only (read_only_mode=1) when no transaction is open. */
		ret = maybe_repair_root_item(info, path, &found_key,
					     trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			if (!trans && repair) {
				/* Outdated item found while read-only: redo
				 * this key with a transaction so it can be
				 * rewritten. */
				need_trans = 1;
				key = found_key;
				btrfs_release_path(path);
				goto again;
			}
			bad_roots++;
		}
next:
		path->slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_free_path(path);
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
/* Usage text for "btrfs check"; printed verbatim by usage(), one line per
 * entry, NULL-terminated. */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
9531 int cmd_check(int argc, char **argv)
9533 struct cache_tree root_cache;
9534 struct btrfs_root *root;
9535 struct btrfs_fs_info *info;
9536 u64 bytenr = 0;
9537 u64 subvolid = 0;
9538 u64 tree_root_bytenr = 0;
9539 u64 chunk_root_bytenr = 0;
9540 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9541 int ret;
9542 u64 num;
9543 int init_csum_tree = 0;
9544 int readonly = 0;
9545 int qgroup_report = 0;
9546 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9548 while(1) {
9549 int c;
9550 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9551 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9552 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9553 static const struct option long_options[] = {
9554 { "super", required_argument, NULL, 's' },
9555 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9556 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9557 { "init-csum-tree", no_argument, NULL,
9558 GETOPT_VAL_INIT_CSUM },
9559 { "init-extent-tree", no_argument, NULL,
9560 GETOPT_VAL_INIT_EXTENT },
9561 { "check-data-csum", no_argument, NULL,
9562 GETOPT_VAL_CHECK_CSUM },
9563 { "backup", no_argument, NULL, 'b' },
9564 { "subvol-extents", required_argument, NULL, 'E' },
9565 { "qgroup-report", no_argument, NULL, 'Q' },
9566 { "tree-root", required_argument, NULL, 'r' },
9567 { "chunk-root", required_argument, NULL,
9568 GETOPT_VAL_CHUNK_TREE },
9569 { "progress", no_argument, NULL, 'p' },
9570 { NULL, 0, NULL, 0}
9573 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9574 if (c < 0)
9575 break;
9576 switch(c) {
9577 case 'a': /* ignored */ break;
9578 case 'b':
9579 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9580 break;
9581 case 's':
9582 num = arg_strtou64(optarg);
9583 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9584 fprintf(stderr,
9585 "ERROR: super mirror should be less than: %d\n",
9586 BTRFS_SUPER_MIRROR_MAX);
9587 exit(1);
9589 bytenr = btrfs_sb_offset(((int)num));
9590 printf("using SB copy %llu, bytenr %llu\n", num,
9591 (unsigned long long)bytenr);
9592 break;
9593 case 'Q':
9594 qgroup_report = 1;
9595 break;
9596 case 'E':
9597 subvolid = arg_strtou64(optarg);
9598 break;
9599 case 'r':
9600 tree_root_bytenr = arg_strtou64(optarg);
9601 break;
9602 case GETOPT_VAL_CHUNK_TREE:
9603 chunk_root_bytenr = arg_strtou64(optarg);
9604 break;
9605 case 'p':
9606 ctx.progress_enabled = true;
9607 break;
9608 case '?':
9609 case 'h':
9610 usage(cmd_check_usage);
9611 case GETOPT_VAL_REPAIR:
9612 printf("enabling repair mode\n");
9613 repair = 1;
9614 ctree_flags |= OPEN_CTREE_WRITES;
9615 break;
9616 case GETOPT_VAL_READONLY:
9617 readonly = 1;
9618 break;
9619 case GETOPT_VAL_INIT_CSUM:
9620 printf("Creating a new CRC tree\n");
9621 init_csum_tree = 1;
9622 repair = 1;
9623 ctree_flags |= OPEN_CTREE_WRITES;
9624 break;
9625 case GETOPT_VAL_INIT_EXTENT:
9626 init_extent_tree = 1;
9627 ctree_flags |= (OPEN_CTREE_WRITES |
9628 OPEN_CTREE_NO_BLOCK_GROUPS);
9629 repair = 1;
9630 break;
9631 case GETOPT_VAL_CHECK_CSUM:
9632 check_data_csum = 1;
9633 break;
9637 if (check_argc_exact(argc - optind, 1))
9638 usage(cmd_check_usage);
9640 if (ctx.progress_enabled) {
9641 ctx.tp = TASK_NOTHING;
9642 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9645 /* This check is the only reason for --readonly to exist */
9646 if (readonly && repair) {
9647 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9648 exit(1);
9651 radix_tree_init();
9652 cache_tree_init(&root_cache);
9654 if((ret = check_mounted(argv[optind])) < 0) {
9655 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9656 goto err_out;
9657 } else if(ret) {
9658 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9659 ret = -EBUSY;
9660 goto err_out;
9663 /* only allow partial opening under repair mode */
9664 if (repair)
9665 ctree_flags |= OPEN_CTREE_PARTIAL;
9667 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9668 chunk_root_bytenr, ctree_flags);
9669 if (!info) {
9670 fprintf(stderr, "Couldn't open file system\n");
9671 ret = -EIO;
9672 goto err_out;
9675 global_info = info;
9676 root = info->fs_root;
9679 * repair mode will force us to commit transaction which
9680 * will make us fail to load log tree when mounting.
9682 if (repair && btrfs_super_log_root(info->super_copy)) {
9683 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9684 if (!ret) {
9685 ret = 1;
9686 goto close_out;
9688 ret = zero_log_tree(root);
9689 if (ret) {
9690 fprintf(stderr, "fail to zero log tree\n");
9691 goto close_out;
9695 uuid_unparse(info->super_copy->fsid, uuidbuf);
9696 if (qgroup_report) {
9697 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9698 uuidbuf);
9699 ret = qgroup_verify_all(info);
9700 if (ret == 0)
9701 ret = report_qgroups(1);
9702 goto close_out;
9704 if (subvolid) {
9705 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9706 subvolid, argv[optind], uuidbuf);
9707 ret = print_extent_state(info, subvolid);
9708 goto close_out;
9710 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9712 if (!extent_buffer_uptodate(info->tree_root->node) ||
9713 !extent_buffer_uptodate(info->dev_root->node) ||
9714 !extent_buffer_uptodate(info->chunk_root->node)) {
9715 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9716 ret = -EIO;
9717 goto close_out;
9720 if (init_extent_tree || init_csum_tree) {
9721 struct btrfs_trans_handle *trans;
9723 trans = btrfs_start_transaction(info->extent_root, 0);
9724 if (IS_ERR(trans)) {
9725 fprintf(stderr, "Error starting transaction\n");
9726 ret = PTR_ERR(trans);
9727 goto close_out;
9730 if (init_extent_tree) {
9731 printf("Creating a new extent tree\n");
9732 ret = reinit_extent_tree(trans, info);
9733 if (ret)
9734 goto close_out;
9737 if (init_csum_tree) {
9738 fprintf(stderr, "Reinit crc root\n");
9739 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9740 if (ret) {
9741 fprintf(stderr, "crc root initialization failed\n");
9742 ret = -EIO;
9743 goto close_out;
9746 ret = fill_csum_tree(trans, info->csum_root,
9747 init_extent_tree);
9748 if (ret) {
9749 fprintf(stderr, "crc refilling failed\n");
9750 return -EIO;
9754 * Ok now we commit and run the normal fsck, which will add
9755 * extent entries for all of the items it finds.
9757 ret = btrfs_commit_transaction(trans, info->extent_root);
9758 if (ret)
9759 goto close_out;
9761 if (!extent_buffer_uptodate(info->extent_root->node)) {
9762 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9763 ret = -EIO;
9764 goto close_out;
9766 if (!extent_buffer_uptodate(info->csum_root->node)) {
9767 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9768 ret = -EIO;
9769 goto close_out;
9772 if (!ctx.progress_enabled)
9773 fprintf(stderr, "checking extents\n");
9774 ret = check_chunks_and_extents(root);
9775 if (ret)
9776 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9778 ret = repair_root_items(info);
9779 if (ret < 0)
9780 goto close_out;
9781 if (repair) {
9782 fprintf(stderr, "Fixed %d roots.\n", ret);
9783 ret = 0;
9784 } else if (ret > 0) {
9785 fprintf(stderr,
9786 "Found %d roots with an outdated root item.\n",
9787 ret);
9788 fprintf(stderr,
9789 "Please run a filesystem check with the option --repair to fix them.\n");
9790 ret = 1;
9791 goto close_out;
9794 if (!ctx.progress_enabled) {
9795 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9796 fprintf(stderr, "checking free space tree\n");
9797 else
9798 fprintf(stderr, "checking free space cache\n");
9800 ret = check_space_cache(root);
9801 if (ret)
9802 goto out;
9805 * We used to have to have these hole extents in between our real
9806 * extents so if we don't have this flag set we need to make sure there
9807 * are no gaps in the file extents for inodes, otherwise we can just
9808 * ignore it when this happens.
9810 no_holes = btrfs_fs_incompat(root->fs_info,
9811 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9812 if (!ctx.progress_enabled)
9813 fprintf(stderr, "checking fs roots\n");
9814 ret = check_fs_roots(root, &root_cache);
9815 if (ret)
9816 goto out;
9818 fprintf(stderr, "checking csums\n");
9819 ret = check_csums(root);
9820 if (ret)
9821 goto out;
9823 fprintf(stderr, "checking root refs\n");
9824 ret = check_root_refs(root, &root_cache);
9825 if (ret)
9826 goto out;
9828 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9829 struct extent_buffer *eb;
9831 eb = list_first_entry(&root->fs_info->recow_ebs,
9832 struct extent_buffer, recow);
9833 list_del_init(&eb->recow);
9834 ret = recow_extent_buffer(root, eb);
9835 if (ret)
9836 break;
9839 while (!list_empty(&delete_items)) {
9840 struct bad_item *bad;
9842 bad = list_first_entry(&delete_items, struct bad_item, list);
9843 list_del_init(&bad->list);
9844 if (repair)
9845 ret = delete_bad_item(root, bad);
9846 free(bad);
9849 if (info->quota_enabled) {
9850 int err;
9851 fprintf(stderr, "checking quota groups\n");
9852 err = qgroup_verify_all(info);
9853 if (err)
9854 goto out;
9857 if (!list_empty(&root->fs_info->recow_ebs)) {
9858 fprintf(stderr, "Transid errors in file system\n");
9859 ret = 1;
9861 out:
9862 /* Don't override original ret */
9863 if (ret)
9864 report_qgroups(0);
9865 else
9866 ret = report_qgroups(0);
9867 if (found_old_backref) { /*
9868 * there was a disk format change when mixed
9869 * backref was in testing tree. The old format
9870 * existed about one week.
9872 printf("\n * Found old mixed backref format. "
9873 "The old format is not supported! *"
9874 "\n * Please mount the FS in readonly mode, "
9875 "backup data and re-format the FS. *\n\n");
9876 ret = 1;
9878 printf("found %llu bytes used err is %d\n",
9879 (unsigned long long)bytes_used, ret);
9880 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9881 printf("total tree bytes: %llu\n",
9882 (unsigned long long)total_btree_bytes);
9883 printf("total fs tree bytes: %llu\n",
9884 (unsigned long long)total_fs_tree_bytes);
9885 printf("total extent tree bytes: %llu\n",
9886 (unsigned long long)total_extent_tree_bytes);
9887 printf("btree space waste bytes: %llu\n",
9888 (unsigned long long)btree_space_waste);
9889 printf("file data blocks allocated: %llu\n referenced %llu\n",
9890 (unsigned long long)data_bytes_allocated,
9891 (unsigned long long)data_bytes_referenced);
9893 free_root_recs_tree(&root_cache);
9894 close_out:
9895 close_ctree(root);
9896 err_out:
9897 if (ctx.progress_enabled)
9898 task_deinit(ctx.info);
9900 return ret;