btrfs-progs: extent-tree: Add add_merge_cache_extent function
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobd191221f37196f8ad0dc4c8f4631ef171e39d20d
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
/* Which checking phase the progress indicator is currently reporting. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* State shared with the progress-indicator task (see print_status_check). */
struct task_ctx {
	int progress_enabled;	/* non-zero when progress output is wanted */
	enum task_position tp;	/* phase currently being checked */
	struct task_info *info;	/* handle used by the task-utils helpers */
};
/* Accounting totals accumulated while walking the filesystem trees. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Behavior switches, set from command-line options. */
static int repair = 0;			/* attempt to fix the problems found */
static int no_holes = 0;		/* fs uses the NO_HOLES incompat feature */
static int init_extent_tree = 0;	/* rebuild the extent tree from scratch */
static int check_data_csum = 0;		/* also verify data checksums */
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
/*
 * Body of the progress-indicator thread: print a spinning status line
 * for the phase recorded in the shared task_ctx.  The loop never exits
 * on its own; the task framework cancels the thread when done.
 *
 * @p: pointer to the struct task_ctx of this check run.
 */
static void *print_status_check(void *p)
{
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	uint32_t count = 0;
	/* Indexed by enum task_position; TASK_NOTHING is filtered below. */
	static char *task_position_string[] = {
		"checking extents",
		"checking free space cache",
		"checking fs roots",
	};

	task_period_start(priv->info, 1000 /* 1s */);

	if (priv->tp == TASK_NOTHING)
		return NULL;

	while (1) {
		printf("%s [%c]\r", task_position_string[priv->tp],
				work_indicator[count % 4]);
		count++;
		fflush(stdout);
		/* Sleep until the next 1s period tick */
		task_period_wait(priv->info);
	}
	return NULL;
}
/*
 * Task-framework teardown hook for the progress indicator: finish the
 * in-place status line with a newline and flush it out.  The context
 * argument is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
/* Common header embedded in both tree_backref and data_backref. */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;		/* set when this is a data_backref */
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};

/* Backref of a data extent, gathered from extent tree and fs trees. */
struct data_backref {
	struct extent_backref node;
	union {
		u64 parent;	/* for full (shared) backrefs */
		u64 root;	/* for keyed backrefs */
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	u32 found_ref;
};

/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};

/* Backref of a tree block; union mirrors data_backref's parent/root. */
struct tree_backref {
	struct extent_backref node;
	union {
		u64 parent;
		u64 root;
	};
};
/*
 * In-memory record of one extent while the extent tree is being
 * cross-checked against the references found in the other trees.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;		/* duplicate records for same bytenr */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;			/* refs found while scanning trees */
	u64 extent_item_refs;		/* refs claimed by the extent item */
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	int flag_block_full_backref;
	unsigned int found_rec:1;	/* extent item itself was found */
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;	/* tree block rather than data */
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
/* One name (dir item / dir index / inode ref) pointing at an inode. */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	unsigned int filetype:8;
	int errors;			/* REF_ERR_* bits */
	unsigned int ref_type;
	u64 dir;			/* inode number of the directory */
	u64 index;			/* dir index of this name */
	u16 namelen;
	char name[0];			/* namelen bytes + NUL, allocated inline */
};

/* A root whose tree still has to be walked, queued on a list. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};
/* Error bits stored in inode_backref/root_backref->errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) // 100
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)

/* One hole [start, start + len) in a file, kept in a per-inode rb tree. */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
235 return (u64)-1;
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238 return hole->start;
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
250 return -1;
251 if (hole1->start < hole2->start)
252 return 1;
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
259 return -1;
260 /* Hole 2 will be merge center */
261 return 1;
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be continuous holes.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	if (!hole)
		return -ENOMEM;
	hole->start = start;
	hole->len = len;
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
				node);
	if (prev && prev->start + prev->len >= hole->start) {
		/* Absorb prev: extend backwards to prev->start */
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);
		free(prev);
		prev = NULL;
	}

	/* iterate merge with next holes */
	while (1) {
		if (!rb_next(&hole->node))
			break;
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
				node);
		if (hole->start + hole->len >= next->start) {
			/* Only extend if next reaches beyond our end */
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
					hole->start;
			rb_erase(&next->node, holes);
			free(next);
			next = NULL;
		} else
			break;
	}
	return 0;
}
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
319 u64 start;
321 hole = (struct file_extent_hole *)data;
322 start = hole->start;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
326 return -1;
327 if (start >= hole->start && start < hole->start + hole->len)
328 return 0;
329 return 1;
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much restrict than add.
 * The range [start, start + len) must lie entirely inside one recorded
 * hole; the hole is removed and the uncovered remainder(s) re-added.
 *
 * NOTE(review): -EEXIST is returned both when no hole covers @start and
 * when the range overflows the found hole — -ENOENT/-ERANGE would read
 * better, but callers only test for < 0, so the value is kept.
 */
static int del_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	u64 prev_start = 0;
	u64 prev_len = 0;
	u64 next_start = 0;
	u64 next_len = 0;
	struct rb_node *node;
	int have_prev = 0;
	int have_next = 0;
	int ret = 0;

	tmp.start = start;
	tmp.len = len;
	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	if (!node)
		return -EEXIST;
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)
		return -EEXIST;

	/*
	 * Now there will be no overflap, delete the hole and re-add the
	 * split(s) if they exists.
	 */
	if (start > hole->start) {
		/* Left-over gap before the deleted range */
		prev_start = hole->start;
		prev_len = start - hole->start;
		have_prev = 1;
	}
	if (hole->start + hole->len > start + len) {
		/* Left-over gap after the deleted range */
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
		have_next = 1;
	}
	rb_erase(node, holes);
	free(hole);
	if (have_prev) {
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		if (ret < 0)
			return ret;
	}
	if (have_next) {
		ret = add_file_extent_hole(holes, next_start, next_len);
		if (ret < 0)
			return ret;
	}
	return 0;
}
389 static int copy_file_extent_holes(struct rb_root *dst,
390 struct rb_root *src)
392 struct file_extent_hole *hole;
393 struct rb_node *node;
394 int ret = 0;
396 node = rb_first(src);
397 while (node) {
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
400 if (ret)
401 break;
402 node = rb_next(node);
404 return ret;
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
413 while (node) {
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
416 free(hole);
417 node = rb_first(holes);
/*
 * Everything learned about one inode while scanning an fs tree;
 * refcounted because shared subtrees may contribute to the same record.
 */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;		/* set during merge_inode_recs() */
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	int errors;			/* I_ERR_* bits */

	u64 ino;
	u32 nlink;			/* from the inode item */
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;			/* links found while scanning */
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	struct rb_root holes;		/* gaps in the file extent coverage */
	struct list_head orphan_extents;

	u32 refs;
};

/* Error bits for inode_record->errors (printed by print_inode_error). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
/* One name (root ref/backref, dir entry) pointing at a subvolume root. */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	int errors;			/* REF_ERR_* bits */
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	char name[0];			/* namelen bytes + NUL, allocated inline */
};

/* Everything learned about one subvolume root. */
struct root_record {
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};

/* Generic cache_tree entry carrying an opaque payload. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};

/* Per-shared-tree-block state: inode/root records seen under it. */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};

struct block_info {
	u64 start;
	u32 size;
};

/* Walk state for one fs-tree traversal (see enter/leave_shared_node). */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};

/* A corrupted item scheduled for deletion during repair. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};

static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Make @root part of the running transaction, once per transid: mark it
 * dirty-tracked and pin the current root node as the commit root (with
 * an extra extent_buffer reference the commit path will drop).
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
	}
}
/*
 * Map an inode's st_mode file-type bits to the BTRFS_FT_* value used in
 * dir items, via a table indexed by the S_IFMT nibble.
 */
static u8 imode_to_type(u32 imode)
{
#define S_SHIFT 12
	static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
		[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
		[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
		[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
		[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
		[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
		[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
		[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
	};

	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
#undef S_SHIFT
}
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
557 return -1;
558 else if (rec1->devid < rec2->devid)
559 return 1;
560 else
561 return 0;
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
571 size_t size;
572 int ret;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
576 rec->refs = 1;
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590 BUG_ON(!dst_orphan);
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595 BUG_ON(ret < 0);
597 return rec;
/*
 * Report the orphan data extents (extents with no owning file extent
 * item) collected for tree @objectid.  Prints nothing for an empty list.
 */
static void print_orphan_data_extents(struct list_head *orphan_extents,
				      u64 objectid)
{
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
		return;
	printf("The following data extent is lost in tree %llu:\n",
	       objectid);
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
		       orphan->objectid, orphan->offset, orphan->disk_bytenr,
		       orphan->disk_len);
	}
}
/*
 * Print a one-line summary of every I_ERR_* bit set on @rec, followed by
 * the orphan-extent and hole details when those specific errors are set.
 * Silent when rec->errors is 0.
 */
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	if (!errors)
		return;
	/* reloc root errors, we print its corresponding fs root objectid*/
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	}
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	fprintf(stderr, "\n");
	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;
		int found = 0;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
		while (node) {
			found = 1;
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
		}
		/* No recorded hole: the whole (rounded-up) file is missing */
		if (!found)
			fprintf(stderr, "\tstart: 0, len: %llu\n",
				round_up(rec->isize, root->sectorsize));
	}
}
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720 u64 ino, int mod)
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
725 int ret;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
728 if (cache) {
729 node = container_of(cache, struct ptr_node, cache);
730 rec = node->data;
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
733 rec->refs--;
734 rec = node->data;
736 } else if (mod) {
737 rec = calloc(1, sizeof(*rec));
738 rec->ino = ino;
739 rec->extent_start = (u64)-1;
740 rec->refs = 1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
748 node->data = rec;
750 if (ino == BTRFS_FREE_INO_OBJECTID)
751 rec->found_link = 1;
753 ret = insert_cache_extent(inode_cache, &node->cache);
754 BUG_ON(ret);
756 return rec;
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
767 free(orphan);
/*
 * Drop one reference on @rec; on the last reference free the record and
 * everything it owns (backrefs, orphan extents, hole tree).
 */
static void free_inode_rec(struct inode_record *rec)
{
	struct inode_backref *backref;

	if (--rec->refs > 0)
		return;

	while (!list_empty(&rec->backrefs)) {
		backref = list_entry(rec->backrefs.next,
				     struct inode_backref, list);
		list_del(&backref->list);
		free(backref);
	}
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);
	free(rec);
}
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793 return 1;
794 return 0;
/*
 * Finalize @rec as far as currently possible: drop backrefs that are
 * fully matched, derive the mode-dependent error bits once the record
 * is checked, and free the record from @inode_cache when nothing is
 * left to report (see can_free_inode_rec).
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
{
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	unsigned char filetype;

	if (!rec->found_inode_item)
		return;

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* Fully matched, error-free name: forget it */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
				free(backref);
			}
		}
	}

	/* Don't conclude anything while still scanning or merging */
	if (!rec->checked || rec->merging)
		return;

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* Holes below isize are an error unless NO_HOLES is set */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
	}

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;
	}

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		free_inode_rec(rec);
	}
}
/*
 * Check whether an orphan item exists for inode @ino in @root.
 * Returns 0 when found, -ENOENT when absent, or the negative error
 * propagated from btrfs_search_slot.
 */
static int check_orphan_item(struct btrfs_root *root, u64 ino)
{
	struct btrfs_path path;
	struct btrfs_key key;
	int ret;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = ino;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	btrfs_release_path(&path);
	/* ret > 0 means the exact key was not found */
	if (ret > 0)
		ret = -ENOENT;
	return ret;
}
/*
 * Record the contents of an INODE_ITEM into the walk's current inode
 * record.  Returns 1 (and flags a duplicate) if an inode item was
 * already seen for this inode, 0 otherwise.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_inode_item *item;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
		return 1;
	}
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
		rec->nodatasum = 1;
	rec->found_inode_item = 1;
	/* nlink == 0 requires an orphan item; flag it until one is found */
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
	return 0;
}
903 static struct inode_backref *get_inode_backref(struct inode_record *rec,
904 const char *name,
905 int namelen, u64 dir)
907 struct inode_backref *backref;
909 list_for_each_entry(backref, &rec->backrefs, list) {
910 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
911 break;
912 if (backref->dir != dir || backref->namelen != namelen)
913 continue;
914 if (memcmp(name, backref->name, namelen))
915 continue;
916 return backref;
919 backref = malloc(sizeof(*backref) + namelen + 1);
920 memset(backref, 0, sizeof(*backref));
921 backref->dir = dir;
922 backref->namelen = namelen;
923 memcpy(backref->name, name, namelen);
924 backref->name[namelen] = '\0';
925 list_add_tail(&backref->list, &rec->backrefs);
926 return backref;
/*
 * Record one name reference to inode @ino found as a dir index, dir
 * item, or inode (ext)ref, cross-checking it against the parts of the
 * same backref already seen and accumulating REF_ERR_* bits on
 * mismatch.  Always returns 0.
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     int filetype, int itemtype, int errors)
{
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	backref = get_inode_backref(rec, name, namelen, dir);
	if (errors)
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		/* Only dir items count as links */
		rec->found_link++;
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		else
			backref->index = index;

		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	} else {
		BUG_ON(1);
	}

	maybe_free_inode_rec(inode_cache, rec);
	return 0;
}
/*
 * Fold everything recorded in @src into @dst (the record living in
 * @dst_cache): replay src's backrefs through add_inode_backref, OR in
 * the found-flags and errors, merge hole trees, link/size counters and
 * extent ranges, and take src's inode item if dst has none yet.
 * dst->merging suppresses premature finalization while this runs.
 *
 * Returns 0 on success, negative errno from copy_file_extent_holes.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
{
	struct inode_backref *backref;
	u32 dir_count = 0;
	int ret = 0;

	dst->merging = 1;
	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					backref->index, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_INDEX_KEY, backref->errors);
		}
		if (backref->found_dir_item) {
			dir_count++;
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, 0, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_ITEM_KEY, backref->errors);
		}
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, backref->index,
					backref->name, backref->namelen, 0,
					backref->ref_type, backref->errors);
		}
	}

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* Keep the hole set that reports the earlier first gap */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
		if (ret < 0)
			return ret;
	}

	/* add_inode_backref above already counted the dir items */
	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		} else {
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				/*
				 * NOTE(review): this ret is never checked —
				 * an -ENOMEM here is silently dropped.
				 */
				ret = add_file_extent_hole(&dst->holes,
					dst->extent_end,
					src->extent_start - dst->extent_end);
			}
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;
		}
	}

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		} else {
			dst->errors |= I_ERR_DUP_INODE_ITEM;
		}
	}
	dst->merging = 0;

	return 0;
}
/*
 * Move (or, while src_node is still referenced, copy) every root and
 * inode record from @src_node into @dst_node, merging records that
 * already exist in the destination.  Also advances dst_node->current to
 * src's current inode when src was further along.  Returns 0.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference: records can be moved instead of copied */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* Destination already tracks this inode: merge */
			conflict = get_inode_rec(dst, rec->ino, 1);
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	/* First pass handled root records, second pass inode records */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
	}
	return 0;
}
/*
 * cache_tree destructor callback: release one ptr_node and drop the
 * reference it held on its inode_record.
 */
static void free_inode_ptr(struct cache_extent *cache)
{
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	rec = node->data;
	free_inode_rec(rec);
	free(node);
}

/* Generates free_inode_recs_tree() for cache trees of inode records. */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1144 static struct shared_node *find_shared_node(struct cache_tree *shared,
1145 u64 bytenr)
1147 struct cache_extent *cache;
1148 struct shared_node *node;
1150 cache = lookup_cache_extent(shared, bytenr, 1);
1151 if (cache) {
1152 node = container_of(cache, struct shared_node, cache);
1153 return node;
1155 return NULL;
1158 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1160 int ret;
1161 struct shared_node *node;
1163 node = calloc(1, sizeof(*node));
1164 node->cache.start = bytenr;
1165 node->cache.size = 1;
1166 cache_tree_init(&node->root_cache);
1167 cache_tree_init(&node->inode_cache);
1168 node->refs = refs;
1170 ret = insert_cache_extent(shared, &node->cache);
1171 BUG_ON(ret);
1172 return 0;
/*
 * Called when the tree walk descends into a block with more than one
 * reference.  Returns 0 when this subtree must still be scanned (first
 * visit), 1 when it was already covered and the walker can skip it —
 * in the skip case previously gathered records are spliced into the
 * currently active node (or dropped for a dead root).
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		/* First visit: track it and make it the active node */
		add_shared_node(&wc->shared, bytenr, refs);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	/* Dead root (refs == 0): just drop our share of the records */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}
/*
 * Counterpart of enter_shared_node(): called when the walk finishes the
 * currently active shared block.  The active node moves back up to the
 * nearest tracked ancestor; the finished node's records are either spliced
 * into that ancestor, or (for the last pass over a dead root) just
 * un-referenced.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	/* Find the closest tracked ancestor above @level. */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		/* Dead root at its top: only drop the reference. */
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
/*
 * Returns:
 * < 0 - on error
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	/* Fast path: an exact ROOT_REF (parent -> child) proves the link. */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;

	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	/* Otherwise scan every ROOT_BACKREF item of the child root. */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		/* The child has at least one parent root. */
		has_parent = 1;

		/* key.offset of a ROOT_BACKREF is the parent root id. */
		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
/*
 * Record every name stored in a DIR_ITEM/DIR_INDEX item as an inode
 * backref of the entry it points to.
 *
 * key->objectid is the directory inode, key->offset the name hash (for
 * DIR_ITEM) or the index (for DIR_INDEX).  One item may contain several
 * btrfs_dir_item entries packed back to back; all are walked.  A
 * DIR_INDEX item carrying more than one entry is flagged with
 * I_ERR_DUP_DIR_INDEX on the directory's record.
 */
static int process_dir_item(struct btrfs_root *root,
			    struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	int filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		/* Truncate over-long names but remember the error. */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		/* Subvolume entries point at root items, files at inodes. */
		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr, "invalid location in dir item %u\n",
				location.type);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		/* Advance by the full on-disk entry size, not the
		 * (possibly truncated) copied length. */
		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
/*
 * Record every name stored in an INODE_REF item as a backref of the inode.
 *
 * key->objectid is the inode number, key->offset the parent directory.
 * One item may contain several (index, name) entries back to back.
 */
static int process_inode_ref(struct extent_buffer *eb,
			     int slot, struct btrfs_key *key,
			     struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_ref_name_len(eb, ref);
		index = btrfs_inode_ref_index(eb, ref);
		/* Truncate over-long names but remember the error. */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
		add_inode_backref(inode_cache, key->objectid, key->offset,
				  index, namebuf, len, 0, key->type, error);

		/* Advance by the full on-disk entry size. */
		len = sizeof(*ref) + name_len;
		ref = (struct btrfs_inode_ref *)((char *)ref + len);
		cur += len;
	}
	return 0;
}
/*
 * Record every name stored in an INODE_EXTREF item as a backref of the
 * inode.  Unlike INODE_REF, the parent directory is stored inside each
 * extref entry rather than in the item key's offset.
 */
static int process_inode_extref(struct extent_buffer *eb,
				int slot, struct btrfs_key *key,
				struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	u64 parent;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_extref *extref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_extref_name_len(eb, extref);
		index = btrfs_inode_extref_index(eb, extref);
		parent = btrfs_inode_extref_parent(eb, extref);
		/* Truncate over-long names but remember the error. */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf,
				   (unsigned long)(extref + 1), len);
		add_inode_backref(inode_cache, key->objectid, parent,
				  index, namebuf, len, 0, key->type, error);

		/* Advance by the full on-disk entry size. */
		len = sizeof(*extref) + name_len;
		extref = (struct btrfs_inode_extref *)((char *)extref + len);
		cur += len;
	}
	return 0;
}
1467 static int count_csum_range(struct btrfs_root *root, u64 start,
1468 u64 len, u64 *found)
1470 struct btrfs_key key;
1471 struct btrfs_path path;
1472 struct extent_buffer *leaf;
1473 int ret;
1474 size_t size;
1475 *found = 0;
1476 u64 csum_end;
1477 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1479 btrfs_init_path(&path);
1481 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1482 key.offset = start;
1483 key.type = BTRFS_EXTENT_CSUM_KEY;
1485 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1486 &key, &path, 0, 0);
1487 if (ret < 0)
1488 goto out;
1489 if (ret > 0 && path.slots[0] > 0) {
1490 leaf = path.nodes[0];
1491 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1492 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1493 key.type == BTRFS_EXTENT_CSUM_KEY)
1494 path.slots[0]--;
1497 while (len > 0) {
1498 leaf = path.nodes[0];
1499 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1500 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1501 if (ret > 0)
1502 break;
1503 else if (ret < 0)
1504 goto out;
1505 leaf = path.nodes[0];
1508 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1509 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1510 key.type != BTRFS_EXTENT_CSUM_KEY)
1511 break;
1513 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1514 if (key.offset >= start + len)
1515 break;
1517 if (key.offset > start)
1518 start = key.offset;
1520 size = btrfs_item_size_nr(leaf, path.slots[0]);
1521 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1522 if (csum_end > start) {
1523 size = min(csum_end - start, len);
1524 len -= size;
1525 start += size;
1526 *found += size;
1529 path.slots[0]++;
1531 out:
1532 btrfs_release_path(&path);
1533 if (ret < 0)
1534 return ret;
1535 return 0;
/*
 * Check one EXTENT_DATA item of the currently tracked inode.
 *
 * Accounts the extent into rec->found_size and rec->extent_end, records
 * holes and overlaps between consecutive extents, sanity-checks the
 * fields of inline/regular/prealloc extents, and verifies csum coverage
 * for on-disk extents.
 */
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->sectorsize - 1;
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	/* Compare with the end of the previous extent: overlap or hole. */
	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		rec->found_size += num_bytes;
		/* Round the inline length up to a sector boundary for
		 * extent_end bookkeeping. */
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents must not be compressed/encoded. */
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 denotes a hole extent: no size. */
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			/* Regular extents must be fully checksummed ... */
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* ... prealloc extents must have no csums at all. */
			if (found > 0)
				rec->errors |= I_ERR_ODD_CSUM_ITEM;
		}
	}
	return 0;
}
/*
 * Process every item of a leaf, dispatching by key type to the
 * dir/inode/extent item checkers.  active_node->current is kept pointing
 * at the inode record the currently scanned keys belong to; when a new
 * inode number appears, the previous record is marked checked and
 * possibly freed.
 */
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	/* Dead roots are only walked to drop shared refs; skip the items. */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		/* Keys are sorted by objectid: a larger one starts a new
		 * inode, so close out the previous record. */
		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(root, eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		}
	}
	return ret;
}
1693 static void reada_walk_down(struct btrfs_root *root,
1694 struct extent_buffer *node, int slot)
1696 u64 bytenr;
1697 u64 ptr_gen;
1698 u32 nritems;
1699 u32 blocksize;
1700 int i;
1701 int level;
1703 level = btrfs_header_level(node);
1704 if (level != 1)
1705 return;
1707 nritems = btrfs_header_nritems(node);
1708 blocksize = btrfs_level_size(root, level - 1);
1709 for (i = slot; i < nritems; i++) {
1710 bytenr = btrfs_node_blockptr(node, i);
1711 ptr_gen = btrfs_node_ptr_generation(node, i);
1712 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1717 * Check the child node/leaf by the following condition:
1718 * 1. the first item key of the node/leaf should be the same with the one
1719 * in parent.
1720 * 2. block in parent node should match the child node/leaf.
1721 * 3. generation of parent node and child's header should be consistent.
1723 * Or the child node/leaf pointed by the key in parent is not valid.
1725 * We hope to check leaf owner too, but since subvol may share leaves,
1726 * which makes leaf owner check not so strong, key check should be
1727 * sufficient enough for that case.
1729 static int check_child_node(struct btrfs_root *root,
1730 struct extent_buffer *parent, int slot,
1731 struct extent_buffer *child)
1733 struct btrfs_key parent_key;
1734 struct btrfs_key child_key;
1735 int ret = 0;
1737 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1738 if (btrfs_header_level(child) == 0)
1739 btrfs_item_key_to_cpu(child, &child_key, 0);
1740 else
1741 btrfs_node_key_to_cpu(child, &child_key, 0);
1743 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1744 ret = -EINVAL;
1745 fprintf(stderr,
1746 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1747 parent_key.objectid, parent_key.type, parent_key.offset,
1748 child_key.objectid, child_key.type, child_key.offset);
1750 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1751 ret = -EINVAL;
1752 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1753 btrfs_node_blockptr(parent, slot),
1754 btrfs_header_bytenr(child));
1756 if (btrfs_node_ptr_generation(parent, slot) !=
1757 btrfs_header_generation(child)) {
1758 ret = -EINVAL;
1759 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1760 btrfs_header_generation(child),
1761 btrfs_node_ptr_generation(parent, slot));
1763 return ret;
/*
 * Descend from path->nodes[*level] towards a leaf, processing each leaf
 * with process_one_leaf() and maintaining the shared-node bookkeeping for
 * blocks referenced by more than one tree.
 *
 * On return *level is the level the walk stopped at; the caller continues
 * with walk_up_tree().  Returns 0 or a negative error; a positive value
 * from enter_shared_node() (block already visited) is also propagated via
 * err so the caller skips the subtree.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	u32 blocksize;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	ret = btrfs_lookup_extent_info(NULL, root,
				       path->nodes[*level]->start,
				       *level, 1, &refs, NULL);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/* The starting block itself may be shared. */
	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0) {
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
					       1, &refs, NULL);
		if (ret < 0)
			refs = 0;

		/* Already-visited shared child: skip the whole subtree. */
		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->leafsize, *level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(root, cur, path->slots[*level], next);
		if (ret) {
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Step one level down into the child block. */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* Mark the current level as fully consumed for walk_up_tree(). */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
/*
 * Move back up the tree to the next unvisited sibling slot.
 *
 * Returns 0 when a new slot was found (*level updated to it), or 1 when
 * the entire tree has been consumed.  Levels that are abandoned have
 * their extent buffers released; leaving the active shared node is
 * handled via leave_shared_node().
 */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
{
	int i;
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
			/* note: frees nodes[*level], not nodes[i] — *level
			 * tracks the lowest still-held buffer as we ascend */
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			BUG_ON(*level > wc->active_node);
			if (*level == wc->active_node)
				leave_shared_node(root, wc, *level);
			*level = i + 1;
		}
	}
	return 1;
}
1901 static int check_root_dir(struct inode_record *rec)
1903 struct inode_backref *backref;
1904 int ret = -1;
1906 if (!rec->found_inode_item || rec->errors)
1907 goto out;
1908 if (rec->nlink != 1 || rec->found_link != 0)
1909 goto out;
1910 if (list_empty(&rec->backrefs))
1911 goto out;
1912 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1913 if (!backref->found_inode_ref)
1914 goto out;
1915 if (backref->index != 0 || backref->namelen != 2 ||
1916 memcmp(backref->name, "..", 2))
1917 goto out;
1918 if (backref->found_dir_index || backref->found_dir_item)
1919 goto out;
1920 ret = 0;
1921 out:
1922 return ret;
/*
 * Rewrite the on-disk isize of a directory inode to the size computed
 * from its entries (rec->found_size), clearing I_ERR_DIR_ISIZE_WRONG.
 *
 * Returns 0 on success, -ENOENT when the inode item cannot be found,
 * or a negative search error.  @path is released before returning.
 */
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret;

	/* Search with offset (u64)-1 and step back to land on the item. */
	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret) {
		if (!path->slots[0]) {
			ret = -ENOENT;
			goto out;
		}
		path->slots[0]--;
		ret = 0;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {
		ret = -ENOENT;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
	       root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
1966 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1967 struct btrfs_root *root,
1968 struct btrfs_path *path,
1969 struct inode_record *rec)
1971 int ret;
1973 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1974 btrfs_release_path(path);
1975 if (!ret)
1976 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1977 return ret;
/*
 * Rewrite the on-disk nbytes of an inode to the byte count computed from
 * its file extents (rec->found_size), clearing I_ERR_FILE_NBYTES_WRONG.
 *
 * Returns 0 on success, -ENOENT when the inode item is missing, or a
 * negative search error.  @path is released before returning.
 */
static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret = 0;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	/* Since ret == 0, no need to check anything */
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
	printf("reset nbytes for ino %llu root %llu\n",
	       rec->ino, root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Recreate a missing DIR_INDEX item for @backref in its own transaction,
 * then update the cached record of the parent directory (found_size and
 * the I_ERR_DIR_ISIZE_WRONG flag) to reflect the new entry.
 *
 * Returns 0 on success or a negative error; insertion failure aborts via
 * BUG_ON.
 */
static int add_missing_dir_index(struct btrfs_root *root,
				 struct cache_tree *inode_cache,
				 struct inode_record *rec,
				 struct inode_backref *backref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *dir_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct inode_record *dir_rec;
	unsigned long name_ptr;
	u32 data_size = sizeof(*dir_item) + backref->namelen;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
		(unsigned long long)rec->ino);
	key.objectid = backref->dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = backref->index;

	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);

	/* The embedded key points at the child inode item. */
	disk_key.objectid = cpu_to_le64(rec->ino);
	disk_key.type = BTRFS_INODE_ITEM_KEY;
	disk_key.offset = 0;

	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
	btrfs_set_dir_data_len(leaf, dir_item, 0);
	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
	name_ptr = (unsigned long)(dir_item + 1);
	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);

	backref->found_dir_index = 1;
	/* Refresh the cached parent directory record, if it exists. */
	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
	if (!dir_rec)
		return 0;
	dir_rec->found_size += backref->namelen;
	if (dir_rec->found_size == dir_rec->isize &&
	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	if (dir_rec->found_size != dir_rec->isize)
		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;

	return 0;
}
/*
 * Delete a bad DIR_INDEX entry described by @backref in its own
 * transaction.  A lookup result of -ENOENT is treated as success (the
 * entry is already gone).
 */
static int delete_dir_index(struct btrfs_root *root,
			    struct cache_tree *inode_cache,
			    struct inode_record *rec,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_free_path(path);
		btrfs_commit_transaction(trans, root);
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	/*
	 * NOTE(review): di == NULL with mod == -1 apparently means the item
	 * exists but the name does not match, so the whole item is removed;
	 * otherwise only the matching name is deleted — confirm against
	 * btrfs_lookup_dir_index()/btrfs_delete_one_dir_name().
	 */
	if (!di)
		ret = btrfs_del_item(trans, root, path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, path, di);
	BUG_ON(ret);
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
/*
 * Recreate a lost inode item for @rec, guessing the mode from the
 * information gathered so far: a directory if any dir item was seen
 * (sized by found_size), otherwise a regular file (sized by extent_end).
 * Timestamps are set to "now"; permissions default to 0755.
 *
 * @root_dir: nonzero when recreating the root directory inode (forces
 *            nlink 1 instead of rec->found_link).
 */
static int create_inode_item(struct btrfs_root *root,
			     struct inode_record *rec,
			     struct inode_backref *backref, int root_dir)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_inode_item inode_item;
	time_t now = time(NULL);
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		return ret;
	}

	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
		"be incomplete, please check permissions and content after "
		"the fsck completes.\n", (unsigned long long)root->objectid,
		(unsigned long long)rec->ino);

	memset(&inode_item, 0, sizeof(inode_item));
	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
	if (root_dir)
		btrfs_set_stack_inode_nlink(&inode_item, 1);
	else
		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
	if (rec->found_dir_item) {
		if (rec->found_file_extent)
			fprintf(stderr, "root %llu inode %llu has both a dir "
				"item and extents, unsure if it is a dir or a "
				"regular file so setting it as a directory\n",
				(unsigned long long)root->objectid,
				(unsigned long long)rec->ino);
		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
	} else if (!rec->found_dir_item) {
		/* NOTE(review): this condition is redundant — it is always
		 * true on this branch; equivalent to a plain else. */
		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
	}
	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);

	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
	BUG_ON(ret);
	btrfs_commit_transaction(trans, root);
	return 0;
}
/*
 * Walk all backrefs of @rec and repair what can be repaired.
 *
 * With @delete set, bad dir indexes (present but without a matching
 * inode ref, or with an index mismatch) are removed.  Without @delete,
 * missing dir index items, missing dir index/item pairs and missing
 * inode items are recreated.
 *
 * Returns a negative error, or the number of repairs performed.
 */
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		/* The root dir inode itself may need recreating. */
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, backref, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, inode_cache, rec, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
		}

		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
			if (backref->found_dir_item &&
			    backref->found_dir_index &&
			    backref->found_dir_index) {
				/*
				 * NOTE(review): found_dir_index is tested
				 * twice above; one occurrence was likely
				 * meant to be a different flag (perhaps
				 * found_inode_ref, which is re-checked
				 * below anyway) — confirm upstream.
				 */
				if (!backref->errors &&
				    backref->found_inode_ref) {
					list_del(&backref->list);
					free(backref);
				}
			}
		}

		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, backref, 0);
			if (ret)
				break;
			repaired++;
		}
	}
	return ret ? ret : repaired;
}
2292 * To determine the file type for nlink/inode_item repair
2294 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2295 * Return -ENOENT if file type is not found.
2297 static int find_file_type(struct inode_record *rec, u8 *type)
2299 struct inode_backref *backref;
2301 /* For inode item recovered case */
2302 if (rec->found_inode_item) {
2303 *type = imode_to_type(rec->imode);
2304 return 0;
2307 list_for_each_entry(backref, &rec->backrefs, list) {
2308 if (backref->found_dir_index || backref->found_dir_item) {
2309 *type = backref->filetype;
2310 return 0;
2313 return -ENOENT;
2317 * To determine the file name for nlink repair
2319 * Return 0 if file name is found, set name and namelen.
2320 * Return -ENOENT if file name is not found.
2322 static int find_file_name(struct inode_record *rec,
2323 char *name, int *namelen)
2325 struct inode_backref *backref;
2327 list_for_each_entry(backref, &rec->backrefs, list) {
2328 if (backref->found_dir_index || backref->found_dir_item ||
2329 backref->found_inode_ref) {
2330 memcpy(name, backref->name, backref->namelen);
2331 *namelen = backref->namelen;
2332 return 0;
2335 return -ENOENT;
/*
 * Reset the nlink of the inode to the correct one.
 *
 * Every backref (valid or not) is unlinked, the on-disk nlink is zeroed,
 * and then only the fully-valid backrefs are re-added via btrfs_add_link(),
 * which re-increments nlink as it goes.  rec->found_link ends up as the
 * number of valid links.  Returns 0 on success, negative errno on failure.
 */
2338 /* Reset the nlink of the inode to the correct one */
2339 static int reset_nlink(struct btrfs_trans_handle *trans,
2340 struct btrfs_root *root,
2341 struct btrfs_path *path,
2342 struct inode_record *rec)
2344 struct inode_backref *backref;
2345 struct inode_backref *tmp;
2346 struct btrfs_key key;
2347 struct btrfs_inode_item *inode_item;
2348 int ret = 0;
2350 /* We don't believe this either, reset it and iterate backref */
2351 rec->found_link = 0;
2353 /* Remove all backref including the valid ones */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2356 backref->index, backref->name,
2357 backref->namelen, 0);
2358 if (ret < 0)
2359 goto out;
2361 /* remove invalid backref, so it won't be added back */
2362 if (!(backref->found_dir_index &&
2363 backref->found_dir_item &&
2364 backref->found_inode_ref)) {
2365 list_del(&backref->list);
2366 free(backref);
2367 } else {
		/* valid backref: keep it on the list to be re-added below */
2368 rec->found_link++;
2372 /* Set nlink to 0 */
2373 key.objectid = rec->ino;
2374 key.type = BTRFS_INODE_ITEM_KEY;
2375 key.offset = 0;
2376 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2377 if (ret < 0)
2378 goto out;
2379 if (ret > 0) {
	/* inode item itself is missing; nothing we can reset */
2380 ret = -ENOENT;
2381 goto out;
2383 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2384 struct btrfs_inode_item);
2385 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2386 btrfs_mark_buffer_dirty(path->nodes[0]);
2387 btrfs_release_path(path);
2390 * Add back valid inode_ref/dir_item/dir_index,
2391 * add_link() will handle the nlink inc, so new nlink must be correct
2393 list_for_each_entry(backref, &rec->backrefs, list) {
2394 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2395 backref->name, backref->namelen,
2396 backref->filetype, &backref->index, 1);
2397 if (ret < 0)
2398 goto out;
2400 out:
2401 btrfs_release_path(path);
2402 return ret;
/*
 * Repair a wrong link count (I_ERR_LINK_COUNT_WRONG) for @rec->ino.
 *
 * The name/type are recovered from backrefs before reset_nlink() deletes
 * the invalid ones (falling back to "<ino>" / regular file).  If no valid
 * link remains after the reset, the inode is linked into a newly created
 * "lost+found" directory, appending ".<ino>" suffixes on name collisions.
 *
 * The I_ERR_LINK_COUNT_WRONG flag is cleared even on failure so the same
 * inode is never retried forever.  Returns 0 or a negative errno value.
 */
2405 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2406 struct btrfs_root *root,
2407 struct btrfs_path *path,
2408 struct inode_record *rec)
2410 char *dir_name = "lost+found";
2411 char namebuf[BTRFS_NAME_LEN] = {0};
2412 u64 lost_found_ino;
2413 u32 mode = 0700;
2414 u8 type = 0;
2415 int namelen = 0;
2416 int name_recovered = 0;
2417 int type_recovered = 0;
2418 int ret = 0;
2421 * Get file name and type first before these invalid inode ref
2422 * are deleted by remove_all_invalid_backref()
2424 name_recovered = !find_file_name(rec, namebuf, &namelen);
2425 type_recovered = !find_file_type(rec, &type);
2427 if (!name_recovered) {
2428 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2429 rec->ino, rec->ino);
2430 namelen = count_digits(rec->ino);
2431 sprintf(namebuf, "%llu", rec->ino);
2432 name_recovered = 1;
2434 if (!type_recovered) {
2435 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2436 rec->ino);
2437 type = BTRFS_FT_REG_FILE;
2438 type_recovered = 1;
2441 ret = reset_nlink(trans, root, path, rec);
2442 if (ret < 0) {
2443 fprintf(stderr,
2444 "Failed to reset nlink for inode %llu: %s\n",
2445 rec->ino, strerror(-ret));
2446 goto out;
	/* No valid link left at all: move the inode into lost+found */
2449 if (rec->found_link == 0) {
2450 lost_found_ino = root->highest_inode;
2451 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2452 ret = -EOVERFLOW;
2453 goto out;
2455 lost_found_ino++;
	/* btrfs_mkdir() reuses an existing lost+found dir if present */
2456 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2457 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2458 mode);
2459 if (ret < 0) {
2460 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2461 dir_name, strerror(-ret));
2462 goto out;
2464 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2465 namebuf, namelen, type, NULL, 1);
2467 * Add ".INO" suffix several times to handle case where
2468 * "FILENAME.INO" is already taken by another file.
2470 while (ret == -EEXIST) {
2472 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2474 if (namelen + count_digits(rec->ino) + 1 >
2475 BTRFS_NAME_LEN) {
2476 ret = -EFBIG;
2477 goto out;
2479 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2480 ".%llu", rec->ino);
2481 namelen += count_digits(rec->ino) + 1;
2482 ret = btrfs_add_link(trans, root, rec->ino,
2483 lost_found_ino, namebuf,
2484 namelen, type, NULL, 1);
2486 if (ret < 0) {
2487 fprintf(stderr,
2488 "Failed to link the inode %llu to %s dir: %s\n",
2489 rec->ino, dir_name, strerror(-ret));
2490 goto out;
2493 * Just increase the found_link, don't actually add the
2494 * backref. This will make things easier and this inode
2495 * record will be freed after the repair is done.
2496 * So fsck will not report problem about this inode.
2498 rec->found_link++;
2499 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2500 namelen, namebuf, dir_name);
2502 printf("Fixed the nlink of inode %llu\n", rec->ino);
2503 out:
2505 * Clear the flag anyway, or we will loop forever for the same inode
2506 * as it will not be removed from the bad inode list and the dead loop
2507 * happens.
2509 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2510 btrfs_release_path(path);
2511 return ret;
2515 * Check if there is any normal(reg or prealloc) file extent for given
2516 * ino.
2517 * This is used to determine the file type when neither its dir_index/item or
2518 * inode_item exists.
2520 * This will *NOT* report error, if any error happens, just consider it does
2521 * not have any normal file extent.
2523 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2525 struct btrfs_path *path;
2526 struct btrfs_key key;
2527 struct btrfs_key found_key;
2528 struct btrfs_file_extent_item *fi;
2529 u8 type;
2530 int ret = 0;
2532 path = btrfs_alloc_path();
2533 if (!path)
2534 goto out;
2535 key.objectid = ino;
2536 key.type = BTRFS_EXTENT_DATA_KEY;
2537 key.offset = 0;
2539 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2540 if (ret < 0) {
2541 ret = 0;
2542 goto out;
2544 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2545 ret = btrfs_next_leaf(root, path);
2546 if (ret) {
2547 ret = 0;
2548 goto out;
2551 while (1) {
2552 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2553 path->slots[0]);
2554 if (found_key.objectid != ino ||
2555 found_key.type != BTRFS_EXTENT_DATA_KEY)
2556 break;
2557 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2558 struct btrfs_file_extent_item);
2559 type = btrfs_file_extent_type(path->nodes[0], fi);
2560 if (type != BTRFS_FILE_EXTENT_INLINE) {
2561 ret = 1;
2562 goto out;
2565 out:
2566 btrfs_free_path(path);
2567 return ret;
2570 static u32 btrfs_type_to_imode(u8 type)
2572 static u32 imode_by_btrfs_type[] = {
2573 [BTRFS_FT_REG_FILE] = S_IFREG,
2574 [BTRFS_FT_DIR] = S_IFDIR,
2575 [BTRFS_FT_CHRDEV] = S_IFCHR,
2576 [BTRFS_FT_BLKDEV] = S_IFBLK,
2577 [BTRFS_FT_FIFO] = S_IFIFO,
2578 [BTRFS_FT_SOCK] = S_IFSOCK,
2579 [BTRFS_FT_SYMLINK] = S_IFLNK,
2582 return imode_by_btrfs_type[(type)];
2585 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2586 struct btrfs_root *root,
2587 struct btrfs_path *path,
2588 struct inode_record *rec)
2590 u8 filetype;
2591 u32 mode = 0700;
2592 int type_recovered = 0;
2593 int ret = 0;
2595 printf("Trying to rebuild inode:%llu\n", rec->ino);
2597 type_recovered = !find_file_type(rec, &filetype);
2600 * Try to determine inode type if type not found.
2602 * For found regular file extent, it must be FILE.
2603 * For found dir_item/index, it must be DIR.
2605 * For undetermined one, use FILE as fallback.
2607 * TODO:
2608 * 1. If found backref(inode_index/item is already handled) to it,
2609 * it must be DIR.
2610 * Need new inode-inode ref structure to allow search for that.
2612 if (!type_recovered) {
2613 if (rec->found_file_extent &&
2614 find_normal_file_extent(root, rec->ino)) {
2615 type_recovered = 1;
2616 filetype = BTRFS_FT_REG_FILE;
2617 } else if (rec->found_dir_item) {
2618 type_recovered = 1;
2619 filetype = BTRFS_FT_DIR;
2620 } else if (!list_empty(&rec->orphan_extents)) {
2621 type_recovered = 1;
2622 filetype = BTRFS_FT_REG_FILE;
2623 } else{
2624 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2625 rec->ino);
2626 type_recovered = 1;
2627 filetype = BTRFS_FT_REG_FILE;
2631 ret = btrfs_new_inode(trans, root, rec->ino,
2632 mode | btrfs_type_to_imode(filetype));
2633 if (ret < 0)
2634 goto out;
2637 * Here inode rebuild is done, we only rebuild the inode item,
2638 * don't repair the nlink(like move to lost+found).
2639 * That is the job of nlink repair.
2641 * We just fill the record and return
2643 rec->found_dir_item = 1;
2644 rec->imode = mode | btrfs_type_to_imode(filetype);
2645 rec->nlink = 0;
2646 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2647 /* Ensure the inode_nlinks repair function will be called */
2648 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2649 out:
2650 return ret;
/*
 * Re-attach orphan data extents (I_ERR_FILE_EXTENT_ORPHAN) to @rec->ino.
 *
 * For each orphan extent: if a conflicting file extent already covers the
 * range, the orphan's disk extent is freed; otherwise a new file extent
 * item is inserted and the record's size/hole bookkeeping is updated.
 * Returns 0 or a negative errno value.
 */
2653 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2654 struct btrfs_root *root,
2655 struct btrfs_path *path,
2656 struct inode_record *rec)
2658 struct orphan_data_extent *orphan;
2659 struct orphan_data_extent *tmp;
2660 int ret = 0;
2662 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2664 * Check for conflicting file extents
2666 * Here we don't know whether the extents is compressed or not,
2667 * so we can only assume it not compressed nor data offset,
2668 * and use its disk_len as extent length.
2670 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2671 orphan->offset, orphan->disk_len, 0);
2672 btrfs_release_path(path);
2673 if (ret < 0)
2674 goto out;
	/* ret == 0: the range is already covered, drop the orphan extent */
2675 if (!ret) {
2676 fprintf(stderr,
2677 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2678 orphan->disk_bytenr, orphan->disk_len);
2679 ret = btrfs_free_extent(trans,
2680 root->fs_info->extent_root,
2681 orphan->disk_bytenr, orphan->disk_len,
2682 0, root->objectid, orphan->objectid,
2683 orphan->offset);
2684 if (ret < 0)
2685 goto out;
2687 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2688 orphan->offset, orphan->disk_bytenr,
2689 orphan->disk_len, orphan->disk_len);
2690 if (ret < 0)
2691 goto out;
2693 /* Update file size info */
2694 rec->found_size += orphan->disk_len;
2695 if (rec->found_size == rec->nbytes)
2696 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2698 /* Update the file extent hole info too */
2699 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2700 orphan->disk_len);
2701 if (ret < 0)
2702 goto out;
2703 if (RB_EMPTY_ROOT(&rec->holes))
2704 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2706 list_del(&orphan->list);
2707 free(orphan);
2709 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2710 out:
2711 return ret;
/*
 * Fix missing file extent ranges (I_ERR_FILE_EXTENT_DISCOUNT) by punching
 * explicit holes for every recorded gap in @rec->holes.
 *
 * The rb-tree is consumed as it is iterated (rb_first is re-read after
 * each deletion).  If no hole record exists at all, the whole [0, isize)
 * range is punched.  Returns 0 or a negative errno value.
 */
2714 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2715 struct btrfs_root *root,
2716 struct btrfs_path *path,
2717 struct inode_record *rec)
2719 struct rb_node *node;
2720 struct file_extent_hole *hole;
2721 int found = 0;
2722 int ret = 0;
2724 node = rb_first(&rec->holes);
2726 while (node) {
2727 found = 1;
2728 hole = rb_entry(node, struct file_extent_hole, node);
2729 ret = btrfs_punch_hole(trans, root, rec->ino,
2730 hole->start, hole->len);
2731 if (ret < 0)
2732 goto out;
2733 ret = del_file_extent_hole(&rec->holes, hole->start,
2734 hole->len);
2735 if (ret < 0)
2736 goto out;
2737 if (RB_EMPTY_ROOT(&rec->holes))
2738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
	/* del_file_extent_hole() freed the node; restart from the first */
2739 node = rb_first(&rec->holes);
2741 /* special case for a file losing all its file extent */
2742 if (!found) {
2743 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2744 round_up(rec->isize, root->sectorsize));
2745 if (ret < 0)
2746 goto out;
2748 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2749 rec->ino, root->objectid);
2750 out:
2751 return ret;
2754 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2756 struct btrfs_trans_handle *trans;
2757 struct btrfs_path *path;
2758 int ret = 0;
2760 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2761 I_ERR_NO_ORPHAN_ITEM |
2762 I_ERR_LINK_COUNT_WRONG |
2763 I_ERR_NO_INODE_ITEM |
2764 I_ERR_FILE_EXTENT_ORPHAN |
2765 I_ERR_FILE_EXTENT_DISCOUNT|
2766 I_ERR_FILE_NBYTES_WRONG)))
2767 return rec->errors;
2769 path = btrfs_alloc_path();
2770 if (!path)
2771 return -ENOMEM;
2774 * For nlink repair, it may create a dir and add link, so
2775 * 2 for parent(256)'s dir_index and dir_item
2776 * 2 for lost+found dir's inode_item and inode_ref
2777 * 1 for the new inode_ref of the file
2778 * 2 for lost+found dir's dir_index and dir_item for the file
2780 trans = btrfs_start_transaction(root, 7);
2781 if (IS_ERR(trans)) {
2782 btrfs_free_path(path);
2783 return PTR_ERR(trans);
2786 if (rec->errors & I_ERR_NO_INODE_ITEM)
2787 ret = repair_inode_no_item(trans, root, path, rec);
2788 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2789 ret = repair_inode_orphan_extent(trans, root, path, rec);
2790 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2791 ret = repair_inode_discount_extent(trans, root, path, rec);
2792 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2793 ret = repair_inode_isize(trans, root, path, rec);
2794 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2795 ret = repair_inode_orphan_item(trans, root, path, rec);
2796 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2797 ret = repair_inode_nlinks(trans, root, path, rec);
2798 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2799 ret = repair_inode_nbytes(trans, root, path, rec);
2800 btrfs_commit_transaction(trans, root);
2801 btrfs_free_path(path);
2802 return ret;
/*
 * Verify (and optionally repair) every inode record collected for @root.
 *
 * Stage 1/2 of the repair loop deletes invalid backrefs then re-adds the
 * correct ones; stage 3 frees everything so the caller can rescan
 * (signalled via -EAGAIN).  Afterwards the root dir is validated (and
 * recreated in repair mode), then every remaining record is checked,
 * repaired where possible, and reported.
 *
 * Returns 0 when clean, -1 when unrepaired errors remain, or a negative
 * errno / -EAGAIN from the repair paths.
 */
2805 static int check_inode_recs(struct btrfs_root *root,
2806 struct cache_tree *inode_cache)
2808 struct cache_extent *cache;
2809 struct ptr_node *node;
2810 struct inode_record *rec;
2811 struct inode_backref *backref;
2812 int stage = 0;
2813 int ret = 0;
2814 int err = 0;
2815 u64 error = 0;
2816 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2818 if (btrfs_root_refs(&root->root_item) == 0) {
2819 if (!cache_tree_empty(inode_cache))
2820 fprintf(stderr, "warning line %d\n", __LINE__);
2821 return 0;
2825 * We need to record the highest inode number for later 'lost+found'
2826 * dir creation.
2827 * We must select a ino not used/refered by any existing inode, or
2828 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2829 * this may cause 'lost+found' dir has wrong nlinks.
2831 cache = last_cache_extent(inode_cache);
2832 if (cache) {
2833 node = container_of(cache, struct ptr_node, cache);
2834 rec = node->data;
2835 if (rec->ino > root->highest_inode)
2836 root->highest_inode = rec->ino;
2840 * We need to repair backrefs first because we could change some of the
2841 * errors in the inode recs.
2843 * We also need to go through and delete invalid backrefs first and then
2844 * add the correct ones second. We do this because we may get EEXIST
2845 * when adding back the correct index because we hadn't yet deleted the
2846 * invalid index.
2848 * For example, if we were missing a dir index then the directories
2849 * isize would be wrong, so if we fixed the isize to what we thought it
2850 * would be and then fixed the backref we'd still have a invalid fs, so
2851 * we need to add back the dir index and then check to see if the isize
2852 * is still wrong.
2854 while (stage < 3) {
2855 stage++;
2856 if (stage == 3 && !err)
2857 break;
2859 cache = search_cache_extent(inode_cache, 0);
2860 while (repair && cache) {
2861 node = container_of(cache, struct ptr_node, cache);
2862 rec = node->data;
2863 cache = next_cache_extent(cache);
2865 /* Need to free everything up and rescan */
2866 if (stage == 3) {
2867 remove_cache_extent(inode_cache, &node->cache);
2868 free(node);
2869 free_inode_rec(rec);
2870 continue;
2873 if (list_empty(&rec->backrefs))
2874 continue;
2876 ret = repair_inode_backrefs(root, rec, inode_cache,
2877 stage == 1);
2878 if (ret < 0) {
2879 err = ret;
2880 stage = 2;
2881 break;
2882 } if (ret > 0) {
2883 err = -EAGAIN;
2887 if (err)
2888 return err;
	/* Validate the root directory of this subvolume */
2890 rec = get_inode_rec(inode_cache, root_dirid, 0);
2891 if (rec) {
2892 ret = check_root_dir(rec);
2893 if (ret) {
2894 fprintf(stderr, "root %llu root dir %llu error\n",
2895 (unsigned long long)root->root_key.objectid,
2896 (unsigned long long)root_dirid);
2897 print_inode_error(root, rec);
2898 error++;
2900 } else {
2901 if (repair) {
2902 struct btrfs_trans_handle *trans;
2904 trans = btrfs_start_transaction(root, 1);
2905 if (IS_ERR(trans)) {
2906 err = PTR_ERR(trans);
2907 return err;
2910 fprintf(stderr,
2911 "root %llu missing its root dir, recreating\n",
2912 (unsigned long long)root->objectid);
2914 ret = btrfs_make_root_dir(trans, root, root_dirid);
2915 BUG_ON(ret);
2917 btrfs_commit_transaction(trans, root);
	/* Force a full rescan now that the root dir exists */
2918 return -EAGAIN;
2921 fprintf(stderr, "root %llu root dir %llu not found\n",
2922 (unsigned long long)root->root_key.objectid,
2923 (unsigned long long)root_dirid);
	/* Drain the cache, checking/repairing each remaining record */
2926 while (1) {
2927 cache = search_cache_extent(inode_cache, 0);
2928 if (!cache)
2929 break;
2930 node = container_of(cache, struct ptr_node, cache);
2931 rec = node->data;
2932 remove_cache_extent(inode_cache, &node->cache);
2933 free(node);
2934 if (rec->ino == root_dirid ||
2935 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2936 free_inode_rec(rec);
2937 continue;
2940 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2941 ret = check_orphan_item(root, rec->ino);
2942 if (ret == 0)
2943 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2944 if (can_free_inode_rec(rec)) {
2945 free_inode_rec(rec);
2946 continue;
2950 if (!rec->found_inode_item)
2951 rec->errors |= I_ERR_NO_INODE_ITEM;
2952 if (rec->found_link != rec->nlink)
2953 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2954 if (repair) {
2955 ret = try_repair_inode(root, rec);
2956 if (ret == 0 && can_free_inode_rec(rec)) {
2957 free_inode_rec(rec);
2958 continue;
2960 ret = 0;
2963 if (!(repair && ret == 0))
2964 error++;
2965 print_inode_error(root, rec);
2966 list_for_each_entry(backref, &rec->backrefs, list) {
2967 if (!backref->found_dir_item)
2968 backref->errors |= REF_ERR_NO_DIR_ITEM;
2969 if (!backref->found_dir_index)
2970 backref->errors |= REF_ERR_NO_DIR_INDEX;
2971 if (!backref->found_inode_ref)
2972 backref->errors |= REF_ERR_NO_INODE_REF;
2973 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2974 " namelen %u name %s filetype %d errors %x",
2975 (unsigned long long)backref->dir,
2976 (unsigned long long)backref->index,
2977 backref->namelen, backref->name,
2978 backref->filetype, backref->errors);
2979 print_ref_error(backref->errors);
2981 free_inode_rec(rec);
2983 return (error > 0) ? -1 : 0;
2986 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2987 u64 objectid)
2989 struct cache_extent *cache;
2990 struct root_record *rec = NULL;
2991 int ret;
2993 cache = lookup_cache_extent(root_cache, objectid, 1);
2994 if (cache) {
2995 rec = container_of(cache, struct root_record, cache);
2996 } else {
2997 rec = calloc(1, sizeof(*rec));
2998 rec->objectid = objectid;
2999 INIT_LIST_HEAD(&rec->backrefs);
3000 rec->cache.start = objectid;
3001 rec->cache.size = 1;
3003 ret = insert_cache_extent(root_cache, &rec->cache);
3004 BUG_ON(ret);
3006 return rec;
3009 static struct root_backref *get_root_backref(struct root_record *rec,
3010 u64 ref_root, u64 dir, u64 index,
3011 const char *name, int namelen)
3013 struct root_backref *backref;
3015 list_for_each_entry(backref, &rec->backrefs, list) {
3016 if (backref->ref_root != ref_root || backref->dir != dir ||
3017 backref->namelen != namelen)
3018 continue;
3019 if (memcmp(name, backref->name, namelen))
3020 continue;
3021 return backref;
3024 backref = calloc(1, sizeof(*backref) + namelen + 1);
3025 backref->ref_root = ref_root;
3026 backref->dir = dir;
3027 backref->index = index;
3028 backref->namelen = namelen;
3029 memcpy(backref->name, name, namelen);
3030 backref->name[namelen] = '\0';
3031 list_add_tail(&backref->list, &rec->backrefs);
3032 return backref;
3035 static void free_root_record(struct cache_extent *cache)
3037 struct root_record *rec;
3038 struct root_backref *backref;
3040 rec = container_of(cache, struct root_record, cache);
3041 while (!list_empty(&rec->backrefs)) {
3042 backref = list_entry(rec->backrefs.next,
3043 struct root_backref, list);
3044 list_del(&backref->list);
3045 free(backref);
3048 kfree(rec);
3051 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3053 static int add_root_backref(struct cache_tree *root_cache,
3054 u64 root_id, u64 ref_root, u64 dir, u64 index,
3055 const char *name, int namelen,
3056 int item_type, int errors)
3058 struct root_record *rec;
3059 struct root_backref *backref;
3061 rec = get_root_rec(root_cache, root_id);
3062 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3064 backref->errors |= errors;
3066 if (item_type != BTRFS_DIR_ITEM_KEY) {
3067 if (backref->found_dir_index || backref->found_back_ref ||
3068 backref->found_forward_ref) {
3069 if (backref->index != index)
3070 backref->errors |= REF_ERR_INDEX_UNMATCH;
3071 } else {
3072 backref->index = index;
3076 if (item_type == BTRFS_DIR_ITEM_KEY) {
3077 if (backref->found_forward_ref)
3078 rec->found_ref++;
3079 backref->found_dir_item = 1;
3080 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3081 backref->found_dir_index = 1;
3082 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3083 if (backref->found_forward_ref)
3084 backref->errors |= REF_ERR_DUP_ROOT_REF;
3085 else if (backref->found_dir_item)
3086 rec->found_ref++;
3087 backref->found_forward_ref = 1;
3088 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3089 if (backref->found_back_ref)
3090 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3091 backref->found_back_ref = 1;
3092 } else {
3093 BUG_ON(1);
3096 if (backref->found_forward_ref && backref->found_dir_item)
3097 backref->reachable = 1;
3098 return 0;
/*
 * Convert the per-subvolume inode records collected in @src_cache into
 * root backrefs in @dst_cache, consuming (and freeing) @src_cache.
 *
 * Records that are not child roots of @root are skipped; for the reloc
 * tree everything is simply discarded.  Returns 0 or a negative errno
 * value from is_child_root().
 */
3101 static int merge_root_recs(struct btrfs_root *root,
3102 struct cache_tree *src_cache,
3103 struct cache_tree *dst_cache)
3105 struct cache_extent *cache;
3106 struct ptr_node *node;
3107 struct inode_record *rec;
3108 struct inode_backref *backref;
3109 int ret = 0;
3111 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3112 free_inode_recs_tree(src_cache);
3113 return 0;
3116 while (1) {
3117 cache = search_cache_extent(src_cache, 0);
3118 if (!cache)
3119 break;
3120 node = container_of(cache, struct ptr_node, cache);
3121 rec = node->data;
3122 remove_cache_extent(src_cache, &node->cache);
3123 free(node);
3125 ret = is_child_root(root, root->objectid, rec->ino);
3126 if (ret < 0)
3127 break;
3128 else if (ret == 0)
3129 goto skip;
	/* rec->ino here is a subvolume id, not a regular inode number */
3131 list_for_each_entry(backref, &rec->backrefs, list) {
3132 BUG_ON(backref->found_inode_ref);
3133 if (backref->found_dir_item)
3134 add_root_backref(dst_cache, rec->ino,
3135 root->root_key.objectid, backref->dir,
3136 backref->index, backref->name,
3137 backref->namelen, BTRFS_DIR_ITEM_KEY,
3138 backref->errors);
3139 if (backref->found_dir_index)
3140 add_root_backref(dst_cache, rec->ino,
3141 root->root_key.objectid, backref->dir,
3142 backref->index, backref->name,
3143 backref->namelen, BTRFS_DIR_INDEX_KEY,
3144 backref->errors);
3146 skip:
3147 free_inode_rec(rec);
3149 if (ret < 0)
3150 return ret;
3151 return 0;
/*
 * Validate the reference graph between subvolume roots in @root_cache.
 *
 * First a fixed-point loop clears reachability for backrefs whose
 * referencing root is itself unreferenced, then every root is checked for
 * missing refs/items, with orphaned-but-expected roots tolerated via the
 * orphan item lookup.  Returns 1 when any error was found, 0 otherwise.
 */
3154 static int check_root_refs(struct btrfs_root *root,
3155 struct cache_tree *root_cache)
3157 struct root_record *rec;
3158 struct root_record *ref_root;
3159 struct root_backref *backref;
3160 struct cache_extent *cache;
3161 int loop = 1;
3162 int ret;
3163 int error;
3164 int errors = 0;
	/* The top-level FS tree is referenced by definition */
3166 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3167 rec->found_ref = 1;
3169 /* fixme: this can not detect circular references */
3170 while (loop) {
3171 loop = 0;
3172 cache = search_cache_extent(root_cache, 0);
3173 while (1) {
3174 if (!cache)
3175 break;
3176 rec = container_of(cache, struct root_record, cache);
3177 cache = next_cache_extent(cache);
3179 if (rec->found_ref == 0)
3180 continue;
3182 list_for_each_entry(backref, &rec->backrefs, list) {
3183 if (!backref->reachable)
3184 continue;
3186 ref_root = get_root_rec(root_cache,
3187 backref->ref_root);
3188 if (ref_root->found_ref > 0)
3189 continue;
	/* Referencing root is unreachable: propagate and iterate again */
3191 backref->reachable = 0;
3192 rec->found_ref--;
3193 if (rec->found_ref == 0)
3194 loop = 1;
	/* Second pass: report every root that still looks wrong */
3199 cache = search_cache_extent(root_cache, 0);
3200 while (1) {
3201 if (!cache)
3202 break;
3203 rec = container_of(cache, struct root_record, cache);
3204 cache = next_cache_extent(cache);
3206 if (rec->found_ref == 0 &&
3207 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3208 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3209 ret = check_orphan_item(root->fs_info->tree_root,
3210 rec->objectid);
3211 if (ret == 0)
3212 continue;
3215 * If we don't have a root item then we likely just have
3216 * a dir item in a snapshot for this root but no actual
3217 * ref key or anything so it's meaningless.
3219 if (!rec->found_root_item)
3220 continue;
3221 errors++;
3222 fprintf(stderr, "fs tree %llu not referenced\n",
3223 (unsigned long long)rec->objectid);
3226 error = 0;
3227 if (rec->found_ref > 0 && !rec->found_root_item)
3228 error = 1;
3229 list_for_each_entry(backref, &rec->backrefs, list) {
3230 if (!backref->found_dir_item)
3231 backref->errors |= REF_ERR_NO_DIR_ITEM;
3232 if (!backref->found_dir_index)
3233 backref->errors |= REF_ERR_NO_DIR_INDEX;
3234 if (!backref->found_back_ref)
3235 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3236 if (!backref->found_forward_ref)
3237 backref->errors |= REF_ERR_NO_ROOT_REF;
3238 if (backref->reachable && backref->errors)
3239 error = 1;
3241 if (!error)
3242 continue;
3244 errors++;
3245 fprintf(stderr, "fs tree %llu refs %u %s\n",
3246 (unsigned long long)rec->objectid, rec->found_ref,
3247 rec->found_root_item ? "" : "not found");
3249 list_for_each_entry(backref, &rec->backrefs, list) {
3250 if (!backref->reachable)
3251 continue;
3252 if (!backref->errors && rec->found_root_item)
3253 continue;
3254 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3255 " index %llu namelen %u name %s errors %x\n",
3256 (unsigned long long)backref->ref_root,
3257 (unsigned long long)backref->dir,
3258 (unsigned long long)backref->index,
3259 backref->namelen, backref->name,
3260 backref->errors);
3261 print_ref_error(backref->errors);
3264 return errors > 0 ? 1 : 0;
3267 static int process_root_ref(struct extent_buffer *eb, int slot,
3268 struct btrfs_key *key,
3269 struct cache_tree *root_cache)
3271 u64 dirid;
3272 u64 index;
3273 u32 len;
3274 u32 name_len;
3275 struct btrfs_root_ref *ref;
3276 char namebuf[BTRFS_NAME_LEN];
3277 int error;
3279 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3281 dirid = btrfs_root_ref_dirid(eb, ref);
3282 index = btrfs_root_ref_sequence(eb, ref);
3283 name_len = btrfs_root_ref_name_len(eb, ref);
3285 if (name_len <= BTRFS_NAME_LEN) {
3286 len = name_len;
3287 error = 0;
3288 } else {
3289 len = BTRFS_NAME_LEN;
3290 error = REF_ERR_NAME_TOO_LONG;
3292 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3294 if (key->type == BTRFS_ROOT_REF_KEY) {
3295 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3296 index, namebuf, len, key->type, error);
3297 } else {
3298 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3299 index, namebuf, len, key->type, error);
3301 return 0;
3304 static void free_corrupt_block(struct cache_extent *cache)
3306 struct btrfs_corrupt_block *corrupt;
3308 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3309 free(corrupt);
3312 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * Pass 1 deletes the node pointer to each corrupted block recorded in
 * @corrupt_blocks (without rebalancing, to avoid touching corrupted
 * siblings) and frees the corresponding extent.  Pass 2 re-runs
 * btrfs_search_slot() with ins_len=-1 on each key purely to let it
 * rebalance the tree.  After the fix, the btree should be writeable.
 * Returns 0 on success or a negative errno value.
 */
3315 * Repair the btree of the given root.
3317 * The fix is to remove the node key in corrupt_blocks cache_tree.
3318 * and rebalance the tree.
3319 * After the fix, the btree should be writeable.
3321 static int repair_btree(struct btrfs_root *root,
3322 struct cache_tree *corrupt_blocks)
3324 struct btrfs_trans_handle *trans;
3325 struct btrfs_path *path;
3326 struct btrfs_corrupt_block *corrupt;
3327 struct cache_extent *cache;
3328 struct btrfs_key key;
3329 u64 offset;
3330 int level;
3331 int ret = 0;
3333 if (cache_tree_empty(corrupt_blocks))
3334 return 0;
3336 path = btrfs_alloc_path();
3337 if (!path)
3338 return -ENOMEM;
3340 trans = btrfs_start_transaction(root, 1);
3341 if (IS_ERR(trans)) {
3342 ret = PTR_ERR(trans);
3343 fprintf(stderr, "Error starting transaction: %s\n",
3344 strerror(-ret));
3345 goto out_free_path;
3347 cache = first_cache_extent(corrupt_blocks);
3348 while (cache) {
3349 corrupt = container_of(cache, struct btrfs_corrupt_block,
3350 cache);
	/* Search only down to the corrupted block's parent level */
3351 level = corrupt->level;
3352 path->lowest_level = level;
3353 key.objectid = corrupt->key.objectid;
3354 key.type = corrupt->key.type;
3355 key.offset = corrupt->key.offset;
3358 * Here we don't want to do any tree balance, since it may
3359 * cause a balance with corrupted brother leaf/node,
3360 * so ins_len set to 0 here.
3361 * Balance will be done after all corrupt node/leaf is deleted.
3363 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3364 if (ret < 0)
3365 goto out;
3366 offset = btrfs_node_blockptr(path->nodes[level],
3367 path->slots[level]);
3369 /* Remove the ptr */
3370 ret = btrfs_del_ptr(trans, root, path, level,
3371 path->slots[level]);
3372 if (ret < 0)
3373 goto out;
3375 * Remove the corresponding extent
3376 * return value is not concerned.
3378 btrfs_release_path(path);
3379 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3380 0, root->root_key.objectid,
3381 level - 1, 0);
3382 cache = next_cache_extent(cache);
3385 /* Balance the btree using btrfs_search_slot() */
3386 cache = first_cache_extent(corrupt_blocks);
3387 while (cache) {
3388 corrupt = container_of(cache, struct btrfs_corrupt_block,
3389 cache);
3390 memcpy(&key, &corrupt->key, sizeof(key));
	/* ins_len = -1 asks search_slot to merge/rebalance on the way down */
3391 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3392 if (ret < 0)
3393 goto out;
3394 /* return will always >0 since it won't find the item */
3395 ret = 0;
3396 btrfs_release_path(path);
3397 cache = next_cache_extent(cache);
3399 out:
3400 btrfs_commit_transaction(trans, root);
3401 out_free_path:
3402 btrfs_free_path(path);
3403 return ret;
/*
 * Check a single fs/subvolume tree root.
 *
 * Walks the tree rooted at @root with @wc, building per-root shared-node and
 * inode caches, then verifies the collected inode records.  Corrupted tree
 * blocks found during the walk are collected in a local cache tree and, when
 * 'repair' is set, handed to repair_btree().
 *
 * Returns 0 on success, a negative errno on failure (check_inode_recs()
 * errors are folded in only if no earlier error occurred).
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
{
	int ret = 0;
	int err = 0;
	int wret;
	int level;
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
	 *
	 * Unlike the usage in extent tree check, here we do it on a per
	 * fs/subvol tree basis.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	}

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);

	/* Move the orphan extent record to corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
				      1);
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	}

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
	else
		status = btrfs_check_node(root, NULL, root->node);
	/*
	 * NOTE(review): this early return leaves fs_info->corrupt_blocks
	 * pointing at the local (stack) cache tree set above — callers must
	 * not touch it afterwards; consider resetting it to NULL here.
	 */
	if (status != BTRFS_TREE_BLOCK_CLEAN)
		return -EIO;

	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
	} else {
		/* Snapshot drop in progress: resume the walk at drop_progress */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		if (wret < 0)
			goto skip_walking;
		btrfs_node_key(path.nodes[level], &found_key,
			       path.slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}

	/* Walk the whole tree: down collects records, up advances the path */
	while (1) {
		wret = walk_down_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;

		wret = walk_up_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}
skip_walking:
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
		while (cache) {
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
					       cache);
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		}
		if (repair) {
			printf("Try to repair the btree for root %llu\n",
			       root->root_key.objectid);
			ret = repair_btree(root, &corrupt_blocks);
			if (ret < 0)
				fprintf(stderr, "Failed to repair btree: %s\n",
					strerror(-ret));
			if (!ret)
				printf("Btree for root %llu is fixed\n",
				       root->root_key.objectid);
		}
	}

	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (err < 0)
		ret = err;

	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
				     root_node.current);
	}

	err = check_inode_recs(root, &root_node.inode_cache);
	if (!ret)
		ret = err;

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
	return ret;
}
3555 static int fs_root_objectid(u64 objectid)
3557 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3558 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3559 return 1;
3560 return is_fstree(objectid);
/*
 * Iterate every root item in the tree root and check each fs/subvolume tree
 * via check_fs_root().  ROOT_REF/ROOT_BACKREF items are recorded into
 * @root_cache for later cross-checking.
 *
 * If the tree root node changes underneath us, or a per-root check returns
 * -EAGAIN (blocks were COWed during repair), all collected root records are
 * thrown away and the scan restarts from the beginning ("again" label).
 *
 * Returns 0 on success, 1 if any root failed its check.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	int ret;
	int err = 0;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);
	}

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	/* Remember the root node so we can detect a COW of the tree root */
	tree_node = tree_root->node;
	while (1) {
		if (tree_node != tree_root->node) {
			/* tree root changed under us: restart from scratch */
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* reloc roots are read uncached and freed below */
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				/* repair COWed blocks: rescan everything */
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret)
				err = 1;
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);

	task_stop(ctx.info);

	return err;
}
/*
 * Verify that every backref attached to @rec is consistent: present in the
 * extent tree, actually referenced, with matching byte counts, and that the
 * total found references equal rec->refs.
 *
 * When @print_errs is 0 this is a pure predicate that stops at the first
 * problem; when non-zero, every mismatch is reported to stderr.
 *
 * Returns 0 when everything matches, 1 on any inconsistency.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	while (cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		/* advance before any goto so the cursor stays valid */
		cur = cur->next;
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = (struct data_backref *)back;
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = (struct tree_backref *)back;
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* tree backrefs must have been seen in the trees themselves */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = (struct tree_backref *)back;
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = (struct data_backref *)back;
			/* per-backref ref count must match the extent tree */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			}
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/* tally references: one per tree backref, found_ref per data */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = (struct data_backref *)back;
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
3775 static int free_all_extent_backrefs(struct extent_record *rec)
3777 struct extent_backref *back;
3778 struct list_head *cur;
3779 while (!list_empty(&rec->backrefs)) {
3780 cur = rec->backrefs.next;
3781 back = list_entry(cur, struct extent_backref, list);
3782 list_del(cur);
3783 free(back);
3785 return 0;
3788 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3789 struct cache_tree *extent_cache)
3791 struct cache_extent *cache;
3792 struct extent_record *rec;
3794 while (1) {
3795 cache = first_cache_extent(extent_cache);
3796 if (!cache)
3797 break;
3798 rec = container_of(cache, struct extent_record, cache);
3799 remove_cache_extent(extent_cache, cache);
3800 free_all_extent_backrefs(rec);
3801 free(rec);
3805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3806 struct extent_record *rec)
3808 if (rec->content_checked && rec->owner_ref_checked &&
3809 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3810 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3811 !rec->bad_full_backref && !rec->crossing_stripes &&
3812 !rec->wrong_chunk_type) {
3813 remove_cache_extent(extent_cache, &rec->cache);
3814 free_all_extent_backrefs(rec);
3815 list_del_init(&rec->list);
3816 free(rec);
3818 return 0;
3821 static int check_owner_ref(struct btrfs_root *root,
3822 struct extent_record *rec,
3823 struct extent_buffer *buf)
3825 struct extent_backref *node;
3826 struct tree_backref *back;
3827 struct btrfs_root *ref_root;
3828 struct btrfs_key key;
3829 struct btrfs_path path;
3830 struct extent_buffer *parent;
3831 int level;
3832 int found = 0;
3833 int ret;
3835 list_for_each_entry(node, &rec->backrefs, list) {
3836 if (node->is_data)
3837 continue;
3838 if (!node->found_ref)
3839 continue;
3840 if (node->full_backref)
3841 continue;
3842 back = (struct tree_backref *)node;
3843 if (btrfs_header_owner(buf) == back->root)
3844 return 0;
3846 BUG_ON(rec->is_root);
3848 /* try to find the block by search corresponding fs tree */
3849 key.objectid = btrfs_header_owner(buf);
3850 key.type = BTRFS_ROOT_ITEM_KEY;
3851 key.offset = (u64)-1;
3853 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3854 if (IS_ERR(ref_root))
3855 return 1;
3857 level = btrfs_header_level(buf);
3858 if (level == 0)
3859 btrfs_item_key_to_cpu(buf, &key, 0);
3860 else
3861 btrfs_node_key_to_cpu(buf, &key, 0);
3863 btrfs_init_path(&path);
3864 path.lowest_level = level + 1;
3865 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3866 if (ret < 0)
3867 return 0;
3869 parent = path.nodes[level + 1];
3870 if (parent && buf->start == btrfs_node_blockptr(parent,
3871 path.slots[level + 1]))
3872 found = 1;
3874 btrfs_release_path(&path);
3875 return found ? 0 : 1;
3878 static int is_extent_tree_record(struct extent_record *rec)
3880 struct list_head *cur = rec->backrefs.next;
3881 struct extent_backref *node;
3882 struct tree_backref *back;
3883 int is_extent = 0;
3885 while(cur != &rec->backrefs) {
3886 node = list_entry(cur, struct extent_backref, list);
3887 cur = cur->next;
3888 if (node->is_data)
3889 return 0;
3890 back = (struct tree_backref *)node;
3891 if (node->full_backref)
3892 return 0;
3893 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3894 is_extent = 1;
3896 return is_extent;
3900 static int record_bad_block_io(struct btrfs_fs_info *info,
3901 struct cache_tree *extent_cache,
3902 u64 start, u64 len)
3904 struct extent_record *rec;
3905 struct cache_extent *cache;
3906 struct btrfs_key key;
3908 cache = lookup_cache_extent(extent_cache, start, len);
3909 if (!cache)
3910 return 0;
3912 rec = container_of(cache, struct extent_record, cache);
3913 if (!is_extent_tree_record(rec))
3914 return 0;
3916 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3917 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 in @buf to restore key order.
 *
 * For internal nodes the whole key pointers are exchanged.  For leaves the
 * item data, offsets, sizes and keys are exchanged; parent keys are fixed up
 * when slot 0 is involved.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* node: exchange the two key pointers wholesale */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			/* first key changed: propagate it up to the parents */
			struct btrfs_disk_key key;

			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data holds item1_size bytes but is
		 * written with item2_size (and vice versa) — this over-reads
		 * the malloc'ed buffer when the item sizes differ; confirm
		 * whether equal sizes are guaranteed here.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* swap the offset/size bookkeeping to follow the data */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* swap the keys, fixing up parent pointers as needed */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
/*
 * Repair out-of-order keys in the block at path->lowest_level by repeatedly
 * swapping adjacent misordered entries (bubble-sort style), restarting the
 * scan after each swap.
 *
 * Returns 0 if at least one swap succeeded and the block ended up ordered,
 * a negative error from swap_values() on failure, or -EIO if no swap was
 * ever performed (callers only invoke this on blocks known to be bad).
 */
static int fix_key_order(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path)
{
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int i;
	int level = path->lowest_level;
	int ret = -EIO;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		if (level) {
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
		} else {
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		}
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
			continue;
		ret = swap_values(root, path, buf, i);
		if (ret)
			break;
		btrfs_mark_buffer_dirty(buf);
		/*
		 * Restart the scan after a swap.  NOTE(review): the for-loop
		 * increment resumes at i == 1, so the pair (0, 1) is never
		 * rechecked after a later swap; also equal keys (comp == 0)
		 * are swapped every pass — confirm neither case can occur
		 * here.
		 */
		i = 0;
	}
	return ret;
}
/*
 * Remove a corrupt item at @slot from leaf @buf by shifting the item headers
 * down over it and shrinking nritems.  Only item types that the rest of
 * fsck can reconstruct are allowed to go missing; anything else is refused.
 * (@trans is currently unused but kept for the call signature.)
 *
 * Returns 0 on success, -1 if the item type must not be deleted.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* close the gap in the item header array; data area is left as-is */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		/* leaf's first key changed: update the parent pointers */
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair bad item offsets in the leaf at path->nodes[0].
 *
 * Item data in a leaf must be packed back-to-front: item 0 ends at the leaf
 * data size, and each following item ends where the previous one starts.
 * Items that overshoot their bound are deleted (delete_bogus_item) and the
 * scan restarts; items that undershoot are shifted up to close the gap.
 *
 * Returns 0 on success, -EIO when an item cannot be repaired.  BUG()s on
 * error because partially-shifted leaves must not be written out.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root)) {
				/* item data runs past the leaf: drop it */
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "item is off the end of the "
					"leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				/* overlaps the previous item's data: drop it */
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;

		/* move the item's data up by 'shift' to close the gap */
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out. Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
4124 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4125 * then just return -EIO.
4127 static int try_to_fix_bad_block(struct btrfs_root *root,
4128 struct extent_buffer *buf,
4129 enum btrfs_tree_block_status status)
4131 struct btrfs_trans_handle *trans;
4132 struct ulist *roots;
4133 struct ulist_node *node;
4134 struct btrfs_root *search_root;
4135 struct btrfs_path *path;
4136 struct ulist_iterator iter;
4137 struct btrfs_key root_key, key;
4138 int ret;
4140 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4141 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4142 return -EIO;
4144 path = btrfs_alloc_path();
4145 if (!path)
4146 return -EIO;
4148 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4149 0, &roots);
4150 if (ret) {
4151 btrfs_free_path(path);
4152 return -EIO;
4155 ULIST_ITER_INIT(&iter);
4156 while ((node = ulist_next(roots, &iter))) {
4157 root_key.objectid = node->val;
4158 root_key.type = BTRFS_ROOT_ITEM_KEY;
4159 root_key.offset = (u64)-1;
4161 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4162 if (IS_ERR(root)) {
4163 ret = -EIO;
4164 break;
4168 trans = btrfs_start_transaction(search_root, 0);
4169 if (IS_ERR(trans)) {
4170 ret = PTR_ERR(trans);
4171 break;
4174 path->lowest_level = btrfs_header_level(buf);
4175 path->skip_check_block = 1;
4176 if (path->lowest_level)
4177 btrfs_node_key_to_cpu(buf, &key, 0);
4178 else
4179 btrfs_item_key_to_cpu(buf, &key, 0);
4180 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4181 if (ret) {
4182 ret = -EIO;
4183 btrfs_commit_transaction(trans, search_root);
4184 break;
4186 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4187 ret = fix_key_order(trans, search_root, path);
4188 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4189 ret = fix_item_offset(trans, search_root, path);
4190 if (ret) {
4191 btrfs_commit_transaction(trans, search_root);
4192 break;
4194 btrfs_release_path(path);
4195 btrfs_commit_transaction(trans, search_root);
4197 ulist_free(roots);
4198 btrfs_free_path(path);
4199 return ret;
/*
 * Validate a single tree block @buf against its extent record.
 *
 * Records the block's generation, first key and level into the extent
 * record, runs the leaf/node sanity checks, and verifies the header owner
 * for non-full-backref blocks.
 *
 * Returns 0 on success, 1 if no extent record covers the block, -EIO on an
 * unfixable bad block, or -EAGAIN if a repair COWed blocks and the caller
 * must restart its scan.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		/* remember the block's first key for later reporting */
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cow'ed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
4265 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4266 u64 parent, u64 root)
4268 struct list_head *cur = rec->backrefs.next;
4269 struct extent_backref *node;
4270 struct tree_backref *back;
4272 while(cur != &rec->backrefs) {
4273 node = list_entry(cur, struct extent_backref, list);
4274 cur = cur->next;
4275 if (node->is_data)
4276 continue;
4277 back = (struct tree_backref *)node;
4278 if (parent > 0) {
4279 if (!node->full_backref)
4280 continue;
4281 if (parent == back->parent)
4282 return back;
4283 } else {
4284 if (node->full_backref)
4285 continue;
4286 if (back->root == root)
4287 return back;
4290 return NULL;
4293 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4294 u64 parent, u64 root)
4296 struct tree_backref *ref = malloc(sizeof(*ref));
4297 memset(&ref->node, 0, sizeof(ref->node));
4298 if (parent > 0) {
4299 ref->parent = parent;
4300 ref->node.full_backref = 1;
4301 } else {
4302 ref->root = root;
4303 ref->node.full_backref = 0;
4305 list_add_tail(&ref->node.list, &rec->backrefs);
4307 return ref;
4310 static struct data_backref *find_data_backref(struct extent_record *rec,
4311 u64 parent, u64 root,
4312 u64 owner, u64 offset,
4313 int found_ref,
4314 u64 disk_bytenr, u64 bytes)
4316 struct list_head *cur = rec->backrefs.next;
4317 struct extent_backref *node;
4318 struct data_backref *back;
4320 while(cur != &rec->backrefs) {
4321 node = list_entry(cur, struct extent_backref, list);
4322 cur = cur->next;
4323 if (!node->is_data)
4324 continue;
4325 back = (struct data_backref *)node;
4326 if (parent > 0) {
4327 if (!node->full_backref)
4328 continue;
4329 if (parent == back->parent)
4330 return back;
4331 } else {
4332 if (node->full_backref)
4333 continue;
4334 if (back->root == root && back->owner == owner &&
4335 back->offset == offset) {
4336 if (found_ref && node->found_ref &&
4337 (back->bytes != bytes ||
4338 back->disk_bytenr != disk_bytenr))
4339 continue;
4340 return back;
4344 return NULL;
4347 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4348 u64 parent, u64 root,
4349 u64 owner, u64 offset,
4350 u64 max_size)
4352 struct data_backref *ref = malloc(sizeof(*ref));
4353 memset(&ref->node, 0, sizeof(ref->node));
4354 ref->node.is_data = 1;
4356 if (parent > 0) {
4357 ref->parent = parent;
4358 ref->owner = 0;
4359 ref->offset = 0;
4360 ref->node.full_backref = 1;
4361 } else {
4362 ref->root = root;
4363 ref->owner = owner;
4364 ref->offset = offset;
4365 ref->node.full_backref = 0;
4367 ref->bytes = max_size;
4368 ref->found_ref = 0;
4369 ref->num_refs = 0;
4370 list_add_tail(&ref->node.list, &rec->backrefs);
4371 if (max_size > rec->max_size)
4372 rec->max_size = max_size;
4373 return ref;
/*
 * Check if the type of extent matches with its chunk: a data extent must
 * live in a DATA block group, a metadata extent in a METADATA or SYSTEM
 * one.  Sets rec->wrong_chunk_type on mismatch; silently returns when no
 * block group covers rec->start.
 */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly */
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extent, as it's also marked as metadata, we can only
	 * make sure it's a SYSTEM extent by its backref
	 */
	if (!list_empty(&rec->backrefs)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		/* only the first backref is inspected */
		node = list_entry(rec->backrefs.next, struct extent_backref,
				  list);
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
4426 static int add_extent_rec(struct cache_tree *extent_cache,
4427 struct btrfs_key *parent_key, u64 parent_gen,
4428 u64 start, u64 nr, u64 extent_item_refs,
4429 int is_root, int inc_ref, int set_checked,
4430 int metadata, int extent_rec, u64 max_size)
4432 struct extent_record *rec;
4433 struct cache_extent *cache;
4434 int ret = 0;
4435 int dup = 0;
4437 cache = lookup_cache_extent(extent_cache, start, nr);
4438 if (cache) {
4439 rec = container_of(cache, struct extent_record, cache);
4440 if (inc_ref)
4441 rec->refs++;
4442 if (rec->nr == 1)
4443 rec->nr = max(nr, max_size);
4446 * We need to make sure to reset nr to whatever the extent
4447 * record says was the real size, this way we can compare it to
4448 * the backrefs.
4450 if (extent_rec) {
4451 if (start != rec->start || rec->found_rec) {
4452 struct extent_record *tmp;
4454 dup = 1;
4455 if (list_empty(&rec->list))
4456 list_add_tail(&rec->list,
4457 &duplicate_extents);
4460 * We have to do this song and dance in case we
4461 * find an extent record that falls inside of
4462 * our current extent record but does not have
4463 * the same objectid.
4465 tmp = malloc(sizeof(*tmp));
4466 if (!tmp)
4467 return -ENOMEM;
4468 tmp->start = start;
4469 tmp->max_size = max_size;
4470 tmp->nr = nr;
4471 tmp->found_rec = 1;
4472 tmp->metadata = metadata;
4473 tmp->extent_item_refs = extent_item_refs;
4474 INIT_LIST_HEAD(&tmp->list);
4475 list_add_tail(&tmp->list, &rec->dups);
4476 rec->num_duplicates++;
4477 } else {
4478 rec->nr = nr;
4479 rec->found_rec = 1;
4483 if (extent_item_refs && !dup) {
4484 if (rec->extent_item_refs) {
4485 fprintf(stderr, "block %llu rec "
4486 "extent_item_refs %llu, passed %llu\n",
4487 (unsigned long long)start,
4488 (unsigned long long)
4489 rec->extent_item_refs,
4490 (unsigned long long)extent_item_refs);
4492 rec->extent_item_refs = extent_item_refs;
4494 if (is_root)
4495 rec->is_root = 1;
4496 if (set_checked) {
4497 rec->content_checked = 1;
4498 rec->owner_ref_checked = 1;
4501 if (parent_key)
4502 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4503 if (parent_gen)
4504 rec->parent_generation = parent_gen;
4506 if (rec->max_size < max_size)
4507 rec->max_size = max_size;
4510 * A metadata extent can't cross stripe_len boundary, otherwise
4511 * kernel scrub won't be able to handle it.
4512 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4513 * it.
4515 if (metadata && check_crossing_stripes(rec->start,
4516 rec->max_size))
4517 rec->crossing_stripes = 1;
4518 check_extent_type(rec);
4519 maybe_free_extent_rec(extent_cache, rec);
4520 return ret;
4522 rec = malloc(sizeof(*rec));
4523 rec->start = start;
4524 rec->max_size = max_size;
4525 rec->nr = max(nr, max_size);
4526 rec->found_rec = !!extent_rec;
4527 rec->content_checked = 0;
4528 rec->owner_ref_checked = 0;
4529 rec->num_duplicates = 0;
4530 rec->metadata = metadata;
4531 rec->flag_block_full_backref = -1;
4532 rec->bad_full_backref = 0;
4533 rec->crossing_stripes = 0;
4534 rec->wrong_chunk_type = 0;
4535 INIT_LIST_HEAD(&rec->backrefs);
4536 INIT_LIST_HEAD(&rec->dups);
4537 INIT_LIST_HEAD(&rec->list);
4539 if (is_root)
4540 rec->is_root = 1;
4541 else
4542 rec->is_root = 0;
4544 if (inc_ref)
4545 rec->refs = 1;
4546 else
4547 rec->refs = 0;
4549 if (extent_item_refs)
4550 rec->extent_item_refs = extent_item_refs;
4551 else
4552 rec->extent_item_refs = 0;
4554 if (parent_key)
4555 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556 else
4557 memset(&rec->parent_key, 0, sizeof(*parent_key));
4559 if (parent_gen)
4560 rec->parent_generation = parent_gen;
4561 else
4562 rec->parent_generation = 0;
4564 rec->cache.start = start;
4565 rec->cache.size = nr;
4566 ret = insert_cache_extent(extent_cache, &rec->cache);
4567 BUG_ON(ret);
4568 bytes_used += nr;
4569 if (set_checked) {
4570 rec->content_checked = 1;
4571 rec->owner_ref_checked = 1;
4574 if (metadata)
4575 if (check_crossing_stripes(rec->start, rec->max_size))
4576 rec->crossing_stripes = 1;
4577 check_extent_type(rec);
4578 return ret;
4581 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4582 u64 parent, u64 root, int found_ref)
4584 struct extent_record *rec;
4585 struct tree_backref *back;
4586 struct cache_extent *cache;
4588 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4589 if (!cache) {
4590 add_extent_rec(extent_cache, NULL, 0, bytenr,
4591 1, 0, 0, 0, 0, 1, 0, 0);
4592 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4593 if (!cache)
4594 abort();
4597 rec = container_of(cache, struct extent_record, cache);
4598 if (rec->start != bytenr) {
4599 abort();
4602 back = find_tree_backref(rec, parent, root);
4603 if (!back)
4604 back = alloc_tree_backref(rec, parent, root);
4606 if (found_ref) {
4607 if (back->node.found_ref) {
4608 fprintf(stderr, "Extent back ref already exists "
4609 "for %llu parent %llu root %llu \n",
4610 (unsigned long long)bytenr,
4611 (unsigned long long)parent,
4612 (unsigned long long)root);
4614 back->node.found_ref = 1;
4615 } else {
4616 if (back->node.found_extent_tree) {
4617 fprintf(stderr, "Extent back ref already exists "
4618 "for %llu parent %llu root %llu \n",
4619 (unsigned long long)bytenr,
4620 (unsigned long long)parent,
4621 (unsigned long long)root);
4623 back->node.found_extent_tree = 1;
4625 check_extent_type(rec);
4626 maybe_free_extent_rec(extent_cache, rec);
4627 return 0;
4630 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4631 u64 parent, u64 root, u64 owner, u64 offset,
4632 u32 num_refs, int found_ref, u64 max_size)
4634 struct extent_record *rec;
4635 struct data_backref *back;
4636 struct cache_extent *cache;
4638 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639 if (!cache) {
4640 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4641 0, 0, max_size);
4642 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4643 if (!cache)
4644 abort();
4647 rec = container_of(cache, struct extent_record, cache);
4648 if (rec->max_size < max_size)
4649 rec->max_size = max_size;
4652 * If found_ref is set then max_size is the real size and must match the
4653 * existing refs. So if we have already found a ref then we need to
4654 * make sure that this ref matches the existing one, otherwise we need
4655 * to add a new backref so we can notice that the backrefs don't match
4656 * and we need to figure out who is telling the truth. This is to
4657 * account for that awful fsync bug I introduced where we'd end up with
4658 * a btrfs_file_extent_item that would have its length include multiple
4659 * prealloc extents or point inside of a prealloc extent.
4661 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4662 bytenr, max_size);
4663 if (!back)
4664 back = alloc_data_backref(rec, parent, root, owner, offset,
4665 max_size);
4667 if (found_ref) {
4668 BUG_ON(num_refs != 1);
4669 if (back->node.found_ref)
4670 BUG_ON(back->bytes != max_size);
4671 back->node.found_ref = 1;
4672 back->found_ref += 1;
4673 back->bytes = max_size;
4674 back->disk_bytenr = bytenr;
4675 rec->refs += 1;
4676 rec->content_checked = 1;
4677 rec->owner_ref_checked = 1;
4678 } else {
4679 if (back->node.found_extent_tree) {
4680 fprintf(stderr, "Extent back ref already exists "
4681 "for %llu parent %llu root %llu "
4682 "owner %llu offset %llu num_refs %lu\n",
4683 (unsigned long long)bytenr,
4684 (unsigned long long)parent,
4685 (unsigned long long)root,
4686 (unsigned long long)owner,
4687 (unsigned long long)offset,
4688 (unsigned long)num_refs);
4690 back->num_refs = num_refs;
4691 back->node.found_extent_tree = 1;
4693 maybe_free_extent_rec(extent_cache, rec);
4694 return 0;
4697 static int add_pending(struct cache_tree *pending,
4698 struct cache_tree *seen, u64 bytenr, u32 size)
4700 int ret;
4701 ret = add_cache_extent(seen, bytenr, size);
4702 if (ret)
4703 return ret;
4704 add_cache_extent(pending, bytenr, size);
4705 return 0;
/*
 * Choose the next batch of tree-block addresses to process.
 *
 * Priority order:
 *   1. anything queued for readahead (*reada_bits set, one entry);
 *   2. node blocks near @last (backs up 32K so neighbours batch together);
 *   3. leftover pending blocks when no nodes remain.
 *
 * Fills @bits (capacity @bits_nr) and returns the number of entries,
 * 0 when nothing is left.
 */
4708 static int pick_next_pending(struct cache_tree *pending,
4709 struct cache_tree *reada,
4710 struct cache_tree *nodes,
4711 u64 last, struct block_info *bits, int bits_nr,
4712 int *reada_bits)
4714 unsigned long node_start = last;
4715 struct cache_extent *cache;
4716 int ret;
/* readahead requests take absolute priority and go out one at a time */
4718 cache = search_cache_extent(reada, 0);
4719 if (cache) {
4720 bits[0].start = cache->start;
4721 bits[0].size = cache->size;
4722 *reada_bits = 1;
4723 return 1;
4725 *reada_bits = 0;
/* back up 32K so blocks just before @last land in the same batch */
4726 if (node_start > 32768)
4727 node_start -= 32768;
4729 cache = search_cache_extent(nodes, node_start);
4730 if (!cache)
4731 cache = search_cache_extent(nodes, 0);
/* no node blocks at all: drain the pending tree instead */
4733 if (!cache) {
4734 cache = search_cache_extent(pending, 0);
4735 if (!cache)
4736 return 0;
4737 ret = 0;
4738 do {
4739 bits[ret].start = cache->start;
4740 bits[ret].size = cache->size;
4741 cache = next_cache_extent(cache);
4742 ret++;
4743 } while (cache && ret < bits_nr);
4744 return ret;
/* fill the batch with consecutive node blocks */
4747 ret = 0;
4748 do {
4749 bits[ret].start = cache->start;
4750 bits[ret].size = cache->size;
4751 cache = next_cache_extent(cache);
4752 ret++;
4753 } while (cache && ret < bits_nr);
/*
 * If there is plenty of room left, top the batch up with pending
 * blocks that sit within 32K of the batch start, preserving locality.
 */
4755 if (bits_nr - ret > 8) {
4756 u64 lookup = bits[0].start + bits[0].size;
4757 struct cache_extent *next;
4758 next = search_cache_extent(pending, lookup);
4759 while(next) {
4760 if (next->start - lookup > 32768)
4761 break;
4762 bits[ret].start = next->start;
4763 bits[ret].size = next->size;
4764 lookup = next->start + next->size;
4765 ret++;
4766 if (ret == bits_nr)
4767 break;
4768 next = next_cache_extent(next);
4769 if (!next)
4770 break;
4773 return ret;
4776 static void free_chunk_record(struct cache_extent *cache)
4778 struct chunk_record *rec;
4780 rec = container_of(cache, struct chunk_record, cache);
4781 list_del_init(&rec->list);
4782 list_del_init(&rec->dextents);
4783 free(rec);
/* Release every chunk_record held in @chunk_cache. */
4786 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4788 cache_tree_free_extents(chunk_cache, free_chunk_record);
4791 static void free_device_record(struct rb_node *node)
4793 struct device_record *rec;
4795 rec = container_of(node, struct device_record, node);
4796 free(rec);
/* Expands to free_device_cache(): frees every device_record in the rb-tree. */
4799 FREE_RB_BASED_TREE(device_cache, free_device_record);
4801 int insert_block_group_record(struct block_group_tree *tree,
4802 struct block_group_record *bg_rec)
4804 int ret;
4806 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4807 if (ret)
4808 return ret;
4810 list_add_tail(&bg_rec->list, &tree->block_groups);
4811 return 0;
4814 static void free_block_group_record(struct cache_extent *cache)
4816 struct block_group_record *rec;
4818 rec = container_of(cache, struct block_group_record, cache);
4819 list_del_init(&rec->list);
4820 free(rec);
/* Release every block_group_record held in @tree. */
4823 void free_block_group_tree(struct block_group_tree *tree)
4825 cache_tree_free_extents(&tree->tree, free_block_group_record);
4828 int insert_device_extent_record(struct device_extent_tree *tree,
4829 struct device_extent_record *de_rec)
4831 int ret;
4834 * Device extent is a bit different from the other extents, because
4835 * the extents which belong to the different devices may have the
4836 * same start and size, so we need use the special extent cache
4837 * search/insert functions.
4839 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4840 if (ret)
4841 return ret;
4843 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4844 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4845 return 0;
4848 static void free_device_extent_record(struct cache_extent *cache)
4850 struct device_extent_record *rec;
4852 rec = container_of(cache, struct device_extent_record, cache);
4853 if (!list_empty(&rec->chunk_list))
4854 list_del_init(&rec->chunk_list);
4855 if (!list_empty(&rec->device_list))
4856 list_del_init(&rec->device_list);
4857 free(rec);
/* Release every device_extent_record held in @tree. */
4860 void free_device_extent_tree(struct device_extent_tree *tree)
4862 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 (pre-2.6.29 format) extent ref item at @slot into the
 * in-memory backref records.  Refs owned by tree roots (objectid below
 * BTRFS_FIRST_FREE_OBJECTID) become tree backrefs; everything else is
 * treated as a data backref.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	else
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	return 0;
}
#endif
/*
 * Allocate and fill a chunk_record from the CHUNK_ITEM at @slot.
 * The cache key is [key->offset, chunk length), i.e. the logical range
 * the chunk maps.  The record's stripe array is sized from the on-disk
 * stripe count.  Exits the process on allocation failure (fsck-wide
 * convention for unrecoverable OOM).  Caller owns the returned record.
 */
4884 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4885 struct btrfs_key *key,
4886 int slot)
4888 struct btrfs_chunk *ptr;
4889 struct chunk_record *rec;
4890 int num_stripes, i;
4892 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4893 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* record size depends on the trailing variable-length stripe array */
4895 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4896 if (!rec) {
4897 fprintf(stderr, "memory allocation failed\n");
4898 exit(-1);
4901 INIT_LIST_HEAD(&rec->list);
4902 INIT_LIST_HEAD(&rec->dextents);
4903 rec->bg_rec = NULL;
/* chunk items are keyed by logical start in key->offset */
4905 rec->cache.start = key->offset;
4906 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4908 rec->generation = btrfs_header_generation(leaf);
4910 rec->objectid = key->objectid;
4911 rec->type = key->type;
4912 rec->offset = key->offset;
4914 rec->length = rec->cache.size;
4915 rec->owner = btrfs_chunk_owner(leaf, ptr);
4916 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4917 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4918 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4919 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4920 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4921 rec->num_stripes = num_stripes;
4922 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy each stripe's devid, physical offset and device uuid */
4924 for (i = 0; i < rec->num_stripes; ++i) {
4925 rec->stripes[i].devid =
4926 btrfs_stripe_devid_nr(leaf, ptr, i);
4927 rec->stripes[i].offset =
4928 btrfs_stripe_offset_nr(leaf, ptr, i);
4929 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4930 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4931 BTRFS_UUID_SIZE);
4934 return rec;
4937 static int process_chunk_item(struct cache_tree *chunk_cache,
4938 struct btrfs_key *key, struct extent_buffer *eb,
4939 int slot)
4941 struct chunk_record *rec;
4942 int ret = 0;
4944 rec = btrfs_new_chunk_record(eb, key, slot);
4945 ret = insert_cache_extent(chunk_cache, &rec->cache);
4946 if (ret) {
4947 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4948 rec->offset, rec->length);
4949 free(rec);
4952 return ret;
4955 static int process_device_item(struct rb_root *dev_cache,
4956 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4958 struct btrfs_dev_item *ptr;
4959 struct device_record *rec;
4960 int ret = 0;
4962 ptr = btrfs_item_ptr(eb,
4963 slot, struct btrfs_dev_item);
4965 rec = malloc(sizeof(*rec));
4966 if (!rec) {
4967 fprintf(stderr, "memory allocation failed\n");
4968 return -ENOMEM;
4971 rec->devid = key->offset;
4972 rec->generation = btrfs_header_generation(eb);
4974 rec->objectid = key->objectid;
4975 rec->type = key->type;
4976 rec->offset = key->offset;
4978 rec->devid = btrfs_device_id(eb, ptr);
4979 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4980 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4982 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4983 if (ret) {
4984 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4985 free(rec);
4988 return ret;
4991 struct block_group_record *
4992 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4993 int slot)
4995 struct btrfs_block_group_item *ptr;
4996 struct block_group_record *rec;
4998 rec = calloc(1, sizeof(*rec));
4999 if (!rec) {
5000 fprintf(stderr, "memory allocation failed\n");
5001 exit(-1);
5004 rec->cache.start = key->objectid;
5005 rec->cache.size = key->offset;
5007 rec->generation = btrfs_header_generation(leaf);
5009 rec->objectid = key->objectid;
5010 rec->type = key->type;
5011 rec->offset = key->offset;
5013 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5014 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5016 INIT_LIST_HEAD(&rec->list);
5018 return rec;
5021 static int process_block_group_item(struct block_group_tree *block_group_cache,
5022 struct btrfs_key *key,
5023 struct extent_buffer *eb, int slot)
5025 struct block_group_record *rec;
5026 int ret = 0;
5028 rec = btrfs_new_block_group_record(eb, key, slot);
5029 ret = insert_block_group_record(block_group_cache, rec);
5030 if (ret) {
5031 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5032 rec->objectid, rec->offset);
5033 free(rec);
5036 return ret;
/*
 * Allocate a device_extent_record from the DEV_EXTENT item at @slot.
 * Device extents are keyed by (devid, physical offset): cache.objectid
 * is the device id and cache.start the physical byte offset on that
 * device.  Exits the process on allocation failure.  Caller owns the
 * returned record.
 */
5039 struct device_extent_record *
5040 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5041 struct btrfs_key *key, int slot)
5043 struct device_extent_record *rec;
5044 struct btrfs_dev_extent *ptr;
5046 rec = calloc(1, sizeof(*rec));
5047 if (!rec) {
5048 fprintf(stderr, "memory allocation failed\n");
5049 exit(-1);
/* two-part cache key: device id + physical start (see insert_cache_extent2) */
5052 rec->cache.objectid = key->objectid;
5053 rec->cache.start = key->offset;
5055 rec->generation = btrfs_header_generation(leaf);
5057 rec->objectid = key->objectid;
5058 rec->type = key->type;
5059 rec->offset = key->offset;
5061 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
/* note: 'chunk_objecteid' spelling matches the struct field declaration */
5062 rec->chunk_objecteid =
5063 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5064 rec->chunk_offset =
5065 btrfs_dev_extent_chunk_offset(leaf, ptr);
5066 rec->length = btrfs_dev_extent_length(leaf, ptr);
5067 rec->cache.size = rec->length;
/* linked onto the orphan lists later by insert_device_extent_record() */
5069 INIT_LIST_HEAD(&rec->chunk_list);
5070 INIT_LIST_HEAD(&rec->device_list);
5072 return rec;
5075 static int
5076 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5077 struct btrfs_key *key, struct extent_buffer *eb,
5078 int slot)
5080 struct device_extent_record *rec;
5081 int ret;
5083 rec = btrfs_new_device_extent_record(eb, key, slot);
5084 ret = insert_device_extent_record(dev_extent_cache, rec);
5085 if (ret) {
5086 fprintf(stderr,
5087 "Device extent[%llu, %llu, %llu] existed.\n",
5088 rec->objectid, rec->offset, rec->length);
5089 free(rec);
5092 return ret;
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot: create/update the
 * extent record and walk every inline backref, adding tree or data
 * backrefs to @extent_cache.  Handles the pre-2.6.29 v0 item layout
 * when compiled with BTRFS_COMPAT_EXTENT_TREE_V0.  Returns 0 (corrupt
 * inline refs are reported and the walk aborted early).
 */
5095 static int process_extent_item(struct btrfs_root *root,
5096 struct cache_tree *extent_cache,
5097 struct extent_buffer *eb, int slot)
5099 struct btrfs_extent_item *ei;
5100 struct btrfs_extent_inline_ref *iref;
5101 struct btrfs_extent_data_ref *dref;
5102 struct btrfs_shared_data_ref *sref;
5103 struct btrfs_key key;
5104 unsigned long end;
5105 unsigned long ptr;
5106 int type;
5107 u32 item_size = btrfs_item_size_nr(eb, slot);
5108 u64 refs = 0;
5109 u64 offset;
5110 u64 num_bytes;
5111 int metadata = 0;
5113 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys store the level in offset, so size is implicit */
5115 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5116 metadata = 1;
5117 num_bytes = root->leafsize;
5118 } else {
5119 num_bytes = key.offset;
/* undersized item: must be the old v0 format (no flags/inline refs) */
5122 if (item_size < sizeof(*ei)) {
5123 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5124 struct btrfs_extent_item_v0 *ei0;
5125 BUG_ON(item_size != sizeof(*ei0));
5126 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5127 refs = btrfs_extent_refs_v0(eb, ei0);
5128 #else
5129 BUG();
5130 #endif
5131 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5132 num_bytes, refs, 0, 0, 0, metadata, 1,
5133 num_bytes);
5136 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5137 refs = btrfs_extent_refs(eb, ei);
5138 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5139 metadata = 1;
5140 else
5141 metadata = 0;
5143 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5144 refs, 0, 0, 0, metadata, 1, num_bytes);
/* inline refs start right after the item; EXTENT_ITEM tree blocks also
 * carry a btrfs_tree_block_info header before them */
5146 ptr = (unsigned long)(ei + 1);
5147 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5148 key.type == BTRFS_EXTENT_ITEM_KEY)
5149 ptr += sizeof(struct btrfs_tree_block_info);
5151 end = (unsigned long)ei + item_size;
5152 while (ptr < end) {
5153 iref = (struct btrfs_extent_inline_ref *)ptr;
5154 type = btrfs_extent_inline_ref_type(eb, iref);
5155 offset = btrfs_extent_inline_ref_offset(eb, iref);
5156 switch (type) {
5157 case BTRFS_TREE_BLOCK_REF_KEY:
5158 add_tree_backref(extent_cache, key.objectid,
5159 0, offset, 0);
5160 break;
5161 case BTRFS_SHARED_BLOCK_REF_KEY:
5162 add_tree_backref(extent_cache, key.objectid,
5163 offset, 0, 0);
5164 break;
5165 case BTRFS_EXTENT_DATA_REF_KEY:
5166 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5167 add_data_backref(extent_cache, key.objectid, 0,
5168 btrfs_extent_data_ref_root(eb, dref),
5169 btrfs_extent_data_ref_objectid(eb,
5170 dref),
5171 btrfs_extent_data_ref_offset(eb, dref),
5172 btrfs_extent_data_ref_count(eb, dref),
5173 0, num_bytes);
5174 break;
5175 case BTRFS_SHARED_DATA_REF_KEY:
5176 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5177 add_data_backref(extent_cache, key.objectid, offset,
5178 0, 0, 0,
5179 btrfs_shared_data_ref_count(eb, sref),
5180 0, num_bytes);
5181 break;
5182 default:
/* unknown ref type: item is corrupt, stop walking this extent */
5183 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5184 key.objectid, key.type, num_bytes);
5185 goto out;
5187 ptr += btrfs_extent_inline_ref_size(type);
5189 WARN_ON(ptr > end);
5190 out:
5191 return 0;
/*
 * Verify that the free-space cache of @cache holds exactly one entry
 * covering [@offset, @offset + @bytes).  Superblock mirror copies are
 * never part of free space, so the range is first carved around any
 * superblock that maps into it (recursing for a hole in the middle).
 * A matching entry is unlinked and freed so that leftovers can be
 * detected by the caller.  Returns 0 on match, -EINVAL on mismatch, or
 * a btrfs_rmap_block() error.
 */
5194 static int check_cache_range(struct btrfs_root *root,
5195 struct btrfs_block_group_cache *cache,
5196 u64 offset, u64 bytes)
5198 struct btrfs_free_space *entry;
5199 u64 *logical;
5200 u64 bytenr;
5201 int stripe_len;
5202 int i, nr, ret;
/* carve every superblock mirror location out of the checked range */
5204 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5205 bytenr = btrfs_sb_offset(i);
5206 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5207 cache->key.objectid, bytenr, 0,
5208 &logical, &nr, &stripe_len);
5209 if (ret)
5210 return ret;
5212 while (nr--) {
/* skip stripes that do not intersect [offset, offset+bytes) */
5213 if (logical[nr] + stripe_len <= offset)
5214 continue;
5215 if (offset + bytes <= logical[nr])
5216 continue;
5217 if (logical[nr] == offset) {
5218 if (stripe_len >= bytes) {
5219 kfree(logical);
5220 return 0;
5222 bytes -= stripe_len;
5223 offset += stripe_len;
5224 } else if (logical[nr] < offset) {
5225 if (logical[nr] + stripe_len >=
5226 offset + bytes) {
5227 kfree(logical);
5228 return 0;
5230 bytes = (offset + bytes) -
5231 (logical[nr] + stripe_len);
5232 offset = logical[nr] + stripe_len;
5233 } else {
/*
 * Could be tricky, the super may land in the
 * middle of the area we're checking. First
 * check the easiest case, it's at the end.
 */
5239 if (logical[nr] + stripe_len >=
5240 bytes + offset) {
5241 bytes = logical[nr] - offset;
5242 continue;
5245 /* Check the left side */
5246 ret = check_cache_range(root, cache,
5247 offset,
5248 logical[nr] - offset);
5249 if (ret) {
5250 kfree(logical);
5251 return ret;
5254 /* Now we continue with the right side */
5255 bytes = (offset + bytes) -
5256 (logical[nr] + stripe_len);
5257 offset = logical[nr] + stripe_len;
5261 kfree(logical);
/* the remaining range must match one free-space entry exactly */
5264 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5265 if (!entry) {
5266 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5267 offset, offset+bytes);
5268 return -EINVAL;
5271 if (entry->offset != offset) {
5272 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5273 entry->offset);
5274 return -EINVAL;
5277 if (entry->bytes != bytes) {
5278 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5279 bytes, entry->bytes, offset);
5280 return -EINVAL;
/* consume the entry: whatever remains afterwards is an error */
5283 unlink_free_space(cache->free_space_ctl, entry);
5284 free(entry);
5285 return 0;
/*
 * Cross-check the loaded free-space cache of one block group against
 * the extent tree: every gap between allocated extents must appear as
 * exactly one free-space entry (check_cache_range() consumes matching
 * entries).  After the walk the free-space rbtree must be empty, or the
 * cache is inconsistent.  Returns 0 on success, negative on error.
 */
5288 static int verify_space_cache(struct btrfs_root *root,
5289 struct btrfs_block_group_cache *cache)
5291 struct btrfs_path *path;
5292 struct extent_buffer *leaf;
5293 struct btrfs_key key;
5294 u64 last;
5295 int ret = 0;
5297 path = btrfs_alloc_path();
5298 if (!path)
5299 return -ENOMEM;
5301 root = root->fs_info->extent_root;
/* nothing below the first superblock can be free space */
5303 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5305 key.objectid = last;
5306 key.offset = 0;
5307 key.type = BTRFS_EXTENT_ITEM_KEY;
5309 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5310 if (ret < 0)
5311 goto out;
5312 ret = 0;
5313 while (1) {
5314 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5315 ret = btrfs_next_leaf(root, path);
5316 if (ret < 0)
5317 goto out;
5318 if (ret > 0) {
5319 ret = 0;
5320 break;
5323 leaf = path->nodes[0];
5324 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* walked past the end of this block group */
5325 if (key.objectid >= cache->key.offset + cache->key.objectid)
5326 break;
5327 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5328 key.type != BTRFS_METADATA_ITEM_KEY) {
5329 path->slots[0]++;
5330 continue;
/* extent starts exactly at 'last': no gap, just advance */
5333 if (last == key.objectid) {
5334 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5335 last = key.objectid + key.offset;
5336 else
5337 last = key.objectid + root->leafsize;
5338 path->slots[0]++;
5339 continue;
/* the gap [last, key.objectid) must be a free-space entry */
5342 ret = check_cache_range(root, cache, last,
5343 key.objectid - last);
5344 if (ret)
5345 break;
5346 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5347 last = key.objectid + key.offset;
5348 else
5349 last = key.objectid + root->leafsize;
5350 path->slots[0]++;
/* trailing free space up to the end of the block group */
5353 if (last < cache->key.objectid + cache->key.offset)
5354 ret = check_cache_range(root, cache, last,
5355 cache->key.objectid +
5356 cache->key.offset - last);
5358 out:
5359 btrfs_free_path(path);
/* every free-space entry should have been consumed by now */
5361 if (!ret &&
5362 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5363 fprintf(stderr, "There are still entries left in the space "
5364 "cache\n");
5365 ret = -EINVAL;
5368 return ret;
/*
 * Validate the v1 free-space cache of every block group.  Skipped
 * entirely when the cache generation does not match the superblock
 * generation (the kernel would invalidate it anyway).  Returns -EINVAL
 * if any block group's cache fails verification, 0 otherwise.
 */
5371 static int check_space_cache(struct btrfs_root *root)
5373 struct btrfs_block_group_cache *cache;
5374 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5375 int ret;
5376 int error = 0;
5378 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5379 btrfs_super_generation(root->fs_info->super_copy) !=
5380 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5381 printf("cache and super generation don't match, space cache "
5382 "will be invalidated\n");
5383 return 0;
5386 if (ctx.progress_enabled) {
5387 ctx.tp = TASK_FREE_SPACE;
5388 task_start(ctx.info);
/* iterate all block groups in address order */
5391 while (1) {
5392 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5393 if (!cache)
5394 break;
5396 start = cache->key.objectid + cache->key.offset;
5397 if (!cache->free_space_ctl) {
5398 if (btrfs_init_free_space_ctl(cache,
5399 root->sectorsize)) {
/*
 * NOTE(review): this -ENOMEM is stored in 'ret' but the function
 * returns 'error ? -EINVAL : 0' below, so the OOM is silently
 * dropped — looks like a bug; confirm against upstream.
 */
5400 ret = -ENOMEM;
5401 break;
5403 } else {
5404 btrfs_remove_free_space_cache(cache);
/* no cache on disk (or load failed): nothing to verify */
5407 ret = load_free_space_cache(root->fs_info, cache);
5408 if (!ret)
5409 continue;
5411 ret = verify_space_cache(root, cache);
5412 if (ret) {
5413 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5414 cache->key.objectid);
5415 error++;
5419 task_stop(ctx.info);
5421 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of file data at @bytenr and verify each sector's
 * checksum against the csum item whose payload starts at @leaf_offset
 * in @eb.  On a mismatch the next mirror is tried (re-reading from the
 * failing offset) until mirrors are exhausted; mismatches are reported
 * but do not fail the call.  Returns 0, or a read error / -EINVAL when
 * @num_bytes is not sector-aligned / -ENOMEM.
 */
5424 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5425 u64 num_bytes, unsigned long leaf_offset,
5426 struct extent_buffer *eb) {
5428 u64 offset = 0;
5429 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5430 char *data;
5431 unsigned long csum_offset;
5432 u32 csum;
5433 u32 csum_expected;
5434 u64 read_len;
5435 u64 data_checked = 0;
5436 u64 tmp;
5437 int ret = 0;
5438 int mirror;
5439 int num_copies;
5441 if (num_bytes % root->sectorsize)
5442 return -EINVAL;
5444 data = malloc(num_bytes);
5445 if (!data)
5446 return -ENOMEM;
5448 while (offset < num_bytes) {
5449 mirror = 0;
5450 again:
5451 read_len = num_bytes - offset;
5452 /* read as much space once a time */
5453 ret = read_extent_data(root, data + offset,
5454 bytenr + offset, &read_len, mirror);
5455 if (ret)
5456 goto out;
5457 data_checked = 0;
5458 /* verify every 4k data's checksum */
5459 while (data_checked < read_len) {
5460 csum = ~(u32)0;
5461 tmp = offset + data_checked;
5463 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5464 csum, root->sectorsize);
5465 btrfs_csum_final(csum, (char *)&csum);
/* expected csum lives at sector-index * csum_size into the item */
5467 csum_offset = leaf_offset +
5468 tmp / root->sectorsize * csum_size;
5469 read_extent_buffer(eb, (char *)&csum_expected,
5470 csum_offset, csum_size);
5471 /* try another mirror */
5472 if (csum != csum_expected) {
5473 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5474 mirror, bytenr + tmp,
5475 csum, csum_expected);
5476 num_copies = btrfs_num_copies(
5477 &root->fs_info->mapping_tree,
5478 bytenr, num_bytes);
5479 if (mirror < num_copies - 1) {
5480 mirror += 1;
5481 goto again;
5484 data_checked += root->sectorsize;
5486 offset += read_len;
5488 out:
5489 free(data);
5490 return ret;
/*
 * Verify the extent tree covers every byte of [@bytenr, @bytenr +
 * @num_bytes).  Walks EXTENT_ITEMs, trimming the queried range as
 * overlapping extents are found; recurses (via 'again'/self-call) when
 * an extent splits the range in the middle.  Returns 0 when fully
 * covered, 1 when some bytes have no extent record, negative on error.
 */
5493 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5494 u64 num_bytes)
5496 struct btrfs_path *path;
5497 struct extent_buffer *leaf;
5498 struct btrfs_key key;
5499 int ret;
5501 path = btrfs_alloc_path();
5502 if (!path) {
5503 fprintf(stderr, "Error allocing path\n");
5504 return -ENOMEM;
5507 key.objectid = bytenr;
5508 key.type = BTRFS_EXTENT_ITEM_KEY;
/* offset (u64)-1: land on the last item with this objectid */
5509 key.offset = (u64)-1;
5511 again:
5512 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5513 0, 0);
5514 if (ret < 0) {
5515 fprintf(stderr, "Error looking up extent record %d\n", ret);
5516 btrfs_free_path(path);
5517 return ret;
5518 } else if (ret) {
/* not an exact hit: step back to the item preceding the slot */
5519 if (path->slots[0] > 0) {
5520 path->slots[0]--;
5521 } else {
5522 ret = btrfs_prev_leaf(root, path);
5523 if (ret < 0) {
5524 goto out;
5525 } else if (ret > 0) {
5526 ret = 0;
5527 goto out;
5532 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/*
 * Block group items come before extent items if they have the same
 * bytenr, so walk back one more just in case. Dear future traveler,
 * first congrats on mastering time travel. Now if it's not too much
 * trouble could you go back to 2006 and tell Chris to make the
 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
 * EXTENT_ITEM_KEY please?
 */
5542 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5543 if (path->slots[0] > 0) {
5544 path->slots[0]--;
5545 } else {
5546 ret = btrfs_prev_leaf(root, path);
5547 if (ret < 0) {
5548 goto out;
5549 } else if (ret > 0) {
5550 ret = 0;
5551 goto out;
5554 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* consume the range, walking extents in address order */
5557 while (num_bytes) {
5558 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5559 ret = btrfs_next_leaf(root, path);
5560 if (ret < 0) {
5561 fprintf(stderr, "Error going to next leaf "
5562 "%d\n", ret);
5563 btrfs_free_path(path);
5564 return ret;
5565 } else if (ret) {
5566 break;
5569 leaf = path->nodes[0];
5570 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5571 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5572 path->slots[0]++;
5573 continue;
5575 if (key.objectid + key.offset < bytenr) {
5576 path->slots[0]++;
5577 continue;
5579 if (key.objectid > bytenr + num_bytes)
5580 break;
5582 if (key.objectid == bytenr) {
5583 if (key.offset >= num_bytes) {
5584 num_bytes = 0;
5585 break;
5587 num_bytes -= key.offset;
5588 bytenr += key.offset;
5589 } else if (key.objectid < bytenr) {
5590 if (key.objectid + key.offset >= bytenr + num_bytes) {
5591 num_bytes = 0;
5592 break;
5594 num_bytes = (bytenr + num_bytes) -
5595 (key.objectid + key.offset);
5596 bytenr = key.objectid + key.offset;
5597 } else {
5598 if (key.objectid + key.offset < bytenr + num_bytes) {
5599 u64 new_start = key.objectid + key.offset;
5600 u64 new_bytes = bytenr + num_bytes - new_start;
/*
 * Weird case, the extent is in the middle of
 * our range, we'll have to search one side
 * and then the other. Not sure if this happens
 * in real life, but no harm in coding it up
 * anyway just in case.
 */
5609 btrfs_release_path(path);
5610 ret = check_extent_exists(root, new_start,
5611 new_bytes);
5612 if (ret) {
5613 fprintf(stderr, "Right section didn't "
5614 "have a record\n");
5615 break;
5617 num_bytes = key.objectid - bytenr;
5618 goto again;
5620 num_bytes = key.objectid - bytenr;
5622 path->slots[0]++;
5624 ret = 0;
5626 out:
/* any bytes left uncovered mean a csum exists without an extent */
5627 if (num_bytes && !ret) {
5628 fprintf(stderr, "There are no extents for csum range "
5629 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5630 ret = 1;
5633 btrfs_free_path(path);
5634 return ret;
/*
 * Walk the whole csum tree.  For each csum item: optionally verify the
 * data checksums themselves (when the global check_data_csum is set),
 * and accumulate contiguous csum runs so each run can be checked for a
 * backing extent via check_extent_exists().  Returns the number of
 * range errors found, or a negative errno on tree-access failure.
 */
5637 static int check_csums(struct btrfs_root *root)
5639 struct btrfs_path *path;
5640 struct extent_buffer *leaf;
5641 struct btrfs_key key;
5642 u64 offset = 0, num_bytes = 0;
5643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5644 int errors = 0;
5645 int ret;
5646 u64 data_len;
5647 unsigned long leaf_offset;
5649 root = root->fs_info->csum_root;
5650 if (!extent_buffer_uptodate(root->node)) {
5651 fprintf(stderr, "No valid csum tree found\n");
5652 return -ENOENT;
5655 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5656 key.type = BTRFS_EXTENT_CSUM_KEY;
5657 key.offset = 0;
5659 path = btrfs_alloc_path();
5660 if (!path)
5661 return -ENOMEM;
5663 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5664 if (ret < 0) {
5665 fprintf(stderr, "Error searching csum tree %d\n", ret);
5666 btrfs_free_path(path);
5667 return ret;
5670 if (ret > 0 && path->slots[0])
5671 path->slots[0]--;
5672 ret = 0;
5674 while (1) {
5675 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5676 ret = btrfs_next_leaf(root, path);
5677 if (ret < 0) {
5678 fprintf(stderr, "Error going to next leaf "
5679 "%d\n", ret);
5680 break;
5682 if (ret)
5683 break;
5685 leaf = path->nodes[0];
5687 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5688 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5689 path->slots[0]++;
5690 continue;
/* bytes of data this csum item covers: one csum per sector */
5693 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5694 csum_size) * root->sectorsize;
5695 if (!check_data_csum)
5696 goto skip_csum_check;
5697 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5698 ret = check_extent_csums(root, key.offset, data_len,
5699 leaf_offset, leaf);
5700 if (ret)
5701 break;
5702 skip_csum_check:
/* group contiguous csum ranges; on a gap, verify the finished run */
5703 if (!num_bytes) {
5704 offset = key.offset;
5705 } else if (key.offset != offset + num_bytes) {
5706 ret = check_extent_exists(root, offset, num_bytes);
5707 if (ret) {
5708 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5709 "there is no extent record\n",
5710 offset, offset+num_bytes);
5711 errors++;
5713 offset = key.offset;
5714 num_bytes = 0;
5716 num_bytes += data_len;
5717 path->slots[0]++;
5720 btrfs_free_path(path);
5721 return errors;
5724 static int is_dropped_key(struct btrfs_key *key,
5725 struct btrfs_key *drop_key) {
5726 if (key->objectid < drop_key->objectid)
5727 return 1;
5728 else if (key->objectid == drop_key->objectid) {
5729 if (key->type < drop_key->type)
5730 return 1;
5731 else if (key->type == drop_key->type) {
5732 if (key->offset < drop_key->offset)
5733 return 1;
5736 return 0;
/*
 * Here are the rules for FULL_BACKREF.
 *
 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 *    FULL_BACKREF set.
 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
 *    if it happened after the relocation occurred since we'll have dropped the
 *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
 *    have no real way to know for sure.
 *
 * We process the blocks one root at a time, and we start from the lowest root
 * objectid and go to the highest. So we can just lookup the owner backref for
 * the record and if we don't find it then we know it doesn't exist and we have
 * a FULL BACKREF.
 *
 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
 * be set or not and then we can check later once we've gathered all the refs.
 */
/*
 * Decide the FULL_BACKREF flag for @buf (a block of root @ri) per the
 * rules above and store it in *flags; also marks the cached extent
 * record bad_full_backref when the recorded flag disagrees.  Returns 0.
 */
5759 static int calc_extent_flag(struct btrfs_root *root,
5760 struct cache_tree *extent_cache,
5761 struct extent_buffer *buf,
5762 struct root_item_record *ri,
5763 u64 *flags)
5765 struct extent_record *rec;
5766 struct cache_extent *cache;
5767 struct tree_backref *tback;
5768 u64 owner = 0;
5770 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5771 /* we have added this extent before */
5772 BUG_ON(!cache);
5773 rec = container_of(cache, struct extent_record, cache);
/*
 * Except file/reloc tree, we can not have
 * FULL BACKREF MODE
 */
5779 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5780 goto normal;
/*
 * root node
 */
5784 if (buf->start == ri->bytenr)
5785 goto normal;
5787 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5788 goto full_backref;
5790 owner = btrfs_header_owner(buf);
5791 if (owner == ri->objectid)
5792 goto normal;
/* owner root processed earlier left no backref: rule 2 applies */
5794 tback = find_tree_backref(rec, 0, owner);
5795 if (!tback)
5796 goto full_backref;
5797 normal:
5798 *flags = 0;
/* flag_block_full_backref: -1 = unset; any mismatch is recorded */
5799 if (rec->flag_block_full_backref != -1 &&
5800 rec->flag_block_full_backref != 0)
5801 rec->bad_full_backref = 1;
5802 return 0;
5803 full_backref:
5804 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5805 if (rec->flag_block_full_backref != -1 &&
5806 rec->flag_block_full_backref != 1)
5807 rec->bad_full_backref = 1;
5808 return 0;
/*
 * Pop the next tree block off the pending/nodes queues, read it, sanity
 * check its extent flags against the backrefs recorded so far, and scan
 * every item in it: extent/chunk/device/block-group items feed the various
 * caches, ref items become backrefs, and child node pointers are queued
 * for later traversal.
 *
 * Returns 1 when there is nothing left to process, 0 on success and a
 * negative value on error.
 *
 * NOTE(review): this extract is missing some continuation/brace lines from
 * the original blob view; the retained lines are kept verbatim.
 */
5811 static int run_next_block(struct btrfs_root *root,
5812 struct block_info *bits,
5813 int bits_nr,
5814 u64 *last,
5815 struct cache_tree *pending,
5816 struct cache_tree *seen,
5817 struct cache_tree *reada,
5818 struct cache_tree *nodes,
5819 struct cache_tree *extent_cache,
5820 struct cache_tree *chunk_cache,
5821 struct rb_root *dev_cache,
5822 struct block_group_tree *block_group_cache,
5823 struct device_extent_tree *dev_extent_cache,
5824 struct root_item_record *ri)
5826 struct extent_buffer *buf;
5827 struct extent_record *rec = NULL;
5828 u64 bytenr;
5829 u32 size;
5830 u64 parent;
5831 u64 owner;
5832 u64 flags;
5833 u64 ptr;
5834 u64 gen = 0;
5835 int ret = 0;
5836 int i;
5837 int nritems;
5838 struct btrfs_key key;
5839 struct cache_extent *cache;
5840 int reada_bits;
/* Grab the next batch of blocks to look at; 0 means the queues are empty. */
5842 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5843 bits_nr, &reada_bits);
5844 if (nritems == 0)
5845 return 1;
/*
 * If this batch has not been through readahead yet, queue every block in
 * it for readahead first; only the first block is actually processed below.
 */
5847 if (!reada_bits) {
5848 for(i = 0; i < nritems; i++) {
5849 ret = add_cache_extent(reada, bits[i].start,
5850 bits[i].size);
5851 if (ret == -EEXIST)
5852 continue;
5854 /* fixme, get the parent transid */
5855 readahead_tree_block(root, bits[i].start,
5856 bits[i].size, 0);
5859 *last = bits[0].start;
5860 bytenr = bits[0].start;
5861 size = bits[0].size;
/* Drop the block from the pending/reada/nodes trees now that we own it. */
5863 cache = lookup_cache_extent(pending, bytenr, size);
5864 if (cache) {
5865 remove_cache_extent(pending, cache);
5866 free(cache);
5868 cache = lookup_cache_extent(reada, bytenr, size);
5869 if (cache) {
5870 remove_cache_extent(reada, cache);
5871 free(cache);
5873 cache = lookup_cache_extent(nodes, bytenr, size);
5874 if (cache) {
5875 remove_cache_extent(nodes, cache);
5876 free(cache);
/* Reuse the parent generation recorded when the pointer was discovered. */
5878 cache = lookup_cache_extent(extent_cache, bytenr, size);
5879 if (cache) {
5880 rec = container_of(cache, struct extent_record, cache);
5881 gen = rec->parent_generation;
5884 /* fixme, get the real parent transid */
5885 buf = read_tree_block(root, bytenr, size, gen);
5886 if (!extent_buffer_uptodate(buf)) {
5887 record_bad_block_io(root->fs_info,
5888 extent_cache, bytenr, size);
5889 goto out;
5892 nritems = btrfs_header_nritems(buf);
/*
 * Work out the block's extent flags.  Prefer what the extent tree says;
 * fall back to calc_extent_flag() when the lookup fails or when the
 * extent tree is being rebuilt (init_extent_tree).
 */
5894 flags = 0;
5895 if (!init_extent_tree) {
5896 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5897 btrfs_header_level(buf), 1, NULL,
5898 &flags);
5899 if (ret < 0) {
5900 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5901 if (ret < 0) {
5902 fprintf(stderr, "Couldn't calc extent flags\n");
5903 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5906 } else {
5907 flags = 0;
5908 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5909 if (ret < 0) {
5910 fprintf(stderr, "Couldn't calc extent flags\n");
5911 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check FULL_BACKREF against the owning root and flag mismatches. */
5915 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5916 if (ri != NULL &&
5917 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5918 ri->objectid == btrfs_header_owner(buf)) {
5920 * Ok we got to this block from it's original owner and
5921 * we have FULL_BACKREF set. Relocation can leave
5922 * converted blocks over so this is altogether possible,
5923 * however it's not possible if the generation > the
5924 * last snapshot, so check for this case.
5926 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5927 btrfs_header_generation(buf) > ri->last_snapshot) {
5928 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5929 rec->bad_full_backref = 1;
5932 } else {
5933 if (ri != NULL &&
5934 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5935 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5936 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5937 rec->bad_full_backref = 1;
/*
 * FULL_BACKREF blocks hang their child backrefs off the parent bytenr;
 * normal blocks hang them off the owner root objectid.
 */
5941 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5942 rec->flag_block_full_backref = 1;
5943 parent = bytenr;
5944 owner = 0;
5945 } else {
5946 rec->flag_block_full_backref = 0;
5947 parent = 0;
5948 owner = btrfs_header_owner(buf);
5951 ret = check_block(root, extent_cache, buf, flags);
5952 if (ret)
5953 goto out;
/* Leaf: dispatch on item type, building records and backrefs. */
5955 if (btrfs_is_leaf(buf)) {
5956 btree_space_waste += btrfs_leaf_free_space(root, buf);
5957 for (i = 0; i < nritems; i++) {
5958 struct btrfs_file_extent_item *fi;
5959 btrfs_item_key_to_cpu(buf, &key, i);
5960 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5961 process_extent_item(root, extent_cache, buf,
5963 continue;
5965 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5966 process_extent_item(root, extent_cache, buf,
5968 continue;
5970 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5971 total_csum_bytes +=
5972 btrfs_item_size_nr(buf, i);
5973 continue;
5975 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5976 process_chunk_item(chunk_cache, &key, buf, i);
5977 continue;
5979 if (key.type == BTRFS_DEV_ITEM_KEY) {
5980 process_device_item(dev_cache, &key, buf, i);
5981 continue;
5983 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5984 process_block_group_item(block_group_cache,
5985 &key, buf, i);
5986 continue;
5988 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5989 process_device_extent_item(dev_extent_cache,
5990 &key, buf, i);
5991 continue;
/* v0 extent refs only exist on ancient filesystems; BUG() without compat. */
5994 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5995 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5996 process_extent_ref_v0(extent_cache, buf, i);
5997 #else
5998 BUG();
5999 #endif
6000 continue;
6003 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6004 add_tree_backref(extent_cache, key.objectid, 0,
6005 key.offset, 0);
6006 continue;
6008 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6009 add_tree_backref(extent_cache, key.objectid,
6010 key.offset, 0, 0);
6011 continue;
6013 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6014 struct btrfs_extent_data_ref *ref;
6015 ref = btrfs_item_ptr(buf, i,
6016 struct btrfs_extent_data_ref);
6017 add_data_backref(extent_cache,
6018 key.objectid, 0,
6019 btrfs_extent_data_ref_root(buf, ref),
6020 btrfs_extent_data_ref_objectid(buf,
6021 ref),
6022 btrfs_extent_data_ref_offset(buf, ref),
6023 btrfs_extent_data_ref_count(buf, ref),
6024 0, root->sectorsize);
6025 continue;
6027 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6028 struct btrfs_shared_data_ref *ref;
6029 ref = btrfs_item_ptr(buf, i,
6030 struct btrfs_shared_data_ref);
6031 add_data_backref(extent_cache,
6032 key.objectid, key.offset, 0, 0, 0,
6033 btrfs_shared_data_ref_count(buf, ref),
6034 0, root->sectorsize);
6035 continue;
/* Orphan items in fs roots get queued for deletion during repair. */
6037 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6038 struct bad_item *bad;
6040 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6041 continue;
6042 if (!owner)
6043 continue;
6044 bad = malloc(sizeof(struct bad_item));
6045 if (!bad)
6046 continue;
6047 INIT_LIST_HEAD(&bad->list);
6048 memcpy(&bad->key, &key,
6049 sizeof(struct btrfs_key));
6050 bad->root_id = owner;
6051 list_add_tail(&bad->list, &delete_items);
6052 continue;
6054 if (key.type != BTRFS_EXTENT_DATA_KEY)
6055 continue;
6056 fi = btrfs_item_ptr(buf, i,
6057 struct btrfs_file_extent_item);
/* Inline extents and holes (bytenr 0) carry no allocation to track. */
6058 if (btrfs_file_extent_type(buf, fi) ==
6059 BTRFS_FILE_EXTENT_INLINE)
6060 continue;
6061 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6062 continue;
6064 data_bytes_allocated +=
6065 btrfs_file_extent_disk_num_bytes(buf, fi);
6066 if (data_bytes_allocated < root->sectorsize) {
6067 abort();
6069 data_bytes_referenced +=
6070 btrfs_file_extent_num_bytes(buf, fi);
6071 add_data_backref(extent_cache,
6072 btrfs_file_extent_disk_bytenr(buf, fi),
6073 parent, owner, key.objectid, key.offset -
6074 btrfs_file_extent_offset(buf, fi), 1, 1,
6075 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for traversal. */
6077 } else {
6078 int level;
6079 struct btrfs_key first_key;
6081 first_key.objectid = 0;
6083 if (nritems > 0)
6084 btrfs_item_key_to_cpu(buf, &first_key, 0);
6085 level = btrfs_header_level(buf);
6086 for (i = 0; i < nritems; i++) {
6087 ptr = btrfs_node_blockptr(buf, i);
6088 size = btrfs_level_size(root, level - 1);
6089 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip keys already removed by an in-progress snapshot drop. */
6090 if (ri != NULL) {
6091 if ((level == ri->drop_level)
6092 && is_dropped_key(&key, &ri->drop_key)) {
6093 continue;
6096 ret = add_extent_rec(extent_cache, &key,
6097 btrfs_node_ptr_generation(buf, i),
6098 ptr, size, 0, 0, 1, 0, 1, 0,
6099 size);
6100 BUG_ON(ret);
6102 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6104 if (level > 1) {
6105 add_pending(nodes, seen, ptr, size);
6106 } else {
6107 add_pending(pending, seen, ptr, size);
6110 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6111 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final summary printout. */
6113 total_btree_bytes += buf->len;
6114 if (fs_root_objectid(btrfs_header_owner(buf)))
6115 total_fs_tree_bytes += buf->len;
6116 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6117 total_extent_tree_bytes += buf->len;
6118 if (!found_old_backref &&
6119 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6120 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6121 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6122 found_old_backref = 1;
6123 out:
6124 free_extent_buffer(buf);
6125 return ret;
6128 static int add_root_to_pending(struct extent_buffer *buf,
6129 struct cache_tree *extent_cache,
6130 struct cache_tree *pending,
6131 struct cache_tree *seen,
6132 struct cache_tree *nodes,
6133 u64 objectid)
6135 if (btrfs_header_level(buf) > 0)
6136 add_pending(nodes, seen, buf->start, buf->len);
6137 else
6138 add_pending(pending, seen, buf->start, buf->len);
6139 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6140 0, 1, 1, 0, 1, 0, buf->len);
6142 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6143 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6144 add_tree_backref(extent_cache, buf->start, buf->start,
6145 0, 1);
6146 else
6147 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6148 return 0;
6151 /* as we fix the tree, we might be deleting blocks that
6152 * we're tracking for repair. This hook makes sure we
6153 * remove any backrefs for blocks as we are fixing them.
/*
 * Called with the same arguments as the in-kernel free-extent path:
 * decrement/clear the fsck-side accounting for the dropped refs so the
 * cached state stays in sync with the repairs being made on disk.
 * Always returns 0; an extent we are not tracking is silently ignored.
 */
6155 static int free_extent_hook(struct btrfs_trans_handle *trans,
6156 struct btrfs_root *root,
6157 u64 bytenr, u64 num_bytes, u64 parent,
6158 u64 root_objectid, u64 owner, u64 offset,
6159 int refs_to_drop)
6161 struct extent_record *rec;
6162 struct cache_extent *cache;
6163 int is_data;
6164 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are fs-tree inodes, i.e. data extents. */
6166 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6167 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6168 if (!cache)
6169 return 0;
6171 rec = container_of(cache, struct extent_record, cache);
/* Data backrefs are counted; drop refs_to_drop from both sides. */
6172 if (is_data) {
6173 struct data_backref *back;
6174 back = find_data_backref(rec, parent, root_objectid, owner,
6175 offset, 1, bytenr, num_bytes);
6176 if (!back)
6177 goto out;
6178 if (back->node.found_ref) {
6179 back->found_ref -= refs_to_drop;
6180 if (rec->refs)
6181 rec->refs -= refs_to_drop;
6183 if (back->node.found_extent_tree) {
6184 back->num_refs -= refs_to_drop;
6185 if (rec->extent_item_refs)
6186 rec->extent_item_refs -= refs_to_drop;
6188 if (back->found_ref == 0)
6189 back->node.found_ref = 0;
6190 if (back->num_refs == 0)
6191 back->node.found_extent_tree = 0;
/*
 * NOTE(review): this frees the backref while found_ref is still set;
 * the condition looks like it may have been intended as
 * !back->node.found_ref — confirm against upstream history.
 */
6193 if (!back->node.found_extent_tree && back->node.found_ref) {
6194 list_del(&back->node.list);
6195 free(back);
/* Tree backrefs are single-ref: just clear the found flags. */
6197 } else {
6198 struct tree_backref *back;
6199 back = find_tree_backref(rec, parent, root_objectid);
6200 if (!back)
6201 goto out;
6202 if (back->node.found_ref) {
6203 if (rec->refs)
6204 rec->refs--;
6205 back->node.found_ref = 0;
6207 if (back->node.found_extent_tree) {
6208 if (rec->extent_item_refs)
6209 rec->extent_item_refs--;
6210 back->node.found_extent_tree = 0;
/* Same suspicious condition as the data branch above — see NOTE. */
6212 if (!back->node.found_extent_tree && back->node.found_ref) {
6213 list_del(&back->node.list);
6214 free(back);
/* Drop the whole record if nothing references it any more. */
6217 maybe_free_extent_rec(extent_cache, rec);
6218 out:
6219 return 0;
/*
 * Delete every extent-tree item (extent/metadata items and all backref
 * item types) whose key.objectid equals @bytenr, updating block group
 * accounting for the extent items removed.  Returns 0 on success or a
 * negative error from the search/delete path.
 */
6222 static int delete_extent_records(struct btrfs_trans_handle *trans,
6223 struct btrfs_root *root,
6224 struct btrfs_path *path,
6225 u64 bytenr, u64 new_len)
6227 struct btrfs_key key;
6228 struct btrfs_key found_key;
6229 struct extent_buffer *leaf;
6230 int ret;
6231 int slot;
/* Start past every possible item for this bytenr and walk backwards. */
6234 key.objectid = bytenr;
6235 key.type = (u8)-1;
6236 key.offset = (u64)-1;
6238 while(1) {
6239 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6240 &key, path, 0, 1);
6241 if (ret < 0)
6242 break;
/* Not found: step back one slot to land on the previous item. */
6244 if (ret > 0) {
6245 ret = 0;
6246 if (path->slots[0] == 0)
6247 break;
6248 path->slots[0]--;
6250 ret = 0;
6252 leaf = path->nodes[0];
6253 slot = path->slots[0];
6255 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6256 if (found_key.objectid != bytenr)
6257 break;
/*
 * Not a type we delete: restart the search just before this key
 * (decrement offset, or type when the offset is exhausted).
 */
6259 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6260 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6261 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6262 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6263 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6264 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6265 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6266 btrfs_release_path(path);
6267 if (found_key.type == 0) {
6268 if (found_key.offset == 0)
6269 break;
6270 key.offset = found_key.offset - 1;
6271 key.type = found_key.type;
6273 key.type = found_key.type - 1;
6274 key.offset = (u64)-1;
6275 continue;
6278 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6279 found_key.objectid, found_key.type, found_key.offset);
6281 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6282 if (ret)
6283 break;
6284 btrfs_release_path(path);
/*
 * Removing the extent item itself means the space is no longer
 * allocated: give it back to the block group counters.
 */
6286 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6287 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6288 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6289 found_key.offset : root->leafsize;
6291 ret = btrfs_update_block_group(trans, root, bytenr,
6292 bytes, 0, 0);
6293 if (ret)
6294 break;
6298 btrfs_release_path(path);
6299 return ret;
6303 * for a single backref, this will allocate a new extent
6304 * and add the backref to it.
/*
 * If @allocated is false, first insert a fresh EXTENT_ITEM (plus
 * tree_block_info for metadata) for @rec, then re-add the reference(s)
 * described by @back via btrfs_inc_extent_ref().  Returns 0 on success
 * or the first error from the insert/ref path.
 */
6306 static int record_extent(struct btrfs_trans_handle *trans,
6307 struct btrfs_fs_info *info,
6308 struct btrfs_path *path,
6309 struct extent_record *rec,
6310 struct extent_backref *back,
6311 int allocated, u64 flags)
6313 int ret;
6314 struct btrfs_root *extent_root = info->extent_root;
6315 struct extent_buffer *leaf;
6316 struct btrfs_key ins_key;
6317 struct btrfs_extent_item *ei;
6318 struct tree_backref *tback;
6319 struct data_backref *dback;
6320 struct btrfs_tree_block_info *bi;
/* Metadata extents are always at least one leaf in size. */
6322 if (!back->is_data)
6323 rec->max_size = max_t(u64, rec->max_size,
6324 info->extent_root->leafsize);
/* No extent item on disk yet: create one before adding refs to it. */
6326 if (!allocated) {
6327 u32 item_size = sizeof(*ei);
6329 if (!back->is_data)
6330 item_size += sizeof(*bi);
6332 ins_key.objectid = rec->start;
6333 ins_key.offset = rec->max_size;
6334 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6336 ret = btrfs_insert_empty_item(trans, extent_root, path,
6337 &ins_key, item_size);
6338 if (ret)
6339 goto fail;
6341 leaf = path->nodes[0];
6342 ei = btrfs_item_ptr(leaf, path->slots[0],
6343 struct btrfs_extent_item);
6345 btrfs_set_extent_refs(leaf, ei, 0);
6346 btrfs_set_extent_generation(leaf, ei, rec->generation);
6348 if (back->is_data) {
6349 btrfs_set_extent_flags(leaf, ei,
6350 BTRFS_EXTENT_FLAG_DATA);
6351 } else {
/* (stray double semicolon below is harmless, kept verbatim) */
6352 struct btrfs_disk_key copy_key;;
6354 tback = (struct tree_backref *)back;
6355 bi = (struct btrfs_tree_block_info *)(ei + 1);
6356 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6357 sizeof(*bi));
/* The block's first key is unknown here, so store a zeroed key. */
6359 btrfs_set_disk_key_objectid(&copy_key,
6360 rec->info_objectid);
6361 btrfs_set_disk_key_type(&copy_key, 0);
6362 btrfs_set_disk_key_offset(&copy_key, 0);
6364 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6365 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6367 btrfs_set_extent_flags(leaf, ei,
6368 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6371 btrfs_mark_buffer_dirty(leaf);
6372 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6373 rec->max_size, 1, 0);
6374 if (ret)
6375 goto fail;
6376 btrfs_release_path(path);
/* Re-add one ref per reference we actually found in the fs trees. */
6379 if (back->is_data) {
6380 u64 parent;
6381 int i;
6383 dback = (struct data_backref *)back;
6384 if (back->full_backref)
6385 parent = dback->parent;
6386 else
6387 parent = 0;
6389 for (i = 0; i < dback->found_ref; i++) {
6390 /* if parent != 0, we're doing a full backref
6391 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6392 * just makes the backref allocator create a data
6393 * backref
6395 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6396 rec->start, rec->max_size,
6397 parent,
6398 dback->root,
6399 parent ?
6400 BTRFS_FIRST_FREE_OBJECTID :
6401 dback->owner,
6402 dback->offset);
6403 if (ret)
6404 break;
6406 fprintf(stderr, "adding new data backref"
6407 " on %llu %s %llu owner %llu"
6408 " offset %llu found %d\n",
6409 (unsigned long long)rec->start,
6410 back->full_backref ?
6411 "parent" : "root",
6412 back->full_backref ?
6413 (unsigned long long)parent :
6414 (unsigned long long)dback->root,
6415 (unsigned long long)dback->owner,
6416 (unsigned long long)dback->offset,
6417 dback->found_ref);
6418 } else {
6419 u64 parent;
6421 tback = (struct tree_backref *)back;
6422 if (back->full_backref)
6423 parent = tback->parent;
6424 else
6425 parent = 0;
6427 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6428 rec->start, rec->max_size,
6429 parent, tback->root, 0, 0);
6430 fprintf(stderr, "adding new tree backref on "
6431 "start %llu len %llu parent %llu root %llu\n",
6432 rec->start, rec->max_size, parent, tback->root);
6434 fail:
6435 btrfs_release_path(path);
6436 return ret;
/*
 * One candidate (bytenr, bytes) pair collected from the data backrefs of
 * an extent record while deciding which disk extent the refs actually
 * agree on (see verify_backrefs()/find_most_right_entry()).
 */
6439 struct extent_entry {
/* disk bytenr this group of backrefs points at */
6440 u64 bytenr;
/* disk extent size those backrefs claim */
6441 u64 bytes;
/* number of backrefs voting for this bytenr/bytes pair */
6442 int count;
/* how many of those votes came from backrefs marked broken */
6443 int broken;
/* link into the local entries list built in verify_backrefs() */
6444 struct list_head list;
6447 static struct extent_entry *find_entry(struct list_head *entries,
6448 u64 bytenr, u64 bytes)
6450 struct extent_entry *entry = NULL;
6452 list_for_each_entry(entry, entries, list) {
6453 if (entry->bytenr == bytenr && entry->bytes == bytes)
6454 return entry;
6457 return NULL;
/*
 * Pick the entry with the strictly highest vote count from @entries,
 * ignoring entries whose votes are all "broken".  Returns NULL when the
 * votes are tied, i.e. no single entry can be trusted as the winner.
 * The prev/best dance below exists to detect ties without sorting.
 */
6460 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6462 struct extent_entry *entry, *best = NULL, *prev = NULL;
6464 list_for_each_entry(entry, entries, list) {
6465 if (!prev) {
6466 prev = entry;
6467 continue;
6471 * If there are as many broken entries as entries then we know
6472 * not to trust this particular entry.
6474 if (entry->broken == entry->count)
6475 continue;
6478 * If our current entry == best then we can't be sure our best
6479 * is really the best, so we need to keep searching.
6481 if (best && best->count == entry->count) {
6482 prev = entry;
6483 best = NULL;
6484 continue;
6487 /* Prev == entry, not good enough, have to keep searching */
6488 if (!prev->broken && prev->count == entry->count)
6489 continue;
6491 if (!best)
6492 best = (prev->count > entry->count) ? prev : entry;
6493 else if (best->count < entry->count)
6494 best = entry;
6495 prev = entry;
6498 return best;
/*
 * Rewrite the file extent item described by @dback so that it points at
 * the agreed-upon disk extent @entry: locate the item in its fs tree,
 * then fix up disk_bytenr/offset/disk_num_bytes (and ram_bytes when not
 * compressed) inside a transaction.  Returns 0 on success or a negative
 * error; cases that need developer attention bail out with -EINVAL.
 */
6501 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6502 struct data_backref *dback, struct extent_entry *entry)
6504 struct btrfs_trans_handle *trans;
6505 struct btrfs_root *root;
6506 struct btrfs_file_extent_item *fi;
6507 struct extent_buffer *leaf;
6508 struct btrfs_key key;
6509 u64 bytenr, bytes;
6510 int ret, err;
6512 key.objectid = dback->root;
6513 key.type = BTRFS_ROOT_ITEM_KEY;
6514 key.offset = (u64)-1;
6515 root = btrfs_read_fs_root(info, &key);
6516 if (IS_ERR(root)) {
6517 fprintf(stderr, "Couldn't find root for our ref\n");
6518 return -EINVAL;
6522 * The backref points to the original offset of the extent if it was
6523 * split, so we need to search down to the offset we have and then walk
6524 * forward until we find the backref we're looking for.
6526 key.objectid = dback->owner;
6527 key.type = BTRFS_EXTENT_DATA_KEY;
6528 key.offset = dback->offset;
6529 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6530 if (ret < 0) {
6531 fprintf(stderr, "Error looking up ref %d\n", ret);
6532 return ret;
/* Walk forward until the item matching our disk bytenr/bytes shows up. */
6535 while (1) {
6536 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6537 ret = btrfs_next_leaf(root, path);
6538 if (ret) {
6539 fprintf(stderr, "Couldn't find our ref, next\n");
6540 return -EINVAL;
6543 leaf = path->nodes[0];
6544 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6545 if (key.objectid != dback->owner ||
6546 key.type != BTRFS_EXTENT_DATA_KEY) {
6547 fprintf(stderr, "Couldn't find our ref, search\n");
6548 return -EINVAL;
6550 fi = btrfs_item_ptr(leaf, path->slots[0],
6551 struct btrfs_file_extent_item);
6552 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6553 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6555 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6556 break;
6557 path->slots[0]++;
6560 btrfs_release_path(path);
6562 trans = btrfs_start_transaction(root, 1);
6563 if (IS_ERR(trans))
6564 return PTR_ERR(trans);
6567 * Ok we have the key of the file extent we want to fix, now we can cow
6568 * down to the thing and fix it.
6570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6571 if (ret < 0) {
6572 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6573 key.objectid, key.type, key.offset, ret);
6574 goto out;
6576 if (ret > 0) {
6577 fprintf(stderr, "Well that's odd, we just found this key "
6578 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6579 key.offset);
6580 ret = -EINVAL;
6581 goto out;
6583 leaf = path->nodes[0];
6584 fi = btrfs_item_ptr(leaf, path->slots[0],
6585 struct btrfs_file_extent_item);
/* Compressed extents can't have their offsets shifted safely — bail. */
6587 if (btrfs_file_extent_compression(leaf, fi) &&
6588 dback->disk_bytenr != entry->bytenr) {
6589 fprintf(stderr, "Ref doesn't match the record start and is "
6590 "compressed, please take a btrfs-image of this file "
6591 "system and send it to a btrfs developer so they can "
6592 "complete this functionality for bytenr %Lu\n",
6593 dback->disk_bytenr);
6594 ret = -EINVAL;
6595 goto out;
/*
 * Adjust disk_bytenr and the in-extent offset so the same file data is
 * still addressed after retargeting to entry->bytenr.
 */
6598 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6599 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6600 } else if (dback->disk_bytenr > entry->bytenr) {
6601 u64 off_diff, offset;
6603 off_diff = dback->disk_bytenr - entry->bytenr;
6604 offset = btrfs_file_extent_offset(leaf, fi);
6605 if (dback->disk_bytenr + offset +
6606 btrfs_file_extent_num_bytes(leaf, fi) >
6607 entry->bytenr + entry->bytes) {
6608 fprintf(stderr, "Ref is past the entry end, please "
6609 "take a btrfs-image of this file system and "
6610 "send it to a btrfs developer, ref %Lu\n",
6611 dback->disk_bytenr);
6612 ret = -EINVAL;
6613 goto out;
6615 offset += off_diff;
6616 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6617 btrfs_set_file_extent_offset(leaf, fi, offset);
6618 } else if (dback->disk_bytenr < entry->bytenr) {
6619 u64 offset;
6621 offset = btrfs_file_extent_offset(leaf, fi);
6622 if (dback->disk_bytenr + offset < entry->bytenr) {
6623 fprintf(stderr, "Ref is before the entry start, please"
6624 " take a btrfs-image of this file system and "
6625 "send it to a btrfs developer, ref %Lu\n",
6626 dback->disk_bytenr);
6627 ret = -EINVAL;
6628 goto out;
6631 offset += dback->disk_bytenr;
6632 offset -= entry->bytenr;
6633 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6634 btrfs_set_file_extent_offset(leaf, fi, offset);
6637 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6640 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6641 * only do this if we aren't using compression, otherwise it's a
6642 * trickier case.
6644 if (!btrfs_file_extent_compression(leaf, fi))
6645 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6646 else
6647 printf("ram bytes may be wrong?\n");
6648 btrfs_mark_buffer_dirty(leaf);
6649 out:
/* Commit even on error so the transaction handle is not leaked. */
6650 err = btrfs_commit_transaction(trans, root);
6651 btrfs_release_path(path);
6652 return ret ? ret : err;
/*
 * For a data extent record whose backrefs disagree about bytenr/size,
 * tally the (bytenr, bytes) pairs the backrefs vote for, pick a winner
 * (optionally letting the extent record break a tie), and rewrite the
 * dissenting file extent items via repair_ref().  Returns 0 when all
 * refs already agree, -EAGAIN after making repairs (caller must rescan),
 * or a negative error when no winner can be determined.
 */
6655 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6656 struct extent_record *rec)
6658 struct extent_backref *back;
6659 struct data_backref *dback;
6660 struct extent_entry *entry, *best = NULL;
6661 LIST_HEAD(entries);
6662 int nr_entries = 0;
6663 int broken_entries = 0;
6664 int ret = 0;
6665 short mismatch = 0;
6668 * Metadata is easy and the backrefs should always agree on bytenr and
6669 * size, if not we've got bigger issues.
6671 if (rec->metadata)
6672 return 0;
/* Pass 1: one vote per found data backref, grouped by (bytenr, bytes). */
6674 list_for_each_entry(back, &rec->backrefs, list) {
6675 if (back->full_backref || !back->is_data)
6676 continue;
6678 dback = (struct data_backref *)back;
6681 * We only pay attention to backrefs that we found a real
6682 * backref for.
6684 if (dback->found_ref == 0)
6685 continue;
6688 * For now we only catch when the bytes don't match, not the
6689 * bytenr. We can easily do this at the same time, but I want
6690 * to have a fs image to test on before we just add repair
6691 * functionality willy-nilly so we know we won't screw up the
6692 * repair.
6695 entry = find_entry(&entries, dback->disk_bytenr,
6696 dback->bytes);
6697 if (!entry) {
6698 entry = malloc(sizeof(struct extent_entry));
6699 if (!entry) {
6700 ret = -ENOMEM;
6701 goto out;
6703 memset(entry, 0, sizeof(*entry));
6704 entry->bytenr = dback->disk_bytenr;
6705 entry->bytes = dback->bytes;
6706 list_add_tail(&entry->list, &entries);
6707 nr_entries++;
6711 * If we only have on entry we may think the entries agree when
6712 * in reality they don't so we have to do some extra checking.
6714 if (dback->disk_bytenr != rec->start ||
6715 dback->bytes != rec->nr || back->broken)
6716 mismatch = 1;
6718 if (back->broken) {
6719 entry->broken++;
6720 broken_entries++;
6723 entry->count++;
6726 /* Yay all the backrefs agree, carry on good sir */
6727 if (nr_entries <= 1 && !mismatch)
6728 goto out;
6730 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6731 "%Lu\n", rec->start);
6734 * First we want to see if the backrefs can agree amongst themselves who
6735 * is right, so figure out which one of the entries has the highest
6736 * count.
6738 best = find_most_right_entry(&entries);
6741 * Ok so we may have an even split between what the backrefs think, so
6742 * this is where we use the extent ref to see what it thinks.
6744 if (!best) {
6745 entry = find_entry(&entries, rec->start, rec->nr);
6746 if (!entry && (!broken_entries || !rec->found_rec)) {
6747 fprintf(stderr, "Backrefs don't agree with each other "
6748 "and extent record doesn't agree with anybody,"
6749 " so we can't fix bytenr %Lu bytes %Lu\n",
6750 rec->start, rec->nr);
6751 ret = -EINVAL;
6752 goto out;
6753 } else if (!entry) {
6755 * Ok our backrefs were broken, we'll assume this is the
6756 * correct value and add an entry for this range.
6758 entry = malloc(sizeof(struct extent_entry));
6759 if (!entry) {
6760 ret = -ENOMEM;
6761 goto out;
6763 memset(entry, 0, sizeof(*entry));
6764 entry->bytenr = rec->start;
6765 entry->bytes = rec->nr;
6766 list_add_tail(&entry->list, &entries);
6767 nr_entries++;
/* The extent record itself casts the tie-breaking vote. */
6769 entry->count++;
6770 best = find_most_right_entry(&entries);
6771 if (!best) {
6772 fprintf(stderr, "Backrefs and extent record evenly "
6773 "split on who is right, this is going to "
6774 "require user input to fix bytenr %Lu bytes "
6775 "%Lu\n", rec->start, rec->nr);
6776 ret = -EINVAL;
6777 goto out;
6782 * I don't think this can happen currently as we'll abort() if we catch
6783 * this case higher up, but in case somebody removes that we still can't
6784 * deal with it properly here yet, so just bail out of that's the case.
6786 if (best->bytenr != rec->start) {
6787 fprintf(stderr, "Extent start and backref starts don't match, "
6788 "please use btrfs-image on this file system and send "
6789 "it to a btrfs developer so they can make fsck fix "
6790 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6791 rec->start, rec->nr);
6792 ret = -EINVAL;
6793 goto out;
6797 * Ok great we all agreed on an extent record, let's go find the real
6798 * references and fix up the ones that don't match.
6800 list_for_each_entry(back, &rec->backrefs, list) {
6801 if (back->full_backref || !back->is_data)
6802 continue;
6804 dback = (struct data_backref *)back;
6807 * Still ignoring backrefs that don't have a real ref attached
6808 * to them.
6810 if (dback->found_ref == 0)
6811 continue;
6813 if (dback->bytes == best->bytes &&
6814 dback->disk_bytenr == best->bytenr)
6815 continue;
6817 ret = repair_ref(info, path, dback, best);
6818 if (ret)
6819 goto out;
6823 * Ok we messed with the actual refs, which means we need to drop our
6824 * entire cache and go back and rescan. I know this is a huge pain and
6825 * adds a lot of extra work, but it's the only way to be safe. Once all
6826 * the backrefs agree we may not need to do anything to the extent
6827 * record itself.
6829 ret = -EAGAIN;
6830 out:
/* Free the temporary voting entries regardless of outcome. */
6831 while (!list_empty(&entries)) {
6832 entry = list_entry(entries.next, struct extent_entry, list);
6833 list_del_init(&entry->list);
6834 free(entry);
6836 return ret;
/*
 * Collapse an extent record that only exists because of a mismatched
 * backref: promote the duplicate that came from a real EXTENT_ITEM to be
 * the canonical record, merge every overlapping record into it, and
 * re-insert it into @extent_cache.  Returns 1 when fully resolved, 0
 * when duplicates remain for delete_duplicate_records() to handle.
 */
6839 static int process_duplicates(struct btrfs_root *root,
6840 struct cache_tree *extent_cache,
6841 struct extent_record *rec)
6843 struct extent_record *good, *tmp;
6844 struct cache_extent *cache;
6845 int ret;
6848 * If we found a extent record for this extent then return, or if we
6849 * have more than one duplicate we are likely going to need to delete
6850 * something.
6852 if (rec->found_rec || rec->num_duplicates > 1)
6853 return 0;
6855 /* Shouldn't happen but just in case */
6856 BUG_ON(!rec->num_duplicates);
6859 * So this happens if we end up with a backref that doesn't match the
6860 * actual extent entry. So either the backref is bad or the extent
6861 * entry is bad. Either way we want to have the extent_record actually
6862 * reflect what we found in the extent_tree, so we need to take the
6863 * duplicate out and use that as the extent_record since the only way we
6864 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6866 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate (backed by a real extent item) to canonical. */
6868 good = list_entry(rec->dups.next, struct extent_record, list);
6869 list_del_init(&good->list);
6870 INIT_LIST_HEAD(&good->backrefs);
6871 INIT_LIST_HEAD(&good->dups);
6872 good->cache.start = good->start;
6873 good->cache.size = good->nr;
6874 good->content_checked = 0;
6875 good->owner_ref_checked = 0;
6876 good->num_duplicates = 0;
6877 good->refs = rec->refs;
6878 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb every record still overlapping the promoted range. */
6879 while (1) {
6880 cache = lookup_cache_extent(extent_cache, good->start,
6881 good->nr);
6882 if (!cache)
6883 break;
6884 tmp = container_of(cache, struct extent_record, cache);
6887 * If we find another overlapping extent and it's found_rec is
6888 * set then it's a duplicate and we need to try and delete
6889 * something.
6891 if (tmp->found_rec || tmp->num_duplicates > 0) {
6892 if (list_empty(&good->list))
6893 list_add_tail(&good->list,
6894 &duplicate_extents);
6895 good->num_duplicates += tmp->num_duplicates + 1;
6896 list_splice_init(&tmp->dups, &good->dups);
6897 list_del_init(&tmp->list);
6898 list_add_tail(&tmp->list, &good->dups);
6899 remove_cache_extent(extent_cache, &tmp->cache);
6900 continue;
6904 * Ok we have another non extent item backed extent rec, so lets
6905 * just add it to this extent and carry on like we did above.
6907 good->refs += tmp->refs;
6908 list_splice_init(&tmp->backrefs, &good->backrefs);
6909 remove_cache_extent(extent_cache, &tmp->cache);
6910 free(tmp);
6912 ret = insert_cache_extent(extent_cache, &good->cache);
6913 BUG_ON(ret);
/* The old record's backrefs/dups have been moved, so it can go. */
6914 free(rec);
6915 return good->num_duplicates ? 0 : 1;
/*
 * Given an extent record with duplicates, keep the one record that
 * covers all of the others and delete the rest's EXTENT_ITEMs from the
 * extent tree in a single transaction.  Returns the number of deleted
 * items on success, or a negative error.  Overlapping-but-not-nested
 * duplicates and metadata duplicates are considered impossible: abort().
 */
6918 static int delete_duplicate_records(struct btrfs_root *root,
6919 struct extent_record *rec)
6921 struct btrfs_trans_handle *trans;
6922 LIST_HEAD(delete_list);
6923 struct btrfs_path *path;
6924 struct extent_record *tmp, *good, *n;
6925 int nr_del = 0;
6926 int ret = 0, err;
6927 struct btrfs_key key;
6929 path = btrfs_alloc_path();
6930 if (!path) {
6931 ret = -ENOMEM;
6932 goto out;
6935 good = rec;
6936 /* Find the record that covers all of the duplicates. */
6937 list_for_each_entry(tmp, &rec->dups, list) {
6938 if (good->start < tmp->start)
6939 continue;
6940 if (good->nr > tmp->nr)
6941 continue;
6943 if (tmp->start + tmp->nr < good->start + good->nr) {
6944 fprintf(stderr, "Ok we have overlapping extents that "
6945 "aren't completely covered by eachother, this "
6946 "is going to require more careful thought. "
6947 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6948 tmp->start, tmp->nr, good->start, good->nr);
6949 abort();
6951 good = tmp;
/* Everything except the covering record goes on the delete list. */
6954 if (good != rec)
6955 list_add_tail(&rec->list, &delete_list);
6957 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6958 if (tmp == good)
6959 continue;
6960 list_move_tail(&tmp->list, &delete_list);
6963 root = root->fs_info->extent_root;
6964 trans = btrfs_start_transaction(root, 1);
6965 if (IS_ERR(trans)) {
6966 ret = PTR_ERR(trans);
6967 goto out;
6970 list_for_each_entry(tmp, &delete_list, list) {
6971 if (tmp->found_rec == 0)
6972 continue;
6973 key.objectid = tmp->start;
6974 key.type = BTRFS_EXTENT_ITEM_KEY;
6975 key.offset = tmp->nr;
6977 /* Shouldn't happen but just in case */
6978 if (tmp->metadata) {
6979 fprintf(stderr, "Well this shouldn't happen, extent "
6980 "record overlaps but is metadata? "
6981 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6982 abort();
6985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6986 if (ret) {
6987 if (ret > 0)
6988 ret = -EINVAL;
6989 break;
6991 ret = btrfs_del_item(trans, root, path);
6992 if (ret)
6993 break;
6994 btrfs_release_path(path);
6995 nr_del++;
/* Commit even after a failed delete so the handle is not leaked. */
6997 err = btrfs_commit_transaction(trans, root);
6998 if (err && !ret)
6999 ret = err;
7000 out:
/* Free the deleted records; @rec itself stays owned by the caller. */
7001 while (!list_empty(&delete_list)) {
7002 tmp = list_entry(delete_list.next, struct extent_record, list);
7003 list_del_init(&tmp->list);
7004 if (tmp == rec)
7005 continue;
7006 free(tmp);
7009 while (!list_empty(&rec->dups)) {
7010 tmp = list_entry(rec->dups.next, struct extent_record, list);
7011 list_del_init(&tmp->list);
7012 free(tmp);
7015 btrfs_free_path(path);
7017 if (!ret && !nr_del)
7018 rec->num_duplicates = 0;
7020 return ret ? ret : nr_del;
/*
 * For data backrefs on @rec that were never matched to an on-disk file
 * extent (found_ref == 0), look the file extent up in the owning fs tree
 * and, if its disk bytenr does not belong to another known extent record,
 * adopt it into this record (marking the backref "broken" so later
 * verification does not trust its stored values).
 *
 * Returns 0 on success, negative errno on lookup failure.
 */
static int find_possible_backrefs(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  struct cache_tree *extent_cache,
				  struct extent_record *rec)
{
	struct btrfs_root *root;
	struct extent_backref *back;
	struct data_backref *dback;
	struct cache_extent *cache;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret;

	list_for_each_entry(back, &rec->backrefs, list) {
		/* Don't care about full backrefs (poor unloved backrefs) */
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/* We found this one, we don't need to do a lookup */
		if (dback->found_ref)
			continue;

		key.objectid = dback->root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		root = btrfs_read_fs_root(info, &key);

		/* No root, definitely a bad ref, skip */
		if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
			continue;
		/* Other err, exit */
		if (IS_ERR(root))
			return PTR_ERR(root);

		key.objectid = dback->owner;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = dback->offset;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret) {
			btrfs_release_path(path);
			if (ret < 0)
				return ret;
			/* Didn't find it, we can carry on */
			ret = 0;
			continue;
		}

		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
		bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
		btrfs_release_path(path);
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (cache) {
			struct extent_record *tmp;
			tmp = container_of(cache, struct extent_record, cache);

			/*
			 * If we found an extent record for the bytenr for this
			 * particular backref then we can't add it to our
			 * current extent record. We only want to add backrefs
			 * that don't have a corresponding extent item in the
			 * extent tree since they likely belong to this record
			 * and we need to fix it if it doesn't match bytenrs.
			 */
			if (tmp->found_rec)
				continue;
		}

		dback->found_ref += 1;
		dback->disk_bytenr = bytenr;
		dback->bytes = bytes;

		/*
		 * Set this so the verify backref code knows not to trust the
		 * values in this backref.
		 */
		back->broken = 1;
	}

	return 0;
}
7111 * Record orphan data ref into corresponding root.
7113 * Return 0 if the extent item contains data ref and recorded.
7114 * Return 1 if the extent item contains no useful data ref
7115 * On that case, it may contains only shared_dataref or metadata backref
7116 * or the file extent exists(this should be handled by the extent bytenr
7117 * recovery routine)
7118 * Return <0 if something goes wrong.
7120 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7121 struct extent_record *rec)
7123 struct btrfs_key key;
7124 struct btrfs_root *dest_root;
7125 struct extent_backref *back;
7126 struct data_backref *dback;
7127 struct orphan_data_extent *orphan;
7128 struct btrfs_path *path;
7129 int recorded_data_ref = 0;
7130 int ret = 0;
7132 if (rec->metadata)
7133 return 1;
7134 path = btrfs_alloc_path();
7135 if (!path)
7136 return -ENOMEM;
7137 list_for_each_entry(back, &rec->backrefs, list) {
7138 if (back->full_backref || !back->is_data ||
7139 !back->found_extent_tree)
7140 continue;
7141 dback = (struct data_backref *)back;
7142 if (dback->found_ref)
7143 continue;
7144 key.objectid = dback->root;
7145 key.type = BTRFS_ROOT_ITEM_KEY;
7146 key.offset = (u64)-1;
7148 dest_root = btrfs_read_fs_root(fs_info, &key);
7150 /* For non-exist root we just skip it */
7151 if (IS_ERR(dest_root) || !dest_root)
7152 continue;
7154 key.objectid = dback->owner;
7155 key.type = BTRFS_EXTENT_DATA_KEY;
7156 key.offset = dback->offset;
7158 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7160 * For ret < 0, it's OK since the fs-tree may be corrupted,
7161 * we need to record it for inode/file extent rebuild.
7162 * For ret > 0, we record it only for file extent rebuild.
7163 * For ret == 0, the file extent exists but only bytenr
7164 * mismatch, let the original bytenr fix routine to handle,
7165 * don't record it.
7167 if (ret == 0)
7168 continue;
7169 ret = 0;
7170 orphan = malloc(sizeof(*orphan));
7171 if (!orphan) {
7172 ret = -ENOMEM;
7173 goto out;
7175 INIT_LIST_HEAD(&orphan->list);
7176 orphan->root = dback->root;
7177 orphan->objectid = dback->owner;
7178 orphan->offset = dback->offset;
7179 orphan->disk_bytenr = rec->cache.start;
7180 orphan->disk_len = rec->cache.size;
7181 list_add(&dest_root->orphan_data_extents, &orphan->list);
7182 recorded_data_ref = 1;
7184 out:
7185 btrfs_free_path(path);
7186 if (!ret)
7187 return !recorded_data_ref;
7188 else
7189 return ret;
/*
 * when an incorrect extent item is found, this will delete
 * all of the existing entries for it and recreate them
 * based on what the tree scan found.
 *
 * Steps (all inside one transaction on the extent root):
 *   0. optionally adopt lost data backrefs (find_possible_backrefs)
 *   1. make the in-memory backrefs agree (verify_backrefs)
 *   2. delete every existing extent record for [start, start+max_size)
 *   3. re-insert one item per backref that was actually seen on disk
 * Returns 0 on success or a negative errno.
 */
static int fixup_extent_refs(struct btrfs_fs_info *info,
			     struct cache_tree *extent_cache,
			     struct extent_record *rec)
{
	struct btrfs_trans_handle *trans = NULL;
	int ret;
	struct btrfs_path *path;
	struct list_head *cur = rec->backrefs.next;
	struct cache_extent *cache;
	struct extent_backref *back;
	int allocated = 0;
	u64 flags = 0;

	if (rec->flag_block_full_backref)
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (rec->refs != rec->extent_item_refs && !rec->metadata) {
		/*
		 * Sometimes the backrefs themselves are so broken they don't
		 * get attached to any meaningful rec, so first go back and
		 * check any of our backrefs that we couldn't find and throw
		 * them into the list if we find the backref so that
		 * verify_backrefs can figure out what to do.
		 */
		ret = find_possible_backrefs(info, path, extent_cache, rec);
		if (ret < 0)
			goto out;
	}

	/* step one, make sure all of the backrefs agree */
	ret = verify_backrefs(info, path, rec);
	if (ret < 0)
		goto out;

	trans = btrfs_start_transaction(info->extent_root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* step two, delete all the existing records */
	ret = delete_extent_records(trans, info->extent_root, path,
				    rec->start, rec->max_size);

	if (ret < 0)
		goto out;

	/* was this block corrupt? If so, don't add references to it */
	cache = lookup_cache_extent(info->corrupt_blocks,
				    rec->start, rec->max_size);
	if (cache) {
		ret = 0;
		goto out;
	}

	/* step three, recreate all the refs we did find */
	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		cur = cur->next;

		/*
		 * if we didn't find any references, don't create a
		 * new extent record
		 */
		if (!back->found_ref)
			continue;

		rec->bad_full_backref = 0;
		/* "allocated" tells record_extent whether the item exists yet */
		ret = record_extent(trans, info, path, rec, back, allocated, flags);
		allocated = 1;

		if (ret)
			goto out;
	}
out:
	/* Commit whatever was done; keep the first error as the result. */
	if (trans) {
		int err = btrfs_commit_transaction(trans, info->extent_root);
		if (!ret)
			ret = err;
	}

	btrfs_free_path(path);
	return ret;
}
/*
 * Rewrite the flags field of the extent item for @rec so that
 * BTRFS_BLOCK_FLAG_FULL_BACKREF matches what the tree scan decided
 * (rec->flag_block_full_backref).  Works for both METADATA_ITEM and
 * EXTENT_ITEM keyed records.  Returns 0 or a negative errno.
 */
static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
			      struct extent_record *rec)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 flags;
	int ret = 0;

	/* Metadata items key on level, data items key on size. */
	key.objectid = rec->start;
	if (rec->metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = rec->info_level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = rec->max_size;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		btrfs_free_path(path);
		btrfs_commit_transaction(trans, root);
		return ret;
	} else if (ret) {
		fprintf(stderr, "Didn't find extent for %llu\n",
			(unsigned long long)rec->start);
		btrfs_free_path(path);
		btrfs_commit_transaction(trans, root);
		return -ENOENT;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_extent_item);
	flags = btrfs_extent_flags(path->nodes[0], ei);
	if (rec->flag_block_full_backref) {
		fprintf(stderr, "setting full backref on %llu\n",
			(unsigned long long)key.objectid);
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else {
		fprintf(stderr, "clearing full backref on %llu\n",
			(unsigned long long)key.objectid);
		flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
	}
	btrfs_set_extent_flags(path->nodes[0], ei, flags);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);
	return btrfs_commit_transaction(trans, root);
}
/* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer that references the corrupt block described by
 * @corrupt.  Starts at the corrupt block's parent level and walks upward
 * (retrying via the "again" label) until the pointer is found or the
 * extent root itself is reached.  Returns 0, -ENOENT, or a search error.
 */
static int prune_one_block(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *info,
			   struct btrfs_corrupt_block *corrupt)
{
	int ret;
	struct btrfs_path path;
	struct extent_buffer *eb;
	u64 found;
	int slot;
	int nritems;
	int level = corrupt->level + 1;

	btrfs_init_path(&path);
again:
	/* we want to stop at the parent to our busted block */
	path.lowest_level = level;

	ret = btrfs_search_slot(trans, info->extent_root,
				&corrupt->key, &path, -1, 1);

	if (ret < 0)
		goto out;

	eb = path.nodes[level];
	if (!eb) {
		ret = -ENOENT;
		goto out;
	}

	/*
	 * hopefully the search gave us the block we want to prune,
	 * lets try that first
	 */
	slot = path.slots[level];
	found = btrfs_node_blockptr(eb, slot);
	if (found == corrupt->cache.start)
		goto del_ptr;

	nritems = btrfs_header_nritems(eb);

	/* the search failed, lets scan this node and hope we find it */
	for (slot = 0; slot < nritems; slot++) {
		found = btrfs_node_blockptr(eb, slot);
		if (found == corrupt->cache.start)
			goto del_ptr;
	}
	/*
	 * we couldn't find the bad block. TODO, search all the nodes for pointers
	 * to this block
	 */
	if (eb == info->extent_root->node) {
		ret = -ENOENT;
		goto out;
	} else {
		/* try one level further up the tree */
		level++;
		btrfs_release_path(&path);
		goto again;
	}

del_ptr:
	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);

out:
	btrfs_release_path(&path);
	return ret;
}
/*
 * Drain fs_info->corrupt_blocks, pruning the pointer to each corrupt
 * block from the extent tree.  A transaction is started lazily on the
 * first entry and committed once at the end.  Returns 0 or the error
 * from starting/committing the transaction.
 */
static int prune_corrupt_blocks(struct btrfs_fs_info *info)
{
	struct btrfs_trans_handle *trans = NULL;
	struct cache_extent *cache;
	struct btrfs_corrupt_block *corrupt;

	while (1) {
		cache = search_cache_extent(info->corrupt_blocks, 0);
		if (!cache)
			break;
		if (!trans) {
			trans = btrfs_start_transaction(info->extent_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
		/*
		 * NOTE(review): prune_one_block()'s return value is ignored;
		 * pruning appears to be best-effort here — confirm intended.
		 */
		prune_one_block(trans, info, corrupt);
		remove_cache_extent(info->corrupt_blocks, cache);
	}
	if (trans)
		return btrfs_commit_transaction(trans, info->extent_root);
	return 0;
}
7440 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7442 struct btrfs_block_group_cache *cache;
7443 u64 start, end;
7444 int ret;
7446 while (1) {
7447 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7448 &start, &end, EXTENT_DIRTY);
7449 if (ret)
7450 break;
7451 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7452 GFP_NOFS);
7455 start = 0;
7456 while (1) {
7457 cache = btrfs_lookup_first_block_group(fs_info, start);
7458 if (!cache)
7459 break;
7460 if (cache->cached)
7461 cache->cached = 0;
7462 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over the in-memory extent cache: report every inconsistency
 * (duplicate items, ref-count mismatches, bad backpointers, unchecked
 * owner refs, bad full-backref flags, stripe-crossing metadata, wrong
 * chunk type) and, when --repair is active, attempt to fix what it can.
 *
 * Returns 0 if clean, a positive value if problems were found (and not
 * repaired), -EAGAIN if duplicates were deleted and the caller must
 * rescan, or another negative errno.  May exit(1) on unrepairable damage.
 */
static int check_extent_refs(struct btrfs_root *root,
			     struct cache_tree *extent_cache)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int err = 0;
	int ret = 0;
	int fixed = 0;
	int had_dups = 0;
	int recorded = 0;

	if (repair) {
		/*
		 * if we're doing a repair, we have to make sure
		 * we don't allocate from the problem extents.
		 * In the worst case, this will be all the
		 * extents in the FS
		 */
		cache = search_cache_extent(extent_cache, 0);
		while(cache) {
			rec = container_of(cache, struct extent_record, cache);
			set_extent_dirty(root->fs_info->excluded_extents,
					 rec->start,
					 rec->start + rec->max_size - 1,
					 GFP_NOFS);
			cache = next_cache_extent(cache);
		}

		/* pin down all the corrupted blocks too */
		cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
		while(cache) {
			set_extent_dirty(root->fs_info->excluded_extents,
					 cache->start,
					 cache->start + cache->size - 1,
					 GFP_NOFS);
			cache = next_cache_extent(cache);
		}
		prune_corrupt_blocks(root->fs_info);
		reset_cached_block_groups(root->fs_info);
	}

	reset_cached_block_groups(root->fs_info);

	/*
	 * We need to delete any duplicate entries we find first otherwise we
	 * could mess up the extent tree when we have backrefs that actually
	 * belong to a different extent item and not the weird duplicate one.
	 */
	while (repair && !list_empty(&duplicate_extents)) {
		rec = list_entry(duplicate_extents.next, struct extent_record,
				 list);
		list_del_init(&rec->list);

		/* Sometimes we can find a backref before we find an actual
		 * extent, so we need to process it a little bit to see if there
		 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
		 * if this is a backref screwup. If we need to delete stuff
		 * process_duplicates() will return 0, otherwise it will return
		 * 1 and we
		 */
		if (process_duplicates(root, extent_cache, rec))
			continue;
		ret = delete_duplicate_records(root, rec);
		if (ret < 0)
			return ret;
		/*
		 * delete_duplicate_records will return the number of entries
		 * deleted, so if it's greater than 0 then we know we actually
		 * did something and we need to remove.
		 */
		if (ret)
			had_dups = 1;
	}

	/* Deleting duplicates invalidates the scan; caller must redo it. */
	if (had_dups)
		return -EAGAIN;

	/* Main loop: pop each record, report/repair, then free it. */
	while(1) {
		int cur_err = 0;

		fixed = 0;
		recorded = 0;
		cache = search_cache_extent(extent_cache, 0);
		if (!cache)
			break;
		rec = container_of(cache, struct extent_record, cache);
		if (rec->num_duplicates) {
			fprintf(stderr, "extent item %llu has multiple extent "
				"items\n", (unsigned long long)rec->start);
			err = 1;
			cur_err = 1;
		}

		if (rec->refs != rec->extent_item_refs) {
			fprintf(stderr, "ref mismatch on [%llu %llu] ",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			fprintf(stderr, "extent item %llu, found %llu\n",
				(unsigned long long)rec->extent_item_refs,
				(unsigned long long)rec->refs);
			ret = record_orphan_data_extents(root->fs_info, rec);
			if (ret < 0)
				goto repair_abort;
			if (ret == 0) {
				recorded = 1;
			} else {
				/*
				 * we can't use the extent to repair file
				 * extent, let the fallback method handle it.
				 */
				if (!fixed && repair) {
					ret = fixup_extent_refs(
							root->fs_info,
							extent_cache, rec);
					if (ret)
						goto repair_abort;
					fixed = 1;
				}
			}
			err = 1;
			cur_err = 1;
		}

		if (all_backpointers_checked(rec, 1)) {
			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);

			if (!fixed && !recorded && repair) {
				ret = fixup_extent_refs(root->fs_info,
							extent_cache, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			cur_err = 1;
			err = 1;
		}
		if (!rec->owner_ref_checked) {
			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			if (!fixed && !recorded && repair) {
				ret = fixup_extent_refs(root->fs_info,
							extent_cache, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			err = 1;
			cur_err = 1;
		}
		if (rec->bad_full_backref) {
			fprintf(stderr, "bad full backref, on [%llu]\n",
				(unsigned long long)rec->start);
			if (repair) {
				ret = fixup_extent_flags(root->fs_info, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			err = 1;
			cur_err = 1;
		}
		/*
		 * Although it's not a extent ref's problem, we reuse this
		 * routine for error reporting.
		 * No repair function yet.
		 */
		if (rec->crossing_stripes) {
			fprintf(stderr,
				"bad metadata [%llu, %llu) crossing stripe boundary\n",
				rec->start, rec->start + rec->max_size);
			err = 1;
			cur_err = 1;
		}

		if (rec->wrong_chunk_type) {
			fprintf(stderr,
				"bad extent [%llu, %llu), type mismatch with chunk\n",
				rec->start, rec->start + rec->max_size);
			err = 1;
			cur_err = 1;
		}

		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
		/* Un-pin the range again only if it is clean or was repaired. */
		if (!init_extent_tree && repair && (!cur_err || fixed))
			clear_extent_dirty(root->fs_info->excluded_extents,
					   rec->start,
					   rec->start + rec->max_size - 1,
					   GFP_NOFS);
		free(rec);
	}
repair_abort:
	if (repair) {
		if (ret && ret != -EAGAIN) {
			fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
			exit(1);
		} else if (!ret) {
			struct btrfs_trans_handle *trans;

			root = root->fs_info->extent_root;
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto repair_abort;
			}

			/* Repairs may have changed block counts; resync them. */
			btrfs_fix_block_accounting(trans, root);
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				goto repair_abort;
		}
		if (err)
			fprintf(stderr, "repaired damaged extent references\n");
		return ret;
	}
	return err;
}
7686 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7688 u64 stripe_size;
7690 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7691 stripe_size = length;
7692 stripe_size /= num_stripes;
7693 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7694 stripe_size = length * 2;
7695 stripe_size /= num_stripes;
7696 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7697 stripe_size = length;
7698 stripe_size /= (num_stripes - 1);
7699 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7700 stripe_size = length;
7701 stripe_size /= (num_stripes - 2);
7702 } else {
7703 stripe_size = length;
7705 return stripe_size;
/*
 * Check the chunk with its block group/dev list ref:
 * Return 0 if all refs seems valid.
 * Return 1 if part of refs seems valid, need later check for rebuild ref
 * like missing block group and needs to search extent tree to rebuild them.
 * Return -1 if essential refs are missing and unable to rebuild.
 */
static int check_chunk_refs(struct chunk_record *chunk_rec,
			    struct block_group_tree *block_group_cache,
			    struct device_extent_tree *dev_extent_cache,
			    int silent)
{
	struct cache_extent *block_group_item;
	struct block_group_record *block_group_rec;
	struct cache_extent *dev_extent_item;
	struct device_extent_record *dev_extent_rec;
	u64 devid;
	u64 offset;
	u64 length;
	/*
	 * NOTE(review): metadump_v2 is always 0 in this version, so the
	 * !metadump_v2 guards below are always taken — presumably a hook
	 * for restored metadump images; confirm before removing.
	 */
	int metadump_v2 = 0;
	int i;
	int ret = 0;

	/* Step 1: the chunk must have a matching block group item. */
	block_group_item = lookup_cache_extent(&block_group_cache->tree,
					       chunk_rec->offset,
					       chunk_rec->length);
	if (block_group_item) {
		block_group_rec = container_of(block_group_item,
					       struct block_group_record,
					       cache);
		if (chunk_rec->length != block_group_rec->offset ||
		    chunk_rec->offset != block_group_rec->objectid ||
		    (!metadump_v2 &&
		     chunk_rec->type_flags != block_group_rec->flags)) {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->length,
					chunk_rec->offset,
					chunk_rec->type_flags,
					block_group_rec->objectid,
					block_group_rec->type,
					block_group_rec->offset,
					block_group_rec->offset,
					block_group_rec->objectid,
					block_group_rec->flags);
			ret = -1;
		} else {
			/* Claim the block group record for this chunk. */
			list_del_init(&block_group_rec->list);
			chunk_rec->bg_rec = block_group_rec;
		}
	} else {
		if (!silent)
			fprintf(stderr,
				"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
				chunk_rec->objectid,
				chunk_rec->type,
				chunk_rec->offset,
				chunk_rec->length,
				chunk_rec->offset,
				chunk_rec->type_flags);
		/* Missing block group is rebuildable: report, don't fail. */
		ret = 1;
	}

	if (metadump_v2)
		return ret;

	/* Step 2: each stripe must have a matching dev extent. */
	length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
				    chunk_rec->num_stripes);
	for (i = 0; i < chunk_rec->num_stripes; ++i) {
		devid = chunk_rec->stripes[i].devid;
		offset = chunk_rec->stripes[i].offset;
		dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
						       devid, offset, length);
		if (dev_extent_item) {
			dev_extent_rec = container_of(dev_extent_item,
						struct device_extent_record,
						cache);
			if (dev_extent_rec->objectid != devid ||
			    dev_extent_rec->offset != offset ||
			    dev_extent_rec->chunk_offset != chunk_rec->offset ||
			    dev_extent_rec->length != length) {
				if (!silent)
					fprintf(stderr,
						"Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
						chunk_rec->objectid,
						chunk_rec->type,
						chunk_rec->offset,
						chunk_rec->stripes[i].devid,
						chunk_rec->stripes[i].offset,
						dev_extent_rec->objectid,
						dev_extent_rec->offset,
						dev_extent_rec->length);
				ret = -1;
			} else {
				/* Move the dev extent onto this chunk's list. */
				list_move(&dev_extent_rec->chunk_list,
					  &chunk_rec->dextents);
			}
		} else {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->stripes[i].devid,
					chunk_rec->stripes[i].offset);
			ret = -1;
		}
	}
	return ret;
}
/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every chunk against its block group and dev extents,
 * sorting chunks onto the caller-supplied @good / @rebuild / @bad lists
 * (any of which may be NULL).  Leftover block groups and dev extents
 * that matched no chunk are reported afterwards.
 *
 * Returns 0 if fully consistent, 1 if rebuildable problems were found,
 * or a negative value if essential references are missing.
 */
int check_chunks(struct cache_tree *chunk_cache,
		 struct block_group_tree *block_group_cache,
		 struct device_extent_tree *dev_extent_cache,
		 struct list_head *good, struct list_head *bad,
		 struct list_head *rebuild, int silent)
{
	struct cache_extent *chunk_item;
	struct chunk_record *chunk_rec;
	struct block_group_record *bg_rec;
	struct device_extent_record *dext_rec;
	int err;
	int ret = 0;

	chunk_item = first_cache_extent(chunk_cache);
	while (chunk_item) {
		chunk_rec = container_of(chunk_item, struct chunk_record,
					 cache);
		err = check_chunk_refs(chunk_rec, block_group_cache,
				       dev_extent_cache, silent);
		/* A hard failure (<0) wins over a rebuildable one (>0). */
		if (err < 0)
			ret = err;
		if (err == 0 && good)
			list_add_tail(&chunk_rec->list, good);
		if (err > 0 && rebuild)
			list_add_tail(&chunk_rec->list, rebuild);
		if (err < 0 && bad)
			list_add_tail(&chunk_rec->list, bad);
		chunk_item = next_cache_extent(chunk_item);
	}

	/* Block groups never claimed by a chunk above are orphans. */
	list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
		if (!silent)
			fprintf(stderr,
				"Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
				bg_rec->objectid,
				bg_rec->offset,
				bg_rec->flags);
		if (!ret)
			ret = 1;
	}

	/* Likewise for dev extents that matched no chunk stripe. */
	list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
			    chunk_list) {
		if (!silent)
			fprintf(stderr,
				"Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
				dext_rec->objectid,
				dext_rec->offset,
				dext_rec->length);
		if (!ret)
			ret = 1;
	}
	return ret;
}
7881 static int check_device_used(struct device_record *dev_rec,
7882 struct device_extent_tree *dext_cache)
7884 struct cache_extent *cache;
7885 struct device_extent_record *dev_extent_rec;
7886 u64 total_byte = 0;
7888 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7889 while (cache) {
7890 dev_extent_rec = container_of(cache,
7891 struct device_extent_record,
7892 cache);
7893 if (dev_extent_rec->objectid != dev_rec->devid)
7894 break;
7896 list_del_init(&dev_extent_rec->device_list);
7897 total_byte += dev_extent_rec->length;
7898 cache = next_cache_extent(cache);
7901 if (total_byte != dev_rec->byte_used) {
7902 fprintf(stderr,
7903 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7904 total_byte, dev_rec->byte_used, dev_rec->objectid,
7905 dev_rec->type, dev_rec->offset);
7906 return -1;
7907 } else {
7908 return 0;
7912 /* check btrfs_dev_item -> btrfs_dev_extent */
7913 static int check_devices(struct rb_root *dev_cache,
7914 struct device_extent_tree *dev_extent_cache)
7916 struct rb_node *dev_node;
7917 struct device_record *dev_rec;
7918 struct device_extent_record *dext_rec;
7919 int err;
7920 int ret = 0;
7922 dev_node = rb_first(dev_cache);
7923 while (dev_node) {
7924 dev_rec = container_of(dev_node, struct device_record, node);
7925 err = check_device_used(dev_rec, dev_extent_cache);
7926 if (err)
7927 ret = err;
7929 dev_node = rb_next(dev_node);
7931 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7932 device_list) {
7933 fprintf(stderr,
7934 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7935 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7936 if (!ret)
7937 ret = 1;
7939 return ret;
7942 static int add_root_item_to_list(struct list_head *head,
7943 u64 objectid, u64 bytenr, u64 last_snapshot,
7944 u8 level, u8 drop_level,
7945 int level_size, struct btrfs_key *drop_key)
7948 struct root_item_record *ri_rec;
7949 ri_rec = malloc(sizeof(*ri_rec));
7950 if (!ri_rec)
7951 return -ENOMEM;
7952 ri_rec->bytenr = bytenr;
7953 ri_rec->objectid = objectid;
7954 ri_rec->level = level;
7955 ri_rec->level_size = level_size;
7956 ri_rec->drop_level = drop_level;
7957 ri_rec->last_snapshot = last_snapshot;
7958 if (drop_key)
7959 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7960 list_add_tail(&ri_rec->list, head);
7962 return 0;
7965 static void free_root_item_list(struct list_head *list)
7967 struct root_item_record *ri_rec;
7969 while (!list_empty(list)) {
7970 ri_rec = list_first_entry(list, struct root_item_record,
7971 list);
7972 list_del_init(&ri_rec->list);
7973 free(ri_rec);
/*
 * Scan every tree root queued on @list.  Each root's top block is read,
 * pushed into the pending set, and then run_next_block() is driven until
 * that root is exhausted; a final drain pass (rec == NULL) processes any
 * blocks still pending across roots.  Consumes and frees the list
 * entries.  Returns 0 on success or a negative errno (-EIO on a bad
 * root block, or whatever run_next_block reported).
 */
static int deal_root_from_list(struct list_head *list,
			       struct btrfs_root *root,
			       struct block_info *bits,
			       int bits_nr,
			       struct cache_tree *pending,
			       struct cache_tree *seen,
			       struct cache_tree *reada,
			       struct cache_tree *nodes,
			       struct cache_tree *extent_cache,
			       struct cache_tree *chunk_cache,
			       struct rb_root *dev_cache,
			       struct block_group_tree *block_group_cache,
			       struct device_extent_tree *dev_extent_cache)
{
	int ret = 0;
	/*
	 * NOTE(review): if @list is empty, "last" reaches the drain loop
	 * uninitialized — presumably callers always queue at least the
	 * tree/chunk roots first; confirm.
	 */
	u64 last;

	while (!list_empty(list)) {
		struct root_item_record *rec;
		struct extent_buffer *buf;
		rec = list_entry(list->next,
				 struct root_item_record, list);
		last = 0;
		buf = read_tree_block(root->fs_info->tree_root,
				      rec->bytenr, rec->level_size, 0);
		if (!extent_buffer_uptodate(buf)) {
			free_extent_buffer(buf);
			ret = -EIO;
			break;
		}
		add_root_to_pending(buf, extent_cache, pending,
				    seen, nodes, rec->objectid);
		/*
		 * To rebuild extent tree, we need deal with snapshot
		 * one by one, otherwise we deal with node firstly which
		 * can maximize readahead.
		 */
		while (1) {
			ret = run_next_block(root, bits, bits_nr, &last,
					     pending, seen, reada, nodes,
					     extent_cache, chunk_cache,
					     dev_cache, block_group_cache,
					     dev_extent_cache, rec);
			if (ret != 0)
				break;
		}
		free_extent_buffer(buf);
		list_del(&rec->list);
		free(rec);
		if (ret < 0)
			break;
	}
	/* Drain pass: finish any blocks still pending (rec == NULL). */
	while (ret >= 0) {
		ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
				     reada, nodes, extent_cache, chunk_cache,
				     dev_cache, block_group_cache,
				     dev_extent_cache, NULL);
		if (ret != 0) {
			/* ret > 0 just means "nothing left", not an error */
			if (ret > 0)
				ret = 0;
			break;
		}
	}
	return ret;
}
8043 static int check_chunks_and_extents(struct btrfs_root *root)
8045 struct rb_root dev_cache;
8046 struct cache_tree chunk_cache;
8047 struct block_group_tree block_group_cache;
8048 struct device_extent_tree dev_extent_cache;
8049 struct cache_tree extent_cache;
8050 struct cache_tree seen;
8051 struct cache_tree pending;
8052 struct cache_tree reada;
8053 struct cache_tree nodes;
8054 struct extent_io_tree excluded_extents;
8055 struct cache_tree corrupt_blocks;
8056 struct btrfs_path path;
8057 struct btrfs_key key;
8058 struct btrfs_key found_key;
8059 int ret, err = 0;
8060 struct block_info *bits;
8061 int bits_nr;
8062 struct extent_buffer *leaf;
8063 int slot;
8064 struct btrfs_root_item ri;
8065 struct list_head dropping_trees;
8066 struct list_head normal_trees;
8067 struct btrfs_root *root1;
8068 u64 objectid;
8069 u32 level_size;
8070 u8 level;
8072 dev_cache = RB_ROOT;
8073 cache_tree_init(&chunk_cache);
8074 block_group_tree_init(&block_group_cache);
8075 device_extent_tree_init(&dev_extent_cache);
8077 cache_tree_init(&extent_cache);
8078 cache_tree_init(&seen);
8079 cache_tree_init(&pending);
8080 cache_tree_init(&nodes);
8081 cache_tree_init(&reada);
8082 cache_tree_init(&corrupt_blocks);
8083 extent_io_tree_init(&excluded_extents);
8084 INIT_LIST_HEAD(&dropping_trees);
8085 INIT_LIST_HEAD(&normal_trees);
8087 if (repair) {
8088 root->fs_info->excluded_extents = &excluded_extents;
8089 root->fs_info->fsck_extent_cache = &extent_cache;
8090 root->fs_info->free_extent_hook = free_extent_hook;
8091 root->fs_info->corrupt_blocks = &corrupt_blocks;
8094 bits_nr = 1024;
8095 bits = malloc(bits_nr * sizeof(struct block_info));
8096 if (!bits) {
8097 perror("malloc");
8098 exit(1);
8101 if (ctx.progress_enabled) {
8102 ctx.tp = TASK_EXTENTS;
8103 task_start(ctx.info);
8106 again:
8107 root1 = root->fs_info->tree_root;
8108 level = btrfs_header_level(root1->node);
8109 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8110 root1->node->start, 0, level, 0,
8111 btrfs_level_size(root1, level), NULL);
8112 if (ret < 0)
8113 goto out;
8114 root1 = root->fs_info->chunk_root;
8115 level = btrfs_header_level(root1->node);
8116 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8117 root1->node->start, 0, level, 0,
8118 btrfs_level_size(root1, level), NULL);
8119 if (ret < 0)
8120 goto out;
8121 btrfs_init_path(&path);
8122 key.offset = 0;
8123 key.objectid = 0;
8124 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8125 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8126 &key, &path, 0, 0);
8127 if (ret < 0)
8128 goto out;
8129 while(1) {
8130 leaf = path.nodes[0];
8131 slot = path.slots[0];
8132 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8133 ret = btrfs_next_leaf(root, &path);
8134 if (ret != 0)
8135 break;
8136 leaf = path.nodes[0];
8137 slot = path.slots[0];
8139 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8140 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8141 unsigned long offset;
8142 u64 last_snapshot;
8144 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8145 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8146 last_snapshot = btrfs_root_last_snapshot(&ri);
8147 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8148 level = btrfs_root_level(&ri);
8149 level_size = btrfs_level_size(root, level);
8150 ret = add_root_item_to_list(&normal_trees,
8151 found_key.objectid,
8152 btrfs_root_bytenr(&ri),
8153 last_snapshot, level,
8154 0, level_size, NULL);
8155 if (ret < 0)
8156 goto out;
8157 } else {
8158 level = btrfs_root_level(&ri);
8159 level_size = btrfs_level_size(root, level);
8160 objectid = found_key.objectid;
8161 btrfs_disk_key_to_cpu(&found_key,
8162 &ri.drop_progress);
8163 ret = add_root_item_to_list(&dropping_trees,
8164 objectid,
8165 btrfs_root_bytenr(&ri),
8166 last_snapshot, level,
8167 ri.drop_level,
8168 level_size, &found_key);
8169 if (ret < 0)
8170 goto out;
8173 path.slots[0]++;
8175 btrfs_release_path(&path);
8178 * check_block can return -EAGAIN if it fixes something, please keep
8179 * this in mind when dealing with return values from these functions, if
8180 * we get -EAGAIN we want to fall through and restart the loop.
8182 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8183 &seen, &reada, &nodes, &extent_cache,
8184 &chunk_cache, &dev_cache, &block_group_cache,
8185 &dev_extent_cache);
8186 if (ret < 0) {
8187 if (ret == -EAGAIN)
8188 goto loop;
8189 goto out;
8191 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8192 &pending, &seen, &reada, &nodes,
8193 &extent_cache, &chunk_cache, &dev_cache,
8194 &block_group_cache, &dev_extent_cache);
8195 if (ret < 0) {
8196 if (ret == -EAGAIN)
8197 goto loop;
8198 goto out;
8201 ret = check_chunks(&chunk_cache, &block_group_cache,
8202 &dev_extent_cache, NULL, NULL, NULL, 0);
8203 if (ret) {
8204 if (ret == -EAGAIN)
8205 goto loop;
8206 err = ret;
8209 ret = check_extent_refs(root, &extent_cache);
8210 if (ret < 0) {
8211 if (ret == -EAGAIN)
8212 goto loop;
8213 goto out;
8216 ret = check_devices(&dev_cache, &dev_extent_cache);
8217 if (ret && err)
8218 ret = err;
8220 out:
8221 task_stop(ctx.info);
8222 if (repair) {
8223 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8224 extent_io_tree_cleanup(&excluded_extents);
8225 root->fs_info->fsck_extent_cache = NULL;
8226 root->fs_info->free_extent_hook = NULL;
8227 root->fs_info->corrupt_blocks = NULL;
8228 root->fs_info->excluded_extents = NULL;
8230 free(bits);
8231 free_chunk_cache_tree(&chunk_cache);
8232 free_device_cache_tree(&dev_cache);
8233 free_block_group_tree(&block_group_cache);
8234 free_device_extent_tree(&dev_extent_cache);
8235 free_extent_cache_tree(&seen);
8236 free_extent_cache_tree(&pending);
8237 free_extent_cache_tree(&reada);
8238 free_extent_cache_tree(&nodes);
8239 return ret;
8240 loop:
8241 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8242 free_extent_cache_tree(&seen);
8243 free_extent_cache_tree(&pending);
8244 free_extent_cache_tree(&reada);
8245 free_extent_cache_tree(&nodes);
8246 free_chunk_cache_tree(&chunk_cache);
8247 free_block_group_tree(&block_group_cache);
8248 free_device_cache_tree(&dev_cache);
8249 free_device_extent_tree(&dev_extent_cache);
8250 free_extent_record_cache(root->fs_info, &extent_cache);
8251 free_root_item_list(&normal_trees);
8252 free_root_item_list(&dropping_trees);
8253 extent_io_tree_cleanup(&excluded_extents);
8254 goto again;
/*
 * Replace @root's node with a freshly initialized (empty) one and record the
 * change in the current transaction.
 *
 * @trans:     running transaction the new root is created in
 * @root:      root whose node is re-initialized
 * @overwrite: if non-zero, reuse the existing root block in place instead of
 *             allocating a new one
 *
 * Returns 0 on success, negative errno on failure to update the root item.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, int overwrite)
{
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	int level;
	int ret;
	struct btrfs_disk_key disk_key = {0,0,0};

	level = 0;

	if (overwrite) {
		/* Reuse the old root block in place, just re-stamp it below */
		c = old;
		extent_buffer_get(c);
		goto init;
	}
	c = btrfs_alloc_free_block(trans, root,
				   btrfs_level_size(root, 0),
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	if (IS_ERR(c)) {
		/* Allocation failed: fall back to overwriting the old block */
		c = old;
		extent_buffer_get(c);
		overwrite = 1;
	}
init:
	/* Stamp a fresh, empty header: level 0, current transid, owner root */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 *
	 * 1.overwrite previous root.
	 *
	 * 2.reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		/* Same bytenr: update the root item explicitly since the
		 * usual CoW path won't do it for us. */
		btrfs_set_root_generation(&root->root_item,
					  trans->transid);
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
		if (ret) {
			free_extent_buffer(c);
			return ret;
		}
	}
	free_extent_buffer(old);
	root->node = c;
	add_root_to_dirty_list(root);
	return 0;
}
8324 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8325 struct extent_buffer *eb, int tree_root)
8327 struct extent_buffer *tmp;
8328 struct btrfs_root_item *ri;
8329 struct btrfs_key key;
8330 u64 bytenr;
8331 u32 leafsize;
8332 int level = btrfs_header_level(eb);
8333 int nritems;
8334 int ret;
8335 int i;
8338 * If we have pinned this block before, don't pin it again.
8339 * This can not only avoid forever loop with broken filesystem
8340 * but also give us some speedups.
8342 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8343 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8344 return 0;
8346 btrfs_pin_extent(fs_info, eb->start, eb->len);
8348 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8349 nritems = btrfs_header_nritems(eb);
8350 for (i = 0; i < nritems; i++) {
8351 if (level == 0) {
8352 btrfs_item_key_to_cpu(eb, &key, i);
8353 if (key.type != BTRFS_ROOT_ITEM_KEY)
8354 continue;
8355 /* Skip the extent root and reloc roots */
8356 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8357 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8358 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8359 continue;
8360 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8361 bytenr = btrfs_disk_root_bytenr(eb, ri);
8364 * If at any point we start needing the real root we
8365 * will have to build a stump root for the root we are
8366 * in, but for now this doesn't actually use the root so
8367 * just pass in extent_root.
8369 tmp = read_tree_block(fs_info->extent_root, bytenr,
8370 leafsize, 0);
8371 if (!extent_buffer_uptodate(tmp)) {
8372 fprintf(stderr, "Error reading root block\n");
8373 return -EIO;
8375 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8376 free_extent_buffer(tmp);
8377 if (ret)
8378 return ret;
8379 } else {
8380 bytenr = btrfs_node_blockptr(eb, i);
8382 /* If we aren't the tree root don't read the block */
8383 if (level == 1 && !tree_root) {
8384 btrfs_pin_extent(fs_info, bytenr, leafsize);
8385 continue;
8388 tmp = read_tree_block(fs_info->extent_root, bytenr,
8389 leafsize, 0);
8390 if (!extent_buffer_uptodate(tmp)) {
8391 fprintf(stderr, "Error reading tree block\n");
8392 return -EIO;
8394 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8395 free_extent_buffer(tmp);
8396 if (ret)
8397 return ret;
8401 return 0;
8404 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8406 int ret;
8408 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8409 if (ret)
8410 return ret;
8412 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8415 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8417 struct btrfs_block_group_cache *cache;
8418 struct btrfs_path *path;
8419 struct extent_buffer *leaf;
8420 struct btrfs_chunk *chunk;
8421 struct btrfs_key key;
8422 int ret;
8423 u64 start;
8425 path = btrfs_alloc_path();
8426 if (!path)
8427 return -ENOMEM;
8429 key.objectid = 0;
8430 key.type = BTRFS_CHUNK_ITEM_KEY;
8431 key.offset = 0;
8433 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8434 if (ret < 0) {
8435 btrfs_free_path(path);
8436 return ret;
8440 * We do this in case the block groups were screwed up and had alloc
8441 * bits that aren't actually set on the chunks. This happens with
8442 * restored images every time and could happen in real life I guess.
8444 fs_info->avail_data_alloc_bits = 0;
8445 fs_info->avail_metadata_alloc_bits = 0;
8446 fs_info->avail_system_alloc_bits = 0;
8448 /* First we need to create the in-memory block groups */
8449 while (1) {
8450 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8451 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8452 if (ret < 0) {
8453 btrfs_free_path(path);
8454 return ret;
8456 if (ret) {
8457 ret = 0;
8458 break;
8461 leaf = path->nodes[0];
8462 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8463 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8464 path->slots[0]++;
8465 continue;
8468 chunk = btrfs_item_ptr(leaf, path->slots[0],
8469 struct btrfs_chunk);
8470 btrfs_add_block_group(fs_info, 0,
8471 btrfs_chunk_type(leaf, chunk),
8472 key.objectid, key.offset,
8473 btrfs_chunk_length(leaf, chunk));
8474 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8475 key.offset + btrfs_chunk_length(leaf, chunk),
8476 GFP_NOFS);
8477 path->slots[0]++;
8479 start = 0;
8480 while (1) {
8481 cache = btrfs_lookup_first_block_group(fs_info, start);
8482 if (!cache)
8483 break;
8484 cache->cached = 1;
8485 start = cache->key.objectid + cache->key.offset;
8488 btrfs_free_path(path);
8489 return 0;
/*
 * Remove any pending balance state and reinitialize the data reloc tree.
 *
 * Deletes the balance item (if present), deletes every TREE_RELOC root item
 * from the root tree in batches, then re-creates an empty data relocation
 * tree so a later balance starts from a clean slate.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;
	int ret;
	int found = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		/* No balance item (ret > 0): nothing to delete, but we still
		 * want to reinit the data reloc tree below. */
		if (ret > 0)
			ret = 0;
		if (!ret)
			goto reinit_data_reloc;
		else
			goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;
	btrfs_release_path(path);

	/* Now wipe all TREE_RELOC root items from the root tree */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			/* Leaf exhausted without seeing any reloc item: done */
			if (!found)
				break;

			/* Flush the batched deletes before re-searching */
			if (del_nr) {
				ret = btrfs_del_items(trans, root, path,
						      del_slot, del_nr);
				del_nr = 0;
				if (ret)
					goto out;
			}
			key.offset++;
			btrfs_release_path(path);

			found = 0;
			ret = btrfs_search_slot(trans, root, &key, path,
						-1, 1);
			if (ret < 0)
				goto out;
			continue;
		}
		found = 1;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* Root tree is keyed by objectid: past RELOC means done */
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
			break;
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			path->slots[0]++;
			continue;
		}
		/* Accumulate contiguous reloc items for a batched delete */
		if (!del_nr) {
			del_slot = path->slots[0];
			del_nr = 1;
		} else {
			del_nr++;
		}
		path->slots[0]++;
	}

	if (del_nr) {
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			goto out;
	}
	btrfs_release_path(path);

reinit_data_reloc:
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
		goto out;
	}
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	if (ret)
		goto out;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
	btrfs_free_path(path);
	return ret;
}
8600 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8601 struct btrfs_fs_info *fs_info)
8603 u64 start = 0;
8604 int ret;
8607 * The only reason we don't do this is because right now we're just
8608 * walking the trees we find and pinning down their bytes, we don't look
8609 * at any of the leaves. In order to do mixed groups we'd have to check
8610 * the leaves of any fs roots and pin down the bytes for any file
8611 * extents we find. Not hard but why do it if we don't have to?
8613 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8614 fprintf(stderr, "We don't support re-initing the extent tree "
8615 "for mixed block groups yet, please notify a btrfs "
8616 "developer you want to do this so they can add this "
8617 "functionality.\n");
8618 return -EINVAL;
8622 * first we need to walk all of the trees except the extent tree and pin
8623 * down the bytes that are in use so we don't overwrite any existing
8624 * metadata.
8626 ret = pin_metadata_blocks(fs_info);
8627 if (ret) {
8628 fprintf(stderr, "error pinning down used bytes\n");
8629 return ret;
8633 * Need to drop all the block groups since we're going to recreate all
8634 * of them again.
8636 btrfs_free_block_groups(fs_info);
8637 ret = reset_block_groups(fs_info);
8638 if (ret) {
8639 fprintf(stderr, "error resetting the block groups\n");
8640 return ret;
8643 /* Ok we can allocate now, reinit the extent root */
8644 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8645 if (ret) {
8646 fprintf(stderr, "extent root initialization failed\n");
8648 * When the transaction code is updated we should end the
8649 * transaction, but for now progs only knows about commit so
8650 * just return an error.
8652 return ret;
8656 * Now we have all the in-memory block groups setup so we can make
8657 * allocations properly, and the metadata we care about is safe since we
8658 * pinned all of it above.
8660 while (1) {
8661 struct btrfs_block_group_cache *cache;
8663 cache = btrfs_lookup_first_block_group(fs_info, start);
8664 if (!cache)
8665 break;
8666 start = cache->key.objectid + cache->key.offset;
8667 ret = btrfs_insert_item(trans, fs_info->extent_root,
8668 &cache->key, &cache->item,
8669 sizeof(cache->item));
8670 if (ret) {
8671 fprintf(stderr, "Error adding block group\n");
8672 return ret;
8674 btrfs_extent_post_op(trans, fs_info->extent_root);
8677 ret = reset_balance(trans, fs_info);
8678 if (ret)
8679 fprintf(stderr, "error reseting the pending balance\n");
8681 return ret;
8684 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8686 struct btrfs_path *path;
8687 struct btrfs_trans_handle *trans;
8688 struct btrfs_key key;
8689 int ret;
8691 printf("Recowing metadata block %llu\n", eb->start);
8692 key.objectid = btrfs_header_owner(eb);
8693 key.type = BTRFS_ROOT_ITEM_KEY;
8694 key.offset = (u64)-1;
8696 root = btrfs_read_fs_root(root->fs_info, &key);
8697 if (IS_ERR(root)) {
8698 fprintf(stderr, "Couldn't find owner root %llu\n",
8699 key.objectid);
8700 return PTR_ERR(root);
8703 path = btrfs_alloc_path();
8704 if (!path)
8705 return -ENOMEM;
8707 trans = btrfs_start_transaction(root, 1);
8708 if (IS_ERR(trans)) {
8709 btrfs_free_path(path);
8710 return PTR_ERR(trans);
8713 path->lowest_level = btrfs_header_level(eb);
8714 if (path->lowest_level)
8715 btrfs_node_key_to_cpu(eb, &key, 0);
8716 else
8717 btrfs_item_key_to_cpu(eb, &key, 0);
8719 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8720 btrfs_commit_transaction(trans, root);
8721 btrfs_free_path(path);
8722 return ret;
8725 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8727 struct btrfs_path *path;
8728 struct btrfs_trans_handle *trans;
8729 struct btrfs_key key;
8730 int ret;
8732 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8733 bad->key.type, bad->key.offset);
8734 key.objectid = bad->root_id;
8735 key.type = BTRFS_ROOT_ITEM_KEY;
8736 key.offset = (u64)-1;
8738 root = btrfs_read_fs_root(root->fs_info, &key);
8739 if (IS_ERR(root)) {
8740 fprintf(stderr, "Couldn't find owner root %llu\n",
8741 key.objectid);
8742 return PTR_ERR(root);
8745 path = btrfs_alloc_path();
8746 if (!path)
8747 return -ENOMEM;
8749 trans = btrfs_start_transaction(root, 1);
8750 if (IS_ERR(trans)) {
8751 btrfs_free_path(path);
8752 return PTR_ERR(trans);
8755 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8756 if (ret) {
8757 if (ret > 0)
8758 ret = 0;
8759 goto out;
8761 ret = btrfs_del_item(trans, root, path);
8762 out:
8763 btrfs_commit_transaction(trans, root);
8764 btrfs_free_path(path);
8765 return ret;
8768 static int zero_log_tree(struct btrfs_root *root)
8770 struct btrfs_trans_handle *trans;
8771 int ret;
8773 trans = btrfs_start_transaction(root, 1);
8774 if (IS_ERR(trans)) {
8775 ret = PTR_ERR(trans);
8776 return ret;
8778 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8779 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8780 ret = btrfs_commit_transaction(trans, root);
8781 return ret;
8784 static int populate_csum(struct btrfs_trans_handle *trans,
8785 struct btrfs_root *csum_root, char *buf, u64 start,
8786 u64 len)
8788 u64 offset = 0;
8789 u64 sectorsize;
8790 int ret = 0;
8792 while (offset < len) {
8793 sectorsize = csum_root->sectorsize;
8794 ret = read_extent_data(csum_root, buf, start + offset,
8795 &sectorsize, 0);
8796 if (ret)
8797 break;
8798 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8799 start + offset, buf, sectorsize);
8800 if (ret)
8801 break;
8802 offset += sectorsize;
8804 return ret;
/*
 * Walk every item of @cur_root and (re)generate checksums for each regular
 * (non-inline, non-prealloc) file extent it references.
 *
 * @trans:     running transaction used to insert csum items
 * @csum_root: the checksum tree being filled
 * @cur_root:  fs/subvolume tree to scan
 *
 * Returns 0 on success, negative errno on failure.  -EEXIST from
 * populate_csum() is treated as success (csum already present).
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root,
				      struct btrfs_root *cur_root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;
	char *buf = NULL;
	u64 start = 0;
	u64 len = 0;
	int slot = 0;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* One sector's worth of data, reused by populate_csum() */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	/* Start from the smallest possible key to visit every item */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* Iterate all regular file extents and fill its csum */
	while (1) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			goto next;
		node = path->nodes[0];
		slot = path->slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		/* Inline and prealloc extents carry no on-disk csum data */
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
			goto next;
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);

		ret = populate_csum(trans, csum_root, buf, start, len);
		if (ret == -EEXIST)
			ret = 0;
		if (ret < 0)
			goto out;
next:
		/*
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		 * leaf.
		 */
		ret = btrfs_next_item(cur_root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			/* No more items: normal termination */
			ret = 0;
			goto out;
		}
	}

out:
	btrfs_free_path(path);
	free(buf);
	return ret;
}
8876 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8877 struct btrfs_root *csum_root)
8879 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8880 struct btrfs_path *path;
8881 struct btrfs_root *tree_root = fs_info->tree_root;
8882 struct btrfs_root *cur_root;
8883 struct extent_buffer *node;
8884 struct btrfs_key key;
8885 int slot = 0;
8886 int ret = 0;
8888 path = btrfs_alloc_path();
8889 if (!path)
8890 return -ENOMEM;
8892 key.objectid = BTRFS_FS_TREE_OBJECTID;
8893 key.offset = 0;
8894 key.type = BTRFS_ROOT_ITEM_KEY;
8896 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8897 if (ret < 0)
8898 goto out;
8899 if (ret > 0) {
8900 ret = -ENOENT;
8901 goto out;
8904 while (1) {
8905 node = path->nodes[0];
8906 slot = path->slots[0];
8907 btrfs_item_key_to_cpu(node, &key, slot);
8908 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8909 goto out;
8910 if (key.type != BTRFS_ROOT_ITEM_KEY)
8911 goto next;
8912 if (!is_fstree(key.objectid))
8913 goto next;
8914 key.offset = (u64)-1;
8916 cur_root = btrfs_read_fs_root(fs_info, &key);
8917 if (IS_ERR(cur_root) || !cur_root) {
8918 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8919 key.objectid);
8920 goto out;
8922 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8923 cur_root);
8924 if (ret < 0)
8925 goto out;
8926 next:
8927 ret = btrfs_next_item(tree_root, path);
8928 if (ret > 0) {
8929 ret = 0;
8930 goto out;
8932 if (ret < 0)
8933 goto out;
8936 out:
8937 btrfs_free_path(path);
8938 return ret;
8941 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8942 struct btrfs_root *csum_root)
8944 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8945 struct btrfs_path *path;
8946 struct btrfs_extent_item *ei;
8947 struct extent_buffer *leaf;
8948 char *buf;
8949 struct btrfs_key key;
8950 int ret;
8952 path = btrfs_alloc_path();
8953 if (!path)
8954 return -ENOMEM;
8956 key.objectid = 0;
8957 key.type = BTRFS_EXTENT_ITEM_KEY;
8958 key.offset = 0;
8960 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8961 if (ret < 0) {
8962 btrfs_free_path(path);
8963 return ret;
8966 buf = malloc(csum_root->sectorsize);
8967 if (!buf) {
8968 btrfs_free_path(path);
8969 return -ENOMEM;
8972 while (1) {
8973 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8974 ret = btrfs_next_leaf(extent_root, path);
8975 if (ret < 0)
8976 break;
8977 if (ret) {
8978 ret = 0;
8979 break;
8982 leaf = path->nodes[0];
8984 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8985 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8986 path->slots[0]++;
8987 continue;
8990 ei = btrfs_item_ptr(leaf, path->slots[0],
8991 struct btrfs_extent_item);
8992 if (!(btrfs_extent_flags(leaf, ei) &
8993 BTRFS_EXTENT_FLAG_DATA)) {
8994 path->slots[0]++;
8995 continue;
8998 ret = populate_csum(trans, csum_root, buf, key.objectid,
8999 key.offset);
9000 if (ret)
9001 break;
9002 path->slots[0]++;
9005 btrfs_free_path(path);
9006 free(buf);
9007 return ret;
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);
	return fill_csum_tree_from_fs(trans, csum_root);
}
/*
 * Per-root summary gathered from the extent tree, used to detect and repair
 * stale root items (see build_roots_info_cache()/maybe_repair_root_item()).
 * Keyed in roots_info_cache by root objectid via cache_extent.
 */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	/* bytenr of the highest-level tree block seen for this root */
	u64 bytenr;
	/* generation of that tree block */
	u64 gen;
	/* embedded node: start = root objectid, size = 1 */
	struct cache_extent cache_extent;
};

/* Lazily allocated by build_roots_info_cache(), torn down by
 * free_roots_info_cache(). */
static struct cache_tree *roots_info_cache = NULL;
9039 static void free_roots_info_cache(void)
9041 if (!roots_info_cache)
9042 return;
9044 while (!cache_tree_empty(roots_info_cache)) {
9045 struct cache_extent *entry;
9046 struct root_item_info *rii;
9048 entry = first_cache_extent(roots_info_cache);
9049 if (!entry)
9050 break;
9051 remove_cache_extent(roots_info_cache, entry);
9052 rii = container_of(entry, struct root_item_info, cache_extent);
9053 free(rii);
9056 free(roots_info_cache);
9057 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every tree root referenced by a
 * TREE_BLOCK_REF, the highest tree block level seen along with its bytenr
 * and generation.  The result (roots_info_cache) lets
 * maybe_repair_root_item() tell whether a root item points at the real root
 * node of its tree.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;

	/* Allocate the cache lazily; it survives across calls until
	 * free_roots_info_cache() is invoked. */
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
			return -ENOMEM;
		cache_tree_init(roots_info_cache);
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		int type;
		u64 flags;
		u64 root_id;
		u8 level;
		struct cache_extent *entry;
		struct root_item_info *rii;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			if (ret < 0) {
				break;
			} else if (ret) {
				/* Extent tree exhausted: normal end */
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			slot = path->slots[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Only tree block extents are interesting here */
		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)
			goto next;

		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
			goto next;

		/* METADATA_ITEM encodes the level in the key offset;
		 * EXTENT_ITEM carries a btrfs_tree_block_info before the
		 * inline refs. */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
		} else {
			struct btrfs_tree_block_info *info;

			info = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(info + 1);
			level = btrfs_tree_block_level(leaf, info);
		}

		/*
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		 */
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)
			goto next;

		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		if (!entry) {
			rii = malloc(sizeof(struct root_item_info));
			if (!rii) {
				ret = -ENOMEM;
				goto out;
			}
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			/* (u8)-1 marks "no level recorded yet" */
			rii->level = (u8)-1;
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			ASSERT(ret == 0);
		} else {
			rii = container_of(entry, struct root_item_info,
					   cache_extent);
		}

		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);

		/* Track only the highest level; a real root is the single
		 * node at the top level (node_count must end up 1). */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->level = level;
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {
			rii->node_count++;
		}
next:
		path->slots[0]++;
	}

out:
	btrfs_free_path(path);

	return ret;
}
/*
 * Check (and optionally fix) one root item against the information gathered
 * by build_roots_info_cache().
 *
 * @info:           filesystem being checked
 * @path:           positioned at the root item to examine
 * @root_key:       key of that root item
 * @read_only_mode: when non-zero, only report whether a fix is needed
 *
 * Returns 1 if the root item was (or needs to be) fixed, 0 if it is fine,
 * negative errno if the cached info is missing or inconsistent.
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
{
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;

	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
	if (!entry) {
		fprintf(stderr,
			"Error: could not find extent items for root %llu\n",
			root_key->objectid);
		return -ENOENT;
	}

	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);

	/* A valid root must be exactly one node at its top level */
	if (rii->node_count != 1) {
		fprintf(stderr,
			"Error: could not find btree root extent for root %llu\n",
			root_id);
		return -ENOENT;
	}

	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));

	/* Compare the on-disk root item against what the extent tree says
	 * the real root node is */
	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {

		/*
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		 */
		if (!(read_only_mode && repair))
			fprintf(stderr,
				"%sroot item for root %llu,"
				" current bytenr %llu, current gen %llu, current level %u,"
				" new bytenr %llu, new gen %llu, new level %u\n",
				(read_only_mode ? "" : "fixing "),
				root_id,
				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
				btrfs_root_level(&ri),
				rii->bytenr, rii->gen, rii->level);

		/* A newer generation in the root item than in the found root
		 * node means the cache is stale, not the root item: bail. */
		if (btrfs_root_generation(&ri) > rii->gen) {
			fprintf(stderr,
				"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);
			return -EINVAL;
		}

		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
		}

		return 1;
	}

	return 0;
}
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns the number of bad root items found (fixed when in repair mode),
 * or a negative errno on failure.
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Write-lock the path only when we hold a transaction */
	ret = btrfs_search_slot(trans, info->tree_root, &key, path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			int no_more_keys = find_next_key(path, &key);

			/* Commit per leaf so fixes land before we move on */
			btrfs_release_path(path);
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		ret = maybe_repair_root_item(info, path, &found_key,
					     trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			/* Needs fixing: restart at this key with a
			 * transaction open so the fix can be written */
			if (!trans && repair) {
				need_trans = 1;
				key = found_key;
				btrfs_release_path(path);
				goto again;
			}
			bad_roots++;
		}
next:
		path->slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_free_path(path);
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
9371 const char * const cmd_check_usage[] = {
9372 "btrfs check [options] <device>",
9373 "Check structural inegrity of a filesystem (unmounted).",
9374 "Check structural inegrity of an unmounted filesystem. Verify internal",
9375 "trees' consistency and item connectivity. In the repair mode try to",
9376 "fix the problems found.",
9377 "WARNING: the repair mode is considered dangerous",
9379 "-s|--super <superblock> use this superblock copy",
9380 "-b|--backup use the backup root copy",
9381 "--repair try to repair the filesystem",
9382 "--readonly run in read-only mode (default)",
9383 "--init-csum-tree create a new CRC tree",
9384 "--init-extent-tree create a new extent tree",
9385 "--check-data-csum verify checkums of data blocks",
9386 "-Q|--qgroup-report print a report on qgroup consistency",
9387 "-E|--subvol-extents <subvolid>",
9388 " print subvolume extents and sharing state",
9389 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9390 "-p|--progress indicate progress",
9391 NULL
9394 int cmd_check(int argc, char **argv)
9396 struct cache_tree root_cache;
9397 struct btrfs_root *root;
9398 struct btrfs_fs_info *info;
9399 u64 bytenr = 0;
9400 u64 subvolid = 0;
9401 u64 tree_root_bytenr = 0;
9402 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9403 int ret;
9404 u64 num;
9405 int init_csum_tree = 0;
9406 int readonly = 0;
9407 int qgroup_report = 0;
9408 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9410 while(1) {
9411 int c;
9412 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9413 OPT_CHECK_CSUM, OPT_READONLY };
9414 static const struct option long_options[] = {
9415 { "super", required_argument, NULL, 's' },
9416 { "repair", no_argument, NULL, OPT_REPAIR },
9417 { "readonly", no_argument, NULL, OPT_READONLY },
9418 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9419 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9420 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9421 { "backup", no_argument, NULL, 'b' },
9422 { "subvol-extents", required_argument, NULL, 'E' },
9423 { "qgroup-report", no_argument, NULL, 'Q' },
9424 { "tree-root", required_argument, NULL, 'r' },
9425 { "progress", no_argument, NULL, 'p' },
9426 { NULL, 0, NULL, 0}
9429 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9430 if (c < 0)
9431 break;
9432 switch(c) {
9433 case 'a': /* ignored */ break;
9434 case 'b':
9435 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9436 break;
9437 case 's':
9438 num = arg_strtou64(optarg);
9439 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9440 fprintf(stderr,
9441 "ERROR: super mirror should be less than: %d\n",
9442 BTRFS_SUPER_MIRROR_MAX);
9443 exit(1);
9445 bytenr = btrfs_sb_offset(((int)num));
9446 printf("using SB copy %llu, bytenr %llu\n", num,
9447 (unsigned long long)bytenr);
9448 break;
9449 case 'Q':
9450 qgroup_report = 1;
9451 break;
9452 case 'E':
9453 subvolid = arg_strtou64(optarg);
9454 break;
9455 case 'r':
9456 tree_root_bytenr = arg_strtou64(optarg);
9457 break;
9458 case 'p':
9459 ctx.progress_enabled = true;
9460 break;
9461 case '?':
9462 case 'h':
9463 usage(cmd_check_usage);
9464 case OPT_REPAIR:
9465 printf("enabling repair mode\n");
9466 repair = 1;
9467 ctree_flags |= OPEN_CTREE_WRITES;
9468 break;
9469 case OPT_READONLY:
9470 readonly = 1;
9471 break;
9472 case OPT_INIT_CSUM:
9473 printf("Creating a new CRC tree\n");
9474 init_csum_tree = 1;
9475 repair = 1;
9476 ctree_flags |= OPEN_CTREE_WRITES;
9477 break;
9478 case OPT_INIT_EXTENT:
9479 init_extent_tree = 1;
9480 ctree_flags |= (OPEN_CTREE_WRITES |
9481 OPEN_CTREE_NO_BLOCK_GROUPS);
9482 repair = 1;
9483 break;
9484 case OPT_CHECK_CSUM:
9485 check_data_csum = 1;
9486 break;
9489 argc = argc - optind;
9491 if (check_argc_exact(argc, 1))
9492 usage(cmd_check_usage);
9494 if (ctx.progress_enabled) {
9495 ctx.tp = TASK_NOTHING;
9496 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9499 /* This check is the only reason for --readonly to exist */
9500 if (readonly && repair) {
9501 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9502 exit(1);
9505 radix_tree_init();
9506 cache_tree_init(&root_cache);
9508 if((ret = check_mounted(argv[optind])) < 0) {
9509 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9510 goto err_out;
9511 } else if(ret) {
9512 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9513 ret = -EBUSY;
9514 goto err_out;
9517 /* only allow partial opening under repair mode */
9518 if (repair)
9519 ctree_flags |= OPEN_CTREE_PARTIAL;
9521 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9522 ctree_flags);
9523 if (!info) {
9524 fprintf(stderr, "Couldn't open file system\n");
9525 ret = -EIO;
9526 goto err_out;
9529 global_info = info;
9530 root = info->fs_root;
9533 * repair mode will force us to commit transaction which
9534 * will make us fail to load log tree when mounting.
9536 if (repair && btrfs_super_log_root(info->super_copy)) {
9537 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9538 if (!ret) {
9539 ret = 1;
9540 goto close_out;
9542 ret = zero_log_tree(root);
9543 if (ret) {
9544 fprintf(stderr, "fail to zero log tree\n");
9545 goto close_out;
9549 uuid_unparse(info->super_copy->fsid, uuidbuf);
9550 if (qgroup_report) {
9551 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9552 uuidbuf);
9553 ret = qgroup_verify_all(info);
9554 if (ret == 0)
9555 print_qgroup_report(1);
9556 goto close_out;
9558 if (subvolid) {
9559 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9560 subvolid, argv[optind], uuidbuf);
9561 ret = print_extent_state(info, subvolid);
9562 goto close_out;
9564 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9566 if (!extent_buffer_uptodate(info->tree_root->node) ||
9567 !extent_buffer_uptodate(info->dev_root->node) ||
9568 !extent_buffer_uptodate(info->chunk_root->node)) {
9569 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9570 ret = -EIO;
9571 goto close_out;
9574 if (init_extent_tree || init_csum_tree) {
9575 struct btrfs_trans_handle *trans;
9577 trans = btrfs_start_transaction(info->extent_root, 0);
9578 if (IS_ERR(trans)) {
9579 fprintf(stderr, "Error starting transaction\n");
9580 ret = PTR_ERR(trans);
9581 goto close_out;
9584 if (init_extent_tree) {
9585 printf("Creating a new extent tree\n");
9586 ret = reinit_extent_tree(trans, info);
9587 if (ret)
9588 goto close_out;
9591 if (init_csum_tree) {
9592 fprintf(stderr, "Reinit crc root\n");
9593 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9594 if (ret) {
9595 fprintf(stderr, "crc root initialization failed\n");
9596 ret = -EIO;
9597 goto close_out;
9600 ret = fill_csum_tree(trans, info->csum_root,
9601 init_extent_tree);
9602 if (ret) {
9603 fprintf(stderr, "crc refilling failed\n");
9604 return -EIO;
9608 * Ok now we commit and run the normal fsck, which will add
9609 * extent entries for all of the items it finds.
9611 ret = btrfs_commit_transaction(trans, info->extent_root);
9612 if (ret)
9613 goto close_out;
9615 if (!extent_buffer_uptodate(info->extent_root->node)) {
9616 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9617 ret = -EIO;
9618 goto close_out;
9620 if (!extent_buffer_uptodate(info->csum_root->node)) {
9621 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9622 ret = -EIO;
9623 goto close_out;
9626 if (!ctx.progress_enabled)
9627 fprintf(stderr, "checking extents\n");
9628 ret = check_chunks_and_extents(root);
9629 if (ret)
9630 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9632 ret = repair_root_items(info);
9633 if (ret < 0)
9634 goto close_out;
9635 if (repair) {
9636 fprintf(stderr, "Fixed %d roots.\n", ret);
9637 ret = 0;
9638 } else if (ret > 0) {
9639 fprintf(stderr,
9640 "Found %d roots with an outdated root item.\n",
9641 ret);
9642 fprintf(stderr,
9643 "Please run a filesystem check with the option --repair to fix them.\n");
9644 ret = 1;
9645 goto close_out;
9648 if (!ctx.progress_enabled)
9649 fprintf(stderr, "checking free space cache\n");
9650 ret = check_space_cache(root);
9651 if (ret)
9652 goto out;
9655 * We used to have to have these hole extents in between our real
9656 * extents so if we don't have this flag set we need to make sure there
9657 * are no gaps in the file extents for inodes, otherwise we can just
9658 * ignore it when this happens.
9660 no_holes = btrfs_fs_incompat(root->fs_info,
9661 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9662 if (!ctx.progress_enabled)
9663 fprintf(stderr, "checking fs roots\n");
9664 ret = check_fs_roots(root, &root_cache);
9665 if (ret)
9666 goto out;
9668 fprintf(stderr, "checking csums\n");
9669 ret = check_csums(root);
9670 if (ret)
9671 goto out;
9673 fprintf(stderr, "checking root refs\n");
9674 ret = check_root_refs(root, &root_cache);
9675 if (ret)
9676 goto out;
9678 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9679 struct extent_buffer *eb;
9681 eb = list_first_entry(&root->fs_info->recow_ebs,
9682 struct extent_buffer, recow);
9683 list_del_init(&eb->recow);
9684 ret = recow_extent_buffer(root, eb);
9685 if (ret)
9686 break;
9689 while (!list_empty(&delete_items)) {
9690 struct bad_item *bad;
9692 bad = list_first_entry(&delete_items, struct bad_item, list);
9693 list_del_init(&bad->list);
9694 if (repair)
9695 ret = delete_bad_item(root, bad);
9696 free(bad);
9699 if (info->quota_enabled) {
9700 int err;
9701 fprintf(stderr, "checking quota groups\n");
9702 err = qgroup_verify_all(info);
9703 if (err)
9704 goto out;
9707 if (!list_empty(&root->fs_info->recow_ebs)) {
9708 fprintf(stderr, "Transid errors in file system\n");
9709 ret = 1;
9711 out:
9712 print_qgroup_report(0);
9713 if (found_old_backref) { /*
9714 * there was a disk format change when mixed
9715 * backref was in testing tree. The old format
9716 * existed about one week.
9718 printf("\n * Found old mixed backref format. "
9719 "The old format is not supported! *"
9720 "\n * Please mount the FS in readonly mode, "
9721 "backup data and re-format the FS. *\n\n");
9722 ret = 1;
9724 printf("found %llu bytes used err is %d\n",
9725 (unsigned long long)bytes_used, ret);
9726 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9727 printf("total tree bytes: %llu\n",
9728 (unsigned long long)total_btree_bytes);
9729 printf("total fs tree bytes: %llu\n",
9730 (unsigned long long)total_fs_tree_bytes);
9731 printf("total extent tree bytes: %llu\n",
9732 (unsigned long long)total_extent_tree_bytes);
9733 printf("btree space waste bytes: %llu\n",
9734 (unsigned long long)btree_space_waste);
9735 printf("file data blocks allocated: %llu\n referenced %llu\n",
9736 (unsigned long long)data_bytes_allocated,
9737 (unsigned long long)data_bytes_referenced);
9739 free_root_recs_tree(&root_cache);
9740 close_out:
9741 close_ctree(root);
9742 err_out:
9743 if (ctx.progress_enabled)
9744 task_deinit(ctx.info);
9746 return ret;