btrfs-progs: rename __strncpy__null to __strncpy_null
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobd1570751c4a13a94ea60cbfb7a44ff2154b53344
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
/*
 * Progress-reporting phases; the order must match
 * task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Shared state between the checker and the progress-printing task. */
struct task_ctx {
	int progress_enabled;	/* non-zero when progress output is wanted */
	enum task_position tp;	/* phase currently being reported */
	struct task_info *info;	/* periodic-task handle (task-utils.h) */
};
/* Statistics accumulated over the whole check run. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
/* NOTE(review): presumably set when legacy-format backrefs are seen -- confirm */
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Modes selected on the command line. */
static int repair = 0;
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
/*
 * Progress-thread body: once per period print the current phase name
 * with a spinning work indicator.  The loop never exits on its own;
 * the thread is presumably cancelled externally -- confirm with caller.
 */
static void *print_status_check(void *p)
{
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	uint32_t count = 0;
	static char *task_position_string[] = {
		"checking extents",
		"checking free space cache",
		"checking fs roots",
	};

	task_period_start(priv->info, 1000 /* 1s */);

	if (priv->tp == TASK_NOTHING)
		return NULL;

	while (1) {
		printf("%s [%c]\r", task_position_string[priv->tp],
		       work_indicator[count % 4]);
		count++;
		fflush(stdout);
		task_period_wait(priv->info);
	}
	return NULL;
}
/* Task teardown hook: terminate the progress line and flush stdout. */
static int print_status_return(void *p)
{
	(void)p;	/* unused */
	putchar('\n');
	fflush(stdout);

	return 0;
}
/*
 * One back reference to an extent, linked into extent_record::backrefs.
 * Embedded as the first member of data_backref and tree_backref.
 */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;		/* data extent (vs tree block) */
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};

/* Back reference for a data extent. */
struct data_backref {
	struct extent_backref node;
	union {
		u64 parent;	/* full backref: owning tree block */
		u64 root;	/* normal backref: owning root id */
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	u32 found_ref;
};

/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};

/* Back reference for a tree block. */
struct tree_backref {
	struct extent_backref node;
	union {
		u64 parent;	/* full backref: parent tree block */
		u64 root;	/* normal backref: owning root id */
	};
};
/* Everything known about one extent while scanning the extent tree. */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;			/* references actually found */
	u64 extent_item_refs;		/* references the item claims */
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	int flag_block_full_backref;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
/* One name (dir item/index and inode ref) pointing at an inode. */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	unsigned int filetype:8;
	int errors;		/* REF_ERR_* bits */
	unsigned int ref_type;
	u64 dir;		/* directory inode number */
	u64 index;		/* position within the directory */
	u16 namelen;
	char name[0];		/* namelen bytes plus NUL, allocated inline */
};

/* Snapshot of a root item queued for later tree walking. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};
/* Error bits accumulated in inode_backref/root_backref ->errors. */
#define REF_ERR_NO_DIR_ITEM (1 << 0)
#define REF_ERR_NO_DIR_INDEX (1 << 1)
#define REF_ERR_NO_INODE_REF (1 << 2)
#define REF_ERR_DUP_DIR_ITEM (1 << 3)
#define REF_ERR_DUP_DIR_INDEX (1 << 4)
#define REF_ERR_DUP_INODE_REF (1 << 5)
#define REF_ERR_INDEX_UNMATCH (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH (1 << 7)
#define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
#define REF_ERR_NO_ROOT_REF (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF (1 << 10)
#define REF_ERR_DUP_ROOT_REF (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * A [start, start+len) range with no file extent, kept in an rb tree
 * ordered by start (see compare_hole()).
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
230 /* Compatible function to allow reuse of old codes */
231 static u64 first_extent_gap(struct rb_root *holes)
233 struct file_extent_hole *hole;
235 if (RB_EMPTY_ROOT(holes))
236 return (u64)-1;
238 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
239 return hole->start;
242 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
244 struct file_extent_hole *hole1;
245 struct file_extent_hole *hole2;
247 hole1 = rb_entry(node1, struct file_extent_hole, node);
248 hole2 = rb_entry(node2, struct file_extent_hole, node);
250 if (hole1->start > hole2->start)
251 return -1;
252 if (hole1->start < hole2->start)
253 return 1;
254 /* Now hole1->start == hole2->start */
255 if (hole1->len >= hole2->len)
257 * Hole 1 will be merge center
258 * Same hole will be merged later
260 return -1;
261 /* Hole 2 will be merge center */
262 return 1;
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be continuous holes.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	if (!hole)
		return -ENOMEM;
	hole->start = start;
	hole->len = len;
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
				node);
	if (prev && prev->start + prev->len >= hole->start) {
		/* Extend the new hole backwards to swallow prev. */
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);
		free(prev);
		prev = NULL;
	}

	/* iterate merge with next holes */
	while (1) {
		if (!rb_next(&hole->node))
			break;
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
				node);
		if (hole->start + hole->len >= next->start) {
			/* Overlapping or adjacent: absorb next into hole. */
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
					    hole->start;
			rb_erase(&next->node, holes);
			free(next);
			next = NULL;
		} else
			break;
	}
	return 0;
}
317 static int compare_hole_range(struct rb_node *node, void *data)
319 struct file_extent_hole *hole;
320 u64 start;
322 hole = (struct file_extent_hole *)data;
323 start = hole->start;
325 hole = rb_entry(node, struct file_extent_hole, node);
326 if (start < hole->start)
327 return -1;
328 if (start >= hole->start && start < hole->start + hole->len)
329 return 0;
330 return 1;
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much restrict than add.
 *
 * Returns 0 on success, -EEXIST when [start, start+len) is not fully
 * covered by one existing hole, or a negative errno from re-adding the
 * split pieces.
 * NOTE(review): -EEXIST for "not found" reads oddly (-ENOENT would fit);
 * callers may depend on it -- confirm before changing.
 */
static int del_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	u64 prev_start = 0;
	u64 prev_len = 0;
	u64 next_start = 0;
	u64 next_len = 0;
	struct rb_node *node;
	int have_prev = 0;
	int have_next = 0;
	int ret = 0;

	tmp.start = start;
	tmp.len = len;
	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	if (!node)
		return -EEXIST;
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)
		return -EEXIST;

	/*
	 * Now there will be no overflap, delete the hole and re-add the
	 * split(s) if they exists.
	 */
	if (start > hole->start) {
		prev_start = hole->start;
		prev_len = start - hole->start;
		have_prev = 1;
	}
	if (hole->start + hole->len > start + len) {
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
		have_next = 1;
	}
	rb_erase(node, holes);
	free(hole);
	if (have_prev) {
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		if (ret < 0)
			return ret;
	}
	if (have_next) {
		ret = add_file_extent_hole(holes, next_start, next_len);
		if (ret < 0)
			return ret;
	}
	return 0;
}
390 static int copy_file_extent_holes(struct rb_root *dst,
391 struct rb_root *src)
393 struct file_extent_hole *hole;
394 struct rb_node *node;
395 int ret = 0;
397 node = rb_first(src);
398 while (node) {
399 hole = rb_entry(node, struct file_extent_hole, node);
400 ret = add_file_extent_hole(dst, hole->start, hole->len);
401 if (ret)
402 break;
403 node = rb_next(node);
405 return ret;
408 static void free_file_extent_holes(struct rb_root *holes)
410 struct rb_node *node;
411 struct file_extent_hole *hole;
413 node = rb_first(holes);
414 while (node) {
415 hole = rb_entry(node, struct file_extent_hole, node);
416 rb_erase(node, holes);
417 free(hole);
418 node = rb_first(holes);
/* Everything known about one inode while walking an fs tree. */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	int errors;			/* I_ERR_* bits */

	/* Values read from the inode item itself. */
	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	/* Values accumulated from the other items found for this inode. */
	u32 found_link;
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	struct rb_root holes;		/* gaps in the file extents */
	struct list_head orphan_extents;

	u32 refs;			/* share count across shared nodes */
};
/* Error bits accumulated in inode_record::errors. */
#define I_ERR_NO_INODE_ITEM (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM (1 << 1)
#define I_ERR_DUP_INODE_ITEM (1 << 2)
#define I_ERR_DUP_DIR_INDEX (1 << 3)
#define I_ERR_ODD_DIR_ITEM (1 << 4)
#define I_ERR_ODD_FILE_EXTENT (1 << 5)
#define I_ERR_BAD_FILE_EXTENT (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM (1 << 11)
#define I_ERR_SOME_CSUM_MISSING (1 << 12)
#define I_ERR_LINK_COUNT_WRONG (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* One name referring to a subvolume root. */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	int errors;		/* REF_ERR_* bits */
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	char name[0];		/* namelen bytes plus NUL, allocated inline */
};

/* Per-subvolume record while checking root references. */
struct root_record {
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};

/* cache_extent node carrying an opaque payload pointer. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};

/* State for a tree block referenced from more than one place. */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};

struct block_info {
	u64 start;
	u32 size;
};

/* Per-walk state while descending a tree. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};

struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
520 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark @root dirty in @trans, once per transaction: remember the
 * current node as the commit root and take an extra reference on it.
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
	}
}
/* Map an inode's S_IF* mode bits to the matching BTRFS_FT_* dir type. */
static u8 imode_to_type(u32 imode)
{
#define S_SHIFT 12
	/* Indexed by the S_IFMT nibble; unknown modes map to 0. */
	static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
		[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
		[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
		[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
		[S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
		[S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
		[S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
		[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
	};

	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
#undef S_SHIFT
}
550 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
552 struct device_record *rec1;
553 struct device_record *rec2;
555 rec1 = rb_entry(node1, struct device_record, node);
556 rec2 = rb_entry(node2, struct device_record, node);
557 if (rec1->devid > rec2->devid)
558 return -1;
559 else if (rec1->devid < rec2->devid)
560 return 1;
561 else
562 return 0;
/*
 * Deep-copy @orig_rec: the backref list, orphan extent list and hole
 * tree are duplicated; the copy starts with refs == 1.
 * Returns the new record, or ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;
	size_t size;
	int ret;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return ERR_PTR(-ENOMEM);
	memcpy(rec, orig_rec, sizeof(*rec));
	rec->refs = 1;
	/* Reset the list/tree heads copied from orig_rec before refilling. */
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		/* inode_backref carries its name inline, so copy namelen+1 too */
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		if (!backref) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	}
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		if (!dst_orphan) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	}
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
	BUG_ON(ret < 0);

	return rec;

cleanup:
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
			free(orig);
		}

	/*
	 * NOTE(review): the orphan list is walked with inode_backref
	 * cursors; this appears to rely on 'list' being the first member
	 * of both structs -- confirm before changing either layout.
	 */
	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
			free(orig);
		}

	free(rec);

	return ERR_PTR(ret);
}
627 static void print_orphan_data_extents(struct list_head *orphan_extents,
628 u64 objectid)
630 struct orphan_data_extent *orphan;
632 if (list_empty(orphan_extents))
633 return;
634 printf("The following data extent is lost in tree %llu:\n",
635 objectid);
636 list_for_each_entry(orphan, orphan_extents, list) {
637 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
638 orphan->objectid, orphan->offset, orphan->disk_bytenr,
639 orphan->disk_len);
/*
 * Print a one-line summary of every I_ERR_* bit set on @rec, followed
 * by the orphan extents and/or file extent holes when those errors are
 * present.  Output goes to stderr (orphan extents go to stdout via
 * print_orphan_data_extents()).
 */
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	if (!errors)
		return;
	/* reloc root errors, we print its corresponding fs root objectid*/
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	}
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	fprintf(stderr, "\n");
	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;
		int found = 0;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
		while (node) {
			found = 1;
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
		}
		/* No recorded hole: the whole rounded-up isize is missing. */
		if (!found)
			fprintf(stderr, "\tstart: 0, len: %llu\n",
				round_up(rec->isize, root->sectorsize));
	}
}
715 static void print_ref_error(int errors)
717 if (errors & REF_ERR_NO_DIR_ITEM)
718 fprintf(stderr, ", no dir item");
719 if (errors & REF_ERR_NO_DIR_INDEX)
720 fprintf(stderr, ", no dir index");
721 if (errors & REF_ERR_NO_INODE_REF)
722 fprintf(stderr, ", no inode ref");
723 if (errors & REF_ERR_DUP_DIR_ITEM)
724 fprintf(stderr, ", dup dir item");
725 if (errors & REF_ERR_DUP_DIR_INDEX)
726 fprintf(stderr, ", dup dir index");
727 if (errors & REF_ERR_DUP_INODE_REF)
728 fprintf(stderr, ", dup inode ref");
729 if (errors & REF_ERR_INDEX_UNMATCH)
730 fprintf(stderr, ", index unmatch");
731 if (errors & REF_ERR_FILETYPE_UNMATCH)
732 fprintf(stderr, ", filetype unmatch");
733 if (errors & REF_ERR_NAME_TOO_LONG)
734 fprintf(stderr, ", name too long");
735 if (errors & REF_ERR_NO_ROOT_REF)
736 fprintf(stderr, ", no root ref");
737 if (errors & REF_ERR_NO_ROOT_BACKREF)
738 fprintf(stderr, ", no root backref");
739 if (errors & REF_ERR_DUP_ROOT_REF)
740 fprintf(stderr, ", dup root ref");
741 if (errors & REF_ERR_DUP_ROOT_BACKREF)
742 fprintf(stderr, ", dup root backref");
743 fprintf(stderr, "\n");
/*
 * Look up the inode_record for @ino in @inode_cache.
 *
 * With @mod != 0 the caller gets a private, modifiable record: a shared
 * record (refs > 1) is cloned first (copy-on-write), and a missing
 * record is created and inserted.  With @mod == 0 a missing record
 * yields NULL.  Returns the record or an ERR_PTR() on failure.
 */
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
					  u64 ino, int mod)
{
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;
	int ret;

	cache = lookup_cache_extent(inode_cache, ino, 1);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		if (mod && rec->refs > 1) {
			/* Shared: hand back a private clone. */
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
				return node->data;
			rec->refs--;
			rec = node->data;
		}
	} else if (mod) {
		rec = calloc(1, sizeof(*rec));
		if (!rec)
			return ERR_PTR(-ENOMEM);
		rec->ino = ino;
		rec->extent_start = (u64)-1;
		rec->refs = 1;
		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
		if (!node) {
			free(rec);
			return ERR_PTR(-ENOMEM);
		}
		node->cache.start = ino;
		node->cache.size = 1;
		node->data = rec;

		/* The free-ino cache inode has no backref; fake one link. */
		if (ino == BTRFS_FREE_INO_OBJECTID)
			rec->found_link = 1;

		ret = insert_cache_extent(inode_cache, &node->cache);
		/*
		 * NOTE(review): this defensive path leaks node and rec;
		 * it should be unreachable since the lookup above missed
		 * -- confirm before adding frees.
		 */
		if (ret)
			return ERR_PTR(-EEXIST);
	}
	return rec;
}
795 static void free_orphan_data_extents(struct list_head *orphan_extents)
797 struct orphan_data_extent *orphan;
799 while (!list_empty(orphan_extents)) {
800 orphan = list_entry(orphan_extents->next,
801 struct orphan_data_extent, list);
802 list_del(&orphan->list);
803 free(orphan);
807 static void free_inode_rec(struct inode_record *rec)
809 struct inode_backref *backref;
811 if (--rec->refs > 0)
812 return;
814 while (!list_empty(&rec->backrefs)) {
815 backref = list_entry(rec->backrefs.next,
816 struct inode_backref, list);
817 list_del(&backref->list);
818 free(backref);
820 free_orphan_data_extents(&rec->orphan_extents);
821 free_file_extent_holes(&rec->holes);
822 free(rec);
825 static int can_free_inode_rec(struct inode_record *rec)
827 if (!rec->errors && rec->checked && rec->found_inode_item &&
828 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
829 return 1;
830 return 0;
/*
 * Finalize the checks on @rec and release it from @inode_cache when
 * nothing more can be learned: drop fully-verified backrefs, derive the
 * mode-dependent I_ERR_* bits, then free the record if it is clean.
 * Does nothing until the inode item has been seen and the record is
 * fully checked and not mid-merge.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
{
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	unsigned char filetype;

	if (!rec->found_inode_item)
		return;

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* Fully verified and consistent: drop the backref. */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
				free(backref);
			}
		}
	}

	if (!rec->checked || rec->merging)
		return;

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* Extents must cover isize unless the fs has NO_HOLES. */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
	}

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;
	}

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		free_inode_rec(rec);
	}
}
894 static int check_orphan_item(struct btrfs_root *root, u64 ino)
896 struct btrfs_path path;
897 struct btrfs_key key;
898 int ret;
900 key.objectid = BTRFS_ORPHAN_OBJECTID;
901 key.type = BTRFS_ORPHAN_ITEM_KEY;
902 key.offset = ino;
904 btrfs_init_path(&path);
905 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
906 btrfs_release_path(&path);
907 if (ret > 0)
908 ret = -ENOENT;
909 return ret;
/*
 * Copy the inode item at @slot of @eb into the active inode record.
 * Returns 1 (after flagging I_ERR_DUP_INODE_ITEM) when an inode item
 * was already recorded, 0 otherwise.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_inode_item *item;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
		return 1;
	}
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
		rec->nodatasum = 1;
	rec->found_inode_item = 1;
	/* nlink == 0 requires an orphan item; the flag is cleared later
	 * if one is found -- see the orphan item handling. */
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
	return 0;
}
939 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940 const char *name,
941 int namelen, u64 dir)
943 struct inode_backref *backref;
945 list_for_each_entry(backref, &rec->backrefs, list) {
946 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947 break;
948 if (backref->dir != dir || backref->namelen != namelen)
949 continue;
950 if (memcmp(name, backref->name, namelen))
951 continue;
952 return backref;
955 backref = malloc(sizeof(*backref) + namelen + 1);
956 if (!backref)
957 return NULL;
958 memset(backref, 0, sizeof(*backref));
959 backref->dir = dir;
960 backref->namelen = namelen;
961 memcpy(backref->name, name, namelen);
962 backref->name[namelen] = '\0';
963 list_add_tail(&backref->list, &rec->backrefs);
964 return backref;
/*
 * Record one directory item, directory index, or inode (ext)ref for
 * inode @ino, flagging duplicate/mismatch errors against anything
 * already recorded on the same backref.  @itemtype selects the item
 * kind.  Always returns 0; an unknown item type hits BUG_ON().
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     int filetype, int itemtype, int errors)
{
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
	BUG_ON(!backref);
	if (errors)
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		/* Each dir item contributes one link. */
		rec->found_link++;
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		else
			backref->index = index;

		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	} else {
		BUG_ON(1);
	}

	maybe_free_inode_rec(inode_cache, rec);
	return 0;
}
/*
 * Merge everything recorded on @src into @dst (same inode seen via two
 * paths): backrefs are re-added through add_inode_backref(), flags and
 * counters are combined, and extent coverage/holes are reconciled.
 * dst->merging suppresses premature freeing while this runs.
 * Returns 0 on success or a negative errno from hole copying.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
{
	struct inode_backref *backref;
	u32 dir_count = 0;
	int ret = 0;

	dst->merging = 1;
	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					backref->index, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_INDEX_KEY, backref->errors);
		}
		if (backref->found_dir_item) {
			dir_count++;
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, 0, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_ITEM_KEY, backref->errors);
		}
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, backref->index,
					backref->name, backref->namelen, 0,
					backref->ref_type, backref->errors);
		}
	}

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* Keep the smaller first gap by merging src's hole set in. */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
		if (ret < 0)
			return ret;
	}

	/* Dir-item links were re-counted by add_inode_backref() above. */
	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		} else {
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				/*
				 * NOTE(review): ret is overwritten below
				 * without being checked here -- confirm
				 * whether a failed hole add should abort.
				 */
				ret = add_file_extent_hole(&dst->holes,
						dst->extent_end,
						src->extent_start -
						dst->extent_end);
			}
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;
		}
	}

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		} else {
			dst->errors |= I_ERR_DUP_INODE_ITEM;
		}
	}
	dst->merging = 0;

	return 0;
}
/*
 * Move (or, while src_node is still referenced, copy) every record in
 * src_node's root and inode caches into dst_node, merging records that
 * already exist in the destination.  Finally advance dst_node->current
 * to src_node's current inode when it is further along.
 * Always returns 0.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference gone: move nodes instead of duplicating them. */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	/* First pass handles root_cache, second pass inode_cache. */
	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* Same ino already tracked in dst: merge into it. */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
1174 static void free_inode_ptr(struct cache_extent *cache)
1176 struct ptr_node *node;
1177 struct inode_record *rec;
1179 node = container_of(cache, struct ptr_node, cache);
1180 rec = node->data;
1181 free_inode_rec(rec);
1182 free(node);
1185 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1187 static struct shared_node *find_shared_node(struct cache_tree *shared,
1188 u64 bytenr)
1190 struct cache_extent *cache;
1191 struct shared_node *node;
1193 cache = lookup_cache_extent(shared, bytenr, 1);
1194 if (cache) {
1195 node = container_of(cache, struct shared_node, cache);
1196 return node;
1198 return NULL;
1201 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1203 int ret;
1204 struct shared_node *node;
1206 node = calloc(1, sizeof(*node));
1207 if (!node)
1208 return -ENOMEM;
1209 node->cache.start = bytenr;
1210 node->cache.size = 1;
1211 cache_tree_init(&node->root_cache);
1212 cache_tree_init(&node->inode_cache);
1213 node->refs = refs;
1215 ret = insert_cache_extent(shared, &node->cache);
1217 return ret;
/*
 * Called when walking down onto a tree block with @refs references.
 *
 * Returns 0 when the caller should descend into the block (first visit,
 * which registers it as the new active shared node), and 1 when the
 * subtree is already accounted for and the caller should skip it (its
 * pending inode records are spliced into the current active node, or
 * dropped entirely for a dead root).
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int ret;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		/* first visit: record it and make it the active node */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		BUG_ON(ret);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		/* dead root: just drop one ref, freeing on the last one */
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	/* merge this block's pending records into the active node */
	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}
/*
 * Counterpart of enter_shared_node(): called when the walk ascends past
 * the active shared node at @level.  The next higher registered node (or
 * NULL for the root level) becomes active, and this node's pending inode
 * records are either spliced into it or, for a dead root being dropped,
 * simply de-referenced.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	/* find the next registered shared node above us */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		/* dead root: keep records, just release our reference */
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
/*
 * Returns:
 * < 0 - on error
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	/* fast path: a direct ROOT_REF from parent to child */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	/* otherwise scan all ROOT_BACKREF items of the child */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		has_parent = 1;

		/* backref's offset is the parent root id */
		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
/*
 * Parse every btrfs_dir_item packed into one DIR_ITEM/DIR_INDEX item and
 * record a backref for each name, routed by the location key's type:
 * INODE_ITEM targets go to the inode cache, ROOT_ITEM targets (subvolume
 * entries) to the root cache, and anything else is flagged and filed
 * under BTRFS_MULTIPLE_OBJECTIDS.
 *
 * Also accumulates name lengths into the current inode's found_size and
 * flags duplicate entries inside a single DIR_INDEX item.
 * Always returns 0; per-name problems are stored on the backref.
 */
static int process_dir_item(struct btrfs_root *root,
			    struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	int filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		/* clamp over-long names to the buffer and flag the backref */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr, "invalid location in dir item %u\n",
				location.type);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		/* advance past the dir item header, name and payload */
		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	/* a DIR_INDEX item must contain exactly one entry */
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
/*
 * Record every name packed into one INODE_REF item as a backref of the
 * referenced inode.  key->objectid is the inode, key->offset the parent
 * directory.  Always returns 0; over-long names are truncated and flagged
 * on the backref instead.
 */
static int process_inode_ref(struct extent_buffer *eb,
			     int slot, struct btrfs_key *key,
			     struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_ref_name_len(eb, ref);
		index = btrfs_inode_ref_index(eb, ref);
		/* clamp over-long names to the buffer and flag the backref */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
		add_inode_backref(inode_cache, key->objectid, key->offset,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*ref) + name_len;
		ref = (struct btrfs_inode_ref *)((char *)ref + len);
		cur += len;
	}
	return 0;
}
/*
 * Like process_inode_ref() but for INODE_EXTREF items, where the parent
 * directory inode is stored inside each entry rather than in the key
 * offset.  Always returns 0; over-long names are truncated and flagged.
 */
static int process_inode_extref(struct extent_buffer *eb,
				int slot, struct btrfs_key *key,
				struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	u64 parent;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_extref *extref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_extref_name_len(eb, extref);
		index = btrfs_inode_extref_index(eb, extref);
		parent = btrfs_inode_extref_parent(eb, extref);
		/* clamp over-long names to the buffer and flag the backref */
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf,
				   (unsigned long)(extref + 1), len);
		add_inode_backref(inode_cache, key->objectid, parent,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*extref) + name_len;
		extref = (struct btrfs_inode_extref *)((char *)extref + len);
		cur += len;
	}
	return 0;
}
1514 static int count_csum_range(struct btrfs_root *root, u64 start,
1515 u64 len, u64 *found)
1517 struct btrfs_key key;
1518 struct btrfs_path path;
1519 struct extent_buffer *leaf;
1520 int ret;
1521 size_t size;
1522 *found = 0;
1523 u64 csum_end;
1524 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1526 btrfs_init_path(&path);
1528 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1529 key.offset = start;
1530 key.type = BTRFS_EXTENT_CSUM_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1533 &key, &path, 0, 0);
1534 if (ret < 0)
1535 goto out;
1536 if (ret > 0 && path.slots[0] > 0) {
1537 leaf = path.nodes[0];
1538 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1539 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1540 key.type == BTRFS_EXTENT_CSUM_KEY)
1541 path.slots[0]--;
1544 while (len > 0) {
1545 leaf = path.nodes[0];
1546 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1547 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1548 if (ret > 0)
1549 break;
1550 else if (ret < 0)
1551 goto out;
1552 leaf = path.nodes[0];
1555 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1556 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1557 key.type != BTRFS_EXTENT_CSUM_KEY)
1558 break;
1560 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1561 if (key.offset >= start + len)
1562 break;
1564 if (key.offset > start)
1565 start = key.offset;
1567 size = btrfs_item_size_nr(leaf, path.slots[0]);
1568 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1569 if (csum_end > start) {
1570 size = min(csum_end - start, len);
1571 len -= size;
1572 start += size;
1573 *found += size;
1576 path.slots[0]++;
1578 out:
1579 btrfs_release_path(&path);
1580 if (ret < 0)
1581 return ret;
1582 return 0;
/*
 * Validate one EXTENT_DATA item of the current inode: track the covered
 * byte range (flagging overlaps and recording holes), sanity-check the
 * extent fields, and verify csum coverage for regular/prealloc extents.
 *
 * Errors found are accumulated in rec->errors; returns 0 or a negative
 * errno from the hole bookkeeping / csum counting helpers.
 */
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->sectorsize - 1;
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	/* first extent for this inode: start tracking its range */
	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		/* gap between previous extent end and this one: a hole */
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		rec->found_size += num_bytes;
		/* round up to sector size for the range bookkeeping */
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* prealloc extents must not be compressed/encoded */
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 means a hole extent: no data on disk */
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* prealloc extents must not have csums */
			if (found > 0)
				rec->errors |= I_ERR_ODD_CSUM_ITEM;
		}
	}
	return 0;
}
/*
 * Walk all items of one fs-tree leaf and dispatch them to the per-type
 * processors, maintaining active_node->current as the inode record the
 * items belong to (items of one inode are contiguous in key order).
 *
 * NOTE(review): ret is overwritten on every item, so only the status of
 * the last processed item is returned — earlier failures are silently
 * lost.  This matches long-standing behavior; confirm before changing.
 */
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	/* skip leaves of roots that are being deleted */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		/* moved on to a new inode: finish the previous record */
		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
			BUG_ON(IS_ERR(active_node->current));
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(root, eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		}
	}
	return ret;
}
1741 static void reada_walk_down(struct btrfs_root *root,
1742 struct extent_buffer *node, int slot)
1744 u64 bytenr;
1745 u64 ptr_gen;
1746 u32 nritems;
1747 u32 blocksize;
1748 int i;
1749 int level;
1751 level = btrfs_header_level(node);
1752 if (level != 1)
1753 return;
1755 nritems = btrfs_header_nritems(node);
1756 blocksize = btrfs_level_size(root, level - 1);
1757 for (i = slot; i < nritems; i++) {
1758 bytenr = btrfs_node_blockptr(node, i);
1759 ptr_gen = btrfs_node_ptr_generation(node, i);
1760 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
/*
 * Check the child node/leaf by the following condition:
 * 1. the first item key of the node/leaf should be the same with the one
 *    in parent.
 * 2. block in parent node should match the child node/leaf.
 * 3. generation of parent node and child's header should be consistent.
 *
 * Or the child node/leaf pointed by the key in parent is not valid.
 *
 * We hope to check leaf owner too, but since subvol may share leaves,
 * which makes leaf owner check not so strong, key check should be
 * sufficient enough for that case.
 *
 * Returns 0 if all checks pass, -EINVAL (with diagnostics on stderr)
 * otherwise.  @root is currently unused.
 */
static int check_child_node(struct btrfs_root *root,
			    struct extent_buffer *parent, int slot,
			    struct extent_buffer *child)
{
	struct btrfs_key parent_key;
	struct btrfs_key child_key;
	int ret = 0;

	btrfs_node_key_to_cpu(parent, &parent_key, slot);
	/* a leaf stores item keys, an internal node stores node keys */
	if (btrfs_header_level(child) == 0)
		btrfs_item_key_to_cpu(child, &child_key, 0);
	else
		btrfs_node_key_to_cpu(child, &child_key, 0);

	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
		ret = -EINVAL;
		fprintf(stderr,
			"Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
			parent_key.objectid, parent_key.type, parent_key.offset,
			child_key.objectid, child_key.type, child_key.offset);
	}
	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
		ret = -EINVAL;
		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
			btrfs_node_blockptr(parent, slot),
			btrfs_header_bytenr(child));
	}
	if (btrfs_node_ptr_generation(parent, slot) !=
	    btrfs_header_generation(child)) {
		ret = -EINVAL;
		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
			btrfs_header_generation(child),
			btrfs_node_ptr_generation(parent, slot));
	}
	return ret;
}
/*
 * Descend the tree from path->nodes[*level], processing each leaf via
 * process_one_leaf() and validating every child block on the way down
 * (key/bytenr/generation via check_child_node(), structure via
 * btrfs_check_leaf/node).  Blocks shared with other trees are handled
 * through enter_shared_node() and skipped once already accounted for.
 *
 * On return, *level is the level the walk stopped at and the current
 * slot is set past the end so walk_up_tree() advances correctly.
 * Returns 0 or a negative errno.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	u32 blocksize;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	ret = btrfs_lookup_extent_info(NULL, root,
				       path->nodes[*level]->start,
				       *level, 1, &refs, NULL);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (refs > 1) {
		/* starting block is shared; >0 means already processed */
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0) {
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
					       1, &refs, NULL);
		if (ret < 0)
			refs = 0;

		if (refs > 1) {
			/* shared child already accounted: skip the subtree */
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				/* unreadable child: record it as corrupt */
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->leafsize, *level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(root, cur, path->slots[*level], next);
		if (ret) {
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* descend one level into the validated child */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* mark the current level exhausted for walk_up_tree() */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
/*
 * Ascend from *level until a node with an unvisited sibling slot is
 * found; set *level to it and return 0.  Releases the buffers of the
 * exhausted levels, leaving shared-node scope as needed.  Returns 1
 * when the whole tree has been walked.
 */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
{
	int i;
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			/* more siblings at this level: resume here */
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			BUG_ON(*level > wc->active_node);
			if (*level == wc->active_node)
				leave_shared_node(root, wc, *level);
			*level = i + 1;
		}
	}
	return 1;
}
/*
 * Verify that @rec looks like a valid root directory inode: an intact
 * inode item with nlink 1, no links found, and exactly the implicit ".."
 * self-backref at index 0 with no dir item/index.
 *
 * Returns 0 when it matches, -1 otherwise.
 */
static int check_root_dir(struct inode_record *rec)
{
	struct inode_backref *backref;
	int ret = -1;

	if (!rec->found_inode_item || rec->errors)
		goto out;
	if (rec->nlink != 1 || rec->found_link != 0)
		goto out;
	if (list_empty(&rec->backrefs))
		goto out;
	backref = list_entry(rec->backrefs.next, struct inode_backref, list);
	if (!backref->found_inode_ref)
		goto out;
	if (backref->index != 0 || backref->namelen != 2 ||
	    memcmp(backref->name, "..", 2))
		goto out;
	if (backref->found_dir_index || backref->found_dir_item)
		goto out;
	ret = 0;
out:
	return ret;
}
/*
 * Rewrite a directory inode's i_size with the size accumulated from its
 * dir entries (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 *
 * Searches with offset (u64)-1 so a miss lands just after the inode's
 * items; the previous slot is then checked to actually belong to the
 * inode.  Returns 0 on success, -ENOENT if the inode item is missing,
 * or a search error.
 */
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret) {
		if (!path->slots[0]) {
			ret = -ENOENT;
			goto out;
		}
		/* step back to the last item before the search key */
		path->slots[0]--;
		ret = 0;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {
		ret = -ENOENT;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
	       root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
2014 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2015 struct btrfs_root *root,
2016 struct btrfs_path *path,
2017 struct inode_record *rec)
2019 int ret;
2021 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2022 btrfs_release_path(path);
2023 if (!ret)
2024 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2025 return ret;
/*
 * Rewrite an inode's nbytes field with the byte count accumulated from
 * its file extents (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 *
 * Returns 0 on success, -ENOENT if the inode item does not exist, or a
 * negative search error.
 */
static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret = 0;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret) {
		/* ret > 0 means the exact key was not found */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	/* Since ret == 0, no need to check anything */
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
	printf("reset nbytes for ino %llu root %llu\n",
	       rec->ino, root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Insert the DIR_INDEX item that should pair with an existing dir item /
 * inode ref described by @backref, then update the parent directory's
 * accounting (found_size and the isize error flag) in the inode cache.
 *
 * Runs its own transaction.  Returns 0 on success or a negative errno;
 * insertion failure is fatal via BUG_ON.
 */
static int add_missing_dir_index(struct btrfs_root *root,
				 struct cache_tree *inode_cache,
				 struct inode_record *rec,
				 struct inode_backref *backref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *dir_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct inode_record *dir_rec;
	unsigned long name_ptr;
	u32 data_size = sizeof(*dir_item) + backref->namelen;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
		(unsigned long long)rec->ino);
	key.objectid = backref->dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = backref->index;

	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);

	/* location key of the dir entry: the child inode item */
	disk_key.objectid = cpu_to_le64(rec->ino);
	disk_key.type = BTRFS_INODE_ITEM_KEY;
	disk_key.offset = 0;

	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
	btrfs_set_dir_data_len(leaf, dir_item, 0);
	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
	name_ptr = (unsigned long)(dir_item + 1);
	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);

	backref->found_dir_index = 1;
	/* update the parent dir's size accounting if we track it */
	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
	BUG_ON(IS_ERR(dir_rec));
	if (!dir_rec)
		return 0;
	dir_rec->found_size += backref->namelen;
	if (dir_rec->found_size == dir_rec->isize &&
	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	if (dir_rec->found_size != dir_rec->isize)
		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;

	return 0;
}
/*
 * Delete the bad DIR_INDEX item described by @backref from @root, in its
 * own transaction.  A NULL lookup result means the index item exists but
 * the name didn't match, so the whole item at the path is removed;
 * otherwise only the matching name is deleted from it.
 *
 * @inode_cache and @rec are currently unused; kept so all repair helpers
 * share one signature.  Returns 0 on success (including an already-gone
 * index) or a negative errno.
 */
static int delete_dir_index(struct btrfs_root *root,
			    struct cache_tree *inode_cache,
			    struct inode_record *rec,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_free_path(path);
		btrfs_commit_transaction(trans, root);
		/* already gone counts as success */
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	if (!di)
		ret = btrfs_del_item(trans, root, path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, path, di);
	BUG_ON(ret);
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
2176 static int create_inode_item(struct btrfs_root *root,
2177 struct inode_record *rec,
2178 struct inode_backref *backref, int root_dir)
2180 struct btrfs_trans_handle *trans;
2181 struct btrfs_inode_item inode_item;
2182 time_t now = time(NULL);
2183 int ret;
2185 trans = btrfs_start_transaction(root, 1);
2186 if (IS_ERR(trans)) {
2187 ret = PTR_ERR(trans);
2188 return ret;
2191 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2192 "be incomplete, please check permissions and content after "
2193 "the fsck completes.\n", (unsigned long long)root->objectid,
2194 (unsigned long long)rec->ino);
2196 memset(&inode_item, 0, sizeof(inode_item));
2197 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2198 if (root_dir)
2199 btrfs_set_stack_inode_nlink(&inode_item, 1);
2200 else
2201 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2202 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2203 if (rec->found_dir_item) {
2204 if (rec->found_file_extent)
2205 fprintf(stderr, "root %llu inode %llu has both a dir "
2206 "item and extents, unsure if it is a dir or a "
2207 "regular file so setting it as a directory\n",
2208 (unsigned long long)root->objectid,
2209 (unsigned long long)rec->ino);
2210 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2211 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2212 } else if (!rec->found_dir_item) {
2213 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2214 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2216 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2217 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2218 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2219 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2220 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2221 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2222 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2223 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2225 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2226 BUG_ON(ret);
2227 btrfs_commit_transaction(trans, root);
2228 return 0;
/*
 * Walk all backrefs of @rec and repair what can be repaired:
 *  - @delete mode: remove dir indexes with no matching inode ref (or a
 *    mismatched one);
 *  - otherwise: recreate a missing root-dir inode item, add a missing
 *    DIR_INDEX for a dir item + inode ref pair, add a missing dir
 *    index/item pair for a lone inode ref, and recreate a missing inode
 *    item when a complete, consistent backref exists.
 *
 * Returns a negative errno on failure, otherwise the number of repairs
 * performed.
 */
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, backref, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, inode_cache, rec, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
		}

		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
			/*
			 * NOTE(review): found_dir_index is tested twice below
			 * while found_inode_ref is only checked in the inner
			 * if — looks like one test was meant to be something
			 * else; kept as-is, confirm against upstream intent.
			 */
			if (backref->found_dir_item &&
			    backref->found_dir_index &&
			    backref->found_dir_index) {
				if (!backref->errors &&
				    backref->found_inode_ref) {
					list_del(&backref->list);
					free(backref);
				}
			}
		}

		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, backref, 0);
			if (ret)
				break;
			repaired++;
		}
	}
	return ret ? ret : repaired;
}
2341 * To determine the file type for nlink/inode_item repair
2343 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2344 * Return -ENOENT if file type is not found.
2346 static int find_file_type(struct inode_record *rec, u8 *type)
2348 struct inode_backref *backref;
2350 /* For inode item recovered case */
2351 if (rec->found_inode_item) {
2352 *type = imode_to_type(rec->imode);
2353 return 0;
2356 list_for_each_entry(backref, &rec->backrefs, list) {
2357 if (backref->found_dir_index || backref->found_dir_item) {
2358 *type = backref->filetype;
2359 return 0;
2362 return -ENOENT;
2366 * To determine the file name for nlink repair
2368 * Return 0 if file name is found, set name and namelen.
2369 * Return -ENOENT if file name is not found.
2371 static int find_file_name(struct inode_record *rec,
2372 char *name, int *namelen)
2374 struct inode_backref *backref;
2376 list_for_each_entry(backref, &rec->backrefs, list) {
2377 if (backref->found_dir_index || backref->found_dir_item ||
2378 backref->found_inode_ref) {
2379 memcpy(name, backref->name, backref->namelen);
2380 *namelen = backref->namelen;
2381 return 0;
2384 return -ENOENT;
2387 /* Reset the nlink of the inode to the correct one */
2388 static int reset_nlink(struct btrfs_trans_handle *trans,
2389 struct btrfs_root *root,
2390 struct btrfs_path *path,
2391 struct inode_record *rec)
2393 struct inode_backref *backref;
2394 struct inode_backref *tmp;
2395 struct btrfs_key key;
2396 struct btrfs_inode_item *inode_item;
2397 int ret = 0;
2399 /* We don't believe this either, reset it and iterate backref */
2400 rec->found_link = 0;
2402 /* Remove all backref including the valid ones */
2403 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2404 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2405 backref->index, backref->name,
2406 backref->namelen, 0);
2407 if (ret < 0)
2408 goto out;
2410 /* remove invalid backref, so it won't be added back */
2411 if (!(backref->found_dir_index &&
2412 backref->found_dir_item &&
2413 backref->found_inode_ref)) {
2414 list_del(&backref->list);
2415 free(backref);
2416 } else {
2417 rec->found_link++;
2421 /* Set nlink to 0 */
2422 key.objectid = rec->ino;
2423 key.type = BTRFS_INODE_ITEM_KEY;
2424 key.offset = 0;
2425 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2426 if (ret < 0)
2427 goto out;
2428 if (ret > 0) {
2429 ret = -ENOENT;
2430 goto out;
2432 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2433 struct btrfs_inode_item);
2434 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2435 btrfs_mark_buffer_dirty(path->nodes[0]);
2436 btrfs_release_path(path);
2439 * Add back valid inode_ref/dir_item/dir_index,
2440 * add_link() will handle the nlink inc, so new nlink must be correct
2442 list_for_each_entry(backref, &rec->backrefs, list) {
2443 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2444 backref->name, backref->namelen,
2445 backref->filetype, &backref->index, 1);
2446 if (ret < 0)
2447 goto out;
2449 out:
2450 btrfs_release_path(path);
2451 return ret;
2454 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2455 struct btrfs_root *root,
2456 struct btrfs_path *path,
2457 struct inode_record *rec)
2459 char *dir_name = "lost+found";
2460 char namebuf[BTRFS_NAME_LEN] = {0};
2461 u64 lost_found_ino;
2462 u32 mode = 0700;
2463 u8 type = 0;
2464 int namelen = 0;
2465 int name_recovered = 0;
2466 int type_recovered = 0;
2467 int ret = 0;
2470 * Get file name and type first before these invalid inode ref
2471 * are deleted by remove_all_invalid_backref()
2473 name_recovered = !find_file_name(rec, namebuf, &namelen);
2474 type_recovered = !find_file_type(rec, &type);
2476 if (!name_recovered) {
2477 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2478 rec->ino, rec->ino);
2479 namelen = count_digits(rec->ino);
2480 sprintf(namebuf, "%llu", rec->ino);
2481 name_recovered = 1;
2483 if (!type_recovered) {
2484 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2485 rec->ino);
2486 type = BTRFS_FT_REG_FILE;
2487 type_recovered = 1;
2490 ret = reset_nlink(trans, root, path, rec);
2491 if (ret < 0) {
2492 fprintf(stderr,
2493 "Failed to reset nlink for inode %llu: %s\n",
2494 rec->ino, strerror(-ret));
2495 goto out;
2498 if (rec->found_link == 0) {
2499 lost_found_ino = root->highest_inode;
2500 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2501 ret = -EOVERFLOW;
2502 goto out;
2504 lost_found_ino++;
2505 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2506 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2507 mode);
2508 if (ret < 0) {
2509 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2510 dir_name, strerror(-ret));
2511 goto out;
2513 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2514 namebuf, namelen, type, NULL, 1);
2516 * Add ".INO" suffix several times to handle case where
2517 * "FILENAME.INO" is already taken by another file.
2519 while (ret == -EEXIST) {
2521 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2523 if (namelen + count_digits(rec->ino) + 1 >
2524 BTRFS_NAME_LEN) {
2525 ret = -EFBIG;
2526 goto out;
2528 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2529 ".%llu", rec->ino);
2530 namelen += count_digits(rec->ino) + 1;
2531 ret = btrfs_add_link(trans, root, rec->ino,
2532 lost_found_ino, namebuf,
2533 namelen, type, NULL, 1);
2535 if (ret < 0) {
2536 fprintf(stderr,
2537 "Failed to link the inode %llu to %s dir: %s\n",
2538 rec->ino, dir_name, strerror(-ret));
2539 goto out;
2542 * Just increase the found_link, don't actually add the
2543 * backref. This will make things easier and this inode
2544 * record will be freed after the repair is done.
2545 * So fsck will not report problem about this inode.
2547 rec->found_link++;
2548 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2549 namelen, namebuf, dir_name);
2551 printf("Fixed the nlink of inode %llu\n", rec->ino);
2552 out:
2554 * Clear the flag anyway, or we will loop forever for the same inode
2555 * as it will not be removed from the bad inode list and the dead loop
2556 * happens.
2558 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2559 btrfs_release_path(path);
2560 return ret;
2564 * Check if there is any normal(reg or prealloc) file extent for given
2565 * ino.
2566 * This is used to determine the file type when neither its dir_index/item or
2567 * inode_item exists.
2569 * This will *NOT* report error, if any error happens, just consider it does
2570 * not have any normal file extent.
2572 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2574 struct btrfs_path *path;
2575 struct btrfs_key key;
2576 struct btrfs_key found_key;
2577 struct btrfs_file_extent_item *fi;
2578 u8 type;
2579 int ret = 0;
2581 path = btrfs_alloc_path();
2582 if (!path)
2583 goto out;
2584 key.objectid = ino;
2585 key.type = BTRFS_EXTENT_DATA_KEY;
2586 key.offset = 0;
2588 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2589 if (ret < 0) {
2590 ret = 0;
2591 goto out;
2593 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2594 ret = btrfs_next_leaf(root, path);
2595 if (ret) {
2596 ret = 0;
2597 goto out;
2600 while (1) {
2601 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2602 path->slots[0]);
2603 if (found_key.objectid != ino ||
2604 found_key.type != BTRFS_EXTENT_DATA_KEY)
2605 break;
2606 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607 struct btrfs_file_extent_item);
2608 type = btrfs_file_extent_type(path->nodes[0], fi);
2609 if (type != BTRFS_FILE_EXTENT_INLINE) {
2610 ret = 1;
2611 goto out;
2614 out:
2615 btrfs_free_path(path);
2616 return ret;
2619 static u32 btrfs_type_to_imode(u8 type)
2621 static u32 imode_by_btrfs_type[] = {
2622 [BTRFS_FT_REG_FILE] = S_IFREG,
2623 [BTRFS_FT_DIR] = S_IFDIR,
2624 [BTRFS_FT_CHRDEV] = S_IFCHR,
2625 [BTRFS_FT_BLKDEV] = S_IFBLK,
2626 [BTRFS_FT_FIFO] = S_IFIFO,
2627 [BTRFS_FT_SOCK] = S_IFSOCK,
2628 [BTRFS_FT_SYMLINK] = S_IFLNK,
2631 return imode_by_btrfs_type[(type)];
2634 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2635 struct btrfs_root *root,
2636 struct btrfs_path *path,
2637 struct inode_record *rec)
2639 u8 filetype;
2640 u32 mode = 0700;
2641 int type_recovered = 0;
2642 int ret = 0;
2644 printf("Trying to rebuild inode:%llu\n", rec->ino);
2646 type_recovered = !find_file_type(rec, &filetype);
2649 * Try to determine inode type if type not found.
2651 * For found regular file extent, it must be FILE.
2652 * For found dir_item/index, it must be DIR.
2654 * For undetermined one, use FILE as fallback.
2656 * TODO:
2657 * 1. If found backref(inode_index/item is already handled) to it,
2658 * it must be DIR.
2659 * Need new inode-inode ref structure to allow search for that.
2661 if (!type_recovered) {
2662 if (rec->found_file_extent &&
2663 find_normal_file_extent(root, rec->ino)) {
2664 type_recovered = 1;
2665 filetype = BTRFS_FT_REG_FILE;
2666 } else if (rec->found_dir_item) {
2667 type_recovered = 1;
2668 filetype = BTRFS_FT_DIR;
2669 } else if (!list_empty(&rec->orphan_extents)) {
2670 type_recovered = 1;
2671 filetype = BTRFS_FT_REG_FILE;
2672 } else{
2673 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2674 rec->ino);
2675 type_recovered = 1;
2676 filetype = BTRFS_FT_REG_FILE;
2680 ret = btrfs_new_inode(trans, root, rec->ino,
2681 mode | btrfs_type_to_imode(filetype));
2682 if (ret < 0)
2683 goto out;
2686 * Here inode rebuild is done, we only rebuild the inode item,
2687 * don't repair the nlink(like move to lost+found).
2688 * That is the job of nlink repair.
2690 * We just fill the record and return
2692 rec->found_dir_item = 1;
2693 rec->imode = mode | btrfs_type_to_imode(filetype);
2694 rec->nlink = 0;
2695 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2696 /* Ensure the inode_nlinks repair function will be called */
2697 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2698 out:
2699 return ret;
2702 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2703 struct btrfs_root *root,
2704 struct btrfs_path *path,
2705 struct inode_record *rec)
2707 struct orphan_data_extent *orphan;
2708 struct orphan_data_extent *tmp;
2709 int ret = 0;
2711 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2713 * Check for conflicting file extents
2715 * Here we don't know whether the extents is compressed or not,
2716 * so we can only assume it not compressed nor data offset,
2717 * and use its disk_len as extent length.
2719 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2720 orphan->offset, orphan->disk_len, 0);
2721 btrfs_release_path(path);
2722 if (ret < 0)
2723 goto out;
2724 if (!ret) {
2725 fprintf(stderr,
2726 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2727 orphan->disk_bytenr, orphan->disk_len);
2728 ret = btrfs_free_extent(trans,
2729 root->fs_info->extent_root,
2730 orphan->disk_bytenr, orphan->disk_len,
2731 0, root->objectid, orphan->objectid,
2732 orphan->offset);
2733 if (ret < 0)
2734 goto out;
2736 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2737 orphan->offset, orphan->disk_bytenr,
2738 orphan->disk_len, orphan->disk_len);
2739 if (ret < 0)
2740 goto out;
2742 /* Update file size info */
2743 rec->found_size += orphan->disk_len;
2744 if (rec->found_size == rec->nbytes)
2745 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2747 /* Update the file extent hole info too */
2748 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2749 orphan->disk_len);
2750 if (ret < 0)
2751 goto out;
2752 if (RB_EMPTY_ROOT(&rec->holes))
2753 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2755 list_del(&orphan->list);
2756 free(orphan);
2758 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2759 out:
2760 return ret;
2763 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2764 struct btrfs_root *root,
2765 struct btrfs_path *path,
2766 struct inode_record *rec)
2768 struct rb_node *node;
2769 struct file_extent_hole *hole;
2770 int found = 0;
2771 int ret = 0;
2773 node = rb_first(&rec->holes);
2775 while (node) {
2776 found = 1;
2777 hole = rb_entry(node, struct file_extent_hole, node);
2778 ret = btrfs_punch_hole(trans, root, rec->ino,
2779 hole->start, hole->len);
2780 if (ret < 0)
2781 goto out;
2782 ret = del_file_extent_hole(&rec->holes, hole->start,
2783 hole->len);
2784 if (ret < 0)
2785 goto out;
2786 if (RB_EMPTY_ROOT(&rec->holes))
2787 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2788 node = rb_first(&rec->holes);
2790 /* special case for a file losing all its file extent */
2791 if (!found) {
2792 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2793 round_up(rec->isize, root->sectorsize));
2794 if (ret < 0)
2795 goto out;
2797 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2798 rec->ino, root->objectid);
2799 out:
2800 return ret;
2803 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_path *path;
2807 int ret = 0;
2809 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2810 I_ERR_NO_ORPHAN_ITEM |
2811 I_ERR_LINK_COUNT_WRONG |
2812 I_ERR_NO_INODE_ITEM |
2813 I_ERR_FILE_EXTENT_ORPHAN |
2814 I_ERR_FILE_EXTENT_DISCOUNT|
2815 I_ERR_FILE_NBYTES_WRONG)))
2816 return rec->errors;
2818 path = btrfs_alloc_path();
2819 if (!path)
2820 return -ENOMEM;
2823 * For nlink repair, it may create a dir and add link, so
2824 * 2 for parent(256)'s dir_index and dir_item
2825 * 2 for lost+found dir's inode_item and inode_ref
2826 * 1 for the new inode_ref of the file
2827 * 2 for lost+found dir's dir_index and dir_item for the file
2829 trans = btrfs_start_transaction(root, 7);
2830 if (IS_ERR(trans)) {
2831 btrfs_free_path(path);
2832 return PTR_ERR(trans);
2835 if (rec->errors & I_ERR_NO_INODE_ITEM)
2836 ret = repair_inode_no_item(trans, root, path, rec);
2837 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2838 ret = repair_inode_orphan_extent(trans, root, path, rec);
2839 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2840 ret = repair_inode_discount_extent(trans, root, path, rec);
2841 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2842 ret = repair_inode_isize(trans, root, path, rec);
2843 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2844 ret = repair_inode_orphan_item(trans, root, path, rec);
2845 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2846 ret = repair_inode_nlinks(trans, root, path, rec);
2847 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2848 ret = repair_inode_nbytes(trans, root, path, rec);
2849 btrfs_commit_transaction(trans, root);
2850 btrfs_free_path(path);
2851 return ret;
2854 static int check_inode_recs(struct btrfs_root *root,
2855 struct cache_tree *inode_cache)
2857 struct cache_extent *cache;
2858 struct ptr_node *node;
2859 struct inode_record *rec;
2860 struct inode_backref *backref;
2861 int stage = 0;
2862 int ret = 0;
2863 int err = 0;
2864 u64 error = 0;
2865 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2867 if (btrfs_root_refs(&root->root_item) == 0) {
2868 if (!cache_tree_empty(inode_cache))
2869 fprintf(stderr, "warning line %d\n", __LINE__);
2870 return 0;
2874 * We need to record the highest inode number for later 'lost+found'
2875 * dir creation.
2876 * We must select a ino not used/refered by any existing inode, or
2877 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2878 * this may cause 'lost+found' dir has wrong nlinks.
2880 cache = last_cache_extent(inode_cache);
2881 if (cache) {
2882 node = container_of(cache, struct ptr_node, cache);
2883 rec = node->data;
2884 if (rec->ino > root->highest_inode)
2885 root->highest_inode = rec->ino;
2889 * We need to repair backrefs first because we could change some of the
2890 * errors in the inode recs.
2892 * We also need to go through and delete invalid backrefs first and then
2893 * add the correct ones second. We do this because we may get EEXIST
2894 * when adding back the correct index because we hadn't yet deleted the
2895 * invalid index.
2897 * For example, if we were missing a dir index then the directories
2898 * isize would be wrong, so if we fixed the isize to what we thought it
2899 * would be and then fixed the backref we'd still have a invalid fs, so
2900 * we need to add back the dir index and then check to see if the isize
2901 * is still wrong.
2903 while (stage < 3) {
2904 stage++;
2905 if (stage == 3 && !err)
2906 break;
2908 cache = search_cache_extent(inode_cache, 0);
2909 while (repair && cache) {
2910 node = container_of(cache, struct ptr_node, cache);
2911 rec = node->data;
2912 cache = next_cache_extent(cache);
2914 /* Need to free everything up and rescan */
2915 if (stage == 3) {
2916 remove_cache_extent(inode_cache, &node->cache);
2917 free(node);
2918 free_inode_rec(rec);
2919 continue;
2922 if (list_empty(&rec->backrefs))
2923 continue;
2925 ret = repair_inode_backrefs(root, rec, inode_cache,
2926 stage == 1);
2927 if (ret < 0) {
2928 err = ret;
2929 stage = 2;
2930 break;
2931 } if (ret > 0) {
2932 err = -EAGAIN;
2936 if (err)
2937 return err;
2939 rec = get_inode_rec(inode_cache, root_dirid, 0);
2940 BUG_ON(IS_ERR(rec));
2941 if (rec) {
2942 ret = check_root_dir(rec);
2943 if (ret) {
2944 fprintf(stderr, "root %llu root dir %llu error\n",
2945 (unsigned long long)root->root_key.objectid,
2946 (unsigned long long)root_dirid);
2947 print_inode_error(root, rec);
2948 error++;
2950 } else {
2951 if (repair) {
2952 struct btrfs_trans_handle *trans;
2954 trans = btrfs_start_transaction(root, 1);
2955 if (IS_ERR(trans)) {
2956 err = PTR_ERR(trans);
2957 return err;
2960 fprintf(stderr,
2961 "root %llu missing its root dir, recreating\n",
2962 (unsigned long long)root->objectid);
2964 ret = btrfs_make_root_dir(trans, root, root_dirid);
2965 BUG_ON(ret);
2967 btrfs_commit_transaction(trans, root);
2968 return -EAGAIN;
2971 fprintf(stderr, "root %llu root dir %llu not found\n",
2972 (unsigned long long)root->root_key.objectid,
2973 (unsigned long long)root_dirid);
2976 while (1) {
2977 cache = search_cache_extent(inode_cache, 0);
2978 if (!cache)
2979 break;
2980 node = container_of(cache, struct ptr_node, cache);
2981 rec = node->data;
2982 remove_cache_extent(inode_cache, &node->cache);
2983 free(node);
2984 if (rec->ino == root_dirid ||
2985 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2986 free_inode_rec(rec);
2987 continue;
2990 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2991 ret = check_orphan_item(root, rec->ino);
2992 if (ret == 0)
2993 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2994 if (can_free_inode_rec(rec)) {
2995 free_inode_rec(rec);
2996 continue;
3000 if (!rec->found_inode_item)
3001 rec->errors |= I_ERR_NO_INODE_ITEM;
3002 if (rec->found_link != rec->nlink)
3003 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3004 if (repair) {
3005 ret = try_repair_inode(root, rec);
3006 if (ret == 0 && can_free_inode_rec(rec)) {
3007 free_inode_rec(rec);
3008 continue;
3010 ret = 0;
3013 if (!(repair && ret == 0))
3014 error++;
3015 print_inode_error(root, rec);
3016 list_for_each_entry(backref, &rec->backrefs, list) {
3017 if (!backref->found_dir_item)
3018 backref->errors |= REF_ERR_NO_DIR_ITEM;
3019 if (!backref->found_dir_index)
3020 backref->errors |= REF_ERR_NO_DIR_INDEX;
3021 if (!backref->found_inode_ref)
3022 backref->errors |= REF_ERR_NO_INODE_REF;
3023 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3024 " namelen %u name %s filetype %d errors %x",
3025 (unsigned long long)backref->dir,
3026 (unsigned long long)backref->index,
3027 backref->namelen, backref->name,
3028 backref->filetype, backref->errors);
3029 print_ref_error(backref->errors);
3031 free_inode_rec(rec);
3033 return (error > 0) ? -1 : 0;
3036 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3037 u64 objectid)
3039 struct cache_extent *cache;
3040 struct root_record *rec = NULL;
3041 int ret;
3043 cache = lookup_cache_extent(root_cache, objectid, 1);
3044 if (cache) {
3045 rec = container_of(cache, struct root_record, cache);
3046 } else {
3047 rec = calloc(1, sizeof(*rec));
3048 if (!rec)
3049 return ERR_PTR(-ENOMEM);
3050 rec->objectid = objectid;
3051 INIT_LIST_HEAD(&rec->backrefs);
3052 rec->cache.start = objectid;
3053 rec->cache.size = 1;
3055 ret = insert_cache_extent(root_cache, &rec->cache);
3056 if (ret)
3057 return ERR_PTR(-EEXIST);
3059 return rec;
3062 static struct root_backref *get_root_backref(struct root_record *rec,
3063 u64 ref_root, u64 dir, u64 index,
3064 const char *name, int namelen)
3066 struct root_backref *backref;
3068 list_for_each_entry(backref, &rec->backrefs, list) {
3069 if (backref->ref_root != ref_root || backref->dir != dir ||
3070 backref->namelen != namelen)
3071 continue;
3072 if (memcmp(name, backref->name, namelen))
3073 continue;
3074 return backref;
3077 backref = calloc(1, sizeof(*backref) + namelen + 1);
3078 if (!backref)
3079 return NULL;
3080 backref->ref_root = ref_root;
3081 backref->dir = dir;
3082 backref->index = index;
3083 backref->namelen = namelen;
3084 memcpy(backref->name, name, namelen);
3085 backref->name[namelen] = '\0';
3086 list_add_tail(&backref->list, &rec->backrefs);
3087 return backref;
3090 static void free_root_record(struct cache_extent *cache)
3092 struct root_record *rec;
3093 struct root_backref *backref;
3095 rec = container_of(cache, struct root_record, cache);
3096 while (!list_empty(&rec->backrefs)) {
3097 backref = list_entry(rec->backrefs.next,
3098 struct root_backref, list);
3099 list_del(&backref->list);
3100 free(backref);
3103 kfree(rec);
3106 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3108 static int add_root_backref(struct cache_tree *root_cache,
3109 u64 root_id, u64 ref_root, u64 dir, u64 index,
3110 const char *name, int namelen,
3111 int item_type, int errors)
3113 struct root_record *rec;
3114 struct root_backref *backref;
3116 rec = get_root_rec(root_cache, root_id);
3117 BUG_ON(IS_ERR(rec));
3118 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3119 BUG_ON(!backref);
3121 backref->errors |= errors;
3123 if (item_type != BTRFS_DIR_ITEM_KEY) {
3124 if (backref->found_dir_index || backref->found_back_ref ||
3125 backref->found_forward_ref) {
3126 if (backref->index != index)
3127 backref->errors |= REF_ERR_INDEX_UNMATCH;
3128 } else {
3129 backref->index = index;
3133 if (item_type == BTRFS_DIR_ITEM_KEY) {
3134 if (backref->found_forward_ref)
3135 rec->found_ref++;
3136 backref->found_dir_item = 1;
3137 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3138 backref->found_dir_index = 1;
3139 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3140 if (backref->found_forward_ref)
3141 backref->errors |= REF_ERR_DUP_ROOT_REF;
3142 else if (backref->found_dir_item)
3143 rec->found_ref++;
3144 backref->found_forward_ref = 1;
3145 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3146 if (backref->found_back_ref)
3147 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3148 backref->found_back_ref = 1;
3149 } else {
3150 BUG_ON(1);
3153 if (backref->found_forward_ref && backref->found_dir_item)
3154 backref->reachable = 1;
3155 return 0;
3158 static int merge_root_recs(struct btrfs_root *root,
3159 struct cache_tree *src_cache,
3160 struct cache_tree *dst_cache)
3162 struct cache_extent *cache;
3163 struct ptr_node *node;
3164 struct inode_record *rec;
3165 struct inode_backref *backref;
3166 int ret = 0;
3168 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3169 free_inode_recs_tree(src_cache);
3170 return 0;
3173 while (1) {
3174 cache = search_cache_extent(src_cache, 0);
3175 if (!cache)
3176 break;
3177 node = container_of(cache, struct ptr_node, cache);
3178 rec = node->data;
3179 remove_cache_extent(src_cache, &node->cache);
3180 free(node);
3182 ret = is_child_root(root, root->objectid, rec->ino);
3183 if (ret < 0)
3184 break;
3185 else if (ret == 0)
3186 goto skip;
3188 list_for_each_entry(backref, &rec->backrefs, list) {
3189 BUG_ON(backref->found_inode_ref);
3190 if (backref->found_dir_item)
3191 add_root_backref(dst_cache, rec->ino,
3192 root->root_key.objectid, backref->dir,
3193 backref->index, backref->name,
3194 backref->namelen, BTRFS_DIR_ITEM_KEY,
3195 backref->errors);
3196 if (backref->found_dir_index)
3197 add_root_backref(dst_cache, rec->ino,
3198 root->root_key.objectid, backref->dir,
3199 backref->index, backref->name,
3200 backref->namelen, BTRFS_DIR_INDEX_KEY,
3201 backref->errors);
3203 skip:
3204 free_inode_rec(rec);
3206 if (ret < 0)
3207 return ret;
3208 return 0;
3211 static int check_root_refs(struct btrfs_root *root,
3212 struct cache_tree *root_cache)
3214 struct root_record *rec;
3215 struct root_record *ref_root;
3216 struct root_backref *backref;
3217 struct cache_extent *cache;
3218 int loop = 1;
3219 int ret;
3220 int error;
3221 int errors = 0;
3223 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3224 BUG_ON(IS_ERR(rec));
3225 rec->found_ref = 1;
3227 /* fixme: this can not detect circular references */
3228 while (loop) {
3229 loop = 0;
3230 cache = search_cache_extent(root_cache, 0);
3231 while (1) {
3232 if (!cache)
3233 break;
3234 rec = container_of(cache, struct root_record, cache);
3235 cache = next_cache_extent(cache);
3237 if (rec->found_ref == 0)
3238 continue;
3240 list_for_each_entry(backref, &rec->backrefs, list) {
3241 if (!backref->reachable)
3242 continue;
3244 ref_root = get_root_rec(root_cache,
3245 backref->ref_root);
3246 BUG_ON(IS_ERR(ref_root));
3247 if (ref_root->found_ref > 0)
3248 continue;
3250 backref->reachable = 0;
3251 rec->found_ref--;
3252 if (rec->found_ref == 0)
3253 loop = 1;
3258 cache = search_cache_extent(root_cache, 0);
3259 while (1) {
3260 if (!cache)
3261 break;
3262 rec = container_of(cache, struct root_record, cache);
3263 cache = next_cache_extent(cache);
3265 if (rec->found_ref == 0 &&
3266 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3267 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3268 ret = check_orphan_item(root->fs_info->tree_root,
3269 rec->objectid);
3270 if (ret == 0)
3271 continue;
3274 * If we don't have a root item then we likely just have
3275 * a dir item in a snapshot for this root but no actual
3276 * ref key or anything so it's meaningless.
3278 if (!rec->found_root_item)
3279 continue;
3280 errors++;
3281 fprintf(stderr, "fs tree %llu not referenced\n",
3282 (unsigned long long)rec->objectid);
3285 error = 0;
3286 if (rec->found_ref > 0 && !rec->found_root_item)
3287 error = 1;
3288 list_for_each_entry(backref, &rec->backrefs, list) {
3289 if (!backref->found_dir_item)
3290 backref->errors |= REF_ERR_NO_DIR_ITEM;
3291 if (!backref->found_dir_index)
3292 backref->errors |= REF_ERR_NO_DIR_INDEX;
3293 if (!backref->found_back_ref)
3294 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3295 if (!backref->found_forward_ref)
3296 backref->errors |= REF_ERR_NO_ROOT_REF;
3297 if (backref->reachable && backref->errors)
3298 error = 1;
3300 if (!error)
3301 continue;
3303 errors++;
3304 fprintf(stderr, "fs tree %llu refs %u %s\n",
3305 (unsigned long long)rec->objectid, rec->found_ref,
3306 rec->found_root_item ? "" : "not found");
3308 list_for_each_entry(backref, &rec->backrefs, list) {
3309 if (!backref->reachable)
3310 continue;
3311 if (!backref->errors && rec->found_root_item)
3312 continue;
3313 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3314 " index %llu namelen %u name %s errors %x\n",
3315 (unsigned long long)backref->ref_root,
3316 (unsigned long long)backref->dir,
3317 (unsigned long long)backref->index,
3318 backref->namelen, backref->name,
3319 backref->errors);
3320 print_ref_error(backref->errors);
3323 return errors > 0 ? 1 : 0;
3326 static int process_root_ref(struct extent_buffer *eb, int slot,
3327 struct btrfs_key *key,
3328 struct cache_tree *root_cache)
3330 u64 dirid;
3331 u64 index;
3332 u32 len;
3333 u32 name_len;
3334 struct btrfs_root_ref *ref;
3335 char namebuf[BTRFS_NAME_LEN];
3336 int error;
3338 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3340 dirid = btrfs_root_ref_dirid(eb, ref);
3341 index = btrfs_root_ref_sequence(eb, ref);
3342 name_len = btrfs_root_ref_name_len(eb, ref);
3344 if (name_len <= BTRFS_NAME_LEN) {
3345 len = name_len;
3346 error = 0;
3347 } else {
3348 len = BTRFS_NAME_LEN;
3349 error = REF_ERR_NAME_TOO_LONG;
3351 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3353 if (key->type == BTRFS_ROOT_REF_KEY) {
3354 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3355 index, namebuf, len, key->type, error);
3356 } else {
3357 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3358 index, namebuf, len, key->type, error);
3360 return 0;
3363 static void free_corrupt_block(struct cache_extent *cache)
3365 struct btrfs_corrupt_block *corrupt;
3367 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3368 free(corrupt);
3371 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3374 * Repair the btree of the given root.
3376 * The fix is to remove the node key in corrupt_blocks cache_tree.
3377 * and rebalance the tree.
3378 * After the fix, the btree should be writeable.
3380 static int repair_btree(struct btrfs_root *root,
3381 struct cache_tree *corrupt_blocks)
3383 struct btrfs_trans_handle *trans;
3384 struct btrfs_path *path;
3385 struct btrfs_corrupt_block *corrupt;
3386 struct cache_extent *cache;
3387 struct btrfs_key key;
3388 u64 offset;
3389 int level;
3390 int ret = 0;
3392 if (cache_tree_empty(corrupt_blocks))
3393 return 0;
3395 path = btrfs_alloc_path();
3396 if (!path)
3397 return -ENOMEM;
3399 trans = btrfs_start_transaction(root, 1);
3400 if (IS_ERR(trans)) {
3401 ret = PTR_ERR(trans);
3402 fprintf(stderr, "Error starting transaction: %s\n",
3403 strerror(-ret));
3404 goto out_free_path;
3406 cache = first_cache_extent(corrupt_blocks);
3407 while (cache) {
3408 corrupt = container_of(cache, struct btrfs_corrupt_block,
3409 cache);
3410 level = corrupt->level;
3411 path->lowest_level = level;
3412 key.objectid = corrupt->key.objectid;
3413 key.type = corrupt->key.type;
3414 key.offset = corrupt->key.offset;
3417 * Here we don't want to do any tree balance, since it may
3418 * cause a balance with corrupted brother leaf/node,
3419 * so ins_len set to 0 here.
3420 * Balance will be done after all corrupt node/leaf is deleted.
3422 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3423 if (ret < 0)
3424 goto out;
3425 offset = btrfs_node_blockptr(path->nodes[level],
3426 path->slots[level]);
3428 /* Remove the ptr */
3429 ret = btrfs_del_ptr(trans, root, path, level,
3430 path->slots[level]);
3431 if (ret < 0)
3432 goto out;
3434 * Remove the corresponding extent
3435 * return value is not concerned.
3437 btrfs_release_path(path);
3438 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3439 0, root->root_key.objectid,
3440 level - 1, 0);
3441 cache = next_cache_extent(cache);
3444 /* Balance the btree using btrfs_search_slot() */
3445 cache = first_cache_extent(corrupt_blocks);
3446 while (cache) {
3447 corrupt = container_of(cache, struct btrfs_corrupt_block,
3448 cache);
3449 memcpy(&key, &corrupt->key, sizeof(key));
3450 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3451 if (ret < 0)
3452 goto out;
3453 /* return will always >0 since it won't find the item */
3454 ret = 0;
3455 btrfs_release_path(path);
3456 cache = next_cache_extent(cache);
3458 out:
3459 btrfs_commit_transaction(trans, root);
3460 out_free_path:
3461 btrfs_free_path(path);
3462 return ret;
3465 static int check_fs_root(struct btrfs_root *root,
3466 struct cache_tree *root_cache,
3467 struct walk_control *wc)
3469 int ret = 0;
3470 int err = 0;
3471 int wret;
3472 int level;
3473 struct btrfs_path path;
3474 struct shared_node root_node;
3475 struct root_record *rec;
3476 struct btrfs_root_item *root_item = &root->root_item;
3477 struct cache_tree corrupt_blocks;
3478 struct orphan_data_extent *orphan;
3479 struct orphan_data_extent *tmp;
3480 enum btrfs_tree_block_status status;
3483 * Reuse the corrupt_block cache tree to record corrupted tree block
3485 * Unlike the usage in extent tree check, here we do it in a per
3486 * fs/subvol tree base.
3488 cache_tree_init(&corrupt_blocks);
3489 root->fs_info->corrupt_blocks = &corrupt_blocks;
3491 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3492 rec = get_root_rec(root_cache, root->root_key.objectid);
3493 BUG_ON(IS_ERR(rec));
3494 if (btrfs_root_refs(root_item) > 0)
3495 rec->found_root_item = 1;
3498 btrfs_init_path(&path);
3499 memset(&root_node, 0, sizeof(root_node));
3500 cache_tree_init(&root_node.root_cache);
3501 cache_tree_init(&root_node.inode_cache);
3503 /* Move the orphan extent record to corresponding inode_record */
3504 list_for_each_entry_safe(orphan, tmp,
3505 &root->orphan_data_extents, list) {
3506 struct inode_record *inode;
3508 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3510 BUG_ON(IS_ERR(inode));
3511 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3512 list_move(&orphan->list, &inode->orphan_extents);
3515 level = btrfs_header_level(root->node);
3516 memset(wc->nodes, 0, sizeof(wc->nodes));
3517 wc->nodes[level] = &root_node;
3518 wc->active_node = level;
3519 wc->root_level = level;
3521 /* We may not have checked the root block, lets do that now */
3522 if (btrfs_is_leaf(root->node))
3523 status = btrfs_check_leaf(root, NULL, root->node);
3524 else
3525 status = btrfs_check_node(root, NULL, root->node);
3526 if (status != BTRFS_TREE_BLOCK_CLEAN)
3527 return -EIO;
3529 if (btrfs_root_refs(root_item) > 0 ||
3530 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3531 path.nodes[level] = root->node;
3532 extent_buffer_get(root->node);
3533 path.slots[level] = 0;
3534 } else {
3535 struct btrfs_key key;
3536 struct btrfs_disk_key found_key;
3538 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3539 level = root_item->drop_level;
3540 path.lowest_level = level;
3541 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3542 if (wret < 0)
3543 goto skip_walking;
3544 btrfs_node_key(path.nodes[level], &found_key,
3545 path.slots[level]);
3546 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3547 sizeof(found_key)));
3550 while (1) {
3551 wret = walk_down_tree(root, &path, wc, &level);
3552 if (wret < 0)
3553 ret = wret;
3554 if (wret != 0)
3555 break;
3557 wret = walk_up_tree(root, &path, wc, &level);
3558 if (wret < 0)
3559 ret = wret;
3560 if (wret != 0)
3561 break;
3563 skip_walking:
3564 btrfs_release_path(&path);
3566 if (!cache_tree_empty(&corrupt_blocks)) {
3567 struct cache_extent *cache;
3568 struct btrfs_corrupt_block *corrupt;
3570 printf("The following tree block(s) is corrupted in tree %llu:\n",
3571 root->root_key.objectid);
3572 cache = first_cache_extent(&corrupt_blocks);
3573 while (cache) {
3574 corrupt = container_of(cache,
3575 struct btrfs_corrupt_block,
3576 cache);
3577 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3578 cache->start, corrupt->level,
3579 corrupt->key.objectid, corrupt->key.type,
3580 corrupt->key.offset);
3581 cache = next_cache_extent(cache);
3583 if (repair) {
3584 printf("Try to repair the btree for root %llu\n",
3585 root->root_key.objectid);
3586 ret = repair_btree(root, &corrupt_blocks);
3587 if (ret < 0)
3588 fprintf(stderr, "Failed to repair btree: %s\n",
3589 strerror(-ret));
3590 if (!ret)
3591 printf("Btree for root %llu is fixed\n",
3592 root->root_key.objectid);
3596 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3597 if (err < 0)
3598 ret = err;
3600 if (root_node.current) {
3601 root_node.current->checked = 1;
3602 maybe_free_inode_rec(&root_node.inode_cache,
3603 root_node.current);
3606 err = check_inode_recs(root, &root_node.inode_cache);
3607 if (!ret)
3608 ret = err;
3610 free_corrupt_blocks_tree(&corrupt_blocks);
3611 root->fs_info->corrupt_blocks = NULL;
3612 free_orphan_data_extents(&root->orphan_data_extents);
3613 return ret;
3616 static int fs_root_objectid(u64 objectid)
3618 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3619 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3620 return 1;
3621 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run check_fs_root()
 * on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items are fed into
 * @root_cache via process_root_ref().
 *
 * Returns 0 on success, 1 if anything failed.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	int ret;
	int err = 0;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);
	}

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	/* Snapshot the root node pointer so we can detect a COW of the tree root */
	tree_node = tree_root->node;
	while (1) {
		if (tree_node != tree_root->node) {
			/* Repair COWed the tree root under us: restart the scan */
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* reloc roots are not cached; read a private copy */
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				/* the check COWed blocks; rescan from scratch */
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret)
				err = 1;
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	/* a non-empty shared tree here means unbalanced walk state */
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);

	task_stop(ctx.info);

	return err;
}
/*
 * Verify that every backref attached to @rec is internally consistent and
 * that the sum of found references matches rec->refs.
 *
 * @print_errs: when 0, bail out at the first inconsistency without printing;
 *              when 1, report every problem to stderr.
 *
 * Returns 0 when all backpointers check out, 1 otherwise.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		cur = cur->next;
		/* backref seen via a ref item but missing from the extent tree */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = (struct data_backref *)back;
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = (struct tree_backref *)back;
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* tree backref present in extent tree but never seen in a tree */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = (struct tree_backref *)back;
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = (struct data_backref *)back;
			/* per-backref refcount must match what we counted */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			}
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/* accumulate total found references for the global check below */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = (struct data_backref *)back;
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
3836 static int free_all_extent_backrefs(struct extent_record *rec)
3838 struct extent_backref *back;
3839 struct list_head *cur;
3840 while (!list_empty(&rec->backrefs)) {
3841 cur = rec->backrefs.next;
3842 back = list_entry(cur, struct extent_backref, list);
3843 list_del(cur);
3844 free(back);
3846 return 0;
3849 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3850 struct cache_tree *extent_cache)
3852 struct cache_extent *cache;
3853 struct extent_record *rec;
3855 while (1) {
3856 cache = first_cache_extent(extent_cache);
3857 if (!cache)
3858 break;
3859 rec = container_of(cache, struct extent_record, cache);
3860 remove_cache_extent(extent_cache, cache);
3861 free_all_extent_backrefs(rec);
3862 free(rec);
3866 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3867 struct extent_record *rec)
3869 if (rec->content_checked && rec->owner_ref_checked &&
3870 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3871 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3872 !rec->bad_full_backref && !rec->crossing_stripes &&
3873 !rec->wrong_chunk_type) {
3874 remove_cache_extent(extent_cache, &rec->cache);
3875 free_all_extent_backrefs(rec);
3876 list_del_init(&rec->list);
3877 free(rec);
3879 return 0;
3882 static int check_owner_ref(struct btrfs_root *root,
3883 struct extent_record *rec,
3884 struct extent_buffer *buf)
3886 struct extent_backref *node;
3887 struct tree_backref *back;
3888 struct btrfs_root *ref_root;
3889 struct btrfs_key key;
3890 struct btrfs_path path;
3891 struct extent_buffer *parent;
3892 int level;
3893 int found = 0;
3894 int ret;
3896 list_for_each_entry(node, &rec->backrefs, list) {
3897 if (node->is_data)
3898 continue;
3899 if (!node->found_ref)
3900 continue;
3901 if (node->full_backref)
3902 continue;
3903 back = (struct tree_backref *)node;
3904 if (btrfs_header_owner(buf) == back->root)
3905 return 0;
3907 BUG_ON(rec->is_root);
3909 /* try to find the block by search corresponding fs tree */
3910 key.objectid = btrfs_header_owner(buf);
3911 key.type = BTRFS_ROOT_ITEM_KEY;
3912 key.offset = (u64)-1;
3914 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3915 if (IS_ERR(ref_root))
3916 return 1;
3918 level = btrfs_header_level(buf);
3919 if (level == 0)
3920 btrfs_item_key_to_cpu(buf, &key, 0);
3921 else
3922 btrfs_node_key_to_cpu(buf, &key, 0);
3924 btrfs_init_path(&path);
3925 path.lowest_level = level + 1;
3926 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3927 if (ret < 0)
3928 return 0;
3930 parent = path.nodes[level + 1];
3931 if (parent && buf->start == btrfs_node_blockptr(parent,
3932 path.slots[level + 1]))
3933 found = 1;
3935 btrfs_release_path(&path);
3936 return found ? 0 : 1;
3939 static int is_extent_tree_record(struct extent_record *rec)
3941 struct list_head *cur = rec->backrefs.next;
3942 struct extent_backref *node;
3943 struct tree_backref *back;
3944 int is_extent = 0;
3946 while(cur != &rec->backrefs) {
3947 node = list_entry(cur, struct extent_backref, list);
3948 cur = cur->next;
3949 if (node->is_data)
3950 return 0;
3951 back = (struct tree_backref *)node;
3952 if (node->full_backref)
3953 return 0;
3954 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3955 is_extent = 1;
3957 return is_extent;
3961 static int record_bad_block_io(struct btrfs_fs_info *info,
3962 struct cache_tree *extent_cache,
3963 u64 start, u64 len)
3965 struct extent_record *rec;
3966 struct cache_extent *cache;
3967 struct btrfs_key key;
3969 cache = lookup_cache_extent(extent_cache, start, len);
3970 if (!cache)
3971 return 0;
3973 rec = container_of(cache, struct extent_record, cache);
3974 if (!is_extent_tree_record(rec))
3975 return 0;
3977 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3978 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 inside @buf to restore key order.
 *
 * For an internal node this swaps the two key pointers; for a leaf it swaps
 * both the item headers (offset/size) and the item data, then patches the
 * item keys via btrfs_set_item_key_unsafe(). When slot 0 changes, the key
 * is propagated upward with btrfs_fixup_low_keys().
 *
 * Returns 0 on success, -ENOMEM if the leaf data copies can't be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			/* first key changed: fix the parent's copy of it */
			struct btrfs_disk_key key;
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		/* swap the payloads in place: each lands at the other's offset */
		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* swap the headers so each key still points at its own data */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
4049 static int fix_key_order(struct btrfs_trans_handle *trans,
4050 struct btrfs_root *root,
4051 struct btrfs_path *path)
4053 struct extent_buffer *buf;
4054 struct btrfs_key k1, k2;
4055 int i;
4056 int level = path->lowest_level;
4057 int ret = -EIO;
4059 buf = path->nodes[level];
4060 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4061 if (level) {
4062 btrfs_node_key_to_cpu(buf, &k1, i);
4063 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4064 } else {
4065 btrfs_item_key_to_cpu(buf, &k1, i);
4066 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4068 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4069 continue;
4070 ret = swap_values(root, path, buf, i);
4071 if (ret)
4072 break;
4073 btrfs_mark_buffer_dirty(buf);
4074 i = 0;
4076 return ret;
/*
 * Drop the item at @slot from leaf @buf by shifting the following item
 * headers down and decrementing nritems. The item's data area is left in
 * place; fix_item_offset() compacts the leaf afterwards.
 *
 * Only item types whose loss check_fs_root/extent checks can recover from
 * are allowed to be deleted; anything else returns -1 untouched.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* shift the remaining item headers over the deleted one */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		/* the leaf's first key changed: propagate it to the parents */
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair a leaf whose item data offsets don't line up: each item's data
 * must end exactly where the previous item's data begins (the first item
 * ends at the end of the leaf data area). Items whose data overlaps the
 * neighbour (or runs past the leaf) are deleted via delete_bogus_item();
 * items that leave a gap are shifted up to close it.
 *
 * Returns 0 on success; BUG()s on unfixable damage (see comment at the end).
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root)) {
				/* data runs past the leaf: drop the item */
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "item is off the end of the "
					"leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				/* data overlaps the previous item: drop it */
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out.  Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
4185 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4186 * then just return -EIO.
4188 static int try_to_fix_bad_block(struct btrfs_root *root,
4189 struct extent_buffer *buf,
4190 enum btrfs_tree_block_status status)
4192 struct btrfs_trans_handle *trans;
4193 struct ulist *roots;
4194 struct ulist_node *node;
4195 struct btrfs_root *search_root;
4196 struct btrfs_path *path;
4197 struct ulist_iterator iter;
4198 struct btrfs_key root_key, key;
4199 int ret;
4201 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4202 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4203 return -EIO;
4205 path = btrfs_alloc_path();
4206 if (!path)
4207 return -EIO;
4209 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4210 0, &roots);
4211 if (ret) {
4212 btrfs_free_path(path);
4213 return -EIO;
4216 ULIST_ITER_INIT(&iter);
4217 while ((node = ulist_next(roots, &iter))) {
4218 root_key.objectid = node->val;
4219 root_key.type = BTRFS_ROOT_ITEM_KEY;
4220 root_key.offset = (u64)-1;
4222 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4223 if (IS_ERR(root)) {
4224 ret = -EIO;
4225 break;
4229 trans = btrfs_start_transaction(search_root, 0);
4230 if (IS_ERR(trans)) {
4231 ret = PTR_ERR(trans);
4232 break;
4235 path->lowest_level = btrfs_header_level(buf);
4236 path->skip_check_block = 1;
4237 if (path->lowest_level)
4238 btrfs_node_key_to_cpu(buf, &key, 0);
4239 else
4240 btrfs_item_key_to_cpu(buf, &key, 0);
4241 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4242 if (ret) {
4243 ret = -EIO;
4244 btrfs_commit_transaction(trans, search_root);
4245 break;
4247 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4248 ret = fix_key_order(trans, search_root, path);
4249 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4250 ret = fix_item_offset(trans, search_root, path);
4251 if (ret) {
4252 btrfs_commit_transaction(trans, search_root);
4253 break;
4255 btrfs_release_path(path);
4256 btrfs_commit_transaction(trans, search_root);
4258 ulist_free(roots);
4259 btrfs_free_path(path);
4260 return ret;
/*
 * Validate tree block @buf against its extent record: run the generic
 * leaf/node sanity checks, optionally attempt repair, and verify the
 * header owner is properly referenced.
 *
 * Returns 0 on success, 1 if no extent record covers @buf, -EIO for an
 * unfixable bad block, -EAGAIN when repair COWed blocks and the caller
 * must rescan.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {

		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			/*
			 * NOTE(review): try_to_fix_bad_block() returns an int
			 * errno, stored into the status enum; 0 aliases
			 * BTRFS_TREE_BLOCK_CLEAN, which is what the check
			 * below relies on.
			 */
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cow'ed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
4326 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4327 u64 parent, u64 root)
4329 struct list_head *cur = rec->backrefs.next;
4330 struct extent_backref *node;
4331 struct tree_backref *back;
4333 while(cur != &rec->backrefs) {
4334 node = list_entry(cur, struct extent_backref, list);
4335 cur = cur->next;
4336 if (node->is_data)
4337 continue;
4338 back = (struct tree_backref *)node;
4339 if (parent > 0) {
4340 if (!node->full_backref)
4341 continue;
4342 if (parent == back->parent)
4343 return back;
4344 } else {
4345 if (node->full_backref)
4346 continue;
4347 if (back->root == root)
4348 return back;
4351 return NULL;
4354 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4355 u64 parent, u64 root)
4357 struct tree_backref *ref = malloc(sizeof(*ref));
4359 if (!ref)
4360 return NULL;
4361 memset(&ref->node, 0, sizeof(ref->node));
4362 if (parent > 0) {
4363 ref->parent = parent;
4364 ref->node.full_backref = 1;
4365 } else {
4366 ref->root = root;
4367 ref->node.full_backref = 0;
4369 list_add_tail(&ref->node.list, &rec->backrefs);
4371 return ref;
4374 static struct data_backref *find_data_backref(struct extent_record *rec,
4375 u64 parent, u64 root,
4376 u64 owner, u64 offset,
4377 int found_ref,
4378 u64 disk_bytenr, u64 bytes)
4380 struct list_head *cur = rec->backrefs.next;
4381 struct extent_backref *node;
4382 struct data_backref *back;
4384 while(cur != &rec->backrefs) {
4385 node = list_entry(cur, struct extent_backref, list);
4386 cur = cur->next;
4387 if (!node->is_data)
4388 continue;
4389 back = (struct data_backref *)node;
4390 if (parent > 0) {
4391 if (!node->full_backref)
4392 continue;
4393 if (parent == back->parent)
4394 return back;
4395 } else {
4396 if (node->full_backref)
4397 continue;
4398 if (back->root == root && back->owner == owner &&
4399 back->offset == offset) {
4400 if (found_ref && node->found_ref &&
4401 (back->bytes != bytes ||
4402 back->disk_bytenr != disk_bytenr))
4403 continue;
4404 return back;
4408 return NULL;
4411 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4412 u64 parent, u64 root,
4413 u64 owner, u64 offset,
4414 u64 max_size)
4416 struct data_backref *ref = malloc(sizeof(*ref));
4418 if (!ref)
4419 return NULL;
4420 memset(&ref->node, 0, sizeof(ref->node));
4421 ref->node.is_data = 1;
4423 if (parent > 0) {
4424 ref->parent = parent;
4425 ref->owner = 0;
4426 ref->offset = 0;
4427 ref->node.full_backref = 1;
4428 } else {
4429 ref->root = root;
4430 ref->owner = owner;
4431 ref->offset = offset;
4432 ref->node.full_backref = 0;
4434 ref->bytes = max_size;
4435 ref->found_ref = 0;
4436 ref->num_refs = 0;
4437 list_add_tail(&ref->node.list, &rec->backrefs);
4438 if (max_size > rec->max_size)
4439 rec->max_size = max_size;
4440 return ref;
/*
 * Check if the type of extent matches with its chunk.
 * Sets rec->wrong_chunk_type when a data extent sits in a non-DATA block
 * group, or a metadata/system extent sits in the wrong group type.
 */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly */
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extent, as it's also marked as metadata, we can only
	 * make sure it's a SYSTEM extent by its backref
	 */
	if (!list_empty(&rec->backrefs)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		/* only the first backref is inspected to classify the extent */
		node = list_entry(rec->backrefs.next, struct extent_backref,
				  list);
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
/*
 * Create or update the extent record covering [@start, @start+@nr).
 *
 * @parent_key/@parent_gen: key and generation of the referencing block,
 *                          recorded for later corruption reporting;
 * @extent_item_refs:       refcount read from the EXTENT_ITEM, if any;
 * @is_root/@inc_ref/@set_checked: flag updates applied to the record;
 * @metadata:               the extent is a tree block;
 * @extent_rec:             this call reflects an actual extent item
 *                          (as opposed to a backref sighting);
 * @max_size:               the largest known size of the extent.
 *
 * When an extent item is found whose start doesn't match an existing
 * record (or the record was already found), a duplicate record is queued
 * on rec->dups and the record is put on the global duplicate_extents list.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct btrfs_key *parent_key, u64 parent_gen,
			  u64 start, u64 nr, u64 extent_item_refs,
			  int is_root, int inc_ref, int set_checked,
			  int metadata, int extent_rec, u64 max_size)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, start, nr);
	if (cache) {
		/* update path: a record overlapping [start, start+nr) exists */
		rec = container_of(cache, struct extent_record, cache);
		if (inc_ref)
			rec->refs++;
		if (rec->nr == 1)
			rec->nr = max(nr, max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backrefs.
		 */
		if (extent_rec) {
			if (start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				/*
				 * NOTE(review): only these fields of tmp are
				 * initialized; the rest of the struct stays
				 * indeterminate — confirm no consumer of
				 * rec->dups reads beyond them.
				 */
				tmp->start = start;
				tmp->max_size = max_size;
				tmp->nr = nr;
				tmp->found_rec = 1;
				tmp->metadata = metadata;
				tmp->extent_item_refs = extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = nr;
				rec->found_rec = 1;
			}
		}

		if (extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr, "block %llu rec "
					"extent_item_refs %llu, passed %llu\n",
					(unsigned long long)start,
					(unsigned long long)
							rec->extent_item_refs,
					(unsigned long long)extent_item_refs);
			}
			rec->extent_item_refs = extent_item_refs;
		}
		if (is_root)
			rec->is_root = 1;
		if (set_checked) {
			rec->content_checked = 1;
			rec->owner_ref_checked = 1;
		}

		if (parent_key)
			btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
		if (parent_gen)
			rec->parent_generation = parent_gen;

		if (rec->max_size < max_size)
			rec->max_size = max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (metadata && check_crossing_stripes(rec->start,
						       rec->max_size))
			rec->crossing_stripes = 1;
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	/* create path: no record covers this range yet */
	rec = malloc(sizeof(*rec));
	if (!rec)
		return -ENOMEM;
	rec->start = start;
	rec->max_size = max_size;
	rec->nr = max(nr, max_size);
	rec->found_rec = !!extent_rec;
	rec->content_checked = 0;
	rec->owner_ref_checked = 0;
	rec->num_duplicates = 0;
	rec->metadata = metadata;
	rec->flag_block_full_backref = -1;
	rec->bad_full_backref = 0;
	rec->crossing_stripes = 0;
	rec->wrong_chunk_type = 0;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->dups);
	INIT_LIST_HEAD(&rec->list);

	if (is_root)
		rec->is_root = 1;
	else
		rec->is_root = 0;

	if (inc_ref)
		rec->refs = 1;
	else
		rec->refs = 0;

	if (extent_item_refs)
		rec->extent_item_refs = extent_item_refs;
	else
		rec->extent_item_refs = 0;

	if (parent_key)
		btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
	else
		memset(&rec->parent_key, 0, sizeof(*parent_key));

	if (parent_gen)
		rec->parent_generation = parent_gen;
	else
		rec->parent_generation = 0;

	rec->cache.start = start;
	rec->cache.size = nr;
	ret = insert_cache_extent(extent_cache, &rec->cache);
	BUG_ON(ret);
	bytes_used += nr;
	if (set_checked) {
		rec->content_checked = 1;
		rec->owner_ref_checked = 1;
	}

	if (metadata)
		if (check_crossing_stripes(rec->start, rec->max_size))
			rec->crossing_stripes = 1;
	check_extent_type(rec);
	return ret;
}
4650 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4651 u64 parent, u64 root, int found_ref)
4653 struct extent_record *rec;
4654 struct tree_backref *back;
4655 struct cache_extent *cache;
4657 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4658 if (!cache) {
4659 add_extent_rec(extent_cache, NULL, 0, bytenr,
4660 1, 0, 0, 0, 0, 1, 0, 0);
4661 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4662 if (!cache)
4663 abort();
4666 rec = container_of(cache, struct extent_record, cache);
4667 if (rec->start != bytenr) {
4668 abort();
4671 back = find_tree_backref(rec, parent, root);
4672 if (!back) {
4673 back = alloc_tree_backref(rec, parent, root);
4674 BUG_ON(!back);
4677 if (found_ref) {
4678 if (back->node.found_ref) {
4679 fprintf(stderr, "Extent back ref already exists "
4680 "for %llu parent %llu root %llu \n",
4681 (unsigned long long)bytenr,
4682 (unsigned long long)parent,
4683 (unsigned long long)root);
4685 back->node.found_ref = 1;
4686 } else {
4687 if (back->node.found_extent_tree) {
4688 fprintf(stderr, "Extent back ref already exists "
4689 "for %llu parent %llu root %llu \n",
4690 (unsigned long long)bytenr,
4691 (unsigned long long)parent,
4692 (unsigned long long)root);
4694 back->node.found_extent_tree = 1;
4696 check_extent_type(rec);
4697 maybe_free_extent_rec(extent_cache, rec);
4698 return 0;
4701 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4702 u64 parent, u64 root, u64 owner, u64 offset,
4703 u32 num_refs, int found_ref, u64 max_size)
4705 struct extent_record *rec;
4706 struct data_backref *back;
4707 struct cache_extent *cache;
4709 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4710 if (!cache) {
4711 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4712 0, 0, max_size);
4713 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4714 if (!cache)
4715 abort();
4718 rec = container_of(cache, struct extent_record, cache);
4719 if (rec->max_size < max_size)
4720 rec->max_size = max_size;
4723 * If found_ref is set then max_size is the real size and must match the
4724 * existing refs. So if we have already found a ref then we need to
4725 * make sure that this ref matches the existing one, otherwise we need
4726 * to add a new backref so we can notice that the backrefs don't match
4727 * and we need to figure out who is telling the truth. This is to
4728 * account for that awful fsync bug I introduced where we'd end up with
4729 * a btrfs_file_extent_item that would have its length include multiple
4730 * prealloc extents or point inside of a prealloc extent.
4732 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4733 bytenr, max_size);
4734 if (!back) {
4735 back = alloc_data_backref(rec, parent, root, owner, offset,
4736 max_size);
4737 BUG_ON(!back);
4740 if (found_ref) {
4741 BUG_ON(num_refs != 1);
4742 if (back->node.found_ref)
4743 BUG_ON(back->bytes != max_size);
4744 back->node.found_ref = 1;
4745 back->found_ref += 1;
4746 back->bytes = max_size;
4747 back->disk_bytenr = bytenr;
4748 rec->refs += 1;
4749 rec->content_checked = 1;
4750 rec->owner_ref_checked = 1;
4751 } else {
4752 if (back->node.found_extent_tree) {
4753 fprintf(stderr, "Extent back ref already exists "
4754 "for %llu parent %llu root %llu "
4755 "owner %llu offset %llu num_refs %lu\n",
4756 (unsigned long long)bytenr,
4757 (unsigned long long)parent,
4758 (unsigned long long)root,
4759 (unsigned long long)owner,
4760 (unsigned long long)offset,
4761 (unsigned long)num_refs);
4763 back->num_refs = num_refs;
4764 back->node.found_extent_tree = 1;
4766 maybe_free_extent_rec(extent_cache, rec);
4767 return 0;
4770 static int add_pending(struct cache_tree *pending,
4771 struct cache_tree *seen, u64 bytenr, u32 size)
4773 int ret;
4774 ret = add_cache_extent(seen, bytenr, size);
4775 if (ret)
4776 return ret;
4777 add_cache_extent(pending, bytenr, size);
4778 return 0;
4781 static int pick_next_pending(struct cache_tree *pending,
4782 struct cache_tree *reada,
4783 struct cache_tree *nodes,
4784 u64 last, struct block_info *bits, int bits_nr,
4785 int *reada_bits)
4787 unsigned long node_start = last;
4788 struct cache_extent *cache;
4789 int ret;
4791 cache = search_cache_extent(reada, 0);
4792 if (cache) {
4793 bits[0].start = cache->start;
4794 bits[0].size = cache->size;
4795 *reada_bits = 1;
4796 return 1;
4798 *reada_bits = 0;
4799 if (node_start > 32768)
4800 node_start -= 32768;
4802 cache = search_cache_extent(nodes, node_start);
4803 if (!cache)
4804 cache = search_cache_extent(nodes, 0);
4806 if (!cache) {
4807 cache = search_cache_extent(pending, 0);
4808 if (!cache)
4809 return 0;
4810 ret = 0;
4811 do {
4812 bits[ret].start = cache->start;
4813 bits[ret].size = cache->size;
4814 cache = next_cache_extent(cache);
4815 ret++;
4816 } while (cache && ret < bits_nr);
4817 return ret;
4820 ret = 0;
4821 do {
4822 bits[ret].start = cache->start;
4823 bits[ret].size = cache->size;
4824 cache = next_cache_extent(cache);
4825 ret++;
4826 } while (cache && ret < bits_nr);
4828 if (bits_nr - ret > 8) {
4829 u64 lookup = bits[0].start + bits[0].size;
4830 struct cache_extent *next;
4831 next = search_cache_extent(pending, lookup);
4832 while(next) {
4833 if (next->start - lookup > 32768)
4834 break;
4835 bits[ret].start = next->start;
4836 bits[ret].size = next->size;
4837 lookup = next->start + next->size;
4838 ret++;
4839 if (ret == bits_nr)
4840 break;
4841 next = next_cache_extent(next);
4842 if (!next)
4843 break;
4846 return ret;
4849 static void free_chunk_record(struct cache_extent *cache)
4851 struct chunk_record *rec;
4853 rec = container_of(cache, struct chunk_record, cache);
4854 list_del_init(&rec->list);
4855 list_del_init(&rec->dextents);
4856 free(rec);
4859 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4861 cache_tree_free_extents(chunk_cache, free_chunk_record);
4864 static void free_device_record(struct rb_node *node)
4866 struct device_record *rec;
4868 rec = container_of(node, struct device_record, node);
4869 free(rec);
4872 FREE_RB_BASED_TREE(device_cache, free_device_record);
4874 int insert_block_group_record(struct block_group_tree *tree,
4875 struct block_group_record *bg_rec)
4877 int ret;
4879 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4880 if (ret)
4881 return ret;
4883 list_add_tail(&bg_rec->list, &tree->block_groups);
4884 return 0;
4887 static void free_block_group_record(struct cache_extent *cache)
4889 struct block_group_record *rec;
4891 rec = container_of(cache, struct block_group_record, cache);
4892 list_del_init(&rec->list);
4893 free(rec);
4896 void free_block_group_tree(struct block_group_tree *tree)
4898 cache_tree_free_extents(&tree->tree, free_block_group_record);
4901 int insert_device_extent_record(struct device_extent_tree *tree,
4902 struct device_extent_record *de_rec)
4904 int ret;
4907 * Device extent is a bit different from the other extents, because
4908 * the extents which belong to the different devices may have the
4909 * same start and size, so we need use the special extent cache
4910 * search/insert functions.
4912 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4913 if (ret)
4914 return ret;
4916 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4917 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4918 return 0;
4921 static void free_device_extent_record(struct cache_extent *cache)
4923 struct device_extent_record *rec;
4925 rec = container_of(cache, struct device_extent_record, cache);
4926 if (!list_empty(&rec->chunk_list))
4927 list_del_init(&rec->chunk_list);
4928 if (!list_empty(&rec->device_list))
4929 list_del_init(&rec->device_list);
4930 free(rec);
4933 void free_device_extent_tree(struct device_extent_tree *tree)
4935 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a pre-format-v1 (EXTENT_REF_V0) backref: low objectids denote tree
 * roots, everything from BTRFS_FIRST_FREE_OBJECTID up is file data.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	else
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	return 0;
}
#endif
4957 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4958 struct btrfs_key *key,
4959 int slot)
4961 struct btrfs_chunk *ptr;
4962 struct chunk_record *rec;
4963 int num_stripes, i;
4965 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4966 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4968 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4969 if (!rec) {
4970 fprintf(stderr, "memory allocation failed\n");
4971 exit(-1);
4974 INIT_LIST_HEAD(&rec->list);
4975 INIT_LIST_HEAD(&rec->dextents);
4976 rec->bg_rec = NULL;
4978 rec->cache.start = key->offset;
4979 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4981 rec->generation = btrfs_header_generation(leaf);
4983 rec->objectid = key->objectid;
4984 rec->type = key->type;
4985 rec->offset = key->offset;
4987 rec->length = rec->cache.size;
4988 rec->owner = btrfs_chunk_owner(leaf, ptr);
4989 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4990 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4991 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4992 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4993 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4994 rec->num_stripes = num_stripes;
4995 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4997 for (i = 0; i < rec->num_stripes; ++i) {
4998 rec->stripes[i].devid =
4999 btrfs_stripe_devid_nr(leaf, ptr, i);
5000 rec->stripes[i].offset =
5001 btrfs_stripe_offset_nr(leaf, ptr, i);
5002 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5003 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5004 BTRFS_UUID_SIZE);
5007 return rec;
5010 static int process_chunk_item(struct cache_tree *chunk_cache,
5011 struct btrfs_key *key, struct extent_buffer *eb,
5012 int slot)
5014 struct chunk_record *rec;
5015 int ret = 0;
5017 rec = btrfs_new_chunk_record(eb, key, slot);
5018 ret = insert_cache_extent(chunk_cache, &rec->cache);
5019 if (ret) {
5020 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5021 rec->offset, rec->length);
5022 free(rec);
5025 return ret;
5028 static int process_device_item(struct rb_root *dev_cache,
5029 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5031 struct btrfs_dev_item *ptr;
5032 struct device_record *rec;
5033 int ret = 0;
5035 ptr = btrfs_item_ptr(eb,
5036 slot, struct btrfs_dev_item);
5038 rec = malloc(sizeof(*rec));
5039 if (!rec) {
5040 fprintf(stderr, "memory allocation failed\n");
5041 return -ENOMEM;
5044 rec->devid = key->offset;
5045 rec->generation = btrfs_header_generation(eb);
5047 rec->objectid = key->objectid;
5048 rec->type = key->type;
5049 rec->offset = key->offset;
5051 rec->devid = btrfs_device_id(eb, ptr);
5052 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5053 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5055 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5056 if (ret) {
5057 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5058 free(rec);
5061 return ret;
5064 struct block_group_record *
5065 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5066 int slot)
5068 struct btrfs_block_group_item *ptr;
5069 struct block_group_record *rec;
5071 rec = calloc(1, sizeof(*rec));
5072 if (!rec) {
5073 fprintf(stderr, "memory allocation failed\n");
5074 exit(-1);
5077 rec->cache.start = key->objectid;
5078 rec->cache.size = key->offset;
5080 rec->generation = btrfs_header_generation(leaf);
5082 rec->objectid = key->objectid;
5083 rec->type = key->type;
5084 rec->offset = key->offset;
5086 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5087 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5089 INIT_LIST_HEAD(&rec->list);
5091 return rec;
5094 static int process_block_group_item(struct block_group_tree *block_group_cache,
5095 struct btrfs_key *key,
5096 struct extent_buffer *eb, int slot)
5098 struct block_group_record *rec;
5099 int ret = 0;
5101 rec = btrfs_new_block_group_record(eb, key, slot);
5102 ret = insert_block_group_record(block_group_cache, rec);
5103 if (ret) {
5104 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5105 rec->objectid, rec->offset);
5106 free(rec);
5109 return ret;
5112 struct device_extent_record *
5113 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5114 struct btrfs_key *key, int slot)
5116 struct device_extent_record *rec;
5117 struct btrfs_dev_extent *ptr;
5119 rec = calloc(1, sizeof(*rec));
5120 if (!rec) {
5121 fprintf(stderr, "memory allocation failed\n");
5122 exit(-1);
5125 rec->cache.objectid = key->objectid;
5126 rec->cache.start = key->offset;
5128 rec->generation = btrfs_header_generation(leaf);
5130 rec->objectid = key->objectid;
5131 rec->type = key->type;
5132 rec->offset = key->offset;
5134 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5135 rec->chunk_objecteid =
5136 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5137 rec->chunk_offset =
5138 btrfs_dev_extent_chunk_offset(leaf, ptr);
5139 rec->length = btrfs_dev_extent_length(leaf, ptr);
5140 rec->cache.size = rec->length;
5142 INIT_LIST_HEAD(&rec->chunk_list);
5143 INIT_LIST_HEAD(&rec->device_list);
5145 return rec;
5148 static int
5149 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5150 struct btrfs_key *key, struct extent_buffer *eb,
5151 int slot)
5153 struct device_extent_record *rec;
5154 int ret;
5156 rec = btrfs_new_device_extent_record(eb, key, slot);
5157 ret = insert_device_extent_record(dev_extent_cache, rec);
5158 if (ret) {
5159 fprintf(stderr,
5160 "Device extent[%llu, %llu, %llu] existed.\n",
5161 rec->objectid, rec->offset, rec->length);
5162 free(rec);
5165 return ret;
5168 static int process_extent_item(struct btrfs_root *root,
5169 struct cache_tree *extent_cache,
5170 struct extent_buffer *eb, int slot)
5172 struct btrfs_extent_item *ei;
5173 struct btrfs_extent_inline_ref *iref;
5174 struct btrfs_extent_data_ref *dref;
5175 struct btrfs_shared_data_ref *sref;
5176 struct btrfs_key key;
5177 unsigned long end;
5178 unsigned long ptr;
5179 int type;
5180 u32 item_size = btrfs_item_size_nr(eb, slot);
5181 u64 refs = 0;
5182 u64 offset;
5183 u64 num_bytes;
5184 int metadata = 0;
5186 btrfs_item_key_to_cpu(eb, &key, slot);
5188 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5189 metadata = 1;
5190 num_bytes = root->leafsize;
5191 } else {
5192 num_bytes = key.offset;
5195 if (item_size < sizeof(*ei)) {
5196 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5197 struct btrfs_extent_item_v0 *ei0;
5198 BUG_ON(item_size != sizeof(*ei0));
5199 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5200 refs = btrfs_extent_refs_v0(eb, ei0);
5201 #else
5202 BUG();
5203 #endif
5204 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5205 num_bytes, refs, 0, 0, 0, metadata, 1,
5206 num_bytes);
5209 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5210 refs = btrfs_extent_refs(eb, ei);
5211 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5212 metadata = 1;
5213 else
5214 metadata = 0;
5216 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5217 refs, 0, 0, 0, metadata, 1, num_bytes);
5219 ptr = (unsigned long)(ei + 1);
5220 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5221 key.type == BTRFS_EXTENT_ITEM_KEY)
5222 ptr += sizeof(struct btrfs_tree_block_info);
5224 end = (unsigned long)ei + item_size;
5225 while (ptr < end) {
5226 iref = (struct btrfs_extent_inline_ref *)ptr;
5227 type = btrfs_extent_inline_ref_type(eb, iref);
5228 offset = btrfs_extent_inline_ref_offset(eb, iref);
5229 switch (type) {
5230 case BTRFS_TREE_BLOCK_REF_KEY:
5231 add_tree_backref(extent_cache, key.objectid,
5232 0, offset, 0);
5233 break;
5234 case BTRFS_SHARED_BLOCK_REF_KEY:
5235 add_tree_backref(extent_cache, key.objectid,
5236 offset, 0, 0);
5237 break;
5238 case BTRFS_EXTENT_DATA_REF_KEY:
5239 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5240 add_data_backref(extent_cache, key.objectid, 0,
5241 btrfs_extent_data_ref_root(eb, dref),
5242 btrfs_extent_data_ref_objectid(eb,
5243 dref),
5244 btrfs_extent_data_ref_offset(eb, dref),
5245 btrfs_extent_data_ref_count(eb, dref),
5246 0, num_bytes);
5247 break;
5248 case BTRFS_SHARED_DATA_REF_KEY:
5249 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5250 add_data_backref(extent_cache, key.objectid, offset,
5251 0, 0, 0,
5252 btrfs_shared_data_ref_count(eb, sref),
5253 0, num_bytes);
5254 break;
5255 default:
5256 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5257 key.objectid, key.type, num_bytes);
5258 goto out;
5260 ptr += btrfs_extent_inline_ref_size(type);
5262 WARN_ON(ptr > end);
5263 out:
5264 return 0;
/*
 * Verify that the free space cache of @cache contains an entry exactly
 * covering [@offset, @offset + @bytes), after carving out any superblock
 * mirror stripes that fall inside the range (those are never free space).
 * A matched entry is unlinked from the free space ctl and freed, so the
 * caller can detect leftover entries at the end.
 *
 * Returns 0 on success, -EINVAL on any mismatch, or an rmap lookup error.
 */
static int check_cache_range(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache,
			     u64 offset, u64 bytes)
{
	struct btrfs_free_space *entry;
	u64 *logical;
	u64 bytenr;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* Map this super copy to its logical addresses in the bg. */
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr, 0,
				       &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			/* Skip stripes that do not overlap our range. */
			if (logical[nr] + stripe_len <= offset)
				continue;
			if (offset + bytes <= logical[nr])
				continue;
			if (logical[nr] == offset) {
				/* Stripe at the start: trim the front. */
				if (stripe_len >= bytes) {
					kfree(logical);
					return 0;
				}
				bytes -= stripe_len;
				offset += stripe_len;
			} else if (logical[nr] < offset) {
				/* Stripe overlaps the start from below. */
				if (logical[nr] + stripe_len >=
				    offset + bytes) {
					kfree(logical);
					return 0;
				}
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			} else {
				/*
				 * Could be tricky, the super may land in the
				 * middle of the area we're checking. First
				 * check the easiest case, it's at the end.
				 */
				if (logical[nr] + stripe_len >=
				    bytes + offset) {
					bytes = logical[nr] - offset;
					continue;
				}

				/* Check the left side */
				ret = check_cache_range(root, cache,
							offset,
							logical[nr] - offset);
				if (ret) {
					kfree(logical);
					return ret;
				}

				/* Now we continue with the right side */
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			}
		}

		kfree(logical);
	}

	/* The remaining range must match one free space entry exactly. */
	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
	if (!entry) {
		fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
			offset, offset+bytes);
		return -EINVAL;
	}

	if (entry->offset != offset) {
		fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
			entry->offset);
		return -EINVAL;
	}

	if (entry->bytes != bytes) {
		fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
			bytes, entry->bytes, offset);
		return -EINVAL;
	}

	/* Consume the entry so leftover entries can be detected later. */
	unlink_free_space(cache->free_space_ctl, entry);
	free(entry);
	return 0;
}
/*
 * Cross-check @cache's loaded free space entries against the extent tree:
 * walk every extent/metadata item inside the block group and verify that
 * each gap between allocated extents is covered by exactly one free space
 * entry (entries are consumed by check_cache_range() as they match, so an
 * empty rbtree at the end means the cache was exact).
 *
 * Returns 0 on success, -EINVAL on mismatch or leftovers, or a tree
 * search error.
 */
static int verify_space_cache(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 last;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	root = root->fs_info->extent_root;

	/* The superblock area is never tracked as free space. */
	last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	ret = 0;
	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* Stop once past the end of this block group. */
		if (key.objectid >= cache->key.offset + cache->key.objectid)
			break;
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path->slots[0]++;
			continue;
		}

		if (last == key.objectid) {
			/* No gap: just advance past this extent. */
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				last = key.objectid + key.offset;
			else
				/* metadata item: offset is level, not size */
				last = key.objectid + root->leafsize;
			path->slots[0]++;
			continue;
		}

		/* The hole [last, key.objectid) must be free space. */
		ret = check_cache_range(root, cache, last,
					key.objectid - last);
		if (ret)
			break;
		if (key.type == BTRFS_EXTENT_ITEM_KEY)
			last = key.objectid + key.offset;
		else
			last = key.objectid + root->leafsize;
		path->slots[0]++;
	}

	/* Tail of the block group past the last allocated extent. */
	if (last < cache->key.objectid + cache->key.offset)
		ret = check_cache_range(root, cache, last,
					cache->key.objectid +
					cache->key.offset - last);

out:
	btrfs_free_path(path);

	if (!ret &&
	    !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
		fprintf(stderr, "There are still entries left in the space "
			"cache\n");
		ret = -EINVAL;
	}

	return ret;
}
5444 static int check_space_cache(struct btrfs_root *root)
5446 struct btrfs_block_group_cache *cache;
5447 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5448 int ret;
5449 int error = 0;
5451 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5452 btrfs_super_generation(root->fs_info->super_copy) !=
5453 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5454 printf("cache and super generation don't match, space cache "
5455 "will be invalidated\n");
5456 return 0;
5459 if (ctx.progress_enabled) {
5460 ctx.tp = TASK_FREE_SPACE;
5461 task_start(ctx.info);
5464 while (1) {
5465 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5466 if (!cache)
5467 break;
5469 start = cache->key.objectid + cache->key.offset;
5470 if (!cache->free_space_ctl) {
5471 if (btrfs_init_free_space_ctl(cache,
5472 root->sectorsize)) {
5473 ret = -ENOMEM;
5474 break;
5476 } else {
5477 btrfs_remove_free_space_cache(cache);
5480 if (btrfs_fs_compat_ro(root->fs_info,
5481 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5482 ret = exclude_super_stripes(root, cache);
5483 if (ret) {
5484 fprintf(stderr, "could not exclude super stripes: %s\n",
5485 strerror(-ret));
5486 error++;
5487 continue;
5489 ret = load_free_space_tree(root->fs_info, cache);
5490 free_excluded_extents(root, cache);
5491 if (ret < 0) {
5492 fprintf(stderr, "could not load free space tree: %s\n",
5493 strerror(-ret));
5494 error++;
5495 continue;
5497 error += ret;
5498 } else {
5499 ret = load_free_space_cache(root->fs_info, cache);
5500 if (!ret)
5501 continue;
5504 ret = verify_space_cache(root, cache);
5505 if (ret) {
5506 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5507 cache->key.objectid);
5508 error++;
5512 task_stop(ctx.info);
5514 return error ? -EINVAL : 0;
/*
 * Re-read the data extent at [@bytenr, @bytenr + @num_bytes) and verify
 * each sector against the checksums stored at @leaf_offset inside @eb.
 * On a mismatch the read is retried from the next mirror; mismatches are
 * only reported to stderr and do not fail the call.
 *
 * Returns 0 on success (even with csum mismatches), -EINVAL when the
 * length is not sector aligned, -ENOMEM, or a read error.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, unsigned long leaf_offset,
			      struct extent_buffer *eb) {

	u64 offset = 0;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
	char *data;
	unsigned long csum_offset;
	u32 csum;
	u32 csum_expected;
	u64 read_len;
	u64 data_checked = 0;
	u64 tmp;
	int ret = 0;
	int mirror;
	int num_copies;

	if (num_bytes % root->sectorsize)
		return -EINVAL;

	data = malloc(num_bytes);
	if (!data)
		return -ENOMEM;

	while (offset < num_bytes) {
		mirror = 0;
again:
		read_len = num_bytes - offset;
		/* read as much space once a time */
		ret = read_extent_data(root, data + offset,
				bytenr + offset, &read_len, mirror);
		if (ret)
			goto out;
		/* read_len may come back shorter than requested. */
		data_checked = 0;
		/* verify every 4k data's checksum */
		while (data_checked < read_len) {
			csum = ~(u32)0;
			tmp = offset + data_checked;

			csum = btrfs_csum_data(NULL, (char *)data + tmp,
					       csum, root->sectorsize);
			btrfs_csum_final(csum, (char *)&csum);

			/* One csum_size entry per sector in the csum item. */
			csum_offset = leaf_offset +
				 tmp / root->sectorsize * csum_size;
			read_extent_buffer(eb, (char *)&csum_expected,
					   csum_offset, csum_size);
			/* try another mirror */
			if (csum != csum_expected) {
				fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
						mirror, bytenr + tmp,
						csum, csum_expected);
				num_copies = btrfs_num_copies(
						&root->fs_info->mapping_tree,
						bytenr, num_bytes);
				if (mirror < num_copies - 1) {
					mirror += 1;
					goto again;
				}
			}
			data_checked += root->sectorsize;
		}
		offset += read_len;
	}
out:
	free(data);
	return ret;
}
/*
 * Verify that every byte of [@bytenr, @bytenr + @num_bytes) is covered by
 * at least one EXTENT_ITEM in the extent tree.  The range is shrunk as
 * covering extents are found; when an extent splits the range in the
 * middle, the right half is checked recursively and the search restarts
 * for the left half.
 *
 * Returns 0 when fully covered, 1 when a gap remains, negative on error.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		fprintf(stderr, "Error allocing path\n");
		return -ENOMEM;
	}

	/* offset = -1 so the search lands at/after the last ref of bytenr. */
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

again:
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up extent record %d\n", ret);
		btrfs_free_path(path);
		return ret;
	} else if (ret) {
		/* Not an exact hit: step back to the previous item. */
		if (path->slots[0] > 0) {
			path->slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
	}

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	/*
	 * Block group items come before extent items if they have the same
	 * bytenr, so walk back one more just in case.  Dear future traveler,
	 * first congrats on mastering time travel.  Now if it's not too much
	 * trouble could you go back to 2006 and tell Chris to make the
	 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
	 * EXTENT_ITEM_KEY please?
	 */
	while (key.type > BTRFS_EXTENT_ITEM_KEY) {
		if (path->slots[0] > 0) {
			path->slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	}

	/* Walk forward, shrinking the range as extents cover it. */
	while (num_bytes) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				btrfs_free_path(path);
				return ret;
			} else if (ret) {
				break;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path->slots[0]++;
			continue;
		}
		if (key.objectid + key.offset < bytenr) {
			path->slots[0]++;
			continue;
		}
		if (key.objectid > bytenr + num_bytes)
			break;

		if (key.objectid == bytenr) {
			/* Extent starts exactly at our range start. */
			if (key.offset >= num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes -= key.offset;
			bytenr += key.offset;
		} else if (key.objectid < bytenr) {
			/* Extent overlaps the start of our range. */
			if (key.objectid + key.offset >= bytenr + num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes = (bytenr + num_bytes) -
				(key.objectid + key.offset);
			bytenr = key.objectid + key.offset;
		} else {
			if (key.objectid + key.offset < bytenr + num_bytes) {
				u64 new_start = key.objectid + key.offset;
				u64 new_bytes = bytenr + num_bytes - new_start;

				/*
				 * Weird case, the extent is in the middle of
				 * our range, we'll have to search one side
				 * and then the other.  Not sure if this happens
				 * in real life, but no harm in coding it up
				 * anyway just in case.
				 */
				btrfs_release_path(path);
				ret = check_extent_exists(root, new_start,
							  new_bytes);
				if (ret) {
					fprintf(stderr, "Right section didn't "
						"have a record\n");
					break;
				}
				num_bytes = key.objectid - bytenr;
				goto again;
			}
			num_bytes = key.objectid - bytenr;
		}
		path->slots[0]++;
	}
	ret = 0;

out:
	if (num_bytes && !ret) {
		fprintf(stderr, "There are no extents for csum range "
			"%Lu-%Lu\n", bytenr, bytenr+num_bytes);
		ret = 1;
	}

	btrfs_free_path(path);
	return ret;
}
/*
 * Walk the entire csum tree and verify (a) the data checksums themselves
 * when check_data_csum is set, and (b) that every checksummed byte range
 * is backed by a data extent in the extent tree.  Adjacent csum ranges
 * are merged before the extent lookup.
 *
 * Returns the number of coverage errors found, or a negative errno when
 * the tree walk itself fails.
 */
static int check_csums(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 offset = 0, num_bytes = 0;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
	int errors = 0;
	int ret;
	u64 data_len;
	unsigned long leaf_offset;

	root = root->fs_info->csum_root;
	if (!extent_buffer_uptodate(root->node)) {
		fprintf(stderr, "No valid csum tree found\n");
		return -ENOENT;
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error searching csum tree %d\n", ret);
		btrfs_free_path(path);
		return ret;
	}

	if (ret > 0 && path->slots[0])
		path->slots[0]--;
	ret = 0;

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				break;
			}
			if (ret)
				break;
		}
		leaf = path->nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
			path->slots[0]++;
			continue;
		}

		/* Each csum_size entry in the item covers one sector. */
		data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
			      csum_size) * root->sectorsize;
		if (!check_data_csum)
			goto skip_csum_check;
		leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
		ret = check_extent_csums(root, key.offset, data_len,
					 leaf_offset, leaf);
		if (ret)
			break;
skip_csum_check:
		/*
		 * Accumulate contiguous csum ranges; when a gap ends one,
		 * verify the finished range is backed by extent records.
		 */
		if (!num_bytes) {
			offset = key.offset;
		} else if (key.offset != offset + num_bytes) {
			ret = check_extent_exists(root, offset, num_bytes);
			if (ret) {
				fprintf(stderr, "Csum exists for %Lu-%Lu but "
					"there is no extent record\n",
					offset, offset+num_bytes);
				errors++;
			}
			offset = key.offset;
			num_bytes = 0;
		}
		num_bytes += data_len;
		path->slots[0]++;
	}

	btrfs_free_path(path);
	return errors;
}
5817 static int is_dropped_key(struct btrfs_key *key,
5818 struct btrfs_key *drop_key) {
5819 if (key->objectid < drop_key->objectid)
5820 return 1;
5821 else if (key->objectid == drop_key->objectid) {
5822 if (key->type < drop_key->type)
5823 return 1;
5824 else if (key->type == drop_key->type) {
5825 if (key->offset < drop_key->offset)
5826 return 1;
5829 return 0;
5833 * Here are the rules for FULL_BACKREF.
5835 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5836 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5837 * FULL_BACKREF set.
5838 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5839 * if it happened after the relocation occurred since we'll have dropped the
5840 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5841 * have no real way to know for sure.
5843 * We process the blocks one root at a time, and we start from the lowest root
5844 * objectid and go to the highest. So we can just lookup the owner backref for
5845 * the record and if we don't find it then we know it doesn't exist and we have
5846 * a FULL BACKREF.
5848 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5849 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5850 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry FULL_BACKREF in its extent
 * flags, following the rules spelled out in the comment above.  The result is
 * stored in @flags, and the extent record in @extent_cache is marked
 * bad_full_backref when the newly computed state disagrees with the state
 * recorded earlier.  Always returns 0.
 */
static int calc_extent_flag(struct btrfs_root *root,
			   struct cache_tree *extent_cache,
			   struct extent_buffer *buf,
			   struct root_item_record *ri,
			   u64 *flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct tree_backref *tback;
	u64 owner = 0;

	cache = lookup_cache_extent(extent_cache, buf->start, 1);
	/* we have added this extent before */
	BUG_ON(!cache);
	rec = container_of(cache, struct extent_record, cache);

	/*
	 * Except file/reloc tree, we can not have
	 * FULL BACKREF MODE
	 */
	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
		goto normal;
	/*
	 * root node
	 */
	if (buf->start == ri->bytenr)
		goto normal;
	/* A block cow'ed from a reloc tree is always full backref. */
	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		goto full_backref;
	/* Still owned by the root we are walking: normal backref. */
	owner = btrfs_header_owner(buf);
	if (owner == ri->objectid)
		goto normal;
	/*
	 * No owner backref recorded for the original owner: roots are
	 * processed in ascending objectid order, so the owner ref would
	 * already have been seen if it existed (see comment above).
	 */
	tback = find_tree_backref(rec, 0, owner);
	if (!tback)
		goto full_backref;
normal:
	*flags = 0;
	if (rec->flag_block_full_backref != -1 &&
	    rec->flag_block_full_backref != 0)
		rec->bad_full_backref = 1;
	return 0;
full_backref:
	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	if (rec->flag_block_full_backref != -1 &&
	    rec->flag_block_full_backref != 1)
		rec->bad_full_backref = 1;
	return 0;
}
/*
 * Process one tree block from the scan queues: pick the next bytenr out of
 * @pending/@reada/@nodes, read the block, sanity check its extent flags
 * against the owning root @ri, then record everything it references into the
 * various caches (extent items, backrefs, chunks, devices, block groups,
 * csum byte counts).  Child pointers of interior nodes are queued for later
 * visits.  Returns 1 when there is nothing left to process, 0 on success,
 * negative on error.
 */
static int run_next_block(struct btrfs_root *root,
			  struct block_info *bits,
			  int bits_nr,
			  u64 *last,
			  struct cache_tree *pending,
			  struct cache_tree *seen,
			  struct cache_tree *reada,
			  struct cache_tree *nodes,
			  struct cache_tree *extent_cache,
			  struct cache_tree *chunk_cache,
			  struct rb_root *dev_cache,
			  struct block_group_tree *block_group_cache,
			  struct device_extent_tree *dev_extent_cache,
			  struct root_item_record *ri)
{
	struct extent_buffer *buf;
	struct extent_record *rec = NULL;
	u64 bytenr;
	u32 size;
	u64 parent;
	u64 owner;
	u64 flags;
	u64 ptr;
	u64 gen = 0;
	int ret = 0;
	int i;
	int nritems;
	struct btrfs_key key;
	struct cache_extent *cache;
	int reada_bits;

	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
				    bits_nr, &reada_bits);
	if (nritems == 0)
		return 1;

	/* Kick off readahead for blocks we have not issued it for yet. */
	if (!reada_bits) {
		for(i = 0; i < nritems; i++) {
			ret = add_cache_extent(reada, bits[i].start,
					       bits[i].size);
			if (ret == -EEXIST)
				continue;

			/* fixme, get the parent transid */
			readahead_tree_block(root, bits[i].start,
					     bits[i].size, 0);
		}
	}
	*last = bits[0].start;
	bytenr = bits[0].start;
	size = bits[0].size;

	/* This block is now being processed; drop it from the queues. */
	cache = lookup_cache_extent(pending, bytenr, size);
	if (cache) {
		remove_cache_extent(pending, cache);
		free(cache);
	}
	cache = lookup_cache_extent(reada, bytenr, size);
	if (cache) {
		remove_cache_extent(reada, cache);
		free(cache);
	}
	cache = lookup_cache_extent(nodes, bytenr, size);
	if (cache) {
		remove_cache_extent(nodes, cache);
		free(cache);
	}
	cache = lookup_cache_extent(extent_cache, bytenr, size);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		gen = rec->parent_generation;
	}

	/* fixme, get the real parent transid */
	buf = read_tree_block(root, bytenr, size, gen);
	if (!extent_buffer_uptodate(buf)) {
		record_bad_block_io(root->fs_info,
				    extent_cache, bytenr, size);
		goto out;
	}

	nritems = btrfs_header_nritems(buf);

	flags = 0;
	if (!init_extent_tree) {
		/* Prefer the flags recorded in the extent tree... */
		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
				       btrfs_header_level(buf), 1, NULL,
				       &flags);
		if (ret < 0) {
			/* ...fall back to computing them ourselves. */
			ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
			if (ret < 0) {
				fprintf(stderr, "Couldn't calc extent flags\n");
				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			}
		}
	} else {
		flags = 0;
		ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
		if (ret < 0) {
			fprintf(stderr, "Couldn't calc extent flags\n");
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		if (ri != NULL &&
		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    ri->objectid == btrfs_header_owner(buf)) {
			/*
			 * Ok we got to this block from its original owner and
			 * we have FULL_BACKREF set.  Relocation can leave
			 * converted blocks over so this is altogether possible,
			 * however it's not possible if the generation > the
			 * last snapshot, so check for this case.
			 */
			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
			    btrfs_header_generation(buf) > ri->last_snapshot) {
				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
				rec->bad_full_backref = 1;
			}
		}
	} else {
		/* Reloc-owned or reloc-cow'ed blocks must be full backref. */
		if (ri != NULL &&
		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			rec->bad_full_backref = 1;
		}
	}

	/* Full backref: children point at this block; otherwise at the owner. */
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		rec->flag_block_full_backref = 1;
		parent = bytenr;
		owner = 0;
	} else {
		rec->flag_block_full_backref = 0;
		parent = 0;
		owner = btrfs_header_owner(buf);
	}

	ret = check_block(root, extent_cache, buf, flags);
	if (ret)
		goto out;

	if (btrfs_is_leaf(buf)) {
		btree_space_waste += btrfs_leaf_free_space(root, buf);
		/* Dispatch each item to the cache that tracks its type. */
		for (i = 0; i < nritems; i++) {
			struct btrfs_file_extent_item *fi;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
				total_csum_bytes +=
					btrfs_item_size_nr(buf, i);
				continue;
			}
			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
				process_chunk_item(chunk_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_ITEM_KEY) {
				process_device_item(dev_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
				process_block_group_item(block_group_cache,
					&key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_EXTENT_KEY) {
				process_device_extent_item(dev_extent_cache,
					&key, buf, i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
				process_extent_ref_v0(extent_cache, buf, i);
#else
				BUG();
#endif
				continue;
			}

			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
				add_tree_backref(extent_cache, key.objectid, 0,
						 key.offset, 0);
				continue;
			}
			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
				add_tree_backref(extent_cache, key.objectid,
						 key.offset, 0, 0);
				continue;
			}
			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
				struct btrfs_extent_data_ref *ref;
				ref = btrfs_item_ptr(buf, i,
						struct btrfs_extent_data_ref);
				add_data_backref(extent_cache,
					key.objectid, 0,
					btrfs_extent_data_ref_root(buf, ref),
					btrfs_extent_data_ref_objectid(buf,
								       ref),
					btrfs_extent_data_ref_offset(buf, ref),
					btrfs_extent_data_ref_count(buf, ref),
					0, root->sectorsize);
				continue;
			}
			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
				struct btrfs_shared_data_ref *ref;
				ref = btrfs_item_ptr(buf, i,
						struct btrfs_shared_data_ref);
				add_data_backref(extent_cache,
					key.objectid, key.offset, 0, 0, 0,
					btrfs_shared_data_ref_count(buf, ref),
					0, root->sectorsize);
				continue;
			}
			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
				struct bad_item *bad;

				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
					continue;
				if (!owner)
					continue;
				/* Queue the orphan for later deletion. */
				bad = malloc(sizeof(struct bad_item));
				if (!bad)
					continue;
				INIT_LIST_HEAD(&bad->list);
				memcpy(&bad->key, &key,
				       sizeof(struct btrfs_key));
				bad->root_id = owner;
				list_add_tail(&bad->list, &delete_items);
				continue;
			}
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			/* Holes have no on-disk extent to account for. */
			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
				continue;

			data_bytes_allocated +=
				btrfs_file_extent_disk_num_bytes(buf, fi);
			if (data_bytes_allocated < root->sectorsize) {
				abort();
			}
			data_bytes_referenced +=
				btrfs_file_extent_num_bytes(buf, fi);
			add_data_backref(extent_cache,
				btrfs_file_extent_disk_bytenr(buf, fi),
				parent, owner, key.objectid, key.offset -
				btrfs_file_extent_offset(buf, fi), 1, 1,
				btrfs_file_extent_disk_num_bytes(buf, fi));
		}
	} else {
		int level;
		struct btrfs_key first_key;

		first_key.objectid = 0;

		if (nritems > 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		level = btrfs_header_level(buf);
		for (i = 0; i < nritems; i++) {
			ptr = btrfs_node_blockptr(buf, i);
			size = btrfs_level_size(root, level - 1);
			btrfs_node_key_to_cpu(buf, &key, i);
			/* Skip children already dropped by snapshot deletion. */
			if (ri != NULL) {
				if ((level == ri->drop_level)
				    && is_dropped_key(&key, &ri->drop_key)) {
					continue;
				}
			}
			ret = add_extent_rec(extent_cache, &key,
					     btrfs_node_ptr_generation(buf, i),
					     ptr, size, 0, 0, 1, 0, 1, 0,
					     size);
			BUG_ON(ret);

			add_tree_backref(extent_cache, ptr, parent, owner, 1);

			if (level > 1) {
				add_pending(nodes, seen, ptr, size);
			} else {
				add_pending(pending, seen, ptr, size);
			}
		}
		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
				      nritems) * sizeof(struct btrfs_key_ptr);
	}
	total_btree_bytes += buf->len;
	if (fs_root_objectid(btrfs_header_owner(buf)))
		total_fs_tree_bytes += buf->len;
	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += buf->len;
	if (!found_old_backref &&
	    btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		found_old_backref = 1;
out:
	free_extent_buffer(buf);
	return ret;
}
6221 static int add_root_to_pending(struct extent_buffer *buf,
6222 struct cache_tree *extent_cache,
6223 struct cache_tree *pending,
6224 struct cache_tree *seen,
6225 struct cache_tree *nodes,
6226 u64 objectid)
6228 if (btrfs_header_level(buf) > 0)
6229 add_pending(nodes, seen, buf->start, buf->len);
6230 else
6231 add_pending(pending, seen, buf->start, buf->len);
6232 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6233 0, 1, 1, 0, 1, 0, buf->len);
6235 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6236 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6237 add_tree_backref(extent_cache, buf->start, buf->start,
6238 0, 1);
6239 else
6240 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6241 return 0;
6244 /* as we fix the tree, we might be deleting blocks that
6245 * we're tracking for repair. This hook makes sure we
6246 * remove any backrefs for blocks as we are fixing them.
/*
 * Called when repair code frees an extent: drop @refs_to_drop references
 * from the matching backref in the fsck extent cache so the cache keeps
 * tracking reality.  Data vs tree backref is decided by @owner (data owners
 * are >= BTRFS_FIRST_FREE_OBJECTID).  Always returns 0; a missing cache
 * entry or backref is silently ignored.
 */
static int free_extent_hook(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 bytenr, u64 num_bytes, u64 parent,
			    u64 root_objectid, u64 owner, u64 offset,
			    int refs_to_drop)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int is_data;
	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;

	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (is_data) {
		struct data_backref *back;
		back = find_data_backref(rec, parent, root_objectid, owner,
					 offset, 1, bytenr, num_bytes);
		if (!back)
			goto out;
		/* Drop the actual-ref and extent-tree-ref counts in step. */
		if (back->node.found_ref) {
			back->found_ref -= refs_to_drop;
			if (rec->refs)
				rec->refs -= refs_to_drop;
		}
		if (back->node.found_extent_tree) {
			back->num_refs -= refs_to_drop;
			if (rec->extent_item_refs)
				rec->extent_item_refs -= refs_to_drop;
		}
		if (back->found_ref == 0)
			back->node.found_ref = 0;
		if (back->num_refs == 0)
			back->node.found_extent_tree = 0;

		/*
		 * NOTE(review): this frees the backref when found_ref is
		 * still set but found_extent_tree is not — freeing a node
		 * with live refs looks suspicious; confirm the condition
		 * wasn't meant to be !found_ref.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			list_del(&back->node.list);
			free(back);
		}
	} else {
		struct tree_backref *back;
		back = find_tree_backref(rec, parent, root_objectid);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			if (rec->refs)
				rec->refs--;
			back->node.found_ref = 0;
		}
		if (back->node.found_extent_tree) {
			if (rec->extent_item_refs)
				rec->extent_item_refs--;
			back->node.found_extent_tree = 0;
		}
		/*
		 * NOTE(review): found_ref was just cleared above, so this
		 * branch can never free — dead code as written; confirm.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			list_del(&back->node.list);
			free(back);
		}
	}
	/* Drop the extent record itself if nothing references it anymore. */
	maybe_free_extent_rec(extent_cache, rec);
out:
	return 0;
}
/*
 * Delete every extent-tree item that records @bytenr: the extent/metadata
 * item itself plus all backref item types.  Searches backwards from
 * (bytenr, -1, -1) and restarts just below any unrelated item type it
 * lands on.  Block group accounting is updated for each extent/metadata
 * item removed.  Returns 0 on success, negative on error.
 *
 * NOTE(review): @new_len is not used anywhere in this function — confirm
 * whether it is vestigial.
 */
static int delete_extent_records(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 new_len)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;
	int slot;

	key.objectid = bytenr;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while(1) {
		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
					&key, path, 0, 1);
		if (ret < 0)
			break;

		if (ret > 0) {
			ret = 0;
			/* Key not found exactly; step back to the previous item. */
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		ret = 0;

		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
			/* Not ours: restart the search just below this item. */
			btrfs_release_path(path);
			if (found_key.type == 0) {
				if (found_key.offset == 0)
					break;
				key.offset = found_key.offset - 1;
				key.type = found_key.type;
			}
			key.type = found_key.type - 1;
			key.offset = (u64)-1;
			continue;
		}

		fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
			found_key.objectid, found_key.type, found_key.offset);

		ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
		if (ret)
			break;
		btrfs_release_path(path);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Metadata item offset is the level, not the length. */
			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
				found_key.offset : root->leafsize;

			ret = btrfs_update_block_group(trans, root, bytenr,
						       bytes, 0, 0);
			if (ret)
				break;
		}
	}

	btrfs_release_path(path);
	return ret;
}
6396 * for a single backref, this will allocate a new extent
6397 * and add the backref to it.
6399 static int record_extent(struct btrfs_trans_handle *trans,
6400 struct btrfs_fs_info *info,
6401 struct btrfs_path *path,
6402 struct extent_record *rec,
6403 struct extent_backref *back,
6404 int allocated, u64 flags)
6406 int ret;
6407 struct btrfs_root *extent_root = info->extent_root;
6408 struct extent_buffer *leaf;
6409 struct btrfs_key ins_key;
6410 struct btrfs_extent_item *ei;
6411 struct tree_backref *tback;
6412 struct data_backref *dback;
6413 struct btrfs_tree_block_info *bi;
6415 if (!back->is_data)
6416 rec->max_size = max_t(u64, rec->max_size,
6417 info->extent_root->leafsize);
6419 if (!allocated) {
6420 u32 item_size = sizeof(*ei);
6422 if (!back->is_data)
6423 item_size += sizeof(*bi);
6425 ins_key.objectid = rec->start;
6426 ins_key.offset = rec->max_size;
6427 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6429 ret = btrfs_insert_empty_item(trans, extent_root, path,
6430 &ins_key, item_size);
6431 if (ret)
6432 goto fail;
6434 leaf = path->nodes[0];
6435 ei = btrfs_item_ptr(leaf, path->slots[0],
6436 struct btrfs_extent_item);
6438 btrfs_set_extent_refs(leaf, ei, 0);
6439 btrfs_set_extent_generation(leaf, ei, rec->generation);
6441 if (back->is_data) {
6442 btrfs_set_extent_flags(leaf, ei,
6443 BTRFS_EXTENT_FLAG_DATA);
6444 } else {
6445 struct btrfs_disk_key copy_key;;
6447 tback = (struct tree_backref *)back;
6448 bi = (struct btrfs_tree_block_info *)(ei + 1);
6449 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6450 sizeof(*bi));
6452 btrfs_set_disk_key_objectid(&copy_key,
6453 rec->info_objectid);
6454 btrfs_set_disk_key_type(&copy_key, 0);
6455 btrfs_set_disk_key_offset(&copy_key, 0);
6457 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6458 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6460 btrfs_set_extent_flags(leaf, ei,
6461 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6464 btrfs_mark_buffer_dirty(leaf);
6465 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6466 rec->max_size, 1, 0);
6467 if (ret)
6468 goto fail;
6469 btrfs_release_path(path);
6472 if (back->is_data) {
6473 u64 parent;
6474 int i;
6476 dback = (struct data_backref *)back;
6477 if (back->full_backref)
6478 parent = dback->parent;
6479 else
6480 parent = 0;
6482 for (i = 0; i < dback->found_ref; i++) {
6483 /* if parent != 0, we're doing a full backref
6484 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6485 * just makes the backref allocator create a data
6486 * backref
6488 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6489 rec->start, rec->max_size,
6490 parent,
6491 dback->root,
6492 parent ?
6493 BTRFS_FIRST_FREE_OBJECTID :
6494 dback->owner,
6495 dback->offset);
6496 if (ret)
6497 break;
6499 fprintf(stderr, "adding new data backref"
6500 " on %llu %s %llu owner %llu"
6501 " offset %llu found %d\n",
6502 (unsigned long long)rec->start,
6503 back->full_backref ?
6504 "parent" : "root",
6505 back->full_backref ?
6506 (unsigned long long)parent :
6507 (unsigned long long)dback->root,
6508 (unsigned long long)dback->owner,
6509 (unsigned long long)dback->offset,
6510 dback->found_ref);
6511 } else {
6512 u64 parent;
6514 tback = (struct tree_backref *)back;
6515 if (back->full_backref)
6516 parent = tback->parent;
6517 else
6518 parent = 0;
6520 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6521 rec->start, rec->max_size,
6522 parent, tback->root, 0, 0);
6523 fprintf(stderr, "adding new tree backref on "
6524 "start %llu len %llu parent %llu root %llu\n",
6525 rec->start, rec->max_size, parent, tback->root);
6527 fail:
6528 btrfs_release_path(path);
6529 return ret;
/*
 * One candidate (bytenr, bytes) value voted for by data backrefs during
 * backref verification (see verify_backrefs/find_most_right_entry).
 */
struct extent_entry {
	u64 bytenr;		/* candidate disk bytenr */
	u64 bytes;		/* candidate disk length */
	int count;		/* how many backrefs voted for this value */
	int broken;		/* how many of those votes came from broken backrefs */
	struct list_head list;	/* link in verify_backrefs()'s entries list */
};
6540 static struct extent_entry *find_entry(struct list_head *entries,
6541 u64 bytenr, u64 bytes)
6543 struct extent_entry *entry = NULL;
6545 list_for_each_entry(entry, entries, list) {
6546 if (entry->bytenr == bytenr && entry->bytes == bytes)
6547 return entry;
6550 return NULL;
/*
 * Pick the entry with the strictly highest vote count from @entries,
 * skipping entries whose votes are all from broken backrefs.  Returns NULL
 * when the vote is tied (no clear winner), so the caller has to break the
 * tie some other way.
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
	struct extent_entry *entry, *best = NULL, *prev = NULL;

	list_for_each_entry(entry, entries, list) {
		if (!prev) {
			prev = entry;
			continue;
		}

		/*
		 * If there are as many broken entries as entries then we know
		 * not to trust this particular entry.
		 */
		if (entry->broken == entry->count)
			continue;

		/*
		 * If our current entry == best then we can't be sure our best
		 * is really the best, so we need to keep searching.
		 */
		if (best && best->count == entry->count) {
			prev = entry;
			best = NULL;
			continue;
		}

		/* Prev == entry, not good enough, have to keep searching */
		if (!prev->broken && prev->count == entry->count)
			continue;

		if (!best)
			best = (prev->count > entry->count) ? prev : entry;
		else if (best->count < entry->count)
			best = entry;
		prev = entry;
	}

	return best;
}
/*
 * Rewrite the file extent item that backref @dback points at so that it
 * agrees with the winning (bytenr, bytes) value in @entry: adjust
 * disk_bytenr, the extent offset, disk_num_bytes and (when uncompressed)
 * ram_bytes inside a transaction on the file's root.  Returns 0 on success,
 * negative on error; bails out (asking for a btrfs-image) on the compressed
 * and out-of-range cases it cannot handle yet.
 */
static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
		      struct data_backref *dback, struct extent_entry *entry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret, err;

	key.objectid = dback->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find root for our ref\n");
		return -EINVAL;
	}

	/*
	 * The backref points to the original offset of the extent if it was
	 * split, so we need to search down to the offset we have and then walk
	 * forward until we find the backref we're looking for.
	 */
	key.objectid = dback->owner;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = dback->offset;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up ref %d\n", ret);
		return ret;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				fprintf(stderr, "Couldn't find our ref, next\n");
				return -EINVAL;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != dback->owner ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			fprintf(stderr, "Couldn't find our ref, search\n");
			return -EINVAL;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

		if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
			break;
		path->slots[0]++;
	}

	btrfs_release_path(path);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Ok we have the key of the file extent we want to fix, now we can cow
	 * down to the thing and fix it.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
			key.objectid, key.type, key.offset, ret);
		goto out;
	}
	if (ret > 0) {
		/* We just located this exact key above, so > 0 is impossible. */
		fprintf(stderr, "Well that's odd, we just found this key "
			"[%Lu, %u, %Lu]\n", key.objectid, key.type,
			key.offset);
		ret = -EINVAL;
		goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	if (btrfs_file_extent_compression(leaf, fi) &&
	    dback->disk_bytenr != entry->bytenr) {
		fprintf(stderr, "Ref doesn't match the record start and is "
			"compressed, please take a btrfs-image of this file "
			"system and send it to a btrfs developer so they can "
			"complete this functionality for bytenr %Lu\n",
			dback->disk_bytenr);
		ret = -EINVAL;
		goto out;
	}

	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
		/* Broken backref: just point it at the agreed-upon bytenr. */
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
	} else if (dback->disk_bytenr > entry->bytenr) {
		/* Ref starts inside the entry: shift the extent offset right. */
		u64 off_diff, offset;

		off_diff = dback->disk_bytenr - entry->bytenr;
		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset +
		    btrfs_file_extent_num_bytes(leaf, fi) >
		    entry->bytenr + entry->bytes) {
			fprintf(stderr, "Ref is past the entry end, please "
				"take a btrfs-image of this file system and "
				"send it to a btrfs developer, ref %Lu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += off_diff;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	} else if (dback->disk_bytenr < entry->bytenr) {
		/* Ref starts before the entry: shrink the extent offset. */
		u64 offset;

		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset < entry->bytenr) {
			fprintf(stderr, "Ref is before the entry start, please"
				" take a btrfs-image of this file system and "
				"send it to a btrfs developer, ref %Lu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}

		offset += dback->disk_bytenr;
		offset -= entry->bytenr;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	}

	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);

	/*
	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
	 * only do this if we aren't using compression, otherwise it's a
	 * trickier case.
	 */
	if (!btrfs_file_extent_compression(leaf, fi))
		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
	else
		printf("ram bytes may be wrong?\n");
	btrfs_mark_buffer_dirty(leaf);
out:
	err = btrfs_commit_transaction(trans, root);
	btrfs_release_path(path);
	return ret ? ret : err;
}
/*
 * Check that all data backrefs of @rec agree on the extent's (bytenr, bytes)
 * and, if they do not, vote among them (falling back to the extent record
 * itself on a tie) and rewrite the dissenting file extent items via
 * repair_ref().  Returns 0 when nothing needed fixing, -EAGAIN after refs
 * were modified (the caller must rebuild its cache and rescan), or another
 * negative errno when no consistent winner could be chosen.
 */
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
			   struct extent_record *rec)
{
	struct extent_backref *back;
	struct data_backref *dback;
	struct extent_entry *entry, *best = NULL;
	LIST_HEAD(entries);
	int nr_entries = 0;
	int broken_entries = 0;
	int ret = 0;
	short mismatch = 0;

	/*
	 * Metadata is easy and the backrefs should always agree on bytenr and
	 * size, if not we've got bigger issues.
	 */
	if (rec->metadata)
		return 0;

	/* Tally one vote per found data backref for its (bytenr, bytes). */
	list_for_each_entry(back, &rec->backrefs, list) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/*
		 * We only pay attention to backrefs that we found a real
		 * backref for.
		 */
		if (dback->found_ref == 0)
			continue;

		/*
		 * For now we only catch when the bytes don't match, not the
		 * bytenr.  We can easily do this at the same time, but I want
		 * to have a fs image to test on before we just add repair
		 * functionality willy-nilly so we know we won't screw up the
		 * repair.
		 */

		entry = find_entry(&entries, dback->disk_bytenr,
				   dback->bytes);
		if (!entry) {
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = dback->disk_bytenr;
			entry->bytes = dback->bytes;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}

		/*
		 * If we only have one entry we may think the entries agree
		 * when in reality they don't so we have to do some extra
		 * checking.
		 */
		if (dback->disk_bytenr != rec->start ||
		    dback->bytes != rec->nr || back->broken)
			mismatch = 1;

		if (back->broken) {
			entry->broken++;
			broken_entries++;
		}

		entry->count++;
	}

	/* Yay all the backrefs agree, carry on good sir */
	if (nr_entries <= 1 && !mismatch)
		goto out;

	/* NOTE(review): "discrepency" is a typo for "discrepancy" (runtime string). */
	fprintf(stderr, "attempting to repair backref discrepency for bytenr "
		"%Lu\n", rec->start);

	/*
	 * First we want to see if the backrefs can agree amongst themselves who
	 * is right, so figure out which one of the entries has the highest
	 * count.
	 */
	best = find_most_right_entry(&entries);

	/*
	 * Ok so we may have an even split between what the backrefs think, so
	 * this is where we use the extent ref to see what it thinks.
	 */
	if (!best) {
		entry = find_entry(&entries, rec->start, rec->nr);
		if (!entry && (!broken_entries || !rec->found_rec)) {
			fprintf(stderr, "Backrefs don't agree with each other "
				"and extent record doesn't agree with anybody,"
				" so we can't fix bytenr %Lu bytes %Lu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		} else if (!entry) {
			/*
			 * Ok our backrefs were broken, we'll assume this is the
			 * correct value and add an entry for this range.
			 */
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = rec->start;
			entry->bytes = rec->nr;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}
		/* Count the extent record itself as one extra vote. */
		entry->count++;
		best = find_most_right_entry(&entries);
		if (!best) {
			fprintf(stderr, "Backrefs and extent record evenly "
				"split on who is right, this is going to "
				"require user input to fix bytenr %Lu bytes "
				"%Lu\n", rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * I don't think this can happen currently as we'll abort() if we catch
	 * this case higher up, but in case somebody removes that we still
	 * can't deal with it properly here yet, so just bail out if that's
	 * the case.
	 */
	if (best->bytenr != rec->start) {
		fprintf(stderr, "Extent start and backref starts don't match, "
			"please use btrfs-image on this file system and send "
			"it to a btrfs developer so they can make fsck fix "
			"this particular case.  bytenr is %Lu, bytes is %Lu\n",
			rec->start, rec->nr);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Ok great we all agreed on an extent record, let's go find the real
	 * references and fix up the ones that don't match.
	 */
	list_for_each_entry(back, &rec->backrefs, list) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/*
		 * Still ignoring backrefs that don't have a real ref attached
		 * to them.
		 */
		if (dback->found_ref == 0)
			continue;

		if (dback->bytes == best->bytes &&
		    dback->disk_bytenr == best->bytenr)
			continue;

		ret = repair_ref(info, path, dback, best);
		if (ret)
			goto out;
	}

	/*
	 * Ok we messed with the actual refs, which means we need to drop our
	 * entire cache and go back and rescan.  I know this is a huge pain and
	 * adds a lot of extra work, but it's the only way to be safe.  Once all
	 * the backrefs agree we may not need to do anything to the extent
	 * record itself.
	 */
	ret = -EAGAIN;
out:
	while (!list_empty(&entries)) {
		entry = list_entry(entries.next, struct extent_entry, list);
		list_del_init(&entry->list);
		free(entry);
	}
	return ret;
}
/*
 * Handle an extent record that has duplicates: when @rec itself was never
 * seen as a real extent item and exactly one duplicate exists, promote the
 * duplicate (which came from a real BTRFS_EXTENT_ITEM_KEY) to be the cached
 * record, absorbing backrefs/refs of any overlapping records it swallows.
 * Returns 1 when the promoted record ends up with no remaining duplicates,
 * 0 otherwise (including the early-out cases).
 */
static int process_duplicates(struct btrfs_root *root,
			      struct cache_tree *extent_cache,
			      struct extent_record *rec)
{
	struct extent_record *good, *tmp;
	struct cache_extent *cache;
	int ret;

	/*
	 * If we found an extent record for this extent then return, or if we
	 * have more than one duplicate we are likely going to need to delete
	 * something.
	 */
	if (rec->found_rec || rec->num_duplicates > 1)
		return 0;

	/* Shouldn't happen but just in case */
	BUG_ON(!rec->num_duplicates);

	/*
	 * So this happens if we end up with a backref that doesn't match the
	 * actual extent entry.  So either the backref is bad or the extent
	 * entry is bad.  Either way we want to have the extent_record actually
	 * reflect what we found in the extent_tree, so we need to take the
	 * duplicate out and use that as the extent_record since the only way we
	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
	 */
	remove_cache_extent(extent_cache, &rec->cache);

	/* Promote the (single) duplicate to be the authoritative record. */
	good = list_entry(rec->dups.next, struct extent_record, list);
	list_del_init(&good->list);
	INIT_LIST_HEAD(&good->backrefs);
	INIT_LIST_HEAD(&good->dups);
	good->cache.start = good->start;
	good->cache.size = good->nr;
	good->content_checked = 0;
	good->owner_ref_checked = 0;
	good->num_duplicates = 0;
	good->refs = rec->refs;
	list_splice_init(&rec->backrefs, &good->backrefs);
	/* Absorb every record still overlapping the promoted range. */
	while (1) {
		cache = lookup_cache_extent(extent_cache, good->start,
					    good->nr);
		if (!cache)
			break;
		tmp = container_of(cache, struct extent_record, cache);

		/*
		 * If we find another overlapping extent and its found_rec is
		 * set then it's a duplicate and we need to try and delete
		 * something.
		 */
		if (tmp->found_rec || tmp->num_duplicates > 0) {
			if (list_empty(&good->list))
				list_add_tail(&good->list,
					      &duplicate_extents);
			good->num_duplicates += tmp->num_duplicates + 1;
			list_splice_init(&tmp->dups, &good->dups);
			list_del_init(&tmp->list);
			list_add_tail(&tmp->list, &good->dups);
			remove_cache_extent(extent_cache, &tmp->cache);
			continue;
		}

		/*
		 * Ok we have another non extent item backed extent rec, so
		 * lets just add it to this extent and carry on like we did
		 * above.
		 */
		good->refs += tmp->refs;
		list_splice_init(&tmp->backrefs, &good->backrefs);
		remove_cache_extent(extent_cache, &tmp->cache);
		free(tmp);
	}
	ret = insert_cache_extent(extent_cache, &good->cache);
	BUG_ON(ret);
	free(rec);
	return good->num_duplicates ? 0 : 1;
}
7011 static int delete_duplicate_records(struct btrfs_root *root,
7012 struct extent_record *rec)
7014 struct btrfs_trans_handle *trans;
7015 LIST_HEAD(delete_list);
7016 struct btrfs_path *path;
7017 struct extent_record *tmp, *good, *n;
7018 int nr_del = 0;
7019 int ret = 0, err;
7020 struct btrfs_key key;
7022 path = btrfs_alloc_path();
7023 if (!path) {
7024 ret = -ENOMEM;
7025 goto out;
7028 good = rec;
7029 /* Find the record that covers all of the duplicates. */
7030 list_for_each_entry(tmp, &rec->dups, list) {
7031 if (good->start < tmp->start)
7032 continue;
7033 if (good->nr > tmp->nr)
7034 continue;
7036 if (tmp->start + tmp->nr < good->start + good->nr) {
7037 fprintf(stderr, "Ok we have overlapping extents that "
7038 "aren't completely covered by eachother, this "
7039 "is going to require more careful thought. "
7040 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7041 tmp->start, tmp->nr, good->start, good->nr);
7042 abort();
7044 good = tmp;
7047 if (good != rec)
7048 list_add_tail(&rec->list, &delete_list);
7050 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7051 if (tmp == good)
7052 continue;
7053 list_move_tail(&tmp->list, &delete_list);
7056 root = root->fs_info->extent_root;
7057 trans = btrfs_start_transaction(root, 1);
7058 if (IS_ERR(trans)) {
7059 ret = PTR_ERR(trans);
7060 goto out;
7063 list_for_each_entry(tmp, &delete_list, list) {
7064 if (tmp->found_rec == 0)
7065 continue;
7066 key.objectid = tmp->start;
7067 key.type = BTRFS_EXTENT_ITEM_KEY;
7068 key.offset = tmp->nr;
7070 /* Shouldn't happen but just in case */
7071 if (tmp->metadata) {
7072 fprintf(stderr, "Well this shouldn't happen, extent "
7073 "record overlaps but is metadata? "
7074 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7075 abort();
7078 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7079 if (ret) {
7080 if (ret > 0)
7081 ret = -EINVAL;
7082 break;
7084 ret = btrfs_del_item(trans, root, path);
7085 if (ret)
7086 break;
7087 btrfs_release_path(path);
7088 nr_del++;
7090 err = btrfs_commit_transaction(trans, root);
7091 if (err && !ret)
7092 ret = err;
7093 out:
7094 while (!list_empty(&delete_list)) {
7095 tmp = list_entry(delete_list.next, struct extent_record, list);
7096 list_del_init(&tmp->list);
7097 if (tmp == rec)
7098 continue;
7099 free(tmp);
7102 while (!list_empty(&rec->dups)) {
7103 tmp = list_entry(rec->dups.next, struct extent_record, list);
7104 list_del_init(&tmp->list);
7105 free(tmp);
7108 btrfs_free_path(path);
7110 if (!ret && !nr_del)
7111 rec->num_duplicates = 0;
7113 return ret ? ret : nr_del;
/*
 * For every data backref of @rec that was never matched to an on-disk
 * reference, look up the owning fs tree and try to find the real file
 * extent item.  If one is found (and no live extent record already
 * claims its bytenr), fill in the backref's disk_bytenr/bytes and mark
 * it broken so verify_backrefs() knows not to trust the stored values.
 *
 * Returns 0 on success, a negative errno on lookup failure.
 */
static int find_possible_backrefs(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  struct cache_tree *extent_cache,
				  struct extent_record *rec)
{
	struct btrfs_root *root;
	struct extent_backref *back;
	struct data_backref *dback;
	struct cache_extent *cache;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret;

	list_for_each_entry(back, &rec->backrefs, list) {
		/* Don't care about full backrefs (poor unloved backrefs) */
		if (back->full_backref || !back->is_data)
			continue;

		dback = (struct data_backref *)back;

		/* We found this one, we don't need to do a lookup */
		if (dback->found_ref)
			continue;

		key.objectid = dback->root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		root = btrfs_read_fs_root(info, &key);

		/* No root, definitely a bad ref, skip */
		if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
			continue;
		/* Other err, exit */
		if (IS_ERR(root))
			return PTR_ERR(root);

		key.objectid = dback->owner;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = dback->offset;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret) {
			btrfs_release_path(path);
			if (ret < 0)
				return ret;
			/* Didn't find it, we can carry on */
			ret = 0;
			continue;
		}

		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
		bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
		btrfs_release_path(path);
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (cache) {
			struct extent_record *tmp;

			tmp = container_of(cache, struct extent_record, cache);

			/*
			 * If we found an extent record for the bytenr for this
			 * particular backref then we can't add it to our
			 * current extent record.  We only want to add backrefs
			 * that don't have a corresponding extent item in the
			 * extent tree since they likely belong to this record
			 * and we need to fix it if it doesn't match bytenrs.
			 */
			if (tmp->found_rec)
				continue;
		}

		dback->found_ref += 1;
		dback->disk_bytenr = bytenr;
		dback->bytes = bytes;

		/*
		 * Set this so the verify backref code knows not to trust the
		 * values in this backref.
		 */
		back->broken = 1;
	}

	return 0;
}
7204 * Record orphan data ref into corresponding root.
7206 * Return 0 if the extent item contains data ref and recorded.
7207 * Return 1 if the extent item contains no useful data ref
7208 * On that case, it may contains only shared_dataref or metadata backref
7209 * or the file extent exists(this should be handled by the extent bytenr
7210 * recovery routine)
7211 * Return <0 if something goes wrong.
7213 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7214 struct extent_record *rec)
7216 struct btrfs_key key;
7217 struct btrfs_root *dest_root;
7218 struct extent_backref *back;
7219 struct data_backref *dback;
7220 struct orphan_data_extent *orphan;
7221 struct btrfs_path *path;
7222 int recorded_data_ref = 0;
7223 int ret = 0;
7225 if (rec->metadata)
7226 return 1;
7227 path = btrfs_alloc_path();
7228 if (!path)
7229 return -ENOMEM;
7230 list_for_each_entry(back, &rec->backrefs, list) {
7231 if (back->full_backref || !back->is_data ||
7232 !back->found_extent_tree)
7233 continue;
7234 dback = (struct data_backref *)back;
7235 if (dback->found_ref)
7236 continue;
7237 key.objectid = dback->root;
7238 key.type = BTRFS_ROOT_ITEM_KEY;
7239 key.offset = (u64)-1;
7241 dest_root = btrfs_read_fs_root(fs_info, &key);
7243 /* For non-exist root we just skip it */
7244 if (IS_ERR(dest_root) || !dest_root)
7245 continue;
7247 key.objectid = dback->owner;
7248 key.type = BTRFS_EXTENT_DATA_KEY;
7249 key.offset = dback->offset;
7251 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7253 * For ret < 0, it's OK since the fs-tree may be corrupted,
7254 * we need to record it for inode/file extent rebuild.
7255 * For ret > 0, we record it only for file extent rebuild.
7256 * For ret == 0, the file extent exists but only bytenr
7257 * mismatch, let the original bytenr fix routine to handle,
7258 * don't record it.
7260 if (ret == 0)
7261 continue;
7262 ret = 0;
7263 orphan = malloc(sizeof(*orphan));
7264 if (!orphan) {
7265 ret = -ENOMEM;
7266 goto out;
7268 INIT_LIST_HEAD(&orphan->list);
7269 orphan->root = dback->root;
7270 orphan->objectid = dback->owner;
7271 orphan->offset = dback->offset;
7272 orphan->disk_bytenr = rec->cache.start;
7273 orphan->disk_len = rec->cache.size;
7274 list_add(&dest_root->orphan_data_extents, &orphan->list);
7275 recorded_data_ref = 1;
7277 out:
7278 btrfs_free_path(path);
7279 if (!ret)
7280 return !recorded_data_ref;
7281 else
7282 return ret;
7286 * when an incorrect extent item is found, this will delete
7287 * all of the existing entries for it and recreate them
7288 * based on what the tree scan found.
7290 static int fixup_extent_refs(struct btrfs_fs_info *info,
7291 struct cache_tree *extent_cache,
7292 struct extent_record *rec)
7294 struct btrfs_trans_handle *trans = NULL;
7295 int ret;
7296 struct btrfs_path *path;
7297 struct list_head *cur = rec->backrefs.next;
7298 struct cache_extent *cache;
7299 struct extent_backref *back;
7300 int allocated = 0;
7301 u64 flags = 0;
7303 if (rec->flag_block_full_backref)
7304 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7306 path = btrfs_alloc_path();
7307 if (!path)
7308 return -ENOMEM;
7310 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7312 * Sometimes the backrefs themselves are so broken they don't
7313 * get attached to any meaningful rec, so first go back and
7314 * check any of our backrefs that we couldn't find and throw
7315 * them into the list if we find the backref so that
7316 * verify_backrefs can figure out what to do.
7318 ret = find_possible_backrefs(info, path, extent_cache, rec);
7319 if (ret < 0)
7320 goto out;
7323 /* step one, make sure all of the backrefs agree */
7324 ret = verify_backrefs(info, path, rec);
7325 if (ret < 0)
7326 goto out;
7328 trans = btrfs_start_transaction(info->extent_root, 1);
7329 if (IS_ERR(trans)) {
7330 ret = PTR_ERR(trans);
7331 goto out;
7334 /* step two, delete all the existing records */
7335 ret = delete_extent_records(trans, info->extent_root, path,
7336 rec->start, rec->max_size);
7338 if (ret < 0)
7339 goto out;
7341 /* was this block corrupt? If so, don't add references to it */
7342 cache = lookup_cache_extent(info->corrupt_blocks,
7343 rec->start, rec->max_size);
7344 if (cache) {
7345 ret = 0;
7346 goto out;
7349 /* step three, recreate all the refs we did find */
7350 while(cur != &rec->backrefs) {
7351 back = list_entry(cur, struct extent_backref, list);
7352 cur = cur->next;
7355 * if we didn't find any references, don't create a
7356 * new extent record
7358 if (!back->found_ref)
7359 continue;
7361 rec->bad_full_backref = 0;
7362 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7363 allocated = 1;
7365 if (ret)
7366 goto out;
7368 out:
7369 if (trans) {
7370 int err = btrfs_commit_transaction(trans, info->extent_root);
7371 if (!ret)
7372 ret = err;
7375 btrfs_free_path(path);
7376 return ret;
7379 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7380 struct extent_record *rec)
7382 struct btrfs_trans_handle *trans;
7383 struct btrfs_root *root = fs_info->extent_root;
7384 struct btrfs_path *path;
7385 struct btrfs_extent_item *ei;
7386 struct btrfs_key key;
7387 u64 flags;
7388 int ret = 0;
7390 key.objectid = rec->start;
7391 if (rec->metadata) {
7392 key.type = BTRFS_METADATA_ITEM_KEY;
7393 key.offset = rec->info_level;
7394 } else {
7395 key.type = BTRFS_EXTENT_ITEM_KEY;
7396 key.offset = rec->max_size;
7399 path = btrfs_alloc_path();
7400 if (!path)
7401 return -ENOMEM;
7403 trans = btrfs_start_transaction(root, 0);
7404 if (IS_ERR(trans)) {
7405 btrfs_free_path(path);
7406 return PTR_ERR(trans);
7409 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7410 if (ret < 0) {
7411 btrfs_free_path(path);
7412 btrfs_commit_transaction(trans, root);
7413 return ret;
7414 } else if (ret) {
7415 fprintf(stderr, "Didn't find extent for %llu\n",
7416 (unsigned long long)rec->start);
7417 btrfs_free_path(path);
7418 btrfs_commit_transaction(trans, root);
7419 return -ENOENT;
7422 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7423 struct btrfs_extent_item);
7424 flags = btrfs_extent_flags(path->nodes[0], ei);
7425 if (rec->flag_block_full_backref) {
7426 fprintf(stderr, "setting full backref on %llu\n",
7427 (unsigned long long)key.objectid);
7428 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7429 } else {
7430 fprintf(stderr, "clearing full backref on %llu\n",
7431 (unsigned long long)key.objectid);
7432 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7434 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7435 btrfs_mark_buffer_dirty(path->nodes[0]);
7436 btrfs_free_path(path);
7437 return btrfs_commit_transaction(trans, root);
/* right now we only prune from the extent allocation tree */
/*
 * Remove the node/leaf pointer to a known-corrupt block from the extent
 * tree.  Starts the search at the corrupt block's parent level; if the
 * pointer isn't found there, the search climbs one level at a time until
 * it either finds the pointer or reaches the root (-ENOENT).
 *
 * Returns 0 on success, a negative errno otherwise.
 */
static int prune_one_block(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *info,
			   struct btrfs_corrupt_block *corrupt)
{
	int ret;
	struct btrfs_path path;
	struct extent_buffer *eb;
	u64 found;
	int slot;
	int nritems;
	int level = corrupt->level + 1;

	btrfs_init_path(&path);
again:
	/* we want to stop at the parent to our busted block */
	path.lowest_level = level;

	ret = btrfs_search_slot(trans, info->extent_root,
				&corrupt->key, &path, -1, 1);

	if (ret < 0)
		goto out;

	eb = path.nodes[level];
	if (!eb) {
		ret = -ENOENT;
		goto out;
	}

	/*
	 * hopefully the search gave us the block we want to prune,
	 * lets try that first
	 */
	slot = path.slots[level];
	found = btrfs_node_blockptr(eb, slot);
	if (found == corrupt->cache.start)
		goto del_ptr;

	nritems = btrfs_header_nritems(eb);

	/* the search failed, lets scan this node and hope we find it */
	for (slot = 0; slot < nritems; slot++) {
		found = btrfs_node_blockptr(eb, slot);
		if (found == corrupt->cache.start)
			goto del_ptr;
	}
	/*
	 * we couldn't find the bad block. TODO, search all the nodes for pointers
	 * to this block
	 */
	if (eb == info->extent_root->node) {
		ret = -ENOENT;
		goto out;
	} else {
		/* Retry one level higher in the tree. */
		level++;
		btrfs_release_path(&path);
		goto again;
	}

del_ptr:
	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);

out:
	btrfs_release_path(&path);
	return ret;
}
7509 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7511 struct btrfs_trans_handle *trans = NULL;
7512 struct cache_extent *cache;
7513 struct btrfs_corrupt_block *corrupt;
7515 while (1) {
7516 cache = search_cache_extent(info->corrupt_blocks, 0);
7517 if (!cache)
7518 break;
7519 if (!trans) {
7520 trans = btrfs_start_transaction(info->extent_root, 1);
7521 if (IS_ERR(trans))
7522 return PTR_ERR(trans);
7524 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7525 prune_one_block(trans, info, corrupt);
7526 remove_cache_extent(info->corrupt_blocks, cache);
7528 if (trans)
7529 return btrfs_commit_transaction(trans, info->extent_root);
7530 return 0;
7533 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7535 struct btrfs_block_group_cache *cache;
7536 u64 start, end;
7537 int ret;
7539 while (1) {
7540 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7541 &start, &end, EXTENT_DIRTY);
7542 if (ret)
7543 break;
7544 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7545 GFP_NOFS);
7548 start = 0;
7549 while (1) {
7550 cache = btrfs_lookup_first_block_group(fs_info, start);
7551 if (!cache)
7552 break;
7553 if (cache->cached)
7554 cache->cached = 0;
7555 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over every extent record collected during the scan.
 * Reports (and with repair enabled, tries to fix) ref-count mismatches,
 * backpointer mismatches, owner-ref failures, bad full-backref flags,
 * stripe-crossing metadata and chunk-type mismatches.  In repair mode
 * all problem extents are first pinned so the fixups never allocate
 * from them, and duplicate extent items are deleted up front.
 *
 * Returns 0 if clean, 1 if errors were found (non-repair), -EAGAIN if
 * duplicates were deleted and the caller must rescan, or a negative
 * errno.  In repair mode a non-recoverable fixup failure exits.
 */
static int check_extent_refs(struct btrfs_root *root,
			     struct cache_tree *extent_cache)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int err = 0;
	int ret = 0;
	int fixed = 0;
	int had_dups = 0;
	int recorded = 0;

	if (repair) {
		/*
		 * if we're doing a repair, we have to make sure
		 * we don't allocate from the problem extents.
		 * In the worst case, this will be all the
		 * extents in the FS
		 */
		cache = search_cache_extent(extent_cache, 0);
		while (cache) {
			rec = container_of(cache, struct extent_record, cache);
			set_extent_dirty(root->fs_info->excluded_extents,
					 rec->start,
					 rec->start + rec->max_size - 1,
					 GFP_NOFS);
			cache = next_cache_extent(cache);
		}

		/* pin down all the corrupted blocks too */
		cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
		while (cache) {
			set_extent_dirty(root->fs_info->excluded_extents,
					 cache->start,
					 cache->start + cache->size - 1,
					 GFP_NOFS);
			cache = next_cache_extent(cache);
		}
		prune_corrupt_blocks(root->fs_info);
		reset_cached_block_groups(root->fs_info);
	}

	reset_cached_block_groups(root->fs_info);

	/*
	 * We need to delete any duplicate entries we find first otherwise we
	 * could mess up the extent tree when we have backrefs that actually
	 * belong to a different extent item and not the weird duplicate one.
	 */
	while (repair && !list_empty(&duplicate_extents)) {
		rec = list_entry(duplicate_extents.next, struct extent_record,
				 list);
		list_del_init(&rec->list);

		/* Sometimes we can find a backref before we find an actual
		 * extent, so we need to process it a little bit to see if there
		 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
		 * if this is a backref screwup. If we need to delete stuff
		 * process_duplicates() will return 0, otherwise it will return
		 * 1 and we
		 */
		if (process_duplicates(root, extent_cache, rec))
			continue;
		ret = delete_duplicate_records(root, rec);
		if (ret < 0)
			return ret;
		/*
		 * delete_duplicate_records will return the number of entries
		 * deleted, so if it's greater than 0 then we know we actually
		 * did something and we need to remove.
		 */
		if (ret)
			had_dups = 1;
	}

	/* Deletions changed the extent tree: tell the caller to rescan. */
	if (had_dups)
		return -EAGAIN;

	while(1) {
		int cur_err = 0;

		fixed = 0;
		recorded = 0;
		cache = search_cache_extent(extent_cache, 0);
		if (!cache)
			break;
		rec = container_of(cache, struct extent_record, cache);
		if (rec->num_duplicates) {
			fprintf(stderr, "extent item %llu has multiple extent "
				"items\n", (unsigned long long)rec->start);
			err = 1;
			cur_err = 1;
		}

		if (rec->refs != rec->extent_item_refs) {
			fprintf(stderr, "ref mismatch on [%llu %llu] ",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			fprintf(stderr, "extent item %llu, found %llu\n",
				(unsigned long long)rec->extent_item_refs,
				(unsigned long long)rec->refs);
			ret = record_orphan_data_extents(root->fs_info, rec);
			if (ret < 0)
				goto repair_abort;
			if (ret == 0) {
				recorded = 1;
			} else {
				/*
				 * we can't use the extent to repair file
				 * extent, let the fallback method handle it.
				 */
				if (!fixed && repair) {
					ret = fixup_extent_refs(
							root->fs_info,
							extent_cache, rec);
					if (ret)
						goto repair_abort;
					fixed = 1;
				}
			}
			err = 1;
			cur_err = 1;
		}
		if (all_backpointers_checked(rec, 1)) {
			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);

			if (!fixed && !recorded && repair) {
				ret = fixup_extent_refs(root->fs_info,
							extent_cache, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			cur_err = 1;
			err = 1;
		}
		if (!rec->owner_ref_checked) {
			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			if (!fixed && !recorded && repair) {
				ret = fixup_extent_refs(root->fs_info,
							extent_cache, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			err = 1;
			cur_err = 1;
		}
		if (rec->bad_full_backref) {
			fprintf(stderr, "bad full backref, on [%llu]\n",
				(unsigned long long)rec->start);
			if (repair) {
				ret = fixup_extent_flags(root->fs_info, rec);
				if (ret)
					goto repair_abort;
				fixed = 1;
			}
			err = 1;
			cur_err = 1;
		}
		/*
		 * Although it's not a extent ref's problem, we reuse this
		 * routine for error reporting.
		 * No repair function yet.
		 */
		if (rec->crossing_stripes) {
			fprintf(stderr,
				"bad metadata [%llu, %llu) crossing stripe boundary\n",
				rec->start, rec->start + rec->max_size);
			err = 1;
			cur_err = 1;
		}

		if (rec->wrong_chunk_type) {
			fprintf(stderr,
				"bad extent [%llu, %llu), type mismatch with chunk\n",
				rec->start, rec->start + rec->max_size);
			err = 1;
			cur_err = 1;
		}

		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
		/* Unpin clean or successfully-repaired extents again. */
		if (!init_extent_tree && repair && (!cur_err || fixed))
			clear_extent_dirty(root->fs_info->excluded_extents,
					   rec->start,
					   rec->start + rec->max_size - 1,
					   GFP_NOFS);
		free(rec);
	}
repair_abort:
	if (repair) {
		if (ret && ret != -EAGAIN) {
			fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
			exit(1);
		} else if (!ret) {
			struct btrfs_trans_handle *trans;

			root = root->fs_info->extent_root;
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto repair_abort;
			}

			btrfs_fix_block_accounting(trans, root);
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				goto repair_abort;
		}
		if (err)
			fprintf(stderr, "repaired damaged extent references\n");
		return ret;
	}
	return err;
}
7779 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7781 u64 stripe_size;
7783 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7784 stripe_size = length;
7785 stripe_size /= num_stripes;
7786 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7787 stripe_size = length * 2;
7788 stripe_size /= num_stripes;
7789 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7790 stripe_size = length;
7791 stripe_size /= (num_stripes - 1);
7792 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7793 stripe_size = length;
7794 stripe_size /= (num_stripes - 2);
7795 } else {
7796 stripe_size = length;
7798 return stripe_size;
/*
 * Check the chunk with its block group/dev list ref:
 * Return 0 if all refs seems valid.
 * Return 1 if part of refs seems valid, need later check for rebuild ref
 * like missing block group and needs to search extent tree to rebuild them.
 * Return -1 if essential refs are missing and unable to rebuild.
 */
static int check_chunk_refs(struct chunk_record *chunk_rec,
			    struct block_group_tree *block_group_cache,
			    struct device_extent_tree *dev_extent_cache,
			    int silent)
{
	struct cache_extent *block_group_item;
	struct block_group_record *block_group_rec;
	struct cache_extent *dev_extent_item;
	struct device_extent_record *dev_extent_rec;
	u64 devid;
	u64 offset;
	u64 length;
	/*
	 * NOTE(review): always 0 in this version, so the type-flags check
	 * below is always performed and the early return never taken.
	 */
	int metadump_v2 = 0;
	int i;
	int ret = 0;

	/* A chunk must have a matching block group item. */
	block_group_item = lookup_cache_extent(&block_group_cache->tree,
					       chunk_rec->offset,
					       chunk_rec->length);
	if (block_group_item) {
		block_group_rec = container_of(block_group_item,
					       struct block_group_record,
					       cache);
		if (chunk_rec->length != block_group_rec->offset ||
		    chunk_rec->offset != block_group_rec->objectid ||
		    (!metadump_v2 &&
		     chunk_rec->type_flags != block_group_rec->flags)) {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->length,
					chunk_rec->offset,
					chunk_rec->type_flags,
					block_group_rec->objectid,
					block_group_rec->type,
					block_group_rec->offset,
					block_group_rec->offset,
					block_group_rec->objectid,
					block_group_rec->flags);
			ret = -1;
		} else {
			/* Claim the block group record for this chunk. */
			list_del_init(&block_group_rec->list);
			chunk_rec->bg_rec = block_group_rec;
		}
	} else {
		if (!silent)
			fprintf(stderr,
				"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
				chunk_rec->objectid,
				chunk_rec->type,
				chunk_rec->offset,
				chunk_rec->length,
				chunk_rec->offset,
				chunk_rec->type_flags);
		/* Missing block group can be rebuilt later. */
		ret = 1;
	}

	if (metadump_v2)
		return ret;

	/* Every stripe must have a matching dev extent of this length. */
	length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
				    chunk_rec->num_stripes);
	for (i = 0; i < chunk_rec->num_stripes; ++i) {
		devid = chunk_rec->stripes[i].devid;
		offset = chunk_rec->stripes[i].offset;
		dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
						       devid, offset, length);
		if (dev_extent_item) {
			dev_extent_rec = container_of(dev_extent_item,
						struct device_extent_record,
						cache);
			if (dev_extent_rec->objectid != devid ||
			    dev_extent_rec->offset != offset ||
			    dev_extent_rec->chunk_offset != chunk_rec->offset ||
			    dev_extent_rec->length != length) {
				if (!silent)
					fprintf(stderr,
						"Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
						chunk_rec->objectid,
						chunk_rec->type,
						chunk_rec->offset,
						chunk_rec->stripes[i].devid,
						chunk_rec->stripes[i].offset,
						dev_extent_rec->objectid,
						dev_extent_rec->offset,
						dev_extent_rec->length);
				ret = -1;
			} else {
				list_move(&dev_extent_rec->chunk_list,
					  &chunk_rec->dextents);
			}
		} else {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->stripes[i].devid,
					chunk_rec->stripes[i].offset);
			ret = -1;
		}
	}
	return ret;
}
7917 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7918 int check_chunks(struct cache_tree *chunk_cache,
7919 struct block_group_tree *block_group_cache,
7920 struct device_extent_tree *dev_extent_cache,
7921 struct list_head *good, struct list_head *bad,
7922 struct list_head *rebuild, int silent)
7924 struct cache_extent *chunk_item;
7925 struct chunk_record *chunk_rec;
7926 struct block_group_record *bg_rec;
7927 struct device_extent_record *dext_rec;
7928 int err;
7929 int ret = 0;
7931 chunk_item = first_cache_extent(chunk_cache);
7932 while (chunk_item) {
7933 chunk_rec = container_of(chunk_item, struct chunk_record,
7934 cache);
7935 err = check_chunk_refs(chunk_rec, block_group_cache,
7936 dev_extent_cache, silent);
7937 if (err < 0)
7938 ret = err;
7939 if (err == 0 && good)
7940 list_add_tail(&chunk_rec->list, good);
7941 if (err > 0 && rebuild)
7942 list_add_tail(&chunk_rec->list, rebuild);
7943 if (err < 0 && bad)
7944 list_add_tail(&chunk_rec->list, bad);
7945 chunk_item = next_cache_extent(chunk_item);
7948 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7949 if (!silent)
7950 fprintf(stderr,
7951 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7952 bg_rec->objectid,
7953 bg_rec->offset,
7954 bg_rec->flags);
7955 if (!ret)
7956 ret = 1;
7959 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7960 chunk_list) {
7961 if (!silent)
7962 fprintf(stderr,
7963 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7964 dext_rec->objectid,
7965 dext_rec->offset,
7966 dext_rec->length);
7967 if (!ret)
7968 ret = 1;
7970 return ret;
7974 static int check_device_used(struct device_record *dev_rec,
7975 struct device_extent_tree *dext_cache)
7977 struct cache_extent *cache;
7978 struct device_extent_record *dev_extent_rec;
7979 u64 total_byte = 0;
7981 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7982 while (cache) {
7983 dev_extent_rec = container_of(cache,
7984 struct device_extent_record,
7985 cache);
7986 if (dev_extent_rec->objectid != dev_rec->devid)
7987 break;
7989 list_del_init(&dev_extent_rec->device_list);
7990 total_byte += dev_extent_rec->length;
7991 cache = next_cache_extent(cache);
7994 if (total_byte != dev_rec->byte_used) {
7995 fprintf(stderr,
7996 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7997 total_byte, dev_rec->byte_used, dev_rec->objectid,
7998 dev_rec->type, dev_rec->offset);
7999 return -1;
8000 } else {
8001 return 0;
8005 /* check btrfs_dev_item -> btrfs_dev_extent */
8006 static int check_devices(struct rb_root *dev_cache,
8007 struct device_extent_tree *dev_extent_cache)
8009 struct rb_node *dev_node;
8010 struct device_record *dev_rec;
8011 struct device_extent_record *dext_rec;
8012 int err;
8013 int ret = 0;
8015 dev_node = rb_first(dev_cache);
8016 while (dev_node) {
8017 dev_rec = container_of(dev_node, struct device_record, node);
8018 err = check_device_used(dev_rec, dev_extent_cache);
8019 if (err)
8020 ret = err;
8022 dev_node = rb_next(dev_node);
8024 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8025 device_list) {
8026 fprintf(stderr,
8027 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8028 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8029 if (!ret)
8030 ret = 1;
8032 return ret;
8035 static int add_root_item_to_list(struct list_head *head,
8036 u64 objectid, u64 bytenr, u64 last_snapshot,
8037 u8 level, u8 drop_level,
8038 int level_size, struct btrfs_key *drop_key)
8041 struct root_item_record *ri_rec;
8042 ri_rec = malloc(sizeof(*ri_rec));
8043 if (!ri_rec)
8044 return -ENOMEM;
8045 ri_rec->bytenr = bytenr;
8046 ri_rec->objectid = objectid;
8047 ri_rec->level = level;
8048 ri_rec->level_size = level_size;
8049 ri_rec->drop_level = drop_level;
8050 ri_rec->last_snapshot = last_snapshot;
8051 if (drop_key)
8052 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8053 list_add_tail(&ri_rec->list, head);
8055 return 0;
8058 static void free_root_item_list(struct list_head *list)
8060 struct root_item_record *ri_rec;
8062 while (!list_empty(list)) {
8063 ri_rec = list_first_entry(list, struct root_item_record,
8064 list);
8065 list_del_init(&ri_rec->list);
8066 free(ri_rec);
/*
 * Scan every tree root queued on @list: read the root node, add it to
 * the pending set and drive run_next_block() until that root is fully
 * processed, then drain any remaining pending blocks.  All the cache
 * trees collect what the scan finds for the later consistency checks.
 *
 * Returns 0 on success, a negative errno on read or scan failure.
 */
static int deal_root_from_list(struct list_head *list,
			       struct btrfs_root *root,
			       struct block_info *bits,
			       int bits_nr,
			       struct cache_tree *pending,
			       struct cache_tree *seen,
			       struct cache_tree *reada,
			       struct cache_tree *nodes,
			       struct cache_tree *extent_cache,
			       struct cache_tree *chunk_cache,
			       struct rb_root *dev_cache,
			       struct block_group_tree *block_group_cache,
			       struct device_extent_tree *dev_extent_cache)
{
	int ret = 0;
	/*
	 * NOTE(review): last is only assigned inside the first loop; if
	 * @list is empty it reaches the drain loop uninitialized — confirm
	 * callers always queue at least one root.
	 */
	u64 last;

	while (!list_empty(list)) {
		struct root_item_record *rec;
		struct extent_buffer *buf;

		rec = list_entry(list->next,
				 struct root_item_record, list);
		last = 0;
		buf = read_tree_block(root->fs_info->tree_root,
				      rec->bytenr, rec->level_size, 0);
		if (!extent_buffer_uptodate(buf)) {
			free_extent_buffer(buf);
			ret = -EIO;
			break;
		}
		add_root_to_pending(buf, extent_cache, pending,
				    seen, nodes, rec->objectid);
		/*
		 * To rebuild extent tree, we need deal with snapshot
		 * one by one, otherwise we deal with node firstly which
		 * can maximize readahead.
		 */
		while (1) {
			ret = run_next_block(root, bits, bits_nr, &last,
					     pending, seen, reada, nodes,
					     extent_cache, chunk_cache,
					     dev_cache, block_group_cache,
					     dev_extent_cache, rec);
			if (ret != 0)
				break;
		}
		free_extent_buffer(buf);
		list_del(&rec->list);
		free(rec);
		if (ret < 0)
			break;
	}
	/* Drain whatever is still pending (rec == NULL: no root context). */
	while (ret >= 0) {
		ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
				     reada, nodes, extent_cache, chunk_cache,
				     dev_cache, block_group_cache,
				     dev_extent_cache, NULL);
		if (ret != 0) {
			if (ret > 0)
				ret = 0;
			break;
		}
	}
	return ret;
}
8136 static int check_chunks_and_extents(struct btrfs_root *root)
8138 struct rb_root dev_cache;
8139 struct cache_tree chunk_cache;
8140 struct block_group_tree block_group_cache;
8141 struct device_extent_tree dev_extent_cache;
8142 struct cache_tree extent_cache;
8143 struct cache_tree seen;
8144 struct cache_tree pending;
8145 struct cache_tree reada;
8146 struct cache_tree nodes;
8147 struct extent_io_tree excluded_extents;
8148 struct cache_tree corrupt_blocks;
8149 struct btrfs_path path;
8150 struct btrfs_key key;
8151 struct btrfs_key found_key;
8152 int ret, err = 0;
8153 struct block_info *bits;
8154 int bits_nr;
8155 struct extent_buffer *leaf;
8156 int slot;
8157 struct btrfs_root_item ri;
8158 struct list_head dropping_trees;
8159 struct list_head normal_trees;
8160 struct btrfs_root *root1;
8161 u64 objectid;
8162 u32 level_size;
8163 u8 level;
8165 dev_cache = RB_ROOT;
8166 cache_tree_init(&chunk_cache);
8167 block_group_tree_init(&block_group_cache);
8168 device_extent_tree_init(&dev_extent_cache);
8170 cache_tree_init(&extent_cache);
8171 cache_tree_init(&seen);
8172 cache_tree_init(&pending);
8173 cache_tree_init(&nodes);
8174 cache_tree_init(&reada);
8175 cache_tree_init(&corrupt_blocks);
8176 extent_io_tree_init(&excluded_extents);
8177 INIT_LIST_HEAD(&dropping_trees);
8178 INIT_LIST_HEAD(&normal_trees);
8180 if (repair) {
8181 root->fs_info->excluded_extents = &excluded_extents;
8182 root->fs_info->fsck_extent_cache = &extent_cache;
8183 root->fs_info->free_extent_hook = free_extent_hook;
8184 root->fs_info->corrupt_blocks = &corrupt_blocks;
8187 bits_nr = 1024;
8188 bits = malloc(bits_nr * sizeof(struct block_info));
8189 if (!bits) {
8190 perror("malloc");
8191 exit(1);
8194 if (ctx.progress_enabled) {
8195 ctx.tp = TASK_EXTENTS;
8196 task_start(ctx.info);
8199 again:
8200 root1 = root->fs_info->tree_root;
8201 level = btrfs_header_level(root1->node);
8202 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8203 root1->node->start, 0, level, 0,
8204 btrfs_level_size(root1, level), NULL);
8205 if (ret < 0)
8206 goto out;
8207 root1 = root->fs_info->chunk_root;
8208 level = btrfs_header_level(root1->node);
8209 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8210 root1->node->start, 0, level, 0,
8211 btrfs_level_size(root1, level), NULL);
8212 if (ret < 0)
8213 goto out;
8214 btrfs_init_path(&path);
8215 key.offset = 0;
8216 key.objectid = 0;
8217 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8218 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8219 &key, &path, 0, 0);
8220 if (ret < 0)
8221 goto out;
8222 while(1) {
8223 leaf = path.nodes[0];
8224 slot = path.slots[0];
8225 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8226 ret = btrfs_next_leaf(root, &path);
8227 if (ret != 0)
8228 break;
8229 leaf = path.nodes[0];
8230 slot = path.slots[0];
8232 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8233 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8234 unsigned long offset;
8235 u64 last_snapshot;
8237 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8238 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8239 last_snapshot = btrfs_root_last_snapshot(&ri);
8240 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8241 level = btrfs_root_level(&ri);
8242 level_size = btrfs_level_size(root, level);
8243 ret = add_root_item_to_list(&normal_trees,
8244 found_key.objectid,
8245 btrfs_root_bytenr(&ri),
8246 last_snapshot, level,
8247 0, level_size, NULL);
8248 if (ret < 0)
8249 goto out;
8250 } else {
8251 level = btrfs_root_level(&ri);
8252 level_size = btrfs_level_size(root, level);
8253 objectid = found_key.objectid;
8254 btrfs_disk_key_to_cpu(&found_key,
8255 &ri.drop_progress);
8256 ret = add_root_item_to_list(&dropping_trees,
8257 objectid,
8258 btrfs_root_bytenr(&ri),
8259 last_snapshot, level,
8260 ri.drop_level,
8261 level_size, &found_key);
8262 if (ret < 0)
8263 goto out;
8266 path.slots[0]++;
8268 btrfs_release_path(&path);
8271 * check_block can return -EAGAIN if it fixes something, please keep
8272 * this in mind when dealing with return values from these functions, if
8273 * we get -EAGAIN we want to fall through and restart the loop.
8275 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8276 &seen, &reada, &nodes, &extent_cache,
8277 &chunk_cache, &dev_cache, &block_group_cache,
8278 &dev_extent_cache);
8279 if (ret < 0) {
8280 if (ret == -EAGAIN)
8281 goto loop;
8282 goto out;
8284 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8285 &pending, &seen, &reada, &nodes,
8286 &extent_cache, &chunk_cache, &dev_cache,
8287 &block_group_cache, &dev_extent_cache);
8288 if (ret < 0) {
8289 if (ret == -EAGAIN)
8290 goto loop;
8291 goto out;
8294 ret = check_chunks(&chunk_cache, &block_group_cache,
8295 &dev_extent_cache, NULL, NULL, NULL, 0);
8296 if (ret) {
8297 if (ret == -EAGAIN)
8298 goto loop;
8299 err = ret;
8302 ret = check_extent_refs(root, &extent_cache);
8303 if (ret < 0) {
8304 if (ret == -EAGAIN)
8305 goto loop;
8306 goto out;
8309 ret = check_devices(&dev_cache, &dev_extent_cache);
8310 if (ret && err)
8311 ret = err;
8313 out:
8314 task_stop(ctx.info);
8315 if (repair) {
8316 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8317 extent_io_tree_cleanup(&excluded_extents);
8318 root->fs_info->fsck_extent_cache = NULL;
8319 root->fs_info->free_extent_hook = NULL;
8320 root->fs_info->corrupt_blocks = NULL;
8321 root->fs_info->excluded_extents = NULL;
8323 free(bits);
8324 free_chunk_cache_tree(&chunk_cache);
8325 free_device_cache_tree(&dev_cache);
8326 free_block_group_tree(&block_group_cache);
8327 free_device_extent_tree(&dev_extent_cache);
8328 free_extent_cache_tree(&seen);
8329 free_extent_cache_tree(&pending);
8330 free_extent_cache_tree(&reada);
8331 free_extent_cache_tree(&nodes);
8332 return ret;
8333 loop:
8334 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8335 free_extent_cache_tree(&seen);
8336 free_extent_cache_tree(&pending);
8337 free_extent_cache_tree(&reada);
8338 free_extent_cache_tree(&nodes);
8339 free_chunk_cache_tree(&chunk_cache);
8340 free_block_group_tree(&block_group_cache);
8341 free_device_cache_tree(&dev_cache);
8342 free_device_extent_tree(&dev_extent_cache);
8343 free_extent_record_cache(root->fs_info, &extent_cache);
8344 free_root_item_list(&normal_trees);
8345 free_root_item_list(&dropping_trees);
8346 extent_io_tree_cleanup(&excluded_extents);
8347 goto again;
8350 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8351 struct btrfs_root *root, int overwrite)
8353 struct extent_buffer *c;
8354 struct extent_buffer *old = root->node;
8355 int level;
8356 int ret;
8357 struct btrfs_disk_key disk_key = {0,0,0};
8359 level = 0;
8361 if (overwrite) {
8362 c = old;
8363 extent_buffer_get(c);
8364 goto init;
8366 c = btrfs_alloc_free_block(trans, root,
8367 btrfs_level_size(root, 0),
8368 root->root_key.objectid,
8369 &disk_key, level, 0, 0);
8370 if (IS_ERR(c)) {
8371 c = old;
8372 extent_buffer_get(c);
8373 overwrite = 1;
8375 init:
8376 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8377 btrfs_set_header_level(c, level);
8378 btrfs_set_header_bytenr(c, c->start);
8379 btrfs_set_header_generation(c, trans->transid);
8380 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8381 btrfs_set_header_owner(c, root->root_key.objectid);
8383 write_extent_buffer(c, root->fs_info->fsid,
8384 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8386 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8387 btrfs_header_chunk_tree_uuid(c),
8388 BTRFS_UUID_SIZE);
8390 btrfs_mark_buffer_dirty(c);
8392 * this case can happen in the following case:
8394 * 1.overwrite previous root.
8396 * 2.reinit reloc data root, this is because we skip pin
8397 * down reloc data tree before which means we can allocate
8398 * same block bytenr here.
8400 if (old->start == c->start) {
8401 btrfs_set_root_generation(&root->root_item,
8402 trans->transid);
8403 root->root_item.level = btrfs_header_level(root->node);
8404 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8405 &root->root_key, &root->root_item);
8406 if (ret) {
8407 free_extent_buffer(c);
8408 return ret;
8411 free_extent_buffer(old);
8412 root->node = c;
8413 add_root_to_dirty_list(root);
8414 return 0;
8417 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8418 struct extent_buffer *eb, int tree_root)
8420 struct extent_buffer *tmp;
8421 struct btrfs_root_item *ri;
8422 struct btrfs_key key;
8423 u64 bytenr;
8424 u32 leafsize;
8425 int level = btrfs_header_level(eb);
8426 int nritems;
8427 int ret;
8428 int i;
8431 * If we have pinned this block before, don't pin it again.
8432 * This can not only avoid forever loop with broken filesystem
8433 * but also give us some speedups.
8435 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8436 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8437 return 0;
8439 btrfs_pin_extent(fs_info, eb->start, eb->len);
8441 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8442 nritems = btrfs_header_nritems(eb);
8443 for (i = 0; i < nritems; i++) {
8444 if (level == 0) {
8445 btrfs_item_key_to_cpu(eb, &key, i);
8446 if (key.type != BTRFS_ROOT_ITEM_KEY)
8447 continue;
8448 /* Skip the extent root and reloc roots */
8449 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8450 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8451 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8452 continue;
8453 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8454 bytenr = btrfs_disk_root_bytenr(eb, ri);
8457 * If at any point we start needing the real root we
8458 * will have to build a stump root for the root we are
8459 * in, but for now this doesn't actually use the root so
8460 * just pass in extent_root.
8462 tmp = read_tree_block(fs_info->extent_root, bytenr,
8463 leafsize, 0);
8464 if (!extent_buffer_uptodate(tmp)) {
8465 fprintf(stderr, "Error reading root block\n");
8466 return -EIO;
8468 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8469 free_extent_buffer(tmp);
8470 if (ret)
8471 return ret;
8472 } else {
8473 bytenr = btrfs_node_blockptr(eb, i);
8475 /* If we aren't the tree root don't read the block */
8476 if (level == 1 && !tree_root) {
8477 btrfs_pin_extent(fs_info, bytenr, leafsize);
8478 continue;
8481 tmp = read_tree_block(fs_info->extent_root, bytenr,
8482 leafsize, 0);
8483 if (!extent_buffer_uptodate(tmp)) {
8484 fprintf(stderr, "Error reading tree block\n");
8485 return -EIO;
8487 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8488 free_extent_buffer(tmp);
8489 if (ret)
8490 return ret;
8494 return 0;
8497 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8499 int ret;
8501 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8502 if (ret)
8503 return ret;
8505 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8508 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8510 struct btrfs_block_group_cache *cache;
8511 struct btrfs_path *path;
8512 struct extent_buffer *leaf;
8513 struct btrfs_chunk *chunk;
8514 struct btrfs_key key;
8515 int ret;
8516 u64 start;
8518 path = btrfs_alloc_path();
8519 if (!path)
8520 return -ENOMEM;
8522 key.objectid = 0;
8523 key.type = BTRFS_CHUNK_ITEM_KEY;
8524 key.offset = 0;
8526 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8527 if (ret < 0) {
8528 btrfs_free_path(path);
8529 return ret;
8533 * We do this in case the block groups were screwed up and had alloc
8534 * bits that aren't actually set on the chunks. This happens with
8535 * restored images every time and could happen in real life I guess.
8537 fs_info->avail_data_alloc_bits = 0;
8538 fs_info->avail_metadata_alloc_bits = 0;
8539 fs_info->avail_system_alloc_bits = 0;
8541 /* First we need to create the in-memory block groups */
8542 while (1) {
8543 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8544 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8545 if (ret < 0) {
8546 btrfs_free_path(path);
8547 return ret;
8549 if (ret) {
8550 ret = 0;
8551 break;
8554 leaf = path->nodes[0];
8555 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8556 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8557 path->slots[0]++;
8558 continue;
8561 chunk = btrfs_item_ptr(leaf, path->slots[0],
8562 struct btrfs_chunk);
8563 btrfs_add_block_group(fs_info, 0,
8564 btrfs_chunk_type(leaf, chunk),
8565 key.objectid, key.offset,
8566 btrfs_chunk_length(leaf, chunk));
8567 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8568 key.offset + btrfs_chunk_length(leaf, chunk),
8569 GFP_NOFS);
8570 path->slots[0]++;
8572 start = 0;
8573 while (1) {
8574 cache = btrfs_lookup_first_block_group(fs_info, start);
8575 if (!cache)
8576 break;
8577 cache->cached = 1;
8578 start = cache->key.objectid + cache->key.offset;
8581 btrfs_free_path(path);
8582 return 0;
8585 static int reset_balance(struct btrfs_trans_handle *trans,
8586 struct btrfs_fs_info *fs_info)
8588 struct btrfs_root *root = fs_info->tree_root;
8589 struct btrfs_path *path;
8590 struct extent_buffer *leaf;
8591 struct btrfs_key key;
8592 int del_slot, del_nr = 0;
8593 int ret;
8594 int found = 0;
8596 path = btrfs_alloc_path();
8597 if (!path)
8598 return -ENOMEM;
8600 key.objectid = BTRFS_BALANCE_OBJECTID;
8601 key.type = BTRFS_BALANCE_ITEM_KEY;
8602 key.offset = 0;
8604 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8605 if (ret) {
8606 if (ret > 0)
8607 ret = 0;
8608 if (!ret)
8609 goto reinit_data_reloc;
8610 else
8611 goto out;
8614 ret = btrfs_del_item(trans, root, path);
8615 if (ret)
8616 goto out;
8617 btrfs_release_path(path);
8619 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8620 key.type = BTRFS_ROOT_ITEM_KEY;
8621 key.offset = 0;
8623 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8624 if (ret < 0)
8625 goto out;
8626 while (1) {
8627 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8628 if (!found)
8629 break;
8631 if (del_nr) {
8632 ret = btrfs_del_items(trans, root, path,
8633 del_slot, del_nr);
8634 del_nr = 0;
8635 if (ret)
8636 goto out;
8638 key.offset++;
8639 btrfs_release_path(path);
8641 found = 0;
8642 ret = btrfs_search_slot(trans, root, &key, path,
8643 -1, 1);
8644 if (ret < 0)
8645 goto out;
8646 continue;
8648 found = 1;
8649 leaf = path->nodes[0];
8650 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8651 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8652 break;
8653 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8654 path->slots[0]++;
8655 continue;
8657 if (!del_nr) {
8658 del_slot = path->slots[0];
8659 del_nr = 1;
8660 } else {
8661 del_nr++;
8663 path->slots[0]++;
8666 if (del_nr) {
8667 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8668 if (ret)
8669 goto out;
8671 btrfs_release_path(path);
8673 reinit_data_reloc:
8674 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8675 key.type = BTRFS_ROOT_ITEM_KEY;
8676 key.offset = (u64)-1;
8677 root = btrfs_read_fs_root(fs_info, &key);
8678 if (IS_ERR(root)) {
8679 fprintf(stderr, "Error reading data reloc tree\n");
8680 ret = PTR_ERR(root);
8681 goto out;
8683 record_root_in_trans(trans, root);
8684 ret = btrfs_fsck_reinit_root(trans, root, 0);
8685 if (ret)
8686 goto out;
8687 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8688 out:
8689 btrfs_free_path(path);
8690 return ret;
8693 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8694 struct btrfs_fs_info *fs_info)
8696 u64 start = 0;
8697 int ret;
8700 * The only reason we don't do this is because right now we're just
8701 * walking the trees we find and pinning down their bytes, we don't look
8702 * at any of the leaves. In order to do mixed groups we'd have to check
8703 * the leaves of any fs roots and pin down the bytes for any file
8704 * extents we find. Not hard but why do it if we don't have to?
8706 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8707 fprintf(stderr, "We don't support re-initing the extent tree "
8708 "for mixed block groups yet, please notify a btrfs "
8709 "developer you want to do this so they can add this "
8710 "functionality.\n");
8711 return -EINVAL;
8715 * first we need to walk all of the trees except the extent tree and pin
8716 * down the bytes that are in use so we don't overwrite any existing
8717 * metadata.
8719 ret = pin_metadata_blocks(fs_info);
8720 if (ret) {
8721 fprintf(stderr, "error pinning down used bytes\n");
8722 return ret;
8726 * Need to drop all the block groups since we're going to recreate all
8727 * of them again.
8729 btrfs_free_block_groups(fs_info);
8730 ret = reset_block_groups(fs_info);
8731 if (ret) {
8732 fprintf(stderr, "error resetting the block groups\n");
8733 return ret;
8736 /* Ok we can allocate now, reinit the extent root */
8737 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8738 if (ret) {
8739 fprintf(stderr, "extent root initialization failed\n");
8741 * When the transaction code is updated we should end the
8742 * transaction, but for now progs only knows about commit so
8743 * just return an error.
8745 return ret;
8749 * Now we have all the in-memory block groups setup so we can make
8750 * allocations properly, and the metadata we care about is safe since we
8751 * pinned all of it above.
8753 while (1) {
8754 struct btrfs_block_group_cache *cache;
8756 cache = btrfs_lookup_first_block_group(fs_info, start);
8757 if (!cache)
8758 break;
8759 start = cache->key.objectid + cache->key.offset;
8760 ret = btrfs_insert_item(trans, fs_info->extent_root,
8761 &cache->key, &cache->item,
8762 sizeof(cache->item));
8763 if (ret) {
8764 fprintf(stderr, "Error adding block group\n");
8765 return ret;
8767 btrfs_extent_post_op(trans, fs_info->extent_root);
8770 ret = reset_balance(trans, fs_info);
8771 if (ret)
8772 fprintf(stderr, "error reseting the pending balance\n");
8774 return ret;
8777 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8779 struct btrfs_path *path;
8780 struct btrfs_trans_handle *trans;
8781 struct btrfs_key key;
8782 int ret;
8784 printf("Recowing metadata block %llu\n", eb->start);
8785 key.objectid = btrfs_header_owner(eb);
8786 key.type = BTRFS_ROOT_ITEM_KEY;
8787 key.offset = (u64)-1;
8789 root = btrfs_read_fs_root(root->fs_info, &key);
8790 if (IS_ERR(root)) {
8791 fprintf(stderr, "Couldn't find owner root %llu\n",
8792 key.objectid);
8793 return PTR_ERR(root);
8796 path = btrfs_alloc_path();
8797 if (!path)
8798 return -ENOMEM;
8800 trans = btrfs_start_transaction(root, 1);
8801 if (IS_ERR(trans)) {
8802 btrfs_free_path(path);
8803 return PTR_ERR(trans);
8806 path->lowest_level = btrfs_header_level(eb);
8807 if (path->lowest_level)
8808 btrfs_node_key_to_cpu(eb, &key, 0);
8809 else
8810 btrfs_item_key_to_cpu(eb, &key, 0);
8812 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8813 btrfs_commit_transaction(trans, root);
8814 btrfs_free_path(path);
8815 return ret;
8818 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8820 struct btrfs_path *path;
8821 struct btrfs_trans_handle *trans;
8822 struct btrfs_key key;
8823 int ret;
8825 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8826 bad->key.type, bad->key.offset);
8827 key.objectid = bad->root_id;
8828 key.type = BTRFS_ROOT_ITEM_KEY;
8829 key.offset = (u64)-1;
8831 root = btrfs_read_fs_root(root->fs_info, &key);
8832 if (IS_ERR(root)) {
8833 fprintf(stderr, "Couldn't find owner root %llu\n",
8834 key.objectid);
8835 return PTR_ERR(root);
8838 path = btrfs_alloc_path();
8839 if (!path)
8840 return -ENOMEM;
8842 trans = btrfs_start_transaction(root, 1);
8843 if (IS_ERR(trans)) {
8844 btrfs_free_path(path);
8845 return PTR_ERR(trans);
8848 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8849 if (ret) {
8850 if (ret > 0)
8851 ret = 0;
8852 goto out;
8854 ret = btrfs_del_item(trans, root, path);
8855 out:
8856 btrfs_commit_transaction(trans, root);
8857 btrfs_free_path(path);
8858 return ret;
8861 static int zero_log_tree(struct btrfs_root *root)
8863 struct btrfs_trans_handle *trans;
8864 int ret;
8866 trans = btrfs_start_transaction(root, 1);
8867 if (IS_ERR(trans)) {
8868 ret = PTR_ERR(trans);
8869 return ret;
8871 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8872 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8873 ret = btrfs_commit_transaction(trans, root);
8874 return ret;
8877 static int populate_csum(struct btrfs_trans_handle *trans,
8878 struct btrfs_root *csum_root, char *buf, u64 start,
8879 u64 len)
8881 u64 offset = 0;
8882 u64 sectorsize;
8883 int ret = 0;
8885 while (offset < len) {
8886 sectorsize = csum_root->sectorsize;
8887 ret = read_extent_data(csum_root, buf, start + offset,
8888 &sectorsize, 0);
8889 if (ret)
8890 break;
8891 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8892 start + offset, buf, sectorsize);
8893 if (ret)
8894 break;
8895 offset += sectorsize;
8897 return ret;
8900 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8901 struct btrfs_root *csum_root,
8902 struct btrfs_root *cur_root)
8904 struct btrfs_path *path;
8905 struct btrfs_key key;
8906 struct extent_buffer *node;
8907 struct btrfs_file_extent_item *fi;
8908 char *buf = NULL;
8909 u64 start = 0;
8910 u64 len = 0;
8911 int slot = 0;
8912 int ret = 0;
8914 path = btrfs_alloc_path();
8915 if (!path)
8916 return -ENOMEM;
8917 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8918 if (!buf) {
8919 ret = -ENOMEM;
8920 goto out;
8923 key.objectid = 0;
8924 key.offset = 0;
8925 key.type = 0;
8927 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8928 if (ret < 0)
8929 goto out;
8930 /* Iterate all regular file extents and fill its csum */
8931 while (1) {
8932 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8934 if (key.type != BTRFS_EXTENT_DATA_KEY)
8935 goto next;
8936 node = path->nodes[0];
8937 slot = path->slots[0];
8938 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8939 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8940 goto next;
8941 start = btrfs_file_extent_disk_bytenr(node, fi);
8942 len = btrfs_file_extent_disk_num_bytes(node, fi);
8944 ret = populate_csum(trans, csum_root, buf, start, len);
8945 if (ret == -EEXIST)
8946 ret = 0;
8947 if (ret < 0)
8948 goto out;
8949 next:
8951 * TODO: if next leaf is corrupted, jump to nearest next valid
8952 * leaf.
8954 ret = btrfs_next_item(cur_root, path);
8955 if (ret < 0)
8956 goto out;
8957 if (ret > 0) {
8958 ret = 0;
8959 goto out;
8963 out:
8964 btrfs_free_path(path);
8965 free(buf);
8966 return ret;
8969 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8970 struct btrfs_root *csum_root)
8972 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8973 struct btrfs_path *path;
8974 struct btrfs_root *tree_root = fs_info->tree_root;
8975 struct btrfs_root *cur_root;
8976 struct extent_buffer *node;
8977 struct btrfs_key key;
8978 int slot = 0;
8979 int ret = 0;
8981 path = btrfs_alloc_path();
8982 if (!path)
8983 return -ENOMEM;
8985 key.objectid = BTRFS_FS_TREE_OBJECTID;
8986 key.offset = 0;
8987 key.type = BTRFS_ROOT_ITEM_KEY;
8989 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8990 if (ret < 0)
8991 goto out;
8992 if (ret > 0) {
8993 ret = -ENOENT;
8994 goto out;
8997 while (1) {
8998 node = path->nodes[0];
8999 slot = path->slots[0];
9000 btrfs_item_key_to_cpu(node, &key, slot);
9001 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9002 goto out;
9003 if (key.type != BTRFS_ROOT_ITEM_KEY)
9004 goto next;
9005 if (!is_fstree(key.objectid))
9006 goto next;
9007 key.offset = (u64)-1;
9009 cur_root = btrfs_read_fs_root(fs_info, &key);
9010 if (IS_ERR(cur_root) || !cur_root) {
9011 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9012 key.objectid);
9013 goto out;
9015 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9016 cur_root);
9017 if (ret < 0)
9018 goto out;
9019 next:
9020 ret = btrfs_next_item(tree_root, path);
9021 if (ret > 0) {
9022 ret = 0;
9023 goto out;
9025 if (ret < 0)
9026 goto out;
9029 out:
9030 btrfs_free_path(path);
9031 return ret;
9034 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9035 struct btrfs_root *csum_root)
9037 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9038 struct btrfs_path *path;
9039 struct btrfs_extent_item *ei;
9040 struct extent_buffer *leaf;
9041 char *buf;
9042 struct btrfs_key key;
9043 int ret;
9045 path = btrfs_alloc_path();
9046 if (!path)
9047 return -ENOMEM;
9049 key.objectid = 0;
9050 key.type = BTRFS_EXTENT_ITEM_KEY;
9051 key.offset = 0;
9053 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9054 if (ret < 0) {
9055 btrfs_free_path(path);
9056 return ret;
9059 buf = malloc(csum_root->sectorsize);
9060 if (!buf) {
9061 btrfs_free_path(path);
9062 return -ENOMEM;
9065 while (1) {
9066 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9067 ret = btrfs_next_leaf(extent_root, path);
9068 if (ret < 0)
9069 break;
9070 if (ret) {
9071 ret = 0;
9072 break;
9075 leaf = path->nodes[0];
9077 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9078 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9079 path->slots[0]++;
9080 continue;
9083 ei = btrfs_item_ptr(leaf, path->slots[0],
9084 struct btrfs_extent_item);
9085 if (!(btrfs_extent_flags(leaf, ei) &
9086 BTRFS_EXTENT_FLAG_DATA)) {
9087 path->slots[0]++;
9088 continue;
9091 ret = populate_csum(trans, csum_root, buf, key.objectid,
9092 key.offset);
9093 if (ret)
9094 break;
9095 path->slots[0]++;
9098 btrfs_free_path(path);
9099 free(buf);
9100 return ret;
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
9120 struct root_item_info {
9121 /* level of the root */
9122 u8 level;
9123 /* number of nodes at this level, must be 1 for a root */
9124 int node_count;
9125 u64 bytenr;
9126 u64 gen;
9127 struct cache_extent cache_extent;
9130 static struct cache_tree *roots_info_cache = NULL;
9132 static void free_roots_info_cache(void)
9134 if (!roots_info_cache)
9135 return;
9137 while (!cache_tree_empty(roots_info_cache)) {
9138 struct cache_extent *entry;
9139 struct root_item_info *rii;
9141 entry = first_cache_extent(roots_info_cache);
9142 if (!entry)
9143 break;
9144 remove_cache_extent(roots_info_cache, entry);
9145 rii = container_of(entry, struct root_item_info, cache_extent);
9146 free(rii);
9149 free(roots_info_cache);
9150 roots_info_cache = NULL;
9153 static int build_roots_info_cache(struct btrfs_fs_info *info)
9155 int ret = 0;
9156 struct btrfs_key key;
9157 struct extent_buffer *leaf;
9158 struct btrfs_path *path;
9160 if (!roots_info_cache) {
9161 roots_info_cache = malloc(sizeof(*roots_info_cache));
9162 if (!roots_info_cache)
9163 return -ENOMEM;
9164 cache_tree_init(roots_info_cache);
9167 path = btrfs_alloc_path();
9168 if (!path)
9169 return -ENOMEM;
9171 key.objectid = 0;
9172 key.type = BTRFS_EXTENT_ITEM_KEY;
9173 key.offset = 0;
9175 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9176 if (ret < 0)
9177 goto out;
9178 leaf = path->nodes[0];
9180 while (1) {
9181 struct btrfs_key found_key;
9182 struct btrfs_extent_item *ei;
9183 struct btrfs_extent_inline_ref *iref;
9184 int slot = path->slots[0];
9185 int type;
9186 u64 flags;
9187 u64 root_id;
9188 u8 level;
9189 struct cache_extent *entry;
9190 struct root_item_info *rii;
9192 if (slot >= btrfs_header_nritems(leaf)) {
9193 ret = btrfs_next_leaf(info->extent_root, path);
9194 if (ret < 0) {
9195 break;
9196 } else if (ret) {
9197 ret = 0;
9198 break;
9200 leaf = path->nodes[0];
9201 slot = path->slots[0];
9204 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9206 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9207 found_key.type != BTRFS_METADATA_ITEM_KEY)
9208 goto next;
9210 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9211 flags = btrfs_extent_flags(leaf, ei);
9213 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9214 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9215 goto next;
9217 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9218 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9219 level = found_key.offset;
9220 } else {
9221 struct btrfs_tree_block_info *binfo;
9223 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9224 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9225 level = btrfs_tree_block_level(leaf, binfo);
9229 * For a root extent, it must be of the following type and the
9230 * first (and only one) iref in the item.
9232 type = btrfs_extent_inline_ref_type(leaf, iref);
9233 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9234 goto next;
9236 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9237 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9238 if (!entry) {
9239 rii = malloc(sizeof(struct root_item_info));
9240 if (!rii) {
9241 ret = -ENOMEM;
9242 goto out;
9244 rii->cache_extent.start = root_id;
9245 rii->cache_extent.size = 1;
9246 rii->level = (u8)-1;
9247 entry = &rii->cache_extent;
9248 ret = insert_cache_extent(roots_info_cache, entry);
9249 ASSERT(ret == 0);
9250 } else {
9251 rii = container_of(entry, struct root_item_info,
9252 cache_extent);
9255 ASSERT(rii->cache_extent.start == root_id);
9256 ASSERT(rii->cache_extent.size == 1);
9258 if (level > rii->level || rii->level == (u8)-1) {
9259 rii->level = level;
9260 rii->bytenr = found_key.objectid;
9261 rii->gen = btrfs_extent_generation(leaf, ei);
9262 rii->node_count = 1;
9263 } else if (level == rii->level) {
9264 rii->node_count++;
9266 next:
9267 path->slots[0]++;
9270 out:
9271 btrfs_free_path(path);
9273 return ret;
9276 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9277 struct btrfs_path *path,
9278 const struct btrfs_key *root_key,
9279 const int read_only_mode)
9281 const u64 root_id = root_key->objectid;
9282 struct cache_extent *entry;
9283 struct root_item_info *rii;
9284 struct btrfs_root_item ri;
9285 unsigned long offset;
9287 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9288 if (!entry) {
9289 fprintf(stderr,
9290 "Error: could not find extent items for root %llu\n",
9291 root_key->objectid);
9292 return -ENOENT;
9295 rii = container_of(entry, struct root_item_info, cache_extent);
9296 ASSERT(rii->cache_extent.start == root_id);
9297 ASSERT(rii->cache_extent.size == 1);
9299 if (rii->node_count != 1) {
9300 fprintf(stderr,
9301 "Error: could not find btree root extent for root %llu\n",
9302 root_id);
9303 return -ENOENT;
9306 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9307 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9309 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9310 btrfs_root_level(&ri) != rii->level ||
9311 btrfs_root_generation(&ri) != rii->gen) {
9314 * If we're in repair mode but our caller told us to not update
9315 * the root item, i.e. just check if it needs to be updated, don't
9316 * print this message, since the caller will call us again shortly
9317 * for the same root item without read only mode (the caller will
9318 * open a transaction first).
9320 if (!(read_only_mode && repair))
9321 fprintf(stderr,
9322 "%sroot item for root %llu,"
9323 " current bytenr %llu, current gen %llu, current level %u,"
9324 " new bytenr %llu, new gen %llu, new level %u\n",
9325 (read_only_mode ? "" : "fixing "),
9326 root_id,
9327 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9328 btrfs_root_level(&ri),
9329 rii->bytenr, rii->gen, rii->level);
9331 if (btrfs_root_generation(&ri) > rii->gen) {
9332 fprintf(stderr,
9333 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9334 root_id, btrfs_root_generation(&ri), rii->gen);
9335 return -EINVAL;
9338 if (!read_only_mode) {
9339 btrfs_set_root_bytenr(&ri, rii->bytenr);
9340 btrfs_set_root_level(&ri, rii->level);
9341 btrfs_set_root_generation(&ri, rii->gen);
9342 write_extent_buffer(path->nodes[0], &ri,
9343 offset, sizeof(ri));
9346 return 1;
9349 return 0;
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative errno on failure, otherwise the number of root items
 * found out of date (which, in repair mode, is the number that were fixed).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Walk every ROOT_ITEM in the tree of tree roots, starting here. */
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Search with cow=1 only when a transaction is open. */
	ret = btrfs_search_slot(trans, info->tree_root, &key, path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path->nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			/* Leaf exhausted: remember the next key (if any),
			 * commit any open transaction, then re-search. */
			int no_more_keys = find_next_key(path, &key);

			btrfs_release_path(path);
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		/* Relocation trees are not snapshot/subvolume roots. */
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		/* read_only_mode when no transaction is open yet. */
		ret = maybe_repair_root_item(info, path, &found_key,
					     trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			if (!trans && repair) {
				/* Item needs fixing: restart this key with
				 * a transaction so the write can happen. */
				need_trans = 1;
				key = found_key;
				btrfs_release_path(path);
				goto again;
			}
			bad_roots++;
		}
next:
		path->slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_free_path(path);
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
/*
 * Usage/help text for "btrfs check", terminated by NULL.
 * (Fixed user-visible typos: "inegrity" -> "integrity",
 * "checkums" -> "checksums".)
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
9488 int cmd_check(int argc, char **argv)
9490 struct cache_tree root_cache;
9491 struct btrfs_root *root;
9492 struct btrfs_fs_info *info;
9493 u64 bytenr = 0;
9494 u64 subvolid = 0;
9495 u64 tree_root_bytenr = 0;
9496 u64 chunk_root_bytenr = 0;
9497 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9498 int ret;
9499 u64 num;
9500 int init_csum_tree = 0;
9501 int readonly = 0;
9502 int qgroup_report = 0;
9503 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9505 while(1) {
9506 int c;
9507 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9508 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9509 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9510 static const struct option long_options[] = {
9511 { "super", required_argument, NULL, 's' },
9512 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9513 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9514 { "init-csum-tree", no_argument, NULL,
9515 GETOPT_VAL_INIT_CSUM },
9516 { "init-extent-tree", no_argument, NULL,
9517 GETOPT_VAL_INIT_EXTENT },
9518 { "check-data-csum", no_argument, NULL,
9519 GETOPT_VAL_CHECK_CSUM },
9520 { "backup", no_argument, NULL, 'b' },
9521 { "subvol-extents", required_argument, NULL, 'E' },
9522 { "qgroup-report", no_argument, NULL, 'Q' },
9523 { "tree-root", required_argument, NULL, 'r' },
9524 { "chunk-root", required_argument, NULL,
9525 GETOPT_VAL_CHUNK_TREE },
9526 { "progress", no_argument, NULL, 'p' },
9527 { NULL, 0, NULL, 0}
9530 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9531 if (c < 0)
9532 break;
9533 switch(c) {
9534 case 'a': /* ignored */ break;
9535 case 'b':
9536 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9537 break;
9538 case 's':
9539 num = arg_strtou64(optarg);
9540 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9541 fprintf(stderr,
9542 "ERROR: super mirror should be less than: %d\n",
9543 BTRFS_SUPER_MIRROR_MAX);
9544 exit(1);
9546 bytenr = btrfs_sb_offset(((int)num));
9547 printf("using SB copy %llu, bytenr %llu\n", num,
9548 (unsigned long long)bytenr);
9549 break;
9550 case 'Q':
9551 qgroup_report = 1;
9552 break;
9553 case 'E':
9554 subvolid = arg_strtou64(optarg);
9555 break;
9556 case 'r':
9557 tree_root_bytenr = arg_strtou64(optarg);
9558 break;
9559 case GETOPT_VAL_CHUNK_TREE:
9560 chunk_root_bytenr = arg_strtou64(optarg);
9561 break;
9562 case 'p':
9563 ctx.progress_enabled = true;
9564 break;
9565 case '?':
9566 case 'h':
9567 usage(cmd_check_usage);
9568 case GETOPT_VAL_REPAIR:
9569 printf("enabling repair mode\n");
9570 repair = 1;
9571 ctree_flags |= OPEN_CTREE_WRITES;
9572 break;
9573 case GETOPT_VAL_READONLY:
9574 readonly = 1;
9575 break;
9576 case GETOPT_VAL_INIT_CSUM:
9577 printf("Creating a new CRC tree\n");
9578 init_csum_tree = 1;
9579 repair = 1;
9580 ctree_flags |= OPEN_CTREE_WRITES;
9581 break;
9582 case GETOPT_VAL_INIT_EXTENT:
9583 init_extent_tree = 1;
9584 ctree_flags |= (OPEN_CTREE_WRITES |
9585 OPEN_CTREE_NO_BLOCK_GROUPS);
9586 repair = 1;
9587 break;
9588 case GETOPT_VAL_CHECK_CSUM:
9589 check_data_csum = 1;
9590 break;
9594 if (check_argc_exact(argc - optind, 1))
9595 usage(cmd_check_usage);
9597 if (ctx.progress_enabled) {
9598 ctx.tp = TASK_NOTHING;
9599 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9602 /* This check is the only reason for --readonly to exist */
9603 if (readonly && repair) {
9604 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9605 exit(1);
9608 radix_tree_init();
9609 cache_tree_init(&root_cache);
9611 if((ret = check_mounted(argv[optind])) < 0) {
9612 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9613 goto err_out;
9614 } else if(ret) {
9615 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9616 ret = -EBUSY;
9617 goto err_out;
9620 /* only allow partial opening under repair mode */
9621 if (repair)
9622 ctree_flags |= OPEN_CTREE_PARTIAL;
9624 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9625 chunk_root_bytenr, ctree_flags);
9626 if (!info) {
9627 fprintf(stderr, "Couldn't open file system\n");
9628 ret = -EIO;
9629 goto err_out;
9632 global_info = info;
9633 root = info->fs_root;
9636 * repair mode will force us to commit transaction which
9637 * will make us fail to load log tree when mounting.
9639 if (repair && btrfs_super_log_root(info->super_copy)) {
9640 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9641 if (!ret) {
9642 ret = 1;
9643 goto close_out;
9645 ret = zero_log_tree(root);
9646 if (ret) {
9647 fprintf(stderr, "fail to zero log tree\n");
9648 goto close_out;
9652 uuid_unparse(info->super_copy->fsid, uuidbuf);
9653 if (qgroup_report) {
9654 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9655 uuidbuf);
9656 ret = qgroup_verify_all(info);
9657 if (ret == 0)
9658 print_qgroup_report(1);
9659 goto close_out;
9661 if (subvolid) {
9662 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9663 subvolid, argv[optind], uuidbuf);
9664 ret = print_extent_state(info, subvolid);
9665 goto close_out;
9667 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9669 if (!extent_buffer_uptodate(info->tree_root->node) ||
9670 !extent_buffer_uptodate(info->dev_root->node) ||
9671 !extent_buffer_uptodate(info->chunk_root->node)) {
9672 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9673 ret = -EIO;
9674 goto close_out;
9677 if (init_extent_tree || init_csum_tree) {
9678 struct btrfs_trans_handle *trans;
9680 trans = btrfs_start_transaction(info->extent_root, 0);
9681 if (IS_ERR(trans)) {
9682 fprintf(stderr, "Error starting transaction\n");
9683 ret = PTR_ERR(trans);
9684 goto close_out;
9687 if (init_extent_tree) {
9688 printf("Creating a new extent tree\n");
9689 ret = reinit_extent_tree(trans, info);
9690 if (ret)
9691 goto close_out;
9694 if (init_csum_tree) {
9695 fprintf(stderr, "Reinit crc root\n");
9696 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9697 if (ret) {
9698 fprintf(stderr, "crc root initialization failed\n");
9699 ret = -EIO;
9700 goto close_out;
9703 ret = fill_csum_tree(trans, info->csum_root,
9704 init_extent_tree);
9705 if (ret) {
9706 fprintf(stderr, "crc refilling failed\n");
9707 return -EIO;
9711 * Ok now we commit and run the normal fsck, which will add
9712 * extent entries for all of the items it finds.
9714 ret = btrfs_commit_transaction(trans, info->extent_root);
9715 if (ret)
9716 goto close_out;
9718 if (!extent_buffer_uptodate(info->extent_root->node)) {
9719 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9720 ret = -EIO;
9721 goto close_out;
9723 if (!extent_buffer_uptodate(info->csum_root->node)) {
9724 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9725 ret = -EIO;
9726 goto close_out;
9729 if (!ctx.progress_enabled)
9730 fprintf(stderr, "checking extents\n");
9731 ret = check_chunks_and_extents(root);
9732 if (ret)
9733 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9735 ret = repair_root_items(info);
9736 if (ret < 0)
9737 goto close_out;
9738 if (repair) {
9739 fprintf(stderr, "Fixed %d roots.\n", ret);
9740 ret = 0;
9741 } else if (ret > 0) {
9742 fprintf(stderr,
9743 "Found %d roots with an outdated root item.\n",
9744 ret);
9745 fprintf(stderr,
9746 "Please run a filesystem check with the option --repair to fix them.\n");
9747 ret = 1;
9748 goto close_out;
9751 if (!ctx.progress_enabled) {
9752 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9753 fprintf(stderr, "checking free space tree\n");
9754 else
9755 fprintf(stderr, "checking free space cache\n");
9757 ret = check_space_cache(root);
9758 if (ret)
9759 goto out;
9762 * We used to have to have these hole extents in between our real
9763 * extents so if we don't have this flag set we need to make sure there
9764 * are no gaps in the file extents for inodes, otherwise we can just
9765 * ignore it when this happens.
9767 no_holes = btrfs_fs_incompat(root->fs_info,
9768 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9769 if (!ctx.progress_enabled)
9770 fprintf(stderr, "checking fs roots\n");
9771 ret = check_fs_roots(root, &root_cache);
9772 if (ret)
9773 goto out;
9775 fprintf(stderr, "checking csums\n");
9776 ret = check_csums(root);
9777 if (ret)
9778 goto out;
9780 fprintf(stderr, "checking root refs\n");
9781 ret = check_root_refs(root, &root_cache);
9782 if (ret)
9783 goto out;
9785 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9786 struct extent_buffer *eb;
9788 eb = list_first_entry(&root->fs_info->recow_ebs,
9789 struct extent_buffer, recow);
9790 list_del_init(&eb->recow);
9791 ret = recow_extent_buffer(root, eb);
9792 if (ret)
9793 break;
9796 while (!list_empty(&delete_items)) {
9797 struct bad_item *bad;
9799 bad = list_first_entry(&delete_items, struct bad_item, list);
9800 list_del_init(&bad->list);
9801 if (repair)
9802 ret = delete_bad_item(root, bad);
9803 free(bad);
9806 if (info->quota_enabled) {
9807 int err;
9808 fprintf(stderr, "checking quota groups\n");
9809 err = qgroup_verify_all(info);
9810 if (err)
9811 goto out;
9814 if (!list_empty(&root->fs_info->recow_ebs)) {
9815 fprintf(stderr, "Transid errors in file system\n");
9816 ret = 1;
9818 out:
9819 print_qgroup_report(0);
9820 if (found_old_backref) { /*
9821 * there was a disk format change when mixed
9822 * backref was in testing tree. The old format
9823 * existed about one week.
9825 printf("\n * Found old mixed backref format. "
9826 "The old format is not supported! *"
9827 "\n * Please mount the FS in readonly mode, "
9828 "backup data and re-format the FS. *\n\n");
9829 ret = 1;
9831 printf("found %llu bytes used err is %d\n",
9832 (unsigned long long)bytes_used, ret);
9833 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9834 printf("total tree bytes: %llu\n",
9835 (unsigned long long)total_btree_bytes);
9836 printf("total fs tree bytes: %llu\n",
9837 (unsigned long long)total_fs_tree_bytes);
9838 printf("total extent tree bytes: %llu\n",
9839 (unsigned long long)total_extent_tree_bytes);
9840 printf("btree space waste bytes: %llu\n",
9841 (unsigned long long)btree_space_waste);
9842 printf("file data blocks allocated: %llu\n referenced %llu\n",
9843 (unsigned long long)data_bytes_allocated,
9844 (unsigned long long)data_bytes_referenced);
9846 free_root_recs_tree(&root_cache);
9847 close_out:
9848 close_ctree(root);
9849 err_out:
9850 if (ctx.progress_enabled)
9851 task_deinit(ctx.info);
9853 return ret;