2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "transaction.h"
36 #include "free-space-cache.h"
38 #include "qgroup-verify.h"
39 #include "rbtree-utils.h"
43 static u64 bytes_used
= 0;
44 static u64 total_csum_bytes
= 0;
45 static u64 total_btree_bytes
= 0;
46 static u64 total_fs_tree_bytes
= 0;
47 static u64 total_extent_tree_bytes
= 0;
48 static u64 btree_space_waste
= 0;
49 static u64 data_bytes_allocated
= 0;
50 static u64 data_bytes_referenced
= 0;
51 static int found_old_backref
= 0;
52 static LIST_HEAD(duplicate_extents
);
53 static LIST_HEAD(delete_items
);
54 static int repair
= 0;
55 static int no_holes
= 0;
56 static int init_extent_tree
= 0;
57 static int check_data_csum
= 0;
59 struct extent_backref
{
60 struct list_head list
;
61 unsigned int is_data
:1;
62 unsigned int found_extent_tree
:1;
63 unsigned int full_backref
:1;
64 unsigned int found_ref
:1;
65 unsigned int broken
:1;
69 struct extent_backref node
;
84 * Much like data_backref, just removed the undetermined members
85 * and change it to use list_head.
86 * During extent scan, it is stored in root->orphan_data_extent.
87 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
89 struct orphan_data_extent
{
90 struct list_head list
;
99 struct extent_backref node
;
106 struct extent_record
{
107 struct list_head backrefs
;
108 struct list_head dups
;
109 struct list_head list
;
110 struct cache_extent cache
;
111 struct btrfs_disk_key parent_key
;
116 u64 extent_item_refs
;
118 u64 parent_generation
;
122 int flag_block_full_backref
;
123 unsigned int found_rec
:1;
124 unsigned int content_checked
:1;
125 unsigned int owner_ref_checked
:1;
126 unsigned int is_root
:1;
127 unsigned int metadata
:1;
128 unsigned int bad_full_backref
:1;
131 struct inode_backref
{
132 struct list_head list
;
133 unsigned int found_dir_item
:1;
134 unsigned int found_dir_index
:1;
135 unsigned int found_inode_ref
:1;
136 unsigned int filetype
:8;
138 unsigned int ref_type
;
145 struct root_item_record
{
146 struct list_head list
;
153 struct btrfs_key drop_key
;
156 #define REF_ERR_NO_DIR_ITEM (1 << 0)
157 #define REF_ERR_NO_DIR_INDEX (1 << 1)
158 #define REF_ERR_NO_INODE_REF (1 << 2)
159 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
160 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
161 #define REF_ERR_DUP_INODE_REF (1 << 5)
162 #define REF_ERR_INDEX_UNMATCH (1 << 6)
163 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
164 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
165 #define REF_ERR_NO_ROOT_REF (1 << 9)
166 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
167 #define REF_ERR_DUP_ROOT_REF (1 << 11)
168 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
170 struct file_extent_hole
{
176 /* Compatible function to allow reuse of old codes */
177 static u64
first_extent_gap(struct rb_root
*holes
)
179 struct file_extent_hole
*hole
;
181 if (RB_EMPTY_ROOT(holes
))
184 hole
= rb_entry(rb_first(holes
), struct file_extent_hole
, node
);
188 int compare_hole(struct rb_node
*node1
, struct rb_node
*node2
)
190 struct file_extent_hole
*hole1
;
191 struct file_extent_hole
*hole2
;
193 hole1
= rb_entry(node1
, struct file_extent_hole
, node
);
194 hole2
= rb_entry(node2
, struct file_extent_hole
, node
);
196 if (hole1
->start
> hole2
->start
)
198 if (hole1
->start
< hole2
->start
)
200 /* Now hole1->start == hole2->start */
201 if (hole1
->len
>= hole2
->len
)
203 * Hole 1 will be merge center
204 * Same hole will be merged later
207 /* Hole 2 will be merge center */
212 * Add a hole to the record
214 * This will do hole merge for copy_file_extent_holes(),
215 * which will ensure there won't be continuous holes.
217 static int add_file_extent_hole(struct rb_root
*holes
,
220 struct file_extent_hole
*hole
;
221 struct file_extent_hole
*prev
= NULL
;
222 struct file_extent_hole
*next
= NULL
;
224 hole
= malloc(sizeof(*hole
));
229 /* Since compare will not return 0, no -EEXIST will happen */
230 rb_insert(holes
, &hole
->node
, compare_hole
);
232 /* simple merge with previous hole */
233 if (rb_prev(&hole
->node
))
234 prev
= rb_entry(rb_prev(&hole
->node
), struct file_extent_hole
,
236 if (prev
&& prev
->start
+ prev
->len
>= hole
->start
) {
237 hole
->len
= hole
->start
+ hole
->len
- prev
->start
;
238 hole
->start
= prev
->start
;
239 rb_erase(&prev
->node
, holes
);
244 /* iterate merge with next holes */
246 if (!rb_next(&hole
->node
))
248 next
= rb_entry(rb_next(&hole
->node
), struct file_extent_hole
,
250 if (hole
->start
+ hole
->len
>= next
->start
) {
251 if (hole
->start
+ hole
->len
<= next
->start
+ next
->len
)
252 hole
->len
= next
->start
+ next
->len
-
254 rb_erase(&next
->node
, holes
);
263 static int compare_hole_range(struct rb_node
*node
, void *data
)
265 struct file_extent_hole
*hole
;
268 hole
= (struct file_extent_hole
*)data
;
271 hole
= rb_entry(node
, struct file_extent_hole
, node
);
272 if (start
< hole
->start
)
274 if (start
>= hole
->start
&& start
< hole
->start
+ hole
->len
)
280 * Delete a hole in the record
282 * This will do the hole split and is much restrict than add.
284 static int del_file_extent_hole(struct rb_root
*holes
,
287 struct file_extent_hole
*hole
;
288 struct file_extent_hole tmp
;
293 struct rb_node
*node
;
300 node
= rb_search(holes
, &tmp
, compare_hole_range
, NULL
);
303 hole
= rb_entry(node
, struct file_extent_hole
, node
);
304 if (start
+ len
> hole
->start
+ hole
->len
)
308 * Now there will be no overflap, delete the hole and re-add the
309 * split(s) if they exists.
311 if (start
> hole
->start
) {
312 prev_start
= hole
->start
;
313 prev_len
= start
- hole
->start
;
316 if (hole
->start
+ hole
->len
> start
+ len
) {
317 next_start
= start
+ len
;
318 next_len
= hole
->start
+ hole
->len
- start
- len
;
321 rb_erase(node
, holes
);
324 ret
= add_file_extent_hole(holes
, prev_start
, prev_len
);
329 ret
= add_file_extent_hole(holes
, next_start
, next_len
);
336 static int copy_file_extent_holes(struct rb_root
*dst
,
339 struct file_extent_hole
*hole
;
340 struct rb_node
*node
;
343 node
= rb_first(src
);
345 hole
= rb_entry(node
, struct file_extent_hole
, node
);
346 ret
= add_file_extent_hole(dst
, hole
->start
, hole
->len
);
349 node
= rb_next(node
);
354 static void free_file_extent_holes(struct rb_root
*holes
)
356 struct rb_node
*node
;
357 struct file_extent_hole
*hole
;
359 node
= rb_first(holes
);
361 hole
= rb_entry(node
, struct file_extent_hole
, node
);
362 rb_erase(node
, holes
);
364 node
= rb_first(holes
);
368 struct inode_record
{
369 struct list_head backrefs
;
370 unsigned int checked
:1;
371 unsigned int merging
:1;
372 unsigned int found_inode_item
:1;
373 unsigned int found_dir_item
:1;
374 unsigned int found_file_extent
:1;
375 unsigned int found_csum_item
:1;
376 unsigned int some_csum_missing
:1;
377 unsigned int nodatasum
:1;
390 struct rb_root holes
;
391 struct list_head orphan_extents
;
396 #define I_ERR_NO_INODE_ITEM (1 << 0)
397 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
398 #define I_ERR_DUP_INODE_ITEM (1 << 2)
399 #define I_ERR_DUP_DIR_INDEX (1 << 3)
400 #define I_ERR_ODD_DIR_ITEM (1 << 4)
401 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
402 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
403 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
404 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
405 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
406 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
407 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
408 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
409 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
410 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
412 struct root_backref
{
413 struct list_head list
;
414 unsigned int found_dir_item
:1;
415 unsigned int found_dir_index
:1;
416 unsigned int found_back_ref
:1;
417 unsigned int found_forward_ref
:1;
418 unsigned int reachable
:1;
428 struct list_head backrefs
;
429 struct cache_extent cache
;
430 unsigned int found_root_item
:1;
436 struct cache_extent cache
;
441 struct cache_extent cache
;
442 struct cache_tree root_cache
;
443 struct cache_tree inode_cache
;
444 struct inode_record
*current
;
453 struct walk_control
{
454 struct cache_tree shared
;
455 struct shared_node
*nodes
[BTRFS_MAX_LEVEL
];
461 struct btrfs_key key
;
463 struct list_head list
;
466 static void reset_cached_block_groups(struct btrfs_fs_info
*fs_info
);
468 static void record_root_in_trans(struct btrfs_trans_handle
*trans
,
469 struct btrfs_root
*root
)
471 if (root
->last_trans
!= trans
->transid
) {
472 root
->track_dirty
= 1;
473 root
->last_trans
= trans
->transid
;
474 root
->commit_root
= root
->node
;
475 extent_buffer_get(root
->node
);
479 static u8
imode_to_type(u32 imode
)
482 static unsigned char btrfs_type_by_mode
[S_IFMT
>> S_SHIFT
] = {
483 [S_IFREG
>> S_SHIFT
] = BTRFS_FT_REG_FILE
,
484 [S_IFDIR
>> S_SHIFT
] = BTRFS_FT_DIR
,
485 [S_IFCHR
>> S_SHIFT
] = BTRFS_FT_CHRDEV
,
486 [S_IFBLK
>> S_SHIFT
] = BTRFS_FT_BLKDEV
,
487 [S_IFIFO
>> S_SHIFT
] = BTRFS_FT_FIFO
,
488 [S_IFSOCK
>> S_SHIFT
] = BTRFS_FT_SOCK
,
489 [S_IFLNK
>> S_SHIFT
] = BTRFS_FT_SYMLINK
,
492 return btrfs_type_by_mode
[(imode
& S_IFMT
) >> S_SHIFT
];
496 static int device_record_compare(struct rb_node
*node1
, struct rb_node
*node2
)
498 struct device_record
*rec1
;
499 struct device_record
*rec2
;
501 rec1
= rb_entry(node1
, struct device_record
, node
);
502 rec2
= rb_entry(node2
, struct device_record
, node
);
503 if (rec1
->devid
> rec2
->devid
)
505 else if (rec1
->devid
< rec2
->devid
)
511 static struct inode_record
*clone_inode_rec(struct inode_record
*orig_rec
)
513 struct inode_record
*rec
;
514 struct inode_backref
*backref
;
515 struct inode_backref
*orig
;
516 struct orphan_data_extent
*src_orphan
;
517 struct orphan_data_extent
*dst_orphan
;
521 rec
= malloc(sizeof(*rec
));
522 memcpy(rec
, orig_rec
, sizeof(*rec
));
524 INIT_LIST_HEAD(&rec
->backrefs
);
525 INIT_LIST_HEAD(&rec
->orphan_extents
);
526 rec
->holes
= RB_ROOT
;
528 list_for_each_entry(orig
, &orig_rec
->backrefs
, list
) {
529 size
= sizeof(*orig
) + orig
->namelen
+ 1;
530 backref
= malloc(size
);
531 memcpy(backref
, orig
, size
);
532 list_add_tail(&backref
->list
, &rec
->backrefs
);
534 list_for_each_entry(src_orphan
, &orig_rec
->orphan_extents
, list
) {
535 dst_orphan
= malloc(sizeof(*dst_orphan
));
536 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
538 memcpy(dst_orphan
, src_orphan
, sizeof(*src_orphan
));
539 list_add_tail(&dst_orphan
->list
, &rec
->orphan_extents
);
541 ret
= copy_file_extent_holes(&rec
->holes
, &orig_rec
->holes
);
547 static void print_orphan_data_extents(struct list_head
*orphan_extents
,
550 struct orphan_data_extent
*orphan
;
552 if (list_empty(orphan_extents
))
554 printf("The following data extent is lost in tree %llu:\n",
556 list_for_each_entry(orphan
, orphan_extents
, list
) {
557 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
558 orphan
->objectid
, orphan
->offset
, orphan
->disk_bytenr
,
563 static void print_inode_error(struct btrfs_root
*root
, struct inode_record
*rec
)
565 u64 root_objectid
= root
->root_key
.objectid
;
566 int errors
= rec
->errors
;
570 /* reloc root errors, we print its corresponding fs root objectid*/
571 if (root_objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
572 root_objectid
= root
->root_key
.offset
;
573 fprintf(stderr
, "reloc");
575 fprintf(stderr
, "root %llu inode %llu errors %x",
576 (unsigned long long) root_objectid
,
577 (unsigned long long) rec
->ino
, rec
->errors
);
579 if (errors
& I_ERR_NO_INODE_ITEM
)
580 fprintf(stderr
, ", no inode item");
581 if (errors
& I_ERR_NO_ORPHAN_ITEM
)
582 fprintf(stderr
, ", no orphan item");
583 if (errors
& I_ERR_DUP_INODE_ITEM
)
584 fprintf(stderr
, ", dup inode item");
585 if (errors
& I_ERR_DUP_DIR_INDEX
)
586 fprintf(stderr
, ", dup dir index");
587 if (errors
& I_ERR_ODD_DIR_ITEM
)
588 fprintf(stderr
, ", odd dir item");
589 if (errors
& I_ERR_ODD_FILE_EXTENT
)
590 fprintf(stderr
, ", odd file extent");
591 if (errors
& I_ERR_BAD_FILE_EXTENT
)
592 fprintf(stderr
, ", bad file extent");
593 if (errors
& I_ERR_FILE_EXTENT_OVERLAP
)
594 fprintf(stderr
, ", file extent overlap");
595 if (errors
& I_ERR_FILE_EXTENT_DISCOUNT
)
596 fprintf(stderr
, ", file extent discount");
597 if (errors
& I_ERR_DIR_ISIZE_WRONG
)
598 fprintf(stderr
, ", dir isize wrong");
599 if (errors
& I_ERR_FILE_NBYTES_WRONG
)
600 fprintf(stderr
, ", nbytes wrong");
601 if (errors
& I_ERR_ODD_CSUM_ITEM
)
602 fprintf(stderr
, ", odd csum item");
603 if (errors
& I_ERR_SOME_CSUM_MISSING
)
604 fprintf(stderr
, ", some csum missing");
605 if (errors
& I_ERR_LINK_COUNT_WRONG
)
606 fprintf(stderr
, ", link count wrong");
607 if (errors
& I_ERR_FILE_EXTENT_ORPHAN
)
608 fprintf(stderr
, ", orphan file extent");
609 fprintf(stderr
, "\n");
610 /* Print the orphan extents if needed */
611 if (errors
& I_ERR_FILE_EXTENT_ORPHAN
)
612 print_orphan_data_extents(&rec
->orphan_extents
, root
->objectid
);
614 /* Print the holes if needed */
615 if (errors
& I_ERR_FILE_EXTENT_DISCOUNT
) {
616 struct file_extent_hole
*hole
;
617 struct rb_node
*node
;
619 node
= rb_first(&rec
->holes
);
620 fprintf(stderr
, "Found file extent holes:\n");
622 hole
= rb_entry(node
, struct file_extent_hole
, node
);
623 fprintf(stderr
, "\tstart: %llu, len:%llu\n",
624 hole
->start
, hole
->len
);
625 node
= rb_next(node
);
630 static void print_ref_error(int errors
)
632 if (errors
& REF_ERR_NO_DIR_ITEM
)
633 fprintf(stderr
, ", no dir item");
634 if (errors
& REF_ERR_NO_DIR_INDEX
)
635 fprintf(stderr
, ", no dir index");
636 if (errors
& REF_ERR_NO_INODE_REF
)
637 fprintf(stderr
, ", no inode ref");
638 if (errors
& REF_ERR_DUP_DIR_ITEM
)
639 fprintf(stderr
, ", dup dir item");
640 if (errors
& REF_ERR_DUP_DIR_INDEX
)
641 fprintf(stderr
, ", dup dir index");
642 if (errors
& REF_ERR_DUP_INODE_REF
)
643 fprintf(stderr
, ", dup inode ref");
644 if (errors
& REF_ERR_INDEX_UNMATCH
)
645 fprintf(stderr
, ", index unmatch");
646 if (errors
& REF_ERR_FILETYPE_UNMATCH
)
647 fprintf(stderr
, ", filetype unmatch");
648 if (errors
& REF_ERR_NAME_TOO_LONG
)
649 fprintf(stderr
, ", name too long");
650 if (errors
& REF_ERR_NO_ROOT_REF
)
651 fprintf(stderr
, ", no root ref");
652 if (errors
& REF_ERR_NO_ROOT_BACKREF
)
653 fprintf(stderr
, ", no root backref");
654 if (errors
& REF_ERR_DUP_ROOT_REF
)
655 fprintf(stderr
, ", dup root ref");
656 if (errors
& REF_ERR_DUP_ROOT_BACKREF
)
657 fprintf(stderr
, ", dup root backref");
658 fprintf(stderr
, "\n");
661 static struct inode_record
*get_inode_rec(struct cache_tree
*inode_cache
,
664 struct ptr_node
*node
;
665 struct cache_extent
*cache
;
666 struct inode_record
*rec
= NULL
;
669 cache
= lookup_cache_extent(inode_cache
, ino
, 1);
671 node
= container_of(cache
, struct ptr_node
, cache
);
673 if (mod
&& rec
->refs
> 1) {
674 node
->data
= clone_inode_rec(rec
);
679 rec
= calloc(1, sizeof(*rec
));
681 rec
->extent_start
= (u64
)-1;
683 INIT_LIST_HEAD(&rec
->backrefs
);
684 INIT_LIST_HEAD(&rec
->orphan_extents
);
685 rec
->holes
= RB_ROOT
;
687 node
= malloc(sizeof(*node
));
688 node
->cache
.start
= ino
;
689 node
->cache
.size
= 1;
692 if (ino
== BTRFS_FREE_INO_OBJECTID
)
695 ret
= insert_cache_extent(inode_cache
, &node
->cache
);
701 static void free_orphan_data_extents(struct list_head
*orphan_extents
)
703 struct orphan_data_extent
*orphan
;
705 while (!list_empty(orphan_extents
)) {
706 orphan
= list_entry(orphan_extents
->next
,
707 struct orphan_data_extent
, list
);
708 list_del(&orphan
->list
);
713 static void free_inode_rec(struct inode_record
*rec
)
715 struct inode_backref
*backref
;
720 while (!list_empty(&rec
->backrefs
)) {
721 backref
= list_entry(rec
->backrefs
.next
,
722 struct inode_backref
, list
);
723 list_del(&backref
->list
);
726 free_orphan_data_extents(&rec
->orphan_extents
);
727 free_file_extent_holes(&rec
->holes
);
731 static int can_free_inode_rec(struct inode_record
*rec
)
733 if (!rec
->errors
&& rec
->checked
&& rec
->found_inode_item
&&
734 rec
->nlink
== rec
->found_link
&& list_empty(&rec
->backrefs
))
739 static void maybe_free_inode_rec(struct cache_tree
*inode_cache
,
740 struct inode_record
*rec
)
742 struct cache_extent
*cache
;
743 struct inode_backref
*tmp
, *backref
;
744 struct ptr_node
*node
;
745 unsigned char filetype
;
747 if (!rec
->found_inode_item
)
750 filetype
= imode_to_type(rec
->imode
);
751 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
752 if (backref
->found_dir_item
&& backref
->found_dir_index
) {
753 if (backref
->filetype
!= filetype
)
754 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
755 if (!backref
->errors
&& backref
->found_inode_ref
) {
756 list_del(&backref
->list
);
762 if (!rec
->checked
|| rec
->merging
)
765 if (S_ISDIR(rec
->imode
)) {
766 if (rec
->found_size
!= rec
->isize
)
767 rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
768 if (rec
->found_file_extent
)
769 rec
->errors
|= I_ERR_ODD_FILE_EXTENT
;
770 } else if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
771 if (rec
->found_dir_item
)
772 rec
->errors
|= I_ERR_ODD_DIR_ITEM
;
773 if (rec
->found_size
!= rec
->nbytes
)
774 rec
->errors
|= I_ERR_FILE_NBYTES_WRONG
;
775 if (rec
->nlink
> 0 && !no_holes
&&
776 (rec
->extent_end
< rec
->isize
||
777 first_extent_gap(&rec
->holes
) < rec
->isize
))
778 rec
->errors
|= I_ERR_FILE_EXTENT_DISCOUNT
;
781 if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
782 if (rec
->found_csum_item
&& rec
->nodatasum
)
783 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
784 if (rec
->some_csum_missing
&& !rec
->nodatasum
)
785 rec
->errors
|= I_ERR_SOME_CSUM_MISSING
;
788 BUG_ON(rec
->refs
!= 1);
789 if (can_free_inode_rec(rec
)) {
790 cache
= lookup_cache_extent(inode_cache
, rec
->ino
, 1);
791 node
= container_of(cache
, struct ptr_node
, cache
);
792 BUG_ON(node
->data
!= rec
);
793 remove_cache_extent(inode_cache
, &node
->cache
);
799 static int check_orphan_item(struct btrfs_root
*root
, u64 ino
)
801 struct btrfs_path path
;
802 struct btrfs_key key
;
805 key
.objectid
= BTRFS_ORPHAN_OBJECTID
;
806 key
.type
= BTRFS_ORPHAN_ITEM_KEY
;
809 btrfs_init_path(&path
);
810 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
811 btrfs_release_path(&path
);
817 static int process_inode_item(struct extent_buffer
*eb
,
818 int slot
, struct btrfs_key
*key
,
819 struct shared_node
*active_node
)
821 struct inode_record
*rec
;
822 struct btrfs_inode_item
*item
;
824 rec
= active_node
->current
;
825 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
826 if (rec
->found_inode_item
) {
827 rec
->errors
|= I_ERR_DUP_INODE_ITEM
;
830 item
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_item
);
831 rec
->nlink
= btrfs_inode_nlink(eb
, item
);
832 rec
->isize
= btrfs_inode_size(eb
, item
);
833 rec
->nbytes
= btrfs_inode_nbytes(eb
, item
);
834 rec
->imode
= btrfs_inode_mode(eb
, item
);
835 if (btrfs_inode_flags(eb
, item
) & BTRFS_INODE_NODATASUM
)
837 rec
->found_inode_item
= 1;
839 rec
->errors
|= I_ERR_NO_ORPHAN_ITEM
;
840 maybe_free_inode_rec(&active_node
->inode_cache
, rec
);
844 static struct inode_backref
*get_inode_backref(struct inode_record
*rec
,
846 int namelen
, u64 dir
)
848 struct inode_backref
*backref
;
850 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
851 if (rec
->ino
== BTRFS_MULTIPLE_OBJECTIDS
)
853 if (backref
->dir
!= dir
|| backref
->namelen
!= namelen
)
855 if (memcmp(name
, backref
->name
, namelen
))
860 backref
= malloc(sizeof(*backref
) + namelen
+ 1);
861 memset(backref
, 0, sizeof(*backref
));
863 backref
->namelen
= namelen
;
864 memcpy(backref
->name
, name
, namelen
);
865 backref
->name
[namelen
] = '\0';
866 list_add_tail(&backref
->list
, &rec
->backrefs
);
870 static int add_inode_backref(struct cache_tree
*inode_cache
,
871 u64 ino
, u64 dir
, u64 index
,
872 const char *name
, int namelen
,
873 int filetype
, int itemtype
, int errors
)
875 struct inode_record
*rec
;
876 struct inode_backref
*backref
;
878 rec
= get_inode_rec(inode_cache
, ino
, 1);
879 backref
= get_inode_backref(rec
, name
, namelen
, dir
);
881 backref
->errors
|= errors
;
882 if (itemtype
== BTRFS_DIR_INDEX_KEY
) {
883 if (backref
->found_dir_index
)
884 backref
->errors
|= REF_ERR_DUP_DIR_INDEX
;
885 if (backref
->found_inode_ref
&& backref
->index
!= index
)
886 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
887 if (backref
->found_dir_item
&& backref
->filetype
!= filetype
)
888 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
890 backref
->index
= index
;
891 backref
->filetype
= filetype
;
892 backref
->found_dir_index
= 1;
893 } else if (itemtype
== BTRFS_DIR_ITEM_KEY
) {
895 if (backref
->found_dir_item
)
896 backref
->errors
|= REF_ERR_DUP_DIR_ITEM
;
897 if (backref
->found_dir_index
&& backref
->filetype
!= filetype
)
898 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
900 backref
->filetype
= filetype
;
901 backref
->found_dir_item
= 1;
902 } else if ((itemtype
== BTRFS_INODE_REF_KEY
) ||
903 (itemtype
== BTRFS_INODE_EXTREF_KEY
)) {
904 if (backref
->found_inode_ref
)
905 backref
->errors
|= REF_ERR_DUP_INODE_REF
;
906 if (backref
->found_dir_index
&& backref
->index
!= index
)
907 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
909 backref
->index
= index
;
911 backref
->ref_type
= itemtype
;
912 backref
->found_inode_ref
= 1;
917 maybe_free_inode_rec(inode_cache
, rec
);
921 static int merge_inode_recs(struct inode_record
*src
, struct inode_record
*dst
,
922 struct cache_tree
*dst_cache
)
924 struct inode_backref
*backref
;
929 list_for_each_entry(backref
, &src
->backrefs
, list
) {
930 if (backref
->found_dir_index
) {
931 add_inode_backref(dst_cache
, dst
->ino
, backref
->dir
,
932 backref
->index
, backref
->name
,
933 backref
->namelen
, backref
->filetype
,
934 BTRFS_DIR_INDEX_KEY
, backref
->errors
);
936 if (backref
->found_dir_item
) {
938 add_inode_backref(dst_cache
, dst
->ino
,
939 backref
->dir
, 0, backref
->name
,
940 backref
->namelen
, backref
->filetype
,
941 BTRFS_DIR_ITEM_KEY
, backref
->errors
);
943 if (backref
->found_inode_ref
) {
944 add_inode_backref(dst_cache
, dst
->ino
,
945 backref
->dir
, backref
->index
,
946 backref
->name
, backref
->namelen
, 0,
947 backref
->ref_type
, backref
->errors
);
951 if (src
->found_dir_item
)
952 dst
->found_dir_item
= 1;
953 if (src
->found_file_extent
)
954 dst
->found_file_extent
= 1;
955 if (src
->found_csum_item
)
956 dst
->found_csum_item
= 1;
957 if (src
->some_csum_missing
)
958 dst
->some_csum_missing
= 1;
959 if (first_extent_gap(&dst
->holes
) > first_extent_gap(&src
->holes
)) {
960 ret
= copy_file_extent_holes(&dst
->holes
, &src
->holes
);
965 BUG_ON(src
->found_link
< dir_count
);
966 dst
->found_link
+= src
->found_link
- dir_count
;
967 dst
->found_size
+= src
->found_size
;
968 if (src
->extent_start
!= (u64
)-1) {
969 if (dst
->extent_start
== (u64
)-1) {
970 dst
->extent_start
= src
->extent_start
;
971 dst
->extent_end
= src
->extent_end
;
973 if (dst
->extent_end
> src
->extent_start
)
974 dst
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
975 else if (dst
->extent_end
< src
->extent_start
) {
976 ret
= add_file_extent_hole(&dst
->holes
,
978 src
->extent_start
- dst
->extent_end
);
980 if (dst
->extent_end
< src
->extent_end
)
981 dst
->extent_end
= src
->extent_end
;
985 dst
->errors
|= src
->errors
;
986 if (src
->found_inode_item
) {
987 if (!dst
->found_inode_item
) {
988 dst
->nlink
= src
->nlink
;
989 dst
->isize
= src
->isize
;
990 dst
->nbytes
= src
->nbytes
;
991 dst
->imode
= src
->imode
;
992 dst
->nodatasum
= src
->nodatasum
;
993 dst
->found_inode_item
= 1;
995 dst
->errors
|= I_ERR_DUP_INODE_ITEM
;
1003 static int splice_shared_node(struct shared_node
*src_node
,
1004 struct shared_node
*dst_node
)
1006 struct cache_extent
*cache
;
1007 struct ptr_node
*node
, *ins
;
1008 struct cache_tree
*src
, *dst
;
1009 struct inode_record
*rec
, *conflict
;
1010 u64 current_ino
= 0;
1014 if (--src_node
->refs
== 0)
1016 if (src_node
->current
)
1017 current_ino
= src_node
->current
->ino
;
1019 src
= &src_node
->root_cache
;
1020 dst
= &dst_node
->root_cache
;
1022 cache
= search_cache_extent(src
, 0);
1024 node
= container_of(cache
, struct ptr_node
, cache
);
1026 cache
= next_cache_extent(cache
);
1029 remove_cache_extent(src
, &node
->cache
);
1032 ins
= malloc(sizeof(*ins
));
1033 ins
->cache
.start
= node
->cache
.start
;
1034 ins
->cache
.size
= node
->cache
.size
;
1038 ret
= insert_cache_extent(dst
, &ins
->cache
);
1039 if (ret
== -EEXIST
) {
1040 conflict
= get_inode_rec(dst
, rec
->ino
, 1);
1041 merge_inode_recs(rec
, conflict
, dst
);
1043 conflict
->checked
= 1;
1044 if (dst_node
->current
== conflict
)
1045 dst_node
->current
= NULL
;
1047 maybe_free_inode_rec(dst
, conflict
);
1048 free_inode_rec(rec
);
1055 if (src
== &src_node
->root_cache
) {
1056 src
= &src_node
->inode_cache
;
1057 dst
= &dst_node
->inode_cache
;
1061 if (current_ino
> 0 && (!dst_node
->current
||
1062 current_ino
> dst_node
->current
->ino
)) {
1063 if (dst_node
->current
) {
1064 dst_node
->current
->checked
= 1;
1065 maybe_free_inode_rec(dst
, dst_node
->current
);
1067 dst_node
->current
= get_inode_rec(dst
, current_ino
, 1);
1072 static void free_inode_ptr(struct cache_extent
*cache
)
1074 struct ptr_node
*node
;
1075 struct inode_record
*rec
;
1077 node
= container_of(cache
, struct ptr_node
, cache
);
1079 free_inode_rec(rec
);
1083 FREE_EXTENT_CACHE_BASED_TREE(inode_recs
, free_inode_ptr
);
1085 static struct shared_node
*find_shared_node(struct cache_tree
*shared
,
1088 struct cache_extent
*cache
;
1089 struct shared_node
*node
;
1091 cache
= lookup_cache_extent(shared
, bytenr
, 1);
1093 node
= container_of(cache
, struct shared_node
, cache
);
1099 static int add_shared_node(struct cache_tree
*shared
, u64 bytenr
, u32 refs
)
1102 struct shared_node
*node
;
1104 node
= calloc(1, sizeof(*node
));
1105 node
->cache
.start
= bytenr
;
1106 node
->cache
.size
= 1;
1107 cache_tree_init(&node
->root_cache
);
1108 cache_tree_init(&node
->inode_cache
);
1111 ret
= insert_cache_extent(shared
, &node
->cache
);
1116 static int enter_shared_node(struct btrfs_root
*root
, u64 bytenr
, u32 refs
,
1117 struct walk_control
*wc
, int level
)
1119 struct shared_node
*node
;
1120 struct shared_node
*dest
;
1122 if (level
== wc
->active_node
)
1125 BUG_ON(wc
->active_node
<= level
);
1126 node
= find_shared_node(&wc
->shared
, bytenr
);
1128 add_shared_node(&wc
->shared
, bytenr
, refs
);
1129 node
= find_shared_node(&wc
->shared
, bytenr
);
1130 wc
->nodes
[level
] = node
;
1131 wc
->active_node
= level
;
1135 if (wc
->root_level
== wc
->active_node
&&
1136 btrfs_root_refs(&root
->root_item
) == 0) {
1137 if (--node
->refs
== 0) {
1138 free_inode_recs_tree(&node
->root_cache
);
1139 free_inode_recs_tree(&node
->inode_cache
);
1140 remove_cache_extent(&wc
->shared
, &node
->cache
);
1146 dest
= wc
->nodes
[wc
->active_node
];
1147 splice_shared_node(node
, dest
);
1148 if (node
->refs
== 0) {
1149 remove_cache_extent(&wc
->shared
, &node
->cache
);
1155 static int leave_shared_node(struct btrfs_root
*root
,
1156 struct walk_control
*wc
, int level
)
1158 struct shared_node
*node
;
1159 struct shared_node
*dest
;
1162 if (level
== wc
->root_level
)
1165 for (i
= level
+ 1; i
< BTRFS_MAX_LEVEL
; i
++) {
1169 BUG_ON(i
>= BTRFS_MAX_LEVEL
);
1171 node
= wc
->nodes
[wc
->active_node
];
1172 wc
->nodes
[wc
->active_node
] = NULL
;
1173 wc
->active_node
= i
;
1175 dest
= wc
->nodes
[wc
->active_node
];
1176 if (wc
->active_node
< wc
->root_level
||
1177 btrfs_root_refs(&root
->root_item
) > 0) {
1178 BUG_ON(node
->refs
<= 1);
1179 splice_shared_node(node
, dest
);
1181 BUG_ON(node
->refs
< 2);
1190 * 1 - if the root with id child_root_id is a child of root parent_root_id
1191 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1192 * has other root(s) as parent(s)
1193 * 2 - if the root child_root_id doesn't have any parent roots
1195 static int is_child_root(struct btrfs_root
*root
, u64 parent_root_id
,
1198 struct btrfs_path path
;
1199 struct btrfs_key key
;
1200 struct extent_buffer
*leaf
;
1204 btrfs_init_path(&path
);
1206 key
.objectid
= parent_root_id
;
1207 key
.type
= BTRFS_ROOT_REF_KEY
;
1208 key
.offset
= child_root_id
;
1209 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
1213 btrfs_release_path(&path
);
1217 key
.objectid
= child_root_id
;
1218 key
.type
= BTRFS_ROOT_BACKREF_KEY
;
1220 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
1226 leaf
= path
.nodes
[0];
1227 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
1228 ret
= btrfs_next_leaf(root
->fs_info
->tree_root
, &path
);
1231 leaf
= path
.nodes
[0];
1234 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1235 if (key
.objectid
!= child_root_id
||
1236 key
.type
!= BTRFS_ROOT_BACKREF_KEY
)
1241 if (key
.offset
== parent_root_id
) {
1242 btrfs_release_path(&path
);
1249 btrfs_release_path(&path
);
1252 return has_parent
? 0 : 2;
1255 static int process_dir_item(struct btrfs_root
*root
,
1256 struct extent_buffer
*eb
,
1257 int slot
, struct btrfs_key
*key
,
1258 struct shared_node
*active_node
)
1268 struct btrfs_dir_item
*di
;
1269 struct inode_record
*rec
;
1270 struct cache_tree
*root_cache
;
1271 struct cache_tree
*inode_cache
;
1272 struct btrfs_key location
;
1273 char namebuf
[BTRFS_NAME_LEN
];
1275 root_cache
= &active_node
->root_cache
;
1276 inode_cache
= &active_node
->inode_cache
;
1277 rec
= active_node
->current
;
1278 rec
->found_dir_item
= 1;
1280 di
= btrfs_item_ptr(eb
, slot
, struct btrfs_dir_item
);
1281 total
= btrfs_item_size_nr(eb
, slot
);
1282 while (cur
< total
) {
1284 btrfs_dir_item_key_to_cpu(eb
, di
, &location
);
1285 name_len
= btrfs_dir_name_len(eb
, di
);
1286 data_len
= btrfs_dir_data_len(eb
, di
);
1287 filetype
= btrfs_dir_type(eb
, di
);
1289 rec
->found_size
+= name_len
;
1290 if (name_len
<= BTRFS_NAME_LEN
) {
1294 len
= BTRFS_NAME_LEN
;
1295 error
= REF_ERR_NAME_TOO_LONG
;
1297 read_extent_buffer(eb
, namebuf
, (unsigned long)(di
+ 1), len
);
1299 if (location
.type
== BTRFS_INODE_ITEM_KEY
) {
1300 add_inode_backref(inode_cache
, location
.objectid
,
1301 key
->objectid
, key
->offset
, namebuf
,
1302 len
, filetype
, key
->type
, error
);
1303 } else if (location
.type
== BTRFS_ROOT_ITEM_KEY
) {
1304 add_inode_backref(root_cache
, location
.objectid
,
1305 key
->objectid
, key
->offset
,
1306 namebuf
, len
, filetype
,
1309 fprintf(stderr
, "invalid location in dir item %u\n",
1311 add_inode_backref(inode_cache
, BTRFS_MULTIPLE_OBJECTIDS
,
1312 key
->objectid
, key
->offset
, namebuf
,
1313 len
, filetype
, key
->type
, error
);
1316 len
= sizeof(*di
) + name_len
+ data_len
;
1317 di
= (struct btrfs_dir_item
*)((char *)di
+ len
);
1320 if (key
->type
== BTRFS_DIR_INDEX_KEY
&& nritems
> 1)
1321 rec
->errors
|= I_ERR_DUP_DIR_INDEX
;
1326 static int process_inode_ref(struct extent_buffer
*eb
,
1327 int slot
, struct btrfs_key
*key
,
1328 struct shared_node
*active_node
)
1336 struct cache_tree
*inode_cache
;
1337 struct btrfs_inode_ref
*ref
;
1338 char namebuf
[BTRFS_NAME_LEN
];
1340 inode_cache
= &active_node
->inode_cache
;
1342 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_ref
);
1343 total
= btrfs_item_size_nr(eb
, slot
);
1344 while (cur
< total
) {
1345 name_len
= btrfs_inode_ref_name_len(eb
, ref
);
1346 index
= btrfs_inode_ref_index(eb
, ref
);
1347 if (name_len
<= BTRFS_NAME_LEN
) {
1351 len
= BTRFS_NAME_LEN
;
1352 error
= REF_ERR_NAME_TOO_LONG
;
1354 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
1355 add_inode_backref(inode_cache
, key
->objectid
, key
->offset
,
1356 index
, namebuf
, len
, 0, key
->type
, error
);
1358 len
= sizeof(*ref
) + name_len
;
1359 ref
= (struct btrfs_inode_ref
*)((char *)ref
+ len
);
1365 static int process_inode_extref(struct extent_buffer
*eb
,
1366 int slot
, struct btrfs_key
*key
,
1367 struct shared_node
*active_node
)
1376 struct cache_tree
*inode_cache
;
1377 struct btrfs_inode_extref
*extref
;
1378 char namebuf
[BTRFS_NAME_LEN
];
1380 inode_cache
= &active_node
->inode_cache
;
1382 extref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_extref
);
1383 total
= btrfs_item_size_nr(eb
, slot
);
1384 while (cur
< total
) {
1385 name_len
= btrfs_inode_extref_name_len(eb
, extref
);
1386 index
= btrfs_inode_extref_index(eb
, extref
);
1387 parent
= btrfs_inode_extref_parent(eb
, extref
);
1388 if (name_len
<= BTRFS_NAME_LEN
) {
1392 len
= BTRFS_NAME_LEN
;
1393 error
= REF_ERR_NAME_TOO_LONG
;
1395 read_extent_buffer(eb
, namebuf
,
1396 (unsigned long)(extref
+ 1), len
);
1397 add_inode_backref(inode_cache
, key
->objectid
, parent
,
1398 index
, namebuf
, len
, 0, key
->type
, error
);
1400 len
= sizeof(*extref
) + name_len
;
1401 extref
= (struct btrfs_inode_extref
*)((char *)extref
+ len
);
1408 static int count_csum_range(struct btrfs_root
*root
, u64 start
,
1409 u64 len
, u64
*found
)
1411 struct btrfs_key key
;
1412 struct btrfs_path path
;
1413 struct extent_buffer
*leaf
;
1418 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
1420 btrfs_init_path(&path
);
1422 key
.objectid
= BTRFS_EXTENT_CSUM_OBJECTID
;
1424 key
.type
= BTRFS_EXTENT_CSUM_KEY
;
1426 ret
= btrfs_search_slot(NULL
, root
->fs_info
->csum_root
,
1430 if (ret
> 0 && path
.slots
[0] > 0) {
1431 leaf
= path
.nodes
[0];
1432 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0] - 1);
1433 if (key
.objectid
== BTRFS_EXTENT_CSUM_OBJECTID
&&
1434 key
.type
== BTRFS_EXTENT_CSUM_KEY
)
1439 leaf
= path
.nodes
[0];
1440 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
1441 ret
= btrfs_next_leaf(root
->fs_info
->csum_root
, &path
);
1446 leaf
= path
.nodes
[0];
1449 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1450 if (key
.objectid
!= BTRFS_EXTENT_CSUM_OBJECTID
||
1451 key
.type
!= BTRFS_EXTENT_CSUM_KEY
)
1454 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1455 if (key
.offset
>= start
+ len
)
1458 if (key
.offset
> start
)
1461 size
= btrfs_item_size_nr(leaf
, path
.slots
[0]);
1462 csum_end
= key
.offset
+ (size
/ csum_size
) * root
->sectorsize
;
1463 if (csum_end
> start
) {
1464 size
= min(csum_end
- start
, len
);
1473 btrfs_release_path(&path
);
1479 static int process_file_extent(struct btrfs_root
*root
,
1480 struct extent_buffer
*eb
,
1481 int slot
, struct btrfs_key
*key
,
1482 struct shared_node
*active_node
)
1484 struct inode_record
*rec
;
1485 struct btrfs_file_extent_item
*fi
;
1487 u64 disk_bytenr
= 0;
1488 u64 extent_offset
= 0;
1489 u64 mask
= root
->sectorsize
- 1;
1493 rec
= active_node
->current
;
1494 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
1495 rec
->found_file_extent
= 1;
1497 if (rec
->extent_start
== (u64
)-1) {
1498 rec
->extent_start
= key
->offset
;
1499 rec
->extent_end
= key
->offset
;
1502 if (rec
->extent_end
> key
->offset
)
1503 rec
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
1504 else if (rec
->extent_end
< key
->offset
) {
1505 ret
= add_file_extent_hole(&rec
->holes
, rec
->extent_end
,
1506 key
->offset
- rec
->extent_end
);
1511 fi
= btrfs_item_ptr(eb
, slot
, struct btrfs_file_extent_item
);
1512 extent_type
= btrfs_file_extent_type(eb
, fi
);
1514 if (extent_type
== BTRFS_FILE_EXTENT_INLINE
) {
1515 num_bytes
= btrfs_file_extent_inline_len(eb
, slot
, fi
);
1517 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1518 rec
->found_size
+= num_bytes
;
1519 num_bytes
= (num_bytes
+ mask
) & ~mask
;
1520 } else if (extent_type
== BTRFS_FILE_EXTENT_REG
||
1521 extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1522 num_bytes
= btrfs_file_extent_num_bytes(eb
, fi
);
1523 disk_bytenr
= btrfs_file_extent_disk_bytenr(eb
, fi
);
1524 extent_offset
= btrfs_file_extent_offset(eb
, fi
);
1525 if (num_bytes
== 0 || (num_bytes
& mask
))
1526 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1527 if (num_bytes
+ extent_offset
>
1528 btrfs_file_extent_ram_bytes(eb
, fi
))
1529 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1530 if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
&&
1531 (btrfs_file_extent_compression(eb
, fi
) ||
1532 btrfs_file_extent_encryption(eb
, fi
) ||
1533 btrfs_file_extent_other_encoding(eb
, fi
)))
1534 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1535 if (disk_bytenr
> 0)
1536 rec
->found_size
+= num_bytes
;
1538 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1540 rec
->extent_end
= key
->offset
+ num_bytes
;
1543 * The data reloc tree will copy full extents into its inode and then
1544 * copy the corresponding csums. Because the extent it copied could be
1545 * a preallocated extent that hasn't been written to yet there may be no
1546 * csums to copy, ergo we won't have csums for our file extent. This is
1547 * ok so just don't bother checking csums if the inode belongs to the
1550 if (disk_bytenr
> 0 &&
1551 btrfs_header_owner(eb
) != BTRFS_DATA_RELOC_TREE_OBJECTID
) {
1553 if (btrfs_file_extent_compression(eb
, fi
))
1554 num_bytes
= btrfs_file_extent_disk_num_bytes(eb
, fi
);
1556 disk_bytenr
+= extent_offset
;
1558 ret
= count_csum_range(root
, disk_bytenr
, num_bytes
, &found
);
1561 if (extent_type
== BTRFS_FILE_EXTENT_REG
) {
1563 rec
->found_csum_item
= 1;
1564 if (found
< num_bytes
)
1565 rec
->some_csum_missing
= 1;
1566 } else if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1568 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
1574 static int process_one_leaf(struct btrfs_root
*root
, struct extent_buffer
*eb
,
1575 struct walk_control
*wc
)
1577 struct btrfs_key key
;
1581 struct cache_tree
*inode_cache
;
1582 struct shared_node
*active_node
;
1584 if (wc
->root_level
== wc
->active_node
&&
1585 btrfs_root_refs(&root
->root_item
) == 0)
1588 active_node
= wc
->nodes
[wc
->active_node
];
1589 inode_cache
= &active_node
->inode_cache
;
1590 nritems
= btrfs_header_nritems(eb
);
1591 for (i
= 0; i
< nritems
; i
++) {
1592 btrfs_item_key_to_cpu(eb
, &key
, i
);
1594 if (key
.objectid
== BTRFS_FREE_SPACE_OBJECTID
)
1596 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
)
1599 if (active_node
->current
== NULL
||
1600 active_node
->current
->ino
< key
.objectid
) {
1601 if (active_node
->current
) {
1602 active_node
->current
->checked
= 1;
1603 maybe_free_inode_rec(inode_cache
,
1604 active_node
->current
);
1606 active_node
->current
= get_inode_rec(inode_cache
,
1610 case BTRFS_DIR_ITEM_KEY
:
1611 case BTRFS_DIR_INDEX_KEY
:
1612 ret
= process_dir_item(root
, eb
, i
, &key
, active_node
);
1614 case BTRFS_INODE_REF_KEY
:
1615 ret
= process_inode_ref(eb
, i
, &key
, active_node
);
1617 case BTRFS_INODE_EXTREF_KEY
:
1618 ret
= process_inode_extref(eb
, i
, &key
, active_node
);
1620 case BTRFS_INODE_ITEM_KEY
:
1621 ret
= process_inode_item(eb
, i
, &key
, active_node
);
1623 case BTRFS_EXTENT_DATA_KEY
:
1624 ret
= process_file_extent(root
, eb
, i
, &key
,
1634 static void reada_walk_down(struct btrfs_root
*root
,
1635 struct extent_buffer
*node
, int slot
)
1644 level
= btrfs_header_level(node
);
1648 nritems
= btrfs_header_nritems(node
);
1649 blocksize
= btrfs_level_size(root
, level
- 1);
1650 for (i
= slot
; i
< nritems
; i
++) {
1651 bytenr
= btrfs_node_blockptr(node
, i
);
1652 ptr_gen
= btrfs_node_ptr_generation(node
, i
);
1653 readahead_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
1658 * Check the child node/leaf by the following condition:
1659 * 1. the first item key of the node/leaf should be the same with the one
1661 * 2. block in parent node should match the child node/leaf.
1662 * 3. generation of parent node and child's header should be consistent.
1664 * Or the child node/leaf pointed by the key in parent is not valid.
1666 * We hope to check leaf owner too, but since subvol may share leaves,
1667 * which makes leaf owner check not so strong, key check should be
1668 * sufficient enough for that case.
1670 static int check_child_node(struct btrfs_root
*root
,
1671 struct extent_buffer
*parent
, int slot
,
1672 struct extent_buffer
*child
)
1674 struct btrfs_key parent_key
;
1675 struct btrfs_key child_key
;
1678 btrfs_node_key_to_cpu(parent
, &parent_key
, slot
);
1679 if (btrfs_header_level(child
) == 0)
1680 btrfs_item_key_to_cpu(child
, &child_key
, 0);
1682 btrfs_node_key_to_cpu(child
, &child_key
, 0);
1684 if (memcmp(&parent_key
, &child_key
, sizeof(parent_key
))) {
1687 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1688 parent_key
.objectid
, parent_key
.type
, parent_key
.offset
,
1689 child_key
.objectid
, child_key
.type
, child_key
.offset
);
1691 if (btrfs_header_bytenr(child
) != btrfs_node_blockptr(parent
, slot
)) {
1693 fprintf(stderr
, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1694 btrfs_node_blockptr(parent
, slot
),
1695 btrfs_header_bytenr(child
));
1697 if (btrfs_node_ptr_generation(parent
, slot
) !=
1698 btrfs_header_generation(child
)) {
1700 fprintf(stderr
, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1701 btrfs_header_generation(child
),
1702 btrfs_node_ptr_generation(parent
, slot
));
1707 static int walk_down_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1708 struct walk_control
*wc
, int *level
)
1710 enum btrfs_tree_block_status status
;
1713 struct extent_buffer
*next
;
1714 struct extent_buffer
*cur
;
1719 WARN_ON(*level
< 0);
1720 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1721 ret
= btrfs_lookup_extent_info(NULL
, root
,
1722 path
->nodes
[*level
]->start
,
1723 *level
, 1, &refs
, NULL
);
1730 ret
= enter_shared_node(root
, path
->nodes
[*level
]->start
,
1738 while (*level
>= 0) {
1739 WARN_ON(*level
< 0);
1740 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1741 cur
= path
->nodes
[*level
];
1743 if (btrfs_header_level(cur
) != *level
)
1746 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
))
1749 ret
= process_one_leaf(root
, cur
, wc
);
1754 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1755 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
1756 blocksize
= btrfs_level_size(root
, *level
- 1);
1757 ret
= btrfs_lookup_extent_info(NULL
, root
, bytenr
, *level
- 1,
1763 ret
= enter_shared_node(root
, bytenr
, refs
,
1766 path
->slots
[*level
]++;
1771 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1772 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
1773 free_extent_buffer(next
);
1774 reada_walk_down(root
, cur
, path
->slots
[*level
]);
1775 next
= read_tree_block(root
, bytenr
, blocksize
,
1777 if (!extent_buffer_uptodate(next
)) {
1778 struct btrfs_key node_key
;
1780 btrfs_node_key_to_cpu(path
->nodes
[*level
],
1782 path
->slots
[*level
]);
1783 btrfs_add_corrupt_extent_record(root
->fs_info
,
1785 path
->nodes
[*level
]->start
,
1786 root
->leafsize
, *level
);
1792 ret
= check_child_node(root
, cur
, path
->slots
[*level
], next
);
1798 if (btrfs_is_leaf(next
))
1799 status
= btrfs_check_leaf(root
, NULL
, next
);
1801 status
= btrfs_check_node(root
, NULL
, next
);
1802 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
1803 free_extent_buffer(next
);
1808 *level
= *level
- 1;
1809 free_extent_buffer(path
->nodes
[*level
]);
1810 path
->nodes
[*level
] = next
;
1811 path
->slots
[*level
] = 0;
1814 path
->slots
[*level
] = btrfs_header_nritems(path
->nodes
[*level
]);
1818 static int walk_up_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1819 struct walk_control
*wc
, int *level
)
1822 struct extent_buffer
*leaf
;
1824 for (i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1825 leaf
= path
->nodes
[i
];
1826 if (path
->slots
[i
] + 1 < btrfs_header_nritems(leaf
)) {
1831 free_extent_buffer(path
->nodes
[*level
]);
1832 path
->nodes
[*level
] = NULL
;
1833 BUG_ON(*level
> wc
->active_node
);
1834 if (*level
== wc
->active_node
)
1835 leave_shared_node(root
, wc
, *level
);
1842 static int check_root_dir(struct inode_record
*rec
)
1844 struct inode_backref
*backref
;
1847 if (!rec
->found_inode_item
|| rec
->errors
)
1849 if (rec
->nlink
!= 1 || rec
->found_link
!= 0)
1851 if (list_empty(&rec
->backrefs
))
1853 backref
= list_entry(rec
->backrefs
.next
, struct inode_backref
, list
);
1854 if (!backref
->found_inode_ref
)
1856 if (backref
->index
!= 0 || backref
->namelen
!= 2 ||
1857 memcmp(backref
->name
, "..", 2))
1859 if (backref
->found_dir_index
|| backref
->found_dir_item
)
1866 static int repair_inode_isize(struct btrfs_trans_handle
*trans
,
1867 struct btrfs_root
*root
, struct btrfs_path
*path
,
1868 struct inode_record
*rec
)
1870 struct btrfs_inode_item
*ei
;
1871 struct btrfs_key key
;
1874 key
.objectid
= rec
->ino
;
1875 key
.type
= BTRFS_INODE_ITEM_KEY
;
1876 key
.offset
= (u64
)-1;
1878 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
1882 if (!path
->slots
[0]) {
1889 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1890 if (key
.objectid
!= rec
->ino
) {
1895 ei
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
1896 struct btrfs_inode_item
);
1897 btrfs_set_inode_size(path
->nodes
[0], ei
, rec
->found_size
);
1898 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1899 rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1900 printf("reset isize for dir %Lu root %Lu\n", rec
->ino
,
1901 root
->root_key
.objectid
);
1903 btrfs_release_path(path
);
1907 static int repair_inode_orphan_item(struct btrfs_trans_handle
*trans
,
1908 struct btrfs_root
*root
,
1909 struct btrfs_path
*path
,
1910 struct inode_record
*rec
)
1914 ret
= btrfs_add_orphan_item(trans
, root
, path
, rec
->ino
);
1915 btrfs_release_path(path
);
1917 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
1921 static int add_missing_dir_index(struct btrfs_root
*root
,
1922 struct cache_tree
*inode_cache
,
1923 struct inode_record
*rec
,
1924 struct inode_backref
*backref
)
1926 struct btrfs_path
*path
;
1927 struct btrfs_trans_handle
*trans
;
1928 struct btrfs_dir_item
*dir_item
;
1929 struct extent_buffer
*leaf
;
1930 struct btrfs_key key
;
1931 struct btrfs_disk_key disk_key
;
1932 struct inode_record
*dir_rec
;
1933 unsigned long name_ptr
;
1934 u32 data_size
= sizeof(*dir_item
) + backref
->namelen
;
1937 path
= btrfs_alloc_path();
1941 trans
= btrfs_start_transaction(root
, 1);
1942 if (IS_ERR(trans
)) {
1943 btrfs_free_path(path
);
1944 return PTR_ERR(trans
);
1947 fprintf(stderr
, "repairing missing dir index item for inode %llu\n",
1948 (unsigned long long)rec
->ino
);
1949 key
.objectid
= backref
->dir
;
1950 key
.type
= BTRFS_DIR_INDEX_KEY
;
1951 key
.offset
= backref
->index
;
1953 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, data_size
);
1956 leaf
= path
->nodes
[0];
1957 dir_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dir_item
);
1959 disk_key
.objectid
= cpu_to_le64(rec
->ino
);
1960 disk_key
.type
= BTRFS_INODE_ITEM_KEY
;
1961 disk_key
.offset
= 0;
1963 btrfs_set_dir_item_key(leaf
, dir_item
, &disk_key
);
1964 btrfs_set_dir_type(leaf
, dir_item
, imode_to_type(rec
->imode
));
1965 btrfs_set_dir_data_len(leaf
, dir_item
, 0);
1966 btrfs_set_dir_name_len(leaf
, dir_item
, backref
->namelen
);
1967 name_ptr
= (unsigned long)(dir_item
+ 1);
1968 write_extent_buffer(leaf
, backref
->name
, name_ptr
, backref
->namelen
);
1969 btrfs_mark_buffer_dirty(leaf
);
1970 btrfs_free_path(path
);
1971 btrfs_commit_transaction(trans
, root
);
1973 backref
->found_dir_index
= 1;
1974 dir_rec
= get_inode_rec(inode_cache
, backref
->dir
, 0);
1977 dir_rec
->found_size
+= backref
->namelen
;
1978 if (dir_rec
->found_size
== dir_rec
->isize
&&
1979 (dir_rec
->errors
& I_ERR_DIR_ISIZE_WRONG
))
1980 dir_rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1981 if (dir_rec
->found_size
!= dir_rec
->isize
)
1982 dir_rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
1987 static int delete_dir_index(struct btrfs_root
*root
,
1988 struct cache_tree
*inode_cache
,
1989 struct inode_record
*rec
,
1990 struct inode_backref
*backref
)
1992 struct btrfs_trans_handle
*trans
;
1993 struct btrfs_dir_item
*di
;
1994 struct btrfs_path
*path
;
1997 path
= btrfs_alloc_path();
2001 trans
= btrfs_start_transaction(root
, 1);
2002 if (IS_ERR(trans
)) {
2003 btrfs_free_path(path
);
2004 return PTR_ERR(trans
);
2008 fprintf(stderr
, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2009 (unsigned long long)backref
->dir
,
2010 BTRFS_DIR_INDEX_KEY
, (unsigned long long)backref
->index
,
2011 (unsigned long long)root
->objectid
);
2013 di
= btrfs_lookup_dir_index(trans
, root
, path
, backref
->dir
,
2014 backref
->name
, backref
->namelen
,
2015 backref
->index
, -1);
2018 btrfs_free_path(path
);
2019 btrfs_commit_transaction(trans
, root
);
2026 ret
= btrfs_del_item(trans
, root
, path
);
2028 ret
= btrfs_delete_one_dir_name(trans
, root
, path
, di
);
2030 btrfs_free_path(path
);
2031 btrfs_commit_transaction(trans
, root
);
2035 static int create_inode_item(struct btrfs_root
*root
,
2036 struct inode_record
*rec
,
2037 struct inode_backref
*backref
, int root_dir
)
2039 struct btrfs_trans_handle
*trans
;
2040 struct btrfs_inode_item inode_item
;
2041 time_t now
= time(NULL
);
2044 trans
= btrfs_start_transaction(root
, 1);
2045 if (IS_ERR(trans
)) {
2046 ret
= PTR_ERR(trans
);
2050 fprintf(stderr
, "root %llu inode %llu recreating inode item, this may "
2051 "be incomplete, please check permissions and content after "
2052 "the fsck completes.\n", (unsigned long long)root
->objectid
,
2053 (unsigned long long)rec
->ino
);
2055 memset(&inode_item
, 0, sizeof(inode_item
));
2056 btrfs_set_stack_inode_generation(&inode_item
, trans
->transid
);
2058 btrfs_set_stack_inode_nlink(&inode_item
, 1);
2060 btrfs_set_stack_inode_nlink(&inode_item
, rec
->found_link
);
2061 btrfs_set_stack_inode_nbytes(&inode_item
, rec
->found_size
);
2062 if (rec
->found_dir_item
) {
2063 if (rec
->found_file_extent
)
2064 fprintf(stderr
, "root %llu inode %llu has both a dir "
2065 "item and extents, unsure if it is a dir or a "
2066 "regular file so setting it as a directory\n",
2067 (unsigned long long)root
->objectid
,
2068 (unsigned long long)rec
->ino
);
2069 btrfs_set_stack_inode_mode(&inode_item
, S_IFDIR
| 0755);
2070 btrfs_set_stack_inode_size(&inode_item
, rec
->found_size
);
2071 } else if (!rec
->found_dir_item
) {
2072 btrfs_set_stack_inode_size(&inode_item
, rec
->extent_end
);
2073 btrfs_set_stack_inode_mode(&inode_item
, S_IFREG
| 0755);
2075 btrfs_set_stack_timespec_sec(&inode_item
.atime
, now
);
2076 btrfs_set_stack_timespec_nsec(&inode_item
.atime
, 0);
2077 btrfs_set_stack_timespec_sec(&inode_item
.ctime
, now
);
2078 btrfs_set_stack_timespec_nsec(&inode_item
.ctime
, 0);
2079 btrfs_set_stack_timespec_sec(&inode_item
.mtime
, now
);
2080 btrfs_set_stack_timespec_nsec(&inode_item
.mtime
, 0);
2081 btrfs_set_stack_timespec_sec(&inode_item
.otime
, 0);
2082 btrfs_set_stack_timespec_nsec(&inode_item
.otime
, 0);
2084 ret
= btrfs_insert_inode(trans
, root
, rec
->ino
, &inode_item
);
2086 btrfs_commit_transaction(trans
, root
);
2090 static int repair_inode_backrefs(struct btrfs_root
*root
,
2091 struct inode_record
*rec
,
2092 struct cache_tree
*inode_cache
,
2095 struct inode_backref
*tmp
, *backref
;
2096 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
2100 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
2101 if (!delete && rec
->ino
== root_dirid
) {
2102 if (!rec
->found_inode_item
) {
2103 ret
= create_inode_item(root
, rec
, backref
, 1);
2110 /* Index 0 for root dir's are special, don't mess with it */
2111 if (rec
->ino
== root_dirid
&& backref
->index
== 0)
2115 ((backref
->found_dir_index
&& !backref
->found_inode_ref
) ||
2116 (backref
->found_dir_index
&& backref
->found_inode_ref
&&
2117 (backref
->errors
& REF_ERR_INDEX_UNMATCH
)))) {
2118 ret
= delete_dir_index(root
, inode_cache
, rec
, backref
);
2122 list_del(&backref
->list
);
2126 if (!delete && !backref
->found_dir_index
&&
2127 backref
->found_dir_item
&& backref
->found_inode_ref
) {
2128 ret
= add_missing_dir_index(root
, inode_cache
, rec
,
2133 if (backref
->found_dir_item
&&
2134 backref
->found_dir_index
&&
2135 backref
->found_dir_index
) {
2136 if (!backref
->errors
&&
2137 backref
->found_inode_ref
) {
2138 list_del(&backref
->list
);
2144 if (!delete && (!backref
->found_dir_index
&&
2145 !backref
->found_dir_item
&&
2146 backref
->found_inode_ref
)) {
2147 struct btrfs_trans_handle
*trans
;
2148 struct btrfs_key location
;
2150 ret
= check_dir_conflict(root
, backref
->name
,
2156 * let nlink fixing routine to handle it,
2157 * which can do it better.
2162 location
.objectid
= rec
->ino
;
2163 location
.type
= BTRFS_INODE_ITEM_KEY
;
2164 location
.offset
= 0;
2166 trans
= btrfs_start_transaction(root
, 1);
2167 if (IS_ERR(trans
)) {
2168 ret
= PTR_ERR(trans
);
2171 fprintf(stderr
, "adding missing dir index/item pair "
2173 (unsigned long long)rec
->ino
);
2174 ret
= btrfs_insert_dir_item(trans
, root
, backref
->name
,
2176 backref
->dir
, &location
,
2177 imode_to_type(rec
->imode
),
2180 btrfs_commit_transaction(trans
, root
);
2184 if (!delete && (backref
->found_inode_ref
&&
2185 backref
->found_dir_index
&&
2186 backref
->found_dir_item
&&
2187 !(backref
->errors
& REF_ERR_INDEX_UNMATCH
) &&
2188 !rec
->found_inode_item
)) {
2189 ret
= create_inode_item(root
, rec
, backref
, 0);
2196 return ret
? ret
: repaired
;
2200 * To determine the file type for nlink/inode_item repair
2202 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2203 * Return -ENOENT if file type is not found.
2205 static int find_file_type(struct inode_record
*rec
, u8
*type
)
2207 struct inode_backref
*backref
;
2209 /* For inode item recovered case */
2210 if (rec
->found_inode_item
) {
2211 *type
= imode_to_type(rec
->imode
);
2215 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2216 if (backref
->found_dir_index
|| backref
->found_dir_item
) {
2217 *type
= backref
->filetype
;
2225 * To determine the file name for nlink repair
2227 * Return 0 if file name is found, set name and namelen.
2228 * Return -ENOENT if file name is not found.
2230 static int find_file_name(struct inode_record
*rec
,
2231 char *name
, int *namelen
)
2233 struct inode_backref
*backref
;
2235 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2236 if (backref
->found_dir_index
|| backref
->found_dir_item
||
2237 backref
->found_inode_ref
) {
2238 memcpy(name
, backref
->name
, backref
->namelen
);
2239 *namelen
= backref
->namelen
;
2246 /* Reset the nlink of the inode to the correct one */
2247 static int reset_nlink(struct btrfs_trans_handle
*trans
,
2248 struct btrfs_root
*root
,
2249 struct btrfs_path
*path
,
2250 struct inode_record
*rec
)
2252 struct inode_backref
*backref
;
2253 struct inode_backref
*tmp
;
2254 struct btrfs_key key
;
2255 struct btrfs_inode_item
*inode_item
;
2258 /* We don't believe this either, reset it and iterate backref */
2259 rec
->found_link
= 0;
2261 /* Remove all backref including the valid ones */
2262 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
2263 ret
= btrfs_unlink(trans
, root
, rec
->ino
, backref
->dir
,
2264 backref
->index
, backref
->name
,
2265 backref
->namelen
, 0);
2269 /* remove invalid backref, so it won't be added back */
2270 if (!(backref
->found_dir_index
&&
2271 backref
->found_dir_item
&&
2272 backref
->found_inode_ref
)) {
2273 list_del(&backref
->list
);
2280 /* Set nlink to 0 */
2281 key
.objectid
= rec
->ino
;
2282 key
.type
= BTRFS_INODE_ITEM_KEY
;
2284 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
2291 inode_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2292 struct btrfs_inode_item
);
2293 btrfs_set_inode_nlink(path
->nodes
[0], inode_item
, 0);
2294 btrfs_mark_buffer_dirty(path
->nodes
[0]);
2295 btrfs_release_path(path
);
2298 * Add back valid inode_ref/dir_item/dir_index,
2299 * add_link() will handle the nlink inc, so new nlink must be correct
2301 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2302 ret
= btrfs_add_link(trans
, root
, rec
->ino
, backref
->dir
,
2303 backref
->name
, backref
->namelen
,
2304 backref
->ref_type
, &backref
->index
, 1);
2309 btrfs_release_path(path
);
2313 static int repair_inode_nlinks(struct btrfs_trans_handle
*trans
,
2314 struct btrfs_root
*root
,
2315 struct btrfs_path
*path
,
2316 struct inode_record
*rec
)
2318 char *dir_name
= "lost+found";
2319 char namebuf
[BTRFS_NAME_LEN
] = {0};
2324 int name_recovered
= 0;
2325 int type_recovered
= 0;
2329 * Get file name and type first before these invalid inode ref
2330 * are deleted by remove_all_invalid_backref()
2332 name_recovered
= !find_file_name(rec
, namebuf
, &namelen
);
2333 type_recovered
= !find_file_type(rec
, &type
);
2335 if (!name_recovered
) {
2336 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2337 rec
->ino
, rec
->ino
);
2338 namelen
= count_digits(rec
->ino
);
2339 sprintf(namebuf
, "%llu", rec
->ino
);
2342 if (!type_recovered
) {
2343 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2345 type
= BTRFS_FT_REG_FILE
;
2349 ret
= reset_nlink(trans
, root
, path
, rec
);
2352 "Failed to reset nlink for inode %llu: %s\n",
2353 rec
->ino
, strerror(-ret
));
2357 if (rec
->found_link
== 0) {
2358 lost_found_ino
= root
->highest_inode
;
2359 if (lost_found_ino
>= BTRFS_LAST_FREE_OBJECTID
) {
2364 ret
= btrfs_mkdir(trans
, root
, dir_name
, strlen(dir_name
),
2365 BTRFS_FIRST_FREE_OBJECTID
, &lost_found_ino
,
2368 fprintf(stderr
, "Failed to create '%s' dir: %s",
2369 dir_name
, strerror(-ret
));
2372 ret
= btrfs_add_link(trans
, root
, rec
->ino
, lost_found_ino
,
2373 namebuf
, namelen
, type
, NULL
, 1);
2375 * Add ".INO" suffix several times to handle case where
2376 * "FILENAME.INO" is already taken by another file.
2378 while (ret
== -EEXIST
) {
2380 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2382 if (namelen
+ count_digits(rec
->ino
) + 1 >
2387 snprintf(namebuf
+ namelen
, BTRFS_NAME_LEN
- namelen
,
2389 namelen
+= count_digits(rec
->ino
) + 1;
2390 ret
= btrfs_add_link(trans
, root
, rec
->ino
,
2391 lost_found_ino
, namebuf
,
2392 namelen
, type
, NULL
, 1);
2396 "Failed to link the inode %llu to %s dir: %s",
2397 rec
->ino
, dir_name
, strerror(-ret
));
2401 * Just increase the found_link, don't actually add the
2402 * backref. This will make things easier and this inode
2403 * record will be freed after the repair is done.
2404 * So fsck will not report problem about this inode.
2407 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2408 namelen
, namebuf
, dir_name
);
2410 printf("Fixed the nlink of inode %llu\n", rec
->ino
);
2413 * Clear the flag anyway, or we will loop forever for the same inode
2414 * as it will not be removed from the bad inode list and the dead loop
2417 rec
->errors
&= ~I_ERR_LINK_COUNT_WRONG
;
2418 btrfs_release_path(path
);
2423 * Check if there is any normal(reg or prealloc) file extent for given
2425 * This is used to determine the file type when neither its dir_index/item or
2426 * inode_item exists.
2428 * This will *NOT* report error, if any error happens, just consider it does
2429 * not have any normal file extent.
2431 static int find_normal_file_extent(struct btrfs_root
*root
, u64 ino
)
2433 struct btrfs_path
*path
;
2434 struct btrfs_key key
;
2435 struct btrfs_key found_key
;
2436 struct btrfs_file_extent_item
*fi
;
2440 path
= btrfs_alloc_path();
2444 key
.type
= BTRFS_EXTENT_DATA_KEY
;
2447 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
2452 if (ret
&& path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
2453 ret
= btrfs_next_leaf(root
, path
);
2460 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
2462 if (found_key
.objectid
!= ino
||
2463 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
)
2465 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2466 struct btrfs_file_extent_item
);
2467 type
= btrfs_file_extent_type(path
->nodes
[0], fi
);
2468 if (type
!= BTRFS_FILE_EXTENT_INLINE
) {
2474 btrfs_free_path(path
);
2478 static u32
btrfs_type_to_imode(u8 type
)
2480 static u32 imode_by_btrfs_type
[] = {
2481 [BTRFS_FT_REG_FILE
] = S_IFREG
,
2482 [BTRFS_FT_DIR
] = S_IFDIR
,
2483 [BTRFS_FT_CHRDEV
] = S_IFCHR
,
2484 [BTRFS_FT_BLKDEV
] = S_IFBLK
,
2485 [BTRFS_FT_FIFO
] = S_IFIFO
,
2486 [BTRFS_FT_SOCK
] = S_IFSOCK
,
2487 [BTRFS_FT_SYMLINK
] = S_IFLNK
,
2490 return imode_by_btrfs_type
[(type
)];
2493 static int repair_inode_no_item(struct btrfs_trans_handle
*trans
,
2494 struct btrfs_root
*root
,
2495 struct btrfs_path
*path
,
2496 struct inode_record
*rec
)
2500 int type_recovered
= 0;
2503 printf("Trying to rebuild inode:%llu\n", rec
->ino
);
2505 type_recovered
= !find_file_type(rec
, &filetype
);
2508 * Try to determine inode type if type not found.
2510 * For found regular file extent, it must be FILE.
2511 * For found dir_item/index, it must be DIR.
2513 * For undetermined one, use FILE as fallback.
2516 * 1. If found backref(inode_index/item is already handled) to it,
2518 * Need new inode-inode ref structure to allow search for that.
2520 if (!type_recovered
) {
2521 if (rec
->found_file_extent
&&
2522 find_normal_file_extent(root
, rec
->ino
)) {
2524 filetype
= BTRFS_FT_REG_FILE
;
2525 } else if (rec
->found_dir_item
) {
2527 filetype
= BTRFS_FT_DIR
;
2528 } else if (!list_empty(&rec
->orphan_extents
)) {
2530 filetype
= BTRFS_FT_REG_FILE
;
2532 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2535 filetype
= BTRFS_FT_REG_FILE
;
2539 ret
= btrfs_new_inode(trans
, root
, rec
->ino
,
2540 mode
| btrfs_type_to_imode(filetype
));
2545 * Here inode rebuild is done, we only rebuild the inode item,
2546 * don't repair the nlink(like move to lost+found).
2547 * That is the job of nlink repair.
2549 * We just fill the record and return
2551 rec
->found_dir_item
= 1;
2552 rec
->imode
= mode
| btrfs_type_to_imode(filetype
);
2554 rec
->errors
&= ~I_ERR_NO_INODE_ITEM
;
2555 /* Ensure the inode_nlinks repair function will be called */
2556 rec
->errors
|= I_ERR_LINK_COUNT_WRONG
;
2561 static int repair_inode_orphan_extent(struct btrfs_trans_handle
*trans
,
2562 struct btrfs_root
*root
,
2563 struct btrfs_path
*path
,
2564 struct inode_record
*rec
)
2566 struct orphan_data_extent
*orphan
;
2567 struct orphan_data_extent
*tmp
;
2570 list_for_each_entry_safe(orphan
, tmp
, &rec
->orphan_extents
, list
) {
2572 * Check for conflicting file extents
2574 * Here we don't know whether the extents is compressed or not,
2575 * so we can only assume it not compressed nor data offset,
2576 * and use its disk_len as extent length.
2578 ret
= btrfs_get_extent(NULL
, root
, path
, orphan
->objectid
,
2579 orphan
->offset
, orphan
->disk_len
, 0);
2580 btrfs_release_path(path
);
2585 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2586 orphan
->disk_bytenr
, orphan
->disk_len
);
2587 ret
= btrfs_free_extent(trans
,
2588 root
->fs_info
->extent_root
,
2589 orphan
->disk_bytenr
, orphan
->disk_len
,
2590 0, root
->objectid
, orphan
->objectid
,
2595 ret
= btrfs_insert_file_extent(trans
, root
, orphan
->objectid
,
2596 orphan
->offset
, orphan
->disk_bytenr
,
2597 orphan
->disk_len
, orphan
->disk_len
);
2601 /* Update file size info */
2602 rec
->found_size
+= orphan
->disk_len
;
2603 if (rec
->found_size
== rec
->nbytes
)
2604 rec
->errors
&= ~I_ERR_FILE_NBYTES_WRONG
;
2606 /* Update the file extent hole info too */
2607 ret
= del_file_extent_hole(&rec
->holes
, orphan
->offset
,
2611 if (RB_EMPTY_ROOT(&rec
->holes
))
2612 rec
->errors
&= ~I_ERR_FILE_EXTENT_DISCOUNT
;
2614 list_del(&orphan
->list
);
2617 rec
->errors
&= ~I_ERR_FILE_EXTENT_ORPHAN
;
2622 static int repair_inode_discount_extent(struct btrfs_trans_handle
*trans
,
2623 struct btrfs_root
*root
,
2624 struct btrfs_path
*path
,
2625 struct inode_record
*rec
)
2627 struct rb_node
*node
;
2628 struct file_extent_hole
*hole
;
2631 node
= rb_first(&rec
->holes
);
2634 hole
= rb_entry(node
, struct file_extent_hole
, node
);
2635 ret
= btrfs_punch_hole(trans
, root
, rec
->ino
,
2636 hole
->start
, hole
->len
);
2639 ret
= del_file_extent_hole(&rec
->holes
, hole
->start
,
2643 if (RB_EMPTY_ROOT(&rec
->holes
))
2644 rec
->errors
&= ~I_ERR_FILE_EXTENT_DISCOUNT
;
2645 node
= rb_first(&rec
->holes
);
2647 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2648 rec
->ino
, root
->objectid
);
2653 static int try_repair_inode(struct btrfs_root
*root
, struct inode_record
*rec
)
2655 struct btrfs_trans_handle
*trans
;
2656 struct btrfs_path
*path
;
2659 if (!(rec
->errors
& (I_ERR_DIR_ISIZE_WRONG
|
2660 I_ERR_NO_ORPHAN_ITEM
|
2661 I_ERR_LINK_COUNT_WRONG
|
2662 I_ERR_NO_INODE_ITEM
|
2663 I_ERR_FILE_EXTENT_ORPHAN
|
2664 I_ERR_FILE_EXTENT_DISCOUNT
)))
2667 path
= btrfs_alloc_path();
2672 * For nlink repair, it may create a dir and add link, so
2673 * 2 for parent(256)'s dir_index and dir_item
2674 * 2 for lost+found dir's inode_item and inode_ref
2675 * 1 for the new inode_ref of the file
2676 * 2 for lost+found dir's dir_index and dir_item for the file
2678 trans
= btrfs_start_transaction(root
, 7);
2679 if (IS_ERR(trans
)) {
2680 btrfs_free_path(path
);
2681 return PTR_ERR(trans
);
2684 if (rec
->errors
& I_ERR_NO_INODE_ITEM
)
2685 ret
= repair_inode_no_item(trans
, root
, path
, rec
);
2686 if (!ret
&& rec
->errors
& I_ERR_FILE_EXTENT_ORPHAN
)
2687 ret
= repair_inode_orphan_extent(trans
, root
, path
, rec
);
2688 if (!ret
&& rec
->errors
& I_ERR_FILE_EXTENT_DISCOUNT
)
2689 ret
= repair_inode_discount_extent(trans
, root
, path
, rec
);
2690 if (!ret
&& rec
->errors
& I_ERR_DIR_ISIZE_WRONG
)
2691 ret
= repair_inode_isize(trans
, root
, path
, rec
);
2692 if (!ret
&& rec
->errors
& I_ERR_NO_ORPHAN_ITEM
)
2693 ret
= repair_inode_orphan_item(trans
, root
, path
, rec
);
2694 if (!ret
&& rec
->errors
& I_ERR_LINK_COUNT_WRONG
)
2695 ret
= repair_inode_nlinks(trans
, root
, path
, rec
);
2696 btrfs_commit_transaction(trans
, root
);
2697 btrfs_free_path(path
);
2701 static int check_inode_recs(struct btrfs_root
*root
,
2702 struct cache_tree
*inode_cache
)
2704 struct cache_extent
*cache
;
2705 struct ptr_node
*node
;
2706 struct inode_record
*rec
;
2707 struct inode_backref
*backref
;
2712 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
2714 if (btrfs_root_refs(&root
->root_item
) == 0) {
2715 if (!cache_tree_empty(inode_cache
))
2716 fprintf(stderr
, "warning line %d\n", __LINE__
);
2721 * We need to record the highest inode number for later 'lost+found'
2723 * We must select a ino not used/refered by any existing inode, or
2724 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2725 * this may cause 'lost+found' dir has wrong nlinks.
2727 cache
= last_cache_extent(inode_cache
);
2729 node
= container_of(cache
, struct ptr_node
, cache
);
2731 if (rec
->ino
> root
->highest_inode
)
2732 root
->highest_inode
= rec
->ino
;
2736 * We need to repair backrefs first because we could change some of the
2737 * errors in the inode recs.
2739 * We also need to go through and delete invalid backrefs first and then
2740 * add the correct ones second. We do this because we may get EEXIST
2741 * when adding back the correct index because we hadn't yet deleted the
2744 * For example, if we were missing a dir index then the directories
2745 * isize would be wrong, so if we fixed the isize to what we thought it
2746 * would be and then fixed the backref we'd still have a invalid fs, so
2747 * we need to add back the dir index and then check to see if the isize
2752 if (stage
== 3 && !err
)
2755 cache
= search_cache_extent(inode_cache
, 0);
2756 while (repair
&& cache
) {
2757 node
= container_of(cache
, struct ptr_node
, cache
);
2759 cache
= next_cache_extent(cache
);
2761 /* Need to free everything up and rescan */
2763 remove_cache_extent(inode_cache
, &node
->cache
);
2765 free_inode_rec(rec
);
2769 if (list_empty(&rec
->backrefs
))
2772 ret
= repair_inode_backrefs(root
, rec
, inode_cache
,
2786 rec
= get_inode_rec(inode_cache
, root_dirid
, 0);
2788 ret
= check_root_dir(rec
);
2790 fprintf(stderr
, "root %llu root dir %llu error\n",
2791 (unsigned long long)root
->root_key
.objectid
,
2792 (unsigned long long)root_dirid
);
2793 print_inode_error(root
, rec
);
2798 struct btrfs_trans_handle
*trans
;
2800 trans
= btrfs_start_transaction(root
, 1);
2801 if (IS_ERR(trans
)) {
2802 err
= PTR_ERR(trans
);
2807 "root %llu missing its root dir, recreating\n",
2808 (unsigned long long)root
->objectid
);
2810 ret
= btrfs_make_root_dir(trans
, root
, root_dirid
);
2813 btrfs_commit_transaction(trans
, root
);
2817 fprintf(stderr
, "root %llu root dir %llu not found\n",
2818 (unsigned long long)root
->root_key
.objectid
,
2819 (unsigned long long)root_dirid
);
2823 cache
= search_cache_extent(inode_cache
, 0);
2826 node
= container_of(cache
, struct ptr_node
, cache
);
2828 remove_cache_extent(inode_cache
, &node
->cache
);
2830 if (rec
->ino
== root_dirid
||
2831 rec
->ino
== BTRFS_ORPHAN_OBJECTID
) {
2832 free_inode_rec(rec
);
2836 if (rec
->errors
& I_ERR_NO_ORPHAN_ITEM
) {
2837 ret
= check_orphan_item(root
, rec
->ino
);
2839 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
2840 if (can_free_inode_rec(rec
)) {
2841 free_inode_rec(rec
);
2846 if (!rec
->found_inode_item
)
2847 rec
->errors
|= I_ERR_NO_INODE_ITEM
;
2848 if (rec
->found_link
!= rec
->nlink
)
2849 rec
->errors
|= I_ERR_LINK_COUNT_WRONG
;
2851 ret
= try_repair_inode(root
, rec
);
2852 if (ret
== 0 && can_free_inode_rec(rec
)) {
2853 free_inode_rec(rec
);
2859 if (!(repair
&& ret
== 0))
2861 print_inode_error(root
, rec
);
2862 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2863 if (!backref
->found_dir_item
)
2864 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
2865 if (!backref
->found_dir_index
)
2866 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
2867 if (!backref
->found_inode_ref
)
2868 backref
->errors
|= REF_ERR_NO_INODE_REF
;
2869 fprintf(stderr
, "\tunresolved ref dir %llu index %llu"
2870 " namelen %u name %s filetype %d errors %x",
2871 (unsigned long long)backref
->dir
,
2872 (unsigned long long)backref
->index
,
2873 backref
->namelen
, backref
->name
,
2874 backref
->filetype
, backref
->errors
);
2875 print_ref_error(backref
->errors
);
2877 free_inode_rec(rec
);
2879 return (error
> 0) ? -1 : 0;
2882 static struct root_record
*get_root_rec(struct cache_tree
*root_cache
,
2885 struct cache_extent
*cache
;
2886 struct root_record
*rec
= NULL
;
2889 cache
= lookup_cache_extent(root_cache
, objectid
, 1);
2891 rec
= container_of(cache
, struct root_record
, cache
);
2893 rec
= calloc(1, sizeof(*rec
));
2894 rec
->objectid
= objectid
;
2895 INIT_LIST_HEAD(&rec
->backrefs
);
2896 rec
->cache
.start
= objectid
;
2897 rec
->cache
.size
= 1;
2899 ret
= insert_cache_extent(root_cache
, &rec
->cache
);
2905 static struct root_backref
*get_root_backref(struct root_record
*rec
,
2906 u64 ref_root
, u64 dir
, u64 index
,
2907 const char *name
, int namelen
)
2909 struct root_backref
*backref
;
2911 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2912 if (backref
->ref_root
!= ref_root
|| backref
->dir
!= dir
||
2913 backref
->namelen
!= namelen
)
2915 if (memcmp(name
, backref
->name
, namelen
))
2920 backref
= malloc(sizeof(*backref
) + namelen
+ 1);
2921 memset(backref
, 0, sizeof(*backref
));
2922 backref
->ref_root
= ref_root
;
2924 backref
->index
= index
;
2925 backref
->namelen
= namelen
;
2926 memcpy(backref
->name
, name
, namelen
);
2927 backref
->name
[namelen
] = '\0';
2928 list_add_tail(&backref
->list
, &rec
->backrefs
);
2932 static void free_root_record(struct cache_extent
*cache
)
2934 struct root_record
*rec
;
2935 struct root_backref
*backref
;
2937 rec
= container_of(cache
, struct root_record
, cache
);
2938 while (!list_empty(&rec
->backrefs
)) {
2939 backref
= list_entry(rec
->backrefs
.next
,
2940 struct root_backref
, list
);
2941 list_del(&backref
->list
);
2948 FREE_EXTENT_CACHE_BASED_TREE(root_recs
, free_root_record
);
2950 static int add_root_backref(struct cache_tree
*root_cache
,
2951 u64 root_id
, u64 ref_root
, u64 dir
, u64 index
,
2952 const char *name
, int namelen
,
2953 int item_type
, int errors
)
2955 struct root_record
*rec
;
2956 struct root_backref
*backref
;
2958 rec
= get_root_rec(root_cache
, root_id
);
2959 backref
= get_root_backref(rec
, ref_root
, dir
, index
, name
, namelen
);
2961 backref
->errors
|= errors
;
2963 if (item_type
!= BTRFS_DIR_ITEM_KEY
) {
2964 if (backref
->found_dir_index
|| backref
->found_back_ref
||
2965 backref
->found_forward_ref
) {
2966 if (backref
->index
!= index
)
2967 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
2969 backref
->index
= index
;
2973 if (item_type
== BTRFS_DIR_ITEM_KEY
) {
2974 if (backref
->found_forward_ref
)
2976 backref
->found_dir_item
= 1;
2977 } else if (item_type
== BTRFS_DIR_INDEX_KEY
) {
2978 backref
->found_dir_index
= 1;
2979 } else if (item_type
== BTRFS_ROOT_REF_KEY
) {
2980 if (backref
->found_forward_ref
)
2981 backref
->errors
|= REF_ERR_DUP_ROOT_REF
;
2982 else if (backref
->found_dir_item
)
2984 backref
->found_forward_ref
= 1;
2985 } else if (item_type
== BTRFS_ROOT_BACKREF_KEY
) {
2986 if (backref
->found_back_ref
)
2987 backref
->errors
|= REF_ERR_DUP_ROOT_BACKREF
;
2988 backref
->found_back_ref
= 1;
2993 if (backref
->found_forward_ref
&& backref
->found_dir_item
)
2994 backref
->reachable
= 1;
2998 static int merge_root_recs(struct btrfs_root
*root
,
2999 struct cache_tree
*src_cache
,
3000 struct cache_tree
*dst_cache
)
3002 struct cache_extent
*cache
;
3003 struct ptr_node
*node
;
3004 struct inode_record
*rec
;
3005 struct inode_backref
*backref
;
3008 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
3009 free_inode_recs_tree(src_cache
);
3014 cache
= search_cache_extent(src_cache
, 0);
3017 node
= container_of(cache
, struct ptr_node
, cache
);
3019 remove_cache_extent(src_cache
, &node
->cache
);
3022 ret
= is_child_root(root
, root
->objectid
, rec
->ino
);
3028 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3029 BUG_ON(backref
->found_inode_ref
);
3030 if (backref
->found_dir_item
)
3031 add_root_backref(dst_cache
, rec
->ino
,
3032 root
->root_key
.objectid
, backref
->dir
,
3033 backref
->index
, backref
->name
,
3034 backref
->namelen
, BTRFS_DIR_ITEM_KEY
,
3036 if (backref
->found_dir_index
)
3037 add_root_backref(dst_cache
, rec
->ino
,
3038 root
->root_key
.objectid
, backref
->dir
,
3039 backref
->index
, backref
->name
,
3040 backref
->namelen
, BTRFS_DIR_INDEX_KEY
,
3044 free_inode_rec(rec
);
3051 static int check_root_refs(struct btrfs_root
*root
,
3052 struct cache_tree
*root_cache
)
3054 struct root_record
*rec
;
3055 struct root_record
*ref_root
;
3056 struct root_backref
*backref
;
3057 struct cache_extent
*cache
;
3063 rec
= get_root_rec(root_cache
, BTRFS_FS_TREE_OBJECTID
);
3066 /* fixme: this can not detect circular references */
3069 cache
= search_cache_extent(root_cache
, 0);
3073 rec
= container_of(cache
, struct root_record
, cache
);
3074 cache
= next_cache_extent(cache
);
3076 if (rec
->found_ref
== 0)
3079 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3080 if (!backref
->reachable
)
3083 ref_root
= get_root_rec(root_cache
,
3085 if (ref_root
->found_ref
> 0)
3088 backref
->reachable
= 0;
3090 if (rec
->found_ref
== 0)
3096 cache
= search_cache_extent(root_cache
, 0);
3100 rec
= container_of(cache
, struct root_record
, cache
);
3101 cache
= next_cache_extent(cache
);
3103 if (rec
->found_ref
== 0 &&
3104 rec
->objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
3105 rec
->objectid
<= BTRFS_LAST_FREE_OBJECTID
) {
3106 ret
= check_orphan_item(root
->fs_info
->tree_root
,
3112 * If we don't have a root item then we likely just have
3113 * a dir item in a snapshot for this root but no actual
3114 * ref key or anything so it's meaningless.
3116 if (!rec
->found_root_item
)
3119 fprintf(stderr
, "fs tree %llu not referenced\n",
3120 (unsigned long long)rec
->objectid
);
3124 if (rec
->found_ref
> 0 && !rec
->found_root_item
)
3126 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3127 if (!backref
->found_dir_item
)
3128 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
3129 if (!backref
->found_dir_index
)
3130 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
3131 if (!backref
->found_back_ref
)
3132 backref
->errors
|= REF_ERR_NO_ROOT_BACKREF
;
3133 if (!backref
->found_forward_ref
)
3134 backref
->errors
|= REF_ERR_NO_ROOT_REF
;
3135 if (backref
->reachable
&& backref
->errors
)
3142 fprintf(stderr
, "fs tree %llu refs %u %s\n",
3143 (unsigned long long)rec
->objectid
, rec
->found_ref
,
3144 rec
->found_root_item
? "" : "not found");
3146 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3147 if (!backref
->reachable
)
3149 if (!backref
->errors
&& rec
->found_root_item
)
3151 fprintf(stderr
, "\tunresolved ref root %llu dir %llu"
3152 " index %llu namelen %u name %s errors %x\n",
3153 (unsigned long long)backref
->ref_root
,
3154 (unsigned long long)backref
->dir
,
3155 (unsigned long long)backref
->index
,
3156 backref
->namelen
, backref
->name
,
3158 print_ref_error(backref
->errors
);
3161 return errors
> 0 ? 1 : 0;
3164 static int process_root_ref(struct extent_buffer
*eb
, int slot
,
3165 struct btrfs_key
*key
,
3166 struct cache_tree
*root_cache
)
3172 struct btrfs_root_ref
*ref
;
3173 char namebuf
[BTRFS_NAME_LEN
];
3176 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_root_ref
);
3178 dirid
= btrfs_root_ref_dirid(eb
, ref
);
3179 index
= btrfs_root_ref_sequence(eb
, ref
);
3180 name_len
= btrfs_root_ref_name_len(eb
, ref
);
3182 if (name_len
<= BTRFS_NAME_LEN
) {
3186 len
= BTRFS_NAME_LEN
;
3187 error
= REF_ERR_NAME_TOO_LONG
;
3189 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
3191 if (key
->type
== BTRFS_ROOT_REF_KEY
) {
3192 add_root_backref(root_cache
, key
->offset
, key
->objectid
, dirid
,
3193 index
, namebuf
, len
, key
->type
, error
);
3195 add_root_backref(root_cache
, key
->objectid
, key
->offset
, dirid
,
3196 index
, namebuf
, len
, key
->type
, error
);
3201 static void free_corrupt_block(struct cache_extent
*cache
)
3203 struct btrfs_corrupt_block
*corrupt
;
3205 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
3209 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks
, free_corrupt_block
);
3212 * Repair the btree of the given root.
3214 * The fix is to remove the node key in corrupt_blocks cache_tree.
3215 * and rebalance the tree.
3216 * After the fix, the btree should be writeable.
3218 static int repair_btree(struct btrfs_root
*root
,
3219 struct cache_tree
*corrupt_blocks
)
3221 struct btrfs_trans_handle
*trans
;
3222 struct btrfs_path
*path
;
3223 struct btrfs_corrupt_block
*corrupt
;
3224 struct cache_extent
*cache
;
3225 struct btrfs_key key
;
3230 if (cache_tree_empty(corrupt_blocks
))
3233 path
= btrfs_alloc_path();
3237 trans
= btrfs_start_transaction(root
, 1);
3238 if (IS_ERR(trans
)) {
3239 ret
= PTR_ERR(trans
);
3240 fprintf(stderr
, "Error starting transaction: %s\n",
3244 cache
= first_cache_extent(corrupt_blocks
);
3246 corrupt
= container_of(cache
, struct btrfs_corrupt_block
,
3248 level
= corrupt
->level
;
3249 path
->lowest_level
= level
;
3250 key
.objectid
= corrupt
->key
.objectid
;
3251 key
.type
= corrupt
->key
.type
;
3252 key
.offset
= corrupt
->key
.offset
;
3255 * Here we don't want to do any tree balance, since it may
3256 * cause a balance with corrupted brother leaf/node,
3257 * so ins_len set to 0 here.
3258 * Balance will be done after all corrupt node/leaf is deleted.
3260 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
3263 offset
= btrfs_node_blockptr(path
->nodes
[level
],
3264 path
->slots
[level
]);
3266 /* Remove the ptr */
3267 ret
= btrfs_del_ptr(trans
, root
, path
, level
,
3268 path
->slots
[level
]);
3272 * Remove the corresponding extent
3273 * return value is not concerned.
3275 btrfs_release_path(path
);
3276 ret
= btrfs_free_extent(trans
, root
, offset
, root
->nodesize
,
3277 0, root
->root_key
.objectid
,
3279 cache
= next_cache_extent(cache
);
3282 /* Balance the btree using btrfs_search_slot() */
3283 cache
= first_cache_extent(corrupt_blocks
);
3285 corrupt
= container_of(cache
, struct btrfs_corrupt_block
,
3287 memcpy(&key
, &corrupt
->key
, sizeof(key
));
3288 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
3291 /* return will always >0 since it won't find the item */
3293 btrfs_release_path(path
);
3294 cache
= next_cache_extent(cache
);
3297 btrfs_commit_transaction(trans
, root
);
3299 btrfs_free_path(path
);
3303 static int check_fs_root(struct btrfs_root
*root
,
3304 struct cache_tree
*root_cache
,
3305 struct walk_control
*wc
)
3311 struct btrfs_path path
;
3312 struct shared_node root_node
;
3313 struct root_record
*rec
;
3314 struct btrfs_root_item
*root_item
= &root
->root_item
;
3315 struct cache_tree corrupt_blocks
;
3316 struct orphan_data_extent
*orphan
;
3317 struct orphan_data_extent
*tmp
;
3318 enum btrfs_tree_block_status status
;
3321 * Reuse the corrupt_block cache tree to record corrupted tree block
3323 * Unlike the usage in extent tree check, here we do it in a per
3324 * fs/subvol tree base.
3326 cache_tree_init(&corrupt_blocks
);
3327 root
->fs_info
->corrupt_blocks
= &corrupt_blocks
;
3329 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
3330 rec
= get_root_rec(root_cache
, root
->root_key
.objectid
);
3331 if (btrfs_root_refs(root_item
) > 0)
3332 rec
->found_root_item
= 1;
3335 btrfs_init_path(&path
);
3336 memset(&root_node
, 0, sizeof(root_node
));
3337 cache_tree_init(&root_node
.root_cache
);
3338 cache_tree_init(&root_node
.inode_cache
);
3340 /* Move the orphan extent record to corresponding inode_record */
3341 list_for_each_entry_safe(orphan
, tmp
,
3342 &root
->orphan_data_extents
, list
) {
3343 struct inode_record
*inode
;
3345 inode
= get_inode_rec(&root_node
.inode_cache
, orphan
->objectid
,
3347 inode
->errors
|= I_ERR_FILE_EXTENT_ORPHAN
;
3348 list_move(&orphan
->list
, &inode
->orphan_extents
);
3351 level
= btrfs_header_level(root
->node
);
3352 memset(wc
->nodes
, 0, sizeof(wc
->nodes
));
3353 wc
->nodes
[level
] = &root_node
;
3354 wc
->active_node
= level
;
3355 wc
->root_level
= level
;
3357 /* We may not have checked the root block, lets do that now */
3358 if (btrfs_is_leaf(root
->node
))
3359 status
= btrfs_check_leaf(root
, NULL
, root
->node
);
3361 status
= btrfs_check_node(root
, NULL
, root
->node
);
3362 if (status
!= BTRFS_TREE_BLOCK_CLEAN
)
3365 if (btrfs_root_refs(root_item
) > 0 ||
3366 btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3367 path
.nodes
[level
] = root
->node
;
3368 extent_buffer_get(root
->node
);
3369 path
.slots
[level
] = 0;
3371 struct btrfs_key key
;
3372 struct btrfs_disk_key found_key
;
3374 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3375 level
= root_item
->drop_level
;
3376 path
.lowest_level
= level
;
3377 wret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
3380 btrfs_node_key(path
.nodes
[level
], &found_key
,
3382 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3383 sizeof(found_key
)));
3387 wret
= walk_down_tree(root
, &path
, wc
, &level
);
3393 wret
= walk_up_tree(root
, &path
, wc
, &level
);
3400 btrfs_release_path(&path
);
3402 if (!cache_tree_empty(&corrupt_blocks
)) {
3403 struct cache_extent
*cache
;
3404 struct btrfs_corrupt_block
*corrupt
;
3406 printf("The following tree block(s) is corrupted in tree %llu:\n",
3407 root
->root_key
.objectid
);
3408 cache
= first_cache_extent(&corrupt_blocks
);
3410 corrupt
= container_of(cache
,
3411 struct btrfs_corrupt_block
,
3413 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3414 cache
->start
, corrupt
->level
,
3415 corrupt
->key
.objectid
, corrupt
->key
.type
,
3416 corrupt
->key
.offset
);
3417 cache
= next_cache_extent(cache
);
3420 printf("Try to repair the btree for root %llu\n",
3421 root
->root_key
.objectid
);
3422 ret
= repair_btree(root
, &corrupt_blocks
);
3424 fprintf(stderr
, "Failed to repair btree: %s\n",
3427 printf("Btree for root %llu is fixed\n",
3428 root
->root_key
.objectid
);
3432 err
= merge_root_recs(root
, &root_node
.root_cache
, root_cache
);
3436 if (root_node
.current
) {
3437 root_node
.current
->checked
= 1;
3438 maybe_free_inode_rec(&root_node
.inode_cache
,
3442 err
= check_inode_recs(root
, &root_node
.inode_cache
);
3446 free_corrupt_blocks_tree(&corrupt_blocks
);
3447 root
->fs_info
->corrupt_blocks
= NULL
;
3448 free_orphan_data_extents(&root
->orphan_data_extents
);
3452 static int fs_root_objectid(u64 objectid
)
3454 if (objectid
== BTRFS_TREE_RELOC_OBJECTID
||
3455 objectid
== BTRFS_DATA_RELOC_TREE_OBJECTID
)
3457 return is_fstree(objectid
);
3460 static int check_fs_roots(struct btrfs_root
*root
,
3461 struct cache_tree
*root_cache
)
3463 struct btrfs_path path
;
3464 struct btrfs_key key
;
3465 struct walk_control wc
;
3466 struct extent_buffer
*leaf
, *tree_node
;
3467 struct btrfs_root
*tmp_root
;
3468 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
3473 * Just in case we made any changes to the extent tree that weren't
3474 * reflected into the free space cache yet.
3477 reset_cached_block_groups(root
->fs_info
);
3478 memset(&wc
, 0, sizeof(wc
));
3479 cache_tree_init(&wc
.shared
);
3480 btrfs_init_path(&path
);
3485 key
.type
= BTRFS_ROOT_ITEM_KEY
;
3486 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, &path
, 0, 0);
3491 tree_node
= tree_root
->node
;
3493 if (tree_node
!= tree_root
->node
) {
3494 free_root_recs_tree(root_cache
);
3495 btrfs_release_path(&path
);
3498 leaf
= path
.nodes
[0];
3499 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
3500 ret
= btrfs_next_leaf(tree_root
, &path
);
3506 leaf
= path
.nodes
[0];
3508 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
3509 if (key
.type
== BTRFS_ROOT_ITEM_KEY
&&
3510 fs_root_objectid(key
.objectid
)) {
3511 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
3512 tmp_root
= btrfs_read_fs_root_no_cache(
3513 root
->fs_info
, &key
);
3515 key
.offset
= (u64
)-1;
3516 tmp_root
= btrfs_read_fs_root(
3517 root
->fs_info
, &key
);
3519 if (IS_ERR(tmp_root
)) {
3523 ret
= check_fs_root(tmp_root
, root_cache
, &wc
);
3524 if (ret
== -EAGAIN
) {
3525 free_root_recs_tree(root_cache
);
3526 btrfs_release_path(&path
);
3531 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
3532 btrfs_free_fs_root(tmp_root
);
3533 } else if (key
.type
== BTRFS_ROOT_REF_KEY
||
3534 key
.type
== BTRFS_ROOT_BACKREF_KEY
) {
3535 process_root_ref(leaf
, path
.slots
[0], &key
,
3542 btrfs_release_path(&path
);
3544 free_extent_cache_tree(&wc
.shared
);
3545 if (!cache_tree_empty(&wc
.shared
))
3546 fprintf(stderr
, "warning line %d\n", __LINE__
);
3551 static int all_backpointers_checked(struct extent_record
*rec
, int print_errs
)
3553 struct list_head
*cur
= rec
->backrefs
.next
;
3554 struct extent_backref
*back
;
3555 struct tree_backref
*tback
;
3556 struct data_backref
*dback
;
3560 while(cur
!= &rec
->backrefs
) {
3561 back
= list_entry(cur
, struct extent_backref
, list
);
3563 if (!back
->found_extent_tree
) {
3567 if (back
->is_data
) {
3568 dback
= (struct data_backref
*)back
;
3569 fprintf(stderr
, "Backref %llu %s %llu"
3570 " owner %llu offset %llu num_refs %lu"
3571 " not found in extent tree\n",
3572 (unsigned long long)rec
->start
,
3573 back
->full_backref
?
3575 back
->full_backref
?
3576 (unsigned long long)dback
->parent
:
3577 (unsigned long long)dback
->root
,
3578 (unsigned long long)dback
->owner
,
3579 (unsigned long long)dback
->offset
,
3580 (unsigned long)dback
->num_refs
);
3582 tback
= (struct tree_backref
*)back
;
3583 fprintf(stderr
, "Backref %llu parent %llu"
3584 " root %llu not found in extent tree\n",
3585 (unsigned long long)rec
->start
,
3586 (unsigned long long)tback
->parent
,
3587 (unsigned long long)tback
->root
);
3590 if (!back
->is_data
&& !back
->found_ref
) {
3594 tback
= (struct tree_backref
*)back
;
3595 fprintf(stderr
, "Backref %llu %s %llu not referenced back %p\n",
3596 (unsigned long long)rec
->start
,
3597 back
->full_backref
? "parent" : "root",
3598 back
->full_backref
?
3599 (unsigned long long)tback
->parent
:
3600 (unsigned long long)tback
->root
, back
);
3602 if (back
->is_data
) {
3603 dback
= (struct data_backref
*)back
;
3604 if (dback
->found_ref
!= dback
->num_refs
) {
3608 fprintf(stderr
, "Incorrect local backref count"
3609 " on %llu %s %llu owner %llu"
3610 " offset %llu found %u wanted %u back %p\n",
3611 (unsigned long long)rec
->start
,
3612 back
->full_backref
?
3614 back
->full_backref
?
3615 (unsigned long long)dback
->parent
:
3616 (unsigned long long)dback
->root
,
3617 (unsigned long long)dback
->owner
,
3618 (unsigned long long)dback
->offset
,
3619 dback
->found_ref
, dback
->num_refs
, back
);
3621 if (dback
->disk_bytenr
!= rec
->start
) {
3625 fprintf(stderr
, "Backref disk bytenr does not"
3626 " match extent record, bytenr=%llu, "
3627 "ref bytenr=%llu\n",
3628 (unsigned long long)rec
->start
,
3629 (unsigned long long)dback
->disk_bytenr
);
3632 if (dback
->bytes
!= rec
->nr
) {
3636 fprintf(stderr
, "Backref bytes do not match "
3637 "extent backref, bytenr=%llu, ref "
3638 "bytes=%llu, backref bytes=%llu\n",
3639 (unsigned long long)rec
->start
,
3640 (unsigned long long)rec
->nr
,
3641 (unsigned long long)dback
->bytes
);
3644 if (!back
->is_data
) {
3647 dback
= (struct data_backref
*)back
;
3648 found
+= dback
->found_ref
;
3651 if (found
!= rec
->refs
) {
3655 fprintf(stderr
, "Incorrect global backref count "
3656 "on %llu found %llu wanted %llu\n",
3657 (unsigned long long)rec
->start
,
3658 (unsigned long long)found
,
3659 (unsigned long long)rec
->refs
);
3665 static int free_all_extent_backrefs(struct extent_record
*rec
)
3667 struct extent_backref
*back
;
3668 struct list_head
*cur
;
3669 while (!list_empty(&rec
->backrefs
)) {
3670 cur
= rec
->backrefs
.next
;
3671 back
= list_entry(cur
, struct extent_backref
, list
);
3678 static void free_extent_record_cache(struct btrfs_fs_info
*fs_info
,
3679 struct cache_tree
*extent_cache
)
3681 struct cache_extent
*cache
;
3682 struct extent_record
*rec
;
3685 cache
= first_cache_extent(extent_cache
);
3688 rec
= container_of(cache
, struct extent_record
, cache
);
3689 remove_cache_extent(extent_cache
, cache
);
3690 free_all_extent_backrefs(rec
);
3695 static int maybe_free_extent_rec(struct cache_tree
*extent_cache
,
3696 struct extent_record
*rec
)
3698 if (rec
->content_checked
&& rec
->owner_ref_checked
&&
3699 rec
->extent_item_refs
== rec
->refs
&& rec
->refs
> 0 &&
3700 rec
->num_duplicates
== 0 && !all_backpointers_checked(rec
, 0) &&
3701 !rec
->bad_full_backref
) {
3702 remove_cache_extent(extent_cache
, &rec
->cache
);
3703 free_all_extent_backrefs(rec
);
3704 list_del_init(&rec
->list
);
3710 static int check_owner_ref(struct btrfs_root
*root
,
3711 struct extent_record
*rec
,
3712 struct extent_buffer
*buf
)
3714 struct extent_backref
*node
;
3715 struct tree_backref
*back
;
3716 struct btrfs_root
*ref_root
;
3717 struct btrfs_key key
;
3718 struct btrfs_path path
;
3719 struct extent_buffer
*parent
;
3724 list_for_each_entry(node
, &rec
->backrefs
, list
) {
3727 if (!node
->found_ref
)
3729 if (node
->full_backref
)
3731 back
= (struct tree_backref
*)node
;
3732 if (btrfs_header_owner(buf
) == back
->root
)
3735 BUG_ON(rec
->is_root
);
3737 /* try to find the block by search corresponding fs tree */
3738 key
.objectid
= btrfs_header_owner(buf
);
3739 key
.type
= BTRFS_ROOT_ITEM_KEY
;
3740 key
.offset
= (u64
)-1;
3742 ref_root
= btrfs_read_fs_root(root
->fs_info
, &key
);
3743 if (IS_ERR(ref_root
))
3746 level
= btrfs_header_level(buf
);
3748 btrfs_item_key_to_cpu(buf
, &key
, 0);
3750 btrfs_node_key_to_cpu(buf
, &key
, 0);
3752 btrfs_init_path(&path
);
3753 path
.lowest_level
= level
+ 1;
3754 ret
= btrfs_search_slot(NULL
, ref_root
, &key
, &path
, 0, 0);
3758 parent
= path
.nodes
[level
+ 1];
3759 if (parent
&& buf
->start
== btrfs_node_blockptr(parent
,
3760 path
.slots
[level
+ 1]))
3763 btrfs_release_path(&path
);
3764 return found
? 0 : 1;
3767 static int is_extent_tree_record(struct extent_record
*rec
)
3769 struct list_head
*cur
= rec
->backrefs
.next
;
3770 struct extent_backref
*node
;
3771 struct tree_backref
*back
;
3774 while(cur
!= &rec
->backrefs
) {
3775 node
= list_entry(cur
, struct extent_backref
, list
);
3779 back
= (struct tree_backref
*)node
;
3780 if (node
->full_backref
)
3782 if (back
->root
== BTRFS_EXTENT_TREE_OBJECTID
)
3789 static int record_bad_block_io(struct btrfs_fs_info
*info
,
3790 struct cache_tree
*extent_cache
,
3793 struct extent_record
*rec
;
3794 struct cache_extent
*cache
;
3795 struct btrfs_key key
;
3797 cache
= lookup_cache_extent(extent_cache
, start
, len
);
3801 rec
= container_of(cache
, struct extent_record
, cache
);
3802 if (!is_extent_tree_record(rec
))
3805 btrfs_disk_key_to_cpu(&key
, &rec
->parent_key
);
3806 return btrfs_add_corrupt_extent_record(info
, &key
, start
, len
, 0);
3809 static int swap_values(struct btrfs_root
*root
, struct btrfs_path
*path
,
3810 struct extent_buffer
*buf
, int slot
)
3812 if (btrfs_header_level(buf
)) {
3813 struct btrfs_key_ptr ptr1
, ptr2
;
3815 read_extent_buffer(buf
, &ptr1
, btrfs_node_key_ptr_offset(slot
),
3816 sizeof(struct btrfs_key_ptr
));
3817 read_extent_buffer(buf
, &ptr2
,
3818 btrfs_node_key_ptr_offset(slot
+ 1),
3819 sizeof(struct btrfs_key_ptr
));
3820 write_extent_buffer(buf
, &ptr1
,
3821 btrfs_node_key_ptr_offset(slot
+ 1),
3822 sizeof(struct btrfs_key_ptr
));
3823 write_extent_buffer(buf
, &ptr2
,
3824 btrfs_node_key_ptr_offset(slot
),
3825 sizeof(struct btrfs_key_ptr
));
3827 struct btrfs_disk_key key
;
3828 btrfs_node_key(buf
, &key
, 0);
3829 btrfs_fixup_low_keys(root
, path
, &key
,
3830 btrfs_header_level(buf
) + 1);
3833 struct btrfs_item
*item1
, *item2
;
3834 struct btrfs_key k1
, k2
;
3835 char *item1_data
, *item2_data
;
3836 u32 item1_offset
, item2_offset
, item1_size
, item2_size
;
3838 item1
= btrfs_item_nr(slot
);
3839 item2
= btrfs_item_nr(slot
+ 1);
3840 btrfs_item_key_to_cpu(buf
, &k1
, slot
);
3841 btrfs_item_key_to_cpu(buf
, &k2
, slot
+ 1);
3842 item1_offset
= btrfs_item_offset(buf
, item1
);
3843 item2_offset
= btrfs_item_offset(buf
, item2
);
3844 item1_size
= btrfs_item_size(buf
, item1
);
3845 item2_size
= btrfs_item_size(buf
, item2
);
3847 item1_data
= malloc(item1_size
);
3850 item2_data
= malloc(item2_size
);
3856 read_extent_buffer(buf
, item1_data
, item1_offset
, item1_size
);
3857 read_extent_buffer(buf
, item2_data
, item2_offset
, item2_size
);
3859 write_extent_buffer(buf
, item1_data
, item2_offset
, item2_size
);
3860 write_extent_buffer(buf
, item2_data
, item1_offset
, item1_size
);
3864 btrfs_set_item_offset(buf
, item1
, item2_offset
);
3865 btrfs_set_item_offset(buf
, item2
, item1_offset
);
3866 btrfs_set_item_size(buf
, item1
, item2_size
);
3867 btrfs_set_item_size(buf
, item2
, item1_size
);
3869 path
->slots
[0] = slot
;
3870 btrfs_set_item_key_unsafe(root
, path
, &k2
);
3871 path
->slots
[0] = slot
+ 1;
3872 btrfs_set_item_key_unsafe(root
, path
, &k1
);
3877 static int fix_key_order(struct btrfs_trans_handle
*trans
,
3878 struct btrfs_root
*root
,
3879 struct btrfs_path
*path
)
3881 struct extent_buffer
*buf
;
3882 struct btrfs_key k1
, k2
;
3884 int level
= path
->lowest_level
;
3887 buf
= path
->nodes
[level
];
3888 for (i
= 0; i
< btrfs_header_nritems(buf
) - 1; i
++) {
3890 btrfs_node_key_to_cpu(buf
, &k1
, i
);
3891 btrfs_node_key_to_cpu(buf
, &k2
, i
+ 1);
3893 btrfs_item_key_to_cpu(buf
, &k1
, i
);
3894 btrfs_item_key_to_cpu(buf
, &k2
, i
+ 1);
3896 if (btrfs_comp_cpu_keys(&k1
, &k2
) < 0)
3898 ret
= swap_values(root
, path
, buf
, i
);
3901 btrfs_mark_buffer_dirty(buf
);
3907 static int delete_bogus_item(struct btrfs_trans_handle
*trans
,
3908 struct btrfs_root
*root
,
3909 struct btrfs_path
*path
,
3910 struct extent_buffer
*buf
, int slot
)
3912 struct btrfs_key key
;
3913 int nritems
= btrfs_header_nritems(buf
);
3915 btrfs_item_key_to_cpu(buf
, &key
, slot
);
3917 /* These are all the keys we can deal with missing. */
3918 if (key
.type
!= BTRFS_DIR_INDEX_KEY
&&
3919 key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
3920 key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
3921 key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
3922 key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
)
3925 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3926 (unsigned long long)key
.objectid
, key
.type
,
3927 (unsigned long long)key
.offset
, slot
, buf
->start
);
3928 memmove_extent_buffer(buf
, btrfs_item_nr_offset(slot
),
3929 btrfs_item_nr_offset(slot
+ 1),
3930 sizeof(struct btrfs_item
) *
3931 (nritems
- slot
- 1));
3932 btrfs_set_header_nritems(buf
, nritems
- 1);
3934 struct btrfs_disk_key disk_key
;
3936 btrfs_item_key(buf
, &disk_key
, 0);
3937 btrfs_fixup_low_keys(root
, path
, &disk_key
, 1);
3939 btrfs_mark_buffer_dirty(buf
);
3943 static int fix_item_offset(struct btrfs_trans_handle
*trans
,
3944 struct btrfs_root
*root
,
3945 struct btrfs_path
*path
)
3947 struct extent_buffer
*buf
;
3951 /* We should only get this for leaves */
3952 BUG_ON(path
->lowest_level
);
3953 buf
= path
->nodes
[0];
3955 for (i
= 0; i
< btrfs_header_nritems(buf
); i
++) {
3956 unsigned int shift
= 0, offset
;
3958 if (i
== 0 && btrfs_item_end_nr(buf
, i
) !=
3959 BTRFS_LEAF_DATA_SIZE(root
)) {
3960 if (btrfs_item_end_nr(buf
, i
) >
3961 BTRFS_LEAF_DATA_SIZE(root
)) {
3962 ret
= delete_bogus_item(trans
, root
, path
,
3966 fprintf(stderr
, "item is off the end of the "
3967 "leaf, can't fix\n");
3971 shift
= BTRFS_LEAF_DATA_SIZE(root
) -
3972 btrfs_item_end_nr(buf
, i
);
3973 } else if (i
> 0 && btrfs_item_end_nr(buf
, i
) !=
3974 btrfs_item_offset_nr(buf
, i
- 1)) {
3975 if (btrfs_item_end_nr(buf
, i
) >
3976 btrfs_item_offset_nr(buf
, i
- 1)) {
3977 ret
= delete_bogus_item(trans
, root
, path
,
3981 fprintf(stderr
, "items overlap, can't fix\n");
3985 shift
= btrfs_item_offset_nr(buf
, i
- 1) -
3986 btrfs_item_end_nr(buf
, i
);
3991 printf("Shifting item nr %d by %u bytes in block %llu\n",
3992 i
, shift
, (unsigned long long)buf
->start
);
3993 offset
= btrfs_item_offset_nr(buf
, i
);
3994 memmove_extent_buffer(buf
,
3995 btrfs_leaf_data(buf
) + offset
+ shift
,
3996 btrfs_leaf_data(buf
) + offset
,
3997 btrfs_item_size_nr(buf
, i
));
3998 btrfs_set_item_offset(buf
, btrfs_item_nr(i
),
4000 btrfs_mark_buffer_dirty(buf
);
4004 * We may have moved things, in which case we want to exit so we don't
4005 * write those changes out. Once we have proper abort functionality in
4006 * progs this can be changed to something nicer.
4013 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4014 * then just return -EIO.
4016 static int try_to_fix_bad_block(struct btrfs_root
*root
,
4017 struct extent_buffer
*buf
,
4018 enum btrfs_tree_block_status status
)
4020 struct btrfs_trans_handle
*trans
;
4021 struct ulist
*roots
;
4022 struct ulist_node
*node
;
4023 struct btrfs_root
*search_root
;
4024 struct btrfs_path
*path
;
4025 struct ulist_iterator iter
;
4026 struct btrfs_key root_key
, key
;
4029 if (status
!= BTRFS_TREE_BLOCK_BAD_KEY_ORDER
&&
4030 status
!= BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
4033 path
= btrfs_alloc_path();
4037 ret
= btrfs_find_all_roots(NULL
, root
->fs_info
, buf
->start
,
4040 btrfs_free_path(path
);
4044 ULIST_ITER_INIT(&iter
);
4045 while ((node
= ulist_next(roots
, &iter
))) {
4046 root_key
.objectid
= node
->val
;
4047 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
4048 root_key
.offset
= (u64
)-1;
4050 search_root
= btrfs_read_fs_root(root
->fs_info
, &root_key
);
4057 trans
= btrfs_start_transaction(search_root
, 0);
4058 if (IS_ERR(trans
)) {
4059 ret
= PTR_ERR(trans
);
4063 path
->lowest_level
= btrfs_header_level(buf
);
4064 path
->skip_check_block
= 1;
4065 if (path
->lowest_level
)
4066 btrfs_node_key_to_cpu(buf
, &key
, 0);
4068 btrfs_item_key_to_cpu(buf
, &key
, 0);
4069 ret
= btrfs_search_slot(trans
, search_root
, &key
, path
, 0, 1);
4072 btrfs_commit_transaction(trans
, search_root
);
4075 if (status
== BTRFS_TREE_BLOCK_BAD_KEY_ORDER
)
4076 ret
= fix_key_order(trans
, search_root
, path
);
4077 else if (status
== BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
4078 ret
= fix_item_offset(trans
, search_root
, path
);
4080 btrfs_commit_transaction(trans
, search_root
);
4083 btrfs_release_path(path
);
4084 btrfs_commit_transaction(trans
, search_root
);
4087 btrfs_free_path(path
);
4091 static int check_block(struct btrfs_root
*root
,
4092 struct cache_tree
*extent_cache
,
4093 struct extent_buffer
*buf
, u64 flags
)
4095 struct extent_record
*rec
;
4096 struct cache_extent
*cache
;
4097 struct btrfs_key key
;
4098 enum btrfs_tree_block_status status
;
4102 cache
= lookup_cache_extent(extent_cache
, buf
->start
, buf
->len
);
4105 rec
= container_of(cache
, struct extent_record
, cache
);
4106 rec
->generation
= btrfs_header_generation(buf
);
4108 level
= btrfs_header_level(buf
);
4109 if (btrfs_header_nritems(buf
) > 0) {
4112 btrfs_item_key_to_cpu(buf
, &key
, 0);
4114 btrfs_node_key_to_cpu(buf
, &key
, 0);
4116 rec
->info_objectid
= key
.objectid
;
4118 rec
->info_level
= level
;
4120 if (btrfs_is_leaf(buf
))
4121 status
= btrfs_check_leaf(root
, &rec
->parent_key
, buf
);
4123 status
= btrfs_check_node(root
, &rec
->parent_key
, buf
);
4125 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
4127 status
= try_to_fix_bad_block(root
, buf
, status
);
4128 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
4130 fprintf(stderr
, "bad block %llu\n",
4131 (unsigned long long)buf
->start
);
4134 * Signal to callers we need to start the scan over
4135 * again since we'll have cow'ed blocks.
4140 rec
->content_checked
= 1;
4141 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4142 rec
->owner_ref_checked
= 1;
4144 ret
= check_owner_ref(root
, rec
, buf
);
4146 rec
->owner_ref_checked
= 1;
4150 maybe_free_extent_rec(extent_cache
, rec
);
4154 static struct tree_backref
*find_tree_backref(struct extent_record
*rec
,
4155 u64 parent
, u64 root
)
4157 struct list_head
*cur
= rec
->backrefs
.next
;
4158 struct extent_backref
*node
;
4159 struct tree_backref
*back
;
4161 while(cur
!= &rec
->backrefs
) {
4162 node
= list_entry(cur
, struct extent_backref
, list
);
4166 back
= (struct tree_backref
*)node
;
4168 if (!node
->full_backref
)
4170 if (parent
== back
->parent
)
4173 if (node
->full_backref
)
4175 if (back
->root
== root
)
4182 static struct tree_backref
*alloc_tree_backref(struct extent_record
*rec
,
4183 u64 parent
, u64 root
)
4185 struct tree_backref
*ref
= malloc(sizeof(*ref
));
4186 memset(&ref
->node
, 0, sizeof(ref
->node
));
4188 ref
->parent
= parent
;
4189 ref
->node
.full_backref
= 1;
4192 ref
->node
.full_backref
= 0;
4194 list_add_tail(&ref
->node
.list
, &rec
->backrefs
);
4199 static struct data_backref
*find_data_backref(struct extent_record
*rec
,
4200 u64 parent
, u64 root
,
4201 u64 owner
, u64 offset
,
4203 u64 disk_bytenr
, u64 bytes
)
4205 struct list_head
*cur
= rec
->backrefs
.next
;
4206 struct extent_backref
*node
;
4207 struct data_backref
*back
;
4209 while(cur
!= &rec
->backrefs
) {
4210 node
= list_entry(cur
, struct extent_backref
, list
);
4214 back
= (struct data_backref
*)node
;
4216 if (!node
->full_backref
)
4218 if (parent
== back
->parent
)
4221 if (node
->full_backref
)
4223 if (back
->root
== root
&& back
->owner
== owner
&&
4224 back
->offset
== offset
) {
4225 if (found_ref
&& node
->found_ref
&&
4226 (back
->bytes
!= bytes
||
4227 back
->disk_bytenr
!= disk_bytenr
))
4236 static struct data_backref
*alloc_data_backref(struct extent_record
*rec
,
4237 u64 parent
, u64 root
,
4238 u64 owner
, u64 offset
,
4241 struct data_backref
*ref
= malloc(sizeof(*ref
));
4242 memset(&ref
->node
, 0, sizeof(ref
->node
));
4243 ref
->node
.is_data
= 1;
4246 ref
->parent
= parent
;
4249 ref
->node
.full_backref
= 1;
4253 ref
->offset
= offset
;
4254 ref
->node
.full_backref
= 0;
4256 ref
->bytes
= max_size
;
4259 list_add_tail(&ref
->node
.list
, &rec
->backrefs
);
4260 if (max_size
> rec
->max_size
)
4261 rec
->max_size
= max_size
;
4265 static int add_extent_rec(struct cache_tree
*extent_cache
,
4266 struct btrfs_key
*parent_key
, u64 parent_gen
,
4267 u64 start
, u64 nr
, u64 extent_item_refs
,
4268 int is_root
, int inc_ref
, int set_checked
,
4269 int metadata
, int extent_rec
, u64 max_size
)
4271 struct extent_record
*rec
;
4272 struct cache_extent
*cache
;
4276 cache
= lookup_cache_extent(extent_cache
, start
, nr
);
4278 rec
= container_of(cache
, struct extent_record
, cache
);
4282 rec
->nr
= max(nr
, max_size
);
4285 * We need to make sure to reset nr to whatever the extent
4286 * record says was the real size, this way we can compare it to
4290 if (start
!= rec
->start
|| rec
->found_rec
) {
4291 struct extent_record
*tmp
;
4294 if (list_empty(&rec
->list
))
4295 list_add_tail(&rec
->list
,
4296 &duplicate_extents
);
4299 * We have to do this song and dance in case we
4300 * find an extent record that falls inside of
4301 * our current extent record but does not have
4302 * the same objectid.
4304 tmp
= malloc(sizeof(*tmp
));
4308 tmp
->max_size
= max_size
;
4311 tmp
->metadata
= metadata
;
4312 tmp
->extent_item_refs
= extent_item_refs
;
4313 INIT_LIST_HEAD(&tmp
->list
);
4314 list_add_tail(&tmp
->list
, &rec
->dups
);
4315 rec
->num_duplicates
++;
4322 if (extent_item_refs
&& !dup
) {
4323 if (rec
->extent_item_refs
) {
4324 fprintf(stderr
, "block %llu rec "
4325 "extent_item_refs %llu, passed %llu\n",
4326 (unsigned long long)start
,
4327 (unsigned long long)
4328 rec
->extent_item_refs
,
4329 (unsigned long long)extent_item_refs
);
4331 rec
->extent_item_refs
= extent_item_refs
;
4336 rec
->content_checked
= 1;
4337 rec
->owner_ref_checked
= 1;
4341 btrfs_cpu_key_to_disk(&rec
->parent_key
, parent_key
);
4343 rec
->parent_generation
= parent_gen
;
4345 if (rec
->max_size
< max_size
)
4346 rec
->max_size
= max_size
;
4348 maybe_free_extent_rec(extent_cache
, rec
);
4351 rec
= malloc(sizeof(*rec
));
4353 rec
->max_size
= max_size
;
4354 rec
->nr
= max(nr
, max_size
);
4355 rec
->found_rec
= !!extent_rec
;
4356 rec
->content_checked
= 0;
4357 rec
->owner_ref_checked
= 0;
4358 rec
->num_duplicates
= 0;
4359 rec
->metadata
= metadata
;
4360 rec
->flag_block_full_backref
= -1;
4361 rec
->bad_full_backref
= 0;
4362 INIT_LIST_HEAD(&rec
->backrefs
);
4363 INIT_LIST_HEAD(&rec
->dups
);
4364 INIT_LIST_HEAD(&rec
->list
);
4376 if (extent_item_refs
)
4377 rec
->extent_item_refs
= extent_item_refs
;
4379 rec
->extent_item_refs
= 0;
4382 btrfs_cpu_key_to_disk(&rec
->parent_key
, parent_key
);
4384 memset(&rec
->parent_key
, 0, sizeof(*parent_key
));
4387 rec
->parent_generation
= parent_gen
;
4389 rec
->parent_generation
= 0;
4391 rec
->cache
.start
= start
;
4392 rec
->cache
.size
= nr
;
4393 ret
= insert_cache_extent(extent_cache
, &rec
->cache
);
4397 rec
->content_checked
= 1;
4398 rec
->owner_ref_checked
= 1;
4403 static int add_tree_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
4404 u64 parent
, u64 root
, int found_ref
)
4406 struct extent_record
*rec
;
4407 struct tree_backref
*back
;
4408 struct cache_extent
*cache
;
4410 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4412 add_extent_rec(extent_cache
, NULL
, 0, bytenr
,
4413 1, 0, 0, 0, 0, 1, 0, 0);
4414 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4419 rec
= container_of(cache
, struct extent_record
, cache
);
4420 if (rec
->start
!= bytenr
) {
4424 back
= find_tree_backref(rec
, parent
, root
);
4426 back
= alloc_tree_backref(rec
, parent
, root
);
4429 if (back
->node
.found_ref
) {
4430 fprintf(stderr
, "Extent back ref already exists "
4431 "for %llu parent %llu root %llu \n",
4432 (unsigned long long)bytenr
,
4433 (unsigned long long)parent
,
4434 (unsigned long long)root
);
4436 back
->node
.found_ref
= 1;
4438 if (back
->node
.found_extent_tree
) {
4439 fprintf(stderr
, "Extent back ref already exists "
4440 "for %llu parent %llu root %llu \n",
4441 (unsigned long long)bytenr
,
4442 (unsigned long long)parent
,
4443 (unsigned long long)root
);
4445 back
->node
.found_extent_tree
= 1;
4447 maybe_free_extent_rec(extent_cache
, rec
);
4451 static int add_data_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
4452 u64 parent
, u64 root
, u64 owner
, u64 offset
,
4453 u32 num_refs
, int found_ref
, u64 max_size
)
4455 struct extent_record
*rec
;
4456 struct data_backref
*back
;
4457 struct cache_extent
*cache
;
4459 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4461 add_extent_rec(extent_cache
, NULL
, 0, bytenr
, 1, 0, 0, 0, 0,
4463 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4468 rec
= container_of(cache
, struct extent_record
, cache
);
4469 if (rec
->max_size
< max_size
)
4470 rec
->max_size
= max_size
;
4473 * If found_ref is set then max_size is the real size and must match the
4474 * existing refs. So if we have already found a ref then we need to
4475 * make sure that this ref matches the existing one, otherwise we need
4476 * to add a new backref so we can notice that the backrefs don't match
4477 * and we need to figure out who is telling the truth. This is to
4478 * account for that awful fsync bug I introduced where we'd end up with
4479 * a btrfs_file_extent_item that would have its length include multiple
4480 * prealloc extents or point inside of a prealloc extent.
4482 back
= find_data_backref(rec
, parent
, root
, owner
, offset
, found_ref
,
4485 back
= alloc_data_backref(rec
, parent
, root
, owner
, offset
,
4489 BUG_ON(num_refs
!= 1);
4490 if (back
->node
.found_ref
)
4491 BUG_ON(back
->bytes
!= max_size
);
4492 back
->node
.found_ref
= 1;
4493 back
->found_ref
+= 1;
4494 back
->bytes
= max_size
;
4495 back
->disk_bytenr
= bytenr
;
4497 rec
->content_checked
= 1;
4498 rec
->owner_ref_checked
= 1;
4500 if (back
->node
.found_extent_tree
) {
4501 fprintf(stderr
, "Extent back ref already exists "
4502 "for %llu parent %llu root %llu "
4503 "owner %llu offset %llu num_refs %lu\n",
4504 (unsigned long long)bytenr
,
4505 (unsigned long long)parent
,
4506 (unsigned long long)root
,
4507 (unsigned long long)owner
,
4508 (unsigned long long)offset
,
4509 (unsigned long)num_refs
);
4511 back
->num_refs
= num_refs
;
4512 back
->node
.found_extent_tree
= 1;
4514 maybe_free_extent_rec(extent_cache
, rec
);
4518 static int add_pending(struct cache_tree
*pending
,
4519 struct cache_tree
*seen
, u64 bytenr
, u32 size
)
4522 ret
= add_cache_extent(seen
, bytenr
, size
);
4525 add_cache_extent(pending
, bytenr
, size
);
4529 static int pick_next_pending(struct cache_tree
*pending
,
4530 struct cache_tree
*reada
,
4531 struct cache_tree
*nodes
,
4532 u64 last
, struct block_info
*bits
, int bits_nr
,
4535 unsigned long node_start
= last
;
4536 struct cache_extent
*cache
;
4539 cache
= search_cache_extent(reada
, 0);
4541 bits
[0].start
= cache
->start
;
4542 bits
[0].size
= cache
->size
;
4547 if (node_start
> 32768)
4548 node_start
-= 32768;
4550 cache
= search_cache_extent(nodes
, node_start
);
4552 cache
= search_cache_extent(nodes
, 0);
4555 cache
= search_cache_extent(pending
, 0);
4560 bits
[ret
].start
= cache
->start
;
4561 bits
[ret
].size
= cache
->size
;
4562 cache
= next_cache_extent(cache
);
4564 } while (cache
&& ret
< bits_nr
);
4570 bits
[ret
].start
= cache
->start
;
4571 bits
[ret
].size
= cache
->size
;
4572 cache
= next_cache_extent(cache
);
4574 } while (cache
&& ret
< bits_nr
);
4576 if (bits_nr
- ret
> 8) {
4577 u64 lookup
= bits
[0].start
+ bits
[0].size
;
4578 struct cache_extent
*next
;
4579 next
= search_cache_extent(pending
, lookup
);
4581 if (next
->start
- lookup
> 32768)
4583 bits
[ret
].start
= next
->start
;
4584 bits
[ret
].size
= next
->size
;
4585 lookup
= next
->start
+ next
->size
;
4589 next
= next_cache_extent(next
);
4597 static void free_chunk_record(struct cache_extent
*cache
)
4599 struct chunk_record
*rec
;
4601 rec
= container_of(cache
, struct chunk_record
, cache
);
4602 list_del_init(&rec
->list
);
4603 list_del_init(&rec
->dextents
);
4607 void free_chunk_cache_tree(struct cache_tree
*chunk_cache
)
4609 cache_tree_free_extents(chunk_cache
, free_chunk_record
);
4612 static void free_device_record(struct rb_node
*node
)
4614 struct device_record
*rec
;
4616 rec
= container_of(node
, struct device_record
, node
);
4620 FREE_RB_BASED_TREE(device_cache
, free_device_record
);
4622 int insert_block_group_record(struct block_group_tree
*tree
,
4623 struct block_group_record
*bg_rec
)
4627 ret
= insert_cache_extent(&tree
->tree
, &bg_rec
->cache
);
4631 list_add_tail(&bg_rec
->list
, &tree
->block_groups
);
4635 static void free_block_group_record(struct cache_extent
*cache
)
4637 struct block_group_record
*rec
;
4639 rec
= container_of(cache
, struct block_group_record
, cache
);
4640 list_del_init(&rec
->list
);
4644 void free_block_group_tree(struct block_group_tree
*tree
)
4646 cache_tree_free_extents(&tree
->tree
, free_block_group_record
);
4649 int insert_device_extent_record(struct device_extent_tree
*tree
,
4650 struct device_extent_record
*de_rec
)
4655 * Device extent is a bit different from the other extents, because
4656 * the extents which belong to the different devices may have the
4657 * same start and size, so we need use the special extent cache
4658 * search/insert functions.
4660 ret
= insert_cache_extent2(&tree
->tree
, &de_rec
->cache
);
4664 list_add_tail(&de_rec
->chunk_list
, &tree
->no_chunk_orphans
);
4665 list_add_tail(&de_rec
->device_list
, &tree
->no_device_orphans
);
4669 static void free_device_extent_record(struct cache_extent
*cache
)
4671 struct device_extent_record
*rec
;
4673 rec
= container_of(cache
, struct device_extent_record
, cache
);
4674 if (!list_empty(&rec
->chunk_list
))
4675 list_del_init(&rec
->chunk_list
);
4676 if (!list_empty(&rec
->device_list
))
4677 list_del_init(&rec
->device_list
);
4681 void free_device_extent_tree(struct device_extent_tree
*tree
)
4683 cache_tree_free_extents(&tree
->tree
, free_device_extent_record
);
4686 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4687 static int process_extent_ref_v0(struct cache_tree
*extent_cache
,
4688 struct extent_buffer
*leaf
, int slot
)
4690 struct btrfs_extent_ref_v0
*ref0
;
4691 struct btrfs_key key
;
4693 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
4694 ref0
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_ref_v0
);
4695 if (btrfs_ref_objectid_v0(leaf
, ref0
) < BTRFS_FIRST_FREE_OBJECTID
) {
4696 add_tree_backref(extent_cache
, key
.objectid
, key
.offset
, 0, 0);
4698 add_data_backref(extent_cache
, key
.objectid
, key
.offset
, 0,
4699 0, 0, btrfs_ref_count_v0(leaf
, ref0
), 0, 0);
4705 struct chunk_record
*btrfs_new_chunk_record(struct extent_buffer
*leaf
,
4706 struct btrfs_key
*key
,
4709 struct btrfs_chunk
*ptr
;
4710 struct chunk_record
*rec
;
4713 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
4714 num_stripes
= btrfs_chunk_num_stripes(leaf
, ptr
);
4716 rec
= malloc(btrfs_chunk_record_size(num_stripes
));
4718 fprintf(stderr
, "memory allocation failed\n");
4722 memset(rec
, 0, btrfs_chunk_record_size(num_stripes
));
4724 INIT_LIST_HEAD(&rec
->list
);
4725 INIT_LIST_HEAD(&rec
->dextents
);
4728 rec
->cache
.start
= key
->offset
;
4729 rec
->cache
.size
= btrfs_chunk_length(leaf
, ptr
);
4731 rec
->generation
= btrfs_header_generation(leaf
);
4733 rec
->objectid
= key
->objectid
;
4734 rec
->type
= key
->type
;
4735 rec
->offset
= key
->offset
;
4737 rec
->length
= rec
->cache
.size
;
4738 rec
->owner
= btrfs_chunk_owner(leaf
, ptr
);
4739 rec
->stripe_len
= btrfs_chunk_stripe_len(leaf
, ptr
);
4740 rec
->type_flags
= btrfs_chunk_type(leaf
, ptr
);
4741 rec
->io_width
= btrfs_chunk_io_width(leaf
, ptr
);
4742 rec
->io_align
= btrfs_chunk_io_align(leaf
, ptr
);
4743 rec
->sector_size
= btrfs_chunk_sector_size(leaf
, ptr
);
4744 rec
->num_stripes
= num_stripes
;
4745 rec
->sub_stripes
= btrfs_chunk_sub_stripes(leaf
, ptr
);
4747 for (i
= 0; i
< rec
->num_stripes
; ++i
) {
4748 rec
->stripes
[i
].devid
=
4749 btrfs_stripe_devid_nr(leaf
, ptr
, i
);
4750 rec
->stripes
[i
].offset
=
4751 btrfs_stripe_offset_nr(leaf
, ptr
, i
);
4752 read_extent_buffer(leaf
, rec
->stripes
[i
].dev_uuid
,
4753 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr
, i
),
4760 static int process_chunk_item(struct cache_tree
*chunk_cache
,
4761 struct btrfs_key
*key
, struct extent_buffer
*eb
,
4764 struct chunk_record
*rec
;
4767 rec
= btrfs_new_chunk_record(eb
, key
, slot
);
4768 ret
= insert_cache_extent(chunk_cache
, &rec
->cache
);
4770 fprintf(stderr
, "Chunk[%llu, %llu] existed.\n",
4771 rec
->offset
, rec
->length
);
4778 static int process_device_item(struct rb_root
*dev_cache
,
4779 struct btrfs_key
*key
, struct extent_buffer
*eb
, int slot
)
4781 struct btrfs_dev_item
*ptr
;
4782 struct device_record
*rec
;
4785 ptr
= btrfs_item_ptr(eb
,
4786 slot
, struct btrfs_dev_item
);
4788 rec
= malloc(sizeof(*rec
));
4790 fprintf(stderr
, "memory allocation failed\n");
4794 rec
->devid
= key
->offset
;
4795 rec
->generation
= btrfs_header_generation(eb
);
4797 rec
->objectid
= key
->objectid
;
4798 rec
->type
= key
->type
;
4799 rec
->offset
= key
->offset
;
4801 rec
->devid
= btrfs_device_id(eb
, ptr
);
4802 rec
->total_byte
= btrfs_device_total_bytes(eb
, ptr
);
4803 rec
->byte_used
= btrfs_device_bytes_used(eb
, ptr
);
4805 ret
= rb_insert(dev_cache
, &rec
->node
, device_record_compare
);
4807 fprintf(stderr
, "Device[%llu] existed.\n", rec
->devid
);
4814 struct block_group_record
*
4815 btrfs_new_block_group_record(struct extent_buffer
*leaf
, struct btrfs_key
*key
,
4818 struct btrfs_block_group_item
*ptr
;
4819 struct block_group_record
*rec
;
4821 rec
= malloc(sizeof(*rec
));
4823 fprintf(stderr
, "memory allocation failed\n");
4826 memset(rec
, 0, sizeof(*rec
));
4828 rec
->cache
.start
= key
->objectid
;
4829 rec
->cache
.size
= key
->offset
;
4831 rec
->generation
= btrfs_header_generation(leaf
);
4833 rec
->objectid
= key
->objectid
;
4834 rec
->type
= key
->type
;
4835 rec
->offset
= key
->offset
;
4837 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_block_group_item
);
4838 rec
->flags
= btrfs_disk_block_group_flags(leaf
, ptr
);
4840 INIT_LIST_HEAD(&rec
->list
);
4845 static int process_block_group_item(struct block_group_tree
*block_group_cache
,
4846 struct btrfs_key
*key
,
4847 struct extent_buffer
*eb
, int slot
)
4849 struct block_group_record
*rec
;
4852 rec
= btrfs_new_block_group_record(eb
, key
, slot
);
4853 ret
= insert_block_group_record(block_group_cache
, rec
);
4855 fprintf(stderr
, "Block Group[%llu, %llu] existed.\n",
4856 rec
->objectid
, rec
->offset
);
4863 struct device_extent_record
*
4864 btrfs_new_device_extent_record(struct extent_buffer
*leaf
,
4865 struct btrfs_key
*key
, int slot
)
4867 struct device_extent_record
*rec
;
4868 struct btrfs_dev_extent
*ptr
;
4870 rec
= malloc(sizeof(*rec
));
4872 fprintf(stderr
, "memory allocation failed\n");
4875 memset(rec
, 0, sizeof(*rec
));
4877 rec
->cache
.objectid
= key
->objectid
;
4878 rec
->cache
.start
= key
->offset
;
4880 rec
->generation
= btrfs_header_generation(leaf
);
4882 rec
->objectid
= key
->objectid
;
4883 rec
->type
= key
->type
;
4884 rec
->offset
= key
->offset
;
4886 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_dev_extent
);
4887 rec
->chunk_objecteid
=
4888 btrfs_dev_extent_chunk_objectid(leaf
, ptr
);
4890 btrfs_dev_extent_chunk_offset(leaf
, ptr
);
4891 rec
->length
= btrfs_dev_extent_length(leaf
, ptr
);
4892 rec
->cache
.size
= rec
->length
;
4894 INIT_LIST_HEAD(&rec
->chunk_list
);
4895 INIT_LIST_HEAD(&rec
->device_list
);
4901 process_device_extent_item(struct device_extent_tree
*dev_extent_cache
,
4902 struct btrfs_key
*key
, struct extent_buffer
*eb
,
4905 struct device_extent_record
*rec
;
4908 rec
= btrfs_new_device_extent_record(eb
, key
, slot
);
4909 ret
= insert_device_extent_record(dev_extent_cache
, rec
);
4912 "Device extent[%llu, %llu, %llu] existed.\n",
4913 rec
->objectid
, rec
->offset
, rec
->length
);
4920 static int process_extent_item(struct btrfs_root
*root
,
4921 struct cache_tree
*extent_cache
,
4922 struct extent_buffer
*eb
, int slot
)
4924 struct btrfs_extent_item
*ei
;
4925 struct btrfs_extent_inline_ref
*iref
;
4926 struct btrfs_extent_data_ref
*dref
;
4927 struct btrfs_shared_data_ref
*sref
;
4928 struct btrfs_key key
;
4932 u32 item_size
= btrfs_item_size_nr(eb
, slot
);
4938 btrfs_item_key_to_cpu(eb
, &key
, slot
);
4940 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
4942 num_bytes
= root
->leafsize
;
4944 num_bytes
= key
.offset
;
4947 if (item_size
< sizeof(*ei
)) {
4948 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4949 struct btrfs_extent_item_v0
*ei0
;
4950 BUG_ON(item_size
!= sizeof(*ei0
));
4951 ei0
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item_v0
);
4952 refs
= btrfs_extent_refs_v0(eb
, ei0
);
4956 return add_extent_rec(extent_cache
, NULL
, 0, key
.objectid
,
4957 num_bytes
, refs
, 0, 0, 0, metadata
, 1,
4961 ei
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item
);
4962 refs
= btrfs_extent_refs(eb
, ei
);
4964 add_extent_rec(extent_cache
, NULL
, 0, key
.objectid
, num_bytes
,
4965 refs
, 0, 0, 0, metadata
, 1, num_bytes
);
4967 ptr
= (unsigned long)(ei
+ 1);
4968 if (btrfs_extent_flags(eb
, ei
) & BTRFS_EXTENT_FLAG_TREE_BLOCK
&&
4969 key
.type
== BTRFS_EXTENT_ITEM_KEY
)
4970 ptr
+= sizeof(struct btrfs_tree_block_info
);
4972 end
= (unsigned long)ei
+ item_size
;
4974 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
4975 type
= btrfs_extent_inline_ref_type(eb
, iref
);
4976 offset
= btrfs_extent_inline_ref_offset(eb
, iref
);
4978 case BTRFS_TREE_BLOCK_REF_KEY
:
4979 add_tree_backref(extent_cache
, key
.objectid
,
4982 case BTRFS_SHARED_BLOCK_REF_KEY
:
4983 add_tree_backref(extent_cache
, key
.objectid
,
4986 case BTRFS_EXTENT_DATA_REF_KEY
:
4987 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
4988 add_data_backref(extent_cache
, key
.objectid
, 0,
4989 btrfs_extent_data_ref_root(eb
, dref
),
4990 btrfs_extent_data_ref_objectid(eb
,
4992 btrfs_extent_data_ref_offset(eb
, dref
),
4993 btrfs_extent_data_ref_count(eb
, dref
),
4996 case BTRFS_SHARED_DATA_REF_KEY
:
4997 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
4998 add_data_backref(extent_cache
, key
.objectid
, offset
,
5000 btrfs_shared_data_ref_count(eb
, sref
),
5004 fprintf(stderr
, "corrupt extent record: key %Lu %u %Lu\n",
5005 key
.objectid
, key
.type
, num_bytes
);
5008 ptr
+= btrfs_extent_inline_ref_size(type
);
5015 static int check_cache_range(struct btrfs_root
*root
,
5016 struct btrfs_block_group_cache
*cache
,
5017 u64 offset
, u64 bytes
)
5019 struct btrfs_free_space
*entry
;
5025 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
5026 bytenr
= btrfs_sb_offset(i
);
5027 ret
= btrfs_rmap_block(&root
->fs_info
->mapping_tree
,
5028 cache
->key
.objectid
, bytenr
, 0,
5029 &logical
, &nr
, &stripe_len
);
5034 if (logical
[nr
] + stripe_len
<= offset
)
5036 if (offset
+ bytes
<= logical
[nr
])
5038 if (logical
[nr
] == offset
) {
5039 if (stripe_len
>= bytes
) {
5043 bytes
-= stripe_len
;
5044 offset
+= stripe_len
;
5045 } else if (logical
[nr
] < offset
) {
5046 if (logical
[nr
] + stripe_len
>=
5051 bytes
= (offset
+ bytes
) -
5052 (logical
[nr
] + stripe_len
);
5053 offset
= logical
[nr
] + stripe_len
;
5056 * Could be tricky, the super may land in the
5057 * middle of the area we're checking. First
5058 * check the easiest case, it's at the end.
5060 if (logical
[nr
] + stripe_len
>=
5062 bytes
= logical
[nr
] - offset
;
5066 /* Check the left side */
5067 ret
= check_cache_range(root
, cache
,
5069 logical
[nr
] - offset
);
5075 /* Now we continue with the right side */
5076 bytes
= (offset
+ bytes
) -
5077 (logical
[nr
] + stripe_len
);
5078 offset
= logical
[nr
] + stripe_len
;
5085 entry
= btrfs_find_free_space(cache
->free_space_ctl
, offset
, bytes
);
5087 fprintf(stderr
, "There is no free space entry for %Lu-%Lu\n",
5088 offset
, offset
+bytes
);
5092 if (entry
->offset
!= offset
) {
5093 fprintf(stderr
, "Wanted offset %Lu, found %Lu\n", offset
,
5098 if (entry
->bytes
!= bytes
) {
5099 fprintf(stderr
, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5100 bytes
, entry
->bytes
, offset
);
5104 unlink_free_space(cache
->free_space_ctl
, entry
);
5109 static int verify_space_cache(struct btrfs_root
*root
,
5110 struct btrfs_block_group_cache
*cache
)
5112 struct btrfs_path
*path
;
5113 struct extent_buffer
*leaf
;
5114 struct btrfs_key key
;
5118 path
= btrfs_alloc_path();
5122 root
= root
->fs_info
->extent_root
;
5124 last
= max_t(u64
, cache
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
5126 key
.objectid
= last
;
5128 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5130 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5135 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
5136 ret
= btrfs_next_leaf(root
, path
);
5144 leaf
= path
->nodes
[0];
5145 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5146 if (key
.objectid
>= cache
->key
.offset
+ cache
->key
.objectid
)
5148 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
5149 key
.type
!= BTRFS_METADATA_ITEM_KEY
) {
5154 if (last
== key
.objectid
) {
5155 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
5156 last
= key
.objectid
+ key
.offset
;
5158 last
= key
.objectid
+ root
->leafsize
;
5163 ret
= check_cache_range(root
, cache
, last
,
5164 key
.objectid
- last
);
5167 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
5168 last
= key
.objectid
+ key
.offset
;
5170 last
= key
.objectid
+ root
->leafsize
;
5174 if (last
< cache
->key
.objectid
+ cache
->key
.offset
)
5175 ret
= check_cache_range(root
, cache
, last
,
5176 cache
->key
.objectid
+
5177 cache
->key
.offset
- last
);
5180 btrfs_free_path(path
);
5183 !RB_EMPTY_ROOT(&cache
->free_space_ctl
->free_space_offset
)) {
5184 fprintf(stderr
, "There are still entries left in the space "
5192 static int check_space_cache(struct btrfs_root
*root
)
5194 struct btrfs_block_group_cache
*cache
;
5195 u64 start
= BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
;
5199 if (btrfs_super_cache_generation(root
->fs_info
->super_copy
) != -1ULL &&
5200 btrfs_super_generation(root
->fs_info
->super_copy
) !=
5201 btrfs_super_cache_generation(root
->fs_info
->super_copy
)) {
5202 printf("cache and super generation don't match, space cache "
5203 "will be invalidated\n");
5208 cache
= btrfs_lookup_first_block_group(root
->fs_info
, start
);
5212 start
= cache
->key
.objectid
+ cache
->key
.offset
;
5213 if (!cache
->free_space_ctl
) {
5214 if (btrfs_init_free_space_ctl(cache
,
5215 root
->sectorsize
)) {
5220 btrfs_remove_free_space_cache(cache
);
5223 ret
= load_free_space_cache(root
->fs_info
, cache
);
5227 ret
= verify_space_cache(root
, cache
);
5229 fprintf(stderr
, "cache appears valid but isnt %Lu\n",
5230 cache
->key
.objectid
);
5235 return error
? -EINVAL
: 0;
5238 static int check_extent_csums(struct btrfs_root
*root
, u64 bytenr
,
5239 u64 num_bytes
, unsigned long leaf_offset
,
5240 struct extent_buffer
*eb
) {
5243 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
5245 unsigned long csum_offset
;
5249 u64 data_checked
= 0;
5255 if (num_bytes
% root
->sectorsize
)
5258 data
= malloc(num_bytes
);
5262 while (offset
< num_bytes
) {
5265 read_len
= num_bytes
- offset
;
5266 /* read as much space once a time */
5267 ret
= read_extent_data(root
, data
+ offset
,
5268 bytenr
+ offset
, &read_len
, mirror
);
5272 /* verify every 4k data's checksum */
5273 while (data_checked
< read_len
) {
5275 tmp
= offset
+ data_checked
;
5277 csum
= btrfs_csum_data(NULL
, (char *)data
+ tmp
,
5278 csum
, root
->sectorsize
);
5279 btrfs_csum_final(csum
, (char *)&csum
);
5281 csum_offset
= leaf_offset
+
5282 tmp
/ root
->sectorsize
* csum_size
;
5283 read_extent_buffer(eb
, (char *)&csum_expected
,
5284 csum_offset
, csum_size
);
5285 /* try another mirror */
5286 if (csum
!= csum_expected
) {
5287 fprintf(stderr
, "mirror %d bytenr %llu csum %u expected csum %u\n",
5288 mirror
, bytenr
+ tmp
,
5289 csum
, csum_expected
);
5290 num_copies
= btrfs_num_copies(
5291 &root
->fs_info
->mapping_tree
,
5293 if (mirror
< num_copies
- 1) {
5298 data_checked
+= root
->sectorsize
;
5307 static int check_extent_exists(struct btrfs_root
*root
, u64 bytenr
,
5310 struct btrfs_path
*path
;
5311 struct extent_buffer
*leaf
;
5312 struct btrfs_key key
;
5315 path
= btrfs_alloc_path();
5317 fprintf(stderr
, "Error allocing path\n");
5321 key
.objectid
= bytenr
;
5322 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5323 key
.offset
= (u64
)-1;
5326 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
5329 fprintf(stderr
, "Error looking up extent record %d\n", ret
);
5330 btrfs_free_path(path
);
5333 if (path
->slots
[0] > 0) {
5336 ret
= btrfs_prev_leaf(root
, path
);
5339 } else if (ret
> 0) {
5346 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
5349 * Block group items come before extent items if they have the same
5350 * bytenr, so walk back one more just in case. Dear future traveler,
5351 * first congrats on mastering time travel. Now if it's not too much
5352 * trouble could you go back to 2006 and tell Chris to make the
5353 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5354 * EXTENT_ITEM_KEY please?
5356 while (key
.type
> BTRFS_EXTENT_ITEM_KEY
) {
5357 if (path
->slots
[0] > 0) {
5360 ret
= btrfs_prev_leaf(root
, path
);
5363 } else if (ret
> 0) {
5368 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
5372 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
5373 ret
= btrfs_next_leaf(root
, path
);
5375 fprintf(stderr
, "Error going to next leaf "
5377 btrfs_free_path(path
);
5383 leaf
= path
->nodes
[0];
5384 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5385 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
5389 if (key
.objectid
+ key
.offset
< bytenr
) {
5393 if (key
.objectid
> bytenr
+ num_bytes
)
5396 if (key
.objectid
== bytenr
) {
5397 if (key
.offset
>= num_bytes
) {
5401 num_bytes
-= key
.offset
;
5402 bytenr
+= key
.offset
;
5403 } else if (key
.objectid
< bytenr
) {
5404 if (key
.objectid
+ key
.offset
>= bytenr
+ num_bytes
) {
5408 num_bytes
= (bytenr
+ num_bytes
) -
5409 (key
.objectid
+ key
.offset
);
5410 bytenr
= key
.objectid
+ key
.offset
;
5412 if (key
.objectid
+ key
.offset
< bytenr
+ num_bytes
) {
5413 u64 new_start
= key
.objectid
+ key
.offset
;
5414 u64 new_bytes
= bytenr
+ num_bytes
- new_start
;
5417 * Weird case, the extent is in the middle of
5418 * our range, we'll have to search one side
5419 * and then the other. Not sure if this happens
5420 * in real life, but no harm in coding it up
5421 * anyway just in case.
5423 btrfs_release_path(path
);
5424 ret
= check_extent_exists(root
, new_start
,
5427 fprintf(stderr
, "Right section didn't "
5431 num_bytes
= key
.objectid
- bytenr
;
5434 num_bytes
= key
.objectid
- bytenr
;
5441 if (num_bytes
&& !ret
) {
5442 fprintf(stderr
, "There are no extents for csum range "
5443 "%Lu-%Lu\n", bytenr
, bytenr
+num_bytes
);
5447 btrfs_free_path(path
);
5451 static int check_csums(struct btrfs_root
*root
)
5453 struct btrfs_path
*path
;
5454 struct extent_buffer
*leaf
;
5455 struct btrfs_key key
;
5456 u64 offset
= 0, num_bytes
= 0;
5457 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
5461 unsigned long leaf_offset
;
5463 root
= root
->fs_info
->csum_root
;
5464 if (!extent_buffer_uptodate(root
->node
)) {
5465 fprintf(stderr
, "No valid csum tree found\n");
5469 key
.objectid
= BTRFS_EXTENT_CSUM_OBJECTID
;
5470 key
.type
= BTRFS_EXTENT_CSUM_KEY
;
5473 path
= btrfs_alloc_path();
5477 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5479 fprintf(stderr
, "Error searching csum tree %d\n", ret
);
5480 btrfs_free_path(path
);
5484 if (ret
> 0 && path
->slots
[0])
5489 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
5490 ret
= btrfs_next_leaf(root
, path
);
5492 fprintf(stderr
, "Error going to next leaf "
5499 leaf
= path
->nodes
[0];
5501 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5502 if (key
.type
!= BTRFS_EXTENT_CSUM_KEY
) {
5507 data_len
= (btrfs_item_size_nr(leaf
, path
->slots
[0]) /
5508 csum_size
) * root
->sectorsize
;
5509 if (!check_data_csum
)
5510 goto skip_csum_check
;
5511 leaf_offset
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
5512 ret
= check_extent_csums(root
, key
.offset
, data_len
,
5518 offset
= key
.offset
;
5519 } else if (key
.offset
!= offset
+ num_bytes
) {
5520 ret
= check_extent_exists(root
, offset
, num_bytes
);
5522 fprintf(stderr
, "Csum exists for %Lu-%Lu but "
5523 "there is no extent record\n",
5524 offset
, offset
+num_bytes
);
5527 offset
= key
.offset
;
5530 num_bytes
+= data_len
;
5534 btrfs_free_path(path
);
5538 static int is_dropped_key(struct btrfs_key
*key
,
5539 struct btrfs_key
*drop_key
) {
5540 if (key
->objectid
< drop_key
->objectid
)
5542 else if (key
->objectid
== drop_key
->objectid
) {
5543 if (key
->type
< drop_key
->type
)
5545 else if (key
->type
== drop_key
->type
) {
5546 if (key
->offset
< drop_key
->offset
)
5554 * Here are the rules for FULL_BACKREF.
5556 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5557 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5559 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5560 * if it happened after the relocation occurred since we'll have dropped the
5561 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5562 * have no real way to know for sure.
5564 * We process the blocks one root at a time, and we start from the lowest root
5565 * objectid and go to the highest. So we can just lookup the owner backref for
5566 * the record and if we don't find it then we know it doesn't exist and we have
5569 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5570 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5571 * be set or not and then we can check later once we've gathered all the refs.
5573 static int calc_extent_flag(struct btrfs_root
*root
,
5574 struct cache_tree
*extent_cache
,
5575 struct extent_buffer
*buf
,
5576 struct root_item_record
*ri
,
5579 struct extent_record
*rec
;
5580 struct cache_extent
*cache
;
5581 struct tree_backref
*tback
;
5584 cache
= lookup_cache_extent(extent_cache
, buf
->start
, 1);
5585 /* we have added this extent before */
5587 rec
= container_of(cache
, struct extent_record
, cache
);
5590 * Except file/reloc tree, we can not have
5593 if (ri
->objectid
< BTRFS_FIRST_FREE_OBJECTID
)
5598 if (buf
->start
== ri
->bytenr
)
5601 if (btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))
5604 owner
= btrfs_header_owner(buf
);
5605 if (owner
== ri
->objectid
)
5608 tback
= find_tree_backref(rec
, 0, owner
);
5613 if (rec
->flag_block_full_backref
!= -1 &&
5614 rec
->flag_block_full_backref
!= 0)
5615 rec
->bad_full_backref
= 1;
5618 *flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5619 if (rec
->flag_block_full_backref
!= -1 &&
5620 rec
->flag_block_full_backref
!= 1)
5621 rec
->bad_full_backref
= 1;
5625 static int run_next_block(struct btrfs_root
*root
,
5626 struct block_info
*bits
,
5629 struct cache_tree
*pending
,
5630 struct cache_tree
*seen
,
5631 struct cache_tree
*reada
,
5632 struct cache_tree
*nodes
,
5633 struct cache_tree
*extent_cache
,
5634 struct cache_tree
*chunk_cache
,
5635 struct rb_root
*dev_cache
,
5636 struct block_group_tree
*block_group_cache
,
5637 struct device_extent_tree
*dev_extent_cache
,
5638 struct root_item_record
*ri
)
5640 struct extent_buffer
*buf
;
5641 struct extent_record
*rec
= NULL
;
5652 struct btrfs_key key
;
5653 struct cache_extent
*cache
;
5656 nritems
= pick_next_pending(pending
, reada
, nodes
, *last
, bits
,
5657 bits_nr
, &reada_bits
);
5662 for(i
= 0; i
< nritems
; i
++) {
5663 ret
= add_cache_extent(reada
, bits
[i
].start
,
5668 /* fixme, get the parent transid */
5669 readahead_tree_block(root
, bits
[i
].start
,
5673 *last
= bits
[0].start
;
5674 bytenr
= bits
[0].start
;
5675 size
= bits
[0].size
;
5677 cache
= lookup_cache_extent(pending
, bytenr
, size
);
5679 remove_cache_extent(pending
, cache
);
5682 cache
= lookup_cache_extent(reada
, bytenr
, size
);
5684 remove_cache_extent(reada
, cache
);
5687 cache
= lookup_cache_extent(nodes
, bytenr
, size
);
5689 remove_cache_extent(nodes
, cache
);
5692 cache
= lookup_cache_extent(extent_cache
, bytenr
, size
);
5694 rec
= container_of(cache
, struct extent_record
, cache
);
5695 gen
= rec
->parent_generation
;
5698 /* fixme, get the real parent transid */
5699 buf
= read_tree_block(root
, bytenr
, size
, gen
);
5700 if (!extent_buffer_uptodate(buf
)) {
5701 record_bad_block_io(root
->fs_info
,
5702 extent_cache
, bytenr
, size
);
5706 nritems
= btrfs_header_nritems(buf
);
5709 if (!init_extent_tree
) {
5710 ret
= btrfs_lookup_extent_info(NULL
, root
, bytenr
,
5711 btrfs_header_level(buf
), 1, NULL
,
5714 ret
= calc_extent_flag(root
, extent_cache
, buf
, ri
, &flags
);
5716 fprintf(stderr
, "Couldn't calc extent flags\n");
5717 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5722 ret
= calc_extent_flag(root
, extent_cache
, buf
, ri
, &flags
);
5724 fprintf(stderr
, "Couldn't calc extent flags\n");
5725 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5729 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
5731 ri
->objectid
!= BTRFS_TREE_RELOC_OBJECTID
&&
5732 ri
->objectid
== btrfs_header_owner(buf
)) {
5734 * Ok we got to this block from it's original owner and
5735 * we have FULL_BACKREF set. Relocation can leave
5736 * converted blocks over so this is altogether possible,
5737 * however it's not possible if the generation > the
5738 * last snapshot, so check for this case.
5740 if (!btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
) &&
5741 btrfs_header_generation(buf
) > ri
->last_snapshot
) {
5742 flags
&= ~BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5743 rec
->bad_full_backref
= 1;
5748 (ri
->objectid
== BTRFS_TREE_RELOC_OBJECTID
||
5749 btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))) {
5750 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5751 rec
->bad_full_backref
= 1;
5755 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
5756 rec
->flag_block_full_backref
= 1;
5760 rec
->flag_block_full_backref
= 0;
5762 owner
= btrfs_header_owner(buf
);
5765 ret
= check_block(root
, extent_cache
, buf
, flags
);
5769 if (btrfs_is_leaf(buf
)) {
5770 btree_space_waste
+= btrfs_leaf_free_space(root
, buf
);
5771 for (i
= 0; i
< nritems
; i
++) {
5772 struct btrfs_file_extent_item
*fi
;
5773 btrfs_item_key_to_cpu(buf
, &key
, i
);
5774 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
) {
5775 process_extent_item(root
, extent_cache
, buf
,
5779 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
5780 process_extent_item(root
, extent_cache
, buf
,
5784 if (key
.type
== BTRFS_EXTENT_CSUM_KEY
) {
5786 btrfs_item_size_nr(buf
, i
);
5789 if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
5790 process_chunk_item(chunk_cache
, &key
, buf
, i
);
5793 if (key
.type
== BTRFS_DEV_ITEM_KEY
) {
5794 process_device_item(dev_cache
, &key
, buf
, i
);
5797 if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
5798 process_block_group_item(block_group_cache
,
5802 if (key
.type
== BTRFS_DEV_EXTENT_KEY
) {
5803 process_device_extent_item(dev_extent_cache
,
5808 if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
5809 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5810 process_extent_ref_v0(extent_cache
, buf
, i
);
5817 if (key
.type
== BTRFS_TREE_BLOCK_REF_KEY
) {
5818 add_tree_backref(extent_cache
, key
.objectid
, 0,
5822 if (key
.type
== BTRFS_SHARED_BLOCK_REF_KEY
) {
5823 add_tree_backref(extent_cache
, key
.objectid
,
5827 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
5828 struct btrfs_extent_data_ref
*ref
;
5829 ref
= btrfs_item_ptr(buf
, i
,
5830 struct btrfs_extent_data_ref
);
5831 add_data_backref(extent_cache
,
5833 btrfs_extent_data_ref_root(buf
, ref
),
5834 btrfs_extent_data_ref_objectid(buf
,
5836 btrfs_extent_data_ref_offset(buf
, ref
),
5837 btrfs_extent_data_ref_count(buf
, ref
),
5838 0, root
->sectorsize
);
5841 if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
5842 struct btrfs_shared_data_ref
*ref
;
5843 ref
= btrfs_item_ptr(buf
, i
,
5844 struct btrfs_shared_data_ref
);
5845 add_data_backref(extent_cache
,
5846 key
.objectid
, key
.offset
, 0, 0, 0,
5847 btrfs_shared_data_ref_count(buf
, ref
),
5848 0, root
->sectorsize
);
5851 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
) {
5852 struct bad_item
*bad
;
5854 if (key
.objectid
== BTRFS_ORPHAN_OBJECTID
)
5858 bad
= malloc(sizeof(struct bad_item
));
5861 INIT_LIST_HEAD(&bad
->list
);
5862 memcpy(&bad
->key
, &key
,
5863 sizeof(struct btrfs_key
));
5864 bad
->root_id
= owner
;
5865 list_add_tail(&bad
->list
, &delete_items
);
5868 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
5870 fi
= btrfs_item_ptr(buf
, i
,
5871 struct btrfs_file_extent_item
);
5872 if (btrfs_file_extent_type(buf
, fi
) ==
5873 BTRFS_FILE_EXTENT_INLINE
)
5875 if (btrfs_file_extent_disk_bytenr(buf
, fi
) == 0)
5878 data_bytes_allocated
+=
5879 btrfs_file_extent_disk_num_bytes(buf
, fi
);
5880 if (data_bytes_allocated
< root
->sectorsize
) {
5883 data_bytes_referenced
+=
5884 btrfs_file_extent_num_bytes(buf
, fi
);
5885 add_data_backref(extent_cache
,
5886 btrfs_file_extent_disk_bytenr(buf
, fi
),
5887 parent
, owner
, key
.objectid
, key
.offset
-
5888 btrfs_file_extent_offset(buf
, fi
), 1, 1,
5889 btrfs_file_extent_disk_num_bytes(buf
, fi
));
5893 struct btrfs_key first_key
;
5895 first_key
.objectid
= 0;
5898 btrfs_item_key_to_cpu(buf
, &first_key
, 0);
5899 level
= btrfs_header_level(buf
);
5900 for (i
= 0; i
< nritems
; i
++) {
5901 ptr
= btrfs_node_blockptr(buf
, i
);
5902 size
= btrfs_level_size(root
, level
- 1);
5903 btrfs_node_key_to_cpu(buf
, &key
, i
);
5905 if ((level
== ri
->drop_level
)
5906 && is_dropped_key(&key
, &ri
->drop_key
)) {
5910 ret
= add_extent_rec(extent_cache
, &key
,
5911 btrfs_node_ptr_generation(buf
, i
),
5912 ptr
, size
, 0, 0, 1, 0, 1, 0,
5916 add_tree_backref(extent_cache
, ptr
, parent
, owner
, 1);
5919 add_pending(nodes
, seen
, ptr
, size
);
5921 add_pending(pending
, seen
, ptr
, size
);
5924 btree_space_waste
+= (BTRFS_NODEPTRS_PER_BLOCK(root
) -
5925 nritems
) * sizeof(struct btrfs_key_ptr
);
5927 total_btree_bytes
+= buf
->len
;
5928 if (fs_root_objectid(btrfs_header_owner(buf
)))
5929 total_fs_tree_bytes
+= buf
->len
;
5930 if (btrfs_header_owner(buf
) == BTRFS_EXTENT_TREE_OBJECTID
)
5931 total_extent_tree_bytes
+= buf
->len
;
5932 if (!found_old_backref
&&
5933 btrfs_header_owner(buf
) == BTRFS_TREE_RELOC_OBJECTID
&&
5934 btrfs_header_backref_rev(buf
) == BTRFS_MIXED_BACKREF_REV
&&
5935 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))
5936 found_old_backref
= 1;
5938 free_extent_buffer(buf
);
5942 static int add_root_to_pending(struct extent_buffer
*buf
,
5943 struct cache_tree
*extent_cache
,
5944 struct cache_tree
*pending
,
5945 struct cache_tree
*seen
,
5946 struct cache_tree
*nodes
,
5949 if (btrfs_header_level(buf
) > 0)
5950 add_pending(nodes
, seen
, buf
->start
, buf
->len
);
5952 add_pending(pending
, seen
, buf
->start
, buf
->len
);
5953 add_extent_rec(extent_cache
, NULL
, 0, buf
->start
, buf
->len
,
5954 0, 1, 1, 0, 1, 0, buf
->len
);
5956 if (objectid
== BTRFS_TREE_RELOC_OBJECTID
||
5957 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
5958 add_tree_backref(extent_cache
, buf
->start
, buf
->start
,
5961 add_tree_backref(extent_cache
, buf
->start
, 0, objectid
, 1);
5965 /* as we fix the tree, we might be deleting blocks that
5966 * we're tracking for repair. This hook makes sure we
5967 * remove any backrefs for blocks as we are fixing them.
5969 static int free_extent_hook(struct btrfs_trans_handle
*trans
,
5970 struct btrfs_root
*root
,
5971 u64 bytenr
, u64 num_bytes
, u64 parent
,
5972 u64 root_objectid
, u64 owner
, u64 offset
,
5975 struct extent_record
*rec
;
5976 struct cache_extent
*cache
;
5978 struct cache_tree
*extent_cache
= root
->fs_info
->fsck_extent_cache
;
5980 is_data
= owner
>= BTRFS_FIRST_FREE_OBJECTID
;
5981 cache
= lookup_cache_extent(extent_cache
, bytenr
, num_bytes
);
5985 rec
= container_of(cache
, struct extent_record
, cache
);
5987 struct data_backref
*back
;
5988 back
= find_data_backref(rec
, parent
, root_objectid
, owner
,
5989 offset
, 1, bytenr
, num_bytes
);
5992 if (back
->node
.found_ref
) {
5993 back
->found_ref
-= refs_to_drop
;
5995 rec
->refs
-= refs_to_drop
;
5997 if (back
->node
.found_extent_tree
) {
5998 back
->num_refs
-= refs_to_drop
;
5999 if (rec
->extent_item_refs
)
6000 rec
->extent_item_refs
-= refs_to_drop
;
6002 if (back
->found_ref
== 0)
6003 back
->node
.found_ref
= 0;
6004 if (back
->num_refs
== 0)
6005 back
->node
.found_extent_tree
= 0;
6007 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
6008 list_del(&back
->node
.list
);
6012 struct tree_backref
*back
;
6013 back
= find_tree_backref(rec
, parent
, root_objectid
);
6016 if (back
->node
.found_ref
) {
6019 back
->node
.found_ref
= 0;
6021 if (back
->node
.found_extent_tree
) {
6022 if (rec
->extent_item_refs
)
6023 rec
->extent_item_refs
--;
6024 back
->node
.found_extent_tree
= 0;
6026 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
6027 list_del(&back
->node
.list
);
6031 maybe_free_extent_rec(extent_cache
, rec
);
6036 static int delete_extent_records(struct btrfs_trans_handle
*trans
,
6037 struct btrfs_root
*root
,
6038 struct btrfs_path
*path
,
6039 u64 bytenr
, u64 new_len
)
6041 struct btrfs_key key
;
6042 struct btrfs_key found_key
;
6043 struct extent_buffer
*leaf
;
6048 key
.objectid
= bytenr
;
6050 key
.offset
= (u64
)-1;
6053 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
,
6060 if (path
->slots
[0] == 0)
6066 leaf
= path
->nodes
[0];
6067 slot
= path
->slots
[0];
6069 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
6070 if (found_key
.objectid
!= bytenr
)
6073 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
6074 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
6075 found_key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
6076 found_key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
&&
6077 found_key
.type
!= BTRFS_EXTENT_REF_V0_KEY
&&
6078 found_key
.type
!= BTRFS_SHARED_BLOCK_REF_KEY
&&
6079 found_key
.type
!= BTRFS_SHARED_DATA_REF_KEY
) {
6080 btrfs_release_path(path
);
6081 if (found_key
.type
== 0) {
6082 if (found_key
.offset
== 0)
6084 key
.offset
= found_key
.offset
- 1;
6085 key
.type
= found_key
.type
;
6087 key
.type
= found_key
.type
- 1;
6088 key
.offset
= (u64
)-1;
6092 fprintf(stderr
, "repair deleting extent record: key %Lu %u %Lu\n",
6093 found_key
.objectid
, found_key
.type
, found_key
.offset
);
6095 ret
= btrfs_del_item(trans
, root
->fs_info
->extent_root
, path
);
6098 btrfs_release_path(path
);
6100 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
||
6101 found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
6102 u64 bytes
= (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
) ?
6103 found_key
.offset
: root
->leafsize
;
6105 ret
= btrfs_update_block_group(trans
, root
, bytenr
,
6112 btrfs_release_path(path
);
6117 * for a single backref, this will allocate a new extent
6118 * and add the backref to it.
6120 static int record_extent(struct btrfs_trans_handle
*trans
,
6121 struct btrfs_fs_info
*info
,
6122 struct btrfs_path
*path
,
6123 struct extent_record
*rec
,
6124 struct extent_backref
*back
,
6125 int allocated
, u64 flags
)
6128 struct btrfs_root
*extent_root
= info
->extent_root
;
6129 struct extent_buffer
*leaf
;
6130 struct btrfs_key ins_key
;
6131 struct btrfs_extent_item
*ei
;
6132 struct tree_backref
*tback
;
6133 struct data_backref
*dback
;
6134 struct btrfs_tree_block_info
*bi
;
6137 rec
->max_size
= max_t(u64
, rec
->max_size
,
6138 info
->extent_root
->leafsize
);
6141 u32 item_size
= sizeof(*ei
);
6144 item_size
+= sizeof(*bi
);
6146 ins_key
.objectid
= rec
->start
;
6147 ins_key
.offset
= rec
->max_size
;
6148 ins_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
6150 ret
= btrfs_insert_empty_item(trans
, extent_root
, path
,
6151 &ins_key
, item_size
);
6155 leaf
= path
->nodes
[0];
6156 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
6157 struct btrfs_extent_item
);
6159 btrfs_set_extent_refs(leaf
, ei
, 0);
6160 btrfs_set_extent_generation(leaf
, ei
, rec
->generation
);
6162 if (back
->is_data
) {
6163 btrfs_set_extent_flags(leaf
, ei
,
6164 BTRFS_EXTENT_FLAG_DATA
);
6166 struct btrfs_disk_key copy_key
;;
6168 tback
= (struct tree_backref
*)back
;
6169 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
6170 memset_extent_buffer(leaf
, 0, (unsigned long)bi
,
6173 btrfs_set_disk_key_objectid(©_key
,
6174 rec
->info_objectid
);
6175 btrfs_set_disk_key_type(©_key
, 0);
6176 btrfs_set_disk_key_offset(©_key
, 0);
6178 btrfs_set_tree_block_level(leaf
, bi
, rec
->info_level
);
6179 btrfs_set_tree_block_key(leaf
, bi
, ©_key
);
6181 btrfs_set_extent_flags(leaf
, ei
,
6182 BTRFS_EXTENT_FLAG_TREE_BLOCK
| flags
);
6185 btrfs_mark_buffer_dirty(leaf
);
6186 ret
= btrfs_update_block_group(trans
, extent_root
, rec
->start
,
6187 rec
->max_size
, 1, 0);
6190 btrfs_release_path(path
);
6193 if (back
->is_data
) {
6197 dback
= (struct data_backref
*)back
;
6198 if (back
->full_backref
)
6199 parent
= dback
->parent
;
6203 for (i
= 0; i
< dback
->found_ref
; i
++) {
6204 /* if parent != 0, we're doing a full backref
6205 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6206 * just makes the backref allocator create a data
6209 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
6210 rec
->start
, rec
->max_size
,
6214 BTRFS_FIRST_FREE_OBJECTID
:
6220 fprintf(stderr
, "adding new data backref"
6221 " on %llu %s %llu owner %llu"
6222 " offset %llu found %d\n",
6223 (unsigned long long)rec
->start
,
6224 back
->full_backref
?
6226 back
->full_backref
?
6227 (unsigned long long)parent
:
6228 (unsigned long long)dback
->root
,
6229 (unsigned long long)dback
->owner
,
6230 (unsigned long long)dback
->offset
,
6235 tback
= (struct tree_backref
*)back
;
6236 if (back
->full_backref
)
6237 parent
= tback
->parent
;
6241 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
6242 rec
->start
, rec
->max_size
,
6243 parent
, tback
->root
, 0, 0);
6244 fprintf(stderr
, "adding new tree backref on "
6245 "start %llu len %llu parent %llu root %llu\n",
6246 rec
->start
, rec
->max_size
, parent
, tback
->root
);
6251 btrfs_release_path(path
);
6255 struct extent_entry
{
6260 struct list_head list
;
6263 static struct extent_entry
*find_entry(struct list_head
*entries
,
6264 u64 bytenr
, u64 bytes
)
6266 struct extent_entry
*entry
= NULL
;
6268 list_for_each_entry(entry
, entries
, list
) {
6269 if (entry
->bytenr
== bytenr
&& entry
->bytes
== bytes
)
6276 static struct extent_entry
*find_most_right_entry(struct list_head
*entries
)
6278 struct extent_entry
*entry
, *best
= NULL
, *prev
= NULL
;
6280 list_for_each_entry(entry
, entries
, list
) {
6287 * If there are as many broken entries as entries then we know
6288 * not to trust this particular entry.
6290 if (entry
->broken
== entry
->count
)
6294 * If our current entry == best then we can't be sure our best
6295 * is really the best, so we need to keep searching.
6297 if (best
&& best
->count
== entry
->count
) {
6303 /* Prev == entry, not good enough, have to keep searching */
6304 if (!prev
->broken
&& prev
->count
== entry
->count
)
6308 best
= (prev
->count
> entry
->count
) ? prev
: entry
;
6309 else if (best
->count
< entry
->count
)
6317 static int repair_ref(struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
6318 struct data_backref
*dback
, struct extent_entry
*entry
)
6320 struct btrfs_trans_handle
*trans
;
6321 struct btrfs_root
*root
;
6322 struct btrfs_file_extent_item
*fi
;
6323 struct extent_buffer
*leaf
;
6324 struct btrfs_key key
;
6328 key
.objectid
= dback
->root
;
6329 key
.type
= BTRFS_ROOT_ITEM_KEY
;
6330 key
.offset
= (u64
)-1;
6331 root
= btrfs_read_fs_root(info
, &key
);
6333 fprintf(stderr
, "Couldn't find root for our ref\n");
6338 * The backref points to the original offset of the extent if it was
6339 * split, so we need to search down to the offset we have and then walk
6340 * forward until we find the backref we're looking for.
6342 key
.objectid
= dback
->owner
;
6343 key
.type
= BTRFS_EXTENT_DATA_KEY
;
6344 key
.offset
= dback
->offset
;
6345 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
6347 fprintf(stderr
, "Error looking up ref %d\n", ret
);
6352 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
6353 ret
= btrfs_next_leaf(root
, path
);
6355 fprintf(stderr
, "Couldn't find our ref, next\n");
6359 leaf
= path
->nodes
[0];
6360 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
6361 if (key
.objectid
!= dback
->owner
||
6362 key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
6363 fprintf(stderr
, "Couldn't find our ref, search\n");
6366 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6367 struct btrfs_file_extent_item
);
6368 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
6369 bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
6371 if (bytenr
== dback
->disk_bytenr
&& bytes
== dback
->bytes
)
6376 btrfs_release_path(path
);
6378 trans
= btrfs_start_transaction(root
, 1);
6380 return PTR_ERR(trans
);
6383 * Ok we have the key of the file extent we want to fix, now we can cow
6384 * down to the thing and fix it.
6386 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
6388 fprintf(stderr
, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6389 key
.objectid
, key
.type
, key
.offset
, ret
);
6393 fprintf(stderr
, "Well that's odd, we just found this key "
6394 "[%Lu, %u, %Lu]\n", key
.objectid
, key
.type
,
6399 leaf
= path
->nodes
[0];
6400 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6401 struct btrfs_file_extent_item
);
6403 if (btrfs_file_extent_compression(leaf
, fi
) &&
6404 dback
->disk_bytenr
!= entry
->bytenr
) {
6405 fprintf(stderr
, "Ref doesn't match the record start and is "
6406 "compressed, please take a btrfs-image of this file "
6407 "system and send it to a btrfs developer so they can "
6408 "complete this functionality for bytenr %Lu\n",
6409 dback
->disk_bytenr
);
6414 if (dback
->node
.broken
&& dback
->disk_bytenr
!= entry
->bytenr
) {
6415 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6416 } else if (dback
->disk_bytenr
> entry
->bytenr
) {
6417 u64 off_diff
, offset
;
6419 off_diff
= dback
->disk_bytenr
- entry
->bytenr
;
6420 offset
= btrfs_file_extent_offset(leaf
, fi
);
6421 if (dback
->disk_bytenr
+ offset
+
6422 btrfs_file_extent_num_bytes(leaf
, fi
) >
6423 entry
->bytenr
+ entry
->bytes
) {
6424 fprintf(stderr
, "Ref is past the entry end, please "
6425 "take a btrfs-image of this file system and "
6426 "send it to a btrfs developer, ref %Lu\n",
6427 dback
->disk_bytenr
);
6432 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6433 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
6434 } else if (dback
->disk_bytenr
< entry
->bytenr
) {
6437 offset
= btrfs_file_extent_offset(leaf
, fi
);
6438 if (dback
->disk_bytenr
+ offset
< entry
->bytenr
) {
6439 fprintf(stderr
, "Ref is before the entry start, please"
6440 " take a btrfs-image of this file system and "
6441 "send it to a btrfs developer, ref %Lu\n",
6442 dback
->disk_bytenr
);
6447 offset
+= dback
->disk_bytenr
;
6448 offset
-= entry
->bytenr
;
6449 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6450 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
6453 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
, entry
->bytes
);
6456 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6457 * only do this if we aren't using compression, otherwise it's a
6460 if (!btrfs_file_extent_compression(leaf
, fi
))
6461 btrfs_set_file_extent_ram_bytes(leaf
, fi
, entry
->bytes
);
6463 printf("ram bytes may be wrong?\n");
6464 btrfs_mark_buffer_dirty(leaf
);
6466 err
= btrfs_commit_transaction(trans
, root
);
6467 btrfs_release_path(path
);
6468 return ret
? ret
: err
;
6471 static int verify_backrefs(struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
6472 struct extent_record
*rec
)
6474 struct extent_backref
*back
;
6475 struct data_backref
*dback
;
6476 struct extent_entry
*entry
, *best
= NULL
;
6479 int broken_entries
= 0;
6484 * Metadata is easy and the backrefs should always agree on bytenr and
6485 * size, if not we've got bigger issues.
6490 list_for_each_entry(back
, &rec
->backrefs
, list
) {
6491 if (back
->full_backref
|| !back
->is_data
)
6494 dback
= (struct data_backref
*)back
;
6497 * We only pay attention to backrefs that we found a real
6500 if (dback
->found_ref
== 0)
6504 * For now we only catch when the bytes don't match, not the
6505 * bytenr. We can easily do this at the same time, but I want
6506 * to have a fs image to test on before we just add repair
6507 * functionality willy-nilly so we know we won't screw up the
6511 entry
= find_entry(&entries
, dback
->disk_bytenr
,
6514 entry
= malloc(sizeof(struct extent_entry
));
6519 memset(entry
, 0, sizeof(*entry
));
6520 entry
->bytenr
= dback
->disk_bytenr
;
6521 entry
->bytes
= dback
->bytes
;
6522 list_add_tail(&entry
->list
, &entries
);
6527 * If we only have on entry we may think the entries agree when
6528 * in reality they don't so we have to do some extra checking.
6530 if (dback
->disk_bytenr
!= rec
->start
||
6531 dback
->bytes
!= rec
->nr
|| back
->broken
)
6542 /* Yay all the backrefs agree, carry on good sir */
6543 if (nr_entries
<= 1 && !mismatch
)
6546 fprintf(stderr
, "attempting to repair backref discrepency for bytenr "
6547 "%Lu\n", rec
->start
);
6550 * First we want to see if the backrefs can agree amongst themselves who
6551 * is right, so figure out which one of the entries has the highest
6554 best
= find_most_right_entry(&entries
);
6557 * Ok so we may have an even split between what the backrefs think, so
6558 * this is where we use the extent ref to see what it thinks.
6561 entry
= find_entry(&entries
, rec
->start
, rec
->nr
);
6562 if (!entry
&& (!broken_entries
|| !rec
->found_rec
)) {
6563 fprintf(stderr
, "Backrefs don't agree with each other "
6564 "and extent record doesn't agree with anybody,"
6565 " so we can't fix bytenr %Lu bytes %Lu\n",
6566 rec
->start
, rec
->nr
);
6569 } else if (!entry
) {
6571 * Ok our backrefs were broken, we'll assume this is the
6572 * correct value and add an entry for this range.
6574 entry
= malloc(sizeof(struct extent_entry
));
6579 memset(entry
, 0, sizeof(*entry
));
6580 entry
->bytenr
= rec
->start
;
6581 entry
->bytes
= rec
->nr
;
6582 list_add_tail(&entry
->list
, &entries
);
6586 best
= find_most_right_entry(&entries
);
6588 fprintf(stderr
, "Backrefs and extent record evenly "
6589 "split on who is right, this is going to "
6590 "require user input to fix bytenr %Lu bytes "
6591 "%Lu\n", rec
->start
, rec
->nr
);
6598 * I don't think this can happen currently as we'll abort() if we catch
6599 * this case higher up, but in case somebody removes that we still can't
6600 * deal with it properly here yet, so just bail out of that's the case.
6602 if (best
->bytenr
!= rec
->start
) {
6603 fprintf(stderr
, "Extent start and backref starts don't match, "
6604 "please use btrfs-image on this file system and send "
6605 "it to a btrfs developer so they can make fsck fix "
6606 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6607 rec
->start
, rec
->nr
);
6613 * Ok great we all agreed on an extent record, let's go find the real
6614 * references and fix up the ones that don't match.
6616 list_for_each_entry(back
, &rec
->backrefs
, list
) {
6617 if (back
->full_backref
|| !back
->is_data
)
6620 dback
= (struct data_backref
*)back
;
6623 * Still ignoring backrefs that don't have a real ref attached
6626 if (dback
->found_ref
== 0)
6629 if (dback
->bytes
== best
->bytes
&&
6630 dback
->disk_bytenr
== best
->bytenr
)
6633 ret
= repair_ref(info
, path
, dback
, best
);
6639 * Ok we messed with the actual refs, which means we need to drop our
6640 * entire cache and go back and rescan. I know this is a huge pain and
6641 * adds a lot of extra work, but it's the only way to be safe. Once all
6642 * the backrefs agree we may not need to do anything to the extent
6647 while (!list_empty(&entries
)) {
6648 entry
= list_entry(entries
.next
, struct extent_entry
, list
);
6649 list_del_init(&entry
->list
);
6655 static int process_duplicates(struct btrfs_root
*root
,
6656 struct cache_tree
*extent_cache
,
6657 struct extent_record
*rec
)
6659 struct extent_record
*good
, *tmp
;
6660 struct cache_extent
*cache
;
6664 * If we found a extent record for this extent then return, or if we
6665 * have more than one duplicate we are likely going to need to delete
6668 if (rec
->found_rec
|| rec
->num_duplicates
> 1)
6671 /* Shouldn't happen but just in case */
6672 BUG_ON(!rec
->num_duplicates
);
6675 * So this happens if we end up with a backref that doesn't match the
6676 * actual extent entry. So either the backref is bad or the extent
6677 * entry is bad. Either way we want to have the extent_record actually
6678 * reflect what we found in the extent_tree, so we need to take the
6679 * duplicate out and use that as the extent_record since the only way we
6680 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6682 remove_cache_extent(extent_cache
, &rec
->cache
);
6684 good
= list_entry(rec
->dups
.next
, struct extent_record
, list
);
6685 list_del_init(&good
->list
);
6686 INIT_LIST_HEAD(&good
->backrefs
);
6687 INIT_LIST_HEAD(&good
->dups
);
6688 good
->cache
.start
= good
->start
;
6689 good
->cache
.size
= good
->nr
;
6690 good
->content_checked
= 0;
6691 good
->owner_ref_checked
= 0;
6692 good
->num_duplicates
= 0;
6693 good
->refs
= rec
->refs
;
6694 list_splice_init(&rec
->backrefs
, &good
->backrefs
);
6696 cache
= lookup_cache_extent(extent_cache
, good
->start
,
6700 tmp
= container_of(cache
, struct extent_record
, cache
);
6703 * If we find another overlapping extent and it's found_rec is
6704 * set then it's a duplicate and we need to try and delete
6707 if (tmp
->found_rec
|| tmp
->num_duplicates
> 0) {
6708 if (list_empty(&good
->list
))
6709 list_add_tail(&good
->list
,
6710 &duplicate_extents
);
6711 good
->num_duplicates
+= tmp
->num_duplicates
+ 1;
6712 list_splice_init(&tmp
->dups
, &good
->dups
);
6713 list_del_init(&tmp
->list
);
6714 list_add_tail(&tmp
->list
, &good
->dups
);
6715 remove_cache_extent(extent_cache
, &tmp
->cache
);
6720 * Ok we have another non extent item backed extent rec, so lets
6721 * just add it to this extent and carry on like we did above.
6723 good
->refs
+= tmp
->refs
;
6724 list_splice_init(&tmp
->backrefs
, &good
->backrefs
);
6725 remove_cache_extent(extent_cache
, &tmp
->cache
);
6728 ret
= insert_cache_extent(extent_cache
, &good
->cache
);
6731 return good
->num_duplicates
? 0 : 1;
6734 static int delete_duplicate_records(struct btrfs_root
*root
,
6735 struct extent_record
*rec
)
6737 struct btrfs_trans_handle
*trans
;
6738 LIST_HEAD(delete_list
);
6739 struct btrfs_path
*path
;
6740 struct extent_record
*tmp
, *good
, *n
;
6743 struct btrfs_key key
;
6745 path
= btrfs_alloc_path();
6752 /* Find the record that covers all of the duplicates. */
6753 list_for_each_entry(tmp
, &rec
->dups
, list
) {
6754 if (good
->start
< tmp
->start
)
6756 if (good
->nr
> tmp
->nr
)
6759 if (tmp
->start
+ tmp
->nr
< good
->start
+ good
->nr
) {
6760 fprintf(stderr
, "Ok we have overlapping extents that "
6761 "aren't completely covered by eachother, this "
6762 "is going to require more careful thought. "
6763 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6764 tmp
->start
, tmp
->nr
, good
->start
, good
->nr
);
6771 list_add_tail(&rec
->list
, &delete_list
);
6773 list_for_each_entry_safe(tmp
, n
, &rec
->dups
, list
) {
6776 list_move_tail(&tmp
->list
, &delete_list
);
6779 root
= root
->fs_info
->extent_root
;
6780 trans
= btrfs_start_transaction(root
, 1);
6781 if (IS_ERR(trans
)) {
6782 ret
= PTR_ERR(trans
);
6786 list_for_each_entry(tmp
, &delete_list
, list
) {
6787 if (tmp
->found_rec
== 0)
6789 key
.objectid
= tmp
->start
;
6790 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
6791 key
.offset
= tmp
->nr
;
6793 /* Shouldn't happen but just in case */
6794 if (tmp
->metadata
) {
6795 fprintf(stderr
, "Well this shouldn't happen, extent "
6796 "record overlaps but is metadata? "
6797 "[%Lu, %Lu]\n", tmp
->start
, tmp
->nr
);
6801 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
6807 ret
= btrfs_del_item(trans
, root
, path
);
6810 btrfs_release_path(path
);
6813 err
= btrfs_commit_transaction(trans
, root
);
6817 while (!list_empty(&delete_list
)) {
6818 tmp
= list_entry(delete_list
.next
, struct extent_record
, list
);
6819 list_del_init(&tmp
->list
);
6825 while (!list_empty(&rec
->dups
)) {
6826 tmp
= list_entry(rec
->dups
.next
, struct extent_record
, list
);
6827 list_del_init(&tmp
->list
);
6831 btrfs_free_path(path
);
6833 if (!ret
&& !nr_del
)
6834 rec
->num_duplicates
= 0;
6836 return ret
? ret
: nr_del
;
6839 static int find_possible_backrefs(struct btrfs_fs_info
*info
,
6840 struct btrfs_path
*path
,
6841 struct cache_tree
*extent_cache
,
6842 struct extent_record
*rec
)
6844 struct btrfs_root
*root
;
6845 struct extent_backref
*back
;
6846 struct data_backref
*dback
;
6847 struct cache_extent
*cache
;
6848 struct btrfs_file_extent_item
*fi
;
6849 struct btrfs_key key
;
6853 list_for_each_entry(back
, &rec
->backrefs
, list
) {
6854 /* Don't care about full backrefs (poor unloved backrefs) */
6855 if (back
->full_backref
|| !back
->is_data
)
6858 dback
= (struct data_backref
*)back
;
6860 /* We found this one, we don't need to do a lookup */
6861 if (dback
->found_ref
)
6864 key
.objectid
= dback
->root
;
6865 key
.type
= BTRFS_ROOT_ITEM_KEY
;
6866 key
.offset
= (u64
)-1;
6868 root
= btrfs_read_fs_root(info
, &key
);
6870 /* No root, definitely a bad ref, skip */
6871 if (IS_ERR(root
) && PTR_ERR(root
) == -ENOENT
)
6873 /* Other err, exit */
6875 return PTR_ERR(root
);
6877 key
.objectid
= dback
->owner
;
6878 key
.type
= BTRFS_EXTENT_DATA_KEY
;
6879 key
.offset
= dback
->offset
;
6880 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
6882 btrfs_release_path(path
);
6885 /* Didn't find it, we can carry on */
6890 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
6891 struct btrfs_file_extent_item
);
6892 bytenr
= btrfs_file_extent_disk_bytenr(path
->nodes
[0], fi
);
6893 bytes
= btrfs_file_extent_disk_num_bytes(path
->nodes
[0], fi
);
6894 btrfs_release_path(path
);
6895 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
6897 struct extent_record
*tmp
;
6898 tmp
= container_of(cache
, struct extent_record
, cache
);
6901 * If we found an extent record for the bytenr for this
6902 * particular backref then we can't add it to our
6903 * current extent record. We only want to add backrefs
6904 * that don't have a corresponding extent item in the
6905 * extent tree since they likely belong to this record
6906 * and we need to fix it if it doesn't match bytenrs.
6912 dback
->found_ref
+= 1;
6913 dback
->disk_bytenr
= bytenr
;
6914 dback
->bytes
= bytes
;
6917 * Set this so the verify backref code knows not to trust the
6918 * values in this backref.
6927 * Record orphan data ref into corresponding root.
6929 * Return 0 if the extent item contains data ref and recorded.
6930 * Return 1 if the extent item contains no useful data ref
6931 * On that case, it may contains only shared_dataref or metadata backref
6932 * or the file extent exists(this should be handled by the extent bytenr
6934 * Return <0 if something goes wrong.
6936 static int record_orphan_data_extents(struct btrfs_fs_info
*fs_info
,
6937 struct extent_record
*rec
)
6939 struct btrfs_key key
;
6940 struct btrfs_root
*dest_root
;
6941 struct extent_backref
*back
;
6942 struct data_backref
*dback
;
6943 struct orphan_data_extent
*orphan
;
6944 struct btrfs_path
*path
;
6945 int recorded_data_ref
= 0;
6950 path
= btrfs_alloc_path();
6953 list_for_each_entry(back
, &rec
->backrefs
, list
) {
6954 if (back
->full_backref
|| !back
->is_data
||
6955 !back
->found_extent_tree
)
6957 dback
= (struct data_backref
*)back
;
6958 if (dback
->found_ref
)
6960 key
.objectid
= dback
->root
;
6961 key
.type
= BTRFS_ROOT_ITEM_KEY
;
6962 key
.offset
= (u64
)-1;
6964 dest_root
= btrfs_read_fs_root(fs_info
, &key
);
6966 /* For non-exist root we just skip it */
6967 if (IS_ERR(dest_root
) || !dest_root
)
6970 key
.objectid
= dback
->owner
;
6971 key
.type
= BTRFS_EXTENT_DATA_KEY
;
6972 key
.offset
= dback
->offset
;
6974 ret
= btrfs_search_slot(NULL
, dest_root
, &key
, path
, 0, 0);
6976 * For ret < 0, it's OK since the fs-tree may be corrupted,
6977 * we need to record it for inode/file extent rebuild.
6978 * For ret > 0, we record it only for file extent rebuild.
6979 * For ret == 0, the file extent exists but only bytenr
6980 * mismatch, let the original bytenr fix routine to handle,
6986 orphan
= malloc(sizeof(*orphan
));
6991 INIT_LIST_HEAD(&orphan
->list
);
6992 orphan
->root
= dback
->root
;
6993 orphan
->objectid
= dback
->owner
;
6994 orphan
->offset
= dback
->offset
;
6995 orphan
->disk_bytenr
= rec
->cache
.start
;
6996 orphan
->disk_len
= rec
->cache
.size
;
6997 list_add(&dest_root
->orphan_data_extents
, &orphan
->list
);
6998 recorded_data_ref
= 1;
7001 btrfs_free_path(path
);
7003 return !recorded_data_ref
;
7009 * when an incorrect extent item is found, this will delete
7010 * all of the existing entries for it and recreate them
7011 * based on what the tree scan found.
7013 static int fixup_extent_refs(struct btrfs_fs_info
*info
,
7014 struct cache_tree
*extent_cache
,
7015 struct extent_record
*rec
)
7017 struct btrfs_trans_handle
*trans
= NULL
;
7019 struct btrfs_path
*path
;
7020 struct list_head
*cur
= rec
->backrefs
.next
;
7021 struct cache_extent
*cache
;
7022 struct extent_backref
*back
;
7026 if (rec
->flag_block_full_backref
)
7027 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7029 path
= btrfs_alloc_path();
7033 if (rec
->refs
!= rec
->extent_item_refs
&& !rec
->metadata
) {
7035 * Sometimes the backrefs themselves are so broken they don't
7036 * get attached to any meaningful rec, so first go back and
7037 * check any of our backrefs that we couldn't find and throw
7038 * them into the list if we find the backref so that
7039 * verify_backrefs can figure out what to do.
7041 ret
= find_possible_backrefs(info
, path
, extent_cache
, rec
);
7046 /* step one, make sure all of the backrefs agree */
7047 ret
= verify_backrefs(info
, path
, rec
);
7051 trans
= btrfs_start_transaction(info
->extent_root
, 1);
7052 if (IS_ERR(trans
)) {
7053 ret
= PTR_ERR(trans
);
7057 /* step two, delete all the existing records */
7058 ret
= delete_extent_records(trans
, info
->extent_root
, path
,
7059 rec
->start
, rec
->max_size
);
7064 /* was this block corrupt? If so, don't add references to it */
7065 cache
= lookup_cache_extent(info
->corrupt_blocks
,
7066 rec
->start
, rec
->max_size
);
7072 /* step three, recreate all the refs we did find */
7073 while(cur
!= &rec
->backrefs
) {
7074 back
= list_entry(cur
, struct extent_backref
, list
);
7078 * if we didn't find any references, don't create a
7081 if (!back
->found_ref
)
7084 rec
->bad_full_backref
= 0;
7085 ret
= record_extent(trans
, info
, path
, rec
, back
, allocated
, flags
);
7093 int err
= btrfs_commit_transaction(trans
, info
->extent_root
);
7098 btrfs_free_path(path
);
7102 static int fixup_extent_flags(struct btrfs_fs_info
*fs_info
,
7103 struct extent_record
*rec
)
7105 struct btrfs_trans_handle
*trans
;
7106 struct btrfs_root
*root
= fs_info
->extent_root
;
7107 struct btrfs_path
*path
;
7108 struct btrfs_extent_item
*ei
;
7109 struct btrfs_key key
;
7113 key
.objectid
= rec
->start
;
7114 if (rec
->metadata
) {
7115 key
.type
= BTRFS_METADATA_ITEM_KEY
;
7116 key
.offset
= rec
->info_level
;
7118 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
7119 key
.offset
= rec
->max_size
;
7122 path
= btrfs_alloc_path();
7126 trans
= btrfs_start_transaction(root
, 0);
7127 if (IS_ERR(trans
)) {
7128 btrfs_free_path(path
);
7129 return PTR_ERR(trans
);
7132 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
7134 btrfs_free_path(path
);
7135 btrfs_commit_transaction(trans
, root
);
7138 fprintf(stderr
, "Didn't find extent for %llu\n",
7139 (unsigned long long)rec
->start
);
7140 btrfs_free_path(path
);
7141 btrfs_commit_transaction(trans
, root
);
7145 ei
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
7146 struct btrfs_extent_item
);
7147 flags
= btrfs_extent_flags(path
->nodes
[0], ei
);
7148 if (rec
->flag_block_full_backref
) {
7149 fprintf(stderr
, "setting full backref on %llu\n",
7150 (unsigned long long)key
.objectid
);
7151 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7153 fprintf(stderr
, "clearing full backref on %llu\n",
7154 (unsigned long long)key
.objectid
);
7155 flags
&= ~BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7157 btrfs_set_extent_flags(path
->nodes
[0], ei
, flags
);
7158 btrfs_mark_buffer_dirty(path
->nodes
[0]);
7159 btrfs_free_path(path
);
7160 return btrfs_commit_transaction(trans
, root
);
7163 /* right now we only prune from the extent allocation tree */
7164 static int prune_one_block(struct btrfs_trans_handle
*trans
,
7165 struct btrfs_fs_info
*info
,
7166 struct btrfs_corrupt_block
*corrupt
)
7169 struct btrfs_path path
;
7170 struct extent_buffer
*eb
;
7174 int level
= corrupt
->level
+ 1;
7176 btrfs_init_path(&path
);
7178 /* we want to stop at the parent to our busted block */
7179 path
.lowest_level
= level
;
7181 ret
= btrfs_search_slot(trans
, info
->extent_root
,
7182 &corrupt
->key
, &path
, -1, 1);
7187 eb
= path
.nodes
[level
];
7194 * hopefully the search gave us the block we want to prune,
7195 * lets try that first
7197 slot
= path
.slots
[level
];
7198 found
= btrfs_node_blockptr(eb
, slot
);
7199 if (found
== corrupt
->cache
.start
)
7202 nritems
= btrfs_header_nritems(eb
);
7204 /* the search failed, lets scan this node and hope we find it */
7205 for (slot
= 0; slot
< nritems
; slot
++) {
7206 found
= btrfs_node_blockptr(eb
, slot
);
7207 if (found
== corrupt
->cache
.start
)
7211 * we couldn't find the bad block. TODO, search all the nodes for pointers
7214 if (eb
== info
->extent_root
->node
) {
7219 btrfs_release_path(&path
);
7224 printk("deleting pointer to block %Lu\n", corrupt
->cache
.start
);
7225 ret
= btrfs_del_ptr(trans
, info
->extent_root
, &path
, level
, slot
);
7228 btrfs_release_path(&path
);
7232 static int prune_corrupt_blocks(struct btrfs_fs_info
*info
)
7234 struct btrfs_trans_handle
*trans
= NULL
;
7235 struct cache_extent
*cache
;
7236 struct btrfs_corrupt_block
*corrupt
;
7239 cache
= search_cache_extent(info
->corrupt_blocks
, 0);
7243 trans
= btrfs_start_transaction(info
->extent_root
, 1);
7245 return PTR_ERR(trans
);
7247 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
7248 prune_one_block(trans
, info
, corrupt
);
7249 remove_cache_extent(info
->corrupt_blocks
, cache
);
7252 return btrfs_commit_transaction(trans
, info
->extent_root
);
7256 static void reset_cached_block_groups(struct btrfs_fs_info
*fs_info
)
7258 struct btrfs_block_group_cache
*cache
;
7263 ret
= find_first_extent_bit(&fs_info
->free_space_cache
, 0,
7264 &start
, &end
, EXTENT_DIRTY
);
7267 clear_extent_dirty(&fs_info
->free_space_cache
, start
, end
,
7273 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
7278 start
= cache
->key
.objectid
+ cache
->key
.offset
;
7282 static int check_extent_refs(struct btrfs_root
*root
,
7283 struct cache_tree
*extent_cache
)
7285 struct extent_record
*rec
;
7286 struct cache_extent
*cache
;
7295 * if we're doing a repair, we have to make sure
7296 * we don't allocate from the problem extents.
7297 * In the worst case, this will be all the
7300 cache
= search_cache_extent(extent_cache
, 0);
7302 rec
= container_of(cache
, struct extent_record
, cache
);
7303 set_extent_dirty(root
->fs_info
->excluded_extents
,
7305 rec
->start
+ rec
->max_size
- 1,
7307 cache
= next_cache_extent(cache
);
7310 /* pin down all the corrupted blocks too */
7311 cache
= search_cache_extent(root
->fs_info
->corrupt_blocks
, 0);
7313 set_extent_dirty(root
->fs_info
->excluded_extents
,
7315 cache
->start
+ cache
->size
- 1,
7317 cache
= next_cache_extent(cache
);
7319 prune_corrupt_blocks(root
->fs_info
);
7320 reset_cached_block_groups(root
->fs_info
);
7323 reset_cached_block_groups(root
->fs_info
);
7326 * We need to delete any duplicate entries we find first otherwise we
7327 * could mess up the extent tree when we have backrefs that actually
7328 * belong to a different extent item and not the weird duplicate one.
7330 while (repair
&& !list_empty(&duplicate_extents
)) {
7331 rec
= list_entry(duplicate_extents
.next
, struct extent_record
,
7333 list_del_init(&rec
->list
);
7335 /* Sometimes we can find a backref before we find an actual
7336 * extent, so we need to process it a little bit to see if there
7337 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7338 * if this is a backref screwup. If we need to delete stuff
7339 * process_duplicates() will return 0, otherwise it will return
7342 if (process_duplicates(root
, extent_cache
, rec
))
7344 ret
= delete_duplicate_records(root
, rec
);
7348 * delete_duplicate_records will return the number of entries
7349 * deleted, so if it's greater than 0 then we know we actually
7350 * did something and we need to remove.
7364 cache
= search_cache_extent(extent_cache
, 0);
7367 rec
= container_of(cache
, struct extent_record
, cache
);
7368 if (rec
->num_duplicates
) {
7369 fprintf(stderr
, "extent item %llu has multiple extent "
7370 "items\n", (unsigned long long)rec
->start
);
7375 if (rec
->refs
!= rec
->extent_item_refs
) {
7376 fprintf(stderr
, "ref mismatch on [%llu %llu] ",
7377 (unsigned long long)rec
->start
,
7378 (unsigned long long)rec
->nr
);
7379 fprintf(stderr
, "extent item %llu, found %llu\n",
7380 (unsigned long long)rec
->extent_item_refs
,
7381 (unsigned long long)rec
->refs
);
7382 ret
= record_orphan_data_extents(root
->fs_info
, rec
);
7389 * we can't use the extent to repair file
7390 * extent, let the fallback method handle it.
7392 if (!fixed
&& repair
) {
7393 ret
= fixup_extent_refs(
7404 if (all_backpointers_checked(rec
, 1)) {
7405 fprintf(stderr
, "backpointer mismatch on [%llu %llu]\n",
7406 (unsigned long long)rec
->start
,
7407 (unsigned long long)rec
->nr
);
7409 if (!fixed
&& !recorded
&& repair
) {
7410 ret
= fixup_extent_refs(root
->fs_info
,
7419 if (!rec
->owner_ref_checked
) {
7420 fprintf(stderr
, "owner ref check failed [%llu %llu]\n",
7421 (unsigned long long)rec
->start
,
7422 (unsigned long long)rec
->nr
);
7423 if (!fixed
&& !recorded
&& repair
) {
7424 ret
= fixup_extent_refs(root
->fs_info
,
7433 if (rec
->bad_full_backref
) {
7434 fprintf(stderr
, "bad full backref, on [%llu]\n",
7435 (unsigned long long)rec
->start
);
7437 ret
= fixup_extent_flags(root
->fs_info
, rec
);
7446 remove_cache_extent(extent_cache
, cache
);
7447 free_all_extent_backrefs(rec
);
7448 if (!init_extent_tree
&& repair
&& (!cur_err
|| fixed
))
7449 clear_extent_dirty(root
->fs_info
->excluded_extents
,
7451 rec
->start
+ rec
->max_size
- 1,
7457 if (ret
&& ret
!= -EAGAIN
) {
7458 fprintf(stderr
, "failed to repair damaged filesystem, aborting\n");
7461 struct btrfs_trans_handle
*trans
;
7463 root
= root
->fs_info
->extent_root
;
7464 trans
= btrfs_start_transaction(root
, 1);
7465 if (IS_ERR(trans
)) {
7466 ret
= PTR_ERR(trans
);
7470 btrfs_fix_block_accounting(trans
, root
);
7471 ret
= btrfs_commit_transaction(trans
, root
);
7476 fprintf(stderr
, "repaired damaged extent references\n");
7482 u64
calc_stripe_length(u64 type
, u64 length
, int num_stripes
)
7486 if (type
& BTRFS_BLOCK_GROUP_RAID0
) {
7487 stripe_size
= length
;
7488 stripe_size
/= num_stripes
;
7489 } else if (type
& BTRFS_BLOCK_GROUP_RAID10
) {
7490 stripe_size
= length
* 2;
7491 stripe_size
/= num_stripes
;
7492 } else if (type
& BTRFS_BLOCK_GROUP_RAID5
) {
7493 stripe_size
= length
;
7494 stripe_size
/= (num_stripes
- 1);
7495 } else if (type
& BTRFS_BLOCK_GROUP_RAID6
) {
7496 stripe_size
= length
;
7497 stripe_size
/= (num_stripes
- 2);
7499 stripe_size
= length
;
7505 * Check the chunk with its block group/dev list ref:
7506 * Return 0 if all refs seems valid.
7507 * Return 1 if part of refs seems valid, need later check for rebuild ref
7508 * like missing block group and needs to search extent tree to rebuild them.
7509 * Return -1 if essential refs are missing and unable to rebuild.
7511 static int check_chunk_refs(struct chunk_record
*chunk_rec
,
7512 struct block_group_tree
*block_group_cache
,
7513 struct device_extent_tree
*dev_extent_cache
,
7516 struct cache_extent
*block_group_item
;
7517 struct block_group_record
*block_group_rec
;
7518 struct cache_extent
*dev_extent_item
;
7519 struct device_extent_record
*dev_extent_rec
;
7523 int metadump_v2
= 0;
7527 block_group_item
= lookup_cache_extent(&block_group_cache
->tree
,
7530 if (block_group_item
) {
7531 block_group_rec
= container_of(block_group_item
,
7532 struct block_group_record
,
7534 if (chunk_rec
->length
!= block_group_rec
->offset
||
7535 chunk_rec
->offset
!= block_group_rec
->objectid
||
7537 chunk_rec
->type_flags
!= block_group_rec
->flags
)) {
7540 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7541 chunk_rec
->objectid
,
7546 chunk_rec
->type_flags
,
7547 block_group_rec
->objectid
,
7548 block_group_rec
->type
,
7549 block_group_rec
->offset
,
7550 block_group_rec
->offset
,
7551 block_group_rec
->objectid
,
7552 block_group_rec
->flags
);
7555 list_del_init(&block_group_rec
->list
);
7556 chunk_rec
->bg_rec
= block_group_rec
;
7561 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7562 chunk_rec
->objectid
,
7567 chunk_rec
->type_flags
);
7574 length
= calc_stripe_length(chunk_rec
->type_flags
, chunk_rec
->length
,
7575 chunk_rec
->num_stripes
);
7576 for (i
= 0; i
< chunk_rec
->num_stripes
; ++i
) {
7577 devid
= chunk_rec
->stripes
[i
].devid
;
7578 offset
= chunk_rec
->stripes
[i
].offset
;
7579 dev_extent_item
= lookup_cache_extent2(&dev_extent_cache
->tree
,
7580 devid
, offset
, length
);
7581 if (dev_extent_item
) {
7582 dev_extent_rec
= container_of(dev_extent_item
,
7583 struct device_extent_record
,
7585 if (dev_extent_rec
->objectid
!= devid
||
7586 dev_extent_rec
->offset
!= offset
||
7587 dev_extent_rec
->chunk_offset
!= chunk_rec
->offset
||
7588 dev_extent_rec
->length
!= length
) {
7591 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7592 chunk_rec
->objectid
,
7595 chunk_rec
->stripes
[i
].devid
,
7596 chunk_rec
->stripes
[i
].offset
,
7597 dev_extent_rec
->objectid
,
7598 dev_extent_rec
->offset
,
7599 dev_extent_rec
->length
);
7602 list_move(&dev_extent_rec
->chunk_list
,
7603 &chunk_rec
->dextents
);
7608 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7609 chunk_rec
->objectid
,
7612 chunk_rec
->stripes
[i
].devid
,
7613 chunk_rec
->stripes
[i
].offset
);
7620 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7621 int check_chunks(struct cache_tree
*chunk_cache
,
7622 struct block_group_tree
*block_group_cache
,
7623 struct device_extent_tree
*dev_extent_cache
,
7624 struct list_head
*good
, struct list_head
*bad
,
7625 struct list_head
*rebuild
, int silent
)
7627 struct cache_extent
*chunk_item
;
7628 struct chunk_record
*chunk_rec
;
7629 struct block_group_record
*bg_rec
;
7630 struct device_extent_record
*dext_rec
;
7634 chunk_item
= first_cache_extent(chunk_cache
);
7635 while (chunk_item
) {
7636 chunk_rec
= container_of(chunk_item
, struct chunk_record
,
7638 err
= check_chunk_refs(chunk_rec
, block_group_cache
,
7639 dev_extent_cache
, silent
);
7642 if (err
== 0 && good
)
7643 list_add_tail(&chunk_rec
->list
, good
);
7644 if (err
> 0 && rebuild
)
7645 list_add_tail(&chunk_rec
->list
, rebuild
);
7647 list_add_tail(&chunk_rec
->list
, bad
);
7648 chunk_item
= next_cache_extent(chunk_item
);
7651 list_for_each_entry(bg_rec
, &block_group_cache
->block_groups
, list
) {
7654 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7662 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_chunk_orphans
,
7666 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7677 static int check_device_used(struct device_record
*dev_rec
,
7678 struct device_extent_tree
*dext_cache
)
7680 struct cache_extent
*cache
;
7681 struct device_extent_record
*dev_extent_rec
;
7684 cache
= search_cache_extent2(&dext_cache
->tree
, dev_rec
->devid
, 0);
7686 dev_extent_rec
= container_of(cache
,
7687 struct device_extent_record
,
7689 if (dev_extent_rec
->objectid
!= dev_rec
->devid
)
7692 list_del_init(&dev_extent_rec
->device_list
);
7693 total_byte
+= dev_extent_rec
->length
;
7694 cache
= next_cache_extent(cache
);
7697 if (total_byte
!= dev_rec
->byte_used
) {
7699 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7700 total_byte
, dev_rec
->byte_used
, dev_rec
->objectid
,
7701 dev_rec
->type
, dev_rec
->offset
);
7708 /* check btrfs_dev_item -> btrfs_dev_extent */
7709 static int check_devices(struct rb_root
*dev_cache
,
7710 struct device_extent_tree
*dev_extent_cache
)
7712 struct rb_node
*dev_node
;
7713 struct device_record
*dev_rec
;
7714 struct device_extent_record
*dext_rec
;
7718 dev_node
= rb_first(dev_cache
);
7720 dev_rec
= container_of(dev_node
, struct device_record
, node
);
7721 err
= check_device_used(dev_rec
, dev_extent_cache
);
7725 dev_node
= rb_next(dev_node
);
7727 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_device_orphans
,
7730 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7731 dext_rec
->objectid
, dext_rec
->offset
, dext_rec
->length
);
7738 static int add_root_item_to_list(struct list_head
*head
,
7739 u64 objectid
, u64 bytenr
, u64 last_snapshot
,
7740 u8 level
, u8 drop_level
,
7741 int level_size
, struct btrfs_key
*drop_key
)
7744 struct root_item_record
*ri_rec
;
7745 ri_rec
= malloc(sizeof(*ri_rec
));
7748 ri_rec
->bytenr
= bytenr
;
7749 ri_rec
->objectid
= objectid
;
7750 ri_rec
->level
= level
;
7751 ri_rec
->level_size
= level_size
;
7752 ri_rec
->drop_level
= drop_level
;
7753 ri_rec
->last_snapshot
= last_snapshot
;
7755 memcpy(&ri_rec
->drop_key
, drop_key
, sizeof(*drop_key
));
7756 list_add_tail(&ri_rec
->list
, head
);
7761 static void free_root_item_list(struct list_head
*list
)
7763 struct root_item_record
*ri_rec
;
7765 while (!list_empty(list
)) {
7766 ri_rec
= list_first_entry(list
, struct root_item_record
,
7768 list_del_init(&ri_rec
->list
);
7773 static int deal_root_from_list(struct list_head
*list
,
7774 struct btrfs_root
*root
,
7775 struct block_info
*bits
,
7777 struct cache_tree
*pending
,
7778 struct cache_tree
*seen
,
7779 struct cache_tree
*reada
,
7780 struct cache_tree
*nodes
,
7781 struct cache_tree
*extent_cache
,
7782 struct cache_tree
*chunk_cache
,
7783 struct rb_root
*dev_cache
,
7784 struct block_group_tree
*block_group_cache
,
7785 struct device_extent_tree
*dev_extent_cache
)
7790 while (!list_empty(list
)) {
7791 struct root_item_record
*rec
;
7792 struct extent_buffer
*buf
;
7793 rec
= list_entry(list
->next
,
7794 struct root_item_record
, list
);
7796 buf
= read_tree_block(root
->fs_info
->tree_root
,
7797 rec
->bytenr
, rec
->level_size
, 0);
7798 if (!extent_buffer_uptodate(buf
)) {
7799 free_extent_buffer(buf
);
7803 add_root_to_pending(buf
, extent_cache
, pending
,
7804 seen
, nodes
, rec
->objectid
);
7806 * To rebuild extent tree, we need deal with snapshot
7807 * one by one, otherwise we deal with node firstly which
7808 * can maximize readahead.
7811 ret
= run_next_block(root
, bits
, bits_nr
, &last
,
7812 pending
, seen
, reada
, nodes
,
7813 extent_cache
, chunk_cache
,
7814 dev_cache
, block_group_cache
,
7815 dev_extent_cache
, rec
);
7819 free_extent_buffer(buf
);
7820 list_del(&rec
->list
);
7826 ret
= run_next_block(root
, bits
, bits_nr
, &last
, pending
, seen
,
7827 reada
, nodes
, extent_cache
, chunk_cache
,
7828 dev_cache
, block_group_cache
,
7829 dev_extent_cache
, NULL
);
7839 static int check_chunks_and_extents(struct btrfs_root
*root
)
7841 struct rb_root dev_cache
;
7842 struct cache_tree chunk_cache
;
7843 struct block_group_tree block_group_cache
;
7844 struct device_extent_tree dev_extent_cache
;
7845 struct cache_tree extent_cache
;
7846 struct cache_tree seen
;
7847 struct cache_tree pending
;
7848 struct cache_tree reada
;
7849 struct cache_tree nodes
;
7850 struct extent_io_tree excluded_extents
;
7851 struct cache_tree corrupt_blocks
;
7852 struct btrfs_path path
;
7853 struct btrfs_key key
;
7854 struct btrfs_key found_key
;
7856 struct block_info
*bits
;
7858 struct extent_buffer
*leaf
;
7860 struct btrfs_root_item ri
;
7861 struct list_head dropping_trees
;
7862 struct list_head normal_trees
;
7863 struct btrfs_root
*root1
;
7868 dev_cache
= RB_ROOT
;
7869 cache_tree_init(&chunk_cache
);
7870 block_group_tree_init(&block_group_cache
);
7871 device_extent_tree_init(&dev_extent_cache
);
7873 cache_tree_init(&extent_cache
);
7874 cache_tree_init(&seen
);
7875 cache_tree_init(&pending
);
7876 cache_tree_init(&nodes
);
7877 cache_tree_init(&reada
);
7878 cache_tree_init(&corrupt_blocks
);
7879 extent_io_tree_init(&excluded_extents
);
7880 INIT_LIST_HEAD(&dropping_trees
);
7881 INIT_LIST_HEAD(&normal_trees
);
7884 root
->fs_info
->excluded_extents
= &excluded_extents
;
7885 root
->fs_info
->fsck_extent_cache
= &extent_cache
;
7886 root
->fs_info
->free_extent_hook
= free_extent_hook
;
7887 root
->fs_info
->corrupt_blocks
= &corrupt_blocks
;
7891 bits
= malloc(bits_nr
* sizeof(struct block_info
));
7898 root1
= root
->fs_info
->tree_root
;
7899 level
= btrfs_header_level(root1
->node
);
7900 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
7901 root1
->node
->start
, 0, level
, 0,
7902 btrfs_level_size(root1
, level
), NULL
);
7905 root1
= root
->fs_info
->chunk_root
;
7906 level
= btrfs_header_level(root1
->node
);
7907 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
7908 root1
->node
->start
, 0, level
, 0,
7909 btrfs_level_size(root1
, level
), NULL
);
7912 btrfs_init_path(&path
);
7915 btrfs_set_key_type(&key
, BTRFS_ROOT_ITEM_KEY
);
7916 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
,
7921 leaf
= path
.nodes
[0];
7922 slot
= path
.slots
[0];
7923 if (slot
>= btrfs_header_nritems(path
.nodes
[0])) {
7924 ret
= btrfs_next_leaf(root
, &path
);
7927 leaf
= path
.nodes
[0];
7928 slot
= path
.slots
[0];
7930 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
7931 if (btrfs_key_type(&found_key
) == BTRFS_ROOT_ITEM_KEY
) {
7932 unsigned long offset
;
7935 offset
= btrfs_item_ptr_offset(leaf
, path
.slots
[0]);
7936 read_extent_buffer(leaf
, &ri
, offset
, sizeof(ri
));
7937 last_snapshot
= btrfs_root_last_snapshot(&ri
);
7938 if (btrfs_disk_key_objectid(&ri
.drop_progress
) == 0) {
7939 level
= btrfs_root_level(&ri
);
7940 level_size
= btrfs_level_size(root
, level
);
7941 ret
= add_root_item_to_list(&normal_trees
,
7943 btrfs_root_bytenr(&ri
),
7944 last_snapshot
, level
,
7945 0, level_size
, NULL
);
7949 level
= btrfs_root_level(&ri
);
7950 level_size
= btrfs_level_size(root
, level
);
7951 objectid
= found_key
.objectid
;
7952 btrfs_disk_key_to_cpu(&found_key
,
7954 ret
= add_root_item_to_list(&dropping_trees
,
7956 btrfs_root_bytenr(&ri
),
7957 last_snapshot
, level
,
7959 level_size
, &found_key
);
7966 btrfs_release_path(&path
);
7969 * check_block can return -EAGAIN if it fixes something, please keep
7970 * this in mind when dealing with return values from these functions, if
7971 * we get -EAGAIN we want to fall through and restart the loop.
7973 ret
= deal_root_from_list(&normal_trees
, root
, bits
, bits_nr
, &pending
,
7974 &seen
, &reada
, &nodes
, &extent_cache
,
7975 &chunk_cache
, &dev_cache
, &block_group_cache
,
7982 ret
= deal_root_from_list(&dropping_trees
, root
, bits
, bits_nr
,
7983 &pending
, &seen
, &reada
, &nodes
,
7984 &extent_cache
, &chunk_cache
, &dev_cache
,
7985 &block_group_cache
, &dev_extent_cache
);
7992 err
= check_chunks(&chunk_cache
, &block_group_cache
,
7993 &dev_extent_cache
, NULL
, NULL
, NULL
, 0);
8001 ret
= check_extent_refs(root
, &extent_cache
);
8008 err
= check_devices(&dev_cache
, &dev_extent_cache
);
8014 free_corrupt_blocks_tree(root
->fs_info
->corrupt_blocks
);
8015 extent_io_tree_cleanup(&excluded_extents
);
8016 root
->fs_info
->fsck_extent_cache
= NULL
;
8017 root
->fs_info
->free_extent_hook
= NULL
;
8018 root
->fs_info
->corrupt_blocks
= NULL
;
8019 root
->fs_info
->excluded_extents
= NULL
;
8022 free_chunk_cache_tree(&chunk_cache
);
8023 free_device_cache_tree(&dev_cache
);
8024 free_block_group_tree(&block_group_cache
);
8025 free_device_extent_tree(&dev_extent_cache
);
8026 free_extent_cache_tree(&seen
);
8027 free_extent_cache_tree(&pending
);
8028 free_extent_cache_tree(&reada
);
8029 free_extent_cache_tree(&nodes
);
8032 free_corrupt_blocks_tree(root
->fs_info
->corrupt_blocks
);
8033 free_extent_cache_tree(&seen
);
8034 free_extent_cache_tree(&pending
);
8035 free_extent_cache_tree(&reada
);
8036 free_extent_cache_tree(&nodes
);
8037 free_chunk_cache_tree(&chunk_cache
);
8038 free_block_group_tree(&block_group_cache
);
8039 free_device_cache_tree(&dev_cache
);
8040 free_device_extent_tree(&dev_extent_cache
);
8041 free_extent_record_cache(root
->fs_info
, &extent_cache
);
8042 free_root_item_list(&normal_trees
);
8043 free_root_item_list(&dropping_trees
);
8044 extent_io_tree_cleanup(&excluded_extents
);
8048 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle
*trans
,
8049 struct btrfs_root
*root
, int overwrite
)
8051 struct extent_buffer
*c
;
8052 struct extent_buffer
*old
= root
->node
;
8055 struct btrfs_disk_key disk_key
= {0,0,0};
8061 extent_buffer_get(c
);
8064 c
= btrfs_alloc_free_block(trans
, root
,
8065 btrfs_level_size(root
, 0),
8066 root
->root_key
.objectid
,
8067 &disk_key
, level
, 0, 0);
8070 extent_buffer_get(c
);
8074 memset_extent_buffer(c
, 0, 0, sizeof(struct btrfs_header
));
8075 btrfs_set_header_level(c
, level
);
8076 btrfs_set_header_bytenr(c
, c
->start
);
8077 btrfs_set_header_generation(c
, trans
->transid
);
8078 btrfs_set_header_backref_rev(c
, BTRFS_MIXED_BACKREF_REV
);
8079 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
8081 write_extent_buffer(c
, root
->fs_info
->fsid
,
8082 btrfs_header_fsid(), BTRFS_FSID_SIZE
);
8084 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
8085 btrfs_header_chunk_tree_uuid(c
),
8088 btrfs_mark_buffer_dirty(c
);
8090 * this case can happen in the following case:
8092 * 1.overwrite previous root.
8094 * 2.reinit reloc data root, this is because we skip pin
8095 * down reloc data tree before which means we can allocate
8096 * same block bytenr here.
8098 if (old
->start
== c
->start
) {
8099 btrfs_set_root_generation(&root
->root_item
,
8101 root
->root_item
.level
= btrfs_header_level(root
->node
);
8102 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
8103 &root
->root_key
, &root
->root_item
);
8105 free_extent_buffer(c
);
8109 free_extent_buffer(old
);
8111 add_root_to_dirty_list(root
);
8115 static int pin_down_tree_blocks(struct btrfs_fs_info
*fs_info
,
8116 struct extent_buffer
*eb
, int tree_root
)
8118 struct extent_buffer
*tmp
;
8119 struct btrfs_root_item
*ri
;
8120 struct btrfs_key key
;
8123 int level
= btrfs_header_level(eb
);
8129 * If we have pinned this block before, don't pin it again.
8130 * This can not only avoid forever loop with broken filesystem
8131 * but also give us some speedups.
8133 if (test_range_bit(&fs_info
->pinned_extents
, eb
->start
,
8134 eb
->start
+ eb
->len
- 1, EXTENT_DIRTY
, 0))
8137 btrfs_pin_extent(fs_info
, eb
->start
, eb
->len
);
8139 leafsize
= btrfs_super_leafsize(fs_info
->super_copy
);
8140 nritems
= btrfs_header_nritems(eb
);
8141 for (i
= 0; i
< nritems
; i
++) {
8143 btrfs_item_key_to_cpu(eb
, &key
, i
);
8144 if (key
.type
!= BTRFS_ROOT_ITEM_KEY
)
8146 /* Skip the extent root and reloc roots */
8147 if (key
.objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
8148 key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
||
8149 key
.objectid
== BTRFS_DATA_RELOC_TREE_OBJECTID
)
8151 ri
= btrfs_item_ptr(eb
, i
, struct btrfs_root_item
);
8152 bytenr
= btrfs_disk_root_bytenr(eb
, ri
);
8155 * If at any point we start needing the real root we
8156 * will have to build a stump root for the root we are
8157 * in, but for now this doesn't actually use the root so
8158 * just pass in extent_root.
8160 tmp
= read_tree_block(fs_info
->extent_root
, bytenr
,
8162 if (!extent_buffer_uptodate(tmp
)) {
8163 fprintf(stderr
, "Error reading root block\n");
8166 ret
= pin_down_tree_blocks(fs_info
, tmp
, 0);
8167 free_extent_buffer(tmp
);
8171 bytenr
= btrfs_node_blockptr(eb
, i
);
8173 /* If we aren't the tree root don't read the block */
8174 if (level
== 1 && !tree_root
) {
8175 btrfs_pin_extent(fs_info
, bytenr
, leafsize
);
8179 tmp
= read_tree_block(fs_info
->extent_root
, bytenr
,
8181 if (!extent_buffer_uptodate(tmp
)) {
8182 fprintf(stderr
, "Error reading tree block\n");
8185 ret
= pin_down_tree_blocks(fs_info
, tmp
, tree_root
);
8186 free_extent_buffer(tmp
);
8195 static int pin_metadata_blocks(struct btrfs_fs_info
*fs_info
)
8199 ret
= pin_down_tree_blocks(fs_info
, fs_info
->chunk_root
->node
, 0);
8203 return pin_down_tree_blocks(fs_info
, fs_info
->tree_root
->node
, 1);
8206 static int reset_block_groups(struct btrfs_fs_info
*fs_info
)
8208 struct btrfs_block_group_cache
*cache
;
8209 struct btrfs_path
*path
;
8210 struct extent_buffer
*leaf
;
8211 struct btrfs_chunk
*chunk
;
8212 struct btrfs_key key
;
8216 path
= btrfs_alloc_path();
8221 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
8224 ret
= btrfs_search_slot(NULL
, fs_info
->chunk_root
, &key
, path
, 0, 0);
8226 btrfs_free_path(path
);
8231 * We do this in case the block groups were screwed up and had alloc
8232 * bits that aren't actually set on the chunks. This happens with
8233 * restored images every time and could happen in real life I guess.
8235 fs_info
->avail_data_alloc_bits
= 0;
8236 fs_info
->avail_metadata_alloc_bits
= 0;
8237 fs_info
->avail_system_alloc_bits
= 0;
8239 /* First we need to create the in-memory block groups */
8241 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
8242 ret
= btrfs_next_leaf(fs_info
->chunk_root
, path
);
8244 btrfs_free_path(path
);
8252 leaf
= path
->nodes
[0];
8253 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
8254 if (key
.type
!= BTRFS_CHUNK_ITEM_KEY
) {
8259 chunk
= btrfs_item_ptr(leaf
, path
->slots
[0],
8260 struct btrfs_chunk
);
8261 btrfs_add_block_group(fs_info
, 0,
8262 btrfs_chunk_type(leaf
, chunk
),
8263 key
.objectid
, key
.offset
,
8264 btrfs_chunk_length(leaf
, chunk
));
8265 set_extent_dirty(&fs_info
->free_space_cache
, key
.offset
,
8266 key
.offset
+ btrfs_chunk_length(leaf
, chunk
),
8272 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8276 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8279 btrfs_free_path(path
);
8283 static int reset_balance(struct btrfs_trans_handle
*trans
,
8284 struct btrfs_fs_info
*fs_info
)
8286 struct btrfs_root
*root
= fs_info
->tree_root
;
8287 struct btrfs_path
*path
;
8288 struct extent_buffer
*leaf
;
8289 struct btrfs_key key
;
8290 int del_slot
, del_nr
= 0;
8294 path
= btrfs_alloc_path();
8298 key
.objectid
= BTRFS_BALANCE_OBJECTID
;
8299 key
.type
= BTRFS_BALANCE_ITEM_KEY
;
8302 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
8307 goto reinit_data_reloc
;
8312 ret
= btrfs_del_item(trans
, root
, path
);
8315 btrfs_release_path(path
);
8317 key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
8318 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8321 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
8325 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
8330 ret
= btrfs_del_items(trans
, root
, path
,
8337 btrfs_release_path(path
);
8340 ret
= btrfs_search_slot(trans
, root
, &key
, path
,
8347 leaf
= path
->nodes
[0];
8348 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
8349 if (key
.objectid
> BTRFS_TREE_RELOC_OBJECTID
)
8351 if (key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
8356 del_slot
= path
->slots
[0];
8365 ret
= btrfs_del_items(trans
, root
, path
, del_slot
, del_nr
);
8369 btrfs_release_path(path
);
8372 key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
8373 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8374 key
.offset
= (u64
)-1;
8375 root
= btrfs_read_fs_root(fs_info
, &key
);
8377 fprintf(stderr
, "Error reading data reloc tree\n");
8378 ret
= PTR_ERR(root
);
8381 record_root_in_trans(trans
, root
);
8382 ret
= btrfs_fsck_reinit_root(trans
, root
, 0);
8385 ret
= btrfs_make_root_dir(trans
, root
, BTRFS_FIRST_FREE_OBJECTID
);
8387 btrfs_free_path(path
);
8391 static int reinit_extent_tree(struct btrfs_trans_handle
*trans
,
8392 struct btrfs_fs_info
*fs_info
)
8398 * The only reason we don't do this is because right now we're just
8399 * walking the trees we find and pinning down their bytes, we don't look
8400 * at any of the leaves. In order to do mixed groups we'd have to check
8401 * the leaves of any fs roots and pin down the bytes for any file
8402 * extents we find. Not hard but why do it if we don't have to?
8404 if (btrfs_fs_incompat(fs_info
, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS
)) {
8405 fprintf(stderr
, "We don't support re-initing the extent tree "
8406 "for mixed block groups yet, please notify a btrfs "
8407 "developer you want to do this so they can add this "
8408 "functionality.\n");
8413 * first we need to walk all of the trees except the extent tree and pin
8414 * down the bytes that are in use so we don't overwrite any existing
8417 ret
= pin_metadata_blocks(fs_info
);
8419 fprintf(stderr
, "error pinning down used bytes\n");
8424 * Need to drop all the block groups since we're going to recreate all
8427 btrfs_free_block_groups(fs_info
);
8428 ret
= reset_block_groups(fs_info
);
8430 fprintf(stderr
, "error resetting the block groups\n");
8434 /* Ok we can allocate now, reinit the extent root */
8435 ret
= btrfs_fsck_reinit_root(trans
, fs_info
->extent_root
, 0);
8437 fprintf(stderr
, "extent root initialization failed\n");
8439 * When the transaction code is updated we should end the
8440 * transaction, but for now progs only knows about commit so
8441 * just return an error.
8447 * Now we have all the in-memory block groups setup so we can make
8448 * allocations properly, and the metadata we care about is safe since we
8449 * pinned all of it above.
8452 struct btrfs_block_group_cache
*cache
;
8454 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8457 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8458 ret
= btrfs_insert_item(trans
, fs_info
->extent_root
,
8459 &cache
->key
, &cache
->item
,
8460 sizeof(cache
->item
));
8462 fprintf(stderr
, "Error adding block group\n");
8465 btrfs_extent_post_op(trans
, fs_info
->extent_root
);
8468 ret
= reset_balance(trans
, fs_info
);
8470 fprintf(stderr
, "error reseting the pending balance\n");
8475 static int recow_extent_buffer(struct btrfs_root
*root
, struct extent_buffer
*eb
)
8477 struct btrfs_path
*path
;
8478 struct btrfs_trans_handle
*trans
;
8479 struct btrfs_key key
;
8482 printf("Recowing metadata block %llu\n", eb
->start
);
8483 key
.objectid
= btrfs_header_owner(eb
);
8484 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8485 key
.offset
= (u64
)-1;
8487 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
8489 fprintf(stderr
, "Couldn't find owner root %llu\n",
8491 return PTR_ERR(root
);
8494 path
= btrfs_alloc_path();
8498 trans
= btrfs_start_transaction(root
, 1);
8499 if (IS_ERR(trans
)) {
8500 btrfs_free_path(path
);
8501 return PTR_ERR(trans
);
8504 path
->lowest_level
= btrfs_header_level(eb
);
8505 if (path
->lowest_level
)
8506 btrfs_node_key_to_cpu(eb
, &key
, 0);
8508 btrfs_item_key_to_cpu(eb
, &key
, 0);
8510 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
8511 btrfs_commit_transaction(trans
, root
);
8512 btrfs_free_path(path
);
8516 static int delete_bad_item(struct btrfs_root
*root
, struct bad_item
*bad
)
8518 struct btrfs_path
*path
;
8519 struct btrfs_trans_handle
*trans
;
8520 struct btrfs_key key
;
8523 printf("Deleting bad item [%llu,%u,%llu]\n", bad
->key
.objectid
,
8524 bad
->key
.type
, bad
->key
.offset
);
8525 key
.objectid
= bad
->root_id
;
8526 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8527 key
.offset
= (u64
)-1;
8529 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
8531 fprintf(stderr
, "Couldn't find owner root %llu\n",
8533 return PTR_ERR(root
);
8536 path
= btrfs_alloc_path();
8540 trans
= btrfs_start_transaction(root
, 1);
8541 if (IS_ERR(trans
)) {
8542 btrfs_free_path(path
);
8543 return PTR_ERR(trans
);
8546 ret
= btrfs_search_slot(trans
, root
, &bad
->key
, path
, -1, 1);
8552 ret
= btrfs_del_item(trans
, root
, path
);
8554 btrfs_commit_transaction(trans
, root
);
8555 btrfs_free_path(path
);
8559 static int zero_log_tree(struct btrfs_root
*root
)
8561 struct btrfs_trans_handle
*trans
;
8564 trans
= btrfs_start_transaction(root
, 1);
8565 if (IS_ERR(trans
)) {
8566 ret
= PTR_ERR(trans
);
8569 btrfs_set_super_log_root(root
->fs_info
->super_copy
, 0);
8570 btrfs_set_super_log_root_level(root
->fs_info
->super_copy
, 0);
8571 ret
= btrfs_commit_transaction(trans
, root
);
8575 static int populate_csum(struct btrfs_trans_handle
*trans
,
8576 struct btrfs_root
*csum_root
, char *buf
, u64 start
,
8583 while (offset
< len
) {
8584 sectorsize
= csum_root
->sectorsize
;
8585 ret
= read_extent_data(csum_root
, buf
, start
+ offset
,
8589 ret
= btrfs_csum_file_block(trans
, csum_root
, start
+ len
,
8590 start
+ offset
, buf
, sectorsize
);
8593 offset
+= sectorsize
;
8598 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle
*trans
,
8599 struct btrfs_root
*csum_root
,
8600 struct btrfs_root
*cur_root
)
8602 struct btrfs_path
*path
;
8603 struct btrfs_key key
;
8604 struct extent_buffer
*node
;
8605 struct btrfs_file_extent_item
*fi
;
8612 path
= btrfs_alloc_path();
8615 buf
= malloc(cur_root
->fs_info
->csum_root
->sectorsize
);
8625 ret
= btrfs_search_slot(NULL
, cur_root
, &key
, path
, 0, 0);
8628 /* Iterate all regular file extents and fill its csum */
8630 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
8632 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
8634 node
= path
->nodes
[0];
8635 slot
= path
->slots
[0];
8636 fi
= btrfs_item_ptr(node
, slot
, struct btrfs_file_extent_item
);
8637 if (btrfs_file_extent_type(node
, fi
) != BTRFS_FILE_EXTENT_REG
)
8639 start
= btrfs_file_extent_disk_bytenr(node
, fi
);
8640 len
= btrfs_file_extent_disk_num_bytes(node
, fi
);
8642 ret
= populate_csum(trans
, csum_root
, buf
, start
, len
);
8649 * TODO: if next leaf is corrupted, jump to nearest next valid
8652 ret
= btrfs_next_item(cur_root
, path
);
8662 btrfs_free_path(path
);
8667 static int fill_csum_tree_from_fs(struct btrfs_trans_handle
*trans
,
8668 struct btrfs_root
*csum_root
)
8670 struct btrfs_fs_info
*fs_info
= csum_root
->fs_info
;
8671 struct btrfs_path
*path
;
8672 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
8673 struct btrfs_root
*cur_root
;
8674 struct extent_buffer
*node
;
8675 struct btrfs_key key
;
8679 path
= btrfs_alloc_path();
8683 key
.objectid
= BTRFS_FS_TREE_OBJECTID
;
8685 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8687 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, path
, 0, 0);
8696 node
= path
->nodes
[0];
8697 slot
= path
->slots
[0];
8698 btrfs_item_key_to_cpu(node
, &key
, slot
);
8699 if (key
.objectid
> BTRFS_LAST_FREE_OBJECTID
)
8701 if (key
.type
!= BTRFS_ROOT_ITEM_KEY
)
8703 if (!is_fstree(key
.objectid
))
8705 key
.offset
= (u64
)-1;
8707 cur_root
= btrfs_read_fs_root(fs_info
, &key
);
8708 if (IS_ERR(cur_root
) || !cur_root
) {
8709 fprintf(stderr
, "Fail to read fs/subvol tree: %lld\n",
8713 ret
= fill_csum_tree_from_one_fs_root(trans
, csum_root
,
8718 ret
= btrfs_next_item(tree_root
, path
);
8728 btrfs_free_path(path
);
8732 static int fill_csum_tree_from_extent(struct btrfs_trans_handle
*trans
,
8733 struct btrfs_root
*csum_root
)
8735 struct btrfs_root
*extent_root
= csum_root
->fs_info
->extent_root
;
8736 struct btrfs_path
*path
;
8737 struct btrfs_extent_item
*ei
;
8738 struct extent_buffer
*leaf
;
8740 struct btrfs_key key
;
8743 path
= btrfs_alloc_path();
8748 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
8751 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
8753 btrfs_free_path(path
);
8757 buf
= malloc(csum_root
->sectorsize
);
8759 btrfs_free_path(path
);
8764 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
8765 ret
= btrfs_next_leaf(extent_root
, path
);
8773 leaf
= path
->nodes
[0];
8775 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
8776 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
8781 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
8782 struct btrfs_extent_item
);
8783 if (!(btrfs_extent_flags(leaf
, ei
) &
8784 BTRFS_EXTENT_FLAG_DATA
)) {
8789 ret
= populate_csum(trans
, csum_root
, buf
, key
.objectid
,
8796 btrfs_free_path(path
);
8802 * Recalculate the csum and put it into the csum tree.
8804 * Extent tree init will wipe out all the extent info, so in that case, we
8805 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
8806 * will use fs/subvol trees to init the csum tree.
8808 static int fill_csum_tree(struct btrfs_trans_handle
*trans
,
8809 struct btrfs_root
*csum_root
,
8813 return fill_csum_tree_from_fs(trans
, csum_root
);
8815 return fill_csum_tree_from_extent(trans
, csum_root
);
8818 struct root_item_info
{
8819 /* level of the root */
8821 /* number of nodes at this level, must be 1 for a root */
8825 struct cache_extent cache_extent
;
8828 static struct cache_tree
*roots_info_cache
= NULL
;
8830 static void free_roots_info_cache(void)
8832 if (!roots_info_cache
)
8835 while (!cache_tree_empty(roots_info_cache
)) {
8836 struct cache_extent
*entry
;
8837 struct root_item_info
*rii
;
8839 entry
= first_cache_extent(roots_info_cache
);
8842 remove_cache_extent(roots_info_cache
, entry
);
8843 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
8847 free(roots_info_cache
);
8848 roots_info_cache
= NULL
;
8851 static int build_roots_info_cache(struct btrfs_fs_info
*info
)
8854 struct btrfs_key key
;
8855 struct extent_buffer
*leaf
;
8856 struct btrfs_path
*path
;
8858 if (!roots_info_cache
) {
8859 roots_info_cache
= malloc(sizeof(*roots_info_cache
));
8860 if (!roots_info_cache
)
8862 cache_tree_init(roots_info_cache
);
8865 path
= btrfs_alloc_path();
8870 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
8873 ret
= btrfs_search_slot(NULL
, info
->extent_root
, &key
, path
, 0, 0);
8876 leaf
= path
->nodes
[0];
8879 struct btrfs_key found_key
;
8880 struct btrfs_extent_item
*ei
;
8881 struct btrfs_extent_inline_ref
*iref
;
8882 int slot
= path
->slots
[0];
8887 struct cache_extent
*entry
;
8888 struct root_item_info
*rii
;
8890 if (slot
>= btrfs_header_nritems(leaf
)) {
8891 ret
= btrfs_next_leaf(info
->extent_root
, path
);
8898 leaf
= path
->nodes
[0];
8899 slot
= path
->slots
[0];
8902 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
8904 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
8905 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
)
8908 ei
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_item
);
8909 flags
= btrfs_extent_flags(leaf
, ei
);
8911 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
8912 !(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
))
8915 if (found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
8916 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
8917 level
= found_key
.offset
;
8919 struct btrfs_tree_block_info
*info
;
8921 info
= (struct btrfs_tree_block_info
*)(ei
+ 1);
8922 iref
= (struct btrfs_extent_inline_ref
*)(info
+ 1);
8923 level
= btrfs_tree_block_level(leaf
, info
);
8927 * For a root extent, it must be of the following type and the
8928 * first (and only one) iref in the item.
8930 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
8931 if (type
!= BTRFS_TREE_BLOCK_REF_KEY
)
8934 root_id
= btrfs_extent_inline_ref_offset(leaf
, iref
);
8935 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
8937 rii
= malloc(sizeof(struct root_item_info
));
8942 rii
->cache_extent
.start
= root_id
;
8943 rii
->cache_extent
.size
= 1;
8944 rii
->level
= (u8
)-1;
8945 entry
= &rii
->cache_extent
;
8946 ret
= insert_cache_extent(roots_info_cache
, entry
);
8949 rii
= container_of(entry
, struct root_item_info
,
8953 ASSERT(rii
->cache_extent
.start
== root_id
);
8954 ASSERT(rii
->cache_extent
.size
== 1);
8956 if (level
> rii
->level
|| rii
->level
== (u8
)-1) {
8958 rii
->bytenr
= found_key
.objectid
;
8959 rii
->gen
= btrfs_extent_generation(leaf
, ei
);
8960 rii
->node_count
= 1;
8961 } else if (level
== rii
->level
) {
8969 btrfs_free_path(path
);
8974 static int maybe_repair_root_item(struct btrfs_fs_info
*info
,
8975 struct btrfs_path
*path
,
8976 const struct btrfs_key
*root_key
,
8977 const int read_only_mode
)
8979 const u64 root_id
= root_key
->objectid
;
8980 struct cache_extent
*entry
;
8981 struct root_item_info
*rii
;
8982 struct btrfs_root_item ri
;
8983 unsigned long offset
;
8985 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
8988 "Error: could not find extent items for root %llu\n",
8989 root_key
->objectid
);
8993 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
8994 ASSERT(rii
->cache_extent
.start
== root_id
);
8995 ASSERT(rii
->cache_extent
.size
== 1);
8997 if (rii
->node_count
!= 1) {
8999 "Error: could not find btree root extent for root %llu\n",
9004 offset
= btrfs_item_ptr_offset(path
->nodes
[0], path
->slots
[0]);
9005 read_extent_buffer(path
->nodes
[0], &ri
, offset
, sizeof(ri
));
9007 if (btrfs_root_bytenr(&ri
) != rii
->bytenr
||
9008 btrfs_root_level(&ri
) != rii
->level
||
9009 btrfs_root_generation(&ri
) != rii
->gen
) {
9012 * If we're in repair mode but our caller told us to not update
9013 * the root item, i.e. just check if it needs to be updated, don't
9014 * print this message, since the caller will call us again shortly
9015 * for the same root item without read only mode (the caller will
9016 * open a transaction first).
9018 if (!(read_only_mode
&& repair
))
9020 "%sroot item for root %llu,"
9021 " current bytenr %llu, current gen %llu, current level %u,"
9022 " new bytenr %llu, new gen %llu, new level %u\n",
9023 (read_only_mode
? "" : "fixing "),
9025 btrfs_root_bytenr(&ri
), btrfs_root_generation(&ri
),
9026 btrfs_root_level(&ri
),
9027 rii
->bytenr
, rii
->gen
, rii
->level
);
9029 if (btrfs_root_generation(&ri
) > rii
->gen
) {
9031 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9032 root_id
, btrfs_root_generation(&ri
), rii
->gen
);
9036 if (!read_only_mode
) {
9037 btrfs_set_root_bytenr(&ri
, rii
->bytenr
);
9038 btrfs_set_root_level(&ri
, rii
->level
);
9039 btrfs_set_root_generation(&ri
, rii
->gen
);
9040 write_extent_buffer(path
->nodes
[0], &ri
,
9041 offset
, sizeof(ri
));
9051 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9052 * caused read-only snapshots to be corrupted if they were created at a moment
9053 * when the source subvolume/snapshot had orphan items. The issue was that the
9054 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9055 * node instead of the post orphan cleanup root node.
9056 * So this function, and its callees, just detects and fixes those cases. Even
9057 * though the regression was for read-only snapshots, this function applies to
9058 * any snapshot/subvolume root.
9059 * This must be run before any other repair code - not doing it so, makes other
9060 * repair code delete or modify backrefs in the extent tree for example, which
9061 * will result in an inconsistent fs after repairing the root items.
9063 static int repair_root_items(struct btrfs_fs_info
*info
)
9065 struct btrfs_path
*path
= NULL
;
9066 struct btrfs_key key
;
9067 struct extent_buffer
*leaf
;
9068 struct btrfs_trans_handle
*trans
= NULL
;
9073 ret
= build_roots_info_cache(info
);
9077 path
= btrfs_alloc_path();
9083 key
.objectid
= BTRFS_FIRST_FREE_OBJECTID
;
9084 key
.type
= BTRFS_ROOT_ITEM_KEY
;
9089 * Avoid opening and committing transactions if a leaf doesn't have
9090 * any root items that need to be fixed, so that we avoid rotating
9091 * backup roots unnecessarily.
9094 trans
= btrfs_start_transaction(info
->tree_root
, 1);
9095 if (IS_ERR(trans
)) {
9096 ret
= PTR_ERR(trans
);
9101 ret
= btrfs_search_slot(trans
, info
->tree_root
, &key
, path
,
9105 leaf
= path
->nodes
[0];
9108 struct btrfs_key found_key
;
9110 if (path
->slots
[0] >= btrfs_header_nritems(leaf
)) {
9111 int no_more_keys
= find_next_key(path
, &key
);
9113 btrfs_release_path(path
);
9115 ret
= btrfs_commit_transaction(trans
,
9127 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
9129 if (found_key
.type
!= BTRFS_ROOT_ITEM_KEY
)
9131 if (found_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
9134 ret
= maybe_repair_root_item(info
, path
, &found_key
,
9139 if (!trans
&& repair
) {
9142 btrfs_release_path(path
);
9152 free_roots_info_cache();
9154 btrfs_free_path(path
);
9156 btrfs_commit_transaction(trans
, info
->tree_root
);
9163 const char * const cmd_check_usage
[] = {
9164 "btrfs check [options] <device>",
9165 "Check an unmounted btrfs filesystem.",
9167 "-s|--super <superblock> use this superblock copy",
9168 "-b|--backup use the backup root copy",
9169 "--repair try to repair the filesystem",
9170 "--init-csum-tree create a new CRC tree",
9171 "--init-extent-tree create a new extent tree",
9172 "--check-data-csum verify checkums of data blocks",
9173 "--qgroup-report print a report on qgroup consistency",
9174 "--subvol-extents <subvolid> print subvolume extents and sharing state",
9175 "--tree-root <bytenr> use the given bytenr for the tree root",
9179 int cmd_check(int argc
, char **argv
)
9181 struct cache_tree root_cache
;
9182 struct btrfs_root
*root
;
9183 struct btrfs_fs_info
*info
;
9186 u64 tree_root_bytenr
= 0;
9187 char uuidbuf
[BTRFS_UUID_UNPARSED_SIZE
];
9190 int init_csum_tree
= 0;
9192 int qgroup_report
= 0;
9193 enum btrfs_open_ctree_flags ctree_flags
= OPEN_CTREE_EXCLUSIVE
;
9197 enum { OPT_REPAIR
= 257, OPT_INIT_CSUM
, OPT_INIT_EXTENT
,
9198 OPT_CHECK_CSUM
, OPT_READONLY
};
9199 static const struct option long_options
[] = {
9200 { "super", required_argument
, NULL
, 's' },
9201 { "repair", no_argument
, NULL
, OPT_REPAIR
},
9202 { "readonly", no_argument
, NULL
, OPT_READONLY
},
9203 { "init-csum-tree", no_argument
, NULL
, OPT_INIT_CSUM
},
9204 { "init-extent-tree", no_argument
, NULL
, OPT_INIT_EXTENT
},
9205 { "check-data-csum", no_argument
, NULL
, OPT_CHECK_CSUM
},
9206 { "backup", no_argument
, NULL
, 'b' },
9207 { "subvol-extents", required_argument
, NULL
, 'E' },
9208 { "qgroup-report", no_argument
, NULL
, 'Q' },
9209 { "tree-root", required_argument
, NULL
, 'r' },
9213 c
= getopt_long(argc
, argv
, "as:br:", long_options
, NULL
);
9217 case 'a': /* ignored */ break;
9219 ctree_flags
|= OPEN_CTREE_BACKUP_ROOT
;
9222 num
= arg_strtou64(optarg
);
9223 if (num
>= BTRFS_SUPER_MIRROR_MAX
) {
9225 "ERROR: super mirror should be less than: %d\n",
9226 BTRFS_SUPER_MIRROR_MAX
);
9229 bytenr
= btrfs_sb_offset(((int)num
));
9230 printf("using SB copy %llu, bytenr %llu\n", num
,
9231 (unsigned long long)bytenr
);
9237 subvolid
= arg_strtou64(optarg
);
9240 tree_root_bytenr
= arg_strtou64(optarg
);
9244 usage(cmd_check_usage
);
9246 printf("enabling repair mode\n");
9248 ctree_flags
|= OPEN_CTREE_WRITES
;
9254 printf("Creating a new CRC tree\n");
9257 ctree_flags
|= OPEN_CTREE_WRITES
;
9259 case OPT_INIT_EXTENT
:
9260 init_extent_tree
= 1;
9261 ctree_flags
|= (OPEN_CTREE_WRITES
|
9262 OPEN_CTREE_NO_BLOCK_GROUPS
);
9265 case OPT_CHECK_CSUM
:
9266 check_data_csum
= 1;
9270 argc
= argc
- optind
;
9272 if (check_argc_exact(argc
, 1))
9273 usage(cmd_check_usage
);
9275 /* This check is the only reason for --readonly to exist */
9276 if (readonly
&& repair
) {
9277 fprintf(stderr
, "Repair options are not compatible with --readonly\n");
9282 cache_tree_init(&root_cache
);
9284 if((ret
= check_mounted(argv
[optind
])) < 0) {
9285 fprintf(stderr
, "Could not check mount status: %s\n", strerror(-ret
));
9288 fprintf(stderr
, "%s is currently mounted. Aborting.\n", argv
[optind
]);
9293 /* only allow partial opening under repair mode */
9295 ctree_flags
|= OPEN_CTREE_PARTIAL
;
9297 info
= open_ctree_fs_info(argv
[optind
], bytenr
, tree_root_bytenr
,
9300 fprintf(stderr
, "Couldn't open file system\n");
9305 root
= info
->fs_root
;
9308 * repair mode will force us to commit transaction which
9309 * will make us fail to load log tree when mounting.
9311 if (repair
&& btrfs_super_log_root(info
->super_copy
)) {
9312 ret
= ask_user("repair mode will force to clear out log tree, Are you sure?");
9317 ret
= zero_log_tree(root
);
9319 fprintf(stderr
, "fail to zero log tree\n");
9324 uuid_unparse(info
->super_copy
->fsid
, uuidbuf
);
9325 if (qgroup_report
) {
9326 printf("Print quota groups for %s\nUUID: %s\n", argv
[optind
],
9328 ret
= qgroup_verify_all(info
);
9330 print_qgroup_report(1);
9334 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9335 subvolid
, argv
[optind
], uuidbuf
);
9336 ret
= print_extent_state(info
, subvolid
);
9339 printf("Checking filesystem on %s\nUUID: %s\n", argv
[optind
], uuidbuf
);
9341 if (!extent_buffer_uptodate(info
->tree_root
->node
) ||
9342 !extent_buffer_uptodate(info
->dev_root
->node
) ||
9343 !extent_buffer_uptodate(info
->chunk_root
->node
)) {
9344 fprintf(stderr
, "Critical roots corrupted, unable to fsck the FS\n");
9349 if (init_extent_tree
|| init_csum_tree
) {
9350 struct btrfs_trans_handle
*trans
;
9352 trans
= btrfs_start_transaction(info
->extent_root
, 0);
9353 if (IS_ERR(trans
)) {
9354 fprintf(stderr
, "Error starting transaction\n");
9355 ret
= PTR_ERR(trans
);
9359 if (init_extent_tree
) {
9360 printf("Creating a new extent tree\n");
9361 ret
= reinit_extent_tree(trans
, info
);
9366 if (init_csum_tree
) {
9367 fprintf(stderr
, "Reinit crc root\n");
9368 ret
= btrfs_fsck_reinit_root(trans
, info
->csum_root
, 0);
9370 fprintf(stderr
, "crc root initialization failed\n");
9375 ret
= fill_csum_tree(trans
, info
->csum_root
,
9378 fprintf(stderr
, "crc refilling failed\n");
9383 * Ok now we commit and run the normal fsck, which will add
9384 * extent entries for all of the items it finds.
9386 ret
= btrfs_commit_transaction(trans
, info
->extent_root
);
9390 if (!extent_buffer_uptodate(info
->extent_root
->node
)) {
9391 fprintf(stderr
, "Critical roots corrupted, unable to fsck the FS\n");
9395 if (!extent_buffer_uptodate(info
->csum_root
->node
)) {
9396 fprintf(stderr
, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9401 fprintf(stderr
, "checking extents\n");
9402 ret
= check_chunks_and_extents(root
);
9404 fprintf(stderr
, "Errors found in extent allocation tree or chunk allocation\n");
9406 ret
= repair_root_items(info
);
9410 fprintf(stderr
, "Fixed %d roots.\n", ret
);
9412 } else if (ret
> 0) {
9414 "Found %d roots with an outdated root item.\n",
9417 "Please run a filesystem check with the option --repair to fix them.\n");
9422 fprintf(stderr
, "checking free space cache\n");
9423 ret
= check_space_cache(root
);
9428 * We used to have to have these hole extents in between our real
9429 * extents so if we don't have this flag set we need to make sure there
9430 * are no gaps in the file extents for inodes, otherwise we can just
9431 * ignore it when this happens.
9433 no_holes
= btrfs_fs_incompat(root
->fs_info
,
9434 BTRFS_FEATURE_INCOMPAT_NO_HOLES
);
9435 fprintf(stderr
, "checking fs roots\n");
9436 ret
= check_fs_roots(root
, &root_cache
);
9440 fprintf(stderr
, "checking csums\n");
9441 ret
= check_csums(root
);
9445 fprintf(stderr
, "checking root refs\n");
9446 ret
= check_root_refs(root
, &root_cache
);
9450 while (repair
&& !list_empty(&root
->fs_info
->recow_ebs
)) {
9451 struct extent_buffer
*eb
;
9453 eb
= list_first_entry(&root
->fs_info
->recow_ebs
,
9454 struct extent_buffer
, recow
);
9455 list_del_init(&eb
->recow
);
9456 ret
= recow_extent_buffer(root
, eb
);
9461 while (!list_empty(&delete_items
)) {
9462 struct bad_item
*bad
;
9464 bad
= list_first_entry(&delete_items
, struct bad_item
, list
);
9465 list_del_init(&bad
->list
);
9467 ret
= delete_bad_item(root
, bad
);
9471 if (info
->quota_enabled
) {
9473 fprintf(stderr
, "checking quota groups\n");
9474 err
= qgroup_verify_all(info
);
9479 if (!list_empty(&root
->fs_info
->recow_ebs
)) {
9480 fprintf(stderr
, "Transid errors in file system\n");
9484 print_qgroup_report(0);
9485 if (found_old_backref
) { /*
9486 * there was a disk format change when mixed
9487 * backref was in testing tree. The old format
9488 * existed about one week.
9490 printf("\n * Found old mixed backref format. "
9491 "The old format is not supported! *"
9492 "\n * Please mount the FS in readonly mode, "
9493 "backup data and re-format the FS. *\n\n");
9496 printf("found %llu bytes used err is %d\n",
9497 (unsigned long long)bytes_used
, ret
);
9498 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes
);
9499 printf("total tree bytes: %llu\n",
9500 (unsigned long long)total_btree_bytes
);
9501 printf("total fs tree bytes: %llu\n",
9502 (unsigned long long)total_fs_tree_bytes
);
9503 printf("total extent tree bytes: %llu\n",
9504 (unsigned long long)total_extent_tree_bytes
);
9505 printf("btree space waste bytes: %llu\n",
9506 (unsigned long long)btree_space_waste
);
9507 printf("file data blocks allocated: %llu\n referenced %llu\n",
9508 (unsigned long long)data_bytes_allocated
,
9509 (unsigned long long)data_bytes_referenced
);
9510 printf("%s\n", PACKAGE_STRING
);
9512 free_root_recs_tree(&root_cache
);