2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #define _XOPEN_SOURCE 500
25 #include <sys/types.h>
29 #include <uuid/uuid.h>
34 #include "print-tree.h"
35 #include "transaction.h"
39 #include "free-space-cache.h"
41 #include "qgroup-verify.h"
42 #include "rbtree-utils.h"
46 static u64 bytes_used
= 0;
47 static u64 total_csum_bytes
= 0;
48 static u64 total_btree_bytes
= 0;
49 static u64 total_fs_tree_bytes
= 0;
50 static u64 total_extent_tree_bytes
= 0;
51 static u64 btree_space_waste
= 0;
52 static u64 data_bytes_allocated
= 0;
53 static u64 data_bytes_referenced
= 0;
54 static int found_old_backref
= 0;
55 static LIST_HEAD(duplicate_extents
);
56 static LIST_HEAD(delete_items
);
57 static int repair
= 0;
58 static int no_holes
= 0;
59 static int init_extent_tree
= 0;
60 static int check_data_csum
= 0;
62 struct extent_backref
{
63 struct list_head list
;
64 unsigned int is_data
:1;
65 unsigned int found_extent_tree
:1;
66 unsigned int full_backref
:1;
67 unsigned int found_ref
:1;
68 unsigned int broken
:1;
72 struct extent_backref node
;
87 struct extent_backref node
;
94 struct extent_record
{
95 struct list_head backrefs
;
96 struct list_head dups
;
97 struct list_head list
;
98 struct cache_extent cache
;
99 struct btrfs_disk_key parent_key
;
104 u64 extent_item_refs
;
106 u64 parent_generation
;
110 unsigned int found_rec
:1;
111 unsigned int content_checked
:1;
112 unsigned int owner_ref_checked
:1;
113 unsigned int is_root
:1;
114 unsigned int metadata
:1;
117 struct inode_backref
{
118 struct list_head list
;
119 unsigned int found_dir_item
:1;
120 unsigned int found_dir_index
:1;
121 unsigned int found_inode_ref
:1;
122 unsigned int filetype
:8;
124 unsigned int ref_type
;
131 struct dropping_root_item_record
{
132 struct list_head list
;
133 struct btrfs_root_item ri
;
134 struct btrfs_key found_key
;
137 #define REF_ERR_NO_DIR_ITEM (1 << 0)
138 #define REF_ERR_NO_DIR_INDEX (1 << 1)
139 #define REF_ERR_NO_INODE_REF (1 << 2)
140 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
141 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
142 #define REF_ERR_DUP_INODE_REF (1 << 5)
143 #define REF_ERR_INDEX_UNMATCH (1 << 6)
144 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
145 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
146 #define REF_ERR_NO_ROOT_REF (1 << 9)
147 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
148 #define REF_ERR_DUP_ROOT_REF (1 << 11)
149 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
151 struct inode_record
{
152 struct list_head backrefs
;
153 unsigned int checked
:1;
154 unsigned int merging
:1;
155 unsigned int found_inode_item
:1;
156 unsigned int found_dir_item
:1;
157 unsigned int found_file_extent
:1;
158 unsigned int found_csum_item
:1;
159 unsigned int some_csum_missing
:1;
160 unsigned int nodatasum
:1;
173 u64 first_extent_gap
;
178 #define I_ERR_NO_INODE_ITEM (1 << 0)
179 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
180 #define I_ERR_DUP_INODE_ITEM (1 << 2)
181 #define I_ERR_DUP_DIR_INDEX (1 << 3)
182 #define I_ERR_ODD_DIR_ITEM (1 << 4)
183 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
184 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
185 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
186 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
187 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
188 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
189 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
190 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
191 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
193 struct root_backref
{
194 struct list_head list
;
195 unsigned int found_dir_item
:1;
196 unsigned int found_dir_index
:1;
197 unsigned int found_back_ref
:1;
198 unsigned int found_forward_ref
:1;
199 unsigned int reachable
:1;
209 struct list_head backrefs
;
210 struct cache_extent cache
;
211 unsigned int found_root_item
:1;
217 struct cache_extent cache
;
222 struct cache_extent cache
;
223 struct cache_tree root_cache
;
224 struct cache_tree inode_cache
;
225 struct inode_record
*current
;
234 struct walk_control
{
235 struct cache_tree shared
;
236 struct shared_node
*nodes
[BTRFS_MAX_LEVEL
];
242 struct btrfs_key key
;
244 struct list_head list
;
247 static void reset_cached_block_groups(struct btrfs_fs_info
*fs_info
);
249 static void record_root_in_trans(struct btrfs_trans_handle
*trans
,
250 struct btrfs_root
*root
)
252 if (root
->last_trans
!= trans
->transid
) {
253 root
->track_dirty
= 1;
254 root
->last_trans
= trans
->transid
;
255 root
->commit_root
= root
->node
;
256 extent_buffer_get(root
->node
);
260 static u8
imode_to_type(u32 imode
)
263 static unsigned char btrfs_type_by_mode
[S_IFMT
>> S_SHIFT
] = {
264 [S_IFREG
>> S_SHIFT
] = BTRFS_FT_REG_FILE
,
265 [S_IFDIR
>> S_SHIFT
] = BTRFS_FT_DIR
,
266 [S_IFCHR
>> S_SHIFT
] = BTRFS_FT_CHRDEV
,
267 [S_IFBLK
>> S_SHIFT
] = BTRFS_FT_BLKDEV
,
268 [S_IFIFO
>> S_SHIFT
] = BTRFS_FT_FIFO
,
269 [S_IFSOCK
>> S_SHIFT
] = BTRFS_FT_SOCK
,
270 [S_IFLNK
>> S_SHIFT
] = BTRFS_FT_SYMLINK
,
273 return btrfs_type_by_mode
[(imode
& S_IFMT
) >> S_SHIFT
];
277 static int device_record_compare(struct rb_node
*node1
, struct rb_node
*node2
)
279 struct device_record
*rec1
;
280 struct device_record
*rec2
;
282 rec1
= rb_entry(node1
, struct device_record
, node
);
283 rec2
= rb_entry(node2
, struct device_record
, node
);
284 if (rec1
->devid
> rec2
->devid
)
286 else if (rec1
->devid
< rec2
->devid
)
292 static struct inode_record
*clone_inode_rec(struct inode_record
*orig_rec
)
294 struct inode_record
*rec
;
295 struct inode_backref
*backref
;
296 struct inode_backref
*orig
;
299 rec
= malloc(sizeof(*rec
));
300 memcpy(rec
, orig_rec
, sizeof(*rec
));
302 INIT_LIST_HEAD(&rec
->backrefs
);
304 list_for_each_entry(orig
, &orig_rec
->backrefs
, list
) {
305 size
= sizeof(*orig
) + orig
->namelen
+ 1;
306 backref
= malloc(size
);
307 memcpy(backref
, orig
, size
);
308 list_add_tail(&backref
->list
, &rec
->backrefs
);
313 static void print_inode_error(struct btrfs_root
*root
, struct inode_record
*rec
)
315 u64 root_objectid
= root
->root_key
.objectid
;
316 int errors
= rec
->errors
;
320 /* reloc root errors, we print its corresponding fs root objectid*/
321 if (root_objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
322 root_objectid
= root
->root_key
.offset
;
323 fprintf(stderr
, "reloc");
325 fprintf(stderr
, "root %llu inode %llu errors %x",
326 (unsigned long long) root_objectid
,
327 (unsigned long long) rec
->ino
, rec
->errors
);
329 if (errors
& I_ERR_NO_INODE_ITEM
)
330 fprintf(stderr
, ", no inode item");
331 if (errors
& I_ERR_NO_ORPHAN_ITEM
)
332 fprintf(stderr
, ", no orphan item");
333 if (errors
& I_ERR_DUP_INODE_ITEM
)
334 fprintf(stderr
, ", dup inode item");
335 if (errors
& I_ERR_DUP_DIR_INDEX
)
336 fprintf(stderr
, ", dup dir index");
337 if (errors
& I_ERR_ODD_DIR_ITEM
)
338 fprintf(stderr
, ", odd dir item");
339 if (errors
& I_ERR_ODD_FILE_EXTENT
)
340 fprintf(stderr
, ", odd file extent");
341 if (errors
& I_ERR_BAD_FILE_EXTENT
)
342 fprintf(stderr
, ", bad file extent");
343 if (errors
& I_ERR_FILE_EXTENT_OVERLAP
)
344 fprintf(stderr
, ", file extent overlap");
345 if (errors
& I_ERR_FILE_EXTENT_DISCOUNT
)
346 fprintf(stderr
, ", file extent discount");
347 if (errors
& I_ERR_DIR_ISIZE_WRONG
)
348 fprintf(stderr
, ", dir isize wrong");
349 if (errors
& I_ERR_FILE_NBYTES_WRONG
)
350 fprintf(stderr
, ", nbytes wrong");
351 if (errors
& I_ERR_ODD_CSUM_ITEM
)
352 fprintf(stderr
, ", odd csum item");
353 if (errors
& I_ERR_SOME_CSUM_MISSING
)
354 fprintf(stderr
, ", some csum missing");
355 if (errors
& I_ERR_LINK_COUNT_WRONG
)
356 fprintf(stderr
, ", link count wrong");
357 fprintf(stderr
, "\n");
360 static void print_ref_error(int errors
)
362 if (errors
& REF_ERR_NO_DIR_ITEM
)
363 fprintf(stderr
, ", no dir item");
364 if (errors
& REF_ERR_NO_DIR_INDEX
)
365 fprintf(stderr
, ", no dir index");
366 if (errors
& REF_ERR_NO_INODE_REF
)
367 fprintf(stderr
, ", no inode ref");
368 if (errors
& REF_ERR_DUP_DIR_ITEM
)
369 fprintf(stderr
, ", dup dir item");
370 if (errors
& REF_ERR_DUP_DIR_INDEX
)
371 fprintf(stderr
, ", dup dir index");
372 if (errors
& REF_ERR_DUP_INODE_REF
)
373 fprintf(stderr
, ", dup inode ref");
374 if (errors
& REF_ERR_INDEX_UNMATCH
)
375 fprintf(stderr
, ", index unmatch");
376 if (errors
& REF_ERR_FILETYPE_UNMATCH
)
377 fprintf(stderr
, ", filetype unmatch");
378 if (errors
& REF_ERR_NAME_TOO_LONG
)
379 fprintf(stderr
, ", name too long");
380 if (errors
& REF_ERR_NO_ROOT_REF
)
381 fprintf(stderr
, ", no root ref");
382 if (errors
& REF_ERR_NO_ROOT_BACKREF
)
383 fprintf(stderr
, ", no root backref");
384 if (errors
& REF_ERR_DUP_ROOT_REF
)
385 fprintf(stderr
, ", dup root ref");
386 if (errors
& REF_ERR_DUP_ROOT_BACKREF
)
387 fprintf(stderr
, ", dup root backref");
388 fprintf(stderr
, "\n");
391 static struct inode_record
*get_inode_rec(struct cache_tree
*inode_cache
,
394 struct ptr_node
*node
;
395 struct cache_extent
*cache
;
396 struct inode_record
*rec
= NULL
;
399 cache
= lookup_cache_extent(inode_cache
, ino
, 1);
401 node
= container_of(cache
, struct ptr_node
, cache
);
403 if (mod
&& rec
->refs
> 1) {
404 node
->data
= clone_inode_rec(rec
);
409 rec
= calloc(1, sizeof(*rec
));
411 rec
->extent_start
= (u64
)-1;
412 rec
->first_extent_gap
= (u64
)-1;
414 INIT_LIST_HEAD(&rec
->backrefs
);
416 node
= malloc(sizeof(*node
));
417 node
->cache
.start
= ino
;
418 node
->cache
.size
= 1;
421 if (ino
== BTRFS_FREE_INO_OBJECTID
)
424 ret
= insert_cache_extent(inode_cache
, &node
->cache
);
430 static void free_inode_rec(struct inode_record
*rec
)
432 struct inode_backref
*backref
;
437 while (!list_empty(&rec
->backrefs
)) {
438 backref
= list_entry(rec
->backrefs
.next
,
439 struct inode_backref
, list
);
440 list_del(&backref
->list
);
446 static int can_free_inode_rec(struct inode_record
*rec
)
448 if (!rec
->errors
&& rec
->checked
&& rec
->found_inode_item
&&
449 rec
->nlink
== rec
->found_link
&& list_empty(&rec
->backrefs
))
454 static void maybe_free_inode_rec(struct cache_tree
*inode_cache
,
455 struct inode_record
*rec
)
457 struct cache_extent
*cache
;
458 struct inode_backref
*tmp
, *backref
;
459 struct ptr_node
*node
;
460 unsigned char filetype
;
462 if (!rec
->found_inode_item
)
465 filetype
= imode_to_type(rec
->imode
);
466 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
467 if (backref
->found_dir_item
&& backref
->found_dir_index
) {
468 if (backref
->filetype
!= filetype
)
469 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
470 if (!backref
->errors
&& backref
->found_inode_ref
) {
471 list_del(&backref
->list
);
477 if (!rec
->checked
|| rec
->merging
)
480 if (S_ISDIR(rec
->imode
)) {
481 if (rec
->found_size
!= rec
->isize
)
482 rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
483 if (rec
->found_file_extent
)
484 rec
->errors
|= I_ERR_ODD_FILE_EXTENT
;
485 } else if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
486 if (rec
->found_dir_item
)
487 rec
->errors
|= I_ERR_ODD_DIR_ITEM
;
488 if (rec
->found_size
!= rec
->nbytes
)
489 rec
->errors
|= I_ERR_FILE_NBYTES_WRONG
;
490 if (rec
->extent_start
== (u64
)-1 || rec
->extent_start
> 0)
491 rec
->first_extent_gap
= 0;
492 if (rec
->nlink
> 0 && !no_holes
&&
493 (rec
->extent_end
< rec
->isize
||
494 rec
->first_extent_gap
< rec
->isize
))
495 rec
->errors
|= I_ERR_FILE_EXTENT_DISCOUNT
;
498 if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
499 if (rec
->found_csum_item
&& rec
->nodatasum
)
500 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
501 if (rec
->some_csum_missing
&& !rec
->nodatasum
)
502 rec
->errors
|= I_ERR_SOME_CSUM_MISSING
;
505 BUG_ON(rec
->refs
!= 1);
506 if (can_free_inode_rec(rec
)) {
507 cache
= lookup_cache_extent(inode_cache
, rec
->ino
, 1);
508 node
= container_of(cache
, struct ptr_node
, cache
);
509 BUG_ON(node
->data
!= rec
);
510 remove_cache_extent(inode_cache
, &node
->cache
);
516 static int check_orphan_item(struct btrfs_root
*root
, u64 ino
)
518 struct btrfs_path path
;
519 struct btrfs_key key
;
522 key
.objectid
= BTRFS_ORPHAN_OBJECTID
;
523 key
.type
= BTRFS_ORPHAN_ITEM_KEY
;
526 btrfs_init_path(&path
);
527 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
528 btrfs_release_path(&path
);
534 static int process_inode_item(struct extent_buffer
*eb
,
535 int slot
, struct btrfs_key
*key
,
536 struct shared_node
*active_node
)
538 struct inode_record
*rec
;
539 struct btrfs_inode_item
*item
;
541 rec
= active_node
->current
;
542 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
543 if (rec
->found_inode_item
) {
544 rec
->errors
|= I_ERR_DUP_INODE_ITEM
;
547 item
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_item
);
548 rec
->nlink
= btrfs_inode_nlink(eb
, item
);
549 rec
->isize
= btrfs_inode_size(eb
, item
);
550 rec
->nbytes
= btrfs_inode_nbytes(eb
, item
);
551 rec
->imode
= btrfs_inode_mode(eb
, item
);
552 if (btrfs_inode_flags(eb
, item
) & BTRFS_INODE_NODATASUM
)
554 rec
->found_inode_item
= 1;
556 rec
->errors
|= I_ERR_NO_ORPHAN_ITEM
;
557 maybe_free_inode_rec(&active_node
->inode_cache
, rec
);
561 static struct inode_backref
*get_inode_backref(struct inode_record
*rec
,
563 int namelen
, u64 dir
)
565 struct inode_backref
*backref
;
567 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
568 if (rec
->ino
== BTRFS_MULTIPLE_OBJECTIDS
)
570 if (backref
->dir
!= dir
|| backref
->namelen
!= namelen
)
572 if (memcmp(name
, backref
->name
, namelen
))
577 backref
= malloc(sizeof(*backref
) + namelen
+ 1);
578 memset(backref
, 0, sizeof(*backref
));
580 backref
->namelen
= namelen
;
581 memcpy(backref
->name
, name
, namelen
);
582 backref
->name
[namelen
] = '\0';
583 list_add_tail(&backref
->list
, &rec
->backrefs
);
587 static int add_inode_backref(struct cache_tree
*inode_cache
,
588 u64 ino
, u64 dir
, u64 index
,
589 const char *name
, int namelen
,
590 int filetype
, int itemtype
, int errors
)
592 struct inode_record
*rec
;
593 struct inode_backref
*backref
;
595 rec
= get_inode_rec(inode_cache
, ino
, 1);
596 backref
= get_inode_backref(rec
, name
, namelen
, dir
);
598 backref
->errors
|= errors
;
599 if (itemtype
== BTRFS_DIR_INDEX_KEY
) {
600 if (backref
->found_dir_index
)
601 backref
->errors
|= REF_ERR_DUP_DIR_INDEX
;
602 if (backref
->found_inode_ref
&& backref
->index
!= index
)
603 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
604 if (backref
->found_dir_item
&& backref
->filetype
!= filetype
)
605 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
607 backref
->index
= index
;
608 backref
->filetype
= filetype
;
609 backref
->found_dir_index
= 1;
610 } else if (itemtype
== BTRFS_DIR_ITEM_KEY
) {
612 if (backref
->found_dir_item
)
613 backref
->errors
|= REF_ERR_DUP_DIR_ITEM
;
614 if (backref
->found_dir_index
&& backref
->filetype
!= filetype
)
615 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
617 backref
->filetype
= filetype
;
618 backref
->found_dir_item
= 1;
619 } else if ((itemtype
== BTRFS_INODE_REF_KEY
) ||
620 (itemtype
== BTRFS_INODE_EXTREF_KEY
)) {
621 if (backref
->found_inode_ref
)
622 backref
->errors
|= REF_ERR_DUP_INODE_REF
;
623 if (backref
->found_dir_index
&& backref
->index
!= index
)
624 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
626 backref
->index
= index
;
628 backref
->ref_type
= itemtype
;
629 backref
->found_inode_ref
= 1;
634 maybe_free_inode_rec(inode_cache
, rec
);
638 static int merge_inode_recs(struct inode_record
*src
, struct inode_record
*dst
,
639 struct cache_tree
*dst_cache
)
641 struct inode_backref
*backref
;
645 list_for_each_entry(backref
, &src
->backrefs
, list
) {
646 if (backref
->found_dir_index
) {
647 add_inode_backref(dst_cache
, dst
->ino
, backref
->dir
,
648 backref
->index
, backref
->name
,
649 backref
->namelen
, backref
->filetype
,
650 BTRFS_DIR_INDEX_KEY
, backref
->errors
);
652 if (backref
->found_dir_item
) {
654 add_inode_backref(dst_cache
, dst
->ino
,
655 backref
->dir
, 0, backref
->name
,
656 backref
->namelen
, backref
->filetype
,
657 BTRFS_DIR_ITEM_KEY
, backref
->errors
);
659 if (backref
->found_inode_ref
) {
660 add_inode_backref(dst_cache
, dst
->ino
,
661 backref
->dir
, backref
->index
,
662 backref
->name
, backref
->namelen
, 0,
663 backref
->ref_type
, backref
->errors
);
667 if (src
->found_dir_item
)
668 dst
->found_dir_item
= 1;
669 if (src
->found_file_extent
)
670 dst
->found_file_extent
= 1;
671 if (src
->found_csum_item
)
672 dst
->found_csum_item
= 1;
673 if (src
->some_csum_missing
)
674 dst
->some_csum_missing
= 1;
675 if (dst
->first_extent_gap
> src
->first_extent_gap
)
676 dst
->first_extent_gap
= src
->first_extent_gap
;
678 BUG_ON(src
->found_link
< dir_count
);
679 dst
->found_link
+= src
->found_link
- dir_count
;
680 dst
->found_size
+= src
->found_size
;
681 if (src
->extent_start
!= (u64
)-1) {
682 if (dst
->extent_start
== (u64
)-1) {
683 dst
->extent_start
= src
->extent_start
;
684 dst
->extent_end
= src
->extent_end
;
686 if (dst
->extent_end
> src
->extent_start
)
687 dst
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
688 else if (dst
->extent_end
< src
->extent_start
&&
689 dst
->extent_end
< dst
->first_extent_gap
)
690 dst
->first_extent_gap
= dst
->extent_end
;
691 if (dst
->extent_end
< src
->extent_end
)
692 dst
->extent_end
= src
->extent_end
;
696 dst
->errors
|= src
->errors
;
697 if (src
->found_inode_item
) {
698 if (!dst
->found_inode_item
) {
699 dst
->nlink
= src
->nlink
;
700 dst
->isize
= src
->isize
;
701 dst
->nbytes
= src
->nbytes
;
702 dst
->imode
= src
->imode
;
703 dst
->nodatasum
= src
->nodatasum
;
704 dst
->found_inode_item
= 1;
706 dst
->errors
|= I_ERR_DUP_INODE_ITEM
;
714 static int splice_shared_node(struct shared_node
*src_node
,
715 struct shared_node
*dst_node
)
717 struct cache_extent
*cache
;
718 struct ptr_node
*node
, *ins
;
719 struct cache_tree
*src
, *dst
;
720 struct inode_record
*rec
, *conflict
;
725 if (--src_node
->refs
== 0)
727 if (src_node
->current
)
728 current_ino
= src_node
->current
->ino
;
730 src
= &src_node
->root_cache
;
731 dst
= &dst_node
->root_cache
;
733 cache
= search_cache_extent(src
, 0);
735 node
= container_of(cache
, struct ptr_node
, cache
);
737 cache
= next_cache_extent(cache
);
740 remove_cache_extent(src
, &node
->cache
);
743 ins
= malloc(sizeof(*ins
));
744 ins
->cache
.start
= node
->cache
.start
;
745 ins
->cache
.size
= node
->cache
.size
;
749 ret
= insert_cache_extent(dst
, &ins
->cache
);
750 if (ret
== -EEXIST
) {
751 conflict
= get_inode_rec(dst
, rec
->ino
, 1);
752 merge_inode_recs(rec
, conflict
, dst
);
754 conflict
->checked
= 1;
755 if (dst_node
->current
== conflict
)
756 dst_node
->current
= NULL
;
758 maybe_free_inode_rec(dst
, conflict
);
766 if (src
== &src_node
->root_cache
) {
767 src
= &src_node
->inode_cache
;
768 dst
= &dst_node
->inode_cache
;
772 if (current_ino
> 0 && (!dst_node
->current
||
773 current_ino
> dst_node
->current
->ino
)) {
774 if (dst_node
->current
) {
775 dst_node
->current
->checked
= 1;
776 maybe_free_inode_rec(dst
, dst_node
->current
);
778 dst_node
->current
= get_inode_rec(dst
, current_ino
, 1);
783 static void free_inode_ptr(struct cache_extent
*cache
)
785 struct ptr_node
*node
;
786 struct inode_record
*rec
;
788 node
= container_of(cache
, struct ptr_node
, cache
);
794 FREE_EXTENT_CACHE_BASED_TREE(inode_recs
, free_inode_ptr
);
796 static struct shared_node
*find_shared_node(struct cache_tree
*shared
,
799 struct cache_extent
*cache
;
800 struct shared_node
*node
;
802 cache
= lookup_cache_extent(shared
, bytenr
, 1);
804 node
= container_of(cache
, struct shared_node
, cache
);
810 static int add_shared_node(struct cache_tree
*shared
, u64 bytenr
, u32 refs
)
813 struct shared_node
*node
;
815 node
= calloc(1, sizeof(*node
));
816 node
->cache
.start
= bytenr
;
817 node
->cache
.size
= 1;
818 cache_tree_init(&node
->root_cache
);
819 cache_tree_init(&node
->inode_cache
);
822 ret
= insert_cache_extent(shared
, &node
->cache
);
827 static int enter_shared_node(struct btrfs_root
*root
, u64 bytenr
, u32 refs
,
828 struct walk_control
*wc
, int level
)
830 struct shared_node
*node
;
831 struct shared_node
*dest
;
833 if (level
== wc
->active_node
)
836 BUG_ON(wc
->active_node
<= level
);
837 node
= find_shared_node(&wc
->shared
, bytenr
);
839 add_shared_node(&wc
->shared
, bytenr
, refs
);
840 node
= find_shared_node(&wc
->shared
, bytenr
);
841 wc
->nodes
[level
] = node
;
842 wc
->active_node
= level
;
846 if (wc
->root_level
== wc
->active_node
&&
847 btrfs_root_refs(&root
->root_item
) == 0) {
848 if (--node
->refs
== 0) {
849 free_inode_recs_tree(&node
->root_cache
);
850 free_inode_recs_tree(&node
->inode_cache
);
851 remove_cache_extent(&wc
->shared
, &node
->cache
);
857 dest
= wc
->nodes
[wc
->active_node
];
858 splice_shared_node(node
, dest
);
859 if (node
->refs
== 0) {
860 remove_cache_extent(&wc
->shared
, &node
->cache
);
866 static int leave_shared_node(struct btrfs_root
*root
,
867 struct walk_control
*wc
, int level
)
869 struct shared_node
*node
;
870 struct shared_node
*dest
;
873 if (level
== wc
->root_level
)
876 for (i
= level
+ 1; i
< BTRFS_MAX_LEVEL
; i
++) {
880 BUG_ON(i
>= BTRFS_MAX_LEVEL
);
882 node
= wc
->nodes
[wc
->active_node
];
883 wc
->nodes
[wc
->active_node
] = NULL
;
886 dest
= wc
->nodes
[wc
->active_node
];
887 if (wc
->active_node
< wc
->root_level
||
888 btrfs_root_refs(&root
->root_item
) > 0) {
889 BUG_ON(node
->refs
<= 1);
890 splice_shared_node(node
, dest
);
892 BUG_ON(node
->refs
< 2);
901 * 1 - if the root with id child_root_id is a child of root parent_root_id
902 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
903 * has other root(s) as parent(s)
904 * 2 - if the root child_root_id doesn't have any parent roots
906 static int is_child_root(struct btrfs_root
*root
, u64 parent_root_id
,
909 struct btrfs_path path
;
910 struct btrfs_key key
;
911 struct extent_buffer
*leaf
;
915 btrfs_init_path(&path
);
917 key
.objectid
= parent_root_id
;
918 key
.type
= BTRFS_ROOT_REF_KEY
;
919 key
.offset
= child_root_id
;
920 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
924 btrfs_release_path(&path
);
928 key
.objectid
= child_root_id
;
929 key
.type
= BTRFS_ROOT_BACKREF_KEY
;
931 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
937 leaf
= path
.nodes
[0];
938 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
939 ret
= btrfs_next_leaf(root
->fs_info
->tree_root
, &path
);
942 leaf
= path
.nodes
[0];
945 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
946 if (key
.objectid
!= child_root_id
||
947 key
.type
!= BTRFS_ROOT_BACKREF_KEY
)
952 if (key
.offset
== parent_root_id
) {
953 btrfs_release_path(&path
);
960 btrfs_release_path(&path
);
963 return has_parent
? 0 : 2;
966 static int process_dir_item(struct btrfs_root
*root
,
967 struct extent_buffer
*eb
,
968 int slot
, struct btrfs_key
*key
,
969 struct shared_node
*active_node
)
979 struct btrfs_dir_item
*di
;
980 struct inode_record
*rec
;
981 struct cache_tree
*root_cache
;
982 struct cache_tree
*inode_cache
;
983 struct btrfs_key location
;
984 char namebuf
[BTRFS_NAME_LEN
];
986 root_cache
= &active_node
->root_cache
;
987 inode_cache
= &active_node
->inode_cache
;
988 rec
= active_node
->current
;
989 rec
->found_dir_item
= 1;
991 di
= btrfs_item_ptr(eb
, slot
, struct btrfs_dir_item
);
992 total
= btrfs_item_size_nr(eb
, slot
);
993 while (cur
< total
) {
995 btrfs_dir_item_key_to_cpu(eb
, di
, &location
);
996 name_len
= btrfs_dir_name_len(eb
, di
);
997 data_len
= btrfs_dir_data_len(eb
, di
);
998 filetype
= btrfs_dir_type(eb
, di
);
1000 rec
->found_size
+= name_len
;
1001 if (name_len
<= BTRFS_NAME_LEN
) {
1005 len
= BTRFS_NAME_LEN
;
1006 error
= REF_ERR_NAME_TOO_LONG
;
1008 read_extent_buffer(eb
, namebuf
, (unsigned long)(di
+ 1), len
);
1010 if (location
.type
== BTRFS_INODE_ITEM_KEY
) {
1011 add_inode_backref(inode_cache
, location
.objectid
,
1012 key
->objectid
, key
->offset
, namebuf
,
1013 len
, filetype
, key
->type
, error
);
1014 } else if (location
.type
== BTRFS_ROOT_ITEM_KEY
) {
1015 add_inode_backref(root_cache
, location
.objectid
,
1016 key
->objectid
, key
->offset
,
1017 namebuf
, len
, filetype
,
1020 fprintf(stderr
, "invalid location in dir item %u\n",
1022 add_inode_backref(inode_cache
, BTRFS_MULTIPLE_OBJECTIDS
,
1023 key
->objectid
, key
->offset
, namebuf
,
1024 len
, filetype
, key
->type
, error
);
1027 len
= sizeof(*di
) + name_len
+ data_len
;
1028 di
= (struct btrfs_dir_item
*)((char *)di
+ len
);
1031 if (key
->type
== BTRFS_DIR_INDEX_KEY
&& nritems
> 1)
1032 rec
->errors
|= I_ERR_DUP_DIR_INDEX
;
1037 static int process_inode_ref(struct extent_buffer
*eb
,
1038 int slot
, struct btrfs_key
*key
,
1039 struct shared_node
*active_node
)
1047 struct cache_tree
*inode_cache
;
1048 struct btrfs_inode_ref
*ref
;
1049 char namebuf
[BTRFS_NAME_LEN
];
1051 inode_cache
= &active_node
->inode_cache
;
1053 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_ref
);
1054 total
= btrfs_item_size_nr(eb
, slot
);
1055 while (cur
< total
) {
1056 name_len
= btrfs_inode_ref_name_len(eb
, ref
);
1057 index
= btrfs_inode_ref_index(eb
, ref
);
1058 if (name_len
<= BTRFS_NAME_LEN
) {
1062 len
= BTRFS_NAME_LEN
;
1063 error
= REF_ERR_NAME_TOO_LONG
;
1065 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
1066 add_inode_backref(inode_cache
, key
->objectid
, key
->offset
,
1067 index
, namebuf
, len
, 0, key
->type
, error
);
1069 len
= sizeof(*ref
) + name_len
;
1070 ref
= (struct btrfs_inode_ref
*)((char *)ref
+ len
);
1076 static int process_inode_extref(struct extent_buffer
*eb
,
1077 int slot
, struct btrfs_key
*key
,
1078 struct shared_node
*active_node
)
1087 struct cache_tree
*inode_cache
;
1088 struct btrfs_inode_extref
*extref
;
1089 char namebuf
[BTRFS_NAME_LEN
];
1091 inode_cache
= &active_node
->inode_cache
;
1093 extref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_extref
);
1094 total
= btrfs_item_size_nr(eb
, slot
);
1095 while (cur
< total
) {
1096 name_len
= btrfs_inode_extref_name_len(eb
, extref
);
1097 index
= btrfs_inode_extref_index(eb
, extref
);
1098 parent
= btrfs_inode_extref_parent(eb
, extref
);
1099 if (name_len
<= BTRFS_NAME_LEN
) {
1103 len
= BTRFS_NAME_LEN
;
1104 error
= REF_ERR_NAME_TOO_LONG
;
1106 read_extent_buffer(eb
, namebuf
,
1107 (unsigned long)(extref
+ 1), len
);
1108 add_inode_backref(inode_cache
, key
->objectid
, parent
,
1109 index
, namebuf
, len
, 0, key
->type
, error
);
1111 len
= sizeof(*extref
) + name_len
;
1112 extref
= (struct btrfs_inode_extref
*)((char *)extref
+ len
);
1119 static int count_csum_range(struct btrfs_root
*root
, u64 start
,
1120 u64 len
, u64
*found
)
1122 struct btrfs_key key
;
1123 struct btrfs_path path
;
1124 struct extent_buffer
*leaf
;
1129 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
1131 btrfs_init_path(&path
);
1133 key
.objectid
= BTRFS_EXTENT_CSUM_OBJECTID
;
1135 key
.type
= BTRFS_EXTENT_CSUM_KEY
;
1137 ret
= btrfs_search_slot(NULL
, root
->fs_info
->csum_root
,
1141 if (ret
> 0 && path
.slots
[0] > 0) {
1142 leaf
= path
.nodes
[0];
1143 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0] - 1);
1144 if (key
.objectid
== BTRFS_EXTENT_CSUM_OBJECTID
&&
1145 key
.type
== BTRFS_EXTENT_CSUM_KEY
)
1150 leaf
= path
.nodes
[0];
1151 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
1152 ret
= btrfs_next_leaf(root
->fs_info
->csum_root
, &path
);
1157 leaf
= path
.nodes
[0];
1160 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1161 if (key
.objectid
!= BTRFS_EXTENT_CSUM_OBJECTID
||
1162 key
.type
!= BTRFS_EXTENT_CSUM_KEY
)
1165 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1166 if (key
.offset
>= start
+ len
)
1169 if (key
.offset
> start
)
1172 size
= btrfs_item_size_nr(leaf
, path
.slots
[0]);
1173 csum_end
= key
.offset
+ (size
/ csum_size
) * root
->sectorsize
;
1174 if (csum_end
> start
) {
1175 size
= min(csum_end
- start
, len
);
1186 btrfs_release_path(&path
);
1190 static int process_file_extent(struct btrfs_root
*root
,
1191 struct extent_buffer
*eb
,
1192 int slot
, struct btrfs_key
*key
,
1193 struct shared_node
*active_node
)
1195 struct inode_record
*rec
;
1196 struct btrfs_file_extent_item
*fi
;
1198 u64 disk_bytenr
= 0;
1199 u64 extent_offset
= 0;
1200 u64 mask
= root
->sectorsize
- 1;
1204 rec
= active_node
->current
;
1205 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
1206 rec
->found_file_extent
= 1;
1208 if (rec
->extent_start
== (u64
)-1) {
1209 rec
->extent_start
= key
->offset
;
1210 rec
->extent_end
= key
->offset
;
1213 if (rec
->extent_end
> key
->offset
)
1214 rec
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
1215 else if (rec
->extent_end
< key
->offset
&&
1216 rec
->extent_end
< rec
->first_extent_gap
)
1217 rec
->first_extent_gap
= rec
->extent_end
;
1219 fi
= btrfs_item_ptr(eb
, slot
, struct btrfs_file_extent_item
);
1220 extent_type
= btrfs_file_extent_type(eb
, fi
);
1222 if (extent_type
== BTRFS_FILE_EXTENT_INLINE
) {
1223 num_bytes
= btrfs_file_extent_inline_len(eb
, slot
, fi
);
1225 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1226 rec
->found_size
+= num_bytes
;
1227 num_bytes
= (num_bytes
+ mask
) & ~mask
;
1228 } else if (extent_type
== BTRFS_FILE_EXTENT_REG
||
1229 extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1230 num_bytes
= btrfs_file_extent_num_bytes(eb
, fi
);
1231 disk_bytenr
= btrfs_file_extent_disk_bytenr(eb
, fi
);
1232 extent_offset
= btrfs_file_extent_offset(eb
, fi
);
1233 if (num_bytes
== 0 || (num_bytes
& mask
))
1234 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1235 if (num_bytes
+ extent_offset
>
1236 btrfs_file_extent_ram_bytes(eb
, fi
))
1237 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1238 if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
&&
1239 (btrfs_file_extent_compression(eb
, fi
) ||
1240 btrfs_file_extent_encryption(eb
, fi
) ||
1241 btrfs_file_extent_other_encoding(eb
, fi
)))
1242 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1243 if (disk_bytenr
> 0)
1244 rec
->found_size
+= num_bytes
;
1246 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1248 rec
->extent_end
= key
->offset
+ num_bytes
;
1250 if (disk_bytenr
> 0) {
1252 if (btrfs_file_extent_compression(eb
, fi
))
1253 num_bytes
= btrfs_file_extent_disk_num_bytes(eb
, fi
);
1255 disk_bytenr
+= extent_offset
;
1257 ret
= count_csum_range(root
, disk_bytenr
, num_bytes
, &found
);
1260 if (extent_type
== BTRFS_FILE_EXTENT_REG
) {
1262 rec
->found_csum_item
= 1;
1263 if (found
< num_bytes
)
1264 rec
->some_csum_missing
= 1;
1265 } else if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1267 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
1273 static int process_one_leaf(struct btrfs_root
*root
, struct extent_buffer
*eb
,
1274 struct walk_control
*wc
)
1276 struct btrfs_key key
;
1280 struct cache_tree
*inode_cache
;
1281 struct shared_node
*active_node
;
1283 if (wc
->root_level
== wc
->active_node
&&
1284 btrfs_root_refs(&root
->root_item
) == 0)
1287 active_node
= wc
->nodes
[wc
->active_node
];
1288 inode_cache
= &active_node
->inode_cache
;
1289 nritems
= btrfs_header_nritems(eb
);
1290 for (i
= 0; i
< nritems
; i
++) {
1291 btrfs_item_key_to_cpu(eb
, &key
, i
);
1293 if (key
.objectid
== BTRFS_FREE_SPACE_OBJECTID
)
1295 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
)
1298 if (active_node
->current
== NULL
||
1299 active_node
->current
->ino
< key
.objectid
) {
1300 if (active_node
->current
) {
1301 active_node
->current
->checked
= 1;
1302 maybe_free_inode_rec(inode_cache
,
1303 active_node
->current
);
1305 active_node
->current
= get_inode_rec(inode_cache
,
1309 case BTRFS_DIR_ITEM_KEY
:
1310 case BTRFS_DIR_INDEX_KEY
:
1311 ret
= process_dir_item(root
, eb
, i
, &key
, active_node
);
1313 case BTRFS_INODE_REF_KEY
:
1314 ret
= process_inode_ref(eb
, i
, &key
, active_node
);
1316 case BTRFS_INODE_EXTREF_KEY
:
1317 ret
= process_inode_extref(eb
, i
, &key
, active_node
);
1319 case BTRFS_INODE_ITEM_KEY
:
1320 ret
= process_inode_item(eb
, i
, &key
, active_node
);
1322 case BTRFS_EXTENT_DATA_KEY
:
1323 ret
= process_file_extent(root
, eb
, i
, &key
,
1333 static void reada_walk_down(struct btrfs_root
*root
,
1334 struct extent_buffer
*node
, int slot
)
1343 level
= btrfs_header_level(node
);
1347 nritems
= btrfs_header_nritems(node
);
1348 blocksize
= btrfs_level_size(root
, level
- 1);
1349 for (i
= slot
; i
< nritems
; i
++) {
1350 bytenr
= btrfs_node_blockptr(node
, i
);
1351 ptr_gen
= btrfs_node_ptr_generation(node
, i
);
1352 readahead_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
1357 * Check the child node/leaf by the following condition:
1358 * 1. the first item key of the node/leaf should be the same with the one
1360 * 2. block in parent node should match the child node/leaf.
1361 * 3. generation of parent node and child's header should be consistent.
1363 * Or the child node/leaf pointed by the key in parent is not valid.
1365 * We hope to check leaf owner too, but since subvol may share leaves,
1366 * which makes leaf owner check not so strong, key check should be
1367 * sufficient enough for that case.
1369 static int check_child_node(struct btrfs_root
*root
,
1370 struct extent_buffer
*parent
, int slot
,
1371 struct extent_buffer
*child
)
1373 struct btrfs_key parent_key
;
1374 struct btrfs_key child_key
;
1377 btrfs_node_key_to_cpu(parent
, &parent_key
, slot
);
1378 if (btrfs_header_level(child
) == 0)
1379 btrfs_item_key_to_cpu(child
, &child_key
, 0);
1381 btrfs_node_key_to_cpu(child
, &child_key
, 0);
1383 if (memcmp(&parent_key
, &child_key
, sizeof(parent_key
))) {
1386 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1387 parent_key
.objectid
, parent_key
.type
, parent_key
.offset
,
1388 child_key
.objectid
, child_key
.type
, child_key
.offset
);
1390 if (btrfs_header_bytenr(child
) != btrfs_node_blockptr(parent
, slot
)) {
1392 fprintf(stderr
, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1393 btrfs_node_blockptr(parent
, slot
),
1394 btrfs_header_bytenr(child
));
1396 if (btrfs_node_ptr_generation(parent
, slot
) !=
1397 btrfs_header_generation(child
)) {
1399 fprintf(stderr
, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1400 btrfs_header_generation(child
),
1401 btrfs_node_ptr_generation(parent
, slot
));
1406 static int walk_down_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1407 struct walk_control
*wc
, int *level
)
1409 enum btrfs_tree_block_status status
;
1412 struct extent_buffer
*next
;
1413 struct extent_buffer
*cur
;
1418 WARN_ON(*level
< 0);
1419 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1420 ret
= btrfs_lookup_extent_info(NULL
, root
,
1421 path
->nodes
[*level
]->start
,
1422 *level
, 1, &refs
, NULL
);
1429 ret
= enter_shared_node(root
, path
->nodes
[*level
]->start
,
1437 while (*level
>= 0) {
1438 WARN_ON(*level
< 0);
1439 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1440 cur
= path
->nodes
[*level
];
1442 if (btrfs_header_level(cur
) != *level
)
1445 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
))
1448 ret
= process_one_leaf(root
, cur
, wc
);
1453 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1454 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
1455 blocksize
= btrfs_level_size(root
, *level
- 1);
1456 ret
= btrfs_lookup_extent_info(NULL
, root
, bytenr
, *level
- 1,
1462 ret
= enter_shared_node(root
, bytenr
, refs
,
1465 path
->slots
[*level
]++;
1470 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1471 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
1472 free_extent_buffer(next
);
1473 reada_walk_down(root
, cur
, path
->slots
[*level
]);
1474 next
= read_tree_block(root
, bytenr
, blocksize
,
1482 ret
= check_child_node(root
, cur
, path
->slots
[*level
], next
);
1488 if (btrfs_is_leaf(next
))
1489 status
= btrfs_check_leaf(root
, NULL
, next
);
1491 status
= btrfs_check_node(root
, NULL
, next
);
1492 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
1493 free_extent_buffer(next
);
1498 *level
= *level
- 1;
1499 free_extent_buffer(path
->nodes
[*level
]);
1500 path
->nodes
[*level
] = next
;
1501 path
->slots
[*level
] = 0;
1504 path
->slots
[*level
] = btrfs_header_nritems(path
->nodes
[*level
]);
1508 static int walk_up_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1509 struct walk_control
*wc
, int *level
)
1512 struct extent_buffer
*leaf
;
1514 for (i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1515 leaf
= path
->nodes
[i
];
1516 if (path
->slots
[i
] + 1 < btrfs_header_nritems(leaf
)) {
1521 free_extent_buffer(path
->nodes
[*level
]);
1522 path
->nodes
[*level
] = NULL
;
1523 BUG_ON(*level
> wc
->active_node
);
1524 if (*level
== wc
->active_node
)
1525 leave_shared_node(root
, wc
, *level
);
1532 static int check_root_dir(struct inode_record
*rec
)
1534 struct inode_backref
*backref
;
1537 if (!rec
->found_inode_item
|| rec
->errors
)
1539 if (rec
->nlink
!= 1 || rec
->found_link
!= 0)
1541 if (list_empty(&rec
->backrefs
))
1543 backref
= list_entry(rec
->backrefs
.next
, struct inode_backref
, list
);
1544 if (!backref
->found_inode_ref
)
1546 if (backref
->index
!= 0 || backref
->namelen
!= 2 ||
1547 memcmp(backref
->name
, "..", 2))
1549 if (backref
->found_dir_index
|| backref
->found_dir_item
)
1556 static int repair_inode_isize(struct btrfs_trans_handle
*trans
,
1557 struct btrfs_root
*root
, struct btrfs_path
*path
,
1558 struct inode_record
*rec
)
1560 struct btrfs_inode_item
*ei
;
1561 struct btrfs_key key
;
1564 key
.objectid
= rec
->ino
;
1565 key
.type
= BTRFS_INODE_ITEM_KEY
;
1566 key
.offset
= (u64
)-1;
1568 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
1572 if (!path
->slots
[0]) {
1579 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1580 if (key
.objectid
!= rec
->ino
) {
1585 ei
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
1586 struct btrfs_inode_item
);
1587 btrfs_set_inode_size(path
->nodes
[0], ei
, rec
->found_size
);
1588 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1589 rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1590 printf("reset isize for dir %Lu root %Lu\n", rec
->ino
,
1591 root
->root_key
.objectid
);
1593 btrfs_release_path(path
);
1597 static int repair_inode_orphan_item(struct btrfs_trans_handle
*trans
,
1598 struct btrfs_root
*root
,
1599 struct btrfs_path
*path
,
1600 struct inode_record
*rec
)
1602 struct btrfs_key key
;
1605 key
.objectid
= BTRFS_ORPHAN_OBJECTID
;
1606 key
.type
= BTRFS_ORPHAN_ITEM_KEY
;
1607 key
.offset
= rec
->ino
;
1609 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
1610 btrfs_release_path(path
);
1612 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
1616 static int add_missing_dir_index(struct btrfs_root
*root
,
1617 struct cache_tree
*inode_cache
,
1618 struct inode_record
*rec
,
1619 struct inode_backref
*backref
)
1621 struct btrfs_path
*path
;
1622 struct btrfs_trans_handle
*trans
;
1623 struct btrfs_dir_item
*dir_item
;
1624 struct extent_buffer
*leaf
;
1625 struct btrfs_key key
;
1626 struct btrfs_disk_key disk_key
;
1627 struct inode_record
*dir_rec
;
1628 unsigned long name_ptr
;
1629 u32 data_size
= sizeof(*dir_item
) + backref
->namelen
;
1632 path
= btrfs_alloc_path();
1636 trans
= btrfs_start_transaction(root
, 1);
1637 if (IS_ERR(trans
)) {
1638 btrfs_free_path(path
);
1639 return PTR_ERR(trans
);
1642 fprintf(stderr
, "repairing missing dir index item for inode %llu\n",
1643 (unsigned long long)rec
->ino
);
1644 key
.objectid
= backref
->dir
;
1645 key
.type
= BTRFS_DIR_INDEX_KEY
;
1646 key
.offset
= backref
->index
;
1648 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, data_size
);
1651 leaf
= path
->nodes
[0];
1652 dir_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dir_item
);
1654 disk_key
.objectid
= cpu_to_le64(rec
->ino
);
1655 disk_key
.type
= BTRFS_INODE_ITEM_KEY
;
1656 disk_key
.offset
= 0;
1658 btrfs_set_dir_item_key(leaf
, dir_item
, &disk_key
);
1659 btrfs_set_dir_type(leaf
, dir_item
, imode_to_type(rec
->imode
));
1660 btrfs_set_dir_data_len(leaf
, dir_item
, 0);
1661 btrfs_set_dir_name_len(leaf
, dir_item
, backref
->namelen
);
1662 name_ptr
= (unsigned long)(dir_item
+ 1);
1663 write_extent_buffer(leaf
, backref
->name
, name_ptr
, backref
->namelen
);
1664 btrfs_mark_buffer_dirty(leaf
);
1665 btrfs_free_path(path
);
1666 btrfs_commit_transaction(trans
, root
);
1668 backref
->found_dir_index
= 1;
1669 dir_rec
= get_inode_rec(inode_cache
, backref
->dir
, 0);
1672 dir_rec
->found_size
+= backref
->namelen
;
1673 if (dir_rec
->found_size
== dir_rec
->isize
&&
1674 (dir_rec
->errors
& I_ERR_DIR_ISIZE_WRONG
))
1675 dir_rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1676 if (dir_rec
->found_size
!= dir_rec
->isize
)
1677 dir_rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
1682 static int delete_dir_index(struct btrfs_root
*root
,
1683 struct cache_tree
*inode_cache
,
1684 struct inode_record
*rec
,
1685 struct inode_backref
*backref
)
1687 struct btrfs_trans_handle
*trans
;
1688 struct btrfs_dir_item
*di
;
1689 struct btrfs_path
*path
;
1692 path
= btrfs_alloc_path();
1696 trans
= btrfs_start_transaction(root
, 1);
1697 if (IS_ERR(trans
)) {
1698 btrfs_free_path(path
);
1699 return PTR_ERR(trans
);
1703 fprintf(stderr
, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1704 (unsigned long long)backref
->dir
,
1705 BTRFS_DIR_INDEX_KEY
, (unsigned long long)backref
->index
,
1706 (unsigned long long)root
->objectid
);
1708 di
= btrfs_lookup_dir_index(trans
, root
, path
, backref
->dir
,
1709 backref
->name
, backref
->namelen
,
1710 backref
->index
, -1);
1713 btrfs_free_path(path
);
1714 btrfs_commit_transaction(trans
, root
);
1721 ret
= btrfs_del_item(trans
, root
, path
);
1723 ret
= btrfs_delete_one_dir_name(trans
, root
, path
, di
);
1725 btrfs_free_path(path
);
1726 btrfs_commit_transaction(trans
, root
);
1730 static int create_inode_item(struct btrfs_root
*root
,
1731 struct inode_record
*rec
,
1732 struct inode_backref
*backref
, int root_dir
)
1734 struct btrfs_trans_handle
*trans
;
1735 struct btrfs_inode_item inode_item
;
1736 time_t now
= time(NULL
);
1739 trans
= btrfs_start_transaction(root
, 1);
1740 if (IS_ERR(trans
)) {
1741 ret
= PTR_ERR(trans
);
1745 fprintf(stderr
, "root %llu inode %llu recreating inode item, this may "
1746 "be incomplete, please check permissions and content after "
1747 "the fsck completes.\n", (unsigned long long)root
->objectid
,
1748 (unsigned long long)rec
->ino
);
1750 memset(&inode_item
, 0, sizeof(inode_item
));
1751 btrfs_set_stack_inode_generation(&inode_item
, trans
->transid
);
1753 btrfs_set_stack_inode_nlink(&inode_item
, 1);
1755 btrfs_set_stack_inode_nlink(&inode_item
, rec
->found_link
);
1756 btrfs_set_stack_inode_nbytes(&inode_item
, rec
->found_size
);
1757 if (rec
->found_dir_item
) {
1758 if (rec
->found_file_extent
)
1759 fprintf(stderr
, "root %llu inode %llu has both a dir "
1760 "item and extents, unsure if it is a dir or a "
1761 "regular file so setting it as a directory\n",
1762 (unsigned long long)root
->objectid
,
1763 (unsigned long long)rec
->ino
);
1764 btrfs_set_stack_inode_mode(&inode_item
, S_IFDIR
| 0755);
1765 btrfs_set_stack_inode_size(&inode_item
, rec
->found_size
);
1766 } else if (!rec
->found_dir_item
) {
1767 btrfs_set_stack_inode_size(&inode_item
, rec
->extent_end
);
1768 btrfs_set_stack_inode_mode(&inode_item
, S_IFREG
| 0755);
1770 btrfs_set_stack_timespec_sec(&inode_item
.atime
, now
);
1771 btrfs_set_stack_timespec_nsec(&inode_item
.atime
, 0);
1772 btrfs_set_stack_timespec_sec(&inode_item
.ctime
, now
);
1773 btrfs_set_stack_timespec_nsec(&inode_item
.ctime
, 0);
1774 btrfs_set_stack_timespec_sec(&inode_item
.mtime
, now
);
1775 btrfs_set_stack_timespec_nsec(&inode_item
.mtime
, 0);
1776 btrfs_set_stack_timespec_sec(&inode_item
.otime
, 0);
1777 btrfs_set_stack_timespec_nsec(&inode_item
.otime
, 0);
1779 ret
= btrfs_insert_inode(trans
, root
, rec
->ino
, &inode_item
);
1781 btrfs_commit_transaction(trans
, root
);
1785 static int repair_inode_backrefs(struct btrfs_root
*root
,
1786 struct inode_record
*rec
,
1787 struct cache_tree
*inode_cache
,
1790 struct inode_backref
*tmp
, *backref
;
1791 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
1795 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
1796 if (!delete && rec
->ino
== root_dirid
) {
1797 if (!rec
->found_inode_item
) {
1798 ret
= create_inode_item(root
, rec
, backref
, 1);
1805 /* Index 0 for root dir's are special, don't mess with it */
1806 if (rec
->ino
== root_dirid
&& backref
->index
== 0)
1810 ((backref
->found_dir_index
&& !backref
->found_inode_ref
) ||
1811 (backref
->found_dir_index
&& backref
->found_inode_ref
&&
1812 (backref
->errors
& REF_ERR_INDEX_UNMATCH
)))) {
1813 ret
= delete_dir_index(root
, inode_cache
, rec
, backref
);
1817 list_del(&backref
->list
);
1821 if (!delete && !backref
->found_dir_index
&&
1822 backref
->found_dir_item
&& backref
->found_inode_ref
) {
1823 ret
= add_missing_dir_index(root
, inode_cache
, rec
,
1828 if (backref
->found_dir_item
&&
1829 backref
->found_dir_index
&&
1830 backref
->found_dir_index
) {
1831 if (!backref
->errors
&&
1832 backref
->found_inode_ref
) {
1833 list_del(&backref
->list
);
1839 if (!delete && (!backref
->found_dir_index
&&
1840 !backref
->found_dir_item
&&
1841 backref
->found_inode_ref
)) {
1842 struct btrfs_trans_handle
*trans
;
1843 struct btrfs_key location
;
1845 location
.objectid
= rec
->ino
;
1846 location
.type
= BTRFS_INODE_ITEM_KEY
;
1847 location
.offset
= 0;
1849 trans
= btrfs_start_transaction(root
, 1);
1850 if (IS_ERR(trans
)) {
1851 ret
= PTR_ERR(trans
);
1854 fprintf(stderr
, "adding missing dir index/item pair "
1856 (unsigned long long)rec
->ino
);
1857 ret
= btrfs_insert_dir_item(trans
, root
, backref
->name
,
1859 backref
->dir
, &location
,
1860 imode_to_type(rec
->imode
),
1863 btrfs_commit_transaction(trans
, root
);
1867 if (!delete && (backref
->found_inode_ref
&&
1868 backref
->found_dir_index
&&
1869 backref
->found_dir_item
&&
1870 !(backref
->errors
& REF_ERR_INDEX_UNMATCH
) &&
1871 !rec
->found_inode_item
)) {
1872 ret
= create_inode_item(root
, rec
, backref
, 0);
1879 return ret
? ret
: repaired
;
1882 static int try_repair_inode(struct btrfs_root
*root
, struct inode_record
*rec
)
1884 struct btrfs_trans_handle
*trans
;
1885 struct btrfs_path
*path
;
1888 if (!(rec
->errors
& (I_ERR_DIR_ISIZE_WRONG
| I_ERR_NO_ORPHAN_ITEM
)))
1891 path
= btrfs_alloc_path();
1895 trans
= btrfs_start_transaction(root
, 1);
1896 if (IS_ERR(trans
)) {
1897 btrfs_free_path(path
);
1898 return PTR_ERR(trans
);
1901 if (rec
->errors
& I_ERR_DIR_ISIZE_WRONG
)
1902 ret
= repair_inode_isize(trans
, root
, path
, rec
);
1903 if (!ret
&& rec
->errors
& I_ERR_NO_ORPHAN_ITEM
)
1904 ret
= repair_inode_orphan_item(trans
, root
, path
, rec
);
1905 btrfs_commit_transaction(trans
, root
);
1906 btrfs_free_path(path
);
1910 static int check_inode_recs(struct btrfs_root
*root
,
1911 struct cache_tree
*inode_cache
)
1913 struct cache_extent
*cache
;
1914 struct ptr_node
*node
;
1915 struct inode_record
*rec
;
1916 struct inode_backref
*backref
;
1921 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
1923 if (btrfs_root_refs(&root
->root_item
) == 0) {
1924 if (!cache_tree_empty(inode_cache
))
1925 fprintf(stderr
, "warning line %d\n", __LINE__
);
1930 * We need to repair backrefs first because we could change some of the
1931 * errors in the inode recs.
1933 * We also need to go through and delete invalid backrefs first and then
1934 * add the correct ones second. We do this because we may get EEXIST
1935 * when adding back the correct index because we hadn't yet deleted the
1938 * For example, if we were missing a dir index then the directories
1939 * isize would be wrong, so if we fixed the isize to what we thought it
1940 * would be and then fixed the backref we'd still have a invalid fs, so
1941 * we need to add back the dir index and then check to see if the isize
1946 if (stage
== 3 && !err
)
1949 cache
= search_cache_extent(inode_cache
, 0);
1950 while (repair
&& cache
) {
1951 node
= container_of(cache
, struct ptr_node
, cache
);
1953 cache
= next_cache_extent(cache
);
1955 /* Need to free everything up and rescan */
1957 remove_cache_extent(inode_cache
, &node
->cache
);
1959 free_inode_rec(rec
);
1963 if (list_empty(&rec
->backrefs
))
1966 ret
= repair_inode_backrefs(root
, rec
, inode_cache
,
1980 rec
= get_inode_rec(inode_cache
, root_dirid
, 0);
1982 ret
= check_root_dir(rec
);
1984 fprintf(stderr
, "root %llu root dir %llu error\n",
1985 (unsigned long long)root
->root_key
.objectid
,
1986 (unsigned long long)root_dirid
);
1987 print_inode_error(root
, rec
);
1992 struct btrfs_trans_handle
*trans
;
1994 trans
= btrfs_start_transaction(root
, 1);
1995 if (IS_ERR(trans
)) {
1996 err
= PTR_ERR(trans
);
2001 "root %llu missing its root dir, recreating\n",
2002 (unsigned long long)root
->objectid
);
2004 ret
= btrfs_make_root_dir(trans
, root
, root_dirid
);
2007 btrfs_commit_transaction(trans
, root
);
2011 fprintf(stderr
, "root %llu root dir %llu not found\n",
2012 (unsigned long long)root
->root_key
.objectid
,
2013 (unsigned long long)root_dirid
);
2017 cache
= search_cache_extent(inode_cache
, 0);
2020 node
= container_of(cache
, struct ptr_node
, cache
);
2022 remove_cache_extent(inode_cache
, &node
->cache
);
2024 if (rec
->ino
== root_dirid
||
2025 rec
->ino
== BTRFS_ORPHAN_OBJECTID
) {
2026 free_inode_rec(rec
);
2030 if (rec
->errors
& I_ERR_NO_ORPHAN_ITEM
) {
2031 ret
= check_orphan_item(root
, rec
->ino
);
2033 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
2034 if (can_free_inode_rec(rec
)) {
2035 free_inode_rec(rec
);
2041 ret
= try_repair_inode(root
, rec
);
2042 if (ret
== 0 && can_free_inode_rec(rec
)) {
2043 free_inode_rec(rec
);
2050 if (!rec
->found_inode_item
)
2051 rec
->errors
|= I_ERR_NO_INODE_ITEM
;
2052 if (rec
->found_link
!= rec
->nlink
)
2053 rec
->errors
|= I_ERR_LINK_COUNT_WRONG
;
2054 print_inode_error(root
, rec
);
2055 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2056 if (!backref
->found_dir_item
)
2057 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
2058 if (!backref
->found_dir_index
)
2059 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
2060 if (!backref
->found_inode_ref
)
2061 backref
->errors
|= REF_ERR_NO_INODE_REF
;
2062 fprintf(stderr
, "\tunresolved ref dir %llu index %llu"
2063 " namelen %u name %s filetype %d errors %x",
2064 (unsigned long long)backref
->dir
,
2065 (unsigned long long)backref
->index
,
2066 backref
->namelen
, backref
->name
,
2067 backref
->filetype
, backref
->errors
);
2068 print_ref_error(backref
->errors
);
2070 free_inode_rec(rec
);
2072 return (error
> 0) ? -1 : 0;
2075 static struct root_record
*get_root_rec(struct cache_tree
*root_cache
,
2078 struct cache_extent
*cache
;
2079 struct root_record
*rec
= NULL
;
2082 cache
= lookup_cache_extent(root_cache
, objectid
, 1);
2084 rec
= container_of(cache
, struct root_record
, cache
);
2086 rec
= calloc(1, sizeof(*rec
));
2087 rec
->objectid
= objectid
;
2088 INIT_LIST_HEAD(&rec
->backrefs
);
2089 rec
->cache
.start
= objectid
;
2090 rec
->cache
.size
= 1;
2092 ret
= insert_cache_extent(root_cache
, &rec
->cache
);
2098 static struct root_backref
*get_root_backref(struct root_record
*rec
,
2099 u64 ref_root
, u64 dir
, u64 index
,
2100 const char *name
, int namelen
)
2102 struct root_backref
*backref
;
2104 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2105 if (backref
->ref_root
!= ref_root
|| backref
->dir
!= dir
||
2106 backref
->namelen
!= namelen
)
2108 if (memcmp(name
, backref
->name
, namelen
))
2113 backref
= malloc(sizeof(*backref
) + namelen
+ 1);
2114 memset(backref
, 0, sizeof(*backref
));
2115 backref
->ref_root
= ref_root
;
2117 backref
->index
= index
;
2118 backref
->namelen
= namelen
;
2119 memcpy(backref
->name
, name
, namelen
);
2120 backref
->name
[namelen
] = '\0';
2121 list_add_tail(&backref
->list
, &rec
->backrefs
);
2125 static void free_root_record(struct cache_extent
*cache
)
2127 struct root_record
*rec
;
2128 struct root_backref
*backref
;
2130 rec
= container_of(cache
, struct root_record
, cache
);
2131 while (!list_empty(&rec
->backrefs
)) {
2132 backref
= list_entry(rec
->backrefs
.next
,
2133 struct root_backref
, list
);
2134 list_del(&backref
->list
);
2141 FREE_EXTENT_CACHE_BASED_TREE(root_recs
, free_root_record
);
2143 static int add_root_backref(struct cache_tree
*root_cache
,
2144 u64 root_id
, u64 ref_root
, u64 dir
, u64 index
,
2145 const char *name
, int namelen
,
2146 int item_type
, int errors
)
2148 struct root_record
*rec
;
2149 struct root_backref
*backref
;
2151 rec
= get_root_rec(root_cache
, root_id
);
2152 backref
= get_root_backref(rec
, ref_root
, dir
, index
, name
, namelen
);
2154 backref
->errors
|= errors
;
2156 if (item_type
!= BTRFS_DIR_ITEM_KEY
) {
2157 if (backref
->found_dir_index
|| backref
->found_back_ref
||
2158 backref
->found_forward_ref
) {
2159 if (backref
->index
!= index
)
2160 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
2162 backref
->index
= index
;
2166 if (item_type
== BTRFS_DIR_ITEM_KEY
) {
2167 if (backref
->found_forward_ref
)
2169 backref
->found_dir_item
= 1;
2170 } else if (item_type
== BTRFS_DIR_INDEX_KEY
) {
2171 backref
->found_dir_index
= 1;
2172 } else if (item_type
== BTRFS_ROOT_REF_KEY
) {
2173 if (backref
->found_forward_ref
)
2174 backref
->errors
|= REF_ERR_DUP_ROOT_REF
;
2175 else if (backref
->found_dir_item
)
2177 backref
->found_forward_ref
= 1;
2178 } else if (item_type
== BTRFS_ROOT_BACKREF_KEY
) {
2179 if (backref
->found_back_ref
)
2180 backref
->errors
|= REF_ERR_DUP_ROOT_BACKREF
;
2181 backref
->found_back_ref
= 1;
2186 if (backref
->found_forward_ref
&& backref
->found_dir_item
)
2187 backref
->reachable
= 1;
2191 static int merge_root_recs(struct btrfs_root
*root
,
2192 struct cache_tree
*src_cache
,
2193 struct cache_tree
*dst_cache
)
2195 struct cache_extent
*cache
;
2196 struct ptr_node
*node
;
2197 struct inode_record
*rec
;
2198 struct inode_backref
*backref
;
2201 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
2202 free_inode_recs_tree(src_cache
);
2207 cache
= search_cache_extent(src_cache
, 0);
2210 node
= container_of(cache
, struct ptr_node
, cache
);
2212 remove_cache_extent(src_cache
, &node
->cache
);
2215 ret
= is_child_root(root
, root
->objectid
, rec
->ino
);
2221 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2222 BUG_ON(backref
->found_inode_ref
);
2223 if (backref
->found_dir_item
)
2224 add_root_backref(dst_cache
, rec
->ino
,
2225 root
->root_key
.objectid
, backref
->dir
,
2226 backref
->index
, backref
->name
,
2227 backref
->namelen
, BTRFS_DIR_ITEM_KEY
,
2229 if (backref
->found_dir_index
)
2230 add_root_backref(dst_cache
, rec
->ino
,
2231 root
->root_key
.objectid
, backref
->dir
,
2232 backref
->index
, backref
->name
,
2233 backref
->namelen
, BTRFS_DIR_INDEX_KEY
,
2237 free_inode_rec(rec
);
2244 static int check_root_refs(struct btrfs_root
*root
,
2245 struct cache_tree
*root_cache
)
2247 struct root_record
*rec
;
2248 struct root_record
*ref_root
;
2249 struct root_backref
*backref
;
2250 struct cache_extent
*cache
;
2256 rec
= get_root_rec(root_cache
, BTRFS_FS_TREE_OBJECTID
);
2259 /* fixme: this can not detect circular references */
2262 cache
= search_cache_extent(root_cache
, 0);
2266 rec
= container_of(cache
, struct root_record
, cache
);
2267 cache
= next_cache_extent(cache
);
2269 if (rec
->found_ref
== 0)
2272 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2273 if (!backref
->reachable
)
2276 ref_root
= get_root_rec(root_cache
,
2278 if (ref_root
->found_ref
> 0)
2281 backref
->reachable
= 0;
2283 if (rec
->found_ref
== 0)
2289 cache
= search_cache_extent(root_cache
, 0);
2293 rec
= container_of(cache
, struct root_record
, cache
);
2294 cache
= next_cache_extent(cache
);
2296 if (rec
->found_ref
== 0 &&
2297 rec
->objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
2298 rec
->objectid
<= BTRFS_LAST_FREE_OBJECTID
) {
2299 ret
= check_orphan_item(root
->fs_info
->tree_root
,
2305 * If we don't have a root item then we likely just have
2306 * a dir item in a snapshot for this root but no actual
2307 * ref key or anything so it's meaningless.
2309 if (!rec
->found_root_item
)
2312 fprintf(stderr
, "fs tree %llu not referenced\n",
2313 (unsigned long long)rec
->objectid
);
2317 if (rec
->found_ref
> 0 && !rec
->found_root_item
)
2319 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2320 if (!backref
->found_dir_item
)
2321 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
2322 if (!backref
->found_dir_index
)
2323 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
2324 if (!backref
->found_back_ref
)
2325 backref
->errors
|= REF_ERR_NO_ROOT_BACKREF
;
2326 if (!backref
->found_forward_ref
)
2327 backref
->errors
|= REF_ERR_NO_ROOT_REF
;
2328 if (backref
->reachable
&& backref
->errors
)
2335 fprintf(stderr
, "fs tree %llu refs %u %s\n",
2336 (unsigned long long)rec
->objectid
, rec
->found_ref
,
2337 rec
->found_root_item
? "" : "not found");
2339 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2340 if (!backref
->reachable
)
2342 if (!backref
->errors
&& rec
->found_root_item
)
2344 fprintf(stderr
, "\tunresolved ref root %llu dir %llu"
2345 " index %llu namelen %u name %s errors %x\n",
2346 (unsigned long long)backref
->ref_root
,
2347 (unsigned long long)backref
->dir
,
2348 (unsigned long long)backref
->index
,
2349 backref
->namelen
, backref
->name
,
2351 print_ref_error(backref
->errors
);
2354 return errors
> 0 ? 1 : 0;
2357 static int process_root_ref(struct extent_buffer
*eb
, int slot
,
2358 struct btrfs_key
*key
,
2359 struct cache_tree
*root_cache
)
2365 struct btrfs_root_ref
*ref
;
2366 char namebuf
[BTRFS_NAME_LEN
];
2369 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_root_ref
);
2371 dirid
= btrfs_root_ref_dirid(eb
, ref
);
2372 index
= btrfs_root_ref_sequence(eb
, ref
);
2373 name_len
= btrfs_root_ref_name_len(eb
, ref
);
2375 if (name_len
<= BTRFS_NAME_LEN
) {
2379 len
= BTRFS_NAME_LEN
;
2380 error
= REF_ERR_NAME_TOO_LONG
;
2382 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
2384 if (key
->type
== BTRFS_ROOT_REF_KEY
) {
2385 add_root_backref(root_cache
, key
->offset
, key
->objectid
, dirid
,
2386 index
, namebuf
, len
, key
->type
, error
);
2388 add_root_backref(root_cache
, key
->objectid
, key
->offset
, dirid
,
2389 index
, namebuf
, len
, key
->type
, error
);
2394 static int check_fs_root(struct btrfs_root
*root
,
2395 struct cache_tree
*root_cache
,
2396 struct walk_control
*wc
)
2402 struct btrfs_path path
;
2403 struct shared_node root_node
;
2404 struct root_record
*rec
;
2405 struct btrfs_root_item
*root_item
= &root
->root_item
;
2406 enum btrfs_tree_block_status status
;
2408 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
2409 rec
= get_root_rec(root_cache
, root
->root_key
.objectid
);
2410 if (btrfs_root_refs(root_item
) > 0)
2411 rec
->found_root_item
= 1;
2414 btrfs_init_path(&path
);
2415 memset(&root_node
, 0, sizeof(root_node
));
2416 cache_tree_init(&root_node
.root_cache
);
2417 cache_tree_init(&root_node
.inode_cache
);
2419 level
= btrfs_header_level(root
->node
);
2420 memset(wc
->nodes
, 0, sizeof(wc
->nodes
));
2421 wc
->nodes
[level
] = &root_node
;
2422 wc
->active_node
= level
;
2423 wc
->root_level
= level
;
2425 /* We may not have checked the root block, lets do that now */
2426 if (btrfs_is_leaf(root
->node
))
2427 status
= btrfs_check_leaf(root
, NULL
, root
->node
);
2429 status
= btrfs_check_node(root
, NULL
, root
->node
);
2430 if (status
!= BTRFS_TREE_BLOCK_CLEAN
)
2433 if (btrfs_root_refs(root_item
) > 0 ||
2434 btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
2435 path
.nodes
[level
] = root
->node
;
2436 extent_buffer_get(root
->node
);
2437 path
.slots
[level
] = 0;
2439 struct btrfs_key key
;
2440 struct btrfs_disk_key found_key
;
2442 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
2443 level
= root_item
->drop_level
;
2444 path
.lowest_level
= level
;
2445 wret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
2448 btrfs_node_key(path
.nodes
[level
], &found_key
,
2450 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
2451 sizeof(found_key
)));
2455 wret
= walk_down_tree(root
, &path
, wc
, &level
);
2461 wret
= walk_up_tree(root
, &path
, wc
, &level
);
2468 btrfs_release_path(&path
);
2470 err
= merge_root_recs(root
, &root_node
.root_cache
, root_cache
);
2474 if (root_node
.current
) {
2475 root_node
.current
->checked
= 1;
2476 maybe_free_inode_rec(&root_node
.inode_cache
,
2480 err
= check_inode_recs(root
, &root_node
.inode_cache
);
2486 static int fs_root_objectid(u64 objectid
)
2488 if (objectid
== BTRFS_TREE_RELOC_OBJECTID
||
2489 objectid
== BTRFS_DATA_RELOC_TREE_OBJECTID
)
2491 return is_fstree(objectid
);
2494 static int check_fs_roots(struct btrfs_root
*root
,
2495 struct cache_tree
*root_cache
)
2497 struct btrfs_path path
;
2498 struct btrfs_key key
;
2499 struct walk_control wc
;
2500 struct extent_buffer
*leaf
, *tree_node
;
2501 struct btrfs_root
*tmp_root
;
2502 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
2507 * Just in case we made any changes to the extent tree that weren't
2508 * reflected into the free space cache yet.
2511 reset_cached_block_groups(root
->fs_info
);
2512 memset(&wc
, 0, sizeof(wc
));
2513 cache_tree_init(&wc
.shared
);
2514 btrfs_init_path(&path
);
2519 key
.type
= BTRFS_ROOT_ITEM_KEY
;
2520 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, &path
, 0, 0);
2525 tree_node
= tree_root
->node
;
2527 if (tree_node
!= tree_root
->node
) {
2528 free_root_recs_tree(root_cache
);
2529 btrfs_release_path(&path
);
2532 leaf
= path
.nodes
[0];
2533 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
2534 ret
= btrfs_next_leaf(tree_root
, &path
);
2540 leaf
= path
.nodes
[0];
2542 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
2543 if (key
.type
== BTRFS_ROOT_ITEM_KEY
&&
2544 fs_root_objectid(key
.objectid
)) {
2545 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
2546 tmp_root
= btrfs_read_fs_root_no_cache(
2547 root
->fs_info
, &key
);
2549 key
.offset
= (u64
)-1;
2550 tmp_root
= btrfs_read_fs_root(
2551 root
->fs_info
, &key
);
2553 if (IS_ERR(tmp_root
)) {
2557 ret
= check_fs_root(tmp_root
, root_cache
, &wc
);
2558 if (ret
== -EAGAIN
) {
2559 free_root_recs_tree(root_cache
);
2560 btrfs_release_path(&path
);
2565 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
2566 btrfs_free_fs_root(tmp_root
);
2567 } else if (key
.type
== BTRFS_ROOT_REF_KEY
||
2568 key
.type
== BTRFS_ROOT_BACKREF_KEY
) {
2569 process_root_ref(leaf
, path
.slots
[0], &key
,
2576 btrfs_release_path(&path
);
2578 free_extent_cache_tree(&wc
.shared
);
2579 if (!cache_tree_empty(&wc
.shared
))
2580 fprintf(stderr
, "warning line %d\n", __LINE__
);
2585 static int all_backpointers_checked(struct extent_record
*rec
, int print_errs
)
2587 struct list_head
*cur
= rec
->backrefs
.next
;
2588 struct extent_backref
*back
;
2589 struct tree_backref
*tback
;
2590 struct data_backref
*dback
;
2594 while(cur
!= &rec
->backrefs
) {
2595 back
= list_entry(cur
, struct extent_backref
, list
);
2597 if (!back
->found_extent_tree
) {
2601 if (back
->is_data
) {
2602 dback
= (struct data_backref
*)back
;
2603 fprintf(stderr
, "Backref %llu %s %llu"
2604 " owner %llu offset %llu num_refs %lu"
2605 " not found in extent tree\n",
2606 (unsigned long long)rec
->start
,
2607 back
->full_backref
?
2609 back
->full_backref
?
2610 (unsigned long long)dback
->parent
:
2611 (unsigned long long)dback
->root
,
2612 (unsigned long long)dback
->owner
,
2613 (unsigned long long)dback
->offset
,
2614 (unsigned long)dback
->num_refs
);
2616 tback
= (struct tree_backref
*)back
;
2617 fprintf(stderr
, "Backref %llu parent %llu"
2618 " root %llu not found in extent tree\n",
2619 (unsigned long long)rec
->start
,
2620 (unsigned long long)tback
->parent
,
2621 (unsigned long long)tback
->root
);
2624 if (!back
->is_data
&& !back
->found_ref
) {
2628 tback
= (struct tree_backref
*)back
;
2629 fprintf(stderr
, "Backref %llu %s %llu not referenced back %p\n",
2630 (unsigned long long)rec
->start
,
2631 back
->full_backref
? "parent" : "root",
2632 back
->full_backref
?
2633 (unsigned long long)tback
->parent
:
2634 (unsigned long long)tback
->root
, back
);
2636 if (back
->is_data
) {
2637 dback
= (struct data_backref
*)back
;
2638 if (dback
->found_ref
!= dback
->num_refs
) {
2642 fprintf(stderr
, "Incorrect local backref count"
2643 " on %llu %s %llu owner %llu"
2644 " offset %llu found %u wanted %u back %p\n",
2645 (unsigned long long)rec
->start
,
2646 back
->full_backref
?
2648 back
->full_backref
?
2649 (unsigned long long)dback
->parent
:
2650 (unsigned long long)dback
->root
,
2651 (unsigned long long)dback
->owner
,
2652 (unsigned long long)dback
->offset
,
2653 dback
->found_ref
, dback
->num_refs
, back
);
2655 if (dback
->disk_bytenr
!= rec
->start
) {
2659 fprintf(stderr
, "Backref disk bytenr does not"
2660 " match extent record, bytenr=%llu, "
2661 "ref bytenr=%llu\n",
2662 (unsigned long long)rec
->start
,
2663 (unsigned long long)dback
->disk_bytenr
);
2666 if (dback
->bytes
!= rec
->nr
) {
2670 fprintf(stderr
, "Backref bytes do not match "
2671 "extent backref, bytenr=%llu, ref "
2672 "bytes=%llu, backref bytes=%llu\n",
2673 (unsigned long long)rec
->start
,
2674 (unsigned long long)rec
->nr
,
2675 (unsigned long long)dback
->bytes
);
2678 if (!back
->is_data
) {
2681 dback
= (struct data_backref
*)back
;
2682 found
+= dback
->found_ref
;
2685 if (found
!= rec
->refs
) {
2689 fprintf(stderr
, "Incorrect global backref count "
2690 "on %llu found %llu wanted %llu\n",
2691 (unsigned long long)rec
->start
,
2692 (unsigned long long)found
,
2693 (unsigned long long)rec
->refs
);
2699 static int free_all_extent_backrefs(struct extent_record
*rec
)
2701 struct extent_backref
*back
;
2702 struct list_head
*cur
;
2703 while (!list_empty(&rec
->backrefs
)) {
2704 cur
= rec
->backrefs
.next
;
2705 back
= list_entry(cur
, struct extent_backref
, list
);
2712 static void free_extent_record_cache(struct btrfs_fs_info
*fs_info
,
2713 struct cache_tree
*extent_cache
)
2715 struct cache_extent
*cache
;
2716 struct extent_record
*rec
;
2719 cache
= first_cache_extent(extent_cache
);
2722 rec
= container_of(cache
, struct extent_record
, cache
);
2723 btrfs_unpin_extent(fs_info
, rec
->start
, rec
->max_size
);
2724 remove_cache_extent(extent_cache
, cache
);
2725 free_all_extent_backrefs(rec
);
2730 static int maybe_free_extent_rec(struct cache_tree
*extent_cache
,
2731 struct extent_record
*rec
)
2733 if (rec
->content_checked
&& rec
->owner_ref_checked
&&
2734 rec
->extent_item_refs
== rec
->refs
&& rec
->refs
> 0 &&
2735 rec
->num_duplicates
== 0 && !all_backpointers_checked(rec
, 0)) {
2736 remove_cache_extent(extent_cache
, &rec
->cache
);
2737 free_all_extent_backrefs(rec
);
2738 list_del_init(&rec
->list
);
2744 static int check_owner_ref(struct btrfs_root
*root
,
2745 struct extent_record
*rec
,
2746 struct extent_buffer
*buf
)
2748 struct extent_backref
*node
;
2749 struct tree_backref
*back
;
2750 struct btrfs_root
*ref_root
;
2751 struct btrfs_key key
;
2752 struct btrfs_path path
;
2753 struct extent_buffer
*parent
;
2758 list_for_each_entry(node
, &rec
->backrefs
, list
) {
2761 if (!node
->found_ref
)
2763 if (node
->full_backref
)
2765 back
= (struct tree_backref
*)node
;
2766 if (btrfs_header_owner(buf
) == back
->root
)
2769 BUG_ON(rec
->is_root
);
2771 /* try to find the block by search corresponding fs tree */
2772 key
.objectid
= btrfs_header_owner(buf
);
2773 key
.type
= BTRFS_ROOT_ITEM_KEY
;
2774 key
.offset
= (u64
)-1;
2776 ref_root
= btrfs_read_fs_root(root
->fs_info
, &key
);
2777 if (IS_ERR(ref_root
))
2780 level
= btrfs_header_level(buf
);
2782 btrfs_item_key_to_cpu(buf
, &key
, 0);
2784 btrfs_node_key_to_cpu(buf
, &key
, 0);
2786 btrfs_init_path(&path
);
2787 path
.lowest_level
= level
+ 1;
2788 ret
= btrfs_search_slot(NULL
, ref_root
, &key
, &path
, 0, 0);
2792 parent
= path
.nodes
[level
+ 1];
2793 if (parent
&& buf
->start
== btrfs_node_blockptr(parent
,
2794 path
.slots
[level
+ 1]))
2797 btrfs_release_path(&path
);
2798 return found
? 0 : 1;
2801 static int is_extent_tree_record(struct extent_record
*rec
)
2803 struct list_head
*cur
= rec
->backrefs
.next
;
2804 struct extent_backref
*node
;
2805 struct tree_backref
*back
;
2808 while(cur
!= &rec
->backrefs
) {
2809 node
= list_entry(cur
, struct extent_backref
, list
);
2813 back
= (struct tree_backref
*)node
;
2814 if (node
->full_backref
)
2816 if (back
->root
== BTRFS_EXTENT_TREE_OBJECTID
)
2823 static int record_bad_block_io(struct btrfs_fs_info
*info
,
2824 struct cache_tree
*extent_cache
,
2827 struct extent_record
*rec
;
2828 struct cache_extent
*cache
;
2829 struct btrfs_key key
;
2831 cache
= lookup_cache_extent(extent_cache
, start
, len
);
2835 rec
= container_of(cache
, struct extent_record
, cache
);
2836 if (!is_extent_tree_record(rec
))
2839 btrfs_disk_key_to_cpu(&key
, &rec
->parent_key
);
2840 return btrfs_add_corrupt_extent_record(info
, &key
, start
, len
, 0);
2843 static int swap_values(struct btrfs_root
*root
, struct btrfs_path
*path
,
2844 struct extent_buffer
*buf
, int slot
)
2846 if (btrfs_header_level(buf
)) {
2847 struct btrfs_key_ptr ptr1
, ptr2
;
2849 read_extent_buffer(buf
, &ptr1
, btrfs_node_key_ptr_offset(slot
),
2850 sizeof(struct btrfs_key_ptr
));
2851 read_extent_buffer(buf
, &ptr2
,
2852 btrfs_node_key_ptr_offset(slot
+ 1),
2853 sizeof(struct btrfs_key_ptr
));
2854 write_extent_buffer(buf
, &ptr1
,
2855 btrfs_node_key_ptr_offset(slot
+ 1),
2856 sizeof(struct btrfs_key_ptr
));
2857 write_extent_buffer(buf
, &ptr2
,
2858 btrfs_node_key_ptr_offset(slot
),
2859 sizeof(struct btrfs_key_ptr
));
2861 struct btrfs_disk_key key
;
2862 btrfs_node_key(buf
, &key
, 0);
2863 btrfs_fixup_low_keys(root
, path
, &key
,
2864 btrfs_header_level(buf
) + 1);
2867 struct btrfs_item
*item1
, *item2
;
2868 struct btrfs_key k1
, k2
;
2869 char *item1_data
, *item2_data
;
2870 u32 item1_offset
, item2_offset
, item1_size
, item2_size
;
2872 item1
= btrfs_item_nr(slot
);
2873 item2
= btrfs_item_nr(slot
+ 1);
2874 btrfs_item_key_to_cpu(buf
, &k1
, slot
);
2875 btrfs_item_key_to_cpu(buf
, &k2
, slot
+ 1);
2876 item1_offset
= btrfs_item_offset(buf
, item1
);
2877 item2_offset
= btrfs_item_offset(buf
, item2
);
2878 item1_size
= btrfs_item_size(buf
, item1
);
2879 item2_size
= btrfs_item_size(buf
, item2
);
2881 item1_data
= malloc(item1_size
);
2884 item2_data
= malloc(item2_size
);
2890 read_extent_buffer(buf
, item1_data
, item1_offset
, item1_size
);
2891 read_extent_buffer(buf
, item2_data
, item2_offset
, item2_size
);
2893 write_extent_buffer(buf
, item1_data
, item2_offset
, item2_size
);
2894 write_extent_buffer(buf
, item2_data
, item1_offset
, item1_size
);
2898 btrfs_set_item_offset(buf
, item1
, item2_offset
);
2899 btrfs_set_item_offset(buf
, item2
, item1_offset
);
2900 btrfs_set_item_size(buf
, item1
, item2_size
);
2901 btrfs_set_item_size(buf
, item2
, item1_size
);
2903 path
->slots
[0] = slot
;
2904 btrfs_set_item_key_unsafe(root
, path
, &k2
);
2905 path
->slots
[0] = slot
+ 1;
2906 btrfs_set_item_key_unsafe(root
, path
, &k1
);
2911 static int fix_key_order(struct btrfs_trans_handle
*trans
,
2912 struct btrfs_root
*root
,
2913 struct btrfs_path
*path
)
2915 struct extent_buffer
*buf
;
2916 struct btrfs_key k1
, k2
;
2918 int level
= path
->lowest_level
;
2921 buf
= path
->nodes
[level
];
2922 for (i
= 0; i
< btrfs_header_nritems(buf
) - 1; i
++) {
2924 btrfs_node_key_to_cpu(buf
, &k1
, i
);
2925 btrfs_node_key_to_cpu(buf
, &k2
, i
+ 1);
2927 btrfs_item_key_to_cpu(buf
, &k1
, i
);
2928 btrfs_item_key_to_cpu(buf
, &k2
, i
+ 1);
2930 if (btrfs_comp_cpu_keys(&k1
, &k2
) < 0)
2932 ret
= swap_values(root
, path
, buf
, i
);
2935 btrfs_mark_buffer_dirty(buf
);
2941 static int delete_bogus_item(struct btrfs_trans_handle
*trans
,
2942 struct btrfs_root
*root
,
2943 struct btrfs_path
*path
,
2944 struct extent_buffer
*buf
, int slot
)
2946 struct btrfs_key key
;
2947 int nritems
= btrfs_header_nritems(buf
);
2949 btrfs_item_key_to_cpu(buf
, &key
, slot
);
2951 /* These are all the keys we can deal with missing. */
2952 if (key
.type
!= BTRFS_DIR_INDEX_KEY
&&
2953 key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
2954 key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
2955 key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
2956 key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
)
2959 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
2960 (unsigned long long)key
.objectid
, key
.type
,
2961 (unsigned long long)key
.offset
, slot
, buf
->start
);
2962 memmove_extent_buffer(buf
, btrfs_item_nr_offset(slot
),
2963 btrfs_item_nr_offset(slot
+ 1),
2964 sizeof(struct btrfs_item
) *
2965 (nritems
- slot
- 1));
2966 btrfs_set_header_nritems(buf
, nritems
- 1);
2968 struct btrfs_disk_key disk_key
;
2970 btrfs_item_key(buf
, &disk_key
, 0);
2971 btrfs_fixup_low_keys(root
, path
, &disk_key
, 1);
2973 btrfs_mark_buffer_dirty(buf
);
2977 static int fix_item_offset(struct btrfs_trans_handle
*trans
,
2978 struct btrfs_root
*root
,
2979 struct btrfs_path
*path
)
2981 struct extent_buffer
*buf
;
2985 /* We should only get this for leaves */
2986 BUG_ON(path
->lowest_level
);
2987 buf
= path
->nodes
[0];
2989 for (i
= 0; i
< btrfs_header_nritems(buf
); i
++) {
2990 unsigned int shift
= 0, offset
;
2992 if (i
== 0 && btrfs_item_end_nr(buf
, i
) !=
2993 BTRFS_LEAF_DATA_SIZE(root
)) {
2994 if (btrfs_item_end_nr(buf
, i
) >
2995 BTRFS_LEAF_DATA_SIZE(root
)) {
2996 ret
= delete_bogus_item(trans
, root
, path
,
3000 fprintf(stderr
, "item is off the end of the "
3001 "leaf, can't fix\n");
3005 shift
= BTRFS_LEAF_DATA_SIZE(root
) -
3006 btrfs_item_end_nr(buf
, i
);
3007 } else if (i
> 0 && btrfs_item_end_nr(buf
, i
) !=
3008 btrfs_item_offset_nr(buf
, i
- 1)) {
3009 if (btrfs_item_end_nr(buf
, i
) >
3010 btrfs_item_offset_nr(buf
, i
- 1)) {
3011 ret
= delete_bogus_item(trans
, root
, path
,
3015 fprintf(stderr
, "items overlap, can't fix\n");
3019 shift
= btrfs_item_offset_nr(buf
, i
- 1) -
3020 btrfs_item_end_nr(buf
, i
);
3025 printf("Shifting item nr %d by %u bytes in block %llu\n",
3026 i
, shift
, (unsigned long long)buf
->start
);
3027 offset
= btrfs_item_offset_nr(buf
, i
);
3028 memmove_extent_buffer(buf
,
3029 btrfs_leaf_data(buf
) + offset
+ shift
,
3030 btrfs_leaf_data(buf
) + offset
,
3031 btrfs_item_size_nr(buf
, i
));
3032 btrfs_set_item_offset(buf
, btrfs_item_nr(i
),
3034 btrfs_mark_buffer_dirty(buf
);
3038 * We may have moved things, in which case we want to exit so we don't
3039 * write those changes out. Once we have proper abort functionality in
3040 * progs this can be changed to something nicer.
3047 * Attempt to fix basic block failures. If we can't fix it for whatever reason
3048 * then just return -EIO.
3050 static int try_to_fix_bad_block(struct btrfs_trans_handle
*trans
,
3051 struct btrfs_root
*root
,
3052 struct extent_buffer
*buf
,
3053 enum btrfs_tree_block_status status
)
3055 struct ulist
*roots
;
3056 struct ulist_node
*node
;
3057 struct btrfs_root
*search_root
;
3058 struct btrfs_path
*path
;
3059 struct ulist_iterator iter
;
3060 struct btrfs_key root_key
, key
;
3063 if (status
!= BTRFS_TREE_BLOCK_BAD_KEY_ORDER
&&
3064 status
!= BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
3067 path
= btrfs_alloc_path();
3071 ret
= btrfs_find_all_roots(trans
, root
->fs_info
, buf
->start
,
3074 btrfs_free_path(path
);
3078 ULIST_ITER_INIT(&iter
);
3079 while ((node
= ulist_next(roots
, &iter
))) {
3080 root_key
.objectid
= node
->val
;
3081 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
3082 root_key
.offset
= (u64
)-1;
3084 search_root
= btrfs_read_fs_root(root
->fs_info
, &root_key
);
3090 record_root_in_trans(trans
, search_root
);
3092 path
->lowest_level
= btrfs_header_level(buf
);
3093 path
->skip_check_block
= 1;
3094 if (path
->lowest_level
)
3095 btrfs_node_key_to_cpu(buf
, &key
, 0);
3097 btrfs_item_key_to_cpu(buf
, &key
, 0);
3098 ret
= btrfs_search_slot(trans
, search_root
, &key
, path
, 0, 1);
3103 if (status
== BTRFS_TREE_BLOCK_BAD_KEY_ORDER
)
3104 ret
= fix_key_order(trans
, search_root
, path
);
3105 else if (status
== BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
3106 ret
= fix_item_offset(trans
, search_root
, path
);
3109 btrfs_release_path(path
);
3112 btrfs_free_path(path
);
3116 static int check_block(struct btrfs_trans_handle
*trans
,
3117 struct btrfs_root
*root
,
3118 struct cache_tree
*extent_cache
,
3119 struct extent_buffer
*buf
, u64 flags
)
3121 struct extent_record
*rec
;
3122 struct cache_extent
*cache
;
3123 struct btrfs_key key
;
3124 enum btrfs_tree_block_status status
;
3128 cache
= lookup_cache_extent(extent_cache
, buf
->start
, buf
->len
);
3131 rec
= container_of(cache
, struct extent_record
, cache
);
3132 rec
->generation
= btrfs_header_generation(buf
);
3134 level
= btrfs_header_level(buf
);
3135 if (btrfs_header_nritems(buf
) > 0) {
3138 btrfs_item_key_to_cpu(buf
, &key
, 0);
3140 btrfs_node_key_to_cpu(buf
, &key
, 0);
3142 rec
->info_objectid
= key
.objectid
;
3144 rec
->info_level
= level
;
3146 if (btrfs_is_leaf(buf
))
3147 status
= btrfs_check_leaf(root
, &rec
->parent_key
, buf
);
3149 status
= btrfs_check_node(root
, &rec
->parent_key
, buf
);
3151 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
3153 status
= try_to_fix_bad_block(trans
, root
, buf
,
3155 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
3157 fprintf(stderr
, "bad block %llu\n",
3158 (unsigned long long)buf
->start
);
3161 * Signal to callers we need to start the scan over
3162 * again since we'll have cow'ed blocks.
3167 rec
->content_checked
= 1;
3168 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
)
3169 rec
->owner_ref_checked
= 1;
3171 ret
= check_owner_ref(root
, rec
, buf
);
3173 rec
->owner_ref_checked
= 1;
3177 maybe_free_extent_rec(extent_cache
, rec
);
3181 static struct tree_backref
*find_tree_backref(struct extent_record
*rec
,
3182 u64 parent
, u64 root
)
3184 struct list_head
*cur
= rec
->backrefs
.next
;
3185 struct extent_backref
*node
;
3186 struct tree_backref
*back
;
3188 while(cur
!= &rec
->backrefs
) {
3189 node
= list_entry(cur
, struct extent_backref
, list
);
3193 back
= (struct tree_backref
*)node
;
3195 if (!node
->full_backref
)
3197 if (parent
== back
->parent
)
3200 if (node
->full_backref
)
3202 if (back
->root
== root
)
3209 static struct tree_backref
*alloc_tree_backref(struct extent_record
*rec
,
3210 u64 parent
, u64 root
)
3212 struct tree_backref
*ref
= malloc(sizeof(*ref
));
3213 memset(&ref
->node
, 0, sizeof(ref
->node
));
3215 ref
->parent
= parent
;
3216 ref
->node
.full_backref
= 1;
3219 ref
->node
.full_backref
= 0;
3221 list_add_tail(&ref
->node
.list
, &rec
->backrefs
);
3226 static struct data_backref
*find_data_backref(struct extent_record
*rec
,
3227 u64 parent
, u64 root
,
3228 u64 owner
, u64 offset
,
3230 u64 disk_bytenr
, u64 bytes
)
3232 struct list_head
*cur
= rec
->backrefs
.next
;
3233 struct extent_backref
*node
;
3234 struct data_backref
*back
;
3236 while(cur
!= &rec
->backrefs
) {
3237 node
= list_entry(cur
, struct extent_backref
, list
);
3241 back
= (struct data_backref
*)node
;
3243 if (!node
->full_backref
)
3245 if (parent
== back
->parent
)
3248 if (node
->full_backref
)
3250 if (back
->root
== root
&& back
->owner
== owner
&&
3251 back
->offset
== offset
) {
3252 if (found_ref
&& node
->found_ref
&&
3253 (back
->bytes
!= bytes
||
3254 back
->disk_bytenr
!= disk_bytenr
))
3263 static struct data_backref
*alloc_data_backref(struct extent_record
*rec
,
3264 u64 parent
, u64 root
,
3265 u64 owner
, u64 offset
,
3268 struct data_backref
*ref
= malloc(sizeof(*ref
));
3269 memset(&ref
->node
, 0, sizeof(ref
->node
));
3270 ref
->node
.is_data
= 1;
3273 ref
->parent
= parent
;
3276 ref
->node
.full_backref
= 1;
3280 ref
->offset
= offset
;
3281 ref
->node
.full_backref
= 0;
3283 ref
->bytes
= max_size
;
3286 list_add_tail(&ref
->node
.list
, &rec
->backrefs
);
3287 if (max_size
> rec
->max_size
)
3288 rec
->max_size
= max_size
;
3292 static int add_extent_rec(struct cache_tree
*extent_cache
,
3293 struct btrfs_key
*parent_key
, u64 parent_gen
,
3294 u64 start
, u64 nr
, u64 extent_item_refs
,
3295 int is_root
, int inc_ref
, int set_checked
,
3296 int metadata
, int extent_rec
, u64 max_size
)
3298 struct extent_record
*rec
;
3299 struct cache_extent
*cache
;
3303 cache
= lookup_cache_extent(extent_cache
, start
, nr
);
3305 rec
= container_of(cache
, struct extent_record
, cache
);
3309 rec
->nr
= max(nr
, max_size
);
3312 * We need to make sure to reset nr to whatever the extent
3313 * record says was the real size, this way we can compare it to
3317 if (start
!= rec
->start
|| rec
->found_rec
) {
3318 struct extent_record
*tmp
;
3321 if (list_empty(&rec
->list
))
3322 list_add_tail(&rec
->list
,
3323 &duplicate_extents
);
3326 * We have to do this song and dance in case we
3327 * find an extent record that falls inside of
3328 * our current extent record but does not have
3329 * the same objectid.
3331 tmp
= malloc(sizeof(*tmp
));
3335 tmp
->max_size
= max_size
;
3338 tmp
->metadata
= metadata
;
3339 tmp
->extent_item_refs
= extent_item_refs
;
3340 INIT_LIST_HEAD(&tmp
->list
);
3341 list_add_tail(&tmp
->list
, &rec
->dups
);
3342 rec
->num_duplicates
++;
3349 if (extent_item_refs
&& !dup
) {
3350 if (rec
->extent_item_refs
) {
3351 fprintf(stderr
, "block %llu rec "
3352 "extent_item_refs %llu, passed %llu\n",
3353 (unsigned long long)start
,
3354 (unsigned long long)
3355 rec
->extent_item_refs
,
3356 (unsigned long long)extent_item_refs
);
3358 rec
->extent_item_refs
= extent_item_refs
;
3363 rec
->content_checked
= 1;
3364 rec
->owner_ref_checked
= 1;
3368 btrfs_cpu_key_to_disk(&rec
->parent_key
, parent_key
);
3370 rec
->parent_generation
= parent_gen
;
3372 if (rec
->max_size
< max_size
)
3373 rec
->max_size
= max_size
;
3375 maybe_free_extent_rec(extent_cache
, rec
);
3378 rec
= malloc(sizeof(*rec
));
3380 rec
->max_size
= max_size
;
3381 rec
->nr
= max(nr
, max_size
);
3382 rec
->found_rec
= !!extent_rec
;
3383 rec
->content_checked
= 0;
3384 rec
->owner_ref_checked
= 0;
3385 rec
->num_duplicates
= 0;
3386 rec
->metadata
= metadata
;
3387 INIT_LIST_HEAD(&rec
->backrefs
);
3388 INIT_LIST_HEAD(&rec
->dups
);
3389 INIT_LIST_HEAD(&rec
->list
);
3401 if (extent_item_refs
)
3402 rec
->extent_item_refs
= extent_item_refs
;
3404 rec
->extent_item_refs
= 0;
3407 btrfs_cpu_key_to_disk(&rec
->parent_key
, parent_key
);
3409 memset(&rec
->parent_key
, 0, sizeof(*parent_key
));
3412 rec
->parent_generation
= parent_gen
;
3414 rec
->parent_generation
= 0;
3416 rec
->cache
.start
= start
;
3417 rec
->cache
.size
= nr
;
3418 ret
= insert_cache_extent(extent_cache
, &rec
->cache
);
3422 rec
->content_checked
= 1;
3423 rec
->owner_ref_checked
= 1;
3428 static int add_tree_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
3429 u64 parent
, u64 root
, int found_ref
)
3431 struct extent_record
*rec
;
3432 struct tree_backref
*back
;
3433 struct cache_extent
*cache
;
3435 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
3437 add_extent_rec(extent_cache
, NULL
, 0, bytenr
,
3438 1, 0, 0, 0, 0, 1, 0, 0);
3439 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
3444 rec
= container_of(cache
, struct extent_record
, cache
);
3445 if (rec
->start
!= bytenr
) {
3449 back
= find_tree_backref(rec
, parent
, root
);
3451 back
= alloc_tree_backref(rec
, parent
, root
);
3454 if (back
->node
.found_ref
) {
3455 fprintf(stderr
, "Extent back ref already exists "
3456 "for %llu parent %llu root %llu \n",
3457 (unsigned long long)bytenr
,
3458 (unsigned long long)parent
,
3459 (unsigned long long)root
);
3461 back
->node
.found_ref
= 1;
3463 if (back
->node
.found_extent_tree
) {
3464 fprintf(stderr
, "Extent back ref already exists "
3465 "for %llu parent %llu root %llu \n",
3466 (unsigned long long)bytenr
,
3467 (unsigned long long)parent
,
3468 (unsigned long long)root
);
3470 back
->node
.found_extent_tree
= 1;
3472 maybe_free_extent_rec(extent_cache
, rec
);
3476 static int add_data_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
3477 u64 parent
, u64 root
, u64 owner
, u64 offset
,
3478 u32 num_refs
, int found_ref
, u64 max_size
)
3480 struct extent_record
*rec
;
3481 struct data_backref
*back
;
3482 struct cache_extent
*cache
;
3484 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
3486 add_extent_rec(extent_cache
, NULL
, 0, bytenr
, 1, 0, 0, 0, 0,
3488 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
3493 rec
= container_of(cache
, struct extent_record
, cache
);
3494 if (rec
->max_size
< max_size
)
3495 rec
->max_size
= max_size
;
3498 * If found_ref is set then max_size is the real size and must match the
3499 * existing refs. So if we have already found a ref then we need to
3500 * make sure that this ref matches the existing one, otherwise we need
3501 * to add a new backref so we can notice that the backrefs don't match
3502 * and we need to figure out who is telling the truth. This is to
3503 * account for that awful fsync bug I introduced where we'd end up with
3504 * a btrfs_file_extent_item that would have its length include multiple
3505 * prealloc extents or point inside of a prealloc extent.
3507 back
= find_data_backref(rec
, parent
, root
, owner
, offset
, found_ref
,
3510 back
= alloc_data_backref(rec
, parent
, root
, owner
, offset
,
3514 BUG_ON(num_refs
!= 1);
3515 if (back
->node
.found_ref
)
3516 BUG_ON(back
->bytes
!= max_size
);
3517 back
->node
.found_ref
= 1;
3518 back
->found_ref
+= 1;
3519 back
->bytes
= max_size
;
3520 back
->disk_bytenr
= bytenr
;
3522 rec
->content_checked
= 1;
3523 rec
->owner_ref_checked
= 1;
3525 if (back
->node
.found_extent_tree
) {
3526 fprintf(stderr
, "Extent back ref already exists "
3527 "for %llu parent %llu root %llu "
3528 "owner %llu offset %llu num_refs %lu\n",
3529 (unsigned long long)bytenr
,
3530 (unsigned long long)parent
,
3531 (unsigned long long)root
,
3532 (unsigned long long)owner
,
3533 (unsigned long long)offset
,
3534 (unsigned long)num_refs
);
3536 back
->num_refs
= num_refs
;
3537 back
->node
.found_extent_tree
= 1;
3539 maybe_free_extent_rec(extent_cache
, rec
);
3543 static int add_pending(struct cache_tree
*pending
,
3544 struct cache_tree
*seen
, u64 bytenr
, u32 size
)
3547 ret
= add_cache_extent(seen
, bytenr
, size
);
3550 add_cache_extent(pending
, bytenr
, size
);
3554 static int pick_next_pending(struct cache_tree
*pending
,
3555 struct cache_tree
*reada
,
3556 struct cache_tree
*nodes
,
3557 u64 last
, struct block_info
*bits
, int bits_nr
,
3560 unsigned long node_start
= last
;
3561 struct cache_extent
*cache
;
3564 cache
= search_cache_extent(reada
, 0);
3566 bits
[0].start
= cache
->start
;
3567 bits
[0].size
= cache
->size
;
3572 if (node_start
> 32768)
3573 node_start
-= 32768;
3575 cache
= search_cache_extent(nodes
, node_start
);
3577 cache
= search_cache_extent(nodes
, 0);
3580 cache
= search_cache_extent(pending
, 0);
3585 bits
[ret
].start
= cache
->start
;
3586 bits
[ret
].size
= cache
->size
;
3587 cache
= next_cache_extent(cache
);
3589 } while (cache
&& ret
< bits_nr
);
3595 bits
[ret
].start
= cache
->start
;
3596 bits
[ret
].size
= cache
->size
;
3597 cache
= next_cache_extent(cache
);
3599 } while (cache
&& ret
< bits_nr
);
3601 if (bits_nr
- ret
> 8) {
3602 u64 lookup
= bits
[0].start
+ bits
[0].size
;
3603 struct cache_extent
*next
;
3604 next
= search_cache_extent(pending
, lookup
);
3606 if (next
->start
- lookup
> 32768)
3608 bits
[ret
].start
= next
->start
;
3609 bits
[ret
].size
= next
->size
;
3610 lookup
= next
->start
+ next
->size
;
3614 next
= next_cache_extent(next
);
3622 static void free_chunk_record(struct cache_extent
*cache
)
3624 struct chunk_record
*rec
;
3626 rec
= container_of(cache
, struct chunk_record
, cache
);
3627 list_del_init(&rec
->list
);
3628 list_del_init(&rec
->dextents
);
3632 void free_chunk_cache_tree(struct cache_tree
*chunk_cache
)
3634 cache_tree_free_extents(chunk_cache
, free_chunk_record
);
3637 static void free_device_record(struct rb_node
*node
)
3639 struct device_record
*rec
;
3641 rec
= container_of(node
, struct device_record
, node
);
3645 FREE_RB_BASED_TREE(device_cache
, free_device_record
);
3647 int insert_block_group_record(struct block_group_tree
*tree
,
3648 struct block_group_record
*bg_rec
)
3652 ret
= insert_cache_extent(&tree
->tree
, &bg_rec
->cache
);
3656 list_add_tail(&bg_rec
->list
, &tree
->block_groups
);
3660 static void free_block_group_record(struct cache_extent
*cache
)
3662 struct block_group_record
*rec
;
3664 rec
= container_of(cache
, struct block_group_record
, cache
);
3665 list_del_init(&rec
->list
);
3669 void free_block_group_tree(struct block_group_tree
*tree
)
3671 cache_tree_free_extents(&tree
->tree
, free_block_group_record
);
3674 int insert_device_extent_record(struct device_extent_tree
*tree
,
3675 struct device_extent_record
*de_rec
)
3680 * Device extent is a bit different from the other extents, because
3681 * the extents which belong to the different devices may have the
3682 * same start and size, so we need use the special extent cache
3683 * search/insert functions.
3685 ret
= insert_cache_extent2(&tree
->tree
, &de_rec
->cache
);
3689 list_add_tail(&de_rec
->chunk_list
, &tree
->no_chunk_orphans
);
3690 list_add_tail(&de_rec
->device_list
, &tree
->no_device_orphans
);
3694 static void free_device_extent_record(struct cache_extent
*cache
)
3696 struct device_extent_record
*rec
;
3698 rec
= container_of(cache
, struct device_extent_record
, cache
);
3699 if (!list_empty(&rec
->chunk_list
))
3700 list_del_init(&rec
->chunk_list
);
3701 if (!list_empty(&rec
->device_list
))
3702 list_del_init(&rec
->device_list
);
3706 void free_device_extent_tree(struct device_extent_tree
*tree
)
3708 cache_tree_free_extents(&tree
->tree
, free_device_extent_record
);
3711 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3712 static int process_extent_ref_v0(struct cache_tree
*extent_cache
,
3713 struct extent_buffer
*leaf
, int slot
)
3715 struct btrfs_extent_ref_v0
*ref0
;
3716 struct btrfs_key key
;
3718 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
3719 ref0
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_ref_v0
);
3720 if (btrfs_ref_objectid_v0(leaf
, ref0
) < BTRFS_FIRST_FREE_OBJECTID
) {
3721 add_tree_backref(extent_cache
, key
.objectid
, key
.offset
, 0, 0);
3723 add_data_backref(extent_cache
, key
.objectid
, key
.offset
, 0,
3724 0, 0, btrfs_ref_count_v0(leaf
, ref0
), 0, 0);
3730 struct chunk_record
*btrfs_new_chunk_record(struct extent_buffer
*leaf
,
3731 struct btrfs_key
*key
,
3734 struct btrfs_chunk
*ptr
;
3735 struct chunk_record
*rec
;
3738 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
3739 num_stripes
= btrfs_chunk_num_stripes(leaf
, ptr
);
3741 rec
= malloc(btrfs_chunk_record_size(num_stripes
));
3743 fprintf(stderr
, "memory allocation failed\n");
3747 memset(rec
, 0, btrfs_chunk_record_size(num_stripes
));
3749 INIT_LIST_HEAD(&rec
->list
);
3750 INIT_LIST_HEAD(&rec
->dextents
);
3753 rec
->cache
.start
= key
->offset
;
3754 rec
->cache
.size
= btrfs_chunk_length(leaf
, ptr
);
3756 rec
->generation
= btrfs_header_generation(leaf
);
3758 rec
->objectid
= key
->objectid
;
3759 rec
->type
= key
->type
;
3760 rec
->offset
= key
->offset
;
3762 rec
->length
= rec
->cache
.size
;
3763 rec
->owner
= btrfs_chunk_owner(leaf
, ptr
);
3764 rec
->stripe_len
= btrfs_chunk_stripe_len(leaf
, ptr
);
3765 rec
->type_flags
= btrfs_chunk_type(leaf
, ptr
);
3766 rec
->io_width
= btrfs_chunk_io_width(leaf
, ptr
);
3767 rec
->io_align
= btrfs_chunk_io_align(leaf
, ptr
);
3768 rec
->sector_size
= btrfs_chunk_sector_size(leaf
, ptr
);
3769 rec
->num_stripes
= num_stripes
;
3770 rec
->sub_stripes
= btrfs_chunk_sub_stripes(leaf
, ptr
);
3772 for (i
= 0; i
< rec
->num_stripes
; ++i
) {
3773 rec
->stripes
[i
].devid
=
3774 btrfs_stripe_devid_nr(leaf
, ptr
, i
);
3775 rec
->stripes
[i
].offset
=
3776 btrfs_stripe_offset_nr(leaf
, ptr
, i
);
3777 read_extent_buffer(leaf
, rec
->stripes
[i
].dev_uuid
,
3778 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr
, i
),
3785 static int process_chunk_item(struct cache_tree
*chunk_cache
,
3786 struct btrfs_key
*key
, struct extent_buffer
*eb
,
3789 struct chunk_record
*rec
;
3792 rec
= btrfs_new_chunk_record(eb
, key
, slot
);
3793 ret
= insert_cache_extent(chunk_cache
, &rec
->cache
);
3795 fprintf(stderr
, "Chunk[%llu, %llu] existed.\n",
3796 rec
->offset
, rec
->length
);
3803 static int process_device_item(struct rb_root
*dev_cache
,
3804 struct btrfs_key
*key
, struct extent_buffer
*eb
, int slot
)
3806 struct btrfs_dev_item
*ptr
;
3807 struct device_record
*rec
;
3810 ptr
= btrfs_item_ptr(eb
,
3811 slot
, struct btrfs_dev_item
);
3813 rec
= malloc(sizeof(*rec
));
3815 fprintf(stderr
, "memory allocation failed\n");
3819 rec
->devid
= key
->offset
;
3820 rec
->generation
= btrfs_header_generation(eb
);
3822 rec
->objectid
= key
->objectid
;
3823 rec
->type
= key
->type
;
3824 rec
->offset
= key
->offset
;
3826 rec
->devid
= btrfs_device_id(eb
, ptr
);
3827 rec
->total_byte
= btrfs_device_total_bytes(eb
, ptr
);
3828 rec
->byte_used
= btrfs_device_bytes_used(eb
, ptr
);
3830 ret
= rb_insert(dev_cache
, &rec
->node
, device_record_compare
);
3832 fprintf(stderr
, "Device[%llu] existed.\n", rec
->devid
);
3839 struct block_group_record
*
3840 btrfs_new_block_group_record(struct extent_buffer
*leaf
, struct btrfs_key
*key
,
3843 struct btrfs_block_group_item
*ptr
;
3844 struct block_group_record
*rec
;
3846 rec
= malloc(sizeof(*rec
));
3848 fprintf(stderr
, "memory allocation failed\n");
3851 memset(rec
, 0, sizeof(*rec
));
3853 rec
->cache
.start
= key
->objectid
;
3854 rec
->cache
.size
= key
->offset
;
3856 rec
->generation
= btrfs_header_generation(leaf
);
3858 rec
->objectid
= key
->objectid
;
3859 rec
->type
= key
->type
;
3860 rec
->offset
= key
->offset
;
3862 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_block_group_item
);
3863 rec
->flags
= btrfs_disk_block_group_flags(leaf
, ptr
);
3865 INIT_LIST_HEAD(&rec
->list
);
3870 static int process_block_group_item(struct block_group_tree
*block_group_cache
,
3871 struct btrfs_key
*key
,
3872 struct extent_buffer
*eb
, int slot
)
3874 struct block_group_record
*rec
;
3877 rec
= btrfs_new_block_group_record(eb
, key
, slot
);
3878 ret
= insert_block_group_record(block_group_cache
, rec
);
3880 fprintf(stderr
, "Block Group[%llu, %llu] existed.\n",
3881 rec
->objectid
, rec
->offset
);
3888 struct device_extent_record
*
3889 btrfs_new_device_extent_record(struct extent_buffer
*leaf
,
3890 struct btrfs_key
*key
, int slot
)
3892 struct device_extent_record
*rec
;
3893 struct btrfs_dev_extent
*ptr
;
3895 rec
= malloc(sizeof(*rec
));
3897 fprintf(stderr
, "memory allocation failed\n");
3900 memset(rec
, 0, sizeof(*rec
));
3902 rec
->cache
.objectid
= key
->objectid
;
3903 rec
->cache
.start
= key
->offset
;
3905 rec
->generation
= btrfs_header_generation(leaf
);
3907 rec
->objectid
= key
->objectid
;
3908 rec
->type
= key
->type
;
3909 rec
->offset
= key
->offset
;
3911 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_dev_extent
);
3912 rec
->chunk_objecteid
=
3913 btrfs_dev_extent_chunk_objectid(leaf
, ptr
);
3915 btrfs_dev_extent_chunk_offset(leaf
, ptr
);
3916 rec
->length
= btrfs_dev_extent_length(leaf
, ptr
);
3917 rec
->cache
.size
= rec
->length
;
3919 INIT_LIST_HEAD(&rec
->chunk_list
);
3920 INIT_LIST_HEAD(&rec
->device_list
);
3926 process_device_extent_item(struct device_extent_tree
*dev_extent_cache
,
3927 struct btrfs_key
*key
, struct extent_buffer
*eb
,
3930 struct device_extent_record
*rec
;
3933 rec
= btrfs_new_device_extent_record(eb
, key
, slot
);
3934 ret
= insert_device_extent_record(dev_extent_cache
, rec
);
3937 "Device extent[%llu, %llu, %llu] existed.\n",
3938 rec
->objectid
, rec
->offset
, rec
->length
);
3945 static int process_extent_item(struct btrfs_root
*root
,
3946 struct cache_tree
*extent_cache
,
3947 struct extent_buffer
*eb
, int slot
)
3949 struct btrfs_extent_item
*ei
;
3950 struct btrfs_extent_inline_ref
*iref
;
3951 struct btrfs_extent_data_ref
*dref
;
3952 struct btrfs_shared_data_ref
*sref
;
3953 struct btrfs_key key
;
3957 u32 item_size
= btrfs_item_size_nr(eb
, slot
);
3963 btrfs_item_key_to_cpu(eb
, &key
, slot
);
3965 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
3967 num_bytes
= root
->leafsize
;
3969 num_bytes
= key
.offset
;
3972 if (item_size
< sizeof(*ei
)) {
3973 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3974 struct btrfs_extent_item_v0
*ei0
;
3975 BUG_ON(item_size
!= sizeof(*ei0
));
3976 ei0
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item_v0
);
3977 refs
= btrfs_extent_refs_v0(eb
, ei0
);
3981 return add_extent_rec(extent_cache
, NULL
, 0, key
.objectid
,
3982 num_bytes
, refs
, 0, 0, 0, metadata
, 1,
3986 ei
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item
);
3987 refs
= btrfs_extent_refs(eb
, ei
);
3989 add_extent_rec(extent_cache
, NULL
, 0, key
.objectid
, num_bytes
,
3990 refs
, 0, 0, 0, metadata
, 1, num_bytes
);
3992 ptr
= (unsigned long)(ei
+ 1);
3993 if (btrfs_extent_flags(eb
, ei
) & BTRFS_EXTENT_FLAG_TREE_BLOCK
&&
3994 key
.type
== BTRFS_EXTENT_ITEM_KEY
)
3995 ptr
+= sizeof(struct btrfs_tree_block_info
);
3997 end
= (unsigned long)ei
+ item_size
;
3999 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
4000 type
= btrfs_extent_inline_ref_type(eb
, iref
);
4001 offset
= btrfs_extent_inline_ref_offset(eb
, iref
);
4003 case BTRFS_TREE_BLOCK_REF_KEY
:
4004 add_tree_backref(extent_cache
, key
.objectid
,
4007 case BTRFS_SHARED_BLOCK_REF_KEY
:
4008 add_tree_backref(extent_cache
, key
.objectid
,
4011 case BTRFS_EXTENT_DATA_REF_KEY
:
4012 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
4013 add_data_backref(extent_cache
, key
.objectid
, 0,
4014 btrfs_extent_data_ref_root(eb
, dref
),
4015 btrfs_extent_data_ref_objectid(eb
,
4017 btrfs_extent_data_ref_offset(eb
, dref
),
4018 btrfs_extent_data_ref_count(eb
, dref
),
4021 case BTRFS_SHARED_DATA_REF_KEY
:
4022 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
4023 add_data_backref(extent_cache
, key
.objectid
, offset
,
4025 btrfs_shared_data_ref_count(eb
, sref
),
4029 fprintf(stderr
, "corrupt extent record: key %Lu %u %Lu\n",
4030 key
.objectid
, key
.type
, num_bytes
);
4033 ptr
+= btrfs_extent_inline_ref_size(type
);
4040 static int check_cache_range(struct btrfs_root
*root
,
4041 struct btrfs_block_group_cache
*cache
,
4042 u64 offset
, u64 bytes
)
4044 struct btrfs_free_space
*entry
;
4050 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
4051 bytenr
= btrfs_sb_offset(i
);
4052 ret
= btrfs_rmap_block(&root
->fs_info
->mapping_tree
,
4053 cache
->key
.objectid
, bytenr
, 0,
4054 &logical
, &nr
, &stripe_len
);
4059 if (logical
[nr
] + stripe_len
<= offset
)
4061 if (offset
+ bytes
<= logical
[nr
])
4063 if (logical
[nr
] == offset
) {
4064 if (stripe_len
>= bytes
) {
4068 bytes
-= stripe_len
;
4069 offset
+= stripe_len
;
4070 } else if (logical
[nr
] < offset
) {
4071 if (logical
[nr
] + stripe_len
>=
4076 bytes
= (offset
+ bytes
) -
4077 (logical
[nr
] + stripe_len
);
4078 offset
= logical
[nr
] + stripe_len
;
4081 * Could be tricky, the super may land in the
4082 * middle of the area we're checking. First
4083 * check the easiest case, it's at the end.
4085 if (logical
[nr
] + stripe_len
>=
4087 bytes
= logical
[nr
] - offset
;
4091 /* Check the left side */
4092 ret
= check_cache_range(root
, cache
,
4094 logical
[nr
] - offset
);
4100 /* Now we continue with the right side */
4101 bytes
= (offset
+ bytes
) -
4102 (logical
[nr
] + stripe_len
);
4103 offset
= logical
[nr
] + stripe_len
;
4110 entry
= btrfs_find_free_space(cache
->free_space_ctl
, offset
, bytes
);
4112 fprintf(stderr
, "There is no free space entry for %Lu-%Lu\n",
4113 offset
, offset
+bytes
);
4117 if (entry
->offset
!= offset
) {
4118 fprintf(stderr
, "Wanted offset %Lu, found %Lu\n", offset
,
4123 if (entry
->bytes
!= bytes
) {
4124 fprintf(stderr
, "Wanted bytes %Lu, found %Lu for off %Lu\n",
4125 bytes
, entry
->bytes
, offset
);
4129 unlink_free_space(cache
->free_space_ctl
, entry
);
4134 static int verify_space_cache(struct btrfs_root
*root
,
4135 struct btrfs_block_group_cache
*cache
)
4137 struct btrfs_path
*path
;
4138 struct extent_buffer
*leaf
;
4139 struct btrfs_key key
;
4143 path
= btrfs_alloc_path();
4147 root
= root
->fs_info
->extent_root
;
4149 last
= max_t(u64
, cache
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
4151 key
.objectid
= last
;
4153 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4155 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4160 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
4161 ret
= btrfs_next_leaf(root
, path
);
4169 leaf
= path
->nodes
[0];
4170 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4171 if (key
.objectid
>= cache
->key
.offset
+ cache
->key
.objectid
)
4173 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
4174 key
.type
!= BTRFS_METADATA_ITEM_KEY
) {
4179 if (last
== key
.objectid
) {
4180 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
4181 last
= key
.objectid
+ key
.offset
;
4183 last
= key
.objectid
+ root
->leafsize
;
4188 ret
= check_cache_range(root
, cache
, last
,
4189 key
.objectid
- last
);
4192 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
4193 last
= key
.objectid
+ key
.offset
;
4195 last
= key
.objectid
+ root
->leafsize
;
4199 if (last
< cache
->key
.objectid
+ cache
->key
.offset
)
4200 ret
= check_cache_range(root
, cache
, last
,
4201 cache
->key
.objectid
+
4202 cache
->key
.offset
- last
);
4205 btrfs_free_path(path
);
4208 !RB_EMPTY_ROOT(&cache
->free_space_ctl
->free_space_offset
)) {
4209 fprintf(stderr
, "There are still entries left in the space "
4217 static int check_space_cache(struct btrfs_root
*root
)
4219 struct btrfs_block_group_cache
*cache
;
4220 u64 start
= BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
;
4224 if (btrfs_super_cache_generation(root
->fs_info
->super_copy
) != -1ULL &&
4225 btrfs_super_generation(root
->fs_info
->super_copy
) !=
4226 btrfs_super_cache_generation(root
->fs_info
->super_copy
)) {
4227 printf("cache and super generation don't match, space cache "
4228 "will be invalidated\n");
4233 cache
= btrfs_lookup_first_block_group(root
->fs_info
, start
);
4237 start
= cache
->key
.objectid
+ cache
->key
.offset
;
4238 if (!cache
->free_space_ctl
) {
4239 if (btrfs_init_free_space_ctl(cache
,
4240 root
->sectorsize
)) {
4245 btrfs_remove_free_space_cache(cache
);
4248 ret
= load_free_space_cache(root
->fs_info
, cache
);
4252 ret
= verify_space_cache(root
, cache
);
4254 fprintf(stderr
, "cache appears valid but isnt %Lu\n",
4255 cache
->key
.objectid
);
4260 return error
? -EINVAL
: 0;
4263 static int read_extent_data(struct btrfs_root
*root
, char *data
,
4264 u64 logical
, u64
*len
, int mirror
)
4267 struct btrfs_multi_bio
*multi
= NULL
;
4268 struct btrfs_fs_info
*info
= root
->fs_info
;
4269 struct btrfs_device
*device
;
4273 ret
= btrfs_map_block(&info
->mapping_tree
, READ
, logical
, len
,
4274 &multi
, mirror
, NULL
);
4276 fprintf(stderr
, "Couldn't map the block %llu\n",
4280 device
= multi
->stripes
[0].dev
;
4282 if (device
->fd
== 0)
4287 ret
= pread64(device
->fd
, data
, *len
, multi
->stripes
[0].physical
);
4297 static int check_extent_csums(struct btrfs_root
*root
, u64 bytenr
,
4298 u64 num_bytes
, unsigned long leaf_offset
,
4299 struct extent_buffer
*eb
) {
4302 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
4304 unsigned long csum_offset
;
4308 u64 data_checked
= 0;
4314 if (num_bytes
% root
->sectorsize
)
4317 data
= malloc(num_bytes
);
4321 while (offset
< num_bytes
) {
4324 read_len
= num_bytes
- offset
;
4325 /* read as much space once a time */
4326 ret
= read_extent_data(root
, data
+ offset
,
4327 bytenr
+ offset
, &read_len
, mirror
);
4331 /* verify every 4k data's checksum */
4332 while (data_checked
< read_len
) {
4334 tmp
= offset
+ data_checked
;
4336 csum
= btrfs_csum_data(NULL
, (char *)data
+ tmp
,
4337 csum
, root
->sectorsize
);
4338 btrfs_csum_final(csum
, (char *)&csum
);
4340 csum_offset
= leaf_offset
+
4341 tmp
/ root
->sectorsize
* csum_size
;
4342 read_extent_buffer(eb
, (char *)&csum_expected
,
4343 csum_offset
, csum_size
);
4344 /* try another mirror */
4345 if (csum
!= csum_expected
) {
4346 fprintf(stderr
, "mirror %d bytenr %llu csum %u expected csum %u\n",
4347 mirror
, bytenr
+ tmp
,
4348 csum
, csum_expected
);
4349 num_copies
= btrfs_num_copies(
4350 &root
->fs_info
->mapping_tree
,
4352 if (mirror
< num_copies
- 1) {
4357 data_checked
+= root
->sectorsize
;
4366 static int check_extent_exists(struct btrfs_root
*root
, u64 bytenr
,
4369 struct btrfs_path
*path
;
4370 struct extent_buffer
*leaf
;
4371 struct btrfs_key key
;
4374 path
= btrfs_alloc_path();
4376 fprintf(stderr
, "Error allocing path\n");
4380 key
.objectid
= bytenr
;
4381 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4382 key
.offset
= (u64
)-1;
4385 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
4388 fprintf(stderr
, "Error looking up extent record %d\n", ret
);
4389 btrfs_free_path(path
);
4392 if (path
->slots
[0] > 0) {
4395 ret
= btrfs_prev_leaf(root
, path
);
4398 } else if (ret
> 0) {
4405 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
4408 * Block group items come before extent items if they have the same
4409 * bytenr, so walk back one more just in case. Dear future traveler,
4410 * first congrats on mastering time travel. Now if it's not too much
4411 * trouble could you go back to 2006 and tell Chris to make the
4412 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
4413 * EXTENT_ITEM_KEY please?
4415 while (key
.type
> BTRFS_EXTENT_ITEM_KEY
) {
4416 if (path
->slots
[0] > 0) {
4419 ret
= btrfs_prev_leaf(root
, path
);
4422 } else if (ret
> 0) {
4427 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
4431 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
4432 ret
= btrfs_next_leaf(root
, path
);
4434 fprintf(stderr
, "Error going to next leaf "
4436 btrfs_free_path(path
);
4442 leaf
= path
->nodes
[0];
4443 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4444 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
4448 if (key
.objectid
+ key
.offset
< bytenr
) {
4452 if (key
.objectid
> bytenr
+ num_bytes
)
4455 if (key
.objectid
== bytenr
) {
4456 if (key
.offset
>= num_bytes
) {
4460 num_bytes
-= key
.offset
;
4461 bytenr
+= key
.offset
;
4462 } else if (key
.objectid
< bytenr
) {
4463 if (key
.objectid
+ key
.offset
>= bytenr
+ num_bytes
) {
4467 num_bytes
= (bytenr
+ num_bytes
) -
4468 (key
.objectid
+ key
.offset
);
4469 bytenr
= key
.objectid
+ key
.offset
;
4471 if (key
.objectid
+ key
.offset
< bytenr
+ num_bytes
) {
4472 u64 new_start
= key
.objectid
+ key
.offset
;
4473 u64 new_bytes
= bytenr
+ num_bytes
- new_start
;
4476 * Weird case, the extent is in the middle of
4477 * our range, we'll have to search one side
4478 * and then the other. Not sure if this happens
4479 * in real life, but no harm in coding it up
4480 * anyway just in case.
4482 btrfs_release_path(path
);
4483 ret
= check_extent_exists(root
, new_start
,
4486 fprintf(stderr
, "Right section didn't "
4490 num_bytes
= key
.objectid
- bytenr
;
4493 num_bytes
= key
.objectid
- bytenr
;
4500 if (num_bytes
&& !ret
) {
4501 fprintf(stderr
, "There are no extents for csum range "
4502 "%Lu-%Lu\n", bytenr
, bytenr
+num_bytes
);
4506 btrfs_free_path(path
);
4510 static int check_csums(struct btrfs_root
*root
)
4512 struct btrfs_path
*path
;
4513 struct extent_buffer
*leaf
;
4514 struct btrfs_key key
;
4515 u64 offset
= 0, num_bytes
= 0;
4516 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
4520 unsigned long leaf_offset
;
4522 root
= root
->fs_info
->csum_root
;
4523 if (!extent_buffer_uptodate(root
->node
)) {
4524 fprintf(stderr
, "No valid csum tree found\n");
4528 key
.objectid
= BTRFS_EXTENT_CSUM_OBJECTID
;
4529 key
.type
= BTRFS_EXTENT_CSUM_KEY
;
4532 path
= btrfs_alloc_path();
4536 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
4538 fprintf(stderr
, "Error searching csum tree %d\n", ret
);
4539 btrfs_free_path(path
);
4543 if (ret
> 0 && path
->slots
[0])
4548 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
4549 ret
= btrfs_next_leaf(root
, path
);
4551 fprintf(stderr
, "Error going to next leaf "
4558 leaf
= path
->nodes
[0];
4560 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4561 if (key
.type
!= BTRFS_EXTENT_CSUM_KEY
) {
4566 data_len
= (btrfs_item_size_nr(leaf
, path
->slots
[0]) /
4567 csum_size
) * root
->sectorsize
;
4568 if (!check_data_csum
)
4569 goto skip_csum_check
;
4570 leaf_offset
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
4571 ret
= check_extent_csums(root
, key
.offset
, data_len
,
4577 offset
= key
.offset
;
4578 } else if (key
.offset
!= offset
+ num_bytes
) {
4579 ret
= check_extent_exists(root
, offset
, num_bytes
);
4581 fprintf(stderr
, "Csum exists for %Lu-%Lu but "
4582 "there is no extent record\n",
4583 offset
, offset
+num_bytes
);
4586 offset
= key
.offset
;
4589 num_bytes
+= data_len
;
4593 btrfs_free_path(path
);
4597 static int is_dropped_key(struct btrfs_key
*key
,
4598 struct btrfs_key
*drop_key
) {
4599 if (key
->objectid
< drop_key
->objectid
)
4601 else if (key
->objectid
== drop_key
->objectid
) {
4602 if (key
->type
< drop_key
->type
)
4604 else if (key
->type
== drop_key
->type
) {
4605 if (key
->offset
< drop_key
->offset
)
4612 static int run_next_block(struct btrfs_trans_handle
*trans
,
4613 struct btrfs_root
*root
,
4614 struct block_info
*bits
,
4617 struct cache_tree
*pending
,
4618 struct cache_tree
*seen
,
4619 struct cache_tree
*reada
,
4620 struct cache_tree
*nodes
,
4621 struct cache_tree
*extent_cache
,
4622 struct cache_tree
*chunk_cache
,
4623 struct rb_root
*dev_cache
,
4624 struct block_group_tree
*block_group_cache
,
4625 struct device_extent_tree
*dev_extent_cache
,
4626 struct btrfs_root_item
*ri
)
4628 struct extent_buffer
*buf
;
4639 struct btrfs_key key
;
4640 struct cache_extent
*cache
;
4643 nritems
= pick_next_pending(pending
, reada
, nodes
, *last
, bits
,
4644 bits_nr
, &reada_bits
);
4649 for(i
= 0; i
< nritems
; i
++) {
4650 ret
= add_cache_extent(reada
, bits
[i
].start
,
4655 /* fixme, get the parent transid */
4656 readahead_tree_block(root
, bits
[i
].start
,
4660 *last
= bits
[0].start
;
4661 bytenr
= bits
[0].start
;
4662 size
= bits
[0].size
;
4664 cache
= lookup_cache_extent(pending
, bytenr
, size
);
4666 remove_cache_extent(pending
, cache
);
4669 cache
= lookup_cache_extent(reada
, bytenr
, size
);
4671 remove_cache_extent(reada
, cache
);
4674 cache
= lookup_cache_extent(nodes
, bytenr
, size
);
4676 remove_cache_extent(nodes
, cache
);
4679 cache
= lookup_cache_extent(extent_cache
, bytenr
, size
);
4681 struct extent_record
*rec
;
4683 rec
= container_of(cache
, struct extent_record
, cache
);
4684 gen
= rec
->parent_generation
;
4687 /* fixme, get the real parent transid */
4688 buf
= read_tree_block(root
, bytenr
, size
, gen
);
4689 if (!extent_buffer_uptodate(buf
)) {
4690 record_bad_block_io(root
->fs_info
,
4691 extent_cache
, bytenr
, size
);
4695 nritems
= btrfs_header_nritems(buf
);
4698 * FIXME, this only works only if we don't have any full
4701 if (!init_extent_tree
) {
4702 ret
= btrfs_lookup_extent_info(NULL
, root
, bytenr
,
4703 btrfs_header_level(buf
), 1, NULL
,
4711 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
4716 owner
= btrfs_header_owner(buf
);
4719 ret
= check_block(trans
, root
, extent_cache
, buf
, flags
);
4723 if (btrfs_is_leaf(buf
)) {
4724 btree_space_waste
+= btrfs_leaf_free_space(root
, buf
);
4725 for (i
= 0; i
< nritems
; i
++) {
4726 struct btrfs_file_extent_item
*fi
;
4727 btrfs_item_key_to_cpu(buf
, &key
, i
);
4728 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
) {
4729 process_extent_item(root
, extent_cache
, buf
,
4733 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
4734 process_extent_item(root
, extent_cache
, buf
,
4738 if (key
.type
== BTRFS_EXTENT_CSUM_KEY
) {
4740 btrfs_item_size_nr(buf
, i
);
4743 if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
4744 process_chunk_item(chunk_cache
, &key
, buf
, i
);
4747 if (key
.type
== BTRFS_DEV_ITEM_KEY
) {
4748 process_device_item(dev_cache
, &key
, buf
, i
);
4751 if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
4752 process_block_group_item(block_group_cache
,
4756 if (key
.type
== BTRFS_DEV_EXTENT_KEY
) {
4757 process_device_extent_item(dev_extent_cache
,
4762 if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
4763 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4764 process_extent_ref_v0(extent_cache
, buf
, i
);
4771 if (key
.type
== BTRFS_TREE_BLOCK_REF_KEY
) {
4772 add_tree_backref(extent_cache
, key
.objectid
, 0,
4776 if (key
.type
== BTRFS_SHARED_BLOCK_REF_KEY
) {
4777 add_tree_backref(extent_cache
, key
.objectid
,
4781 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
4782 struct btrfs_extent_data_ref
*ref
;
4783 ref
= btrfs_item_ptr(buf
, i
,
4784 struct btrfs_extent_data_ref
);
4785 add_data_backref(extent_cache
,
4787 btrfs_extent_data_ref_root(buf
, ref
),
4788 btrfs_extent_data_ref_objectid(buf
,
4790 btrfs_extent_data_ref_offset(buf
, ref
),
4791 btrfs_extent_data_ref_count(buf
, ref
),
4792 0, root
->sectorsize
);
4795 if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
4796 struct btrfs_shared_data_ref
*ref
;
4797 ref
= btrfs_item_ptr(buf
, i
,
4798 struct btrfs_shared_data_ref
);
4799 add_data_backref(extent_cache
,
4800 key
.objectid
, key
.offset
, 0, 0, 0,
4801 btrfs_shared_data_ref_count(buf
, ref
),
4802 0, root
->sectorsize
);
4805 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
) {
4806 struct bad_item
*bad
;
4808 if (key
.objectid
== BTRFS_ORPHAN_OBJECTID
)
4812 bad
= malloc(sizeof(struct bad_item
));
4815 INIT_LIST_HEAD(&bad
->list
);
4816 memcpy(&bad
->key
, &key
,
4817 sizeof(struct btrfs_key
));
4818 bad
->root_id
= owner
;
4819 list_add_tail(&bad
->list
, &delete_items
);
4822 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
4824 fi
= btrfs_item_ptr(buf
, i
,
4825 struct btrfs_file_extent_item
);
4826 if (btrfs_file_extent_type(buf
, fi
) ==
4827 BTRFS_FILE_EXTENT_INLINE
)
4829 if (btrfs_file_extent_disk_bytenr(buf
, fi
) == 0)
4832 data_bytes_allocated
+=
4833 btrfs_file_extent_disk_num_bytes(buf
, fi
);
4834 if (data_bytes_allocated
< root
->sectorsize
) {
4837 data_bytes_referenced
+=
4838 btrfs_file_extent_num_bytes(buf
, fi
);
4839 add_data_backref(extent_cache
,
4840 btrfs_file_extent_disk_bytenr(buf
, fi
),
4841 parent
, owner
, key
.objectid
, key
.offset
-
4842 btrfs_file_extent_offset(buf
, fi
), 1, 1,
4843 btrfs_file_extent_disk_num_bytes(buf
, fi
));
4847 struct btrfs_key first_key
;
4849 first_key
.objectid
= 0;
4852 btrfs_item_key_to_cpu(buf
, &first_key
, 0);
4853 level
= btrfs_header_level(buf
);
4854 for (i
= 0; i
< nritems
; i
++) {
4855 ptr
= btrfs_node_blockptr(buf
, i
);
4856 size
= btrfs_level_size(root
, level
- 1);
4857 btrfs_node_key_to_cpu(buf
, &key
, i
);
4859 struct btrfs_key drop_key
;
4860 btrfs_disk_key_to_cpu(&drop_key
,
4861 &ri
->drop_progress
);
4862 if ((level
== ri
->drop_level
)
4863 && is_dropped_key(&key
, &drop_key
)) {
4867 ret
= add_extent_rec(extent_cache
, &key
,
4868 btrfs_node_ptr_generation(buf
, i
),
4869 ptr
, size
, 0, 0, 1, 0, 1, 0,
4873 add_tree_backref(extent_cache
, ptr
, parent
, owner
, 1);
4876 add_pending(nodes
, seen
, ptr
, size
);
4878 add_pending(pending
, seen
, ptr
, size
);
4881 btree_space_waste
+= (BTRFS_NODEPTRS_PER_BLOCK(root
) -
4882 nritems
) * sizeof(struct btrfs_key_ptr
);
4884 total_btree_bytes
+= buf
->len
;
4885 if (fs_root_objectid(btrfs_header_owner(buf
)))
4886 total_fs_tree_bytes
+= buf
->len
;
4887 if (btrfs_header_owner(buf
) == BTRFS_EXTENT_TREE_OBJECTID
)
4888 total_extent_tree_bytes
+= buf
->len
;
4889 if (!found_old_backref
&&
4890 btrfs_header_owner(buf
) == BTRFS_TREE_RELOC_OBJECTID
&&
4891 btrfs_header_backref_rev(buf
) == BTRFS_MIXED_BACKREF_REV
&&
4892 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))
4893 found_old_backref
= 1;
4895 free_extent_buffer(buf
);
4899 static int add_root_to_pending(struct extent_buffer
*buf
,
4900 struct cache_tree
*extent_cache
,
4901 struct cache_tree
*pending
,
4902 struct cache_tree
*seen
,
4903 struct cache_tree
*nodes
,
4904 struct btrfs_key
*root_key
)
4906 if (btrfs_header_level(buf
) > 0)
4907 add_pending(nodes
, seen
, buf
->start
, buf
->len
);
4909 add_pending(pending
, seen
, buf
->start
, buf
->len
);
4910 add_extent_rec(extent_cache
, NULL
, 0, buf
->start
, buf
->len
,
4911 0, 1, 1, 0, 1, 0, buf
->len
);
4913 if (root_key
->objectid
== BTRFS_TREE_RELOC_OBJECTID
||
4914 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
4915 add_tree_backref(extent_cache
, buf
->start
, buf
->start
,
4918 add_tree_backref(extent_cache
, buf
->start
, 0,
4919 root_key
->objectid
, 1);
4923 /* as we fix the tree, we might be deleting blocks that
4924 * we're tracking for repair. This hook makes sure we
4925 * remove any backrefs for blocks as we are fixing them.
4927 static int free_extent_hook(struct btrfs_trans_handle
*trans
,
4928 struct btrfs_root
*root
,
4929 u64 bytenr
, u64 num_bytes
, u64 parent
,
4930 u64 root_objectid
, u64 owner
, u64 offset
,
4933 struct extent_record
*rec
;
4934 struct cache_extent
*cache
;
4936 struct cache_tree
*extent_cache
= root
->fs_info
->fsck_extent_cache
;
4938 is_data
= owner
>= BTRFS_FIRST_FREE_OBJECTID
;
4939 cache
= lookup_cache_extent(extent_cache
, bytenr
, num_bytes
);
4943 rec
= container_of(cache
, struct extent_record
, cache
);
4945 struct data_backref
*back
;
4946 back
= find_data_backref(rec
, parent
, root_objectid
, owner
,
4947 offset
, 1, bytenr
, num_bytes
);
4950 if (back
->node
.found_ref
) {
4951 back
->found_ref
-= refs_to_drop
;
4953 rec
->refs
-= refs_to_drop
;
4955 if (back
->node
.found_extent_tree
) {
4956 back
->num_refs
-= refs_to_drop
;
4957 if (rec
->extent_item_refs
)
4958 rec
->extent_item_refs
-= refs_to_drop
;
4960 if (back
->found_ref
== 0)
4961 back
->node
.found_ref
= 0;
4962 if (back
->num_refs
== 0)
4963 back
->node
.found_extent_tree
= 0;
4965 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
4966 list_del(&back
->node
.list
);
4970 struct tree_backref
*back
;
4971 back
= find_tree_backref(rec
, parent
, root_objectid
);
4974 if (back
->node
.found_ref
) {
4977 back
->node
.found_ref
= 0;
4979 if (back
->node
.found_extent_tree
) {
4980 if (rec
->extent_item_refs
)
4981 rec
->extent_item_refs
--;
4982 back
->node
.found_extent_tree
= 0;
4984 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
4985 list_del(&back
->node
.list
);
4989 maybe_free_extent_rec(extent_cache
, rec
);
4994 static int delete_extent_records(struct btrfs_trans_handle
*trans
,
4995 struct btrfs_root
*root
,
4996 struct btrfs_path
*path
,
4997 u64 bytenr
, u64 new_len
)
4999 struct btrfs_key key
;
5000 struct btrfs_key found_key
;
5001 struct extent_buffer
*leaf
;
5006 key
.objectid
= bytenr
;
5008 key
.offset
= (u64
)-1;
5011 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
,
5018 if (path
->slots
[0] == 0)
5024 leaf
= path
->nodes
[0];
5025 slot
= path
->slots
[0];
5027 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
5028 if (found_key
.objectid
!= bytenr
)
5031 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
5032 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
5033 found_key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
5034 found_key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
&&
5035 found_key
.type
!= BTRFS_EXTENT_REF_V0_KEY
&&
5036 found_key
.type
!= BTRFS_SHARED_BLOCK_REF_KEY
&&
5037 found_key
.type
!= BTRFS_SHARED_DATA_REF_KEY
) {
5038 btrfs_release_path(path
);
5039 if (found_key
.type
== 0) {
5040 if (found_key
.offset
== 0)
5042 key
.offset
= found_key
.offset
- 1;
5043 key
.type
= found_key
.type
;
5045 key
.type
= found_key
.type
- 1;
5046 key
.offset
= (u64
)-1;
5050 fprintf(stderr
, "repair deleting extent record: key %Lu %u %Lu\n",
5051 found_key
.objectid
, found_key
.type
, found_key
.offset
);
5053 ret
= btrfs_del_item(trans
, root
->fs_info
->extent_root
, path
);
5056 btrfs_release_path(path
);
5058 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
||
5059 found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
5060 u64 bytes
= (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
) ?
5061 found_key
.offset
: root
->leafsize
;
5063 ret
= btrfs_update_block_group(trans
, root
, bytenr
,
5070 btrfs_release_path(path
);
5075 * for a single backref, this will allocate a new extent
5076 * and add the backref to it.
5078 static int record_extent(struct btrfs_trans_handle
*trans
,
5079 struct btrfs_fs_info
*info
,
5080 struct btrfs_path
*path
,
5081 struct extent_record
*rec
,
5082 struct extent_backref
*back
,
5083 int allocated
, u64 flags
)
5086 struct btrfs_root
*extent_root
= info
->extent_root
;
5087 struct extent_buffer
*leaf
;
5088 struct btrfs_key ins_key
;
5089 struct btrfs_extent_item
*ei
;
5090 struct tree_backref
*tback
;
5091 struct data_backref
*dback
;
5092 struct btrfs_tree_block_info
*bi
;
5095 rec
->max_size
= max_t(u64
, rec
->max_size
,
5096 info
->extent_root
->leafsize
);
5099 u32 item_size
= sizeof(*ei
);
5102 item_size
+= sizeof(*bi
);
5104 ins_key
.objectid
= rec
->start
;
5105 ins_key
.offset
= rec
->max_size
;
5106 ins_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5108 ret
= btrfs_insert_empty_item(trans
, extent_root
, path
,
5109 &ins_key
, item_size
);
5113 leaf
= path
->nodes
[0];
5114 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
5115 struct btrfs_extent_item
);
5117 btrfs_set_extent_refs(leaf
, ei
, 0);
5118 btrfs_set_extent_generation(leaf
, ei
, rec
->generation
);
5120 if (back
->is_data
) {
5121 btrfs_set_extent_flags(leaf
, ei
,
5122 BTRFS_EXTENT_FLAG_DATA
);
5124 struct btrfs_disk_key copy_key
;;
5126 tback
= (struct tree_backref
*)back
;
5127 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
5128 memset_extent_buffer(leaf
, 0, (unsigned long)bi
,
5131 btrfs_set_disk_key_objectid(©_key
,
5132 rec
->info_objectid
);
5133 btrfs_set_disk_key_type(©_key
, 0);
5134 btrfs_set_disk_key_offset(©_key
, 0);
5136 btrfs_set_tree_block_level(leaf
, bi
, rec
->info_level
);
5137 btrfs_set_tree_block_key(leaf
, bi
, ©_key
);
5139 btrfs_set_extent_flags(leaf
, ei
,
5140 BTRFS_EXTENT_FLAG_TREE_BLOCK
| flags
);
5143 btrfs_mark_buffer_dirty(leaf
);
5144 ret
= btrfs_update_block_group(trans
, extent_root
, rec
->start
,
5145 rec
->max_size
, 1, 0);
5148 btrfs_release_path(path
);
5151 if (back
->is_data
) {
5155 dback
= (struct data_backref
*)back
;
5156 if (back
->full_backref
)
5157 parent
= dback
->parent
;
5161 for (i
= 0; i
< dback
->found_ref
; i
++) {
5162 /* if parent != 0, we're doing a full backref
5163 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
5164 * just makes the backref allocator create a data
5167 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
5168 rec
->start
, rec
->max_size
,
5172 BTRFS_FIRST_FREE_OBJECTID
:
5178 fprintf(stderr
, "adding new data backref"
5179 " on %llu %s %llu owner %llu"
5180 " offset %llu found %d\n",
5181 (unsigned long long)rec
->start
,
5182 back
->full_backref
?
5184 back
->full_backref
?
5185 (unsigned long long)parent
:
5186 (unsigned long long)dback
->root
,
5187 (unsigned long long)dback
->owner
,
5188 (unsigned long long)dback
->offset
,
5193 tback
= (struct tree_backref
*)back
;
5194 if (back
->full_backref
)
5195 parent
= tback
->parent
;
5199 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
5200 rec
->start
, rec
->max_size
,
5201 parent
, tback
->root
, 0, 0);
5202 fprintf(stderr
, "adding new tree backref on "
5203 "start %llu len %llu parent %llu root %llu\n",
5204 rec
->start
, rec
->max_size
, tback
->parent
, tback
->root
);
5209 btrfs_release_path(path
);
5213 struct extent_entry
{
5218 struct list_head list
;
5221 static struct extent_entry
*find_entry(struct list_head
*entries
,
5222 u64 bytenr
, u64 bytes
)
5224 struct extent_entry
*entry
= NULL
;
5226 list_for_each_entry(entry
, entries
, list
) {
5227 if (entry
->bytenr
== bytenr
&& entry
->bytes
== bytes
)
5234 static struct extent_entry
*find_most_right_entry(struct list_head
*entries
)
5236 struct extent_entry
*entry
, *best
= NULL
, *prev
= NULL
;
5238 list_for_each_entry(entry
, entries
, list
) {
5245 * If there are as many broken entries as entries then we know
5246 * not to trust this particular entry.
5248 if (entry
->broken
== entry
->count
)
5252 * If our current entry == best then we can't be sure our best
5253 * is really the best, so we need to keep searching.
5255 if (best
&& best
->count
== entry
->count
) {
5261 /* Prev == entry, not good enough, have to keep searching */
5262 if (!prev
->broken
&& prev
->count
== entry
->count
)
5266 best
= (prev
->count
> entry
->count
) ? prev
: entry
;
5267 else if (best
->count
< entry
->count
)
5275 static int repair_ref(struct btrfs_trans_handle
*trans
,
5276 struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
5277 struct data_backref
*dback
, struct extent_entry
*entry
)
5279 struct btrfs_root
*root
;
5280 struct btrfs_file_extent_item
*fi
;
5281 struct extent_buffer
*leaf
;
5282 struct btrfs_key key
;
5286 key
.objectid
= dback
->root
;
5287 key
.type
= BTRFS_ROOT_ITEM_KEY
;
5288 key
.offset
= (u64
)-1;
5289 root
= btrfs_read_fs_root(info
, &key
);
5291 fprintf(stderr
, "Couldn't find root for our ref\n");
5296 * The backref points to the original offset of the extent if it was
5297 * split, so we need to search down to the offset we have and then walk
5298 * forward until we find the backref we're looking for.
5300 key
.objectid
= dback
->owner
;
5301 key
.type
= BTRFS_EXTENT_DATA_KEY
;
5302 key
.offset
= dback
->offset
;
5303 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5305 fprintf(stderr
, "Error looking up ref %d\n", ret
);
5310 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
5311 ret
= btrfs_next_leaf(root
, path
);
5313 fprintf(stderr
, "Couldn't find our ref, next\n");
5317 leaf
= path
->nodes
[0];
5318 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5319 if (key
.objectid
!= dback
->owner
||
5320 key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
5321 fprintf(stderr
, "Couldn't find our ref, search\n");
5324 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5325 struct btrfs_file_extent_item
);
5326 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
5327 bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
5329 if (bytenr
== dback
->disk_bytenr
&& bytes
== dback
->bytes
)
5334 btrfs_release_path(path
);
5337 * Have to make sure that this root gets updated when we commit the
5340 record_root_in_trans(trans
, root
);
5343 * Ok we have the key of the file extent we want to fix, now we can cow
5344 * down to the thing and fix it.
5346 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
5348 fprintf(stderr
, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
5349 key
.objectid
, key
.type
, key
.offset
, ret
);
5353 fprintf(stderr
, "Well that's odd, we just found this key "
5354 "[%Lu, %u, %Lu]\n", key
.objectid
, key
.type
,
5358 leaf
= path
->nodes
[0];
5359 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
5360 struct btrfs_file_extent_item
);
5362 if (btrfs_file_extent_compression(leaf
, fi
) &&
5363 dback
->disk_bytenr
!= entry
->bytenr
) {
5364 fprintf(stderr
, "Ref doesn't match the record start and is "
5365 "compressed, please take a btrfs-image of this file "
5366 "system and send it to a btrfs developer so they can "
5367 "complete this functionality for bytenr %Lu\n",
5368 dback
->disk_bytenr
);
5372 if (dback
->node
.broken
&& dback
->disk_bytenr
!= entry
->bytenr
) {
5373 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
5374 } else if (dback
->disk_bytenr
> entry
->bytenr
) {
5375 u64 off_diff
, offset
;
5377 off_diff
= dback
->disk_bytenr
- entry
->bytenr
;
5378 offset
= btrfs_file_extent_offset(leaf
, fi
);
5379 if (dback
->disk_bytenr
+ offset
+
5380 btrfs_file_extent_num_bytes(leaf
, fi
) >
5381 entry
->bytenr
+ entry
->bytes
) {
5382 fprintf(stderr
, "Ref is past the entry end, please "
5383 "take a btrfs-image of this file system and "
5384 "send it to a btrfs developer, ref %Lu\n",
5385 dback
->disk_bytenr
);
5389 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
5390 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
5391 } else if (dback
->disk_bytenr
< entry
->bytenr
) {
5394 offset
= btrfs_file_extent_offset(leaf
, fi
);
5395 if (dback
->disk_bytenr
+ offset
< entry
->bytenr
) {
5396 fprintf(stderr
, "Ref is before the entry start, please"
5397 " take a btrfs-image of this file system and "
5398 "send it to a btrfs developer, ref %Lu\n",
5399 dback
->disk_bytenr
);
5403 offset
+= dback
->disk_bytenr
;
5404 offset
-= entry
->bytenr
;
5405 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
5406 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
5409 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
, entry
->bytes
);
5412 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
5413 * only do this if we aren't using compression, otherwise it's a
5416 if (!btrfs_file_extent_compression(leaf
, fi
))
5417 btrfs_set_file_extent_ram_bytes(leaf
, fi
, entry
->bytes
);
5419 printf("ram bytes may be wrong?\n");
5420 btrfs_mark_buffer_dirty(leaf
);
5421 btrfs_release_path(path
);
5425 static int verify_backrefs(struct btrfs_trans_handle
*trans
,
5426 struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
5427 struct extent_record
*rec
)
5429 struct extent_backref
*back
;
5430 struct data_backref
*dback
;
5431 struct extent_entry
*entry
, *best
= NULL
;
5434 int broken_entries
= 0;
5439 * Metadata is easy and the backrefs should always agree on bytenr and
5440 * size, if not we've got bigger issues.
5445 list_for_each_entry(back
, &rec
->backrefs
, list
) {
5446 if (back
->full_backref
|| !back
->is_data
)
5449 dback
= (struct data_backref
*)back
;
5452 * We only pay attention to backrefs that we found a real
5455 if (dback
->found_ref
== 0)
5459 * For now we only catch when the bytes don't match, not the
5460 * bytenr. We can easily do this at the same time, but I want
5461 * to have a fs image to test on before we just add repair
5462 * functionality willy-nilly so we know we won't screw up the
5466 entry
= find_entry(&entries
, dback
->disk_bytenr
,
5469 entry
= malloc(sizeof(struct extent_entry
));
5474 memset(entry
, 0, sizeof(*entry
));
5475 entry
->bytenr
= dback
->disk_bytenr
;
5476 entry
->bytes
= dback
->bytes
;
5477 list_add_tail(&entry
->list
, &entries
);
5482 * If we only have on entry we may think the entries agree when
5483 * in reality they don't so we have to do some extra checking.
5485 if (dback
->disk_bytenr
!= rec
->start
||
5486 dback
->bytes
!= rec
->nr
|| back
->broken
)
5497 /* Yay all the backrefs agree, carry on good sir */
5498 if (nr_entries
<= 1 && !mismatch
)
5501 fprintf(stderr
, "attempting to repair backref discrepency for bytenr "
5502 "%Lu\n", rec
->start
);
5505 * First we want to see if the backrefs can agree amongst themselves who
5506 * is right, so figure out which one of the entries has the highest
5509 best
= find_most_right_entry(&entries
);
5512 * Ok so we may have an even split between what the backrefs think, so
5513 * this is where we use the extent ref to see what it thinks.
5516 entry
= find_entry(&entries
, rec
->start
, rec
->nr
);
5517 if (!entry
&& (!broken_entries
|| !rec
->found_rec
)) {
5518 fprintf(stderr
, "Backrefs don't agree with each other "
5519 "and extent record doesn't agree with anybody,"
5520 " so we can't fix bytenr %Lu bytes %Lu\n",
5521 rec
->start
, rec
->nr
);
5524 } else if (!entry
) {
5526 * Ok our backrefs were broken, we'll assume this is the
5527 * correct value and add an entry for this range.
5529 entry
= malloc(sizeof(struct extent_entry
));
5534 memset(entry
, 0, sizeof(*entry
));
5535 entry
->bytenr
= rec
->start
;
5536 entry
->bytes
= rec
->nr
;
5537 list_add_tail(&entry
->list
, &entries
);
5541 best
= find_most_right_entry(&entries
);
5543 fprintf(stderr
, "Backrefs and extent record evenly "
5544 "split on who is right, this is going to "
5545 "require user input to fix bytenr %Lu bytes "
5546 "%Lu\n", rec
->start
, rec
->nr
);
5553 * I don't think this can happen currently as we'll abort() if we catch
5554 * this case higher up, but in case somebody removes that we still can't
5555 * deal with it properly here yet, so just bail out of that's the case.
5557 if (best
->bytenr
!= rec
->start
) {
5558 fprintf(stderr
, "Extent start and backref starts don't match, "
5559 "please use btrfs-image on this file system and send "
5560 "it to a btrfs developer so they can make fsck fix "
5561 "this particular case. bytenr is %Lu, bytes is %Lu\n",
5562 rec
->start
, rec
->nr
);
5568 * Ok great we all agreed on an extent record, let's go find the real
5569 * references and fix up the ones that don't match.
5571 list_for_each_entry(back
, &rec
->backrefs
, list
) {
5572 if (back
->full_backref
|| !back
->is_data
)
5575 dback
= (struct data_backref
*)back
;
5578 * Still ignoring backrefs that don't have a real ref attached
5581 if (dback
->found_ref
== 0)
5584 if (dback
->bytes
== best
->bytes
&&
5585 dback
->disk_bytenr
== best
->bytenr
)
5588 ret
= repair_ref(trans
, info
, path
, dback
, best
);
5594 * Ok we messed with the actual refs, which means we need to drop our
5595 * entire cache and go back and rescan. I know this is a huge pain and
5596 * adds a lot of extra work, but it's the only way to be safe. Once all
5597 * the backrefs agree we may not need to do anything to the extent
5602 while (!list_empty(&entries
)) {
5603 entry
= list_entry(entries
.next
, struct extent_entry
, list
);
5604 list_del_init(&entry
->list
);
5610 static int process_duplicates(struct btrfs_root
*root
,
5611 struct cache_tree
*extent_cache
,
5612 struct extent_record
*rec
)
5614 struct extent_record
*good
, *tmp
;
5615 struct cache_extent
*cache
;
5619 * If we found a extent record for this extent then return, or if we
5620 * have more than one duplicate we are likely going to need to delete
5623 if (rec
->found_rec
|| rec
->num_duplicates
> 1)
5626 /* Shouldn't happen but just in case */
5627 BUG_ON(!rec
->num_duplicates
);
5630 * So this happens if we end up with a backref that doesn't match the
5631 * actual extent entry. So either the backref is bad or the extent
5632 * entry is bad. Either way we want to have the extent_record actually
5633 * reflect what we found in the extent_tree, so we need to take the
5634 * duplicate out and use that as the extent_record since the only way we
5635 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
5637 remove_cache_extent(extent_cache
, &rec
->cache
);
5639 good
= list_entry(rec
->dups
.next
, struct extent_record
, list
);
5640 list_del_init(&good
->list
);
5641 INIT_LIST_HEAD(&good
->backrefs
);
5642 INIT_LIST_HEAD(&good
->dups
);
5643 good
->cache
.start
= good
->start
;
5644 good
->cache
.size
= good
->nr
;
5645 good
->content_checked
= 0;
5646 good
->owner_ref_checked
= 0;
5647 good
->num_duplicates
= 0;
5648 good
->refs
= rec
->refs
;
5649 list_splice_init(&rec
->backrefs
, &good
->backrefs
);
5651 cache
= lookup_cache_extent(extent_cache
, good
->start
,
5655 tmp
= container_of(cache
, struct extent_record
, cache
);
5658 * If we find another overlapping extent and it's found_rec is
5659 * set then it's a duplicate and we need to try and delete
5662 if (tmp
->found_rec
|| tmp
->num_duplicates
> 0) {
5663 if (list_empty(&good
->list
))
5664 list_add_tail(&good
->list
,
5665 &duplicate_extents
);
5666 good
->num_duplicates
+= tmp
->num_duplicates
+ 1;
5667 list_splice_init(&tmp
->dups
, &good
->dups
);
5668 list_del_init(&tmp
->list
);
5669 list_add_tail(&tmp
->list
, &good
->dups
);
5670 remove_cache_extent(extent_cache
, &tmp
->cache
);
5675 * Ok we have another non extent item backed extent rec, so lets
5676 * just add it to this extent and carry on like we did above.
5678 good
->refs
+= tmp
->refs
;
5679 list_splice_init(&tmp
->backrefs
, &good
->backrefs
);
5680 remove_cache_extent(extent_cache
, &tmp
->cache
);
5683 ret
= insert_cache_extent(extent_cache
, &good
->cache
);
5686 return good
->num_duplicates
? 0 : 1;
5689 static int delete_duplicate_records(struct btrfs_trans_handle
*trans
,
5690 struct btrfs_root
*root
,
5691 struct extent_record
*rec
)
5693 LIST_HEAD(delete_list
);
5694 struct btrfs_path
*path
;
5695 struct extent_record
*tmp
, *good
, *n
;
5698 struct btrfs_key key
;
5700 path
= btrfs_alloc_path();
5707 /* Find the record that covers all of the duplicates. */
5708 list_for_each_entry(tmp
, &rec
->dups
, list
) {
5709 if (good
->start
< tmp
->start
)
5711 if (good
->nr
> tmp
->nr
)
5714 if (tmp
->start
+ tmp
->nr
< good
->start
+ good
->nr
) {
5715 fprintf(stderr
, "Ok we have overlapping extents that "
5716 "aren't completely covered by eachother, this "
5717 "is going to require more careful thought. "
5718 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5719 tmp
->start
, tmp
->nr
, good
->start
, good
->nr
);
5726 list_add_tail(&rec
->list
, &delete_list
);
5728 list_for_each_entry_safe(tmp
, n
, &rec
->dups
, list
) {
5731 list_move_tail(&tmp
->list
, &delete_list
);
5734 root
= root
->fs_info
->extent_root
;
5735 list_for_each_entry(tmp
, &delete_list
, list
) {
5736 if (tmp
->found_rec
== 0)
5738 key
.objectid
= tmp
->start
;
5739 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5740 key
.offset
= tmp
->nr
;
5742 /* Shouldn't happen but just in case */
5743 if (tmp
->metadata
) {
5744 fprintf(stderr
, "Well this shouldn't happen, extent "
5745 "record overlaps but is metadata? "
5746 "[%Lu, %Lu]\n", tmp
->start
, tmp
->nr
);
5750 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
5756 ret
= btrfs_del_item(trans
, root
, path
);
5759 btrfs_release_path(path
);
5764 while (!list_empty(&delete_list
)) {
5765 tmp
= list_entry(delete_list
.next
, struct extent_record
, list
);
5766 list_del_init(&tmp
->list
);
5772 while (!list_empty(&rec
->dups
)) {
5773 tmp
= list_entry(rec
->dups
.next
, struct extent_record
, list
);
5774 list_del_init(&tmp
->list
);
5778 btrfs_free_path(path
);
5780 if (!ret
&& !nr_del
)
5781 rec
->num_duplicates
= 0;
5783 return ret
? ret
: nr_del
;
5786 static int find_possible_backrefs(struct btrfs_trans_handle
*trans
,
5787 struct btrfs_fs_info
*info
,
5788 struct btrfs_path
*path
,
5789 struct cache_tree
*extent_cache
,
5790 struct extent_record
*rec
)
5792 struct btrfs_root
*root
;
5793 struct extent_backref
*back
;
5794 struct data_backref
*dback
;
5795 struct cache_extent
*cache
;
5796 struct btrfs_file_extent_item
*fi
;
5797 struct btrfs_key key
;
5801 list_for_each_entry(back
, &rec
->backrefs
, list
) {
5802 /* Don't care about full backrefs (poor unloved backrefs) */
5803 if (back
->full_backref
|| !back
->is_data
)
5806 dback
= (struct data_backref
*)back
;
5808 /* We found this one, we don't need to do a lookup */
5809 if (dback
->found_ref
)
5812 key
.objectid
= dback
->root
;
5813 key
.type
= BTRFS_ROOT_ITEM_KEY
;
5814 key
.offset
= (u64
)-1;
5816 root
= btrfs_read_fs_root(info
, &key
);
5818 /* No root, definitely a bad ref, skip */
5819 if (IS_ERR(root
) && PTR_ERR(root
) == -ENOENT
)
5821 /* Other err, exit */
5823 return PTR_ERR(root
);
5825 key
.objectid
= dback
->owner
;
5826 key
.type
= BTRFS_EXTENT_DATA_KEY
;
5827 key
.offset
= dback
->offset
;
5828 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5830 btrfs_release_path(path
);
5833 /* Didn't find it, we can carry on */
5838 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
5839 struct btrfs_file_extent_item
);
5840 bytenr
= btrfs_file_extent_disk_bytenr(path
->nodes
[0], fi
);
5841 bytes
= btrfs_file_extent_disk_num_bytes(path
->nodes
[0], fi
);
5842 btrfs_release_path(path
);
5843 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
5845 struct extent_record
*tmp
;
5846 tmp
= container_of(cache
, struct extent_record
, cache
);
5849 * If we found an extent record for the bytenr for this
5850 * particular backref then we can't add it to our
5851 * current extent record. We only want to add backrefs
5852 * that don't have a corresponding extent item in the
5853 * extent tree since they likely belong to this record
5854 * and we need to fix it if it doesn't match bytenrs.
5860 dback
->found_ref
+= 1;
5861 dback
->disk_bytenr
= bytenr
;
5862 dback
->bytes
= bytes
;
5865 * Set this so the verify backref code knows not to trust the
5866 * values in this backref.
5875 * when an incorrect extent item is found, this will delete
5876 * all of the existing entries for it and recreate them
5877 * based on what the tree scan found.
5879 static int fixup_extent_refs(struct btrfs_trans_handle
*trans
,
5880 struct btrfs_fs_info
*info
,
5881 struct cache_tree
*extent_cache
,
5882 struct extent_record
*rec
)
5885 struct btrfs_path
*path
;
5886 struct list_head
*cur
= rec
->backrefs
.next
;
5887 struct cache_extent
*cache
;
5888 struct extent_backref
*back
;
5893 * remember our flags for recreating the extent.
5894 * FIXME, if we have cleared extent tree, we can not
5895 * lookup extent info in extent tree.
5897 if (!init_extent_tree
) {
5898 ret
= btrfs_lookup_extent_info(NULL
, info
->extent_root
,
5899 rec
->start
, rec
->max_size
,
5900 rec
->metadata
, NULL
, &flags
);
5907 path
= btrfs_alloc_path();
5911 if (rec
->refs
!= rec
->extent_item_refs
&& !rec
->metadata
) {
5913 * Sometimes the backrefs themselves are so broken they don't
5914 * get attached to any meaningful rec, so first go back and
5915 * check any of our backrefs that we couldn't find and throw
5916 * them into the list if we find the backref so that
5917 * verify_backrefs can figure out what to do.
5919 ret
= find_possible_backrefs(trans
, info
, path
, extent_cache
,
5925 /* step one, make sure all of the backrefs agree */
5926 ret
= verify_backrefs(trans
, info
, path
, rec
);
5930 /* step two, delete all the existing records */
5931 ret
= delete_extent_records(trans
, info
->extent_root
, path
,
5932 rec
->start
, rec
->max_size
);
5937 /* was this block corrupt? If so, don't add references to it */
5938 cache
= lookup_cache_extent(info
->corrupt_blocks
,
5939 rec
->start
, rec
->max_size
);
5945 /* step three, recreate all the refs we did find */
5946 while(cur
!= &rec
->backrefs
) {
5947 back
= list_entry(cur
, struct extent_backref
, list
);
5951 * if we didn't find any references, don't create a
5954 if (!back
->found_ref
)
5957 ret
= record_extent(trans
, info
, path
, rec
, back
, allocated
, flags
);
5964 btrfs_free_path(path
);
5968 /* right now we only prune from the extent allocation tree */
5969 static int prune_one_block(struct btrfs_trans_handle
*trans
,
5970 struct btrfs_fs_info
*info
,
5971 struct btrfs_corrupt_block
*corrupt
)
5974 struct btrfs_path path
;
5975 struct extent_buffer
*eb
;
5979 int level
= corrupt
->level
+ 1;
5981 btrfs_init_path(&path
);
5983 /* we want to stop at the parent to our busted block */
5984 path
.lowest_level
= level
;
5986 ret
= btrfs_search_slot(trans
, info
->extent_root
,
5987 &corrupt
->key
, &path
, -1, 1);
5992 eb
= path
.nodes
[level
];
5999 * hopefully the search gave us the block we want to prune,
6000 * lets try that first
6002 slot
= path
.slots
[level
];
6003 found
= btrfs_node_blockptr(eb
, slot
);
6004 if (found
== corrupt
->cache
.start
)
6007 nritems
= btrfs_header_nritems(eb
);
6009 /* the search failed, lets scan this node and hope we find it */
6010 for (slot
= 0; slot
< nritems
; slot
++) {
6011 found
= btrfs_node_blockptr(eb
, slot
);
6012 if (found
== corrupt
->cache
.start
)
6016 * we couldn't find the bad block. TODO, search all the nodes for pointers
6019 if (eb
== info
->extent_root
->node
) {
6024 btrfs_release_path(&path
);
6029 printk("deleting pointer to block %Lu\n", corrupt
->cache
.start
);
6030 ret
= btrfs_del_ptr(trans
, info
->extent_root
, &path
, level
, slot
);
6033 btrfs_release_path(&path
);
6037 static int prune_corrupt_blocks(struct btrfs_trans_handle
*trans
,
6038 struct btrfs_fs_info
*info
)
6040 struct cache_extent
*cache
;
6041 struct btrfs_corrupt_block
*corrupt
;
6043 cache
= search_cache_extent(info
->corrupt_blocks
, 0);
6047 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
6048 prune_one_block(trans
, info
, corrupt
);
6049 cache
= next_cache_extent(cache
);
6054 static void free_corrupt_block(struct cache_extent
*cache
)
6056 struct btrfs_corrupt_block
*corrupt
;
6058 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
6062 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks
, free_corrupt_block
);
6064 static void reset_cached_block_groups(struct btrfs_fs_info
*fs_info
)
6066 struct btrfs_block_group_cache
*cache
;
6071 ret
= find_first_extent_bit(&fs_info
->free_space_cache
, 0,
6072 &start
, &end
, EXTENT_DIRTY
);
6075 clear_extent_dirty(&fs_info
->free_space_cache
, start
, end
,
6081 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
6086 start
= cache
->key
.objectid
+ cache
->key
.offset
;
6090 static int check_extent_refs(struct btrfs_trans_handle
*trans
,
6091 struct btrfs_root
*root
,
6092 struct cache_tree
*extent_cache
)
6094 struct extent_record
*rec
;
6095 struct cache_extent
*cache
;
6103 * if we're doing a repair, we have to make sure
6104 * we don't allocate from the problem extents.
6105 * In the worst case, this will be all the
6108 cache
= search_cache_extent(extent_cache
, 0);
6110 rec
= container_of(cache
, struct extent_record
, cache
);
6111 btrfs_pin_extent(root
->fs_info
,
6112 rec
->start
, rec
->max_size
);
6113 cache
= next_cache_extent(cache
);
6116 /* pin down all the corrupted blocks too */
6117 cache
= search_cache_extent(root
->fs_info
->corrupt_blocks
, 0);
6119 btrfs_pin_extent(root
->fs_info
,
6120 cache
->start
, cache
->size
);
6121 cache
= next_cache_extent(cache
);
6123 prune_corrupt_blocks(trans
, root
->fs_info
);
6124 reset_cached_block_groups(root
->fs_info
);
6128 * We need to delete any duplicate entries we find first otherwise we
6129 * could mess up the extent tree when we have backrefs that actually
6130 * belong to a different extent item and not the weird duplicate one.
6132 while (repair
&& !list_empty(&duplicate_extents
)) {
6133 rec
= list_entry(duplicate_extents
.next
, struct extent_record
,
6135 list_del_init(&rec
->list
);
6137 /* Sometimes we can find a backref before we find an actual
6138 * extent, so we need to process it a little bit to see if there
6139 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
6140 * if this is a backref screwup. If we need to delete stuff
6141 * process_duplicates() will return 0, otherwise it will return
6144 if (process_duplicates(root
, extent_cache
, rec
))
6146 ret
= delete_duplicate_records(trans
, root
, rec
);
6150 * delete_duplicate_records will return the number of entries
6151 * deleted, so if it's greater than 0 then we know we actually
6152 * did something and we need to remove.
6163 cache
= search_cache_extent(extent_cache
, 0);
6166 rec
= container_of(cache
, struct extent_record
, cache
);
6167 if (rec
->num_duplicates
) {
6168 fprintf(stderr
, "extent item %llu has multiple extent "
6169 "items\n", (unsigned long long)rec
->start
);
6173 if (rec
->refs
!= rec
->extent_item_refs
) {
6174 fprintf(stderr
, "ref mismatch on [%llu %llu] ",
6175 (unsigned long long)rec
->start
,
6176 (unsigned long long)rec
->nr
);
6177 fprintf(stderr
, "extent item %llu, found %llu\n",
6178 (unsigned long long)rec
->extent_item_refs
,
6179 (unsigned long long)rec
->refs
);
6180 if (!fixed
&& repair
) {
6181 ret
= fixup_extent_refs(trans
, root
->fs_info
,
6190 if (all_backpointers_checked(rec
, 1)) {
6191 fprintf(stderr
, "backpointer mismatch on [%llu %llu]\n",
6192 (unsigned long long)rec
->start
,
6193 (unsigned long long)rec
->nr
);
6195 if (!fixed
&& repair
) {
6196 ret
= fixup_extent_refs(trans
, root
->fs_info
,
6205 if (!rec
->owner_ref_checked
) {
6206 fprintf(stderr
, "owner ref check failed [%llu %llu]\n",
6207 (unsigned long long)rec
->start
,
6208 (unsigned long long)rec
->nr
);
6209 if (!fixed
&& repair
) {
6210 ret
= fixup_extent_refs(trans
, root
->fs_info
,
6219 remove_cache_extent(extent_cache
, cache
);
6220 free_all_extent_backrefs(rec
);
6225 if (ret
&& ret
!= -EAGAIN
) {
6226 fprintf(stderr
, "failed to repair damaged filesystem, aborting\n");
6229 btrfs_fix_block_accounting(trans
, root
);
6232 fprintf(stderr
, "repaired damaged extent references\n");
6238 u64
calc_stripe_length(u64 type
, u64 length
, int num_stripes
)
6242 if (type
& BTRFS_BLOCK_GROUP_RAID0
) {
6243 stripe_size
= length
;
6244 stripe_size
/= num_stripes
;
6245 } else if (type
& BTRFS_BLOCK_GROUP_RAID10
) {
6246 stripe_size
= length
* 2;
6247 stripe_size
/= num_stripes
;
6248 } else if (type
& BTRFS_BLOCK_GROUP_RAID5
) {
6249 stripe_size
= length
;
6250 stripe_size
/= (num_stripes
- 1);
6251 } else if (type
& BTRFS_BLOCK_GROUP_RAID6
) {
6252 stripe_size
= length
;
6253 stripe_size
/= (num_stripes
- 2);
6255 stripe_size
= length
;
6260 static int check_chunk_refs(struct chunk_record
*chunk_rec
,
6261 struct block_group_tree
*block_group_cache
,
6262 struct device_extent_tree
*dev_extent_cache
,
6265 struct cache_extent
*block_group_item
;
6266 struct block_group_record
*block_group_rec
;
6267 struct cache_extent
*dev_extent_item
;
6268 struct device_extent_record
*dev_extent_rec
;
6275 block_group_item
= lookup_cache_extent(&block_group_cache
->tree
,
6278 if (block_group_item
) {
6279 block_group_rec
= container_of(block_group_item
,
6280 struct block_group_record
,
6282 if (chunk_rec
->length
!= block_group_rec
->offset
||
6283 chunk_rec
->offset
!= block_group_rec
->objectid
||
6284 chunk_rec
->type_flags
!= block_group_rec
->flags
) {
6287 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
6288 chunk_rec
->objectid
,
6293 chunk_rec
->type_flags
,
6294 block_group_rec
->objectid
,
6295 block_group_rec
->type
,
6296 block_group_rec
->offset
,
6297 block_group_rec
->offset
,
6298 block_group_rec
->objectid
,
6299 block_group_rec
->flags
);
6302 list_del_init(&block_group_rec
->list
);
6303 chunk_rec
->bg_rec
= block_group_rec
;
6308 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
6309 chunk_rec
->objectid
,
6314 chunk_rec
->type_flags
);
6318 length
= calc_stripe_length(chunk_rec
->type_flags
, chunk_rec
->length
,
6319 chunk_rec
->num_stripes
);
6320 for (i
= 0; i
< chunk_rec
->num_stripes
; ++i
) {
6321 devid
= chunk_rec
->stripes
[i
].devid
;
6322 offset
= chunk_rec
->stripes
[i
].offset
;
6323 dev_extent_item
= lookup_cache_extent2(&dev_extent_cache
->tree
,
6324 devid
, offset
, length
);
6325 if (dev_extent_item
) {
6326 dev_extent_rec
= container_of(dev_extent_item
,
6327 struct device_extent_record
,
6329 if (dev_extent_rec
->objectid
!= devid
||
6330 dev_extent_rec
->offset
!= offset
||
6331 dev_extent_rec
->chunk_offset
!= chunk_rec
->offset
||
6332 dev_extent_rec
->length
!= length
) {
6335 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
6336 chunk_rec
->objectid
,
6339 chunk_rec
->stripes
[i
].devid
,
6340 chunk_rec
->stripes
[i
].offset
,
6341 dev_extent_rec
->objectid
,
6342 dev_extent_rec
->offset
,
6343 dev_extent_rec
->length
);
6346 list_move(&dev_extent_rec
->chunk_list
,
6347 &chunk_rec
->dextents
);
6352 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
6353 chunk_rec
->objectid
,
6356 chunk_rec
->stripes
[i
].devid
,
6357 chunk_rec
->stripes
[i
].offset
);
6364 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
6365 int check_chunks(struct cache_tree
*chunk_cache
,
6366 struct block_group_tree
*block_group_cache
,
6367 struct device_extent_tree
*dev_extent_cache
,
6368 struct list_head
*good
, struct list_head
*bad
, int silent
)
6370 struct cache_extent
*chunk_item
;
6371 struct chunk_record
*chunk_rec
;
6372 struct block_group_record
*bg_rec
;
6373 struct device_extent_record
*dext_rec
;
6377 chunk_item
= first_cache_extent(chunk_cache
);
6378 while (chunk_item
) {
6379 chunk_rec
= container_of(chunk_item
, struct chunk_record
,
6381 err
= check_chunk_refs(chunk_rec
, block_group_cache
,
6382 dev_extent_cache
, silent
);
6386 list_add_tail(&chunk_rec
->list
, bad
);
6389 list_add_tail(&chunk_rec
->list
, good
);
6392 chunk_item
= next_cache_extent(chunk_item
);
6395 list_for_each_entry(bg_rec
, &block_group_cache
->block_groups
, list
) {
6398 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
6406 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_chunk_orphans
,
6410 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
6421 static int check_device_used(struct device_record
*dev_rec
,
6422 struct device_extent_tree
*dext_cache
)
6424 struct cache_extent
*cache
;
6425 struct device_extent_record
*dev_extent_rec
;
6428 cache
= search_cache_extent2(&dext_cache
->tree
, dev_rec
->devid
, 0);
6430 dev_extent_rec
= container_of(cache
,
6431 struct device_extent_record
,
6433 if (dev_extent_rec
->objectid
!= dev_rec
->devid
)
6436 list_del_init(&dev_extent_rec
->device_list
);
6437 total_byte
+= dev_extent_rec
->length
;
6438 cache
= next_cache_extent(cache
);
6441 if (total_byte
!= dev_rec
->byte_used
) {
6443 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
6444 total_byte
, dev_rec
->byte_used
, dev_rec
->objectid
,
6445 dev_rec
->type
, dev_rec
->offset
);
6452 /* check btrfs_dev_item -> btrfs_dev_extent */
6453 static int check_devices(struct rb_root
*dev_cache
,
6454 struct device_extent_tree
*dev_extent_cache
)
6456 struct rb_node
*dev_node
;
6457 struct device_record
*dev_rec
;
6458 struct device_extent_record
*dext_rec
;
6462 dev_node
= rb_first(dev_cache
);
6464 dev_rec
= container_of(dev_node
, struct device_record
, node
);
6465 err
= check_device_used(dev_rec
, dev_extent_cache
);
6469 dev_node
= rb_next(dev_node
);
6471 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_device_orphans
,
6474 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
6475 dext_rec
->objectid
, dext_rec
->offset
, dext_rec
->length
);
6482 static int check_chunks_and_extents(struct btrfs_root
*root
)
6484 struct rb_root dev_cache
;
6485 struct cache_tree chunk_cache
;
6486 struct block_group_tree block_group_cache
;
6487 struct device_extent_tree dev_extent_cache
;
6488 struct cache_tree extent_cache
;
6489 struct cache_tree seen
;
6490 struct cache_tree pending
;
6491 struct cache_tree reada
;
6492 struct cache_tree nodes
;
6493 struct cache_tree corrupt_blocks
;
6494 struct btrfs_path path
;
6495 struct btrfs_key key
;
6496 struct btrfs_key found_key
;
6499 struct block_info
*bits
;
6501 struct extent_buffer
*leaf
;
6502 struct btrfs_trans_handle
*trans
= NULL
;
6504 struct btrfs_root_item ri
;
6505 struct list_head dropping_trees
;
6507 dev_cache
= RB_ROOT
;
6508 cache_tree_init(&chunk_cache
);
6509 block_group_tree_init(&block_group_cache
);
6510 device_extent_tree_init(&dev_extent_cache
);
6512 cache_tree_init(&extent_cache
);
6513 cache_tree_init(&seen
);
6514 cache_tree_init(&pending
);
6515 cache_tree_init(&nodes
);
6516 cache_tree_init(&reada
);
6517 cache_tree_init(&corrupt_blocks
);
6518 INIT_LIST_HEAD(&dropping_trees
);
6521 trans
= btrfs_start_transaction(root
, 1);
6522 if (IS_ERR(trans
)) {
6523 fprintf(stderr
, "Error starting transaction\n");
6524 return PTR_ERR(trans
);
6526 root
->fs_info
->fsck_extent_cache
= &extent_cache
;
6527 root
->fs_info
->free_extent_hook
= free_extent_hook
;
6528 root
->fs_info
->corrupt_blocks
= &corrupt_blocks
;
6532 bits
= malloc(bits_nr
* sizeof(struct block_info
));
6539 add_root_to_pending(root
->fs_info
->tree_root
->node
,
6540 &extent_cache
, &pending
, &seen
, &nodes
,
6541 &root
->fs_info
->tree_root
->root_key
);
6543 add_root_to_pending(root
->fs_info
->chunk_root
->node
,
6544 &extent_cache
, &pending
, &seen
, &nodes
,
6545 &root
->fs_info
->chunk_root
->root_key
);
6547 btrfs_init_path(&path
);
6550 btrfs_set_key_type(&key
, BTRFS_ROOT_ITEM_KEY
);
6551 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
,
6556 leaf
= path
.nodes
[0];
6557 slot
= path
.slots
[0];
6558 if (slot
>= btrfs_header_nritems(path
.nodes
[0])) {
6559 ret
= btrfs_next_leaf(root
, &path
);
6562 leaf
= path
.nodes
[0];
6563 slot
= path
.slots
[0];
6565 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
6566 if (btrfs_key_type(&found_key
) == BTRFS_ROOT_ITEM_KEY
) {
6567 unsigned long offset
;
6568 struct extent_buffer
*buf
;
6570 offset
= btrfs_item_ptr_offset(leaf
, path
.slots
[0]);
6571 read_extent_buffer(leaf
, &ri
, offset
, sizeof(ri
));
6572 if (btrfs_disk_key_objectid(&ri
.drop_progress
) == 0) {
6573 buf
= read_tree_block(root
->fs_info
->tree_root
,
6574 btrfs_root_bytenr(&ri
),
6575 btrfs_level_size(root
,
6576 btrfs_root_level(&ri
)),
6582 add_root_to_pending(buf
, &extent_cache
,
6583 &pending
, &seen
, &nodes
,
6585 free_extent_buffer(buf
);
6587 struct dropping_root_item_record
*dri_rec
;
6588 dri_rec
= malloc(sizeof(*dri_rec
));
6593 memcpy(&dri_rec
->ri
, &ri
, sizeof(ri
));
6594 memcpy(&dri_rec
->found_key
, &found_key
,
6596 list_add_tail(&dri_rec
->list
, &dropping_trees
);
6601 btrfs_release_path(&path
);
6603 ret
= run_next_block(trans
, root
, bits
, bits_nr
, &last
,
6604 &pending
, &seen
, &reada
, &nodes
,
6605 &extent_cache
, &chunk_cache
, &dev_cache
,
6606 &block_group_cache
, &dev_extent_cache
,
6612 while (!list_empty(&dropping_trees
)) {
6613 struct dropping_root_item_record
*rec
;
6614 struct extent_buffer
*buf
;
6615 rec
= list_entry(dropping_trees
.next
,
6616 struct dropping_root_item_record
, list
);
6622 buf
= read_tree_block(root
->fs_info
->tree_root
,
6623 btrfs_root_bytenr(&rec
->ri
),
6624 btrfs_level_size(root
,
6625 btrfs_root_level(&rec
->ri
)), 0);
6630 add_root_to_pending(buf
, &extent_cache
, &pending
,
6631 &seen
, &nodes
, &rec
->found_key
);
6633 ret
= run_next_block(trans
, root
, bits
, bits_nr
, &last
,
6634 &pending
, &seen
, &reada
,
6635 &nodes
, &extent_cache
,
6636 &chunk_cache
, &dev_cache
,
6643 free_extent_buffer(buf
);
6644 list_del(&rec
->list
);
6649 ret
= check_extent_refs(trans
, root
, &extent_cache
);
6650 if (ret
== -EAGAIN
) {
6651 ret
= btrfs_commit_transaction(trans
, root
);
6655 trans
= btrfs_start_transaction(root
, 1);
6656 if (IS_ERR(trans
)) {
6657 ret
= PTR_ERR(trans
);
6661 free_corrupt_blocks_tree(root
->fs_info
->corrupt_blocks
);
6662 free_extent_cache_tree(&seen
);
6663 free_extent_cache_tree(&pending
);
6664 free_extent_cache_tree(&reada
);
6665 free_extent_cache_tree(&nodes
);
6666 free_chunk_cache_tree(&chunk_cache
);
6667 free_block_group_tree(&block_group_cache
);
6668 free_device_cache_tree(&dev_cache
);
6669 free_device_extent_tree(&dev_extent_cache
);
6670 free_extent_record_cache(root
->fs_info
, &extent_cache
);
6674 err
= check_chunks(&chunk_cache
, &block_group_cache
,
6675 &dev_extent_cache
, NULL
, NULL
, 0);
6679 err
= check_devices(&dev_cache
, &dev_extent_cache
);
6685 err
= btrfs_commit_transaction(trans
, root
);
6690 free_corrupt_blocks_tree(root
->fs_info
->corrupt_blocks
);
6691 root
->fs_info
->fsck_extent_cache
= NULL
;
6692 root
->fs_info
->free_extent_hook
= NULL
;
6693 root
->fs_info
->corrupt_blocks
= NULL
;
6696 free_chunk_cache_tree(&chunk_cache
);
6697 free_device_cache_tree(&dev_cache
);
6698 free_block_group_tree(&block_group_cache
);
6699 free_device_extent_tree(&dev_extent_cache
);
6700 free_extent_cache_tree(&seen
);
6701 free_extent_cache_tree(&pending
);
6702 free_extent_cache_tree(&reada
);
6703 free_extent_cache_tree(&nodes
);
6707 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle
*trans
,
6708 struct btrfs_root
*root
, int overwrite
)
6710 struct extent_buffer
*c
;
6711 struct extent_buffer
*old
= root
->node
;
6714 struct btrfs_disk_key disk_key
= {0,0,0};
6720 extent_buffer_get(c
);
6723 c
= btrfs_alloc_free_block(trans
, root
,
6724 btrfs_level_size(root
, 0),
6725 root
->root_key
.objectid
,
6726 &disk_key
, level
, 0, 0);
6729 extent_buffer_get(c
);
6733 memset_extent_buffer(c
, 0, 0, sizeof(struct btrfs_header
));
6734 btrfs_set_header_level(c
, level
);
6735 btrfs_set_header_bytenr(c
, c
->start
);
6736 btrfs_set_header_generation(c
, trans
->transid
);
6737 btrfs_set_header_backref_rev(c
, BTRFS_MIXED_BACKREF_REV
);
6738 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
6740 write_extent_buffer(c
, root
->fs_info
->fsid
,
6741 btrfs_header_fsid(), BTRFS_FSID_SIZE
);
6743 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
6744 btrfs_header_chunk_tree_uuid(c
),
6747 btrfs_mark_buffer_dirty(c
);
6749 * this case can happen in the following case:
6751 * 1.overwrite previous root.
6753 * 2.reinit reloc data root, this is because we skip pin
6754 * down reloc data tree before which means we can allocate
6755 * same block bytenr here.
6757 if (old
->start
== c
->start
) {
6758 btrfs_set_root_generation(&root
->root_item
,
6760 root
->root_item
.level
= btrfs_header_level(root
->node
);
6761 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
6762 &root
->root_key
, &root
->root_item
);
6764 free_extent_buffer(c
);
6768 free_extent_buffer(old
);
6770 add_root_to_dirty_list(root
);
6774 static int pin_down_tree_blocks(struct btrfs_fs_info
*fs_info
,
6775 struct extent_buffer
*eb
, int tree_root
)
6777 struct extent_buffer
*tmp
;
6778 struct btrfs_root_item
*ri
;
6779 struct btrfs_key key
;
6782 int level
= btrfs_header_level(eb
);
6788 * If we have pinned this block before, don't pin it again.
6789 * This can not only avoid forever loop with broken filesystem
6790 * but also give us some speedups.
6792 if (test_range_bit(&fs_info
->pinned_extents
, eb
->start
,
6793 eb
->start
+ eb
->len
- 1, EXTENT_DIRTY
, 0))
6796 btrfs_pin_extent(fs_info
, eb
->start
, eb
->len
);
6798 leafsize
= btrfs_super_leafsize(fs_info
->super_copy
);
6799 nritems
= btrfs_header_nritems(eb
);
6800 for (i
= 0; i
< nritems
; i
++) {
6802 btrfs_item_key_to_cpu(eb
, &key
, i
);
6803 if (key
.type
!= BTRFS_ROOT_ITEM_KEY
)
6805 /* Skip the extent root and reloc roots */
6806 if (key
.objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
6807 key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
||
6808 key
.objectid
== BTRFS_DATA_RELOC_TREE_OBJECTID
)
6810 ri
= btrfs_item_ptr(eb
, i
, struct btrfs_root_item
);
6811 bytenr
= btrfs_disk_root_bytenr(eb
, ri
);
6814 * If at any point we start needing the real root we
6815 * will have to build a stump root for the root we are
6816 * in, but for now this doesn't actually use the root so
6817 * just pass in extent_root.
6819 tmp
= read_tree_block(fs_info
->extent_root
, bytenr
,
6822 fprintf(stderr
, "Error reading root block\n");
6825 ret
= pin_down_tree_blocks(fs_info
, tmp
, 0);
6826 free_extent_buffer(tmp
);
6830 bytenr
= btrfs_node_blockptr(eb
, i
);
6832 /* If we aren't the tree root don't read the block */
6833 if (level
== 1 && !tree_root
) {
6834 btrfs_pin_extent(fs_info
, bytenr
, leafsize
);
6838 tmp
= read_tree_block(fs_info
->extent_root
, bytenr
,
6841 fprintf(stderr
, "Error reading tree block\n");
6844 ret
= pin_down_tree_blocks(fs_info
, tmp
, tree_root
);
6845 free_extent_buffer(tmp
);
6854 static int pin_metadata_blocks(struct btrfs_fs_info
*fs_info
)
6858 ret
= pin_down_tree_blocks(fs_info
, fs_info
->chunk_root
->node
, 0);
6862 return pin_down_tree_blocks(fs_info
, fs_info
->tree_root
->node
, 1);
6865 static int reset_block_groups(struct btrfs_fs_info
*fs_info
)
6867 struct btrfs_block_group_cache
*cache
;
6868 struct btrfs_path
*path
;
6869 struct extent_buffer
*leaf
;
6870 struct btrfs_chunk
*chunk
;
6871 struct btrfs_key key
;
6875 path
= btrfs_alloc_path();
6880 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
6883 ret
= btrfs_search_slot(NULL
, fs_info
->chunk_root
, &key
, path
, 0, 0);
6885 btrfs_free_path(path
);
6890 * We do this in case the block groups were screwed up and had alloc
6891 * bits that aren't actually set on the chunks. This happens with
6892 * restored images every time and could happen in real life I guess.
6894 fs_info
->avail_data_alloc_bits
= 0;
6895 fs_info
->avail_metadata_alloc_bits
= 0;
6896 fs_info
->avail_system_alloc_bits
= 0;
6898 /* First we need to create the in-memory block groups */
6900 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
6901 ret
= btrfs_next_leaf(fs_info
->chunk_root
, path
);
6903 btrfs_free_path(path
);
6911 leaf
= path
->nodes
[0];
6912 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
6913 if (key
.type
!= BTRFS_CHUNK_ITEM_KEY
) {
6918 chunk
= btrfs_item_ptr(leaf
, path
->slots
[0],
6919 struct btrfs_chunk
);
6920 btrfs_add_block_group(fs_info
, 0,
6921 btrfs_chunk_type(leaf
, chunk
),
6922 key
.objectid
, key
.offset
,
6923 btrfs_chunk_length(leaf
, chunk
));
6924 set_extent_dirty(&fs_info
->free_space_cache
, key
.offset
,
6925 key
.offset
+ btrfs_chunk_length(leaf
, chunk
),
6931 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
6935 start
= cache
->key
.objectid
+ cache
->key
.offset
;
6938 btrfs_free_path(path
);
6942 static int reset_balance(struct btrfs_trans_handle
*trans
,
6943 struct btrfs_fs_info
*fs_info
)
6945 struct btrfs_root
*root
= fs_info
->tree_root
;
6946 struct btrfs_path
*path
;
6947 struct extent_buffer
*leaf
;
6948 struct btrfs_key key
;
6949 int del_slot
, del_nr
= 0;
6953 path
= btrfs_alloc_path();
6957 key
.objectid
= BTRFS_BALANCE_OBJECTID
;
6958 key
.type
= BTRFS_BALANCE_ITEM_KEY
;
6961 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
6966 goto reinit_data_reloc
;
6971 ret
= btrfs_del_item(trans
, root
, path
);
6974 btrfs_release_path(path
);
6976 key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
6977 key
.type
= BTRFS_ROOT_ITEM_KEY
;
6980 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
6984 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
6989 ret
= btrfs_del_items(trans
, root
, path
,
6996 btrfs_release_path(path
);
6999 ret
= btrfs_search_slot(trans
, root
, &key
, path
,
7006 leaf
= path
->nodes
[0];
7007 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
7008 if (key
.objectid
> BTRFS_TREE_RELOC_OBJECTID
)
7010 if (key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
7015 del_slot
= path
->slots
[0];
7024 ret
= btrfs_del_items(trans
, root
, path
, del_slot
, del_nr
);
7028 btrfs_release_path(path
);
7031 key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
7032 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7033 key
.offset
= (u64
)-1;
7034 root
= btrfs_read_fs_root(fs_info
, &key
);
7036 fprintf(stderr
, "Error reading data reloc tree\n");
7037 return PTR_ERR(root
);
7039 record_root_in_trans(trans
, root
);
7040 ret
= btrfs_fsck_reinit_root(trans
, root
, 0);
7043 ret
= btrfs_make_root_dir(trans
, root
, BTRFS_FIRST_FREE_OBJECTID
);
7045 btrfs_free_path(path
);
7049 static int reinit_extent_tree(struct btrfs_trans_handle
*trans
,
7050 struct btrfs_fs_info
*fs_info
)
7056 * The only reason we don't do this is because right now we're just
7057 * walking the trees we find and pinning down their bytes, we don't look
7058 * at any of the leaves. In order to do mixed groups we'd have to check
7059 * the leaves of any fs roots and pin down the bytes for any file
7060 * extents we find. Not hard but why do it if we don't have to?
7062 if (btrfs_fs_incompat(fs_info
, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS
)) {
7063 fprintf(stderr
, "We don't support re-initing the extent tree "
7064 "for mixed block groups yet, please notify a btrfs "
7065 "developer you want to do this so they can add this "
7066 "functionality.\n");
7071 * first we need to walk all of the trees except the extent tree and pin
7072 * down the bytes that are in use so we don't overwrite any existing
7075 ret
= pin_metadata_blocks(fs_info
);
7077 fprintf(stderr
, "error pinning down used bytes\n");
7082 * Need to drop all the block groups since we're going to recreate all
7085 btrfs_free_block_groups(fs_info
);
7086 ret
= reset_block_groups(fs_info
);
7088 fprintf(stderr
, "error resetting the block groups\n");
7092 /* Ok we can allocate now, reinit the extent root */
7093 ret
= btrfs_fsck_reinit_root(trans
, fs_info
->extent_root
, 0);
7095 fprintf(stderr
, "extent root initialization failed\n");
7097 * When the transaction code is updated we should end the
7098 * transaction, but for now progs only knows about commit so
7099 * just return an error.
7105 * Now we have all the in-memory block groups setup so we can make
7106 * allocations properly, and the metadata we care about is safe since we
7107 * pinned all of it above.
7110 struct btrfs_block_group_cache
*cache
;
7112 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
7115 start
= cache
->key
.objectid
+ cache
->key
.offset
;
7116 ret
= btrfs_insert_item(trans
, fs_info
->extent_root
,
7117 &cache
->key
, &cache
->item
,
7118 sizeof(cache
->item
));
7120 fprintf(stderr
, "Error adding block group\n");
7123 btrfs_extent_post_op(trans
, fs_info
->extent_root
);
7126 ret
= reset_balance(trans
, fs_info
);
7128 fprintf(stderr
, "error reseting the pending balance\n");
7133 static int recow_extent_buffer(struct btrfs_root
*root
, struct extent_buffer
*eb
)
7135 struct btrfs_path
*path
;
7136 struct btrfs_trans_handle
*trans
;
7137 struct btrfs_key key
;
7140 printf("Recowing metadata block %llu\n", eb
->start
);
7141 key
.objectid
= btrfs_header_owner(eb
);
7142 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7143 key
.offset
= (u64
)-1;
7145 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
7147 fprintf(stderr
, "Couldn't find owner root %llu\n",
7149 return PTR_ERR(root
);
7152 path
= btrfs_alloc_path();
7156 trans
= btrfs_start_transaction(root
, 1);
7157 if (IS_ERR(trans
)) {
7158 btrfs_free_path(path
);
7159 return PTR_ERR(trans
);
7162 path
->lowest_level
= btrfs_header_level(eb
);
7163 if (path
->lowest_level
)
7164 btrfs_node_key_to_cpu(eb
, &key
, 0);
7166 btrfs_item_key_to_cpu(eb
, &key
, 0);
7168 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
7169 btrfs_commit_transaction(trans
, root
);
7170 btrfs_free_path(path
);
7174 static int delete_bad_item(struct btrfs_root
*root
, struct bad_item
*bad
)
7176 struct btrfs_path
*path
;
7177 struct btrfs_trans_handle
*trans
;
7178 struct btrfs_key key
;
7181 printf("Deleting bad item [%llu,%u,%llu]\n", bad
->key
.objectid
,
7182 bad
->key
.type
, bad
->key
.offset
);
7183 key
.objectid
= bad
->root_id
;
7184 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7185 key
.offset
= (u64
)-1;
7187 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
7189 fprintf(stderr
, "Couldn't find owner root %llu\n",
7191 return PTR_ERR(root
);
7194 path
= btrfs_alloc_path();
7198 trans
= btrfs_start_transaction(root
, 1);
7199 if (IS_ERR(trans
)) {
7200 btrfs_free_path(path
);
7201 return PTR_ERR(trans
);
7204 ret
= btrfs_search_slot(trans
, root
, &bad
->key
, path
, -1, 1);
7210 ret
= btrfs_del_item(trans
, root
, path
);
7212 btrfs_commit_transaction(trans
, root
);
7213 btrfs_free_path(path
);
7217 static int zero_log_tree(struct btrfs_root
*root
)
7219 struct btrfs_trans_handle
*trans
;
7222 trans
= btrfs_start_transaction(root
, 1);
7223 if (IS_ERR(trans
)) {
7224 ret
= PTR_ERR(trans
);
7227 btrfs_set_super_log_root(root
->fs_info
->super_copy
, 0);
7228 btrfs_set_super_log_root_level(root
->fs_info
->super_copy
, 0);
7229 ret
= btrfs_commit_transaction(trans
, root
);
7233 static int populate_csum(struct btrfs_trans_handle
*trans
,
7234 struct btrfs_root
*csum_root
, char *buf
, u64 start
,
7241 while (offset
< len
) {
7242 sectorsize
= csum_root
->sectorsize
;
7243 ret
= read_extent_data(csum_root
, buf
, start
+ offset
,
7247 ret
= btrfs_csum_file_block(trans
, csum_root
, start
+ len
,
7248 start
+ offset
, buf
, sectorsize
);
7251 offset
+= sectorsize
;
7256 static int fill_csum_tree(struct btrfs_trans_handle
*trans
,
7257 struct btrfs_root
*csum_root
)
7259 struct btrfs_root
*extent_root
= csum_root
->fs_info
->extent_root
;
7260 struct btrfs_path
*path
;
7261 struct btrfs_extent_item
*ei
;
7262 struct extent_buffer
*leaf
;
7264 struct btrfs_key key
;
7267 path
= btrfs_alloc_path();
7272 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
7275 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
7277 btrfs_free_path(path
);
7281 buf
= malloc(csum_root
->sectorsize
);
7283 btrfs_free_path(path
);
7288 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
7289 ret
= btrfs_next_leaf(extent_root
, path
);
7297 leaf
= path
->nodes
[0];
7299 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
7300 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
7305 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
7306 struct btrfs_extent_item
);
7307 if (!(btrfs_extent_flags(leaf
, ei
) &
7308 BTRFS_EXTENT_FLAG_DATA
)) {
7313 ret
= populate_csum(trans
, csum_root
, buf
, key
.objectid
,
7320 btrfs_free_path(path
);
7325 struct root_item_info
{
7326 /* level of the root */
7328 /* number of nodes at this level, must be 1 for a root */
7332 struct cache_extent cache_extent
;
7335 static struct cache_tree
*roots_info_cache
= NULL
;
7337 static void free_roots_info_cache(void)
7339 if (!roots_info_cache
)
7342 while (!cache_tree_empty(roots_info_cache
)) {
7343 struct cache_extent
*entry
;
7344 struct root_item_info
*rii
;
7346 entry
= first_cache_extent(roots_info_cache
);
7347 remove_cache_extent(roots_info_cache
, entry
);
7348 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
7352 free(roots_info_cache
);
7353 roots_info_cache
= NULL
;
7356 static int build_roots_info_cache(struct btrfs_fs_info
*info
)
7359 struct btrfs_key key
;
7360 struct extent_buffer
*leaf
;
7361 struct btrfs_path
*path
;
7363 if (!roots_info_cache
) {
7364 roots_info_cache
= malloc(sizeof(*roots_info_cache
));
7365 if (!roots_info_cache
)
7367 cache_tree_init(roots_info_cache
);
7370 path
= btrfs_alloc_path();
7375 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
7378 ret
= btrfs_search_slot(NULL
, info
->extent_root
, &key
, path
, 0, 0);
7381 leaf
= path
->nodes
[0];
7384 struct btrfs_key found_key
;
7385 struct btrfs_extent_item
*ei
;
7386 struct btrfs_extent_inline_ref
*iref
;
7387 int slot
= path
->slots
[0];
7392 struct cache_extent
*entry
;
7393 struct root_item_info
*rii
;
7395 if (slot
>= btrfs_header_nritems(leaf
)) {
7396 ret
= btrfs_next_leaf(info
->extent_root
, path
);
7403 leaf
= path
->nodes
[0];
7404 slot
= path
->slots
[0];
7407 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
7409 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
7410 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
)
7413 ei
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_item
);
7414 flags
= btrfs_extent_flags(leaf
, ei
);
7416 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
7417 !(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
))
7420 if (found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
7421 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
7422 level
= found_key
.offset
;
7424 struct btrfs_tree_block_info
*info
;
7426 info
= (struct btrfs_tree_block_info
*)(ei
+ 1);
7427 iref
= (struct btrfs_extent_inline_ref
*)(info
+ 1);
7428 level
= btrfs_tree_block_level(leaf
, info
);
7432 * For a root extent, it must be of the following type and the
7433 * first (and only one) iref in the item.
7435 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
7436 if (type
!= BTRFS_TREE_BLOCK_REF_KEY
)
7439 root_id
= btrfs_extent_inline_ref_offset(leaf
, iref
);
7440 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
7442 rii
= malloc(sizeof(struct root_item_info
));
7447 rii
->cache_extent
.start
= root_id
;
7448 rii
->cache_extent
.size
= 1;
7449 rii
->level
= (u8
)-1;
7450 entry
= &rii
->cache_extent
;
7451 ret
= insert_cache_extent(roots_info_cache
, entry
);
7454 rii
= container_of(entry
, struct root_item_info
,
7458 ASSERT(rii
->cache_extent
.start
== root_id
);
7459 ASSERT(rii
->cache_extent
.size
== 1);
7461 if (level
> rii
->level
|| rii
->level
== (u8
)-1) {
7463 rii
->bytenr
= found_key
.objectid
;
7464 rii
->gen
= btrfs_extent_generation(leaf
, ei
);
7465 rii
->node_count
= 1;
7466 } else if (level
== rii
->level
) {
7474 btrfs_free_path(path
);
7479 static int maybe_repair_root_item(struct btrfs_fs_info
*info
,
7480 struct btrfs_path
*path
,
7481 const struct btrfs_key
*root_key
,
7482 const int read_only_mode
)
7484 const u64 root_id
= root_key
->objectid
;
7485 struct cache_extent
*entry
;
7486 struct root_item_info
*rii
;
7487 struct btrfs_root_item ri
;
7488 unsigned long offset
;
7490 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
7493 "Error: could not find extent items for root %llu\n",
7494 root_key
->objectid
);
7498 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
7499 ASSERT(rii
->cache_extent
.start
== root_id
);
7500 ASSERT(rii
->cache_extent
.size
== 1);
7502 if (rii
->node_count
!= 1) {
7504 "Error: could not find btree root extent for root %llu\n",
7509 offset
= btrfs_item_ptr_offset(path
->nodes
[0], path
->slots
[0]);
7510 read_extent_buffer(path
->nodes
[0], &ri
, offset
, sizeof(ri
));
7512 if (btrfs_root_bytenr(&ri
) != rii
->bytenr
||
7513 btrfs_root_level(&ri
) != rii
->level
||
7514 btrfs_root_generation(&ri
) != rii
->gen
) {
7517 * If we're in repair mode but our caller told us to not update
7518 * the root item, i.e. just check if it needs to be updated, don't
7519 * print this message, since the caller will call us again shortly
7520 * for the same root item without read only mode (the caller will
7521 * open a transaction first).
7523 if (!(read_only_mode
&& repair
))
7525 "%sroot item for root %llu,"
7526 " current bytenr %llu, current gen %llu, current level %u,"
7527 " new bytenr %llu, new gen %llu, new level %u\n",
7528 (read_only_mode
? "" : "fixing "),
7530 btrfs_root_bytenr(&ri
), btrfs_root_generation(&ri
),
7531 btrfs_root_level(&ri
),
7532 rii
->bytenr
, rii
->gen
, rii
->level
);
7534 if (btrfs_root_generation(&ri
) > rii
->gen
) {
7536 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
7537 root_id
, btrfs_root_generation(&ri
), rii
->gen
);
7541 if (!read_only_mode
) {
7542 btrfs_set_root_bytenr(&ri
, rii
->bytenr
);
7543 btrfs_set_root_level(&ri
, rii
->level
);
7544 btrfs_set_root_generation(&ri
, rii
->gen
);
7545 write_extent_buffer(path
->nodes
[0], &ri
,
7546 offset
, sizeof(ri
));
7556 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
7557 * caused read-only snapshots to be corrupted if they were created at a moment
7558 * when the source subvolume/snapshot had orphan items. The issue was that the
7559 * on-disk root items became incorrect, referring to the pre orphan cleanup root
7560 * node instead of the post orphan cleanup root node.
7561 * So this function, and its callees, just detects and fixes those cases. Even
7562 * though the regression was for read-only snapshots, this function applies to
7563 * any snapshot/subvolume root.
7564 * This must be run before any other repair code - not doing it so, makes other
7565 * repair code delete or modify backrefs in the extent tree for example, which
7566 * will result in an inconsistent fs after repairing the root items.
7568 static int repair_root_items(struct btrfs_fs_info
*info
)
7570 struct btrfs_path
*path
= NULL
;
7571 struct btrfs_key key
;
7572 struct extent_buffer
*leaf
;
7573 struct btrfs_trans_handle
*trans
= NULL
;
7578 ret
= build_roots_info_cache(info
);
7582 path
= btrfs_alloc_path();
7588 key
.objectid
= BTRFS_FIRST_FREE_OBJECTID
;
7589 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7594 * Avoid opening and committing transactions if a leaf doesn't have
7595 * any root items that need to be fixed, so that we avoid rotating
7596 * backup roots unnecessarily.
7599 trans
= btrfs_start_transaction(info
->tree_root
, 1);
7600 if (IS_ERR(trans
)) {
7601 ret
= PTR_ERR(trans
);
7606 ret
= btrfs_search_slot(trans
, info
->tree_root
, &key
, path
,
7610 leaf
= path
->nodes
[0];
7613 struct btrfs_key found_key
;
7615 if (path
->slots
[0] >= btrfs_header_nritems(leaf
)) {
7616 int no_more_keys
= find_next_key(path
, &key
);
7618 btrfs_release_path(path
);
7620 ret
= btrfs_commit_transaction(trans
,
7632 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
7634 if (found_key
.type
!= BTRFS_ROOT_ITEM_KEY
)
7637 ret
= maybe_repair_root_item(info
, path
, &found_key
,
7642 if (!trans
&& repair
) {
7645 btrfs_release_path(path
);
7655 free_roots_info_cache();
7657 btrfs_free_path(path
);
7664 static struct option long_options
[] = {
7665 { "super", 1, NULL
, 's' },
7666 { "repair", 0, NULL
, 0 },
7667 { "init-csum-tree", 0, NULL
, 0 },
7668 { "init-extent-tree", 0, NULL
, 0 },
7669 { "check-data-csum", 0, NULL
, 0 },
7670 { "backup", 0, NULL
, 0 },
7671 { "subvol-extents", 1, NULL
, 'E' },
7672 { "qgroup-report", 0, NULL
, 'Q' },
7676 const char * const cmd_check_usage
[] = {
7677 "btrfs check [options] <device>",
7678 "Check an unmounted btrfs filesystem.",
7680 "-s|--super <superblock> use this superblock copy",
7681 "-b|--backup use the backup root copy",
7682 "--repair try to repair the filesystem",
7683 "--init-csum-tree create a new CRC tree",
7684 "--init-extent-tree create a new extent tree",
7685 "--check-data-csum verify checkums of data blocks",
7686 "--qgroup-report print a report on qgroup consistency",
7687 "--subvol-extents <subvolid> print subvolume extents and sharing state",
7691 int cmd_check(int argc
, char **argv
)
7693 struct cache_tree root_cache
;
7694 struct btrfs_root
*root
;
7695 struct btrfs_fs_info
*info
;
7698 char uuidbuf
[BTRFS_UUID_UNPARSED_SIZE
];
7701 int option_index
= 0;
7702 int init_csum_tree
= 0;
7703 int qgroup_report
= 0;
7704 enum btrfs_open_ctree_flags ctree_flags
= OPEN_CTREE_EXCLUSIVE
;
7708 c
= getopt_long(argc
, argv
, "as:b", long_options
,
7713 case 'a': /* ignored */ break;
7715 ctree_flags
|= OPEN_CTREE_BACKUP_ROOT
;
7718 num
= arg_strtou64(optarg
);
7719 if (num
>= BTRFS_SUPER_MIRROR_MAX
) {
7721 "ERROR: super mirror should be less than: %d\n",
7722 BTRFS_SUPER_MIRROR_MAX
);
7725 bytenr
= btrfs_sb_offset(((int)num
));
7726 printf("using SB copy %llu, bytenr %llu\n", num
,
7727 (unsigned long long)bytenr
);
7733 subvolid
= arg_strtou64(optarg
);
7737 usage(cmd_check_usage
);
7739 if (option_index
== 1) {
7740 printf("enabling repair mode\n");
7742 ctree_flags
|= OPEN_CTREE_WRITES
;
7743 } else if (option_index
== 2) {
7744 printf("Creating a new CRC tree\n");
7747 ctree_flags
|= OPEN_CTREE_WRITES
;
7748 } else if (option_index
== 3) {
7749 init_extent_tree
= 1;
7750 ctree_flags
|= (OPEN_CTREE_WRITES
|
7751 OPEN_CTREE_NO_BLOCK_GROUPS
);
7753 } else if (option_index
== 4) {
7754 check_data_csum
= 1;
7757 argc
= argc
- optind
;
7759 if (check_argc_exact(argc
, 1))
7760 usage(cmd_check_usage
);
7763 cache_tree_init(&root_cache
);
7765 if((ret
= check_mounted(argv
[optind
])) < 0) {
7766 fprintf(stderr
, "Could not check mount status: %s\n", strerror(-ret
));
7769 fprintf(stderr
, "%s is currently mounted. Aborting.\n", argv
[optind
]);
7774 /* only allow partial opening under repair mode */
7776 ctree_flags
|= OPEN_CTREE_PARTIAL
;
7778 info
= open_ctree_fs_info(argv
[optind
], bytenr
, 0, ctree_flags
);
7780 fprintf(stderr
, "Couldn't open file system\n");
7785 root
= info
->fs_root
;
7787 ret
= repair_root_items(info
);
7791 fprintf(stderr
, "Fixed %d roots.\n", ret
);
7793 } else if (ret
> 0) {
7795 "Found %d roots with an outdated root item.\n",
7798 "Please run a filesystem check with the option --repair to fix them.\n");
7804 * repair mode will force us to commit transaction which
7805 * will make us fail to load log tree when mounting.
7807 if (repair
&& btrfs_super_log_root(info
->super_copy
)) {
7808 ret
= ask_user("repair mode will force to clear out log tree, Are you sure?");
7813 ret
= zero_log_tree(root
);
7815 fprintf(stderr
, "fail to zero log tree\n");
7820 uuid_unparse(info
->super_copy
->fsid
, uuidbuf
);
7821 if (qgroup_report
) {
7822 printf("Print quota groups for %s\nUUID: %s\n", argv
[optind
],
7824 ret
= qgroup_verify_all(info
);
7826 print_qgroup_report(1);
7830 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
7831 subvolid
, argv
[optind
], uuidbuf
);
7832 ret
= print_extent_state(info
, subvolid
);
7835 printf("Checking filesystem on %s\nUUID: %s\n", argv
[optind
], uuidbuf
);
7837 if (!extent_buffer_uptodate(info
->tree_root
->node
) ||
7838 !extent_buffer_uptodate(info
->dev_root
->node
) ||
7839 !extent_buffer_uptodate(info
->chunk_root
->node
)) {
7840 fprintf(stderr
, "Critical roots corrupted, unable to fsck the FS\n");
7845 if (init_extent_tree
|| init_csum_tree
) {
7846 struct btrfs_trans_handle
*trans
;
7848 trans
= btrfs_start_transaction(info
->extent_root
, 0);
7849 if (IS_ERR(trans
)) {
7850 fprintf(stderr
, "Error starting transaction\n");
7851 ret
= PTR_ERR(trans
);
7855 if (init_extent_tree
) {
7856 printf("Creating a new extent tree\n");
7857 ret
= reinit_extent_tree(trans
, info
);
7862 if (init_csum_tree
) {
7863 fprintf(stderr
, "Reinit crc root\n");
7864 ret
= btrfs_fsck_reinit_root(trans
, info
->csum_root
, 0);
7866 fprintf(stderr
, "crc root initialization failed\n");
7871 ret
= fill_csum_tree(trans
, info
->csum_root
);
7873 fprintf(stderr
, "crc refilling failed\n");
7878 * Ok now we commit and run the normal fsck, which will add
7879 * extent entries for all of the items it finds.
7881 ret
= btrfs_commit_transaction(trans
, info
->extent_root
);
7885 if (!extent_buffer_uptodate(info
->extent_root
->node
)) {
7886 fprintf(stderr
, "Critical roots corrupted, unable to fsck the FS\n");
7890 if (!extent_buffer_uptodate(info
->csum_root
->node
)) {
7891 fprintf(stderr
, "Checksum root corrupted, rerun with --init-csum-tree option\n");
7896 fprintf(stderr
, "checking extents\n");
7897 ret
= check_chunks_and_extents(root
);
7899 fprintf(stderr
, "Errors found in extent allocation tree or chunk allocation\n");
7901 fprintf(stderr
, "checking free space cache\n");
7902 ret
= check_space_cache(root
);
7907 * We used to have to have these hole extents in between our real
7908 * extents so if we don't have this flag set we need to make sure there
7909 * are no gaps in the file extents for inodes, otherwise we can just
7910 * ignore it when this happens.
7912 no_holes
= btrfs_fs_incompat(root
->fs_info
,
7913 BTRFS_FEATURE_INCOMPAT_NO_HOLES
);
7914 fprintf(stderr
, "checking fs roots\n");
7915 ret
= check_fs_roots(root
, &root_cache
);
7919 fprintf(stderr
, "checking csums\n");
7920 ret
= check_csums(root
);
7924 fprintf(stderr
, "checking root refs\n");
7925 ret
= check_root_refs(root
, &root_cache
);
7929 while (repair
&& !list_empty(&root
->fs_info
->recow_ebs
)) {
7930 struct extent_buffer
*eb
;
7932 eb
= list_first_entry(&root
->fs_info
->recow_ebs
,
7933 struct extent_buffer
, recow
);
7934 list_del_init(&eb
->recow
);
7935 ret
= recow_extent_buffer(root
, eb
);
7940 while (!list_empty(&delete_items
)) {
7941 struct bad_item
*bad
;
7943 bad
= list_first_entry(&delete_items
, struct bad_item
, list
);
7944 list_del_init(&bad
->list
);
7946 ret
= delete_bad_item(root
, bad
);
7950 if (info
->quota_enabled
) {
7952 fprintf(stderr
, "checking quota groups\n");
7953 err
= qgroup_verify_all(info
);
7958 if (!list_empty(&root
->fs_info
->recow_ebs
)) {
7959 fprintf(stderr
, "Transid errors in file system\n");
7963 print_qgroup_report(0);
7964 if (found_old_backref
) { /*
7965 * there was a disk format change when mixed
7966 * backref was in testing tree. The old format
7967 * existed about one week.
7969 printf("\n * Found old mixed backref format. "
7970 "The old format is not supported! *"
7971 "\n * Please mount the FS in readonly mode, "
7972 "backup data and re-format the FS. *\n\n");
7975 printf("found %llu bytes used err is %d\n",
7976 (unsigned long long)bytes_used
, ret
);
7977 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes
);
7978 printf("total tree bytes: %llu\n",
7979 (unsigned long long)total_btree_bytes
);
7980 printf("total fs tree bytes: %llu\n",
7981 (unsigned long long)total_fs_tree_bytes
);
7982 printf("total extent tree bytes: %llu\n",
7983 (unsigned long long)total_extent_tree_bytes
);
7984 printf("btree space waste bytes: %llu\n",
7985 (unsigned long long)btree_space_waste
);
7986 printf("file data blocks allocated: %llu\n referenced %llu\n",
7987 (unsigned long long)data_bytes_allocated
,
7988 (unsigned long long)data_bytes_referenced
);
7989 printf("%s\n", BTRFS_BUILD_VERSION
);
7991 free_root_recs_tree(&root_cache
);