/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * UNDO ALGORITHM:
 *
 *	The UNDO algorithm is trivial.  The nominal UNDO range in the
 *	FIFO is determined by taking the first/next offset stored in
 *	the volume header.  The next offset may not be correct since
 *	UNDO flushes are not required to flush the volume header, so
 *	the code also scans forward until it finds a discontinuous
 *	sequence number.
 *
 *	The UNDOs are then scanned and executed in reverse order.  These
 *	UNDOs are effectively just data restorations based on HAMMER offsets.
 *
 * REDO ALGORITHM:
 *
 *	REDO records are laid down in the UNDO/REDO FIFO for nominal
 *	writes, truncations, and file extension ops.  On a per-inode
 *	basis two types of REDO records are generated, REDO_WRITE
 *	and REDO_TRUNC.
 *
 *	Essentially the recovery block will contain UNDO records backing
 *	out partial operations and REDO records to regenerate those partial
 *	operations guaranteed by the filesystem during recovery.
 *
 *	REDO generation is optional, and can also be started and then
 *	later stopped due to excessive write()s in between fsyncs, or not
 *	started at all.  Because of this the recovery code must determine
 *	when REDOs are valid and when they are not.  Additional records are
 *	generated to help figure it out.
 *
 *	The REDO_TERM_WRITE and REDO_TERM_TRUNC records are generated
 *	during a flush cycle indicating which records the flush cycle
 *	has synched meta-data for, and HAMMER_REDO_SYNC is generated in
 *	each flush cycle to indicate how far back in the UNDO/REDO FIFO
 *	the recovery code must go to find the earliest applicable REDO
 *	record.  Applicable REDO records can be far outside the nominal
 *	UNDO recovery range, for example if a write() lays down a REDO but
 *	the related file is not flushed for several cycles.
 *
 *	The SYNC reference is to a point prior to the nominal UNDO FIFO
 *	range, creating an extended REDO range which must be scanned.
 *
 *	Any REDO_WRITE/REDO_TRUNC encountered within the extended range
 *	which have no matching REDO_TERM_WRITE/REDO_TERM_TRUNC records
 *	prior to the start of the nominal UNDO range are applicable.
 *	That is, any REDO_TERM_* records in the extended range but not in
 *	the nominal undo range will mask any redo operations for prior REDO
 *	records.  This is necessary because once the TERM is laid down
 *	followup operations may make additional changes to the related
 *	records but not necessarily record them as REDOs (because REDOs are
 *	optional).
 *
 *	REDO_TERM_WRITE/REDO_TERM_TRUNC records in the nominal UNDO range
 *	must be ignored since they represent meta-data flushes which are
 *	undone by the UNDOs in that nominal UNDO range by the recovery
 *	code.  Only REDO_TERM_* records in the extended range but not
 *	in the nominal undo range are applicable.
 *
 *	The REDO_SYNC record itself always exists in the nominal UNDO range
 *	(this is how the extended range is determined).  For recovery
 *	purposes the most recent REDO_SYNC record is always used if several
 *	are found.
 *
 * CRASHES DURING UNDO/REDO
 *
 *	A crash during the UNDO phase requires no additional effort.  The
 *	UNDOs will simply be re-run again.  The state of the UNDO/REDO fifo
 *	remains unchanged and has no re-crash issues.
 *
 *	A crash during the REDO phase is more complex because the REDOs
 *	run normal filesystem ops and generate additional UNDO/REDO records.
 *	REDO is disabled during REDO recovery and any SYNC records generated
 *	by flushes during REDO recovery must continue to reference the
 *	original extended range.
 *
 *	If multiple crashes occur and the UNDO/REDO FIFO wraps, REDO recovery
 *	may become impossible.  This is detected when the start of the
 *	extended range fails to have monotonically increasing sequence
 *	numbers leading into the nominal undo range.
 */

#include "hammer.h"

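/*
 * Schematic of the two ranges (illustrative layout, not to scale).
 * Assuming an unwrapped FIFO, the extended REDO range located via
 * REDO_SYNC encloses the nominal UNDO range:
 *
 *	[UNDO/REDO FIFO............................................]
 *	      ext_offset........first_offset...........next_offset
 *	      |------------- extended REDO range ----------------|
 *	                    |------ nominal UNDO range ----------|
 *
 * Stage 1 executes UNDOs backwards over the nominal range only;
 * stage 2 then scans REDOs forwards over the whole extended range.
 */
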
/*
 * Specify the way we want to handle stage2 errors.
 *
 * Following values are accepted:
 *
 * 0 - Run redo recovery normally and fail to mount if
 *     the operation fails (default).
 * 1 - Run redo recovery, but don't fail to mount if the
 *     operation fails.
 * 2 - Completely skip redo recovery (only for severe error
 *     conditions and/or debugging).
 */
static int hammer_skip_redo = 0;
TUNABLE_INT("vfs.hammer.skip_redo", &hammer_skip_redo);

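/*
 * Usage sketch: TUNABLE_INT() reads the variable from the kernel
 * environment at boot, so a loader.conf entry such as
 *
 *	vfs.hammer.skip_redo="2"
 *
 * would, for example, skip redo recovery entirely on the next mount.
 * Value 2 is intended only for severe error conditions or debugging.
 */
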
/*
 * Each rterm entry has a list of fifo offsets indicating termination
 * points.  These are stripped as the scan progresses.
 */
typedef struct hammer_rterm_entry {
	struct hammer_rterm_entry *next;
	hammer_off_t		fifo_offset;
} *hammer_rterm_entry_t;

/*
 * rterm entries sorted in RB tree are indexed by objid, flags, and offset.
 * TRUNC entries ignore the offset.
 */
typedef struct hammer_rterm {
	RB_ENTRY(hammer_rterm)	rb_node;
	int64_t			redo_objid;
	uint32_t		redo_localization;
	uint32_t		redo_flags;
	hammer_off_t		redo_offset;
	hammer_rterm_entry_t	term_list;
} *hammer_rterm_t;

static int hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2);
struct hammer_rterm_rb_tree;
RB_HEAD(hammer_rterm_rb_tree, hammer_rterm);
RB_PROTOTYPE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

static int hammer_check_tail_signature(hammer_mount_t hmp,
			hammer_fifo_tail_t tail, hammer_off_t end_off);
static int hammer_check_head_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_off_t beg_off);
static void hammer_recover_copy_undo(hammer_off_t undo_offset,
			char *src, char *dst, int bytes);
static hammer_fifo_any_t hammer_recover_scan_fwd(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);
static hammer_fifo_any_t hammer_recover_scan_rev(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);

#if 0
static void hammer_recover_debug_dump(int w, char *buf, int bytes);
#endif

static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_fifo_undo_t undo);
static int hammer_recover_redo_rec(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static int hammer_recover_redo_run(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static void hammer_recover_redo_exec(hammer_mount_t hmp,
			hammer_fifo_redo_t redo);

RB_GENERATE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

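/*
 * The RB_PROTOTYPE()/RB_GENERATE() pair above expands into a standard
 * <sys/tree.h> red-black tree implementation keyed by
 * hammer_rterm_rb_cmp().  The recovery code below only uses the generic
 * RB_INSERT(), RB_FIND(), RB_ROOT() and RB_REMOVE() wrappers on it.
 */
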
/*
 * Recover filesystem meta-data on mount.  This procedure figures out the
 * UNDO FIFO range and runs the UNDOs backwards.  The FIFO pointers are not
 * resynchronized by this procedure.
 *
 * This procedure is run near the beginning of the mount sequence, before
 * any B-Tree or high-level accesses are enabled, and is responsible for
 * restoring the meta-data to a consistent state.  High level HAMMER data
 * structures (such as the B-Tree) cannot be accessed here.
 *
 * NOTE: No information from the root volume has been cached in the
 *	 hammer_mount structure yet, so we need to access the root volume's
 *	 ondisk buffer directly.
 */
int
hammer_recover_stage1(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t scan_offset_save;
	hammer_off_t bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	uint32_t seqno;
	int error;
	int degenerate_case = 0;

	/*
	 * Examine the UNDO FIFO indices in the volume header.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;
	buffer = NULL;
	error = 0;

	hmp->recover_stage2_offset = 0;

	if (first_offset > rootmap->alloc_offset ||
	    last_offset > rootmap->alloc_offset) {
		hvkprintf(root_volume,
			"Illegal UNDO FIFO index range "
			"%016jx, %016jx limit %016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset,
			(intmax_t)rootmap->alloc_offset);
		error = EIO;
		goto done;
	}

	/*
	 * In HAMMER version 4+ filesystems the volume header does NOT
	 * contain definitive UNDO FIFO state.  In particular, the
	 * rootmap->next_offset may not be indexed completely to the
	 * end of the active UNDO FIFO.
	 */
	if (hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		/*
		 * To find the definitive range we must first scan backwards
		 * from first_offset to locate the first real record and
		 * extract the sequence number from it.  This record is not
		 * part of the active undo space.
		 */
		scan_offset = first_offset;
		seqno = 0;

		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				seqno = head->head.hdr_seq;
				break;
			}
		}
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno backscan\n");
			goto done;
		}

		/*
		 * Scan forwards from first_offset and (seqno+1) looking
		 * for a sequence space discontinuity.  This denotes the
		 * end of the active FIFO area.
		 *
		 * NOTE: For the case where the FIFO is empty the very first
		 *	 record we find will be discontinuous.
		 *
		 * NOTE: Do not include trailing PADs in the scan range,
		 *	 and remember the returned scan_offset after a
		 *	 fwd iteration points to the end of the returned
		 *	 record.
		 */
		hvkprintf(root_volume, "recovery check seqno=%08x\n", seqno);

		scan_offset = first_offset;
		scan_offset_save = scan_offset;
		++seqno;
		hmp->recover_stage2_seqno = seqno;

		for (;;) {
			head = hammer_recover_scan_fwd(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (seqno != head->head.hdr_seq) {
					scan_offset = scan_offset_save;
					break;
				}
				scan_offset_save = scan_offset;
				++seqno;
			}

			/*
			 * If the forward scan is grossly ahead of last_offset
			 * then something is wrong.  last_offset is supposed
			 * to be updated on a regular basis.
			 */
			if (last_offset >= scan_offset) {
				bytes = last_offset - scan_offset;
			} else {
				bytes = rootmap->alloc_offset - scan_offset +
					HAMMER_OFF_LONG_ENCODE(last_offset);
			}
			if (bytes >
			    HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset) *
			    4 / 5) {
				hvkprintf(root_volume,
					"recovery forward scan is "
					"grossly beyond the last_offset in "
					"the volume header, this can't be "
					"right\n");
				error = EIO;
				break;
			}
		}

		/*
		 * Store the seqno.  This will be the next seqno we lay down
		 * when generating new UNDOs.
		 */
		hmp->undo_seqno = seqno;
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno fwdscan\n");
			goto done;
		}
		last_offset = scan_offset;
		hvkprintf(root_volume,
			"recovery range %016jx-%016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset);
		hvkprintf(root_volume,
			"recovery nexto %016jx endseqno=%08x\n",
			(intmax_t)rootmap->next_offset,
			seqno);
	}

	/*
	 * Calculate the size of the active portion of the FIFO.  If the
	 * FIFO is empty the filesystem is clean and no further action is
	 * needed.
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
	if (bytes == 0) {
		degenerate_case = 1;
		error = 0;
		goto done;
	}

	hvkprintf(root_volume,
		"recovery undo %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : "(RW)"));
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto done;
	}

	/*
	 * Scan the UNDOs backwards.
	 */
	scan_offset = last_offset;

	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != first_offset);
		head = hammer_recover_scan_rev(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		/*
		 * Normal UNDO
		 */
		error = hammer_recover_undo(hmp, root_volume, &head->undo);
		if (error) {
			hvkprintf(root_volume,
				"UNDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}

		/*
		 * The first REDO_SYNC record encountered (scanning backwards)
		 * enables REDO processing.
		 */
		if (head->head.hdr_type == HAMMER_HEAD_TYPE_REDO &&
		    head->redo.redo_flags == HAMMER_REDO_SYNC) {
			if (hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) {
				hvkprintf(root_volume,
					"Ignoring extra REDO_SYNC "
					"records in UNDO/REDO FIFO.\n");
			} else {
				hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_REQ;
				hmp->recover_stage2_offset =
					head->redo.redo_offset;
				hvkprintf(root_volume,
					"Found REDO_SYNC %016jx\n",
					(intmax_t)head->redo.redo_offset);
			}
		}
		bytes -= head->head.hdr_size;

		/*
		 * If too many dirty buffers have built up we have to flush'm
		 * out.  As long as we do not flush out the volume header
		 * a crash here should not cause any problems.
		 *
		 * buffer must be released so the flush can assert that
		 * all buffers are idle.
		 */
		if (hammer_flusher_meta_limit(hmp)) {
			if (buffer) {
				hammer_rel_buffer(buffer, 0);
				buffer = NULL;
			}
			if (hmp->ronly == 0) {
				hammer_recover_flush_buffers(hmp, root_volume,
							     0);
				hvkprintf(root_volume, "Continuing recovery\n");
			} else {
				hvkprintf(root_volume,
					"Insufficient buffer cache to hold "
					"dirty buffers on read-only mount!\n");
				error = EIO;
				break;
			}
		}
	}
	KKASSERT(error || bytes == 0);

done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers.  FIFO
	 * indices in the volume header are updated to the actual undo
	 * range but will not be collapsed until stage 2.
	 */
	if (error == 0) {
		hammer_modify_volume_noundo(NULL, root_volume);
		rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
		rootmap->first_offset = first_offset;
		rootmap->next_offset  = last_offset;
		hammer_modify_volume_done(root_volume);
		if (hmp->ronly == 0)
			hammer_recover_flush_buffers(hmp, root_volume, 1);
	} else {
		hammer_recover_flush_buffers(hmp, root_volume, -1);
	}
	if (degenerate_case == 0) {
		hvkprintf(root_volume, "recovery complete\n");
	} else {
		hvkprintf(root_volume, "mounted clean, no recovery needed\n");
	}
	return (error);
}

/*
 * Execute redo operations.
 *
 * This procedure is run at the end of the mount sequence, after the hammer
 * mount structure has been completely initialized but before the filesystem
 * goes live.  It can access standard cursors, the B-Tree, flush the
 * filesystem, and so forth.
 *
 * This code may only be called for read-write mounts or when a mount
 * switches from read-only to read-write.  vnodes may or may not be present.
 *
 * The stage1 code will have already calculated the correct FIFO range
 * for the nominal UNDO FIFO and stored it in the rootmap.  The extended
 * range for REDO is stored in hmp->recover_stage2_offset.
 */
int
hammer_recover_stage2(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t oscan_offset;
	hammer_off_t bytes;
	hammer_off_t ext_bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	hammer_off_t ext_offset;
	struct hammer_rterm_rb_tree rterm_root;
	uint32_t seqno;
	int error;

	/*
	 * Stage 2 can only be run on a RW mount, or when the mount is
	 * switched from RO to RW.
	 */
	KKASSERT(hmp->ronly == 0);
	RB_INIT(&rterm_root);

	if (hammer_skip_redo == 1)
		hvkprintf(root_volume, "recovery redo marked as optional\n");

	if (hammer_skip_redo == 2) {
		hvkprintf(root_volume, "recovery redo skipped.\n");
		return (0);
	}

	/*
	 * Examine the UNDO FIFO.  If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;
	if (first_offset == last_offset) {
		KKASSERT((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0);
		return (0);
	}

	/*
	 * Stage2 must only be run once, and will not be run at all
	 * if Stage1 did not find a REDO_SYNC record.
	 */
	error = 0;
	buffer = NULL;

	if ((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0)
		goto done;
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_REQ;
	hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_RUN;
	ext_offset = hmp->recover_stage2_offset;
	if (ext_offset == 0) {
		hvkprintf(root_volume,
			"REDO stage specified but no REDO_SYNC "
			"offset, ignoring\n");
		goto done;
	}

	/*
	 * Calculate nominal UNDO range (this is not yet the extended
	 * range).
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
	hvkprintf(root_volume,
		"recovery redo %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : "(RW)"));
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto done;
	}

	/*
	 * Scan the REDOs backwards collecting REDO_TERM_* information.
	 * This information is only collected for the extended range,
	 * non-inclusive of any TERMs in the nominal UNDO range.
	 *
	 * If the stage2 extended range is inside the nominal undo range
	 * we have nothing to scan.
	 *
	 * This must fit in memory!
	 */
	if (first_offset < last_offset) {
		/*
		 * [      first_offset........last_offset      ]
		 */
		if (ext_offset < first_offset) {
			ext_bytes = first_offset - ext_offset;
		} else if (ext_offset > last_offset) {
			ext_bytes = (rootmap->alloc_offset - ext_offset) +
				    HAMMER_OFF_LONG_ENCODE(first_offset);
		} else {
			ext_bytes = -(ext_offset - first_offset);
		}
	} else {
		/*
		 * [......last_offset         first_offset.....]
		 */
		if (ext_offset < last_offset) {
			ext_bytes = -((rootmap->alloc_offset - first_offset) +
				    HAMMER_OFF_LONG_ENCODE(ext_offset));
		} else if (ext_offset > first_offset) {
			ext_bytes = -(ext_offset - first_offset);
		} else {
			ext_bytes = first_offset - ext_offset;
		}
	}

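	/*
	 * Worked example with illustrative offsets: for an unwrapped
	 * FIFO with first_offset = 0x0400 and last_offset = 0x0800,
	 * an ext_offset of 0x0300 precedes the nominal range, giving
	 * ext_bytes = 0x0400 - 0x0300 = 0x100 bytes of extended range
	 * to scan.  An ext_offset of 0x0500, inside the nominal range,
	 * would instead produce a negative ext_bytes, flagging an
	 * embedded extended range that needs no separate backward scan.
	 */
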
	if ((int64_t)ext_bytes > 0) {
		scan_offset = first_offset;
		hvkprintf(root_volume,
			"Find extended redo %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
		seqno = hmp->recover_stage2_seqno - 1;
		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (head->head.hdr_seq != seqno) {
					error = ERANGE;
					break;
				}
				error = hammer_recover_redo_rec(
						hmp, &rterm_root,
						scan_offset, &head->redo);
				--seqno;
			}
			if (scan_offset == ext_offset)
				break;
		}
		if (error) {
			hvkprintf(root_volume,
				"Find extended redo failed %d, "
				"unable to run REDO\n",
				error);
			goto done;
		}
	} else {
		hvkprintf(root_volume,
			"Embedded extended redo %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
	}

	/*
	 * Scan the REDO forwards through the entire extended range.
	 * Anything with a previously recorded matching TERM is discarded.
	 */
	scan_offset = ext_offset;
	bytes += ext_bytes;

	/*
	 * NOTE: when doing a forward scan the returned scan_offset is
	 *	 for the record following the returned record, so we
	 *	 have to play a bit.
	 */
	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != last_offset);

		oscan_offset = scan_offset;
		head = hammer_recover_scan_fwd(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		error = hammer_recover_redo_run(hmp, &rterm_root,
						oscan_offset, &head->redo);
		if (error) {
			hvkprintf(root_volume,
				"UNDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}
		bytes -= head->head.hdr_size;
	}
	KKASSERT(error || bytes == 0);

done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * Cleanup rterm tree.
	 */
	{
		hammer_rterm_t rterm;
		hammer_rterm_entry_t rte;

		while ((rterm = RB_ROOT(&rterm_root)) != NULL) {
			RB_REMOVE(hammer_rterm_rb_tree, &rterm_root, rterm);
			while ((rte = rterm->term_list) != NULL) {
				rterm->term_list = rte->next;
				kfree(rte, hmp->m_misc);
			}
			kfree(rterm, hmp->m_misc);
		}
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers by executing
	 * a normal flush cycle.  Setting HMNT_UNDO_DIRTY bypasses degenerate
	 * case tests and forces the flush in order to update the FIFO indices.
	 *
	 * If a crash occurs during the flush the entire undo/redo will be
	 * re-run during recovery on the next mount.
	 */
	if (error == 0) {
		if (rootmap->first_offset != rootmap->next_offset)
			hmp->hflags |= HMNT_UNDO_DIRTY;
		hammer_flusher_sync(hmp);
	}
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_RUN;

	hvkprintf(root_volume, "End redo recovery\n");

	if (error && hammer_skip_redo == 1)
		hvkprintf(root_volume,
			"recovery redo error %d, skipping.\n",
			error);

	return (hammer_skip_redo ? 0 : error);
}

/*
 * Scan backwards from *scan_offsetp, return the FIFO record prior to the
 * record at *scan_offsetp or NULL if an error occurred.
 *
 * On return *scan_offsetp will be the offset of the returned record.
 */
static hammer_fifo_any_t
hammer_recover_scan_rev(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;
	hammer_fifo_tail_t tail;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("rev scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == HAMMER_ENCODE_UNDO(0))
		scan_offset = rootmap->alloc_offset;
	if (scan_offset - sizeof(*tail) < HAMMER_ENCODE_UNDO(0)) {
		hvkprintf(root_volume,
			"UNDO record at %016jx FIFO underflow\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
			    errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO TAIL at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		return (NULL);
	}

	if (hammer_check_tail_signature(hmp, tail, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO TAIL signature at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		*errorp = EIO;
		return (NULL);
	}
	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
	*scan_offsetp = scan_offset - head->head.hdr_size;

	return (head);
}

/*
 * Scan forwards from *scan_offsetp, return the FIFO record or NULL if
 * an error occurred.
 *
 * On return *scan_offsetp will be the offset of the record following
 * the returned record.
 */
static hammer_fifo_any_t
hammer_recover_scan_fwd(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("fwd scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);

	head = hammer_bread(hmp, scan_offset, errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO HEAD at %016jx\n",
			(intmax_t)scan_offset);
		return (NULL);
	}

	if (hammer_check_head_signature(hmp, &head->head, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO HEAD signature at %016jx\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	scan_offset += head->head.hdr_size;
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);
	*scan_offsetp = scan_offset;

	return (head);
}

/*
 * Helper function for hammer_check_{head,tail}_signature().  Check stuff
 * once the head and tail have been established.
 *
 * This function validates the entire FIFO record wrapper.
 */
static int
_hammer_check_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_fifo_tail_t tail,
			hammer_off_t beg_off)
{
	hammer_off_t end_off;
	int bytes;

	/*
	 * Check signatures.  The tail signature is allowed to be the
	 * head signature only for 8-byte PADs.
	 */
	if (head->hdr_signature != HAMMER_HEAD_SIGNATURE) {
		hkprintf("FIFO record bad head signature %04x at %016jx\n",
			head->hdr_signature,
			(intmax_t)beg_off);
		return(2);
	}
	if (head->hdr_size < HAMMER_HEAD_ALIGN ||
	    (head->hdr_size & HAMMER_HEAD_ALIGN_MASK)) {
		hkprintf("FIFO record unaligned or bad size %04x at %016jx\n",
			head->hdr_size,
			(intmax_t)beg_off);
		return(2);
	}
	end_off = beg_off + head->hdr_size;

	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD ||
	    (size_t)(end_off - beg_off) != sizeof(*tail)) {
		if (head->hdr_type != tail->tail_type) {
			hkprintf("FIFO record head/tail type mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_type, tail->tail_type,
				(intmax_t)beg_off);
			return(2);
		}
		if (head->hdr_size != tail->tail_size) {
			hkprintf("FIFO record head/tail size mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_size, tail->tail_size,
				(intmax_t)beg_off);
			return(2);
		}
		if (tail->tail_signature != HAMMER_TAIL_SIGNATURE) {
			hkprintf("FIFO record bad tail signature "
				"%04x at %016jx\n",
				tail->tail_signature,
				(intmax_t)beg_off);
			return(3);
		}
	}

	/*
	 * Non-PAD records must have a CRC and must be sized at
	 * least large enough to fit the head and tail.
	 */
	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD) {
		if (hammer_crc_test_fifo_head(hmp->version,
					      head, head->hdr_size) == 0) {
			hkprintf("FIFO record CRC failed %08x at %016jx\n",
				head->hdr_crc, (intmax_t)beg_off);
			return(EIO);
		}
		if (head->hdr_size < sizeof(*head) + sizeof(*tail)) {
			hkprintf("FIFO record too small %04x at %016jx\n",
				head->hdr_size,
				(intmax_t)beg_off);
			return(EIO);
		}
	}

	/*
	 * Check the tail.
	 */
	bytes = head->hdr_size;
	tail = (void *)((char *)head + bytes - sizeof(*tail));
	if (tail->tail_size != head->hdr_size) {
		hkprintf("Bad tail size %04x vs %04x at %016jx\n",
			tail->tail_size, head->hdr_size,
			(intmax_t)beg_off);
		return(EIO);
	}
	if (tail->tail_type != head->hdr_type) {
		hkprintf("Bad tail type %04x vs %04x at %016jx\n",
			tail->tail_type, head->hdr_type,
			(intmax_t)beg_off);
		return(EIO);
	}

	return(0);
}

/*
 * Check that the FIFO record is in-bounds given the head and the
 * hammer offset.
 *
 * Also checks that the head and tail structures agree with each other,
 * but does not check beyond the signature, type, and size.
 */
static int
hammer_check_head_signature(hammer_mount_t hmp, hammer_fifo_head_t head,
			    hammer_off_t beg_off)
{
	hammer_fifo_tail_t tail;
	hammer_off_t end_off;

	/*
	 * head overlaps buffer boundary.  This could be a PAD so only
	 * check the minimum PAD size here.
	 */
	if (((beg_off + sizeof(*tail) - 1) ^ (beg_off)) & ~HAMMER_BUFMASK64)
		return(1);

	/*
	 * Calculate the ending offset and make sure the record does
	 * not cross a buffer boundary.
	 */
	end_off = beg_off + head->hdr_size;
	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);
	tail = (void *)((char *)head + head->hdr_size - sizeof(*tail));
	return (_hammer_check_signature(hmp, head, tail, beg_off));
}

/*
 * Check that the FIFO record is in-bounds given the tail and the
 * hammer offset.  The offset is pointing at the ending boundary of the
 * record.
 *
 * Also checks that the head and tail structures agree with each other,
 * but does not check beyond the signature, type, and size.
 */
static int
hammer_check_tail_signature(hammer_mount_t hmp, hammer_fifo_tail_t tail,
			    hammer_off_t end_off)
{
	hammer_fifo_head_t head;
	hammer_off_t beg_off;

	/*
	 * tail overlaps buffer boundary
	 */
	if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);

	/*
	 * Calculate the beginning offset and make sure the record does
	 * not cross a buffer boundary.
	 */
	beg_off = end_off - tail->tail_size;
	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);
	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
	return (_hammer_check_signature(hmp, head, tail, beg_off));
}

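/*
 * FIFO record layout assumed by the checks above (schematic):
 *
 *	beg_off                                             end_off
 *	| hammer_fifo_head | payload ..........| hammer_fifo_tail |
 *	     hdr_size == tail_size == (end_off - beg_off)
 *
 * The head check starts from beg_off and locates the tail; the tail
 * check starts from end_off and locates the head; both funnel into
 * _hammer_check_signature(), and no record may cross a buffer boundary.
 */
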
static int
hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
		    hammer_fifo_undo_t undo)
{
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	hammer_off_t buf_offset;
	int zone;
	int error;
	int vol_no;
	int bytes;
	uint32_t offset;

	/*
	 * Only process UNDO records.  Flag if we find other records to
	 * optimize stage2 recovery.
	 */
	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
		return(0);

	/*
	 * Validate the UNDO record.
	 */
	bytes = undo->head.hdr_size - sizeof(*undo) -
		sizeof(struct hammer_fifo_tail);
	if (bytes < 0 || undo->undo_data_bytes < 0 ||
	    undo->undo_data_bytes > bytes) {
		hkprintf("Corrupt UNDO record, undo_data_bytes %d/%d\n",
			undo->undo_data_bytes, bytes);
		return(EIO);
	}

	bytes = undo->undo_data_bytes;

	/*
	 * The undo offset may only be a zone-1 or zone-2 offset.
	 *
	 * Currently we only support a zone-1 offset representing the
	 * volume header.
	 */
	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
	offset = undo->undo_offset & HAMMER_BUFMASK;

	if (offset + bytes > HAMMER_BUFSIZE) {
		hkprintf("Corrupt UNDO record, bad offset\n");
		return(EIO);
	}

	switch(zone) {
	case HAMMER_ZONE_RAW_VOLUME_INDEX:
		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL) {
			hkprintf("UNDO record, cannot access volume %d\n",
				vol_no);
			break;
		}
		hammer_modify_volume_noundo(NULL, volume);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)volume->ondisk + offset,
					 bytes);
		hammer_modify_volume_done(volume);

		/*
		 * Multiple modifications may be made to the same buffer.
		 * Also, the volume header cannot be written out until
		 * everything else has been flushed.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (volume->io.recovered == 0)
			volume->io.recovered = 1;
		else
			hammer_rel_volume(volume, 0);
		break;
	case HAMMER_ZONE_RAW_BUFFER_INDEX:
		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
					   0, &error);
		if (buffer == NULL) {
			hkprintf("UNDO record, cannot access buffer %016jx\n",
				(intmax_t)undo->undo_offset);
			break;
		}
		hammer_modify_buffer_noundo(NULL, buffer);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)buffer->ondisk + offset,
					 bytes);
		hammer_modify_buffer_done(buffer);

		/*
		 * Multiple modifications may be made to the same buffer,
		 * improve performance by delaying the flush.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (buffer->io.recovered == 0)
			buffer->io.recovered = 1;
		else
			hammer_rel_buffer(buffer, 0);
		break;
	default:
		hkprintf("Corrupt UNDO record\n");
		error = EIO;
	}
	return (error);
}

static void
hammer_recover_copy_undo(hammer_off_t undo_offset,
			 char *src, char *dst, int bytes)
{
	if (hammer_debug_general & 0x0080) {
		hdkprintf("UNDO %016jx: %d\n",
			(intmax_t)undo_offset, bytes);
	}
#if 0
	hkprintf("UNDO %016jx:", (intmax_t)undo_offset);
	hammer_recover_debug_dump(22, dst, bytes);
	kprintf("%22s", "to:");
	hammer_recover_debug_dump(22, src, bytes);
#endif
	bcopy(src, dst, bytes);
}

/*
 * Record HAMMER_REDO_TERM_WRITE and HAMMER_REDO_TERM_TRUNC operations
 * during the backwards scan of the extended UNDO/REDO FIFO.  This scan
 * does not include the nominal UNDO range, just the extended range.
 */
static int
hammer_recover_redo_rec(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
{
	hammer_rterm_t rterm;
	hammer_rterm_t nrterm;
	hammer_rterm_entry_t rte;

	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
		return(0);
	if (redo->redo_flags != HAMMER_REDO_TERM_WRITE &&
	    redo->redo_flags != HAMMER_REDO_TERM_TRUNC) {
		return(0);
	}

	nrterm = kmalloc(sizeof(*nrterm), hmp->m_misc, M_WAITOK|M_ZERO);
	nrterm->redo_objid = redo->redo_objid;
	nrterm->redo_localization = redo->redo_localization;
	nrterm->redo_flags = redo->redo_flags;
	nrterm->redo_offset = redo->redo_offset;

	rterm = RB_INSERT(hammer_rterm_rb_tree, root, nrterm);
	if (rterm)
		kfree(nrterm, hmp->m_misc);
	else
		rterm = nrterm;

	if (hammer_debug_general & 0x0080) {
		hkprintf("record record %016jx objid %016jx "
			"offset %016jx flags %08x\n",
			(intmax_t)scan_offset,
			(intmax_t)redo->redo_objid,
			(intmax_t)redo->redo_offset,
			(int)redo->redo_flags);
	}

	/*
	 * Scan in reverse order, rte prepended, so the rte list will be
	 * in forward order.
	 */
	rte = kmalloc(sizeof(*rte), hmp->m_misc, M_WAITOK|M_ZERO);
	rte->fifo_offset = scan_offset;
	rte->next = rterm->term_list;
	rterm->term_list = rte;

	return(0);
}

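/*
 * Example of the masking this sets up: if the extended range contains
 *
 *	... REDO_WRITE(objid I, offset X) ... REDO_TERM_WRITE(objid I,
 *	offset X) ...
 *
 * the backward scan records the TERM here first, so the later forward
 * scan in hammer_recover_redo_run() finds a matching rterm entry and
 * skips the REDO_WRITE; the data it covers was already committed by
 * the flush cycle that generated the TERM.
 */
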
/*
 * Execute HAMMER_REDO_WRITE and HAMMER_REDO_TRUNC operations during
 * the forwards scan of the entire extended UNDO/REDO FIFO range.
 *
 * Records matching previously recorded TERMs have already been committed
 * and are ignored.
 */
static int
hammer_recover_redo_run(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
{
	struct hammer_rterm rtval;
	hammer_rterm_t rterm;
	hammer_rterm_entry_t rte;

	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
		return(0);

	switch(redo->redo_flags) {
	case HAMMER_REDO_WRITE:
	case HAMMER_REDO_TRUNC:
		/*
		 * We hit a REDO request.  The REDO request is only executed
		 * if there is no matching TERM.
		 */
		bzero(&rtval, sizeof(rtval));
		rtval.redo_objid = redo->redo_objid;
		rtval.redo_localization = redo->redo_localization;
		rtval.redo_offset = redo->redo_offset;
		rtval.redo_flags = (redo->redo_flags == HAMMER_REDO_WRITE) ?
				   HAMMER_REDO_TERM_WRITE :
				   HAMMER_REDO_TERM_TRUNC;

		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
		if (rterm) {
			if (hammer_debug_general & 0x0080) {
				hkprintf("ignore record %016jx objid %016jx "
					"offset %016jx flags %08x\n",
					(intmax_t)scan_offset,
					(intmax_t)redo->redo_objid,
					(intmax_t)redo->redo_offset,
					(int)redo->redo_flags);
			}
			break;
		}
		if (hammer_debug_general & 0x0080) {
			hkprintf("run    record %016jx objid %016jx "
				"offset %016jx flags %08x\n",
				(intmax_t)scan_offset,
				(intmax_t)redo->redo_objid,
				(intmax_t)redo->redo_offset,
				(int)redo->redo_flags);
		}

		/*
		 * Redo stage2 can access a live filesystem, acquire the
		 * vnode.
		 */
		hammer_recover_redo_exec(hmp, redo);
		break;
	case HAMMER_REDO_TERM_WRITE:
	case HAMMER_REDO_TERM_TRUNC:
		/*
		 * As we encounter TERMs in the forward scan we remove
		 * them.  Once the forward scan hits the nominal undo range
		 * there will be no more recorded TERMs.
		 */
		bzero(&rtval, sizeof(rtval));
		rtval.redo_objid = redo->redo_objid;
		rtval.redo_localization = redo->redo_localization;
		rtval.redo_flags = redo->redo_flags;
		rtval.redo_offset = redo->redo_offset;

		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
		if (rterm) {
			if ((rte = rterm->term_list) != NULL) {
				KKASSERT(rte->fifo_offset == scan_offset);
				rterm->term_list = rte->next;
				kfree(rte, hmp->m_misc);
			}
		}
		break;
	}
	return(0);
}

static void
hammer_recover_redo_exec(hammer_mount_t hmp, hammer_fifo_redo_t redo)
{
	struct hammer_transaction trans;
	struct vattr va;
	struct hammer_inode *ip;
	struct vnode *vp = NULL;
	int error;

	hammer_start_transaction(&trans, hmp);

	ip = hammer_get_inode(&trans, NULL, redo->redo_objid,
			      HAMMER_MAX_TID, redo->redo_localization,
			      0, &error);
	if (ip == NULL) {
		hkprintf("unable to find objid %016jx:%08x\n",
			(intmax_t)redo->redo_objid, redo->redo_localization);
		goto done2;
	}
	error = hammer_get_vnode(ip, &vp);
	if (error) {
		hkprintf("unable to acquire vnode for %016jx:%08x\n",
			(intmax_t)redo->redo_objid, redo->redo_localization);
		goto done1;
	}

	switch(redo->redo_flags) {
	case HAMMER_REDO_WRITE:
		error = VOP_OPEN(vp, FREAD|FWRITE, proc0.p_ucred, NULL);
		if (error) {
			hkprintf("vn_rdwr open %016jx:%08x returned %d\n",
				(intmax_t)redo->redo_objid,
				redo->redo_localization, error);
			break;
		}
		vn_unlock(vp);
		error = vn_rdwr(UIO_WRITE, vp, (void *)(redo + 1),
				redo->redo_data_bytes,
				redo->redo_offset, UIO_SYSSPACE,
				0, proc0.p_ucred, NULL);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			hkprintf("write %016jx:%08x returned %d\n",
				(intmax_t)redo->redo_objid,
				redo->redo_localization, error);
		}
		VOP_CLOSE(vp, FREAD|FWRITE, NULL);
		break;
	case HAMMER_REDO_TRUNC:
		VATTR_NULL(&va);
		va.va_size = redo->redo_offset;
		error = VOP_SETATTR(vp, &va, proc0.p_ucred);
		if (error) {
			hkprintf("setattr offset %016jx error %d\n",
				(intmax_t)redo->redo_offset, error);
		}
		break;
	}
	vput(vp);
done1:
	hammer_rel_inode(ip, 0);
done2:
	hammer_done_transaction(&trans);
}

/*
 * RB tree compare function.  Note that REDO_TERM_TRUNC ops ignore
 * the offset.
 *
 * WRITE@0 TERM@0 WRITE@0 .... (no TERM@0) etc.
 */
static int
hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2)
{
	if (rt1->redo_objid < rt2->redo_objid)
		return(-1);
	if (rt1->redo_objid > rt2->redo_objid)
		return(1);
	if (rt1->redo_localization < rt2->redo_localization)
		return(-1);
	if (rt1->redo_localization > rt2->redo_localization)
		return(1);
	if (rt1->redo_flags < rt2->redo_flags)
		return(-1);
	if (rt1->redo_flags > rt2->redo_flags)
		return(1);
	if (rt1->redo_flags != HAMMER_REDO_TERM_TRUNC) {
		if (rt1->redo_offset < rt2->redo_offset)
			return(-1);
		if (rt1->redo_offset > rt2->redo_offset)
			return(1);
	}
	return(0);
}

#if 0
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int i;

	for (i = 0; i < bytes; ++i) {
		if (i && (i & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[i]);
	}
	kprintf("\n");
}
#endif

/*
 * Flush recovered buffers from recovery operations.  The call to this
 * routine may be delayed if a read-only mount was made and then later
 * upgraded to read-write.  This routine is also called when unmounting
 * a read-only mount to clean out recovered (dirty) buffers which we
 * couldn't flush (because the mount is read-only).
 *
 * The volume header is always written last.  The UNDO FIFO will be forced
 * to zero-length by setting next_offset to first_offset.  This leaves the
 * (now stale) UNDO information used to recover the disk available for
 * forensic analysis.
 *
 * final is typically 0 or 1.  The volume header is only written if final
 * is 1.  If final is -1 the recovered buffers are discarded instead of
 * written and root_volume can also be passed as NULL in that case.
 */
static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);

void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
			     int final)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);
	hammer_io_wait_all(hmp, "hmrrcw", 1);
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);

	/*
	 * Flush all volume headers except the root volume.  If final < 0
	 * we discard all volume headers including the root volume.
	 */
	if (final >= 0) {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, root_volume);
	} else {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, NULL);
	}

	/*
	 * Finalize the root volume header.
	 *
	 * No interlock is needed, volume buffers are not
	 * messed with by bioops.
	 */
	if (root_volume && root_volume->io.recovered && final > 0) {
		hammer_io_wait_all(hmp, "hmrflx", 1);
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io, 0);
		hammer_rel_volume(root_volume, 0);
		hammer_io_wait_all(hmp, "hmrfly", 1);
	}
}

/*
 * Callback to flush volume headers.  If discarding data will be NULL and
 * all volume headers (including the root volume) will be discarded.
 * Otherwise data is the root_volume and we flush all volume headers
 * EXCEPT the root_volume.
 *
 * Clear any I/O error or modified condition when discarding buffers to
 * clean up the reference count, otherwise the buffer may have extra refs
 * on it.
 */
static int
hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
{
	hammer_volume_t root_volume = data;

	if (volume->io.recovered && volume != root_volume) {
		volume->io.recovered = 0;
		if (root_volume != NULL) {
			/*
			 * No interlock is needed, volume buffers are not
			 * messed with by bioops.
			 */
			hammer_io_flush(&volume->io, 0);
		} else {
			hammer_io_clear_error(&volume->io);
			hammer_io_clear_modify(&volume->io, 1);
		}
		hammer_rel_volume(volume, 0);
	}
	return(0);
}

/*
 * Flush or discard recovered I/O buffers.
 *
 * Clear any I/O error or modified condition when discarding buffers to
 * clean up the reference count, otherwise the buffer may have extra refs
 * on it.
 */
static int
hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
{
	int final = *(int *)data;
	int flush;

	if (buffer->io.recovered) {
		buffer->io.recovered = 0;
		buffer->io.reclaim = 1;
		if (final < 0) {
			hammer_io_clear_error(&buffer->io);
			hammer_io_clear_modify(&buffer->io, 1);
		} else {
			hammer_io_write_interlock(&buffer->io);
			hammer_io_flush(&buffer->io, 0);
			hammer_io_done_interlock(&buffer->io);
		}
		hammer_rel_buffer(buffer, 0);
	} else {
		flush = hammer_ref_interlock(&buffer->io.lock);
		if (flush)
			atomic_add_int(&hammer_count_refedbufs, 1);

		if (final < 0) {
			hammer_io_clear_error(&buffer->io);
			hammer_io_clear_modify(&buffer->io, 1);
		}
		KKASSERT(hammer_oneref(&buffer->io.lock));
		buffer->io.reclaim = 1;
		hammer_rel_buffer(buffer, flush);
	}
	return(0);
}