/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.29 2008/07/26 05:36:21 dillon Exp $
 */
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail
,
40 hammer_off_t end_off
);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset
,
42 char *src
, char *dst
, int bytes
);
44 static void hammer_recover_debug_dump(int w
, char *buf
, int bytes
);
46 static int hammer_recover_undo(hammer_mount_t hmp
, hammer_volume_t root_volume
,
47 hammer_fifo_undo_t undo
, int bytes
);
50 * Recover a filesystem on mount
52 * NOTE: No information from the root volume has been cached in the
53 * hammer_mount structure yet, so we need to access the root volume's
57 hammer_recover(hammer_mount_t hmp
, hammer_volume_t root_volume
)
59 hammer_blockmap_t rootmap
;
60 hammer_buffer_t buffer
;
61 hammer_off_t scan_offset
;
63 hammer_fifo_tail_t tail
;
64 hammer_fifo_undo_t undo
;
65 hammer_off_t first_offset
;
66 hammer_off_t last_offset
;
70 * Examine the UNDO FIFO. If it is empty the filesystem is clean
71 * and no action need be taken.
73 rootmap
= &root_volume
->ondisk
->vol0_blockmap
[HAMMER_ZONE_UNDO_INDEX
];
75 if (rootmap
->first_offset
== rootmap
->next_offset
)
78 first_offset
= rootmap
->first_offset
;
79 last_offset
= rootmap
->next_offset
;
81 if (last_offset
>= first_offset
) {
82 bytes
= last_offset
- first_offset
;
84 bytes
= rootmap
->alloc_offset
- first_offset
+
85 (last_offset
& HAMMER_OFF_LONG_MASK
);
87 kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
88 "(%lld bytes of UNDO)%s\n",
89 root_volume
->ondisk
->vol_name
,
90 (long long)first_offset
,
91 (long long)last_offset
,
93 (hmp
->ronly
? " (RO)" : "(RW)"));
94 if (bytes
> (rootmap
->alloc_offset
& HAMMER_OFF_LONG_MASK
)) {
95 kprintf("Undo size is absurd, unable to mount\n");
100 * Scan the UNDOs backwards.
102 scan_offset
= last_offset
;
104 if (scan_offset
> rootmap
->alloc_offset
) {
105 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
106 root_volume
->ondisk
->vol_name
,
107 (long long)scan_offset
);
112 while ((int64_t)bytes
> 0) {
113 if (hammer_debug_general
& 0x0080)
114 kprintf("scan_offset %016llx\n",
115 (long long)scan_offset
);
116 if (scan_offset
== HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX
, 0)) {
117 scan_offset
= rootmap
->alloc_offset
;
120 if (scan_offset
- sizeof(*tail
) <
121 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX
, 0)) {
122 kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
124 root_volume
->ondisk
->vol_name
,
125 (long long)scan_offset
);
129 tail
= hammer_bread(hmp
, scan_offset
- sizeof(*tail
),
132 kprintf("HAMMER(%s) Unable to read UNDO TAIL "
134 root_volume
->ondisk
->vol_name
,
135 (long long)scan_offset
- sizeof(*tail
));
139 if (hammer_check_tail_signature(tail
, scan_offset
) != 0) {
140 kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
142 root_volume
->ondisk
->vol_name
,
143 (long long)scan_offset
- sizeof(*tail
));
147 undo
= (void *)((char *)tail
+ sizeof(*tail
) - tail
->tail_size
);
149 error
= hammer_recover_undo(hmp
, root_volume
, undo
,
151 (int)((char *)undo
- (char *)buffer
->ondisk
));
153 kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
154 root_volume
->ondisk
->vol_name
,
155 (long long)scan_offset
- tail
->tail_size
);
158 scan_offset
-= tail
->tail_size
;
159 bytes
-= tail
->tail_size
;
162 * If too many dirty buffers have built up we have to flush'm
163 * out. As long as we do not flush out the volume header
164 * a crash here should not cause any problems.
166 * buffer must be released so the flush can assert that
167 * all buffers are idle.
169 if (hammer_flusher_meta_limit(hmp
)) {
171 hammer_rel_buffer(buffer
, 0);
174 if (hmp
->ronly
== 0) {
175 hammer_recover_flush_buffers(hmp
, root_volume
,
177 kprintf("HAMMER(%s) Continuing recovery\n",
178 root_volume
->ondisk
->vol_name
);
180 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
181 root_volume
->ondisk
->vol_name
);
189 hammer_rel_buffer(buffer
, 0);
192 * After completely flushing all the recovered buffers the volume
193 * header will also be flushed. Force the UNDO FIFO to 0-length.
195 if (root_volume
->io
.recovered
== 0) {
196 hammer_ref_volume(root_volume
);
197 root_volume
->io
.recovered
= 1;
201 * Finish up flushing (or discarding) recovered buffers
204 hammer_modify_volume(NULL
, root_volume
, NULL
, 0);
205 rootmap
= &root_volume
->ondisk
->vol0_blockmap
[HAMMER_ZONE_UNDO_INDEX
];
206 rootmap
->first_offset
= last_offset
;
207 rootmap
->next_offset
= last_offset
;
208 hammer_modify_volume_done(root_volume
);
210 hammer_recover_flush_buffers(hmp
, root_volume
, 1);
212 hammer_recover_flush_buffers(hmp
, root_volume
, -1);
214 kprintf("HAMMER(%s) End Recovery\n", root_volume
->ondisk
->vol_name
);
219 hammer_check_tail_signature(hammer_fifo_tail_t tail
, hammer_off_t end_off
)
223 max_bytes
= ((end_off
- sizeof(*tail
)) & HAMMER_BUFMASK
);
224 max_bytes
+= sizeof(*tail
);
227 * tail overlaps buffer boundary
229 if (((end_off
- sizeof(*tail
)) ^ (end_off
- 1)) & ~HAMMER_BUFMASK64
) {
234 * signature check, the tail signature is allowed to be the head
235 * signature only for 8-byte PADs.
237 switch(tail
->tail_signature
) {
238 case HAMMER_TAIL_SIGNATURE
:
240 case HAMMER_HEAD_SIGNATURE
:
241 if (tail
->tail_type
!= HAMMER_HEAD_TYPE_PAD
||
242 tail
->tail_size
!= sizeof(*tail
)) {
249 * The undo structure must not overlap a buffer boundary.
251 if (tail
->tail_size
< sizeof(*tail
) || tail
->tail_size
> max_bytes
) {
258 hammer_recover_undo(hammer_mount_t hmp
, hammer_volume_t root_volume
,
259 hammer_fifo_undo_t undo
, int bytes
)
261 hammer_fifo_tail_t tail
;
262 hammer_volume_t volume
;
263 hammer_buffer_t buffer
;
264 hammer_off_t buf_offset
;
273 * Basic sanity checks
275 if (bytes
< HAMMER_HEAD_ALIGN
) {
276 kprintf("HAMMER: Undo alignment error (%d)\n", bytes
);
279 if (undo
->head
.hdr_signature
!= HAMMER_HEAD_SIGNATURE
) {
280 kprintf("HAMMER: Bad head signature %04x\n",
281 undo
->head
.hdr_signature
);
284 if (undo
->head
.hdr_size
< HAMMER_HEAD_ALIGN
||
285 undo
->head
.hdr_size
> bytes
) {
286 kprintf("HAMMER: Bad size %d\n", bytes
);
291 * Skip PAD records. Note that PAD records also do not require
292 * a tail and may have a truncated structure.
294 if (undo
->head
.hdr_type
== HAMMER_HEAD_TYPE_PAD
)
300 crc
= crc32(undo
, HAMMER_FIFO_HEAD_CRCOFF
) ^
301 crc32(&undo
->head
+ 1, undo
->head
.hdr_size
- sizeof(undo
->head
));
302 if (undo
->head
.hdr_crc
!= crc
) {
303 kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
304 undo
->head
.hdr_crc
, crc
);
312 bytes
= undo
->head
.hdr_size
;
313 tail
= (void *)((char *)undo
+ bytes
- sizeof(*tail
));
314 if (tail
->tail_size
!= undo
->head
.hdr_size
) {
315 kprintf("HAMMER: Bad tail size %d\n", tail
->tail_size
);
318 if (tail
->tail_type
!= undo
->head
.hdr_type
) {
319 kprintf("HAMMER: Bad tail type %d\n", tail
->tail_type
);
324 * Only process UNDO records
326 if (undo
->head
.hdr_type
!= HAMMER_HEAD_TYPE_UNDO
)
330 * Validate the UNDO record.
332 max_bytes
= undo
->head
.hdr_size
- sizeof(*undo
) - sizeof(*tail
);
333 if (undo
->undo_data_bytes
< 0 || undo
->undo_data_bytes
> max_bytes
) {
334 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
335 undo
->undo_data_bytes
, max_bytes
);
340 * The undo offset may only be a zone-1 or zone-2 offset.
342 * Currently we only support a zone-1 offset representing the
345 zone
= HAMMER_ZONE_DECODE(undo
->undo_offset
);
346 offset
= undo
->undo_offset
& HAMMER_BUFMASK
;
348 if (offset
+ undo
->undo_data_bytes
> HAMMER_BUFSIZE
) {
349 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
354 case HAMMER_ZONE_RAW_VOLUME_INDEX
:
355 vol_no
= HAMMER_VOL_DECODE(undo
->undo_offset
);
356 volume
= hammer_get_volume(hmp
, vol_no
, &error
);
357 if (volume
== NULL
) {
358 kprintf("HAMMER: UNDO record, "
359 "cannot access volume %d\n", vol_no
);
362 hammer_modify_volume(NULL
, volume
, NULL
, 0);
363 hammer_recover_copy_undo(undo
->undo_offset
,
365 (char *)volume
->ondisk
+ offset
,
366 undo
->undo_data_bytes
);
367 hammer_modify_volume_done(volume
);
370 * Multiple modifications may be made to the same buffer.
371 * Also, the volume header cannot be written out until
372 * everything else has been flushed. This also
373 * covers the read-only case by preventing the kernel from
374 * flushing the buffer.
376 if (volume
->io
.recovered
== 0)
377 volume
->io
.recovered
= 1;
379 hammer_rel_volume(volume
, 0);
381 case HAMMER_ZONE_RAW_BUFFER_INDEX
:
382 buf_offset
= undo
->undo_offset
& ~HAMMER_BUFMASK64
;
383 buffer
= hammer_get_buffer(hmp
, buf_offset
, HAMMER_BUFSIZE
,
385 if (buffer
== NULL
) {
386 kprintf("HAMMER: UNDO record, "
387 "cannot access buffer %016llx\n",
388 (long long)undo
->undo_offset
);
391 hammer_modify_buffer(NULL
, buffer
, NULL
, 0);
392 hammer_recover_copy_undo(undo
->undo_offset
,
394 (char *)buffer
->ondisk
+ offset
,
395 undo
->undo_data_bytes
);
396 hammer_modify_buffer_done(buffer
);
399 * Multiple modifications may be made to the same buffer,
400 * improve performance by delaying the flush. This also
401 * covers the read-only case by preventing the kernel from
402 * flushing the buffer.
404 if (buffer
->io
.recovered
== 0)
405 buffer
->io
.recovered
= 1;
407 hammer_rel_buffer(buffer
, 0);
410 kprintf("HAMMER: Corrupt UNDO record\n");
417 hammer_recover_copy_undo(hammer_off_t undo_offset
,
418 char *src
, char *dst
, int bytes
)
420 if (hammer_debug_general
& 0x0080) {
421 kprintf("UNDO %016llx: %d\n",
422 (long long)undo_offset
, bytes
);
425 kprintf("UNDO %016llx:", (long long)undo_offset
);
426 hammer_recover_debug_dump(22, dst
, bytes
);
427 kprintf("%22s", "to:");
428 hammer_recover_debug_dump(22, src
, bytes
);
430 bcopy(src
, dst
, bytes
);
/*
 * Hex-dump bytes of buf to the console, 16 bytes per line, with
 * continuation lines indented by w columns to line up under a label.
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int i;

	for (i = 0; i < bytes; ++i) {
		if (i && (i & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[i]);
	}
	kprintf("\n");
}
451 * Flush recovered buffers from recovery operations. The call to this
452 * routine may be delayed if a read-only mount was made and then later
453 * upgraded to read-write.
455 * The volume header is always written last. The UNDO FIFO will be forced
456 * to zero-length by setting next_offset to first_offset. This leaves the
457 * (now stale) UNDO information used to recover the disk available for
460 * final is typically 0 or 1. The volume header is only written if final
461 * is 1. If final is -1 the recovered buffers are discarded instead of
462 * written and root_volume can also be passed as NULL in that case.
464 static int hammer_recover_flush_volume_callback(hammer_volume_t
, void *);
465 static int hammer_recover_flush_buffer_callback(hammer_buffer_t
, void *);
468 hammer_recover_flush_buffers(hammer_mount_t hmp
, hammer_volume_t root_volume
,
472 * Flush the buffers out asynchronously, wait for all the I/O to
473 * complete, then do it again to destroy the buffer cache buffer
474 * so it doesn't alias something later on.
476 RB_SCAN(hammer_buf_rb_tree
, &hmp
->rb_bufs_root
, NULL
,
477 hammer_recover_flush_buffer_callback
, &final
);
478 hammer_io_wait_all(hmp
, "hmrrcw");
479 RB_SCAN(hammer_buf_rb_tree
, &hmp
->rb_bufs_root
, NULL
,
480 hammer_recover_flush_buffer_callback
, &final
);
483 * Flush all volume headers except the root volume. If final < 0
484 * we discard all volume headers including the root volume.
487 RB_SCAN(hammer_vol_rb_tree
, &hmp
->rb_vols_root
, NULL
,
488 hammer_recover_flush_volume_callback
, root_volume
);
490 RB_SCAN(hammer_vol_rb_tree
, &hmp
->rb_vols_root
, NULL
,
491 hammer_recover_flush_volume_callback
, NULL
);
495 * Finalize the root volume header.
497 if (root_volume
&& root_volume
->io
.recovered
&& final
> 0) {
499 while (hmp
->io_running_space
> 0)
500 tsleep(&hmp
->io_running_space
, 0, "hmrflx", 0);
502 root_volume
->io
.recovered
= 0;
503 hammer_io_flush(&root_volume
->io
, 0);
504 hammer_rel_volume(root_volume
, 0);
509 * Callback to flush volume headers. If discarding data will be NULL and
510 * all volume headers (including the root volume) will be discarded.
511 * Otherwise data is the root_volume and we flush all volume headers
512 * EXCEPT the root_volume.
516 hammer_recover_flush_volume_callback(hammer_volume_t volume
, void *data
)
518 hammer_volume_t root_volume
= data
;
520 if (volume
->io
.recovered
&& volume
!= root_volume
) {
521 volume
->io
.recovered
= 0;
522 if (root_volume
!= NULL
)
523 hammer_io_flush(&volume
->io
, 0);
525 hammer_io_clear_modify(&volume
->io
, 1);
526 hammer_rel_volume(volume
, 0);
533 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer
, void *data
)
535 int final
= *(int *)data
;
537 if (buffer
->io
.recovered
) {
538 buffer
->io
.recovered
= 0;
539 buffer
->io
.reclaim
= 1;
541 hammer_io_clear_modify(&buffer
->io
, 1);
543 hammer_io_flush(&buffer
->io
, 0);
544 hammer_rel_buffer(buffer
, 0);
546 KKASSERT(buffer
->io
.lock
.refs
== 0);
547 ++hammer_count_refedbufs
;
548 hammer_ref(&buffer
->io
.lock
);
549 buffer
->io
.reclaim
= 1;
550 hammer_rel_buffer(buffer
, 1);