HAMMER commit - MFC ref count related panics when a HAMMER mount fails, etc.
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
blob d1d99809a3abc01f1de593f538d3018eacaec9fb
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.28.2.1 2008/07/26 05:37:20 dillon Exp $
37 #include "hammer.h"
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40 hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42 char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47 hammer_fifo_undo_t undo, int bytes);
50 * Recover a filesystem on mount
52 * NOTE: No information from the root volume has been cached in the
53 * hammer_mount structure yet, so we need to access the root volume's
54 * buffer directly.
56 int
57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
59 hammer_blockmap_t rootmap;
60 hammer_buffer_t buffer;
61 hammer_off_t scan_offset;
62 hammer_off_t bytes;
63 hammer_fifo_tail_t tail;
64 hammer_fifo_undo_t undo;
65 hammer_off_t first_offset;
66 hammer_off_t last_offset;
67 int error;
70 * Examine the UNDO FIFO. If it is empty the filesystem is clean
71 * and no action need be taken.
73 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
75 if (rootmap->first_offset == rootmap->next_offset)
76 return(0);
78 first_offset = rootmap->first_offset;
79 last_offset = rootmap->next_offset;
81 if (last_offset >= first_offset) {
82 bytes = last_offset - first_offset;
83 } else {
84 bytes = rootmap->alloc_offset - first_offset +
85 (last_offset & HAMMER_OFF_LONG_MASK);
87 kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
88 "(%lld bytes of UNDO)%s\n",
89 root_volume->ondisk->vol_name,
90 first_offset, last_offset,
91 bytes,
92 (hmp->ronly ? " (RO)" : "(RW)"));
93 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
94 kprintf("Undo size is absurd, unable to mount\n");
95 return(EIO);
99 * Scan the UNDOs backwards.
101 scan_offset = last_offset;
102 buffer = NULL;
103 if (scan_offset > rootmap->alloc_offset) {
104 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
105 root_volume->ondisk->vol_name,
106 scan_offset);
107 error = EIO;
108 goto done;
111 while ((int64_t)bytes > 0) {
112 if (hammer_debug_general & 0x0080)
113 kprintf("scan_offset %016llx\n", scan_offset);
114 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
115 scan_offset = rootmap->alloc_offset;
116 continue;
118 if (scan_offset - sizeof(*tail) <
119 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
120 kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
121 "underflow\n",
122 root_volume->ondisk->vol_name,
123 scan_offset);
124 error = EIO;
125 break;
127 tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
128 &error, &buffer);
129 if (error) {
130 kprintf("HAMMER(%s) Unable to read UNDO TAIL "
131 "at %016llx\n",
132 root_volume->ondisk->vol_name,
133 scan_offset - sizeof(*tail));
134 break;
137 if (hammer_check_tail_signature(tail, scan_offset) != 0) {
138 kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
139 "at %016llx\n",
140 root_volume->ondisk->vol_name,
141 scan_offset - sizeof(*tail));
142 error = EIO;
143 break;
145 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
147 error = hammer_recover_undo(hmp, root_volume, undo,
148 HAMMER_BUFSIZE -
149 (int)((char *)undo - (char *)buffer->ondisk));
150 if (error) {
151 kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
152 root_volume->ondisk->vol_name,
153 scan_offset - tail->tail_size);
154 break;
156 scan_offset -= tail->tail_size;
157 bytes -= tail->tail_size;
160 * If too many dirty buffers have built up we have to flush'm
161 * out. As long as we do not flush out the volume header
162 * a crash here should not cause any problems.
164 * buffer must be released so the flush can assert that
165 * all buffers are idle.
167 if (hammer_flusher_meta_limit(hmp)) {
168 if (buffer) {
169 hammer_rel_buffer(buffer, 0);
170 buffer = NULL;
172 if (hmp->ronly == 0) {
173 hammer_recover_flush_buffers(hmp, root_volume,
175 kprintf("HAMMER(%s) Continuing recovery\n",
176 root_volume->ondisk->vol_name);
177 } else {
178 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
179 root_volume->ondisk->vol_name);
180 error = EIO;
181 break;
185 done:
186 if (buffer)
187 hammer_rel_buffer(buffer, 0);
190 * After completely flushing all the recovered buffers the volume
191 * header will also be flushed. Force the UNDO FIFO to 0-length.
193 if (root_volume->io.recovered == 0) {
194 hammer_ref_volume(root_volume);
195 root_volume->io.recovered = 1;
199 * Finish up flushing (or discarding) recovered buffers
201 if (error == 0) {
202 hammer_modify_volume(NULL, root_volume, NULL, 0);
203 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
204 rootmap->first_offset = last_offset;
205 rootmap->next_offset = last_offset;
206 hammer_modify_volume_done(root_volume);
207 if (hmp->ronly == 0)
208 hammer_recover_flush_buffers(hmp, root_volume, 1);
209 } else {
210 hammer_recover_flush_buffers(hmp, root_volume, -1);
212 kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
213 return (error);
216 static int
217 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
219 int max_bytes;
221 max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
222 max_bytes += sizeof(*tail);
225 * tail overlaps buffer boundary
227 if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
228 return(1);
232 * signature check, the tail signature is allowed to be the head
233 * signature only for 8-byte PADs.
235 switch(tail->tail_signature) {
236 case HAMMER_TAIL_SIGNATURE:
237 break;
238 case HAMMER_HEAD_SIGNATURE:
239 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
240 tail->tail_size != sizeof(*tail)) {
241 return(2);
243 break;
247 * The undo structure must not overlap a buffer boundary.
249 if (tail->tail_size < sizeof(*tail) || tail->tail_size > max_bytes) {
250 return(3);
252 return(0);
255 static int
256 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
257 hammer_fifo_undo_t undo, int bytes)
259 hammer_fifo_tail_t tail;
260 hammer_volume_t volume;
261 hammer_buffer_t buffer;
262 hammer_off_t buf_offset;
263 int zone;
264 int error;
265 int vol_no;
266 int max_bytes;
267 u_int32_t offset;
268 u_int32_t crc;
271 * Basic sanity checks
273 if (bytes < HAMMER_HEAD_ALIGN) {
274 kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
275 return(EIO);
277 if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
278 kprintf("HAMMER: Bad head signature %04x\n",
279 undo->head.hdr_signature);
280 return(EIO);
282 if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
283 undo->head.hdr_size > bytes) {
284 kprintf("HAMMER: Bad size %d\n", bytes);
285 return(EIO);
289 * Skip PAD records. Note that PAD records also do not require
290 * a tail and may have a truncated structure.
292 if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
293 return(0);
296 * Check the CRC
298 crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
299 crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
300 if (undo->head.hdr_crc != crc) {
301 kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
302 undo->head.hdr_crc, crc);
303 return(EIO);
308 * Check the tail
310 bytes = undo->head.hdr_size;
311 tail = (void *)((char *)undo + bytes - sizeof(*tail));
312 if (tail->tail_size != undo->head.hdr_size) {
313 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
314 return(EIO);
316 if (tail->tail_type != undo->head.hdr_type) {
317 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
318 return(EIO);
322 * Only process UNDO records
324 if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
325 return(0);
328 * Validate the UNDO record.
330 max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
331 if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
332 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
333 undo->undo_data_bytes, max_bytes);
334 return(EIO);
338 * The undo offset may only be a zone-1 or zone-2 offset.
340 * Currently we only support a zone-1 offset representing the
341 * volume header.
343 zone = HAMMER_ZONE_DECODE(undo->undo_offset);
344 offset = undo->undo_offset & HAMMER_BUFMASK;
346 if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
347 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
348 return (EIO);
351 switch(zone) {
352 case HAMMER_ZONE_RAW_VOLUME_INDEX:
353 vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
354 volume = hammer_get_volume(hmp, vol_no, &error);
355 if (volume == NULL) {
356 kprintf("HAMMER: UNDO record, "
357 "cannot access volume %d\n", vol_no);
358 break;
360 hammer_modify_volume(NULL, volume, NULL, 0);
361 hammer_recover_copy_undo(undo->undo_offset,
362 (char *)(undo + 1),
363 (char *)volume->ondisk + offset,
364 undo->undo_data_bytes);
365 hammer_modify_volume_done(volume);
368 * Multiple modifications may be made to the same buffer.
369 * Also, the volume header cannot be written out until
370 * everything else has been flushed. This also
371 * covers the read-only case by preventing the kernel from
372 * flushing the buffer.
374 if (volume->io.recovered == 0)
375 volume->io.recovered = 1;
376 else
377 hammer_rel_volume(volume, 0);
378 break;
379 case HAMMER_ZONE_RAW_BUFFER_INDEX:
380 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
381 buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
382 0, &error);
383 if (buffer == NULL) {
384 kprintf("HAMMER: UNDO record, "
385 "cannot access buffer %016llx\n",
386 undo->undo_offset);
387 break;
389 hammer_modify_buffer(NULL, buffer, NULL, 0);
390 hammer_recover_copy_undo(undo->undo_offset,
391 (char *)(undo + 1),
392 (char *)buffer->ondisk + offset,
393 undo->undo_data_bytes);
394 hammer_modify_buffer_done(buffer);
397 * Multiple modifications may be made to the same buffer,
398 * improve performance by delaying the flush. This also
399 * covers the read-only case by preventing the kernel from
400 * flushing the buffer.
402 if (buffer->io.recovered == 0)
403 buffer->io.recovered = 1;
404 else
405 hammer_rel_buffer(buffer, 0);
406 break;
407 default:
408 kprintf("HAMMER: Corrupt UNDO record\n");
409 error = EIO;
411 return (error);
414 static void
415 hammer_recover_copy_undo(hammer_off_t undo_offset,
416 char *src, char *dst, int bytes)
418 if (hammer_debug_general & 0x0080)
419 kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
420 #if 0
421 kprintf("UNDO %016llx:", undo_offset);
422 hammer_recover_debug_dump(22, dst, bytes);
423 kprintf("%22s", "to:");
424 hammer_recover_debug_dump(22, src, bytes);
425 #endif
426 bcopy(src, dst, bytes);
#if 0

/*
 * Hex-dump 'bytes' bytes starting at buf, sixteen per output row.
 * Every row after the first is preceded by w columns of padding.
 * (Compiled out together with its callers.)
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int pos = 0;

	while (pos < bytes) {
		/* start a new, indented row every 16 bytes */
		if (pos != 0 && (pos & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[pos]);
		++pos;
	}
	kprintf("\n");
}

#endif
447 * Flush recovered buffers from recovery operations. The call to this
448 * routine may be delayed if a read-only mount was made and then later
449 * upgraded to read-write.
451 * The volume header is always written last. The UNDO FIFO will be forced
452 * to zero-length by setting next_offset to first_offset. This leaves the
453 * (now stale) UNDO information used to recover the disk available for
454 * forensic analysis.
456 * final is typically 0 or 1. The volume header is only written if final
457 * is 1. If final is -1 the recovered buffers are discarded instead of
458 * written and root_volume can also be passed as NULL in that case.
460 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
461 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
463 void
464 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
465 int final)
468 * Flush the buffers out asynchronously, wait for all the I/O to
469 * complete, then do it again to destroy the buffer cache buffer
470 * so it doesn't alias something later on.
472 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
473 hammer_recover_flush_buffer_callback, &final);
474 hammer_io_wait_all(hmp, "hmrrcw");
475 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
476 hammer_recover_flush_buffer_callback, &final);
479 * Flush all volume headers except the root volume. If final < 0
480 * we discard all volume headers including the root volume.
482 if (final >= 0) {
483 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
484 hammer_recover_flush_volume_callback, root_volume);
485 } else {
486 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
487 hammer_recover_flush_volume_callback, NULL);
491 * Finalize the root volume header.
493 if (root_volume && root_volume->io.recovered && final > 0) {
494 crit_enter();
495 while (hmp->io_running_space > 0)
496 tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
497 crit_exit();
498 root_volume->io.recovered = 0;
499 hammer_io_flush(&root_volume->io);
500 hammer_rel_volume(root_volume, 0);
505 * Callback to flush volume headers. If discarding data will be NULL and
506 * all volume headers (including the root volume) will be discarded.
507 * Otherwise data is the root_volume and we flush all volume headers
508 * EXCEPT the root_volume.
510 static
512 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
514 hammer_volume_t root_volume = data;
516 if (volume->io.recovered && volume != root_volume) {
517 volume->io.recovered = 0;
518 if (root_volume != NULL)
519 hammer_io_flush(&volume->io);
520 else
521 hammer_io_clear_modify(&volume->io, 1);
522 hammer_rel_volume(volume, 0);
524 return(0);
527 static
529 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
531 int final = *(int *)data;
533 if (buffer->io.recovered) {
534 buffer->io.recovered = 0;
535 buffer->io.reclaim = 1;
536 if (final < 0)
537 hammer_io_clear_modify(&buffer->io, 1);
538 else
539 hammer_io_flush(&buffer->io);
540 hammer_rel_buffer(buffer, 0);
541 } else {
542 KKASSERT(buffer->io.lock.refs == 0);
543 ++hammer_count_refedbufs;
544 hammer_ref(&buffer->io.lock);
545 buffer->io.reclaim = 1;
546 hammer_rel_buffer(buffer, 1);
548 return(0);