HAMMER 59B/Many: Stabilization pass - fixes for large file issues
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
blob a0364f92cfd78841cdb2bd89facb593c74e5517f
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.26 2008/06/27 20:56:59 dillon Exp $
37 #include "hammer.h"
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40 hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42 char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47 hammer_fifo_undo_t undo, int bytes);
50 * Recover a filesystem on mount
52 * NOTE: No information from the root volume has been cached in the
53 * hammer_mount structure yet, so we need to access the root volume's
54 * buffer directly.
56 int
57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
59 hammer_blockmap_t rootmap;
60 hammer_buffer_t buffer;
61 hammer_off_t scan_offset;
62 hammer_off_t bytes;
63 hammer_fifo_tail_t tail;
64 hammer_fifo_undo_t undo;
65 hammer_off_t first_offset;
66 hammer_off_t last_offset;
67 int error;
68 int reported = 0;
71 * Examine the UNDO FIFO. If it is empty the filesystem is clean
72 * and no action need be taken.
74 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
76 if (rootmap->first_offset == rootmap->next_offset)
77 return(0);
79 first_offset = rootmap->first_offset;
80 last_offset = rootmap->next_offset;
82 if (last_offset >= first_offset) {
83 bytes = last_offset - first_offset;
84 } else {
85 bytes = rootmap->alloc_offset - first_offset +
86 (last_offset & HAMMER_OFF_LONG_MASK);
88 kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
89 "(%lld bytes of UNDO)%s\n",
90 root_volume->ondisk->vol_name,
91 first_offset, last_offset,
92 bytes,
93 (hmp->ronly ? " (RO)" : "(RW)"));
94 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
95 kprintf("Undo size is absurd, unable to mount\n");
96 return(EIO);
100 * Scan the UNDOs backwards.
102 scan_offset = last_offset;
103 buffer = NULL;
104 if (scan_offset > rootmap->alloc_offset) {
105 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
106 root_volume->ondisk->vol_name,
107 scan_offset);
108 error = EIO;
109 goto done;
112 while ((int64_t)bytes > 0) {
113 if (hammer_debug_general & 0x0080)
114 kprintf("scan_offset %016llx\n", scan_offset);
115 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
116 scan_offset = rootmap->alloc_offset;
117 continue;
119 if (scan_offset - sizeof(*tail) <
120 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
121 kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
122 "underflow\n",
123 root_volume->ondisk->vol_name,
124 scan_offset);
125 error = EIO;
126 break;
128 tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
129 &error, &buffer);
130 if (error) {
131 kprintf("HAMMER(%s) Unable to read UNDO TAIL "
132 "at %016llx\n",
133 root_volume->ondisk->vol_name,
134 scan_offset - sizeof(*tail));
135 break;
138 if (hammer_check_tail_signature(tail, scan_offset) != 0) {
139 kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
140 "at %016llx\n",
141 root_volume->ondisk->vol_name,
142 scan_offset - sizeof(*tail));
143 error = EIO;
144 break;
146 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
148 error = hammer_recover_undo(hmp, root_volume, undo,
149 HAMMER_BUFSIZE -
150 (int)((char *)undo - (char *)buffer->ondisk));
151 if (error) {
152 kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
153 root_volume->ondisk->vol_name,
154 scan_offset - tail->tail_size);
155 break;
157 scan_offset -= tail->tail_size;
158 bytes -= tail->tail_size;
161 * If too many dirty buffers have built up
163 if (hammer_flusher_meta_limit(hmp)) {
164 if (hmp->ronly == 0) {
165 hammer_recover_flush_buffers(hmp, root_volume,
167 kprintf("HAMMER(%s) Continuing recovery\n",
168 root_volume->ondisk->vol_name);
169 } else if (reported == 0) {
170 reported = 1;
171 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
172 root_volume->ondisk->vol_name);
176 done:
177 if (buffer)
178 hammer_rel_buffer(buffer, 0);
181 * After completely flushing all the recovered buffers the volume
182 * header will also be flushed. Force the UNDO FIFO to 0-length.
184 if (root_volume->io.recovered == 0) {
185 hammer_ref_volume(root_volume);
186 root_volume->io.recovered = 1;
188 hammer_modify_volume(NULL, root_volume, NULL, 0);
189 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
190 rootmap->first_offset = last_offset;
191 rootmap->next_offset = last_offset;
192 hammer_modify_volume_done(root_volume);
195 * We have collected a large number of dirty buffers during the
196 * recovery, flush them all out. The root volume header will
197 * be flushed out last.
199 if (hmp->ronly == 0 && error == 0)
200 hammer_recover_flush_buffers(hmp, root_volume, 1);
201 kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
202 return (error);
205 static int
206 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
208 int max_bytes;
210 max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
211 max_bytes += sizeof(*tail);
214 * tail overlaps buffer boundary
216 if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
217 return(1);
221 * signature check, the tail signature is allowed to be the head
222 * signature only for 8-byte PADs.
224 switch(tail->tail_signature) {
225 case HAMMER_TAIL_SIGNATURE:
226 break;
227 case HAMMER_HEAD_SIGNATURE:
228 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
229 tail->tail_size != sizeof(*tail)) {
230 return(2);
232 break;
236 * The undo structure must not overlap a buffer boundary.
238 if (tail->tail_size < 0 || tail->tail_size > max_bytes) {
239 return(3);
241 return(0);
244 static int
245 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
246 hammer_fifo_undo_t undo, int bytes)
248 hammer_fifo_tail_t tail;
249 hammer_volume_t volume;
250 hammer_buffer_t buffer;
251 hammer_off_t buf_offset;
252 int zone;
253 int error;
254 int vol_no;
255 int max_bytes;
256 u_int32_t offset;
257 u_int32_t crc;
260 * Basic sanity checks
262 if (bytes < HAMMER_HEAD_ALIGN) {
263 kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
264 return(EIO);
266 if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
267 kprintf("HAMMER: Bad head signature %04x\n",
268 undo->head.hdr_signature);
269 return(EIO);
271 if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
272 undo->head.hdr_size > bytes) {
273 kprintf("HAMMER: Bad size %d\n", bytes);
274 return(EIO);
278 * Skip PAD records. Note that PAD records also do not require
279 * a tail and may have a truncated structure.
281 if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
282 return(0);
285 * Check the CRC
287 crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
288 crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
289 if (undo->head.hdr_crc != crc) {
290 kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
291 undo->head.hdr_crc, crc);
292 return(EIO);
297 * Check the tail
299 bytes = undo->head.hdr_size;
300 tail = (void *)((char *)undo + bytes - sizeof(*tail));
301 if (tail->tail_size != undo->head.hdr_size) {
302 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
303 return(EIO);
305 if (tail->tail_type != undo->head.hdr_type) {
306 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
307 return(EIO);
311 * Only process UNDO records
313 if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
314 return(0);
317 * Validate the UNDO record.
319 max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
320 if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
321 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
322 undo->undo_data_bytes, max_bytes);
323 return(EIO);
327 * The undo offset may only be a zone-1 or zone-2 offset.
329 * Currently we only support a zone-1 offset representing the
330 * volume header.
332 zone = HAMMER_ZONE_DECODE(undo->undo_offset);
333 offset = undo->undo_offset & HAMMER_BUFMASK;
335 if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
336 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
337 return (EIO);
340 switch(zone) {
341 case HAMMER_ZONE_RAW_VOLUME_INDEX:
342 vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
343 volume = hammer_get_volume(hmp, vol_no, &error);
344 if (volume == NULL) {
345 kprintf("HAMMER: UNDO record, "
346 "cannot access volume %d\n", vol_no);
347 break;
349 hammer_modify_volume(NULL, volume, NULL, 0);
350 hammer_recover_copy_undo(undo->undo_offset,
351 (char *)(undo + 1),
352 (char *)volume->ondisk + offset,
353 undo->undo_data_bytes);
354 hammer_modify_volume_done(volume);
357 * Multiple modifications may be made to the same buffer.
358 * Also, the volume header cannot be written out until
359 * everything else has been flushed. This also
360 * covers the read-only case by preventing the kernel from
361 * flushing the buffer.
363 if (volume->io.recovered == 0)
364 volume->io.recovered = 1;
365 else
366 hammer_rel_volume(volume, 0);
367 break;
368 case HAMMER_ZONE_RAW_BUFFER_INDEX:
369 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
370 buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
371 0, &error);
372 if (buffer == NULL) {
373 kprintf("HAMMER: UNDO record, "
374 "cannot access buffer %016llx\n",
375 undo->undo_offset);
376 break;
378 hammer_modify_buffer(NULL, buffer, NULL, 0);
379 hammer_recover_copy_undo(undo->undo_offset,
380 (char *)(undo + 1),
381 (char *)buffer->ondisk + offset,
382 undo->undo_data_bytes);
383 hammer_modify_buffer_done(buffer);
386 * Multiple modifications may be made to the same buffer,
387 * improve performance by delaying the flush. This also
388 * covers the read-only case by preventing the kernel from
389 * flushing the buffer.
391 if (buffer->io.recovered == 0)
392 buffer->io.recovered = 1;
393 else
394 hammer_rel_buffer(buffer, 0);
395 break;
396 default:
397 kprintf("HAMMER: Corrupt UNDO record\n");
398 error = EIO;
400 return (error);
403 static void
404 hammer_recover_copy_undo(hammer_off_t undo_offset,
405 char *src, char *dst, int bytes)
407 if (hammer_debug_general & 0x0080)
408 kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
409 #if 0
410 kprintf("UNDO %016llx:", undo_offset);
411 hammer_recover_debug_dump(22, dst, bytes);
412 kprintf("%22s", "to:");
413 hammer_recover_debug_dump(22, src, bytes);
414 #endif
415 bcopy(src, dst, bytes);
#if 0

/*
 * Debug helper: hex-dump 'bytes' bytes of 'buf', 16 bytes per line,
 * with continuation lines indented by 'w' columns.  Currently compiled
 * out; only referenced from the #if 0 section of
 * hammer_recover_copy_undo().
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int n;

	for (n = 0; n < bytes; ++n) {
		if (n && (n & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[n]);
	}
	kprintf("\n");
}

#endif
436 * Flush recovered buffers from recovery operations. The call to this
437 * routine may be delayed if a read-only mount was made and then later
438 * upgraded to read-write.
440 * The volume header is always written last. The UNDO FIFO will be forced
441 * to zero-length by setting next_offset to first_offset. This leaves the
442 * (now stale) UNDO information used to recover the disk available for
443 * forensic analysis.
445 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
446 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
448 void
449 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
450 int final)
453 * Flush the buffers out asynchronously, wait for all the I/O to
454 * complete, then do it again to destroy the buffer cache buffer
455 * so it doesn't alias something later on.
457 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
458 hammer_recover_flush_buffer_callback, NULL);
459 hammer_io_wait_all(hmp, "hmrrcw");
460 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
461 hammer_recover_flush_buffer_callback, NULL);
463 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
464 hammer_recover_flush_volume_callback, root_volume);
467 * Finaly, deal with the volume header.
469 if (root_volume->io.recovered && final) {
470 crit_enter();
471 while (hmp->io_running_count)
472 tsleep(&hmp->io_running_count, 0, "hmrflx", 0);
473 crit_exit();
474 root_volume->io.recovered = 0;
475 hammer_io_flush(&root_volume->io);
476 hammer_rel_volume(root_volume, 0);
480 static
482 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
484 hammer_volume_t root_volume = data;
486 if (volume->io.recovered && volume != root_volume) {
487 volume->io.recovered = 0;
488 hammer_io_flush(&volume->io);
489 hammer_rel_volume(volume, 0);
491 return(0);
494 static
496 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
498 if (buffer->io.recovered) {
499 buffer->io.recovered = 0;
500 buffer->io.reclaim = 1;
501 hammer_io_flush(&buffer->io);
502 hammer_rel_buffer(buffer, 0);
503 } else {
504 KKASSERT(buffer->io.lock.refs == 0);
505 ++hammer_count_refedbufs;
506 hammer_ref(&buffer->io.lock);
507 buffer->io.reclaim = 1;
508 hammer_rel_buffer(buffer, 1);
510 return(0);