2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.2 2008/01/10 07:41:03 dillon Exp $
39 static void hammer_recover_buffer_stage1(hammer_cluster_t cluster
,
41 static void hammer_recover_buffer_stage2(hammer_cluster_t cluster
,
43 static int hammer_recover_record(hammer_cluster_t cluster
,
44 hammer_buffer_t buffer
, int32_t rec_offset
,
45 hammer_record_ondisk_t rec
);
46 static int hammer_recover_btree(hammer_cluster_t cluster
,
47 hammer_buffer_t buffer
, int32_t rec_offset
,
48 hammer_record_ondisk_t rec
);
/*
 * Recover a cluster.  The caller has referenced and locked the cluster.
 *
 * Generally returns 0 on success and EIO if the recovery was unsuccessful.
 */
56 hammer_recover(hammer_cluster_t cluster
)
/*
 * NOTE(review): this copy of the file is extraction-mangled -- statements
 * are split across lines, the original file's line numbers are fused into
 * the text, and some lines (braces, local declarations, several call
 * arguments) are missing entirely.  The comments below annotate only the
 * logic that is visible; recover a pristine copy before compiling.
 */
/* Recovery is currently stubbed out -- everything below is dead code. */
63 return(0); /* XXX temporarily disabled */
64 Debugger("hammer_recover");
/* A non-zero synchronization TID must be on-disk for recovery to work. */
65 KKASSERT(cluster
->ondisk
->synchronized_tid
);
/* Size of the cluster expressed in HAMMER_BUFSIZE buffers. */
67 nbuffers
= cluster
->ondisk
->clu_limit
/ HAMMER_BUFSIZE
;
68 hammer_modify_cluster(cluster
);
71 * Re-initialize the A-lists.
/*
 * NOTE(review): the trailing argument(s) of each hammer_alist_init call
 * below (every call ends in a dangling comma) are missing from this copy.
 */
73 hammer_alist_init(&cluster
->alist_master
, 1, nbuffers
- 1,
75 hammer_alist_init(&cluster
->alist_btree
,
77 (nbuffers
- 1) * HAMMER_FSBUF_MAXBLKS
,
79 hammer_alist_init(&cluster
->alist_mdata
,
81 (nbuffers
- 1) * HAMMER_FSBUF_MAXBLKS
,
83 hammer_alist_init(&cluster
->alist_record
,
85 (nbuffers
- 1) * HAMMER_FSBUF_MAXBLKS
,
89 * Scan the cluster's clu_record_buf_bitmap, reserve record buffers
90 * from the master bitmap before we try to recover their data. Free
91 * the block of records to alist_record.
93 * We need to mark the blocks as free in alist_record so future
94 * allocations will dive into the buffer A-list's, but we don't
95 * want to destroy the underlying buffer A-list's. Because the
96 * meta data in cluster->alist_record indicates state 00 (all-allocated
97 * but not initialized), it will not dive down into the buffer when
98 * freeing the entire buffer.
100 for (buf_no
= 1; buf_no
< nbuffers
; ++buf_no
) {
101 bitmap
= cluster
->ondisk
->clu_record_buf_bitmap
[buf_no
>> 5];
/*
 * NOTE(review): this skip-ahead to the last index of the current 32-bit
 * bitmap word is presumably guarded by an "if (bitmap == 0)" fast path
 * whose lines are missing here -- TODO confirm against the pristine file.
 */
103 buf_no
= ((buf_no
+ 32) & ~31) - 1;
106 if ((bitmap
& (1 << (buf_no
& 31))) == 0)
/*
 * Reserve the record buffer in the master A-list, then expose its
 * record blocks through alist_record.
 */
108 r
= hammer_alist_alloc_fwd(&cluster
->alist_master
, 1, buf_no
);
109 KKASSERT(r
== buf_no
);
110 hammer_alist_free(&cluster
->alist_record
,
111 buf_no
* HAMMER_FSBUF_MAXBLKS
,
112 HAMMER_FSBUF_MAXBLKS
);
116 * Scan the cluster's clu_record_buf_bitmap, reassign buffers
117 * from alist_master to alist_record, and reallocate individual
118 * records and any related data reference if they meet the critera.
/* Stage-1 pass over every record buffer flagged in the bitmap. */
120 for (buf_no
= 1; buf_no
< nbuffers
; ++buf_no
) {
121 bitmap
= cluster
->ondisk
->clu_record_buf_bitmap
[buf_no
>> 5];
123 buf_no
= ((buf_no
+ 32) & ~31) - 1;
126 if ((bitmap
& (1 << (buf_no
& 31))) == 0)
128 hammer_recover_buffer_stage1(cluster
, buf_no
);
132 * The cluster is now in good enough shape that general allocations
133 * are possible. Construct an empty B-Tree root.
/* Allocate an empty leaf node to serve as the new B-Tree root. */
139 croot
= hammer_alloc_btree(cluster
, &error
);
141 hammer_modify_node(croot
);
142 bzero(croot
->ondisk
, sizeof(*croot
->ondisk
));
143 croot
->ondisk
->count
= 0;
144 croot
->ondisk
->type
= HAMMER_BTREE_TYPE_LEAF
;
145 cluster
->ondisk
->clu_btree_root
= croot
->node_offset
;
150 * Scan the cluster's clu_record_buf_bitmap again and regenerate
153 * XXX B-tree record for cluster-push
/* Stage-2 pass: re-insert every surviving record into the new B-Tree. */
155 for (buf_no
= 1; buf_no
< nbuffers
; ++buf_no
) {
156 bitmap
= cluster
->ondisk
->clu_record_buf_bitmap
[buf_no
>> 5];
158 buf_no
= ((buf_no
+ 32) & ~31) - 1;
161 if ((bitmap
& (1 << (buf_no
& 31))) == 0)
163 hammer_recover_buffer_stage2(cluster
, buf_no
);
167 * Validate the parent cluster pointer. XXX
/*
 * Reassign buf_no as a record buffer and recover any related data.
 */
177 hammer_recover_buffer_stage1(hammer_cluster_t cluster
, int32_t buf_no
)
/*
 * Stage 1 of per-buffer recovery: fetch the buffer, rebuild its record
 * A-list, then scan every allocated record slot and hand it to
 * hammer_recover_record().  Records that fail validation are freed from
 * the buffer's A-list.
 *
 * NOTE(review): extraction damage -- the opening brace, some local
 * declarations (error, rec_no, rec_offset), the error-branch "if" around
 * the kprintf, and the record-scan loop construct are missing from this
 * copy.
 */
179 hammer_record_ondisk_t rec
;
180 hammer_buffer_t buffer
;
185 buffer
= hammer_get_buffer(cluster
, buf_no
, 0, &error
);
188 * If we are unable to access the buffer leave it in a
189 * reserved state on the master alist.
191 kprintf("hammer_recover_buffer_stage1: error "
192 "recovering %d:%d:%d\n",
193 cluster
->volume
->vol_no
, cluster
->clu_no
, buf_no
);
198 * Recover the buffer, scan and validate allocated records. Records
199 * which cannot be recovered are freed.
201 hammer_modify_buffer(buffer
);
202 hammer_alist_recover(&buffer
->alist
, 0, 0, HAMMER_RECORD_NODES
);
/*
 * Scan allocated record slots.  NOTE(review): rec_no's initializer and
 * the enclosing loop header are missing from this copy.
 */
205 rec_no
= hammer_alist_find(&buffer
->alist
, rec_no
+ 1);
206 if (rec_no
== HAMMER_ALIST_BLOCK_NONE
)
/* Cluster-relative byte offset of this record slot. */
208 rec_offset
= offsetof(union hammer_fsbuf_ondisk
,
209 record
.recs
[rec_no
]);
210 rec_offset
+= buf_no
* HAMMER_BUFSIZE
;
211 rec
= &buffer
->ondisk
->record
.recs
[rec_no
];
212 error
= hammer_recover_record(cluster
, buffer
, rec_offset
, rec
);
214 kprintf("hammer_recover_record: failed %d:%d@%d\n",
215 cluster
->clu_no
, buffer
->buf_no
, rec_offset
);
/* Unrecoverable record: release its slot in the buffer's A-list. */
216 hammer_alist_free(&buffer
->alist
, rec_no
, 1);
/* Drop the buffer reference obtained above. */
219 hammer_rel_buffer(buffer
, 0);
/*
 * Recover a record, at least into a state that doesn't blow up the
 * filesystem.  Returns 0 on success, non-zero if the record is
 * unrecoverable.
 */
228 hammer_recover_record(hammer_cluster_t cluster
, hammer_buffer_t buffer
,
229 int32_t rec_offset
, hammer_record_ondisk_t rec
)
/*
 * Validate a single on-disk record against the recovered cluster and
 * re-account any data it references in the appropriate A-list.
 *
 * NOTE(review): extraction damage -- braces, several local declarations
 * (data_offset, data_len, nblks, dbuf_no, dblk_no, r, error) and the
 * return/failure statements of most validation branches are missing from
 * this copy; comments below describe only the visible logic.
 */
231 hammer_buffer_t dbuf
;
232 hammer_tid_t syncid
= cluster
->ondisk
->synchronized_tid
;
242 * Discard records created after the synchronization point and
243 * undo any deletions made after the synchronization point.
245 if (rec
->base
.base
.create_tid
> syncid
)
248 if (rec
->base
.base
.delete_tid
> syncid
)
249 rec
->base
.base
.delete_tid
= 0;
252 * Validate the record's B-Tree key
/* Key must fall within [clu_btree_beg, clu_btree_end). */
254 if (hammer_btree_cmp(&rec
->base
.base
,
255 &cluster
->ondisk
->clu_btree_beg
) < 0) {
258 if (hammer_btree_cmp(&rec
->base
.base
,
259 &cluster
->ondisk
->clu_btree_end
) >= 0) {
264 * Validate the record's data. If the offset is 0 there is no data
265 * (or it is zero-fill) and we can return success immediately.
266 * Otherwise make sure everything is ok.
268 data_offset
= rec
->base
.data_offset
;
269 data_len
= rec
->base
.data_len
;
/*
 * NOTE(review): the branch context for this zeroing (presumably a
 * data_len == 0 normalization) is missing from this copy -- TODO confirm.
 */
272 rec
->base
.data_offset
= 0;
273 if (data_offset
== 0)
/* Range-check the data reference against the cluster's limits. */
275 if (data_offset
< HAMMER_BUFSIZE
||
276 data_offset
>= cluster
->ondisk
->clu_limit
||
277 data_len
< 0 || data_len
> HAMMER_MAXDATA
||
278 data_offset
+ data_len
> cluster
->ondisk
->clu_limit
) {
283 * Check data_offset relative to rec_offset
285 if (data_offset
< rec_offset
&& data_offset
+ data_len
> rec_offset
)
287 if (data_offset
>= rec_offset
&&
288 data_offset
< rec_offset
+ sizeof(struct hammer_base_record
)) {
293 * Check for data embedded in the record
295 if (data_offset
>= rec_offset
&&
296 data_offset
< rec_offset
+ HAMMER_RECORD_SIZE
) {
297 if (data_offset
+ data_len
> rec_offset
+ HAMMER_RECORD_SIZE
)
303 * Recover the allocated data either out of the cluster's master alist
304 * or as a buffer sub-allocation.
/* Whole-buffer data: reserve it directly from the master A-list. */
306 if ((data_len
& HAMMER_BUFMASK
) == 0) {
307 if (data_offset
& HAMMER_BUFMASK
)
309 nblks
= data_len
/ HAMMER_BUFSIZE
;
310 dbuf_no
= data_offset
/ HAMMER_BUFSIZE
;
312 r
= hammer_alist_alloc_fwd(&cluster
->alist_master
,
314 if (r
== HAMMER_ALIST_BLOCK_NONE
)
317 hammer_alist_free(&cluster
->alist_master
, r
, nblks
);
/* Sub-buffer data: validate alignment and single-buffer containment. */
321 if ((data_offset
& ~HAMMER_BUFMASK
) !=
322 ((data_offset
+ data_len
) & ~HAMMER_BUFMASK
)) {
325 if ((data_offset
& HAMMER_BUFMASK
) <
326 sizeof(struct hammer_fsbuf_head
)) {
329 if (data_offset
& HAMMER_DATA_BLKMASK
)
333 * Ok, recover the space in the data buffer.
335 dbuf_no
= data_offset
/ HAMMER_BUFSIZE
;
336 r
= hammer_alist_alloc_fwd(&cluster
->alist_master
, 1, dbuf_no
);
337 if (r
!= dbuf_no
&& r
!= HAMMER_ALIST_BLOCK_NONE
)
338 hammer_alist_free(&cluster
->alist_master
, r
, 1);
341 * This is the first time we've tried to recover
342 * data in this data buffer, reinit it (but don't
343 * zero it out, obviously).
345 * Calling initbuffer marks the data blocks within
346 * the buffer as being free.
348 dbuf
= hammer_get_buffer(cluster
, dbuf_no
,
351 hammer_modify_buffer(dbuf
);
352 hammer_initbuffer(&dbuf
->alist
,
355 dbuf
->buf_type
= HAMMER_FSBUF_DATA
;
359 * We've seen this data buffer before.
361 dbuf
= hammer_get_buffer(cluster
, dbuf_no
,
367 if (dbuf
->buf_type
!= HAMMER_FSBUF_DATA
) {
368 hammer_rel_buffer(dbuf
, 0);
373 * Figure out the data block number and number of blocks.
375 nblks
= (data_len
+ HAMMER_DATA_BLKMASK
) & ~HAMMER_DATA_BLKMASK
;
376 nblks
/= HAMMER_DATA_BLKSIZE
;
377 dblk_no
= ((data_offset
& HAMMER_BUFMASK
) - offsetof(union hammer_fsbuf_ondisk
, data
.data
)) / HAMMER_DATA_BLKSIZE
;
/* Cross-check the derived block number against the raw offset. */
378 if ((data_offset
& HAMMER_BUFMASK
) != offsetof(union hammer_fsbuf_ondisk
, data
.data
[dblk_no
])) {
379 kprintf("dblk_no %d does not match data_offset %d/%d\n",
381 offsetof(union hammer_fsbuf_ondisk
, data
.data
[dblk_no
]),
382 (data_offset
& HAMMER_BUFMASK
));
383 hammer_rel_buffer(dbuf
, 0);
384 Debugger("bad data");
/* Re-allocate the data blocks in the mdata A-list at the expected spot. */
387 dblk_no
*= HAMMER_FSBUF_MAXBLKS
;
388 r
= hammer_alist_alloc_fwd(&cluster
->alist_mdata
, nblks
, dblk_no
);
390 if (r
!= HAMMER_ALIST_BLOCK_NONE
)
391 hammer_alist_free(&cluster
->alist_mdata
, r
, nblks
);
392 hammer_rel_buffer(dbuf
, 0);
395 hammer_rel_buffer(dbuf
, 0);
/*
 * Rebuild the B-Tree for the records residing in the specified buffer.
 */
404 hammer_recover_buffer_stage2(hammer_cluster_t cluster
, int32_t buf_no
)
/*
 * Stage 2 of per-buffer recovery: mirror the stage-1 scan, but hand each
 * surviving record to hammer_recover_btree() to re-insert it into the
 * freshly constructed B-Tree.  Records that fail insertion are reported
 * but deliberately not freed (see the commented-out free below).
 *
 * NOTE(review): extraction damage -- the opening brace, some local
 * declarations (error, rec_no, rec_offset), the error-branch "if" and
 * the scan loop construct are missing from this copy.
 */
406 hammer_record_ondisk_t rec
;
407 hammer_buffer_t buffer
;
412 buffer
= hammer_get_buffer(cluster
, buf_no
, 0, &error
);
415 * If we are unable to access the buffer leave it in a
416 * reserved state on the master alist.
418 kprintf("hammer_recover_buffer_stage2: error "
419 "recovering %d:%d:%d\n",
420 cluster
->volume
->vol_no
, cluster
->clu_no
, buf_no
);
425 * Recover the buffer, scan and validate allocated records. Records
426 * which cannot be recovered are freed.
/* Scan allocated record slots, same pattern as stage 1. */
430 rec_no
= hammer_alist_find(&buffer
->alist
, rec_no
+ 1);
431 if (rec_no
== HAMMER_ALIST_BLOCK_NONE
)
433 rec_offset
= offsetof(union hammer_fsbuf_ondisk
,
434 record
.recs
[rec_no
]);
435 rec_offset
+= buf_no
* HAMMER_BUFSIZE
;
436 rec
= &buffer
->ondisk
->record
.recs
[rec_no
];
437 error
= hammer_recover_btree(cluster
, buffer
, rec_offset
, rec
);
439 kprintf("hammer_recover_btree: failed %d:%d@%d\n",
440 cluster
->clu_no
, buffer
->buf_no
, rec_offset
);
441 /* XXX free the record and its data? */
442 /*hammer_alist_free(&buffer->alist, rec_no, 1);*/
/* Drop the buffer reference obtained above. */
445 hammer_rel_buffer(buffer
, 0);
449 hammer_recover_btree(hammer_cluster_t cluster
, hammer_buffer_t buffer
,
450 int32_t rec_offset
, hammer_record_ondisk_t rec
)
/*
 * Insert one recovered record into the cluster's B-Tree: position an
 * insertion cursor at the record's key, treat a successful lookup as a
 * duplicate-key error, otherwise synthesize a leaf element from the
 * on-disk record and insert it.
 *
 * NOTE(review): extraction damage -- braces, the error-branch "if"
 * statements and the final return are missing from this copy.
 */
452 struct hammer_cursor cursor
;
453 union hammer_btree_elm elm
;
456 error
= hammer_init_cursor_cluster(&cursor
, cluster
);
/* Position an insertion cursor at the record's key. */
459 cursor
.key_beg
= rec
->base
.base
;
460 cursor
.flags
= HAMMER_CURSOR_INSERT
;
/* A successful lookup under HAMMER_CURSOR_INSERT means the key exists. */
461 error
= hammer_btree_lookup(&cursor
);
463 kprintf("hammer_recover_btree: Duplicate record\n");
464 hammer_print_btree_elm(&cursor
.node
->ondisk
->elms
[cursor
.index
], HAMMER_BTREE_TYPE_LEAF
, cursor
.index
);
465 Debugger("duplicate record");
/* Synthesize the leaf element from the on-disk record. */
470 elm
.leaf
.base
= rec
->base
.base
;
471 elm
.leaf
.rec_offset
= rec_offset
;
472 elm
.leaf
.data_offset
= rec
->base
.data_offset
;
473 elm
.leaf
.data_len
= rec
->base
.data_len
;
474 elm
.leaf
.data_crc
= rec
->base
.data_crc
;
476 error
= hammer_btree_insert(&cursor
, &elm
);
478 kprintf("hammer_recover_btree: insertion failed\n");
480 /* XXX cluster pushes? */
/* Release cursor resources regardless of outcome. */
483 hammer_done_cursor(&cursor
);