2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.17 2008/05/18 01:48:50 dillon Exp $
37 * HAMMER dependency flusher thread
39 * Meta data updates create buffer dependencies which are arranged as a
/*
 * Forward declarations for the file-local flusher helpers defined
 * further down in this file.
 */
45 static void hammer_flusher_thread(void *arg
);
46 static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp
);
47 static void hammer_flusher_flush(hammer_mount_t hmp
);
48 static int hammer_must_finalize_undo(hammer_mount_t hmp
);
49 static void hammer_flusher_finalize(hammer_transaction_t trans
);
/*
 * Value stored into hmp->flusher_signal by hammer_flusher_sync() and
 * hammer_flusher_destroy().  The flusher thread only decrements the
 * signal and takes its short timed sleep while the signal is non-zero
 * and below this value.
 */
51 #define HAMMER_FLUSHER_IMMEDIATE 16
/*
 * Request a flush and block until the flusher has caught up.
 *
 * Does nothing unless a flusher thread exists (hmp->flusher_td).
 * Samples hmp->flusher_next, signals the flusher if no signal is
 * already pending, then sleeps on hmp->flusher_done until the
 * sampled sequence number is no longer ahead of it.
 */
54 hammer_flusher_sync(hammer_mount_t hmp
)
58 if (hmp
->flusher_td
) {
59 seq
= hmp
->flusher_next
;
/* Only post a signal and wake the thread if none is pending yet. */
60 if (hmp
->flusher_signal
== 0) {
61 hmp
->flusher_signal
= HAMMER_FLUSHER_IMMEDIATE
;
62 wakeup(&hmp
->flusher_signal
);
/*
 * The comparison is done on the signed difference of the two
 * sequence numbers (presumably so it stays correct if the
 * counters wrap -- TODO confirm).
 */
64 while ((int)(seq
- hmp
->flusher_done
) > 0)
65 tsleep(&hmp
->flusher_done
, 0, "hmrfls", 0);
/*
 * Post a flush request without waiting for it to complete.
 *
 * Does nothing unless a flusher thread exists.  The signal count is
 * always incremented; the wakeup is only issued when the count was
 * zero before the increment.
 */
70 hammer_flusher_async(hammer_mount_t hmp
)
72 if (hmp
->flusher_td
) {
73 if (hmp
->flusher_signal
++ == 0)
74 wakeup(&hmp
->flusher_signal
);
/*
 * Initialize the flusher state of a mount and start its flusher
 * thread.
 *
 * flusher_signal and flusher_done start at 0 and flusher_next at 1.
 * The thread runs hammer_flusher_thread() with hmp as its argument
 * and its handle is stored in hmp->flusher_td.
 */
79 hammer_flusher_create(hammer_mount_t hmp
)
81 hmp
->flusher_signal
= 0;
83 hmp
->flusher_done
= 0;
84 hmp
->flusher_next
= 1;
/* NOTE(review): the remaining lwkt_create() arguments are not shown here. */
85 lwkt_create(hammer_flusher_thread
, hmp
, &hmp
->flusher_td
, NULL
,
/*
 * Ask the flusher thread to exit and wait until it has done so.
 *
 * Does nothing unless a flusher thread exists.  Sets
 * hmp->flusher_exiting, then repeatedly posts an immediate signal
 * and sleeps on hmp->flusher_exiting until hmp->flusher_td has been
 * cleared (hammer_flusher_thread() clears it and issues the matching
 * wakeup on its way out).
 */
90 hammer_flusher_destroy(hammer_mount_t hmp
)
92 if (hmp
->flusher_td
) {
93 hmp
->flusher_exiting
= 1;
94 while (hmp
->flusher_td
) {
95 hmp
->flusher_signal
= HAMMER_FLUSHER_IMMEDIATE
;
96 wakeup(&hmp
->flusher_signal
);
97 tsleep(&hmp
->flusher_exiting
, 0, "hmrwex", 0);
/*
 * Body of the flusher thread; arg is the hammer_mount_t to service.
 *
 * Each pass waits for hmp->flusher_lock to clear, records the
 * sequence number being flushed in hmp->flusher_act, runs the flush
 * bracketed by two loose-I/O cleanups, publishes the completed
 * sequence number in hmp->flusher_done and wakes anyone sleeping on
 * it.  It then checks the exit condition and waits for more work.
 *
 * NOTE(review): the enclosing loop construct, the statement taken
 * when the exit condition holds, and any else-branch are not shown
 * here -- confirm against the complete source.
 */
103 hammer_flusher_thread(void *arg
)
105 hammer_mount_t hmp
= arg
;
/* Hold off while flusher_lock is set. */
108 while (hmp
->flusher_lock
)
109 tsleep(&hmp
->flusher_lock
, 0, "hmrhld", 0);
110 hmp
->flusher_act
= hmp
->flusher_next
;
113 hammer_flusher_clean_loose_ios(hmp
);
114 hammer_flusher_flush(hmp
);
115 hammer_flusher_clean_loose_ios(hmp
);
/* Publish completion; hammer_flusher_sync() sleeps on flusher_done. */
116 hmp
->flusher_done
= hmp
->flusher_act
;
118 wakeup(&hmp
->flusher_done
);
/* Exit condition: exit was requested and no inodes remain queued. */
123 if (hmp
->flusher_exiting
&& TAILQ_EMPTY(&hmp
->flush_list
))
128 * This is a hack until we can dispose of frontend buffer
129 * cache buffers on the frontend.
/*
 * A non-zero signal below HAMMER_FLUSHER_IMMEDIATE is decremented
 * and followed by a timed sleep of hz / 10 ticks.
 */
131 if (hmp
->flusher_signal
&&
132 hmp
->flusher_signal
< HAMMER_FLUSHER_IMMEDIATE
) {
133 --hmp
->flusher_signal
;
134 tsleep(&hmp
->flusher_signal
, 0, "hmrqwk", hz
/ 10);
/* Sleep without timeout while there is neither a signal nor queued work. */
136 while (hmp
->flusher_signal
== 0 &&
137 TAILQ_EMPTY(&hmp
->flush_list
)) {
138 tsleep(&hmp
->flusher_signal
, 0, "hmrwwa", 0);
140 hmp
->flusher_signal
= 0;
/* Clear the thread handle and wake hammer_flusher_destroy(). */
143 hmp
->flusher_td
= NULL
;
144 wakeup(&hmp
->flusher_exiting
);
/*
 * Empty hmp->lose_list.
 *
 * Each entry is asserted to be on lose_list, removed from it,
 * referenced through its lock and then released with
 * hammer_rel_buffer().
 *
 * NOTE(review): the declaration of io and the assignment that sets
 * buffer before the release are not shown here -- presumably buffer
 * refers to the same object as io; confirm.
 */
149 hammer_flusher_clean_loose_ios(hammer_mount_t hmp
)
151 hammer_buffer_t buffer
;
155 * loose ends - buffers without bp's aren't tracked by the kernel
156 * and can build up, so clean them out. This can occur when an
157 * IO completes on a buffer with no references left.
159 while ((io
= TAILQ_FIRST(&hmp
->lose_list
)) != NULL
) {
160 KKASSERT(io
->mod_list
== &hmp
->lose_list
);
161 TAILQ_REMOVE(io
->mod_list
, io
, mod_entry
);
/* Take a reference so the release below has one to drop. */
163 hammer_ref(&io
->lock
);
165 hammer_rel_buffer(buffer
, 0);
/*
 * Flush the inodes queued on hmp->flush_list that belong to the
 * flush group currently being processed (hmp->flusher_act).
 *
 * Runs inside a flusher transaction.  Each inode is removed from the
 * list, synced with hammer_sync_inode() (result stored in ip->error)
 * and completed with hammer_flush_inode_done().  When
 * hammer_must_finalize_undo() reports true the flush is finalized
 * early after dropping into the debugger.  After the loop the flush
 * is finalized, the transaction id is recorded in hmp->flusher_tid
 * and the transaction is ended.
 *
 * NOTE(review): the statement executed when a different flush group
 * is found is not shown here -- the existing comment says the loop
 * stops; confirm.
 */
173 hammer_flusher_flush(hammer_mount_t hmp
)
175 struct hammer_transaction trans
;
176 hammer_blockmap_t rootmap
;
179 hammer_start_transaction_fls(&trans
, hmp
);
/* rootmap points at the in-memory blockmap entry for the UNDO zone. */
180 rootmap
= &hmp
->blockmap
[HAMMER_ZONE_UNDO_INDEX
];
183 * Flush all pending inodes
185 while ((ip
= TAILQ_FIRST(&hmp
->flush_list
)) != NULL
) {
187 * Stop when we hit a different flush group
189 if (ip
->flush_group
!= hmp
->flusher_act
)
193 * Remove the inode from the flush list and inherit
194 * its reference, sync, and clean-up.
196 TAILQ_REMOVE(&hmp
->flush_list
, ip
, flush_entry
);
197 ip
->error
= hammer_sync_inode(ip
);
198 hammer_flush_inode_done(ip
);
201 * XXX this breaks atomicy
203 if (hammer_must_finalize_undo(hmp
)) {
204 Debugger("Too many undos!!");
205 hammer_flusher_finalize(&trans
);
/* Final finalize for this flush, then record the transaction id. */
208 hammer_flusher_finalize(&trans
);
209 hmp
->flusher_tid
= trans
.tid
;
210 hammer_done_transaction(&trans
);
214 * If the UNDO area gets over half full we have to flush it. We can't
215 * afford the UNDO area becoming completely full as that would break
216 * the crash recovery atomicity.
/*
 * Test whether hammer_undo_space(hmp) has dropped below half of
 * hammer_undo_max(hmp).
 *
 * NOTE(review): the values returned for each outcome are not shown
 * here; hammer_flusher_flush() treats a non-zero result as "finalize
 * now" -- confirm.
 */
220 hammer_must_finalize_undo(hammer_mount_t hmp
)
222 if (hammer_undo_space(hmp
) < hammer_undo_max(hmp
) / 2) {
231 * To finalize the flush we finish flushing all undo and data buffers
232 * still present, then we update the volume header and flush it,
233 * then we flush out the meta-data (that can now be undone).
235 * Note that as long as the undo fifo's start and end points do not
236 * match, we always must at least update the volume header.
238 * The sync_lock is used by other threads to issue modifying operations
239 * to HAMMER media without crossing a synchronization boundary or messing
240 * up the media synchronization operation. Specifically, the pruning and
241 * the reblocking ioctls, and allowing the frontend strategy code to
242 * allocate media data space.
/*
 * Finalize a flush for the mount of the given transaction
 * (hmp = trans->hmp) using its root volume (trans->rootvol).
 *
 * Steps, in the order they appear below, all performed between
 * hammer_sync_lock_ex(trans) and hammer_sync_unlock(trans):
 *   1. copy the in-memory blockmap into the root volume's
 *      vol0_blockmap if the volume is modified and the two differ;
 *   2. clear the undo history and walk undo_list, then data_list;
 *   3. wait for hmp->io_running_count to reach zero;
 *   4. update vol0_next_tid and the UNDO zone's first_offset;
 *   5. flush the root volume header if it is modified, and wait for
 *      I/O again;
 *   6. walk meta_list.
 *
 * NOTE(review): the declarations of i, io and count, and the
 * statement that issues the I/O inside each list loop, are not
 * shown here -- confirm against the complete source.
 */
246 hammer_flusher_finalize(hammer_transaction_t trans
)
248 hammer_mount_t hmp
= trans
->hmp
;
249 hammer_volume_t root_volume
= trans
->rootvol
;
250 hammer_blockmap_t rootmap
;
/* Size in bytes of the on-disk blockmap array in the volume header. */
251 const int bmsize
= sizeof(root_volume
->ondisk
->vol0_blockmap
);
256 hammer_sync_lock_ex(trans
);
257 rootmap
= &hmp
->blockmap
[HAMMER_ZONE_UNDO_INDEX
];
260 * Sync the blockmap to the root volume ondisk buffer and generate
261 * the appropriate undo record. We have to generate the UNDO even
262 * though we flush the volume header along with the UNDO fifo update
263 * because the meta-data (including the volume header) is flushed
264 * after the fifo update, not before, and may have to be undone.
266 * No UNDOs can be created after this point until we finish the
/* bcmp() is non-zero when the in-memory and on-disk blockmaps differ. */
269 if (root_volume
->io
.modified
&&
270 bcmp(hmp
->blockmap
, root_volume
->ondisk
->vol0_blockmap
, bmsize
)) {
271 hammer_modify_volume(trans
, root_volume
,
272 &root_volume
->ondisk
->vol0_blockmap
,
/* Refresh the CRC of every in-memory zone entry before copying. */
274 for (i
= 0; i
< HAMMER_MAX_ZONES
; ++i
)
275 hammer_crc_set_blockmap(&hmp
->blockmap
[i
]);
276 bcopy(hmp
->blockmap
, root_volume
->ondisk
->vol0_blockmap
,
278 hammer_modify_volume_done(root_volume
);
282 * Flush the undo bufs, clear the undo cache.
284 hammer_clear_undo_history(hmp
);
/* Entries must have no modify_refs and must not be volume structures. */
287 while ((io
= TAILQ_FIRST(&hmp
->undo_list
)) != NULL
) {
288 KKASSERT(io
->modify_refs
== 0);
289 hammer_ref(&io
->lock
);
290 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
292 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
296 hkprintf("X%d", count
);
/* Same walk for the data buffer list. */
302 while ((io
= TAILQ_FIRST(&hmp
->data_list
)) != NULL
) {
303 KKASSERT(io
->modify_refs
== 0);
304 hammer_ref(&io
->lock
);
305 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
307 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
311 hkprintf("Y%d", count
);
314 * Wait for I/O to complete
317 while (hmp
->io_running_count
)
318 tsleep(&hmp
->io_running_count
, 0, "hmrfl1", 0);
322 * Update the root volume's next_tid field. This field is updated
323 * without any related undo.
/* A NULL transaction and a NULL/0 range are passed: no undo range is named. */
325 if (root_volume
->ondisk
->vol0_next_tid
!= hmp
->next_tid
) {
326 hammer_modify_volume(NULL
, root_volume
, NULL
, 0);
327 root_volume
->ondisk
->vol0_next_tid
= hmp
->next_tid
;
328 hammer_modify_volume_done(root_volume
);
/*
 * Debug hook: counts hammer_debug_recover_faults down and enters the
 * debugger when it reaches zero.
 */
331 if (hammer_debug_recover_faults
> 0) {
332 if (--hammer_debug_recover_faults
== 0) {
333 Debugger("hammer_debug_recover_faults");
339 * Update the UNDO FIFO's first_offset. Same deal.
341 if (rootmap
->first_offset
!= hmp
->flusher_undo_start
) {
342 hammer_modify_volume(NULL
, root_volume
, NULL
, 0);
343 rootmap
->first_offset
= hmp
->flusher_undo_start
;
/* Mirror the new first_offset into the on-disk copy and redo its CRC. */
344 root_volume
->ondisk
->vol0_blockmap
[HAMMER_ZONE_UNDO_INDEX
].first_offset
= rootmap
->first_offset
;
345 hammer_crc_set_blockmap(&root_volume
->ondisk
->vol0_blockmap
[HAMMER_ZONE_UNDO_INDEX
]);
346 hammer_modify_volume_done(root_volume
);
/* Remember the UNDO zone's current next_offset as the next undo start. */
348 hmp
->flusher_undo_start
= rootmap
->next_offset
;
351 * Flush the root volume header.
353 * If a crash occurs while the root volume header is being written
354 * we just have to hope that the undo range has been updated. It
355 * should be done in one I/O but XXX this won't be perfect.
/* The volume CRC is recomputed immediately before the header is flushed. */
357 if (root_volume
->io
.modified
) {
358 hammer_crc_set_volume(root_volume
->ondisk
);
359 hammer_io_flush(&root_volume
->io
);
363 * Wait for I/O to complete
366 while (hmp
->io_running_count
)
367 tsleep(&hmp
->io_running_count
, 0, "hmrfl2", 0);
371 * Flush meta-data. The meta-data will be undone if we crash
372 * so we can safely flush it asynchronously.
375 while ((io
= TAILQ_FIRST(&hmp
->meta_list
)) != NULL
) {
376 KKASSERT(io
->modify_refs
== 0);
377 hammer_ref(&io
->lock
);
378 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
380 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
/* Release the sync lock taken at the top of the function. */
383 hammer_sync_unlock(trans
);
385 hkprintf("Z%d", count
);