2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.22 2008/06/10 05:06:20 dillon Exp $
37 * HAMMER dependency flusher thread
39 * Meta data updates create buffer dependencies which are arranged as a
/*
 * Forward declarations for the file-local (static) flusher routines
 * defined further down: the flusher thread entry point, the loose-I/O
 * cleaner, the flush-group and per-inode flush routines, the UNDO
 * pressure test and the finalization routine.
 *
 * HAMMER_FLUSHER_IMMEDIATE is defined as 16; no use of it is visible
 * in this part of the file, so its meaning is not documented here.
 */
45 static void hammer_flusher_thread(void *arg
);
46 static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp
);
47 static void hammer_flusher_flush(hammer_mount_t hmp
);
48 static void hammer_flusher_flush_inode(hammer_inode_t ip
,
49 hammer_transaction_t trans
);
50 static int hammer_must_finalize_undo(hammer_mount_t hmp
);
51 static void hammer_flusher_finalize(hammer_transaction_t trans
, int final
);
53 #define HAMMER_FLUSHER_IMMEDIATE 16
/*
 * hammer_flusher_sync - request a flush and wait for it to complete.
 *
 * hmp: the HAMMER mount whose flusher should run.
 *
 * Does nothing unless hmp->flusher_td is set.  Otherwise it samples
 * hmp->flusher_next into seq, bumps hmp->flusher_signal (issuing a
 * wakeup on that field only on the 0 -> 1 transition), and then sleeps
 * on hmp->flusher_done until flusher_done has reached seq.
 *
 * The wait condition compares the difference (seq - flusher_done) as a
 * signed int rather than comparing the two counters directly;
 * presumably this keeps the ordering correct across counter wraparound
 * (TODO confirm against the field types, which are declared elsewhere).
 */
56 hammer_flusher_sync(hammer_mount_t hmp
)
60 if (hmp
->flusher_td
) {
61 seq
= hmp
->flusher_next
;
62 if (hmp
->flusher_signal
++ == 0)
63 wakeup(&hmp
->flusher_signal
);
64 while ((int)(seq
- hmp
->flusher_done
) > 0)
65 tsleep(&hmp
->flusher_done
, 0, "hmrfls", 0);
/*
 * hammer_flusher_async - request a flush without waiting for it.
 *
 * hmp: the HAMMER mount whose flusher should run.
 *
 * Does nothing unless hmp->flusher_td is set.  Otherwise it bumps
 * hmp->flusher_signal and issues a wakeup on that field only when the
 * previous value was zero.  Unlike hammer_flusher_sync there is no
 * sleep on hmp->flusher_done.
 */
70 hammer_flusher_async(hammer_mount_t hmp
)
72 if (hmp
->flusher_td
) {
73 if (hmp
->flusher_signal
++ == 0)
74 wakeup(&hmp
->flusher_signal
);
/*
 * hammer_flusher_create - initialize flusher state and start the thread.
 *
 * hmp: the HAMMER mount to attach a flusher to.
 *
 * Resets hmp->flusher_signal and hmp->flusher_done to 0, sets
 * hmp->flusher_next to 1, and calls lwkt_create with
 * hammer_flusher_thread as the entry point, hmp as its argument and
 * &hmp->flusher_td as the location receiving the thread pointer.
 *
 * NOTE(review): the trailing lwkt_create arguments are not visible
 * here, so flags, cpu selection and the thread name are not described.
 */
79 hammer_flusher_create(hammer_mount_t hmp
)
81 hmp
->flusher_signal
= 0;
83 hmp
->flusher_done
= 0;
84 hmp
->flusher_next
= 1;
85 lwkt_create(hammer_flusher_thread
, hmp
, &hmp
->flusher_td
, NULL
,
/*
 * hammer_flusher_destroy - ask the flusher thread to exit and wait.
 *
 * hmp: the HAMMER mount whose flusher should be torn down.
 *
 * Does nothing unless hmp->flusher_td is set.  Otherwise it sets
 * hmp->flusher_exiting to 1 and then, for as long as hmp->flusher_td
 * remains non-NULL, increments hmp->flusher_signal, wakes any sleeper
 * on that field and sleeps on hmp->flusher_exiting.  The thread side
 * (hammer_flusher_thread) clears flusher_td and wakes flusher_exiting
 * when it terminates, which ends this loop.
 */
90 hammer_flusher_destroy(hammer_mount_t hmp
)
92 if (hmp
->flusher_td
) {
93 hmp
->flusher_exiting
= 1;
94 while (hmp
->flusher_td
) {
95 ++hmp
->flusher_signal
;
96 wakeup(&hmp
->flusher_signal
);
97 tsleep(&hmp
->flusher_exiting
, 0, "hmrwex", 0);
/*
 * hammer_flusher_thread - entry point of the flusher thread.
 *
 * arg: the hammer_mount_t passed to lwkt_create, used as hmp.
 *
 * Visible sequence of one flush cycle:
 *   - sleep while hmp->flusher_lock is non-zero;
 *   - latch hmp->flusher_next into hmp->flusher_act;
 *   - clean loose I/Os, flush the current group, clean loose I/Os again;
 *   - publish hmp->flusher_done = hmp->flusher_act and wake sleepers on
 *     flusher_done (this is what hammer_flusher_sync waits for);
 *   - test for exit: hmp->flusher_exiting set and hmp->flush_list empty;
 *   - otherwise sleep until hmp->flusher_signal is non-zero, then
 *     reset it to 0.
 * On the way out the thread sets hmp->flusher_td to NULL and wakes
 * sleepers on hmp->flusher_exiting.
 *
 * NOTE(review): the enclosing loop, the statement taken when the exit
 * test succeeds, and any update of flusher_next are not visible here;
 * the cycle above is presumably repeated in a loop (TODO confirm).
 */
103 hammer_flusher_thread(void *arg
)
105 hammer_mount_t hmp
= arg
;
108 while (hmp
->flusher_lock
)
109 tsleep(&hmp
->flusher_lock
, 0, "hmrhld", 0);
111 hmp
->flusher_act
= hmp
->flusher_next
;
113 hammer_flusher_clean_loose_ios(hmp
);
114 hammer_flusher_flush(hmp
);
115 hammer_flusher_clean_loose_ios(hmp
);
116 hmp
->flusher_done
= hmp
->flusher_act
;
118 wakeup(&hmp
->flusher_done
);
123 if (hmp
->flusher_exiting
&& TAILQ_EMPTY(&hmp
->flush_list
))
127 * This is a hack until we can dispose of frontend buffer
128 * cache buffers on the frontend.
130 while (hmp
->flusher_signal
== 0)
131 tsleep(&hmp
->flusher_signal
, 0, "hmrwwa", 0);
132 hmp
->flusher_signal
= 0;
134 hmp
->flusher_td
= NULL
;
135 wakeup(&hmp
->flusher_exiting
);
/*
 * hammer_flusher_clean_loose_ios - drain hmp->lose_list.
 *
 * hmp: the HAMMER mount whose loose I/O list is emptied.
 *
 * Repeatedly takes the first entry of hmp->lose_list until the list is
 * empty.  For each entry it asserts that io->mod_list points at
 * hmp->lose_list, unlinks the entry through its mod_entry linkage,
 * takes a reference on io->lock and then calls hammer_rel_buffer on
 * the local buffer with a second argument of 0.
 *
 * NOTE(review): the statement that derives buffer from io is not
 * visible here; presumably the io is embedded in the buffer, as the
 * casts in hammer_flusher_finalize suggest (TODO confirm).
 */
140 hammer_flusher_clean_loose_ios(hammer_mount_t hmp
)
142 hammer_buffer_t buffer
;
146 * loose ends - buffers without bp's aren't tracked by the kernel
147 * and can build up, so clean them out. This can occur when an
148 * IO completes on a buffer with no references left.
150 while ((io
= TAILQ_FIRST(&hmp
->lose_list
)) != NULL
) {
151 KKASSERT(io
->mod_list
== &hmp
->lose_list
);
152 TAILQ_REMOVE(io
->mod_list
, io
, mod_entry
);
154 hammer_ref(&io
->lock
);
156 hammer_rel_buffer(buffer
, 0);
161 * Flush all inodes in the current flush group.
/*
 * hammer_flusher_flush - flush the inodes of the active flush group.
 *
 * hmp: the HAMMER mount being flushed.
 *
 * Opens a flusher transaction with hammer_start_transaction_fls, then
 * walks hmp->flush_list from the head.  Each inode is compared against
 * hmp->flusher_act via ip->flush_group, removed from the list and
 * handed to hammer_flusher_flush_inode.  Afterwards the transaction is
 * finalized with final set to 1 and trans.tid is recorded in
 * hmp->flusher_tid.
 *
 * The same head-of-list walk is then applied to hmp->delay_list:
 * each reservation is compared against hmp->flusher_act, unlinked and
 * passed to hammer_blockmap_reserve_complete.  The transaction is
 * closed with hammer_done_transaction.
 *
 * NOTE(review): the statement executed when a flush_group does not
 * match flusher_act is not visible for either loop; presumably the
 * walk stops at the first mismatch (TODO confirm).
 */
164 hammer_flusher_flush(hammer_mount_t hmp
)
166 struct hammer_transaction trans
;
168 hammer_reserve_t resv
;
173 hammer_start_transaction_fls(&trans
, hmp
);
174 while ((ip
= TAILQ_FIRST(&hmp
->flush_list
)) != NULL
) {
175 if (ip
->flush_group
!= hmp
->flusher_act
)
177 TAILQ_REMOVE(&hmp
->flush_list
, ip
, flush_entry
);
178 hammer_flusher_flush_inode(ip
, &trans
);
180 hammer_flusher_finalize(&trans
, 1);
181 hmp
->flusher_tid
= trans
.tid
;
184 * Clean up any freed big-blocks (typically zone-2).
185 * resv->flush_group is typically set several flush groups ahead
186 * of the free to ensure that the freed block is not reused until
187 * it can no longer be reused.
189 while ((resv
= TAILQ_FIRST(&hmp
->delay_list
)) != NULL
) {
190 if (resv
->flush_group
!= hmp
->flusher_act
)
192 TAILQ_REMOVE(&hmp
->delay_list
, resv
, delay_entry
);
193 hammer_blockmap_reserve_complete(hmp
, resv
);
197 hammer_done_transaction(&trans
);
201 * Flush a single inode that is part of a flush group.
/*
 * hammer_flusher_flush_inode - sync one inode and finalize if needed.
 *
 * ip:    the inode to flush; its hmp field supplies the mount.
 * trans: the flusher transaction the inode is flushed under.
 *
 * Stores the result of hammer_sync_inode in ip->error and then calls
 * hammer_flush_inode_done.  The inode lock calls around that pair are
 * commented out.
 *
 * After the sync, one of two finalizations may be triggered:
 *   - if hammer_must_finalize_undo reports UNDO pressure, a warning is
 *     printed and the transaction is finalized with final set to 1;
 *   - otherwise, if locked_dirty_count plus io_running_count on the
 *     mount exceeds hammer_limit_dirtybufs, the transaction is
 *     finalized with final set to 0.
 *
 * NOTE(review): the warning string passed to kprintf has no trailing
 * newline, so the message will run into whatever is printed next on
 * the console; appending a newline to the literal looks like the
 * intended form.
 */
205 hammer_flusher_flush_inode(hammer_inode_t ip
, hammer_transaction_t trans
)
207 hammer_mount_t hmp
= ip
->hmp
;
209 /*hammer_lock_ex(&ip->lock);*/
210 ip
->error
= hammer_sync_inode(ip
);
211 hammer_flush_inode_done(ip
);
212 /*hammer_unlock(&ip->lock);*/
214 if (hammer_must_finalize_undo(hmp
)) {
215 kprintf("HAMMER: Warning: UNDO area too small!");
216 hammer_flusher_finalize(trans
, 1);
217 } else if (trans
->hmp
->locked_dirty_count
+
218 trans
->hmp
->io_running_count
> hammer_limit_dirtybufs
) {
220 hammer_flusher_finalize(trans
, 0);
225 * If the UNDO area gets over half full we have to flush it. We can't
226 * afford the UNDO area becoming completely full as that would break
227 * the crash recovery atomicity.
/*
 * hammer_must_finalize_undo - test whether the UNDO area needs flushing.
 *
 * hmp: the HAMMER mount whose UNDO area is examined.
 *
 * The visible test is true when hammer_undo_space(hmp) is below half
 * of hammer_undo_max(hmp).  The caller, hammer_flusher_flush_inode,
 * treats a true result as a request to finalize.
 *
 * NOTE(review): the return statements are not visible here, and
 * whether hammer_undo_space reports free or used space cannot be
 * determined from this file (TODO confirm against its definition).
 */
231 hammer_must_finalize_undo(hammer_mount_t hmp
)
233 if (hammer_undo_space(hmp
) < hammer_undo_max(hmp
) / 2) {
242 * Flush all pending UNDOs, wait for write completion, update the volume
243 * header with the new UNDO end position, and flush it. Then
244 * asynchronously flush the meta-data.
246 * If this is the last finalization in a flush group we also synchronize
247 * our cached blockmap and set hmp->flusher_undo_start and our cached undo
248 * fifo first_offset so the next flush resets the FIFO pointers.
/*
 * hammer_flusher_finalize - push one batch of flusher work to disk.
 *
 * trans: the flusher transaction; supplies rootvol and tid.
 * final: non-zero for the last finalization of a flush group.
 *
 * Visible sequence:
 *   1. Walk hmp->data_list; each io is referenced, write-interlocked,
 *      asserted not to be a volume structure, released from the
 *      interlock and dropped with hammer_rel_buffer.
 *   2. Take the exclusive sync lock with hammer_sync_lock_ex.
 *   3. If root_volume->io.modified is set, copy the cached blockmap
 *      array (hmp->blockmap) over the on-disk vol0_blockmap inside a
 *      hammer_modify_volume / hammer_modify_volume_done pair that is
 *      given trans, after setting the CRC of every cached zone entry.
 *   4. Walk hmp->undo_list; each io must have modify_refs == 0.
 *   5. Sleep until hmp->io_running_count drops to zero.
 *   6. If the cached UNDO zone first_offset or next_offset differs
 *      from the on-disk copy, update both on-disk fields, recompute
 *      the blockmap and volume CRCs, and raise vol0_next_tid to
 *      trans->tid when it is lower.  This modify pair passes NULL for
 *      the transaction and range.
 *   7. If root_volume->io.modified is set, flush the volume header io
 *      with hammer_io_flush.
 *   8. Sleep again until hmp->io_running_count drops to zero.
 *   9. Walk hmp->meta_list; each io must have modify_refs == 0.
 *  10. Set the cached UNDO first_offset to its next_offset and call
 *      hammer_clear_undo_history.
 *  11. Release the sync lock with hammer_sync_unlock.
 *
 * NOTE(review): the per-io flush calls inside the three list walks,
 * the declaration of hmp, and the conditions that test final (around
 * steps 3 and 10, going by the neighbouring comments) are not visible
 * here and are therefore not described as fact.
 */
252 hammer_flusher_finalize(hammer_transaction_t trans
, int final
)
254 hammer_volume_t root_volume
;
255 hammer_blockmap_t cundomap
, dundomap
;
262 root_volume
= trans
->rootvol
;
265 * Flush data buffers. This can occur asynchronously and at any
266 * time. We must interlock against the frontend direct-data write
267 * but do not have to acquire the sync-lock yet.
270 while ((io
= TAILQ_FIRST(&hmp
->data_list
)) != NULL
) {
271 hammer_ref(&io
->lock
);
272 hammer_io_write_interlock(io
);
273 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
275 hammer_io_done_interlock(io
);
276 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
281 * The sync-lock is required for the remaining sequence. This lock
282 * prevents meta-data from being modified.
284 hammer_sync_lock_ex(trans
);
287 * If we have been asked to finalize the volume header sync the
288 * cached blockmap to the on-disk blockmap. Generate an UNDO
289 * record for the update.
292 cundomap
= &hmp
->blockmap
[0];
293 dundomap
= &root_volume
->ondisk
->vol0_blockmap
[0];
294 if (root_volume
->io
.modified
) {
295 hammer_modify_volume(trans
, root_volume
,
296 dundomap
, sizeof(hmp
->blockmap
));
297 for (i
= 0; i
< HAMMER_MAX_ZONES
; ++i
)
298 hammer_crc_set_blockmap(&cundomap
[i
]);
299 bcopy(cundomap
, dundomap
, sizeof(hmp
->blockmap
));
300 hammer_modify_volume_done(root_volume
);
308 while ((io
= TAILQ_FIRST(&hmp
->undo_list
)) != NULL
) {
309 KKASSERT(io
->modify_refs
== 0);
310 hammer_ref(&io
->lock
);
311 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
313 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
318 * Wait for I/Os to complete
321 while (hmp
->io_running_count
)
322 tsleep(&hmp
->io_running_count
, 0, "hmrfl1", 0);
326 * Update the on-disk volume header with new UNDO FIFO end position
327 * (do not generate new UNDO records for this change). We have to
328 * do this for the UNDO FIFO whether (final) is set or not.
330 * Also update the on-disk next_tid field. This does not require
331 * an UNDO. However, because our TID is generated before we get
332 * the sync lock another sync may have beat us to the punch.
334 * The volume header will be flushed out synchronously.
336 dundomap
= &root_volume
->ondisk
->vol0_blockmap
[HAMMER_ZONE_UNDO_INDEX
];
337 cundomap
= &hmp
->blockmap
[HAMMER_ZONE_UNDO_INDEX
];
339 if (dundomap
->first_offset
!= cundomap
->first_offset
||
340 dundomap
->next_offset
!= cundomap
->next_offset
) {
341 hammer_modify_volume(NULL
, root_volume
, NULL
, 0);
342 dundomap
->first_offset
= cundomap
->first_offset
;
343 dundomap
->next_offset
= cundomap
->next_offset
;
344 hammer_crc_set_blockmap(dundomap
);
345 hammer_crc_set_volume(root_volume
->ondisk
);
346 if (root_volume
->ondisk
->vol0_next_tid
< trans
->tid
)
347 root_volume
->ondisk
->vol0_next_tid
= trans
->tid
;
348 hammer_modify_volume_done(root_volume
);
351 if (root_volume
->io
.modified
) {
352 hammer_io_flush(&root_volume
->io
);
356 * Wait for I/Os to complete
359 while (hmp
->io_running_count
)
360 tsleep(&hmp
->io_running_count
, 0, "hmrfl2", 0);
364 * Flush meta-data. The meta-data will be undone if we crash
365 * so we can safely flush it asynchronously.
367 * Repeated catchups will wind up flushing this update's meta-data
368 * and the UNDO buffers for the next update simultaniously. This
372 while ((io
= TAILQ_FIRST(&hmp
->meta_list
)) != NULL
) {
373 KKASSERT(io
->modify_refs
== 0);
374 hammer_ref(&io
->lock
);
375 KKASSERT(io
->type
!= HAMMER_STRUCTURE_VOLUME
);
377 hammer_rel_buffer((hammer_buffer_t
)io
, 0);
382 * If this is the final finalization for the flush group set
383 * up for the next sequence by setting a new first_offset in
384 * our cached blockmap and
385 * clearing the undo history.
388 cundomap
= &hmp
->blockmap
[HAMMER_ZONE_UNDO_INDEX
];
389 cundomap
->first_offset
= cundomap
->next_offset
;
390 hammer_clear_undo_history(hmp
);
393 hammer_sync_unlock(trans
);