HAMMER 53E/Many: Performance tuning
sys/vfs/hammer/hammer_flusher.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.22 2008/06/10 05:06:20 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"
static void hammer_flusher_thread(void *arg);
static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
static void hammer_flusher_flush(hammer_mount_t hmp);
static void hammer_flusher_flush_inode(hammer_inode_t ip,
                                        hammer_transaction_t trans);
static int hammer_must_finalize_undo(hammer_mount_t hmp);
static void hammer_flusher_finalize(hammer_transaction_t trans, int final);

#define HAMMER_FLUSHER_IMMEDIATE        16
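/*
 * Synchronous flush: sample the next flush sequence number, wake the
 * flusher if it is not already signalled, and sleep until flusher_done
 * has caught up to the sampled sequence.
 */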
void
hammer_flusher_sync(hammer_mount_t hmp)
{
        int seq;

        if (hmp->flusher_td) {
                seq = hmp->flusher_next;
                if (hmp->flusher_signal++ == 0)
                        wakeup(&hmp->flusher_signal);
                while ((int)(seq - hmp->flusher_done) > 0)
                        tsleep(&hmp->flusher_done, 0, "hmrfls", 0);
        }
}
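/*
 * Asynchronous flush: wake the flusher if it is not already signalled,
 * but do not wait for the flush to complete.
 */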
void
hammer_flusher_async(hammer_mount_t hmp)
{
        if (hmp->flusher_td) {
                if (hmp->flusher_signal++ == 0)
                        wakeup(&hmp->flusher_signal);
        }
}
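/*
 * Initialize the flusher's sequencing state and start the flusher thread
 * for this mount.
 */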
void
hammer_flusher_create(hammer_mount_t hmp)
{
        hmp->flusher_signal = 0;
        hmp->flusher_act = 0;
        hmp->flusher_done = 0;
        hmp->flusher_next = 1;
        lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL,
                    0, -1, "hammer");
}
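/*
 * Tear down the flusher: flag the thread to exit, then repeatedly wake it
 * and sleep until it clears flusher_td on its way out.
 */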
void
hammer_flusher_destroy(hammer_mount_t hmp)
{
        if (hmp->flusher_td) {
                hmp->flusher_exiting = 1;
                while (hmp->flusher_td) {
                        ++hmp->flusher_signal;
                        wakeup(&hmp->flusher_signal);
                        tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0);
                }
        }
}
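/*
 * Flusher thread main loop: advance the active flush group, flush it,
 * report completion, and wait to be signalled again.
 */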
static void
hammer_flusher_thread(void *arg)
{
        hammer_mount_t hmp = arg;

        for (;;) {
                while (hmp->flusher_lock)
                        tsleep(&hmp->flusher_lock, 0, "hmrhld", 0);
                kprintf("S");
                hmp->flusher_act = hmp->flusher_next;
                ++hmp->flusher_next;
                hammer_flusher_clean_loose_ios(hmp);
                hammer_flusher_flush(hmp);
                hammer_flusher_clean_loose_ios(hmp);
                hmp->flusher_done = hmp->flusher_act;

                wakeup(&hmp->flusher_done);

                /*
                 * Wait for activity.
                 */
                if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list))
                        break;

                /*
                 * This is a hack until we can dispose of frontend buffer
                 * cache buffers on the frontend.
                 */
                while (hmp->flusher_signal == 0)
                        tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0);
                hmp->flusher_signal = 0;
        }
        hmp->flusher_td = NULL;
        wakeup(&hmp->flusher_exiting);
        lwkt_exit();
}
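/*
 * Release any loose buffers that have accumulated since the last pass.
 */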
static void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
        hammer_buffer_t buffer;
        hammer_io_t io;

        /*
         * loose ends - buffers without bp's aren't tracked by the kernel
         * and can build up, so clean them out.  This can occur when an
         * IO completes on a buffer with no references left.
         */
        while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                KKASSERT(io->mod_list == &hmp->lose_list);
                TAILQ_REMOVE(io->mod_list, io, mod_entry);
                io->mod_list = NULL;
                hammer_ref(&io->lock);
                buffer = (void *)io;
                hammer_rel_buffer(buffer, 0);
        }
}
/*
 * Flush all inodes in the current flush group.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
        struct hammer_transaction trans;
        hammer_inode_t ip;
        hammer_reserve_t resv;

        /*
         * Flush the inodes
         */
        hammer_start_transaction_fls(&trans, hmp);
        while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
                if (ip->flush_group != hmp->flusher_act)
                        break;
                TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
                hammer_flusher_flush_inode(ip, &trans);
        }
        hammer_flusher_finalize(&trans, 1);
        hmp->flusher_tid = trans.tid;

        /*
         * Clean up any freed big-blocks (typically zone-2).
         * resv->flush_group is typically set several flush groups ahead
         * of the free to ensure that the freed block is not reused until
         * it is safe to reuse.
         */
        while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
                if (resv->flush_group != hmp->flusher_act)
                        break;
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        hammer_done_transaction(&trans);
}
/*
 * Flush a single inode that is part of a flush group.
 */
static
void
hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
{
        hammer_mount_t hmp = ip->hmp;

        /*hammer_lock_ex(&ip->lock);*/
        ip->error = hammer_sync_inode(ip);
        hammer_flush_inode_done(ip);
        /*hammer_unlock(&ip->lock);*/

        if (hammer_must_finalize_undo(hmp)) {
                kprintf("HAMMER: Warning: UNDO area too small!");
                hammer_flusher_finalize(trans, 1);
        } else if (trans->hmp->locked_dirty_count +
                   trans->hmp->io_running_count > hammer_limit_dirtybufs) {
                kprintf("t");
                hammer_flusher_finalize(trans, 0);
        }
}
/*
 * If the UNDO area gets over half full we have to flush it.  We can't
 * afford the UNDO area becoming completely full as that would break
 * the crash recovery atomicity.
 */
static
int
hammer_must_finalize_undo(hammer_mount_t hmp)
{
        if (hammer_undo_space(hmp) < hammer_undo_max(hmp) / 2) {
                hkprintf("*");
                return(1);
        } else {
                return(0);
        }
}
/*
 * Flush all pending UNDOs, wait for write completion, update the volume
 * header with the new UNDO end position, and flush it.  Then
 * asynchronously flush the meta-data.
 *
 * If this is the last finalization in a flush group we also synchronize
 * our cached blockmap and set hmp->flusher_undo_start and our cached undo
 * fifo first_offset so the next flush resets the FIFO pointers.
 */
static
void
hammer_flusher_finalize(hammer_transaction_t trans, int final)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t cundomap, dundomap;
        hammer_mount_t hmp;
        hammer_io_t io;
        int count;
        int i;

        hmp = trans->hmp;
        root_volume = trans->rootvol;

        /*
         * Flush data buffers.  This can occur asynchronously and at any
         * time.  We must interlock against the frontend direct-data write
         * but do not have to acquire the sync-lock yet.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
                hammer_ref(&io->lock);
                hammer_io_write_interlock(io);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_io_done_interlock(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * The sync-lock is required for the remaining sequence.  This lock
         * prevents meta-data from being modified.
         */
        hammer_sync_lock_ex(trans);

        /*
         * If we have been asked to finalize the volume header, sync the
         * cached blockmap to the on-disk blockmap.  Generate an UNDO
         * record for the update.
         */
        if (final) {
                cundomap = &hmp->blockmap[0];
                dundomap = &root_volume->ondisk->vol0_blockmap[0];
                if (root_volume->io.modified) {
                        hammer_modify_volume(trans, root_volume,
                                             dundomap, sizeof(hmp->blockmap));
                        for (i = 0; i < HAMMER_MAX_ZONES; ++i)
                                hammer_crc_set_blockmap(&cundomap[i]);
                        bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
                        hammer_modify_volume_done(root_volume);
                }
        }

        /*
         * Flush UNDOs
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * Wait for I/Os to complete
         */
        crit_enter();
        while (hmp->io_running_count)
                tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
        crit_exit();

        /*
         * Update the on-disk volume header with the new UNDO FIFO end position
         * (do not generate new UNDO records for this change).  We have to
         * do this for the UNDO FIFO whether (final) is set or not.
         *
         * Also update the on-disk next_tid field.  This does not require
         * an UNDO.  However, because our TID is generated before we get
         * the sync lock, another sync may have beat us to the punch.
         *
         * The volume header will be flushed out synchronously.
         */
        dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
        cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

        if (dundomap->first_offset != cundomap->first_offset ||
            dundomap->next_offset != cundomap->next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                dundomap->first_offset = cundomap->first_offset;
                dundomap->next_offset = cundomap->next_offset;
                hammer_crc_set_blockmap(dundomap);
                hammer_crc_set_volume(root_volume->ondisk);
                if (root_volume->ondisk->vol0_next_tid < trans->tid)
                        root_volume->ondisk->vol0_next_tid = trans->tid;
                hammer_modify_volume_done(root_volume);
        }

        if (root_volume->io.modified) {
                hammer_io_flush(&root_volume->io);
        }

        /*
         * Wait for I/Os to complete
         */
        crit_enter();
        while (hmp->io_running_count)
                tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
        crit_exit();

        /*
         * Flush meta-data.  The meta-data will be undone if we crash
         * so we can safely flush it asynchronously.
         *
         * Repeated catchups will wind up flushing this update's meta-data
         * and the UNDO buffers for the next update simultaneously.  This
         * is ok.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * If this is the final finalization for the flush group, set up
         * for the next sequence by setting a new first_offset in our
         * cached blockmap and clearing the undo history.
         */
        if (final) {
                cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
                cundomap->first_offset = cundomap->next_offset;
                hammer_clear_undo_history(hmp);
        }

        hammer_sync_unlock(trans);
}