2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.11 2008/01/01 01:00:03 dillon Exp $
37 * IO Primitives and buffer cache management
39 * All major data-tracking structures in HAMMER contain a struct hammer_io
40 * which is used to manage their backing store. We use filesystem buffers
41 * for backing store and we leave them passively associated with their
44 * If the kernel tries to release a passively associated buf which we cannot
45 * yet let go we set B_LOCKED in the buffer and then actively release it
50 #include <sys/fcntl.h>
51 #include <sys/nlookup.h>
56 * Helper routine to disassociate a buffer cache buffer from an I/O
/*
 * hammer_io_disassociate: break the link between a HAMMER structure and
 * the buffer cache buffer (io->io.bp) that backs it.
 *
 * Visible steps, in order:
 *  - assert that the structure has released the buffer (io.released set)
 *    and carries no unsynchronized modifications (io.modified == 0);
 *  - re-initialize bp->b_dep, which drops the dependency list entry that
 *    tied the bp to this structure;
 *  - clear B_LOCKED in bp->b_flags;
 *  - for the structure type at hand (volume, supercluster, cluster or
 *    buffer) NULL out the ondisk pointer and every alist .meta pointer.
 *
 * NOTE(review): the switch header, the break statements and any
 * statements between the visible lines are missing from this extract;
 * confirm against the full file before relying on this summary.
 */
60 hammer_io_disassociate(union hammer_io_structure
*io
)
62 struct buf
*bp
= io
->io
.bp
;
64 KKASSERT(io
->io
.released
&& io
->io
.modified
== 0);
65 LIST_INIT(&bp
->b_dep
); /* clear the association */
68 bp
->b_flags
&= ~B_LOCKED
;
71 case HAMMER_STRUCTURE_VOLUME
:
72 io
->volume
.ondisk
= NULL
;
73 io
->volume
.alist
.meta
= NULL
;
75 case HAMMER_STRUCTURE_SUPERCL
:
76 io
->supercl
.ondisk
= NULL
;
77 io
->supercl
.alist
.meta
= NULL
;
79 case HAMMER_STRUCTURE_CLUSTER
:
80 io
->cluster
.ondisk
= NULL
;
81 io
->cluster
.alist_master
.meta
= NULL
;
82 io
->cluster
.alist_btree
.meta
= NULL
;
83 io
->cluster
.alist_record
.meta
= NULL
;
84 io
->cluster
.alist_mdata
.meta
= NULL
;
86 case HAMMER_STRUCTURE_BUFFER
:
87 io
->buffer
.ondisk
= NULL
;
88 io
->buffer
.alist
.meta
= NULL
;
94 * Mark a cluster as being closed. This is done as late as possible,
95 * only when we are asked to flush the cluster
/*
 * hammer_close_cluster: move an open cluster back to the idle state.
 *
 * The routine first sleeps (wmesg "hmrdep", no timeout) for as long as
 * cluster->state is HAMMER_CLUSTER_ASYNC.  If the state is then
 * HAMMER_CLUSTER_OPEN it sets the in-memory state to HAMMER_CLUSTER_IDLE
 * and clears HAMMER_CLUF_OPEN in the on-disk header flags, bracketing
 * that header change with hammer_modify_cluster() and
 * hammer_modify_cluster_done().  A "CLOSE CLUSTER" debug line is
 * printed.  Nothing visible happens for any other state.
 */
98 hammer_close_cluster(hammer_cluster_t cluster
)
100 while (cluster
->state
== HAMMER_CLUSTER_ASYNC
)
101 tsleep(cluster
, 0, "hmrdep", 0);
102 if (cluster
->state
== HAMMER_CLUSTER_OPEN
) {
103 cluster
->state
= HAMMER_CLUSTER_IDLE
;
104 hammer_modify_cluster(cluster
);
105 cluster
->ondisk
->clu_flags
&= ~HAMMER_CLUF_OPEN
;
106 hammer_modify_cluster_done(cluster
);
107 kprintf("CLOSE CLUSTER\n");
113 * Load bp for a HAMMER structure.
/*
 * hammer_io_read: attach a buffer cache buffer to a hammer_io by reading
 * it from the device vnode.
 *
 * devvp - device vnode handed to bread()
 * io    - I/O tracking structure; io->offset selects the block
 *
 * When io->bp is NULL, bread() is issued for HAMMER_BUFSIZE bytes at
 * io->offset with the result stored in io->bp.  The visible set-up then
 * points bp->b_ops at hammer_bioops, links io->worklist at the head of
 * bp->b_dep, and resets io->modified and io->released to 0.
 *
 * NOTE(review): the handling of the bread() error, the branch taken
 * when io->bp is already set, and the return statement are not visible
 * in this extract.
 */
116 hammer_io_read(struct vnode
*devvp
, struct hammer_io
*io
)
121 if ((bp
= io
->bp
) == NULL
) {
122 error
= bread(devvp
, io
->offset
, HAMMER_BUFSIZE
, &io
->bp
);
125 bp
->b_ops
= &hammer_bioops
;
126 LIST_INSERT_HEAD(&bp
->b_dep
, &io
->worklist
, node
);
129 io
->modified
= 0; /* no new modifications yet */
130 io
->released
= 0; /* we hold an active lock on bp */
138 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
139 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
140 * I/O so we can call it.
/*
 * hammer_io_new: attach a buffer cache buffer to a hammer_io without
 * reading it from the device.
 *
 * devvp - device vnode handed to getblk()
 * io    - I/O tracking structure; io->offset selects the block
 *
 * When io->bp is NULL the buffer is obtained with getblk() for
 * HAMMER_BUFSIZE bytes at io->offset.  The visible set-up points
 * bp->b_ops at hammer_bioops, links io->worklist at the head of
 * bp->b_dep, and clears io->released.
 *
 * NOTE(review): the zeroing of the buffer, the branch taken when io->bp
 * is already set, and the return statement are not visible in this
 * extract.
 */
143 hammer_io_new(struct vnode
*devvp
, struct hammer_io
*io
)
147 if ((bp
= io
->bp
) == NULL
) {
148 io
->bp
= getblk(devvp
, io
->offset
, HAMMER_BUFSIZE
, 0, 0);
150 bp
->b_ops
= &hammer_bioops
;
151 LIST_INSERT_HEAD(&bp
->b_dep
, &io
->worklist
, node
);
152 io
->released
= 0; /* we hold an active lock on bp */
167 * This routine is called when a buffer within a cluster is modified. We
168 * mark the cluster open and immediately initiate asynchronous I/O. Any
169 * related hammer_buffer write I/O blocks until our async write completes.
170 * This guarantees (inasmuch as the OS can) that the cluster recovery code
171 * will see a cluster marked open if a crash occurred while the filesystem
172 * still had dirty buffers associated with that cluster.
/*
 * hammer_io_notify_cluster: mark an idle cluster as open.
 *
 * The state is tested once without the lock and, if it is
 * HAMMER_CLUSTER_IDLE, tested again after cluster->io.lock has been
 * taken exclusively.  When the second test also passes the routine
 * prints "MARK CLUSTER OPEN", sets HAMMER_CLUF_OPEN in the on-disk
 * header flags, moves the in-memory state to HAMMER_CLUSTER_ASYNC and
 * flags the cluster io as modified.  The lock is released afterwards.
 *
 * NOTE(review): the statements that start the write of the cluster
 * header are not visible in this extract.
 */
177 hammer_io_notify_cluster(hammer_cluster_t cluster
)
179 struct hammer_io
*io
= &cluster
->io
;
181 if (cluster
->state
== HAMMER_CLUSTER_IDLE
) {
182 hammer_lock_ex(&cluster
->io
.lock
);
183 if (cluster
->state
== HAMMER_CLUSTER_IDLE
) {
188 kprintf("MARK CLUSTER OPEN\n");
189 cluster
->ondisk
->clu_flags
|= HAMMER_CLUF_OPEN
;
190 cluster
->state
= HAMMER_CLUSTER_ASYNC
;
191 cluster
->io
.modified
= 1;
194 hammer_unlock(&cluster
->io
.lock
);
199 * This routine is called on the last reference to a hammer structure.
200 * Regardless of the state io->modified must be cleared when we return.
202 * If flush is non-zero we have to completely disassociate the bp from the
203 * structure (which may involve blocking). Otherwise we can leave the bp
204 * passively associated with the structure.
206 * The caller is holding io->lock exclusively.
/*
 * hammer_io_release: release the buffer held by a hammer_io.
 *
 * io    - I/O tracking structure; also viewed as the enclosing
 *         union hammer_io_structure through the local iou
 * flush - non-zero requests full disassociation of the bp
 *
 * Nothing visible happens when io->bp is NULL.  Otherwise there are two
 * paths:
 *
 *  1. flush == 0 and B_LOCKED is clear in bp->b_flags.  The code
 *     distinguishes "B_DELWRI clear and io->modified set" from
 *     "io->released == 0" from the remaining case.
 *     NOTE(review): the action taken in each sub-case is not visible in
 *     this extract.
 *
 *  2. Otherwise:
 *     - for a HAMMER_STRUCTURE_BUFFER that is modified or has B_DELWRI
 *       set, sleep ("hmrdep") while the owning cluster is in
 *       HAMMER_CLUSTER_ASYNC state;
 *     - for a HAMMER_STRUCTURE_CLUSTER, call hammer_close_cluster();
 *     - save io->modified in a local, call hammer_io_disassociate(),
 *       then test the saved value and B_DELWRI to decide how to
 *       dispose of the bp.
 *     NOTE(review): the buffer-ownership step and the final disposal
 *     calls are not visible in this extract.
 */
209 hammer_io_release(struct hammer_io
*io
, int flush
)
211 union hammer_io_structure
*iou
= (void *)io
;
212 hammer_cluster_t cluster
;
216 if ((bp
= io
->bp
) != NULL
) {
218 * If neither we nor the kernel want to flush the bp, we can
219 * stop here. Make sure the bp is passively released
220 * before returning. Even though we are still holding it,
221 * we want to be notified when the kernel wishes to flush
222 * it out so make sure B_DELWRI is properly set if we had
223 * made modifications.
225 if (flush
== 0 && (bp
->b_flags
& B_LOCKED
) == 0) {
226 if ((bp
->b_flags
& B_DELWRI
) == 0 && io
->modified
) {
233 } else if (io
->released
== 0) {
234 /* buffer write state already synchronized */
239 /* buffer write state already synchronized */
246 * Either we want to flush the buffer or the kernel tried.
248 * If this is a hammer_buffer we may have to wait for the
249 * cluster header write to complete.
251 if (iou
->io
.type
== HAMMER_STRUCTURE_BUFFER
&&
252 (io
->modified
|| (bp
->b_flags
& B_DELWRI
))) {
253 cluster
= iou
->buffer
.cluster
;
254 while (cluster
->state
== HAMMER_CLUSTER_ASYNC
)
255 tsleep(iou
->buffer
.cluster
, 0, "hmrdep", 0);
259 * If we have an open cluster header, close it
261 if (iou
->io
.type
== HAMMER_STRUCTURE_CLUSTER
) {
262 hammer_close_cluster(&iou
->cluster
);
266 * Gain ownership of the buffer. Nothing can take it away
267 * from the io structure while we have it locked, so we
270 * Once our thread owns the buffer we can disassociate it
271 * from the io structure.
277 modified
= io
->modified
;
279 hammer_io_disassociate(iou
);
282 * Now dispose of the buffer. Someone tried to flush, so
283 * issue the I/O immediately.
285 if (modified
|| (bp
->b_flags
& B_DELWRI
))
293 * Flush dirty data, if any.
/*
 * hammer_io_flush: flush a hammer_io that may hold dirty data.
 *
 * io   - I/O tracking structure to flush
 * info - sync request; info->waitfor is tested against MNT_WAIT
 *
 * Visible sequence:
 *  - fetch io->bp and test it for NULL, then test B_DELWRI;
 *  - while io->lock.modifying is non-zero, print a "DELAYING IO FLUSH"
 *    diagnostic and sleep on &io->lock with wmesg "hmrfls";
 *  - take io->lock exclusively and re-check: if a modification is in
 *    progress again or io->bp has become NULL, drop the lock;
 *  - branch on io->modified == 0 and on (info->waitfor & MNT_WAIT);
 *  - release io->lock.
 *
 * NOTE(review): the statements executed in each branch (early returns,
 * buffer acquisition, the write calls) are not visible in this extract.
 */
296 hammer_io_flush(struct hammer_io
*io
, struct hammer_sync_info
*info
)
302 if ((bp
= io
->bp
) == NULL
)
304 if (bp
->b_flags
& B_DELWRI
)
308 * We can't initiate a write while the buffer is being modified
311 while (io
->lock
.modifying
) {
313 kprintf("DELAYING IO FLUSH BP %p TYPE %d REFS %d modifying %d\n",
314 bp
, io
->type
, io
->lock
.refs
, io
->lock
.modifying
);
315 tsleep(&io
->lock
, 0, "hmrfls", 0);
317 hammer_lock_ex(&io
->lock
);
318 if (io
->lock
.modifying
|| io
->bp
== NULL
) {
319 hammer_unlock(&io
->lock
);
324 * Acquire ownership of the buffer cache buffer so we can flush it
328 if (io
->modified
== 0)
336 * Return the bp to the system, issuing I/O if necessary. The
337 * system will issue a callback to us when it actually wants to
340 if (io
->modified
== 0) {
342 } else if (info
->waitfor
& MNT_WAIT
) {
352 hammer_unlock(&io
->lock
);
356 * Called prior to any modifications being made to ondisk data. This
357 * forces the caller to wait for any writes to complete. We explicitly
358 * avoid the write-modify race.
360 * This routine is only called on hammer structures which are already
361 * actively referenced.
/*
 * hammer_io_intend_modify: prepare a referenced hammer_io for a change
 * to its on-disk data.
 *
 * The caller must already hold a reference and the structure must have
 * a bp; both conditions are asserted.  The visible body takes io->lock
 * exclusively, calls BUF_KERNPROC() on io->bp, and releases the lock.
 *
 * NOTE(review): the statements between those calls (the condition
 * guarding BUF_KERNPROC and any state updates) are not visible in this
 * extract.
 */
364 hammer_io_intend_modify(struct hammer_io
*io
)
366 KKASSERT(io
->lock
.refs
!= 0 && io
->bp
!= NULL
);
368 hammer_lock_ex(&io
->lock
);
371 BUF_KERNPROC(io
->bp
);
374 hammer_unlock(&io
->lock
);
/*
 * hammer_io_modify_done: end one in-progress modification of a
 * hammer_io.
 *
 * Asserts that io->lock.modifying is positive and decrements it.  When
 * the count reaches zero and io->lock.wanted is set, a follow-up branch
 * is entered.
 *
 * NOTE(review): the body of that branch is not visible in this extract;
 * presumably it wakes the sleeper in hammer_io_flush(), which sleeps on
 * &io->lock while modifying is non-zero - confirm.
 */
379 hammer_io_modify_done(struct hammer_io
*io
)
381 KKASSERT(io
->lock
.modifying
> 0);
382 --io
->lock
.modifying
;
383 if (io
->lock
.wanted
&& io
->lock
.modifying
== 0) {
394 * Pre and post I/O callbacks.
/*
 * Forward declaration of hammer_io_deallocate(); the definition appears
 * further down in this file.
 */
396 static void hammer_io_deallocate(struct buf
*bp
);
/*
 * hammer_io_start: bio_ops .io_start hook for HAMMER buffers.
 *
 * The owning HAMMER structure is recovered from the first entry on
 * bp->b_dep.  For a HAMMER_STRUCTURE_BUFFER the routine sleeps on the
 * owning cluster (wmesg "hmrdep") for as long as that cluster has
 * io_in_progress set, printing a debug line before and after each
 * sleep.  Other structure types pass straight through the visible code.
 */
399 hammer_io_start(struct buf
*bp
)
402 union hammer_io_structure
*io
= (void *)LIST_FIRST(&bp
->b_dep
);
404 if (io
->io
.type
== HAMMER_STRUCTURE_BUFFER
) {
405 while (io
->buffer
.cluster
->io_in_progress
) {
406 kprintf("hammer_io_start: wait for cluster\n");
407 tsleep(io
->buffer
.cluster
, 0, "hmrdep", 0);
408 kprintf("hammer_io_start: wait for cluster done\n");
/*
 * hammer_io_complete: bio_ops .io_complete hook for HAMMER buffers.
 *
 * The owning HAMMER structure is recovered from the first entry on
 * bp->b_dep.  When it is a HAMMER_STRUCTURE_CLUSTER whose state is
 * HAMMER_CLUSTER_ASYNC, the state advances to HAMMER_CLUSTER_OPEN and
 * wakeup() is issued on the address of the cluster structure.  Other
 * routines in this file tsleep() on the cluster pointer while the state
 * is ASYNC.
 */
415 hammer_io_complete(struct buf
*bp
)
417 union hammer_io_structure
*io
= (void *)LIST_FIRST(&bp
->b_dep
);
419 if (io
->io
.type
== HAMMER_STRUCTURE_CLUSTER
) {
420 if (io
->cluster
.state
== HAMMER_CLUSTER_ASYNC
) {
421 io
->cluster
.state
= HAMMER_CLUSTER_OPEN
;
422 wakeup(&io
->cluster
);
428 * Callback from kernel when it wishes to deallocate a passively
429 * associated structure. This can only occur if the buffer is
430 * passively associated with the structure. The kernel has locked
433 * If we cannot disassociate we set B_LOCKED to prevent the buffer
434 * from getting reused.
/*
 * hammer_io_deallocate: bio_ops .io_deallocate hook for HAMMER buffers.
 *
 * The owning HAMMER structure is recovered from the first entry on
 * bp->b_dep.  Visible sequence:
 *  - assert io.released (the structure must not think it holds the bp);
 *  - take a reference on io.lock;
 *  - for a HAMMER_STRUCTURE_BUFFER, if the exclusive lock can be taken
 *    without blocking, call hammer_flush_buffer_nodes(), assert that
 *    io.released is 0 at that point, and unlock;
 *  - if this is now the last reference: assert io.modified == 0, call
 *    hammer_io_disassociate(), then call the hammer_rel_*() routine
 *    matching the structure type with a second argument of 1;
 *  - otherwise set B_LOCKED in bp->b_flags and drop the reference with
 *    hammer_unref().
 *
 * NOTE(review): the statements that toggle io.released around
 * hammer_flush_buffer_nodes(), and the break/else lines of the
 * switch and if, are not visible in this extract.
 */
437 hammer_io_deallocate(struct buf
*bp
)
439 union hammer_io_structure
*io
= (void *)LIST_FIRST(&bp
->b_dep
);
441 /* XXX memory interlock, spinlock to sync cpus */
444 * Since the kernel is passing us a locked buffer, the HAMMER
445 * structure had better not believe it has a lock on the buffer.
447 KKASSERT(io
->io
.released
);
451 * First, ref the structure to prevent either the buffer or the
452 * structure from going away or being unexpectedly flushed.
454 hammer_ref(&io
->io
.lock
);
457 * Buffers can have active references from cached hammer_node's,
458 * even if those nodes are themselves passively cached. Attempt
459 * to clean them out. This may not succeed.
461 * We have to do some magic with io.released because
462 * hammer_io_intend_modify() can be called indirectly from the
463 * flush code, otherwise we might panic with a recursive bp lock.
465 if (io
->io
.type
== HAMMER_STRUCTURE_BUFFER
&&
466 hammer_lock_ex_try(&io
->io
.lock
) == 0) {
468 hammer_flush_buffer_nodes(&io
->buffer
);
469 KKASSERT(io
->io
.released
== 0);
471 hammer_unlock(&io
->io
.lock
);
474 if (hammer_islastref(&io
->io
.lock
)) {
476 * If we are the only ref left we can disassociate the I/O.
477 * It had better still be in a released state because the
478 * kernel is holding a lock on the buffer. Any passive
479 * modifications should have already been synchronized with
482 KKASSERT(io
->io
.modified
== 0);
483 hammer_io_disassociate(io
);
486 * Perform final rights on the structure. This can cause
487 * a chain reaction - e.g. last buffer -> last cluster ->
488 * last supercluster -> last volume.
490 switch(io
->io
.type
) {
491 case HAMMER_STRUCTURE_VOLUME
:
492 hammer_rel_volume(&io
->volume
, 1);
494 case HAMMER_STRUCTURE_SUPERCL
:
495 hammer_rel_supercl(&io
->supercl
, 1);
497 case HAMMER_STRUCTURE_CLUSTER
:
498 hammer_rel_cluster(&io
->cluster
, 1);
500 case HAMMER_STRUCTURE_BUFFER
:
501 hammer_rel_buffer(&io
->buffer
, 1);
506 * Otherwise tell the kernel not to destroy the buffer.
508 * We have to unref the structure without performing any
509 * final rights to it to avoid a deadlock.
511 bp
->b_flags
|= B_LOCKED
;
512 hammer_unref(&io
->io
.lock
);
/*
 * hammer_io_fsync: installed as the .io_fsync member of hammer_bioops.
 * Only the signature is visible in this extract, so the behavior of the
 * body is not described here.
 */
518 hammer_io_fsync(struct vnode
*vp
)
524 * NOTE: will not be called unless we tell the kernel about the
525 * bioops. Unused... we use the mount's VFS_SYNC instead.
/*
 * hammer_io_sync: installed as the .io_sync member of hammer_bioops.
 * Only the signature is visible in this extract, so the behavior of the
 * body is not described here.
 */
528 hammer_io_sync(struct mount
*mp
)
/*
 * hammer_io_movedeps: installed as the .io_movedeps member of
 * hammer_bioops.  Only the signature is visible in this extract, so the
 * behavior of the body is not described here.
 */
534 hammer_io_movedeps(struct buf
*bp1
, struct buf
*bp2
)
539 * I/O pre-check for reading and writing. HAMMER only uses this for
540 * B_CACHE buffers so checkread just shouldn't happen, but if it does
543 * Writing is a different case. We don't want the kernel to try to write
544 * out a buffer that HAMMER may be modifying passively or which has a
547 * This code enforces the following write ordering: buffers, then cluster
548 * headers, then volume headers.
/*
 * hammer_io_checkread: installed as the .io_checkread member of
 * hammer_bioops.  Only the signature is visible in this extract, so the
 * behavior of the body is not described here.
 */
551 hammer_io_checkread(struct buf
*bp
)
/*
 * hammer_io_checkwrite: bio_ops .io_checkwrite hook for HAMMER buffers.
 *
 * The owning HAMMER structure is recovered from the first entry on
 * bp->b_dep.  Three cases are visible:
 *  - a HAMMER_STRUCTURE_BUFFER whose cluster is in HAMMER_CLUSTER_ASYNC
 *    state: B_LOCKED is set in bp->b_flags;
 *  - any structure with a non-zero io.lock.refs count: B_LOCKED is set
 *    in bp->b_flags;
 *  - otherwise: a HAMMER_STRUCTURE_CLUSTER is first passed to
 *    hammer_close_cluster(), then the bp is detached with
 *    hammer_io_disassociate().
 *
 * NOTE(review): the return statements (and therefore the value handed
 * back to the kernel in each case) are not visible in this extract.
 */
557 hammer_io_checkwrite(struct buf
*bp
)
559 union hammer_io_structure
*iou
= (void *)LIST_FIRST(&bp
->b_dep
);
561 if (iou
->io
.type
== HAMMER_STRUCTURE_BUFFER
&&
562 iou
->buffer
.cluster
->state
== HAMMER_CLUSTER_ASYNC
) {
564 * Cannot write out a cluster buffer if the cluster header
565 * I/O opening the cluster has not completed.
567 bp
->b_flags
|= B_LOCKED
;
569 } else if (iou
->io
.lock
.refs
) {
571 * Cannot write out a bp if its associated buffer has active
574 bp
->b_flags
|= B_LOCKED
;
578 * We're good, but before we can let the kernel proceed we
579 * may have to make some adjustments.
581 * Since there are no refs on the io structure, HAMMER must
582 * have already synchronized its modify state with the bp
583 * so iou->io.modified should be 0.
585 if (iou
->io
.type
== HAMMER_STRUCTURE_CLUSTER
)
586 hammer_close_cluster(&iou
->cluster
);
587 hammer_io_disassociate(iou
);
593 * Return non-zero if the caller should flush the structure associated
594 * with this io sub-structure.
/*
 * hammer_io_checkflush: test whether a hammer_io either has no bp
 * attached (io->bp == NULL) or has a bp with B_LOCKED set.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * confirm which value each outcome of the test produces.
 */
597 hammer_io_checkflush(struct hammer_io
*io
)
599 if (io
->bp
== NULL
|| (io
->bp
->b_flags
& B_LOCKED
))
605 * Return non-zero if we wish to delay the kernel's attempt to flush
606 * this buffer to disk.
/*
 * hammer_io_countdeps: installed as the .io_countdeps member of
 * hammer_bioops.  Only the signature is visible in this extract, so the
 * behavior of the body is not described here.
 */
609 hammer_io_countdeps(struct buf
*bp
, int n
)
614 struct bio_ops hammer_bioops
= {
615 .io_start
= hammer_io_start
,
616 .io_complete
= hammer_io_complete
,
617 .io_deallocate
= hammer_io_deallocate
,
618 .io_fsync
= hammer_io_fsync
,
619 .io_sync
= hammer_io_sync
,
620 .io_movedeps
= hammer_io_movedeps
,
621 .io_countdeps
= hammer_io_countdeps
,
622 .io_checkread
= hammer_io_checkread
,
623 .io_checkwrite
= hammer_io_checkwrite
,