/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
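
/*
 * Typical use, as a sketch only (error handling omitted; see the
 * hammer2_io_bread(), hammer2_io_data() and hammer2_io_bqrelse()
 * implementations below):
 *
 *	hammer2_io_t *dio = NULL;
 *	char *data;
 *
 *	if (hammer2_io_bread(hmp, btype, lbase, lsize, &dio) == 0) {
 *		data = hammer2_io_data(dio, lbase);
 *		... access lsize bytes at data ...
 *		hammer2_io_bqrelse(&dio);
 *	}
 */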
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
             off_t, pbase);
struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        uint64_t mask;
        int i;

        if (bytes < 1024)       /* smaller chunks not supported */
                return 0;

        /*
         * Calculate crc check mask for larger chunks
         */
        i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
             HAMMER2_PBUFMASK) >> 10;
        if (i == 0 && bytes == HAMMER2_PBUFSIZE)
                return ((uint64_t)-1);
        mask = ((uint64_t)1U << (bytes >> 10)) - 1;
        mask <<= i;

        return mask;
}
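
/*
 * Illustrative sketch (hypothetical values): the mask identifies which
 * 1KB sub-chunks of the 64KB physical buffer an (off, bytes) range
 * covers.  Assuming dio->pbase = 0x10000, off = 0x14000 and bytes = 4096,
 * the function above computes
 *
 *	i    = ((0x14000 - 0x10000) & HAMMER2_PBUFMASK) >> 10 = 16
 *	mask = (((uint64_t)1U << (4096 >> 10)) - 1) << 16     = 0x00000000000F0000
 *
 * i.e. bits 16-19, one bit per 1KB sub-chunk.
 */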
#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
                 int createit)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        hammer2_off_t lbase;
        hammer2_off_t pbase;
        hammer2_off_t pmask;
        int lsize;
        int psize;

        psize = HAMMER2_PBUFSIZE;
        pmask = ~(hammer2_off_t)(psize - 1);
        lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
        lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;

        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
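
        /*
         * Illustrative sketch (hypothetical values): the low 6 bits of
         * data_off encode the block radix.  Assuming data_off = 0x1234400E
         * (radix 14, a 16KB logical block), the math above yields
         *
         *	lsize = 1 << 14          = 0x4000
         *	lbase = data_off & ~0x3f = 0x12344000
         *	pbase = lbase & pmask    = 0x12340000	(64KB device buffer)
         *
         * so several small logical blocks share a single 64KB physical DIO
         * buffer without requiring separate allocations.
         */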
        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_64(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                hammer2_spin_unsh(&hmp->io_spin);
        } else if (createit) {
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = btype;
                dio->refs = 1;
                hammer2_spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        /*
                         * Raced another insertion, use the existing dio
                         * and throw ours away.
                         */
                        if ((atomic_fetchadd_64(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        } else {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        return dio;
}
/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        uint64_t refs;

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

        dio = hammer2_io_alloc(hmp, lbase, iocb->btype, 1);
        iocb->dio = dio;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio's are not interlocked for
                 * the DIO_GOOD test.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        hammer2_spin_ex(&dio->spin);
                        if (atomic_cmpset_64(&dio->refs, refs,
                                             refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_64(&dio->refs, refs,
                                             refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
        }
}
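
/*
 * Illustrative note on the dio->refs protocol used above (the flag bits
 * are defined in hammer2.h; no particular bit positions are assumed here):
 *
 *	HAMMER2_DIO_MASK	low bits hold the reference count
 *	HAMMER2_DIO_GOOD	dio->bp is instantiated and stable for readers
 *	HAMMER2_DIO_INPROG	one thread owns buffer instantiation
 *	HAMMER2_DIO_WAITING	iocbs are queued on dio->iocbq
 *
 * All transitions are made by snapshotting dio->refs and applying
 * atomic_cmpset_64(), retrying on failure, so flag ownership never
 * requires the spin-lock except to stabilize the iocb queue itself.
 */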
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize, int notgood)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        off_t pmask;
        int psize = HAMMER2_PBUFSIZE;
        uint64_t orefs;
        uint64_t nrefs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio == NULL) {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        if ((atomic_fetchadd_64(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
                atomic_add_int(&dio->hmp->iofree_count, -1);
        hammer2_spin_unsh(&hmp->io_spin);

        ++dio->act;                     /* SMP race ok */
        /*
         * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
         * the system does not have the data already cached.
         */
        nrefs = (uint64_t)-1;
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio is not interlocked for
                 * the DIO_GOOD test.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.  INPROG might already be set, in which case
                 * there is no way we can do this non-blocking so we punt.
                 */
                if ((orefs & HAMMER2_DIO_INPROG))
                        break;
                nrefs = orefs | HAMMER2_DIO_INPROG;
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs) == 0)
                        continue;

                /*
                 * We own DIO_INPROG, try to set DIO_GOOD.
                 *
                 * If (notgood) specified caller just wants the dio and doesn't
                 * care about the buffer a whole lot.  However, if the buffer
                 * is good (or dirty), we still want to return it.
                 *
                 * Otherwise we are trying to resolve a dedup and bread()
                 * is expected to always be better than building a new buffer
                 * that will be written.  Use bread() for better determinism.
                 */
                bp = dio->bp;
                dio->bp = NULL;
                if (bp == NULL) {
                        if (notgood) {
                                bp = getblk(hmp->devvp, dio->pbase,
                                            dio->psize, 0, 0);
                        } else {
                                bread(hmp->devvp, dio->pbase, dio->psize, &bp);
                        }
                }

                /*
                 * System buffer must also have remained cached.
                 */
                if (bp) {
                        if ((bp->b_flags & B_ERROR) == 0 &&
                            (bp->b_flags & B_CACHE)) {
                                dio->bp = bp;   /* assign BEFORE setting flag */
                                atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
                        } else {
                                bqrelse(bp);
                                bp = NULL;
                        }
                }

                /*
                 * Clear DIO_INPROG.
                 *
                 * This is actually a bit complicated, see
                 * hammer2_io_complete() for more information.
                 */
                iocb.dio = dio;
                iocb.flags = HAMMER2_IOCB_INPROG;
                hammer2_io_complete(&iocb);
                break;
        }

        /*
         * Only return the dio if its buffer is good.  If notgood != 0,
         * we return the buffer regardless (so ephemeral dedup bits can be
         * retained).
         */
        if (notgood == 0 && (dio->refs & HAMMER2_DIO_GOOD) == 0) {
                hammer2_io_putblk(&dio);
                /* dio now NULL */
        }
        return dio;
}
/*
 * The originator of the iocb is finished with it.
 *
 * WARNING: iocb may be partially initialized with only iocb->dio and
 *          iocb->flags valid.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        hammer2_iocb_t *cbtmp;
        uint64_t orefs;
        uint64_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set completion is synchronous due to the
         * buffer already being good.  We can simply set IOCB_DONE and return.
         *
         * In this situation DIO_INPROG is not set and we have no visibility
         * on dio->bp.  We should not try to mess with dio->bp because another
         * thread may be finishing up its processing.  dio->bp should already
         * be set to BUF_KERNPROC()!
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
                return;
        }
        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        /*
         * Clean up the dio before marking the iocb as being done.  If another
         * iocb is pending we chain to it while leaving DIO_INPROG set (it
         * will call io completion and presumably clear DIO_INPROG).
         *
         * Otherwise if no other iocbs are pending we clear DIO_INPROG before
         * finishing up the cbio.  This means that DIO_INPROG is cleared at
         * the end of the chain before ANY of the cbios are marked done.
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        hammer2_spin_ex(&dio->spin);
                        cbtmp = TAILQ_FIRST(&dio->iocbq);
                        if (cbtmp) {
                                /*
                                 * NOTE: flags not adjusted in this case.
                                 *       Flags will be adjusted by the last
                                 *       iocb in the chain.
                                 */
                                TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
                                hammer2_spin_unex(&dio->spin);
                                cbtmp->callback(cbtmp);         /* chained */
                                break;
                        } else if (atomic_cmpset_64(&dio->refs,
                                                    orefs, nrefs)) {
                                hammer2_spin_unex(&dio->spin);
                                break;
                        } else {
                                hammer2_spin_unex(&dio->spin);
                                /* retry */
                        }
                } else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        break;
                }
                /* retry */
        }
        /*
         * Mark the iocb as done and wakeup any waiters.  This is done after
         * all iocb chains have been called back and after DIO_INPROG has been
         * cleared.  This avoids races against ref count drops by the waiting
         * threads (a hard but not impossible SMP race) which might result in
         * a 1->0 transition of the refs while DIO_INPROG is still set.
         */
        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
}
/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}
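
/*
 * Usage sketch (assumptions noted): the tsleep_interlock()/
 * atomic_cmpset_int()/tsleep() sequence above avoids a lost-wakeup race
 * against hammer2_io_complete().  A typical caller looks like the
 * hammer2_io_bread()/_hammer2_io_new() pattern below; my_callback is a
 * hypothetical iocb callback:
 *
 *	hammer2_iocb_t iocb;
 *
 *	iocb.callback = my_callback;
 *	iocb.lbase = lbase;
 *	iocb.lsize = lsize;
 *	iocb.btype = btype;
 *	iocb.flags = 0;
 *	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
 *	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
 *		hammer2_iocb_wait(&iocb);
 */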
/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int limit_dio;
        uint64_t orefs;
        uint64_t nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;

        KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear flags and set INPROG.
         *
         * On the 1->0 transition if INPROG is already set, another thread
         * is in lastdrop and we can just return after the transition.
         *
         * On any other transition we can generally just return.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs - 1;

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.
                         */
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.  The thread
                         * owning INPROG will dispose of the buffer.
                         */
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                atomic_add_int(&hmp->iofree_count, 1);
                                return;
                        }
                } else {
                        /*
                         * Not a lastdrop, just decrement the ref count.
                         */
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                return;
                }
                /* retry */
        }
        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.  iofree_count has not yet been incremented,
         * note that another accessor race will decrement iofree_count so
         * we have to increment it regardless.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (orefs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (hammer2_inval_enable &&
                    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
                        ++hammer2_iod_invals;
                        bp->b_flags |= B_INVAL | B_RELBUF;
                        brelse(bp);
                } else if (orefs & HAMMER2_DIO_DIRTY) {
                        int hce;

                        dio_write_stats_update(dio, bp);
                        if ((hce = hammer2_cluster_write) > 0) {
                                /*
                                 * Allows write-behind to keep the buffer
                                 * cache sane.
                                 */
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                bp->b_flags |= B_CLUSTEROK;
                                cluster_write(bp, peof, psize, hce);
                        } else {
                                /*
                                 * Allows dirty buffers to accumulate and
                                 * possibly be canceled (e.g. by a 'rm'),
                                 * will burst-write later.
                                 */
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (hammer2_inval_enable &&
                    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
                        ++hammer2_iod_invals;
                        bp->b_flags |= B_INVAL | B_RELBUF;
                        brelse(bp);
                } else if (orefs & HAMMER2_DIO_DIRTY) {
                        dio_write_stats_update(dio, bp);
                        bdwrite(bp);
                } else {
                        bp->b_flags |= B_INVAL | B_RELBUF;
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete dio is a free agent again and
         * can be ripped out from under us.
         *
         * We can cleanup our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */
        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        limit_dio = hammer2_limit_dio;
        if (limit_dio > 1024*1024)
                limit_dio = 1024*1024;
        if (hmp->iofree_count > limit_dio) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > limit_dio) {
                        info.count = hmp->iofree_count / 5;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
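
/*
 * Illustrative sketch (hypothetical numbers): with limit_dio capped at
 * 1024*1024 in hammer2_io_putblk() above, an iofree_count of e.g. 70000
 * against a limit of 60000 triggers the scan, which attempts to recycle
 * iofree_count / 5 = 14000 of the oldest unreferenced DIOs in one pass.
 */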
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        int act;

                        act = dio->act - (ticks - dio->ticks) / hz - 1;
                        if (act > 0) {
                                dio->act = act;
                                return 0;
                        }
                        dio->act = 0;
                }
                KKASSERT(dio->bp == NULL);
                if (info->count > 0) {
                        RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                        xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                        KKASSERT(xio == NULL);
                        --info->count;
                }
        }
        return 0;
}
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                         (dio->refs & (HAMMER2_DIO_MASK |
                                       HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}
/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}
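
/*
 * Illustrative sketch (hypothetical values): with bp->b_loffset = 0x12340000
 * (the 64KB physical buffer) and lbase = 0x1234800E (radix in the low 6
 * bits), the math above yields
 *
 *	off = (0x1234800E & ~0x3f) - 0x12340000 = 0x8000
 *
 * so the returned pointer addresses byte 32768 of bp->b_data.
 */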
/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX needs to be done
 * in the chain structure, but chain structure needs to be persistent as
 * well on refs=0 and it isn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
        hammer2_io_t *dio;
        uint64_t mask;

        if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
                mask = hammer2_io_mask(dio, chain->bref.data_off,
                                       chain->bytes);
                *maskp = mask;
                if ((dio->crc_good_mask & mask) == mask)
                        return 1;
                return 0;
        }
        *maskp = 0;

        return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
        if (sizeof(long) == 8) {
                atomic_set_long(&dio->crc_good_mask, mask);
        } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                               (uint32_t)mask);
                atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                               (uint32_t)(mask >> 32));
#else
                atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                               (uint32_t)(mask >> 32));
                atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                               (uint32_t)mask);
#endif
        }
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
        if (sizeof(long) == 8) {
                atomic_clear_long(&dio->crc_good_mask, mask);
        } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                 (uint32_t)mask);
                atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                 (uint32_t)(mask >> 32));
#else
                atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                 (uint32_t)(mask >> 32));
                atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                 (uint32_t)mask);
#endif
        }
}
/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and we
         * can't mess with it other than zero the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase,
                                                         dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * it in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY) {
                                                dio_write_stats_update(dio,
                                                                       dio->bp);
                                                bdwrite(dio->bp);
                                        } else {
                                                bqrelse(dio->bp);
                                        }
                                        dio->bp = NULL;
                                }
                                atomic_set_int(&iocb->flags,
                                               HAMMER2_IOCB_READ);
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (iocb->flags & HAMMER2_IOCB_ZERO)
                bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
        atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_complete(iocb);
}
static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.btype = btype;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        *diop = iocb.dio;

        return (iocb.error);
}
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize,
                               diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}
/*
 * This is called from the freemap to pre-validate a full-sized buffer
 * whose contents we don't care about, in order to prevent an unnecessary
 * read-before-write.
 */
void
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize)
{
        hammer2_io_t *dio = NULL;

        _hammer2_io_new(hmp, btype, lbase, lsize, &dio, HAMMER2_IOCB_QUICK);
        hammer2_io_bqrelse(&dio);
}
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;
        int hce;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and we
         * can't mess with it other than zero the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if ((hce = hammer2_cluster_read) > 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
                                             dio->psize, HAMMER2_PBUFSIZE*hce,
                                             HAMMER2_PBUFSIZE*(hce+1),
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}
int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.btype = btype;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        *diop = iocb.dio;

        return (iocb.error);
}
/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);

        return (0);     /* XXX */
}
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
        /* NOP */
}
void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrent with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
        hammer2_io_t *dio;
        int lsize;

        dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1);
        lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        atomic_set_64(&dio->dedup_ok_mask,
                      hammer2_dedup_mask(dio, bref->data_off, lsize));
        hammer2_io_putblk(&dio);
}
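
/*
 * Illustrative sketch (hypothetical values, and assuming hammer2_dedup_mask()
 * uses the same one-bit-per-1KB layout as hammer2_io_mask() above): a 16KB
 * block starting 32KB into its 64KB DIO buffer would set dedup_ok_mask bits
 * 32-47, i.e. 0x0000ffff00000000.  Bulkfree clears the same bits when it
 * frees the underlying storage, invalidating the dedup candidate.
 */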
/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
                        hammer2_off_t data_off, u_int bytes)
{
        hammer2_io_t *dio;

        if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
                return;
        if (btype != HAMMER2_BREF_TYPE_DATA)
                return;
        dio = hammer2_io_alloc(hmp, data_off, btype, 0);
        if (dio) {
                if (data_off < dio->pbase ||
                    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
                    dio->pbase + dio->psize) {
                        panic("hammer2_dedup_delete: DATAOFF BAD "
                              "%016jx/%d %016jx\n",
                              data_off, bytes, dio->pbase);
                }
                atomic_clear_64(&dio->dedup_ok_mask,
                                hammer2_dedup_mask(dio, data_off, bytes));
                hammer2_io_putblk(&dio);
        }
}
/*
 * Assert that dedup validation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off,
                        u_int bytes)
{
        hammer2_io_t *dio;

        dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA, 0);
        if (dio) {
                KASSERT((dio->dedup_ok_mask &
                         hammer2_dedup_mask(dio, data_off, bytes)) == 0,
                        ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
                         data_off, bytes,
                         hammer2_dedup_mask(dio, data_off, bytes),
                         dio->dedup_ok_mask));
                hammer2_io_putblk(&dio);
        }
}
static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
        long *counterp;

        if (bp->b_flags & B_DELWRI)
                return;

        switch(dio->btype) {
        case HAMMER2_BREF_TYPE_DATA:
                counterp = &hammer2_iod_file_write;
                break;
        case HAMMER2_BREF_TYPE_DIRENT:
        case HAMMER2_BREF_TYPE_INODE:
                counterp = &hammer2_iod_meta_write;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                counterp = &hammer2_iod_indr_write;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
                counterp = &hammer2_iod_fmap_write;
                break;
        default:
                counterp = &hammer2_iod_volu_write;
                break;
        }
        *counterp += dio->psize;
}