2 * Copyright (c) 2013-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * Implements an abstraction layer for synchronous and asynchronous
39 * buffered device I/O. Can be used for OS-abstraction but the main
40 * purpose is to allow larger buffers to be used against hammer2_chain's
41 * using smaller allocations, without causing deadlocks.
/*
 * File-local forward declarations for the dio-cache cleanup scan callback
 * and the dirty-write statistics helper.
 *
 * NOTE(review): this file's text is a line-sampled extraction fragment;
 * the leading integers on code lines are leftover original line numbers
 * and many original lines are missing.  Do not attempt to compile as-is.
 */
44 static int hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
);
45 static void dio_write_stats_update(hammer2_io_t
*dio
);
48 hammer2_io_cmp(hammer2_io_t
*io1
, hammer2_io_t
*io2
)
50 if (io1
->pbase
< io2
->pbase
)
52 if (io1
->pbase
> io2
->pbase
)
/*
 * Red-black tree glue for the per-device dio cache (hmp->iotree), keyed
 * on the pbase field via hammer2_io_cmp() with an off_t fast-path key.
 *
 * NOTE(review): the RB_GENERATE2 argument list is truncated in this
 * fragment -- the trailing key-field argument (presumably pbase) is
 * missing from the extracted text.
 */
57 RB_PROTOTYPE2(hammer2_io_tree
, hammer2_io
, rbnode
, hammer2_io_cmp
, off_t
);
58 RB_GENERATE2(hammer2_io_tree
, hammer2_io
, rbnode
, hammer2_io_cmp
,
/*
 * Scratch state passed to hammer2_io_cleanup_callback(): a private tree
 * collecting dio's to destroy outside the io_spin lock.
 *
 * NOTE(review): a scan-limit `count` member is referenced elsewhere in
 * this file (info.count / --info->count) but its declaration line is
 * missing from this fragment.
 */
61 struct hammer2_cleanupcb_info
{
62 struct hammer2_io_tree tmptree
;
68 hammer2_io_mask(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
73 if (bytes
< 1024) /* smaller chunks not supported */
77 * Calculate crc check mask for larger chunks
79 i
= (((off
& ~HAMMER2_OFF_MASK_RADIX
) - dio
->pbase
) &
80 HAMMER2_PBUFMASK
) >> 10;
81 if (i
== 0 && bytes
== HAMMER2_PBUFSIZE
)
83 mask
= ((uint64_t)1U << (bytes
>> 10)) - 1;
/* Internal result codes for the getblk/iocb acquisition path. */
89 #define HAMMER2_GETBLK_GOOD 0
90 #define HAMMER2_GETBLK_QUEUED 1
91 #define HAMMER2_GETBLK_OWNED 2
/*
 * hammer2_io_getblk(): allocate or locate the dio covering (lbase, lsize),
 * bump dio->refs to prevent destruction, then either (a) run the iocb
 * callback immediately when the buffer is already DIO_GOOD, (b) set
 * DIO_WAITING and queue the iocb on dio->iocbq when another thread owns
 * DIO_INPROG, or (c) acquire DIO_INPROG and issue the callback to start
 * the I/O.  Allocation races are resolved by re-looking-up under the
 * exclusive io_spin and kfree'ing the loser's dio.
 *
 * NOTE(review): this is a line-sampled extraction fragment -- braces,
 * returns, variable declarations and several statements are missing and
 * the leading integers are original line numbers.  Preserved verbatim;
 * do not attempt to compile as-is.
 */
94 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
97 hammer2_io_getblk(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
,
105 * XXX after free, buffer reuse case w/ different size can clash
106 * with dio cache. Lets avoid it for now. Ultimate we need to
107 * invalidate the dio cache when freeing blocks to allow a mix
108 * of 16KB and 64KB block sizes).
110 /*int psize = hammer2_devblksize(lsize);*/
111 int psize
= HAMMER2_PBUFSIZE
;
114 pmask
= ~(hammer2_off_t
)(psize
- 1);
116 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
117 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
118 pbase
= lbase
& pmask
;
119 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
120 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
121 pbase
, lbase
, lsize
, pmask
);
123 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
126 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
128 hammer2_spin_sh(&hmp
->io_spin
);
129 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
131 if ((atomic_fetchadd_64(&dio
->refs
, 1) &
132 HAMMER2_DIO_MASK
) == 0) {
133 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
135 hammer2_spin_unsh(&hmp
->io_spin
);
137 hammer2_spin_unsh(&hmp
->io_spin
);
138 dio
= kmalloc(sizeof(*dio
), M_HAMMER2
, M_INTWAIT
| M_ZERO
);
142 dio
->btype
= iocb
->btype
;
144 hammer2_spin_init(&dio
->spin
, "h2dio");
145 TAILQ_INIT(&dio
->iocbq
);
146 hammer2_spin_ex(&hmp
->io_spin
);
147 xio
= RB_INSERT(hammer2_io_tree
, &hmp
->iotree
, dio
);
149 atomic_add_int(&hammer2_dio_count
, 1);
150 hammer2_spin_unex(&hmp
->io_spin
);
152 if ((atomic_fetchadd_64(&xio
->refs
, 1) &
153 HAMMER2_DIO_MASK
) == 0) {
154 atomic_add_int(&xio
->hmp
->iofree_count
, -1);
156 hammer2_spin_unex(&hmp
->io_spin
);
157 kfree(dio
, M_HAMMER2
);
163 * Obtain/Validate the buffer.
167 if (dio
->act
< 5) /* SMP race ok */
175 * Issue the iocb immediately if the buffer is already good.
176 * Once set GOOD cannot be cleared until refs drops to 0.
178 * lfence required because dio's are not interlocked for
181 if (refs
& HAMMER2_DIO_GOOD
) {
183 iocb
->callback(iocb
);
188 * Try to own the DIO by setting INPROG so we can issue
191 if (refs
& HAMMER2_DIO_INPROG
) {
193 * If DIO_INPROG is already set then set WAITING and
196 hammer2_spin_ex(&dio
->spin
);
197 if (atomic_cmpset_64(&dio
->refs
, refs
,
198 refs
| HAMMER2_DIO_WAITING
)) {
199 iocb
->flags
|= HAMMER2_IOCB_ONQ
|
201 TAILQ_INSERT_TAIL(&dio
->iocbq
, iocb
, entry
);
202 hammer2_spin_unex(&dio
->spin
);
205 hammer2_spin_unex(&dio
->spin
);
209 * If DIO_INPROG is not set then set it and issue the
210 * callback immediately to start I/O.
212 if (atomic_cmpset_64(&dio
->refs
, refs
,
213 refs
| HAMMER2_DIO_INPROG
)) {
214 iocb
->flags
|= HAMMER2_IOCB_INPROG
;
215 iocb
->callback(iocb
);
/*
 * hammer2_io_getquick(): non-blocking attempt to obtain a DIO_GOOD dio
 * for (lbase, lsize).  Looks up (never allocates) the dio, refuses to
 * issue new read I/O -- it only validates against a system buffer that
 * is already B_CACHE -- and punts (returns without a good dio) if
 * DIO_INPROG is owned by someone else.  On failure the ref is dropped
 * via hammer2_io_putblk().
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * braces, returns and declarations are missing and the leading integers
 * are original line numbers.
 */
225 * Quickly obtain a good DIO buffer, return NULL if the system no longer
229 hammer2_io_getquick(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
)
236 int psize
= HAMMER2_PBUFSIZE
;
240 pmask
= ~(hammer2_off_t
)(psize
- 1);
242 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
243 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
244 pbase
= lbase
& pmask
;
245 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
246 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
247 pbase
, lbase
, lsize
, pmask
);
249 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
252 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
254 hammer2_spin_sh(&hmp
->io_spin
);
255 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
257 hammer2_spin_unsh(&hmp
->io_spin
);
261 if ((atomic_fetchadd_64(&dio
->refs
, 1) & HAMMER2_DIO_MASK
) == 0)
262 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
263 hammer2_spin_unsh(&hmp
->io_spin
);
265 if (dio
->act
< 5) /* SMP race ok */
269 * Obtain/validate the buffer. Do NOT issue I/O. Discard if
270 * the system does not have the data already cached.
272 nrefs
= (uint64_t)-1;
278 * Issue the iocb immediately if the buffer is already good.
279 * Once set GOOD cannot be cleared until refs drops to 0.
281 * lfence required because dio is not interlockedf for
284 if (orefs
& HAMMER2_DIO_GOOD
) {
290 * Try to own the DIO by setting INPROG so we can issue
291 * I/O on it. INPROG might already be set, in which case
292 * there is no way we can do this non-blocking so we punt.
294 if ((orefs
& HAMMER2_DIO_INPROG
))
296 nrefs
= orefs
| HAMMER2_DIO_INPROG
;
297 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
) == 0)
301 * We own DIO_INPROG, try to set DIO_GOOD.
303 * For now do not use GETBLK_NOWAIT because
309 bp
= getblk(hmp
->devvp
, dio
->pbase
, dio
->psize
, 0, 0);
311 bread(hmp
->devvp
, dio
->pbase
, dio
->psize
, &bp
);
315 * System buffer must also have remained cached.
318 if ((bp
->b_flags
& B_ERROR
) == 0 &&
319 (bp
->b_flags
& B_CACHE
)) {
320 dio
->bp
= bp
; /* assign BEFORE setting flag */
321 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
331 * This is actually a bit complicated, see
332 * hammer2_io_complete() for more information.
335 iocb
.flags
= HAMMER2_IOCB_INPROG
;
336 hammer2_io_complete(&iocb
);
341 * Only return the dio if its buffer is good. If the buffer is not
342 * good be sure to clear INVALOK, meaning that invalidation is no
345 if ((dio
->refs
& HAMMER2_DIO_GOOD
) == 0) {
346 hammer2_io_putblk(&dio
);
352 * Make sure that all invalidation flags are cleared on the dio associated
353 * with the specified data offset, if the dio exists.
355 * Called from bulkfree when a block becomes reusable to ensure that new
356 * allocations do not accidently discard the buffer later on.
359 hammer2_io_resetinval(hammer2_dev_t
*hmp
, off_t data_off
)
363 data_off
&= ~HAMMER2_PBUFMASK64
;
364 hammer2_spin_sh(&hmp
->io_spin
);
365 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, data_off
);
367 atomic_clear_64(&dio
->refs
, HAMMER2_DIO_INVALBITS
);
368 hammer2_spin_unsh(&hmp
->io_spin
);
/*
 * hammer2_io_complete(): completion processing for an iocb.  If the iocb
 * never took IOCB_INPROG the completion is synchronous (buffer already
 * good) and only IOCB_DONE is set.  Otherwise this thread still owns
 * DIO_INPROG: it BUF_KERNPROC()s the bp, sets DIO_GOOD on a clean
 * B_CACHE buffer, then either chains to the next queued iocb (leaving
 * DIO_INPROG set) or atomically clears WAITING|INPROG, and finally marks
 * the iocb DONE and wakes any waiter.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the cmpset retry loop structure, wakeup() call and several lines are
 * missing and the leading integers are original line numbers.
 */
372 * The originator of the iocb is finished with it.
375 hammer2_io_complete(hammer2_iocb_t
*iocb
)
377 hammer2_io_t
*dio
= iocb
->dio
;
378 hammer2_iocb_t
*cbtmp
;
385 * If IOCB_INPROG was not set completion is synchronous due to the
386 * buffer already being good. We can simply set IOCB_DONE and return.
388 * In this situation DIO_INPROG is not set and we have no visibility
389 * on dio->bp. We should not try to mess with dio->bp because another
390 * thread may be finishing up its processing. dio->bp should already
391 * be set to BUF_KERNPROC()!
393 if ((iocb
->flags
& HAMMER2_IOCB_INPROG
) == 0) {
394 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_DONE
);
399 * The iocb was queued, obtained DIO_INPROG, and its callback was
400 * made. The callback is now complete. We still own DIO_INPROG.
402 * We can set DIO_GOOD if no error occurred, which gives certain
403 * stability guarantees to dio->bp and allows other accessors to
404 * short-cut access. DIO_GOOD cannot be cleared until the last
407 KKASSERT(dio
->refs
& HAMMER2_DIO_INPROG
);
409 BUF_KERNPROC(dio
->bp
);
410 if ((dio
->bp
->b_flags
& B_ERROR
) == 0) {
411 KKASSERT(dio
->bp
->b_flags
& B_CACHE
);
412 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
417 * Clean up the dio before marking the iocb as being done. If another
418 * iocb is pending we chain to it while leaving DIO_INPROG set (it
419 * will call io completion and presumably clear DIO_INPROG).
421 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
422 * finishing up the cbio. This means that DIO_INPROG is cleared at
423 * the end of the chain before ANY of the cbios are marked done.
425 * NOTE: The TAILQ is not stable until the spin-lock is held.
429 nrefs
= orefs
& ~(HAMMER2_DIO_WAITING
| HAMMER2_DIO_INPROG
);
431 if (orefs
& HAMMER2_DIO_WAITING
) {
432 hammer2_spin_ex(&dio
->spin
);
433 cbtmp
= TAILQ_FIRST(&dio
->iocbq
);
436 * NOTE: flags not adjusted in this case.
437 * Flags will be adjusted by the last
440 TAILQ_REMOVE(&dio
->iocbq
, cbtmp
, entry
);
441 hammer2_spin_unex(&dio
->spin
);
442 cbtmp
->callback(cbtmp
); /* chained */
444 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
445 hammer2_spin_unex(&dio
->spin
);
448 hammer2_spin_unex(&dio
->spin
);
450 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
457 * Mark the iocb as done and wakeup any waiters. This is done after
458 * all iocb chains have been called back and after DIO_INPROG has been
459 * cleared. This avoids races against ref count drops by the waiting
460 * threads (a hard but not impossible SMP race) which might result in
461 * a 1->0 transition of the refs while DIO_INPROG is still set.
464 oflags
= iocb
->flags
;
467 nflags
&= ~(HAMMER2_IOCB_WAKEUP
| HAMMER2_IOCB_INPROG
);
468 nflags
|= HAMMER2_IOCB_DONE
;
470 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
471 if (oflags
& HAMMER2_IOCB_WAKEUP
)
473 /* SMP: iocb is now stale */
/*
 * hammer2_iocb_wait(): block the caller until the iocb's I/O completes.
 * Loops sampling iocb->flags, returning once IOCB_DONE is observed;
 * otherwise sets IOCB_WAKEUP via cmpset under a tsleep interlock and
 * sleeps on the iocb ("h2iocb").
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the enclosing for(;;) loop, break, and fence lines are missing.
 */
483 * Wait for an iocb's I/O to finish.
486 hammer2_iocb_wait(hammer2_iocb_t
*iocb
)
492 oflags
= iocb
->flags
;
494 nflags
= oflags
| HAMMER2_IOCB_WAKEUP
;
495 if (oflags
& HAMMER2_IOCB_DONE
)
497 tsleep_interlock(iocb
, 0);
498 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
499 tsleep(iocb
, PINTERLOCKED
, "h2iocb", hz
);
/*
 * hammer2_io_putblk(): drop our ref on *diop.  On the 1->0 transition
 * the thread atomically clears GOOD/DIRTY/INVAL and takes DIO_INPROG,
 * then disposes of the underlying buffer (invalidate, cluster_write /
 * delayed write, or release depending on flags), bumps iofree_count,
 * and finishes the INPROG state by simulating an iocb completion.
 * Finally, if more than 65536 free dio's have accumulated, a quarter of
 * them are scanned out via hammer2_io_cleanup_callback() and destroyed.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * declarations, bawrite/bdwrite/brelse/bqrelse calls and loop structure
 * are missing and the leading integers are original line numbers.
 */
506 * Release our ref on *diop.
508 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
509 * then dispose of the underlying buffer.
512 hammer2_io_putblk(hammer2_io_t
**diop
)
528 while (dio
->unused01
) {
529 tsleep(&dio
->unused01
, 0, "h2DEBUG", hz
);
535 * On the 1->0 transition clear flags and set INPROG.
537 * On the 1->0 transition if INPROG is already set, another thread
538 * is in lastdrop and we can just return after the transition.
540 * On any other transition we can generally just return.
547 if ((orefs
& HAMMER2_DIO_MASK
) == 1 &&
548 (orefs
& HAMMER2_DIO_INPROG
) == 0) {
550 * Lastdrop case, INPROG can be set.
552 nrefs
&= ~(HAMMER2_DIO_GOOD
| HAMMER2_DIO_DIRTY
);
553 nrefs
&= ~(HAMMER2_DIO_INVAL
);
554 nrefs
|= HAMMER2_DIO_INPROG
;
555 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
557 } else if ((orefs
& HAMMER2_DIO_MASK
) == 1) {
559 * Lastdrop case, INPROG already set.
561 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
562 atomic_add_int(&hmp
->iofree_count
, 1);
569 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
577 * Lastdrop (1->0 transition). INPROG has been set, GOOD and DIRTY
580 * We can now dispose of the buffer, and should do it before calling
581 * io_complete() in case there's a race against a new reference
582 * which causes io_complete() to chain and instantiate the bp again.
589 if (orefs
& HAMMER2_DIO_GOOD
) {
590 KKASSERT(bp
!= NULL
);
592 if (hammer2_inval_enable
&&
593 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
594 ++hammer2_iod_invals
;
595 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
599 if (orefs
& HAMMER2_DIO_DIRTY
) {
602 dio_write_stats_update(dio
);
603 if ((hce
= hammer2_cluster_write
) > 0) {
605 * Allows write-behind to keep the buffer
608 peof
= (pbase
+ HAMMER2_SEGMASK64
) &
610 bp
->b_flags
|= B_CLUSTEROK
;
611 cluster_write(bp
, peof
, psize
, hce
);
614 * Allows dirty buffers to accumulate and
615 * possibly be canceled (e.g. by a 'rm'),
616 * will burst-write later.
618 bp
->b_flags
|= B_CLUSTEROK
;
621 } else if (bp
->b_flags
& (B_ERROR
| B_INVAL
| B_RELBUF
)) {
628 if (hammer2_inval_enable
&&
629 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
630 ++hammer2_iod_invals
;
631 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
635 if (orefs
& HAMMER2_DIO_DIRTY
) {
636 dio_write_stats_update(dio
);
644 * The instant we call io_complete dio is a free agent again and
645 * can be ripped out from under us.
647 * we can cleanup our final DIO_INPROG by simulating an iocb
650 hmp
= dio
->hmp
; /* extract fields */
651 atomic_add_int(&hmp
->iofree_count
, 1);
655 iocb
.flags
= HAMMER2_IOCB_INPROG
;
656 hammer2_io_complete(&iocb
);
657 dio
= NULL
; /* dio stale */
660 * We cache free buffers so re-use cases can use a shared lock, but
661 * if too many build up we have to clean them out.
663 if (hmp
->iofree_count
> 65536) {
664 struct hammer2_cleanupcb_info info
;
666 RB_INIT(&info
.tmptree
);
667 hammer2_spin_ex(&hmp
->io_spin
);
668 if (hmp
->iofree_count
> 65536) {
669 info
.count
= hmp
->iofree_count
/ 4;
670 RB_SCAN(hammer2_io_tree
, &hmp
->iotree
, NULL
,
671 hammer2_io_cleanup_callback
, &info
);
673 hammer2_spin_unex(&hmp
->io_spin
);
674 hammer2_io_cleanup(hmp
, &info
.tmptree
);
/*
 * RB_SCAN callback: move any dio with no refs and no INPROG from the
 * device's iotree onto the caller's private tmptree (destroyed later by
 * hammer2_io_cleanup()), stopping after info->count dio's.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the declarations and return statements are missing.
 */
679 * Cleanup any dio's with (INPROG | refs) == 0.
681 * Called to clean up cached DIOs on umount after all activity has been
686 hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
)
688 struct hammer2_cleanupcb_info
*info
= arg
;
691 if ((dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0) {
696 KKASSERT(dio
->bp
== NULL
);
697 RB_REMOVE(hammer2_io_tree
, &dio
->hmp
->iotree
, dio
);
698 xio
= RB_INSERT(hammer2_io_tree
, &info
->tmptree
, dio
);
699 KKASSERT(xio
== NULL
);
700 if (--info
->count
<= 0) /* limit scan */
707 hammer2_io_cleanup(hammer2_dev_t
*hmp
, struct hammer2_io_tree
*tree
)
711 while ((dio
= RB_ROOT(tree
)) != NULL
) {
712 RB_REMOVE(hammer2_io_tree
, tree
, dio
);
713 KKASSERT(dio
->bp
== NULL
&&
714 (dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0);
715 kfree(dio
, M_HAMMER2
);
716 atomic_add_int(&hammer2_dio_count
, -1);
717 atomic_add_int(&hmp
->iofree_count
, -1);
722 * Returns a pointer to the requested data.
725 hammer2_io_data(hammer2_io_t
*dio
, off_t lbase
)
731 KKASSERT(bp
!= NULL
);
732 off
= (lbase
& ~HAMMER2_OFF_MASK_RADIX
) - bp
->b_loffset
;
733 KKASSERT(off
>= 0 && off
< bp
->b_bufsize
);
734 return(bp
->b_data
+ off
);
/*
 * hammer2_io_crc_good(): test whether the 1KB-granular CRC-good mask
 * for the chain's data range is fully set in dio->crc_good_mask,
 * computing the range mask via hammer2_io_mask().  The mask is also
 * reported through *maskp.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the return statements, *maskp stores, and the no-dio path are missing
 * from the fragment.
 */
739 * Keep track of good CRCs in dio->good_crc_mask. XXX needs to be done
740 * in the chain structure, but chain structure needs to be persistent as
741 * well on refs=0 and it isn't.
744 hammer2_io_crc_good(hammer2_chain_t
*chain
, uint64_t *maskp
)
749 if ((dio
= chain
->dio
) != NULL
&& chain
->bytes
>= 1024) {
750 mask
= hammer2_io_mask(dio
, chain
->bref
.data_off
, chain
->bytes
);
752 if ((dio
->crc_good_mask
& mask
) == mask
)
762 hammer2_io_crc_setmask(hammer2_io_t
*dio
, uint64_t mask
)
765 if (sizeof(long) == 8) {
766 atomic_set_long(&dio
->crc_good_mask
, mask
);
768 #if _BYTE_ORDER == _LITTLE_ENDIAN
769 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
771 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
772 (uint32_t)(mask
>> 32));
774 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
775 (uint32_t)(mask
>> 32));
776 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
784 hammer2_io_crc_clrmask(hammer2_io_t
*dio
, uint64_t mask
)
787 if (sizeof(long) == 8) {
788 atomic_clear_long(&dio
->crc_good_mask
, mask
);
790 #if _BYTE_ORDER == _LITTLE_ENDIAN
791 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
793 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
794 (uint32_t)(mask
>> 32));
796 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
797 (uint32_t)(mask
>> 32));
798 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
/*
 * iocb callback for the hammer2_io_new*() paths.  When this thread owns
 * IOCB_INPROG it prepares dio->bp for a "new" (write-only) buffer: a
 * fully-covering request gets a getblk+vfs_bio_clrbuf with B_CACHE and
 * (non-QUICK) DIO_INVALOK; a partial QUICK request does nothing; a
 * partial normal request chains an async breadcb() read-before-write
 * and returns without completing.  Finally the requested range is
 * zeroed when IOCB_ZERO is set, DIO_DIRTY is set, and the iocb is
 * completed.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * braces, returns and several statements (e.g. the bdwrite/release of a
 * dirty bp before the chained read) are missing.
 */
807 * Helpers for hammer2_io_new*() functions
811 hammer2_iocb_new_callback(hammer2_iocb_t
*iocb
)
813 hammer2_io_t
*dio
= iocb
->dio
;
814 int gbctl
= (iocb
->flags
& HAMMER2_IOCB_QUICK
) ? GETBLK_NOWAIT
: 0;
817 * If IOCB_INPROG is not set the dio already has a good buffer and we
818 * can't mess with it other than zero the requested range.
820 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
821 * do what needs to be done with dio->bp.
823 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
824 if ((iocb
->flags
& HAMMER2_IOCB_READ
) == 0) {
825 if (iocb
->lsize
== dio
->psize
) {
827 * Fully covered buffer, try to optimize to
828 * avoid any I/O. We might already have the
829 * buffer due to iocb chaining.
831 if (dio
->bp
== NULL
) {
832 dio
->bp
= getblk(dio
->hmp
->devvp
,
833 dio
->pbase
, dio
->psize
,
837 vfs_bio_clrbuf(dio
->bp
);
838 dio
->bp
->b_flags
|= B_CACHE
;
842 * Invalidation is ok on newly allocated
843 * buffers which cover the entire buffer.
844 * Flag will be cleared on use by the de-dup
847 * hammer2_chain_modify() also checks this flag.
849 * QUICK mode is used by the freemap code to
850 * pre-validate a junk buffer to prevent an
851 * unnecessary read I/O. We do NOT want
852 * to set INVALOK in that situation as the
853 * underlying allocations may be smaller.
855 if ((iocb
->flags
& HAMMER2_IOCB_QUICK
) == 0) {
856 atomic_set_64(&dio
->refs
,
857 HAMMER2_DIO_INVALOK
);
859 } else if (iocb
->flags
& HAMMER2_IOCB_QUICK
) {
861 * Partial buffer, quick mode. Do nothing.
862 * Do not instantiate the buffer or try to
863 * mark it B_CACHE because other portions of
864 * the buffer might have to be read by other
867 } else if (dio
->bp
== NULL
||
868 (dio
->bp
->b_flags
& B_CACHE
) == 0) {
870 * Partial buffer, normal mode, requires
871 * read-before-write. Chain the read.
873 * We might already have the buffer due to
874 * iocb chaining. XXX unclear if we really
875 * need to write/release it and reacquire
878 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
881 if (dio
->refs
& HAMMER2_DIO_DIRTY
) {
882 dio_write_stats_update(dio
);
889 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_READ
);
890 breadcb(dio
->hmp
->devvp
,
891 dio
->pbase
, dio
->psize
,
892 hammer2_io_callback
, iocb
);
894 } /* else buffer is good */
895 } /* else callback from breadcb is complete */
898 if (iocb
->flags
& HAMMER2_IOCB_ZERO
)
899 bzero(hammer2_io_data(dio
, iocb
->lbase
), iocb
->lsize
);
900 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
902 hammer2_io_complete(iocb
);
/*
 * Common entry for hammer2_io_new/newnz/newq: set up an on-stack iocb
 * with hammer2_iocb_new_callback(), run it through hammer2_io_getblk(),
 * and wait for completion if it did not finish synchronously.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the remaining iocb field initializations (lbase/lsize/btype/flags/
 * error), the *diop store, and the return are missing from the fragment.
 */
907 _hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
908 hammer2_io_t
**diop
, int flags
)
912 iocb
.callback
= hammer2_iocb_new_callback
;
920 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
921 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
922 hammer2_iocb_wait(&iocb
);
929 hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
932 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
,
933 diop
, HAMMER2_IOCB_ZERO
));
937 hammer2_io_newnz(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
940 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
, diop
, 0));
944 * This is called from the freemap to pre-validate a full-sized buffer
945 * whos contents we don't care about, in order to prevent an unnecessary
949 hammer2_io_newq(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
)
951 hammer2_io_t
*dio
= NULL
;
953 _hammer2_io_new(hmp
, btype
, lbase
, lsize
, &dio
, HAMMER2_IOCB_QUICK
);
954 hammer2_io_bqrelse(&dio
);
/*
 * iocb callback for the hammer2_io_bread() path.  When this thread owns
 * IOCB_INPROG it fills dio->bp: short-circuits if the bp is already
 * B_CACHE (chained), otherwise issues a synchronous cluster_read()
 * when hammer2_cluster_read is enabled, else a plain synchronous
 * bread().  Completes the iocb at the end.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the error-handling lines, peof masking, and trailing cluster_read
 * arguments are missing and the leading integers are original line
 * numbers.
 */
959 hammer2_iocb_bread_callback(hammer2_iocb_t
*iocb
)
961 hammer2_io_t
*dio
= iocb
->dio
;
966 * If IOCB_INPROG is not set the dio already has a good buffer and we
967 * can't mess with it other than zero the requested range.
969 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
970 * do what needs to be done with dio->bp.
972 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
975 if (dio
->bp
&& (dio
->bp
->b_flags
& B_CACHE
)) {
977 * Already good, likely due to being chained from
981 } else if ((hce
= hammer2_cluster_read
) > 0) {
983 * Synchronous cluster I/O for now.
989 peof
= (dio
->pbase
+ HAMMER2_SEGMASK64
) &
991 error
= cluster_read(dio
->hmp
->devvp
, peof
, dio
->pbase
,
993 dio
->psize
, HAMMER2_PBUFSIZE
*hce
,
997 * Synchronous I/O for now.
1003 error
= bread(dio
->hmp
->devvp
, dio
->pbase
,
1004 dio
->psize
, &dio
->bp
);
1011 hammer2_io_complete(iocb
);
/*
 * Synchronous buffered read of (lbase, lsize): set up an on-stack iocb
 * with hammer2_iocb_bread_callback(), run it through
 * hammer2_io_getblk(), wait for completion if needed, and return the
 * iocb's error code.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the remaining iocb field initializations and the *diop store are
 * missing from the fragment.
 */
1015 hammer2_io_bread(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
1016 hammer2_io_t
**diop
)
1018 hammer2_iocb_t iocb
;
1020 iocb
.callback
= hammer2_iocb_bread_callback
;
1028 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
1029 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
1030 hammer2_iocb_wait(&iocb
);
1033 return (iocb
.error
);
/*
 * System buf/bio async completion callback: recover the iocb from
 * bio_caller_info1.ptr, clear the DONE/SYNC bio flags, attach the
 * completed buffer to the dio, and chain into the iocb callback.
 *
 * NOTE(review): line-sampled extraction fragment preserved verbatim;
 * the dio extraction and the bpdone()-style handling for the !BIO_DONE
 * case are missing from the fragment.
 */
1037 * System buf/bio async callback extracts the iocb and chains
1038 * to the iocb callback.
1041 hammer2_io_callback(struct bio
*bio
)
1043 struct buf
*dbp
= bio
->bio_buf
;
1044 hammer2_iocb_t
*iocb
= bio
->bio_caller_info1
.ptr
;
1048 if ((bio
->bio_flags
& BIO_DONE
) == 0)
1050 bio
->bio_flags
&= ~(BIO_DONE
| BIO_SYNC
);
1051 dio
->bp
= bio
->bio_buf
;
1052 iocb
->callback(iocb
);
1056 hammer2_io_bawrite(hammer2_io_t
**diop
)
1058 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1059 hammer2_io_putblk(diop
);
1063 hammer2_io_bdwrite(hammer2_io_t
**diop
)
1065 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1066 hammer2_io_putblk(diop
);
1070 hammer2_io_bwrite(hammer2_io_t
**diop
)
1072 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1073 hammer2_io_putblk(diop
);
1074 return (0); /* XXX */
1078 hammer2_io_setdirty(hammer2_io_t
*dio
)
1080 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
1084 * Request an invalidation. The hammer2_io code will oblige only if
1085 * DIO_INVALOK is also set. INVALOK is cleared if the dio is used
1086 * in a dedup lookup and prevents invalidation of the dirty buffer.
1089 hammer2_io_setinval(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
1091 if ((u_int
)dio
->psize
== bytes
)
1092 atomic_set_64(&dio
->refs
, HAMMER2_DIO_INVAL
);
1096 hammer2_io_brelse(hammer2_io_t
**diop
)
1098 hammer2_io_putblk(diop
);
1102 hammer2_io_bqrelse(hammer2_io_t
**diop
)
1104 hammer2_io_putblk(diop
);
1108 hammer2_io_isdirty(hammer2_io_t
*dio
)
1110 return((dio
->refs
& HAMMER2_DIO_DIRTY
) != 0);
1115 dio_write_stats_update(hammer2_io_t
*dio
)
1119 switch(dio
->btype
) {
1122 case HAMMER2_BREF_TYPE_DATA
:
1123 counterp
= &hammer2_iod_file_write
;
1125 case HAMMER2_BREF_TYPE_DIRENT
:
1126 case HAMMER2_BREF_TYPE_INODE
:
1127 counterp
= &hammer2_iod_meta_write
;
1129 case HAMMER2_BREF_TYPE_INDIRECT
:
1130 counterp
= &hammer2_iod_indr_write
;
1132 case HAMMER2_BREF_TYPE_FREEMAP_NODE
:
1133 case HAMMER2_BREF_TYPE_FREEMAP_LEAF
:
1134 counterp
= &hammer2_iod_fmap_write
;
1137 counterp
= &hammer2_iod_volu_write
;
1140 *counterp
+= dio
->psize
;