/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
44 static int hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
);
45 static void dio_write_stats_update(hammer2_io_t
*dio
);
48 hammer2_io_cmp(hammer2_io_t
*io1
, hammer2_io_t
*io2
)
50 if (io1
->pbase
< io2
->pbase
)
52 if (io1
->pbase
> io2
->pbase
)
/*
 * Red-black tree of cached dio's, indexed by pbase (the second lookup
 * key for the *2 macro variants).
 */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
             off_t, pbase);
61 struct hammer2_cleanupcb_info
{
62 struct hammer2_io_tree tmptree
;
68 hammer2_io_mask(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
73 if (bytes
< 1024) /* smaller chunks not supported */
77 * Calculate crc check mask for larger chunks
79 i
= (((off
& ~HAMMER2_OFF_MASK_RADIX
) - dio
->pbase
) &
80 HAMMER2_PBUFMASK
) >> 10;
81 if (i
== 0 && bytes
== HAMMER2_PBUFSIZE
)
83 mask
= ((uint64_t)1U << (bytes
>> 10)) - 1;
/*
 * Dispositions for getblk-style acquisition.
 */
#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2
94 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
97 hammer2_io_getblk(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
,
105 * XXX after free, buffer reuse case w/ different size can clash
106 * with dio cache. Lets avoid it for now. Ultimate we need to
107 * invalidate the dio cache when freeing blocks to allow a mix
108 * of 16KB and 64KB block sizes).
110 /*int psize = hammer2_devblksize(lsize);*/
111 int psize
= HAMMER2_PBUFSIZE
;
114 pmask
= ~(hammer2_off_t
)(psize
- 1);
116 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
117 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
118 pbase
= lbase
& pmask
;
119 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
120 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
121 pbase
, lbase
, lsize
, pmask
);
123 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
126 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
128 hammer2_spin_sh(&hmp
->io_spin
);
129 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
131 if ((atomic_fetchadd_64(&dio
->refs
, 1) &
132 HAMMER2_DIO_MASK
) == 0) {
133 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
135 hammer2_spin_unsh(&hmp
->io_spin
);
137 hammer2_spin_unsh(&hmp
->io_spin
);
138 dio
= kmalloc(sizeof(*dio
), M_HAMMER2
, M_INTWAIT
| M_ZERO
);
142 dio
->btype
= iocb
->btype
;
144 hammer2_spin_init(&dio
->spin
, "h2dio");
145 TAILQ_INIT(&dio
->iocbq
);
146 hammer2_spin_ex(&hmp
->io_spin
);
147 xio
= RB_INSERT(hammer2_io_tree
, &hmp
->iotree
, dio
);
149 atomic_add_int(&hammer2_dio_count
, 1);
150 hammer2_spin_unex(&hmp
->io_spin
);
152 if ((atomic_fetchadd_64(&xio
->refs
, 1) &
153 HAMMER2_DIO_MASK
) == 0) {
154 atomic_add_int(&xio
->hmp
->iofree_count
, -1);
156 hammer2_spin_unex(&hmp
->io_spin
);
157 kfree(dio
, M_HAMMER2
);
163 * Obtain/Validate the buffer.
167 if (dio
->act
< 5) /* SMP race ok */
175 * Issue the iocb immediately if the buffer is already good.
176 * Once set GOOD cannot be cleared until refs drops to 0.
178 * lfence required because dio's are not interlocked for
181 if (refs
& HAMMER2_DIO_GOOD
) {
183 iocb
->callback(iocb
);
188 * Try to own the DIO by setting INPROG so we can issue
191 if (refs
& HAMMER2_DIO_INPROG
) {
193 * If DIO_INPROG is already set then set WAITING and
196 hammer2_spin_ex(&dio
->spin
);
197 if (atomic_cmpset_64(&dio
->refs
, refs
,
198 refs
| HAMMER2_DIO_WAITING
)) {
199 iocb
->flags
|= HAMMER2_IOCB_ONQ
|
201 TAILQ_INSERT_TAIL(&dio
->iocbq
, iocb
, entry
);
202 hammer2_spin_unex(&dio
->spin
);
205 hammer2_spin_unex(&dio
->spin
);
209 * If DIO_INPROG is not set then set it and issue the
210 * callback immediately to start I/O.
212 if (atomic_cmpset_64(&dio
->refs
, refs
,
213 refs
| HAMMER2_DIO_INPROG
)) {
214 iocb
->flags
|= HAMMER2_IOCB_INPROG
;
215 iocb
->callback(iocb
);
225 * Quickly obtain a good DIO buffer, return NULL if the system no longer
229 hammer2_io_getquick(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
)
236 int psize
= HAMMER2_PBUFSIZE
;
240 pmask
= ~(hammer2_off_t
)(psize
- 1);
242 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
243 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
244 pbase
= lbase
& pmask
;
245 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
246 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
247 pbase
, lbase
, lsize
, pmask
);
249 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
252 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
254 hammer2_spin_sh(&hmp
->io_spin
);
255 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
257 hammer2_spin_unsh(&hmp
->io_spin
);
261 if ((atomic_fetchadd_64(&dio
->refs
, 1) & HAMMER2_DIO_MASK
) == 0)
262 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
263 hammer2_spin_unsh(&hmp
->io_spin
);
265 if (dio
->act
< 5) /* SMP race ok */
269 * Obtain/validate the buffer. Do NOT issue I/O. Discard if
270 * the system does not have the data already cached.
272 nrefs
= (uint64_t)-1;
278 * Issue the iocb immediately if the buffer is already good.
279 * Once set GOOD cannot be cleared until refs drops to 0.
281 * lfence required because dio is not interlockedf for
284 if (orefs
& HAMMER2_DIO_GOOD
) {
290 * Try to own the DIO by setting INPROG so we can issue
291 * I/O on it. INPROG might already be set, in which case
292 * there is no way we can do this non-blocking so we punt.
294 if ((orefs
& HAMMER2_DIO_INPROG
))
296 nrefs
= orefs
| HAMMER2_DIO_INPROG
;
297 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
) == 0)
301 * We own DIO_INPROG, try to set DIO_GOOD.
303 * For now do not use GETBLK_NOWAIT because
309 bp
= getblk(hmp
->devvp
, dio
->pbase
, dio
->psize
, 0, 0);
311 bread(hmp
->devvp
, dio
->pbase
, dio
->psize
, &bp
);
315 * System buffer must also have remained cached.
318 if ((bp
->b_flags
& B_ERROR
) == 0 &&
319 (bp
->b_flags
& B_CACHE
)) {
320 dio
->bp
= bp
; /* assign BEFORE setting flag */
321 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
331 * This is actually a bit complicated, see
332 * hammer2_io_complete() for more information.
335 iocb
.flags
= HAMMER2_IOCB_INPROG
;
336 hammer2_io_complete(&iocb
);
341 * Only return the dio if its buffer is good. If the buffer is not
342 * good be sure to clear INVALOK, meaning that invalidation is no
345 if ((dio
->refs
& HAMMER2_DIO_GOOD
) == 0) {
346 hammer2_io_putblk(&dio
);
352 * Make sure that INVALOK is cleared on the dio associated with the specified
353 * data offset. Called from bulkfree when a block becomes reusable.
356 hammer2_io_resetinval(hammer2_dev_t
*hmp
, off_t data_off
)
360 data_off
&= ~HAMMER2_PBUFMASK64
;
361 hammer2_spin_sh(&hmp
->io_spin
);
362 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, data_off
);
364 atomic_clear_64(&dio
->refs
, HAMMER2_DIO_INVALOK
);
365 hammer2_spin_unsh(&hmp
->io_spin
);
369 * The originator of the iocb is finished with it.
372 hammer2_io_complete(hammer2_iocb_t
*iocb
)
374 hammer2_io_t
*dio
= iocb
->dio
;
375 hammer2_iocb_t
*cbtmp
;
382 * If IOCB_INPROG was not set completion is synchronous due to the
383 * buffer already being good. We can simply set IOCB_DONE and return.
385 * In this situation DIO_INPROG is not set and we have no visibility
386 * on dio->bp. We should not try to mess with dio->bp because another
387 * thread may be finishing up its processing. dio->bp should already
388 * be set to BUF_KERNPROC()!
390 if ((iocb
->flags
& HAMMER2_IOCB_INPROG
) == 0) {
391 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_DONE
);
396 * The iocb was queued, obtained DIO_INPROG, and its callback was
397 * made. The callback is now complete. We still own DIO_INPROG.
399 * We can set DIO_GOOD if no error occurred, which gives certain
400 * stability guarantees to dio->bp and allows other accessors to
401 * short-cut access. DIO_GOOD cannot be cleared until the last
404 KKASSERT(dio
->refs
& HAMMER2_DIO_INPROG
);
406 BUF_KERNPROC(dio
->bp
);
407 if ((dio
->bp
->b_flags
& B_ERROR
) == 0) {
408 KKASSERT(dio
->bp
->b_flags
& B_CACHE
);
409 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
414 * Clean up the dio before marking the iocb as being done. If another
415 * iocb is pending we chain to it while leaving DIO_INPROG set (it
416 * will call io completion and presumably clear DIO_INPROG).
418 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
419 * finishing up the cbio. This means that DIO_INPROG is cleared at
420 * the end of the chain before ANY of the cbios are marked done.
422 * NOTE: The TAILQ is not stable until the spin-lock is held.
426 nrefs
= orefs
& ~(HAMMER2_DIO_WAITING
| HAMMER2_DIO_INPROG
);
428 if (orefs
& HAMMER2_DIO_WAITING
) {
429 hammer2_spin_ex(&dio
->spin
);
430 cbtmp
= TAILQ_FIRST(&dio
->iocbq
);
433 * NOTE: flags not adjusted in this case.
434 * Flags will be adjusted by the last
437 TAILQ_REMOVE(&dio
->iocbq
, cbtmp
, entry
);
438 hammer2_spin_unex(&dio
->spin
);
439 cbtmp
->callback(cbtmp
); /* chained */
441 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
442 hammer2_spin_unex(&dio
->spin
);
445 hammer2_spin_unex(&dio
->spin
);
447 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
454 * Mark the iocb as done and wakeup any waiters. This is done after
455 * all iocb chains have been called back and after DIO_INPROG has been
456 * cleared. This avoids races against ref count drops by the waiting
457 * threads (a hard but not impossible SMP race) which might result in
458 * a 1->0 transition of the refs while DIO_INPROG is still set.
461 oflags
= iocb
->flags
;
464 nflags
&= ~(HAMMER2_IOCB_WAKEUP
| HAMMER2_IOCB_INPROG
);
465 nflags
|= HAMMER2_IOCB_DONE
;
467 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
468 if (oflags
& HAMMER2_IOCB_WAKEUP
)
470 /* SMP: iocb is now stale */
480 * Wait for an iocb's I/O to finish.
483 hammer2_iocb_wait(hammer2_iocb_t
*iocb
)
489 oflags
= iocb
->flags
;
491 nflags
= oflags
| HAMMER2_IOCB_WAKEUP
;
492 if (oflags
& HAMMER2_IOCB_DONE
)
494 tsleep_interlock(iocb
, 0);
495 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
496 tsleep(iocb
, PINTERLOCKED
, "h2iocb", hz
);
503 * Release our ref on *diop.
505 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
506 * then dispose of the underlying buffer.
509 hammer2_io_putblk(hammer2_io_t
**diop
)
525 while (dio
->unused01
) {
526 tsleep(&dio
->unused01
, 0, "h2DEBUG", hz
);
532 * On the 1->0 transition clear flags and set INPROG.
534 * On the 1->0 transition if INPROG is already set, another thread
535 * is in lastdrop and we can just return after the transition.
537 * On any other transition we can generally just return.
544 if ((orefs
& HAMMER2_DIO_MASK
) == 1 &&
545 (orefs
& HAMMER2_DIO_INPROG
) == 0) {
547 * Lastdrop case, INPROG can be set.
549 nrefs
&= ~(HAMMER2_DIO_GOOD
| HAMMER2_DIO_DIRTY
);
550 nrefs
&= ~(HAMMER2_DIO_INVAL
);
551 nrefs
|= HAMMER2_DIO_INPROG
;
552 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
554 } else if ((orefs
& HAMMER2_DIO_MASK
) == 1) {
556 * Lastdrop case, INPROG already set.
558 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
559 atomic_add_int(&hmp
->iofree_count
, 1);
566 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
574 * Lastdrop (1->0 transition). INPROG has been set, GOOD and DIRTY
577 * We can now dispose of the buffer, and should do it before calling
578 * io_complete() in case there's a race against a new reference
579 * which causes io_complete() to chain and instantiate the bp again.
586 if (orefs
& HAMMER2_DIO_GOOD
) {
587 KKASSERT(bp
!= NULL
);
589 if (hammer2_inval_enable
&&
590 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
591 ++hammer2_iod_invals
;
592 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
596 if (orefs
& HAMMER2_DIO_DIRTY
) {
599 dio_write_stats_update(dio
);
600 if ((hce
= hammer2_cluster_write
) > 0) {
602 * Allows write-behind to keep the buffer
605 peof
= (pbase
+ HAMMER2_SEGMASK64
) &
607 bp
->b_flags
|= B_CLUSTEROK
;
608 cluster_write(bp
, peof
, psize
, hce
);
611 * Allows dirty buffers to accumulate and
612 * possibly be canceled (e.g. by a 'rm'),
613 * will burst-write later.
615 bp
->b_flags
|= B_CLUSTEROK
;
618 } else if (bp
->b_flags
& (B_ERROR
| B_INVAL
| B_RELBUF
)) {
625 if (hammer2_inval_enable
&&
626 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
627 ++hammer2_iod_invals
;
628 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
632 if (orefs
& HAMMER2_DIO_DIRTY
) {
633 dio_write_stats_update(dio
);
641 * The instant we call io_complete dio is a free agent again and
642 * can be ripped out from under us.
644 * we can cleanup our final DIO_INPROG by simulating an iocb
647 hmp
= dio
->hmp
; /* extract fields */
648 atomic_add_int(&hmp
->iofree_count
, 1);
652 iocb
.flags
= HAMMER2_IOCB_INPROG
;
653 hammer2_io_complete(&iocb
);
654 dio
= NULL
; /* dio stale */
657 * We cache free buffers so re-use cases can use a shared lock, but
658 * if too many build up we have to clean them out.
660 if (hmp
->iofree_count
> 65536) {
661 struct hammer2_cleanupcb_info info
;
663 RB_INIT(&info
.tmptree
);
664 hammer2_spin_ex(&hmp
->io_spin
);
665 if (hmp
->iofree_count
> 65536) {
666 info
.count
= hmp
->iofree_count
/ 4;
667 RB_SCAN(hammer2_io_tree
, &hmp
->iotree
, NULL
,
668 hammer2_io_cleanup_callback
, &info
);
670 hammer2_spin_unex(&hmp
->io_spin
);
671 hammer2_io_cleanup(hmp
, &info
.tmptree
);
676 * Cleanup any dio's with (INPROG | refs) == 0.
678 * Called to clean up cached DIOs on umount after all activity has been
683 hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
)
685 struct hammer2_cleanupcb_info
*info
= arg
;
688 if ((dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0) {
693 KKASSERT(dio
->bp
== NULL
);
694 RB_REMOVE(hammer2_io_tree
, &dio
->hmp
->iotree
, dio
);
695 xio
= RB_INSERT(hammer2_io_tree
, &info
->tmptree
, dio
);
696 KKASSERT(xio
== NULL
);
697 if (--info
->count
<= 0) /* limit scan */
704 hammer2_io_cleanup(hammer2_dev_t
*hmp
, struct hammer2_io_tree
*tree
)
708 while ((dio
= RB_ROOT(tree
)) != NULL
) {
709 RB_REMOVE(hammer2_io_tree
, tree
, dio
);
710 KKASSERT(dio
->bp
== NULL
&&
711 (dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0);
712 kfree(dio
, M_HAMMER2
);
713 atomic_add_int(&hammer2_dio_count
, -1);
714 atomic_add_int(&hmp
->iofree_count
, -1);
719 * Returns a pointer to the requested data.
722 hammer2_io_data(hammer2_io_t
*dio
, off_t lbase
)
728 KKASSERT(bp
!= NULL
);
729 off
= (lbase
& ~HAMMER2_OFF_MASK_RADIX
) - bp
->b_loffset
;
730 KKASSERT(off
>= 0 && off
< bp
->b_bufsize
);
731 return(bp
->b_data
+ off
);
736 * Keep track of good CRCs in dio->good_crc_mask. XXX needs to be done
737 * in the chain structure, but chain structure needs to be persistent as
738 * well on refs=0 and it isn't.
741 hammer2_io_crc_good(hammer2_chain_t
*chain
, uint64_t *maskp
)
746 if ((dio
= chain
->dio
) != NULL
&& chain
->bytes
>= 1024) {
747 mask
= hammer2_io_mask(dio
, chain
->bref
.data_off
, chain
->bytes
);
749 if ((dio
->crc_good_mask
& mask
) == mask
)
759 hammer2_io_crc_setmask(hammer2_io_t
*dio
, uint64_t mask
)
762 if (sizeof(long) == 8) {
763 atomic_set_long(&dio
->crc_good_mask
, mask
);
765 #if _BYTE_ORDER == _LITTLE_ENDIAN
766 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
768 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
769 (uint32_t)(mask
>> 32));
771 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
772 (uint32_t)(mask
>> 32));
773 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
781 hammer2_io_crc_clrmask(hammer2_io_t
*dio
, uint64_t mask
)
784 if (sizeof(long) == 8) {
785 atomic_clear_long(&dio
->crc_good_mask
, mask
);
787 #if _BYTE_ORDER == _LITTLE_ENDIAN
788 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
790 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
791 (uint32_t)(mask
>> 32));
793 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
794 (uint32_t)(mask
>> 32));
795 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
804 * Helpers for hammer2_io_new*() functions
808 hammer2_iocb_new_callback(hammer2_iocb_t
*iocb
)
810 hammer2_io_t
*dio
= iocb
->dio
;
811 int gbctl
= (iocb
->flags
& HAMMER2_IOCB_QUICK
) ? GETBLK_NOWAIT
: 0;
814 * If IOCB_INPROG is not set the dio already has a good buffer and we
815 * can't mess with it other than zero the requested range.
817 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
818 * do what needs to be done with dio->bp.
820 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
821 if ((iocb
->flags
& HAMMER2_IOCB_READ
) == 0) {
822 if (iocb
->lsize
== dio
->psize
) {
824 * Fully covered buffer, try to optimize to
825 * avoid any I/O. We might already have the
826 * buffer due to iocb chaining.
828 if (dio
->bp
== NULL
) {
829 dio
->bp
= getblk(dio
->hmp
->devvp
,
830 dio
->pbase
, dio
->psize
,
834 vfs_bio_clrbuf(dio
->bp
);
835 dio
->bp
->b_flags
|= B_CACHE
;
839 * Invalidation is ok on newly allocated
840 * buffers which cover the entire buffer.
841 * Flag will be cleared on use by the de-dup
844 * hammer2_chain_modify() also checks this flag.
846 * QUICK mode is used by the freemap code to
847 * pre-validate a junk buffer to prevent an
848 * unnecessary read I/O. We do NOT want
849 * to set INVALOK in that situation as the
850 * underlying allocations may be smaller.
852 if ((iocb
->flags
& HAMMER2_IOCB_QUICK
) == 0) {
853 atomic_set_64(&dio
->refs
,
854 HAMMER2_DIO_INVALOK
);
856 } else if (iocb
->flags
& HAMMER2_IOCB_QUICK
) {
858 * Partial buffer, quick mode. Do nothing.
859 * Do not instantiate the buffer or try to
860 * mark it B_CACHE because other portions of
861 * the buffer might have to be read by other
864 } else if (dio
->bp
== NULL
||
865 (dio
->bp
->b_flags
& B_CACHE
) == 0) {
867 * Partial buffer, normal mode, requires
868 * read-before-write. Chain the read.
870 * We might already have the buffer due to
871 * iocb chaining. XXX unclear if we really
872 * need to write/release it and reacquire
875 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
878 if (dio
->refs
& HAMMER2_DIO_DIRTY
) {
879 dio_write_stats_update(dio
);
886 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_READ
);
887 breadcb(dio
->hmp
->devvp
,
888 dio
->pbase
, dio
->psize
,
889 hammer2_io_callback
, iocb
);
891 } /* else buffer is good */
892 } /* else callback from breadcb is complete */
895 if (iocb
->flags
& HAMMER2_IOCB_ZERO
)
896 bzero(hammer2_io_data(dio
, iocb
->lbase
), iocb
->lsize
);
897 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
899 hammer2_io_complete(iocb
);
904 _hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
905 hammer2_io_t
**diop
, int flags
)
909 iocb
.callback
= hammer2_iocb_new_callback
;
918 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
919 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
920 hammer2_iocb_wait(&iocb
);
927 hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
930 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
,
931 diop
, HAMMER2_IOCB_ZERO
));
935 hammer2_io_newnz(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
938 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
, diop
, 0));
942 * This is called from the freemap to pre-validate a full-sized buffer
943 * whos contents we don't care about, in order to prevent an unnecessary
947 hammer2_io_newq(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
)
949 hammer2_io_t
*dio
= NULL
;
951 _hammer2_io_new(hmp
, btype
, lbase
, lsize
, &dio
, HAMMER2_IOCB_QUICK
);
952 hammer2_io_bqrelse(&dio
);
957 hammer2_iocb_bread_callback(hammer2_iocb_t
*iocb
)
959 hammer2_io_t
*dio
= iocb
->dio
;
964 * If IOCB_INPROG is not set the dio already has a good buffer and we
965 * can't mess with it other than zero the requested range.
967 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
968 * do what needs to be done with dio->bp.
970 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
973 if (dio
->bp
&& (dio
->bp
->b_flags
& B_CACHE
)) {
975 * Already good, likely due to being chained from
979 } else if ((hce
= hammer2_cluster_read
) > 0) {
981 * Synchronous cluster I/O for now.
987 peof
= (dio
->pbase
+ HAMMER2_SEGMASK64
) &
989 error
= cluster_read(dio
->hmp
->devvp
, peof
, dio
->pbase
,
991 dio
->psize
, HAMMER2_PBUFSIZE
*hce
,
995 * Synchronous I/O for now.
1001 error
= bread(dio
->hmp
->devvp
, dio
->pbase
,
1002 dio
->psize
, &dio
->bp
);
1009 hammer2_io_complete(iocb
);
1013 hammer2_io_bread(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
1014 hammer2_io_t
**diop
)
1016 hammer2_iocb_t iocb
;
1018 iocb
.callback
= hammer2_iocb_bread_callback
;
1019 iocb
.cluster
= NULL
;
1027 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
1028 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
1029 hammer2_iocb_wait(&iocb
);
1032 return (iocb
.error
);
1036 * System buf/bio async callback extracts the iocb and chains
1037 * to the iocb callback.
1040 hammer2_io_callback(struct bio
*bio
)
1042 struct buf
*dbp
= bio
->bio_buf
;
1043 hammer2_iocb_t
*iocb
= bio
->bio_caller_info1
.ptr
;
1047 if ((bio
->bio_flags
& BIO_DONE
) == 0)
1049 bio
->bio_flags
&= ~(BIO_DONE
| BIO_SYNC
);
1050 dio
->bp
= bio
->bio_buf
;
1051 iocb
->callback(iocb
);
1055 hammer2_io_bawrite(hammer2_io_t
**diop
)
1057 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1058 hammer2_io_putblk(diop
);
1062 hammer2_io_bdwrite(hammer2_io_t
**diop
)
1064 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1065 hammer2_io_putblk(diop
);
1069 hammer2_io_bwrite(hammer2_io_t
**diop
)
1071 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1072 hammer2_io_putblk(diop
);
1073 return (0); /* XXX */
1077 hammer2_io_setdirty(hammer2_io_t
*dio
)
1079 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
1083 * Request an invalidation. The hammer2_io code will oblige only if
1084 * DIO_INVALOK is also set. INVALOK is cleared if the dio is used
1085 * in a dedup lookup and prevents invalidation of the dirty buffer.
1088 hammer2_io_setinval(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
1090 if ((u_int
)dio
->psize
== bytes
)
1091 atomic_set_64(&dio
->refs
, HAMMER2_DIO_INVAL
);
1095 hammer2_io_brelse(hammer2_io_t
**diop
)
1097 hammer2_io_putblk(diop
);
1101 hammer2_io_bqrelse(hammer2_io_t
**diop
)
1103 hammer2_io_putblk(diop
);
1107 hammer2_io_isdirty(hammer2_io_t
*dio
)
1109 return((dio
->refs
& HAMMER2_DIO_DIRTY
) != 0);
1114 dio_write_stats_update(hammer2_io_t
*dio
)
1118 switch(dio
->btype
) {
1121 case HAMMER2_BREF_TYPE_DATA
:
1122 counterp
= &hammer2_iod_file_write
;
1124 case HAMMER2_BREF_TYPE_INODE
:
1125 counterp
= &hammer2_iod_meta_write
;
1127 case HAMMER2_BREF_TYPE_INDIRECT
:
1128 counterp
= &hammer2_iod_indr_write
;
1130 case HAMMER2_BREF_TYPE_FREEMAP_NODE
:
1131 case HAMMER2_BREF_TYPE_FREEMAP_LEAF
:
1132 counterp
= &hammer2_iod_fmap_write
;
1135 counterp
= &hammer2_iod_volu_write
;
1138 *counterp
+= dio
->psize
;