2 * Copyright (c) 2013-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * Implements an abstraction layer for synchronous and asynchronous
39 * buffered device I/O. Can be used for OS-abstraction but the main
40 * purpose is to allow larger buffers to be used against hammer2_chain's
41 * using smaller allocations, without causing deadlocks.
44 static int hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
);
45 static void dio_write_stats_update(hammer2_io_t
*dio
);
48 hammer2_io_cmp(hammer2_io_t
*io1
, hammer2_io_t
*io2
)
50 if (io1
->pbase
< io2
->pbase
)
52 if (io1
->pbase
> io2
->pbase
)
57 RB_PROTOTYPE2(hammer2_io_tree
, hammer2_io
, rbnode
, hammer2_io_cmp
, off_t
);
58 RB_GENERATE2(hammer2_io_tree
, hammer2_io
, rbnode
, hammer2_io_cmp
,
61 struct hammer2_cleanupcb_info
{
62 struct hammer2_io_tree tmptree
;
68 hammer2_io_mask(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
73 if (bytes
< 1024) /* smaller chunks not supported */
77 * Calculate crc check mask for larger chunks
79 i
= (((off
& ~HAMMER2_OFF_MASK_RADIX
) - dio
->pbase
) &
80 HAMMER2_PBUFMASK
) >> 10;
81 if (i
== 0 && bytes
== HAMMER2_PBUFSIZE
)
83 mask
= ((uint64_t)1U << (bytes
>> 10)) - 1;
89 #define HAMMER2_GETBLK_GOOD 0
90 #define HAMMER2_GETBLK_QUEUED 1
91 #define HAMMER2_GETBLK_OWNED 2
94 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
97 hammer2_io_getblk(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
,
105 * XXX after free, buffer reuse case w/ different size can clash
106 * with dio cache. Let's avoid it for now. Ultimately we need to
107 * invalidate the dio cache when freeing blocks to allow a mix
108 * of 16KB and 64KB block sizes).
110 /*int psize = hammer2_devblksize(lsize);*/
111 int psize
= HAMMER2_PBUFSIZE
;
114 pmask
= ~(hammer2_off_t
)(psize
- 1);
116 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
117 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
118 pbase
= lbase
& pmask
;
119 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
120 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
121 pbase
, lbase
, lsize
, pmask
);
123 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
126 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
128 hammer2_spin_sh(&hmp
->io_spin
);
129 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
131 if ((atomic_fetchadd_64(&dio
->refs
, 1) &
132 HAMMER2_DIO_MASK
) == 0) {
133 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
135 hammer2_spin_unsh(&hmp
->io_spin
);
137 hammer2_spin_unsh(&hmp
->io_spin
);
138 dio
= kmalloc(sizeof(*dio
), M_HAMMER2
, M_INTWAIT
| M_ZERO
);
142 dio
->btype
= iocb
->btype
;
144 hammer2_spin_init(&dio
->spin
, "h2dio");
145 TAILQ_INIT(&dio
->iocbq
);
146 hammer2_spin_ex(&hmp
->io_spin
);
147 xio
= RB_INSERT(hammer2_io_tree
, &hmp
->iotree
, dio
);
149 atomic_add_int(&hammer2_dio_count
, 1);
150 hammer2_spin_unex(&hmp
->io_spin
);
152 if ((atomic_fetchadd_64(&xio
->refs
, 1) &
153 HAMMER2_DIO_MASK
) == 0) {
154 atomic_add_int(&xio
->hmp
->iofree_count
, -1);
156 hammer2_spin_unex(&hmp
->io_spin
);
157 kfree(dio
, M_HAMMER2
);
163 * Obtain/Validate the buffer.
167 if (dio
->act
< 5) /* SMP race ok */
175 * Issue the iocb immediately if the buffer is already good.
176 * Once set GOOD cannot be cleared until refs drops to 0.
178 * lfence required because dio's are not interlocked for
181 if (refs
& HAMMER2_DIO_GOOD
) {
183 iocb
->callback(iocb
);
188 * Try to own the DIO by setting INPROG so we can issue
191 if (refs
& HAMMER2_DIO_INPROG
) {
193 * If DIO_INPROG is already set then set WAITING and
196 hammer2_spin_ex(&dio
->spin
);
197 if (atomic_cmpset_64(&dio
->refs
, refs
,
198 refs
| HAMMER2_DIO_WAITING
)) {
199 iocb
->flags
|= HAMMER2_IOCB_ONQ
|
201 TAILQ_INSERT_TAIL(&dio
->iocbq
, iocb
, entry
);
202 hammer2_spin_unex(&dio
->spin
);
205 hammer2_spin_unex(&dio
->spin
);
209 * If DIO_INPROG is not set then set it and issue the
210 * callback immediately to start I/O.
212 if (atomic_cmpset_64(&dio
->refs
, refs
,
213 refs
| HAMMER2_DIO_INPROG
)) {
214 iocb
->flags
|= HAMMER2_IOCB_INPROG
;
215 iocb
->callback(iocb
);
225 * Quickly obtain a good DIO buffer, return NULL if the system no longer
229 hammer2_io_getquick(hammer2_dev_t
*hmp
, off_t lbase
, int lsize
)
236 int psize
= HAMMER2_PBUFSIZE
;
240 pmask
= ~(hammer2_off_t
)(psize
- 1);
242 KKASSERT((1 << (int)(lbase
& HAMMER2_OFF_MASK_RADIX
)) == lsize
);
243 lbase
&= ~HAMMER2_OFF_MASK_RADIX
;
244 pbase
= lbase
& pmask
;
245 if (pbase
== 0 || ((lbase
+ lsize
- 1) & pmask
) != pbase
) {
246 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
247 pbase
, lbase
, lsize
, pmask
);
249 KKASSERT(pbase
!= 0 && ((lbase
+ lsize
- 1) & pmask
) == pbase
);
252 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
254 hammer2_spin_sh(&hmp
->io_spin
);
255 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, pbase
);
257 hammer2_spin_unsh(&hmp
->io_spin
);
261 if ((atomic_fetchadd_64(&dio
->refs
, 1) & HAMMER2_DIO_MASK
) == 0)
262 atomic_add_int(&dio
->hmp
->iofree_count
, -1);
263 hammer2_spin_unsh(&hmp
->io_spin
);
265 if (dio
->act
< 5) /* SMP race ok */
269 * Obtain/validate the buffer. Do NOT issue I/O. Discard if
270 * the system does not have the data already cached.
272 nrefs
= (uint64_t)-1;
278 * Issue the iocb immediately if the buffer is already good.
279 * Once set GOOD cannot be cleared until refs drops to 0.
281 * lfence required because dio is not interlocked for
284 if (orefs
& HAMMER2_DIO_GOOD
) {
290 * Try to own the DIO by setting INPROG so we can issue
291 * I/O on it. INPROG might already be set, in which case
292 * there is no way we can do this non-blocking so we punt.
294 if ((orefs
& HAMMER2_DIO_INPROG
))
296 nrefs
= orefs
| HAMMER2_DIO_INPROG
;
297 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
) == 0)
301 * We own DIO_INPROG, try to set DIO_GOOD.
303 * For now do not use GETBLK_NOWAIT because
309 bp
= getblk(hmp
->devvp
, dio
->pbase
, dio
->psize
, 0, 0);
311 bread(hmp
->devvp
, dio
->pbase
, dio
->psize
, &bp
);
315 * System buffer must also have remained cached.
318 if ((bp
->b_flags
& B_ERROR
) == 0 &&
319 (bp
->b_flags
& B_CACHE
)) {
320 dio
->bp
= bp
; /* assign BEFORE setting flag */
321 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
331 * This is actually a bit complicated, see
332 * hammer2_io_complete() for more information.
335 iocb
.flags
= HAMMER2_IOCB_INPROG
;
336 hammer2_io_complete(&iocb
);
341 * Only return the dio if its buffer is good. If the buffer is not
342 * good be sure to clear INVALOK, meaning that invalidation is no
345 if ((dio
->refs
& HAMMER2_DIO_GOOD
) == 0) {
346 hammer2_io_putblk(&dio
);
352 * Make sure that INVALOK is cleared on the dio associated with the specified
353 * data offset. Called from bulkfree when a block becomes reusable.
356 hammer2_io_resetinval(hammer2_dev_t
*hmp
, off_t data_off
)
360 data_off
&= ~HAMMER2_PBUFMASK64
;
361 hammer2_spin_sh(&hmp
->io_spin
);
362 dio
= RB_LOOKUP(hammer2_io_tree
, &hmp
->iotree
, data_off
);
364 atomic_clear_64(&dio
->refs
, HAMMER2_DIO_INVALOK
);
365 hammer2_spin_unsh(&hmp
->io_spin
);
369 * The originator of the iocb is finished with it.
372 hammer2_io_complete(hammer2_iocb_t
*iocb
)
374 hammer2_io_t
*dio
= iocb
->dio
;
375 hammer2_iocb_t
*cbtmp
;
382 * If IOCB_INPROG was not set completion is synchronous due to the
383 * buffer already being good. We can simply set IOCB_DONE and return.
385 * In this situation DIO_INPROG is not set and we have no visibility
386 * on dio->bp. We should not try to mess with dio->bp because another
387 * thread may be finishing up its processing. dio->bp should already
388 * be set to BUF_KERNPROC()!
390 if ((iocb
->flags
& HAMMER2_IOCB_INPROG
) == 0) {
391 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_DONE
);
396 * The iocb was queued, obtained DIO_INPROG, and its callback was
397 * made. The callback is now complete. We still own DIO_INPROG.
399 * We can set DIO_GOOD if no error occurred, which gives certain
400 * stability guarantees to dio->bp and allows other accessors to
401 * short-cut access. DIO_GOOD cannot be cleared until the last
404 KKASSERT(dio
->refs
& HAMMER2_DIO_INPROG
);
406 BUF_KERNPROC(dio
->bp
);
407 if ((dio
->bp
->b_flags
& B_ERROR
) == 0) {
408 KKASSERT(dio
->bp
->b_flags
& B_CACHE
);
409 atomic_set_64(&dio
->refs
, HAMMER2_DIO_GOOD
);
414 * Clean up the dio before marking the iocb as being done. If another
415 * iocb is pending we chain to it while leaving DIO_INPROG set (it
416 * will call io completion and presumably clear DIO_INPROG).
418 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
419 * finishing up the cbio. This means that DIO_INPROG is cleared at
420 * the end of the chain before ANY of the cbios are marked done.
422 * NOTE: The TAILQ is not stable until the spin-lock is held.
426 nrefs
= orefs
& ~(HAMMER2_DIO_WAITING
| HAMMER2_DIO_INPROG
);
428 if (orefs
& HAMMER2_DIO_WAITING
) {
429 hammer2_spin_ex(&dio
->spin
);
430 cbtmp
= TAILQ_FIRST(&dio
->iocbq
);
433 * NOTE: flags not adjusted in this case.
434 * Flags will be adjusted by the last
437 TAILQ_REMOVE(&dio
->iocbq
, cbtmp
, entry
);
438 hammer2_spin_unex(&dio
->spin
);
439 cbtmp
->callback(cbtmp
); /* chained */
441 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
442 hammer2_spin_unex(&dio
->spin
);
445 hammer2_spin_unex(&dio
->spin
);
447 } else if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
454 * Mark the iocb as done and wakeup any waiters. This is done after
455 * all iocb chains have been called back and after DIO_INPROG has been
456 * cleared. This avoids races against ref count drops by the waiting
457 * threads (a hard but not impossible SMP race) which might result in
458 * a 1->0 transition of the refs while DIO_INPROG is still set.
461 oflags
= iocb
->flags
;
464 nflags
&= ~(HAMMER2_IOCB_WAKEUP
| HAMMER2_IOCB_INPROG
);
465 nflags
|= HAMMER2_IOCB_DONE
;
467 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
468 if (oflags
& HAMMER2_IOCB_WAKEUP
)
470 /* SMP: iocb is now stale */
480 * Wait for an iocb's I/O to finish.
483 hammer2_iocb_wait(hammer2_iocb_t
*iocb
)
489 oflags
= iocb
->flags
;
491 nflags
= oflags
| HAMMER2_IOCB_WAKEUP
;
492 if (oflags
& HAMMER2_IOCB_DONE
)
494 tsleep_interlock(iocb
, 0);
495 if (atomic_cmpset_int(&iocb
->flags
, oflags
, nflags
)) {
496 tsleep(iocb
, PINTERLOCKED
, "h2iocb", hz
);
503 * Release our ref on *diop.
505 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
506 * then dispose of the underlying buffer.
509 hammer2_io_putblk(hammer2_io_t
**diop
)
528 * On the 1->0 transition clear flags and set INPROG.
530 * On the 1->0 transition if INPROG is already set, another thread
531 * is in lastdrop and we can just return after the transition.
533 * On any other transition we can generally just return.
540 if ((orefs
& HAMMER2_DIO_MASK
) == 1 &&
541 (orefs
& HAMMER2_DIO_INPROG
) == 0) {
543 * Lastdrop case, INPROG can be set.
545 nrefs
&= ~(HAMMER2_DIO_GOOD
| HAMMER2_DIO_DIRTY
);
546 nrefs
&= ~(HAMMER2_DIO_INVAL
);
547 nrefs
|= HAMMER2_DIO_INPROG
;
548 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
550 } else if ((orefs
& HAMMER2_DIO_MASK
) == 1) {
552 * Lastdrop case, INPROG already set.
554 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
)) {
555 atomic_add_int(&hmp
->iofree_count
, 1);
562 if (atomic_cmpset_64(&dio
->refs
, orefs
, nrefs
))
570 * Lastdrop (1->0 transition). INPROG has been set, GOOD and DIRTY
573 * We can now dispose of the buffer, and should do it before calling
574 * io_complete() in case there's a race against a new reference
575 * which causes io_complete() to chain and instantiate the bp again.
582 if (orefs
& HAMMER2_DIO_GOOD
) {
583 KKASSERT(bp
!= NULL
);
585 if (hammer2_inval_enable
&&
586 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
587 ++hammer2_iod_invals
;
588 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
592 if (orefs
& HAMMER2_DIO_DIRTY
) {
595 dio_write_stats_update(dio
);
596 if ((hce
= hammer2_cluster_write
) > 0) {
598 * Allows write-behind to keep the buffer
601 peof
= (pbase
+ HAMMER2_SEGMASK64
) &
603 bp
->b_flags
|= B_CLUSTEROK
;
604 cluster_write(bp
, peof
, psize
, hce
);
607 * Allows dirty buffers to accumulate and
608 * possibly be canceled (e.g. by a 'rm'),
609 * will burst-write later.
611 bp
->b_flags
|= B_CLUSTEROK
;
614 } else if (bp
->b_flags
& (B_ERROR
| B_INVAL
| B_RELBUF
)) {
621 if (hammer2_inval_enable
&&
622 (orefs
& HAMMER2_DIO_INVALBITS
) == HAMMER2_DIO_INVALBITS
) {
623 ++hammer2_iod_invals
;
624 bp
->b_flags
|= B_INVAL
| B_RELBUF
;
628 if (orefs
& HAMMER2_DIO_DIRTY
) {
629 dio_write_stats_update(dio
);
637 * The instant we call io_complete dio is a free agent again and
638 * can be ripped out from under us.
640 * we can cleanup our final DIO_INPROG by simulating an iocb
643 hmp
= dio
->hmp
; /* extract fields */
644 atomic_add_int(&hmp
->iofree_count
, 1);
648 iocb
.flags
= HAMMER2_IOCB_INPROG
;
649 hammer2_io_complete(&iocb
);
650 dio
= NULL
; /* dio stale */
653 * We cache free buffers so re-use cases can use a shared lock, but
654 * if too many build up we have to clean them out.
656 if (hmp
->iofree_count
> 65536) {
657 struct hammer2_cleanupcb_info info
;
659 RB_INIT(&info
.tmptree
);
660 hammer2_spin_ex(&hmp
->io_spin
);
661 if (hmp
->iofree_count
> 65536) {
662 info
.count
= hmp
->iofree_count
/ 4;
663 RB_SCAN(hammer2_io_tree
, &hmp
->iotree
, NULL
,
664 hammer2_io_cleanup_callback
, &info
);
666 hammer2_spin_unex(&hmp
->io_spin
);
667 hammer2_io_cleanup(hmp
, &info
.tmptree
);
672 * Cleanup any dio's with (INPROG | refs) == 0.
674 * Called to clean up cached DIOs on umount after all activity has been
679 hammer2_io_cleanup_callback(hammer2_io_t
*dio
, void *arg
)
681 struct hammer2_cleanupcb_info
*info
= arg
;
684 if ((dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0) {
689 KKASSERT(dio
->bp
== NULL
);
690 RB_REMOVE(hammer2_io_tree
, &dio
->hmp
->iotree
, dio
);
691 xio
= RB_INSERT(hammer2_io_tree
, &info
->tmptree
, dio
);
692 KKASSERT(xio
== NULL
);
693 if (--info
->count
<= 0) /* limit scan */
700 hammer2_io_cleanup(hammer2_dev_t
*hmp
, struct hammer2_io_tree
*tree
)
704 while ((dio
= RB_ROOT(tree
)) != NULL
) {
705 RB_REMOVE(hammer2_io_tree
, tree
, dio
);
706 KKASSERT(dio
->bp
== NULL
&&
707 (dio
->refs
& (HAMMER2_DIO_MASK
| HAMMER2_DIO_INPROG
)) == 0);
708 kfree(dio
, M_HAMMER2
);
709 atomic_add_int(&hammer2_dio_count
, -1);
710 atomic_add_int(&hmp
->iofree_count
, -1);
715 * Returns a pointer to the requested data.
718 hammer2_io_data(hammer2_io_t
*dio
, off_t lbase
)
724 KKASSERT(bp
!= NULL
);
725 off
= (lbase
& ~HAMMER2_OFF_MASK_RADIX
) - bp
->b_loffset
;
726 KKASSERT(off
>= 0 && off
< bp
->b_bufsize
);
727 return(bp
->b_data
+ off
);
732 * Keep track of good CRCs in dio->good_crc_mask. XXX needs to be done
733 * in the chain structure, but chain structure needs to be persistent as
734 * well on refs=0 and it isn't.
737 hammer2_io_crc_good(hammer2_chain_t
*chain
, uint64_t *maskp
)
742 if ((dio
= chain
->dio
) != NULL
&& chain
->bytes
>= 1024) {
743 mask
= hammer2_io_mask(dio
, chain
->bref
.data_off
, chain
->bytes
);
745 if ((dio
->crc_good_mask
& mask
) == mask
)
755 hammer2_io_crc_setmask(hammer2_io_t
*dio
, uint64_t mask
)
758 if (sizeof(long) == 8) {
759 atomic_set_long(&dio
->crc_good_mask
, mask
);
761 #if _BYTE_ORDER == _LITTLE_ENDIAN
762 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
764 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
765 (uint32_t)(mask
>> 32));
767 atomic_set_int(&((int *)&dio
->crc_good_mask
)[0],
768 (uint32_t)(mask
>> 32));
769 atomic_set_int(&((int *)&dio
->crc_good_mask
)[1],
777 hammer2_io_crc_clrmask(hammer2_io_t
*dio
, uint64_t mask
)
780 if (sizeof(long) == 8) {
781 atomic_clear_long(&dio
->crc_good_mask
, mask
);
783 #if _BYTE_ORDER == _LITTLE_ENDIAN
784 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
786 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
787 (uint32_t)(mask
>> 32));
789 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[0],
790 (uint32_t)(mask
>> 32));
791 atomic_clear_int(&((int *)&dio
->crc_good_mask
)[1],
800 * Helpers for hammer2_io_new*() functions
804 hammer2_iocb_new_callback(hammer2_iocb_t
*iocb
)
806 hammer2_io_t
*dio
= iocb
->dio
;
807 int gbctl
= (iocb
->flags
& HAMMER2_IOCB_QUICK
) ? GETBLK_NOWAIT
: 0;
810 * If IOCB_INPROG is not set the dio already has a good buffer and we
811 * can't mess with it other than zero the requested range.
813 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
814 * do what needs to be done with dio->bp.
816 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
817 if ((iocb
->flags
& HAMMER2_IOCB_READ
) == 0) {
818 if (iocb
->lsize
== dio
->psize
) {
820 * Fully covered buffer, try to optimize to
821 * avoid any I/O. We might already have the
822 * buffer due to iocb chaining.
824 if (dio
->bp
== NULL
) {
825 dio
->bp
= getblk(dio
->hmp
->devvp
,
826 dio
->pbase
, dio
->psize
,
830 vfs_bio_clrbuf(dio
->bp
);
831 dio
->bp
->b_flags
|= B_CACHE
;
835 * Invalidation is ok on newly allocated
836 * buffers which cover the entire buffer.
837 * Flag will be cleared on use by the de-dup
840 * hammer2_chain_modify() also checks this flag.
842 * QUICK mode is used by the freemap code to
843 * pre-validate a junk buffer to prevent an
844 * unnecessary read I/O. We do NOT want
845 * to set INVALOK in that situation as the
846 * underlying allocations may be smaller.
848 if ((iocb
->flags
& HAMMER2_IOCB_QUICK
) == 0) {
849 atomic_set_64(&dio
->refs
,
850 HAMMER2_DIO_INVALOK
);
852 } else if (iocb
->flags
& HAMMER2_IOCB_QUICK
) {
854 * Partial buffer, quick mode. Do nothing.
855 * Do not instantiate the buffer or try to
856 * mark it B_CACHE because other portions of
857 * the buffer might have to be read by other
860 } else if (dio
->bp
== NULL
||
861 (dio
->bp
->b_flags
& B_CACHE
) == 0) {
863 * Partial buffer, normal mode, requires
864 * read-before-write. Chain the read.
866 * We might already have the buffer due to
867 * iocb chaining. XXX unclear if we really
868 * need to write/release it and reacquire
871 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
874 if (dio
->refs
& HAMMER2_DIO_DIRTY
) {
875 dio_write_stats_update(dio
);
882 atomic_set_int(&iocb
->flags
, HAMMER2_IOCB_READ
);
883 breadcb(dio
->hmp
->devvp
,
884 dio
->pbase
, dio
->psize
,
885 hammer2_io_callback
, iocb
);
887 } /* else buffer is good */
888 } /* else callback from breadcb is complete */
891 if (iocb
->flags
& HAMMER2_IOCB_ZERO
)
892 bzero(hammer2_io_data(dio
, iocb
->lbase
), iocb
->lsize
);
893 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
895 hammer2_io_complete(iocb
);
900 _hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
901 hammer2_io_t
**diop
, int flags
)
905 iocb
.callback
= hammer2_iocb_new_callback
;
914 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
915 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
916 hammer2_iocb_wait(&iocb
);
923 hammer2_io_new(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
926 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
,
927 diop
, HAMMER2_IOCB_ZERO
));
931 hammer2_io_newnz(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
934 return(_hammer2_io_new(hmp
, btype
, lbase
, lsize
, diop
, 0));
938 * This is called from the freemap to pre-validate a full-sized buffer
939 * whose contents we don't care about, in order to prevent an unnecessary
943 hammer2_io_newq(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
)
945 hammer2_io_t
*dio
= NULL
;
947 _hammer2_io_new(hmp
, btype
, lbase
, lsize
, &dio
, HAMMER2_IOCB_QUICK
);
948 hammer2_io_bqrelse(&dio
);
953 hammer2_iocb_bread_callback(hammer2_iocb_t
*iocb
)
955 hammer2_io_t
*dio
= iocb
->dio
;
960 * If IOCB_INPROG is not set the dio already has a good buffer and we
961 * can't mess with it other than zero the requested range.
963 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
964 * do what needs to be done with dio->bp.
966 if (iocb
->flags
& HAMMER2_IOCB_INPROG
) {
969 if (dio
->bp
&& (dio
->bp
->b_flags
& B_CACHE
)) {
971 * Already good, likely due to being chained from
975 } else if ((hce
= hammer2_cluster_read
) > 0) {
977 * Synchronous cluster I/O for now.
983 peof
= (dio
->pbase
+ HAMMER2_SEGMASK64
) &
985 error
= cluster_read(dio
->hmp
->devvp
, peof
, dio
->pbase
,
987 dio
->psize
, HAMMER2_PBUFSIZE
*hce
,
991 * Synchronous I/O for now.
997 error
= bread(dio
->hmp
->devvp
, dio
->pbase
,
998 dio
->psize
, &dio
->bp
);
1005 hammer2_io_complete(iocb
);
1009 hammer2_io_bread(hammer2_dev_t
*hmp
, int btype
, off_t lbase
, int lsize
,
1010 hammer2_io_t
**diop
)
1012 hammer2_iocb_t iocb
;
1014 iocb
.callback
= hammer2_iocb_bread_callback
;
1015 iocb
.cluster
= NULL
;
1023 hammer2_io_getblk(hmp
, lbase
, lsize
, &iocb
);
1024 if ((iocb
.flags
& HAMMER2_IOCB_DONE
) == 0)
1025 hammer2_iocb_wait(&iocb
);
1028 return (iocb
.error
);
1032 * System buf/bio async callback extracts the iocb and chains
1033 * to the iocb callback.
1036 hammer2_io_callback(struct bio
*bio
)
1038 struct buf
*dbp
= bio
->bio_buf
;
1039 hammer2_iocb_t
*iocb
= bio
->bio_caller_info1
.ptr
;
1043 if ((bio
->bio_flags
& BIO_DONE
) == 0)
1045 bio
->bio_flags
&= ~(BIO_DONE
| BIO_SYNC
);
1046 dio
->bp
= bio
->bio_buf
;
1047 iocb
->callback(iocb
);
1051 hammer2_io_bawrite(hammer2_io_t
**diop
)
1053 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1054 hammer2_io_putblk(diop
);
1058 hammer2_io_bdwrite(hammer2_io_t
**diop
)
1060 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1061 hammer2_io_putblk(diop
);
1065 hammer2_io_bwrite(hammer2_io_t
**diop
)
1067 atomic_set_64(&(*diop
)->refs
, HAMMER2_DIO_DIRTY
);
1068 hammer2_io_putblk(diop
);
1069 return (0); /* XXX */
1073 hammer2_io_setdirty(hammer2_io_t
*dio
)
1075 atomic_set_64(&dio
->refs
, HAMMER2_DIO_DIRTY
);
1079 * Request an invalidation. The hammer2_io code will oblige only if
1080 * DIO_INVALOK is also set. INVALOK is cleared if the dio is used
1081 * in a dedup lookup and prevents invalidation of the dirty buffer.
1084 hammer2_io_setinval(hammer2_io_t
*dio
, hammer2_off_t off
, u_int bytes
)
1086 if ((u_int
)dio
->psize
== bytes
)
1087 atomic_set_64(&dio
->refs
, HAMMER2_DIO_INVAL
);
1091 hammer2_io_brelse(hammer2_io_t
**diop
)
1093 hammer2_io_putblk(diop
);
1097 hammer2_io_bqrelse(hammer2_io_t
**diop
)
1099 hammer2_io_putblk(diop
);
1103 hammer2_io_isdirty(hammer2_io_t
*dio
)
1105 return((dio
->refs
& HAMMER2_DIO_DIRTY
) != 0);
1110 dio_write_stats_update(hammer2_io_t
*dio
)
1114 switch(dio
->btype
) {
1117 case HAMMER2_BREF_TYPE_DATA
:
1118 counterp
= &hammer2_iod_file_write
;
1120 case HAMMER2_BREF_TYPE_INODE
:
1121 counterp
= &hammer2_iod_meta_write
;
1123 case HAMMER2_BREF_TYPE_INDIRECT
:
1124 counterp
= &hammer2_iod_indr_write
;
1126 case HAMMER2_BREF_TYPE_FREEMAP_NODE
:
1127 case HAMMER2_BREF_TYPE_FREEMAP_LEAF
:
1128 counterp
= &hammer2_iod_fmap_write
;
1131 counterp
= &hammer2_iod_volu_write
;
1134 *counterp
+= dio
->psize
;