hammer2 - Clean DIO invalidation flags in more cases
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "hammer2.h"
/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio);
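/*
 * RB-tree comparator, ordering dios by their physical buffer base
 * offset (pbase).
 */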
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};
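/*
 * Compute a mask with one bit set per 1KB sub-block of the dio covered by
 * (off, bytes).  Chunks smaller than 1KB are not supported and return 0;
 * a full HAMMER2_PBUFSIZE buffer starting at the dio base returns an
 * all-ones mask.  Only referenced by the (currently disabled) CRC
 * tracking code below.
 */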
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#define HAMMER2_GETBLK_GOOD	0
#define HAMMER2_GETBLK_QUEUED	1
#define HAMMER2_GETBLK_OWNED	2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
		  hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	/*
	 * XXX after free, buffer reuse case w/ different size can clash
	 * with dio cache.  Let's avoid it for now.  Ultimately we need to
	 * invalidate the dio cache when freeing blocks to allow a mix
	 * of 16KB and 64KB block sizes.
	 */
	/*int psize = hammer2_devblksize(lsize);*/
	int psize = HAMMER2_PBUFSIZE;
	uint64_t refs;
	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_64(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		hammer2_spin_unsh(&hmp->io_spin);
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = iocb->btype;
		dio->refs = 1;
		hammer2_spin_init(&dio->spin, "h2dio");
		TAILQ_INIT(&dio->iocbq);
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_64(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.
	 */
	iocb->dio = dio;

	if (dio->act < 5)		/* SMP race ok */
		++dio->act;

	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Issue the iocb immediately if the buffer is already good.
		 * Once set GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio's are not interlocked for
		 * the DIO_GOOD test.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			iocb->callback(iocb);
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			/*
			 * If DIO_INPROG is already set then set WAITING and
			 * queue the iocb.
			 */
			hammer2_spin_ex(&dio->spin);
			if (atomic_cmpset_64(&dio->refs, refs,
					     refs | HAMMER2_DIO_WAITING)) {
				iocb->flags |= HAMMER2_IOCB_ONQ |
					       HAMMER2_IOCB_INPROG;
				TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else {
			/*
			 * If DIO_INPROG is not set then set it and issue the
			 * callback immediately to start I/O.
			 */
			if (atomic_cmpset_64(&dio->refs, refs,
					     refs | HAMMER2_DIO_INPROG)) {
				iocb->flags |= HAMMER2_IOCB_INPROG;
				iocb->callback(iocb);
				break;
			}
			/* retry */
		}
		/* retry */
	}
}
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	off_t pmask;
	int psize = HAMMER2_PBUFSIZE;
	uint64_t orefs;
	uint64_t nrefs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio == NULL) {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}

	if ((atomic_fetchadd_64(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
		atomic_add_int(&dio->hmp->iofree_count, -1);
	hammer2_spin_unsh(&hmp->io_spin);

	if (dio->act < 5)		/* SMP race ok */
		++dio->act;

	/*
	 * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
	 * the system does not have the data already cached.
	 */
	nrefs = (uint64_t)-1;
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Issue the iocb immediately if the buffer is already good.
		 * Once set GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio is not interlocked for
		 * the DIO_GOOD test.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.  INPROG might already be set, in which case
		 * there is no way we can do this non-blocking so we punt.
		 */
		if ((orefs & HAMMER2_DIO_INPROG))
			break;
		nrefs = orefs | HAMMER2_DIO_INPROG;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs) == 0)
			continue;

		/*
		 * We own DIO_INPROG, try to set DIO_GOOD.
		 *
		 * For now do not use GETBLK_NOWAIT because
		 */
		bp = dio->bp;
		dio->bp = NULL;
		if (bp == NULL) {
#if 0
			bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
			bread(hmp->devvp, dio->pbase, dio->psize, &bp);
		}

		/*
		 * System buffer must also have remained cached.
		 */
		if (bp) {
			if ((bp->b_flags & B_ERROR) == 0 &&
			    (bp->b_flags & B_CACHE)) {
				dio->bp = bp;	/* assign BEFORE setting flag */
				atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
			} else {
				bqrelse(bp);
				bp = NULL;
			}
		}

		/*
		 * Clear DIO_INPROG.
		 *
		 * This is actually a bit complicated, see
		 * hammer2_io_complete() for more information.
		 */
		iocb.dio = dio;
		iocb.flags = HAMMER2_IOCB_INPROG;
		hammer2_io_complete(&iocb);
		break;
	}
	/*
	 * Only return the dio if its buffer is good.  If the buffer is not
	 * good be sure to clear INVALOK, meaning that invalidation is no
	 * longer acceptable.
	 */
	if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
		hammer2_io_putblk(&dio);
	}
	return dio;
}
/*
 * Make sure that all invalidation flags are cleared on the dio associated
 * with the specified data offset, if the dio exists.
 *
 * Called from bulkfree when a block becomes reusable to ensure that new
 * allocations do not accidentally discard the buffer later on.
 */
void
hammer2_io_resetinval(hammer2_dev_t *hmp, off_t data_off)
{
	hammer2_io_t *dio;

	data_off &= ~HAMMER2_PBUFMASK64;
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, data_off);
	if (dio)
		atomic_clear_64(&dio->refs, HAMMER2_DIO_INVALBITS);
	hammer2_spin_unsh(&hmp->io_spin);
}
/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	hammer2_iocb_t *cbtmp;
	uint64_t orefs;
	uint64_t nrefs;
	uint32_t oflags;
	uint32_t nflags;

	/*
	 * If IOCB_INPROG was not set completion is synchronous due to the
	 * buffer already being good.  We can simply set IOCB_DONE and return.
	 *
	 * In this situation DIO_INPROG is not set and we have no visibility
	 * on dio->bp.  We should not try to mess with dio->bp because another
	 * thread may be finishing up its processing.  dio->bp should already
	 * be set to BUF_KERNPROC()!
	 */
	if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
		atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
		return;
	}

	/*
	 * The iocb was queued, obtained DIO_INPROG, and its callback was
	 * made.  The callback is now complete.  We still own DIO_INPROG.
	 *
	 * We can set DIO_GOOD if no error occurred, which gives certain
	 * stability guarantees to dio->bp and allows other accessors to
	 * short-cut access.  DIO_GOOD cannot be cleared until the last
	 * ref is dropped.
	 */
	KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
	if (dio->bp) {
		BUF_KERNPROC(dio->bp);
		if ((dio->bp->b_flags & B_ERROR) == 0) {
			KKASSERT(dio->bp->b_flags & B_CACHE);
			atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
		}
	}

	/*
	 * Clean up the dio before marking the iocb as being done.  If another
	 * iocb is pending we chain to it while leaving DIO_INPROG set (it
	 * will call io completion and presumably clear DIO_INPROG).
	 *
	 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
	 * finishing up the cbio.  This means that DIO_INPROG is cleared at
	 * the end of the chain before ANY of the cbios are marked done.
	 *
	 * NOTE: The TAILQ is not stable until the spin-lock is held.
	 */
	for (;;) {
		orefs = dio->refs;
		nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

		if (orefs & HAMMER2_DIO_WAITING) {
			hammer2_spin_ex(&dio->spin);
			cbtmp = TAILQ_FIRST(&dio->iocbq);
			if (cbtmp) {
				/*
				 * NOTE: flags not adjusted in this case.
				 *	 Flags will be adjusted by the last
				 *	 iocb.
				 */
				TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
				hammer2_spin_unex(&dio->spin);
				cbtmp->callback(cbtmp);	/* chained */
				break;
			} else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			break;
		} /* else retry */
		/* retry */
	}

	/*
	 * Mark the iocb as done and wakeup any waiters.  This is done after
	 * all iocb chains have been called back and after DIO_INPROG has been
	 * cleared.  This avoids races against ref count drops by the waiting
	 * threads (a hard but not impossible SMP race) which might result in
	 * a 1->0 transition of the refs while DIO_INPROG is still set.
	 */
	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags;
		nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
		nflags |= HAMMER2_IOCB_DONE;

		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			if (oflags & HAMMER2_IOCB_WAKEUP)
				wakeup(iocb);
			/* SMP: iocb is now stale */
			break;
		}
		/* retry */
	}
	iocb = NULL;
}
/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags | HAMMER2_IOCB_WAKEUP;
		if (oflags & HAMMER2_IOCB_DONE)
			break;
		tsleep_interlock(iocb, 0);
		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
		}
	}
}
/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	hammer2_iocb_t iocb;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	while (dio->unused01) {
		tsleep(&dio->unused01, 0, "h2DEBUG", hz);
	}

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear flags and set INPROG.
	 *
	 * On the 1->0 transition if INPROG is already set, another thread
	 * is in lastdrop and we can just return after the transition.
	 *
	 * On any other transition we can generally just return.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs - 1;

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.
			 */
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs &= ~(HAMMER2_DIO_INVAL);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.
			 */
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				atomic_add_int(&hmp->iofree_count, 1);
				return;
			}
		} else {
			/*
			 * Normal drop case.
			 */
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if (orefs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
#if 1
		if (hammer2_inval_enable &&
		    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
			++hammer2_iod_invals;
			bp->b_flags |= B_INVAL | B_RELBUF;
			brelse(bp);
		} else
#endif
		if (orefs & HAMMER2_DIO_DIRTY) {
			int hce;

			dio_write_stats_update(dio);
			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else {
				/*
				 * Allows dirty buffers to accumulate and
				 * possibly be canceled (e.g. by a 'rm'),
				 * will burst-write later.
				 */
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
#if 1
		if (hammer2_inval_enable &&
		    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
			++hammer2_iod_invals;
			bp->b_flags |= B_INVAL | B_RELBUF;
			brelse(bp);
		} else
#endif
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio);
			bdwrite(bp);
		} else {
			brelse(bp);
		}
	}

	/*
	 * The instant we call io_complete dio is a free agent again and
	 * can be ripped out from under us.
	 *
	 * We can clean up our final DIO_INPROG by simulating an iocb
	 * completion.
	 */
	hmp = dio->hmp;				/* extract fields */
	atomic_add_int(&hmp->iofree_count, 1);
	cpu_ccfence();

	iocb.dio = dio;
	iocb.flags = HAMMER2_IOCB_INPROG;
	hammer2_io_complete(&iocb);
	dio = NULL;				/* dio stale */

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	if (hmp->iofree_count > 65536) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > 65536) {
			info.count = hmp->iofree_count / 4;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}
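/*
 * Free all dios collected on the supplied tree.  Each dio must already be
 * unreferenced with no buffer attached; the global dio count and the
 * device's iofree count are adjusted accordingly.
 */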
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}
/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
#if 0
/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX needs to be done
 * in the chain structure, but chain structure needs to be persistent as
 * well on refs=0 and it isn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
	hammer2_io_t *dio;
	uint64_t mask;

	if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
		mask = hammer2_io_mask(dio, chain->bref.data_off, chain->bytes);
		*maskp = mask;
		if ((dio->crc_good_mask & mask) == mask)
			return 1;
		return 0;
	}
	*maskp = 0;

	return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_set_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
				       (uint32_t)mask);
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
				       (uint32_t)(mask >> 32));
#else
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
				       (uint32_t)(mask >> 32));
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
				       (uint32_t)mask);
#endif
		}
	}
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_clear_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					 (uint32_t)mask);
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					 (uint32_t)(mask >> 32));
#else
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					 (uint32_t)(mask >> 32));
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					 (uint32_t)mask);
#endif
		}
	}
}
#endif
/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (iocb->lsize == dio->psize) {
				/*
				 * Fully covered buffer, try to optimize to
				 * avoid any I/O.  We might already have the
				 * buffer due to iocb chaining.
				 */
				if (dio->bp == NULL) {
					dio->bp = getblk(dio->hmp->devvp,
							 dio->pbase,
							 dio->psize,
							 gbctl, 0);
				}
				if (dio->bp) {
					vfs_bio_clrbuf(dio->bp);
					dio->bp->b_flags |= B_CACHE;
				}

				/*
				 * Invalidation is ok on newly allocated
				 * buffers which cover the entire buffer.
				 * Flag will be cleared on use by the de-dup
				 * code.
				 *
				 * hammer2_chain_modify() also checks this flag.
				 *
				 * QUICK mode is used by the freemap code to
				 * pre-validate a junk buffer to prevent an
				 * unnecessary read I/O.  We do NOT want
				 * to set INVALOK in that situation as the
				 * underlying allocations may be smaller.
				 */
				if ((iocb->flags & HAMMER2_IOCB_QUICK) == 0) {
					atomic_set_64(&dio->refs,
						      HAMMER2_DIO_INVALOK);
				}
			} else if (iocb->flags & HAMMER2_IOCB_QUICK) {
				/*
				 * Partial buffer, quick mode.  Do nothing.
				 * Do not instantiate the buffer or try to
				 * mark it B_CACHE because other portions of
				 * the buffer might have to be read by other
				 * accessors.
				 */
			} else if (dio->bp == NULL ||
				   (dio->bp->b_flags & B_CACHE) == 0) {
				/*
				 * Partial buffer, normal mode, requires
				 * read-before-write.  Chain the read.
				 *
				 * We might already have the buffer due to
				 * iocb chaining.  XXX unclear if we really
				 * need to write/release it and reacquire
				 * in that case.
				 *
				 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
				 */
				if (dio->bp) {
					if (dio->refs & HAMMER2_DIO_DIRTY) {
						dio_write_stats_update(dio);
						bdwrite(dio->bp);
					} else {
						bqrelse(dio->bp);
					}
					dio->bp = NULL;
				}
				atomic_set_int(&iocb->flags, HAMMER2_IOCB_READ);
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			} /* else buffer is good */
		} /* else callback from breadcb is complete */
	}
	if (dio->bp) {
		if (iocb->flags & HAMMER2_IOCB_ZERO)
			bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
		atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	hammer2_io_complete(iocb);
}
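/*
 * Common helper for the hammer2_io_new*() entry points.  Sets up an iocb
 * using hammer2_iocb_new_callback(), acquires the dio via
 * hammer2_io_getblk(), waits for completion if necessary, and returns the
 * dio in *diop along with any error code.
 */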
static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop, int flags)
{
	hammer2_iocb_t iocb;

	iocb.callback = hammer2_iocb_new_callback;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = flags;
	iocb.btype = btype;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	*diop = iocb.dio;

	return (iocb.error);
}
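/*
 * Acquire a new buffer covering (lbase, lsize) with the requested range
 * zero'd.
 */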
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize,
			       diop, HAMMER2_IOCB_ZERO));
}
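/*
 * Same as hammer2_io_new() but the requested range is not pre-zero'd.
 */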
int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}
/*
 * This is called from the freemap to pre-validate a full-sized buffer
 * whose contents we don't care about, in order to prevent an unnecessary
 * read-before-write.
 */
void
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize)
{
	hammer2_io_t *dio = NULL;

	_hammer2_io_new(hmp, btype, lbase, lsize, &dio, HAMMER2_IOCB_QUICK);
	hammer2_io_bqrelse(&dio);
}
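/*
 * Helper for hammer2_io_bread().  If we own DIO_INPROG, validate the
 * buffer by issuing a synchronous (possibly clustered) read, then finish
 * the iocb via hammer2_io_complete().
 */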
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	off_t peof;
	int error;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		int hce;

		if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
			/*
			 * Already good, likely due to being chained from
			 * another iocb.
			 */
			error = 0;
		} else if ((hce = hammer2_cluster_read) > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*hce,
					     &dio->bp);
		} else {
			/*
			 * Synchronous I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			error = bread(dio->hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
	}
	hammer2_io_complete(iocb);
}
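/*
 * Read the device buffer covering (lbase, lsize) and return the dio in
 * *diop.  Blocks until the iocb completes and returns its error code.
 */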
int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	hammer2_iocb_t iocb;

	iocb.callback = hammer2_iocb_bread_callback;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.btype = btype;
	iocb.flags = 0;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	*diop = iocb.dio;

	return (iocb.error);
}
/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
	hammer2_io_t *dio;

	dio = iocb->dio;
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	iocb->callback(iocb);
}
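/*
 * Write-back entry points.  All three currently just flag the dio dirty
 * and drop the reference; the actual disposition of the underlying buffer
 * (cluster_write vs bdwrite) is decided in hammer2_io_putblk().
 */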
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}
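/*
 * Flag the dio's buffer as dirty without dropping the caller's reference.
 */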
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
/*
 * Request an invalidation.  The hammer2_io code will oblige only if
 * DIO_INVALOK is also set.  INVALOK is cleared if the dio is used
 * in a dedup lookup and prevents invalidation of the dirty buffer.
 */
void
hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	if ((u_int)dio->psize == bytes)
		atomic_set_64(&dio->refs, HAMMER2_DIO_INVAL);
}
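/*
 * Release helpers.  Both simply drop the caller's reference; the brelse
 * vs bqrelse treatment of the underlying system buffer is handled later
 * in hammer2_io_putblk().
 */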
void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}
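/*
 * Returns non-zero if the dio is flagged dirty.
 */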
int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
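/*
 * Attribute a write of dio->psize bytes to the appropriate global counter
 * based on the dio's block reference type (file data, meta-data, indirect
 * blocks, freemap blocks, or everything else).
 */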
static
void
dio_write_stats_update(hammer2_io_t *dio)
{
	long *counterp;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}