/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
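#if 0
/*
 * Illustrative sketch only (not compiled): a minimal synchronous read of a
 * 16KB logical block through this layer, using the hammer2_io_bread(),
 * hammer2_io_data() and hammer2_io_bqrelse() entry points defined below.
 * The function name and the destination pointer are hypothetical and error
 * handling is reduced to the bare minimum.  Note that lbase is a hammer2
 * media offset whose low bits encode the size radix (14 for 16KB), which is
 * what the KKASSERTs in this file expect.
 */
static int
example_read_16k(hammer2_dev_t *hmp, off_t lbase, void *copyto)
{
	hammer2_io_t *dio;
	int error;

	error = hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
				 lbase, 16384, &dio);
	if (error == 0)
		bcopy(hammer2_io_data(dio, lbase), copyto, 16384);
	hammer2_io_bqrelse(&dio);		/* drop our dio reference */
	return error;
}
#endif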
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
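#if 0
/*
 * Illustrative sketch only (not compiled): a worked example of the mask
 * computation above, assuming HAMMER2_PBUFSIZE is 64KB.  With dio->pbase at
 * 0x100000 and a 16KB chunk starting 16KB into that buffer (media offset
 * 0x104000, radix bits already masked off), the chunk begins at 1KB-slot
 * i = 0x4000 >> 10 = 16 and spans bytes >> 10 = 16 slots, so:
 *
 *	mask = ((1 << 16) - 1) << 16 = 0x00000000ffff0000
 *
 * i.e. one bit per 1KB sub-chunk of the 64KB physical buffer.
 */
	uint64_t mask = hammer2_io_mask(dio, 0x104000, 16384);
#endif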
#define HAMMER2_GETBLK_GOOD	0
#define HAMMER2_GETBLK_QUEUED	1
#define HAMMER2_GETBLK_OWNED	2
/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
		  hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	/*
	 * XXX after free, buffer reuse case w/ different size can clash
	 * with dio cache.  Let's avoid it for now.  Ultimately we need to
	 * invalidate the dio cache when freeing blocks to allow a mix
	 * of 16KB and 64KB block sizes.
	 */
	/*int psize = hammer2_devblksize(lsize);*/
	int psize = HAMMER2_PBUFSIZE;
	int refs;
	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_int(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		hammer2_spin_unsh(&hmp->io_spin);
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = iocb->btype;
		dio->refs = 1;
		hammer2_spin_init(&dio->spin, "h2dio");
		TAILQ_INIT(&dio->iocbq);
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_int(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}
	/*
	 * Obtain/Validate the buffer.
	 */
	iocb->dio = dio;

	if (dio->act < 5)	/* SMP race ok */
		++dio->act;
	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Issue the iocb immediately if the buffer is already good.
		 * Once set GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio's are not interlocked for
		 * the DIO_GOOD test.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			iocb->callback(iocb);
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			/*
			 * If DIO_INPROG is already set then set WAITING and
			 * queue the iocb.
			 */
			hammer2_spin_ex(&dio->spin);
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_WAITING)) {
				iocb->flags |= HAMMER2_IOCB_ONQ |
					       HAMMER2_IOCB_INPROG;
				TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else {
			/*
			 * If DIO_INPROG is not set then set it and issue the
			 * callback immediately to start I/O.
			 */
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_INPROG)) {
				iocb->flags |= HAMMER2_IOCB_INPROG;
				iocb->callback(iocb);
				break;
			}
			/* retry */
		}
		/* retry */
	}
}
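#if 0
/*
 * Illustrative sketch only (not compiled): the caller-side pattern used by
 * the _hammer2_io_new() and hammer2_io_bread() wrappers later in this file.
 * The caller fills in an iocb and hands it to hammer2_io_getblk(); the
 * callback either runs synchronously (buffer already good) or is queued, in
 * which case the caller waits for HAMMER2_IOCB_DONE.  my_callback is a
 * hypothetical iocb callback; everything else mirrors code in this file.
 */
static int
example_getblk_sync(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		    hammer2_io_t **diop)
{
	hammer2_iocb_t iocb;

	iocb.callback = my_callback;	/* hypothetical callback */
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.btype = btype;
	iocb.flags = 0;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	*diop = iocb.dio;		/* referenced dio returned to caller */
	return (iocb.error);
}
#endif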
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	off_t pmask;
	int psize = HAMMER2_PBUFSIZE;
	int orefs;
	int nrefs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio == NULL) {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}

	if ((atomic_fetchadd_int(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
		atomic_add_int(&dio->hmp->iofree_count, -1);
	hammer2_spin_unsh(&hmp->io_spin);

	if (dio->act < 5)	/* SMP race ok */
		++dio->act;
	/*
	 * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
	 * the system does not have the data already cached.
	 */
	nrefs = -1;
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		/*
		 * Use the buffer immediately if it is already good.
		 * Once set GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio is not interlocked for
		 * the DIO_GOOD test.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			break;
		}
		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.  INPROG might already be set, in which case
		 * there is no way we can do this non-blocking so we punt.
		 */
		if ((orefs & HAMMER2_DIO_INPROG))
			break;
		nrefs = orefs | HAMMER2_DIO_INPROG;
		if (atomic_cmpset_int(&dio->refs, orefs, nrefs) == 0)
			continue;
		/*
		 * We own DIO_INPROG, try to set DIO_GOOD.
		 *
		 * For now do not use GETBLK_NOWAIT because
		 */
		bp = dio->bp;
		dio->bp = NULL;
		if (bp == NULL) {
#if 0
			bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
			bread(hmp->devvp, dio->pbase, dio->psize, &bp);
		}
		if (bp) {
			if ((bp->b_flags & B_ERROR) == 0 &&
			    (bp->b_flags & B_CACHE)) {
				dio->bp = bp;	/* assign BEFORE setting flag */
				atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
			} else {
				bqrelse(bp);
				bp = NULL;
			}
		}

		/*
		 * Clear DIO_INPROG.
		 *
		 * This is actually a bit complicated, see
		 * hammer2_io_complete() for more information.
		 */
		iocb.dio = dio;
		iocb.flags = HAMMER2_IOCB_INPROG;
		hammer2_io_complete(&iocb);
		break;
	}
	/*
	 * Only return the dio if its buffer is good.
	 */
	if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
		hammer2_io_putblk(&dio);
	}
	return dio;
}
/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	hammer2_iocb_t *cbtmp;
	uint32_t orefs;
	uint32_t nrefs;
	uint32_t oflags;
	uint32_t nflags;
	/*
	 * If IOCB_INPROG was not set completion is synchronous due to the
	 * buffer already being good.  We can simply set IOCB_DONE and return.
	 * In this situation DIO_INPROG is not set and we have no visibility
	 * on dio->bp.
	 */
	if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
		if (dio->bp)
			BUF_KERNPROC(dio->bp);
		atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
		return;
	}
	/*
	 * The iocb was queued, obtained DIO_INPROG, and its callback was
	 * made.  The callback is now complete.  We still own DIO_INPROG.
	 *
	 * We can set DIO_GOOD if no error occurred, which gives certain
	 * stability guarantees to dio->bp and allows other accessors to
	 * short-cut access.  DIO_GOOD cannot be cleared until the last
	 * ref is dropped.
	 */
	KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
	if (dio->bp) {
		BUF_KERNPROC(dio->bp);
		if ((dio->bp->b_flags & B_ERROR) == 0) {
			KKASSERT(dio->bp->b_flags & B_CACHE);
			atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
		}
	}
	/*
	 * Clean up the dio before marking the iocb as being done.  If another
	 * iocb is pending we chain to it while leaving DIO_INPROG set (it
	 * will call io completion and presumably clear DIO_INPROG).
	 *
	 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
	 * finishing up the cbio.  This means that DIO_INPROG is cleared at
	 * the end of the chain before ANY of the cbios are marked done.
	 *
	 * NOTE: The TAILQ is not stable until the spin-lock is held.
	 */
	for (;;) {
		orefs = dio->refs;
		nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

		if (orefs & HAMMER2_DIO_WAITING) {
			hammer2_spin_ex(&dio->spin);
			cbtmp = TAILQ_FIRST(&dio->iocbq);
			if (cbtmp) {
				/*
				 * NOTE: flags not adjusted in this case.
				 *	 Flags will be adjusted by the last
				 *	 iocb.
				 */
				TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
				hammer2_spin_unex(&dio->spin);
				cbtmp->callback(cbtmp);	/* chained */
				break;
			} else if (atomic_cmpset_int(&dio->refs,
						     orefs, nrefs)) {
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
			break;
		} /* else retry */
		/* retry */
	}
	/*
	 * Mark the iocb as done and wakeup any waiters.  This is done after
	 * all iocb chains have been called back and after DIO_INPROG has been
	 * cleared.  This avoids races against ref count drops by the waiting
	 * threads (a hard but not impossible SMP race) which might result in
	 * a 1->0 transition of the refs while DIO_INPROG is still set.
	 */
	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags;
		nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
		nflags |= HAMMER2_IOCB_DONE;

		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			if (oflags & HAMMER2_IOCB_WAKEUP)
				wakeup(iocb);
			/* SMP: iocb is now stale */
			break;
		}
		/* retry */
	}
	iocb = NULL;
}
/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags | HAMMER2_IOCB_WAKEUP;
		if (oflags & HAMMER2_IOCB_DONE)
			break;
		tsleep_interlock(iocb, 0);
		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
		}
	}
}
/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	hammer2_iocb_t iocb;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	int orefs;
	int nrefs;
	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear flags and set INPROG.
	 *
	 * On the 1->0 transition if INPROG is already set, another thread
	 * is in lastdrop and we can just return after the transition.
	 *
	 * On any other transition we can generally just return.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs - 1;

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.
			 */
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.
			 */
			if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
				atomic_add_int(&hmp->iofree_count, 1);
				return;
			}
		} else {
			/*
			 * Normal drop case.
			 */
			if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
				return;
		}
		cpu_pause();
		/* retry */
	}
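#if 0
	/*
	 * Illustrative sketch only (not compiled): dio->refs packs the
	 * reference count (the HAMMER2_DIO_MASK bits) together with the
	 * state flags (GOOD, INPROG, WAITING, DIRTY, defined in hammer2.h),
	 * so the loop above can drop the count and perform the 1->0 state
	 * transition with a single atomic_cmpset_int().  Distilled, this is
	 * the load/modify/cmpset retry idiom used throughout this file;
	 * compute_new_value() is a hypothetical placeholder for whatever
	 * flag/count manipulation the caller needs.
	 */
static void
example_refs_transition(hammer2_io_t *dio)
{
	uint32_t orefs;
	uint32_t nrefs;

	for (;;) {
		orefs = dio->refs;		/* snapshot current word */
		cpu_ccfence();			/* don't reorder the snapshot */
		nrefs = compute_new_value(orefs);
		if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
			break;			/* won the race, done */
		cpu_pause();			/* lost the race, retry */
	}
}
#endif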
	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if (orefs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
		if (orefs & HAMMER2_DIO_DIRTY) {
			int hce;

			dio_write_stats_update(dio);
			if ((hce = hammer2_cluster_enable) > 0) {
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				cluster_write(bp, peof, psize, hce);
			} else {
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio);
			bdwrite(bp);
		} else {
			brelse(bp);
		}
	}
	/*
	 * The instant we call io_complete dio is a free agent again and
	 * can be ripped out from under us.
	 *
	 * We can clean up our final DIO_INPROG by simulating an iocb
	 * completion.
	 */
	hmp = dio->hmp;				/* extract fields */
	atomic_add_int(&hmp->iofree_count, 1);
	cpu_ccfence();

	iocb.dio = dio;
	iocb.flags = HAMMER2_IOCB_INPROG;
	hammer2_io_complete(&iocb);
	dio = NULL;				/* dio stale */
	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	if (hmp->iofree_count > 65536) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > 65536) {
			info.count = hmp->iofree_count / 4;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}
/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
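#if 0
/*
 * Illustrative sketch only (not compiled), assuming HAMMER2_PBUFSIZE is
 * 64KB: with the dio's buffer loaded at pbase 0x100000 (so bp->b_loffset is
 * also 0x100000), a 16KB logical block at media offset 0x104000 (size radix
 * 14 encoded in the low bits of lbase) resolves to bp->b_data + 0x4000 once
 * the radix bits are masked off.
 */
	char *data = hammer2_io_data(dio, 0x104000 | 14);
#endif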
#if 0
/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX needs to be done
 * in the chain structure, but chain structure needs to be persistent as
 * well on refs=0 and it isn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
	hammer2_io_t *dio;
	uint64_t mask;

	if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
		mask = hammer2_io_mask(dio, chain->bref.data_off, chain->bytes);
		*maskp = mask;
		if ((dio->crc_good_mask & mask) == mask)
			return 1;
		return 0;
	}
	*maskp = 0;

	return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_set_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
					(uint32_t)mask);
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
					(uint32_t)(mask >> 32));
#else
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
					(uint32_t)(mask >> 32));
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
					(uint32_t)mask);
#endif
		}
	}
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_clear_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					(uint32_t)mask);
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					(uint32_t)(mask >> 32));
#else
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					(uint32_t)(mask >> 32));
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					(uint32_t)mask);
#endif
		}
	}
}
#endif
/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (iocb->lsize == dio->psize) {
				/*
				 * Fully covered buffer, try to optimize to
				 * avoid any I/O.  We might already have the
				 * buffer due to iocb chaining.
				 */
				if (dio->bp == NULL) {
					dio->bp = getblk(dio->hmp->devvp,
							 dio->pbase, dio->psize,
							 gbctl, 0);
				}
				if (dio->bp) {
					vfs_bio_clrbuf(dio->bp);
					dio->bp->b_flags |= B_CACHE;
				}
			} else if (iocb->flags & HAMMER2_IOCB_QUICK) {
				/*
				 * Partial buffer, quick mode.  Do nothing.
				 * Do not instantiate the buffer or try to
				 * mark it B_CACHE because other portions of
				 * the buffer might have to be read by other
				 * accessors.
				 */
			} else if (dio->bp == NULL ||
				   (dio->bp->b_flags & B_CACHE) == 0) {
				/*
				 * Partial buffer, normal mode, requires
				 * read-before-write.  Chain the read.
				 *
				 * We might already have the buffer due to
				 * iocb chaining.  XXX unclear if we really
				 * need to write/release it and reacquire
				 * in that case.
				 *
				 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
				 */
				if (dio->bp) {
					if (dio->refs & HAMMER2_DIO_DIRTY) {
						dio_write_stats_update(dio);
						bdwrite(dio->bp);
					} else {
						bqrelse(dio->bp);
					}
					dio->bp = NULL;
				}
				atomic_set_int(&iocb->flags, HAMMER2_IOCB_READ);
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			} /* else buffer is good */
		} /* else callback from breadcb is complete */
	}
	if (dio->bp) {
		if (iocb->flags & HAMMER2_IOCB_ZERO)
			bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
		atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	hammer2_io_complete(iocb);
}
static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop, int flags)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;

	iocb.callback = hammer2_iocb_new_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = flags;
	iocb.btype = btype;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	dio = *diop = iocb.dio;

	return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize,
			       diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize,
			       diop, HAMMER2_IOCB_QUICK));
}
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	off_t peof;
	int error;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		int hce;

		if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
			/*
			 * Already good, likely due to being chained from
			 * another iocb.
			 */
			error = 0;
		} else if ((hce = hammer2_cluster_enable) > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*hce,
					     &dio->bp);
		} else {
			/*
			 * Synchronous I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			error = bread(dio->hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
	}
	hammer2_io_complete(iocb);
}
int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;

	iocb.callback = hammer2_iocb_bread_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.btype = btype;
	iocb.flags = 0;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	dio = *diop = iocb.dio;

	return (iocb.error);
}
/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
	hammer2_io_t *dio;

	dio = iocb->dio;
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	iocb->callback(iocb);
}
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}
void
hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
#if 0
	uint64_t mask = hammer2_io_mask(dio, off, bytes);
	hammer2_io_crc_clrmask(dio, mask);
#endif
	if ((u_int)dio->psize == bytes) {
		dio->bp->b_flags |= B_INVAL | B_RELBUF;
		/* dio->bp->b_flags &= ~B_CACHE; not needed */
	}
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
static
void
dio_write_stats_update(hammer2_io_t *dio)
{
	long *counterp;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}