nrelease: Add root_rw_mount="NO" to rc.conf to fix ISO boot
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
blob31be24efaf3c4d08eaeed22b227ff4ba5b02ffc5
1 /*
2 * Copyright (c) 2013-2023 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include "hammer2.h"
37 #define HAMMER2_DOP_READ 1
38 #define HAMMER2_DOP_NEW 2
39 #define HAMMER2_DOP_NEWNZ 3
40 #define HAMMER2_DOP_READQ 4
43 * Implements an abstraction layer for synchronous and asynchronous
44 * buffered device I/O. Can be used as an OS-abstraction but the main
45 * purpose is to allow larger buffers to be used against hammer2_chain's
46 * using smaller allocations, without causing deadlocks.
48 * The DIOs also record temporary state with limited persistence. This
49 * feature is used to keep track of dedupable blocks.
51 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53 static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp,
54 hammer2_off_t pbase, uint64_t *refsp);
55 static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp,
56 hammer2_io_t *dio, uint64_t *refsp);
57 static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit);
59 void
60 hammer2_io_hash_init(hammer2_dev_t *hmp)
62 hammer2_io_hash_t *hash;
63 int i;
65 for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
66 hash = &hmp->iohash[i];
67 hammer2_spin_init(&hash->spin, "h2iohash");
#ifdef HAMMER2_IO_DEBUG

/*
 * Record (file, line, refs, thread) into the dio's circular debug ring
 * for post-mortem analysis of ref/lock sequencing.  Compiled out unless
 * HAMMER2_IO_DEBUG is defined.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif
93 * Returns the DIO corresponding to the data|radix, creating it if necessary.
95 * If createit is 0, NULL can be returned indicating that the DIO does not
96 * exist. (btype) is ignored when createit is 0.
98 static __inline
99 hammer2_io_t *
100 hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
101 int createit, int *isgoodp)
103 hammer2_io_t *dio;
104 hammer2_io_t *xio;
105 hammer2_off_t lbase;
106 hammer2_off_t pbase;
107 hammer2_off_t pmask;
108 hammer2_volume_t *vol;
109 uint64_t refs;
110 int lsize;
111 int psize;
113 psize = HAMMER2_PBUFSIZE;
114 pmask = ~(hammer2_off_t)(psize - 1);
115 if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
116 lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
117 else
118 lsize = 0;
119 lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
120 pbase = lbase & pmask;
122 if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
123 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
124 pbase, lbase, lsize, pmask);
126 KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
127 *isgoodp = 0;
130 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
132 * If DIO_GOOD is set the ref should prevent it from being cleared
133 * out from under us, we can set *isgoodp, and the caller can operate
134 * on the buffer without any further interaction.
136 dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
137 if (dio) {
138 if (refs & HAMMER2_DIO_GOOD)
139 *isgoodp = 1;
140 } else if (createit) {
141 refs = 0;
142 vol = hammer2_get_volume(hmp, pbase);
143 dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
144 dio->hmp = hmp;
145 dio->devvp = vol->dev->devvp;
146 dio->dbase = vol->offset;
147 KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
148 dio->pbase = pbase;
149 dio->psize = psize;
150 dio->btype = btype;
151 dio->refs = refs + 1;
152 dio->act = 5;
153 xio = hammer2_io_hash_enter(hmp, dio, &refs);
154 if (xio == NULL) {
155 atomic_add_int(&hammer2_dio_count, 1);
156 } else {
157 if (refs & HAMMER2_DIO_GOOD)
158 *isgoodp = 1;
159 kfree_obj(dio, hmp->mio);
160 dio = xio;
162 } else {
163 return NULL;
165 dio->ticks = ticks;
166 if (dio->act < 10)
167 ++dio->act;
169 return dio;
173 * Acquire the requested dio. If DIO_GOOD is not set we must instantiate
174 * a buffer. If set the buffer already exists and is good to go.
176 hammer2_io_t *
177 _hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
178 int lsize, int op HAMMER2_IO_DEBUG_ARGS)
180 hammer2_io_t *dio;
181 hammer2_off_t dev_pbase;
182 off_t peof;
183 uint64_t orefs;
184 uint64_t nrefs;
185 int isgood;
186 int error;
187 int hce;
188 int bflags;
190 bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
191 bflags |= B_KVABIO;
193 KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
195 if (op == HAMMER2_DOP_READQ) {
196 dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
197 if (dio == NULL)
198 return NULL;
199 op = HAMMER2_DOP_READ;
200 } else {
201 dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
204 for (;;) {
205 orefs = dio->refs;
206 cpu_ccfence();
209 * Buffer is already good, handle the op and return.
211 if (orefs & HAMMER2_DIO_GOOD) {
212 if (isgood == 0)
213 cpu_mfence();
214 bkvasync(dio->bp);
216 switch(op) {
217 case HAMMER2_DOP_NEW:
218 bzero(hammer2_io_data(dio, lbase), lsize);
219 /* fall through */
220 case HAMMER2_DOP_NEWNZ:
221 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
222 break;
223 case HAMMER2_DOP_READ:
224 default:
225 /* nothing to do */
226 break;
228 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
229 return (dio);
233 * Try to own the DIO
235 if (orefs & HAMMER2_DIO_INPROG) {
236 nrefs = orefs | HAMMER2_DIO_WAITING;
237 tsleep_interlock(dio, 0);
238 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
239 tsleep(dio, PINTERLOCKED, "h2dio", hz);
241 /* retry */
242 } else {
243 nrefs = orefs | HAMMER2_DIO_INPROG;
244 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
245 break;
251 * We break to here if GOOD is not set and we acquired INPROG for
252 * the I/O.
254 KKASSERT(dio->bp == NULL);
255 if (btype == HAMMER2_BREF_TYPE_DATA)
256 hce = hammer2_cluster_data_read;
257 else
258 hce = hammer2_cluster_meta_read;
260 error = 0;
261 dev_pbase = dio->pbase - dio->dbase;
262 if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
263 dio->psize == lsize) {
264 switch(op) {
265 case HAMMER2_DOP_NEW:
266 case HAMMER2_DOP_NEWNZ:
267 dio->bp = getblk(dio->devvp,
268 dev_pbase, dio->psize,
269 GETBLK_KVABIO, 0);
270 if (op == HAMMER2_DOP_NEW) {
271 bkvasync(dio->bp);
272 bzero(dio->bp->b_data, dio->psize);
274 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
275 break;
276 case HAMMER2_DOP_READ:
277 default:
278 KKASSERT(dio->bp == NULL);
279 if (hce > 0) {
281 * Synchronous cluster I/O for now.
283 peof = (dio->pbase + HAMMER2_SEGMASK64) &
284 ~HAMMER2_SEGMASK64;
285 peof -= dio->dbase;
286 error = cluster_readx(dio->devvp,
287 peof, dev_pbase,
288 dio->psize, bflags,
289 dio->psize,
290 HAMMER2_PBUFSIZE*hce,
291 &dio->bp);
292 } else {
293 error = breadnx(dio->devvp, dev_pbase,
294 dio->psize, bflags,
295 NULL, NULL, 0, &dio->bp);
297 break;
299 } else {
300 if (hce > 0) {
302 * Synchronous cluster I/O for now.
304 peof = (dio->pbase + HAMMER2_SEGMASK64) &
305 ~HAMMER2_SEGMASK64;
306 peof -= dio->dbase;
307 error = cluster_readx(dio->devvp,
308 peof, dev_pbase, dio->psize,
309 bflags,
310 dio->psize, HAMMER2_PBUFSIZE*hce,
311 &dio->bp);
312 } else {
313 error = breadnx(dio->devvp, dev_pbase,
314 dio->psize, bflags,
315 NULL, NULL, 0, &dio->bp);
317 if (dio->bp) {
319 * Handle NEW flags
321 switch(op) {
322 case HAMMER2_DOP_NEW:
323 bkvasync(dio->bp);
324 bzero(hammer2_io_data(dio, lbase), lsize);
325 /* fall through */
326 case HAMMER2_DOP_NEWNZ:
327 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
328 break;
329 case HAMMER2_DOP_READ:
330 default:
331 break;
335 * Tell the kernel that the buffer cache is not
336 * meta-data based on the btype. This allows
337 * swapcache to distinguish between data and
338 * meta-data.
340 switch(btype) {
341 case HAMMER2_BREF_TYPE_DATA:
342 dio->bp->b_flags |= B_NOTMETA;
343 break;
344 default:
345 break;
350 if (dio->bp) {
351 bkvasync(dio->bp);
352 BUF_KERNPROC(dio->bp);
353 dio->bp->b_flags &= ~B_AGE;
354 /* dio->bp->b_debug_info2 = dio; */
356 dio->error = error;
359 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
361 for (;;) {
362 orefs = dio->refs;
363 cpu_ccfence();
364 nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
365 if (error == 0)
366 nrefs |= HAMMER2_DIO_GOOD;
367 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
368 if (orefs & HAMMER2_DIO_WAITING)
369 wakeup(dio);
370 break;
372 cpu_pause();
375 /* XXX error handling */
376 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
378 return dio;
382 * Release our ref on *diop.
384 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
385 * of dio->bp. Then we clean up DIO_INPROG and DIO_WAITING.
387 void
388 _hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
390 hammer2_dev_t *hmp;
391 hammer2_io_t *dio;
392 struct buf *bp;
393 off_t pbase;
394 int psize;
395 int dio_limit;
396 uint64_t orefs;
397 uint64_t nrefs;
399 dio = *diop;
400 *diop = NULL;
401 hmp = dio->hmp;
402 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
404 KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);
407 * Drop refs.
409 * On the 1->0 transition clear GOOD and set INPROG, and break.
410 * On any other transition we can return early.
412 for (;;) {
413 orefs = dio->refs;
414 cpu_ccfence();
416 if ((orefs & HAMMER2_DIO_MASK) == 1 &&
417 (orefs & HAMMER2_DIO_INPROG) == 0) {
419 * Lastdrop case, INPROG can be set. GOOD must be
420 * cleared to prevent the getblk shortcut.
422 nrefs = orefs - 1;
423 nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
424 nrefs |= HAMMER2_DIO_INPROG;
425 if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
426 break;
427 } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
429 * Lastdrop case, INPROG already set. We must
430 * wait for INPROG to clear.
432 nrefs = orefs | HAMMER2_DIO_WAITING;
433 tsleep_interlock(dio, 0);
434 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
435 tsleep(dio, PINTERLOCKED, "h2dio", hz);
437 /* retry */
438 } else {
440 * Normal drop case.
442 nrefs = orefs - 1;
443 if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
444 return;
445 /* retry */
447 cpu_pause();
448 /* retry */
452 * Lastdrop (1->0 transition). INPROG has been set, GOOD and DIRTY
453 * have been cleared. iofree_count has not yet been incremented,
454 * note that another accessor race will decrement iofree_count so
455 * we have to increment it regardless.
456 * We can now dispose of the buffer.
458 pbase = dio->pbase;
459 psize = dio->psize;
460 bp = dio->bp;
461 dio->bp = NULL;
463 if ((orefs & HAMMER2_DIO_GOOD) && bp) {
465 * Non-errored disposal of bp
467 if (orefs & HAMMER2_DIO_DIRTY) {
468 dio_write_stats_update(dio, bp);
471 * Allows dirty buffers to accumulate and
472 * possibly be canceled (e.g. by a 'rm'),
473 * by default we will burst-write later.
475 * We generally do NOT want to issue an actual
476 * b[a]write() or cluster_write() here. Due to
477 * the way chains are locked, buffers may be cycled
478 * in and out quite often and disposal here can cause
479 * multiple writes or write-read stalls.
481 * If FLUSH is set we do want to issue the actual
482 * write. This typically occurs in the write-behind
483 * case when writing to large files.
485 off_t peof;
486 int hce;
487 if (dio->refs & HAMMER2_DIO_FLUSH) {
488 if ((hce = hammer2_cluster_write) != 0) {
489 peof = (pbase + HAMMER2_SEGMASK64) &
490 ~HAMMER2_SEGMASK64;
491 peof -= dio->dbase;
492 bp->b_flags |= B_CLUSTEROK;
493 cluster_write(bp, peof, psize, hce);
494 } else {
495 bp->b_flags &= ~B_CLUSTEROK;
496 bawrite(bp);
498 } else {
499 bp->b_flags &= ~B_CLUSTEROK;
500 bdwrite(bp);
502 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
503 brelse(bp);
504 } else {
505 bqrelse(bp);
507 } else if (bp) {
509 * Errored disposal of bp
511 brelse(bp);
515 * Update iofree_count before disposing of the dio
517 hmp = dio->hmp;
518 atomic_add_int(&hmp->iofree_count, 1);
521 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
523 * Also clear FLUSH as it was handled above.
525 for (;;) {
526 orefs = dio->refs;
527 cpu_ccfence();
528 nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
529 HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
530 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
531 if (orefs & HAMMER2_DIO_WAITING)
532 wakeup(dio);
533 break;
535 cpu_pause();
539 * We cache free buffers so re-use cases can use a shared lock, but
540 * if too many build up we have to clean them out.
542 dio_limit = hammer2_dio_limit;
543 if (dio_limit < 256)
544 dio_limit = 256;
545 if (dio_limit > 1024*1024)
546 dio_limit = 1024*1024;
547 if (hmp->iofree_count > dio_limit)
548 hammer2_io_hash_cleanup(hmp, dio_limit);
552 * Returns a pointer to the requested data.
554 char *
555 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
557 struct buf *bp;
558 int off;
560 bp = dio->bp;
561 KKASSERT(bp != NULL);
562 bkvasync(bp);
563 lbase -= dio->dbase;
564 off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
565 KKASSERT(off >= 0 && off < bp->b_bufsize);
566 return(bp->b_data + off);
570 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
571 hammer2_io_t **diop)
573 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
574 return ((*diop)->error);
578 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
579 hammer2_io_t **diop)
581 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
582 return ((*diop)->error);
586 _hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
587 hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
589 #ifdef HAMMER2_IO_DEBUG
590 hammer2_io_t *dio;
591 #endif
593 *diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
594 HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
595 #ifdef HAMMER2_IO_DEBUG
596 if ((dio = *diop) != NULL) {
597 #if 0
598 int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
599 dio->debug_data[i] = debug_data;
600 #endif
602 #endif
603 return ((*diop)->error);
606 hammer2_io_t *
607 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
608 int lsize HAMMER2_IO_DEBUG_ARGS)
610 hammer2_io_t *dio;
612 dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
613 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
614 return dio;
617 void
618 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
620 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
621 HAMMER2_DIO_FLUSH);
622 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
625 void
626 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
628 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
629 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
633 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
635 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
636 HAMMER2_DIO_FLUSH);
637 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
638 return (0); /* XXX */
641 void
642 hammer2_io_setdirty(hammer2_io_t *dio)
644 atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
648 * This routine is called when a MODIFIED chain is being DESTROYED,
649 * in an attempt to allow the related buffer cache buffer to be
650 * invalidated and discarded instead of flushing it to disk.
652 * At the moment this case is only really useful for file meta-data.
653 * File data is already handled via the logical buffer cache associated
654 * with the vnode, and will be discarded if it was never flushed to disk.
655 * File meta-data may include inodes, directory entries, and indirect blocks.
657 * XXX
658 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
659 * invalidated might be smaller. Most of the meta-data structures above
660 * are in the 'smaller' category. For now, don't try to invalidate the
661 * data areas.
663 void
664 hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
666 /* NOP */
669 void
670 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
672 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
675 void
676 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
678 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
682 * Set dedup validation bits in a DIO. We do not need the buffer cache
683 * buffer for this. This must be done concurrent with setting bits in
684 * the freemap so as to interlock with bulkfree's clearing of those bits.
686 void
687 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
689 hammer2_io_t *dio;
690 uint64_t mask;
691 int lsize;
692 int isgood;
694 dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
695 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
696 lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
697 else
698 lsize = 0;
699 mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
700 atomic_clear_64(&dio->dedup_valid, mask);
701 atomic_set_64(&dio->dedup_alloc, mask);
702 hammer2_io_putblk(&dio);
706 * Clear dedup validation bits in a DIO. This is typically done when
707 * a modified chain is destroyed or by the bulkfree code. No buffer
708 * is needed for this operation. If the DIO no longer exists it is
709 * equivalent to the bits not being set.
711 void
712 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
713 hammer2_off_t data_off, u_int bytes)
715 hammer2_io_t *dio;
716 uint64_t mask;
717 int isgood;
719 if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
720 return;
721 if (btype != HAMMER2_BREF_TYPE_DATA)
722 return;
723 dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
724 if (dio) {
725 if (data_off < dio->pbase ||
726 (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
727 dio->pbase + dio->psize) {
728 panic("hammer2_io_dedup_delete: DATAOFF BAD "
729 "%016jx/%d %016jx\n",
730 data_off, bytes, dio->pbase);
732 mask = hammer2_dedup_mask(dio, data_off, bytes);
733 atomic_clear_64(&dio->dedup_alloc, mask);
734 atomic_clear_64(&dio->dedup_valid, mask);
735 hammer2_io_putblk(&dio);
740 * Assert that dedup allocation bits in a DIO are not set. This operation
741 * does not require a buffer. The DIO does not need to exist.
743 void
744 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
746 hammer2_io_t *dio;
747 int isgood;
749 dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
750 0, &isgood);
751 if (dio) {
752 KASSERT((dio->dedup_alloc &
753 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
754 ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
755 data_off,
756 bytes,
757 hammer2_dedup_mask(dio, data_off, bytes),
758 dio->dedup_alloc));
759 hammer2_io_putblk(&dio);
763 static
764 void
765 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
767 if (bp->b_flags & B_DELWRI)
768 return;
769 hammer2_adjwritecounter(dio->btype, dio->psize);
772 void
773 hammer2_io_bkvasync(hammer2_io_t *dio)
775 KKASSERT(dio->bp != NULL);
776 bkvasync(dio->bp);
780 * Ref a dio that is already owned
782 void
783 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
785 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
786 atomic_add_64(&dio->refs, 1);
789 static __inline hammer2_io_hash_t *
790 hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase)
792 int hv;
794 hv = (int)pbase + (int)(pbase >> 16);
795 return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]);
799 * Lookup and reference the requested dio
801 static hammer2_io_t *
802 hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp)
804 hammer2_io_hash_t *hash;
805 hammer2_io_t *dio;
806 uint64_t refs;
808 *refsp = 0;
809 hash = hammer2_io_hashv(hmp, pbase);
810 hammer2_spin_sh(&hash->spin);
811 for (dio = hash->base; dio; dio = dio->next) {
812 if (dio->pbase == pbase) {
813 refs = atomic_fetchadd_64(&dio->refs, 1);
814 if ((refs & HAMMER2_DIO_MASK) == 0)
815 atomic_add_int(&dio->hmp->iofree_count, -1);
816 *refsp = refs;
817 break;
820 hammer2_spin_unsh(&hash->spin);
822 return dio;
826 * Enter a dio into the hash. If the pbase already exists in the hash,
827 * the xio in the hash is referenced and returned. If dio is sucessfully
828 * entered into the hash, NULL is returned.
830 static hammer2_io_t *
831 hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp)
833 hammer2_io_t *xio;
834 hammer2_io_t **xiop;
835 hammer2_io_hash_t *hash;
836 uint64_t refs;
838 *refsp = 0;
839 hash = hammer2_io_hashv(hmp, dio->pbase);
840 hammer2_spin_ex(&hash->spin);
841 for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) {
842 if (xio->pbase == dio->pbase) {
843 refs = atomic_fetchadd_64(&xio->refs, 1);
844 if ((refs & HAMMER2_DIO_MASK) == 0)
845 atomic_add_int(&xio->hmp->iofree_count, -1);
846 *refsp = refs;
847 goto done;
850 dio->next = NULL;
851 *xiop = dio;
852 done:
853 hammer2_spin_unex(&hash->spin);
855 return xio;
859 * Clean out a limited number of freeable DIOs
861 static void
862 hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit)
864 hammer2_io_hash_t *hash;
865 hammer2_io_t *dio;
866 hammer2_io_t **diop;
867 hammer2_io_t **cleanapp;
868 hammer2_io_t *cleanbase;
869 int count;
870 int maxscan;
871 int i;
873 count = hmp->iofree_count - dio_limit + 32;
874 if (count <= 0)
875 return;
876 cleanbase = NULL;
877 cleanapp = &cleanbase;
879 i = hmp->io_iterator++;
880 maxscan = HAMMER2_IOHASH_SIZE;
881 while (count > 0 && maxscan--) {
882 hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK];
883 hammer2_spin_ex(&hash->spin);
884 diop = &hash->base;
885 while ((dio = *diop) != NULL) {
886 if ((dio->refs & (HAMMER2_DIO_MASK |
887 HAMMER2_DIO_INPROG)) != 0)
889 diop = &dio->next;
890 continue;
892 if (dio->act > 0) {
893 int act;
895 act = dio->act - (ticks - dio->ticks) / hz - 1;
896 dio->act = (act < 0) ? 0 : act;
898 if (dio->act) {
899 diop = &dio->next;
900 continue;
902 KKASSERT(dio->bp == NULL);
903 *diop = dio->next;
904 dio->next = NULL;
905 *cleanapp = dio;
906 cleanapp = &dio->next;
907 --count;
908 /* diop remains unchanged */
909 atomic_add_int(&hmp->iofree_count, -1);
911 hammer2_spin_unex(&hash->spin);
912 i = hmp->io_iterator++;
916 * Get rid of dios on clean list without holding any locks
918 while ((dio = cleanbase) != NULL) {
919 cleanbase = dio->next;
920 dio->next = NULL;
921 KKASSERT(dio->bp == NULL &&
922 (dio->refs & (HAMMER2_DIO_MASK |
923 HAMMER2_DIO_INPROG)) == 0);
924 if (dio->refs & HAMMER2_DIO_DIRTY) {
925 kprintf("hammer2_io_cleanup: Dirty buffer "
926 "%016jx/%d (bp=%p)\n",
927 dio->pbase, dio->psize, dio->bp);
929 kfree_obj(dio, hmp->mio);
930 atomic_add_int(&hammer2_dio_count, -1);
935 * Destroy all DIOs associated with the media
937 void
938 hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp)
940 hammer2_io_hash_t *hash;
941 hammer2_io_t *dio;
942 int i;
944 for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
945 hash = &hmp->iohash[i];
947 while ((dio = hash->base) != NULL) {
948 hash->base = dio->next;
949 dio->next = NULL;
950 KKASSERT(dio->bp == NULL &&
951 (dio->refs & (HAMMER2_DIO_MASK |
952 HAMMER2_DIO_INPROG)) == 0);
953 if (dio->refs & HAMMER2_DIO_DIRTY) {
954 kprintf("hammer2_io_cleanup: Dirty buffer "
955 "%016jx/%d (bp=%p)\n",
956 dio->pbase, dio->psize, dio->bp);
958 kfree_obj(dio, hmp->mio);
959 atomic_add_int(&hammer2_dio_count, -1);
960 atomic_add_int(&hmp->iofree_count, -1);