zfs: allow large block/gzip/raidz for boot pools
[unleashed.git] / usr / src / uts / common / fs / fdbuffer.c
blob7260abe9f7d176c204a7d7f795033817fb07717f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright (c) 1998,2001 by Sun Microsystems, Inc.
24 * All rights reserved.
28 #pragma ident "%Z%%M% %I% %E% SMI"
30 #include <sys/types.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/systm.h>
34 #include <sys/debug.h>
35 #include <sys/ddi.h>
37 #include <sys/fdbuffer.h>
#ifdef DEBUG
/* Bitmask of the FDB_D_* categories whose DEBUGF() messages are emitted. */
static int fdb_debug;
#define	FDB_D_CREATE	001
#define	FDB_D_ALLOC	002
#define	FDB_D_IO	004
#define	FDB_D_ASYNC	010
/*
 * Emit a cmn_err() message when category `lvl' is enabled in fdb_debug.
 * `args' is the complete, parenthesized cmn_err() argument list.  The
 * expansion is wrapped in do/while (0) so that "DEBUGF(...);" is a single
 * statement and stays safe in an unbraced if/else.
 */
#define	DEBUGF(lvl, args) \
	do { \
		if ((lvl) & fdb_debug) \
			cmn_err args; \
	} while (0)
#else
/* Non-DEBUG builds: DEBUGF() is a no-op statement. */
#define	DEBUGF(level, args)	do { } while (0)
#endif
49 static struct kmem_cache *fdb_cache;
50 static void fdb_zero_holes(fdbuffer_t *fdb);
52 /* ARGSUSED */
53 static int
54 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags)
56 fdbuffer_t *fdb = buf;
58 mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL);
60 return (0);
63 /* ARGSUSED */
64 static void
65 fdb_cache_destructor(void *buf, void *cdrarg)
67 fdbuffer_t *fdb = buf;
69 mutex_destroy(&fdb->fd_mutex);
72 void
73 fdb_init()
75 fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t),
76 0, fdb_cache_constructor, fdb_cache_destructor,
77 NULL, NULL, NULL, 0);
80 static void
81 fdb_prepare(fdbuffer_t *fdb)
83 fdb->fd_holes = NULL;
84 fdb->fd_iofunc = NULL;
85 fdb->fd_iargp = NULL;
86 fdb->fd_parentbp = NULL;
87 fdb->fd_resid = 0;
88 fdb->fd_iocount = 0;
89 fdb->fd_iodispatch = 0;
90 fdb->fd_err = 0;
93 fdbuffer_t *
94 fdb_page_create(page_t *pp, size_t len, int flags)
96 fdbuffer_t *fdb;
98 DEBUGF(FDB_D_CREATE, (CE_NOTE,
99 "?fdb_page_create: pp: %p len: %lux flags: %x",
100 (void *)pp, len, flags));
102 ASSERT(flags & (FDB_READ|FDB_WRITE));
104 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
106 fdb_prepare(fdb);
108 fdb->fd_type = FDB_PAGEIO;
109 fdb->fd_len = len;
110 fdb->fd_state = flags;
111 fdb->fd_pages = pp;
113 return (fdb);
116 fdbuffer_t *
117 fdb_addr_create(
118 caddr_t addr,
119 size_t len,
120 int flags,
121 page_t **pplist,
122 struct proc *procp)
124 fdbuffer_t *fdb;
126 DEBUGF(FDB_D_CREATE, (CE_NOTE,
127 "?fdb_addr_create: addr: %p len: %lux flags: %x",
128 (void *)addr, len, flags));
130 ASSERT(flags & (FDB_READ|FDB_WRITE));
132 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
134 fdb_prepare(fdb);
136 fdb->fd_type = FDB_VADDR;
137 fdb->fd_len = len;
138 fdb->fd_state = flags;
139 fdb->fd_addr = addr;
140 fdb->fd_shadow = pplist;
141 fdb->fd_procp = procp;
143 return (fdb);
146 void
147 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag)
149 ASSERT(fdb);
150 ASSERT(iofunc);
151 ASSERT((flag & ~FDB_ICALLBACK) == 0);
153 fdb->fd_iofunc = iofunc;
154 fdb->fd_iargp = ioargp;
156 mutex_enter(&fdb->fd_mutex);
158 if (flag & FDB_ICALLBACK)
159 fdb->fd_state |= FDB_ICALLBACK;
161 fdb->fd_state |= FDB_ASYNC;
163 mutex_exit(&fdb->fd_mutex);
167 fdb_get_error(fdbuffer_t *fdb)
169 return (fdb->fd_err);
172 void
173 fdb_free(fdbuffer_t *fdb)
175 fdb_holes_t *fdh, *fdhp;
177 DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x",
178 (void *)fdb, fdb->fd_state));
180 ASSERT(fdb);
181 ASSERT(fdb->fd_iodispatch == 0);
183 if (fdb->fd_state & FDB_ZEROHOLE) {
184 fdb_zero_holes(fdb);
187 for (fdh = fdb->fd_holes; fdh; ) {
188 fdhp = fdh;
189 fdh = fdh->next_hole;
190 kmem_free(fdhp, sizeof (fdb_holes_t));
193 if (fdb->fd_parentbp != NULL) {
194 switch (fdb->fd_type) {
195 case FDB_PAGEIO:
196 pageio_done(fdb->fd_parentbp);
197 break;
198 case FDB_VADDR:
199 kmem_free(fdb->fd_parentbp, sizeof (struct buf));
200 break;
201 default:
202 cmn_err(CE_CONT, "?fdb_free: Unknown fdb type.");
203 break;
207 kmem_cache_free(fdb_cache, fdb);
212 * The offset should be from the begining of the buffer
213 * it has nothing to do with file offset. This fact should be
214 * reflected in the caller of this routine.
217 void
218 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len)
220 fdb_holes_t *this_hole;
222 ASSERT(fdb);
223 ASSERT(off < fdb->fd_len);
225 DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx",
226 off, len));
228 this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP);
229 this_hole->off = off;
230 this_hole->len = len;
232 if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) {
233 this_hole->next_hole = fdb->fd_holes;
234 fdb->fd_holes = this_hole;
235 } else {
236 fdb_holes_t *fdhp = fdb->fd_holes;
238 while (fdhp->next_hole && off > fdhp->next_hole->off)
239 fdhp = fdhp->next_hole;
241 this_hole->next_hole = fdhp->next_hole;
242 fdhp->next_hole = this_hole;
245 mutex_enter(&fdb->fd_mutex);
247 fdb->fd_iocount += len;
249 mutex_exit(&fdb->fd_mutex);
252 fdb_holes_t *
253 fdb_get_holes(fdbuffer_t *fdb)
255 ASSERT(fdb);
257 if (fdb->fd_state & FDB_ZEROHOLE) {
258 fdb_zero_holes(fdb);
261 return (fdb->fd_holes);
265 * Note that offsets refer to offsets from the begining of the buffer
266 * and as such the memory should be cleared accordingly.
269 static void
270 fdb_zero_holes(fdbuffer_t *fdb)
272 fdb_holes_t *fdh = fdb->fd_holes;
273 page_t *pp;
275 ASSERT(fdb);
277 if (!fdh)
278 return;
280 switch (fdb->fd_type) {
281 case FDB_PAGEIO:
282 pp = fdb->fd_pages;
283 while (fdh) {
284 fdb_holes_t *pfdh = fdh;
285 size_t l = fdh->len;
286 u_offset_t o = fdh->off;
287 ASSERT(pp);
289 do {
290 int zerolen;
291 ASSERT(o >= pp->p_offset);
294 * This offset is wrong since
295 * the offset passed from the pages
296 * perspective starts at some virtual
297 * address but the hole is relative
298 * to the beginning of the fdbuffer.
300 if (o >= pp->p_offset + PAGESIZE)
301 continue;
303 zerolen = min(PAGESIZE, l);
305 ASSERT(zerolen > 0);
306 ASSERT(zerolen <= PAGESIZE);
308 pagezero(pp, ((uintptr_t)o & PAGEOFFSET),
309 zerolen);
311 l -= zerolen;
312 o += zerolen;
314 if (l == 0)
315 break;
317 } while (pp = page_list_next(pp));
319 if (!pp)
320 break;
322 fdh = fdh->next_hole;
323 kmem_free(pfdh, sizeof (fdb_holes_t));
325 break;
326 case FDB_VADDR:
327 while (fdh) {
328 fdb_holes_t *pfdh = fdh;
330 bzero(fdb->fd_addr + fdh->off, fdh->len);
332 fdh = fdh->next_hole;
333 kmem_free(pfdh, sizeof (fdb_holes_t));
335 default:
336 panic("fdb_zero_holes: Unknown fdb type.");
337 break;
342 buf_t *
343 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp,
344 int b_flags)
346 buf_t *bp;
348 DEBUGF(FDB_D_IO, (CE_NOTE,
349 "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x",
350 off, len, fdb->fd_len, fdb->fd_state));
352 ASSERT(fdb);
354 mutex_enter(&fdb->fd_mutex);
356 ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) ||
357 ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE)));
359 * The fdb can be used either in sync or async mode, if the
360 * buffer has not been used it may be used in either mode, but
361 * once you have started to use the buf in either mode all
362 * subsequent i/o requests must take place the same way.
365 ASSERT(((b_flags & B_ASYNC) &&
366 ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) ||
367 (!(b_flags & B_ASYNC) &&
368 ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC))));
371 fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC;
373 fdb->fd_iodispatch++;
375 ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) ||
376 fdb->fd_state & FDB_SYNC);
378 mutex_exit(&fdb->fd_mutex);
380 ASSERT((len & (DEV_BSIZE - 1)) == 0);
381 ASSERT(off+len <= fdb->fd_len);
383 switch (fdb->fd_type) {
384 case FDB_PAGEIO:
385 if (fdb->fd_parentbp == NULL) {
386 bp = pageio_setup(fdb->fd_pages, len, vp, b_flags);
387 fdb->fd_parentbp = bp;
389 break;
390 case FDB_VADDR:
391 if (fdb->fd_parentbp == NULL) {
393 bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
394 bioinit(bp);
395 bp->b_error = 0;
396 bp->b_proc = fdb->fd_procp;
397 bp->b_flags = b_flags | B_BUSY | B_PHYS;
398 bp->b_bcount = len;
399 bp->b_un.b_addr = fdb->fd_addr;
400 bp->b_shadow = fdb->fd_shadow;
401 if (fdb->fd_shadow != NULL)
402 bp->b_flags |= B_SHADOW;
403 fdb->fd_parentbp = bp;
405 break;
406 default:
407 panic("fdb_iosetup: Unsupported fdb type.");
408 break;
411 bp = bioclone(fdb->fd_parentbp, off, len, 0, 0,
412 (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL,
413 NULL, KM_SLEEP);
415 bp->b_forw = (struct buf *)fdb;
417 if (b_flags & B_ASYNC)
418 bp->b_flags |= B_ASYNC;
420 return (bp);
423 size_t
424 fdb_get_iolen(fdbuffer_t *fdb)
426 ASSERT(fdb);
427 ASSERT(fdb->fd_iodispatch == 0);
429 return (fdb->fd_iocount - fdb->fd_resid);
432 void
433 fdb_ioerrdone(fdbuffer_t *fdb, int error)
435 ASSERT(fdb);
436 ASSERT(fdb->fd_state & FDB_ASYNC);
438 DEBUGF(FDB_D_IO, (CE_NOTE,
439 "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d",
440 fdb->fd_len, fdb->fd_state, error));
442 mutex_enter(&fdb->fd_mutex);
444 fdb->fd_err = error;
446 if (error)
447 fdb->fd_state |= FDB_ERROR;
448 else
449 fdb->fd_state |= FDB_DONE;
452 * If there is outstanding i/o return wainting for i/o's to complete.
454 if (fdb->fd_iodispatch > 0) {
455 mutex_exit(&fdb->fd_mutex);
456 return;
459 mutex_exit(&fdb->fd_mutex);
460 fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL);
463 void
464 fdb_iodone(buf_t *bp)
466 fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw;
467 int error, isasync;
468 int icallback;
470 ASSERT(fdb);
472 DEBUGF(FDB_D_IO, (CE_NOTE,
473 "?fdb_iodone: fdb: len: %lux flags: %x error: %d",
474 fdb->fd_len, fdb->fd_state, geterror(bp)));
476 if (bp->b_flags & B_REMAPPED)
477 bp_mapout(bp);
479 mutex_enter(&fdb->fd_mutex);
481 icallback = fdb->fd_state & FDB_ICALLBACK;
482 isasync = fdb->fd_state & FDB_ASYNC;
484 ASSERT(fdb->fd_iodispatch > 0);
485 fdb->fd_iodispatch--;
487 if (error = geterror(bp)) {
488 fdb->fd_err = error;
489 if (bp->b_resid)
490 fdb->fd_resid += bp->b_resid;
491 else
492 fdb->fd_resid += bp->b_bcount;
495 fdb->fd_iocount += bp->b_bcount;
498 * ioack collects the total amount of i/o accounted for
499 * this includes:
501 * - i/o completed
502 * - i/o attempted but not completed,
503 * - i/o not done due to holes.
505 * Once the entire i/o ranges has been accounted for we'll
506 * call the async function associated with the fdb.
510 if ((fdb->fd_iodispatch == 0) &&
511 (fdb->fd_state & (FDB_ERROR|FDB_DONE))) {
513 mutex_exit(&fdb->fd_mutex);
515 if (isasync || icallback) {
516 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
519 } else {
521 mutex_exit(&fdb->fd_mutex);
523 if (icallback) {
524 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
528 freerbuf(bp);