NFE - Change default RX ring size from 128 -> 256, Adjust moderation timer.
[dragonfly.git] / sys / dev / disk / ccd / ccd.c
blob20b16ac26c38529e72cdd225174b8d6c957a0c1f
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $
37 * Copyright (c) 1995 Jason R. Thorpe.
38 * All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed for the NetBSD Project
51 * by Jason R. Thorpe.
52 * 4. The name of the author may not be used to endorse or promote products
53 * derived from this software without specific prior written permission.
55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
69 * Copyright (c) 1988 University of Utah.
70 * Copyright (c) 1990, 1993
71 * The Regents of the University of California. All rights reserved.
73 * This code is derived from software contributed to Berkeley by
74 * the Systems Programming Group of the University of Utah Computer
75 * Science Department.
77 * Redistribution and use in source and binary forms, with or without
78 * modification, are permitted provided that the following conditions
79 * are met:
80 * 1. Redistributions of source code must retain the above copyright
81 * notice, this list of conditions and the following disclaimer.
82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in the
84 * documentation and/or other materials provided with the distribution.
85 * 3. All advertising materials mentioning features or use of this software
86 * must display the following acknowledgement:
87 * This product includes software developed by the University of
88 * California, Berkeley and its contributors.
89 * 4. Neither the name of the University nor the names of its contributors
90 * may be used to endorse or promote products derived from this software
91 * without specific prior written permission.
93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
103 * SUCH DAMAGE.
105 * from: Utah $Hdr: cd.c 1.6 90/11/28$
108 * @(#)cd.c 8.2 (Berkeley) 11/16/93
109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $
115 * "Concatenated" disk driver.
117 * Original dynamic configuration support by:
118 * Jason R. Thorpe <thorpej@nas.nasa.gov>
119 * Numerical Aerodynamic Simulation Facility
120 * Mail Stop 258-6
121 * NASA Ames Research Center
122 * Moffett Field, CA 94035
125 #include "use_ccd.h"
127 #include <sys/param.h>
128 #include <sys/systm.h>
129 #include <sys/kernel.h>
130 #include <sys/module.h>
131 #include <sys/proc.h>
132 #include <sys/buf.h>
133 #include <sys/malloc.h>
134 #include <sys/nlookup.h>
135 #include <sys/conf.h>
136 #include <sys/stat.h>
137 #include <sys/sysctl.h>
138 #include <sys/disk.h>
139 #include <sys/dtype.h>
140 #include <sys/diskslice.h>
141 #include <sys/devicestat.h>
142 #include <sys/fcntl.h>
143 #include <sys/vnode.h>
144 #include <sys/buf2.h>
145 #include <sys/ccdvar.h>
147 #include <vm/vm_zone.h>
149 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */
150 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */
152 #include <sys/thread2.h>
/*
 * Building with CCDDEBUG implies DEBUG for the definitions below.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug categories selectable through the debug.ccddebug sysctl.
 *
 * NOTE: DEBUG is undefined again at the end of this section, so the
 * "#ifdef DEBUG" sections further down in this file are compiled out
 * unless DEBUG is defined again before them.
 */
#ifdef DEBUG
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
#undef DEBUG
#endif

/* Unit and partition of a ccd device, via the generic disk helpers. */
#define ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
174 This is how mirroring works (only writes are special):
176 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
177 linked together by the cb_mirror field. "cb_pflags &
178 CCDPF_MIRROR_DONE" is set to 0 on both of them.
180 When a component returns to ccdiodone(), it checks if "cb_pflags &
181 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
182 flag and returns. If it is, it means its partner has already
183 returned, so it will go to the regular cleanup.
187 struct ccdbuf {
188 struct buf cb_buf; /* new I/O buf */
189 struct vnode *cb_vp; /* related vnode */
190 struct bio *cb_obio; /* ptr. to original I/O buf */
191 struct ccdbuf *cb_freenext; /* free list link */
192 int cb_unit; /* target unit */
193 int cb_comp; /* target component */
194 int cb_pflags; /* mirror/parity status flag */
195 struct ccdbuf *cb_mirror; /* mirror counterpart */
198 /* bits in cb_pflags */
199 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
201 static d_open_t ccdopen;
202 static d_close_t ccdclose;
203 static d_strategy_t ccdstrategy;
204 static d_ioctl_t ccdioctl;
205 static d_dump_t ccddump;
207 #define NCCDFREEHIWAT 16
209 #define CDEV_MAJOR 74
211 static struct dev_ops ccd_ops = {
212 { "ccd", CDEV_MAJOR, D_DISK },
213 .d_open = ccdopen,
214 .d_close = ccdclose,
215 .d_read = physread,
216 .d_write = physwrite,
217 .d_ioctl = ccdioctl,
218 .d_strategy = ccdstrategy,
219 .d_dump = ccddump
222 /* called during module initialization */
223 static void ccdattach (void);
224 static int ccddetach (void);
225 static int ccd_modevent (module_t, int, void *);
227 /* called by biodone() at interrupt time */
228 static void ccdiodone (struct bio *bio);
230 static void ccdstart (struct ccd_softc *, struct bio *);
231 static void ccdinterleave (struct ccd_softc *, int);
232 static void ccdintr (struct ccd_softc *, struct bio *);
233 static int ccdinit (struct ccddevice *, char **, struct ucred *);
234 static int ccdlookup (char *, struct vnode **);
235 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
236 struct bio *, off_t, caddr_t, long);
237 static int ccdlock (struct ccd_softc *);
238 static void ccdunlock (struct ccd_softc *);
240 #ifdef DEBUG
241 static void printiinfo (struct ccdiinfo *);
242 #endif
244 /* Non-private for the benefit of libkvm. */
245 struct ccd_softc *ccd_softc;
246 struct ccddevice *ccddevs;
247 struct ccdbuf *ccdfreebufs;
248 static int numccdfreebufs;
249 static int numccd = 0;
252 * getccdbuf() - Allocate and zero a ccd buffer.
254 * This routine is called at splbio().
257 static __inline
258 struct ccdbuf *
259 getccdbuf(void)
261 struct ccdbuf *cbp;
264 * Allocate from freelist or malloc as necessary
266 if ((cbp = ccdfreebufs) != NULL) {
267 ccdfreebufs = cbp->cb_freenext;
268 --numccdfreebufs;
269 reinitbufbio(&cbp->cb_buf);
270 } else {
271 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
272 initbufbio(&cbp->cb_buf);
276 * independant struct buf initialization
278 buf_dep_init(&cbp->cb_buf);
279 BUF_LOCKINIT(&cbp->cb_buf);
280 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
281 BUF_KERNPROC(&cbp->cb_buf);
282 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
284 return(cbp);
288 * putccdbuf() - Free a ccd buffer.
290 * This routine is called at splbio().
293 static __inline
294 void
295 putccdbuf(struct ccdbuf *cbp)
297 BUF_UNLOCK(&cbp->cb_buf);
298 BUF_LOCKFREE(&cbp->cb_buf);
300 if (numccdfreebufs < NCCDFREEHIWAT) {
301 cbp->cb_freenext = ccdfreebufs;
302 ccdfreebufs = cbp;
303 ++numccdfreebufs;
304 } else {
305 kfree((caddr_t)cbp, M_DEVBUF);
310 * Called by main() during pseudo-device attachment. All we need
311 * to do is allocate enough space for devices to be configured later, and
312 * add devsw entries.
314 static void
315 ccdattach(void)
317 struct disk_info info;
318 struct ccd_softc *cs;
319 int i;
320 int num = NCCD;
322 if (num > 1)
323 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
324 else
325 kprintf("ccd0: Concatenated disk driver\n");
327 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
328 M_WAITOK | M_ZERO);
329 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
330 M_WAITOK | M_ZERO);
331 numccd = num;
334 * With normal disk devices the open simply fails if the media
335 * is not present. With CCD we have to be able to open the
336 * raw disk to use the ioctl's to set it up, so create a dummy
337 * disk info structure so dscheck() doesn't blow up.
339 bzero(&info, sizeof(info));
340 info.d_media_blksize = DEV_BSIZE;
342 for (i = 0; i < numccd; ++i) {
343 cs = &ccd_softc[i];
344 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
345 cs->sc_dev->si_drv1 = cs;
346 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */
347 disk_setdiskinfo(&cs->sc_disk, &info);
351 static int
352 ccddetach(void)
354 struct ccd_softc *cs;
355 struct dev_ioctl_args ioctl_args;
356 int i;
357 int error = 0;
358 int eval;
360 bzero(&ioctl_args, sizeof(ioctl_args));
362 for (i = 0; i < numccd; ++i) {
363 cs = &ccd_softc[i];
364 if (cs->sc_dev == NULL)
365 continue;
366 ioctl_args.a_head.a_dev = cs->sc_dev;
367 ioctl_args.a_cmd = CCDIOCCLR;
368 ioctl_args.a_fflag = FWRITE;
369 eval = ccdioctl(&ioctl_args);
370 if (eval && eval != ENXIO) {
371 kprintf("ccd%d: In use, cannot detach\n", i);
372 error = EBUSY;
375 if (error == 0) {
376 for (i = 0; i < numccd; ++i) {
377 cs = &ccd_softc[i];
378 if (cs->sc_dev == NULL)
379 continue;
380 disk_destroy(&cs->sc_disk);
381 cs->sc_dev = NULL;
383 if (ccd_softc)
384 kfree(ccd_softc, M_DEVBUF);
385 if (ccddevs)
386 kfree(ccddevs, M_DEVBUF);
388 return (error);
391 static int
392 ccd_modevent(module_t mod, int type, void *data)
394 int error = 0;
396 switch (type) {
397 case MOD_LOAD:
398 ccdattach();
399 break;
401 case MOD_UNLOAD:
402 error = ccddetach();
403 break;
405 default: /* MOD_SHUTDOWN etc */
406 break;
408 return (error);
411 DEV_MODULE(ccd, ccd_modevent, NULL);
413 static int
414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
416 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
417 struct ccdcinfo *ci = NULL; /* XXX */
418 int ix;
419 struct vnode *vp;
420 u_int64_t skip;
421 u_int64_t size;
422 u_int64_t minsize;
423 int maxsecsize;
424 struct partinfo dpart;
425 struct ccdgeom *ccg = &cs->sc_geom;
426 char tmppath[MAXPATHLEN];
427 int error = 0;
429 #ifdef DEBUG
430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
432 #endif
434 cs->sc_size = 0;
435 cs->sc_ileave = ccd->ccd_interleave;
436 cs->sc_nccdisks = ccd->ccd_ndev;
438 /* Allocate space for the component info. */
439 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
440 M_DEVBUF, M_WAITOK);
441 cs->sc_maxiosize = MAXPHYS;
444 * Verify that each component piece exists and record
445 * relevant information about it.
447 maxsecsize = 0;
448 minsize = 0;
449 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
450 vp = ccd->ccd_vpp[ix];
451 ci = &cs->sc_cinfo[ix];
452 ci->ci_vp = vp;
455 * Copy in the pathname of the component.
457 bzero(tmppath, sizeof(tmppath)); /* sanity */
458 if ((error = copyinstr(cpaths[ix], tmppath,
459 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
460 #ifdef DEBUG
461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
462 kprintf("ccd%d: can't copy path, error = %d\n",
463 ccd->ccd_unit, error);
464 #endif
465 goto fail;
467 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
468 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
470 ci->ci_dev = vn_todev(vp);
471 if (ci->ci_dev->si_iosize_max &&
472 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
473 cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
477 * Get partition information for the component.
479 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
480 cred, NULL);
481 if (error) {
482 #ifdef DEBUG
483 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
484 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
485 ccd->ccd_unit, ci->ci_path, error);
486 #endif
487 goto fail;
489 if (dpart.fstype != FS_CCD &&
490 !kuuid_is_ccd(&dpart.fstype_uuid)) {
491 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
492 ccd->ccd_unit, ci->ci_path);
493 error = EFTYPE;
494 goto fail;
496 if (maxsecsize < dpart.media_blksize)
497 maxsecsize = dpart.media_blksize;
500 * Skip a certain amount of storage at the beginning of
501 * the component to make sure we don't infringe on any
502 * reserved sectors. This is handled entirely by
503 * dpart.reserved_blocks but we also impose a minimum
504 * of 16 sectors for backwards compatibility.
506 skip = 16;
507 if (skip < dpart.reserved_blocks)
508 skip = dpart.reserved_blocks;
509 size = dpart.media_blocks - skip;
512 * Calculate the size, truncating to an interleave
513 * boundary if necessary.
515 if (cs->sc_ileave > 1)
516 size -= size % cs->sc_ileave;
518 if ((int64_t)size <= 0) {
519 #ifdef DEBUG
520 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
521 kprintf("ccd%d: %s: size == 0\n",
522 ccd->ccd_unit, ci->ci_path);
523 #endif
524 error = ENODEV;
525 goto fail;
529 * Calculate the smallest uniform component, used
530 * elsewhere.
532 if (minsize == 0 || minsize > size)
533 minsize = size;
534 ci->ci_skip = skip;
535 ci->ci_size = size;
536 cs->sc_size += size;
538 kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
539 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
542 * Don't allow the interleave to be smaller than
543 * the biggest component sector.
545 if ((cs->sc_ileave > 0) &&
546 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
547 #ifdef DEBUG
548 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
549 kprintf("ccd%d: interleave must be at least %d\n",
550 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
551 #endif
552 error = EINVAL;
553 goto fail;
557 * If uniform interleave is desired set all sizes to that of
558 * the smallest component. This will guarentee that a single
559 * interleave table is generated.
561 * Lost space must be taken into account when calculating the
562 * overall size. Half the space is lost when CCDF_MIRROR is
563 * specified. One disk is lost when CCDF_PARITY is specified.
565 if (ccd->ccd_flags & CCDF_UNIFORM) {
566 for (ci = cs->sc_cinfo;
567 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
568 ci->ci_size = minsize;
570 if (ccd->ccd_flags & CCDF_MIRROR) {
572 * Check to see if an even number of components
573 * have been specified. The interleave must also
574 * be non-zero in order for us to be able to
575 * guarentee the topology.
577 if (cs->sc_nccdisks % 2) {
578 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
579 error = EINVAL;
580 goto fail;
582 if (cs->sc_ileave == 0) {
583 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
584 error = EINVAL;
585 goto fail;
587 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
588 } else if (ccd->ccd_flags & CCDF_PARITY) {
589 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
590 } else {
591 if (cs->sc_ileave == 0) {
592 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
593 error = EINVAL;
594 goto fail;
596 cs->sc_size = cs->sc_nccdisks * minsize;
601 * Construct the interleave table.
603 ccdinterleave(cs, ccd->ccd_unit);
606 * Create pseudo-geometry based on 1MB cylinders. It's
607 * pretty close.
609 ccg->ccg_secsize = maxsecsize;
610 ccg->ccg_ntracks = 1;
611 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
612 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
615 * Add an devstat entry for this device.
617 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
618 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
619 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
620 DEVSTAT_PRIORITY_ARRAY);
622 cs->sc_flags |= CCDF_INITED;
623 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
624 cs->sc_unit = ccd->ccd_unit;
625 return (0);
626 fail:
627 while (ci > cs->sc_cinfo) {
628 ci--;
629 kfree(ci->ci_path, M_DEVBUF);
631 kfree(cs->sc_cinfo, M_DEVBUF);
632 cs->sc_cinfo = NULL;
633 return (error);
636 static void
637 ccdinterleave(struct ccd_softc *cs, int unit)
639 struct ccdcinfo *ci, *smallci;
640 struct ccdiinfo *ii;
641 u_int64_t bn;
642 u_int64_t lbn;
643 u_int64_t size;
644 int icount;
645 int ix;
647 #ifdef DEBUG
648 if (ccddebug & CCDB_INIT)
649 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
650 #endif
653 * Allocate an interleave table. The worst case occurs when each
654 * of N disks is of a different size, resulting in N interleave
655 * tables.
657 * Chances are this is too big, but we don't care.
659 icount = cs->sc_nccdisks + 1;
660 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
661 M_DEVBUF, M_WAITOK|M_ZERO);
664 * Trivial case: no interleave (actually interleave of disk size).
665 * Each table entry represents a single component in its entirety.
667 * An interleave of 0 may not be used with a mirror or parity setup.
669 if (cs->sc_ileave == 0) {
670 bn = 0;
671 ii = cs->sc_itable;
673 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
674 /* Allocate space for ii_index. */
675 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
676 ii->ii_ndisk = 1;
677 ii->ii_startblk = bn;
678 ii->ii_startoff = 0;
679 ii->ii_index[0] = ix;
680 bn += cs->sc_cinfo[ix].ci_size;
681 ii++;
683 ii->ii_ndisk = 0;
684 #ifdef DEBUG
685 if (ccddebug & CCDB_INIT)
686 printiinfo(cs->sc_itable);
687 #endif
688 return;
692 * The following isn't fast or pretty; it doesn't have to be.
694 size = 0;
695 bn = lbn = 0;
696 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
698 * Allocate space for ii_index. We might allocate more then
699 * we use.
701 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
702 M_DEVBUF, M_WAITOK);
705 * Locate the smallest of the remaining components
707 smallci = NULL;
708 ci = cs->sc_cinfo;
709 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
710 if (ci->ci_size > size &&
711 (smallci == NULL ||
712 ci->ci_size < smallci->ci_size)) {
713 smallci = ci;
715 ++ci;
719 * Nobody left, all done
721 if (smallci == NULL) {
722 ii->ii_ndisk = 0;
723 break;
727 * Record starting logical block using an sc_ileave blocksize.
729 ii->ii_startblk = bn / cs->sc_ileave;
732 * Record starting component block using an sc_ileave
733 * blocksize. This value is relative to the beginning of
734 * a component disk.
736 ii->ii_startoff = lbn;
739 * Determine how many disks take part in this interleave
740 * and record their indices.
742 ix = 0;
743 for (ci = cs->sc_cinfo;
744 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
745 if (ci->ci_size >= smallci->ci_size) {
746 ii->ii_index[ix++] = ci - cs->sc_cinfo;
749 ii->ii_ndisk = ix;
752 * Adjust for loop
754 bn += ix * (smallci->ci_size - size);
755 lbn = smallci->ci_size / cs->sc_ileave;
756 size = smallci->ci_size;
758 if (ii == &cs->sc_itable[icount])
759 panic("ccdinterlave software bug! table exhausted");
760 #ifdef DEBUG
761 if (ccddebug & CCDB_INIT)
762 printiinfo(cs->sc_itable);
763 #endif
766 /* ARGSUSED */
767 static int
768 ccdopen(struct dev_open_args *ap)
770 cdev_t dev = ap->a_head.a_dev;
771 int unit = ccdunit(dev);
772 struct ccd_softc *cs;
773 int error = 0;
775 #ifdef DEBUG
776 if (ccddebug & CCDB_FOLLOW)
777 kprintf("ccdopen(%x, %x)\n", dev, flags);
778 #endif
779 if (unit >= numccd)
780 return (ENXIO);
781 cs = &ccd_softc[unit];
783 if ((error = ccdlock(cs)) == 0) {
784 ccdunlock(cs);
786 return (error);
789 /* ARGSUSED */
790 static int
791 ccdclose(struct dev_close_args *ap)
793 cdev_t dev = ap->a_head.a_dev;
794 int unit = ccdunit(dev);
795 struct ccd_softc *cs;
796 int error = 0;
798 #ifdef DEBUG
799 if (ccddebug & CCDB_FOLLOW)
800 kprintf("ccdclose(%x, %x)\n", dev, flags);
801 #endif
803 if (unit >= numccd)
804 return (ENXIO);
805 cs = &ccd_softc[unit];
806 if ((error = ccdlock(cs)) == 0) {
807 ccdunlock(cs);
809 return (error);
812 static int
813 ccdstrategy(struct dev_strategy_args *ap)
815 cdev_t dev = ap->a_head.a_dev;
816 struct bio *bio = ap->a_bio;
817 int unit = ccdunit(dev);
818 struct bio *nbio;
819 struct buf *bp = bio->bio_buf;
820 struct ccd_softc *cs = &ccd_softc[unit];
821 u_int64_t pbn; /* in sc_secsize chunks */
822 u_int32_t sz; /* in sc_secsize chunks */
824 #ifdef DEBUG
825 if (ccddebug & CCDB_FOLLOW)
826 kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
827 #endif
828 if ((cs->sc_flags & CCDF_INITED) == 0) {
829 bp->b_error = ENXIO;
830 goto error;
833 /* If it's a nil transfer, wake up the top half now. */
834 if (bp->b_bcount == 0) {
835 bp->b_resid = 0;
836 goto done;
840 * Do bounds checking and adjust transfer. If there's an
841 * error, the bounds check will flag that for us.
844 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
845 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
848 * If out of bounds return an error. If the request goes
849 * past EOF, clip the request as appropriate. If exactly
850 * at EOF, return success (don't clip), but with 0 bytes
851 * of I/O.
853 * Mark EOF B_INVAL (just like bad), indicating that the
854 * contents of the buffer, if any, is invalid.
856 if ((int64_t)pbn < 0)
857 goto bad;
858 if (pbn + sz > cs->sc_size) {
859 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
860 goto bad;
861 if (pbn == cs->sc_size) {
862 bp->b_resid = bp->b_bcount;
863 bp->b_flags |= B_INVAL;
864 goto done;
866 sz = (long)(cs->sc_size - pbn);
867 bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
869 nbio = bio;
871 bp->b_resid = bp->b_bcount;
872 nbio->bio_driver_info = dev;
875 * "Start" the unit.
877 crit_enter();
878 ccdstart(cs, nbio);
879 crit_exit();
880 return(0);
883 * note: bio, not nbio, is valid at the done label.
885 bad:
886 bp->b_error = EINVAL;
887 error:
888 bp->b_resid = bp->b_bcount;
889 bp->b_flags |= B_ERROR | B_INVAL;
890 done:
891 biodone(bio);
892 return(0);
895 static void
896 ccdstart(struct ccd_softc *cs, struct bio *bio)
898 long bcount, rcount;
899 struct ccdbuf *cbp[4];
900 struct buf *bp = bio->bio_buf;
901 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
902 caddr_t addr;
903 off_t doffset;
905 #ifdef DEBUG
906 if (ccddebug & CCDB_FOLLOW)
907 kprintf("ccdstart(%x, %x)\n", cs, bp);
908 #endif
910 /* Record the transaction start */
911 devstat_start_transaction(&cs->device_stats);
914 * Allocate component buffers and fire off the requests
916 doffset = bio->bio_offset;
917 addr = bp->b_data;
919 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
920 ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
921 rcount = cbp[0]->cb_buf.b_bcount;
923 if (cs->sc_cflags & CCDF_MIRROR) {
925 * Mirroring. Writes go to both disks, reads are
926 * taken from whichever disk seems most appropriate.
928 * We attempt to localize reads to the disk whos arm
929 * is nearest the read request. We ignore seeks due
930 * to writes when making this determination and we
931 * also try to avoid hogging.
933 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
934 vn_strategy(cbp[0]->cb_vp,
935 &cbp[0]->cb_buf.b_bio1);
936 vn_strategy(cbp[1]->cb_vp,
937 &cbp[1]->cb_buf.b_bio1);
938 } else {
939 int pick = cs->sc_pick;
940 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
941 if (doffset < cs->sc_blk[pick] - range ||
942 doffset > cs->sc_blk[pick] + range
944 cs->sc_pick = pick = 1 - pick;
946 cs->sc_blk[pick] = doffset + rcount;
947 vn_strategy(cbp[pick]->cb_vp,
948 &cbp[pick]->cb_buf.b_bio1);
950 } else {
952 * Not mirroring
954 vn_strategy(cbp[0]->cb_vp,
955 &cbp[0]->cb_buf.b_bio1);
957 doffset += rcount;
958 addr += rcount;
963 * Build a component buffer header.
965 static void
966 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
967 off_t doffset, caddr_t addr, long bcount)
969 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
970 struct ccdbuf *cbp;
971 u_int64_t bn;
972 u_int64_t cbn;
973 u_int64_t cboff;
974 off_t cbc;
976 #ifdef DEBUG
977 if (ccddebug & CCDB_IO)
978 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
979 cs, bp, bn, addr, bcount);
980 #endif
982 * Determine which component bn falls in.
984 bn = doffset / cs->sc_geom.ccg_secsize;
985 cbn = bn;
986 cboff = 0;
988 if (cs->sc_ileave == 0) {
990 * Serially concatenated and neither a mirror nor a parity
991 * config. This is a special case.
993 daddr_t sblk;
995 sblk = 0;
996 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
997 sblk += ci->ci_size;
998 cbn -= sblk;
999 } else {
1000 struct ccdiinfo *ii;
1001 int ccdisk, off;
1004 * Calculate cbn, the logical superblock (sc_ileave chunks),
1005 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
1006 * to cbn.
1008 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
1009 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
1012 * Figure out which interleave table to use.
1014 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1015 if (ii->ii_startblk > cbn)
1016 break;
1018 ii--;
1021 * off is the logical superblock relative to the beginning
1022 * of this interleave block.
1024 off = cbn - ii->ii_startblk;
1027 * We must calculate which disk component to use (ccdisk),
1028 * and recalculate cbn to be the superblock relative to
1029 * the beginning of the component. This is typically done by
1030 * adding 'off' and ii->ii_startoff together. However, 'off'
1031 * must typically be divided by the number of components in
1032 * this interleave array to be properly convert it from a
1033 * CCD-relative logical superblock number to a
1034 * component-relative superblock number.
1036 if (ii->ii_ndisk == 1) {
1038 * When we have just one disk, it can't be a mirror
1039 * or a parity config.
1041 ccdisk = ii->ii_index[0];
1042 cbn = ii->ii_startoff + off;
1043 } else {
1044 if (cs->sc_cflags & CCDF_MIRROR) {
1046 * We have forced a uniform mapping, resulting
1047 * in a single interleave array. We double
1048 * up on the first half of the available
1049 * components and our mirror is in the second
1050 * half. This only works with a single
1051 * interleave array because doubling up
1052 * doubles the number of sectors, so there
1053 * cannot be another interleave array because
1054 * the next interleave array's calculations
1055 * would be off.
1057 int ndisk2 = ii->ii_ndisk / 2;
1058 ccdisk = ii->ii_index[off % ndisk2];
1059 cbn = ii->ii_startoff + off / ndisk2;
1060 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1061 } else if (cs->sc_cflags & CCDF_PARITY) {
1063 * XXX not implemented yet
1065 int ndisk2 = ii->ii_ndisk - 1;
1066 ccdisk = ii->ii_index[off % ndisk2];
1067 cbn = ii->ii_startoff + off / ndisk2;
1068 if (cbn % ii->ii_ndisk <= ccdisk)
1069 ccdisk++;
1070 } else {
1071 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1072 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1076 ci = &cs->sc_cinfo[ccdisk];
1079 * Convert cbn from a superblock to a normal block so it
1080 * can be used to calculate (along with cboff) the normal
1081 * block index into this particular disk.
1083 cbn *= cs->sc_ileave;
1087 * Fill in the component buf structure.
1089 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1090 * will be truncated on device EOF so we use b_bufsize to detect
1091 * the case.
1093 cbp = getccdbuf();
1094 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1095 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1096 cbp->cb_buf.b_data = addr;
1097 cbp->cb_vp = ci->ci_vp;
1098 if (cs->sc_ileave == 0)
1099 cbc = dbtob((off_t)(ci->ci_size - cbn));
1100 else
1101 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1102 if (cbc > cs->sc_maxiosize)
1103 cbc = cs->sc_maxiosize;
1104 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1105 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1107 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1108 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1109 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
1112 * context for ccdiodone
1114 cbp->cb_obio = bio;
1115 cbp->cb_unit = cs - ccd_softc;
1116 cbp->cb_comp = ci - cs->sc_cinfo;
1118 #ifdef DEBUG
1119 if (ccddebug & CCDB_IO)
1120 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
1121 ci->ci_dev, ci-cs->sc_cinfo, cbp,
1122 cbp->cb_buf.b_bio1.bio_offset,
1123 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1124 #endif
1125 cb[0] = cbp;
1128 * Note: both I/O's setup when reading from mirror, but only one
1129 * will be executed.
1131 if (cs->sc_cflags & CCDF_MIRROR) {
1132 /* mirror, setup second I/O */
1133 cbp = getccdbuf();
1135 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1136 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1137 cbp->cb_buf.b_data = addr;
1138 cbp->cb_vp = ci2->ci_vp;
1139 if (cs->sc_ileave == 0)
1140 cbc = dbtob((off_t)(ci->ci_size - cbn));
1141 else
1142 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1143 if (cbc > cs->sc_maxiosize)
1144 cbc = cs->sc_maxiosize;
1145 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1146 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1148 cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1149 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1150 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
1153 * context for ccdiodone
1155 cbp->cb_obio = bio;
1156 cbp->cb_unit = cs - ccd_softc;
1157 cbp->cb_comp = ci2 - cs->sc_cinfo;
1158 cb[1] = cbp;
1159 /* link together the ccdbuf's and clear "mirror done" flag */
1160 cb[0]->cb_mirror = cb[1];
1161 cb[1]->cb_mirror = cb[0];
1162 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1163 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1167 static void
1168 ccdintr(struct ccd_softc *cs, struct bio *bio)
1170 struct buf *bp = bio->bio_buf;
1172 #ifdef DEBUG
1173 if (ccddebug & CCDB_FOLLOW)
1174 kprintf("ccdintr(%x, %x)\n", cs, bp);
1175 #endif
1177 * Request is done for better or worse, wakeup the top half.
1179 if (bp->b_flags & B_ERROR)
1180 bp->b_resid = bp->b_bcount;
1181 devstat_end_transaction_buf(&cs->device_stats, bp);
1182 biodone(bio);
1186 * Called at interrupt time.
1187 * Mark the component as done and if all components are done,
1188 * take a ccd interrupt.
1190 static void
1191 ccdiodone(struct bio *bio)
1193 struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1194 struct bio *obio = cbp->cb_obio;
1195 struct buf *obp = obio->bio_buf;
1196 int unit = cbp->cb_unit;
1197 int count;
1200 * Since we do not have exclusive access to underlying devices,
1201 * we can't keep cache translations around.
1203 clearbiocache(bio->bio_next);
1205 crit_enter();
1206 #ifdef DEBUG
1207 if (ccddebug & CCDB_FOLLOW)
1208 kprintf("ccdiodone(%x)\n", cbp);
1209 if (ccddebug & CCDB_IO) {
1210 kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1211 obp, obp->b_bcount, obp->b_resid);
1212 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1213 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1214 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1215 cbp->cb_buf.b_bcount);
1217 #endif
1220 * If an error occured, report it. If this is a mirrored
1221 * configuration and the first of two possible reads, do not
1222 * set the error in the bp yet because the second read may
1223 * succeed.
1225 if (cbp->cb_buf.b_flags & B_ERROR) {
1226 const char *msg = "";
1228 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1229 (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1230 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1232 * We will try our read on the other disk down
1233 * below, also reverse the default pick so if we
1234 * are doing a scan we do not keep hitting the
1235 * bad disk first.
1237 struct ccd_softc *cs = &ccd_softc[unit];
1239 msg = ", trying other disk";
1240 cs->sc_pick = 1 - cs->sc_pick;
1241 cs->sc_blk[cs->sc_pick] = obio->bio_offset;
1242 } else {
1243 obp->b_flags |= B_ERROR;
1244 obp->b_error = cbp->cb_buf.b_error ?
1245 cbp->cb_buf.b_error : EIO;
1247 kprintf("ccd%d: error %d on component %d "
1248 "offset %jd (ccd offset %jd)%s\n",
1249 unit, obp->b_error, cbp->cb_comp,
1250 (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1251 (intmax_t)obio->bio_offset,
1252 msg);
1256 * Process mirror. If we are writing, I/O has been initiated on both
1257 * buffers and we fall through only after both are finished.
1259 * If we are reading only one I/O is initiated at a time. If an
1260 * error occurs we initiate the second I/O and return, otherwise
1261 * we free the second I/O without initiating it.
1264 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1265 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1267 * When writing, handshake with the second buffer
1268 * to determine when both are done. If both are not
1269 * done, return here.
1271 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1272 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1273 putccdbuf(cbp);
1274 crit_exit();
1275 return;
1277 } else {
1279 * When reading, either dispose of the second buffer
1280 * or initiate I/O on the second buffer if an error
1281 * occured with this one.
1283 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1284 if (cbp->cb_buf.b_flags & B_ERROR) {
1285 cbp->cb_mirror->cb_pflags |=
1286 CCDPF_MIRROR_DONE;
1287 vn_strategy(
1288 cbp->cb_mirror->cb_vp,
1289 &cbp->cb_mirror->cb_buf.b_bio1
1291 putccdbuf(cbp);
1292 crit_exit();
1293 return;
1294 } else {
1295 putccdbuf(cbp->cb_mirror);
1296 /* fall through */
1303 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1305 count = cbp->cb_buf.b_bufsize;
1306 putccdbuf(cbp);
1309 * If all done, "interrupt".
1311 obp->b_resid -= count;
1312 if (obp->b_resid < 0)
1313 panic("ccdiodone: count");
1314 if (obp->b_resid == 0)
1315 ccdintr(&ccd_softc[unit], obio);
1316 crit_exit();
1319 static int
1320 ccdioctl(struct dev_ioctl_args *ap)
1322 cdev_t dev = ap->a_head.a_dev;
1323 int unit = ccdunit(dev);
1324 int i, j, lookedup = 0, error = 0;
1325 struct ccd_softc *cs;
1326 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1327 struct ccddevice ccd;
1328 struct disk_info info;
1329 char **cpp;
1330 struct vnode **vpp;
1332 if (unit >= numccd)
1333 return (ENXIO);
1334 cs = &ccd_softc[unit];
1336 bzero(&ccd, sizeof(ccd));
1338 switch (ap->a_cmd) {
1339 case CCDIOCSET:
1340 if (cs->sc_flags & CCDF_INITED)
1341 return (EBUSY);
1343 if ((ap->a_fflag & FWRITE) == 0)
1344 return (EBADF);
1346 if ((error = ccdlock(cs)) != 0)
1347 return (error);
1349 if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1350 ccdunlock(cs);
1351 return (EINVAL);
1354 /* Fill in some important bits. */
1355 ccd.ccd_unit = unit;
1356 ccd.ccd_interleave = ccio->ccio_ileave;
1357 if (ccd.ccd_interleave == 0 &&
1358 ((ccio->ccio_flags & CCDF_MIRROR) ||
1359 (ccio->ccio_flags & CCDF_PARITY))) {
1360 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1361 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1363 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1364 (ccio->ccio_flags & CCDF_PARITY)) {
1365 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1366 ccio->ccio_flags &= ~CCDF_PARITY;
1368 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1369 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1370 kprintf("ccd%d: mirror/parity forces uniform flag\n",
1371 unit);
1372 ccio->ccio_flags |= CCDF_UNIFORM;
1374 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1377 * Allocate space for and copy in the array of
1378 * componet pathnames and device numbers.
1380 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1381 M_DEVBUF, M_WAITOK);
1382 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1383 M_DEVBUF, M_WAITOK);
1385 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1386 ccio->ccio_ndisks * sizeof(char **));
1387 if (error) {
1388 kfree(vpp, M_DEVBUF);
1389 kfree(cpp, M_DEVBUF);
1390 ccdunlock(cs);
1391 return (error);
1394 #ifdef DEBUG
1395 if (ccddebug & CCDB_INIT) {
1396 for (i = 0; i < ccio->ccio_ndisks; ++i)
1397 kprintf("ccdioctl: component %d: 0x%x\n",
1398 i, cpp[i]);
1400 #endif
1402 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1403 #ifdef DEBUG
1404 if (ccddebug & CCDB_INIT)
1405 kprintf("ccdioctl: lookedup = %d\n", lookedup);
1406 #endif
1407 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1408 for (j = 0; j < lookedup; ++j)
1409 (void)vn_close(vpp[j], FREAD|FWRITE);
1410 kfree(vpp, M_DEVBUF);
1411 kfree(cpp, M_DEVBUF);
1412 ccdunlock(cs);
1413 return (error);
1415 ++lookedup;
1417 ccd.ccd_cpp = cpp;
1418 ccd.ccd_vpp = vpp;
1419 ccd.ccd_ndev = ccio->ccio_ndisks;
1422 * Initialize the ccd. Fills in the softc for us.
1424 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1425 for (j = 0; j < lookedup; ++j)
1426 (void)vn_close(vpp[j], FREAD|FWRITE);
1427 kfree(vpp, M_DEVBUF);
1428 kfree(cpp, M_DEVBUF);
1429 ccdunlock(cs);
1430 return (error);
1434 * The ccd has been successfully initialized, so
1435 * we can place it into the array and read the disklabel.
1437 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1438 ccio->ccio_unit = unit;
1439 ccio->ccio_size = cs->sc_size;
1441 bzero(&info, sizeof(info));
1442 info.d_media_blksize = cs->sc_geom.ccg_secsize;
1443 info.d_media_blocks = cs->sc_size;
1444 info.d_nheads = cs->sc_geom.ccg_ntracks;
1445 info.d_secpertrack = cs->sc_geom.ccg_nsectors;
1446 info.d_ncylinders = cs->sc_geom.ccg_ncylinders;
1447 info.d_secpercyl = info.d_nheads * info.d_secpertrack;
1450 * For cases where a label is directly applied to the ccd,
1451 * without slices, DSO_COMPATMBR forces one sector be
1452 * reserved for backwards compatibility.
1454 info.d_dsflags = DSO_COMPATMBR;
1455 disk_setdiskinfo(&cs->sc_disk, &info);
1457 ccdunlock(cs);
1459 break;
1461 case CCDIOCCLR:
1462 if ((cs->sc_flags & CCDF_INITED) == 0)
1463 return (ENXIO);
1465 if ((ap->a_fflag & FWRITE) == 0)
1466 return (EBADF);
1468 if ((error = ccdlock(cs)) != 0)
1469 return (error);
1471 if (dev_drefs(cs->sc_dev) > 1) {
1472 ccdunlock(cs);
1473 return (EBUSY);
1477 * Free ccd_softc information and clear entry.
1480 /* Close the components and free their pathnames. */
1481 for (i = 0; i < cs->sc_nccdisks; ++i) {
1483 * XXX: this close could potentially fail and
1484 * cause Bad Things. Maybe we need to force
1485 * the close to happen?
1487 #ifdef DEBUG
1488 if (ccddebug & CCDB_VNODE)
1489 vprint("CCDIOCCLR: vnode info",
1490 cs->sc_cinfo[i].ci_vp);
1491 #endif
1492 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
1493 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1496 /* Free interleave index. */
1497 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1498 kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1500 /* Free component info and interleave table. */
1501 kfree(cs->sc_cinfo, M_DEVBUF);
1502 kfree(cs->sc_itable, M_DEVBUF);
1503 cs->sc_cinfo = NULL;
1504 cs->sc_itable = NULL;
1505 cs->sc_flags &= ~CCDF_INITED;
1508 * Free ccddevice information and clear entry.
1510 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1511 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1512 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1515 * And remove the devstat entry.
1517 devstat_remove_entry(&cs->device_stats);
1519 /* This must be atomic. */
1520 crit_enter();
1521 ccdunlock(cs);
1522 crit_exit();
1524 break;
1526 default:
1527 return (ENOTTY);
1530 return (0);
/*
 * Dump entry point.  Dumping to a ccd is not implemented, so every call
 * fails with ENXIO; the argument is not examined.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	return (ENXIO);
}
1541 * Lookup the provided name in the filesystem. If the file exists,
1542 * is a valid block device, and isn't being used by anyone else,
1543 * set *vpp to the file's vnode.
1545 static int
1546 ccdlookup(char *path, struct vnode **vpp)
1548 struct nlookupdata nd;
1549 struct vnode *vp;
1550 int error;
1552 *vpp = NULL;
1554 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1555 if (error)
1556 return (error);
1557 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1558 #ifdef DEBUG
1559 if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1560 kprintf("ccdlookup: vn_open error = %d\n", error);
1561 #endif
1562 goto done;
1564 vp = nd.nl_open_vp;
1566 if (vp->v_opencount > 1) {
1567 error = EBUSY;
1568 goto done;
1571 if (!vn_isdisk(vp, &error))
1572 goto done;
1574 #ifdef DEBUG
1575 if (ccddebug & CCDB_VNODE)
1576 vprint("ccdlookup: vnode info", vp);
1577 #endif
1579 vn_unlock(vp);
1580 nd.nl_open_vp = NULL;
1581 nlookup_done(&nd);
1582 *vpp = vp; /* leave ref intact */
1583 return (0);
1584 done:
1585 nlookup_done(&nd);
1586 return (error);
1590 * Wait interruptibly for an exclusive lock.
1592 * XXX
1593 * Several drivers do this; it should be abstracted and made MP-safe.
1595 static int
1596 ccdlock(struct ccd_softc *cs)
1598 int error;
1600 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1601 cs->sc_flags |= CCDF_WANTED;
1602 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1603 return (error);
1605 cs->sc_flags |= CCDF_LOCKED;
1606 return (0);
1610 * Unlock and wake up any waiters.
1612 static void
1613 ccdunlock(struct ccd_softc *cs)
1616 cs->sc_flags &= ~CCDF_LOCKED;
1617 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1618 cs->sc_flags &= ~CCDF_WANTED;
1619 wakeup(cs);
#ifdef DEBUG
/*
 * Debug helper: print the interleave table starting at 'ii'.  Entries
 * are walked until one with ii_ndisk == 0 is reached; for each entry the
 * disk count, start block, start offset and the ii_index[] values are
 * printed on one line.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;

	while (ii->ii_ndisk) {
		int d;

		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (d = 0; d < ii->ii_ndisk; d++)
			kprintf(" %d", ii->ii_index[d]);
		kprintf("\n");
		entry++;
		ii++;
	}
}
#endif
1640 /* Local Variables: */
1641 /* c-argdecl-indent: 8 */
1642 /* c-continued-statement-offset: 8 */
1643 /* c-indent-level: 8 */
1644 /* End: */