4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Soft partitioning metadevice driver (md_sp), administrative routines.
30 * This file contains the administrative routines for the soft partitioning
31 * metadevice driver. All administration is done through the use of ioctl's.
33 * The primary ioctl's supported by soft partitions are as follows:
35 * MD_IOCSET - set up a new soft partition.
36 * MD_IOCGET - get the unit structure of a soft partition.
37 * MD_IOCRESET - delete a soft partition.
38 * MD_IOCGROW - add space to a soft partition.
39 * MD_IOCGET_DEVS - get the device the soft partition is built on.
40 * MD_IOC_SPSTATUS - set the status (un_status field in the soft
41 * partition unit structure) for one or more soft
44 * Note that, as with other metadevices, the majority of the work for
45 * building/growing/deleting soft partitions is performed in userland
46 * (specifically in libmeta, see meta_sp.c). The driver's main administrative
47 * function is to maintain the in-core & metadb entries associated with a soft
50 * In addition, a few other ioctl's are supported via helper routines in
51 * the md driver. These are:
53 * DKIOCINFO - get "disk" information.
54 * DKIOCGEOM - get geometry information.
55 * DKIOCGVTOC - get vtoc information.
57 #include <sys/param.h>
58 #include <sys/systm.h>
63 #include <sys/t_lock.h>
69 #include <sys/sysmacros.h>
70 #include <sys/types.h>
71 #include <sys/mkdev.h>
74 #include <sys/lvm/mdvar.h>
75 #include <sys/lvm/md_sp.h>
76 #include <sys/lvm/md_notify.h>
77 #include <sys/modctl.h>
79 #include <sys/sunddi.h>
80 #include <sys/debug.h>
81 #include <sys/model.h>
83 #include <sys/sysevent/eventdefs.h>
84 #include <sys/sysevent/svm.h>
88 extern unit_t md_nunits
;
89 extern set_t md_nsets
;
90 extern md_set_t md_set
[];
92 extern md_ops_t sp_md_ops
;
93 extern md_krwlock_t md_unit_array_rw
;
94 extern major_t md_major
;
97 * FUNCTION: sp_getun()
98 * INPUT: mnum - minor number of soft partition to get.
99 * OUTPUT: mde - return error pointer.
100 * RETURNS: mp_unit_t * - ptr to unit structure requested
102 * PURPOSE: Returns a reference to the soft partition unit structure
103 * indicated by the passed-in minor number.
/*
 * sp_getun(): look up the unit structure for minor number mnum.
 * The code visible below rejects out-of-range set/unit numbers and
 * sets flagged MD_SET_STALE, then fetches MD_UNIT(mnum) and requires
 * its type to be MD_METASP.  Each failure records a code in *mde
 * (MDE_INVAL_UNIT, MDE_DB_STALE, MDE_UNIT_NOT_SETUP, MDE_NOT_SP).
 * NOTE(review): the return statements and the test that guards
 * MDE_UNIT_NOT_SETUP are not visible in this listing; the callers in
 * this file treat a NULL result as failure.
 */
106 sp_getun(minor_t mnum
, md_error_t
*mde
)
110 set_t setno
= MD_MIN2SET(mnum
);
/* range-check the set and unit numbers encoded in mnum */
113 if ((setno
>= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
)) {
114 (void) mdmderror(mde
, MDE_INVAL_UNIT
, mnum
);
/* refuse sets whose status has MD_SET_STALE set */
118 if (md_get_setstatus(setno
) & MD_SET_STALE
) {
119 (void) mdmddberror(mde
, MDE_DB_STALE
, mnum
, setno
);
126 (void) mdmderror(mde
, MDE_UNIT_NOT_SETUP
, mnum
);
/* fetch the in-core unit and make sure it is a soft partition */
130 un
= (mp_unit_t
*)MD_UNIT(mnum
);
132 if (un
->c
.un_type
!= MD_METASP
) {
133 (void) mdmderror(mde
, MDE_NOT_SP
, mnum
);
142 * FUNCTION: sp_setstatus()
143 * INPUT: d - data ptr passed in from ioctl.
144 * mode - pass-through to ddi_copyin.
147 * RETURNS: 0 - success.
149 * PURPOSE: Set the status of one or more soft partitions atomically.
150 * This implements the MD_IOC_SPSTATUS ioctl. Soft partitions
151 * are passed in as an array of minor numbers. The un_status
152 * field in the unit structure of each soft partition is set to
153 * the status passed in and all unit structures are recommitted
154 * to the metadb at once.
/*
 * sp_setstatus(): MD_IOC_SPSTATUS worker.  d is the md_sp_statusset_t
 * already copied in by sp_admin_ioctl(); it supplies the unit count
 * (num_units), the new status (new_status) and a user pointer to the
 * array of minor numbers (minors).
 *
 * Flow visible below: copy in the minors array, verify every minor
 * with sp_getun(), then for each unit take the ioctl writer lock,
 * store the new un_status, remember its record id and drop the lock;
 * finally commit all collected record ids with a single
 * mddb_commitrecs_wrapper() call and free both arrays.
 *
 * NOTE(review): nunits comes straight from the caller structure and no
 * upper bound is visible before it sizes two allocations
 * ((nunits + 1) * sizeof (mddb_recid_t), and sz, whose computation is
 * not shown in this listing) -- confirm overflow cannot occur.
 * NOTE(review): when sp_getun() fails, MDE_INVAL_UNIT is stored through
 * mdmderror(), which presumably replaces the more specific code that
 * sp_getun() had just recorded -- confirm this is intended.
 * NOTE(review): in the update loop sp_getun() runs before the writer
 * lock is taken and its result is not re-checked -- confirm nothing
 * can remove the unit between the validation pass and the update pass.
 */
157 sp_setstatus(void *d
, int mode
, IOLOCK
*lockp
)
161 mddb_recid_t
*recids
;
167 md_sp_statusset_t
*msp
= (md_sp_statusset_t
*)d
;
169 nunits
= msp
->num_units
;
171 status
= msp
->new_status
;
175 /* allocate minor number and recids arrays */
176 minors
= kmem_alloc(sz
, KM_SLEEP
);
/* one extra recids slot beyond the nunits entries filled in below */
177 recids
= kmem_alloc((nunits
+ 1) * sizeof (mddb_recid_t
), KM_SLEEP
);
179 /* copyin minor number array */
180 if (err
= ddi_copyin((void *)(uintptr_t)msp
->minors
, minors
, sz
, mode
))
183 /* check to make sure all units are valid first */
184 for (i
= 0; i
< nunits
; i
++) {
185 if ((un
= sp_getun(minors
[i
], mdep
)) == NULL
) {
186 err
= mdmderror(mdep
, MDE_INVAL_UNIT
, minors
[i
]);
191 /* update state for all units */
192 for (i
= 0; i
< nunits
; i
++) {
193 un
= sp_getun(minors
[i
], mdep
);
194 (void) md_ioctl_writerlock(lockp
, MDI_UNIT(minors
[i
]));
195 un
->un_status
= status
;
196 recids
[i
] = un
->c
.un_record_id
;
197 md_ioctl_writerexit(lockp
);
/* commit every collected record in one call */
201 mddb_commitrecs_wrapper(recids
);
204 kmem_free(minors
, sz
);
205 kmem_free(recids
, ((nunits
+ 1) * sizeof (mddb_recid_t
)));
211 * FUNCTION: sp_update_watermarks()
212 * INPUT: d - data ptr passed in from ioctl.
213 * mode - pass-through to ddi_copyin.
215 * RETURNS: 0 - success.
217 * PURPOSE: This implements the MD_IOC_SPUPDATEWM ioctl.
218 * Watermarks are passed in an array.
/*
 * sp_update_watermarks(): MD_IOC_SPUPDATEWM / MD_MN_IOC_SPUPDATEWM
 * worker.  d is the md_sp_update_wm_t copied in by sp_admin_ioctl().
 *
 * Flow visible below: validate the set, copy in mup->count watermarks
 * (mup->wmp) and the same number of offsets (mup->osp), open the
 * underlying device with md_layered_open(), then issue one B_WRITE of
 * sizeof (mp_watermark_t) bytes per entry at block offsets[i] through
 * a single reused buf.  MDE_SP_BADWMWRITE is recorded on the error
 * path that follows md_call_strategy().  The buf, the layered open and
 * both arrays are released at the end.
 *
 * NOTE(review): mddb_getsidenum(setno) is called before setno is
 * range-checked by the set validation that follows -- confirm that is
 * safe for an out-of-range minor supplied by the caller.
 * NOTE(review): mup->count is caller-supplied and no bound or zero
 * check is visible before it sizes wsz and osz -- confirm.
 * NOTE(review): the assignments of mnum, un, device and mdep, and the
 * error exits, are not visible in this listing.
 */
221 sp_update_watermarks(void *d
, int mode
)
230 mp_watermark_t
*watermarks
;
231 sp_ext_offset_t
*offsets
;
235 md_sp_update_wm_t
*mup
= (md_sp_update_wm_t
*)d
;
239 setno
= MD_MIN2SET(mnum
);
240 side
= mddb_getsidenum(setno
);
250 /* Validate the set */
251 if ((setno
>= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
))
252 return (mdmderror(mdep
, MDE_INVAL_UNIT
, mnum
));
253 if (md_get_setstatus(setno
) & MD_SET_STALE
)
254 return (mdmddberror(mdep
, MDE_DB_STALE
, mnum
, setno
));
/* kernel copies of the caller arrays, count entries each */
256 wsz
= mup
->count
* sizeof (mp_watermark_t
);
257 watermarks
= kmem_alloc(wsz
, KM_SLEEP
);
259 osz
= mup
->count
* sizeof (sp_ext_offset_t
);
260 offsets
= kmem_alloc(osz
, KM_SLEEP
);
263 * Once we're here, we are no longer stateless: we cannot
264 * return without first freeing the watermarks and offset
265 * arrays we just allocated. So use the "out" label instead
269 /* Retrieve the watermark and offset arrays from user land */
271 if (ddi_copyin((void *)(uintptr_t)mup
->wmp
, watermarks
, wsz
, mode
)) {
276 if (ddi_copyin((void *)(uintptr_t)mup
->osp
, offsets
, osz
, mode
)) {
282 * NOTE: For multi-node sets we only commit the watermarks if we are
283 * the master node. This avoids an ioctl-within-ioctl deadlock if the
284 * underlying device is a mirror.
286 if (MD_MNSET_SETNO(setno
) && !md_set
[setno
].s_am_i_master
) {
291 if ((md_getmajor(device
) != md_major
) &&
292 (md_devid_found(setno
, side
, un
->un_key
) == 1)) {
293 device
= md_resolve_bydevid(mnum
, device
, un
->un_key
);
296 * Flag the fact that we're coming from an ioctl handler to the
297 * underlying device so that it can take appropriate action if needed.
298 * This is necessary for multi-owner mirrors as they may need to
299 * update the metadevice state as a result of the layered open.
301 if (md_layered_open(mnum
, &device
, MD_OFLG_FROMIOCTL
)) {
302 err
= mdcomperror(mdep
, MDE_SP_COMP_OPEN_ERR
,
/* a single buf is allocated here and reused for every watermark */
307 bp
= kmem_alloc(biosize(), KM_SLEEP
);
310 for (i
= 0; i
< mup
->count
; i
++) {
313 * Even the "constant" fields should be initialized
314 * here, since bioreset() below will clear them.
316 bp
->b_flags
= B_WRITE
;
317 bp
->b_bcount
= sizeof (mp_watermark_t
);
318 bp
->b_bufsize
= sizeof (mp_watermark_t
);
319 bp
->b_un
.b_addr
= (caddr_t
)&watermarks
[i
];
320 bp
->b_lblkno
= offsets
[i
];
321 bp
->b_edev
= md_dev64_to_dev(device
);
325 * Use a special flag MD_STR_WMUPDATE, for the following case:
326 * If the watermarks reside on a mirror disk and a switch
327 * of ownership is triggered by this IO,
328 * the message that is generated by that request must be
329 * processed even if the commd subsystem is currently suspended.
331 * For non-MN sets or non-mirror metadevices,
332 * this flag has no meaning and is not checked.
335 md_call_strategy(bp
, MD_NOBLOCK
| MD_STR_WMUPDATE
, NULL
);
338 err
= mdmderror(mdep
,
339 MDE_SP_BADWMWRITE
, mnum
);
343 /* Get the buf_t ready for the next iteration */
348 kmem_free(bp
, biosize());
350 md_layered_close(device
, MD_OFLG_NULL
);
353 kmem_free(watermarks
, wsz
);
354 kmem_free(offsets
, osz
);
361 * FUNCTION: sp_read_watermark()
362 * INPUT: d - data ptr passed in from ioctl.
363 * mode - pass-through to ddi_copyout.
365 * RETURNS: 0 - success.
367 * PURPOSE: This implements the MD_IOC_SPREADWM ioctl.
/*
 * sp_read_watermark(): MD_IOC_SPREADWM worker.  d is the
 * md_sp_read_wm_t copied in by sp_admin_ioctl().
 *
 * Flow visible below: open the device with md_layered_open(), read
 * sizeof (mp_watermark_t) bytes at block mrp->offset into the on-stack
 * variable watermark through a freshly allocated buf, free the buf,
 * close the device and copy the watermark out to the user buffer
 * mrp->wmp.
 *
 * NOTE(review): watermark has no initializer and this listing shows no
 * early exit between the MDE_SP_BADWMREAD error path and the
 * ddi_copyout() -- confirm a failed read cannot copy uninitialized
 * stack contents out to the caller.
 * NOTE(review): md_layered_open() is passed (minor_t)-1 here, whereas
 * sp_update_watermarks() passes the minor number of the unit.
 * NOTE(review): the assignment of device and mdep and the wait for
 * I/O completion are not visible in this listing.
 */
370 sp_read_watermark(void *d
, int mode
)
373 mp_watermark_t watermark
;
376 md_sp_read_wm_t
*mrp
= (md_sp_read_wm_t
*)d
;
385 * Flag the fact that we are being called from ioctl context so that
386 * the underlying device can take any necessary extra steps to handle
389 if (md_layered_open((minor_t
)-1, &device
, MD_OFLG_FROMIOCTL
)) {
390 return (mdcomperror(mdep
, MDE_SP_COMP_OPEN_ERR
,
391 (minor_t
)NODEV
, device
));
/* set up a one-watermark read that lands directly in the stack variable */
394 bp
= kmem_alloc(biosize(), KM_SLEEP
);
397 bp
->b_flags
= B_READ
;
398 bp
->b_bcount
= sizeof (mp_watermark_t
);
399 bp
->b_bufsize
= sizeof (mp_watermark_t
);
400 bp
->b_un
.b_addr
= (caddr_t
)&watermark
;
401 bp
->b_lblkno
= mrp
->offset
;
402 bp
->b_edev
= md_dev64_to_dev(device
);
404 md_call_strategy(bp
, MD_NOBLOCK
, NULL
);
408 * Taking advantage of the knowledge that mdmderror()
409 * returns 0, so we don't really need to keep track of
410 * an error code other than in the error struct.
412 (void) mdmderror(mdep
, MDE_SP_BADWMREAD
,
417 kmem_free(bp
, biosize());
419 md_layered_close(device
, MD_OFLG_NULL
);
421 if (ddi_copyout(&watermark
, (void *)(uintptr_t)mrp
->wmp
,
422 sizeof (mp_watermark_t
), mode
)) {
432 * INPUT: d - data ptr passed in from ioctl.
433 * mode - pass-through to ddi_copyin.
435 * RETURNS: 0 - success.
437 * PURPOSE: Create a soft partition. The unit structure representing
438 * the soft partition is passed down from userland. We allocate
439 * a metadb entry, copy in the unit structure, handle any
440 * metadevice parenting issues, then commit the record to the
441 * metadb. Once the record is in the metadb, we must also
442 * build the associated in-core structures. This is done via
443 * sp_build_incore() (see sp.c).
/*
 * sp_set(): create-soft-partition worker called from sp_admin_ioctl()
 * with the md_set_params_t it copied in.
 *
 * Steps visible below: validate the set/unit in mnum, create a metadb
 * record of msp->size bytes (MD_CRO_64BIT or MD_CRO_32BIT, always with
 * MD_CRO_SOFTPART | MD_CRO_FN), copy the user unit structure
 * (msp->mdp) into it, fill in the record id, MD_NO_PARENT and
 * MD_FN_META_DEV, parent the underlying metadevice if un_dev is one,
 * build the in-core state with sp_build_incore(), decrement
 * s_un_avail for the set, commit the record list, create the mdi_unit
 * and post an ESC_SVM_CREATE event.
 *
 * Failure handling visible below: the new record is deleted when the
 * copyin fails, when the child unit is missing, and when
 * sp_build_incore() fails (that path also resets md_nblocks and
 * MD_UNIT(mnum)).
 *
 * NOTE(review): msp->size is caller-supplied; no check against the
 * size of mp_unit_t is visible before the record is used as one.
 * NOTE(review): the sp_build_incore() failure path does not visibly
 * undo the md_set_parent() done for a child metadevice -- confirm.
 * NOTE(review): the condition guarding MDE_UNIT_TOO_LARGE in the
 * MD_CRO_64BIT branch, the test guarding MDE_UNIT_ALREADY_SETUP and
 * the writes of the recids end markers are not visible in this
 * listing.
 */
446 sp_set(void *d
, int mode
)
/* record list handed to mddb_commitrecs_wrapper(): own record, optional child */
451 mddb_recid_t recids
[3];
452 mddb_type_t rec_type
;
457 md_set_params_t
*msp
= (md_set_params_t
*)d
;
460 setno
= MD_MIN2SET(mnum
);
467 if ((setno
>= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
))
468 return (mdmderror(mdep
, MDE_INVAL_UNIT
, mnum
));
469 if (md_get_setstatus(setno
) & MD_SET_STALE
)
470 return (mdmddberror(mdep
, MDE_DB_STALE
, mnum
, setno
));
472 /* get the record type */
473 rec_type
= (mddb_type_t
)md_getshared_key(setno
,
474 sp_md_ops
.md_driver
.md_drivername
);
476 /* check if there is already a device with this minor number */
479 return (mdmderror(mdep
, MDE_UNIT_ALREADY_SETUP
, mnum
));
481 /* create the db record for this soft partition */
483 if (msp
->options
& MD_CRO_64BIT
) {
485 return (mdmderror(mdep
, MDE_UNIT_TOO_LARGE
, mnum
));
487 recids
[0] = mddb_createrec((size_t)msp
->size
, rec_type
, 0,
488 MD_CRO_64BIT
| MD_CRO_SOFTPART
| MD_CRO_FN
, setno
);
491 recids
[0] = mddb_createrec((size_t)msp
->size
, rec_type
, 0,
492 MD_CRO_32BIT
| MD_CRO_SOFTPART
| MD_CRO_FN
, setno
);
494 /* set initial value for possible child record */
497 return (mddbstatus2error(mdep
, recids
[0], mnum
, setno
));
499 /* get the address of the soft partition db record */
500 rec_addr
= (void *) mddb_getrecaddr(recids
[0]);
503 * at this point we can happily mess with the soft partition
504 * db record since we haven't committed it to the metadb yet.
505 * if we crash before we commit, the uncommitted record will be
506 * automatically purged.
509 /* copy in the user's soft partition unit struct */
510 if (err
= ddi_copyin((void *)(uintptr_t)msp
->mdp
,
511 rec_addr
, (size_t)msp
->size
, mode
)) {
512 mddb_deleterec_wrapper(recids
[0]);
516 /* fill in common unit structure fields which aren't set in userland */
517 un
= (mp_unit_t
*)rec_addr
;
519 /* All 64 bit metadevices only support EFI labels. */
520 if (msp
->options
& MD_CRO_64BIT
) {
521 un
->c
.un_flag
|= MD_EFILABEL
;
525 MD_RECID(un
) = recids
[0];
526 MD_PARENT(un
) = MD_NO_PARENT
;
527 un
->c
.un_revision
|= MD_FN_META_DEV
;
529 /* if we are parenting a metadevice, set our child's parent field */
530 if (md_getmajor(un
->un_dev
) == md_major
) {
531 /* it's a metadevice, need to parent it */
532 child_un
= MD_UNIT(md_getminor(un
->un_dev
));
533 if (child_un
== NULL
) {
534 mddb_deleterec_wrapper(recids
[0]);
535 return (mdmderror(mdep
, MDE_INVAL_UNIT
,
536 md_getminor(un
->un_dev
)));
538 md_set_parent(un
->un_dev
, MD_SID(un
));
540 /* set child recid and recids end marker */
541 recids
[1] = MD_RECID(child_un
);
546 * build the incore structures.
548 if (err
= sp_build_incore(rec_addr
, 0)) {
549 md_nblocks_set(mnum
, -1ULL);
550 MD_UNIT(mnum
) = NULL
;
552 mddb_deleterec_wrapper(recids
[0]);
557 * Update unit availability
559 md_set
[setno
].s_un_avail
--;
563 * if we had to update a child record, it will get commited
566 mddb_commitrecs_wrapper(recids
);
568 /* create the mdi_unit struct for this soft partition */
569 md_create_unit_incore(mnum
, &sp_md_ops
, 0);
571 SE_NOTIFY(EC_SVM_CONFIG
, ESC_SVM_CREATE
, TAG_METADEVICE
, MD_UN2SET(un
),
579 * INPUT: d - data ptr.
580 * mode - pass-through to ddi_copyout.
583 * RETURNS: 0 - success.
585 * PURPOSE: Get the soft partition unit structure specified by the
586 * minor number. the in-core unit structure is obtained
587 * and copied into the md_i_get structure passed down from
/*
 * sp_get(): copy the in-core unit structure of a soft partition out to
 * the caller.  d is the md_i_get_t copied in by sp_admin_ioctl().
 *
 * After validating mnum and taking the ioctl reader lock, a request
 * with migp->size == 0 has un->c.un_size stored into migp->size; a
 * separate branch handles a caller size smaller than un_size; the
 * ddi_copyout() at the end copies un->c.un_size bytes to migp->mdp.
 *
 * NOTE(review): how the size == 0 and size < un_size branches exit is
 * not visible in this listing.
 * NOTE(review): unlike sp_getun(), no MD_METASP type check is visible
 * before the unit is used as an mp_unit_t -- confirm callers guarantee
 * the minor is a soft partition.
 */
591 sp_get(void *d
, int mode
, IOLOCK
*lock
)
597 md_i_get_t
*migp
= d
;
605 /* make sure this is a valid unit structure */
606 if ((MD_MIN2SET(mnum
) >= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
))
607 return (mdmderror(mdep
, MDE_INVAL_UNIT
, mnum
));
609 /* get the mdi_unit */
610 if ((ui
= MDI_UNIT(mnum
)) == NULL
) {
611 return (mdmderror(mdep
, MDE_UNIT_NOT_SETUP
, mnum
));
615 * md_ioctl_readerlock returns a reference to the in-core
616 * unit structure. this lock will be dropped by
617 * md_ioctl_lock_exit() before the ioctl returns.
619 un
= (mp_unit_t
*)md_ioctl_readerlock(lock
, ui
);
621 /* verify the md_i_get structure */
/* size == 0: report the required size back through migp->size */
622 if (migp
->size
== 0) {
623 migp
->size
= un
->c
.un_size
;
/* caller buffer smaller than the unit structure */
626 if (migp
->size
< un
->c
.un_size
) {
631 if (ddi_copyout(un
, (void *)(uintptr_t)migp
->mdp
,
632 un
->c
.un_size
, mode
))
639 * FUNCTION: sp_reset()
640 * INPUT: reset_params - soft partitioning reset parameters.
642 * RETURNS: 0 - success.
644 * PURPOSE: Do the setup work needed to delete a soft partition.
645 * note that the actual removal of both in-core and metadb
646 * structures is done in the reset_sp() routine (see sp.c).
647 * In addition, since multiple soft partitions may exist
648 * on top of a single metadevice, the soft partition reset
649 * parameters (md_sp_reset_t) contains information about
650 * how the soft partition should deparent/reparent the
651 * underlying metadevice. If the underlying metadevice is
652 * to be deparented, the new_parent field will be MD_NO_PARENT,
653 * otherwise it will contain the minor number of another
654 * soft partition built on top of the underlying metadevice.
/*
 * sp_reset(): delete the soft partition named by softp->mnum.
 *
 * Refuses units that have a parent (MDE_IN_USE) or that are open
 * (MDE_IS_OPEN).  With md_unit_array_rw held as writer it re-parents
 * the underlying metadevice to softp->new_parent (only when un_dev is
 * a metadevice) and commits the record of that child, calls
 * reset_sp(), increments s_un_avail for the set and, for multi-node
 * sets, calls md_upd_set_unnext().
 *
 * NOTE(review): a NULL return from sp_getun() is reported as
 * MDE_INVAL_UNIT, presumably replacing the code sp_getun() recorded.
 * NOTE(review): sp_getun() and the parent test run before
 * md_unit_array_rw is taken -- confirm the unit cannot change in
 * between.
 * NOTE(review): child_un is passed to MD_RECID() with no visible NULL
 * check, whereas sp_set() checks the result of the same lookup.
 * NOTE(review): the assignment of ui is not visible in this listing.
 */
657 sp_reset(md_sp_reset_t
*softp
)
659 minor_t mnum
= softp
->mnum
;
663 set_t setno
= MD_MIN2SET(mnum
);
665 mdclrerror(&softp
->mde
);
667 /* get the unit structure */
668 if ((un
= sp_getun(mnum
, &softp
->mde
)) == NULL
) {
669 return (mdmderror(&softp
->mde
, MDE_INVAL_UNIT
, mnum
));
672 /* don't delete if we have a parent */
673 if (MD_HAS_PARENT(un
->c
.un_parent
)) {
674 return (mdmderror(&softp
->mde
, MDE_IN_USE
, mnum
));
677 rw_enter(&md_unit_array_rw
.lock
, RW_WRITER
);
680 (void) md_unit_openclose_enter(ui
);
682 /* don't delete if we are currently open */
683 if (md_unit_isopen(ui
)) {
684 md_unit_openclose_exit(ui
);
685 rw_exit(&md_unit_array_rw
.lock
);
686 return (mdmderror(&softp
->mde
, MDE_IS_OPEN
, mnum
));
689 md_unit_openclose_exit(ui
);
692 * if we are built on metadevice, we need to deparent
693 * or reparent that metadevice.
695 if (md_getmajor(un
->un_dev
) == md_major
) {
696 child_un
= MD_UNIT(md_getminor(un
->un_dev
));
697 md_set_parent(un
->un_dev
, softp
->new_parent
);
698 mddb_commitrec_wrapper(MD_RECID(child_un
));
700 /* remove the soft partition */
701 reset_sp(un
, mnum
, 1);
704 * Update unit availability
706 md_set
[setno
].s_un_avail
++;
709 * If MN set, reset s_un_next so all nodes can have
710 * the same view of the next available slot when
711 * nodes are -w and -j
713 if (MD_MNSET_SETNO(setno
)) {
714 md_upd_set_unnext(setno
, MD_MIN2UNIT(mnum
));
717 /* release locks and return */
719 rw_exit(&md_unit_array_rw
.lock
);
725 * FUNCTION: sp_grow()
726 * INPUT: d - data ptr.
727 * mode - pass-through to ddi_copyin.
730 * RETURNS: 0 - success.
732 * PURPOSE: Attach more space to a soft partition. We are passed in
733 * a new unit structure with the new extents and other updated
734 * information. The new unit structure essentially replaces
735 * the old unit for this soft partition. We place the new
736 * unit into the metadb, delete the old metadb record, and
737 * then update the in-core unit structure array to point to
/*
 * sp_grow(): grow worker called from sp_admin_ioctl() with the
 * md_grow_params_t it copied in.  Replaces the metadb record of the
 * unit with a new one built from the unit structure supplied by the
 * caller.
 *
 * Steps visible below: validate mnum; copy in the npar parent minor
 * numbers; take md_unit_array_rw as writer, then a writer lock per
 * parent, then the writer lock of the unit itself; create a new record
 * of mgp->size bytes (keeping MD_CRO_FN if the old unit had
 * MD_FN_META_DEV); copy the caller unit into it; for MD_CRO_64BIT set
 * MD_EFILABEL and, when the old unit was not MD_64BIT_META_DEV and had
 * a vtoc record, convert it with md_vtoc_to_efi_record(); commit the
 * new record, delete the old one, publish the new unit in
 * MD_UNIT(mnum) and md_nblocks, post ESC_SVM_GROW, delete the old vtoc
 * record, then release the parent locks in reverse order, drop
 * md_unit_array_rw and free the par and plock arrays.
 *
 * NOTE(review): npar and mgp->size are caller-supplied; the only
 * visible check on the parent list is an ASSERT on the pointer
 * (presumably compiled out of non-DEBUG kernels).  Confirm npar is
 * bounded, and non-negative given the i >= 0 release loop, before it
 * sizes the two allocations.
 * NOTE(review): where npar, ui and mnum are assigned, the guard around
 * the parent handling, and the error exits that set rval are not
 * visible in this listing.
 */
741 sp_grow(void *d
, int mode
, IOLOCK
*lockp
)
744 mp_unit_t
*un
, *new_un
;
747 IOLOCK
*plock
= NULL
;
750 mddb_type_t rec_type
;
751 mddb_recid_t old_vtoc
= 0;
752 md_create_rec_option_t options
;
758 md_grow_params_t
*mgp
= (md_grow_params_t
*)d
;
762 setno
= MD_MIN2SET(mnum
);
768 if ((setno
>= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
))
769 return (mdmderror(mdep
, MDE_INVAL_UNIT
, mnum
));
770 if (md_get_setstatus(setno
) & MD_SET_STALE
)
771 return (mdmddberror(mdep
, MDE_DB_STALE
, mnum
, setno
));
773 /* make sure this soft partition already exists */
776 return (mdmderror(mdep
, MDE_UNIT_NOT_SETUP
, mnum
));
778 /* handle any parents */
780 ASSERT((minor_t
*)(uintptr_t)mgp
->par
!= NULL
);
/* par holds the parent minors, plock one IOLOCK per parent */
781 par
= kmem_alloc(npar
* sizeof (*par
), KM_SLEEP
);
782 plock
= kmem_alloc(npar
* sizeof (*plock
), KM_SLEEP
);
783 if (ddi_copyin((void *)(uintptr_t)mgp
->par
, par
,
784 (npar
* sizeof (*par
)), mode
) != 0) {
785 kmem_free(par
, npar
* sizeof (*par
));
786 kmem_free(plock
, npar
* sizeof (*plock
));
792 * handle parent locking. grab the unit writer lock,
793 * then all parent ioctl locks, and then finally our own.
794 * parents should be sorted to avoid deadlock.
796 rw_enter(&md_unit_array_rw
.lock
, RW_WRITER
);
797 for (i
= 0; i
< npar
; ++i
) {
798 (void) md_ioctl_writerlock(&plock
[i
],
801 un
= (mp_unit_t
*)md_ioctl_writerlock(lockp
, ui
);
803 rec_type
= (mddb_type_t
)md_getshared_key(setno
,
804 sp_md_ops
.md_driver
.md_drivername
);
807 * Preserve the friendly name nature of the unit that is growing.
809 options
= MD_CRO_SOFTPART
;
810 if (un
->c
.un_revision
& MD_FN_META_DEV
)
811 options
|= MD_CRO_FN
;
812 if (mgp
->options
& MD_CRO_64BIT
) {
814 rval
= mdmderror(mdep
, MDE_UNIT_TOO_LARGE
, mnum
);
817 recid
= mddb_createrec((size_t)mgp
->size
, rec_type
, 0,
818 MD_CRO_64BIT
| options
, setno
);
821 recid
= mddb_createrec((size_t)mgp
->size
, rec_type
, 0,
822 MD_CRO_32BIT
| options
, setno
);
825 rval
= mddbstatus2error(mdep
, (int)recid
, mnum
, setno
);
829 /* get the address of the new unit */
830 new_un
= (mp_unit_t
*)mddb_getrecaddr(recid
);
832 /* copy in the user's unit struct */
833 err
= ddi_copyin((void *)(uintptr_t)mgp
->mdp
, new_un
,
834 (size_t)mgp
->size
, mode
);
836 mddb_deleterec_wrapper(recid
);
840 if (options
& MD_CRO_FN
)
841 new_un
->c
.un_revision
|= MD_FN_META_DEV
;
843 /* All 64 bit metadevices only support EFI labels. */
844 if (mgp
->options
& MD_CRO_64BIT
) {
845 new_un
->c
.un_flag
|= MD_EFILABEL
;
847 * If the device was previously smaller than a terabyte,
848 * and had a vtoc record attached to it, we remove the
849 * vtoc record, because the layout has changed completely.
851 if (((un
->c
.un_revision
& MD_64BIT_META_DEV
) == 0) &&
852 (un
->c
.un_vtoc_id
!= 0)) {
853 old_vtoc
= un
->c
.un_vtoc_id
;
854 new_un
->c
.un_vtoc_id
=
855 md_vtoc_to_efi_record(old_vtoc
, setno
);
859 /* commit new unit struct */
860 MD_RECID(new_un
) = recid
;
861 mddb_commitrec_wrapper(recid
);
864 * delete old unit struct.
866 mddb_deleterec_wrapper(MD_RECID(un
));
868 /* place new unit in in-core array */
869 md_nblocks_set(mnum
, new_un
->c
.un_total_blocks
);
870 MD_UNIT(mnum
) = new_un
;
872 SE_NOTIFY(EC_SVM_CONFIG
, ESC_SVM_GROW
, TAG_METADEVICE
,
873 MD_UN2SET(new_un
), MD_SID(new_un
));
876 * If old_vtoc has a non zero value, we know:
877 * - This unit crossed the border from smaller to larger one TB
878 * - There was a vtoc record for the unit,
879 * - This vtoc record is no longer needed, because
880 * a new efi record has been created for this un.
883 mddb_deleterec_wrapper(old_vtoc
);
886 /* release locks, return success */
888 for (i
= npar
- 1; (i
>= 0); --i
)
889 md_ioctl_writerexit(&plock
[i
]);
890 rw_exit(&md_unit_array_rw
.lock
);
892 kmem_free(plock
, npar
* sizeof (*plock
));
894 kmem_free(par
, npar
* sizeof (*par
));
899 * FUNCTION: sp_getdevs()
900 * INPUT: d - data ptr.
901 * mode - pass-through to ddi_copyout.
904 * RETURNS: 0 - success.
906 * PURPOSE: Get the device on which the soft partition is built.
907 * This is simply a matter of copying out the md_dev64_t stored
908 * in the soft partition unit structure.
/*
 * Body of sp_getdevs() (the signature line is not visible in this
 * listing).  d is the md_getdevs_params_t copied in by
 * sp_admin_ioctl().
 *
 * After validating mnum and taking the ioctl reader lock, a non-zero
 * mgdp->cnt causes the single underlying device (un->un_dev) to be
 * copied out to the user array mgdp->devs; a device that is not itself
 * a metadevice is first passed through md_xlate_mini_2_targ().
 * NOTE(review): the handling of a failed translation and the update of
 * mgdp->cnt, if any, are not visible in this listing.
 */
923 md_getdevs_params_t
*mgdp
= (md_getdevs_params_t
*)d
;
932 if ((MD_MIN2SET(mnum
) >= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
))
933 return (mdmderror(mdep
, MDE_INVAL_UNIT
, mnum
));
935 if ((ui
= MDI_UNIT(mnum
)) == NULL
) {
936 return (mdmderror(mdep
, MDE_UNIT_NOT_SETUP
, mnum
));
939 un
= (mp_unit_t
*)md_ioctl_readerlock(lockp
, ui
);
/* devsp is a user-space address; it is only ever handed to ddi_copyout() */
940 devsp
= (md_dev64_t
*)(uintptr_t)mgdp
->devs
;
942 /* only ever 1 device for a soft partition */
943 if (mgdp
->cnt
!= 0) {
944 /* do miniroot->target device translation */
945 unit_dev
= un
->un_dev
;
946 if (md_getmajor(unit_dev
) != md_major
) {
947 if ((unit_dev
= md_xlate_mini_2_targ(unit_dev
))
951 /* copyout dev information */
952 if (ddi_copyout(&unit_dev
, devsp
, sizeof (*devsp
), mode
) != 0)
963 * Called to set or clear a capability for a softpart
964 * called by the MD_MN_SET_CAP ioctl.
/*
 * sp_set_capability(): apply the capability bits in p->sc_set to the
 * soft partition p->mnum; only valid for a multi-node set.
 *
 * DKV_ABR_CAP set or clear toggles MD_ABR_CAP in ui_tstate and calls
 * the MD_INC_ABR_COUNT or MD_DEC_ABR_COUNT named service of the
 * underlying device when md_get_named_service() returns one.
 * DKV_DMR_CAP set or clear toggles MD_DMR_CAP in ui_tstate.
 *
 * NOTE(review): ui_tstate is modified while only the ioctl reader lock
 * is held -- confirm that is sufficient.
 * NOTE(review): the ABR count is incremented or decremented without a
 * visible test of the previous MD_ABR_CAP state; presumably callers
 * only send changes (DKIOCSETVOLCAP in md_sp_ioctl() compares against
 * the current state first) -- confirm.
 * NOTE(review): the else keywords and the return statements are not
 * visible in this listing.
 */
967 sp_set_capability(md_mn_setcap_params_t
*p
, IOLOCK
*lockp
)
974 if ((un
= sp_getun(p
->mnum
, &p
->mde
)) == NULL
)
977 /* This function is only valid for a multi-node set */
978 setno
= MD_MIN2SET(p
->mnum
);
979 if (!MD_MNSET_SETNO(setno
)) {
982 ui
= MDI_UNIT(p
->mnum
);
983 (void) md_ioctl_readerlock(lockp
, ui
);
985 if (p
->sc_set
& DKV_ABR_CAP
) {
986 void (*inc_abr_count
)();
988 ui
->ui_tstate
|= MD_ABR_CAP
; /* Set ABR capability */
989 /* Increment abr count in underlying metadevice */
990 inc_abr_count
= (void(*)())md_get_named_service(un
->un_dev
,
991 0, MD_INC_ABR_COUNT
, 0);
/* the service is optional: call it only when the lookup found one */
992 if (inc_abr_count
!= NULL
)
993 (void) (*inc_abr_count
)(un
->un_dev
);
995 void (*dec_abr_count
)();
997 ui
->ui_tstate
&= ~MD_ABR_CAP
; /* Clear ABR capability */
998 /* Decrement abr count in underlying metadevice */
999 dec_abr_count
= (void(*)())md_get_named_service(un
->un_dev
,
1000 0, MD_DEC_ABR_COUNT
, 0);
1001 if (dec_abr_count
!= NULL
)
1002 (void) (*dec_abr_count
)(un
->un_dev
);
1004 if (p
->sc_set
& DKV_DMR_CAP
) {
1005 ui
->ui_tstate
|= MD_DMR_CAP
; /* Set DMR capability */
1007 ui
->ui_tstate
&= ~MD_DMR_CAP
; /* Clear DMR capability */
1009 md_ioctl_readerexit(lockp
);
1015 * FUNCTION: sp_admin_ioctl().
1016 * INPUT: cmd - ioctl to be handled.
1018 * mode - pass-through to copyin/copyout routines.
1021 * RETURNS: 0 - success.
1023 * PURPOSE: Handle administrative ioctl's. Essentially a large
1024 * switch statement to dispatch the ioctl's to their
1025 * handlers. See comment at beginning of file for specifics
1026 * on which ioctl's are handled.
/*
 * sp_admin_ioctl(): dispatcher for the administrative ioctls.
 *
 * Every case visible below follows the same pattern: test the FREAD or
 * FWRITE bit in mode, set sz to the size of the parameter structure of
 * the command, kmem_alloc() a kernel copy d, ddi_copyin() the user
 * structure into it and call the matching worker (sp_set, sp_get,
 * sp_reset, sp_grow, sp_getdevs, sp_setstatus, sp_update_watermarks,
 * sp_read_watermark, sp_set_capability).  The common tail copies d
 * back out to the caller with ddi_copyout().
 *
 * NOTE(review): several case labels, the per-case error exits and the
 * kmem_free() of d are not visible in this listing.
 */
1029 sp_admin_ioctl(int cmd
, void *data
, int mode
, IOLOCK
*lockp
)
1035 /* We can only handle 32-bit clients for internal commands */
1036 if ((mode
& DATAMODEL_MASK
) != DATAMODEL_ILP32
) {
1045 /* create new soft partition */
1046 if (! (mode
& FWRITE
))
1049 sz
= sizeof (md_set_params_t
);
1051 d
= kmem_alloc(sz
, KM_SLEEP
);
1053 if (ddi_copyin(data
, d
, sz
, mode
)) {
1058 err
= sp_set(d
, mode
);
1064 /* get soft partition unit structure */
1065 if (! (mode
& FREAD
))
1068 sz
= sizeof (md_i_get_t
);
1070 d
= kmem_alloc(sz
, KM_SLEEP
);
1072 if (ddi_copyin(data
, d
, sz
, mode
)) {
1077 err
= sp_get(d
, mode
, lockp
);
1082 /* delete soft partition */
1083 if (! (mode
& FWRITE
))
1086 sz
= sizeof (md_sp_reset_t
);
1087 d
= kmem_alloc(sz
, KM_SLEEP
);
1089 if (ddi_copyin(data
, d
, sz
, mode
)) {
1094 err
= sp_reset((md_sp_reset_t
*)d
);
1100 /* grow soft partition */
1101 if (! (mode
& FWRITE
))
1104 sz
= sizeof (md_grow_params_t
);
1105 d
= kmem_alloc(sz
, KM_SLEEP
);
1107 if (ddi_copyin(data
, d
, sz
, mode
)) {
1112 err
= sp_grow(d
, mode
, lockp
);
1116 case MD_IOCGET_DEVS
:
1118 /* get underlying device */
1119 if (! (mode
& FREAD
))
1122 sz
= sizeof (md_getdevs_params_t
);
1123 d
= kmem_alloc(sz
, KM_SLEEP
);
1125 if (ddi_copyin(data
, d
, sz
, mode
)) {
1130 err
= sp_getdevs(d
, mode
, lockp
);
1134 case MD_IOC_SPSTATUS
:
1136 /* set the status field of one or more soft partitions */
1137 if (! (mode
& FWRITE
))
1140 sz
= sizeof (md_sp_statusset_t
);
1141 d
= kmem_alloc(sz
, KM_SLEEP
);
1143 if (ddi_copyin(data
, d
, sz
, mode
)) {
1148 err
= sp_setstatus(d
, mode
, lockp
);
/* both the local and the multi-node watermark update share one handler */
1152 case MD_IOC_SPUPDATEWM
:
1153 case MD_MN_IOC_SPUPDATEWM
:
1155 if (! (mode
& FWRITE
))
1158 sz
= sizeof (md_sp_update_wm_t
);
1159 d
= kmem_alloc(sz
, KM_SLEEP
);
1161 if (ddi_copyin(data
, d
, sz
, mode
)) {
1166 err
= sp_update_watermarks(d
, mode
);
1170 case MD_IOC_SPREADWM
:
1172 if (! (mode
& FREAD
))
1175 sz
= sizeof (md_sp_read_wm_t
);
1176 d
= kmem_alloc(sz
, KM_SLEEP
);
1178 if (ddi_copyin(data
, d
, sz
, mode
)) {
1183 err
= sp_read_watermark(d
, mode
);
/* set or clear a capability; the case label is not visible in this listing */
1189 if (! (mode
& FWRITE
))
1192 sz
= sizeof (md_mn_setcap_params_t
);
1193 d
= kmem_alloc(sz
, KM_SLEEP
);
1195 if (ddi_copyin(data
, d
, sz
, mode
)) {
1200 err
= sp_set_capability((md_mn_setcap_params_t
*)d
, lockp
);
1209 * copyout and free any args
1213 if (ddi_copyout(d
, data
, sz
, mode
) != 0) {
1224 * FUNCTION: md_sp_ioctl()
1225 * INPUT: dev - device we are operating on.
1226 * cmd - ioctl to be handled.
1228 * mode - pass-through to copyin/copyout routines.
1231 * RETURNS: 0 - success.
1233 * PURPOSE: Dispatch ioctl's. Administrative ioctl's are handled
1234 * by sp_admin_ioctl. All others (see comment at beginning
1235 * of this file) are handled in-line here.
/*
 * md_sp_ioctl(): ioctl entry point for soft partitions.
 *
 * Requests on MD_ADM_MINOR are forwarded to sp_admin_ioctl().  For any
 * other minor the set/unit numbers and the ui and un lookups are
 * validated and the command is checked with
 * md_check_ioctl_against_unit() before the per-command handling below:
 * media info, geometry, vtoc and extvtoc get/set, the EFI and
 * partition helpers of the md driver, and the multi-node volume
 * capability and directed-read commands.
 *
 * NOTE(review): most case labels, the error returns and the end of the
 * function are not visible in this listing.
 * NOTE(review): the directed-read handling near the end contains two
 * suspected defects, flagged inline: a pointer-versus-structure
 * ddi_copyout() and a missing kmem_free() of vdr32.
 */
1238 md_sp_ioctl(dev_t dev
, int cmd
, void *data
, int mode
, IOLOCK
*lockp
)
1240 minor_t mnum
= getminor(dev
);
1245 /* handle admin ioctls */
1246 if (mnum
== MD_ADM_MINOR
)
1247 return (sp_admin_ioctl(cmd
, data
, mode
, lockp
));
/* everything else needs a valid, configured unit */
1250 if ((MD_MIN2SET(mnum
) >= md_nsets
) ||
1251 (MD_MIN2UNIT(mnum
) >= md_nunits
) ||
1252 ((ui
= MDI_UNIT(mnum
)) == NULL
) ||
1253 ((un
= MD_UNIT(mnum
)) == NULL
))
1256 /* is this a supported ioctl? */
1257 err
= md_check_ioctl_against_unit(cmd
, un
->c
);
1271 if (! (mode
& FREAD
))
1274 p
= kmem_alloc(sizeof (*p
), KM_SLEEP
);
1277 if (ddi_copyout((caddr_t
)p
, data
, sizeof (*p
), mode
) != 0)
1280 kmem_free(p
, sizeof (*p
));
1284 case DKIOCGMEDIAINFO
:
1288 if (! (mode
& FREAD
))
/* here p is filled through &p and copied out with sizeof (struct dk_minfo) */
1291 get_minfo(&p
, mnum
);
1292 if (ddi_copyout(&p
, data
, sizeof (struct dk_minfo
), mode
) != 0)
1300 /* geometry information */
1303 if (! (mode
& FREAD
))
1306 p
= kmem_alloc(sizeof (*p
), KM_SLEEP
);
1308 md_get_geom((md_unit_t
*)un
, p
);
1309 if (ddi_copyout((caddr_t
)p
, data
, sizeof (*p
),
1313 kmem_free(p
, sizeof (*p
));
1321 md_get_cgapart((md_unit_t
*)un
, &dmp
);
/* native callers get dmp as is; other data models get the dk_map32 form */
1323 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
1324 if (ddi_copyout((caddr_t
)&dmp
, data
, sizeof (dmp
),
1330 struct dk_map32 dmp32
;
1332 dmp32
.dkl_cylno
= dmp
.dkl_cylno
;
1333 dmp32
.dkl_nblk
= dmp
.dkl_nblk
;
1335 if (ddi_copyout((caddr_t
)&dmp32
, data
, sizeof (dmp32
),
1339 #endif /* _SYSCALL32 */
1345 /* vtoc information */
1348 if (! (mode
& FREAD
))
1351 vtoc
= kmem_zalloc(sizeof (*vtoc
), KM_SLEEP
);
1352 md_get_vtoc((md_unit_t
*)un
, vtoc
);
1354 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
1355 if (ddi_copyout(vtoc
, data
, sizeof (*vtoc
), mode
))
1360 struct vtoc32
*vtoc32
;
1362 vtoc32
= kmem_zalloc(sizeof (*vtoc32
), KM_SLEEP
);
1364 vtoctovtoc32((*vtoc
), (*vtoc32
));
1365 if (ddi_copyout(vtoc32
, data
, sizeof (*vtoc32
), mode
))
1367 kmem_free(vtoc32
, sizeof (*vtoc32
));
1369 #endif /* _SYSCALL32 */
1371 kmem_free(vtoc
, sizeof (*vtoc
));
/* set vtoc: copy in (converting from vtoc32 if needed), then md_set_vtoc() */
1379 if (! (mode
& FWRITE
))
1382 vtoc
= kmem_zalloc(sizeof (*vtoc
), KM_SLEEP
);
1383 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
1384 if (ddi_copyin(data
, vtoc
, sizeof (*vtoc
), mode
)) {
1390 struct vtoc32
*vtoc32
;
1392 vtoc32
= kmem_zalloc(sizeof (*vtoc32
), KM_SLEEP
);
1394 if (ddi_copyin(data
, vtoc32
, sizeof (*vtoc32
), mode
)) {
1397 vtoc32tovtoc((*vtoc32
), (*vtoc
));
1399 kmem_free(vtoc32
, sizeof (*vtoc32
));
1401 #endif /* _SYSCALL32 */
1404 err
= md_set_vtoc((md_unit_t
*)un
, vtoc
);
1406 kmem_free(vtoc
, sizeof (*vtoc
));
1412 /* extended vtoc information */
1413 struct extvtoc
*extvtoc
;
1415 if (! (mode
& FREAD
))
1418 extvtoc
= kmem_zalloc(sizeof (*extvtoc
), KM_SLEEP
);
1419 md_get_extvtoc((md_unit_t
*)un
, extvtoc
);
1421 if (ddi_copyout(extvtoc
, data
, sizeof (*extvtoc
), mode
))
1424 kmem_free(extvtoc
, sizeof (*extvtoc
));
1430 struct extvtoc
*extvtoc
;
1432 if (! (mode
& FWRITE
))
1435 extvtoc
= kmem_zalloc(sizeof (*extvtoc
), KM_SLEEP
);
1436 if (ddi_copyin(data
, extvtoc
, sizeof (*extvtoc
), mode
)) {
1441 err
= md_set_extvtoc((md_unit_t
*)un
, extvtoc
);
1443 kmem_free(extvtoc
, sizeof (*extvtoc
));
1450 * This one can be done centralized,
1451 * no need to put in the same code for all types of metadevices
1453 return (md_dkiocgetefi(mnum
, data
, mode
));
1458 * This one can be done centralized,
1459 * no need to put in the same code for all types of metadevices
1461 return (md_dkiocsetefi(mnum
, data
, mode
));
1464 case DKIOCPARTITION
:
1466 return (md_dkiocpartition(mnum
, data
, mode
));
1469 case DKIOCGETVOLCAP
:
1472 * Return the supported capabilities for the soft-partition.
1473 * We can only support those caps that are provided by the
1474 * underlying device.
1479 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum
)))
1482 if (! (mode
& FREAD
))
1485 bzero(&vc
, sizeof (vc
));
1487 /* Send ioctl to underlying driver */
1489 err
= md_call_ioctl(un
->un_dev
, cmd
, &vc
, (mode
| FKIOCTL
),
/* cache what the underlying device reported before copying it out */
1493 ui
->ui_capab
= vc
.vc_info
;
1495 if (ddi_copyout(&vc
, data
, sizeof (vc
), mode
))
1501 case DKIOCSETVOLCAP
:
1504 * Enable a supported capability (as returned by DKIOCGETVOLCAP)
1505 * Do not pass the request down as we're the top-level device
1506 * handler for the application.
1507 * If the requested capability is supported (set in ui_capab),
1508 * set the corresponding bit in ui_tstate so that we can pass
1509 * the appropriate flag when performing i/o.
1510 * This request is propagated to all nodes.
1513 volcapset_t volcap
= 0;
1514 void (*check_offline
)();
1515 int offline_status
= 0;
1517 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum
)))
1520 if (! (mode
& FWRITE
))
1523 if (ddi_copyin(data
, &vc
, sizeof (vc
), mode
))
1527 * Send DKIOCGETVOLCAP to underlying driver to see if
1528 * capability supported
1532 err
= md_call_ioctl(un
->un_dev
, DKIOCGETVOLCAP
, &vc1
,
1533 (mode
| FKIOCTL
), lockp
);
1537 /* Save capabilities */
1538 ui
->ui_capab
= vc1
.vc_info
;
1540 * Error if required capability not supported by underlying
1543 if ((vc1
.vc_info
& vc
.vc_set
) == 0)
1548 * Check if underlying mirror has an offline submirror,
1549 * fail if there is on offline submirror
1551 check_offline
= (void(*)())md_get_named_service(un
->un_dev
,
1552 0, MD_CHECK_OFFLINE
, 0);
1553 if (check_offline
!= NULL
)
1554 (void) (*check_offline
)(un
->un_dev
, &offline_status
);
1558 if (ui
->ui_tstate
& MD_ABR_CAP
)
1559 volcap
|= DKV_ABR_CAP
;
1561 /* Only send capability message if there is a change */
1562 if ((vc
.vc_set
& (DKV_ABR_CAP
)) != volcap
)
1563 err
= mdmn_send_capability_message(mnum
, vc
, lockp
);
1570 * Only valid for MN sets. We need to pass it down to the
1571 * underlying driver if its a metadevice, after we've modified
1572 * the offsets to pick up the correct lower-level device
1575 vol_directed_rd_t
*vdr
;
1576 #ifdef _MULTI_DATAMODEL
1577 vol_directed_rd32_t
*vdr32
;
1578 #endif /* _MULTI_DATAMODEL */
1580 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum
)))
1583 if (! (ui
->ui_capab
& DKV_DMR_CAP
))
/* both request buffers are allocated KM_NOSLEEP, so allocation may fail */
1586 vdr
= kmem_zalloc(sizeof (vol_directed_rd_t
), KM_NOSLEEP
);
1591 * Underlying device supports directed mirror read, so update
1592 * the user-supplied offset to pick the correct block from the
1593 * partitioned metadevice.
1595 #ifdef _MULTI_DATAMODEL
1596 vdr32
= kmem_zalloc(sizeof (vol_directed_rd32_t
), KM_NOSLEEP
);
1597 if (vdr32
== NULL
) {
1598 kmem_free(vdr
, sizeof (vol_directed_rd_t
));
1602 switch (ddi_model_convert_from(mode
& FMODELS
)) {
1603 case DDI_MODEL_ILP32
:
/*
 * NOTE(review): vdr32 was allocated above, but only vdr is freed on
 * this copyin-failure path (the DDI_MODEL_NONE path frees both).
 * This looks like a leak of vdr32 -- confirm.
 */
1604 if (ddi_copyin(data
, vdr32
, sizeof (*vdr32
), mode
)) {
1605 kmem_free(vdr
, sizeof (*vdr
));
/* widen the 32-bit request into the native structure */
1608 vdr
->vdr_flags
= vdr32
->vdr_flags
;
1609 vdr
->vdr_offset
= vdr32
->vdr_offset
;
1610 vdr
->vdr_nbytes
= vdr32
->vdr_nbytes
;
1611 vdr
->vdr_data
= (void *)(uintptr_t)vdr32
->vdr_data
;
1612 vdr
->vdr_side
= vdr32
->vdr_side
;
1615 case DDI_MODEL_NONE
:
1616 if (ddi_copyin(data
, vdr
, sizeof (*vdr
), mode
)) {
1617 kmem_free(vdr32
, sizeof (*vdr32
));
1618 kmem_free(vdr
, sizeof (*vdr
));
1624 kmem_free(vdr32
, sizeof (*vdr32
));
1625 kmem_free(vdr
, sizeof (*vdr
));
1628 #else /* ! _MULTI_DATAMODEL */
1629 if (ddi_copyin(data
, vdr
, sizeof (*vdr
), mode
)) {
1630 kmem_free(vdr
, sizeof (*vdr
));
1633 #endif /* _MULTI_DATAMODEL */
1635 err
= sp_directed_read(mnum
, vdr
, mode
);
1638 #ifdef _MULTI_DATAMODEL
1639 switch (ddi_model_convert_from(mode
& FMODELS
)) {
1640 case DDI_MODEL_ILP32
:
/* narrow the result back into the 32-bit structure before copyout */
1641 vdr32
->vdr_flags
= vdr
->vdr_flags
;
1642 vdr32
->vdr_offset
= vdr
->vdr_offset
;
1643 vdr32
->vdr_side
= vdr
->vdr_side
;
1644 vdr32
->vdr_bytesread
= vdr
->vdr_bytesread
;
1645 bcopy(vdr
->vdr_side_name
, vdr32
->vdr_side_name
,
1646 sizeof (vdr32
->vdr_side_name
));
1648 if (ddi_copyout(vdr32
, data
, sizeof (*vdr32
), mode
))
1652 case DDI_MODEL_NONE
:
/*
 * NOTE(review): vdr is declared above as vol_directed_rd_t *, so
 * &vdr and sizeof (vdr) copy out the kernel pointer itself rather
 * than the structure it points to (the ILP32 branch passes vdr32 and
 * sizeof (*vdr32)).  This looks like it should be
 * ddi_copyout(vdr, data, sizeof (*vdr), mode) -- confirm.
 */
1653 if (ddi_copyout(&vdr
, data
, sizeof (vdr
), mode
))
1657 #else /* ! _MULTI_DATAMODEL */
/*
 * NOTE(review): same pointer-versus-structure concern as in the
 * DDI_MODEL_NONE branch above -- confirm.
 */
1658 if (ddi_copyout(&vdr
, data
, sizeof (vdr
), mode
))
1660 #endif /* _MULTI_DATAMODEL */
1662 #ifdef _MULTI_DATAMODEL
1663 kmem_free(vdr32
, sizeof (*vdr32
));
1664 #endif /* _MULTI_DATAMODEL */
1665 kmem_free(vdr
, sizeof (*vdr
));
1672 /* Option not handled */