4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2012 Milan Jurik. All rights reserved.
31 * DESCRIPTION: RAID driver source file containing IOCTL operations.
33 * ROUTINES PROVIDED FOR EXTERNAL USE:
34 * raid_commit() - commits MD database updates for a RAID metadevice
35 * md_raid_ioctl() - RAID metadevice IOCTL operations entry point.
37 * ROUTINES PROVIDED FOR INTERNAL USE:
38 * raid_getun() - Performs unit checking on a RAID metadevice
39 * init_col_nextio() - normal backend when zeroing column of RAID metadevice.
40 * init_col_int() - I/O interrupt while zeroing column of RAID metadevice.
41 * raid_init_columns() - Zero one or more columns of a RAID metadevice.
42 * raid_set() - used to create a RAID metadevice
43 * raid_get() - used to get the unit structure of a RAID metadevice
44 * raid_replace() - used to replace a component of a RAID metadevice
45 * raid_grow() - Concatenate to a RAID metadevice
46 * raid_change() - change dynamic values of a RAID metadevice
47 * raid_reset() - used to reset (clear / remove) a RAID metadevice
48 * raid_get_geom() - used to get the geometry of a RAID metadevice
49 * raid_get_vtoc() - used to get the VTOC on a RAID metadevice
50 * raid_set_vtoc() - used to set the VTOC on a RAID metadevice
51 * raid_get_extvtoc() - used to get the extended VTOC on a RAID metadevice
52 * raid_set_extvtoc() - used to set the extended VTOC on a RAID metadevice
53 * raid_getdevs() - return all devices within a RAID metadevice
54 * raid_admin_ioctl() - IOCTL operations unique to metadevices and RAID
58 #include <sys/param.h>
59 #include <sys/systm.h>
64 #include <sys/t_lock.h>
70 #include <sys/sysmacros.h>
71 #include <sys/types.h>
72 #include <sys/mkdev.h>
76 #include <sys/modctl.h>
78 #include <sys/sunddi.h>
80 #include <sys/lvm/mdvar.h>
81 #include <sys/lvm/md_names.h>
82 #include <sys/lvm/md_mddb.h>
83 #include <sys/lvm/md_raid.h>
84 #include <sys/lvm/md_convert.h>
86 #include <sys/sysevent/eventdefs.h>
87 #include <sys/sysevent/svm.h>
90 extern unit_t md_nunits
;
91 extern set_t md_nsets
;
92 extern md_set_t md_set
[];
93 extern md_ops_t raid_md_ops
;
94 extern major_t md_major
;
95 extern md_krwlock_t md_unit_array_rw
;
96 extern mdq_anchor_t md_done_daemon
;
97 extern mdq_anchor_t md_ff_daemonq
;
100 extern void md_probe_one(probe_req_t
*);
101 extern int md_init_probereq(md_probedev_impl_t
*,
103 extern md_resync_t md_cpr_resync
;
106 extern void dump_mr_unit(mr_unit_t
*);
108 typedef struct raid_ci
{
110 struct raid_ci
*ci_next
;
117 diskaddr_t ci_lastblk
;
120 /* values for the ci_flag */
121 #define COL_INITING (0x0001)
122 #define COL_INIT_DONE (0x0002)
123 #define COL_READY (0x0004)
127 * DESCRIPTION: performs a lot of unit checking on a RAID metadevice
128 * PARAMETERS: minor_t mnum - minor device number for RAID unit
129 * md_error_t *mde - pointer to error reporting structure
130 * int flags - pointer to error reporting structure
131 * STALE_OK - allow stale MD memory
132 * NO_OLD - unit must not exist
133 * NO_LOCK - no IOCTL lock needed
134 * WR_LOCK - write IOCTL lock needed
135 * RD_LOCK - read IOCTL lock needed
136 * IOLOCK *lock - pointer to IOCTL lock
138 * LOCKS: obtains unit reader or writer lock via IOLOCK
142 raid_getun(minor_t mnum
, md_error_t
*mde
, int flags
, IOLOCK
*lock
)
146 set_t setno
= MD_MIN2SET(mnum
);
148 if ((setno
>= md_nsets
) || (MD_MIN2UNIT(mnum
) >= md_nunits
)) {
149 (void) mdmderror(mde
, MDE_INVAL_UNIT
, mnum
);
153 if (!(flags
& STALE_OK
)) {
154 if (md_get_setstatus(setno
) & MD_SET_STALE
) {
155 (void) mdmddberror(mde
, MDE_DB_STALE
, mnum
, setno
);
161 if (flags
& NO_OLD
) {
163 (void) mdmderror(mde
, MDE_UNIT_ALREADY_SETUP
, mnum
);
166 return ((mr_unit_t
*)1);
170 (void) mdmderror(mde
, MDE_UNIT_NOT_SETUP
, mnum
);
173 if (flags
& ARRAY_WRITER
)
174 md_array_writer(lock
);
175 else if (flags
& ARRAY_READER
)
176 md_array_reader(lock
);
178 if (!(flags
& NO_LOCK
)) {
179 if (flags
& WR_LOCK
) {
180 (void) md_ioctl_io_lock(lock
, ui
);
181 (void) md_ioctl_writerlock(lock
, ui
);
183 (void) md_ioctl_readerlock(lock
, ui
);
185 un
= (mr_unit_t
*)MD_UNIT(mnum
);
187 if (un
->c
.un_type
!= MD_METARAID
) {
188 (void) mdmderror(mde
, MDE_NOT_RAID
, mnum
);
198 * DESCRIPTION: commits MD database updates for a RAID metadevice
199 * PARAMETERS: mr_unit_t *un - RAID unit to update in the MD database
200 * mddb_recid_t *extras - array of other record IDs to update
202 * LOCKS: assumes caller holds unit writer lock
206 raid_commit(mr_unit_t
*un
, mddb_recid_t
*extras
)
208 mddb_recid_t
*recids
;
212 if (md_get_setstatus(MD_UN2SET(un
)) & MD_SET_STALE
)
215 /* Count the extra recids */
216 if (extras
!= NULL
) {
217 while (extras
[nrecids
] != 0) {
223 * Allocate space for two recids in addition to the extras:
224 * one for the unit structure, one for the null terminator.
227 recids
= (mddb_recid_t
*)
228 kmem_zalloc(nrecids
* sizeof (mddb_recid_t
), KM_SLEEP
);
231 ASSERT(MDI_UNIT(MD_SID(un
)) ? UNIT_WRITER_HELD(un
) : 1);
232 recids
[ri
++] = un
->c
.un_record_id
;
235 if (extras
!= NULL
) {
236 while (*extras
!= 0) {
237 recids
[ri
++] = *extras
;
243 mddb_commitrecs_wrapper(recids
);
246 kmem_free(recids
, nrecids
* sizeof (mddb_recid_t
));
250 raid_check_pw(mr_unit_t
*un
)
255 minor_t mnum
= MD_SID(un
);
260 buf
= kmem_zalloc((uint_t
)DEV_BSIZE
, KM_SLEEP
);
262 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++) {
265 colptr
= &un
->un_column
[i
];
267 tmpdev
= colptr
->un_dev
;
270 * If this device is hotspared
271 * use the hotspare key
273 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
, HOTSPARED(un
, i
) ?
274 colptr
->un_hs_key
: colptr
->un_orig_key
);
275 if (md_layered_open(mnum
, &tmpdev
, MD_OFLG_NULL
)) {
276 colptr
->un_dev
= tmpdev
;
279 colptr
->un_dev
= tmpdev
;
281 bzero((caddr_t
)&bp
, sizeof (buf_t
));
284 bp
.b_flags
= B_READ
| B_BUSY
;
285 sema_init(&bp
.b_io
, 0, NULL
,
287 sema_init(&bp
.b_sem
, 0, NULL
,
289 bp
.b_edev
= md_dev64_to_dev(colptr
->un_dev
);
290 bp
.b_lblkno
= colptr
->un_pwstart
;
291 bp
.b_bcount
= DEV_BSIZE
;
292 bp
.b_bufsize
= DEV_BSIZE
;
293 bp
.b_un
.b_addr
= (caddr_t
)buf
;
295 (void) md_call_strategy(&bp
, 0, NULL
);
299 if (un
->c
.un_revision
& MD_64BIT_META_DEV
) {
300 unit
= ((raid_pwhdr_t
*)buf
)->rpw_unit
;
302 unit
= ((raid_pwhdr32_od_t
*)buf
)->rpw_unit
;
306 * depending upon being an 64bit or 32 bit raid, the
307 * pre write headers have different layout
309 if (un
->c
.un_revision
& MD_64BIT_META_DEV
) {
310 if ((((raid_pwhdr_t
*)buf
)->rpw_column
!= i
) ||
311 (((raid_pwhdr_t
*)buf
)->rpw_unit
!= unit
))
314 if ((((raid_pwhdr32_od_t
*)buf
)->rpw_column
!= i
) ||
315 (((raid_pwhdr32_od_t
*)buf
)->rpw_unit
!= unit
))
318 md_layered_close(colptr
->un_dev
, MD_OFLG_NULL
);
322 kmem_free(buf
, DEV_BSIZE
);
327 * NAME: init_col_nextio
328 * DESCRIPTION: normal backend process when zeroing column of a RAID metadevice.
329 * PARAMETERS: raid_ci_t *cur - struct for column being zeroed
331 * LOCKS: assumes caller holds unit reader lock,
332 * preiodically releases and reacquires unit reader lock,
333 * broadcasts on unit conditional variable (un_cv)
336 #define INIT_RLS_CNT 10
338 init_col_nextio(raid_ci_t
*cur
)
344 cur
->ci_blkno
+= cur
->ci_zerosize
;
346 mutex_enter(&un
->un_mx
);
347 /* ===> update un_percent_done */
348 un
->un_init_iocnt
+= btodb(cur
->ci_buf
.b_bcount
);
349 mutex_exit(&un
->un_mx
);
352 * When gorwing a device, normal I/O is still going on.
353 * The init thread still holds the unit reader lock which
354 * prevents I/O from doing state changes.
355 * So every INIT_RLS_CNT init I/Os, we will release the
359 * We know we are in the middle of a grow operation and the
360 * unit cannot be grown or removed (through reset or halt)
361 * so the mr_unit_t structure will not move or disappear.
362 * In addition, we know that only one of the init I/Os
363 * can be in col_init_nextio at a time because they are
364 * placed on the md_done_daemon queue and md only processes
365 * one element of this queue at a time. In addition, any
366 * code that needs to acquire the unit writer lock to change
367 * state is supposed to be on the md_mstr_daemon queue so
368 * it can be processing while we sit here waiting to get the
369 * unit reader lock back.
372 if (cur
->ci_blkno
< cur
->ci_lastblk
) {
373 /* truncate last chunk to end_addr if needed */
374 if (cur
->ci_blkno
+ cur
->ci_zerosize
> cur
->ci_lastblk
) {
375 cur
->ci_zerosize
= (size_t)
376 (cur
->ci_lastblk
- cur
->ci_blkno
);
379 /* set address and length for I/O bufs */
380 cur
->ci_buf
.b_bufsize
= dbtob(cur
->ci_zerosize
);
381 cur
->ci_buf
.b_bcount
= dbtob(cur
->ci_zerosize
);
382 cur
->ci_buf
.b_lblkno
= cur
->ci_blkno
;
384 (void) md_call_strategy(&cur
->ci_buf
, MD_STR_NOTTOP
, NULL
);
387 /* finished initializing this column */
388 mutex_enter(&un
->un_mx
);
389 cur
->ci_flag
= COL_INIT_DONE
;
390 uniqtime32(&un
->un_column
[cur
->ci_col
].un_devtimestamp
);
391 mutex_exit(&un
->un_mx
);
392 cv_broadcast(&un
->un_cv
);
397 * DESCRIPTION: I/O interrupt while zeroing column of a RAID metadevice.
398 * PARAMETERS: buf_t *cb - I/O buffer for which interrupt occurred
400 * LOCKS: assumes caller holds unit reader or writer lock
404 init_col_int(buf_t
*cb
)
408 cur
= (raid_ci_t
*)cb
->b_chain
;
409 if (cb
->b_flags
& B_ERROR
) {
410 mutex_enter(&cur
->ci_un
->un_mx
);
412 mutex_exit(&cur
->ci_un
->un_mx
);
413 cv_broadcast(&cur
->ci_un
->un_cv
);
416 daemon_request(&md_done_daemon
, init_col_nextio
,
417 (daemon_queue_t
*)cur
, REQ_OLD
);
422 * NAME: raid_init_columns
423 * DESCRIPTION: Zero one or more columns of a RAID metadevice.
424 * PARAMETERS: minor_t mnum - RAID unit minor identifier
426 * LOCKS: obtains and releases unit reader lock,
427 * obtains and releases unit writer lock,
428 * obtains and releases md_unit_array_rw write lock,
429 * obtains and releases unit mutex (un_mx) lock,
430 * waits on unit conditional variable (un_cv)
434 raid_init_columns(minor_t mnum
)
438 raid_ci_t
*ci_chain
= NULL
, *cur
;
447 set_t setno
= MD_MIN2SET(mnum
);
450 * Increment the raid resync count for cpr
452 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
453 md_cpr_resync
.md_raid_resync
++;
454 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
457 * initialization is a multiple step process. The first step
458 * is to go through the unit structure and start each device
459 * in the init state writing zeros over the component.
460 * Next initialize the prewrite areas, so the device can be
461 * used if a metainit -k is done. Now close the componenets.
463 * Once this complete set the state of each component being
464 * zeroed and set the correct state for the unit.
466 * last commit the records.
470 un
= md_unit_readerlock(ui
);
472 /* check for active init on this column */
473 /* exiting is cpr safe */
474 if ((un
->un_init_colcnt
> 0) && (un
->un_resync_index
!= -1)) {
475 md_unit_readerexit(ui
);
476 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
478 * Decrement the raid resync count for cpr
480 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
481 md_cpr_resync
.md_raid_resync
--;
482 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
486 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_START
, SVM_TAG_METADEVICE
, setno
,
488 un
->un_init_colcnt
= 0;
489 un
->un_init_iocnt
= 0;
490 end_off
= un
->un_pwsize
+ (un
->un_segsize
* un
->un_segsincolumn
);
491 zerosize
= (size_t)MIN((diskaddr_t
)un
->un_maxio
, end_off
);
493 /* allocate zero-filled buffer */
494 zero_addr
= kmem_zalloc(dbtob(zerosize
), KM_SLEEP
);
496 for (ix
= 0; ix
< un
->un_totalcolumncnt
; ix
++) {
497 if (un
->un_column
[ix
].un_devstate
!= RCS_INIT
)
499 /* allocate new column init structure */
500 cur
= (raid_ci_t
*)kmem_zalloc((sizeof (raid_ci_t
)), KM_SLEEP
);
502 un
->un_init_colcnt
++;
503 cur
->ci_next
= ci_chain
;
508 cur
->ci_flag
= COL_INITING
;
509 cur
->ci_zerosize
= zerosize
;
510 cur
->ci_blkno
= un
->un_column
[ix
].un_pwstart
;
511 cur
->ci_lastblk
= cur
->ci_blkno
+ un
->un_pwsize
512 + (un
->un_segsize
* un
->un_segsincolumn
);
513 /* initialize static buf fields */
514 cur
->ci_buf
.b_un
.b_addr
= zero_addr
;
515 cur
->ci_buf
.b_chain
= (buf_t
*)cur
;
516 cur
->ci_buf
.b_back
= &cur
->ci_buf
;
517 cur
->ci_buf
.b_forw
= &cur
->ci_buf
;
518 cur
->ci_buf
.b_iodone
= init_col_int
;
519 cur
->ci_buf
.b_flags
= B_BUSY
| B_WRITE
;
520 cur
->ci_buf
.b_edev
= md_dev64_to_dev(un
->un_column
[ix
].un_dev
);
521 sema_init(&cur
->ci_buf
.b_io
, 0, NULL
, SEMA_DEFAULT
, NULL
);
522 sema_init(&cur
->ci_buf
.b_sem
, 0, NULL
, SEMA_DEFAULT
, NULL
);
523 /* set address and length for I/O bufs */
524 cur
->ci_buf
.b_bufsize
= dbtob(zerosize
);
525 cur
->ci_buf
.b_bcount
= dbtob(zerosize
);
526 cur
->ci_buf
.b_lblkno
= un
->un_column
[ix
].un_pwstart
;
527 cur
->ci_buf
.b_offset
= -1;
529 if (! (un
->un_column
[ix
].un_devflags
& MD_RAID_DEV_ISOPEN
)) {
530 md_dev64_t tmpdev
= un
->un_column
[ix
].un_dev
;
533 * If this column is hotspared then
534 * use the hotspare key
536 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
,
538 un
->un_column
[ix
].un_hs_key
:
539 un
->un_column
[ix
].un_orig_key
);
540 if ((cur
->ci_err
= md_layered_open(mnum
, &tmpdev
,
542 un
->un_column
[ix
].un_devflags
|=
544 un
->un_column
[ix
].un_dev
= tmpdev
;
546 if (cur
->ci_err
== 0)
547 md_call_strategy(&cur
->ci_buf
, MD_STR_NOTTOP
, NULL
);
550 md_unit_readerexit(ui
);
551 state
= un
->un_state
;
552 colcnt
= un
->un_init_colcnt
;
553 mutex_enter(&un
->un_mx
);
555 cv_wait(&un
->un_cv
, &un
->un_mx
);
558 for (cur
= ci_chain
; cur
!= NULL
; cur
= cur
->ci_next
) {
560 if ((cur
->ci_flag
!= COL_INITING
) || (cur
->ci_err
)) {
563 else if (cur
->ci_flag
== COL_INIT_DONE
) {
564 (void) init_pw_area(un
,
565 un
->un_column
[col
].un_dev
,
566 un
->un_column
[col
].un_pwstart
,
568 cur
->ci_flag
= COL_READY
;
575 mutex_exit(&un
->un_mx
);
577 /* This prevents new opens */
578 rw_enter(&md_unit_array_rw
.lock
, RW_WRITER
);
579 (void) md_io_writerlock(ui
);
580 un
= (mr_unit_t
*)md_unit_writerlock(ui
);
584 /* take this element out of the chain */
585 ci_chain
= cur
->ci_next
;
586 /* free this element */
587 sema_destroy(&cur
->ci_buf
.b_io
);
588 sema_destroy(&cur
->ci_buf
.b_sem
);
590 raid_set_state(cur
->ci_un
, cur
->ci_col
,
593 raid_set_state(cur
->ci_un
, cur
->ci_col
,
595 kmem_free(cur
, sizeof (raid_ci_t
));
598 /* free the zeroed buffer */
599 kmem_free(zero_addr
, dbtob(zerosize
));
601 /* determine new unit state */
603 if (state
== RUS_INIT
)
604 un
->un_state
= RUS_OKAY
;
606 un
->c
.un_total_blocks
= un
->un_grow_tb
;
607 md_nblocks_set(mnum
, un
->c
.un_total_blocks
);
609 if (raid_state_cnt(un
, RCS_OKAY
) ==
610 un
->un_totalcolumncnt
)
611 un
->un_state
= RUS_OKAY
;
613 } else { /* error orcurred */
614 if (state
& RUS_INIT
)
615 un
->un_state
= RUS_DOI
;
617 uniqtime32(&un
->un_timestamp
);
618 MD_STATUS(un
) &= ~MD_UN_GROW_PENDING
;
619 un
->un_init_colcnt
= 0;
620 un
->un_init_iocnt
= 0;
621 raid_commit(un
, NULL
);
622 md_unit_writerexit(ui
);
623 (void) md_io_writerexit(ui
);
624 rw_exit(&md_unit_array_rw
.lock
);
626 if (un
->un_state
& RUS_DOI
) {
627 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_FATAL
,
628 SVM_TAG_METADEVICE
, setno
, MD_SID(un
));
630 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_FAILED
,
631 SVM_TAG_METADEVICE
, setno
, MD_SID(un
));
634 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_SUCCESS
,
635 SVM_TAG_METADEVICE
, setno
, MD_SID(un
));
637 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
639 * Decrement the raid resync count for cpr
641 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
642 md_cpr_resync
.md_raid_resync
--;
643 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
649 raid_init_unit(minor_t mnum
, md_error_t
*ep
)
654 set_t setno
= MD_MIN2SET(mnum
);
657 if (md_get_setstatus(setno
) & MD_SET_STALE
)
658 return (mdmddberror(ep
, MDE_DB_STALE
, mnum
, setno
));
660 /* Don't start an init if the device is not available */
661 if ((ui
== NULL
) || (ui
->ui_tstate
& MD_DEV_ERRORED
)) {
662 return (mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
));
665 if (raid_internal_open(mnum
, (FREAD
| FWRITE
),
666 OTYP_LYR
, MD_OFLG_ISINIT
)) {
667 rval
= mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
);
671 un
= md_unit_readerlock(ui
);
672 un
->un_percent_done
= 0;
673 md_unit_readerexit(ui
);
674 /* start resync_unit thread */
675 (void) thread_create(NULL
, 0, raid_init_columns
,
676 (void *)(uintptr_t)mnum
, 0, &p0
, TS_RUN
, minclsyspri
);
681 un
= md_unit_writerlock(ui
);
682 MD_STATUS(un
) &= ~MD_UN_GROW_PENDING
;
684 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++)
685 if (COLUMN_STATE(un
, i
) == RCS_INIT
)
686 raid_set_state(un
, i
, RCS_ERRED
, 0);
687 if (un
->un_state
& RUS_INIT
)
688 un
->un_state
= RUS_DOI
;
689 raid_commit(un
, NULL
);
690 md_unit_writerexit(ui
);
691 if (un
->un_state
& RUS_DOI
) {
692 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_FATAL
,
693 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
695 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_INIT_FAILED
,
696 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
704 * DESCRIPTION: regenerate all the parity on the raid device. This
705 * routine starts a thread that will regenerate the
706 * parity on a raid device. If an I/O error occurs during
707 * this process the entire device is placed in error.
709 * PARAMETERS: md_set_params_t *msp - ioctl packet
712 regen_unit(minor_t mnum
)
714 mdi_unit_t
*ui
= MDI_UNIT(mnum
);
715 mr_unit_t
*un
= MD_UNIT(mnum
);
719 diskaddr_t total_segments
;
724 * Increment raid resync count for cpr
726 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
727 md_cpr_resync
.md_raid_resync
++;
728 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
730 iosize
= dbtob(un
->un_segsize
);
731 buffer
= kmem_alloc(iosize
, KM_SLEEP
);
733 total_segments
= un
->un_segsincolumn
;
734 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_REGEN_START
, SVM_TAG_METADEVICE
,
735 MD_UN2SET(un
), MD_SID(un
));
736 un
->un_percent_done
= 0;
737 init_buf(bp
, B_READ
| B_BUSY
, iosize
);
739 for (line
= 0; line
< total_segments
; line
++) {
740 bp
->b_lblkno
= line
*
741 ((un
->un_origcolumncnt
- 1) * un
->un_segsize
);
742 bp
->b_un
.b_addr
= buffer
;
743 bp
->b_bcount
= iosize
;
746 * The following assignment is only correct because
747 * md_raid_strategy is fine when it's only a minor number
748 * and not a real dev_t. Yuck.
751 md_raid_strategy(bp
, MD_STR_NOTTOP
, NULL
);
756 un
->un_percent_done
= (uint_t
)((line
* 1000) /
757 un
->un_segsincolumn
);
758 /* just to avoid rounding errors */
759 if (un
->un_percent_done
> 1000)
760 un
->un_percent_done
= 1000;
761 reset_buf(bp
, B_READ
| B_BUSY
, iosize
);
764 kmem_free(buffer
, iosize
);
766 (void) md_io_writerlock(ui
);
767 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
768 (void) md_io_writerexit(ui
);
769 un
= md_unit_writerlock(ui
);
771 (raid_state_cnt(un
, RCS_OKAY
) == un
->un_totalcolumncnt
))
772 un
->un_state
= RUS_OKAY
;
773 raid_commit(un
, NULL
);
774 md_unit_writerexit(ui
);
776 raid_state_cnt(un
, RCS_OKAY
) != un
->un_totalcolumncnt
) {
777 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_REGEN_FAILED
,
778 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
780 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_REGEN_DONE
, SVM_TAG_METADEVICE
,
781 MD_UN2SET(un
), MD_SID(un
));
785 * Decrement the raid resync count for cpr
787 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
788 md_cpr_resync
.md_raid_resync
--;
789 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
794 raid_regen_unit(minor_t mnum
, md_error_t
*ep
)
799 set_t setno
= MD_MIN2SET(mnum
);
802 un
= (mr_unit_t
*)MD_UNIT(mnum
);
804 if (md_get_setstatus(setno
) & MD_SET_STALE
)
805 return (mdmddberror(ep
, MDE_DB_STALE
, mnum
, setno
));
807 /* Don't start a regen if the device is not available */
808 if ((ui
== NULL
) || (ui
->ui_tstate
& MD_DEV_ERRORED
)) {
809 return (mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
));
812 if (raid_internal_open(mnum
, (FREAD
| FWRITE
), OTYP_LYR
, 0)) {
813 (void) md_unit_writerlock(ui
);
814 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++)
815 raid_set_state(un
, i
, RCS_ERRED
, 0);
816 md_unit_writerexit(ui
);
817 return (mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
));
820 /* start resync_unit thread */
821 (void) thread_create(NULL
, 0, regen_unit
,
822 (void *)(uintptr_t)mnum
, 0, &p0
, TS_RUN
, minclsyspri
);
828 raid_regen(md_regen_param_t
*mrp
, IOLOCK
*lock
)
830 minor_t mnum
= mrp
->mnum
;
833 mdclrerror(&mrp
->mde
);
835 un
= md_unit_readerlock(MDI_UNIT(mnum
));
837 if (MD_STATUS(un
) & MD_UN_GROW_PENDING
) {
838 md_unit_readerexit(MDI_UNIT(mnum
));
839 return (mdmderror(&mrp
->mde
, MDE_IN_USE
, mnum
));
842 if ((MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
) ||
843 (raid_state_cnt(un
, RCS_RESYNC
))) {
844 md_unit_readerexit(MDI_UNIT(mnum
));
845 return (mdmderror(&mrp
->mde
, MDE_RESYNC_ACTIVE
, mnum
));
848 if ((raid_state_cnt(un
, RCS_INIT
) != 0) || (un
->un_state
& RUS_INIT
)) {
849 md_unit_readerexit(MDI_UNIT(mnum
));
850 return (mdmderror(&mrp
->mde
, MDE_IN_USE
, mnum
));
853 if ((raid_state_cnt(un
, RCS_OKAY
) != un
->un_totalcolumncnt
) ||
854 (! (un
->un_state
& RUS_OKAY
))) {
855 md_unit_readerexit(MDI_UNIT(mnum
));
856 return (mdmderror(&mrp
->mde
, MDE_RAID_NOT_OKAY
, mnum
));
859 md_unit_readerexit(MDI_UNIT(mnum
));
861 /* get locks and recheck to be sure something did not change */
862 if ((un
= raid_getun(mnum
, &mrp
->mde
, WRITERS
, lock
)) == NULL
)
865 if ((raid_state_cnt(un
, RCS_OKAY
) != un
->un_totalcolumncnt
) ||
866 (! (un
->un_state
& RUS_OKAY
))) {
867 return (mdmderror(&mrp
->mde
, MDE_RAID_NOT_OKAY
, mnum
));
870 raid_set_state(un
, 0, RCS_REGEN
, 0);
871 raid_commit(un
, NULL
);
872 md_ioctl_droplocks(lock
);
873 return (raid_regen_unit(mnum
, &mrp
->mde
));
878 * DESCRIPTION: used to create a RAID metadevice
879 * PARAMETERS: md_set_params_t *d - pointer to set data structure
880 * int mode - must be FWRITE
886 raid_set(void *d
, int mode
)
890 mddb_recid_t mr_recid
;
891 mddb_recid_t
*recids
;
898 md_set_params_t
*msp
= d
;
902 setno
= MD_MIN2SET(mnum
);
904 mdclrerror(&msp
->mde
);
906 if (raid_getun(mnum
, &msp
->mde
, NO_OLD
, NULL
) == NULL
)
909 typ1
= (mddb_type_t
)md_getshared_key(setno
,
910 raid_md_ops
.md_driver
.md_drivername
);
912 /* create the db record for this mdstruct */
914 if (msp
->options
& MD_CRO_64BIT
) {
916 return (mdmderror(&msp
->mde
, MDE_UNIT_TOO_LARGE
, mnum
));
918 mr_recid
= mddb_createrec(msp
->size
, typ1
, 0,
919 MD_CRO_64BIT
| MD_CRO_RAID
| MD_CRO_FN
, setno
);
922 mr_recid
= mddb_createrec(msp
->size
, typ1
, 0,
923 MD_CRO_32BIT
| MD_CRO_RAID
| MD_CRO_FN
, setno
);
927 return (mddbstatus2error(&msp
->mde
,
928 (int)mr_recid
, mnum
, setno
));
930 /* get the address of the mdstruct */
931 un
= (mr_unit_t
*)mddb_getrecaddr(mr_recid
);
933 * It is okay that we muck with the mdstruct here,
934 * since no one else will know about the mdstruct
935 * until we commit it. If we crash, the record will
936 * be automatically purged, since we haven't
940 /* copy in the user's mdstruct */
941 if (err
= ddi_copyin((caddr_t
)(uintptr_t)msp
->mdp
, un
,
943 mddb_deleterec_wrapper(mr_recid
);
946 /* All 64 bit metadevices only support EFI labels. */
947 if (msp
->options
& MD_CRO_64BIT
) {
948 un
->c
.un_flag
|= MD_EFILABEL
;
952 * allocate the real recids array. since we may have to commit
953 * underlying metadevice records, we need an array of size:
954 * total number of components in raid + 3 (1 for the raid itself,
955 * one for the hotspare, one for the end marker).
957 num_recs
= un
->un_totalcolumncnt
+ 3;
959 recids
= kmem_alloc(num_recs
* sizeof (mddb_recid_t
), KM_SLEEP
);
960 recids
[rid
++] = mr_recid
;
963 MD_RECID(un
) = recids
[0];
964 MD_CAPAB(un
) = MD_CAN_PARENT
| MD_CAN_SP
;
965 MD_PARENT(un
) = MD_NO_PARENT
;
966 un
->un_resync_copysize
= 0;
967 un
->c
.un_revision
|= MD_FN_META_DEV
;
969 if (UNIT_STATE(un
) == RUS_INIT
)
970 MD_STATUS(un
) |= MD_UN_GROW_PENDING
;
972 if ((UNIT_STATE(un
) != RUS_INIT
) && raid_check_pw(un
)) {
973 mddb_deleterec_wrapper(mr_recid
);
974 err
= mderror(&msp
->mde
, MDE_RAID_INVALID
);
978 if (err
= raid_build_incore(un
, 0)) {
980 kmem_free(un
->un_column_ic
, sizeof (mr_column_ic_t
) *
981 un
->un_totalcolumncnt
);
982 kmem_free(un
->mr_ic
, sizeof (*un
->mr_ic
));
985 md_nblocks_set(mnum
, -1ULL);
986 MD_UNIT(mnum
) = NULL
;
988 mddb_deleterec_wrapper(mr_recid
);
993 * Update unit availability
995 md_set
[setno
].s_un_avail
--;
998 if (un
->un_hsp_id
!= -1) {
999 /* increment the reference count of the hot spare pool */
1000 err
= md_hot_spare_ifc(HSP_INCREF
, un
->un_hsp_id
, 0, 0,
1001 &recids
[rid
], NULL
, NULL
, NULL
);
1003 md_nblocks_set(mnum
, -1ULL);
1004 MD_UNIT(mnum
) = NULL
;
1006 mddb_deleterec_wrapper(mr_recid
);
1013 * set the parent on any metadevice components.
1014 * NOTE: currently soft partitions are the only metadevices
1015 * which can appear within a RAID metadevice.
1017 for (col
= 0; col
< un
->un_totalcolumncnt
; col
++) {
1018 mr_column_t
*mr_col
= &un
->un_column
[col
];
1021 if (md_getmajor(mr_col
->un_dev
) == md_major
) {
1022 comp_un
= MD_UNIT(md_getminor(mr_col
->un_dev
));
1023 recids
[rid
++] = MD_RECID(comp_un
);
1024 md_set_parent(mr_col
->un_dev
, MD_SID(un
));
1028 /* set the end marker */
1031 mddb_commitrecs_wrapper(recids
);
1032 md_create_unit_incore(mnum
, &raid_md_ops
, 1);
1034 SE_NOTIFY(EC_SVM_CONFIG
, ESC_SVM_CREATE
, SVM_TAG_METADEVICE
, setno
,
1038 kmem_free(recids
, (num_recs
* sizeof (mddb_recid_t
)));
1042 /* only attempt to init a device that is in the init state */
1043 if (UNIT_STATE(un
) != RUS_INIT
)
1046 return (raid_init_unit(mnum
, &msp
->mde
));
1051 * DESCRIPTION: used to get the unit structure of a RAID metadevice
1052 * PARAMETERS: md_i_get_t *migp - pointer to get data structure
1053 * int mode - must be FREAD
1054 * IOLOCK *lock - pointer to IOCTL lock
1056 * LOCKS: obtains unit reader lock via IOLOCK
1068 md_i_get_t
*migph
= migp
;
1073 mdclrerror(&migph
->mde
);
1075 if ((un
= raid_getun(mnum
, &migph
->mde
,
1076 RD_LOCK
, lock
)) == NULL
)
1079 if (migph
->size
== 0) {
1080 migph
->size
= un
->c
.un_size
;
1084 if (migph
->size
< un
->c
.un_size
) {
1087 if (ddi_copyout(un
, (void *)(uintptr_t)migph
->mdp
,
1088 un
->c
.un_size
, mode
))
1096 * NAME: raid_replace
1097 * DESCRIPTION: used to replace a component of a RAID metadevice
1098 * PARAMETERS: replace_params_t *mrp - pointer to replace data structure
1099 * IOLOCK *lock - pointer to IOCTL lock
1101 * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun),
1102 * obtains and releases md_unit_array_rw write lock
1107 replace_params_t
*mrp
,
1111 minor_t mnum
= mrp
->mnum
;
1112 md_dev64_t odev
= mrp
->old_dev
;
1113 md_error_t
*ep
= &mrp
->mde
;
1124 mddb_recid_t extra_recids
[3] = { 0, 0, 0 };
1126 md_error_t mde
= mdnullerror
;
1127 sv_dev_t sv
= {MD_SET_BAD
, MD_SIDEWILD
, MD_KEYWILD
};
1130 setno
= MD_MIN2SET(mnum
);
1131 side
= mddb_getsidenum(setno
);
1133 un
= md_unit_readerlock(MDI_UNIT(mnum
));
1135 if ((MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
) ||
1136 (raid_state_cnt(un
, RCS_RESYNC
) != 0)) {
1137 md_unit_readerexit(MDI_UNIT(mnum
));
1138 return (mdmderror(ep
, MDE_RESYNC_ACTIVE
, mnum
));
1141 if (un
->un_state
& RUS_DOI
) {
1142 md_unit_readerexit(MDI_UNIT(mnum
));
1143 return (mdmderror(ep
, MDE_RAID_DOI
, mnum
));
1146 if ((raid_state_cnt(un
, RCS_INIT
) != 0) || (un
->un_state
& RUS_INIT
) ||
1147 (MD_STATUS(un
) & MD_UN_GROW_PENDING
)) {
1148 md_unit_readerexit(MDI_UNIT(mnum
));
1149 return (mdmderror(ep
, MDE_IN_USE
, mnum
));
1152 md_unit_readerexit(MDI_UNIT(mnum
));
1154 /* get locks and recheck to be sure something did not change */
1155 if ((un
= raid_getun(mnum
, ep
, WRITERS
, lock
)) == NULL
)
1158 if (md_getkeyfromdev(setno
, side
, odev
, &devkey
, &nkeys
) != 0) {
1159 return (mddeverror(ep
, MDE_NAME_SPACE
, odev
));
1162 for (ix
= 0; ix
< un
->un_totalcolumncnt
; ix
++) {
1163 md_dev64_t tmpdevt
= un
->un_column
[ix
].un_orig_dev
;
1165 * Try to resolve devt again if NODEV64
1167 if (tmpdevt
== NODEV64
) {
1168 tmpdevt
= md_resolve_bydevid(mnum
, tmpdevt
,
1169 un
->un_column
[ix
].un_orig_key
);
1170 un
->un_column
[ix
].un_orig_dev
= tmpdevt
;
1173 if (un
->un_column
[ix
].un_orig_dev
== odev
) {
1177 if (un
->un_column
[ix
].un_orig_dev
== NODEV64
) {
1179 * Now we use the keys to match.
1180 * If no key found, continue.
1185 if (un
->un_column
[ix
].un_orig_key
== devkey
) {
1187 return (mddeverror(ep
,
1197 return (mdcomperror(ep
, MDE_CANT_FIND_COMP
,
1200 if ((MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
) ||
1201 (raid_state_cnt(un
, RCS_RESYNC
) != 0))
1202 return (mdmderror(ep
, MDE_RESYNC_ACTIVE
, mnum
));
1204 if (un
->un_state
& RUS_DOI
)
1205 return (mdcomperror(ep
, MDE_REPL_INVAL_STATE
, mnum
,
1206 un
->un_column
[col
].un_dev
));
1208 if ((raid_state_cnt(un
, RCS_INIT
) != 0) || (un
->un_state
& RUS_INIT
) ||
1209 (MD_STATUS(un
) & MD_UN_GROW_PENDING
))
1210 return (mdmderror(ep
, MDE_IN_USE
, mnum
));
1212 if ((mrp
->cmd
== FORCE_ENABLE_COMP
) || (mrp
->cmd
== FORCE_REPLACE_COMP
))
1214 if ((mrp
->cmd
== FORCE_ENABLE_COMP
) || (mrp
->cmd
== ENABLE_COMP
))
1216 if ((mrp
->cmd
== FORCE_REPLACE_COMP
) || (mrp
->cmd
== REPLACE_COMP
))
1219 if (un
->un_state
== RUS_LAST_ERRED
) {
1220 /* Must use -f force flag for unit in LAST_ERRED state */
1222 return (mdmderror(ep
, MDE_RAID_NEED_FORCE
, mnum
));
1224 /* Must use -f force flag on ERRED column first */
1225 if (un
->un_column
[col
].un_devstate
!= RCS_ERRED
) {
1226 for (ix
= 0; ix
< un
->un_totalcolumncnt
; ix
++) {
1227 if (un
->un_column
[ix
].un_devstate
& RCS_ERRED
)
1228 return (mdcomperror(ep
,
1229 MDE_RAID_COMP_ERRED
, mnum
,
1230 un
->un_column
[ix
].un_dev
));
1234 /* must use -f force flag on LAST_ERRED columns next */
1235 if ((un
->un_column
[col
].un_devstate
!= RCS_LAST_ERRED
) &&
1236 (un
->un_column
[col
].un_devstate
!= RCS_ERRED
))
1237 return (mdcomperror(ep
, MDE_RAID_COMP_ERRED
,
1238 mnum
, un
->un_column
[col
].un_dev
));
1241 if (un
->un_state
== RUS_ERRED
) {
1242 if (! (un
->un_column
[col
].un_devstate
&
1243 (RCS_ERRED
| RCS_INIT_ERRED
)))
1244 return (mdcomperror(ep
, MDE_RAID_COMP_ERRED
,
1245 mnum
, un
->un_column
[ix
].un_dev
));
1248 ASSERT(!(un
->un_column
[col
].un_devflags
& MD_RAID_ALT_ISOPEN
));
1249 ASSERT(!(un
->un_column
[col
].un_devflags
& MD_RAID_WRITE_ALT
));
1251 state
= un
->un_column
[col
].un_devstate
;
1252 if (state
& RCS_INIT_ERRED
) {
1253 MD_STATUS(un
) |= MD_UN_GROW_PENDING
;
1254 un
->un_percent_done
= 0;
1255 raid_set_state(un
, col
, RCS_INIT
, 0);
1256 } else if (((mrp
->options
& MDIOCTL_NO_RESYNC_RAID
) == 0) &&
1257 resync_request(mnum
, col
, 0, ep
))
1258 return (mdmderror(ep
, MDE_RESYNC_ACTIVE
, mnum
));
1261 if (cmd
== REPLACE_COMP
) {
1262 md_dev64_t tmpdev
= mrp
->new_dev
;
1265 * open the device by device id
1267 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
, mrp
->new_key
);
1268 if (md_layered_open(mnum
, &tmpdev
, MD_OFLG_NULL
)) {
1269 return (mdcomperror(ep
, MDE_COMP_OPEN_ERR
, mnum
,
1274 * If it's a metadevice, make sure it gets reparented
1276 if (md_getmajor(tmpdev
) == md_major
) {
1277 minor_t new_mnum
= md_getminor(tmpdev
);
1278 md_unit_t
*new_un
= MD_UNIT(new_mnum
);
1280 md_set_parent(tmpdev
, MD_SID(un
));
1281 extra_recids
[extra_rids
++] = MD_RECID(new_un
);
1284 mrp
->new_dev
= tmpdev
;
1285 un
->un_column
[col
].un_orig_dev
= tmpdev
;
1286 un
->un_column
[col
].un_orig_key
= mrp
->new_key
;
1287 un
->un_column
[col
].un_orig_pwstart
= mrp
->start_blk
;
1288 un
->un_column
[col
].un_orig_devstart
=
1289 mrp
->start_blk
+ un
->un_pwsize
;
1292 * If the old device was a metadevice, make sure to
1295 if (md_getmajor(odev
) == md_major
) {
1296 minor_t old_mnum
= md_getminor(odev
);
1297 md_unit_t
*old_un
= MD_UNIT(old_mnum
);
1299 md_reset_parent(odev
);
1300 extra_recids
[extra_rids
++] =
1304 if (HOTSPARED(un
, col
)) {
1305 md_layered_close(mrp
->new_dev
, MD_OFLG_NULL
);
1306 un
->un_column
[col
].un_alt_dev
= mrp
->new_dev
;
1307 un
->un_column
[col
].un_alt_pwstart
= mrp
->start_blk
;
1308 un
->un_column
[col
].un_alt_devstart
=
1309 mrp
->start_blk
+ un
->un_pwsize
;
1310 un
->un_column
[col
].un_devflags
|= MD_RAID_COPY_RESYNC
;
1313 * not hot spared. Close the old device and
1314 * move the new device in.
1316 if (un
->un_column
[col
].un_devflags
& MD_RAID_DEV_ISOPEN
)
1317 md_layered_close(odev
, MD_OFLG_NULL
);
1318 un
->un_column
[col
].un_devflags
|= MD_RAID_DEV_ISOPEN
;
1319 un
->un_column
[col
].un_dev
= mrp
->new_dev
;
1320 un
->un_column
[col
].un_pwstart
= mrp
->start_blk
;
1321 un
->un_column
[col
].un_devstart
=
1322 mrp
->start_blk
+ un
->un_pwsize
;
1323 if ((mrp
->options
& MDIOCTL_NO_RESYNC_RAID
) == 0) {
1324 un
->un_column
[col
].un_devflags
|=
1325 MD_RAID_REGEN_RESYNC
;
1329 * If the old device is not a metadevice then
1330 * save off the set number and key so that it
1331 * can be removed from the namespace later.
1333 if (md_getmajor(odev
) != md_major
) {
1339 if (cmd
== ENABLE_COMP
) {
1340 md_dev64_t tmpdev
= un
->un_column
[col
].un_orig_dev
;
1341 mdkey_t raidkey
= un
->un_column
[col
].un_orig_key
;
1344 * We trust the dev_t because we cannot determine the
1345 * dev_t from the device id since a new disk is in the
1346 * same location. Since this is a call from metareplace -e dx
1347 * AND it is SCSI a new dev_t is not generated. So the
1348 * dev_t from the mddb is used. Before enabling the device
1349 * we check to make sure that multiple entries for the same
1350 * device does not exist in the namespace. If they do we
1352 * One of the many ways multiple entries in the name space
1353 * can occur is if one removed the failed component in a
1354 * RAID metadevice and put another disk that was part of
1355 * another metadevice. After reboot metadevadm would correctly
1356 * update the device name for the metadevice whose component
1357 * has moved. However now in the metadb there are two entries
1358 * for the same name (ctds) that belong to different
1359 * metadevices. One is valid, the other is a ghost or "last
1362 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
, raidkey
);
1363 if (tmpdev
== NODEV64
)
1364 tmpdev
= md_getdevnum(setno
, side
, raidkey
,
1367 * check for multiple entries in namespace for the
1371 if (md_getkeyfromdev(setno
, side
, tmpdev
, &devkey
,
1373 return (mddeverror(ep
, MDE_NAME_SPACE
, tmpdev
));
1375 * If number of keys are greater that
1376 * 1, then we have an invalid
1377 * namespace. STOP and return.
1380 return (mddeverror(ep
, MDE_MULTNM
, tmpdev
));
1381 if (devkey
!= raidkey
)
1382 return (mdcomperror(ep
, MDE_CANT_FIND_COMP
,
1385 if (un
->un_column
[col
].un_orig_dev
== NODEV64
)
1386 un
->un_column
[col
].un_orig_dev
= tmpdev
;
1388 if (HOTSPARED(un
, col
)) {
1389 un
->un_column
[col
].un_alt_dev
=
1390 un
->un_column
[col
].un_orig_dev
;
1391 un
->un_column
[col
].un_alt_pwstart
=
1392 un
->un_column
[col
].un_orig_pwstart
;
1393 un
->un_column
[col
].un_alt_devstart
=
1394 un
->un_column
[col
].un_orig_devstart
;
1395 un
->un_column
[col
].un_devflags
|= MD_RAID_COPY_RESYNC
;
1397 if (!(un
->un_column
[col
].un_devflags
&
1398 MD_RAID_DEV_ISOPEN
)) {
1399 if (md_layered_open(mnum
, &tmpdev
,
1401 un
->un_column
[col
].un_dev
= tmpdev
;
1402 return (mdcomperror(ep
,
1403 MDE_COMP_OPEN_ERR
, mnum
, tmpdev
));
1405 ASSERT(tmpdev
!= NODEV64
&&
1408 if ((md_getmajor(tmpdev
) != md_major
) &&
1409 (md_devid_found(setno
, side
, raidkey
)
1411 if (md_update_namespace_did(setno
, side
,
1412 raidkey
, &mde
) != 0) {
1415 " update namespace\n");
1418 un
->un_column
[col
].un_dev
=
1419 un
->un_column
[col
].un_orig_dev
;
1421 un
->un_column
[col
].un_devflags
|= MD_RAID_DEV_ISOPEN
;
1422 un
->un_column
[col
].un_devflags
|= MD_RAID_REGEN_RESYNC
;
1425 if (mrp
->has_label
) {
1426 un
->un_column
[col
].un_devflags
|= MD_RAID_HAS_LABEL
;
1428 un
->un_column
[col
].un_devflags
&= ~MD_RAID_HAS_LABEL
;
1431 raid_commit(un
, extra_recids
);
1433 /* If the component has been replaced - clean up the name space */
1434 if (sv
.setno
!= MD_SET_BAD
) {
1435 md_rem_names(&sv
, 1);
1438 md_ioctl_droplocks(lock
);
1440 if ((cmd
== ENABLE_COMP
) || (cmd
== FORCE_ENABLE_COMP
)) {
1441 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_ENABLE
, SVM_TAG_METADEVICE
,
1444 SE_NOTIFY(EC_SVM_CONFIG
, ESC_SVM_REPLACE
, SVM_TAG_METADEVICE
,
1448 if (un
->un_column
[col
].un_devstate
& RCS_INIT
)
1449 err
= raid_init_unit(mnum
, ep
);
1450 else if ((mrp
->options
& MDIOCTL_NO_RESYNC_RAID
) == 0)
1451 err
= raid_resync_unit(mnum
, ep
);
1458 /* is already set by this time */
1459 /* fix state and commit record */
1460 un
= md_unit_writerlock(MDI_UNIT(mnum
));
1461 if (state
& RCS_INIT_ERRED
)
1462 raid_set_state(un
, col
, state
, 1);
1463 else if (state
& RCS_OKAY
)
1464 raid_set_state(un
, col
, RCS_ERRED
, 0);
1466 raid_set_state(un
, col
, state
, 1);
1467 raid_commit(un
, NULL
);
1468 md_unit_writerexit(MDI_UNIT(mnum
));
1475 * NAME: raid_set_sync
1476 * DESCRIPTION: used to sync a component of a RAID metadevice
1477 * PARAMETERS: md_resync_ioctl_t *mrp - pointer to resync data structure
1478 * int mode - must be FWRITE
1479 * IOLOCK *lock - pointer to IOCTL lock
1481 * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun),
1482 * obtains and releases md_unit_array_rw write lock
/*
 * Body fragment of raid_set_sync(): kicks off resync / init / regen work
 * for a RAID unit, or forces a device replay when no work is pending.
 * NOTE(review): this chunk was shattered by extraction; the signature line
 * and several interior lines (braces, declarations) are missing, so the
 * surviving text is preserved verbatim rather than reformatted.
 */
1487 md_resync_ioctl_t
*rip
,
1491 minor_t mnum
= rip
->ri_mnum
;
/* Clear any stale error state in the caller's ioctl struct first. */
1499 mdclrerror(&rip
->mde
);
/* Look up the unit under the WRITERS lock; NULL means bad unit. */
1501 if ((un
= raid_getun(mnum
, &rip
->mde
, WRITERS
, lock
)) == NULL
)
1504 if (un
->un_state
& RUS_DOI
)
1505 return (mdmderror(&rip
->mde
, MDE_RAID_DOI
, mnum
));
/* Refuse to start a second resync while one is already active. */
1507 if (un
->c
.un_status
& MD_UN_RESYNC_ACTIVE
)
1508 return (mdmderror(&rip
->mde
, MDE_RESYNC_ACTIVE
, mnum
));
1510 /* This prevents new opens */
1513 if (un
->un_state
& RUS_REGEN
)
1516 if (raid_state_cnt(un
, RCS_RESYNC
))
1519 if (raid_state_cnt(un
, RCS_INIT
) || (un
->un_state
& RUS_INIT
))
/* At most one of resync/init/regen may be pending at once. */
1522 ASSERT(!(resync
&& init
&& regen
));
1523 md_ioctl_droplocks(lock
);
1524 rip
->ri_percent_done
= 0;
/* Init path: mark grow pending and hand off to the init thread. */
1527 MD_STATUS(un
) |= MD_UN_GROW_PENDING
;
1528 return (raid_init_unit(mnum
, &rip
->mde
));
1532 * If resync is needed, it will call raid_internal_open forcing
1533 * replay before the open completes.
1534 * Otherwise, call raid_internal_open directly to force
1535 * replay to complete during boot (metasync -r).
1536 * NOTE: the unit writer lock must remain held while setting
1537 * MD_UN_RESYNC_ACTIVE but must be released before
1538 * calling raid_resync_unit or raid_internal_open.
1542 un
= md_unit_writerlock(MDI_UNIT(mnum
));
1543 MD_STATUS(un
) |= MD_UN_RESYNC_ACTIVE
;
1544 /* Must release unit writer lock for resync */
1546 * correctly setup the devices before trying to start the
/*
 * NOTE(review): the loop condition below is `un->un_totalcolumncnt`
 * (a constant, nonzero for a configured unit), not
 * `ix < un->un_totalcolumncnt`; termination appears to rely on the
 * resync branch exiting the loop — confirm against the full source.
 */
1549 for (ix
= 0; un
->un_totalcolumncnt
; ix
++) {
1550 if (un
->un_column
[ix
].un_devstate
& RCS_RESYNC
) {
/* Hot-spared copy-resync: redirect the alt device to the original. */
1551 if ((un
->un_column
[ix
].un_devflags
&
1552 MD_RAID_COPY_RESYNC
) &&
1553 HOTSPARED(un
, ix
)) {
1554 un
->un_column
[ix
].un_alt_dev
=
1555 un
->un_column
[ix
].un_orig_dev
;
1556 un
->un_column
[ix
].un_alt_devstart
=
1557 un
->un_column
[ix
].un_orig_devstart
;
1558 un
->un_column
[ix
].un_alt_pwstart
=
1559 un
->un_column
[ix
].un_orig_pwstart
;
1564 ASSERT(un
->un_column
[ix
].un_devflags
&
1565 (MD_RAID_COPY_RESYNC
| MD_RAID_REGEN_RESYNC
));
1566 rip
->ri_percent_done
= 0;
1567 un
->un_column
[ix
].un_devflags
|= MD_RAID_RESYNC
;
/* Queue the resync request, then drop the writer lock before resync. */
1568 (void) resync_request(mnum
, ix
, 0, NULL
);
1569 md_unit_writerexit(MDI_UNIT(mnum
));
1570 err
= raid_resync_unit(mnum
, &rip
->mde
);
1575 err
= raid_regen_unit(mnum
, &rip
->mde
);
1579 /* The unit requires no work, so just force replay of the device. */
1580 if (raid_internal_open(mnum
, (FREAD
| FWRITE
), OTYP_LYR
, 0))
1581 return (mdmderror(&rip
->mde
,
1582 MDE_RAID_OPEN_FAILURE
, mnum
));
1583 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
1589 * NAME: raid_get_resync
1590 * DESCRIPTION: used to check resync status on a component of a RAID metadevice
1591 * PARAMETERS: md_resync_ioctl_t *mrp - pointer to resync data structure
1592 * int mode - must be FWRITE
1593 * IOLOCK *lock - pointer to IOCTL lock
/*
 * Body fragment of raid_get_resync(): reports resync / init / grow / regen
 * progress for a RAID unit into the caller's md_resync_ioctl_t.
 * Percentages are scaled to tenths of a percent (0..1000) before being
 * stored, per the arithmetic below.
 * NOTE(review): extraction dropped the signature and some interior lines;
 * the surviving text is preserved verbatim.
 */
1600 md_resync_ioctl_t
*rip
,
1604 minor_t mnum
= rip
->ri_mnum
;
1606 u_longlong_t percent
;
1611 mdclrerror(&rip
->mde
);
/* Reader lock is sufficient: this ioctl only reports status. */
1613 if ((un
= raid_getun(mnum
, &rip
->mde
, RD_LOCK
, lock
)) == NULL
)
/* Active resync: progress = resync line index over lines per column. */
1617 if (MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
) {
1618 d
= un
->un_segsincolumn
;
1619 percent
= d
? ((1000 * un
->un_resync_line_index
) / d
) : 0;
1621 percent
= 1000; /* can't go over 100% */
1622 rip
->ri_percent_done
= (int)percent
;
1623 rip
->ri_flags
|= MD_RI_INPROGRESS
;
/* Initialisation in progress: progress from the init I/O count. */
1626 if (UNIT_STATE(un
) & RUS_INIT
) {
1627 d
= un
->un_segsize
* un
->un_segsincolumn
*
1628 un
->un_totalcolumncnt
;
1630 d
? ((1000 * (u_longlong_t
)un
->un_init_iocnt
) / d
) : 0;
1632 percent
= 1000; /* can't go over 100% */
1633 rip
->ri_percent_done
= (int)percent
;
1634 rip
->ri_flags
|= MD_GROW_INPROGRESS
;
/* Grow pending: same idea, but scaled over the columns being init'ed. */
1635 } else if (MD_STATUS(un
) & MD_UN_GROW_PENDING
) {
1636 d
= un
->un_segsize
* un
->un_segsincolumn
* un
->un_init_colcnt
;
1638 d
? (((u_longlong_t
)un
->un_init_iocnt
* 1000) / d
) : 0;
1641 rip
->ri_percent_done
= (int)percent
;
1642 rip
->ri_flags
|= MD_GROW_INPROGRESS
;
/* Regen: the unit tracks its own percent-done directly. */
1645 if (un
->un_state
& RUS_REGEN
)
1646 rip
->ri_percent_done
= un
->un_percent_done
;
/* Count errored columns to report the dirty percentage (0..100). */
1649 for (ix
= 0; ix
< un
->un_totalcolumncnt
; ix
++) {
1650 switch (un
->un_column
[ix
].un_devstate
) {
1653 case RCS_LAST_ERRED
:
1660 d
= un
->un_totalcolumncnt
;
1661 rip
->ri_percent_dirty
= d
? (((u_longlong_t
)cnt
* 100) / d
) : 0;
1667 * DESCRIPTION: Concatenate to a RAID metadevice
1668 * PARAMETERS: md_grow_params_t *mgp
1669 * - pointer to IOCGROW data structure
1670 * int mode - must be FWRITE
1671 * IOLOCK *lockp - IOCTL read/write and unit_array_rw lock
1673 * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun),
1674 * obtains and releases md_unit_array_rw write lock
1678 raid_grow(void *mgp
, int mode
, IOLOCK
*lock
)
1681 mr_unit_t
*un
, *new_un
;
1684 mddb_recid_t mr_recid
;
1685 mddb_recid_t old_vtoc
= 0;
1686 mddb_recid_t
*recids
;
1687 md_create_rec_option_t options
;
1695 mr_column_ic_t
*mrc
;
1697 md_grow_params_t
*mgph
= mgp
;
1702 mdclrerror(&mgph
->mde
);
1704 ui
= MDI_UNIT(mnum
);
1705 un
= md_unit_readerlock(ui
);
1707 if (MD_STATUS(un
) & MD_UN_GROW_PENDING
) {
1708 md_unit_readerexit(ui
);
1709 return (mdmderror(&mgph
->mde
, MDE_IN_USE
, mnum
));
1712 if (MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
) {
1713 md_unit_readerexit(ui
);
1714 return (mdmderror(&mgph
->mde
, MDE_RESYNC_ACTIVE
, mnum
));
1717 if (UNIT_STATE(un
) & RUS_LAST_ERRED
) {
1718 md_unit_readerexit(ui
);
1719 return (mdmderror(&mgph
->mde
, MDE_RAID_LAST_ERRED
, mnum
));
1722 if (UNIT_STATE(un
) & RUS_DOI
) {
1723 md_unit_readerexit(ui
);
1724 return (mdmderror(&mgph
->mde
, MDE_RAID_DOI
, mnum
));
1727 if ((raid_state_cnt(un
, RCS_INIT
) != 0) || (un
->un_state
& RUS_INIT
)) {
1728 md_unit_readerexit(ui
);
1729 return (mdmderror(&mgph
->mde
, MDE_IN_USE
, mnum
));
1732 md_unit_readerexit(ui
);
1734 if ((un
= raid_getun(mnum
, &mgph
->mde
, WRITERS
, lock
)) ==
1738 if (MD_STATUS(un
) & MD_UN_GROW_PENDING
)
1739 return (mdmderror(&mgph
->mde
, MDE_IN_USE
, mnum
));
1741 if (MD_STATUS(un
) & MD_UN_RESYNC_ACTIVE
)
1742 return (mdmderror(&mgph
->mde
, MDE_RESYNC_ACTIVE
, mnum
));
1744 if (un
->c
.un_size
>= mgph
->size
)
1747 if (UNIT_STATE(un
) & RUS_LAST_ERRED
)
1748 return (mdmderror(&mgph
->mde
, MDE_RAID_LAST_ERRED
, mnum
));
1750 if (UNIT_STATE(un
) & RUS_DOI
)
1751 return (mdmderror(&mgph
->mde
, MDE_RAID_DOI
, mnum
));
1753 if ((raid_state_cnt(un
, RCS_INIT
) != 0) || (un
->un_state
& RUS_INIT
))
1754 return (mdmderror(&mgph
->mde
, MDE_IN_USE
, mnum
));
1756 setno
= MD_MIN2SET(mnum
);
1758 typ1
= (mddb_type_t
)md_getshared_key(setno
,
1759 raid_md_ops
.md_driver
.md_drivername
);
1762 * Preserve the friendly name nature of the device that is
1765 options
= MD_CRO_RAID
;
1766 if (un
->c
.un_revision
& MD_FN_META_DEV
)
1767 options
|= MD_CRO_FN
;
1768 if (mgph
->options
& MD_CRO_64BIT
) {
1770 return (mdmderror(&mgph
->mde
, MDE_UNIT_TOO_LARGE
, mnum
));
1772 mr_recid
= mddb_createrec(mgph
->size
, typ1
, 0,
1773 MD_CRO_64BIT
| options
, setno
);
1776 mr_recid
= mddb_createrec(mgph
->size
, typ1
, 0,
1777 MD_CRO_32BIT
| options
, setno
);
1780 rval
= mddbstatus2error(&mgph
->mde
, (int)mr_recid
,
1785 /* get the address of the new unit */
1786 new_un
= (mr_unit_t
*)mddb_getrecaddr(mr_recid
);
1789 * It is okay that we muck with the new unit here,
1790 * since no one else will know about the unit struct
1791 * until we commit it. If we crash, the record will
1792 * be automatically purged, since we haven't
1793 * committed it yet and the old unit struct will be found.
1796 /* copy in the user's unit struct */
1797 err
= ddi_copyin((void *)(uintptr_t)mgph
->mdp
, new_un
,
1800 mddb_deleterec_wrapper(mr_recid
);
1804 /* make sure columns are being added */
1805 if (un
->un_totalcolumncnt
>= new_un
->un_totalcolumncnt
) {
1806 mddb_deleterec_wrapper(mr_recid
);
1811 * Save a few of the new unit structs fields.
1812 * Before they get clobbered.
1814 tc
= new_un
->un_totalcolumncnt
;
1815 tb
= new_un
->c
.un_total_blocks
;
1816 atb
= new_un
->c
.un_actual_tb
;
1817 unrev
= new_un
->c
.un_revision
;
1820 * Copy the old unit struct (static stuff)
1821 * into new unit struct
1823 bcopy((caddr_t
)un
, (caddr_t
)new_un
, un
->c
.un_size
);
1826 * Restore a few of the new unit struct values.
1828 new_un
->un_totalcolumncnt
= tc
;
1829 new_un
->c
.un_actual_tb
= atb
;
1830 new_un
->un_grow_tb
= tb
;
1831 new_un
->c
.un_revision
= unrev
;
1832 new_un
->c
.un_record_id
= mr_recid
;
1833 new_un
->c
.un_size
= mgph
->size
;
1835 ASSERT(new_un
->mr_ic
== un
->mr_ic
);
1838 * Save old column slots
1840 mrc
= un
->un_column_ic
;
1843 * Allocate new column slot
1845 new_un
->un_column_ic
= (mr_column_ic_t
*)
1846 kmem_zalloc(sizeof (mr_column_ic_t
) * new_un
->un_totalcolumncnt
,
1850 * Restore old column slots
1851 * Free the old column slots
1853 bcopy(mrc
, new_un
->un_column_ic
,
1854 sizeof (mr_column_ic_t
) * un
->un_totalcolumncnt
);
1855 kmem_free(mrc
, sizeof (mr_column_ic_t
) * un
->un_totalcolumncnt
);
1857 /* All 64 bit metadevices only support EFI labels. */
1858 if (mgph
->options
& MD_CRO_64BIT
) {
1859 new_un
->c
.un_flag
|= MD_EFILABEL
;
1861 * If the device was previously smaller than a terabyte,
1862 * and had a vtoc record attached to it, we remove the
1863 * vtoc record, because the layout has changed completely.
1865 if (((un
->c
.un_revision
& MD_64BIT_META_DEV
) == 0) &&
1866 (un
->c
.un_vtoc_id
!= 0)) {
1867 old_vtoc
= un
->c
.un_vtoc_id
;
1868 new_un
->c
.un_vtoc_id
=
1869 md_vtoc_to_efi_record(old_vtoc
, setno
);
1875 * allocate the real recids array. since we may have to commit
1876 * underlying metadevice records, we need an array of size:
1877 * total number of new components being attach + 2 (one for the
1878 * raid itself, one for the end marker).
1880 num_recs
= new_un
->un_totalcolumncnt
+ 2;
1882 recids
= kmem_alloc(num_recs
* sizeof (mddb_recid_t
), KM_SLEEP
);
1883 recids
[rid
++] = mr_recid
;
1885 for (col
= un
->un_totalcolumncnt
;
1886 (col
< new_un
->un_totalcolumncnt
); col
++) {
1887 mr_column_t
*mr_col
= &new_un
->un_column
[col
];
1890 if (raid_build_pw_reservation(new_un
, col
) != 0) {
1891 /* release pwslots already allocated by grow */
1892 for (i
= un
->un_totalcolumncnt
; i
< col
; i
++) {
1893 raid_free_pw_reservation(new_un
, i
);
1895 kmem_free(new_un
->un_column_ic
,
1896 sizeof (mr_column_ic_t
) *
1897 new_un
->un_totalcolumncnt
);
1898 kmem_free(new_un
->mr_ic
, sizeof (*un
->mr_ic
));
1899 kmem_free(recids
, num_recs
* sizeof (mddb_recid_t
));
1900 mddb_deleterec_wrapper(mr_recid
);
1904 * set parent on metadevices being added.
1905 * NOTE: currently soft partitions are the only metadevices
1906 * which can appear within a RAID metadevice.
1908 if (md_getmajor(mr_col
->un_dev
) == md_major
) {
1909 comp_un
= MD_UNIT(md_getminor(mr_col
->un_dev
));
1910 recids
[rid
++] = MD_RECID(comp_un
);
1911 md_set_parent(mr_col
->un_dev
, MD_SID(new_un
));
1913 new_un
->un_column
[col
].un_devflags
= 0;
1916 /* set end marker */
1919 /* commit new unit struct */
1920 mddb_commitrecs_wrapper(recids
);
1922 /* delete old unit struct */
1923 mddb_deleterec_wrapper(un
->c
.un_record_id
);
1925 /* place new unit in in-core array */
1926 md_nblocks_set(mnum
, new_un
->c
.un_total_blocks
);
1927 MD_UNIT(mnum
) = new_un
;
1930 * If old_vtoc has a non zero value, we know:
1931 * - This unit crossed the border from smaller to larger one TB
1932 * - There was a vtoc record for the unit,
1933 * - This vtoc record is no longer needed, because
1934 * a new efi record has been created for this un.
1936 if (old_vtoc
!= 0) {
1937 mddb_deleterec_wrapper(old_vtoc
);
1941 kmem_free(recids
, num_recs
* sizeof (mddb_recid_t
));
1943 SE_NOTIFY(EC_SVM_CONFIG
, ESC_SVM_GROW
, SVM_TAG_METADEVICE
,
1944 MD_UN2SET(new_un
), MD_SID(new_un
));
1945 MD_STATUS(new_un
) |= MD_UN_GROW_PENDING
;
1948 * Since the md_ioctl_writelock aquires the unit write lock
1949 * and open/close aquires the unit reader lock it is necessary
1950 * to drop the unit write lock and then reaquire it as needed
1953 md_unit_writerexit(ui
);
1955 if (raid_internal_open(mnum
, (FREAD
| FWRITE
), OTYP_LYR
, 0)) {
1956 rval
= mdmderror(&mgph
->mde
, MDE_RAID_OPEN_FAILURE
, mnum
);
1957 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_OPEN_FAIL
, SVM_TAG_METADEVICE
,
1958 MD_UN2SET(new_un
), MD_SID(new_un
));
1961 (void) md_unit_writerlock(ui
);
1962 for (i
= 0; i
< new_un
->un_totalcolumncnt
; i
++) {
1963 if (new_un
->un_column
[i
].un_devstate
& RCS_OKAY
)
1964 (void) init_pw_area(new_un
, new_un
->un_column
[i
].un_dev
,
1965 new_un
->un_column
[i
].un_pwstart
, i
);
1967 md_unit_writerexit(ui
);
1968 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
1969 (void) md_unit_writerlock(ui
);
1970 /* create a background thread to initialize the columns */
1971 md_ioctl_droplocks(lock
);
1973 return (raid_init_unit(mnum
, &mgph
->mde
));
1978 * DESCRIPTION: used to reset (clear / remove) a RAID metadevice
1979 * PARAMETERS: md_i_reset_t *mirp - pointer to reset data structure
1981 * LOCKS: obtains and releases md_unit_array_rw write lock
/*
 * raid_reset(): clears / removes a RAID metadevice (fragment; the return
 * type line and several interior lines — braces, declarations, the final
 * return — were dropped by extraction, so the surviving text is preserved
 * verbatim).
 * Refuses the reset if the unit has a parent, is open, or is not in the
 * RUS_OKAY state without the force flag. Runs under the md_unit_array_rw
 * writer lock for its whole duration.
 */
1985 raid_reset(md_i_reset_t
*mirp
)
1987 minor_t mnum
= mirp
->mnum
;
1990 set_t setno
= MD_MIN2SET(mnum
);
1992 mdclrerror(&mirp
->mde
);
/* Take the global unit-array writer lock for the whole reset. */
1994 rw_enter(&md_unit_array_rw
.lock
, RW_WRITER
);
1996 * NOTE: need to get md_unit_writerlock to avoid conflict
1997 * with raid_init thread.
1999 if ((un
= raid_getun(mnum
, &mirp
->mde
, NO_LOCK
, NULL
)) ==
2001 rw_exit(&md_unit_array_rw
.lock
);
2004 ui
= MDI_UNIT(mnum
);
/* A unit that is a component of another metadevice cannot be reset. */
2006 if (MD_HAS_PARENT(MD_PARENT(un
))) {
2007 rw_exit(&md_unit_array_rw
.lock
);
2008 return (mdmderror(&mirp
->mde
, MDE_IN_USE
, mnum
));
/* Check for opens under the openclose lock; an open unit can't go. */
2011 un
= (mr_unit_t
*)md_unit_openclose_enter(ui
);
2012 if (md_unit_isopen(MDI_UNIT(mnum
))) {
2013 md_unit_openclose_exit(ui
);
2014 rw_exit(&md_unit_array_rw
.lock
);
2015 return (mdmderror(&mirp
->mde
, MDE_IS_OPEN
, mnum
));
2017 md_unit_openclose_exit(ui
);
/* A non-OKAY unit requires the -f force flag to be cleared. */
2018 if (UNIT_STATE(un
) != RUS_OKAY
&& !mirp
->force
) {
2019 rw_exit(&md_unit_array_rw
.lock
);
2020 return (mdmderror(&mirp
->mde
, MDE_RAID_NEED_FORCE
, mnum
));
/* All checks passed: tear the unit down. */
2023 reset_raid(un
, mnum
, 1);
2026 * Update unit availability
2028 md_set
[setno
].s_un_avail
++;
2031 * If MN set, reset s_un_next so all nodes can have
2032 * the same view of the next available slot when
2033 * nodes are -w and -j
2035 if (MD_MNSET_SETNO(setno
)) {
2036 (void) md_upd_set_unnext(setno
, MD_MIN2UNIT(mnum
));
2039 rw_exit(&md_unit_array_rw
.lock
);
2045 * NAME: raid_get_geom
2046 * DESCRIPTION: used to get the geometry of a RAID metadevice
2047 * PARAMETERS: mr_unit_t *un - RAID unit to get the geometry for
2048 * struct dk_geom *gp - pointer to geometry data structure
/* Fragment of raid_get_geom(): delegates to the generic md layer. */
2056 struct dk_geom
*geomp
2059 md_get_geom((md_unit_t
*)un
, geomp
);
2065 * NAME: raid_get_vtoc
2066 * DESCRIPTION: used to get the VTOC on a RAID metadevice
2067 * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from
2068 * struct vtoc *vtocp - pointer to VTOC data structure
/* Fragment of raid_get_vtoc(): delegates to the generic md layer. */
2079 md_get_vtoc((md_unit_t
*)un
, vtocp
);
2085 * NAME: raid_set_vtoc
2086 * DESCRIPTION: used to set the VTOC on a RAID metadevice
2087 * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on
2088 * struct vtoc *vtocp - pointer to VTOC data structure
/* Fragment of raid_set_vtoc(): returns the generic md layer's result. */
2099 return (md_set_vtoc((md_unit_t
*)un
, vtocp
));
2104 * NAME: raid_get_extvtoc
2105 * DESCRIPTION: used to get the extended VTOC on a RAID metadevice
2106 * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from
2107 * struct extvtoc *vtocp - pointer to extended VTOC data structure
/* Fragment of raid_get_extvtoc(): delegates to the generic md layer. */
2115 struct extvtoc
*vtocp
2118 md_get_extvtoc((md_unit_t
*)un
, vtocp
);
2124 * NAME: raid_set_extvtoc
2125 * DESCRIPTION: used to set the extended VTOC on a RAID metadevice
2126 * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on
2127 * struct extvtoc *vtocp - pointer to extended VTOC data structure
/* Fragment of raid_set_extvtoc(): returns the generic md layer's result. */
2135 struct extvtoc
*vtocp
2138 return (md_set_extvtoc((md_unit_t
*)un
, vtocp
));
2144 * NAME: raid_get_cgapart
2145 * DESCRIPTION: used to get the dk_map on a RAID metadevice
2146 * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on
2147 * struct vtoc *dkmapp - pointer to dk_map data structure
/* Fragment of raid_get_cgapart(): delegates to the generic md layer. */
2156 struct dk_map
*dkmapp
2159 md_get_cgapart((md_unit_t
*)un
, dkmapp
);
2164 * NAME: raid_getdevs
2165 * DESCRIPTION: return all devices within a RAID metadevice
2166 * PARAMETERS: md_getdevs_params_t *mgdp
2167 * - pointer to getdevs IOCTL data structure
2168 * int mode - should be FREAD
2169 * IOLOCK *lockp - IOCTL read/write lock
2171 * LOCKS: obtains unit reader lock via IOLOCK
/*
 * Body fragment of raid_getdevs(): copies the device number of every
 * column (and, when a column is hot-spared, the hot-spare device as a
 * second entry) out to the user's md_getdevs_params_t array.
 * NOTE(review): extraction dropped the signature and some interior lines
 * (error returns, braces); the surviving text is preserved verbatim.
 */
2185 md_dev64_t unit_dev
;
2186 md_getdevs_params_t
*mgdph
= mgdp
;
2191 /* check out unit */
2192 mdclrerror(&mgdph
->mde
);
/* Reader lock suffices: this ioctl only reads column device numbers. */
2194 if ((un
= raid_getun(mnum
, &mgdph
->mde
, RD_LOCK
, lock
)) == NULL
)
2197 udevs
= (md_dev64_t
*)(uintptr_t)mgdph
->devs
;
/* cnt tracks entries written; only copy out while it fits user's array. */
2199 for (cnt
= 0, i
= 0; i
< un
->un_totalcolumncnt
; i
++, cnt
++) {
2200 if (cnt
< mgdph
->cnt
) {
2201 unit_dev
= un
->un_column
[i
].un_orig_dev
;
/* Non-metadevice components are translated to their target dev. */
2202 if (md_getmajor(unit_dev
) != md_major
) {
2203 if ((unit_dev
= md_xlate_mini_2_targ
2204 (unit_dev
)) == NODEV64
)
2208 if (ddi_copyout((caddr_t
)&unit_dev
,
2209 (caddr_t
)&udevs
[cnt
], sizeof (*udevs
), mode
) != 0)
/* Hot-spared column: also report the active (hot-spare) device. */
2212 if (HOTSPARED(un
, i
)) {
2214 if (cnt
>= mgdph
->cnt
)
2217 unit_dev
= un
->un_column
[i
].un_dev
;
2218 if (md_getmajor(unit_dev
) != md_major
) {
2219 if ((unit_dev
= md_xlate_mini_2_targ
2220 (unit_dev
)) == NODEV64
)
2224 if (ddi_copyout((caddr_t
)&unit_dev
,
2225 (caddr_t
)&udevs
[cnt
], sizeof (*udevs
), mode
) != 0)
2235 * DESCRIPTION: used to change the following dynamic values:
2236 * the hot spare pool
2237 * in the unit structure of a RAID metadevice
2238 * PARAMETERS: md_change_params_t *mcp - pointer to change data structure
2239 * IOLOCK *lock - pointer to IOCTL lock
2241 * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun)
/*
 * Body fragment of raid_change(): swaps the hot spare pool associated
 * with a RAID unit. Refuses the change while any hot spare is in use;
 * increments the refcount of the new pool and decrements the old one's,
 * rolling back the increment if the decrement fails.
 * NOTE(review): extraction dropped the signature and some interior lines;
 * the surviving text is preserved verbatim.
 */
2246 md_raid_params_t
*mrp
,
2250 minor_t mnum
= mrp
->mnum
;
2253 mddb_recid_t recids
[3] = {0, 0, 0};
2256 int inc_new_hsp
= 0;
2258 mdclrerror(&mrp
->mde
);
/* Writer lock: this ioctl mutates the unit structure. */
2260 if ((un
= raid_getun(mnum
, &mrp
->mde
, WR_LOCK
, lock
)) == NULL
)
/* Nothing to do unless the caller asked for a hot-spare-pool change. */
2263 if (!mrp
->params
.change_hsp_id
)
2266 /* verify that no hotspare is in use */
2267 for (ix
= 0; ix
< un
->un_totalcolumncnt
; ix
++) {
2268 if (HOTSPARED(un
, ix
)) {
2269 return (mdmderror(&mrp
->mde
, MDE_HS_IN_USE
, mnum
));
2273 /* replace the hot spare pool */
2276 if (mrp
->params
.hsp_id
!= -1) {
2277 /* increment the reference count of the new hsp */
2278 err
= md_hot_spare_ifc(HSP_INCREF
, mrp
->params
.hsp_id
, 0, 0,
2279 &recids
[0], NULL
, NULL
, NULL
);
2281 return (mdhsperror(&mrp
->mde
, MDE_INVAL_HSP
,
2282 mrp
->params
.hsp_id
));
2288 if (un
->un_hsp_id
!= -1) {
2289 /* decrement the reference count of the old hsp */
2290 err
= md_hot_spare_ifc(HSP_DECREF
, un
->un_hsp_id
, 0, 0,
2291 &recids
[irecid
], NULL
, NULL
, NULL
);
/*
 * NOTE(review): this failure path reports mrp->params.hsp_id (the new
 * pool) even though the failing operation was the DECREF of the old
 * pool (un->un_hsp_id) — confirm whether the old id should be
 * reported here instead.
 */
2293 err
= mdhsperror(&mrp
->mde
, MDE_INVAL_HSP
,
2294 mrp
->params
.hsp_id
);
/* Roll back the INCREF taken on the new pool above. */
2296 (void) md_hot_spare_ifc(HSP_DECREF
,
2297 mrp
->params
.hsp_id
, 0, 0,
2298 &recids
[0], NULL
, NULL
, NULL
);
2300 * Don't need to commit the record,
2301 * because it wasn't committed before
/* Install the new pool id and commit the updated records. */
2308 un
->un_hsp_id
= mrp
->params
.hsp_id
;
2310 raid_commit(un
, recids
);
2311 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_CHANGE
, SVM_TAG_METADEVICE
,
2312 MD_UN2SET(un
), MD_SID(un
));
2314 /* Now trigger hot spare processing in case one is needed. */
2315 if ((un
->un_hsp_id
!= -1) && (un
->un_state
== RUS_ERRED
))
2316 (void) raid_hotspares();
2322 * NAME: raid_admin_ioctl
2323 * DESCRIPTION: IOCTL operations unique to metadevices and RAID
2324 * PARAMETERS: int cmd - IOCTL command to be executed
2325 * void *data - pointer to IOCTL data structure
2326 * int mode - either FREAD or FWRITE
2327 * IOLOCK *lockp - IOCTL read/write lock
2344 /* We can only handle 32-bit clients for internal commands */
2345 if ((mode
& DATAMODEL_MASK
) != DATAMODEL_ILP32
) {
2350 /* dispatch ioctl */
2355 if (! (mode
& FWRITE
))
2358 sz
= sizeof (md_set_params_t
);
2359 d
= kmem_alloc(sz
, KM_SLEEP
);
2361 if (ddi_copyin(data
, d
, sz
, mode
)) {
2366 err
= raid_set(d
, mode
);
2372 if (! (mode
& FREAD
))
2375 sz
= sizeof (md_i_get_t
);
2376 d
= kmem_alloc(sz
, KM_SLEEP
);
2378 if (ddi_copyin(data
, d
, sz
, mode
)) {
2383 err
= raid_get(d
, mode
, lockp
);
2389 if (! (mode
& FWRITE
))
2392 sz
= sizeof (replace_params_t
);
2393 d
= kmem_alloc(sz
, KM_SLEEP
);
2395 if (ddi_copyin(data
, d
, sz
, mode
)) {
2400 err
= raid_replace((replace_params_t
*)d
, lockp
);
2406 if (! (mode
& FWRITE
))
2409 sz
= sizeof (md_resync_ioctl_t
);
2410 d
= kmem_alloc(sz
, KM_SLEEP
);
2412 if (ddi_copyin(data
, d
, sz
, mode
)) {
2417 err
= raid_set_sync((md_resync_ioctl_t
*)d
, lockp
);
2423 if (! (mode
& FREAD
))
2426 sz
= sizeof (md_resync_ioctl_t
);
2427 d
= kmem_alloc(sz
, KM_SLEEP
);
2429 if (ddi_copyin(data
, d
, sz
, mode
)) {
2433 err
= raid_get_resync((md_resync_ioctl_t
*)d
, lockp
);
2440 if (! (mode
& FWRITE
))
2443 sz
= sizeof (md_grow_params_t
);
2444 d
= kmem_alloc(sz
, KM_SLEEP
);
2446 if (ddi_copyin(data
, d
, sz
, mode
)) {
2451 err
= raid_grow(d
, mode
, lockp
);
2457 if (! (mode
& FWRITE
))
2460 sz
= sizeof (md_raid_params_t
);
2461 d
= kmem_alloc(sz
, KM_SLEEP
);
2463 if (ddi_copyin(data
, d
, sz
, mode
)) {
2468 err
= raid_change((md_raid_params_t
*)d
, lockp
);
2474 if (! (mode
& FWRITE
))
2477 sz
= sizeof (md_i_reset_t
);
2478 d
= kmem_alloc(sz
, KM_SLEEP
);
2480 if (ddi_copyin(data
, d
, sz
, mode
)) {
2485 err
= raid_reset((md_i_reset_t
*)d
);
2489 case MD_IOCGET_DEVS
:
2491 if (! (mode
& FREAD
))
2494 sz
= sizeof (md_getdevs_params_t
);
2495 d
= kmem_alloc(sz
, KM_SLEEP
);
2497 if (ddi_copyin(data
, d
, sz
, mode
)) {
2502 err
= raid_getdevs(d
, mode
, lockp
);
2506 case MD_IOCSETREGEN
:
2508 if (! (mode
& FWRITE
))
2511 sz
= sizeof (md_regen_param_t
);
2512 d
= kmem_alloc(sz
, KM_SLEEP
);
2514 if (ddi_copyin(data
, d
, sz
, mode
)) {
2519 err
= raid_regen((md_regen_param_t
*)d
, lockp
);
2523 case MD_IOCPROBE_DEV
:
2525 md_probedev_impl_t
*p
= NULL
;
2526 md_probedev_t
*ph
= NULL
;
2527 daemon_queue_t
*hdr
= NULL
;
2532 if (! (mode
& FREAD
))
2535 sz
= sizeof (md_probedev_t
);
2537 d
= kmem_alloc(sz
, KM_SLEEP
);
2539 /* now copy in the data */
2540 if (ddi_copyin(data
, d
, sz
, mode
)) {
2546 * Sanity test the args. Test name should have the keyword
2549 p
= kmem_alloc(sizeof (md_probedev_impl_t
), KM_SLEEP
);
2550 p
->probe_sema
= NULL
;
2552 p
->probe
.mnum_list
= (uint64_t)NULL
;
2554 ph
= (md_probedev_t
*)d
;
2555 p
->probe
.nmdevs
= ph
->nmdevs
;
2556 (void) strcpy(p
->probe
.test_name
, ph
->test_name
);
2557 bcopy(&ph
->md_driver
, &(p
->probe
.md_driver
),
2558 sizeof (md_driver_t
));
2560 if ((p
->probe
.nmdevs
< 1) ||
2561 (strstr(p
->probe
.test_name
, "probe") == NULL
)) {
2566 sz1
= sizeof (minor_t
) * p
->probe
.nmdevs
;
2568 p
->probe
.mnum_list
= (uint64_t)(uintptr_t)kmem_alloc(sz1
,
2571 if (ddi_copyin((caddr_t
)(uintptr_t)ph
->mnum_list
,
2572 (caddr_t
)(uintptr_t)p
->probe
.mnum_list
, sz1
, mode
)) {
2577 if (err
= md_init_probereq(p
, &hdr
))
2581 * put the request on the queue and wait.
2584 daemon_request_new(&md_ff_daemonq
, md_probe_one
, hdr
, REQ_NEW
);
2586 (void) IOLOCK_RETURN(0, lockp
);
2587 /* wait for the events to occur */
2588 for (i
= 0; i
< p
->probe
.nmdevs
; i
++) {
2589 sema_p(PROBE_SEMA(p
));
2591 while (md_ioctl_lock_enter() == EINTR
)
2595 * clean up. The hdr list is freed in the probe routines
2596 * since the list is NULL by the time we get here.
2600 if (p
->probe_sema
!= NULL
) {
2601 sema_destroy(PROBE_SEMA(p
));
2602 kmem_free(p
->probe_sema
, sizeof (ksema_t
));
2604 if (p
->probe_mx
!= NULL
) {
2605 mutex_destroy(PROBE_MX(p
));
2606 kmem_free(p
->probe_mx
, sizeof (kmutex_t
));
2608 if (p
->probe
.mnum_list
)
2609 kmem_free((caddr_t
)(uintptr_t)
2610 p
->probe
.mnum_list
, sz1
);
2612 kmem_free(p
, sizeof (md_probedev_impl_t
));
2622 * copyout and free any args
2626 if (ddi_copyout(d
, data
, sz
, mode
) != 0) {
2636 * NAME: md_raid_ioctl
2637 * DESCRIPTION: RAID metadevice IOCTL operations entry point.
2638 * PARAMETERS: md_dev64_t dev - RAID device identifier
2639 * int cmd - IOCTL command to be executed
2640 * void *data - pointer to IOCTL data structure
2641 * int mode - either FREAD or FWRITE
/*
 * md_raid_ioctl() dispatch body (fragmentary).
 * NOTE(review): the leading numbers fused into each line (e.g. "2656") are
 * original source line numbers left behind by extraction; the jumps in that
 * numbering show that case labels, braces, break/return statements and the
 * function signature itself are missing here.  Do not treat this fragment
 * as compilable -- reconcile against the original file before editing.
 */
2642 * IOLOCK *lockp - IOCTL read/write lock
2656 minor_t mnum
= getminor(dev
);
2660 /* handle admin ioctls */
2661 if (mnum
== MD_ADM_MINOR
)
2662 return (raid_admin_ioctl(cmd
, data
, mode
, lockp
));
/* validate set/unit number and look up the in-core unit structure */
2665 if ((MD_MIN2SET(mnum
) >= md_nsets
) ||
2666 (MD_MIN2UNIT(mnum
) >= md_nunits
) ||
2667 ((un
= MD_UNIT(mnum
)) == NULL
))
2670 /* is this a supported ioctl? */
2671 err
= md_check_ioctl_against_unit(cmd
, un
->c
);
2676 /* dispatch ioctl */
/*
 * NOTE(review): case label lost in extraction.  Pattern below: require
 * FREAD, kmem_alloc() a struct, copy it out to 'data', free it.
 */
2683 if (! (mode
& FREAD
))
2686 p
= kmem_alloc(sizeof (*p
), KM_SLEEP
);
2689 if (ddi_copyout((caddr_t
)p
, data
, sizeof (*p
), mode
) != 0)
2692 kmem_free(p
, sizeof (*p
));
2696 case DKIOCGMEDIAINFO
:
2700 if (! (mode
& FREAD
))
/* fill a struct dk_minfo for this unit and copy it out */
2703 get_minfo(&p
, mnum
);
2704 if (ddi_copyout(&p
, data
, sizeof (struct dk_minfo
), mode
) != 0)
/* presumably the DKIOCGGEOM case: raid_get_geom() then copyout -- TODO confirm label */
2714 if (! (mode
& FREAD
))
2717 p
= kmem_alloc(sizeof (*p
), KM_SLEEP
);
2719 if ((err
= raid_get_geom(un
, p
)) == 0) {
2720 if (ddi_copyout((caddr_t
)p
, data
, sizeof (*p
),
2725 kmem_free(p
, sizeof (*p
));
/*
 * VTOC read path: raid_get_vtoc() fills a zalloc'd vtoc; copied out
 * either natively or converted to a 32-bit vtoc32 (under _SYSCALL32).
 */
2733 if (! (mode
& FREAD
))
2736 vtoc
= kmem_zalloc(sizeof (*vtoc
), KM_SLEEP
);
2737 if ((err
= raid_get_vtoc(un
, vtoc
)) != 0) {
2738 kmem_free(vtoc
, sizeof (*vtoc
));
2742 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
2743 if (ddi_copyout(vtoc
, data
, sizeof (*vtoc
), mode
))
2748 struct vtoc32
*vtoc32
;
2750 vtoc32
= kmem_zalloc(sizeof (*vtoc32
), KM_SLEEP
);
2752 vtoctovtoc32((*vtoc
), (*vtoc32
));
2753 if (ddi_copyout(vtoc32
, data
, sizeof (*vtoc32
), mode
))
2755 kmem_free(vtoc32
, sizeof (*vtoc32
));
2757 #endif /* _SYSCALL32 */
2759 kmem_free(vtoc
, sizeof (*vtoc
));
/*
 * VTOC write path: copyin (native, or vtoc32 converted up under
 * _SYSCALL32), then raid_set_vtoc().  Requires FWRITE.
 */
2767 if (! (mode
& FWRITE
))
2770 vtoc
= kmem_zalloc(sizeof (*vtoc
), KM_SLEEP
);
2771 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
2772 if (ddi_copyin(data
, vtoc
, sizeof (*vtoc
), mode
)) {
2778 struct vtoc32
*vtoc32
;
2780 vtoc32
= kmem_zalloc(sizeof (*vtoc32
), KM_SLEEP
);
2782 if (ddi_copyin(data
, vtoc32
, sizeof (*vtoc32
), mode
)) {
2785 vtoc32tovtoc((*vtoc32
), (*vtoc
));
2787 kmem_free(vtoc32
, sizeof (*vtoc32
));
2789 #endif /* _SYSCALL32 */
2792 err
= raid_set_vtoc(un
, vtoc
);
2794 kmem_free(vtoc
, sizeof (*vtoc
));
/* extended-VTOC read path: raid_get_extvtoc() then copyout */
2800 struct extvtoc
*extvtoc
;
2802 if (! (mode
& FREAD
))
2805 extvtoc
= kmem_zalloc(sizeof (*extvtoc
), KM_SLEEP
);
2806 if ((err
= raid_get_extvtoc(un
, extvtoc
)) != 0) {
2807 kmem_free(extvtoc
, sizeof (*extvtoc
));
2811 if (ddi_copyout(extvtoc
, data
, sizeof (*extvtoc
), mode
))
2814 kmem_free(extvtoc
, sizeof (*extvtoc
));
/* extended-VTOC write path: copyin then raid_set_extvtoc(); requires FWRITE */
2820 struct extvtoc
*extvtoc
;
2822 if (! (mode
& FWRITE
))
2825 extvtoc
= kmem_zalloc(sizeof (*extvtoc
), KM_SLEEP
);
2826 if (ddi_copyin(data
, extvtoc
, sizeof (*extvtoc
), mode
)) {
2831 err
= raid_set_extvtoc(un
, extvtoc
);
2833 kmem_free(extvtoc
, sizeof (*extvtoc
));
/* dk_map path: raid_get_cgapart() fills dmp; copyout dk_map or dk_map32 */
2841 if ((err
= raid_get_cgapart(un
, &dmp
)) != 0) {
2845 if ((mode
& DATAMODEL_MASK
) == DATAMODEL_NATIVE
) {
2846 if (ddi_copyout((caddr_t
)&dmp
, data
, sizeof (dmp
),
2852 struct dk_map32 dmp32
;
2854 dmp32
.dkl_cylno
= dmp
.dkl_cylno
;
2855 dmp32
.dkl_nblk
= dmp
.dkl_nblk
;
2857 if (ddi_copyout((caddr_t
)&dmp32
, data
, sizeof (dmp32
),
2861 #endif /* _SYSCALL32 */
/* EFI label get: handled by the shared metadevice helper */
2868 * This one can be done centralized,
2869 * no need to put in the same code for all types of metadevices
2871 return (md_dkiocgetefi(mnum
, data
, mode
));
/* EFI label set: handled by the shared metadevice helper */
2877 * This one can be done centralized,
2878 * no need to put in the same code for all types of metadevices
2880 return (md_dkiocsetefi(mnum
, data
, mode
));
2883 case DKIOCPARTITION
:
2885 return (md_dkiocpartition(mnum
, data
, mode
));
2894 * rename/exchange named service entry points and support functions follow.
2895 * Most functions are handled generically, except for raid-specific locking
2900 * NAME: raid_may_renexch_self
2901 * DESCRIPTION: support routine for rename check ("MDRNM_CHECK") named service
2902 * PARAMETERS: mr_unit_t *un - unit struct of raid unit to be renamed
2903 * mdi_unit_t *ui - in-core unit struct of same raid unit
2904 * md_rentxn_t *rtxnp - rename transaction state
/*
 * raid_may_renexch_self() -- rename/exchange "MDRNM_CHECK" support routine
 * (fragmentary extraction: the parameter list and several statements,
 * including the early-return bodies, are missing; the fused numbers are
 * original source line numbers).  Visible contract: validates that this
 * raid unit may take part in a rename/exchange, reporting failures via
 * mdmderror() into rtxnp->mde, and returns 0 on success.
 */
2910 raid_may_renexch_self(
2920 from_min
= rtxnp
->from
.mnum
;
2921 to_min
= rtxnp
->to
.mnum
;
/* NOTE(review): the condition guarding this error report was dropped by extraction */
2924 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_CONFIG_ERROR
,
/* a raid unit is never expected to be capable of being a metadevice child */
2929 ASSERT(!(MD_CAPAB(un
) & MD_CAN_META_CHILD
));
2930 if (MD_CAPAB(un
) & MD_CAN_META_CHILD
) {
2931 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_SOURCE_BAD
, from_min
);
/* a unit with multiple parents cannot be renamed/exchanged */
2935 if (MD_PARENT(un
) == MD_MULTI_PARENT
) {
2936 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_SOURCE_BAD
, from_min
);
2940 toplevel
= !MD_HAS_PARENT(MD_PARENT(un
));
2942 /* we're related if trying to swap with our parent */
2943 related
= (!toplevel
) && (MD_PARENT(un
) == to_min
);
2945 switch (rtxnp
->op
) {
2946 case MDRNOP_EXCHANGE
:
/* exchange with an unrelated unit is rejected */
2949 (void) mdmderror(&rtxnp
->mde
,
2950 MDE_RENAME_TARGET_UNRELATED
, to_min
);
2958 * if from is top-level and is open, then the kernel is using
2962 if (toplevel
&& md_unit_isopen(ui
)) {
2963 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_BUSY
,
/* NOTE(review): guard for this config-error report also lost in extraction */
2970 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_CONFIG_ERROR
,
2975 return (0); /* ok */
2979 * NAME: raid_rename_check
2980 * DESCRIPTION: ("MDRNM_CHECK") rename/exchange named service entry point
2981 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
2982 * raid device for rename transaction
2983 * md_rentxn_t *rtxnp - rename transaction state
/*
 * raid_rename_check() -- "MDRNM_CHECK" named-service entry point
 * (fragmentary extraction: the function name/return-type lines and the
 * error-return statements inside each 'if' are missing).  Visible logic:
 * argument validation, then a per-column scan rejecting any column that is
 * last-erred, init-erred, resyncing, erred, not-okay, or hotspared; each
 * rejection is reported through mdmderror() against the device's minor.
 */
2990 md_rendelta_t
*delta
,
/* reject NULL transaction/delta pointers */
3002 if (!delta
|| !rtxnp
|| !delta
->unp
|| !delta
->uip
) {
3003 (void) mdsyserror(&rtxnp
->mde
, EINVAL
);
3007 un
= (mr_unit_t
*)delta
->unp
;
/* every column must be healthy for a rename/exchange to proceed */
3009 for (column
= 0; column
< un
->un_totalcolumncnt
; column
++) {
3010 rcs_state_t colstate
;
3012 colstate
= un
->un_column
[column
].un_devstate
;
3014 if (colstate
& RCS_LAST_ERRED
) {
3015 (void) mdmderror(&rtxnp
->mde
, MDE_RAID_LAST_ERRED
,
3016 md_getminor(delta
->dev
));
3020 if (colstate
& RCS_INIT_ERRED
) {
3021 (void) mdmderror(&rtxnp
->mde
, MDE_RAID_DOI
,
3022 md_getminor(delta
->dev
));
3026 /* How did we get this far before detecting this? */
3027 if (colstate
& RCS_RESYNC
) {
3028 (void) mdmderror(&rtxnp
->mde
, MDE_RENAME_BUSY
,
3029 md_getminor(delta
->dev
));
3033 if (colstate
& RCS_ERRED
) {
3034 (void) mdmderror(&rtxnp
->mde
, MDE_RAID_NOT_OKAY
,
3035 md_getminor(delta
->dev
));
3039 if (!(colstate
& RCS_OKAY
)) {
3040 (void) mdmderror(&rtxnp
->mde
, MDE_RAID_NOT_OKAY
,
3041 md_getminor(delta
->dev
));
/* a hotspared column also blocks rename/exchange */
3045 if (HOTSPARED(un
, column
)) {
3046 (void) mdmderror(&rtxnp
->mde
, MDE_RAID_NOT_OKAY
,
3047 md_getminor(delta
->dev
));
3052 /* self does additional checks */
3053 if (delta
->old_role
== MDRR_SELF
) {
3054 err
= raid_may_renexch_self((mr_unit_t
*)delta
->unp
,
3061 * NAME: raid_rename_lock
3062 * DESCRIPTION: ("MDRNM_LOCK") rename/exchange named service entry point
3063 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
3064 * raid device for rename transaction
3065 * md_rentxn_t *rtxnp - rename transaction state
3067 * LOCKS: io and unit locks (taken explicitly *not* via ioctl wrappers)
/*
 * raid_rename_lock() -- "MDRNM_LOCK" named-service entry point
 * (fragmentary extraction: name/return-type lines and some statements
 * missing).  Visible logic: for the rename-target minor in a RENAME op,
 * report MDE_UNIT_NOT_SETUP; otherwise take the io writer lock and then
 * the unit writer lock explicitly (not via the ioctl lock wrappers),
 * asserting held/not-held state around each acquisition.
 */
3072 md_rendelta_t
*delta
,
3080 mnum
= md_getminor(delta
->dev
);
/* the rename target does not exist yet; nothing to lock for it */
3081 if (mnum
== rtxnp
->to
.mnum
&& rtxnp
->op
== MDRNOP_RENAME
) {
3087 (void) mdmderror(&rtxnp
->mde
, MDE_UNIT_NOT_SETUP
, mnum
);
/* take the io writer lock first, then the unit writer lock */
3097 ASSERT(!IO_WRITER_HELD(delta
->unp
));
3098 (void) md_io_writerlock(delta
->uip
);
3099 ASSERT(IO_WRITER_HELD(delta
->unp
));
3102 ASSERT(!UNIT_WRITER_HELD(delta
->unp
));
3103 (void) md_unit_writerlock(delta
->uip
);
3104 ASSERT(UNIT_WRITER_HELD(delta
->unp
));
3110 * NAME: raid_rename_unlock
3111 * DESCRIPTION: ("MDRNM_UNLOCK") rename/exchange named service entry point
3112 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
3113 * raid device for rename transaction
3114 * md_rentxn_t *rtxnp - rename transaction state
3116 * LOCKS: drops io and unit locks
/*
 * raid_rename_unlock() -- "MDRNM_UNLOCK" named-service entry point
 * (fragmentary extraction: name/return-type lines and some statements
 * missing).  Visible logic: drop the unit writer lock; unless this delta
 * both swapped roles and is open, do an internal open and re-initialize
 * the pre-write area of every RCS_OKAY column, then close; finally drop
 * the io writer lock.  Unlock order is the reverse of raid_rename_lock().
 */
3122 md_rendelta_t
*delta
,
3125 mr_unit_t
*un
= (mr_unit_t
*)delta
->unp
;
3126 minor_t mnum
= MD_SID(un
);
3133 ASSERT(UNIT_WRITER_HELD(delta
->unp
));
3134 md_unit_writerexit(delta
->uip
);
3135 ASSERT(!UNIT_WRITER_HELD(delta
->unp
));
3137 if (! (delta
->txn_stat
.role_swapped
) || ! (delta
->txn_stat
.is_open
)) {
/* open layered+init so the pre-write areas can be rewritten safely */
3140 if (raid_internal_open(mnum
, (FREAD
| FWRITE
),
3141 OTYP_LYR
, MD_OFLG_ISINIT
) == 0) {
3142 for (col
= 0; col
< un
->un_totalcolumncnt
; col
++) {
3143 if (un
->un_column
[col
].un_devstate
& RCS_OKAY
)
3144 (void) init_pw_area(un
,
3145 un
->un_column
[col
].un_dev
,
3146 un
->un_column
[col
].un_pwstart
, col
);
3148 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
3152 ASSERT(IO_WRITER_HELD(delta
->unp
));
3153 md_io_writerexit(delta
->uip
);
3154 ASSERT(!IO_WRITER_HELD(delta
->unp
));
3156 /* end of rename/exchange named service and support functions */