/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */
/*
 * NAME:	raid_ioctl.c
 *
 * DESCRIPTION: RAID driver source file containing IOCTL operations.
 *
 * ROUTINES PROVIDED FOR EXTERNAL USE:
 *	raid_commit() - commits MD database updates for a RAID metadevice
 *	md_raid_ioctl() - RAID metadevice IOCTL operations entry point.
 *
 * ROUTINES PROVIDED FOR INTERNAL USE:
 *	raid_getun() - Performs unit checking on a RAID metadevice
 *	init_col_nextio() - normal backend when zeroing column of RAID metadevice.
 *	init_col_int() - I/O interrupt while zeroing column of RAID metadevice.
 *	raid_init_columns() - Zero one or more columns of a RAID metadevice.
 *	raid_set() - used to create a RAID metadevice
 *	raid_get() - used to get the unit structure of a RAID metadevice
 *	raid_replace() - used to replace a component of a RAID metadevice
 *	raid_grow() - Concatenate to a RAID metadevice
 *	raid_change() - change dynamic values of a RAID metadevice
 *	raid_reset() - used to reset (clear / remove) a RAID metadevice
 *	raid_get_geom() - used to get the geometry of a RAID metadevice
 *	raid_get_vtoc() - used to get the VTOC on a RAID metadevice
 *	raid_set_vtoc() - used to set the VTOC on a RAID metadevice
 *	raid_get_extvtoc() - used to get the extended VTOC on a RAID metadevice
 *	raid_set_extvtoc() - used to set the extended VTOC on a RAID metadevice
 *	raid_getdevs() - return all devices within a RAID metadevice
 *	raid_admin_ioctl() - IOCTL operations unique to metadevices and RAID
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cred.h>
#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_raid.h>
#include <sys/lvm/md_convert.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>
extern int		md_status;
extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];
extern md_ops_t		raid_md_ops;
extern major_t		md_major;
extern md_krwlock_t	md_unit_array_rw;
extern mdq_anchor_t	md_done_daemon;
extern mdq_anchor_t	md_ff_daemonq;
extern int		mdopen();
extern int		mdclose();
extern void		md_probe_one(probe_req_t *);
extern int		md_init_probereq(md_probedev_impl_t *,
			    daemon_queue_t **);
extern md_resync_t	md_cpr_resync;

extern void		dump_mr_unit(mr_unit_t *);
typedef struct raid_ci {
    DAEMON_QUEUE
    struct raid_ci	*ci_next;
    mr_unit_t		*ci_un;
    int			ci_col;
    int			ci_err;
    int			ci_flag;
    size_t		ci_zerosize;
    diskaddr_t		ci_blkno;
    diskaddr_t		ci_lastblk;
    buf_t		ci_buf;
} raid_ci_t;
/* values for the ci_flag */
#define	COL_INITING	(0x0001)
#define	COL_INIT_DONE	(0x0002)
#define	COL_READY	(0x0004)
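
/*
 * Editor's note: an illustrative, non-compiled sketch. Each column being
 * zeroed gets one raid_ci_t, chained through ci_next exactly as
 * raid_init_columns() builds ci_chain below; ci_flag moves from
 * COL_INITING to COL_INIT_DONE to COL_READY. A hypothetical helper that
 * walks such a chain:
 */
#if 0
static int
count_initing_columns(raid_ci_t *ci_chain)
{
    raid_ci_t	*cur;
    int		cnt = 0;

    for (cur = ci_chain; cur != NULL; cur = cur->ci_next)
        if (cur->ci_flag == COL_INITING)
            cnt++;
    return (cnt);
}
#endif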
/*
 * NAME:	raid_getun
 * DESCRIPTION: performs a lot of unit checking on a RAID metadevice
 * PARAMETERS:	minor_t mnum - minor device number for RAID unit
 *		md_error_t *mde - pointer to error reporting structure
 *		int flags - flags controlling the checks and locking:
 *			STALE_OK - allow stale MD memory
 *			NO_OLD - unit must not exist
 *			NO_LOCK - no IOCTL lock needed
 *			WR_LOCK - write IOCTL lock needed
 *			RD_LOCK - read IOCTL lock needed
 *		IOLOCK *lock - pointer to IOCTL lock
 *
 * LOCKS:	obtains unit reader or writer lock via IOLOCK
 *
 */
static mr_unit_t *
raid_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
{
    mr_unit_t	*un;
    mdi_unit_t	*ui;
    set_t	setno = MD_MIN2SET(mnum);

    if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
        (void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
        return (NULL);
    }

    if (!(flags & STALE_OK)) {
        if (md_get_setstatus(setno) & MD_SET_STALE) {
            (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
            return (NULL);
        }
    }

    ui = MDI_UNIT(mnum);
    if (flags & NO_OLD) {
        if (ui != NULL) {
            (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
            return (NULL);
        }
        return ((mr_unit_t *)1);
    }

    if (ui == NULL) {
        (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
        return (NULL);
    }

    if (flags & ARRAY_WRITER)
        md_array_writer(lock);
    else if (flags & ARRAY_READER)
        md_array_reader(lock);

    if (!(flags & NO_LOCK)) {
        if (flags & WR_LOCK) {
            (void) md_ioctl_io_lock(lock, ui);
            (void) md_ioctl_writerlock(lock, ui);
        } else /* RD_LOCK */
            (void) md_ioctl_readerlock(lock, ui);
    }
    un = (mr_unit_t *)MD_UNIT(mnum);

    if (un->c.un_type != MD_METARAID) {
        (void) mdmderror(mde, MDE_NOT_RAID, mnum);
        return (NULL);
    }

    return (un);
}
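
/*
 * Editor's note: an illustrative, non-compiled sketch. The ioctl handlers
 * below share a common shape around raid_getun(): clear the caller's
 * error struct, look the unit up with the wanted lock flags, and return
 * 0 on NULL since the error details travel back in the mde field. The
 * handler name here is hypothetical:
 */
#if 0
static int
example_handler(md_i_get_t *migph, IOLOCK *lock)
{
    mr_unit_t	*un;

    mdclrerror(&migph->mde);
    if ((un = raid_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL)
        return (0);	/* error is reported through migph->mde */
    /* ... operate on un under the ioctl reader lock ... */
    return (0);
}
#endif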
/*
 * NAME:	raid_commit
 * DESCRIPTION: commits MD database updates for a RAID metadevice
 * PARAMETERS:	mr_unit_t *un - RAID unit to update in the MD database
 *		mddb_recid_t *extras - array of other record IDs to update
 *
 * LOCKS:	assumes caller holds unit writer lock
 *
 */
void
raid_commit(mr_unit_t *un, mddb_recid_t *extras)
{
    mddb_recid_t	*recids;
    int			ri = 0;
    int			nrecids = 0;

    if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)
        return;

    /* Count the extra recids */
    if (extras != NULL) {
        while (extras[nrecids] != 0) {
            nrecids++;
        }
    }

    /*
     * Allocate space for two recids in addition to the extras:
     * one for the unit structure, one for the null terminator.
     */
    nrecids += 2;
    recids = (mddb_recid_t *)
        kmem_zalloc(nrecids * sizeof (mddb_recid_t), KM_SLEEP);

    if (un != NULL) {
        ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1);
        recids[ri++] = un->c.un_record_id;
    }

    if (extras != NULL) {
        while (*extras != 0) {
            recids[ri++] = *extras;
            extras++;
        }
    }

    if (ri > 0) {
        mddb_commitrecs_wrapper(recids);
    }

    kmem_free(recids, nrecids * sizeof (mddb_recid_t));
}
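
/*
 * Editor's note: an illustrative, non-compiled sketch. Callers that must
 * commit related records alongside the unit pass a zero-terminated
 * array, as raid_replace() does with extra_recids[]; the helper and its
 * recid parameter here are hypothetical:
 */
#if 0
static void
example_commit(mr_unit_t *un, mddb_recid_t component_recid)
{
    mddb_recid_t	extras[2];

    extras[0] = component_recid;	/* related record to commit */
    extras[1] = 0;			/* terminator required by raid_commit */
    raid_commit(un, extras);		/* assumes unit writer lock is held */
}
#endif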
static int
raid_check_pw(mr_unit_t *un)
{
    buf_t		bp;
    char		*buf;
    mr_column_t		*colptr;
    minor_t		mnum = MD_SID(un);
    int			i;
    int			err = 0;
    minor_t		unit;

    buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);

    for (i = 0; i < un->un_totalcolumncnt; i++) {
        md_dev64_t tmpdev;

        colptr = &un->un_column[i];

        tmpdev = colptr->un_dev;
        /*
         * Open by device id
         * If this device is hotspared
         * use the hotspare key
         */
        tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
            colptr->un_hs_key : colptr->un_orig_key);
        if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
            colptr->un_dev = tmpdev;
            /* don't leak the scratch buffer on the failure path */
            kmem_free(buf, DEV_BSIZE);
            return (1);
        }
        colptr->un_dev = tmpdev;

        bzero((caddr_t)&bp, sizeof (buf_t));
        bp.b_back = &bp;
        bp.b_forw = &bp;
        bp.b_flags = B_READ | B_BUSY;
        sema_init(&bp.b_io, 0, NULL,
            SEMA_DEFAULT, NULL);
        sema_init(&bp.b_sem, 0, NULL,
            SEMA_DEFAULT, NULL);
        bp.b_edev = md_dev64_to_dev(colptr->un_dev);
        bp.b_lblkno = colptr->un_pwstart;
        bp.b_bcount = DEV_BSIZE;
        bp.b_bufsize = DEV_BSIZE;
        bp.b_un.b_addr = (caddr_t)buf;
        bp.b_offset = -1;
        (void) md_call_strategy(&bp, 0, NULL);
        if (biowait(&bp))
            err = 1;
        if (i == 0) {
            if (un->c.un_revision & MD_64BIT_META_DEV) {
                unit = ((raid_pwhdr_t *)buf)->rpw_unit;
            } else {
                unit = ((raid_pwhdr32_od_t *)buf)->rpw_unit;
            }
        }
        /*
         * depending upon being a 64-bit or 32-bit RAID, the
         * pre-write headers have a different layout
         */
        if (un->c.un_revision & MD_64BIT_META_DEV) {
            if ((((raid_pwhdr_t *)buf)->rpw_column != i) ||
                (((raid_pwhdr_t *)buf)->rpw_unit != unit))
                err = 1;
        } else {
            if ((((raid_pwhdr32_od_t *)buf)->rpw_column != i) ||
                (((raid_pwhdr32_od_t *)buf)->rpw_unit != unit))
                err = 1;
        }
        md_layered_close(colptr->un_dev, MD_OFLG_NULL);
        if (err)
            break;
    }
    kmem_free(buf, DEV_BSIZE);
    return (err);
}
/*
 * NAME:	init_col_nextio
 * DESCRIPTION: normal backend process when zeroing column of a RAID metadevice.
 * PARAMETERS:	raid_ci_t *cur - struct for column being zeroed
 *
 * LOCKS:	assumes caller holds unit reader lock,
 *		periodically releases and reacquires unit reader lock,
 *		broadcasts on unit conditional variable (un_cv)
 *
 */

#define	INIT_RLS_CNT	10
static void
init_col_nextio(raid_ci_t *cur)
{
    mr_unit_t	*un;

    un = cur->ci_un;

    cur->ci_blkno += cur->ci_zerosize;

    mutex_enter(&un->un_mx);
    /* ===> update un_percent_done */
    un->un_init_iocnt += btodb(cur->ci_buf.b_bcount);
    mutex_exit(&un->un_mx);

    /*
     * When growing a device, normal I/O is still going on.
     * The init thread still holds the unit reader lock which
     * prevents I/O from doing state changes.
     * So every INIT_RLS_CNT init I/Os, we will release the
     * unit reader lock.
     *
     * CAVEAT:
     * We know we are in the middle of a grow operation and the
     * unit cannot be grown or removed (through reset or halt)
     * so the mr_unit_t structure will not move or disappear.
     * In addition, we know that only one of the init I/Os
     * can be in col_init_nextio at a time because they are
     * placed on the md_done_daemon queue and md only processes
     * one element of this queue at a time. In addition, any
     * code that needs to acquire the unit writer lock to change
     * state is supposed to be on the md_mstr_daemon queue so
     * it can be processing while we sit here waiting to get the
     * unit reader lock back.
     */

    if (cur->ci_blkno < cur->ci_lastblk) {
        /* truncate last chunk to end_addr if needed */
        if (cur->ci_blkno + cur->ci_zerosize > cur->ci_lastblk) {
            cur->ci_zerosize = (size_t)
                (cur->ci_lastblk - cur->ci_blkno);
        }

        /* set address and length for I/O bufs */
        cur->ci_buf.b_bufsize = dbtob(cur->ci_zerosize);
        cur->ci_buf.b_bcount = dbtob(cur->ci_zerosize);
        cur->ci_buf.b_lblkno = cur->ci_blkno;

        (void) md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL);
        return;
    }
    /* finished initializing this column */
    mutex_enter(&un->un_mx);
    cur->ci_flag = COL_INIT_DONE;
    uniqtime32(&un->un_column[cur->ci_col].un_devtimestamp);
    mutex_exit(&un->un_mx);
    cv_broadcast(&un->un_cv);
}
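
/*
 * Editor's note: column zeroing is driven by a strategy/iodone cycle
 * rather than a loop. raid_init_columns() issues the first chunk with
 * md_call_strategy(), init_col_int() fires on completion and requeues
 * init_col_nextio() on md_done_daemon, which advances ci_blkno and
 * issues the next chunk until ci_lastblk is reached, at which point
 * un_cv is broadcast to wake the waiting init thread.
 */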
/*
 * NAME:	init_col_int
 * DESCRIPTION: I/O interrupt while zeroing column of a RAID metadevice.
 * PARAMETERS:	buf_t *cb - I/O buffer for which interrupt occurred
 *
 * LOCKS:	assumes caller holds unit reader or writer lock
 *
 */
static int
init_col_int(buf_t *cb)
{
    raid_ci_t	*cur;

    cur = (raid_ci_t *)cb->b_chain;
    if (cb->b_flags & B_ERROR) {
        mutex_enter(&cur->ci_un->un_mx);
        cur->ci_err = EIO;
        mutex_exit(&cur->ci_un->un_mx);
        cv_broadcast(&cur->ci_un->un_cv);
        return (1);
    }
    daemon_request(&md_done_daemon, init_col_nextio,
        (daemon_queue_t *)cur, REQ_OLD);
    return (1);
}
/*
 * NAME:	raid_init_columns
 * DESCRIPTION: Zero one or more columns of a RAID metadevice.
 * PARAMETERS:	minor_t mnum - RAID unit minor identifier
 *
 * LOCKS:	obtains and releases unit reader lock,
 *		obtains and releases unit writer lock,
 *		obtains and releases md_unit_array_rw write lock,
 *		obtains and releases unit mutex (un_mx) lock,
 *		waits on unit conditional variable (un_cv)
 *
 */
static void
raid_init_columns(minor_t mnum)
{
    mr_unit_t	*un;
    mdi_unit_t	*ui;
    raid_ci_t	*ci_chain = NULL, *cur;
    rus_state_t	state;
    caddr_t	zero_addr;
    diskaddr_t	end_off;
    size_t	zerosize;
    int		err = 0;
    int		ix;
    int		colcnt = 0;
    int		col;
    set_t	setno = MD_MIN2SET(mnum);

    /*
     * Increment the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync++;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    /*
     * Initialization is a multiple step process. The first step
     * is to go through the unit structure and start each device
     * in the init state, writing zeros over the component.
     * Next, initialize the prewrite areas, so the device can be
     * used if a metainit -k is done. Now close the components.
     *
     * Once this is complete, set the state of each component being
     * zeroed and set the correct state for the unit.
     *
     * Last, commit the records.
     */

    ui = MDI_UNIT(mnum);
    un = md_unit_readerlock(ui);

    /* check for active init on this column */
    /* exiting is cpr safe */
    if ((un->un_init_colcnt > 0) && (un->un_resync_index != -1)) {
        md_unit_readerexit(ui);
        (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
        /*
         * Decrement the raid resync count for cpr
         */
        mutex_enter(&md_cpr_resync.md_resync_mutex);
        md_cpr_resync.md_raid_resync--;
        mutex_exit(&md_cpr_resync.md_resync_mutex);
        thread_exit();
    }

    SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_START, SVM_TAG_METADEVICE, setno,
        MD_SID(un));
    un->un_init_colcnt = 0;
    un->un_init_iocnt = 0;
    end_off = un->un_pwsize + (un->un_segsize * un->un_segsincolumn);
    zerosize = (size_t)MIN((diskaddr_t)un->un_maxio, end_off);

    /* allocate zero-filled buffer */
    zero_addr = kmem_zalloc(dbtob(zerosize), KM_SLEEP);

    for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
        if (un->un_column[ix].un_devstate != RCS_INIT)
            continue;
        /* allocate new column init structure */
        cur = (raid_ci_t *)kmem_zalloc((sizeof (raid_ci_t)), KM_SLEEP);
        ASSERT(cur != NULL);
        un->un_init_colcnt++;
        cur->ci_next = ci_chain;
        ci_chain = cur;
        cur->ci_un = un;
        cur->ci_col = ix;
        cur->ci_err = 0;
        cur->ci_flag = COL_INITING;
        cur->ci_zerosize = zerosize;
        cur->ci_blkno = un->un_column[ix].un_pwstart;
        cur->ci_lastblk = cur->ci_blkno + un->un_pwsize
            + (un->un_segsize * un->un_segsincolumn);
        /* initialize static buf fields */
        cur->ci_buf.b_un.b_addr = zero_addr;
        cur->ci_buf.b_chain = (buf_t *)cur;
        cur->ci_buf.b_back = &cur->ci_buf;
        cur->ci_buf.b_forw = &cur->ci_buf;
        cur->ci_buf.b_iodone = init_col_int;
        cur->ci_buf.b_flags = B_BUSY | B_WRITE;
        cur->ci_buf.b_edev = md_dev64_to_dev(un->un_column[ix].un_dev);
        sema_init(&cur->ci_buf.b_io, 0, NULL, SEMA_DEFAULT, NULL);
        sema_init(&cur->ci_buf.b_sem, 0, NULL, SEMA_DEFAULT, NULL);
        /* set address and length for I/O bufs */
        cur->ci_buf.b_bufsize = dbtob(zerosize);
        cur->ci_buf.b_bcount = dbtob(zerosize);
        cur->ci_buf.b_lblkno = un->un_column[ix].un_pwstart;
        cur->ci_buf.b_offset = -1;

        if (! (un->un_column[ix].un_devflags & MD_RAID_DEV_ISOPEN)) {
            md_dev64_t tmpdev = un->un_column[ix].un_dev;
            /*
             * Open by device id
             * If this column is hotspared then
             * use the hotspare key
             */
            tmpdev = md_resolve_bydevid(mnum, tmpdev,
                HOTSPARED(un, ix) ?
                un->un_column[ix].un_hs_key :
                un->un_column[ix].un_orig_key);
            if ((cur->ci_err = md_layered_open(mnum, &tmpdev,
                MD_OFLG_NULL)) == 0)
                un->un_column[ix].un_devflags |=
                    MD_RAID_DEV_ISOPEN;
            un->un_column[ix].un_dev = tmpdev;
        }
        if (cur->ci_err == 0)
            md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL);
    }

    md_unit_readerexit(ui);
    state = un->un_state;
    colcnt = un->un_init_colcnt;
    mutex_enter(&un->un_mx);
    while (colcnt) {
        cv_wait(&un->un_cv, &un->un_mx);

        colcnt = 0;
        for (cur = ci_chain; cur != NULL; cur = cur->ci_next) {
            col = cur->ci_col;
            if ((cur->ci_flag != COL_INITING) || (cur->ci_err)) {
                if (cur->ci_err)
                    err = cur->ci_err;
                else if (cur->ci_flag == COL_INIT_DONE) {
                    (void) init_pw_area(un,
                        un->un_column[col].un_dev,
                        un->un_column[col].un_pwstart,
                        col);
                    cur->ci_flag = COL_READY;
                }
            } else {
                colcnt++;
            }
        }
    }
    mutex_exit(&un->un_mx);

    /* This prevents new opens */
    rw_enter(&md_unit_array_rw.lock, RW_WRITER);
    (void) md_io_writerlock(ui);
    un = (mr_unit_t *)md_unit_writerlock(ui);
    while (ci_chain) {
        cur = ci_chain;

        /* take this element out of the chain */
        ci_chain = cur->ci_next;
        /* free this element */
        sema_destroy(&cur->ci_buf.b_io);
        sema_destroy(&cur->ci_buf.b_sem);
        if (cur->ci_err)
            raid_set_state(cur->ci_un, cur->ci_col,
                RCS_INIT_ERRED, 0);
        else
            raid_set_state(cur->ci_un, cur->ci_col,
                RCS_OKAY, 0);
        kmem_free(cur, sizeof (raid_ci_t));
    }

    /* free the zeroed buffer */
    kmem_free(zero_addr, dbtob(zerosize));

    /* determine new unit state */
    if (err == 0) {
        if (state == RUS_INIT)
            un->un_state = RUS_OKAY;
        else {
            un->c.un_total_blocks = un->un_grow_tb;
            md_nblocks_set(mnum, un->c.un_total_blocks);
            un->un_grow_tb = 0;
            if (raid_state_cnt(un, RCS_OKAY) ==
                un->un_totalcolumncnt)
                un->un_state = RUS_OKAY;
        }
    } else { /* error occurred */
        if (state & RUS_INIT)
            un->un_state = RUS_DOI;
    }
    uniqtime32(&un->un_timestamp);
    MD_STATUS(un) &= ~MD_UN_GROW_PENDING;
    un->un_init_colcnt = 0;
    un->un_init_iocnt = 0;
    raid_commit(un, NULL);
    md_unit_writerexit(ui);
    (void) md_io_writerexit(ui);
    rw_exit(&md_unit_array_rw.lock);
    if (err) {
        if (un->un_state & RUS_DOI) {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL,
                SVM_TAG_METADEVICE, setno, MD_SID(un));
        } else {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED,
                SVM_TAG_METADEVICE, setno, MD_SID(un));
        }
    } else {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_SUCCESS,
            SVM_TAG_METADEVICE, setno, MD_SID(un));
    }
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
    /*
     * Decrement the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync--;
    mutex_exit(&md_cpr_resync.md_resync_mutex);
    thread_exit();
    /*NOTREACHED*/
}
static int
raid_init_unit(minor_t mnum, md_error_t *ep)
{
    mdi_unit_t	*ui;
    mr_unit_t	*un;
    int		rval, i;
    set_t	setno = MD_MIN2SET(mnum);

    ui = MDI_UNIT(mnum);
    if (md_get_setstatus(setno) & MD_SET_STALE)
        return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

    /* Don't start an init if the device is not available */
    if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    if (raid_internal_open(mnum, (FREAD | FWRITE),
        OTYP_LYR, MD_OFLG_ISINIT)) {
        rval = mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum);
        goto out;
    }

    un = md_unit_readerlock(ui);
    un->un_percent_done = 0;
    md_unit_readerexit(ui);
    /* start resync_unit thread */
    (void) thread_create(NULL, 0, raid_init_columns,
        (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri);

    return (0);

out:
    un = md_unit_writerlock(ui);
    MD_STATUS(un) &= ~MD_UN_GROW_PENDING;
    /* recover state */
    for (i = 0; i < un->un_totalcolumncnt; i++)
        if (COLUMN_STATE(un, i) == RCS_INIT)
            raid_set_state(un, i, RCS_ERRED, 0);
    if (un->un_state & RUS_INIT)
        un->un_state = RUS_DOI;
    raid_commit(un, NULL);
    md_unit_writerexit(ui);
    if (un->un_state & RUS_DOI) {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
    } else {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
    }
    return (rval);
}
/*
 * NAME:	regen_unit
 *
 * DESCRIPTION: regenerate all the parity on the raid device. This
 *		routine starts a thread that will regenerate the
 *		parity on a raid device. If an I/O error occurs during
 *		this process the entire device is placed in error.
 *
 * PARAMETERS:	minor_t mnum - RAID unit minor identifier
 */
static void
regen_unit(minor_t mnum)
{
    mdi_unit_t	*ui = MDI_UNIT(mnum);
    mr_unit_t	*un = MD_UNIT(mnum);
    buf_t	buf, *bp;
    caddr_t	buffer;
    int		err = 0;
    diskaddr_t	total_segments;
    diskaddr_t	line;
    size_t	iosize;

    /*
     * Increment raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync++;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    iosize = dbtob(un->un_segsize);
    buffer = kmem_alloc(iosize, KM_SLEEP);
    bp = &buf;
    total_segments = un->un_segsincolumn;
    SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_START, SVM_TAG_METADEVICE,
        MD_UN2SET(un), MD_SID(un));
    un->un_percent_done = 0;
    init_buf(bp, B_READ | B_BUSY, iosize);

    for (line = 0; line < total_segments; line++) {
        bp->b_lblkno = line *
            ((un->un_origcolumncnt - 1) * un->un_segsize);
        bp->b_un.b_addr = buffer;
        bp->b_bcount = iosize;
        bp->b_iodone = NULL;
        /*
         * The following assignment is only correct because
         * md_raid_strategy is fine when it's only a minor number
         * and not a real dev_t. Yuck.
         */
        bp->b_edev = mnum;
        md_raid_strategy(bp, MD_STR_NOTTOP, NULL);
        if (biowait(bp)) {
            err = 1;
            break;
        }
        un->un_percent_done = (uint_t)((line * 1000) /
            un->un_segsincolumn);
        /* just to avoid rounding errors */
        if (un->un_percent_done > 1000)
            un->un_percent_done = 1000;
        reset_buf(bp, B_READ | B_BUSY, iosize);
    }
    destroy_buf(bp);
    kmem_free(buffer, iosize);

    (void) md_io_writerlock(ui);
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
    (void) md_io_writerexit(ui);
    un = md_unit_writerlock(ui);
    if (!err &&
        (raid_state_cnt(un, RCS_OKAY) == un->un_totalcolumncnt))
        un->un_state = RUS_OKAY;
    raid_commit(un, NULL);
    md_unit_writerexit(ui);
    if (err ||
        raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_FAILED,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
    } else {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_DONE, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));
    }

    /*
     * Decrement the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync--;
    mutex_exit(&md_cpr_resync.md_resync_mutex);
    thread_exit();
}
static int
raid_regen_unit(minor_t mnum, md_error_t *ep)
{
    mdi_unit_t	*ui;
    mr_unit_t	*un;
    int		i;
    set_t	setno = MD_MIN2SET(mnum);

    ui = MDI_UNIT(mnum);
    un = (mr_unit_t *)MD_UNIT(mnum);

    if (md_get_setstatus(setno) & MD_SET_STALE)
        return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

    /* Don't start a regen if the device is not available */
    if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) {
        (void) md_unit_writerlock(ui);
        for (i = 0; i < un->un_totalcolumncnt; i++)
            raid_set_state(un, i, RCS_ERRED, 0);
        md_unit_writerexit(ui);
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    /* start resync_unit thread */
    (void) thread_create(NULL, 0, regen_unit,
        (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri);

    return (0);
}
static int
raid_regen(md_regen_param_t *mrp, IOLOCK *lock)
{
    minor_t	mnum = mrp->mnum;
    mr_unit_t	*un;

    mdclrerror(&mrp->mde);

    un = md_unit_readerlock(MDI_UNIT(mnum));

    if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(&mrp->mde, MDE_IN_USE, mnum));
    }

    if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
        (raid_state_cnt(un, RCS_RESYNC))) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(&mrp->mde, MDE_RESYNC_ACTIVE, mnum));
    }

    if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(&mrp->mde, MDE_IN_USE, mnum));
    }

    if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) ||
        (! (un->un_state & RUS_OKAY))) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum));
    }

    md_unit_readerexit(MDI_UNIT(mnum));

    /* get locks and recheck to be sure something did not change */
    if ((un = raid_getun(mnum, &mrp->mde, WRITERS, lock)) == NULL)
        return (0);

    if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) ||
        (! (un->un_state & RUS_OKAY))) {
        return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum));
    }

    raid_set_state(un, 0, RCS_REGEN, 0);
    raid_commit(un, NULL);
    md_ioctl_droplocks(lock);
    return (raid_regen_unit(mnum, &mrp->mde));
}
/*
 * NAME:	raid_set
 * DESCRIPTION: used to create a RAID metadevice
 * PARAMETERS:	md_set_params_t *d - pointer to set data structure
 *		int mode - must be FWRITE
 *
 * LOCKS:	none
 *
 */
static int
raid_set(void *d, int mode)
{
    minor_t		mnum;
    mr_unit_t		*un;
    mddb_recid_t	mr_recid;
    mddb_recid_t	*recids;
    mddb_type_t		typ1;
    int			err;
    set_t		setno;
    int			num_recs;
    int			rid;
    int			col;
    md_set_params_t	*msp = d;

    mnum = msp->mnum;
    setno = MD_MIN2SET(mnum);

    mdclrerror(&msp->mde);

    if (raid_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
        return (0);

    typ1 = (mddb_type_t)md_getshared_key(setno,
        raid_md_ops.md_driver.md_drivername);

    /* create the db record for this mdstruct */

    if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
        return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
#else
        mr_recid = mddb_createrec(msp->size, typ1, 0,
            MD_CRO_64BIT | MD_CRO_RAID | MD_CRO_FN, setno);
#endif
    } else {
        mr_recid = mddb_createrec(msp->size, typ1, 0,
            MD_CRO_32BIT | MD_CRO_RAID | MD_CRO_FN, setno);
    }

    if (mr_recid < 0)
        return (mddbstatus2error(&msp->mde,
            (int)mr_recid, mnum, setno));

    /* get the address of the mdstruct */
    un = (mr_unit_t *)mddb_getrecaddr(mr_recid);
    /*
     * It is okay that we muck with the mdstruct here,
     * since no one else will know about the mdstruct
     * until we commit it. If we crash, the record will
     * be automatically purged, since we haven't
     * committed it yet.
     */

    /* copy in the user's mdstruct */
    if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
        msp->size, mode)) {
        mddb_deleterec_wrapper(mr_recid);
        return (EFAULT);
    }
    /* All 64 bit metadevices only support EFI labels. */
    if (msp->options & MD_CRO_64BIT) {
        un->c.un_flag |= MD_EFILABEL;
    }

    /*
     * allocate the real recids array. since we may have to commit
     * underlying metadevice records, we need an array of size:
     * total number of components in raid + 3 (1 for the raid itself,
     * one for the hotspare, one for the end marker).
     */
    num_recs = un->un_totalcolumncnt + 3;
    rid = 0;
    recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
    recids[rid++] = mr_recid;

    MD_SID(un) = mnum;
    MD_RECID(un) = recids[0];
    MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SP;
    MD_PARENT(un) = MD_NO_PARENT;
    un->un_resync_copysize = 0;
    un->c.un_revision |= MD_FN_META_DEV;

    if (UNIT_STATE(un) == RUS_INIT)
        MD_STATUS(un) |= MD_UN_GROW_PENDING;

    if ((UNIT_STATE(un) != RUS_INIT) && raid_check_pw(un)) {
        mddb_deleterec_wrapper(mr_recid);
        err = mderror(&msp->mde, MDE_RAID_INVALID);
        goto out;
    }

    if (err = raid_build_incore(un, 0)) {
        if (un->mr_ic) {
            kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
                un->un_totalcolumncnt);
            kmem_free(un->mr_ic, sizeof (*un->mr_ic));
        }

        md_nblocks_set(mnum, -1ULL);
        MD_UNIT(mnum) = NULL;

        mddb_deleterec_wrapper(mr_recid);
        goto out;
    }

    /*
     * Update unit availability
     */
    md_set[setno].s_un_avail--;

    recids[rid] = 0;
    if (un->un_hsp_id != -1) {
        /* increment the reference count of the hot spare pool */
        err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0,
            &recids[rid], NULL, NULL, NULL);
        if (err) {
            md_nblocks_set(mnum, -1ULL);
            MD_UNIT(mnum) = NULL;

            mddb_deleterec_wrapper(mr_recid);
            goto out;
        }
        rid++;
    }

    /*
     * set the parent on any metadevice components.
     * NOTE: currently soft partitions are the only metadevices
     * which can appear within a RAID metadevice.
     */
    for (col = 0; col < un->un_totalcolumncnt; col++) {
        mr_column_t	*mr_col = &un->un_column[col];
        md_unit_t	*comp_un;

        if (md_getmajor(mr_col->un_dev) == md_major) {
            comp_un = MD_UNIT(md_getminor(mr_col->un_dev));
            recids[rid++] = MD_RECID(comp_un);
            md_set_parent(mr_col->un_dev, MD_SID(un));
        }
    }

    /* set the end marker */
    recids[rid] = 0;

    mddb_commitrecs_wrapper(recids);
    md_create_unit_incore(mnum, &raid_md_ops, 1);

    SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
        MD_SID(un));

out:
    kmem_free(recids, (num_recs * sizeof (mddb_recid_t)));
    if (err)
        return (err);

    /* only attempt to init a device that is in the init state */
    if (UNIT_STATE(un) != RUS_INIT)
        return (0);

    return (raid_init_unit(mnum, &msp->mde));
}
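
/*
 * Editor's note: for a raid_set() of a unit with N columns, the recids[]
 * array built above holds at most:
 *
 *	recids[0]	the new unit record (mr_recid)
 *	recids[1]	the hot spare pool record, when un_hsp_id != -1
 *	recids[...]	one record per metadevice (soft partition) component
 *	recids[last]	the zero end marker
 *
 * which is why num_recs is sized as un_totalcolumncnt + 3.
 */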
/*
 * NAME:	raid_get
 * DESCRIPTION: used to get the unit structure of a RAID metadevice
 * PARAMETERS:	md_i_get_t *migp - pointer to get data structure
 *		int mode - must be FREAD
 *		IOLOCK *lock - pointer to IOCTL lock
 *
 * LOCKS:	obtains unit reader lock via IOLOCK
 *
 */
static int
raid_get(
    void	*migp,
    int		mode,
    IOLOCK	*lock
)
{
    minor_t	mnum;
    mr_unit_t	*un;
    md_i_get_t	*migph = migp;

    mnum = migph->id;

    mdclrerror(&migph->mde);

    if ((un = raid_getun(mnum, &migph->mde,
        RD_LOCK, lock)) == NULL)
        return (0);

    if (migph->size == 0) {
        migph->size = un->c.un_size;
        return (0);
    }

    if (migph->size < un->c.un_size) {
        return (EFAULT);
    }
    if (ddi_copyout(un, (void *)(uintptr_t)migph->mdp,
        un->c.un_size, mode))
        return (EFAULT);

    return (0);
}
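
/*
 * Editor's note: raid_get() implements the usual two-step sizing
 * protocol. A first call with migph->size == 0 only reports the unit
 * size in migph->size; the caller is then expected to allocate that
 * many bytes, point migph->mdp at them, and call again to receive the
 * full unit structure copied out by ddi_copyout().
 */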
/*
 * NAME:	raid_replace
 * DESCRIPTION: used to replace a component of a RAID metadevice
 * PARAMETERS:	replace_params_t *mrp - pointer to replace data structure
 *		IOLOCK *lock - pointer to IOCTL lock
 *
 * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
 *		obtains and releases md_unit_array_rw write lock
 *
 */
static int
raid_replace(
    replace_params_t	*mrp,
    IOLOCK		*lock
)
{
    minor_t	mnum = mrp->mnum;
    md_dev64_t	odev = mrp->old_dev;
    md_error_t	*ep = &mrp->mde;
    mr_unit_t	*un;
    rcs_state_t	state;
    int		ix, col = -1;
    int		force = 0;
    int		err = 0;
    replace_cmd_t	cmd;
    set_t	setno;
    side_t	side;
    mdkey_t	devkey;
    int		nkeys;
    mddb_recid_t	extra_recids[3] = { 0, 0, 0 };
    int		extra_rids = 0;
    md_error_t	mde = mdnullerror;
    sv_dev_t	sv = {MD_SET_BAD, MD_SIDEWILD, MD_KEYWILD};

    mdclrerror(ep);
    setno = MD_MIN2SET(mnum);
    side = mddb_getsidenum(setno);

    un = md_unit_readerlock(MDI_UNIT(mnum));

    if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
        (raid_state_cnt(un, RCS_RESYNC) != 0)) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));
    }

    if (un->un_state & RUS_DOI) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(ep, MDE_RAID_DOI, mnum));
    }

    if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) ||
        (MD_STATUS(un) & MD_UN_GROW_PENDING)) {
        md_unit_readerexit(MDI_UNIT(mnum));
        return (mdmderror(ep, MDE_IN_USE, mnum));
    }

    md_unit_readerexit(MDI_UNIT(mnum));

    /* get locks and recheck to be sure something did not change */
    if ((un = raid_getun(mnum, ep, WRITERS, lock)) == NULL)
        return (0);

    if (md_getkeyfromdev(setno, side, odev, &devkey, &nkeys) != 0) {
        return (mddeverror(ep, MDE_NAME_SPACE, odev));
    }

    for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
        md_dev64_t tmpdevt = un->un_column[ix].un_orig_dev;
        /*
         * Try to resolve devt again if NODEV64
         */
        if (tmpdevt == NODEV64) {
            tmpdevt = md_resolve_bydevid(mnum, tmpdevt,
                un->un_column[ix].un_orig_key);
            un->un_column[ix].un_orig_dev = tmpdevt;
        }

        if (un->un_column[ix].un_orig_dev == odev) {
            col = ix;
            break;
        } else {
            if (un->un_column[ix].un_orig_dev == NODEV64) {
                /*
                 * Now we use the keys to match.
                 * If no key found, continue.
                 */
                if (nkeys == 0) {
                    continue;
                }
                if (un->un_column[ix].un_orig_key == devkey) {
                    if (nkeys > 1)
                        return (mddeverror(ep,
                            MDE_MULTNM, odev));
                    col = ix;
                    break;
                }
            }
        }
    }

    if (col == -1)
        return (mdcomperror(ep, MDE_CANT_FIND_COMP,
            mnum, odev));

    if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
        (raid_state_cnt(un, RCS_RESYNC) != 0))
        return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));

    if (un->un_state & RUS_DOI)
        return (mdcomperror(ep, MDE_REPL_INVAL_STATE, mnum,
            un->un_column[col].un_dev));

    if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) ||
        (MD_STATUS(un) & MD_UN_GROW_PENDING))
        return (mdmderror(ep, MDE_IN_USE, mnum));

    if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == FORCE_REPLACE_COMP))
        force = 1;
    if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == ENABLE_COMP))
        cmd = ENABLE_COMP;
    if ((mrp->cmd == FORCE_REPLACE_COMP) || (mrp->cmd == REPLACE_COMP))
        cmd = REPLACE_COMP;

    if (un->un_state == RUS_LAST_ERRED) {
        /* Must use -f force flag for unit in LAST_ERRED state */
        if (!force)
            return (mdmderror(ep, MDE_RAID_NEED_FORCE, mnum));

        /* Must use -f force flag on ERRED column first */
        if (un->un_column[col].un_devstate != RCS_ERRED) {
            for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
                if (un->un_column[ix].un_devstate & RCS_ERRED)
                    return (mdcomperror(ep,
                        MDE_RAID_COMP_ERRED, mnum,
                        un->un_column[ix].un_dev));
            }
        }

        /* must use -f force flag on LAST_ERRED columns next */
        if ((un->un_column[col].un_devstate != RCS_LAST_ERRED) &&
            (un->un_column[col].un_devstate != RCS_ERRED))
            return (mdcomperror(ep, MDE_RAID_COMP_ERRED,
                mnum, un->un_column[col].un_dev));
    }

    if (un->un_state == RUS_ERRED) {
        if (! (un->un_column[col].un_devstate &
            (RCS_ERRED | RCS_INIT_ERRED)))
            return (mdcomperror(ep, MDE_RAID_COMP_ERRED,
                mnum, un->un_column[ix].un_dev));
    }

    ASSERT(!(un->un_column[col].un_devflags & MD_RAID_ALT_ISOPEN));
    ASSERT(!(un->un_column[col].un_devflags & MD_RAID_WRITE_ALT));

    state = un->un_column[col].un_devstate;
    if (state & RCS_INIT_ERRED) {
        MD_STATUS(un) |= MD_UN_GROW_PENDING;
        un->un_percent_done = 0;
        raid_set_state(un, col, RCS_INIT, 0);
    } else if (((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) &&
        resync_request(mnum, col, 0, ep))
        return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));

    if (cmd == REPLACE_COMP) {
        md_dev64_t tmpdev = mrp->new_dev;

        /*
         * open the device by device id
         */
        tmpdev = md_resolve_bydevid(mnum, tmpdev, mrp->new_key);
        if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
            return (mdcomperror(ep, MDE_COMP_OPEN_ERR, mnum,
                tmpdev));
        }

        /*
         * If it's a metadevice, make sure it gets reparented
         */
        if (md_getmajor(tmpdev) == md_major) {
            minor_t	new_mnum = md_getminor(tmpdev);
            md_unit_t	*new_un = MD_UNIT(new_mnum);

            md_set_parent(tmpdev, MD_SID(un));
            extra_recids[extra_rids++] = MD_RECID(new_un);
        }

        mrp->new_dev = tmpdev;
        un->un_column[col].un_orig_dev = tmpdev;
        un->un_column[col].un_orig_key = mrp->new_key;
        un->un_column[col].un_orig_pwstart = mrp->start_blk;
        un->un_column[col].un_orig_devstart =
            mrp->start_blk + un->un_pwsize;

        /*
         * If the old device was a metadevice, make sure to
         * reset its parent.
         */
        if (md_getmajor(odev) == md_major) {
            minor_t	old_mnum = md_getminor(odev);
            md_unit_t	*old_un = MD_UNIT(old_mnum);

            md_reset_parent(odev);
            extra_recids[extra_rids++] =
                MD_RECID(old_un);
        }

        if (HOTSPARED(un, col)) {
            md_layered_close(mrp->new_dev, MD_OFLG_NULL);
            un->un_column[col].un_alt_dev = mrp->new_dev;
            un->un_column[col].un_alt_pwstart = mrp->start_blk;
            un->un_column[col].un_alt_devstart =
                mrp->start_blk + un->un_pwsize;
            un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC;
        } else {
            /*
             * not hot spared. Close the old device and
             * move the new device in.
             */
            if (un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN)
                md_layered_close(odev, MD_OFLG_NULL);
            un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN;
            un->un_column[col].un_dev = mrp->new_dev;
            un->un_column[col].un_pwstart = mrp->start_blk;
            un->un_column[col].un_devstart =
                mrp->start_blk + un->un_pwsize;
            if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) {
                un->un_column[col].un_devflags |=
                    MD_RAID_REGEN_RESYNC;
            }
        }
        /*
         * If the old device is not a metadevice then
         * save off the set number and key so that it
         * can be removed from the namespace later.
         */
        if (md_getmajor(odev) != md_major) {
            sv.setno = setno;
            sv.key = devkey;
        }
    }

    if (cmd == ENABLE_COMP) {
        md_dev64_t tmpdev = un->un_column[col].un_orig_dev;
        mdkey_t raidkey = un->un_column[col].un_orig_key;

        /*
         * We trust the dev_t because we cannot determine the
         * dev_t from the device id since a new disk is in the
         * same location. Since this is a call from metareplace -e dx
         * AND it is SCSI a new dev_t is not generated. So the
         * dev_t from the mddb is used. Before enabling the device
         * we check to make sure that multiple entries for the same
         * device do not exist in the namespace. If they do, we
         * fail the ioctl.
         * One of the many ways multiple entries in the name space
         * can occur is if one removed the failed component in a
         * RAID metadevice and put another disk that was part of
         * another metadevice. After reboot metadevadm would correctly
         * update the device name for the metadevice whose component
         * has moved. However now in the metadb there are two entries
         * for the same name (ctds) that belong to different
         * metadevices. One is valid, the other is a ghost or "last
         * known as" ctds.
         */
        tmpdev = md_resolve_bydevid(mnum, tmpdev, raidkey);
        if (tmpdev == NODEV64)
            tmpdev = md_getdevnum(setno, side, raidkey,
                MD_TRUST_DEVT);
        /*
         * check for multiple entries in namespace for the
         * same dev
         */

        if (md_getkeyfromdev(setno, side, tmpdev, &devkey,
            &nkeys) != 0)
            return (mddeverror(ep, MDE_NAME_SPACE, tmpdev));
        /*
         * If the number of keys is greater than
         * 1, then we have an invalid
         * namespace. STOP and return.
         */
        if (nkeys > 1)
            return (mddeverror(ep, MDE_MULTNM, tmpdev));
        if (devkey != raidkey)
            return (mdcomperror(ep, MDE_CANT_FIND_COMP,
                mnum, tmpdev));

        if (un->un_column[col].un_orig_dev == NODEV64)
            un->un_column[col].un_orig_dev = tmpdev;

        if (HOTSPARED(un, col)) {
            un->un_column[col].un_alt_dev =
                un->un_column[col].un_orig_dev;
            un->un_column[col].un_alt_pwstart =
                un->un_column[col].un_orig_pwstart;
            un->un_column[col].un_alt_devstart =
                un->un_column[col].un_orig_devstart;
            un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC;
        } else {
            if (!(un->un_column[col].un_devflags &
                MD_RAID_DEV_ISOPEN)) {
                if (md_layered_open(mnum, &tmpdev,
                    MD_OFLG_NULL)) {
                    un->un_column[col].un_dev = tmpdev;
                    return (mdcomperror(ep,
                        MDE_COMP_OPEN_ERR, mnum, tmpdev));
                }
                ASSERT(tmpdev != NODEV64 &&
                    tmpdev != 0);

                if ((md_getmajor(tmpdev) != md_major) &&
                    (md_devid_found(setno, side, raidkey)
                    == 1)) {
                    if (md_update_namespace_did(setno, side,
                        raidkey, &mde) != 0) {
                        cmn_err(CE_WARN,
                            "md: could not"
                            " update namespace\n");
                    }
                }
                un->un_column[col].un_dev =
                    un->un_column[col].un_orig_dev;
            }
            un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN;
            un->un_column[col].un_devflags |= MD_RAID_REGEN_RESYNC;
        }
    }
    if (mrp->has_label) {
        un->un_column[col].un_devflags |= MD_RAID_HAS_LABEL;
    } else {
        un->un_column[col].un_devflags &= ~MD_RAID_HAS_LABEL;
    }

    raid_commit(un, extra_recids);

    /* If the component has been replaced - clean up the name space */
    if (sv.setno != MD_SET_BAD) {
        md_rem_names(&sv, 1);
    }

    md_ioctl_droplocks(lock);

    if ((cmd == ENABLE_COMP) || (cmd == FORCE_ENABLE_COMP)) {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
            setno, MD_SID(un));
    } else {
        SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
            setno, MD_SID(un));
    }

    if (un->un_column[col].un_devstate & RCS_INIT)
        err = raid_init_unit(mnum, ep);
    else if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0)
        err = raid_resync_unit(mnum, ep);

    mdclrerror(ep);
    if (!err)
        return (0);

    /* be sure state */
    /* is already set by this time */
    /* fix state and commit record */
    un = md_unit_writerlock(MDI_UNIT(mnum));
    if (state & RCS_INIT_ERRED)
        raid_set_state(un, col, state, 1);
    else if (state & RCS_OKAY)
        raid_set_state(un, col, RCS_ERRED, 0);
    else
        raid_set_state(un, col, state, 1);
    raid_commit(un, NULL);
    md_unit_writerexit(MDI_UNIT(mnum));
    mdclrerror(ep);
    return (0);
}
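
/*
 * Editor's note: the four replace commands collapse onto two internal
 * paths above. ENABLE_COMP and FORCE_ENABLE_COMP re-enable the existing
 * component in place (metareplace -e); REPLACE_COMP and
 * FORCE_REPLACE_COMP swap mrp->new_dev into the column. The FORCE_*
 * variants additionally set the force flag that LAST_ERRED units
 * require.
 */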
/*
 * NAME:	raid_set_sync
 * DESCRIPTION: used to sync a component of a RAID metadevice
 * PARAMETERS:	md_resync_ioctl_t *rip - pointer to resync data structure
 *		int mode - must be FWRITE
 *		IOLOCK *lock - pointer to IOCTL lock
 *
 * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
 *		obtains and releases md_unit_array_rw write lock
 *
 */
static int
raid_set_sync(
    md_resync_ioctl_t	*rip,
    IOLOCK		*lock
)
{
    minor_t	mnum = rip->ri_mnum;
    mr_unit_t	*un;
    int		init = 0;
    int		resync = 0;
    int		regen = 0;
    int		ix;
    int		err;

    mdclrerror(&rip->mde);

    if ((un = raid_getun(mnum, &rip->mde, WRITERS, lock)) == NULL)
        return (0);

    if (un->un_state & RUS_DOI)
        return (mdmderror(&rip->mde, MDE_RAID_DOI, mnum));

    if (un->c.un_status & MD_UN_RESYNC_ACTIVE)
        return (mdmderror(&rip->mde, MDE_RESYNC_ACTIVE, mnum));

    /* This prevents new opens */

    rip->ri_flags = 0;
    if (un->un_state & RUS_REGEN)
        regen++;

    if (raid_state_cnt(un, RCS_RESYNC))
        resync++;

    if (raid_state_cnt(un, RCS_INIT) || (un->un_state & RUS_INIT))
        init++;

    ASSERT(!(resync && init && regen));
    md_ioctl_droplocks(lock);
    rip->ri_percent_done = 0;

    if (init) {
        MD_STATUS(un) |= MD_UN_GROW_PENDING;
        return (raid_init_unit(mnum, &rip->mde));
    }

    /*
     * If resync is needed, it will call raid_internal_open forcing
     * replay before the open completes.
     * Otherwise, call raid_internal_open directly to force
     * replay to complete during boot (metasync -r).
     * NOTE: the unit writer lock must remain held while setting
     *	 MD_UN_RESYNC_ACTIVE but must be released before
     *	 calling raid_resync_unit or raid_internal_open.
     */
    if (resync) {
        ASSERT(resync < 2);
        un = md_unit_writerlock(MDI_UNIT(mnum));
        MD_STATUS(un) |= MD_UN_RESYNC_ACTIVE;
        /* Must release unit writer lock for resync */
        /*
         * correctly setup the devices before trying to start the
         * resync operation.
         */
        for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
            if (un->un_column[ix].un_devstate & RCS_RESYNC) {
                if ((un->un_column[ix].un_devflags &
                    MD_RAID_COPY_RESYNC) &&
                    HOTSPARED(un, ix)) {
                    un->un_column[ix].un_alt_dev =
                        un->un_column[ix].un_orig_dev;
                    un->un_column[ix].un_alt_devstart =
                        un->un_column[ix].un_orig_devstart;
                    un->un_column[ix].un_alt_pwstart =
                        un->un_column[ix].un_orig_pwstart;
                }
                break;
            }
        }
        ASSERT(un->un_column[ix].un_devflags &
            (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));
        rip->ri_percent_done = 0;
        un->un_column[ix].un_devflags |= MD_RAID_RESYNC;
        (void) resync_request(mnum, ix, 0, NULL);
        md_unit_writerexit(MDI_UNIT(mnum));
        err = raid_resync_unit(mnum, &rip->mde);
        return (err);
    }

    if (regen) {
        err = raid_regen_unit(mnum, &rip->mde);
        return (err);
    }

    /* The unit requires no work, so just force replay of the device */
    if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0))
        return (mdmderror(&rip->mde,
            MDE_RAID_OPEN_FAILURE, mnum));
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);

    return (0);
}
/*
 * NAME:	raid_get_resync
 * DESCRIPTION: used to check resync status on a component of a RAID metadevice
 * PARAMETERS:	md_resync_ioctl_t *rip - pointer to resync data structure
 *		int mode - must be FWRITE
 *		IOLOCK *lock - pointer to IOCTL lock
 *
 * LOCKS:	none
 *
 */
static int
raid_get_resync(
    md_resync_ioctl_t	*rip,
    IOLOCK		*lock
)
{
    minor_t	mnum = rip->ri_mnum;
    mr_unit_t	*un;
    u_longlong_t	percent;
    int		cnt;
    int		ix;
    uint64_t	d;

    mdclrerror(&rip->mde);

    if ((un = raid_getun(mnum, &rip->mde, RD_LOCK, lock)) == NULL)
        return (0);

    rip->ri_flags = 0;
    if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
        d = un->un_segsincolumn;
        percent = d ? ((1000 * un->un_resync_line_index) / d) : 0;
        if (percent > 1000)
            percent = 1000;	/* can't go over 100% */
        rip->ri_percent_done = (int)percent;
        rip->ri_flags |= MD_RI_INPROGRESS;
    }

    if (UNIT_STATE(un) & RUS_INIT) {
        d = un->un_segsize * un->un_segsincolumn *
            un->un_totalcolumncnt;
        percent =
            d ? ((1000 * (u_longlong_t)un->un_init_iocnt) / d) : 0;
        if (percent > 1000)
            percent = 1000;	/* can't go over 100% */
        rip->ri_percent_done = (int)percent;
        rip->ri_flags |= MD_GROW_INPROGRESS;
    } else if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
        d = un->un_segsize * un->un_segsincolumn * un->un_init_colcnt;
        percent =
            d ? (((u_longlong_t)un->un_init_iocnt * 1000) / d) : 0;
        if (percent > 1000)
            percent = 1000;
        rip->ri_percent_done = (int)percent;
        rip->ri_flags |= MD_GROW_INPROGRESS;
    }

    if (un->un_state & RUS_REGEN)
        rip->ri_percent_done = un->un_percent_done;

    cnt = 0;
    for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
        switch (un->un_column[ix].un_devstate) {
        case RCS_INIT:
        case RCS_ERRED:
        case RCS_LAST_ERRED:
            cnt++;
            break;
        default:
            break;
        }
    }
    d = un->un_totalcolumncnt;
    rip->ri_percent_dirty = d ? (((u_longlong_t)cnt * 100) / d) : 0;
    return (0);
}
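
/*
 * Editor's note: progress is reported in tenths of a percent. For
 * example, with un_segsincolumn == 2000 and un_resync_line_index == 500,
 * percent = (1000 * 500) / 2000 = 250, i.e. 25.0% done; the clamp to
 * 1000 only guards against rounding artifacts near completion.
 */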
/*
 * NAME:	raid_grow
 * DESCRIPTION: Concatenate to a RAID metadevice
 * PARAMETERS:	md_grow_params_t *mgp
 *			- pointer to IOCGROW data structure
 *		int mode - must be FWRITE
 *		IOLOCK *lockp - IOCTL read/write and unit_array_rw lock
 *
 * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
 *		obtains and releases md_unit_array_rw write lock
 *
 */
static int
raid_grow(void *mgp, int mode, IOLOCK *lock)
{
    minor_t	mnum;
    mr_unit_t	*un, *new_un;
    mdi_unit_t	*ui;
    mddb_type_t	typ1;
    mddb_recid_t	mr_recid;
    mddb_recid_t	old_vtoc = 0;
    mddb_recid_t	*recids;
    md_create_rec_option_t	options;
    int		err;
    int		col, i;
    int64_t	tb, atb;
    u_longlong_t	unrev;
    int		tc;
    int		rval = 0;
    set_t	setno;
    mr_column_ic_t	*mrc;
    int		num_recs, rid;
    md_grow_params_t	*mgph = mgp;

    mnum = mgph->mnum;

    mdclrerror(&mgph->mde);

    ui = MDI_UNIT(mnum);
    un = md_unit_readerlock(ui);

    if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
        md_unit_readerexit(ui);
        return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
    }

    if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
        md_unit_readerexit(ui);
        return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum));
    }

    if (UNIT_STATE(un) & RUS_LAST_ERRED) {
        md_unit_readerexit(ui);
        return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum));
    }

    if (UNIT_STATE(un) & RUS_DOI) {
        md_unit_readerexit(ui);
        return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum));
    }

    if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) {
        md_unit_readerexit(ui);
        return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
    }

    md_unit_readerexit(ui);

    if ((un = raid_getun(mnum, &mgph->mde, WRITERS, lock)) ==
        NULL)
        return (0);

    if (MD_STATUS(un) & MD_UN_GROW_PENDING)
        return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));

    if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
        return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum));

    if (un->c.un_size >= mgph->size)
        return (EINVAL);

    if (UNIT_STATE(un) & RUS_LAST_ERRED)
        return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum));

    if (UNIT_STATE(un) & RUS_DOI)
        return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum));

    if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT))
        return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));

    setno = MD_MIN2SET(mnum);

    typ1 = (mddb_type_t)md_getshared_key(setno,
        raid_md_ops.md_driver.md_drivername);

    /*
     * Preserve the friendly name nature of the device that is
     * growing.
     */
    options = MD_CRO_RAID;
    if (un->c.un_revision & MD_FN_META_DEV)
        options |= MD_CRO_FN;
    if (mgph->options & MD_CRO_64BIT) {
#if defined(_ILP32)
        return (mdmderror(&mgph->mde, MDE_UNIT_TOO_LARGE, mnum));
#else
        mr_recid = mddb_createrec(mgph->size, typ1, 0,
            MD_CRO_64BIT | options, setno);
#endif
    } else {
        mr_recid = mddb_createrec(mgph->size, typ1, 0,
            MD_CRO_32BIT | options, setno);
    }
    if (mr_recid < 0) {
        rval = mddbstatus2error(&mgph->mde, (int)mr_recid,
            mnum, setno);
        return (rval);
    }

    /* get the address of the new unit */
    new_un = (mr_unit_t *)mddb_getrecaddr(mr_recid);

    /*
     * It is okay that we muck with the new unit here,
     * since no one else will know about the unit struct
     * until we commit it. If we crash, the record will
     * be automatically purged, since we haven't
     * committed it yet and the old unit struct will be found.
     */

    /* copy in the user's unit struct */
    err = ddi_copyin((void *)(uintptr_t)mgph->mdp, new_un,
        mgph->size, mode);
    if (err) {
        mddb_deleterec_wrapper(mr_recid);
        return (EFAULT);
    }

    /* make sure columns are being added */
    if (un->un_totalcolumncnt >= new_un->un_totalcolumncnt) {
        mddb_deleterec_wrapper(mr_recid);
        return (EINVAL);
    }

    /*
     * Save a few of the new unit struct's fields
     * before they get clobbered.
     */
    tc = new_un->un_totalcolumncnt;
    tb = new_un->c.un_total_blocks;
    atb = new_un->c.un_actual_tb;
    unrev = new_un->c.un_revision;

    /*
     * Copy the old unit struct (static stuff)
     * into new unit struct
     */
    bcopy((caddr_t)un, (caddr_t)new_un, un->c.un_size);

    /*
     * Restore a few of the new unit struct values.
     */
    new_un->un_totalcolumncnt = tc;
    new_un->c.un_actual_tb = atb;
    new_un->un_grow_tb = tb;
    new_un->c.un_revision = unrev;
    new_un->c.un_record_id = mr_recid;
    new_un->c.un_size = mgph->size;

    ASSERT(new_un->mr_ic == un->mr_ic);

    /*
     * Save old column slots
     */
    mrc = un->un_column_ic;

    /*
     * Allocate new column slot
     */
    new_un->un_column_ic = (mr_column_ic_t *)
        kmem_zalloc(sizeof (mr_column_ic_t) * new_un->un_totalcolumncnt,
        KM_SLEEP);

    /*
     * Restore old column slots
     * Free the old column slots
     */
    bcopy(mrc, new_un->un_column_ic,
        sizeof (mr_column_ic_t) * un->un_totalcolumncnt);
    kmem_free(mrc, sizeof (mr_column_ic_t) * un->un_totalcolumncnt);

    /* All 64 bit metadevices only support EFI labels. */
    if (mgph->options & MD_CRO_64BIT) {
        new_un->c.un_flag |= MD_EFILABEL;
        /*
         * If the device was previously smaller than a terabyte,
         * and had a vtoc record attached to it, we remove the
         * vtoc record, because the layout has changed completely.
         */
        if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
            (un->c.un_vtoc_id != 0)) {
            old_vtoc = un->c.un_vtoc_id;
            new_un->c.un_vtoc_id =
                md_vtoc_to_efi_record(old_vtoc, setno);
        }
    }

    /*
     * allocate the real recids array. since we may have to commit
     * underlying metadevice records, we need an array of size:
     * total number of new components being attached + 2 (one for the
     * raid itself, one for the end marker).
     */
    num_recs = new_un->un_totalcolumncnt + 2;
    rid = 0;
    recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
    recids[rid++] = mr_recid;

    for (col = un->un_totalcolumncnt;
        (col < new_un->un_totalcolumncnt); col++) {
        mr_column_t	*mr_col = &new_un->un_column[col];
        md_unit_t	*comp_un;

        if (raid_build_pw_reservation(new_un, col) != 0) {
            /* release pwslots already allocated by grow */
            for (i = un->un_totalcolumncnt; i < col; i++) {
                raid_free_pw_reservation(new_un, i);
            }
            kmem_free(new_un->un_column_ic,
                sizeof (mr_column_ic_t) *
                new_un->un_totalcolumncnt);
            kmem_free(new_un->mr_ic, sizeof (*un->mr_ic));
            kmem_free(recids, num_recs * sizeof (mddb_recid_t));
            mddb_deleterec_wrapper(mr_recid);
            return (EINVAL);
        }
        /*
         * set parent on metadevices being added.
         * NOTE: currently soft partitions are the only metadevices
         * which can appear within a RAID metadevice.
         */
        if (md_getmajor(mr_col->un_dev) == md_major) {
            comp_un = MD_UNIT(md_getminor(mr_col->un_dev));
            recids[rid++] = MD_RECID(comp_un);
            md_set_parent(mr_col->un_dev, MD_SID(new_un));
        }
        new_un->un_column[col].un_devflags = 0;
    }

    /* set end marker */
    recids[rid] = 0;

    /* commit new unit struct */
    mddb_commitrecs_wrapper(recids);

    /* delete old unit struct */
    mddb_deleterec_wrapper(un->c.un_record_id);

    /* place new unit in in-core array */
    md_nblocks_set(mnum, new_un->c.un_total_blocks);
    MD_UNIT(mnum) = new_un;

    /*
     * If old_vtoc has a non zero value, we know:
     * - This unit crossed the border from smaller than to larger
     *   than one TB
     * - There was a vtoc record for the unit,
     * - This vtoc record is no longer needed, because
     *   a new efi record has been created for this un.
     */
    if (old_vtoc != 0) {
        mddb_deleterec_wrapper(old_vtoc);
    }

    /* free recids */
    kmem_free(recids, num_recs * sizeof (mddb_recid_t));

    SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
        MD_UN2SET(new_un), MD_SID(new_un));
    MD_STATUS(new_un) |= MD_UN_GROW_PENDING;

    /*
     * Since the md_ioctl_writerlock acquires the unit write lock
     * and open/close acquires the unit reader lock it is necessary
     * to drop the unit write lock and then reacquire it as needed
     * later.
     */
    md_unit_writerexit(ui);

    if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) {
        rval = mdmderror(&mgph->mde, MDE_RAID_OPEN_FAILURE, mnum);
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
            MD_UN2SET(new_un), MD_SID(new_un));
        return (rval);
    }
    (void) md_unit_writerlock(ui);
    for (i = 0; i < new_un->un_totalcolumncnt; i++) {
        if (new_un->un_column[i].un_devstate & RCS_OKAY)
            (void) init_pw_area(new_un,
                new_un->un_column[i].un_dev,
                new_un->un_column[i].un_pwstart, i);
    }
    md_unit_writerexit(ui);
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
    (void) md_unit_writerlock(ui);
    /* create a background thread to initialize the columns */
    md_ioctl_droplocks(lock);

    return (raid_init_unit(mnum, &mgph->mde));
}
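
/*
 * Editor's note: a grow is a whole-record replacement rather than an
 * in-place edit. A larger mddb record is created, the old unit struct is
 * copied into it, the added columns get pre-write reservations, the new
 * record is committed and the old one deleted, and raid_init_unit()
 * finally zeroes the new columns in a background thread.
 */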
/*
 * NAME:	raid_reset
 * DESCRIPTION: used to reset (clear / remove) a RAID metadevice
 * PARAMETERS:	md_i_reset_t *mirp - pointer to reset data structure
 *
 * LOCKS:	obtains and releases md_unit_array_rw write lock
 *
 */
1984 static int
1985 raid_reset(md_i_reset_t *mirp)
1987 minor_t mnum = mirp->mnum;
1988 mr_unit_t *un;
1989 mdi_unit_t *ui;
1990 set_t setno = MD_MIN2SET(mnum);
1992 mdclrerror(&mirp->mde);
1994 rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1996 * NOTE: need to get md_unit_writerlock to avoid conflict
1997 * with raid_init thread.
1999 if ((un = raid_getun(mnum, &mirp->mde, NO_LOCK, NULL)) ==
2000 NULL) {
2001 rw_exit(&md_unit_array_rw.lock);
2002 return (0);
2004 ui = MDI_UNIT(mnum);
2006 if (MD_HAS_PARENT(MD_PARENT(un))) {
2007 rw_exit(&md_unit_array_rw.lock);
2008 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
2011 un = (mr_unit_t *)md_unit_openclose_enter(ui);
2012 if (md_unit_isopen(MDI_UNIT(mnum))) {
2013 md_unit_openclose_exit(ui);
2014 rw_exit(&md_unit_array_rw.lock);
2015 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
2017 md_unit_openclose_exit(ui);
2018 if (UNIT_STATE(un) != RUS_OKAY && !mirp->force) {
2019 rw_exit(&md_unit_array_rw.lock);
2020 return (mdmderror(&mirp->mde, MDE_RAID_NEED_FORCE, mnum));
2023 reset_raid(un, mnum, 1);
2026 * Update unit availability
2028 md_set[setno].s_un_avail++;
2031 * If MN set, reset s_un_next so all nodes can have
2032 * the same view of the next available slot when
2033 * nodes are -w and -j
2035 if (MD_MNSET_SETNO(setno)) {
2036 (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
2039 rw_exit(&md_unit_array_rw.lock);
2041 return (0);
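
/*
 * Hypothetical userland sketch of driving MD_IOCRESET, matching the
 * fields raid_reset() consumes above (mnum, force, mde).  The admin
 * device path and the header providing md_i_reset_t are assumptions,
 * not guarantees about this tree's layout.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/lvm/mdvar.h>	/* assumed home of md_i_reset_t */

int
reset_raid_unit(minor_t mnum, int force)
{
	md_i_reset_t mir;
	int fd, err;

	(void) memset(&mir, 0, sizeof (mir));
	mir.mnum = mnum;	/* unit to clear */
	mir.force = force;	/* override the RUS_OKAY state check */

	if ((fd = open("/dev/md/admin", O_RDWR)) < 0)	/* assumed node */
		return (-1);
	err = ioctl(fd, MD_IOCRESET, &mir);
	(void) close(fd);
	return (err);
}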
2045 * NAME: raid_get_geom
2046 * DESCRIPTION: used to get the geometry of a RAID metadevice
2047 * PARAMETERS: mr_unit_t *un - RAID unit to get the geometry for
2048 * struct dk_geom *gp - pointer to geometry data structure
2050 * LOCKS: none
2053 static int
2054 raid_get_geom(
2055 mr_unit_t *un,
2056 struct dk_geom *geomp
2059 md_get_geom((md_unit_t *)un, geomp);
2061 return (0);
2065 * NAME: raid_get_vtoc
2066 * DESCRIPTION: used to get the VTOC on a RAID metadevice
2067 * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from
2068 * struct vtoc *vtocp - pointer to VTOC data structure
2070 * LOCKS: none
2073 static int
2074 raid_get_vtoc(
2075 mr_unit_t *un,
2076 struct vtoc *vtocp
2079 md_get_vtoc((md_unit_t *)un, vtocp);
2081 return (0);
2085 * NAME: raid_set_vtoc
2086 * DESCRIPTION: used to set the VTOC on a RAID metadevice
2087 * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on
2088 * struct vtoc *vtocp - pointer to VTOC data structure
2090 * LOCKS: none
2093 static int
2094 raid_set_vtoc(
2095 mr_unit_t *un,
2096 struct vtoc *vtocp
2099 return (md_set_vtoc((md_unit_t *)un, vtocp));
2104 * NAME: raid_get_extvtoc
2105 * DESCRIPTION: used to get the extended VTOC on a RAID metadevice
2106 * PARAMETERS: mr_unit_t *un - RAID unit to get the VTOC from
2107 * struct extvtoc *vtocp - pointer to extended VTOC data structure
2109 * LOCKS: none
2112 static int
2113 raid_get_extvtoc(
2114 mr_unit_t *un,
2115 struct extvtoc *vtocp
2118 md_get_extvtoc((md_unit_t *)un, vtocp);
2120 return (0);
2124 * NAME: raid_set_extvtoc
2125 * DESCRIPTION: used to set the extended VTOC on a RAID metadevice
2126 * PARAMETERS: mr_unit_t *un - RAID unit to set the VTOC on
2127 * struct extvtoc *vtocp - pointer to extended VTOC data structure
2129 * LOCKS: none
2132 static int
2133 raid_set_extvtoc(
2134 mr_unit_t *un,
2135 struct extvtoc *vtocp
2138 return (md_set_extvtoc((md_unit_t *)un, vtocp));
2144 * NAME: raid_get_cgapart
2145 * DESCRIPTION: used to get the dk_map on a RAID metadevice
2146 * PARAMETERS: mr_unit_t *un - RAID unit to get the dk_map from
2147 * struct dk_map *dkmapp - pointer to dk_map data structure
2149 * LOCKS: none
2153 static int
2154 raid_get_cgapart(
2155 mr_unit_t *un,
2156 struct dk_map *dkmapp
2159 md_get_cgapart((md_unit_t *)un, dkmapp);
2160 return (0);
2164 * NAME: raid_getdevs
2165 * DESCRIPTION: return all devices within a RAID metadevice
2166 * PARAMETERS: md_getdevs_params_t *mgdp
2167 * - pointer to getdevs IOCTL data structure
2168 * int mode - should be FREAD
2169 * IOLOCK *lockp - IOCTL read/write lock
2171 * LOCKS: obtains unit reader lock via IOLOCK
2174 static int
2175 raid_getdevs(
2176 void *mgdp,
2177 int mode,
2178 IOLOCK *lock
2181 minor_t mnum;
2182 mr_unit_t *un;
2183 md_dev64_t *udevs;
2184 int i, cnt;
2185 md_dev64_t unit_dev;
2186 md_getdevs_params_t *mgdph = mgdp;
2189 mnum = mgdph->mnum;
2191 /* check out unit */
2192 mdclrerror(&mgdph->mde);
2194 if ((un = raid_getun(mnum, &mgdph->mde, RD_LOCK, lock)) == NULL)
2195 return (0);
2197 udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;
2199 for (cnt = 0, i = 0; i < un->un_totalcolumncnt; i++, cnt++) {
2200 if (cnt < mgdph->cnt) {
2201 unit_dev = un->un_column[i].un_orig_dev;
2202 if (md_getmajor(unit_dev) != md_major) {
2203 if ((unit_dev = md_xlate_mini_2_targ
2204 (unit_dev)) == NODEV64)
2205 return (ENODEV);
2208 if (ddi_copyout((caddr_t)&unit_dev,
2209 (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0)
2210 return (EFAULT);
2212 if (HOTSPARED(un, i)) {
2213 cnt++;
2214 if (cnt >= mgdph->cnt)
2215 continue;
2217 unit_dev = un->un_column[i].un_dev;
2218 if (md_getmajor(unit_dev) != md_major) {
2219 if ((unit_dev = md_xlate_mini_2_targ
2220 (unit_dev)) == NODEV64)
2221 return (ENODEV);
2224 if (ddi_copyout((caddr_t)&unit_dev,
2225 (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0)
2226 return (EFAULT);
2229 mgdph->cnt = cnt;
2230 return (0);
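
/*
 * Hedged userland sketch of the sizing handshake raid_getdevs()
 * permits: the loop above counts every device (hot spares included)
 * into mgdph->cnt but copies out only what fits the caller's array,
 * so a caller can probe with cnt = 0 and retry with storage.  The
 * admin fd, header location, and exact field types are assumptions
 * based on the casts the driver performs.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/lvm/mdvar.h>	/* assumed home of md_getdevs_params_t */

md_dev64_t *
fetch_raid_devs(int admin_fd, minor_t mnum, int *countp)
{
	md_getdevs_params_t mgd;
	md_dev64_t *devs;

	(void) memset(&mgd, 0, sizeof (mgd));
	mgd.mnum = mnum;
	mgd.cnt = 0;		/* pass 1: learn the count only */
	if (ioctl(admin_fd, MD_IOCGET_DEVS, &mgd) != 0 || mgd.cnt == 0)
		return (NULL);
	if ((devs = calloc(mgd.cnt, sizeof (*devs))) == NULL)
		return (NULL);
	mgd.devs = (uint64_t)(uintptr_t)devs;	/* pass 2: fetch entries */
	if (ioctl(admin_fd, MD_IOCGET_DEVS, &mgd) != 0) {
		free(devs);
		return (NULL);
	}
	*countp = mgd.cnt;
	return (devs);
}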
2234 * NAME: raid_change
2235 * DESCRIPTION: used to change the following dynamic values:
2236 * the hot spare pool
2237 * in the unit structure of a RAID metadevice
2238 * PARAMETERS: md_raid_params_t *mrp - pointer to change data structure
2239 * IOLOCK *lock - pointer to IOCTL lock
2241 * LOCKS: obtains unit writer lock via IOLOCK (through raid_getun)
2244 static int
2245 raid_change(
2246 md_raid_params_t *mrp,
2247 IOLOCK *lock
2250 minor_t mnum = mrp->mnum;
2251 mr_unit_t *un;
2252 int ix;
2253 mddb_recid_t recids[3] = {0, 0, 0};
2254 int err;
2255 int irecid;
2256 int inc_new_hsp = 0;
2258 mdclrerror(&mrp->mde);
2260 if ((un = raid_getun(mnum, &mrp->mde, WR_LOCK, lock)) == NULL)
2261 return (0);
2263 if (!mrp->params.change_hsp_id)
2264 return (0);
2266 /* verify that no hotspare is in use */
2267 for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
2268 if (HOTSPARED(un, ix)) {
2269 return (mdmderror(&mrp->mde, MDE_HS_IN_USE, mnum));
2273 /* replace the hot spare pool */
2275 irecid = 0;
2276 if (mrp->params.hsp_id != -1) {
2277 /* increment the reference count of the new hsp */
2278 err = md_hot_spare_ifc(HSP_INCREF, mrp->params.hsp_id, 0, 0,
2279 &recids[0], NULL, NULL, NULL);
2280 if (err) {
2281 return (mdhsperror(&mrp->mde, MDE_INVAL_HSP,
2282 mrp->params.hsp_id));
2284 inc_new_hsp = 1;
2285 irecid++;
2288 if (un->un_hsp_id != -1) {
2289 /* decrement the reference count of the old hsp */
2290 err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
2291 &recids[irecid], NULL, NULL, NULL);
2292 if (err) {
2293 err = mdhsperror(&mrp->mde, MDE_INVAL_HSP,
2294 un->un_hsp_id);
2295 if (inc_new_hsp) {
2296 (void) md_hot_spare_ifc(HSP_DECREF,
2297 mrp->params.hsp_id, 0, 0,
2298 &recids[0], NULL, NULL, NULL);
2300 * Don't need to commit the record,
2301 * because it wasn't committed before
2304 return (err);
2308 un->un_hsp_id = mrp->params.hsp_id;
2310 raid_commit(un, recids);
2311 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
2312 MD_UN2SET(un), MD_SID(un));
2314 /* Now trigger hot spare processing in case one is needed. */
2315 if ((un->un_hsp_id != -1) && (un->un_state == RUS_ERRED))
2316 (void) raid_hotspares();
2318 return (0);
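
/*
 * Minimal model (example names, not driver API) of the update
 * discipline raid_change() follows when swapping hot spare pools:
 * take the new reference first, then drop the old one, and roll the
 * new reference back if the second step fails, so no failure path
 * leaks or double-frees a reference.
 */
static int
swap_ref(int (*incref)(int), int (*decref)(int), int new_id, int old_id)
{
	int err;

	if (new_id != -1 && (err = incref(new_id)) != 0)
		return (err);			/* nothing to undo yet */
	if (old_id != -1 && (err = decref(old_id)) != 0) {
		if (new_id != -1)
			(void) decref(new_id);	/* roll back step one */
		return (err);
	}
	return (0);
}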
2322 * NAME: raid_admin_ioctl
2323 * DESCRIPTION: IOCTL operations unique to metadevices and RAID
2324 * PARAMETERS: int cmd - IOCTL command to be executed
2325 * void *data - pointer to IOCTL data structure
2326 * int mode - either FREAD or FWRITE
2327 * IOLOCK *lockp - IOCTL read/write lock
2329 * LOCKS: none
2332 static int
2333 raid_admin_ioctl(
2334 int cmd,
2335 void *data,
2336 int mode,
2337 IOLOCK *lockp
2340 size_t sz = 0;
2341 void *d = NULL;
2342 int err = 0;
2344 /* We can only handle 32-bit clients for internal commands */
2345 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
2346 return (EINVAL);
2350 /* dispatch ioctl */
2351 switch (cmd) {
2353 case MD_IOCSET:
2355 if (! (mode & FWRITE))
2356 return (EACCES);
2358 sz = sizeof (md_set_params_t);
2359 d = kmem_alloc(sz, KM_SLEEP);
2361 if (ddi_copyin(data, d, sz, mode)) {
2362 err = EFAULT;
2363 break;
2366 err = raid_set(d, mode);
2367 break;
2370 case MD_IOCGET:
2372 if (! (mode & FREAD))
2373 return (EACCES);
2375 sz = sizeof (md_i_get_t);
2376 d = kmem_alloc(sz, KM_SLEEP);
2378 if (ddi_copyin(data, d, sz, mode)) {
2379 err = EFAULT;
2380 break;
2383 err = raid_get(d, mode, lockp);
2384 break;
2387 case MD_IOCREPLACE:
2389 if (! (mode & FWRITE))
2390 return (EACCES);
2392 sz = sizeof (replace_params_t);
2393 d = kmem_alloc(sz, KM_SLEEP);
2395 if (ddi_copyin(data, d, sz, mode)) {
2396 err = EFAULT;
2397 break;
2400 err = raid_replace((replace_params_t *)d, lockp);
2401 break;
2404 case MD_IOCSETSYNC:
2406 if (! (mode & FWRITE))
2407 return (EACCES);
2409 sz = sizeof (md_resync_ioctl_t);
2410 d = kmem_alloc(sz, KM_SLEEP);
2412 if (ddi_copyin(data, d, sz, mode)) {
2413 err = EFAULT;
2414 break;
2417 err = raid_set_sync((md_resync_ioctl_t *)d, lockp);
2418 break;
2421 case MD_IOCGETSYNC:
2423 if (! (mode & FREAD))
2424 return (EACCES);
2426 sz = sizeof (md_resync_ioctl_t);
2427 d = kmem_alloc(sz, KM_SLEEP);
2429 if (ddi_copyin(data, d, sz, mode)) {
2430 err = EFAULT;
2431 break;
2433 err = raid_get_resync((md_resync_ioctl_t *)d, lockp);
2435 break;
2438 case MD_IOCGROW:
2440 if (! (mode & FWRITE))
2441 return (EACCES);
2443 sz = sizeof (md_grow_params_t);
2444 d = kmem_alloc(sz, KM_SLEEP);
2446 if (ddi_copyin(data, d, sz, mode)) {
2447 err = EFAULT;
2448 break;
2451 err = raid_grow(d, mode, lockp);
2452 break;
2455 case MD_IOCCHANGE:
2457 if (! (mode & FWRITE))
2458 return (EACCES);
2460 sz = sizeof (md_raid_params_t);
2461 d = kmem_alloc(sz, KM_SLEEP);
2463 if (ddi_copyin(data, d, sz, mode)) {
2464 err = EFAULT;
2465 break;
2468 err = raid_change((md_raid_params_t *)d, lockp);
2469 break;
2472 case MD_IOCRESET:
2474 if (! (mode & FWRITE))
2475 return (EACCES);
2477 sz = sizeof (md_i_reset_t);
2478 d = kmem_alloc(sz, KM_SLEEP);
2480 if (ddi_copyin(data, d, sz, mode)) {
2481 err = EFAULT;
2482 break;
2485 err = raid_reset((md_i_reset_t *)d);
2486 break;
2489 case MD_IOCGET_DEVS:
2491 if (! (mode & FREAD))
2492 return (EACCES);
2494 sz = sizeof (md_getdevs_params_t);
2495 d = kmem_alloc(sz, KM_SLEEP);
2497 if (ddi_copyin(data, d, sz, mode)) {
2498 err = EFAULT;
2499 break;
2502 err = raid_getdevs(d, mode, lockp);
2503 break;
2506 case MD_IOCSETREGEN:
2508 if (! (mode & FWRITE))
2509 return (EACCES);
2511 sz = sizeof (md_regen_param_t);
2512 d = kmem_alloc(sz, KM_SLEEP);
2514 if (ddi_copyin(data, d, sz, mode)) {
2515 err = EFAULT;
2516 break;
2519 err = raid_regen((md_regen_param_t *)d, lockp);
2520 break;
2523 case MD_IOCPROBE_DEV:
2525 md_probedev_impl_t *p = NULL;
2526 md_probedev_t *ph = NULL;
2527 daemon_queue_t *hdr = NULL;
2528 int i;
2529 size_t sz1 = 0;
2532 if (! (mode & FREAD))
2533 return (EACCES);
2535 sz = sizeof (md_probedev_t);
2537 d = kmem_alloc(sz, KM_SLEEP);
2539 /* now copy in the data */
2540 if (ddi_copyin(data, d, sz, mode)) {
2541 err = EFAULT;
2542 goto free_mem;
2546 * Sanity test the args. Test name should have the keyword
2547 * probe.
2549 p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);
2550 p->probe_sema = NULL;
2551 p->probe_mx = NULL;
2552 p->probe.mnum_list = (uint64_t)NULL;
2554 ph = (md_probedev_t *)d;
2555 p->probe.nmdevs = ph->nmdevs;
2556 (void) strcpy(p->probe.test_name, ph->test_name);
2557 bcopy(&ph->md_driver, &(p->probe.md_driver),
2558 sizeof (md_driver_t));
2560 if ((p->probe.nmdevs < 1) ||
2561 (strstr(p->probe.test_name, "probe") == NULL)) {
2562 err = EINVAL;
2563 goto free_mem;
2566 sz1 = sizeof (minor_t) * p->probe.nmdevs;
2568 p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz1,
2569 KM_SLEEP);
2571 if (ddi_copyin((caddr_t)(uintptr_t)ph->mnum_list,
2572 (caddr_t)(uintptr_t)p->probe.mnum_list, sz1, mode)) {
2573 err = EFAULT;
2574 goto free_mem;
2577 if ((err = md_init_probereq(p, &hdr)) != 0)
2578 goto free_mem;
2581 * put the request on the queue and wait.
2584 daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);
2586 (void) IOLOCK_RETURN(0, lockp);
2587 /* wait for the events to occur */
2588 for (i = 0; i < p->probe.nmdevs; i++) {
2589 sema_p(PROBE_SEMA(p));
2591 while (md_ioctl_lock_enter() == EINTR)
2592 ;
2595 * clean up. The hdr list is freed in the probe routines,
2596 * so it is NULL by the time we get here.
2598 free_mem:
2599 if (p) {
2600 if (p->probe_sema != NULL) {
2601 sema_destroy(PROBE_SEMA(p));
2602 kmem_free(p->probe_sema, sizeof (ksema_t));
2604 if (p->probe_mx != NULL) {
2605 mutex_destroy(PROBE_MX(p));
2606 kmem_free(p->probe_mx, sizeof (kmutex_t));
2608 if (p->probe.mnum_list)
2609 kmem_free((caddr_t)(uintptr_t)
2610 p->probe.mnum_list, sz1);
2612 kmem_free(p, sizeof (md_probedev_impl_t));
2614 break;
2617 default:
2618 return (ENOTTY);
2622 * copyout and free any args
2624 if (sz != 0) {
2625 if (err == 0) {
2626 if (ddi_copyout(d, data, sz, mode) != 0) {
2627 err = EFAULT;
2630 kmem_free(d, sz);
2632 return (err);
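
/*
 * Condensed model of the scaffold raid_admin_ioctl() applies to every
 * command: check the access mode, stage the argument in kernel memory,
 * dispatch, copy the possibly-updated argument back out on success,
 * and always free the staging buffer.  The handler and the use of
 * md_i_get_t as the staged size are placeholders for illustration.
 */
static int
admin_ioctl_model(void *data, int mode, int (*handler)(void *))
{
	size_t sz = sizeof (md_i_get_t);	/* keyed off the command */
	void *d;
	int err = 0;

	if (! (mode & FREAD))
		return (EACCES);

	d = kmem_alloc(sz, KM_SLEEP);
	if (ddi_copyin(data, d, sz, mode) != 0)
		err = EFAULT;
	else
		err = handler(d);

	if (err == 0 && ddi_copyout(d, data, sz, mode) != 0)
		err = EFAULT;
	kmem_free(d, sz);
	return (err);
}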
2636 * NAME: md_raid_ioctl
2637 * DESCRIPTION: RAID metadevice IOCTL operations entry point.
2638 * PARAMETERS: md_dev64_t dev - RAID device identifier
2639 * int cmd - IOCTL command to be executed
2640 * void *data - pointer to IOCTL data structure
2641 * int mode - either FREAD or FWRITE
2642 * IOLOCK *lockp - IOCTL read/write lock
2644 * LOCKS: none
2648 md_raid_ioctl(
2649 dev_t dev,
2650 int cmd,
2651 void *data,
2652 int mode,
2653 IOLOCK *lockp
2656 minor_t mnum = getminor(dev);
2657 mr_unit_t *un;
2658 int err = 0;
2660 /* handle admin ioctls */
2661 if (mnum == MD_ADM_MINOR)
2662 return (raid_admin_ioctl(cmd, data, mode, lockp));
2664 /* check unit */
2665 if ((MD_MIN2SET(mnum) >= md_nsets) ||
2666 (MD_MIN2UNIT(mnum) >= md_nunits) ||
2667 ((un = MD_UNIT(mnum)) == NULL))
2668 return (ENXIO);
2670 /* is this a supported ioctl? */
2671 err = md_check_ioctl_against_unit(cmd, un->c);
2672 if (err != 0) {
2673 return (err);
2676 /* dispatch ioctl */
2677 switch (cmd) {
2679 case DKIOCINFO:
2681 struct dk_cinfo *p;
2683 if (! (mode & FREAD))
2684 return (EACCES);
2686 p = kmem_alloc(sizeof (*p), KM_SLEEP);
2688 get_info(p, mnum);
2689 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
2690 err = EFAULT;
2692 kmem_free(p, sizeof (*p));
2693 return (err);
2696 case DKIOCGMEDIAINFO:
2698 struct dk_minfo p;
2700 if (! (mode & FREAD))
2701 return (EACCES);
2703 get_minfo(&p, mnum);
2704 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
2705 err = EFAULT;
2707 return (err);
2710 case DKIOCGGEOM:
2712 struct dk_geom *p;
2714 if (! (mode & FREAD))
2715 return (EACCES);
2717 p = kmem_alloc(sizeof (*p), KM_SLEEP);
2719 if ((err = raid_get_geom(un, p)) == 0) {
2720 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
2721 mode) != 0)
2722 err = EFAULT;
2725 kmem_free(p, sizeof (*p));
2726 return (err);
2729 case DKIOCGVTOC:
2731 struct vtoc *vtoc;
2733 if (! (mode & FREAD))
2734 return (EACCES);
2736 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2737 if ((err = raid_get_vtoc(un, vtoc)) != 0) {
2738 kmem_free(vtoc, sizeof (*vtoc));
2739 return (err);
2742 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2743 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2744 err = EFAULT;
2746 #ifdef _SYSCALL32
2747 else {
2748 struct vtoc32 *vtoc32;
2750 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2752 vtoctovtoc32((*vtoc), (*vtoc32));
2753 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2754 err = EFAULT;
2755 kmem_free(vtoc32, sizeof (*vtoc32));
2757 #endif /* _SYSCALL32 */
2759 kmem_free(vtoc, sizeof (*vtoc));
2760 return (err);
2763 case DKIOCSVTOC:
2765 struct vtoc *vtoc;
2767 if (! (mode & FWRITE))
2768 return (EACCES);
2770 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2771 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2772 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2773 err = EFAULT;
2776 #ifdef _SYSCALL32
2777 else {
2778 struct vtoc32 *vtoc32;
2780 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2782 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2783 err = EFAULT;
2784 } else {
2785 vtoc32tovtoc((*vtoc32), (*vtoc));
2787 kmem_free(vtoc32, sizeof (*vtoc32));
2789 #endif /* _SYSCALL32 */
2791 if (err == 0)
2792 err = raid_set_vtoc(un, vtoc);
2794 kmem_free(vtoc, sizeof (*vtoc));
2795 return (err);
2798 case DKIOCGEXTVTOC:
2800 struct extvtoc *extvtoc;
2802 if (! (mode & FREAD))
2803 return (EACCES);
2805 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2806 if ((err = raid_get_extvtoc(un, extvtoc)) != 0) {
2807 kmem_free(extvtoc, sizeof (*extvtoc));
2808 return (err);
2811 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2812 err = EFAULT;
2814 kmem_free(extvtoc, sizeof (*extvtoc));
2815 return (err);
2818 case DKIOCSEXTVTOC:
2820 struct extvtoc *extvtoc;
2822 if (! (mode & FWRITE))
2823 return (EACCES);
2825 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2826 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2827 err = EFAULT;
2830 if (err == 0)
2831 err = raid_set_extvtoc(un, extvtoc);
2833 kmem_free(extvtoc, sizeof (*extvtoc));
2834 return (err);
2837 case DKIOCGAPART:
2839 struct dk_map dmp;
2841 if ((err = raid_get_cgapart(un, &dmp)) != 0) {
2842 return (err);
2845 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2846 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2847 mode) != 0)
2848 err = EFAULT;
2850 #ifdef _SYSCALL32
2851 else {
2852 struct dk_map32 dmp32;
2854 dmp32.dkl_cylno = dmp.dkl_cylno;
2855 dmp32.dkl_nblk = dmp.dkl_nblk;
2857 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2858 mode) != 0)
2859 err = EFAULT;
2861 #endif /* _SYSCALL32 */
2863 return (err);
2865 case DKIOCGETEFI:
2868 * This one can be handled centrally; no need to duplicate
2869 * the same code for all types of metadevices.
2871 return (md_dkiocgetefi(mnum, data, mode));
2874 case DKIOCSETEFI:
2877 * This one can be handled centrally; no need to duplicate
2878 * the same code for all types of metadevices.
2880 return (md_dkiocsetefi(mnum, data, mode));
2883 case DKIOCPARTITION:
2885 return (md_dkiocpartition(mnum, data, mode));
2888 default:
2889 return (ENOTTY);
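
/*
 * Hypothetical userland sketch of the DKIOCGGEOM path handled above:
 * the caller opens a metadevice node (the path below is an example)
 * and receives the struct dk_geom that raid_get_geom() fills in via
 * md_get_geom().
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/dkio.h>

int
print_raid_geometry(const char *path)	/* e.g. an rdsk metadevice node */
{
	struct dk_geom geom;
	int fd;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (-1);
	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
		(void) close(fd);
		return (-1);
	}
	(void) printf("cyl=%u heads=%u sect/trk=%u\n",
	    (unsigned int)geom.dkg_ncyl, (unsigned int)geom.dkg_nhead,
	    (unsigned int)geom.dkg_nsect);
	(void) close(fd);
	return (0);
}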
2894 * rename/exchange named service entry points and support functions follow.
2895 * Most functions are handled generically, except for raid-specific locking
2896 * and checking
2900 * NAME: raid_may_renexch_self
2901 * DESCRIPTION: support routine for rename check ("MDRNM_CHECK") named service
2902 * PARAMETERS: mr_unit_t *un - unit struct of raid unit to be renamed
2903 * mdi_unit_t *ui - in-core unit struct of same raid unit
2904 * md_rentxn_t *rtxnp - rename transaction state
2906 * LOCKS: none
2909 static int
2910 raid_may_renexch_self(
2911 mr_unit_t *un,
2912 mdi_unit_t *ui,
2913 md_rentxn_t *rtxnp)
2915 minor_t from_min;
2916 minor_t to_min;
2917 bool_t toplevel;
2918 bool_t related;
2920 from_min = rtxnp->from.mnum;
2921 to_min = rtxnp->to.mnum;
2923 if (!un || !ui) {
2924 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2925 from_min);
2926 return (EINVAL);
2929 ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD));
2930 if (MD_CAPAB(un) & MD_CAN_META_CHILD) {
2931 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2932 return (EINVAL);
2935 if (MD_PARENT(un) == MD_MULTI_PARENT) {
2936 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2937 return (EINVAL);
2940 toplevel = !MD_HAS_PARENT(MD_PARENT(un));
2942 /* we're related if trying to swap with our parent */
2943 related = (!toplevel) && (MD_PARENT(un) == to_min);
2945 switch (rtxnp->op) {
2946 case MDRNOP_EXCHANGE:
2948 if (!related) {
2949 (void) mdmderror(&rtxnp->mde,
2950 MDE_RENAME_TARGET_UNRELATED, to_min);
2951 return (EINVAL);
2954 break;
2956 case MDRNOP_RENAME:
2958 * if from is top-level and is open, then the kernel is using
2959 * the md_dev64_t.
2962 if (toplevel && md_unit_isopen(ui)) {
2963 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
2964 from_min);
2965 return (EBUSY);
2967 break;
2969 default:
2970 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2971 from_min);
2972 return (EINVAL);
2975 return (0); /* ok */
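
/*
 * Compact restatement (example code, not driver API) of the legality
 * matrix raid_may_renexch_self() encodes: an exchange is only allowed
 * with the unit's own parent, and a rename of a top-level unit is
 * refused while that unit is open, because the kernel is then using
 * its md_dev64_t.
 */
static int
renexch_allowed(int is_exchange, int has_parent, int parent_is_target,
	int is_open)
{
	if (is_exchange)
		return (has_parent && parent_is_target);  /* related only */
	/* rename */
	return (!(!has_parent && is_open));	/* top-level + open = busy */
}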
2979 * NAME: raid_rename_check
2980 * DESCRIPTION: ("MDRNM_CHECK") rename/exchange named service entry point
2981 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
2982 * raid device for rename transaction
2983 * md_rentxn_t *rtxnp - rename transaction state
2985 * LOCKS: none
2988 intptr_t
2989 raid_rename_check(
2990 md_rendelta_t *delta,
2991 md_rentxn_t *rtxnp)
2993 int err = 0;
2994 int column;
2995 mr_unit_t *un;
2997 ASSERT(delta);
2998 ASSERT(rtxnp);
2999 ASSERT(delta->unp);
3000 ASSERT(delta->uip);
3002 if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3003 (void) mdsyserror(&rtxnp->mde, EINVAL);
3004 return (EINVAL);
3007 un = (mr_unit_t *)delta->unp;
3009 for (column = 0; column < un->un_totalcolumncnt; column++) {
3010 rcs_state_t colstate;
3012 colstate = un->un_column[column].un_devstate;
3014 if (colstate & RCS_LAST_ERRED) {
3015 (void) mdmderror(&rtxnp->mde, MDE_RAID_LAST_ERRED,
3016 md_getminor(delta->dev));
3017 return (EINVAL);
3020 if (colstate & RCS_INIT_ERRED) {
3021 (void) mdmderror(&rtxnp->mde, MDE_RAID_DOI,
3022 md_getminor(delta->dev));
3023 return (EINVAL);
3026 /* How did we get this far before detecting this? */
3027 if (colstate & RCS_RESYNC) {
3028 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3029 md_getminor(delta->dev));
3030 return (EBUSY);
3033 if (colstate & RCS_ERRED) {
3034 (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3035 md_getminor(delta->dev));
3036 return (EINVAL);
3039 if (!(colstate & RCS_OKAY)) {
3040 (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3041 md_getminor(delta->dev));
3042 return (EINVAL);
3045 if (HOTSPARED(un, column)) {
3046 (void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3047 md_getminor(delta->dev));
3048 return (EINVAL);
3052 /* self does additional checks */
3053 if (delta->old_role == MDRR_SELF) {
3054 err = raid_may_renexch_self((mr_unit_t *)delta->unp,
3055 delta->uip, rtxnp);
3057 return (err);
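
/*
 * Compact restatement (example only) of the per-column gate
 * raid_rename_check() applies above: a column blocks the rename if it
 * is in any errored or resyncing state, is not strictly OKAY, or is
 * currently hot-spared.
 */
static int
column_renameable(rcs_state_t st, int hotspared)
{
	if (st & (RCS_LAST_ERRED | RCS_INIT_ERRED | RCS_RESYNC | RCS_ERRED))
		return (0);
	return ((st & RCS_OKAY) != 0 && !hotspared);
}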
3061 * NAME: raid_rename_lock
3062 * DESCRIPTION: ("MDRNM_LOCK") rename/exchange named service entry point
3063 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
3064 * raid device for rename transaction
3065 * md_rentxn_t *rtxnp - rename transaction state
3067 * LOCKS: io and unit locks (taken explicitly *not* via ioctl wrappers)
3070 intptr_t
3071 raid_rename_lock(
3072 md_rendelta_t *delta,
3073 md_rentxn_t *rtxnp)
3075 minor_t mnum;
3077 ASSERT(delta);
3078 ASSERT(rtxnp);
3080 mnum = md_getminor(delta->dev);
3081 if (mnum == rtxnp->to.mnum && rtxnp->op == MDRNOP_RENAME) {
3082 return (0);
3085 ASSERT(delta->uip);
3086 if (!delta->uip) {
3087 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
3088 return (ENODEV);
3091 ASSERT(delta->unp);
3092 if (!delta->unp) {
3094 return (ENODEV);
3097 ASSERT(!IO_WRITER_HELD(delta->unp));
3098 (void) md_io_writerlock(delta->uip);
3099 ASSERT(IO_WRITER_HELD(delta->unp));
3102 ASSERT(!UNIT_WRITER_HELD(delta->unp));
3103 (void) md_unit_writerlock(delta->uip);
3104 ASSERT(UNIT_WRITER_HELD(delta->unp));
3106 return (0);
3110 * NAME: raid_rename_unlock
3111 * DESCRIPTION: ("MDRNM_UNLOCK") rename/exchange named service entry point
3112 * PARAMETERS: md_rendelta_t *delta - describes changes to be made to this
3113 * raid device for rename transaction
3114 * md_rentxn_t *rtxnp - rename transaction state
3116 * LOCKS: drops io and unit locks
3119 /* ARGSUSED */
3120 void
3121 raid_rename_unlock(
3122 md_rendelta_t *delta,
3123 md_rentxn_t *rtxnp)
3125 mr_unit_t *un = (mr_unit_t *)delta->unp;
3126 minor_t mnum = MD_SID(un);
3127 int col;
3129 ASSERT(delta);
3130 ASSERT(delta->unp);
3131 ASSERT(delta->uip);
3133 ASSERT(UNIT_WRITER_HELD(delta->unp));
3134 md_unit_writerexit(delta->uip);
3135 ASSERT(!UNIT_WRITER_HELD(delta->unp));
3137 if (! (delta->txn_stat.role_swapped) || ! (delta->txn_stat.is_open)) {
3138 goto out;
3140 if (raid_internal_open(mnum, (FREAD | FWRITE),
3141 OTYP_LYR, MD_OFLG_ISINIT) == 0) {
3142 for (col = 0; col < un->un_totalcolumncnt; col++) {
3143 if (un->un_column[col].un_devstate & RCS_OKAY)
3144 (void) init_pw_area(un,
3145 un->un_column[col].un_dev,
3146 un->un_column[col].un_pwstart, col);
3148 (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
3151 out:
3152 ASSERT(IO_WRITER_HELD(delta->unp));
3153 md_io_writerexit(delta->uip);
3154 ASSERT(!IO_WRITER_HELD(delta->unp));
3156 /* end of rename/exchange named service and support functions */