7712 mandoc -Tlint does always exit with error code 0
[unleashed.git] / usr / src / uts / common / io / lvm / md / md_rename.c
blobef7f1a733f5c21b449e178cb868573b17c01b24b
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
30 * rename or exchange identities of virtual device nodes
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/debug.h>
36 #include <sys/sysmacros.h>
37 #include <sys/types.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
41 #include <sys/lvm/mdvar.h>
42 #include <sys/lvm/md_rename.h>
44 #include <sys/sysevent/eventdefs.h>
45 #include <sys/sysevent/svm.h>
47 extern major_t md_major;
48 extern unit_t md_nunits;
49 extern set_t md_nsets;
50 extern md_set_t md_set[];
/* printable name of a rename role (md_renrole_t), used in debug output */
#define	ROLE(r)							\
	(((r) == MDRR_PARENT) ? "parent" :			\
	((r) == MDRR_SELF) ? "self" :				\
	((r) == MDRR_CHILD) ? "child" :				\
	((r) == MDRR_UNK) ? "<unknown>" : "<garbage>")

/* printable name of a rename operation (md_renop_t), used in debug output */
#define	OP_STR(op)						\
	(((op) == MDRNOP_UNK) ? "<unknown>" :			\
	((op) == MDRNOP_RENAME) ? "rename" :			\
	((op) == MDRNOP_EXCHANGE) ? "exchange" :		\
	"<garbage>")
63 int md_rename_debug = 0;
65 /* delta guard rails */
66 const unsigned long long DELTA_BEG = (0xDad08888a110beefull);
67 const unsigned long long DELTA_END = (0xa110Beef88880Dadull);
69 const unsigned long long DELTA_BEG_FREED = (0xBad0c0ed0fed0dadull);
70 const unsigned long long DELTA_END_FREED = (0x0Fed0dadbad0c0edull);
72 /* transaction guard rails */
73 const unsigned long long TXN_BEG = (0xDad01eadc0ed2badull);
74 const unsigned long long TXN_END = (0xc0ed2badDad01eadull);
76 const unsigned long long TXNUN_BEG = (0xcafe0fedbad0beefull);
77 const unsigned long long TXNUN_END = (0xbad0beefcafe0fedull);
79 const unsigned int guard_shift = (sizeof (u_longlong_t) - 3);
80 const md_stackcap_t MD_CAN_DO_ANYTHING = (md_stackcap_t)0;
82 typedef struct role_change_mapping_tab_t {
83 const int ord;
84 const md_renrole_t old_role;
85 const md_renrole_t new_role;
86 const char *svc_name;
87 md_ren_roleswap_svc_t * const default_svc;
88 } role_change_tab_t;
91 * The actual table is at the end of the file, so we don't need
92 * many forward references
94 static role_change_tab_t role_swap_tab[];
96 #define ILLEGAL_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(0xA1100BAD))
97 #define NO_DEFAULT_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(NULL))
98 #define ILLEGAL_SVC_NAME (NULL)
102 * Role swap rule table:
104 * New Role
105 * +---------------------------------------------------------------|
106 * | | Parent | Self | Child |
107 * +--------+-----------------+----------------+-------------------+
108 * | Parent | no default | ...no default | illegal |
109 * | | 1 (update kids) | 2 (update to) | 3 |
110 * Old +--------+-----------------+----------------+-------------------+
111 * Role | Self | ...self update | ...rename self | no default (down |
112 * | | 4 update up | 5 | 6 update from) |
113 * +--------+-----------------+----------------+-------------------+
114 * | Child | illegal | ...child | ...update |
115 * | | 7 | 8 update to | 9 parent |
116 * +---------------------------------------------------------------+
118 * and notes:
120 * - Boxes 1, 4 and 6 are the most interesting. They are responsible
121 * for updating the from unit's data structures. These may involve
122 * finding (former or future) children, resetting name keys and the like.
124 * - The "rename" operation is boxes 1, 5 and 9. Most of the work
125 * is done in box 5, since that contains both the "from" and "to"
126 * unit struct for rename.
128 * (There's got to be an eigen function for this; that diagonal
129 * axis is a role identity operation searching for an expression.)
131 * - Almost every transaction will call more than one of these.
132 * (Only a rename of a unit with no relatives will only call
133 * a single box.)
135 * - Box 4 "...update from" is the generic self->parent modifier.
136 * - Box 8 "...update to" is the generic child->self modifier.
137 * These can be generic because all of the information which
138 * needs to be updated is in the common portion of the unit
139 * structure when changing from their respective roles.
141 * - Boxes 1, 2 and 6 ("no default") indicate that per-metadevice
142 * information must be updated. For example, in box 1, children
143 * identities must be updated. Since different metadevice types
144 * detect and manipulate their children differently, there can
145 * be no generic "md_rename" function in this box.
147 * In addition to the named services in the table above, there
148 * are other named services used by rename/exchange.
149 * MDRNM_LIST_URFOLKS, MDRNM_LIST_URSELF, MDRNM_LIST_URKIDS
150 * list a device's parents, self and children, respectively.
151 * In most cases the default functions can be used for parents
152 * and self. Top-level devices are not required to have a
153 * "list folks" named service. Likewise, devices which cannot
154 * have metadevice children are not required to have the
155 * "list kids" named service. The LIST_UR* functions call back into
156 * the base driver (md_build_rendelta()) to package the changes to
157 * a device for addition onto the tree. The LIST_UR* named service
158 * then adds this "rename delta" onto the delta tree itself.
159 * This keeps private knowledge appropriately encapsulated.
160 * They return the number of devices which will need to be changed,
161 * and hence the number of elements they've added to the delta list
162 * or -1 for error.
164 * Other named services used by rename/exchange are:
165 * "lock" (MDRNM_LOCK), "unlock" (MDRNM_UNLOCK) and "check" (MDRNM_CHECK).
166 * These (un) write-lock all of the relevant in-core structs,
167 * including the unit structs for the device and quiesce i/o as necessary.
168 * The "check" named service verifies that this device
169 * is in a state where rename could and may occur at this time.
170 * Since the role_swap functions themselves cannot be undone
171 * (at least in this implementation), it is check()'s job to
172 * verify that the device is renamable (sic) or, if not, abort.
173 * The check function for the device participating in the role
174 * of "self" is usually where rename or exchange validity is verified.
176 * All of these functions take two arguments which may be thought
177 * of as the collective state changes of the tree of devices
178 * (md_rendelta_t *family) and the rename transaction state
179 * (md_rentxn_t rtxn or rtxnp).
185 * rename unit lock
186 * (default name service routine MDRNM_LOCK)
188 static intptr_t
189 md_rename_lock(md_rendelta_t *delta, md_rentxn_t *rtxnp)
191 minor_t mnum;
192 md_renop_t op;
194 ASSERT(delta);
195 ASSERT(rtxnp);
197 if (!delta || !rtxnp) {
198 (void) mdsyserror(&rtxnp->mde, EINVAL);
199 return (EINVAL);
201 mnum = md_getminor(delta->dev);
202 op = rtxnp->op;
205 * target doesn't exist if renaming (by definition),
206 * so it need not be locked
208 if (op == MDRNOP_RENAME && mnum == rtxnp->to.mnum) {
209 return (0);
212 ASSERT(delta->uip);
213 if (!delta->uip) {
214 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
215 return (ENODEV);
218 ASSERT(delta->unp);
219 if (!delta->unp) {
220 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
221 return (ENODEV);
224 ASSERT(!UNIT_WRITER_HELD(delta->unp));
226 (void) md_unit_writerlock(delta->uip);
228 ASSERT(UNIT_WRITER_HELD(delta->unp));
230 return (0);
234 * (default name service routine MDRNM_UNLOCK)
236 /* ARGSUSED */
237 static void
238 md_rename_unlock(
239 md_rendelta_t *delta,
240 md_rentxn_t *rtxnp)
242 ASSERT(delta);
243 ASSERT(delta->uip);
244 ASSERT(delta->unp);
246 ASSERT(UNIT_WRITER_HELD(delta->unp));
248 (void) md_unit_writerexit(delta->uip);
250 ASSERT(!UNIT_WRITER_HELD(delta->unp));
254 * This is used by the various MDRNM_LIST* named services.
256 md_rendelta_t *
257 md_build_rendelta(
258 md_renrole_t old_role,
259 md_renrole_t new_role,
260 md_dev64_t dev,
261 md_rendelta_t *prev,
262 md_unit_t *unp,
263 mdi_unit_t *uip,
264 md_error_t *ep)
266 int err = 0;
267 md_rendelta_t *new;
269 new = (md_rendelta_t *)kmem_alloc(sizeof (md_rendelta_t), KM_SLEEP);
271 new->beginning = DELTA_BEG;
272 new->dev = dev;
273 new->new_role = new_role;
274 new->old_role = old_role;
275 new->next = NULL;
276 new->prev = prev;
277 new->unp = unp;
278 new->uip = uip;
279 bzero((void *) &new->txn_stat, sizeof (md_rendstat_t));
282 * For non-meta devices that are being renamed (in the future,
283 * that is) we would need to pass in default functions to
284 * accommodate them, provided the default function is
285 * truly capable of performing the lock/check/unlock function
286 * on opaque devices.
289 new->lock = md_get_named_service(dev, /* modindex */ 0,
290 MDRNM_LOCK, md_rename_lock);
292 new->unlock = (md_ren_void_svc_t *)md_get_named_service(dev,
293 /* modindex */ 0, MDRNM_UNLOCK,
294 (intptr_t (*)()) md_rename_unlock);
296 new->check = md_get_named_service(dev, /* modindex */ 0,
297 MDRNM_CHECK, /* Default */ NULL);
299 new->role_swap = NULL; /* set this when the roles are determined */
301 if (!new->lock || !new->unlock || !new->check) {
302 (void) mdmderror(ep, MDE_RENAME_CONFIG_ERROR, md_getminor(dev));
303 err = EINVAL;
304 goto out;
307 new->end = DELTA_END;
309 out:
310 if (err != 0) {
311 if (new) {
312 new->beginning = DELTA_BEG_FREED;
313 new->end = DELTA_END_FREED;
315 kmem_free(new, sizeof (md_rendelta_t));
316 new = NULL;
320 if (prev) {
321 prev->next = new;
324 return (new);
328 * md_store_recid()
329 * used by role swap functions
331 void
332 md_store_recid(
333 int *prec_idx,
334 mddb_recid_t *recid_list,
335 md_unit_t *un)
337 mddb_recid_t *rp;
338 bool_t add_recid;
340 ASSERT(prec_idx);
341 ASSERT(recid_list);
342 ASSERT(recid_list[*prec_idx] == 0);
343 ASSERT(*prec_idx >= 0);
345 for (add_recid = TRUE, rp = recid_list; add_recid && rp && *rp; rp++) {
346 if (MD_RECID(un) == *rp) {
347 add_recid = FALSE;
351 if (add_recid) {
352 recid_list[(*prec_idx)++] = MD_RECID(un);
357 * MDRNM_LIST_URFOLKS: generic named svc entry point
358 * add all parents onto the list pointed to by dlpp
359 * (only weird multi-parented devices need to have their
360 * own named svc to do this.)
362 static int
363 md_rename_listfolks(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
365 md_rendelta_t *new;
367 ASSERT(rtxnp);
368 ASSERT(dlpp);
369 ASSERT(*dlpp == NULL);
370 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
371 ASSERT(rtxnp->from.uip);
372 ASSERT(rtxnp->from.unp);
374 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) {
375 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
376 rtxnp->from.mnum);
377 return (-1);
380 if (!MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) {
381 return (0);
385 * If supporting log renaming (and other multiparented devices)
386 * callout to each misc module to claim this waif and return the
387 * md_dev64_t of its parents.
389 if (MD_PARENT(rtxnp->from.unp) == MD_MULTI_PARENT) {
390 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD,
391 rtxnp->from.mnum);
392 return (2);
395 if ((rtxnp->op == MDRNOP_RENAME) ||
396 (MD_PARENT(rtxnp->from.unp) != MD_SID(rtxnp->to.unp))) {
398 new = md_build_rendelta(
399 MDRR_PARENT,
400 MDRR_PARENT,
401 md_makedevice(md_major, MD_PARENT(rtxnp->from.unp)),
402 NULL,
403 MD_UNIT(MD_PARENT(rtxnp->from.unp)),
404 MDI_UNIT(MD_PARENT(rtxnp->from.unp)),
405 &rtxnp->mde);
406 } else {
407 /* parent is swapping roles with self */
408 new = md_build_rendelta(
409 MDRR_PARENT,
410 MDRR_SELF,
411 md_makedevice(md_major, MD_SID(rtxnp->to.unp)),
412 NULL,
413 rtxnp->to.unp,
414 rtxnp->to.uip,
415 &rtxnp->mde);
418 if (!new) {
419 if (mdisok(&rtxnp->mde)) {
420 (void) mdsyserror(&rtxnp->mde, ENOMEM);
422 return (-1);
425 *dlpp = new;
427 return (1);
431 * MDRNM_LIST_URSELF: named svc entry point
432 * add all delta entries appropriate for ourselves onto the deltalist pointed
433 * to by dlpp
435 static int
436 md_rename_listself(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
438 md_rendelta_t *new, *p;
439 bool_t exchange_up = FALSE;
441 ASSERT(rtxnp);
442 ASSERT(dlpp);
443 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
444 ASSERT(rtxnp->from.unp);
445 ASSERT(rtxnp->from.uip);
447 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) {
448 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
449 rtxnp->from.mnum);
450 return (-1);
453 for (p = *dlpp; p && p->next != NULL; p = p->next) {
454 /* NULL */
458 * renaming or
459 * from's parent is not to and to's parent is not from
461 if (rtxnp->op == MDRNOP_RENAME) {
462 new = md_build_rendelta(
463 MDRR_SELF,
464 MDRR_SELF,
465 md_makedevice(md_major, rtxnp->from.mnum),
467 rtxnp->from.unp,
468 rtxnp->from.uip,
469 &rtxnp->mde);
470 } else {
472 if (MD_PARENT(rtxnp->from.unp) == MD_SID(rtxnp->to.unp)) {
473 exchange_up = TRUE;
476 /* self and parent are flipping */
477 new = md_build_rendelta(
478 MDRR_SELF,
479 exchange_up? MDRR_PARENT: MDRR_CHILD,
480 md_makedevice(md_major, rtxnp->from.mnum),
482 rtxnp->from.unp,
483 rtxnp->from.uip,
484 &rtxnp->mde);
487 if (!new) {
488 if (mdisok(&rtxnp->mde)) {
489 (void) mdsyserror(&rtxnp->mde, ENOMEM);
491 return (-1);
494 if (!*dlpp) {
495 *dlpp = new;
498 return (1);
502 * free the tree of all deltas to devices involved in the rename transaction
504 static void
505 free_dtree(md_rendelta_t *family)
507 md_rendelta_t *next = NULL;
508 int i = 0;
509 md_rendelta_t *r;
511 for (r = family; (NULL != r); r = next, i++) {
513 next = r->next;
515 /* shift << because it makes the resultant pattern readable */
516 r->beginning = DELTA_BEG_FREED ^ (i << guard_shift);
517 r->end = DELTA_END_FREED ^ (i << guard_shift);
519 kmem_free(r, sizeof (md_rendelta_t));
524 * walk down family tree, calling lock service function
526 static int
527 lock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
529 md_rendelta_t *r;
530 int rc;
532 ASSERT(family);
533 ASSERT(rtxnp);
535 if (!family || !rtxnp) {
536 return (EINVAL);
539 for (rc = 0, r = family; r; r = r->next) {
541 ASSERT(r->unp);
542 ASSERT(!UNIT_WRITER_HELD(r->unp));
543 ASSERT(r->lock);
545 if ((rc = (int)(*r->lock) (r, rtxnp)) != 0) {
546 return (rc);
548 r->txn_stat.locked = TRUE;
551 return (0);
555 * We rely on check() (MDRNM_CHECK) to make exhaustive checks,
556 * since we don't attempt to undo role_swap() failures.
558 * To implement an undo() function would require each role_swap()
559 * to store a log of previous state of the structures it changes,
560 * presumably anchored by the rendelta.
563 static int
564 check_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
566 md_rendelta_t *r;
567 int rc;
569 ASSERT(family);
570 ASSERT(rtxnp);
572 if (!family || !rtxnp) {
573 /* no error packet to set? */
574 return (EINVAL);
577 for (r = family, rc = 0; r; r = r->next) {
579 ASSERT(UNIT_WRITER_HELD(r->unp));
580 ASSERT(r->txn_stat.locked);
583 * <to> doesn't exist for rename
585 if (!(rtxnp->op == MDRNOP_RENAME &&
586 md_getminor(r->dev) == rtxnp->to.mnum)) {
587 ASSERT(r->uip);
588 r->txn_stat.is_open = md_unit_isopen(r->uip);
592 * if only allowing offline rename/exchanges, check
593 * for top being trans because it opens its sub-devices
596 switch (rtxnp->revision) {
597 case MD_RENAME_VERSION_OFFLINE:
598 if ((r->txn_stat.is_open) &&
599 (!rtxnp->stat.trans_in_stack)) {
600 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
601 md_getminor(r->dev));
602 return (EBUSY);
604 break;
606 case MD_RENAME_VERSION_ONLINE:
607 break;
609 default:
610 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
611 md_getminor(r->dev));
612 return (EINVAL);
615 /* MD_UN_MOD_INPROGRESS includes the MD_UN_RENAMING bit */
617 if (MD_STATUS(r->unp) & MD_UN_MOD_INPROGRESS) {
618 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
619 md_getminor(r->dev));
620 return (EBUSY);
623 MD_STATUS(r->unp) |= MD_UN_RENAMING;
625 if ((rc = (int)(*r->check)(r, rtxnp)) != 0) {
626 return (rc);
629 /* and be sure we can proceed */
630 if (!(r->role_swap)) {
631 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
632 md_getminor(r->dev));
633 return (EINVAL);
635 r->txn_stat.checked = TRUE;
638 return (0);
643 * rename role_swap() functions are responsible for updating their
644 * own parent, self and children references in both on-disk
645 * and in-core structures, as well as storing the changed
646 * record ids into recids and incrementing rec_idx.
649 static void
650 role_swap_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
652 md_rendelta_t *r;
654 ASSERT(family);
655 ASSERT(rtxnp);
657 for (r = family; r; r = r->next) {
658 ASSERT(r->role_swap);
659 ASSERT(r->txn_stat.locked);
660 ASSERT(r->txn_stat.checked);
662 (*r->role_swap)(r, rtxnp);
664 r->txn_stat.role_swapped = TRUE;
668 * there's some work to do, but not more than expected
670 ASSERT(rtxnp->rec_idx > 0);
671 ASSERT(rtxnp->rec_idx < rtxnp->n_recids);
673 if (rtxnp->rec_idx >= rtxnp->n_recids || rtxnp->rec_idx <= 0) {
675 * There's no way to indicate error from here,
676 * and even if we could, there's no undo mechanism.
677 * We've already modified the in-core structs, so
678 * We can't continue w/o committing, but we
679 * don't appear to have anything to commit.
681 cmn_err(CE_PANIC,
682 "md_rename: role_swap_dtree(family:%p, rtxnp:%p)",
683 (void *) family, (void *) rtxnp);
684 return;
686 rtxnp->recids[rtxnp->rec_idx] = 0;
688 mddb_commitrecs_wrapper(rtxnp->recids);
692 * walk down delta tree, calling the unlock service for each device,
693 * provided any of the devices appear to have been locked
695 static void
696 unlock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
698 md_rendelta_t *r;
699 uint_t any_locked = FALSE;
701 ASSERT(family);
702 ASSERT(rtxnp);
704 for (r = family; r; r = r->next) {
706 ASSERT(!(r->txn_stat.unlocked)); /* "has been unlocked" */
707 any_locked |= r->txn_stat.locked;
710 if (any_locked) {
712 /* unwind in reverse order */
713 for (r = family; NULL != r->next; r = r->next) {
714 /* NULL */
717 for (; NULL != r; r = r->prev) {
718 MD_STATUS(r->unp) &= ~MD_UN_RENAMING;
719 ASSERT(r->unlock);
720 r->unlock(r, rtxnp);
721 r->txn_stat.unlocked = TRUE;
727 * MDRNM_UPDATE_SELF
728 * This role swap function is identical for all unit types,
729 * so keep it here. It's also the best example because it
730 * touches all the modified portions of the relevant
731 * in-common structures.
733 static void
734 md_rename_update_self(
735 md_rendelta_t *delta,
736 md_rentxn_t *rtxnp)
738 minor_t from_min, to_min;
739 sv_dev_t sv;
740 mddb_de_ic_t *dep;
741 mddb_rb32_t *rbp;
743 ASSERT(rtxnp);
744 ASSERT(rtxnp->op == MDRNOP_RENAME);
745 ASSERT(delta);
746 ASSERT(delta->unp);
747 ASSERT(delta->uip);
748 ASSERT(rtxnp->rec_idx >= 0);
749 ASSERT(rtxnp->recids);
750 ASSERT(delta->old_role == MDRR_SELF);
751 ASSERT(delta->new_role == MDRR_SELF);
752 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
754 from_min = rtxnp->from.mnum;
755 to_min = rtxnp->to.mnum;
758 * self id changes in our own unit struct
760 MD_SID(delta->unp) = to_min;
763 * make sure that dest always has correct un_revision
764 * and rb_revision
766 delta->unp->c.un_revision |= MD_FN_META_DEV;
767 dep = mddb_getrecdep(MD_RECID(delta->unp));
768 ASSERT(dep);
769 rbp = dep->de_rb;
770 if (rbp->rb_revision & MDDB_REV_RB) {
771 rbp->rb_revision = MDDB_REV_RBFN;
772 } else if (rbp->rb_revision & MDDB_REV_RB64) {
773 rbp->rb_revision = MDDB_REV_RB64FN;
777 * clear old array pointers to unit in-core and unit
780 MDI_VOIDUNIT(from_min) = NULL;
781 MD_VOIDUNIT(from_min) = NULL;
784 * and point the new slots at the unit in-core and unit structs
787 MDI_VOIDUNIT(to_min) = delta->uip;
788 MD_VOIDUNIT(to_min) = delta->unp;
791 * recreate kstats
792 * - destroy the ones associated with our former identity
793 * - reallocate and associate them with our new identity
795 md_kstat_destroy_ui(delta->uip);
796 md_kstat_init_ui(to_min, delta->uip);
799 * the unit in-core reference to the get next link's id changes
802 delta->uip->ui_link.ln_id = to_min;
805 * name space addition of new key was done from user-level
806 * remove the old name's key here
809 sv.setno = MD_MIN2SET(from_min);
810 sv.key = rtxnp->from.key;
812 md_rem_names(&sv, 1);
815 * Remove associated device node as well
817 md_remove_minor_node(from_min);
820 * and store the record id (from the unit struct) into recids
821 * for later commitment by md_rename()
823 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
827 * Either one of our siblings and/or our parent changed identities.
829 static void
830 md_renexch_update_parent(
831 md_rendelta_t *delta,
832 md_rentxn_t *rtxnp)
834 ASSERT(rtxnp);
835 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
836 ASSERT(rtxnp->rec_idx >= 0);
837 ASSERT(rtxnp->recids);
838 ASSERT(delta);
839 ASSERT(delta->unp);
840 ASSERT(delta->old_role == MDRR_CHILD);
841 ASSERT(delta->new_role == MDRR_CHILD);
842 ASSERT((MD_PARENT(delta->unp) == rtxnp->from.mnum) ||
843 (MD_PARENT(delta->unp) == rtxnp->to.mnum));
845 if (MD_PARENT(delta->unp) == rtxnp->from.mnum) {
846 MD_PARENT(delta->unp) = rtxnp->to.mnum;
849 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
853 * exchange up (child->self)
855 static void
856 md_exchange_child_update_to(
857 md_rendelta_t *delta,
858 md_rentxn_t *rtxnp)
860 minor_t from_min, to_min;
862 ASSERT(rtxnp);
863 ASSERT(rtxnp->op == MDRNOP_EXCHANGE);
864 ASSERT(rtxnp->rec_idx >= 0);
865 ASSERT(rtxnp->recids);
866 ASSERT(delta);
867 ASSERT(delta->unp);
868 ASSERT(delta->uip);
869 ASSERT(delta->old_role == MDRR_CHILD);
870 ASSERT(delta->new_role == MDRR_SELF);
871 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
873 from_min = rtxnp->from.mnum;
874 to_min = rtxnp->to.mnum;
877 * self id changes in our own unit struct
878 * Note:
879 * - Since we're assuming the identity of "from" we use its mnum even
880 * though we're updating the "to" structures.
883 MD_SID(delta->unp) = from_min;
886 * our parent identifier becomes the new self, who was "to"
889 MD_PARENT(delta->unp) = to_min;
892 * point the set array pointers at the "new" unit and unit in-cores
893 * Note:
894 * - The other half of this transfer is done in the "update from"
895 * rename/exchange named service.
898 MD_VOIDUNIT(from_min) = delta->unp;
899 MDI_VOIDUNIT(from_min) = delta->uip;
902 * transfer kstats
905 delta->uip->ui_kstat = rtxnp->from.kstatp;
908 * the unit in-core reference to the get next link's id changes
911 delta->uip->ui_link.ln_id = from_min;
914 * name space additions, if necessary, were done from user-level.
915 * name space deletions, if necessary, were done in "exchange_from"
919 * and store the record id (from the unit struct) into recids
920 * for later comitment by md_rename()
923 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
927 * exchange up (self->parent)
929 static void
930 md_exchange_self_update_from_up(
931 md_rendelta_t *delta,
932 md_rentxn_t *rtxnp)
934 minor_t from_min, to_min;
936 ASSERT(rtxnp);
937 ASSERT(rtxnp->op == MDRNOP_EXCHANGE);
938 ASSERT(rtxnp->rec_idx >= 0);
939 ASSERT(rtxnp->recids);
940 ASSERT(delta);
941 ASSERT(delta->unp);
942 ASSERT(delta->uip);
943 ASSERT(delta->old_role == MDRR_SELF);
944 ASSERT(delta->new_role == MDRR_PARENT);
945 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
947 from_min = rtxnp->from.mnum;
948 to_min = rtxnp->to.mnum;
951 * self id changes in our own unit struct
952 * Note:
953 * - Since we're assuming the identity of "to" we use its mnum
954 * while we're updating the "to" structures.
957 MD_SID(delta->unp) = to_min;
960 * our parent identifier becomes the new parent, who was "from"
963 MD_PARENT(delta->unp) = from_min;
966 * point the set array pointers at the "new" unit and unit in-cores
967 * Note:
968 * - The other half of this transfer is done in the "update from"
969 * rename/exchange named service.
972 MD_VOIDUNIT(to_min) = delta->unp;
973 MDI_VOIDUNIT(to_min) = delta->uip;
976 * transfer kstats
979 delta->uip->ui_kstat = rtxnp->to.kstatp;
982 * the unit in-core reference to the get next link's id changes
985 delta->uip->ui_link.ln_id = to_min;
988 * name space additions, if necessary, were done from user-level.
989 * name space deletions, if necessary, were done in "exchange_from"
993 * and store the record id (from the unit struct) into recids
994 * for later comitment by md_rename()
997 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
1001 * The order of the called role swap functions is critical.
1002 * If they're not ordered as "all parents", then "all self"
1003 * then "all child" transitions, we will almost certainly
1004 * corrupt the data base and the in-core linkages. So,
1005 * verify that the list built by the individual drivers is
1006 * ok here.
1008 * We could have done fancy bit encodings of the roles so
1009 * it all fit into a single word and we wouldn't need the
1010 * prev_ord field. But, since cpu power is cheaper than
1011 * than people power, they're all separate for easier
1012 * debugging and maintaining. (In the unlikely event that
1013 * rename/exchange ever becomes cpu-limited, and this
1014 * algorithm is the bottleneck, we should revisit this.)
1017 static bool_t
1018 role_swap_is_valid(
1019 int previous,
1020 int current,
1021 md_rendelta_t *delta,
1022 md_rentxn_t *rtxnp)
1024 bool_t valid = FALSE;
1027 * we've backed up in processing the role table
1029 if ((previous > current) &&
1030 (delta->prev && (delta->old_role != delta->prev->old_role))) {
1031 goto out;
1035 * we're repeating the same role transition
1037 if (previous == current) {
1038 switch (delta->old_role) {
1039 case MDRR_PARENT:
1041 * require at least one of the devices to
1042 * be multiparented for us to allow another
1043 * parent transition
1045 if ((MD_MULTI_PARENT != MD_PARENT(rtxnp->from.unp)) &&
1046 (MD_MULTI_PARENT != MD_PARENT(rtxnp->to.unp))) {
1047 goto out;
1049 break;
1051 case MDRR_CHILD:
1052 /* it's ok to have multiple children */
1053 break;
1055 case MDRR_SELF:
1056 /* it's never ok to have multiple self transitions */
1057 /* FALLTHROUGH */
1058 default:
1059 goto out;
1063 valid = TRUE;
1064 out:
1065 if (!valid) {
1066 if (md_rename_debug != 0) {
1067 cmn_err(CE_NOTE, "previous: %d, current: %d, role: %s",
1068 previous, current,
1069 ROLE(delta->old_role));
1070 delay(3*drv_usectohz(1000000));
1071 ASSERT(FALSE);
1075 return (valid);
1078 static role_change_tab_t *
1079 lookup_role(md_renrole_t old_role, md_renrole_t new_role)
1081 role_change_tab_t *rp;
1082 role_change_tab_t *found = NULL;
1084 for (rp = role_swap_tab; !found && (rp->old_role != MDRR_UNK); rp++) {
1086 if (rp->old_role == old_role && rp->new_role == new_role) {
1087 found = rp;
1091 * we require a named svc if we've got two devices
1092 * claiming to be changing roles in this manner
1094 ASSERT(found);
1095 ASSERT(found->default_svc != ILLEGAL_ROLESWAP_SVC);
1096 ASSERT(found->svc_name != ILLEGAL_SVC_NAME);
1098 if (!found ||
1099 (found->default_svc == ILLEGAL_ROLESWAP_SVC) ||
1100 (found->svc_name == ILLEGAL_SVC_NAME)) {
1101 return (NULL);
1104 return (found);
1108 * fill in the role swap named svc., now that we know each device
1109 * and its changing role
1111 static int
1112 valid_roleswap_dtree(
1113 md_rendelta_t *family,
1114 md_rentxn_t *rtxnp
1117 md_rendelta_t *r;
1118 role_change_tab_t *rolep;
1119 minor_t from_min, to_min;
1120 int prev_ord = -1;
1121 bool_t found_self = FALSE;
1122 int err = 0;
1124 ASSERT(family);
1125 ASSERT(rtxnp);
1127 from_min = rtxnp->from.mnum;
1128 to_min = rtxnp->to.mnum;
1130 for (r = family; r; r = r->next, prev_ord = rolep->ord) {
1132 if (!(rolep = lookup_role(r->old_role, r->new_role))) {
1133 (void) mdmderror(&rtxnp->mde,
1134 MDE_RENAME_CONFIG_ERROR, from_min);
1135 err = EOPNOTSUPP;
1136 goto out;
1138 r->role_swap = (md_ren_roleswap_svc_t *)md_get_named_service(
1139 r->dev, /* modindex */ 0,
1140 (char *)rolep->svc_name,
1141 (intptr_t (*)()) rolep->default_svc);
1144 * someone probably called the ioctl directly and
1145 * incorrectly, rather than via the libmeta wrappers
1147 if (!(r->role_swap)) {
1148 (void) mdmderror(&rtxnp->mde,
1149 MDE_RENAME_TARGET_UNRELATED, to_min);
1150 err = EOPNOTSUPP;
1151 goto out;
1154 if (!role_swap_is_valid(prev_ord, rolep->ord, r, rtxnp)) {
1155 (void) mdmderror(&rtxnp->mde,
1156 MDE_RENAME_CONFIG_ERROR, from_min);
1157 err = EINVAL;
1158 goto out;
1161 if (rolep->old_role == MDRR_SELF) {
1162 found_self = TRUE;
1165 if (MD_PARENT(r->unp) == MD_MULTI_PARENT) {
1166 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
1167 md_getminor(r->dev));
1168 err = EINVAL;
1169 goto out;
1174 * must be at least one selfish device
1176 ASSERT(found_self);
1177 if (!found_self) {
1178 (void) mdmderror(&rtxnp->mde,
1179 MDE_RENAME_CONFIG_ERROR, from_min);
1180 err = EINVAL;
1181 goto out;
1184 out:
1185 return (err);
1189 * dump contents of rename transaction
1191 static void
1192 dump_txn(md_rentxn_t *rtxnp) {
1194 if (md_rename_debug == 0) {
1195 return;
1198 cmn_err(CE_NOTE, "rtxnp: %p", (void *) rtxnp);
1199 if (rtxnp) {
1200 cmn_err(CE_NOTE, "beginning: %llx, op: %s",
1201 rtxnp->beginning, OP_STR(rtxnp->op));
1203 cmn_err(CE_NOTE,
1204 "revision: %d, uflags: %d, rec_idx: %d, n_recids: %d, rec_ids: %p%s",
1205 rtxnp->revision, rtxnp->uflags,
1206 rtxnp->rec_idx, rtxnp->n_recids, (void *) rtxnp->recids,
1207 rtxnp->stat.trans_in_stack? " (trans in stack)": "");
1208 cmn_err(CE_NOTE, " from: beginning: %llx",
1209 rtxnp->from.beginning);
1210 cmn_err(CE_NOTE, " minor: %lX, key: %lX",
1211 (ulong_t)rtxnp->from.mnum, (ulong_t)rtxnp->from.key);
1212 cmn_err(CE_NOTE, " unp: %lX, uip: %lX",
1213 (ulong_t)rtxnp->from.unp, (ulong_t)rtxnp->from.uip);
1214 cmn_err(CE_NOTE, " end: %llx", rtxnp->from.end);
1215 cmn_err(CE_NOTE, " to: beginning: %llx", rtxnp->to.beginning);
1216 cmn_err(CE_NOTE, " minor: %lX, key: %lX",
1217 (ulong_t)rtxnp->to.mnum, (ulong_t)rtxnp->to.key);
1218 cmn_err(CE_NOTE, " unp: %lX, uip: %lX",
1219 (ulong_t)rtxnp->to.unp, (ulong_t)rtxnp->to.uip);
1220 cmn_err(CE_NOTE, " end: %llx", rtxnp->to.end);
1221 cmn_err(CE_NOTE, "end: %llx\n", rtxnp->end);
1223 delay(drv_usectohz(1000000));
1227 * dump contents of all deltas
1229 static void
1230 dump_dtree(md_rendelta_t *family)
1232 md_rendelta_t *r;
1233 int i;
1235 if (md_rename_debug == 0) {
1236 return;
1239 for (r = family, i = 0; r; r = r->next, i++) {
1240 cmn_err(CE_NOTE, "%d. beginning: %llx", i, r->beginning);
1241 cmn_err(CE_NOTE, " r: %lX, dev: %lX, next: %lx, prev: %lx",
1242 (ulong_t)r, (ulong_t)r->dev,
1243 (ulong_t)r->next, (ulong_t)r->prev);
1245 cmn_err(CE_NOTE, " role: %s -> %s, unp: %lx, uip: %lx",
1246 ROLE(r->old_role), ROLE(r->new_role),
1247 (ulong_t)r->unp, (ulong_t)r->uip);
1248 cmn_err(CE_NOTE,
1249 " lock: %lx, unlock: %lx\n\t check: %lx, role_swap: %lx",
1250 (ulong_t)r->lock, (ulong_t)r->unlock,
1251 (ulong_t)r->check, (ulong_t)r->role_swap);
1252 if (*((uint_t *)(&r->txn_stat)) != 0) {
1253 cmn_err(CE_NOTE, "status: (0x%x) %s%s%s%s%s",
1254 *((uint_t *)(&r->txn_stat)),
1255 r->txn_stat.is_open? "is_open " : "",
1256 r->txn_stat.locked? "locked " : "",
1257 r->txn_stat.checked? "checked " : "",
1258 r->txn_stat.role_swapped? "role_swapped " : "",
1259 r->txn_stat.unlocked? "unlocked" : "");
1261 cmn_err(CE_NOTE, "end: %llx\n", r->end);
1263 delay(drv_usectohz(1000000));
1267 * validate the rename request parameters
1269 static int
1270 validate_txn_parms(md_rentxn_t *rtxnp)
1272 minor_t to_min, from_min;
1274 ASSERT(rtxnp);
1276 from_min = rtxnp->from.mnum;
1277 to_min = rtxnp->to.mnum;
1279 switch (rtxnp->revision) {
1280 case MD_RENAME_VERSION_OFFLINE:
1281 if (rtxnp->uflags != 0) {
1282 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1283 from_min);
1284 return (ENOTSUP);
1286 break;
1288 case MD_RENAME_VERSION_ONLINE:
1289 /* not supported until 5.0 */
1290 /* FALLTHROUGH */
1292 default:
1293 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1294 from_min);
1295 return (EPROTONOSUPPORT);
1298 if ((rtxnp->from.uip = MDI_UNIT(from_min)) == NULL) {
1299 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1300 return (ENODEV);
1303 if (!md_dev_exists(md_makedevice(md_major, from_min))) {
1304 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1305 return (ENODEV);
1308 if ((rtxnp->from.key == MD_KEYBAD) || (rtxnp->from.key == MD_KEYWILD)) {
1309 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, from_min);
1310 return (EINVAL);
1313 rtxnp->from.kstatp = rtxnp->from.uip->ui_kstat;
1314 rtxnp->from.unp = MD_UNIT(from_min);
1316 if (MD_MIN2SET(to_min) != MD_MIN2SET(from_min)) {
1317 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1318 return (EINVAL);
1321 switch (rtxnp->op) {
1322 case MDRNOP_EXCHANGE:
1323 rtxnp->to.unp = MD_UNIT(to_min);
1324 rtxnp->to.uip = MDI_UNIT(to_min);
1327 * exchange requires target to exist
1330 if ((rtxnp->to.uip == NULL) ||
1331 (md_dev_exists(md_makedevice(md_major, to_min)) == NULL)) {
1332 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
1333 to_min);
1334 return (ENODEV);
1337 if ((rtxnp->to.key == MD_KEYBAD) ||
1338 (rtxnp->to.key == MD_KEYWILD)) {
1339 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1340 return (EINVAL);
1344 * <from> is not in the role of <self>,
1345 * that is,
1346 * <from> has a parent, which is <to> and <to> has a parent too
1347 * or
1348 * <to> has a parent, which is <from> and <to> can have a child
1350 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) &&
1351 (MD_PARENT(rtxnp->from.unp) == to_min) &&
1352 MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) {
1353 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER,
1354 from_min);
1355 return (EINVAL);
1358 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) &&
1359 (MD_PARENT(rtxnp->to.unp) == from_min) &&
1360 (MD_CAPAB(rtxnp->to.unp) & MD_CAN_META_CHILD)) {
1361 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER,
1362 from_min);
1363 return (EINVAL);
1366 rtxnp->to.kstatp = rtxnp->to.uip->ui_kstat;
1367 break;
1369 case MDRNOP_RENAME:
1372 * rename requires <to> not to exist
1375 if (MDI_UNIT(to_min) ||
1376 md_dev_exists(md_makedevice(md_major, to_min))) {
1378 (void) mdmderror(&rtxnp->mde, MDE_UNIT_ALREADY_SETUP,
1379 to_min);
1380 return (EEXIST);
1384 * and to be within valid ranges for the current
1385 * limits on number of sets and metadevices
1387 if ((MD_MIN2SET(to_min) >= md_nsets) ||
1388 (MD_MIN2UNIT(to_min) >= md_nunits)) {
1389 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1390 return (EINVAL);
1393 rtxnp->to.unp = NULL;
1394 rtxnp->to.uip = NULL;
1395 rtxnp->to.kstatp = NULL;
1396 break;
1398 default:
1399 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1400 from_min);
1401 return (EINVAL);
1405 * install guard rails
1407 rtxnp->beginning = TXN_BEG;
1409 rtxnp->from.beginning = TXNUN_BEG;
1410 rtxnp->from.end = TXNUN_END;
1412 rtxnp->to.beginning = TXNUN_BEG;
1413 rtxnp->to.end = TXNUN_END;
1415 rtxnp->end = TXN_END;
1417 return (0);
1421 * If the device being changed exhibits this capability, set the list
1422 * relatives function pointer to the named service that lists the
1423 * appropriate relatives for this capability.
1425 static int
1426 set_list_rels_funcp(
1427 md_rentxn_t *rtxnp,
1428 md_stackcap_t capability,
1429 char *svc_name,
1430 md_ren_list_svc_t default_svc_func,
1431 md_ren_list_svc_t **list_relatives_funcp
1434 int err;
1435 minor_t from_min;
1436 md_dev64_t from_dev;
1437 md_unit_t *from_un;
1438 mdi_unit_t *from_ui;
1440 ASSERT(rtxnp);
1441 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
1442 ASSERT(list_relatives_funcp);
1444 from_min = rtxnp->from.mnum;
1445 from_dev = md_makedevice(md_major, from_min);
1446 from_un = MD_UNIT(from_min);
1447 from_ui = MDI_UNIT(from_min);
1448 err = 0;
1450 if (!from_ui || !from_un) {
1451 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1452 err = EINVAL;
1453 goto out;
1456 if ((capability == MD_CAN_DO_ANYTHING) ||
1457 ((MD_CAPAB(from_un) & capability) == capability)) {
1459 *list_relatives_funcp = (md_ren_list_svc_t *)
1460 md_get_named_service(from_dev,
1461 /* modindex */ 0, svc_name,
1462 (intptr_t (*)()) default_svc_func);
1464 ASSERT(*list_relatives_funcp);
1465 if (!(*list_relatives_funcp)) {
1466 (void) mdmderror(&rtxnp->mde,
1467 MDE_RENAME_CONFIG_ERROR, from_min);
1468 err = EINVAL;
1469 goto out;
1471 } else {
1472 *list_relatives_funcp = (md_ren_list_svc_t *)NULL;
1475 out:
1476 return (err);
1480 * call list relations function, bump recid counter
1481 * by number of members added to the delta list.
1482 * Validate that the number of members added is within bounds.
1484 static int
1485 list_relations(
1486 md_rendelta_t **family,
1487 md_rentxn_t *rtxnp,
1488 md_ren_list_svc_t *add_relatives_funcp,
1489 int valid_min,
1490 int valid_max
1493 int n_added;
1494 int err = 0;
1496 ASSERT(family);
1497 ASSERT(rtxnp);
1499 if (!family || !rtxnp) {
1500 err = EINVAL;
1501 goto out;
1504 n_added = 0;
1506 /* no relations of this type */
1507 if (!add_relatives_funcp) {
1508 goto out;
1511 n_added = (*add_relatives_funcp) (family, rtxnp);
1513 if ((n_added < valid_min) || (n_added > valid_max)) {
1514 if (mdisok(&rtxnp->mde)) {
1515 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1516 rtxnp->from.mnum);
1518 err = EINVAL;
1519 goto out;
1522 rtxnp->n_recids += n_added;
1524 out:
1525 return (err);
1529 * build recid array
1531 static int
1532 alloc_recids(md_rendelta_t *family, md_rentxn_t *rtxnp)
1534 int err = 0;
1536 if (!family || !rtxnp) {
1537 err = ENOMEM;
1538 goto out;
1541 rtxnp->rec_idx = 0;
1543 if (rtxnp->n_recids == 0) {
1544 err = EINVAL;
1545 goto out;
1548 rtxnp->n_recids += 1; /* terminator */
1550 rtxnp->recids = kmem_alloc(sizeof (mddb_recid_t) * rtxnp->n_recids,
1551 KM_SLEEP);
1552 if (!(rtxnp->recids)) {
1553 err = ENOMEM;
1554 goto out;
1557 bzero((void *) rtxnp->recids,
1558 (sizeof (mddb_recid_t) * rtxnp->n_recids));
1559 out:
1560 if (err != 0) {
1561 (void) mdsyserror(&rtxnp->mde, err);
1564 return (err);
1568 * build family tree (parent(s), self, children)
1569 * The order of the resultant list is important, as it governs
1570 * the order of locking, checking and changing the unit structures.
1571 * Since we'll be changing them, we may not use the MD_UNIT, MDI_UNIT,
1572 * and other pointer which depend on the array being correct.
1573 * Use only the cached pointers (in rtxnp.)
1575 static md_rendelta_t *
1576 build_dtree(md_rentxn_t *rtxnp)
1578 md_ren_list_svc_t *add_folks, *add_self, *add_kids;
1579 int err;
1580 md_rendelta_t *family = NULL;
1582 ASSERT(rtxnp);
1583 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
1585 err = set_list_rels_funcp(rtxnp, MD_CAN_PARENT, MDRNM_LIST_URFOLKS,
1586 md_rename_listfolks, &add_folks);
1588 if (err) {
1589 goto out;
1592 err = set_list_rels_funcp(rtxnp, MD_CAN_DO_ANYTHING, MDRNM_LIST_URSELF,
1593 md_rename_listself, &add_self);
1594 if (err) {
1595 goto out;
1598 err = set_list_rels_funcp(rtxnp, MD_CAN_META_CHILD, MDRNM_LIST_URKIDS,
1599 /* no default list func */ ((int (*)()) NULL),
1600 &add_kids);
1601 if (err) {
1602 goto out;
1605 rtxnp->n_recids = 0; /* accumulated by list_relations() */
1607 if ((err = list_relations(&family, rtxnp, add_folks, 0, 1)) != 0) {
1608 goto out;
1611 if ((err = list_relations(&family, rtxnp, add_self, 1, 1)) != 0) {
1612 goto out;
1615 err = list_relations(&family, rtxnp, add_kids, 0, md_nunits);
1616 if (err != 0) {
1617 goto out;
1621 * delta tree is still empty?
1623 if ((!family) || (rtxnp->n_recids == 0)) {
1624 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1625 rtxnp->from.mnum);
1626 err = EINVAL;
1627 goto out;
1631 * verify role change interactions
1633 if ((err = valid_roleswap_dtree(family, rtxnp)) != 0) {
1634 goto out;
1637 if ((err = alloc_recids(family, rtxnp)) != 0) {
1638 goto out;
1641 out:
1642 if (err != 0) {
1643 free_dtree(family);
1644 dump_dtree(family); /* yes, after freeing it */
1645 family = NULL;
1648 return (family);
1653 * (MD_IOCRENAME) rename/exchange ioctl entry point
1654 * calls individual driver named service entry points
1655 * to build a list of devices which need state changed,
1656 * to verify that they're in a state where renames may occur,
1657 * and to modify themselves into their new identities
1661 md_rename(
1662 md_rename_t *mrp,
1663 IOLOCK *iolockp)
1665 md_rendelta_t *family = NULL;
1666 md_rentxn_t rtxn;
1667 int err = 0;
1668 set_t setno;
1669 mdc_unit_t *mdc;
1671 ASSERT(iolockp);
1672 if (mrp == NULL)
1673 return (EINVAL);
1675 setno = MD_MIN2SET(mrp->from.mnum);
1676 if (setno >= md_nsets) {
1677 return (EINVAL);
1681 * Early exit if top is eof trans
1683 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT(mrp->from.mnum)];
1684 while (mdc != NULL) {
1685 if (!MD_HAS_PARENT(mdc->un_parent)) {
1686 break;
1687 } else {
1688 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT
1689 (mdc->un_parent)];
1693 if (mdc && mdc->un_type == MD_METATRANS) {
1694 return (EINVAL);
1698 mdclrerror(&mrp->mde);
1700 bzero((void *) &rtxn, sizeof (md_rentxn_t));
1701 mdclrerror(&rtxn.mde);
1704 * encapsulate user parameters
1706 rtxn.from.key = mrp->from.key;
1707 rtxn.to.key = mrp->to.key;
1708 rtxn.from.mnum = mrp->from.mnum;
1709 rtxn.to.mnum = mrp->to.mnum;
1710 rtxn.op = mrp->op;
1711 rtxn.uflags = mrp->flags;
1712 rtxn.revision = mrp->revision;
1714 if (MD_MIN2UNIT(mrp->to.mnum) >= md_nunits) {
1715 err = EINVAL;
1716 goto cleanup;
1720 * catch this early, before taking any locks
1722 if (md_get_setstatus(setno) & MD_SET_STALE) {
1723 (void) (mdmddberror(&rtxn.mde, MDE_DB_STALE, rtxn.from.mnum,
1724 MD_MIN2SET(rtxn.from.mnum)));
1725 err = EROFS;
1726 goto cleanup;
1730 * Locking and re-validation (of the per-unit state) is
1731 * done by the rename lock/unlock service, for now only take
1732 * the array lock.
1734 md_array_writer(iolockp);
1737 * validate the rename/exchange parameters
1738 * rtxn is filled in on succesful completion of validate_txn_parms()
1740 if ((err = validate_txn_parms(&rtxn)) != 0) {
1741 goto cleanup;
1745 * build list of work to do, the "delta tree" for related devices
1747 if (!(family = build_dtree(&rtxn))) {
1748 err = ENOMEM;
1749 goto cleanup;
1751 dump_txn(&rtxn);
1752 dump_dtree(family);
1754 if ((err = lock_dtree(family, &rtxn)) != 0) {
1755 goto cleanup;
1758 if ((err = check_dtree(family, &rtxn)) != 0) {
1759 goto cleanup;
1761 dump_txn(&rtxn);
1763 role_swap_dtree(family, &rtxn); /* commits the recids */
1766 * let folks know
1768 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_SRC, SVM_TAG_METADEVICE,
1769 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum);
1770 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_DST, SVM_TAG_METADEVICE,
1771 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum);
1773 cleanup:
1775 if (err != 0 && mdisok(&rtxn.mde)) {
1776 (void) mdsyserror(&rtxn.mde, EINVAL);
1779 if (family) {
1780 unlock_dtree(family, &rtxn);
1781 free_dtree(family);
1782 dump_dtree(family);
1783 family = NULL;
1786 if (rtxn.recids && (rtxn.n_recids > 0)) {
1787 kmem_free(rtxn.recids, sizeof (mddb_recid_t) * rtxn.n_recids);
1790 if (!mdisok(&rtxn.mde)) {
1791 (void) mdstealerror(&mrp->mde, &rtxn.mde);
1794 return (0); /* success/failure will be communicated via rtxn.mde */
1797 static role_change_tab_t
1798 role_swap_tab[] =
1801 1, /* ordinal */
1802 MDRR_PARENT, /* old role */
1803 MDRR_PARENT, /* new role */
1804 MDRNM_UPDATE_KIDS, /* named service */
1805 NO_DEFAULT_ROLESWAP_SVC /* default role swap function */
1809 MDRR_PARENT,
1810 MDRR_SELF,
1811 MDRNM_PARENT_UPDATE_TO,
1812 NO_DEFAULT_ROLESWAP_SVC
1816 MDRR_PARENT,
1817 MDRR_CHILD,
1818 ILLEGAL_SVC_NAME,
1819 ILLEGAL_ROLESWAP_SVC
1823 MDRR_SELF,
1824 MDRR_PARENT,
1825 MDRNM_SELF_UPDATE_FROM_UP,
1826 md_exchange_self_update_from_up
1830 MDRR_SELF,
1831 MDRR_SELF,
1832 MDRNM_UPDATE_SELF,
1833 md_rename_update_self
1837 MDRR_SELF,
1838 MDRR_CHILD,
1839 MDRNM_SELF_UPDATE_FROM_DOWN,
1840 NO_DEFAULT_ROLESWAP_SVC
1844 MDRR_CHILD,
1845 MDRR_PARENT,
1846 ILLEGAL_SVC_NAME,
1847 ILLEGAL_ROLESWAP_SVC
1851 MDRR_CHILD,
1852 MDRR_SELF,
1853 MDRNM_CHILD_UPDATE_TO,
1854 md_exchange_child_update_to
1858 MDRR_CHILD,
1859 MDRR_CHILD,
1860 MDRNM_UPDATE_FOLKS,
1861 md_renexch_update_parent
1864 /* terminator is old_role == MDRR_UNK */
1867 MDRR_UNK,
1868 MDRR_UNK,
1869 ILLEGAL_SVC_NAME,
1870 NO_DEFAULT_ROLESWAP_SVC