4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Just in case we're not in a build environment, make sure that
29 * TEXT_DOMAIN gets set to something.
31 #if !defined(TEXT_DOMAIN)
32 #define TEXT_DOMAIN "SYS_TEST"
36 * Metadevice diskset interfaces
39 #include "meta_set_prv.h"
41 #include <sys/lvm/md_crc.h>
51 md_replicalist_t
*rlp
= NULL
;
55 if (metareplicalist(sp
, MD_FULLNAME_ONLY
, &rlp
, ep
) < 0)
58 for (rl
= rlp
; rl
!= NULL
; rl
= rl
->rl_next
) {
59 md_replica_t
*r
= rl
->rl_repp
;
62 * This is not the first replica being added to the
63 * diskset so call with ADDSIDENMS_BCAST. If this
64 * is a traditional diskset, the bcast flag is ignored
65 * since traditional disksets don't use the rpc.mdcommd.
67 if (meta_db_addsidenms(sp
, r
->r_namep
, r
->r_blkno
,
68 DB_ADDSIDENMS_BCAST
, ep
)) {
75 metafreereplicalist(rlp
);
93 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
96 if ((dd
= metaget_drivedesc(sp
, MD_FULLNAME_ONLY
, ep
)) == NULL
) {
103 genid
= sd
->sd_genid
- 1;
105 for (i
= 0; i
< node_c
; i
++) {
106 if (clnt_adddrvs(node_v
[i
], sp
, dd
, now
, genid
, ep
) == -1)
114 add_md_sidenms(mdsetname_t
*sp
, side_t sideno
, side_t otherside
, md_error_t
*ep
)
124 (void) memset(&nm
, '\0', sizeof (nm
));
127 if (!metaislocalset(sp
)) {
128 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
131 /* Use rpc.mdcommd to add md side info from all nodes */
132 if ((! metaislocalset(sp
)) && MD_MNSET_DESC(sd
) &&
133 (sd
->sd_mn_mynode
->nd_flags
& MD_MN_NODE_OWN
)) {
134 md_mn_result_t
*resultp
= NULL
;
135 md_mn_msg_meta_md_addside_t md_as
;
138 md_as
.msg_sideno
= sideno
;
139 md_as
.msg_otherside
= otherside
;
141 * If reconfig cycle has been started, this node is stuck in
142 * the return step until this command has completed. If
143 * mdcommd is suspended, ask send_message to fail (instead of
144 * retrying) so that metaset can finish allowing the
145 * reconfig cycle to proceed.
147 send_rval
= mdmn_send_message(sp
->setno
,
148 MD_MN_MSG_META_MD_ADDSIDE
,
149 MD_MSGF_FAIL_ON_SUSPEND
| MD_MSGF_PANIC_WHEN_INCONSISTENT
,
150 0, (char *)&md_as
, sizeof (md_mn_msg_meta_md_addside_t
),
152 if (send_rval
!= 0) {
153 (void) mdstealerror(ep
, &(resultp
->mmr_ep
));
155 free_result(resultp
);
159 free_result(resultp
);
166 nm
.mde
= mdnullerror
;
167 nm
.setno
= sp
->setno
;
169 if (metaioctl(MD_IOCNXTKEY_NM
, &nm
, &nm
.mde
, NULL
) != 0)
170 return (mdstealerror(ep
, &nm
.mde
));
172 if (nm
.key
== MD_KEYWILD
)
176 * Okay we have a valid key
177 * Let's see if it is hsp or not
179 nm
.devname
= (uintptr_t)meta_getnmentbykey(sp
->setno
,
180 otherside
, nm
.key
, &drvnm
, NULL
, NULL
, ep
);
181 if (nm
.devname
== NULL
|| drvnm
== NULL
) {
183 Free((void *)(uintptr_t)nm
.devname
);
185 Free((void *)(uintptr_t)drvnm
);
190 * If it is hsp add here
192 if (strcmp(drvnm
, MD_HOTSPARES
) == 0) {
193 if (add_name(sp
, sideno
, nm
.key
, MD_HOTSPARES
,
194 minor(NODEV
), (char *)(uintptr_t)nm
.devname
,
195 NULL
, NULL
, ep
) == -1) {
196 Free((void *)(uintptr_t)nm
.devname
);
197 Free((void *)(uintptr_t)drvnm
);
200 Free((void *)(uintptr_t)nm
.devname
);
201 Free((void *)(uintptr_t)drvnm
);
207 if (MD_MNSET_DESC(sd
)) {
210 tmp_sideno
= sideno
- 1;
213 if ((done
= meta_getnextside_devinfo(sp
,
214 (char *)(uintptr_t)nm
.devname
, &tmp_sideno
,
215 &cname
, &dname
, &mnum
, ep
)) == -1) {
216 Free((void *)(uintptr_t)nm
.devname
);
221 Free((void *)(uintptr_t)nm
.devname
);
222 Free((void *)(uintptr_t)drvnm
);
225 * The device reference count can be greater than 1 if
226 * more than one softpart is configured on top of the
227 * same device. If this is the case then we want to
228 * increment the count to sync up with the other sides.
230 for (i
= 0; i
< nm
.ref_count
; i
++) {
231 if (add_name(sp
, sideno
, nm
.key
, dname
, mnum
,
232 cname
, NULL
, NULL
, ep
) == -1)
248 check_setdrvs_againstnode(mdsetname_t
*sp
, char *node
, md_error_t
*ep
)
251 md_drive_desc
*dd
, *ddp
;
253 if ((dd
= metaget_drivedesc(sp
, MD_FULLNAME_ONLY
, ep
)) == NULL
)
257 for (ddp
= dd
; ddp
!= NULL
; ddp
= ddp
->dd_next
) {
260 if (checkdrive_onnode(sp
, dp
, node
, ep
))
268 create_multinode_set_on_hosts(
270 int node_c
, /* Number of new nodes */
271 char **node_v
, /* Nodes which are being added */
281 md_mnnode_desc
*nd
, *ndm
= NULL
;
282 md_mnnode_desc
*nd_prev
, *nd_curr
;
284 mndiskset_membershiplist_t
*nl
, *nl2
;
287 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
290 genid
= sd
->sd_genid
- 1;
294 sd
= Zalloc(sizeof (*sd
));
296 if (meta_gettimeofday(&now
) == -1) {
297 (void) mdsyserror(ep
, errno
,
298 dgettext(TEXT_DOMAIN
, "meta_gettimeofday()"));
303 /* Put the new entries into the set */
305 * Get membershiplist from API routine. If there's
306 * an error, fail to create set and pass back error.
308 if (meta_read_nodelist(&nodecnt
, &nl
, ep
) == -1) {
314 * meta_set_addhosts has already verified that
315 * this node list is in the membership list
317 * Since this is a new set, all hosts being
318 * added are new to the set, so also set ADD flag.
320 for (i
= 0; i
< node_c
; i
++) {
321 nd
= Zalloc(sizeof (*nd
));
322 (void) strcpy(nd
->nd_nodename
, node_v
[i
]);
324 nd
->nd_flags
= (MD_MN_NODE_ALIVE
|
328 if (strcmp(nl2
->msl_node_name
,
330 nd
->nd_nodeid
= nl2
->msl_node_id
;
331 (void) strcpy(nd
->nd_priv_ic
,
339 * Nodelist must be kept in ascending
342 if (sd
->sd_nodelist
== NULL
) {
343 /* Nothing in list, just add it */
344 sd
->sd_nodelist
= nd
;
345 } else if (nd
->nd_nodeid
< sd
->sd_nodelist
->nd_nodeid
) {
346 /* Add to head of list */
347 nd
->nd_next
= sd
->sd_nodelist
;
348 sd
->sd_nodelist
= nd
;
350 nd_curr
= sd
->sd_nodelist
->nd_next
;
351 nd_prev
= sd
->sd_nodelist
;
352 /* Search for place to add it */
355 nd_curr
->nd_nodeid
) {
356 /* Add before nd_curr */
357 nd
->nd_next
= nd_curr
;
358 nd_prev
->nd_next
= nd
;
362 nd_curr
= nd_curr
->nd_next
;
364 /* Add to end of list */
365 if (nd_curr
== NULL
) {
366 nd_prev
->nd_next
= nd
;
370 /* Set master to be first node added */
375 meta_free_nodelist(nl
);
377 * Creating mnset for first time.
378 * Set master to be invalid until first drive is
381 (void) strcpy(sd
->sd_mn_master_nodenm
, "");
382 sd
->sd_mn_master_nodeid
= MD_MN_INVALID_NID
;
383 sd
->sd_mn_masternode
= ndm
;
385 genid
= sd
->sd_genid
= 0;
388 /* Create the set where needed */
389 for (i
= 0; i
< node_c
; i
++) {
391 * Create the set on each new node. If the set already
392 * exists, then the node list being created on each new node
393 * is the current node list from before the new nodes
394 * were added. If the set doesn't exist, then the node
395 * list being created on each new node is the entire
398 if (clnt_mncreateset(node_v
[i
], sp
, sd
->sd_nodelist
,
399 now
, genid
, sd
->sd_mn_master_nodenm
,
400 sd
->sd_mn_master_nodeid
, ep
) == -1) {
408 nd
= sd
->sd_nodelist
;
410 sd
->sd_nodelist
= nd
->nd_next
;
412 nd
= sd
->sd_nodelist
;
417 if (rval
!= 0 || new_set
)
421 * Add the drive records to the new sets
422 * and names for the new sides.
424 return (add_drvs_to_hosts(sp
, node_c
, node_v
, ep
));
429 create_traditional_set_on_hosts(
431 int node_c
, /* Number of new nodes */
432 char **node_v
, /* Nodes which are being added */
445 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
449 genid
= sd
->sd_genid
;
454 if (node_c
> MD_MAXSIDES
)
455 return (mddserror(ep
, MDE_DS_SIDENUMNOTAVAIL
,
456 sp
->setno
, NULL
, NULL
, sp
->setname
));
458 sd
= Zalloc(sizeof (*sd
));
460 /* Put the new entries into the set */
461 for (i
= 0; i
< node_c
; i
++) {
462 (void) strcpy(sd
->sd_nodes
[i
], node_v
[i
]);
465 if (meta_gettimeofday(&now
) == -1) {
466 (void) mdsyserror(ep
, errno
, "meta_gettimeofday()");
472 genid
= sd
->sd_genid
= 0;
475 /* Create the set where needed */
476 for (i
= 0; i
< node_c
; i
++) {
478 * Create the set on each new host
480 if (clnt_createset(node_v
[i
], sp
, sd
->sd_nodes
, now
, genid
,
491 if (rval
!= 0 || new_set
)
495 * Add the drive records to the new sets
496 * and names for the new sides.
498 return (add_drvs_to_hosts(sp
, node_c
, node_v
, ep
));
504 int multi_node
, /* Multi_node diskset or not? */
505 int node_c
, /* Number of new nodes */
506 char **node_v
, /* Nodes which are being added */
512 return (create_multinode_set_on_hosts(sp
, node_c
, node_v
,
515 return (create_traditional_set_on_hosts(sp
, node_c
, node_v
,
522 int multi_node
, /* Multi-node diskset or not? */
538 md_error_t xep
= mdnullerror
;
543 if ((max_sets
= get_max_sets(ep
)) == 0)
546 /* We must be a member of the set we are creating */
547 if (! strinlst(mynode(), node_c
, node_v
))
548 return (mddserror(ep
, MDE_DS_SELFNOTIN
,
549 sp
->setno
, mynode(), NULL
, sp
->setname
));
552 * If auto_take then we must be the only member of the set
553 * that we are creating.
555 if (auto_take
&& node_c
> 1)
556 return (mddserror(ep
, MDE_DS_SINGLEHOST
, sp
->setno
, NULL
, NULL
,
560 * If we're part of SC3.0 we'll already have allocated the
561 * set number so we can skip the allocation algorithm used.
562 * Set number is unique across traditional and MN disksets.
564 if ((sdssc_rval
= sdssc_get_index(sp
->setname
, &setno
))
565 == SDSSC_NOT_BOUND
) {
567 for (i
= 0; i
< node_c
; i
++) {
571 if (strcmp(mynode(), node_v
[i
]) == 0)
575 * Make sure this set name is not used on the
578 has_set
= nodehasset(sp
, node_v
[i
], NHS_N_EQ
, ep
);
580 if (! mdiserror(ep
, MDE_NO_SET
)) {
589 (void) mddserror(ep
, MDE_DS_NODEHASSET
,
590 sp
->setno
, node_v
[i
], NULL
, sp
->setname
);
596 for (setno
= 1; setno
< max_sets
; setno
++) {
597 for (i
= 0; i
< node_c
; i
++) {
598 if (clnt_setnumbusy(node_v
[i
], setno
,
610 } else if (sdssc_rval
!= SDSSC_OKAY
) {
611 (void) mddserror(ep
, MDE_DS_SETNUMNOTAVAIL
, MD_SET_BAD
, NULL
,
617 if (setno
== max_sets
) {
618 (void) mddserror(ep
, MDE_DS_SETNUMNOTAVAIL
, MD_SET_BAD
, NULL
,
627 * Lock the set on current set members.
628 * Set locking done much earlier for MN diskset than for traditional
629 * diskset since lock_set is used to protect against
630 * other meta* commands running on the other nodes.
631 * Don't issue mdcommd SUSPEND command since there is nothing
632 * to suspend since there currently is no set.
635 /* Make sure we are blocking all signals */
636 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
640 /* Lock the set on new set members */
641 for (i
= 0; i
< node_c
; i
++) {
642 if (clnt_lock_set(node_v
[i
], sp
, ep
)) {
648 /* Now have the diskset locked, verify set number is still ok */
649 for (i
= 0; i
< node_c
; i
++) {
650 if (clnt_setnumbusy(node_v
[i
], setno
,
659 if (meta_set_checkname(sp
->setname
, ep
)) {
664 for (i
= 0; i
< node_c
; i
++) {
665 if (clnt_setnameok(node_v
[i
], sp
, &bool, ep
) == -1) {
670 (void) mddserror(ep
, MDE_DS_SETNAMEBUSY
, sp
->setno
,
671 node_v
[i
], NULL
, sp
->setname
);
679 /* Lock the set on new set members */
681 md_rb_sig_handling_on();
683 for (i
= 0; i
< node_c
; i
++) {
684 if (clnt_lock_set(node_v
[i
], sp
, ep
)) {
692 RB_TEST(1, "create_set", ep
)
695 rb_level
= 1; /* level 1 */
697 RB_TEST(2, "create_set", ep
)
699 if ((rval
= create_set_on_hosts(sp
, multi_node
, node_c
, node_v
,
703 RB_TEST(3, "create_set", ep
)
706 sr_flags
= MD_SR_OK
| MD_SR_AUTO_TAKE
;
711 * Mark the set record MD_SR_OK
713 for (i
= 0; i
< node_c
; i
++)
714 if (clnt_upd_sr_flags(node_v
[i
], sp
, sr_flags
, ep
))
717 rb_level
= 2; /* level 2 */
721 * On each added node, set the node record for that node
722 * to OK. Then set all node records for the newly added
723 * nodes on all nodes to ok.
725 * By setting a node's own node record to ok first, even if
726 * the node adding the hosts panics, the rest of the nodes can
727 * determine the same node list during the choosing of the master
728 * during reconfig. So, only nodes considered for mastership
729 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
730 * on that node's rpc.metad. If all nodes have MD_SR_OK set,
731 * but no node has its own MD_MN_NODE_OK set, then the set will
732 * be removed during reconfig since a panic occurred during the
733 * creation of the initial diskset.
737 md_mnnode_desc
*nd
, *saved_nd_next
;
740 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
) {
744 for (i
= 0; i
< node_c
; i
++) {
745 nd
= sd
->sd_nodelist
;
746 /* All nodes are guaranteed to be ALIVE */
748 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
752 /* Something wrong, will pick this up in next loop */
756 /* Only changing my local cache of node list */
757 saved_nd_next
= nd
->nd_next
;
760 /* Set node record for added host to ok on that host */
761 if (clnt_upd_nr_flags(node_v
[i
], sp
,
762 nd
, MD_NR_OK
, NULL
, ep
)) {
763 nd
->nd_next
= saved_nd_next
;
766 nd
->nd_next
= saved_nd_next
;
769 /* Now set all node records on all nodes to be ok */
770 nd
= sd
->sd_nodelist
;
771 /* All nodes are guaranteed to be ALIVE */
773 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
774 sd
->sd_nodelist
, MD_NR_OK
, NULL
, ep
)) {
781 RB_TEST(4, "create_set", ep
)
784 if ((rval
== 0) && multi_node
) {
786 * Set successfully created.
787 * Notify rpc.mdcommd on all nodes of a nodelist change.
788 * Send reinit command to mdcommd which forces it to get
789 * fresh set description. Then send resume.
790 * Resume on class 0 will resume all classes.
792 for (i
= 0; i
< node_c
; i
++) {
793 /* Class is ignored for REINIT */
794 if (clnt_mdcommdctl(node_v
[i
], COMMDCTL_REINIT
,
795 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
797 (void) mdstealerror(ep
, &xep
);
799 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
800 "Unable to reinit rpc.mdcommd.\n"));
803 for (i
= 0; i
< node_c
; i
++) {
804 if (clnt_mdcommdctl(node_v
[i
], COMMDCTL_RESUME
,
805 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
, &xep
)) {
807 (void) mdstealerror(ep
, &xep
);
809 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
810 "Unable to resume rpc.mdcommd.\n"));
813 meta_ping_mnset(sp
->setno
);
816 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
817 for (i
= 0; i
< node_c
; i
++) {
818 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
)) {
820 (void) mdstealerror(ep
, &xep
);
829 /* release signals back to what they were on entry */
830 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
833 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
840 /* all signals already blocked for MN diskset */
842 /* Make sure we are blocking all signals */
843 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
851 * On each added node (which is now each node to be deleted),
852 * set the node record for that node to DEL. Then set all
853 * node records for the newly added (soon to be deleted) nodes
854 * on all nodes to DEL.
856 * By setting a node's own node record to DEL first, even if
857 * the node doing the rollback panics, the rest of the nodes can
858 * determine the same node list during the choosing of the master
863 if ((rb_level
> 1) && (multi_node
)) {
864 md_mnnode_desc
*nd
, *saved_nd_next
;
867 if ((sd
= metaget_setdesc(sp
, &xep
)) == NULL
) {
871 for (i
= 0; i
< node_c
; i
++) {
872 nd
= sd
->sd_nodelist
;
873 /* All nodes are guaranteed to be ALIVE */
875 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
879 /* Something wrong, will pick this up in next loop */
883 /* Only changing my local cache of node list */
884 saved_nd_next
= nd
->nd_next
;
887 /* Set node record for added host to DEL on that host */
888 if (clnt_upd_nr_flags(node_v
[i
], sp
,
889 nd
, MD_NR_DEL
, NULL
, &xep
)) {
890 nd
->nd_next
= saved_nd_next
;
893 nd
->nd_next
= saved_nd_next
;
896 /* Now set all node records on all nodes to be DEL */
897 nd
= sd
->sd_nodelist
;
898 /* All nodes are guaranteed to be ALIVE */
900 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
901 sd
->sd_nodelist
, MD_NR_DEL
, NULL
, &xep
)) {
907 /* Mark set record on all hosts to be DELETED */
908 for (i
= 0; i
< node_c
; i
++) {
909 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, &xep
)) {
916 for (i
= 0; i
< node_c
; i
++) {
917 if (clnt_delset(node_v
[i
], sp
, &xep
) == -1)
923 /* Don't test lock flag since guaranteed to be set if in rollback */
924 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
925 for (i
= 0; i
< node_c
; i
++) {
926 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
))
931 /* release signals back to what they were on entry */
932 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
935 if ((sig_flag
) && (!multi_node
))
936 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
948 md_replicalist_t
*rlp
= NULL
;
949 md_replicalist_t
*rl
;
952 if (metareplicalist(sp
, MD_BASICNAME_OK
, &rlp
, ep
) < 0)
955 for (rl
= rlp
; rl
!= NULL
; rl
= rl
->rl_next
) {
956 md_replica_t
*r
= rl
->rl_repp
;
958 if (meta_db_delsidenm(sp
, sideno
, r
->r_namep
, r
->r_blkno
, ep
)) {
965 metafreereplicalist(rlp
);
983 for (i
= 0; i
< node_c
; i
++) {
984 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
986 * During OHA mode, don't issue RPCs to
987 * non-alive nodes since there is no reason to
988 * wait for RPC timeouts.
990 nd
= sd
->sd_nodelist
;
992 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
997 return (mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
998 sp
->setno
, nd
->nd_nodename
,
1002 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
1005 if (clnt_deldrvs(node_v
[i
], sp
, dd
, ep
)) {
1008 } else if (MD_MNSET_DESC(sd
) && (oha
== FALSE
)) {
1010 * All nodes should be alive in non-oha mode.
1012 if (clnt_deldrvs(node_v
[i
], sp
, dd
, ep
)) {
1017 * For traditional diskset, issue the RPC and
1018 * ignore RPC failure if in OHA mode.
1020 if (clnt_deldrvs(node_v
[i
], sp
, dd
, ep
)) {
1021 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
1043 md_error_t xep
= mdnullerror
;
1046 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
1049 /* Make sure we own the set */
1050 if (meta_check_ownership(sp
, ep
) != 0)
1053 /* Lock the set on our side */
1054 if (clnt_lock_set(mynode(), sp
, ep
)) {
1059 if (clnt_delhosts(mynode(), sp
, 1, anode
, ep
)) {
1064 if (!MD_MNSET_DESC(sd
)) {
1065 if ((dd
= metaget_drivedesc(sp
, (MD_BASICNAME_OK
| PRINT_FAST
),
1073 /* If we have drives */
1075 if (clnt_del_drv_sidenms(mynode(), sp
, ep
)) {
1083 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
1084 if (clnt_unlock_set(mynode(), cl_sk
, &xep
)) {
1086 (void) mdstealerror(ep
, &xep
);
1089 cl_set_setkey(NULL
);
1091 metaflushsetname(sp
);
1097 del_md_sidenms(mdsetname_t
*sp
, side_t sideno
, md_error_t
*ep
)
1103 if (!metaislocalset(sp
)) {
1104 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
1107 /* Use rpc.mdcommd to delete md side info from all nodes */
1108 if ((! metaislocalset(sp
)) && MD_MNSET_DESC(sd
) &&
1109 (sd
->sd_mn_mynode
->nd_flags
& MD_MN_NODE_OWN
)) {
1110 md_mn_result_t
*resultp
= NULL
;
1111 md_mn_msg_meta_md_delside_t md_ds
;
1114 md_ds
.msg_sideno
= sideno
;
1116 * If reconfig cycle has been started, this node is stuck in
1117 * the return step until this command has completed. If
1118 * mdcommd is suspended, ask send_message to fail (instead of
1119 * retrying) so that metaset can finish allowing the
1120 * reconfig cycle to proceed.
1122 send_rval
= mdmn_send_message(sp
->setno
,
1123 MD_MN_MSG_META_MD_DELSIDE
,
1124 MD_MSGF_FAIL_ON_SUSPEND
| MD_MSGF_PANIC_WHEN_INCONSISTENT
,
1125 0, (char *)&md_ds
, sizeof (md_mn_msg_meta_md_delside_t
),
1127 if (send_rval
!= 0) {
1128 (void) mdstealerror(ep
, &(resultp
->mmr_ep
));
1130 free_result(resultp
);
1134 free_result(resultp
);
1136 (void) memset(&nm
, '\0', sizeof (nm
));
1137 nm
.key
= MD_KEYWILD
;
1141 nm
.mde
= mdnullerror
;
1142 nm
.setno
= sp
->setno
;
1143 nm
.side
= MD_SIDEWILD
;
1144 if (metaioctl(MD_IOCNXTKEY_NM
, &nm
, &nm
.mde
, NULL
) != 0)
1145 return (mdstealerror(ep
, &nm
.mde
));
1147 if (nm
.key
== MD_KEYWILD
)
1151 * The device reference count can be greater than 1 if
1152 * more than one softpart is configured on top of the
1153 * same device. If this is the case then we want to
1154 * decrement the count to zero so the entry can be
1157 for (i
= 0; i
< nm
.ref_count
; i
++) {
1158 if (del_name(sp
, sideno
, nm
.key
, ep
) == -1)
1174 md_error_t xep
= mdnullerror
;
1177 if (MD_MNSET_DESC(sd
)) {
1178 nd
= sd
->sd_nodelist
;
1180 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
1184 has_set
= nodehasset(sp
, nd
->nd_nodename
,
1194 if (clnt_mncreateset(nd
->nd_nodename
, sp
,
1196 sd
->sd_ctime
, sd
->sd_genid
,
1197 sd
->sd_mn_master_nodenm
,
1198 sd
->sd_mn_master_nodeid
, &xep
) == -1)
1203 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
1204 /* Skip empty slots */
1205 if (sd
->sd_nodes
[i
][0] == '\0')
1208 has_set
= nodehasset(sp
, sd
->sd_nodes
[i
],
1216 if (clnt_createset(sd
->sd_nodes
[i
], sp
, sd
->sd_nodes
,
1217 sd
->sd_ctime
, sd
->sd_genid
, &xep
) == -1)
1224 * If a MN diskset, set is already locked on all nodes via clnt_lock_set.
1240 ulong_t max_genid
= 0;
1242 md_error_t xep
= mdnullerror
;
1246 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
1249 if (MD_MNSET_DESC(sd
)) {
1250 /* Make sure we are blocking all signals */
1251 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
1254 md_rb_sig_handling_on();
1258 * Lock the set on current set members for traditional disksets.
1260 if (!(MD_MNSET_DESC(sd
))) {
1261 for (i
= 0; i
< node_c
; i
++) {
1263 * For traditional diskset, issue the RPC and
1264 * ignore RPC failure if in OHA mode.
1266 if (clnt_lock_set(node_v
[i
], sp
, ep
)) {
1267 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
1278 RB_TEST(1, "deletehosts", ep
)
1281 rb_level
= 1; /* level 1 */
1283 RB_TEST(2, "deletehosts", ep
)
1286 * Mark the set record MD_SR_DEL
1288 for (i
= 0; i
< node_c
; i
++) {
1290 RB_TEST(3, "deletehosts", ep
)
1292 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
1294 * During OHA mode, don't issue RPCs to
1295 * non-alive nodes since there is no reason to
1296 * wait for RPC timeouts.
1298 nd
= sd
->sd_nodelist
;
1300 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
1305 (void) mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
1306 sp
->setno
, nd
->nd_nodename
,
1311 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
1315 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, ep
)) {
1318 } else if (MD_MNSET_DESC(sd
) && (oha
== FALSE
)) {
1320 * All nodes should be alive in non-oha mode.
1322 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, ep
)) {
1327 * For traditional diskset, issue the RPC and
1328 * ignore RPC failure if in OHA mode.
1330 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, ep
)) {
1331 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
1339 RB_TEST(4, "deletehosts", ep
)
1342 RB_TEST(5, "deletehosts", ep
)
1345 rb_level
= 2; /* level 2 */
1347 RB_TEST(6, "deletehosts", ep
)
1349 if (sdssc_delete_begin(sp
->setname
) == SDSSC_ERROR
)
1350 if (metad_isautotakebyname(sp
->setname
))
1355 /* The set is OK to delete, make it so. */
1356 for (i
= 0; i
< node_c
; i
++) {
1358 RB_TEST(7, "deletehosts", ep
)
1360 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
1362 * During OHA mode, don't issue RPCs to
1363 * non-alive nodes since there is no reason to
1364 * wait for RPC timeouts.
1366 nd
= sd
->sd_nodelist
;
1368 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
1373 (void) mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
1374 sp
->setno
, nd
->nd_nodename
,
1379 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
1383 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1386 } else if (MD_MNSET_DESC(sd
) && (oha
== FALSE
)) {
1388 * All nodes should be alive in non-oha mode.
1390 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1395 * For traditional diskset, issue the RPC and
1396 * ignore RPC failure if in OHA mode.
1398 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1399 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
1407 RB_TEST(8, "deletehosts", ep
)
1410 RB_TEST(9, "deletehosts", ep
)
1414 * Unlock the set on current set members
1415 * for traditional disksets.
1417 if (!(MD_MNSET_DESC(sd
))) {
1418 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
1419 for (i
= 0; i
< node_c
; i
++) {
1421 * For traditional diskset, issue the RPC and
1422 * ignore RPC failure if in OHA mode.
1424 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
)) {
1425 if (oha
== TRUE
&& mdanyrpcerror(&xep
)) {
1430 (void) mdstealerror(ep
, &xep
);
1434 cl_set_setkey(NULL
);
1438 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
1439 * don't flush that data until meta_set_deletehosts has finished
1440 * with it. meta_set_deletehosts will handle the flush of the
1443 if (!(MD_MNSET_DESC(sd
))) {
1444 metaflushsetname(sp
);
1448 sdssc_delete_end(sp
->setname
, SDSSC_COMMIT
) == SDSSC_ERROR
)
1451 if (MD_MNSET_DESC(sd
)) {
1452 /* release signals back to what they were on entry */
1453 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
1456 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
1462 /* all signals already blocked for MN diskset */
1463 if (!(MD_MNSET_DESC(sd
))) {
1464 /* Make sure we are blocking all signals */
1465 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
1471 max_genid
= sd
->sd_genid
;
1475 recreate_set(sp
, sd
);
1479 (void) sdssc_delete_end(sp
->setname
, SDSSC_CLEANUP
);
1485 resync_genid(sp
, sd
, max_genid
, node_c
, node_v
);
1490 * Unlock the set on current set members
1491 * for traditional disksets.
1493 if (!(MD_MNSET_DESC(sd
))) {
1494 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
1495 for (i
= 0; i
< node_c
; i
++) {
1497 * For traditional diskset, issue the RPC and
1498 * ignore RPC failure if in OHA mode.
1500 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
))
1503 cl_set_setkey(NULL
);
1506 /* release signals back to what they were on entry */
1507 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
1511 * A MN diskset has the clnt_locks held by meta_set_deletehosts so
1512 * don't flush that data until meta_set_deletehosts has finished
1513 * with it. meta_set_deletehosts will handle the flush of the
1516 if (!(MD_MNSET_DESC(sd
))) {
1517 metaflushsetname(sp
);
1518 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
1526 * procsigs already called for MN diskset.
1527 * md_rb_sig_handling already called for traditional diskset.
1534 int node_c
, /* Number of nodes */
1535 char **node_v
, /* Nodes being deleted */
1543 md_replicalist_t
*rlp
= NULL
;
1546 ulong_t max_genid
= 0;
1547 int rb_level
= 1; /* This is a special case */
1548 md_error_t xep
= mdnullerror
;
1553 RB_TEST(7, "deletehosts", ep
)
1557 * May need this to re-add sidenames on roll back.
1559 if (metareplicalist(sp
, (MD_BASICNAME_OK
| PRINT_FAST
), &rlp
,
1563 RB_TEST(8, "deletehosts", ep
)
1566 rb_level
= 2; /* level 2 */
1568 RB_TEST(9, "deletehosts", ep
)
1570 if (del_drvs_from_hosts(sp
, sd
, dd
, node_c
, node_v
, oha
, ep
))
1573 RB_TEST(10, "deletehosts", ep
)
1576 rb_level
= 3; /* level 3 */
1578 RB_TEST(11, "deletehosts", ep
)
1581 * Delete the db replica sides
1582 * This is done before the next loop, so that
1583 * the db does not get unloaded before we are finished
1584 * deleting the sides.
1586 if (MD_MNSET_DESC(sd
)) {
1587 nd
= sd
->sd_nodelist
;
1589 /* Skip hosts not being deleted */
1590 if (! strinlst(nd
->nd_nodename
, node_c
,
1596 if (del_db_sidenms(sp
, nd
->nd_nodeid
, ep
))
1599 RB_TEST(12, "deletehosts", ep
)
1603 for (sideno
= 0; sideno
< MD_MAXSIDES
; sideno
++) {
1604 /* Skip empty slots */
1605 if (sd
->sd_nodes
[sideno
][0] == '\0')
1608 /* Skip hosts not being deleted */
1609 if (! strinlst(sd
->sd_nodes
[sideno
], node_c
,
1613 if (del_db_sidenms(sp
, sideno
, ep
))
1616 RB_TEST(12, "deletehosts", ep
)
1620 RB_TEST(13, "deletehosts", ep
)
1623 rb_level
= 4; /* level 4 */
1625 RB_TEST(14, "deletehosts", ep
)
1627 /* Delete the names from the namespace */
1628 if (MD_MNSET_DESC(sd
)) {
1629 nd
= sd
->sd_nodelist
;
1631 /* Skip hosts not being deleted */
1632 if (! strinlst(nd
->nd_nodename
, node_c
,
1638 if (del_md_sidenms(sp
, nd
->nd_nodeid
, ep
))
1641 RB_TEST(15, "deletehosts", ep
)
1645 for (sideno
= 0; sideno
< MD_MAXSIDES
; sideno
++) {
1646 /* Skip empty slots */
1647 if (sd
->sd_nodes
[sideno
][0] == '\0')
1650 /* Skip hosts not being deleted */
1651 if (! strinlst(sd
->sd_nodes
[sideno
], node_c
,
1655 if (del_md_sidenms(sp
, sideno
, ep
))
1658 RB_TEST(15, "deletehosts", ep
)
1663 RB_TEST(16, "deletehosts", ep
)
1666 rb_level
= 5; /* level 5 */
1668 RB_TEST(17, "deletehosts", ep
)
1670 for (i
= 0; i
< node_c
; i
++) {
1671 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
1673 * During OHA mode, don't issue RPCs to
1674 * non-alive nodes since there is no reason to
1675 * wait for RPC timeouts.
1677 nd
= sd
->sd_nodelist
;
1679 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
1684 (void) mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
1685 sp
->setno
, nd
->nd_nodename
,
1690 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
1694 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1697 } else if (MD_MNSET_DESC(sd
) && (oha
== FALSE
)) {
1699 * All nodes should be alive in non-oha mode.
1701 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1706 * For traditional diskset, issue the RPC and
1707 * ignore RPC failure if in OHA mode.
1709 if (clnt_delset(node_v
[i
], sp
, ep
) == -1) {
1710 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
1718 RB_TEST(18, "deletehosts", ep
)
1721 metafreereplicalist(rlp
);
1723 if (MD_MNSET_DESC(sd
)) {
1724 /* release signals back to what they were on entry */
1725 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
1728 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
1734 /* all signals already blocked for MN diskset */
1735 if (!(MD_MNSET_DESC(sd
))) {
1736 /* Make sure we are blocking all signals */
1737 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
1741 max_genid
= sd
->sd_genid
;
1745 recreate_set(sp
, sd
);
1750 if (rb_level
> 1 && dd
!= NULL
) {
1752 * See if we have to re-add the drives specified.
1754 for (i
= 0; i
< node_c
; i
++) {
1757 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
1759 * During OHA mode, don't issue RPCs to
1760 * non-alive nodes since there is no reason to
1761 * wait for RPC timeouts.
1763 nd
= sd
->sd_nodelist
;
1765 if (strcmp(nd
->nd_nodename
, node_v
[i
])
1773 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))
1777 /* Don't care if set record is MN or not */
1778 if (clnt_getset(node_v
[i
], sp
->setname
,
1779 MD_SET_BAD
, &sr
, &xep
) == -1) {
1784 /* Drive already added, skip to next node */
1785 if (sr
->sr_drivechain
!= NULL
) {
1787 * Set record structure was allocated from RPC
1788 * routine getset so this structure is only of
1789 * size md_set_record even if the MN flag is
1790 * set. So, clear the flag so that the free
1791 * code doesn't attempt to free a structure
1792 * the size of md_mnset_record.
1794 sr
->sr_flags
&= ~MD_SR_MN
;
1799 if (clnt_adddrvs(node_v
[i
], sp
, dd
,
1800 sr
->sr_ctime
, sr
->sr_genid
, &xep
) == -1)
1803 if (clnt_upd_dr_flags(node_v
[i
], sp
, dd
,
1804 MD_DR_OK
, &xep
) == -1)
1808 * Set record structure was allocated from RPC routine
1809 * getset so this structure is only of size
1810 * md_set_record even if the MN flag is set. So,
1811 * clear the flag so that the free code doesn't
1812 * attempt to free a structure the size of
1815 sr
->sr_flags
&= ~MD_SR_MN
;
1822 if (rb_level
> 2 && dd
!= NULL
) {
1823 md_replicalist_t
*rl
;
1825 for (rl
= rlp
; rl
!= NULL
; rl
= rl
->rl_next
) {
1826 md_replica_t
*r
= rl
->rl_repp
;
1829 * This is not the first replica being added to the
1830 * diskset so call with ADDSIDENMS_BCAST. If this
1831 * is a traditional diskset, the bcast flag is ignored
1832 * since traditional disksets don't use the rpc.mdcommd.
1834 if (meta_db_addsidenms(sp
, r
->r_namep
, r
->r_blkno
,
1835 DB_ADDSIDENMS_BCAST
, &xep
))
1841 if (rb_level
> 3 && dd
!= NULL
) {
1842 int nodeid_addsides
= 0;
1844 * Add the device names for the new sides into the namespace,
1845 * on all hosts not being deleted.
1847 if (MD_MNSET_DESC(sd
)) {
1848 nd
= sd
->sd_nodelist
;
1850 /* Find a node that is not being deleted */
1851 if (! strinlst(nd
->nd_nodename
, node_c
,
1853 nodeid_addsides
= nd
->nd_nodeid
;
1859 for (j
= 0; j
< MD_MAXSIDES
; j
++) {
1860 /* Skip empty slots */
1861 if (sd
->sd_nodes
[j
][0] == '\0')
1864 /* Find a node that is not being deleted */
1865 if (! strinlst(sd
->sd_nodes
[j
], node_c
,
1869 nodeid_addsides
= j
;
1872 if (MD_MNSET_DESC(sd
)) {
1873 nd
= sd
->sd_nodelist
;
1875 /* Skip nodes not being deleted */
1876 if (!strinlst(nd
->nd_nodename
, node_c
,
1882 /* this side was just created, add the names */
1883 if (add_md_sidenms(sp
, nd
->nd_nodeid
,
1884 nodeid_addsides
, &xep
))
1889 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
1890 /* Skip empty slots */
1891 if (sd
->sd_nodes
[i
][0] == '\0')
1894 /* Skip nodes not being deleted */
1895 if (!strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
1898 /* this side was just created, add the names */
1899 if (add_md_sidenms(sp
, i
, nodeid_addsides
,
1909 resync_genid(sp
, sd
, max_genid
, node_c
, node_v
);
1913 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
1914 if (MD_MNSET_DESC(sd
)) {
1915 nd
= sd
->sd_nodelist
;
1917 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))
1919 /* To balance lock/unlock; can send to dead node */
1920 if (clnt_unlock_set(nd
->nd_nodename
, cl_sk
, &xep
))
1925 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
1926 /* Skip empty slots */
1927 if (sd
->sd_nodes
[i
][0] == '\0')
1930 if (clnt_unlock_set(sd
->sd_nodes
[i
], cl_sk
, &xep
))
1934 cl_set_setkey(NULL
);
1936 /* release signals back to what they were on entry */
1937 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
1940 metafreereplicalist(rlp
);
1942 if (!(MD_MNSET_DESC(sd
))) {
1943 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
1957 mdsidenames_t
*sn
, **sn_next
;
1963 assert(dnp
->side_names_key
!= MD_KEYWILD
);
1965 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
1968 /* find the end of the link list */
1969 for (sn
= dnp
->side_names
; sn
->next
!= NULL
; sn
= sn
->next
)
1971 sn_next
= &sn
->next
;
1973 if (meta_replicaslice(dnp
, &rep_slice
, ep
) != 0)
1976 if ((np
= metaslicename(dnp
, rep_slice
, ep
)) == NULL
)
1979 sn
= Zalloc(sizeof (*sn
));
1980 sn
->sideno
= sideno
;
1982 if (MD_MNSET_DESC(sd
)) {
1984 * For MO diskset the sideno is not an index into
1985 * the array of nodes. Hence getside_devinfo is
1986 * used instead of meta_getnextside_devinfo.
1988 if (meta_getside_devinfo(sp
, np
->bname
, sideno
, &sn
->cname
,
1989 &sn
->dname
, &sn
->mnum
, ep
) == -1)
1992 /* decrement sideno, to look like the previous sideno */
1994 if (meta_getnextside_devinfo(sp
, np
->bname
, &sideno
,
1995 &sn
->cname
, &sn
->dname
, &sn
->mnum
, ep
) == -1)
2003 assert(sn
->sideno
== sideno
);
2005 /* Add to the end of the linked list */
2022 for (i
= 0; i
< node_c
; i
++) {
2023 if (strlen(node_v
[i
]) > (size_t)MD_MAX_NODENAME
)
2024 return (mddserror(ep
, MDE_DS_NODENAMETOOLONG
,
2025 sp
->setno
, node_v
[i
], NULL
, sp
->setname
));
2026 if (clnt_hostname(node_v
[i
], &hostname
, ep
))
2028 if (strcmp(node_v
[i
], hostname
) != 0) {
2030 return (mddserror(ep
, MDE_DS_NOTNODENAME
, sp
->setno
,
2031 node_v
[i
], NULL
, sp
->setname
));
2039 * Exported Entry Points
2043 * Check the given disk set name for syntactic correctness.
2046 meta_set_checkname(char *setname
, md_error_t
*ep
)
2050 if (strlen(setname
) > (size_t)MD_MAX_SETNAME
)
2051 return (mddserror(ep
, MDE_DS_SETNAMETOOLONG
,
2052 MD_SET_BAD
, NULL
, NULL
, setname
));
2054 for (cp
= setname
; *cp
; cp
++)
2055 if (!isprint(*cp
) || strchr(INVALID_IN_NAMES
, *cp
) != NULL
)
2056 return (mddserror(ep
, MDE_DS_INVALIDSETNAME
,
2057 MD_SET_BAD
, NULL
, NULL
, setname
));
2062 * Add host(s) to the multi-node diskset provided in sp.
2063 * - create set if non-existent.
2066 meta_multinode_set_addhosts(
2076 md_drive_desc
*dd
, *p
;
2085 md_error_t xep
= mdnullerror
;
2086 md_mnnode_desc
*nd
, *nd_curr
, *nd_prev
;
2089 mndiskset_membershiplist_t
*nl
, *nl2
;
2090 int suspendall_flag
= 0;
2091 int suspend1_flag
= 0;
2094 md_mnnode_desc
*saved_nd_next
;
2095 int remote_sets_created
= 0;
2098 * Check membershiplist first. If there's
2099 * an error, fail to create set and pass back error.
2101 if (meta_read_nodelist(&nodecnt
, &nl
, ep
) == -1) {
2104 /* Verify that all nodes are in member list */
2105 for (i
= 0; i
< node_c
; i
++) {
2107 * If node in list isn't a member of the membership,
2108 * just return error.
2110 if (meta_is_member(node_v
[i
], NULL
, nl
) == 0) {
2111 meta_free_nodelist(nl
);
2112 return (mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
2113 sp
->setno
, node_v
[i
], NULL
, sp
->setname
));
2117 * Node list is needed later, but there is a lot of error
2118 * checking and possible failures between here and there, so
2119 * just re-get the list later if there are no errors.
2121 meta_free_nodelist(nl
);
2125 * Verify that list of nodes being added contains no
2128 if (nodesuniq(sp
, node_c
, node_v
, ep
))
2132 * Verify that each node being added thinks that its nodename
2133 * is the same as the nodename given.
2135 if (validate_nodes(sp
, node_c
, node_v
, ep
))
2138 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
) {
2139 if (! mdiserror(ep
, MDE_NO_SET
))
2142 return (create_set(sp
, multi_node
, node_c
, node_v
, auto_take
,
2146 * If this node and another node were both attempting to
2147 * create the same setname at the same time, and the other
2148 * node has just created the set on this node then sd would
2149 * be non-NULL, but sp->setno would be null (setno is filled
2150 * in by the create_set). If this is true, then fail since
2151 * the other node has already won this race.
2153 if (sp
->setno
== NULL
) {
2154 return (mddserror(ep
, MDE_DS_NODEINSET
,
2155 NULL
, mynode(), NULL
, sp
->setname
));
2159 /* The auto_take behavior is inconsistent with multiple hosts. */
2160 if (auto_take
|| sd
->sd_flags
& MD_SR_AUTO_TAKE
) {
2161 (void) mddserror(ep
, MDE_DS_SINGLEHOST
, sp
->setno
, NULL
, NULL
,
2167 * We already have the set.
2170 /* Make sure we own the set */
2171 if (meta_check_ownership(sp
, ep
) != 0)
2175 * The drive and node records are stored in the local mddbs of each
2176 * node in the diskset. Each node's rpc.metad daemon reads in the set,
2177 * drive and node records from that node's local mddb and caches them
2178 * internally. Any process needing diskset information contacts its
2179 * local rpc.metad to get this information. Since each node in the
2180 * diskset is independently reading the set information from its local
2181 * mddb, the set, drive and node records in the local mddbs must stay
2182 * in-sync, so that all nodes have a consistent view of the diskset.
2184 * For a multinode diskset, explicitly verify that all nodes in the
2185 * diskset are ALIVE (i.e. are in the API membership list). Otherwise,
2186 * fail this operation since all nodes must be ALIVE in order to add
2187 * the new node record to their local mddb. If a panic of this node
2188 * leaves the local mddbs set, node and drive records out-of-sync, the
2189 * reconfig cycle will fix the local mddbs and force them back into
2192 nd
= sd
->sd_nodelist
;
2194 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
2195 return (mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
2196 sp
->setno
, nd
->nd_nodename
, NULL
,
2203 * Check if node is already in set.
2205 for (i
= 0; i
< node_c
; i
++) {
2206 /* Is node already in set? */
2207 nd
= sd
->sd_nodelist
;
2209 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
2214 return (mddserror(ep
, MDE_DS_NODEINSET
,
2215 sp
->setno
, node_v
[i
], NULL
,
2221 * Lock the set on current set members.
2222 * Set locking done much earlier for MN diskset than for traditional
2223 * diskset since lock_set and SUSPEND are used to protect against
2224 * other meta* commands running on the other nodes.
2226 /* Make sure we are blocking all signals */
2227 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
2230 nd
= sd
->sd_nodelist
;
2231 /* All nodes are guaranteed to be ALIVE */
2233 if (clnt_lock_set(nd
->nd_nodename
, sp
, ep
)) {
2241 * Lock out other meta* commands by suspending
2242 * class 1 messages across the diskset.
2244 nd
= sd
->sd_nodelist
;
2245 /* Send suspend to nodes in nodelist before addhosts call */
2246 /* All nodes are guaranteed to be ALIVE */
2248 if (clnt_mdcommdctl(nd
->nd_nodename
,
2249 COMMDCTL_SUSPEND
, sp
, MD_MSG_CLASS1
,
2250 MD_MSCF_NO_FLAGS
, ep
)) {
2258 /* Lock the set on new set members */
2259 for (i
= 0; i
< node_c
; i
++) {
2260 /* Already verified to be alive */
2261 if (clnt_lock_set(node_v
[i
], sp
, ep
)) {
2269 * Perform the required checks for new hosts
2271 for (i
= 0; i
< node_c
; i
++) {
2272 /* Make sure this set name is not used on the other hosts */
2273 has_set
= nodehasset(sp
, node_v
[i
], NHS_N_EQ
, ep
);
2275 if (! mdiserror(ep
, MDE_NO_SET
)) {
2279 /* Keep on truck'n */
2281 } else if (has_set
) {
2282 (void) mddserror(ep
, MDE_DS_NODEHASSET
, sp
->setno
,
2283 node_v
[i
], NULL
, sp
->setname
);
2288 if (clnt_setnumbusy(node_v
[i
], sp
->setno
, &bool, ep
) == -1) {
2294 (void) mddserror(ep
, MDE_DS_SETNUMBUSY
, sp
->setno
,
2295 node_v
[i
], NULL
, sp
->setname
);
2300 if (clnt_setnameok(node_v
[i
], sp
, &bool, ep
) == -1) {
2305 if (bool == FALSE
) {
2306 (void) mddserror(ep
, MDE_DS_SETNAMEBUSY
, sp
->setno
,
2307 node_v
[i
], NULL
, sp
->setname
);
2312 if (check_setdrvs_againstnode(sp
, node_v
[i
], ep
)) {
2318 /* Get drive descriptors for the set */
2319 if ((dd
= metaget_drivedesc(sp
, MD_FULLNAME_ONLY
, ep
)) == NULL
) {
2326 /* END CHECK CODE */
2328 RB_TEST(1, "addhosts", ep
)
2331 rb_level
= 1; /* level 1 */
2333 RB_TEST(2, "addhosts", ep
)
2336 * Create the set where needed
2338 if (create_set_on_hosts(sp
, multi_node
, node_c
, node_v
, 0, ep
)) {
2343 * Send suspend to rpc.mdcommd on nodes where a set has been
2344 * created since rpc.mdcommd must now be running on the remote nodes.
2346 remote_sets_created
= 1;
2347 for (i
= 0; i
< node_c
; i
++) {
2349 * Lock out other meta* commands by suspending
2350 * class 1 messages across the diskset.
2352 if (clnt_mdcommdctl(node_v
[i
],
2353 COMMDCTL_SUSPEND
, sp
, MD_MSG_CLASS1
,
2354 MD_MSCF_NO_FLAGS
, ep
)) {
2361 * Merge the new entries into the set with the existing sides.
2362 * Get membershiplist from API routine. If there's
2363 * an error, fail to create set and pass back error.
2365 if (meta_read_nodelist(&nodecnt
, &nl
, ep
) == -1) {
2368 if (meta_gettimeofday(&now
) == -1) {
2369 meta_free_nodelist(nl
);
2370 (void) mdsyserror(ep
, errno
,
2371 dgettext(TEXT_DOMAIN
, "meta_gettimeofday()"));
2374 for (nodeindex
= 0; nodeindex
< node_c
; nodeindex
++) {
2375 nd
= Zalloc(sizeof (*nd
));
2376 (void) strcpy(nd
->nd_nodename
, node_v
[nodeindex
]);
2380 if (strcmp(nl2
->msl_node_name
,
2381 node_v
[nodeindex
]) == 0) {
2382 nd
->nd_nodeid
= nl2
->msl_node_id
;
2383 (void) strcpy(nd
->nd_priv_ic
,
2384 nl2
->msl_node_addr
);
2391 * Nodelist must be kept in ascending nodeid order.
2393 if (sd
->sd_nodelist
== NULL
) {
2394 /* Nothing in list, just add it */
2395 sd
->sd_nodelist
= nd
;
2396 } else if (nd
->nd_nodeid
<
2397 sd
->sd_nodelist
->nd_nodeid
) {
2398 /* Add to head of list */
2399 nd
->nd_next
= sd
->sd_nodelist
;
2400 sd
->sd_nodelist
= nd
;
2402 nd_curr
= sd
->sd_nodelist
->nd_next
;
2403 nd_prev
= sd
->sd_nodelist
;
2404 /* Search for place to add it */
2406 if (nd
->nd_nodeid
< nd_curr
->nd_nodeid
) {
2407 /* Add before nd_curr */
2408 nd
->nd_next
= nd_curr
;
2409 nd_prev
->nd_next
= nd
;
2413 nd_curr
= nd_curr
->nd_next
;
2415 /* Add to end of list */
2416 if (nd_curr
== NULL
) {
2417 nd_prev
->nd_next
= nd
;
2421 /* Node already verified to be in membership */
2422 nd
->nd_flags
|= MD_MN_NODE_ALIVE
;
2424 meta_free_nodelist(nl
);
2426 /* If we have drives */
2429 * For all the hosts being added, create a sidename structure
2431 nd
= sd
->sd_nodelist
;
2433 /* Skip nodes not being added */
2434 if (!strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
2438 for (p
= dd
; p
!= NULL
; p
= p
->dd_next
) {
2439 if (make_sideno_sidenm(sp
, p
->dd_dnp
,
2440 nd
->nd_nodeid
, ep
) != 0)
2447 rb_level
= 2; /* level 2 */
2449 RB_TEST(4, "addhosts", ep
)
2452 * Add the new sidename for each drive to all the hosts
2454 * If a multi-node diskset, each host only stores
2455 * the side information for itself. So, only send
2456 * side information to the new hosts where each host
2457 * will add the appropriate side information to its
2460 nd
= sd
->sd_nodelist
;
2462 /* Skip nodes not being added */
2463 if (!strinlst(nd
->nd_nodename
, node_c
,
2469 /* Add side info to new hosts */
2470 if (clnt_add_drv_sidenms(nd
->nd_nodename
,
2471 mynode(), sp
, sd
, node_c
, node_v
, ep
))
2477 RB_TEST(5, "addhosts", ep
)
2480 rb_level
= 3; /* level 3 */
2482 RB_TEST(6, "addhosts", ep
)
2485 * Add the device names for the new sides into the namespace
2486 * for all hosts being added. This is adding the side
2487 * names to the diskset's mddb so add sidenames for all
2490 nd
= sd
->sd_nodelist
;
2492 /* Skip nodes not being added */
2493 if (!strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
2498 /* this side was just created, add the names */
2499 if (add_md_sidenms(sp
, nd
->nd_nodeid
,
2506 RB_TEST(7, "addhosts", ep
)
2509 rb_level
= 4; /* level 4 */
2511 RB_TEST(8, "addhosts", ep
)
2513 if (add_db_sidenms(sp
, ep
))
2521 RB_TEST(9, "addhosts", ep
)
2524 rb_level
= 5; /* level 5 */
2526 RB_TEST(10, "addhosts", ep
)
2530 * Notify rpc.mdcommd on all nodes of a nodelist change.
2531 * Start by suspending rpc.mdcommd (which drains it of all
2532 * messages), then change the nodelist followed by a reinit
2535 nd
= sd
->sd_nodelist
;
2536 /* Send suspend_all to nodes in nodelist (existing + new) */
2537 /* All nodes are guaranteed to be ALIVE */
2539 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_SUSPEND
,
2540 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
, ep
)) {
2544 suspendall_flag
= 1;
2549 /* Add the node(s) to each host that is currently in the set */
2550 nd
= sd
->sd_nodelist
;
2551 /* All nodes are guaranteed to be ALIVE */
2553 if (clnt_addhosts(nd
->nd_nodename
, sp
, node_c
, node_v
, ep
)) {
2559 RB_TEST(11, "addhosts", ep
)
2563 * Mark the drives MD_DR_OK.
2565 nd
= sd
->sd_nodelist
;
2566 /* All nodes are guaranteed to be ALIVE */
2568 if (clnt_upd_dr_flags(nd
->nd_nodename
, sp
, dd
,
2569 MD_DR_OK
, ep
) == -1)
2575 RB_TEST(12, "addhosts", ep
)
2578 rb_level
= 6; /* level 6 */
2580 RB_TEST(13, "addhosts", ep
)
2583 /* Add the mediator information to all hosts in the set. */
2584 nd
= sd
->sd_nodelist
;
2585 /* All nodes are guaranteed to be ALIVE */
2587 if (clnt_updmeds(nd
->nd_nodename
, sp
, &sd
->sd_med
, ep
))
2592 RB_TEST(14, "addhosts", ep
)
2595 * If a MN diskset and there are drives in the set,
2596 * set the master on the new nodes and
2597 * automatically join the new nodes into the set.
2602 * Is current set STALE?
2604 (void) memset(&c
, 0, sizeof (c
));
2606 c
.c_setno
= sp
->setno
;
2607 if (metaioctl(MD_DB_GETDEV
, &c
, &c
.c_mde
, NULL
) != 0) {
2608 (void) mdstealerror(ep
, &c
.c_mde
);
2612 if (c
.c_flags
& MDDB_C_STALE
) {
2613 stale_flag
= MNSET_IS_STALE
;
2616 /* Set master on newly added nodes */
2617 for (i
= 0; i
< node_c
; i
++) {
2618 if (clnt_mnsetmaster(node_v
[i
], sp
,
2619 sd
->sd_mn_master_nodenm
,
2620 sd
->sd_mn_master_nodeid
, ep
)) {
2624 /* Join newly added nodes to diskset and set OWN flag */
2625 for (i
= 0; i
< node_c
; i
++) {
2626 if (clnt_joinset(node_v
[i
], sp
, stale_flag
, ep
))
2628 nd
= sd
->sd_nodelist
;
2630 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0) {
2631 nd
->nd_flags
|= MD_MN_NODE_OWN
;
2633 * Also set ADD flag since this flag
2634 * is already set in rpc.metad - it's
2635 * just not in the local copy.
2636 * Could flush local cache and call
2637 * metaget_setdesc, but this just
2638 * adds time. Since this node knows
2639 * the state of the node flags in
2640 * rpc.metad, just set the ADD
2641 * flag and save time.
2643 nd
->nd_flags
|= MD_MN_NODE_ADD
;
2650 /* Send new node flag list to all Owner nodes */
2651 nd
= sd
->sd_nodelist
;
2653 if (!(nd
->nd_flags
& MD_MN_NODE_OWN
)) {
2658 * Will effectively set OWN flag in records kept
2659 * cached in rpc.metad. The ADD flag would have
2660 * already been set by the call to clnt_addhosts.
2662 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
2663 sd
->sd_nodelist
, MD_NR_SET
, NULL
, ep
)) {
2671 * Mark the set record MD_SR_OK
2673 nd
= sd
->sd_nodelist
;
2674 /* All nodes are guaranteed to be ALIVE */
2676 if (clnt_upd_sr_flags(nd
->nd_nodename
, sp
, MD_SR_OK
,
2685 * On each newly added node, set the node record for that node
2686 * to OK. Then set all node records for the newly added
2687 * nodes on all nodes to ok.
2689 * By setting a node's own node record to ok first, even if
2690 * the node adding the hosts panics, the rest of the nodes can
2691 * determine the same node list during the choosing of the master
2692 * during reconfig. So, only nodes considered for mastership
2693 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set
2694 * on that node's rpc.metad. If all nodes have MD_SR_OK set,
2695 * but no node has its own MD_MN_NODE_OK set, then the set will
2696 * be removed during reconfig since a panic occurred during the
2697 * creation of the initial diskset.
2700 for (i
= 0; i
< node_c
; i
++) {
2701 nd
= sd
->sd_nodelist
;
2702 /* All nodes are guaranteed to be ALIVE */
2704 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
2708 /* Something wrong, will pick this up in next loop */
2712 /* Only changing my local cache of node list */
2713 saved_nd_next
= nd
->nd_next
;
2716 /* Set node record for added host to ok on that host */
2717 if (clnt_upd_nr_flags(node_v
[i
], sp
,
2718 nd
, MD_NR_OK
, NULL
, ep
)) {
2719 nd
->nd_next
= saved_nd_next
;
2722 nd
->nd_next
= saved_nd_next
;
2725 /* Now set all node records on all nodes to be ok */
2726 nd
= sd
->sd_nodelist
;
2727 /* All nodes are guaranteed to be ALIVE */
2729 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
2730 sd
->sd_nodelist
, MD_NR_OK
, NULL
, ep
)) {
2736 RB_TEST(15, "addhosts", ep
)
2739 * Notify rpc.mdcommd on all nodes of a nodelist change.
2740 * Send reinit command to mdcommd which forces it to get
2741 * fresh set description. Then send resume.
2742 * Resume on class 0 will resume all classes, so can skip
2743 * doing an explicit resume of class1 (ignore suspend1_flag).
2745 if (suspendall_flag
) {
2747 * Don't know if nodelist contains the nodes being added
2748 * or not, so do reinit to nodes not being added (by skipping
2749 * any nodes in the nodelist being added) and then do
2750 * reinit to nodes being added if remote_sets_created is 1.
2752 nd
= sd
->sd_nodelist
;
2753 /* All nodes are guaranteed to be ALIVE */
2755 /* Skip nodes being added - handled later */
2756 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
2760 /* Class is ignored for REINIT */
2761 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_REINIT
,
2762 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
2764 (void) mdstealerror(ep
, &xep
);
2766 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2767 "Unable to reinit rpc.mdcommd.\n"));
2772 * Send reinit to added nodes that had a set created since
2773 * rpc.mdcommd is running on the nodes with a set.
2775 if (remote_sets_created
== 1) {
2776 for (i
= 0; i
< node_c
; i
++) {
2777 if (clnt_mdcommdctl(node_v
[i
], COMMDCTL_REINIT
,
2778 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
2780 (void) mdstealerror(ep
, &xep
);
2782 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2783 "Unable to reinit rpc.mdcommd.\n"));
2788 if ((suspend1_flag
) || (suspendall_flag
)) {
2790 * Unlock diskset by resuming messages across the diskset.
2791 * Just resume all classes so that resume is the same whether
2792 * just one class was locked or all classes were locked.
2794 * Don't know if nodelist contains the nodes being added
2795 * or not, so do resume_all to nodes not being added (by
2796 * skipping any nodes in the nodelist being added) and then do
2797 * resume_all to nodes being added if remote_sets_created is 1.
2799 nd
= sd
->sd_nodelist
;
2800 /* All nodes are guaranteed to be ALIVE */
2802 /* Skip nodes being added - handled later */
2803 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
2807 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_RESUME
,
2808 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
, &xep
)) {
2810 (void) mdstealerror(ep
, &xep
);
2812 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2813 "Unable to resume rpc.mdcommd.\n"));
2818 * Send resume to added nodes that had a set created since
2819 * rpc.mdcommd is running on the nodes with a set.
2821 if (remote_sets_created
== 1) {
2822 for (i
= 0; i
< node_c
; i
++) {
2823 /* Already verified to be alive */
2824 if (clnt_mdcommdctl(node_v
[i
], COMMDCTL_RESUME
,
2825 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
,
2828 (void) mdstealerror(ep
, &xep
);
2830 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2831 "Unable to resume rpc.mdcommd.\n"));
2835 meta_ping_mnset(sp
->setno
);
2837 * Start a resync thread on the newly added nodes
2838 * if set is not stale. Also start a thread to update the
2839 * abr state of all soft partitions
2841 if (stale_flag
!= MNSET_IS_STALE
) {
2842 for (i
= 0; i
< node_c
; i
++) {
2843 if (clnt_mn_mirror_resync_all(node_v
[i
],
2846 (void) mdstealerror(ep
, &xep
);
2848 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2849 "Unable to start resync "
2852 if (clnt_mn_sp_update_abr(node_v
[i
],
2855 (void) mdstealerror(ep
, &xep
);
2857 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
2858 "Unable to start sp update "
2864 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
2866 * Don't know if nodelist contains the nodes being added
2867 * or not, so do clnt_unlock_set to nodes not being added (by
2868 * skipping any nodes in the nodelist being added) and then do
2869 * clnt_unlock_set to nodes being added.
2872 nd
= sd
->sd_nodelist
;
2873 /* All nodes are guaranteed to be ALIVE */
2875 /* Skip hosts we get in the next loop */
2876 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
2880 if (clnt_unlock_set(nd
->nd_nodename
, cl_sk
, &xep
)) {
2882 (void) mdstealerror(ep
, &xep
);
2887 for (i
= 0; i
< node_c
; i
++) {
2888 /* Already verified to be alive */
2889 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
)) {
2891 (void) mdstealerror(ep
, &xep
);
2896 cl_set_setkey(NULL
);
2898 metaflushsetname(sp
);
2900 /* release signals back to what they were on entry */
2901 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
2912 * For each node being deleted, set DEL flag and
2913 * reset OK flag on that node first.
2914 * Until a node has turned off its own
2915 * rpc.metad's NODE_OK flag, that node could be
2916 * considered for master during a reconfig.
2918 for (i
= 0; i
< node_c
; i
++) {
2919 nd
= sd
->sd_nodelist
;
2920 /* All nodes are guaranteed to be ALIVE */
2922 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
2926 /* Something wrong, handle this in next loop */
2930 /* Only changing my local cache of node list */
2931 saved_nd_next
= nd
->nd_next
;
2934 /* Set flags for del host to DEL on that host */
2935 if (clnt_upd_nr_flags(node_v
[i
], sp
,
2936 nd
, MD_NR_DEL
, NULL
, &xep
)) {
2939 nd
->nd_next
= saved_nd_next
;
2942 for (i
= 0; i
< node_c
; i
++) {
2944 /* Reset master on newly added node */
2945 if (clnt_mnsetmaster(node_v
[i
], sp
, "",
2946 MD_MN_INVALID_NID
, &xep
))
2948 /* Withdraw set on newly added node */
2949 if (clnt_withdrawset(node_v
[i
], sp
, &xep
))
2953 * Turn off owner flag in nodes to be deleted
2954 * if there are drives in the set.
2955 * Also, turn off NODE_OK and turn on NODE_DEL
2956 * for nodes to be deleted.
2957 * These flags are used to set the node
2958 * record flags in all nodes in the set.
2960 nd
= sd
->sd_nodelist
;
2962 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0) {
2964 nd
->nd_flags
&= ~MD_MN_NODE_OWN
;
2966 nd
->nd_flags
|= MD_MN_NODE_DEL
;
2967 nd
->nd_flags
&= ~MD_MN_NODE_OK
;
2975 * Now, reset owner and set delete flags for the deleted
2976 * nodes on all nodes.
2978 nd
= sd
->sd_nodelist
;
2980 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
2981 sd
->sd_nodelist
, MD_NR_SET
, NULL
, &xep
)) {
2988 * On each node being deleted, set the set record
2989 * to be in DEL state.
2991 for (i
= 0; i
< node_c
; i
++) {
2992 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, &xep
)) {
3000 nd
= sd
->sd_nodelist
;
3001 /* All nodes are guaranteed to be ALIVE */
3003 if (clnt_delhosts(nd
->nd_nodename
, sp
, node_c
,
3004 node_v
, &xep
) == -1)
3011 * Notify rpc.mdcommd on all nodes of a nodelist change.
3012 * Send reinit command to mdcommd which forces it to get
3013 * fresh set description. Then send resume.
3014 * Nodelist contains all nodes (existing + added).
3016 if (suspendall_flag
) {
3018 nd
= sd
->sd_nodelist
;
3019 /* All nodes are guaranteed to be ALIVE */
3020 /* Send reinit to nodes in nodelist before addhosts call */
3023 * Skip nodes being added if remote sets were not
3024 * created since rpc.mdcommd may not be running
3025 * on the remote nodes.
3027 if ((remote_sets_created
== 0) &&
3028 (strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
3032 /* Class is ignored for REINIT */
3033 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_REINIT
,
3034 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
3035 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
3036 "Unable to reinit rpc.mdcommd.\n"));
3043 nd
= sd
->sd_nodelist
;
3044 /* All nodes are guaranteed to be ALIVE */
3047 * Skip nodes being added if remote sets were not
3048 * created since rpc.mdcommd may not be running
3049 * on the remote nodes.
3051 if ((remote_sets_created
== 0) &&
3052 (strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
3057 * Resume all classes but class 1 so that lock is held
3058 * against meta* commands.
3059 * Send resume_all_but_1 to nodes in nodelist
3060 * before addhosts call.
3062 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_RESUME
,
3063 sp
, MD_MSG_CLASS0
, MD_MSCF_DONT_RESUME_CLASS1
,
3065 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
3066 "Unable to resume rpc.mdcommd.\n"));
3071 meta_ping_mnset(sp
->setno
);
3075 /* Nodelist may or may not contain nodes being added. */
3076 if (rb_level
> 3 && dd
!= NULL
) {
3077 nd
= sd
->sd_nodelist
;
3079 /* Skip nodes not being added */
3080 if (!strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
3085 if (del_db_sidenms(sp
, nd
->nd_nodeid
, &xep
))
3092 /* Nodelist may or may not contain nodes being added. */
3093 if (rb_level
> 2 && dd
!= NULL
) {
3094 nd
= sd
->sd_nodelist
;
3096 /* Skip nodes not being added */
3097 if (!strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
3102 if (del_md_sidenms(sp
, nd
->nd_nodeid
, &xep
))
3111 /* delete the drive records */
3112 for (i
= 0; i
< node_c
; i
++) {
3113 if (clnt_deldrvs(node_v
[i
], sp
, dd
, &xep
) == -1)
3118 /* delete the set record */
3119 for (i
= 0; i
< node_c
; i
++) {
3120 if (clnt_delset(node_v
[i
], sp
, &xep
) == -1)
3126 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
3127 /* Don't test lock flag since guaranteed to be set if in rollback */
3128 /* Nodelist may or may not contain nodes being added. */
3130 * Unlock diskset by resuming messages across the diskset.
3131 * Just resume all classes so that resume is the same whether
3132 * just one class was locked or all classes were locked.
3134 if ((suspend1_flag
) || (suspendall_flag
)) {
3135 /* All nodes are guaranteed to be ALIVE */
3136 nd
= sd
->sd_nodelist
;
3139 * Skip nodes being added since remote sets
3140 * were either created and then deleted or
3141 * were never created. Either way - rpc.mdcommd
3142 * may not be running on the remote node.
3144 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
3148 if (clnt_mdcommdctl(nd
->nd_nodename
,
3149 COMMDCTL_RESUME
, sp
, MD_MSG_CLASS0
,
3150 MD_MSCF_NO_FLAGS
, &xep
)) {
3151 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
3152 "Unable to resume rpc.mdcommd.\n"));
3157 meta_ping_mnset(sp
->setno
);
3159 nd
= sd
->sd_nodelist
;
3160 /* All nodes are guaranteed to be ALIVE */
3162 /* Skip hosts we get in the next loop */
3163 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
3168 if (clnt_unlock_set(nd
->nd_nodename
, cl_sk
, &xep
))
3173 for (i
= 0; i
< node_c
; i
++)
3174 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
))
3176 cl_set_setkey(NULL
);
3178 /* release signals back to what they were on entry */
3179 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
3182 metaflushsetname(sp
);
3188 * Add host(s) to the traditional diskset provided in sp.
3189 * - create set if non-existent.
3192 meta_traditional_set_addhosts(
3202 md_drive_desc
*dd
, *p
;
3214 md_error_t xep
= mdnullerror
;
3217 if (nodesuniq(sp
, node_c
, node_v
, ep
))
3220 if (validate_nodes(sp
, node_c
, node_v
, ep
))
3223 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
) {
3224 if (! mdiserror(ep
, MDE_NO_SET
))
3227 return (create_set(sp
, multi_node
, node_c
, node_v
, auto_take
,
3231 /* The auto_take behavior is inconsistent with multiple hosts. */
3232 if (auto_take
|| sd
->sd_flags
& MD_SR_AUTO_TAKE
) {
3233 (void) mddserror(ep
, MDE_DS_SINGLEHOST
, sp
->setno
, NULL
, NULL
,
3239 * We already have the set.
3242 /* Make sure we own the set */
3243 if (meta_check_ownership(sp
, ep
) != 0)
3247 * Perform the required checks for new hosts
3249 for (i
= 0; i
< node_c
; i
++) {
3250 if (getnodeside(node_v
[i
], sd
) != MD_SIDEWILD
)
3251 return (mddserror(ep
, MDE_DS_NODEINSET
, sp
->setno
,
3252 node_v
[i
], NULL
, sp
->setname
));
3254 /* Make sure this set name is not used on the other hosts */
3255 has_set
= nodehasset(sp
, node_v
[i
], NHS_N_EQ
, ep
);
3257 if (! mdiserror(ep
, MDE_NO_SET
))
3259 /* Keep on truck'n */
3262 return (mddserror(ep
, MDE_DS_NODEHASSET
, sp
->setno
,
3263 node_v
[i
], NULL
, sp
->setname
));
3265 if (clnt_setnumbusy(node_v
[i
], sp
->setno
, &bool, ep
) == -1)
3269 return (mddserror(ep
, MDE_DS_SETNUMBUSY
, sp
->setno
,
3270 node_v
[i
], NULL
, sp
->setname
));
3272 if (clnt_setnameok(node_v
[i
], sp
, &bool, ep
) == -1)
3276 return (mddserror(ep
, MDE_DS_SETNAMEBUSY
, sp
->setno
,
3277 node_v
[i
], NULL
, sp
->setname
));
3279 if (check_setdrvs_againstnode(sp
, node_v
[i
], ep
))
3283 /* Count the number of occupied slots */
3285 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3286 /* Count occupied slots */
3287 if (sd
->sd_nodes
[i
][0] != '\0')
3291 /* Make sure that we have space to add the new sides */
3292 if ((numsides
+ node_c
) > MD_MAXSIDES
) {
3293 (void) mddserror(ep
, MDE_DS_SIDENUMNOTAVAIL
, sp
->setno
, NULL
,
3298 /* Get drive descriptors for the set */
3299 if ((dd
= metaget_drivedesc(sp
, MD_FULLNAME_ONLY
, ep
)) == NULL
)
3303 /* Setup the mediator record roll-back structure */
3304 (void) memset(&rb_medr
, '\0', sizeof (med_rec_t
));
3305 rb_medr
.med_rec_mag
= MED_REC_MAGIC
;
3306 rb_medr
.med_rec_rev
= MED_REC_REV
;
3307 rb_medr
.med_rec_fl
= 0;
3308 rb_medr
.med_rec_sn
= sp
->setno
;
3309 (void) strcpy(rb_medr
.med_rec_snm
, sp
->setname
);
3310 for (i
= 0; i
< MD_MAXSIDES
; i
++)
3311 (void) strcpy(rb_medr
.med_rec_nodes
[i
], sd
->sd_nodes
[i
]);
3312 rb_medr
.med_rec_meds
= sd
->sd_med
; /* structure assigment */
3313 (void) memset(&rb_medr
.med_rec_data
, '\0', sizeof (med_data_t
));
3314 rb_medr
.med_rec_foff
= 0;
3315 crcgen(&rb_medr
, &rb_medr
.med_rec_cks
, sizeof (med_rec_t
), NULL
);
3317 if ((max_meds
= get_max_meds(ep
)) == 0)
3320 /* END CHECK CODE */
3322 md_rb_sig_handling_on();
3324 /* Lock the set on current set members */
3325 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3326 /* Skip empty slots */
3327 if (sd
->sd_nodes
[i
][0] == '\0')
3330 if (clnt_lock_set(sd
->sd_nodes
[i
], sp
, ep
)) {
3336 /* Lock the set on new set members */
3337 for (i
= 0; i
< node_c
; i
++) {
3338 if (clnt_lock_set(node_v
[i
], sp
, ep
)) {
3344 RB_TEST(1, "addhosts", ep
)
3347 rb_level
= 1; /* level 1 */
3349 RB_TEST(2, "addhosts", ep
)
3352 * Add the new hosts to the existing set record on the existing hosts
3354 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3355 /* skip empty slots */
3356 if (sd
->sd_nodes
[i
][0] == '\0')
3359 if (clnt_addhosts(sd
->sd_nodes
[i
], sp
, node_c
, node_v
, ep
))
3364 rb_level
= 2; /* level 2 */
3366 RB_TEST(3, "addhosts", ep
);
3368 /* Merge the new entries into the set with the existing sides */
3370 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3371 /* Skip full slots */
3372 if (sd
->sd_nodes
[i
][0] != '\0')
3375 (void) strcpy(sd
->sd_nodes
[i
], node_v
[nodeindex
++]);
3376 if (nodeindex
== node_c
)
3380 /* If we have drives */
3383 * For all the hosts being added, create a sidename structure
3385 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3386 /* Skip empty slots */
3387 if (sd
->sd_nodes
[i
][0] == '\0')
3390 /* Skip nodes not being added */
3391 if (! strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3394 for (p
= dd
; p
!= NULL
; p
= p
->dd_next
) {
3395 if (make_sideno_sidenm(sp
, p
->dd_dnp
, i
,
3402 * Add the new sidename for each drive to the existing hosts
3404 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3405 /* Skip empty slots */
3406 if (sd
->sd_nodes
[i
][0] == '\0')
3409 /* Skip nodes being added */
3410 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3413 if (clnt_add_drv_sidenms(sd
->sd_nodes
[i
], mynode(), sp
,
3414 sd
, node_c
, node_v
, ep
)) {
3419 RB_TEST(4, "addhosts", ep
)
3422 rb_level
= 3; /* level 3 */
3424 RB_TEST(5, "addhosts", ep
)
3426 if (add_db_sidenms(sp
, ep
)) {
3435 RB_TEST(6, "addhosts", ep
)
3438 rb_level
= 4; /* level 4 */
3440 RB_TEST(7, "addhosts", ep
)
3443 /* create the set on the new nodes, this adds the drives as well */
3444 if (create_set_on_hosts(sp
, multi_node
, node_c
, node_v
, 0, ep
)) {
3448 RB_TEST(8, "addhosts", ep
)
3451 rb_level
= 5; /* level 5 */
3453 RB_TEST(9, "addhosts", ep
)
3458 * Add the device entries for the new sides into the namespace.
3460 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3461 /* Skip empty slots */
3462 if (sd
->sd_nodes
[i
][0] == '\0')
3465 /* Skip nodes not being added */
3466 if (! strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3469 if (add_md_sidenms(sp
, i
, MD_SIDEWILD
, ep
))
3474 RB_TEST(10, "addhosts", ep
)
3477 rb_level
= 6; /* level 6 */
3479 RB_TEST(11, "addhosts", ep
);
3483 * Mark the drives MD_DR_OK.
3485 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3486 /* Skip empty slots */
3487 if (sd
->sd_nodes
[i
][0] == '\0')
3490 if (clnt_upd_dr_flags(sd
->sd_nodes
[i
], sp
, dd
,
3491 MD_DR_OK
, ep
) == -1) {
3497 RB_TEST(12, "addhosts", ep
)
3499 /* Bring the mediator record up to date with the set record */
3500 medr
= rb_medr
; /* structure assignment */
3501 for (i
= 0; i
< MD_MAXSIDES
; i
++)
3502 (void) strcpy(medr
.med_rec_nodes
[i
], sd
->sd_nodes
[i
]);
3503 crcgen(&medr
, &medr
.med_rec_cks
, sizeof (med_rec_t
), NULL
);
3505 /* Inform the mediator hosts of the new node list */
3506 for (i
= 0; i
< max_meds
; i
++) {
3507 if (sd
->sd_med
.n_lst
[i
].a_cnt
== 0)
3510 if (clnt_med_upd_rec(&sd
->sd_med
.n_lst
[i
], sp
, &medr
, ep
))
3514 /* Add the mediator information to all hosts in the set */
3515 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3516 /* Skip empty slots */
3517 if (sd
->sd_nodes
[i
][0] == '\0')
3520 if (clnt_updmeds(sd
->sd_nodes
[i
], sp
, &sd
->sd_med
, ep
))
3524 RB_TEST(13, "addhosts", ep
)
3527 * Mark the set record MD_SR_OK
3529 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3530 /* Skip empty slots */
3531 if (sd
->sd_nodes
[i
][0] == '\0')
3534 if (clnt_upd_sr_flags(sd
->sd_nodes
[i
], sp
, MD_SR_OK
, ep
))
3538 RB_TEST(14, "addhosts", ep
)
3541 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
3542 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3543 /* Skip empty slots */
3544 if (sd
->sd_nodes
[i
][0] == '\0')
3547 /* Skip hosts we get in the next loop */
3548 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3551 if (clnt_unlock_set(sd
->sd_nodes
[i
], cl_sk
, &xep
)) {
3553 (void) mdstealerror(ep
, &xep
);
3559 for (i
= 0; i
< node_c
; i
++)
3560 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
)) {
3562 (void) mdstealerror(ep
, &xep
);
3566 cl_set_setkey(NULL
);
3568 metaflushsetname(sp
);
3570 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
3575 /* Make sure we are blocking all signals */
3576 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
3583 for (i
= 0; i
< max_meds
; i
++) {
3584 if (sd
->sd_med
.n_lst
[i
].a_cnt
== 0)
3587 if (clnt_med_upd_rec(&sd
->sd_med
.n_lst
[i
], sp
,
3592 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3593 /* Skip empty slots */
3594 if (sd
->sd_nodes
[i
][0] == '\0')
3597 /* Skip nodes not being added */
3598 if (! strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3601 if (del_md_sidenms(sp
, i
, &xep
))
3610 /* delete the drive records */
3611 for (i
= 0; i
< node_c
; i
++) {
3612 if (clnt_deldrvs(node_v
[i
], sp
, dd
, &xep
) == -1)
3616 /* delete the set record on the 'new' hosts */
3617 for (i
= 0; i
< node_c
; i
++) {
3618 if (clnt_delset(node_v
[i
], sp
, &xep
) == -1)
3624 if (rb_level
> 3 && dd
!= NULL
) {
3625 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3626 /* Skip empty slots */
3627 if (sd
->sd_nodes
[i
][0] == '\0')
3630 /* Skip nodes not being added */
3631 if (! strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3634 if (del_db_sidenms(sp
, i
, &xep
))
3640 if (rb_level
> 2 && dd
!= NULL
) {
3641 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3642 /* Skip empty slots */
3643 if (sd
->sd_nodes
[i
][0] == '\0')
3646 /* Skip nodes not being added */
3647 if (! strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3650 if (clnt_del_drv_sidenms(sd
->sd_nodes
[i
], sp
,
3658 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3659 /* Skip empty slots */
3660 if (sd
->sd_nodes
[i
][0] == '\0')
3663 if (clnt_delhosts(sd
->sd_nodes
[i
], sp
, node_c
, node_v
,
3671 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
3672 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3673 /* Skip empty slots */
3674 if (sd
->sd_nodes
[i
][0] == '\0')
3677 /* Skip hosts we get in the next loop */
3678 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
3681 if (clnt_unlock_set(sd
->sd_nodes
[i
], cl_sk
, &xep
))
3685 for (i
= 0; i
< node_c
; i
++)
3686 if (clnt_unlock_set(node_v
[i
], cl_sk
, &xep
))
3688 cl_set_setkey(NULL
);
3691 /* release signals back to what they were on entry */
3692 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
3695 metaflushsetname(sp
);
3697 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
3703 * Add host(s) to the diskset provided in sp.
3704 * - create set if non-existent.
3717 return (meta_multinode_set_addhosts(sp
, multi_node
, node_c
,
3718 node_v
, auto_take
, ep
));
3720 return (meta_traditional_set_addhosts(sp
, multi_node
, node_c
,
3721 node_v
, auto_take
, ep
));
3725 * Delete host(s) from the diskset provided in sp.
3726 * - destroy set if last host in set is removed.
3729 meta_set_deletehosts(
3746 mhd_mhiargs_t mhiargs
;
3747 md_replicalist_t
*rlp
= NULL
;
3749 ulong_t max_genid
= 0;
3753 md_error_t xep
= mdnullerror
;
3755 md_mnnode_record
*nr
;
3756 int delete_master
= 0;
3757 int suspendall_flag
= 0, suspendall_flag_rb
= 0;
3758 int suspend1_flag
= 0;
3761 int *node_id_list
= NULL
;
3762 int remote_sets_deleted
= 0;
3764 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
3768 * Verify that list of nodes being deleted contains no
3771 if (nodesuniq(sp
, node_c
, node_v
, ep
))
3774 /* Make sure we own the set */
3775 if (meta_check_ownership(sp
, ep
) != 0)
3779 * The drive and node records are stored in the local mddbs of each
3780 * node in the diskset. Each node's rpc.metad daemon reads in the set,
3781 * drive and node records from that node's local mddb and caches them
3782 * internally. Any process needing diskset information contacts its
3783 * local rpc.metad to get this information. Since each node in the
3784 * diskset is independently reading the set information from its local
3785 * mddb, the set, drive and node records in the local mddbs must stay
3786 * in-sync, so that all nodes have a consistent view of the diskset.
3788 * For a multinode diskset, explicitly verify that all nodes in the
3789 * diskset are ALIVE (i.e. are in the API membership list) if the
3790 * forceflag is FALSE. (The case of forceflag being TRUE is handled
3791 * in the OHA check below.)
3793 * If forceflag is FALSE and a node in the diskset is not in
3794 * the membership list, then fail this operation since all nodes must
3795 * be ALIVE in order to delete the node record from their local mddb.
3796 * If a panic of this node leaves the local mddbs set, node and drive
3797 * records out-of-sync, the reconfig cycle will fix the local mddbs
3798 * and force them back into synchronization.
3800 if ((forceflg
== FALSE
) && (MD_MNSET_DESC(sd
))) {
3801 nd
= sd
->sd_nodelist
;
3803 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
3804 return (mddserror(ep
, MDE_DS_NOTINMEMBERLIST
,
3805 sp
->setno
, nd
->nd_nodename
,
3806 NULL
, sp
->setname
));
3814 * Lock the set on current set members.
3815 * Set locking done much earlier for MN diskset than for traditional
3816 * diskset since lock_set and SUSPEND are used to protect against
3817 * other meta* commands running on the other nodes.
3819 if (MD_MNSET_DESC(sd
)) {
3820 /* Make sure we are blocking all signals */
3821 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
3824 nd
= sd
->sd_nodelist
;
3826 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
3831 if (clnt_lock_set(nd
->nd_nodename
, sp
, ep
)) {
3839 * Lock out other meta* commands by suspending
3840 * class 1 messages across the diskset.
3842 nd
= sd
->sd_nodelist
;
3844 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
3848 if (clnt_mdcommdctl(nd
->nd_nodename
,
3849 COMMDCTL_SUSPEND
, sp
, MD_MSG_CLASS1
,
3850 MD_MSCF_NO_FLAGS
, ep
)) {
3859 for (i
= 0; i
< node_c
; i
++)
3860 if (getnodeside(node_v
[i
], sd
) == MD_SIDEWILD
) {
3861 (void) mddserror(ep
, MDE_DS_NODENOTINSET
, sp
->setno
,
3862 node_v
[i
], NULL
, sp
->setname
);
3868 * Count the number of nodes currently in the set.
3870 if (MD_MNSET_DESC(sd
)) {
3871 nd
= sd
->sd_nodelist
;
3877 for (i
= 0; i
< MD_MAXSIDES
; i
++)
3878 /* Count full slots */
3879 if (sd
->sd_nodes
[i
][0] != '\0')
3884 * OHA mode == -f -h <hostname>
3885 * OHA is One Host Administration that occurs when the forceflag (-f)
3886 * is set and at least one host in the diskset isn't responding
3889 * When in OHA mode, a node cannot delete itself from a diskset.
3890 * When in OHA mode, a node can delete a list of nodes from a diskset
3891 * even if some of the nodes in the diskset are unresponsive.
3893 * For multinode diskset, only allow OHA mode when the nodes that
3894 * aren't responding in the diskset are not in the membership list
3895 * (i.e. nodes that aren't responding are not marked ALIVE).
3896 * Nodes that aren't in the membership list will be rejoining
3897 * the diskset through a reconfig cycle and the local mddb set
3898 * and node records can be reconciled during the reconfig cycle.
3900 * If a node isn't responding, but is still in the membership list,
3901 * fail the request since the node may not be responding because
3902 * rpc.metad died and is restarting. In this case, no reconfig
3903 * cycle will be started, so there's no way to recover if
3904 * the host delete operation was allowed.
3906 * NOTE: if nodes that weren't in the membership when the OHA host
3907 * delete occurred are now the only nodes in membership list,
3908 * those nodes will see the old view of the diskset. As soon as
3909 * a node re-enters the cluster that was present in the cluster
3910 * during the host deletion, the diskset will reflect the host
3911 * deletion on all nodes presently in the cluster.
3913 if (forceflg
== TRUE
) {
3914 if (MD_MNSET_DESC(sd
)) {
3915 nd
= sd
->sd_nodelist
;
3918 * If a node isn't ALIVE (in member list),
3919 * then allow a force-able delete in OHA mode.
3921 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
3926 * Don't test for clnt_nullproc since already
3927 * tested the RPC connections by clnt_lock_set.
3932 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
3933 /* Skip empty slots */
3934 if (sd
->sd_nodes
[i
][0] == '\0')
3937 if (clnt_nullproc(sd
->sd_nodes
[i
], ep
) == -1) {
3939 * If we timeout to at least one
3940 * client, then we can allow OHA mode,
3941 * otherwise, we are in normal mode.
3943 if (mdanyrpcerror(ep
)) {
3945 if (strinlst(sd
->sd_nodes
[i
],
3957 * Don't allow this for MN diskset since meta_set_destroy of 1 node
3958 * does NOT remove this node's node record from the other node's set
3959 * records in their local mddb. This leaves a MN diskset in a very
3962 if (!(MD_MNSET_DESC(sd
))) {
3964 if (forceflg
== TRUE
&& node_c
== 1 &&
3965 strcmp(mynode(), node_v
[0]) == 0) {
3966 /* Can return since !MN diskset so nothing to unlock */
3967 return (meta_set_destroy(sp
, TRUE
, ep
));
3973 * In multinode diskset, can only delete self if this
3974 * is the last node in the set or if all nodes in
3975 * the set are being deleted. The traditional diskset code
3976 * allows a node to delete itself (when there are other nodes
3977 * in the diskset) when using the force flag, but that code
3978 * path doesn't have the node remove itself from
3979 * the set node list on the other nodes. Since this isn't
3980 * satisfactory for the multinode diskset, just don't
3981 * allow this operation.
3983 if (MD_MNSET_DESC(sd
) && (numsides
> 1) && (node_c
!= numsides
) &&
3984 strinlst(mynode(), node_c
, node_v
)) {
3985 (void) mddserror(ep
, MDE_DS_MNCANTDELSELF
, sp
->setno
,
3986 mynode(), NULL
, sp
->setname
);
3992 * In multinode diskset, don't allow deletion of master node unless
3993 * this is the only node left or unless all nodes are being
3994 * deleted since there is no way to switch
3995 * master ownership (unless via a cluster reconfig cycle).
3997 delete_master
= strinlst(sd
->sd_mn_master_nodenm
, node_c
, node_v
);
3998 if (MD_MNSET_DESC(sd
) && (numsides
> 1) && (node_c
!= numsides
) &&
4000 (void) mddserror(ep
, MDE_DS_CANTDELMASTER
, sp
->setno
,
4001 sd
->sd_mn_master_nodenm
, NULL
, sp
->setname
);
4007 /* Deleting self w/o forceflg */
4008 if (forceflg
== FALSE
&& numsides
> 1 &&
4009 strinlst(mynode(), node_c
, node_v
)) {
4010 (void) mddserror(ep
, MDE_DS_CANTDELSELF
, sp
->setno
,
4011 mynode(), NULL
, sp
->setname
);
4017 * Setup the mediator record roll-back structure for a trad diskset.
4019 * For a MN diskset, the deletion of a host in the diskset
4020 * does not cause an update of the mediator record. If the
4021 * host deletion will cause the diskset to be removed (this is
4022 * the last host being removed or all hosts are being removed)
4023 * then the mediator record must have already been removed by the
4024 * user or this delete host operation will fail (a check for
4025 * this is done later in this routine).
4027 if (!(MD_MNSET_DESC(sd
))) {
4028 (void) memset(&rb_medr
, '\0', sizeof (med_rec_t
));
4029 rb_medr
.med_rec_mag
= MED_REC_MAGIC
;
4030 rb_medr
.med_rec_rev
= MED_REC_REV
;
4031 rb_medr
.med_rec_fl
= 0;
4032 rb_medr
.med_rec_sn
= sp
->setno
;
4033 (void) strcpy(rb_medr
.med_rec_snm
, sp
->setname
);
4034 for (i
= 0; i
< MD_MAXSIDES
; i
++)
4035 (void) strcpy(rb_medr
.med_rec_nodes
[i
],
4037 rb_medr
.med_rec_meds
= sd
->sd_med
; /* structure assignment */
4038 (void) memset(&rb_medr
.med_rec_data
, '\0', sizeof (med_data_t
));
4039 rb_medr
.med_rec_foff
= 0;
4040 crcgen(&rb_medr
, &rb_medr
.med_rec_cks
,
4041 sizeof (med_rec_t
), NULL
);
4043 /* Bring the mediator record up to date with the set record */
4044 medr
= rb_medr
; /* structure assignment */
4046 if ((max_meds
= get_max_meds(ep
)) == 0) {
4053 * For traditional diskset:
4054 * Check to see if all the hosts we are trying to delete the set from
4055 * have a set "setname" that is the same as ours, i.e. - same name,
4056 * same time stamp, same genid. We only do this if forceflg is not
4057 * specified or we are in OHA mode.
4059 if (!(MD_MNSET_DESC(sd
)) && (forceflg
== FALSE
|| oha
== TRUE
)) {
4060 int fix_node_v
= FALSE
;
4063 for (i
= 0; i
< node_c
; i
++) {
4064 /* We skip this side */
4065 if (strcmp(mynode(), node_v
[i
]) == 0)
4068 has_set
= nodehasset(sp
, node_v
[i
], NHS_NSTG_EQ
, ep
);
4074 * Can't talk to the host only allowed in OHA
4077 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4083 * We got an error we do not, or are not,
4084 * prepared to handle.
4086 if (! mdiserror(ep
, MDE_NO_SET
) &&
4087 ! mdismddberror(ep
, MDE_DB_NODB
)) {
4094 * If we got here: both hosts are up; a host in
4095 * our set record does not have the set. So we
4096 * delete the host from our set and invalidate
4099 anode
[0] = Strdup(node_v
[i
]);
4101 rval
= del_host_noset(sp
, anode
, ep
);
4104 * If we delete a host, make sure the mediator
4105 * hosts are made aware of this.
4107 for (j
= 0; j
< MD_MAXSIDES
; j
++) {
4108 if (strcmp(medr
.med_rec_nodes
[j
],
4111 (void) memset(&medr
.med_rec_nodes
[j
],
4112 '\0', sizeof (md_node_nm_t
));
4114 crcgen(&medr
, &medr
.med_rec_cks
,
4115 sizeof (med_rec_t
), NULL
);
4117 rb_medr
= medr
; /* struct assignment */
4124 node_v
[i
][0] = '\0';
4130 * If we can talk to the host, and they do not have the
4131 * exact set, then we disallow the operation.
4133 if (has_set
== FALSE
) {
4134 (void) mddserror(ep
, MDE_DS_NODENOSET
,
4135 sp
->setno
, node_v
[i
], NULL
, sp
->setname
);
4142 * Here we prune the node_v's that were invalidated above.
4144 if (fix_node_v
== TRUE
) {
4146 while (i
< node_c
) {
4147 if (node_v
[i
][0] == '\0') {
4148 for (j
= i
; (j
+ 1) < node_c
; j
++)
4149 node_v
[j
] = node_v
[j
+ 1];
4155 * If we are left with no nodes, then we have
4156 * completed the operation.
4160 * Inform the mediator hosts of the new node
4163 for (i
= 0; i
< max_meds
; i
++) {
4164 if (sd
->sd_med
.n_lst
[i
].a_cnt
== 0)
4167 if (clnt_med_upd_rec(
4168 &sd
->sd_med
.n_lst
[i
], sp
, &medr
,
4179 * For multinode diskset:
4180 * If forceflag is FALSE then check to see if all the hosts we
4181 * are trying to delete the set from have a set "setname" that
4182 * is the same as ours, i.e. - same name, same time stamp, same genid.
4183 * If forceflag is TRUE, then we don't care if the hosts being
4184 * deleted have the same set information or not since user is forcing
4185 * those hosts to be deleted.
4187 if ((MD_MNSET_DESC(sd
)) && (forceflg
== FALSE
)) {
4188 for (i
= 0; i
< node_c
; i
++) {
4189 /* We skip this node since comparing against it */
4190 if (strcmp(mynode(), node_v
[i
]) == 0)
4193 has_set
= nodehasset(sp
, node_v
[i
], NHS_NSTG_EQ
, ep
);
4201 * If we can talk to the host, and they do not have the
4202 * exact set, then we disallow the operation.
4204 if (has_set
== FALSE
) {
4205 (void) mddserror(ep
, MDE_DS_NODENOSET
,
4206 sp
->setno
, node_v
[i
], NULL
, sp
->setname
);
4214 * For traditional diskset:
4215 * Can't allow user to delete their node (without deleting all nodes)
4216 * out of a set in OHA mode, would leave a real mess.
4217 * This action was already failed above for a MN diskset.
4219 if (!(MD_MNSET_DESC(sd
)) && (oha
== TRUE
) &&
4220 strinlst(mynode(), node_c
, node_v
)) {
4221 /* Can directly return since !MN diskset; nothing to unlock */
4222 return (mddserror(ep
, MDE_DS_OHACANTDELSELF
, sp
->setno
,
4223 mynode(), NULL
, sp
->setname
));
4227 /* Get the drive descriptors for this set */
4228 if ((dd
= metaget_drivedesc(sp
, (MD_BASICNAME_OK
| PRINT_FAST
),
4237 * We have been asked to delete all the hosts in the set, i.e. - delete
4240 if (node_c
== numsides
) {
4242 * This is only a valid operation if all drives have been
4247 (void) mddserror(ep
, MDE_DS_HASDRIVES
, sp
->setno
,
4248 NULL
, NULL
, sp
->setname
);
4254 * If a mediator is currently associated with this set,
4255 * fail the deletion of the last host(s).
4257 if (sd
->sd_med
.n_cnt
!= 0) {
4258 (void) mddserror(ep
, MDE_DS_HASMED
, sp
->setno
,
4259 NULL
, NULL
, sp
->setname
);
4269 rval
= del_set_nodrives(sp
, node_c
, node_v
, oha
, ep
);
4270 remote_sets_deleted
= 1;
4275 * Get timeout values in case we need to roll back
4277 (void) memset(&mhiargs
, '\0', sizeof (mhiargs
));
4278 if (clnt_gtimeout(mynode(), sp
, &mhiargs
, ep
) != 0) {
4285 * We need this around for re-adding DB side names later.
4287 if (metareplicalist(sp
, MD_BASICNAME_OK
, &rlp
, ep
) < 0) {
4293 * Alloc nodeid list if drives are present in diskset.
4294 * nodeid list is used to reset mirror owners if the
4295 * owner is a deleted node.
4297 if (MD_MNSET_DESC(sd
)) {
4298 node_id_list
= Zalloc(sizeof (int) * node_c
);
4302 /* Lock the set on current set members */
4303 if (!(MD_MNSET_DESC(sd
))) {
4304 md_rb_sig_handling_on();
4305 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
4306 /* Skip empty slots */
4307 if (sd
->sd_nodes
[i
][0] == '\0')
4310 if (clnt_lock_set(sd
->sd_nodes
[i
], sp
, ep
)) {
4311 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4322 RB_TEST(1, "deletehosts", ep
)
4325 rb_level
= 1; /* level 1 */
4327 RB_TEST(2, "deletehosts", ep
)
4329 if (MD_MNSET_DESC(sd
)) {
4330 md_mnnode_desc
*saved_nd_next
;
4335 * Notify rpc.mdcommd on all nodes of a nodelist change.
4336 * Start by suspending rpc.mdcommd (which drains it of
4337 * all messages), then change the nodelist followed
4338 * by a reinit and resume.
4340 nd
= sd
->sd_nodelist
;
4342 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
4346 if (clnt_mdcommdctl(nd
->nd_nodename
,
4347 COMMDCTL_SUSPEND
, sp
,
4349 MD_MSCF_NO_FLAGS
, ep
)) {
4353 suspendall_flag
= 1;
4357 * Is current set STALE?
4358 * Need to know this if delete host fails and node
4359 * is re-joined to diskset.
4361 (void) memset(&c
, 0, sizeof (c
));
4363 c
.c_setno
= sp
->setno
;
4364 if (metaioctl(MD_DB_GETDEV
, &c
, &c
.c_mde
, NULL
) != 0) {
4365 (void) mdstealerror(ep
, &c
.c_mde
);
4369 if (c
.c_flags
& MDDB_C_STALE
) {
4370 stale_flag
= MNSET_IS_STALE
;
4375 * For each node being deleted, set DEL flag and
4376 * reset OK flag on that node first.
4377 * Until a node has turned off its own
4378 * rpc.metad's NODE_OK flag, that node could be
4379 * considered for master during a reconfig.
4381 for (i
= 0; i
< node_c
; i
++) {
4383 * During OHA mode, don't issue RPCs to
4384 * non-alive nodes since there is no reason to
4385 * wait for RPC timeouts.
4387 nd
= sd
->sd_nodelist
;
4389 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0)
4393 /* Something wrong, handle this in next loop */
4397 /* If node_id_list is alloc'd, fill in for later use */
4399 node_id_list
[i
] = nd
->nd_nodeid
;
4401 /* All nodes are guaranteed to be ALIVE unless OHA */
4402 if ((oha
== TRUE
) &&
4403 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4407 /* Only changing my local cache of node list */
4408 saved_nd_next
= nd
->nd_next
;
4411 /* Set flags for del host to DEL on that host */
4412 if (clnt_upd_nr_flags(node_v
[i
], sp
,
4413 nd
, MD_NR_DEL
, NULL
, ep
)) {
4414 nd
->nd_next
= saved_nd_next
;
4417 nd
->nd_next
= saved_nd_next
;
4419 for (i
= 0; i
< node_c
; i
++) {
4421 * Turn off owner flag in nodes to be deleted
4422 * if this node has been joined.
4423 * Also, turn off NODE_OK and turn on NODE_DEL
4424 * for nodes to be deleted.
4425 * These flags are used to set the node
4426 * record flags in all nodes in the set.
4427 * Only withdraw nodes that are joined.
4429 nd
= sd
->sd_nodelist
;
4432 * Don't communicate with non-ALIVE node if
4433 * in OHA - but set flags in master list so
4434 * alive nodes are updated correctly.
4436 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0) {
4437 if ((oha
== TRUE
) && (!(nd
->nd_flags
&
4438 MD_MN_NODE_ALIVE
))) {
4439 nd
->nd_flags
|= MD_MN_NODE_DEL
;
4440 nd
->nd_flags
&= ~MD_MN_NODE_OK
;
4444 if (nd
->nd_flags
& MD_MN_NODE_OWN
) {
4446 * Going to set locally cached
4447 * node flags to rollback join
4448 * so in case of error, the
4449 * rollback code knows which
4450 * nodes to re-join. rpc.metad
4451 * ignores the RB_JOIN flag.
4455 nd
->nd_flags
&= ~MD_MN_NODE_OWN
;
4458 * Be careful in ordering of
4459 * following steps so that
4460 * recovery from a panic
4461 * between the steps is viable.
4462 * Only reset master info in
4463 * rpc.metad - don't reset
4464 * local cached info which will
4465 * be used to set master info
4466 * back if failure (rollback).
4468 if (clnt_withdrawset(
4469 nd
->nd_nodename
, sp
, ep
))
4473 * Reset master on deleted node
4475 if (clnt_mnsetmaster(node_v
[i
],
4476 sp
, "", MD_MN_INVALID_NID
,
4481 nd
->nd_flags
|= MD_MN_NODE_DEL
;
4482 nd
->nd_flags
&= ~MD_MN_NODE_OK
;
4489 * Now, reset owner and set delete flags for the
4490 * deleted nodes on all nodes.
4492 nd
= sd
->sd_nodelist
;
4494 /* Skip non-ALIVE node if in OHA */
4495 if ((oha
== TRUE
) &&
4496 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4500 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
4501 sd
->sd_nodelist
, MD_NR_SET
, NULL
, ep
)) {
4507 * Notify rpc.mdcommd on all nodes of a nodelist change.
4508 * Send reinit command to mdcommd which forces it to get
4509 * fresh set description.
4511 if (suspendall_flag
) {
4513 nd
= sd
->sd_nodelist
;
4515 if ((oha
== TRUE
) &&
4516 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4520 /* Class is ignored for REINIT */
4521 if (clnt_mdcommdctl(nd
->nd_nodename
,
4522 COMMDCTL_REINIT
, sp
, NULL
,
4523 MD_MSCF_NO_FLAGS
, ep
)) {
4524 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
4525 "Unable to reinit rpc.mdcommd.\n"));
4531 nd
= sd
->sd_nodelist
;
4533 if ((oha
== TRUE
) &&
4534 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4538 if (clnt_mdcommdctl(nd
->nd_nodename
,
4539 COMMDCTL_RESUME
, sp
, MD_MSG_CLASS0
,
4540 MD_MSCF_DONT_RESUME_CLASS1
, ep
)) {
4541 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
4542 "Unable to resume rpc.mdcommd.\n"));
4547 meta_ping_mnset(sp
->setno
);
4553 * Mark the set record MD_SR_DEL on the hosts we are deleting
4554 * If a MN diskset and OHA mode, don't issue RPC to nodes that
4556 * If a MN diskset and not in OHA mode, then all nodes must respond
4557 * to RPC (be alive) or this routine will return failure.
4558 * If a traditional diskset, ignore all RPC failures if in OHA mode.
4560 for (i
= 0; i
< node_c
; i
++) {
4562 RB_TEST(3, "deletehosts", ep
)
4564 if ((MD_MNSET_DESC(sd
)) && (oha
== TRUE
)) {
4566 * During OHA mode, don't issue RPCs to
4567 * non-alive nodes since there is no reason to
4568 * wait for RPC timeouts.
4570 nd
= sd
->sd_nodelist
;
4572 if (strcmp(nd
->nd_nodename
, node_v
[i
]) == 0) {
4578 (void) mddserror(ep
, MDE_DS_NODENOTINSET
,
4579 sp
->setno
, node_v
[i
], NULL
, sp
->setname
);
4581 } else if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
4582 /* Skip non-ALIVE node if in OHA mode */
4585 if (clnt_upd_sr_flags(node_v
[i
], sp
,
4590 } else if ((MD_MNSET_DESC(sd
)) && (oha
== FALSE
)) {
4592 * All nodes should be alive in non-oha mode.
4594 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, ep
)) {
4599 * For traditional diskset, issue the RPC and
4600 * ignore RPC failure if in OHA mode.
4602 if (clnt_upd_sr_flags(node_v
[i
], sp
, MD_SR_DEL
, ep
)) {
4603 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4611 RB_TEST(4, "deletehosts", ep
)
4614 RB_TEST(5, "deletehosts", ep
)
4617 rb_level
= 2; /* level 2 */
4619 RB_TEST(6, "deletehosts", ep
)
4621 /* Delete the set on the hosts we are deleting */
4622 if (del_set_on_hosts(sp
, sd
, dd
, node_c
, node_v
, oha
, ep
)) {
4626 * Failure during del_set_on_hosts would have recreated
4627 * the diskset on the remote hosts, but for multi-owner
4628 * disksets need to set node flags properly and REINIT and
4629 * RESUME rpc.mdcommd, so just let the rollback code
4632 if (MD_MNSET_DESC(sd
))
4636 remote_sets_deleted
= 1;
4638 RB_TEST(19, "deletehosts", ep
)
4641 rb_level
= 3; /* level 3 */
4643 RB_TEST(20, "deletehosts", ep
)
4645 /* Delete the host from sets on hosts not being deleted */
4646 if (MD_MNSET_DESC(sd
)) {
4647 nd
= sd
->sd_nodelist
;
4648 /* All nodes are guaranteed to be ALIVE unless in oha mode */
4651 * During OHA mode, don't issue RPCs to
4652 * non-alive nodes since there is no reason to
4653 * wait for RPC timeouts.
4655 if ((oha
== TRUE
) &&
4656 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4661 /* Skip nodes being deleted */
4662 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
4666 if (clnt_delhosts(nd
->nd_nodename
, sp
, node_c
, node_v
,
4671 RB_TEST(21, "deletehosts", ep
)
4675 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
4676 /* Skip empty slots */
4677 if (sd
->sd_nodes
[i
][0] == '\0')
4680 /* Skip nodes being deleted */
4681 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
4684 if (clnt_delhosts(sd
->sd_nodes
[i
], sp
, node_c
, node_v
,
4686 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4693 RB_TEST(21, "deletehosts", ep
)
4697 /* We have drives */
4699 RB_TEST(22, "deletehosts", ep
)
4702 rb_level
= 4; /* level 4 */
4704 RB_TEST(23, "deletehosts", ep
)
4707 * Delete the old sidename for each drive on all the hosts.
4708 * If a multi-node diskset, each host only stores
4709 * the side information for itself. So, a multi-node
4710 * diskset doesn't delete the old sidename for
4713 * If a MN diskset, reset owners of mirrors that are
4714 * owned by the deleted nodes.
4716 if (!(MD_MNSET_DESC(sd
))) {
4717 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
4718 /* Skip empty slots */
4719 if (sd
->sd_nodes
[i
][0] == '\0')
4722 /* Skip nodes being deleted */
4723 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
4726 if (clnt_del_drv_sidenms(sd
->sd_nodes
[i
], sp
,
4728 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4732 metaflushsetname(sp
);
4736 RB_TEST(24, "deletehosts", ep
)
4739 nd
= sd
->sd_nodelist
;
4740 /* All nodes guaranteed ALIVE unless in oha mode */
4743 * If mirror owner was set to a deleted node,
4744 * then each existing node resets mirror owner
4747 * During OHA mode, don't issue RPCs to
4748 * non-alive nodes since there is no reason to
4749 * wait for RPC timeouts.
4751 if ((oha
== TRUE
) &&
4752 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4757 /* Skip nodes being deleted */
4758 if (strinlst(nd
->nd_nodename
, node_c
, node_v
)) {
4764 * If mirror owner is a deleted node, reset
4765 * mirror owners to NULL. If an error occurs,
4766 * print a warning and continue. Don't fail
4767 * metaset because of mirror owner reset
4768 * problem since next node to grab mirror
4769 * will resolve this issue. Before next node
4770 * grabs mirrors, metaset will show the deleted
4771 * node as owner which is why an attempt to
4772 * reset the mirror owner is made.
4774 if (clnt_reset_mirror_owner(nd
->nd_nodename
, sp
,
4775 node_c
, &node_id_list
[0], &xep
) == -1) {
4776 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
4777 "Unable to reset mirror owner on"
4778 " node %s\n"), nd
->nd_nodename
);
4782 RB_TEST(21, "deletehosts", ep
)
4788 RB_TEST(25, "deletehosts", ep
)
4791 rb_level
= 4; /* level 4 */
4793 RB_TEST(26, "deletehosts", ep
)
4796 * Bring the mediator record up to date with the set record for
4797 * traditional diskset.
4799 if (!(MD_MNSET_DESC(sd
))) {
4800 medr
= rb_medr
; /* structure assignment */
4801 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
4802 if (strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
4803 (void) memset(&medr
.med_rec_nodes
[i
],
4804 '\0', sizeof (md_node_nm_t
));
4806 (void) strcpy(medr
.med_rec_nodes
[i
],
4809 crcgen(&medr
, &medr
.med_rec_cks
, sizeof (med_rec_t
), NULL
);
4811 /* Inform the mediator hosts of the new node list */
4812 for (i
= 0; i
< max_meds
; i
++) {
4813 if (sd
->sd_med
.n_lst
[i
].a_cnt
== 0)
4816 if (clnt_med_upd_rec(&sd
->sd_med
.n_lst
[i
], sp
,
4818 if (oha
== TRUE
&& mdanyrpcerror(ep
)) {
4827 RB_TEST(27, "deletehosts", ep
)
4830 * For traditional diskset:
4831 * We are deleting ourselves out of the set and we have drives to
4832 * consider; so we need to halt the set, release the drives and
4833 * reset the timeout. **** THIS IS A ONE WAY TICKET, NO ROLL BACK
4834 * IS POSSIBLE AS SOON AS THE HALT SET COMPLETES, SO THIS IS DONE
4835 * WITH ALL SIGNALS BLOCKED AND LAST ****
4837 * This situation cannot occur in a MN diskset since a node can't
4838 * delete itself unless all nodes are being deleted and a diskset
4839 * cannot contain any drives if all nodes are being deleted.
4840 * So, don't even test for this if a MN diskset.
4842 if (!(MD_MNSET_DESC(sd
)) && (dd
!= NULL
) &&
4843 strinlst(mynode(), node_c
, node_v
)) {
4844 /* Make sure we are blocking all signals */
4845 if (procsigs(TRUE
, &oldsigs
, ep
) < 0) {
4850 if (halt_set(sp
, ep
)) {
4855 if (rel_own_bydd(sp
, dd
, FALSE
, ep
))
4859 /* release signals back to what they were on entry */
4860 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0) {
4862 (void) mdstealerror(ep
, &xep
);
4869 * Unlock diskset by resuming messages across the diskset.
4870 * Just resume all classes so that resume is the same whether
4871 * just one class was locked or all classes were locked.
4873 if ((suspend1_flag
) || (suspendall_flag
)) {
4875 nd
= sd
->sd_nodelist
;
4877 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
4882 * Skip nodes being deleted if remote set
4883 * was deleted since rpc.mdcommd may no longer
4884 * be running on remote node.
4886 if ((remote_sets_deleted
== 1) &&
4887 (strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
4891 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_RESUME
,
4892 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
, &xep
)) {
4894 (void) mdstealerror(ep
, &xep
);
4896 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
4897 "Unable to resume rpc.mdcommd.\n"));
4901 meta_ping_mnset(sp
->setno
);
4904 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
4906 if (MD_MNSET_DESC(sd
)) {
4907 nd
= sd
->sd_nodelist
;
4910 * During OHA mode, don't issue RPCs to
4911 * non-alive nodes since there is no reason to
4912 * wait for RPC timeouts.
4914 if ((oha
== TRUE
) &&
4915 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
4919 if (clnt_unlock_set(nd
->nd_nodename
,
4922 (void) mdstealerror(ep
, &xep
);
4928 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
4929 /* Skip empty slots */
4930 if (sd
->sd_nodes
[i
][0] == '\0')
4933 if (clnt_unlock_set(sd
->sd_nodes
[i
],
4936 mdanyrpcerror(&xep
)) {
4941 (void) mdstealerror(ep
, &xep
);
4947 cl_set_setkey(NULL
);
4950 metafreereplicalist(rlp
);
4954 metaflushsetname(sp
);
4956 if (MD_MNSET_DESC(sd
)) {
4957 /* release signals back to what they were on entry */
4958 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
4961 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
4968 /* all signals already blocked for MN diskset */
4969 if (!(MD_MNSET_DESC(sd
))) {
4970 if (procsigs(TRUE
, &oldsigs
, &xep
) < 0)
4976 max_genid
= sd
->sd_genid
;
4980 * Send reinit command to rpc.mdcommd which forces it to get
4981 * fresh set description and resume all classes but class 0.
4982 * Don't send any commands to rpc.mdcommd if set on that node
4985 if (suspendall_flag
) {
4987 nd
= sd
->sd_nodelist
;
4989 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
4994 * If the remote set was deleted, rpc.mdcommd
4995 * may no longer be running so send nothing to it.
4997 if ((remote_sets_deleted
== 1) &&
4998 (strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
5002 /* Class is ignored for REINIT */
5003 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_REINIT
,
5004 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
5005 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
5006 "Unable to reinit rpc.mdcommd.\n"));
5012 nd
= sd
->sd_nodelist
;
5014 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
5019 * If the remote set was deleted, rpc.mdcommd
5020 * may no longer be running so send nothing to it.
5022 if ((remote_sets_deleted
== 1) &&
5023 (strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
5027 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_RESUME
,
5028 sp
, MD_MSG_CLASS0
, MD_MSCF_DONT_RESUME_CLASS1
,
5030 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
5031 "Unable to resume rpc.mdcommd.\n"));
5036 meta_ping_mnset(sp
->setno
);
5042 md_replicalist_t
*rl
;
5044 recreate_set(sp
, sd
);
5047 * Lock out other meta* commands on nodes with the newly
5048 * re-created sets by suspending class 1 messages
5049 * across the diskset.
5051 nd
= sd
->sd_nodelist
;
5053 /* Skip nodes not being deleted */
5054 if (!(strinlst(nd
->nd_nodename
, node_c
, node_v
))) {
5058 /* Suspend commd on nodes with re-created sets */
5059 if (clnt_mdcommdctl(nd
->nd_nodename
,
5060 COMMDCTL_SUSPEND
, sp
, MD_MSG_CLASS1
,
5061 MD_MSCF_NO_FLAGS
, &xep
)) {
5062 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
5063 "Unable to suspend rpc.mdcommd.\n"));
5072 * See if we have to re-add the drives specified.
5074 for (i
= 0; i
< node_c
; i
++) {
5075 if (MD_MNSET_DESC(sd
) && (oha
== TRUE
)) {
5077 * During OHA mode, don't issue RPCs to
5078 * non-alive nodes since there is no reason to
5079 * wait for RPC timeouts.
5081 nd
= sd
->sd_nodelist
;
5083 if (strcmp(nd
->nd_nodename
, node_v
[i
])
5091 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))
5095 /* Don't care if set record is MN or not */
5096 if (clnt_getset(node_v
[i
], sp
->setname
, MD_SET_BAD
, &sr
,
5102 /* Drive already added, skip to next node */
5103 if (sr
->sr_drivechain
!= NULL
) {
5105 * Set record structure was allocated from RPC
5106 * routine getset so this structure is only of
5107 * size md_set_record even if the MN flag is
5108 * set. So, clear the flag so that the free
5109 * code doesn't attempt to free a structure
5110 * the size of md_mnset_record.
5112 sr
->sr_flags
&= ~MD_SR_MN
;
5117 if (clnt_adddrvs(node_v
[i
], sp
, dd
, sr
->sr_ctime
,
5118 sr
->sr_genid
, &xep
) == -1)
5121 if (clnt_upd_dr_flags(node_v
[i
], sp
, dd
, MD_DR_OK
,
5126 * Set record structure was allocated from RPC routine
5127 * getset so this structure is only of size
5128 * md_set_record even if the MN flag is set. So,
5129 * clear the flag so that the free code doesn't
5130 * attempt to free a structure the size of
5133 sr
->sr_flags
&= ~MD_SR_MN
;
5138 for (rl
= rlp
; rl
!= NULL
; rl
= rl
->rl_next
) {
5139 md_replica_t
*r
= rl
->rl_repp
;
5141 * This is not the first replica being added to the
5142 * diskset so call with ADDSIDENMS_BCAST. If this
5143 * is a traditional diskset, the bcast flag is ignored
5144 * since traditional disksets don't use the rpc.mdcommd.
5146 if (meta_db_addsidenms(sp
, r
->r_namep
, r
->r_blkno
,
5147 DB_ADDSIDENMS_BCAST
, &xep
))
5152 * Add the device names for the new sides into the namespace,
5153 * on all hosts not being deleted.
5155 if (MD_MNSET_DESC(sd
)) {
5156 nd
= sd
->sd_nodelist
;
5158 /* Find a node that is not being deleted */
5159 if (!strinlst(nd
->nd_nodename
, node_c
,
5167 for (j
= 0; j
< MD_MAXSIDES
; j
++) {
5168 /* Skip empty slots */
5169 if (sd
->sd_nodes
[j
][0] == '\0')
5172 /* Find a node that is not being deleted */
5173 if (!strinlst(sd
->sd_nodes
[j
], node_c
, node_v
))
5178 if (MD_MNSET_DESC(sd
)) {
5179 nd
= sd
->sd_nodelist
;
5181 /* Skip nodes not being deleted */
5182 if (!strinlst(nd
->nd_nodename
, node_c
,
5188 /* this side was just created, add the names */
5189 if (add_md_sidenms(sp
, nd
->nd_nodeid
, j
, &xep
))
5194 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
5195 /* Skip empty slots */
5196 if (sd
->sd_nodes
[i
][0] == '\0')
5199 /* Skip nodes not being deleted */
5200 if (!strinlst(sd
->sd_nodes
[i
], node_c
, node_v
))
5203 /* this side was just created, add the names */
5204 if (add_md_sidenms(sp
, i
, j
, &xep
))
5211 if (rb_level
> 3 && dd
!= NULL
) {
5213 * Add the new sidename for each drive to all the hosts
5214 * Multi-node disksets only store the sidename for
5215 * that host, so there is nothing to re-add.
5217 if (!(MD_MNSET_DESC(sd
))) {
5218 for (j
= 0; j
< MD_MAXSIDES
; j
++) {
5219 /* Skip empty slots */
5220 if (sd
->sd_nodes
[j
][0] == '\0')
5223 /* Skip nodes not being deleted */
5224 if (!strinlst(sd
->sd_nodes
[j
], node_c
, node_v
))
5227 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
5228 /* Skip empty slots */
5229 if (sd
->sd_nodes
[i
][0] == '\0')
5232 if (clnt_add_drv_sidenms(sd
->sd_nodes
[i
],
5233 sd
->sd_nodes
[j
], sp
, sd
, node_c
, node_v
,
5242 if ((rb_level
> 4) && (!(MD_MNSET_DESC(sd
)))) {
5243 /* rollback the mediator record */
5244 for (i
= 0; i
< max_meds
; i
++) {
5245 if (sd
->sd_med
.n_lst
[i
].a_cnt
== 0)
5248 if (clnt_med_upd_rec(&sd
->sd_med
.n_lst
[i
], sp
,
5257 md_mnset_record
*mnsr
;
5259 if (MD_MNSET_DESC(sd
)) {
5260 nd
= sd
->sd_nodelist
;
5262 * During OHA mode, don't issue RPCs to
5263 * non-alive nodes since there is no reason to
5264 * wait for RPC timeouts.
5267 if ((oha
== TRUE
) &&
5268 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
5272 /* Record should be for a multi-node diskset */
5273 if (clnt_mngetset(nd
->nd_nodename
, sp
->setname
,
5274 MD_SET_BAD
, &mnsr
, &xep
) == -1) {
5282 nr
= mnsr
->sr_nodechain
;
5284 if (nd
->nd_nodeid
== nr
->nr_nodeid
) {
5292 free_sr((struct md_set_record
*)mnsr
);
5298 if (clnt_addhosts(nd
->nd_nodename
, sp
, node_c
,
5299 node_v
, &xep
) == -1)
5305 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
5306 /* Skip empty slots */
5307 if (sd
->sd_nodes
[i
][0] == '\0')
5310 /* Record should be for a non-multi-node set */
5311 if (clnt_getset(sd
->sd_nodes
[i
], sp
->setname
,
5312 MD_SET_BAD
, &sr
, &xep
) == -1) {
5318 * Set record structure was allocated from RPC
5319 * routine getset so this structure is only of
5320 * size md_set_record even if the MN flag is
5321 * set. So, clear the flag so that the free
5322 * code doesn't attempt to free a structure
5323 * the size of md_mnset_record.
5325 if (MD_MNSET_REC(sr
)) {
5326 sr
->sr_flags
&= ~MD_SR_MN
;
5332 for (j
= 0; j
< MD_MAXSIDES
; j
++) {
5333 /* Skip empty slots */
5334 if (sd
->sd_nodes
[j
][0] == '\0')
5337 if (sr
->sr_nodes
[j
][0] == '\0') {
5347 if (clnt_addhosts(sd
->sd_nodes
[i
], sp
, node_c
,
5348 node_v
, &xep
) == -1)
5358 /* Sets MD_SR_OK on given nodes. */
5359 resync_genid(sp
, sd
, max_genid
, node_c
, node_v
);
5363 * On each newly re-added node, set the node record for that
5364 * node to OK. Then set all node records for the newly added
5365 * nodes on all nodes to ok.
5367 * By setting a node's own node record to ok first, even if
5368 * the node re-adding the hosts panics, the rest of the nodes
5369 * can determine the same node list during the choosing of the
5370 * master during reconfig. So, only nodes considered for
5371 * mastership are nodes that have both MD_MN_NODE_OK and
5372 * MD_SR_OK set on that node's rpc.metad. If all nodes have
5373 * MD_SR_OK set, but no node has its own MD_MN_NODE_OK set,
5374 * then the set will be removed during reconfig since a panic
5375 * occurred during the re-creation of the deletion of
5376 * the initial diskset.
5378 if (MD_MNSET_DESC(sd
)) {
5379 md_mnnode_desc
*saved_nd_next
;
5382 * Notify rpc.mdcommd on all nodes of a
5383 * nodelist change. Start by suspending
5384 * rpc.mdcommd (which drains it of all
5385 * messages), then change the nodelist
5386 * followed by a reinit and resume.
5388 nd
= sd
->sd_nodelist
;
5390 if (!(nd
->nd_flags
&
5391 MD_MN_NODE_ALIVE
)) {
5395 if (clnt_mdcommdctl(nd
->nd_nodename
,
5396 COMMDCTL_SUSPEND
, sp
,
5398 MD_MSCF_NO_FLAGS
, &xep
)) {
5400 dgettext(TEXT_DOMAIN
,
5401 "Unable to suspend "
5405 suspendall_flag_rb
= 1;
5409 for (i
= 0; i
< node_c
; i
++) {
5411 * During OHA mode, don't issue RPCs to
5412 * non-alive nodes since there is no reason to
5413 * wait for RPC timeouts.
5415 nd
= sd
->sd_nodelist
;
5417 if (strcmp(nd
->nd_nodename
, node_v
[i
])
5422 /* Something wrong, finish this in next loop */
5426 if ((oha
== TRUE
) &&
5427 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
5432 /* Set master on re-joining node. */
5433 if (clnt_mnsetmaster(node_v
[i
], sp
,
5434 sd
->sd_mn_master_nodenm
,
5435 sd
->sd_mn_master_nodeid
, &xep
)) {
5440 * Re-join set to same state as
5441 * before - stale or non-stale.
5443 if (clnt_joinset(node_v
[i
], sp
,
5444 stale_flag
, &xep
)) {
5449 /* Only changing my local cache of node list */
5450 saved_nd_next
= nd
->nd_next
;
5453 /* Set record for host to ok on that host */
5454 if (clnt_upd_nr_flags(node_v
[i
], sp
,
5455 nd
, MD_NR_OK
, NULL
, &xep
)) {
5458 nd
->nd_next
= saved_nd_next
;
5461 /* Now set all node records on all nodes to be ok */
5462 nd
= sd
->sd_nodelist
;
5465 * During OHA mode, don't issue RPCs to
5466 * non-alive nodes since there is no reason to
5467 * wait for RPC timeouts.
5469 if ((oha
== TRUE
) &&
5470 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
5474 if (clnt_upd_nr_flags(nd
->nd_nodename
, sp
,
5475 sd
->sd_nodelist
, MD_NR_OK
, NULL
, &xep
)) {
5484 * Notify rpc.mdcommd on all nodes of a nodelist change.
5485 * Send reinit command to mdcommd which forces it to get
5486 * fresh set description.
5488 if (suspendall_flag_rb
) {
5490 nd
= sd
->sd_nodelist
;
5492 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
5497 /* Class is ignored for REINIT */
5498 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_REINIT
,
5499 sp
, NULL
, MD_MSCF_NO_FLAGS
, &xep
)) {
5500 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
5501 "Unable to reinit rpc.mdcommd.\n"));
5509 * Unlock diskset by resuming messages across the diskset.
5510 * Just resume all classes so that resume is the same whether
5511 * just one class was locked or all classes were locked.
5513 if ((suspend1_flag
) || (suspendall_flag
) || (suspendall_flag_rb
)) {
5515 nd
= sd
->sd_nodelist
;
5517 if (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
)) {
5521 if (clnt_mdcommdctl(nd
->nd_nodename
, COMMDCTL_RESUME
,
5522 sp
, MD_MSG_CLASS0
, MD_MSCF_NO_FLAGS
, &xep
)) {
5523 mde_perror(&xep
, dgettext(TEXT_DOMAIN
,
5524 "Unable to resume rpc.mdcommd.\n"));
5528 meta_ping_mnset(sp
->setno
);
5532 * Start a resync thread on the re-added nodes
5533 * if set is not stale. Also start a thread to update the
5534 * abr state of all soft partitions
5536 if (stale_flag
!= MNSET_IS_STALE
) {
5537 for (i
= 0; i
< node_c
; i
++) {
5539 * During OHA mode, don't issue RPCs to
5540 * non-alive nodes since there is no reason to
5541 * wait for RPC timeouts.
5543 nd
= sd
->sd_nodelist
;
5545 if (strcmp(nd
->nd_nodename
, node_v
[i
])
5553 if ((oha
== TRUE
) &&
5554 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
5559 if (clnt_mn_mirror_resync_all(node_v
[i
],
5561 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
5562 "Unable to start resync "
5565 if (clnt_mn_sp_update_abr(node_v
[i
],
5567 mde_perror(ep
, dgettext(TEXT_DOMAIN
,
5568 "Unable to start sp update "
5576 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
5577 /* Don't test lock flag since guaranteed to be set if in rollback */
5578 if (MD_MNSET_DESC(sd
)) {
5579 nd
= sd
->sd_nodelist
;
5582 * During OHA mode, don't issue RPCs to
5583 * non-alive nodes since there is no reason to
5584 * wait for RPC timeouts.
5586 if ((oha
== TRUE
) &&
5587 (!(nd
->nd_flags
& MD_MN_NODE_ALIVE
))) {
5591 if (clnt_unlock_set(nd
->nd_nodename
, cl_sk
, &xep
))
5596 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
5597 /* Skip empty slots */
5598 if (sd
->sd_nodes
[i
][0] == '\0')
5601 if (clnt_unlock_set(sd
->sd_nodes
[i
], cl_sk
, &xep
))
5605 cl_set_setkey(NULL
);
5607 /* release signals back to what they were on entry */
5608 if (procsigs(FALSE
, &oldsigs
, &xep
) < 0)
5611 metafreereplicalist(rlp
);
5615 metaflushsetname(sp
);
5617 if (!(MD_MNSET_DESC(sd
))) {
5618 md_rb_sig_handling_off(md_got_sig(), md_which_sig());
5635 md_error_t xep
= mdnullerror
;
5639 if ((sd
= metaget_setdesc(sp
, ep
)) == NULL
)
5642 /* Make sure we own the set */
5643 if (meta_check_ownership(sp
, ep
) != 0)
5646 hostname
= mynode();
5648 /* Lock the set on our side */
5649 if (clnt_lock_set(hostname
, sp
, ep
)) {
5655 /* enable auto_take but only if it is not already set */
5656 if (! (sd
->sd_flags
& MD_SR_AUTO_TAKE
)) {
5657 /* verify that we're the only host in the set */
5658 for (i
= 0; i
< MD_MAXSIDES
; i
++) {
5659 if (sd
->sd_nodes
[i
] == NULL
||
5660 sd
->sd_nodes
[i
][0] == '\0')
5663 if (strcmp(sd
->sd_nodes
[i
], hostname
) != 0) {
5664 (void) mddserror(ep
, MDE_DS_SINGLEHOST
,
5665 sp
->setno
, NULL
, NULL
, sp
->setname
);
5671 if (clnt_enable_sr_flags(hostname
, sp
,
5672 MD_SR_AUTO_TAKE
, ep
))
5675 /* Disable SCSI reservations */
5676 if (sd
->sd_flags
& MD_SR_MB_DEVID
)
5677 dd
= metaget_drivedesc(sp
, MD_BASICNAME_OK
|
5680 dd
= metaget_drivedesc(sp
, MD_BASICNAME_OK
,
5687 if (rel_own_bydd(sp
, dd
, TRUE
, &xep
))
5693 /* disable auto_take, if set, or error */
5694 if (sd
->sd_flags
& MD_SR_AUTO_TAKE
) {
5695 if (clnt_disable_sr_flags(hostname
, sp
,
5696 MD_SR_AUTO_TAKE
, ep
))
5699 /* Enable SCSI reservations */
5700 if (sd
->sd_flags
& MD_SR_MB_DEVID
)
5701 dd
= metaget_drivedesc(sp
, MD_BASICNAME_OK
|
5704 dd
= metaget_drivedesc(sp
, MD_BASICNAME_OK
,
5711 mhd_mhiargs_t mhiargs
= defmhiargs
;
5713 if (tk_own_bydd(sp
, dd
, &mhiargs
, TRUE
, &xep
))
5717 (void) mddserror(ep
, MDE_DS_AUTONOTSET
, sp
->setno
,
5718 NULL
, NULL
, sp
->setname
);
5724 cl_sk
= cl_get_setkey(sp
->setno
, sp
->setname
);
5725 if (clnt_unlock_set(hostname
, cl_sk
, &xep
)) {
5727 (void) mdstealerror(ep
, &xep
);
5730 cl_set_setkey(NULL
);