4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
22 #pragma ident "%Z%%M% %I% %E% SMI"
25 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
30 * NAME: raid_hotspare.c
31 * DESCRIPTION: RAID driver source file containing routines related to
33 * ROUTINES PROVIDED FOR EXTERNAL USE:
34 * raid_hs_release() - release a hotspare device
35 * raid_hotspares() - prompt the hotspare daemon to attempt needed hotspare work
38 #include <sys/param.h>
39 #include <sys/systm.h>
44 #include <sys/t_lock.h>
50 #include <sys/sysmacros.h>
51 #include <sys/types.h>
52 #include <sys/mkdev.h>
55 #include <sys/lvm/md_raid.h>
56 #include <sys/modctl.h>
58 #include <sys/sunddi.h>
59 #include <sys/debug.h>
61 #include <sys/sysevent/eventdefs.h>
62 #include <sys/sysevent/svm.h>
64 extern mdq_anchor_t md_hs_daemon
;
65 static daemon_request_t hotspare_request
;
67 extern md_set_t md_set
[];
68 extern md_ops_t raid_md_ops
;
71 * NAME: raid_hs_release
73 * DESCRIPTION: Release the hotspare.
75 * PARAMETERS: cmd - hotspare command handed to md_hot_spare_ifc (callers pass HS_BAD or HS_FREE)
76 * mr_unit_t *un - raid unit
77 * mddb_recid_t *recids - output records to commit revised hs info
78 * int hs_index - component to release
80 * LOCKS: Expects Unit Writer Lock to be held across call.
92 col
= &un
->un_column
[hs_index
];
94 /* close the hotspare device */
95 if (col
->un_devflags
& MD_RAID_DEV_ISOPEN
) {
96 md_layered_close(col
->un_dev
, MD_OFLG_NULL
);
97 col
->un_devflags
&= ~MD_RAID_DEV_ISOPEN
;
100 /* return the hotspare to the pool */
101 (void) md_hot_spare_ifc(cmd
, un
->un_hsp_id
, 0, 0, recids
,
102 &col
->un_hs_key
, NULL
, NULL
);
104 col
->un_hs_pwstart
= 0;
105 col
->un_hs_devstart
= 0;
106 col
->un_hs_id
= (mddb_recid_t
)0;
112 * NAME: check_comp_4_hs
114 * DESCRIPTION: Check whether the input component has an error and can be
115 * backed with a hot spare (RCS_ERRED state), and initiate
118 * PARAMETERS: mr_unit_t *un - raid unit
119 * int hs_index - component to check
121 * LOCKS: Expects Unit Reader Lock to be held upon entrance. Trades it
122 * for the IO and Unit Writer Locks, drops those prior to calling
123 * raid_resync_unit, and holds the Unit Reader Lock again on return.
131 mddb_recid_t recids
[3];
132 minor_t mnum
= MD_SID(un
);
138 md_error_t mde
= mdnullerror
;
139 char devname
[MD_MAX_CTDLEN
];
140 char hs_devname
[MD_MAX_CTDLEN
];
147 setno
= MD_UN2SET(un
);
149 md_unit_readerexit(ui
);
150 (void) md_io_writerlock(ui
);
151 un
= (mr_unit_t
*)md_unit_writerlock(ui
);
152 col
= &un
->un_column
[hs_index
];
155 * add a hotspare for erred column only if not resyncing
157 if ((!(COLUMN_STATE(un
, hs_index
) & RCS_ERRED
)) ||
158 (raid_state_cnt(un
, (RCS_ERRED
| RCS_LAST_ERRED
)) != 1) ||
159 (raid_state_cnt(un
, RCS_RESYNC
) > 0)) {
165 /* if the column's current hotspare has itself errored, release it and revert to the original device */
166 if (HOTSPARED(un
, hs_index
) && (col
->un_devstate
& RCS_ERRED
)) {
167 raid_hs_release(HS_BAD
, un
, &recids
[0], hs_index
);
168 cmn_err(CE_WARN
, "md: %s: %s hotspare errored and released",
170 md_devname(MD_MIN2SET(mnum
), col
->un_dev
, NULL
, 0));
171 col
->un_dev
= col
->un_orig_dev
;
172 col
->un_pwstart
= col
->un_orig_pwstart
;
173 col
->un_devstart
= col
->un_orig_devstart
;
174 raid_commit(un
, recids
);
176 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_HS_FREED
, SVM_TAG_METADEVICE
,
179 ASSERT(!HOTSPARED(un
, hs_index
));
181 state
= col
->un_devstate
;
182 size
= col
->un_pwstart
+ un
->un_pwsize
+
183 (un
->un_segsize
* un
->un_segsincolumn
);
186 /* quit if resync is already active */
187 col
->un_devflags
|= MD_RAID_REGEN_RESYNC
;
188 if (resync_request(mnum
, hs_index
, 0, NULL
))
194 tmpdev
= col
->un_dev
;
195 tmpdaddr
= col
->un_hs_pwstart
;
198 if (md_hot_spare_ifc(HS_GET
, un
->un_hsp_id
, size
,
199 ((col
->un_orig_pwstart
>= 1) &&
200 (col
->un_orig_pwstart
!= MD_DISKADDR_ERROR
)),
201 &col
->un_hs_id
, &col
->un_hs_key
, &tmpdev
, &tmpdaddr
) != 0) {
202 col
->un_dev
= tmpdev
;
203 col
->un_hs_pwstart
= tmpdaddr
;
204 release_resync_request(mnum
);
205 raid_set_state(un
, hs_index
, state
, 1);
209 col
->un_hs_pwstart
= tmpdaddr
;
212 * record id is filled in by raid_commit, recids[0] filled in by
213 * md_hot_spare_ifc if needed
215 recids
[0] = col
->un_hs_id
;
219 * close the device and open the hot spare. The device should
220 * never be a hotspare here.
222 if (col
->un_devflags
& MD_RAID_DEV_ISOPEN
) {
223 md_layered_close(col
->un_orig_dev
, MD_OFLG_NULL
);
224 col
->un_devflags
&= ~MD_RAID_DEV_ISOPEN
;
227 * Try open by device id
229 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
, col
->un_hs_key
);
230 if (md_layered_open(mnum
, &tmpdev
, MD_OFLG_NULL
)) {
231 md_dev64_t hs_dev
= tmpdev
;
232 /* cannot open return to orig */
233 raid_hs_release(HS_BAD
, un
, &recids
[0], hs_index
);
234 release_resync_request(mnum
);
235 raid_set_state(un
, hs_index
, state
, 1);
236 col
->un_dev
= col
->un_orig_dev
;
237 col
->un_devstart
= col
->un_orig_devstart
;
238 col
->un_pwstart
= col
->un_orig_pwstart
;
239 col
->un_devflags
&= ~MD_RAID_DEV_ISOPEN
;
240 raid_commit(un
, recids
);
241 cmn_err(CE_WARN
, "md: %s: open error of hotspare %s",
243 md_devname(MD_MIN2SET(mnum
), hs_dev
, NULL
, 0));
244 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_HS_FREED
, SVM_TAG_HS
, setno
,
249 col
->un_dev
= tmpdev
;
251 col
->un_devflags
|= MD_RAID_DEV_ISOPEN
;
254 * move the values into the device fields. Since in some cases
255 * the pwstart is not zero this must be added into the start of
256 * the hotspare to avoid overwriting the label
258 col
->un_hs_pwstart
+= col
->un_orig_pwstart
;
259 col
->un_pwstart
= col
->un_hs_pwstart
;
260 col
->un_hs_devstart
= col
->un_hs_pwstart
+ un
->un_pwsize
;
261 col
->un_devstart
= col
->un_hs_devstart
;
263 /* commit unit and hotspare records and release lock */
264 raid_commit(un
, recids
);
265 md_unit_writerexit(ui
);
266 md_io_writerexit(ui
);
268 err
= raid_resync_unit(mnum
, &mde
);
270 /* if resync fails, transition back to erred state and reset */
272 /* reacquire unit writer lock */
273 un
= (mr_unit_t
*)md_unit_writerlock(ui
);
275 raid_set_state(un
, hs_index
, RCS_ERRED
, 0);
278 * close the hotspare and return it. Then restore the
279 * original device back to the original state
281 raid_hs_release(HS_FREE
, un
, &recids
[0], hs_index
);
282 col
->un_dev
= col
->un_orig_dev
;
283 col
->un_devstart
= col
->un_orig_devstart
;
284 col
->un_pwstart
= col
->un_orig_pwstart
;
285 raid_commit(un
, recids
);
286 md_unit_writerexit(ui
);
287 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
291 setno
= MD_MIN2SET(mnum
);
293 (void) md_devname(setno
, col
->un_orig_dev
, devname
,
295 (void) md_devname(setno
, col
->un_dev
, hs_devname
,
296 sizeof (hs_devname
));
298 cmn_err(CE_NOTE
, "md: %s: hotspared device %s with %s",
299 md_shortname(mnum
), devname
, hs_devname
);
300 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_HOTSPARED
, SVM_TAG_HS
, setno
,
302 (void) md_unit_readerlock(ui
);
306 md_unit_writerexit(ui
);
307 md_io_writerexit(ui
);
308 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
314 * DESCRIPTION: Check every component of every raid unit for any device which
315 * needs to be backed with a hot spare.
317 * PARAMETERS: daemon_request_t *dr - hotspare request daemon
319 * LOCKS: Acquires and releases the Hotspare Request Lock and the RAID
320 * Driver Lock. Acquires and releases the Unit Reader Lock of each
321 * unit; check_comp_4_hs trades it for the Unit Writer Lock and back.
324 check_4_hs(daemon_request_t
*dr
)
331 mutex_enter(&dr
->dr_mx
); /* clear up front so can poke */
332 dr
->dr_pending
= 0; /* again in low level routine if */
333 mutex_exit(&dr
->dr_mx
); /* something found to do */
336 * Scan raid unit list and call component hotspare check routine for
337 * each component of each unit where resync is inactive.
339 rw_enter(&raid_md_ops
.md_link_rw
.lock
, RW_READER
);
340 for (next
= raid_md_ops
.md_head
; next
!= NULL
; next
= next
->ln_next
) {
341 ui
= MDI_UNIT(next
->ln_id
);
342 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
343 if (!(un
->c
.un_status
& MD_UN_RESYNC_ACTIVE
) &&
344 (raid_state_cnt(un
, RCS_RESYNC
) == 0) &&
345 (UNIT_STATE(un
) & RUS_ERRED
) &&
346 (un
->un_hsp_id
!= -1) &&
347 (raid_state_cnt(un
, RCS_ERRED
) == 1)) {
348 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++)
349 if (un
->un_column
[i
].un_devstate
== RCS_ERRED
)
350 check_comp_4_hs(un
, i
);
352 md_unit_readerexit(ui
);
354 rw_exit(&raid_md_ops
.md_link_rw
.lock
);
358 * NAME: raid_hotspares
360 * DESCRIPTION: Initiate a check of all RAID devices for components which
361 * may require a hot spare, if it is not already running.
365 * LOCKS: Acquires and releases the Hotspare Request Lock.
370 /* if available, make request for hotspare to master daemon */
371 mutex_enter(&hotspare_request
.dr_mx
);
372 if (hotspare_request
.dr_pending
== 0) {
373 hotspare_request
.dr_pending
= 1;
374 daemon_request(&md_hs_daemon
,
375 check_4_hs
, (daemon_queue_t
*)&hotspare_request
, REQ_OLD
);
377 mutex_exit(&hotspare_request
.dr_mx
);