7712 mandoc -Tlint does always exit with error code 0
[unleashed.git] / usr / src / uts / common / io / lvm / raid / raid_hotspare.c
blobe3363750c1811e413d4ecbb38fcb58d647e0dc91
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
22 #pragma ident "%Z%%M% %I% %E% SMI"
25 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
30 * NAME: raid_hotspare.c
31 * DESCRIPTION: RAID driver source file containing routines related to
32 * hotspare operation.
33 * ROUTINES PROVIDED FOR EXTERNAL USE:
34 * raid_hs_release() - release a hotspare device
35 * raid_hotspares() - prompt the hotspare daemon to attempt needed hotspare work
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/conf.h>
41 #include <sys/file.h>
42 #include <sys/user.h>
43 #include <sys/uio.h>
44 #include <sys/t_lock.h>
45 #include <sys/buf.h>
46 #include <sys/dkio.h>
47 #include <sys/vtoc.h>
48 #include <sys/kmem.h>
49 #include <vm/page.h>
50 #include <sys/sysmacros.h>
51 #include <sys/types.h>
52 #include <sys/mkdev.h>
53 #include <sys/stat.h>
54 #include <sys/open.h>
55 #include <sys/lvm/md_raid.h>
56 #include <sys/modctl.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/debug.h>
61 #include <sys/sysevent/eventdefs.h>
62 #include <sys/sysevent/svm.h>
/* daemon queue anchor that raid_hotspares() hands to daemon_request() */
64 extern mdq_anchor_t md_hs_daemon;
/*
 * single file-private request; its dr_pending flag (guarded by dr_mx) is set
 * in raid_hotspares() and cleared in check_4_hs()
 */
65 static daemon_request_t hotspare_request;
/* set table declared elsewhere; not referenced directly in the code shown */
67 extern md_set_t md_set[];
/* raid driver ops; check_4_hs() walks md_head under md_link_rw.lock */
68 extern md_ops_t raid_md_ops;
71 * NAME: raid_hs_release
73 * DESCRIPTION: Release the hotspare.
75 * PARAMETERS: int error - indication of error on hotspare
76 * mr_unit_t *un - raid unit
77 * mddb_recid_t *recids - output records to commit revised hs info
78 * int hs_index - component to release
80 * LOCKS: Expects Unit Writer Lock to be held across call.
82 void
83 raid_hs_release(
84 hs_cmds_t cmd,
85 mr_unit_t *un,
86 mddb_recid_t *recids,
87 int hs_index
90 mr_column_t *col;
92 col = &un->un_column[hs_index];
94 /* close the hotspare device */
95 if (col->un_devflags & MD_RAID_DEV_ISOPEN) {
96 md_layered_close(col->un_dev, MD_OFLG_NULL);
97 col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
100 /* return the hotspare to the pool */
101 (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, recids,
102 &col->un_hs_key, NULL, NULL);
104 col->un_hs_pwstart = 0;
105 col->un_hs_devstart = 0;
106 col->un_hs_id = (mddb_recid_t)0;
107 col->un_hs_key = 0;
112 * NAME: check_comp_4_hs
114 * DESCRIPTION: Check whether the input component has an error and can be
115 * backed with a hot spare (RCS_ERRED state), and initiate
116 * a resync if so.
118 * PARAMETERS: mr_unit_t *un - raid unit
119 * int hs_index - component to check
121 * LOCKS: Expects Unit Writer Lock to be held upon entrance. Releases
122 * the lock prior to calling raid_resync_unit, then reacquires
123 * it before returning.
125 static void
126 check_comp_4_hs(
127 mr_unit_t *un,
128 int hs_index
131 mddb_recid_t recids[3];
132 minor_t mnum = MD_SID(un);
133 mdi_unit_t *ui;
134 rcs_state_t state;
135 diskaddr_t size;
136 int err;
137 mr_column_t *col;
138 md_error_t mde = mdnullerror;
139 char devname[MD_MAX_CTDLEN];
140 char hs_devname[MD_MAX_CTDLEN];
141 set_t setno;
142 md_dev64_t tmpdev;
143 diskaddr_t tmpdaddr;
146 /* initialize */
147 setno = MD_UN2SET(un);
148 ui = MDI_UNIT(mnum);
149 md_unit_readerexit(ui);
150 (void) md_io_writerlock(ui);
151 un = (mr_unit_t *)md_unit_writerlock(ui);
152 col = &un->un_column[hs_index];
155 * add a hotspare for erred column only if not resyncing
157 if ((!(COLUMN_STATE(un, hs_index) & RCS_ERRED)) ||
158 (raid_state_cnt(un, (RCS_ERRED | RCS_LAST_ERRED)) != 1) ||
159 (raid_state_cnt(un, RCS_RESYNC) > 0)) {
160 goto errout;
163 recids[0] = 0;
164 recids[1] = 0;
165 /* if there is already a hotspare then just return */
166 if (HOTSPARED(un, hs_index) && (col->un_devstate & RCS_ERRED)) {
167 raid_hs_release(HS_BAD, un, &recids[0], hs_index);
168 cmn_err(CE_WARN, "md: %s: %s hotspare errored and released",
169 md_shortname(mnum),
170 md_devname(MD_MIN2SET(mnum), col->un_dev, NULL, 0));
171 col->un_dev = col->un_orig_dev;
172 col->un_pwstart = col->un_orig_pwstart;
173 col->un_devstart = col->un_orig_devstart;
174 raid_commit(un, recids);
176 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_METADEVICE,
177 setno, MD_SID(un));
179 ASSERT(!HOTSPARED(un, hs_index));
181 state = col->un_devstate;
182 size = col->un_pwstart + un->un_pwsize +
183 (un->un_segsize * un->un_segsincolumn);
185 again:
186 /* quit if resync is already active */
187 col->un_devflags |= MD_RAID_REGEN_RESYNC;
188 if (resync_request(mnum, hs_index, 0, NULL))
189 goto errout;
191 recids[0] = 0;
192 recids[1] = 0;
194 tmpdev = col->un_dev;
195 tmpdaddr = col->un_hs_pwstart;
197 /* get a hotspare */
198 if (md_hot_spare_ifc(HS_GET, un->un_hsp_id, size,
199 ((col->un_orig_pwstart >= 1) &&
200 (col->un_orig_pwstart != MD_DISKADDR_ERROR)),
201 &col->un_hs_id, &col->un_hs_key, &tmpdev, &tmpdaddr) != 0) {
202 col->un_dev = tmpdev;
203 col->un_hs_pwstart = tmpdaddr;
204 release_resync_request(mnum);
205 raid_set_state(un, hs_index, state, 1);
206 goto errout;
209 col->un_hs_pwstart = tmpdaddr;
212 * record id is filled in by raid_commit, recids[0] filled in by
213 * md_hot_spare_ifc if needed
215 recids[0] = col->un_hs_id;
216 recids[1] = 0;
219 * close the device and open the hot spare. The device should
220 * never be a hotspare here.
222 if (col->un_devflags & MD_RAID_DEV_ISOPEN) {
223 md_layered_close(col->un_orig_dev, MD_OFLG_NULL);
224 col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
227 * Try open by device id
229 tmpdev = md_resolve_bydevid(mnum, tmpdev, col->un_hs_key);
230 if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
231 md_dev64_t hs_dev = tmpdev;
232 /* cannot open return to orig */
233 raid_hs_release(HS_BAD, un, &recids[0], hs_index);
234 release_resync_request(mnum);
235 raid_set_state(un, hs_index, state, 1);
236 col->un_dev = col->un_orig_dev;
237 col->un_devstart = col->un_orig_devstart;
238 col->un_pwstart = col->un_orig_pwstart;
239 col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
240 raid_commit(un, recids);
241 cmn_err(CE_WARN, "md: %s: open error of hotspare %s",
242 md_shortname(mnum),
243 md_devname(MD_MIN2SET(mnum), hs_dev, NULL, 0));
244 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, setno,
245 MD_SID(un));
246 goto again;
249 col->un_dev = tmpdev;
251 col->un_devflags |= MD_RAID_DEV_ISOPEN;
254 * move the values into the device fields. Since in some cases
255 * the pwstart is not zero this must be added into the start of
256 * the hotspare to avoid over writting the label
258 col->un_hs_pwstart += col->un_orig_pwstart;
259 col->un_pwstart = col->un_hs_pwstart;
260 col->un_hs_devstart = col->un_hs_pwstart + un->un_pwsize;
261 col->un_devstart = col->un_hs_devstart;
263 /* commit unit and hotspare records and release lock */
264 raid_commit(un, recids);
265 md_unit_writerexit(ui);
266 md_io_writerexit(ui);
268 err = raid_resync_unit(mnum, &mde);
270 /* if resync fails, transition back to erred state and reset */
271 if (err) {
272 /* reaquire unit writerr lock */
273 un = (mr_unit_t *)md_unit_writerlock(ui);
275 raid_set_state(un, hs_index, RCS_ERRED, 0);
278 * close the hotspare and return it. Then restore the
279 * original device back to the original state
281 raid_hs_release(HS_FREE, un, &recids[0], hs_index);
282 col->un_dev = col->un_orig_dev;
283 col->un_devstart = col->un_orig_devstart;
284 col->un_pwstart = col->un_orig_pwstart;
285 raid_commit(un, recids);
286 md_unit_writerexit(ui);
287 un = (mr_unit_t *)md_unit_readerlock(ui);
288 return;
291 setno = MD_MIN2SET(mnum);
293 (void) md_devname(setno, col->un_orig_dev, devname,
294 sizeof (devname));
295 (void) md_devname(setno, col->un_dev, hs_devname,
296 sizeof (hs_devname));
298 cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s",
299 md_shortname(mnum), devname, hs_devname);
300 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HOTSPARED, SVM_TAG_HS, setno,
301 MD_SID(un));
302 (void) md_unit_readerlock(ui);
303 return;
305 errout:
306 md_unit_writerexit(ui);
307 md_io_writerexit(ui);
308 un = (mr_unit_t *)md_unit_readerlock(ui);
312 * NAME: check_4_hs
314 * DESCRIPTION: Check every component of every raid unit for any device which
315 * needs to be backed with a hot spare.
317 * PARAMETERS: daemon_request_t *dr - hotspare request daemon
319 * LOCKS: Acquires and releases the Hotspare Request Lock and the RAID
320 * Driver Lock. Acquires the Unit Writer Lock which is released
321 * in check_comp_4_hs.
323 static void
324 check_4_hs(daemon_request_t *dr)
326 mdi_unit_t *ui;
327 mr_unit_t *un;
328 md_link_t *next;
329 int i;
331 mutex_enter(&dr->dr_mx); /* clear up front so can poke */
332 dr->dr_pending = 0; /* again in low level routine if */
333 mutex_exit(&dr->dr_mx); /* something found to do */
336 * Scan raid unit list and call component hotspare check routine for
337 * each component of each unit where resync is inactive.
339 rw_enter(&raid_md_ops.md_link_rw.lock, RW_READER);
340 for (next = raid_md_ops.md_head; next != NULL; next = next->ln_next) {
341 ui = MDI_UNIT(next->ln_id);
342 un = (mr_unit_t *)md_unit_readerlock(ui);
343 if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE) &&
344 (raid_state_cnt(un, RCS_RESYNC) == 0) &&
345 (UNIT_STATE(un) & RUS_ERRED) &&
346 (un->un_hsp_id != -1) &&
347 (raid_state_cnt(un, RCS_ERRED) == 1)) {
348 for (i = 0; i < un->un_totalcolumncnt; i++)
349 if (un->un_column[i].un_devstate == RCS_ERRED)
350 check_comp_4_hs(un, i);
352 md_unit_readerexit(ui);
354 rw_exit(&raid_md_ops.md_link_rw.lock);
358 * NAME: raid_hotspares
360 * DESCRIPTION: Initiate a check of all RAID devices for components which
361 * may require a hot spare, if it is not already running.
363 * PARAMETERS: NONE
365 * LOCKS: Acquires and releases the Hotspare Request Lock.
367 intptr_t
368 raid_hotspares()
370 /* if available, make request for hotspare to master daemon */
371 mutex_enter(&hotspare_request.dr_mx);
372 if (hotspare_request.dr_pending == 0) {
373 hotspare_request.dr_pending = 1;
374 daemon_request(&md_hs_daemon,
375 check_4_hs, (daemon_queue_t *)&hotspare_request, REQ_OLD);
377 mutex_exit(&hotspare_request.dr_mx);
378 return (0);