4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
31 * DESCRIPTION: RAID driver source file containing routines related to resync
33 * ROUTINES PROVIDED FOR EXTERNAL USE:
34 * resync_request() - get resync lock if available
35 * release_resync_request() - relinquish resync lock
36 * erred_check_line() - provide write instruction for erred column
37 * init_pw_area() - initialize pre-write area
38 * copy_pw_area() - copy pre-write area from one device to another
41 #include <sys/param.h>
42 #include <sys/systm.h>
47 #include <sys/t_lock.h>
53 #include <sys/sysmacros.h>
54 #include <sys/types.h>
55 #include <sys/mkdev.h>
59 #include <sys/modctl.h>
61 #include <sys/sunddi.h>
62 #include <sys/lvm/md_raid.h>
64 #include <sys/sysevent/eventdefs.h>
65 #include <sys/sysevent/svm.h>
69 extern md_set_t md_set
[];
70 extern kmem_cache_t
*raid_child_cache
;
71 extern kmem_cache_t
*raid_parent_cache
;
72 extern md_resync_t md_cpr_resync
;
73 extern major_t md_major
;
74 extern void raid_parent_init(md_raidps_t
*ps
);
75 extern void raid_child_init(md_raidcs_t
*ps
);
79 * DESCRIPTION: Xor two chunks of data together. The data referenced by
80 * addr1 and addr2 are xor'd together for size and written into
82 * PARAMETERS: caddr_t addr1 - address of first chunk of data and destination
83 * caddr_t addr2 - address of second chunk of data
84 * u_int size - number to xor
87 xor(caddr_t addr1
, caddr_t addr2
, size_t size
)
95 * NAME: release_resync_request
97 * DESCRIPTION: Release resync active flag and reset unit values accordingly.
99 * PARAMETERS: minor_t mnum - minor number identity of metadevice
101 * LOCKS: Expects Unit Writer Lock to be held across call.
104 release_resync_request(
113 un
->c
.un_status
&= ~MD_UN_RESYNC_ACTIVE
;
115 un
->un_column
[un
->un_resync_index
].un_devflags
&= ~MD_RAID_RESYNC
;
116 un
->un_column
[un
->un_resync_index
].un_devflags
&= ~MD_RAID_RESYNC_ERRED
;
117 un
->un_column
[un
->un_resync_index
].un_devflags
&=
118 ~(MD_RAID_COPY_RESYNC
| MD_RAID_REGEN_RESYNC
);
120 un
->un_resync_line_index
= 0;
121 un
->un_resync_index
= NOCOLUMN
;
125 * NAME: resync_request
127 * DESCRIPTION: Request resync. If resync is available (no current active
128 * resync), mark unit as resync active and initialize.
130 * PARAMETERS: minor_t mnum - minor number identity of metadevice
131 * int column_index - index of column to resync
132 * int copysize - copysize of ioctl request
133 * md_error_t *ep - error output parameter
135 * RETURN: 0 if resync is available, 1 otherwise.
137 * LOCKS: Expects Unit Writer Lock to be held across call.
139 * NOTE: Sets un_resync_copysize to the input value in copysize, the
140 * existing value from an incomplete previous resync with an
141 * input value in copysize, or the lesser of the unit segment
158 /* if resync or grow not already active, set resync active for unit */
159 if (! (un
->un_column
[column_index
].un_devflags
& MD_RAID_RESYNC
) &&
160 ((un
->c
.un_status
& MD_UN_RESYNC_ACTIVE
) ||
161 (un
->c
.un_status
& MD_UN_GROW_PENDING
) ||
162 (un
->un_column
[column_index
].un_devstate
& RCS_RESYNC
))) {
164 return (mdmderror(mde
, MDE_GROW_DELAYED
, mnum
));
168 if (un
->un_column
[column_index
].un_devstate
&
169 (RCS_ERRED
| RCS_LAST_ERRED
))
170 un
->un_column
[column_index
].un_devflags
|= MD_RAID_DEV_ERRED
;
172 un
->un_column
[column_index
].un_devflags
&= ~MD_RAID_DEV_ERRED
;
173 un
->c
.un_status
|= MD_UN_RESYNC_ACTIVE
;
174 un
->un_resync_index
= column_index
;
175 un
->un_resync_line_index
= 0;
176 raid_set_state(un
, column_index
, RCS_RESYNC
, 0);
184 * DESCRIPTION: Initialize resync_comp buffers.
186 * PARAMETERS: size_t bsize - size of buffer
187 * buf_t *read_buf1 - first read buf
188 * buf_t *read_buf2 - second read buf
189 * buf_t *write_buf - write buf
192 alloc_bufs(md_raidcs_t
*cs
, size_t bsize
)
194 /* allocate buffers, write uses the read_buf1 buffer */
195 cs
->cs_dbuffer
= kmem_zalloc(bsize
, KM_SLEEP
);
196 cs
->cs_pbuffer
= kmem_zalloc(bsize
, KM_SLEEP
);
200 init_buf(buf_t
*bp
, int flags
, size_t size
)
203 bzero((caddr_t
)bp
, sizeof (buf_t
));
205 /* set b_back and b_forw to point back to buf */
211 bp
->b_bufsize
= size
;
214 /* setup semaphores */
215 sema_init(&bp
->b_io
, 0, NULL
, SEMA_DEFAULT
, NULL
);
216 sema_init(&bp
->b_sem
, 0, NULL
, SEMA_DEFAULT
, NULL
);
220 destroy_buf(buf_t
*bp
)
222 sema_destroy(&bp
->b_io
);
223 sema_destroy(&bp
->b_sem
);
227 reset_buf(buf_t
*bp
, int flags
, size_t size
)
230 init_buf(bp
, flags
, size
);
236 * DESCRIPTION: Free up buffers.
238 * PARAMETERS: size_t bsize - size of buffer
239 * buf_t *read_buf1 - first read buf
240 * buf_t *read_buf2 - second read buf
241 * buf_t *write_buf - write buf
244 free_bufs(size_t bsize
, md_raidcs_t
*cs
)
246 kmem_free(cs
->cs_dbuffer
, bsize
);
247 kmem_free(cs
->cs_pbuffer
, bsize
);
253 * DESCRIPTION: Initialize pre-write area to all zeros.
255 * PARAMETERS: minor_t mnum - minor number identity of metadevice
256 * md_dev64_t dev_to_write - index of column to resync
257 * int column_index - index of column to resync
259 * RETURN: 1 if write error on resync device, otherwise 0
261 * LOCKS: Expects Unit Reader Lock to be held across call.
266 md_dev64_t dev_to_write
,
279 ASSERT(un
->un_column
[col
].un_devflags
& MD_RAID_DEV_ISOPEN
);
281 bsize
= un
->un_iosize
;
282 copysize
= dbtob(bsize
);
283 databuffer
= kmem_zalloc(copysize
, KM_SLEEP
);
284 init_buf(&buf
, (B_BUSY
| B_WRITE
), copysize
);
286 for (i
= 0; i
< un
->un_pwcnt
; i
++) {
287 /* magic field is 0 for 4.0 compatibility */
288 RAID_FILLIN_RPW(databuffer
, un
, 0, 0,
291 buf
.b_un
.b_addr
= (caddr_t
)databuffer
;
292 buf
.b_edev
= md_dev64_to_dev(dev_to_write
);
293 buf
.b_bcount
= dbtob(bsize
);
294 buf
.b_lblkno
= pwstart
+ (i
* un
->un_iosize
);
297 (void) md_call_strategy(&buf
, MD_STR_NOTTOP
, NULL
);
303 reset_buf(&buf
, (B_BUSY
| B_WRITE
), copysize
);
307 kmem_free(databuffer
, copysize
);
313 * NAME: raid_open_alt
315 * DESCRIPTION: opens the alt device used during resync.
319 * RETURN: 0 - successful
322 * LOCKS: requires unit writer lock
326 raid_open_alt(mr_unit_t
*un
, int index
)
328 mr_column_t
*column
= &un
->un_column
[index
];
329 set_t setno
= MD_MIN2SET(MD_SID(un
));
330 side_t side
= mddb_getsidenum(setno
);
331 md_dev64_t tmpdev
= column
->un_alt_dev
;
334 ASSERT(UNIT_WRITER_HELD(un
));
335 /* not already writing to */
336 ASSERT(! (column
->un_devflags
& MD_RAID_WRITE_ALT
));
337 /* not already open */
338 ASSERT(! (column
->un_devflags
& MD_RAID_ALT_ISOPEN
));
340 if (tmpdev
!= NODEV64
) {
342 * Open by device id. We use orig_key since alt_dev
343 * has been set by the caller to be the same as orig_dev.
345 if ((md_getmajor(tmpdev
) != md_major
) &&
346 md_devid_found(setno
, side
, column
->un_orig_key
) == 1) {
347 tmpdev
= md_resolve_bydevid(MD_SID(un
), tmpdev
,
348 column
->un_orig_key
);
350 if (md_layered_open(MD_SID(un
), &tmpdev
, MD_OFLG_NULL
)) {
352 column
->un_alt_dev
= tmpdev
;
356 column
->un_alt_dev
= tmpdev
;
357 column
->un_devflags
|= MD_RAID_ALT_ISOPEN
;
361 /* no alt device to open */
367 * NAME: raid_close_alt
369 * DESCRIPTION: closes the alt device used during resync.
371 * PARAMETERS: un - raid unit structure
372 * index - raid column
376 * LOCKS: requires unit writer lock
380 raid_close_alt(mr_unit_t
*un
, int index
)
382 mr_column_t
*column
= &un
->un_column
[index
];
383 md_dev64_t tmpdev
= column
->un_alt_dev
;
385 ASSERT(UNIT_WRITER_HELD(un
)); /* correct locks */
386 ASSERT(! (column
->un_devflags
& MD_RAID_WRITE_ALT
)); /* not writing */
387 ASSERT(column
->un_devflags
& MD_RAID_ALT_ISOPEN
); /* already open */
388 ASSERT(tmpdev
!= NODEV64
); /* is a device */
390 md_layered_close(column
->un_alt_dev
, MD_OFLG_NULL
);
391 column
->un_devflags
&= ~MD_RAID_ALT_ISOPEN
;
392 column
->un_alt_dev
= NODEV64
;
396 raid_resync_fillin_cs(diskaddr_t line
, uint_t line_count
, md_raidcs_t
*cs
)
398 mr_unit_t
*un
= cs
->cs_un
;
400 ASSERT(line
< un
->un_segsincolumn
);
403 cs
->cs_blkno
= line
* un
->un_segsize
;
404 cs
->cs_blkcnt
= un
->un_segsize
* line_count
;
405 cs
->cs_lastblk
= cs
->cs_blkno
+ cs
->cs_blkcnt
- 1;
406 raid_line_reader_lock(cs
, 1);
408 return (line
+ line_count
);
411 /* states returned by raid_resync_line */
413 #define RAID_RESYNC_OKAY 0
414 #define RAID_RESYNC_RDERROR 2
415 #define RAID_RESYNC_WRERROR 3
416 #define RAID_RESYNC_STATE 4
426 md_dev64_t dev_to_write
,
427 diskaddr_t write_dev_start
)
429 mr_unit_t
*un
= cs
->cs_un
;
430 buf_t
*readb1
= &cs
->cs_pbuf
;
431 buf_t
*readb2
= &cs
->cs_dbuf
;
432 buf_t
*writeb
= &cs
->cs_hbuf
;
441 resync
= un
->un_resync_index
;
442 off
= line
* un
->un_segsize
;
443 copysize
= un
->un_resync_copysize
;
445 /* find first column to read, skip resync column */
447 leftinseg
= un
->un_segsize
* line_count
;
450 /* truncate last chunk to end if needed */
451 if (copysize
> leftinseg
)
452 tcopysize
= leftinseg
;
454 tcopysize
= copysize
;
455 leftinseg
-= tcopysize
;
458 * One of two scenarios:
459 * 1) resync device with hotspare ok. This implies that
460 * we are copying from a good hotspare to a new good original
461 * device. In this case readb1 is used as the buf for
462 * the read from the hotspare device.
463 * 2) For all other cases, including when in case 1) and an
464 * error is detected on the (formerly good) hotspare device,
465 * readb1 is used for the initial read. readb2 is used for
466 * all other reads. Each readb2 buffer is xor'd into the
469 * In both cases, writeb is used for the write, using readb1's
472 * For case 2, we could alternatively perform the read for all
473 * devices concurrently to improve performance. However,
474 * this could diminish performance for concurrent reads and
475 * writes if low on memory.
478 /* read first buffer */
480 /* switch to read from good columns if single_read */
482 if (un
->un_column
[resync
].un_dev
== NODEV64
)
483 return (RAID_RESYNC_RDERROR
);
485 reset_buf(readb1
, B_READ
| B_BUSY
,
487 readb1
->b_bcount
= dbtob(tcopysize
);
488 readb1
->b_un
.b_addr
= cs
->cs_pbuffer
;
489 readb1
->b_edev
= md_dev64_to_dev(
490 un
->un_column
[resync
].un_dev
);
492 un
->un_column
[resync
].un_devstart
+ off
;
493 (void) md_call_strategy(readb1
, MD_STR_NOTTOP
, NULL
);
494 if (biowait(readb1
)) {
496 * at this point just start rebuilding the
497 * data and go on since the other column
502 un
->un_column
[resync
].un_devflags
&=
503 ~MD_RAID_COPY_RESYNC
;
504 un
->un_column
[resync
].un_devflags
|=
505 MD_RAID_REGEN_RESYNC
;
509 /* if reading from all non-resync columns */
511 /* for each column, read line and xor into write buf */
512 bzero(cs
->cs_pbuffer
, dbtob(tcopysize
));
513 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++) {
515 if (un
->un_column
[i
].un_dev
== NODEV64
)
516 return (RAID_RESYNC_RDERROR
);
518 /* skip column getting resync'ed */
522 reset_buf(readb1
, B_READ
| B_BUSY
,
524 readb1
->b_bcount
= dbtob(tcopysize
);
525 readb1
->b_un
.b_addr
= cs
->cs_dbuffer
;
526 readb1
->b_edev
= md_dev64_to_dev(
527 un
->un_column
[i
].un_dev
);
529 un
->un_column
[i
].un_devstart
+ off
;
531 (void) md_call_strategy(readb1
, MD_STR_NOTTOP
,
533 if (biowait(readb1
)) {
535 quit
= RAID_RESYNC_RDERROR
;
541 /* xor readb2 data into readb1 */
542 xor(cs
->cs_pbuffer
, readb1
->b_un
.b_addr
,
547 reset_buf(writeb
, B_WRITE
| B_BUSY
,
549 writeb
->b_bcount
= dbtob(tcopysize
);
550 writeb
->b_un
.b_addr
= cs
->cs_pbuffer
;
551 writeb
->b_lblkno
= off
+ write_dev_start
;
552 writeb
->b_edev
= md_dev64_to_dev(dev_to_write
);
554 /* set write block number and perform the write */
555 (void) md_call_strategy(writeb
, MD_STR_NOTTOP
, NULL
);
556 if (biowait(writeb
)) {
557 if (*single_read
== 0) {
560 return (RAID_RESYNC_WRERROR
);
562 writeb
->b_blkno
+= tcopysize
;
565 sema_destroy(&readb1
->b_io
);
566 sema_destroy(&readb1
->b_sem
);
567 sema_destroy(&readb2
->b_io
);
568 sema_destroy(&readb2
->b_sem
);
569 sema_destroy(&writeb
->b_io
);
570 sema_destroy(&writeb
->b_sem
);
571 return (RAID_RESYNC_OKAY
);
577 * DESCRIPTION: Resync the component. Iterate through the raid unit a line at
578 * a time, read from the good device(s) and write the resync
581 * PARAMETERS: minor_t mnum - minor number identity of metadevice
582 * md_raidcs_t *cs - child save struct
584 * RETURN: 0 - successful
588 * LOCKS: Expects Unit Reader Lock to be held across call. Acquires and
589 * releases Line Reader Lock for per-line I/O.
599 mddb_recid_t recids
[2];
601 md_dev64_t dev_to_write
;
602 diskaddr_t write_pwstart
;
603 diskaddr_t write_devstart
;
612 diskaddr_t segsincolumn
;
617 * hs_state is the state of the hotspare on the column being resynced
618 * dev_state is the state of the resync target
622 diskaddr_t resync_end_pos
;
629 md_unit_readerexit(ui
);
630 un
= (mr_unit_t
*)md_io_writerlock(ui
);
631 un
= (mr_unit_t
*)md_unit_writerlock(ui
);
632 resync
= un
->un_resync_index
;
633 state
= un
->un_column
[resync
].un_devstate
;
634 line_count
= un
->un_maxio
/ un
->un_segsize
;
635 if (line_count
== 0) { /* handle the case of segsize > maxio */
637 bsize
= un
->un_maxio
;
639 bsize
= line_count
* un
->un_segsize
;
641 un
->un_resync_copysize
= (uint_t
)bsize
;
643 ASSERT(un
->c
.un_status
& MD_UN_RESYNC_ACTIVE
);
644 ASSERT(un
->un_column
[resync
].un_devflags
&
645 (MD_RAID_COPY_RESYNC
| MD_RAID_REGEN_RESYNC
));
648 * if the column is not in resync then just bail out.
650 if (! (un
->un_column
[resync
].un_devstate
& RCS_RESYNC
)) {
651 md_unit_writerexit(ui
);
652 md_io_writerexit(ui
);
653 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
656 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_RESYNC_START
, SVM_TAG_METADEVICE
,
657 MD_UN2SET(un
), MD_SID(un
));
659 /* identify device to write and its start block */
661 if (un
->un_column
[resync
].un_alt_dev
!= NODEV64
) {
662 if (raid_open_alt(un
, resync
)) {
663 raid_set_state(un
, resync
, state
, 0);
664 md_unit_writerexit(ui
);
665 md_io_writerexit(ui
);
666 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
667 cmn_err(CE_WARN
, "md: %s: %s open failed replace "
668 "terminated", md_shortname(MD_SID(un
)),
669 md_devname(MD_UN2SET(un
),
670 un
->un_column
[resync
].un_alt_dev
,
672 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_RESYNC_FAILED
,
673 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
676 ASSERT(un
->un_column
[resync
].un_devflags
& MD_RAID_COPY_RESYNC
);
677 dev_to_write
= un
->un_column
[resync
].un_alt_dev
;
678 write_devstart
= un
->un_column
[resync
].un_alt_devstart
;
679 write_pwstart
= un
->un_column
[resync
].un_alt_pwstart
;
680 if (un
->un_column
[resync
].un_devflags
& MD_RAID_DEV_ERRED
) {
687 un
->un_column
[resync
].un_devflags
|= MD_RAID_WRITE_ALT
;
689 dev_to_write
= un
->un_column
[resync
].un_dev
;
690 write_devstart
= un
->un_column
[resync
].un_devstart
;
691 write_pwstart
= un
->un_column
[resync
].un_pwstart
;
694 ASSERT(un
->un_column
[resync
].un_devflags
&
695 MD_RAID_REGEN_RESYNC
);
698 alloc_bufs(cs
, dbtob(bsize
));
699 /* initialize pre-write area */
700 if (init_pw_area(un
, dev_to_write
, write_pwstart
, resync
)) {
701 un
->un_column
[resync
].un_devflags
&= ~MD_RAID_WRITE_ALT
;
702 if (un
->un_column
[resync
].un_alt_dev
!= NODEV64
) {
703 raid_close_alt(un
, resync
);
705 md_unit_writerexit(ui
);
706 md_io_writerexit(ui
);
707 if (dev_to_write
== un
->un_column
[resync
].un_dev
)
709 err
= RAID_RESYNC_WRERROR
;
710 goto resync_comp_error
;
713 un
->c
.un_status
&= ~MD_UN_RESYNC_CANCEL
;
714 segsincolumn
= un
->un_segsincolumn
;
715 err_cnt
= raid_state_cnt(un
, RCS_ERRED
| RCS_LAST_ERRED
);
717 /* commit the record */
719 md_unit_writerexit(ui
);
720 md_io_writerexit(ui
);
723 /* resync each line of the unit */
724 for (line
= 0; line
< segsincolumn
; line
+= line_count
) {
726 * Update address range in child struct and lock the line.
728 * The reader version of the line lock is used since only
729 * resync will use data beyond un_resync_line_index on the
732 un
= (mr_unit_t
*)md_io_readerlock(ui
);
733 if (line
+ line_count
> segsincolumn
)
734 line_count
= segsincolumn
- line
;
735 resync_end_pos
= raid_resync_fillin_cs(line
, line_count
, cs
);
736 (void) md_unit_readerlock(ui
);
737 ASSERT(un
->un_resync_line_index
== resync_end_pos
);
738 err
= raid_resync_region(cs
, line
, (int)line_count
,
739 &single_read
, &hs_state
, &err_col
, dev_to_write
,
743 * if the column failed to resync then stop writing directly
747 un
->un_resync_line_index
= 0;
749 md_unit_readerexit(ui
);
751 md_io_readerexit(ui
);
756 un
= (mr_unit_t
*)md_unit_writerlock(ui
);
758 if (raid_state_cnt(un
, RCS_ERRED
| RCS_LAST_ERRED
) != err_cnt
) {
759 err
= RAID_RESYNC_STATE
;
760 md_unit_writerexit(ui
);
763 md_unit_writerexit(ui
);
767 un
= (mr_unit_t
*)md_io_writerlock(ui
);
768 (void) md_unit_writerlock(ui
);
769 un
->un_column
[resync
].un_devflags
&= ~MD_RAID_WRITE_ALT
;
777 case RAID_RESYNC_OKAY
:
778 /* initialize pre-write area */
779 if ((un
->un_column
[resync
].un_orig_dev
!= NODEV64
) &&
780 (un
->un_column
[resync
].un_orig_dev
==
781 un
->un_column
[resync
].un_alt_dev
)) {
783 * replacing a hot spare
784 * release the hot spare, which will close the hotspare
785 * and mark it closed.
787 raid_hs_release(hs_state
, un
, &recids
[0], resync
);
789 * make the resync target the main device and
792 un
->un_column
[resync
].un_hs_id
= 0;
793 un
->un_column
[resync
].un_dev
=
794 un
->un_column
[resync
].un_orig_dev
;
795 un
->un_column
[resync
].un_devstart
=
796 un
->un_column
[resync
].un_orig_devstart
;
797 un
->un_column
[resync
].un_pwstart
=
798 un
->un_column
[resync
].un_orig_pwstart
;
799 un
->un_column
[resync
].un_devflags
|= MD_RAID_DEV_ISOPEN
;
800 /* alt becomes the device so don't close it */
801 un
->un_column
[resync
].un_devflags
&= ~MD_RAID_WRITE_ALT
;
802 un
->un_column
[resync
].un_devflags
&=
804 un
->un_column
[resync
].un_alt_dev
= NODEV64
;
806 raid_set_state(un
, resync
, RCS_OKAY
, 0);
809 case RAID_RESYNC_WRERROR
:
810 if (HOTSPARED(un
, resync
) && single_read
&&
811 (un
->un_column
[resync
].un_devflags
& MD_RAID_COPY_RESYNC
)) {
813 * this is the case where the resync target is
814 * bad but there is a good hotspare. In this
815 * case keep the hotspare, and go back to okay.
817 raid_set_state(un
, resync
, RCS_OKAY
, 0);
818 cmn_err(CE_WARN
, "md: %s: %s write error, replace "
819 "terminated", md_shortname(MD_SID(un
)),
820 md_devname(MD_UN2SET(un
),
821 un
->un_column
[resync
].un_orig_dev
,
825 if (HOTSPARED(un
, resync
)) {
826 raid_hs_release(hs_state
, un
, &recids
[0], resync
);
827 un
->un_column
[resync
].un_dev
=
828 un
->un_column
[resync
].un_orig_dev
;
829 un
->un_column
[resync
].un_devstart
=
830 un
->un_column
[resync
].un_orig_devstart
;
831 un
->un_column
[resync
].un_pwstart
=
832 un
->un_column
[resync
].un_orig_pwstart
;
834 raid_set_state(un
, resync
, RCS_ERRED
, 0);
835 if (un
->un_column
[resync
].un_devflags
& MD_RAID_REGEN_RESYNC
)
836 dev
= un
->un_column
[resync
].un_dev
;
838 dev
= un
->un_column
[resync
].un_alt_dev
;
839 cmn_err(CE_WARN
, "md: %s: %s write error replace terminated",
840 md_shortname(MD_SID(un
)), md_devname(MD_UN2SET(un
), dev
,
844 case RAID_RESYNC_STATE
:
845 if (HOTSPARED(un
, resync
) && single_read
&&
846 (un
->un_column
[resync
].un_devflags
& MD_RAID_COPY_RESYNC
)) {
848 * this is the case where the resync target is
849 * bad but there is a good hotspare. In this
850 * case keep the hotspare, and go back to okay.
852 raid_set_state(un
, resync
, RCS_OKAY
, 0);
853 cmn_err(CE_WARN
, "md: %s: needs maintenance, replace "
854 "terminated", md_shortname(MD_SID(un
)));
857 if (HOTSPARED(un
, resync
)) {
858 raid_hs_release(hs_state
, un
, &recids
[0], resync
);
859 un
->un_column
[resync
].un_dev
=
860 un
->un_column
[resync
].un_orig_dev
;
861 un
->un_column
[resync
].un_devstart
=
862 un
->un_column
[resync
].un_orig_devstart
;
863 un
->un_column
[resync
].un_pwstart
=
864 un
->un_column
[resync
].un_orig_pwstart
;
867 case RAID_RESYNC_RDERROR
:
868 if (HOTSPARED(un
, resync
)) {
869 raid_hs_release(hs_state
, un
, &recids
[0], resync
);
870 un
->un_column
[resync
].un_dev
=
871 un
->un_column
[resync
].un_orig_dev
;
872 un
->un_column
[resync
].un_devstart
=
873 un
->un_column
[resync
].un_orig_devstart
;
874 un
->un_column
[resync
].un_pwstart
=
875 un
->un_column
[resync
].un_orig_pwstart
;
878 if ((resync
!= err_col
) && (err_col
!= NOCOLUMN
))
879 raid_set_state(un
, err_col
, RCS_ERRED
, 0);
885 if (un
->un_column
[resync
].un_alt_dev
!= NODEV64
) {
886 raid_close_alt(un
, resync
);
890 * an io operation may have gotten an error and placed a
891 * column in erred state. This will abort the resync, which
892 * will end up in last erred. This is ugly so go through
893 * the columns and do cleanup
897 for (i
= 0; i
< un
->un_totalcolumncnt
; i
++) {
898 if (un
->un_column
[i
].un_devstate
& RCS_OKAY
)
901 raid_set_state(un
, i
, RCS_ERRED
, 1);
903 } else if (err
== RAID_RESYNC_OKAY
) {
906 raid_set_state(un
, i
, RCS_LAST_ERRED
, 1);
910 if ((err_cnt
== 0) && (last_err
== 0))
911 un
->un_state
= RUS_OKAY
;
912 else if (last_err
== 0) {
913 un
->un_state
= RUS_ERRED
;
914 ASSERT(err_cnt
== 1);
915 } else if (last_err
> 0) {
916 un
->un_state
= RUS_LAST_ERRED
;
919 uniqtime32(&un
->un_column
[resync
].un_devtimestamp
);
920 un
->un_resync_copysize
= 0;
921 un
->un_column
[resync
].un_devflags
&=
922 ~(MD_RAID_REGEN_RESYNC
| MD_RAID_COPY_RESYNC
);
923 raid_commit(un
, recids
);
924 /* release unit writer lock and acquire unit reader lock */
925 md_unit_writerexit(ui
);
926 md_io_writerexit(ui
);
927 (void) md_unit_readerlock(ui
);
928 if (err
== RAID_RESYNC_OKAY
) {
929 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_RESYNC_DONE
,
930 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
932 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_RESYNC_FAILED
,
933 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
934 if (raid_state_cnt(un
, RCS_ERRED
|
935 RCS_LAST_ERRED
) > 1) {
936 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_LASTERRED
,
937 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
939 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_ERRED
,
940 SVM_TAG_METADEVICE
, MD_UN2SET(un
), MD_SID(un
));
944 free_bufs(dbtob(bsize
), cs
);
950 * DESCRIPTION: Start of RAID resync thread. Perform up front allocations,
951 * initializations and consistency checking, then call
952 * resync_comp to resync the component.
954 * PARAMETERS: minor_t mnum - minor number identity of metadevice
956 * LOCKS: Acquires and releases Unit Reader Lock to maintain unit
957 * existence during resync.
958 * Acquires and releases the resync count lock for cpr.
967 md_raidps_t
*ps
= NULL
;
968 md_raidcs_t
*cs
= NULL
;
972 * Increment the raid resync count for cpr
974 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
975 md_cpr_resync
.md_raid_resync
++;
976 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
981 un
= (mr_unit_t
*)md_unit_readerlock(ui
);
984 * Allocate parent and child memory pool structures. These are
985 * only needed to lock raid lines, so only the minimal
986 * required fields for this purpose are initialized.
988 * Do not use the reserve pool for resync.
990 ps
= kmem_cache_alloc(raid_parent_cache
, MD_ALLOCFLAGS
);
991 raid_parent_init(ps
);
992 cs
= kmem_cache_alloc(raid_child_cache
, MD_ALLOCFLAGS
);
994 resync
= un
->un_resync_index
;
997 ps
->ps_flags
= MD_RPS_INUSE
;
1001 ASSERT(!(un
->un_column
[resync
].un_devflags
& MD_RAID_WRITE_ALT
));
1003 resync_comp(mnum
, cs
);
1004 release_resync_request(mnum
);
1006 kmem_cache_free(raid_child_cache
, cs
);
1007 kmem_cache_free(raid_parent_cache
, ps
);
1009 md_unit_readerexit(ui
);
1011 /* close raid unit */
1012 (void) raid_internal_close(mnum
, OTYP_LYR
, 0, 0);
1014 /* poke hot spare daemon */
1015 (void) raid_hotspares();
1018 * Decrement the raid resync count for cpr
1020 mutex_enter(&md_cpr_resync
.md_resync_mutex
);
1021 md_cpr_resync
.md_raid_resync
--;
1022 mutex_exit(&md_cpr_resync
.md_resync_mutex
);
1028 * NAME: raid_resync_unit
1030 * DESCRIPTION: RAID metadevice specific resync routine.
1031 * Open the unit and start resync_unit as a separate thread.
1033 * PARAMETERS: minor_t mnum - minor number identity of metadevice
1034 * md_error_t *ep - output error parameter
1036 * RETURN: On error return 1 or set ep to nonzero, otherwise return 0.
1038 * LOCKS: Acquires and releases Unit Writer Lock.
1047 set_t setno
= MD_MIN2SET(mnum
);
1050 ui
= MDI_UNIT(mnum
);
1053 if (md_get_setstatus(setno
) & MD_SET_STALE
)
1054 return (mdmddberror(ep
, MDE_DB_STALE
, mnum
, setno
));
1056 ASSERT(un
->un_column
[un
->un_resync_index
].un_devflags
&
1057 (MD_RAID_COPY_RESYNC
| MD_RAID_REGEN_RESYNC
));
1059 /* Don't start a resync if the device is not available */
1060 if ((ui
== NULL
) || (ui
->ui_tstate
& MD_DEV_ERRORED
)) {
1061 return (mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
));
1064 if (raid_internal_open(mnum
, FREAD
| FWRITE
, OTYP_LYR
, 0)) {
1065 (void) md_unit_writerlock(ui
);
1066 release_resync_request(mnum
);
1067 md_unit_writerexit(ui
);
1068 SE_NOTIFY(EC_SVM_STATE
, ESC_SVM_OPEN_FAIL
, SVM_TAG_METADEVICE
,
1070 return (mdmderror(ep
, MDE_RAID_OPEN_FAILURE
, mnum
));
1073 /* start resync_unit thread */
1074 (void) thread_create(NULL
, 0, resync_unit
, (void *)(uintptr_t)mnum
,
1075 0, &p0
, TS_RUN
, minclsyspri
);