/************************************************************************
 * raid1.c : Multiple Devices driver for Linux
 *           Copyright (C) 1996 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/locks.h>
#include <linux/malloc.h>
#include <linux/md.h>
#include <linux/raid1.h>
#include <asm/bitops.h>
#include <asm/atomic.h>

#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY

/*
 * The following can be used to debug the driver
 */
/*#define RAID1_DEBUG*/
#ifdef RAID1_DEBUG
#define PRINTK(x)	do { printk x; } while (0);
#else
#define PRINTK(x)	do { ; } while (0);
#endif

#define MAX(a,b)	((a) > (b) ? (a) : (b))
#define MIN(a,b)	((a) < (b) ? (a) : (b))

static struct md_personality raid1_personality;
static struct md_thread *raid1_thread = NULL;
struct buffer_head *raid1_retry_list = NULL;

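/*
 * Pick one operational mirror for a request. Only the retry path in
 * raid1d() uses this; regular reads choose their mirror directly in
 * raid1_make_request() below.
 */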
static int __raid1_map (struct md_dev *mddev, kdev_t *rdev,
			unsigned long *rsector, unsigned long size)
{
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
	int i, n = raid_conf->raid_disks;

	/*
	 * Later we do read balancing on the read side
	 * now we use the first available disk.
	 */

	PRINTK(("raid1_map().\n"));

	for (i=0; i<n; i++) {
		if (raid_conf->mirrors[i].operational) {
			*rdev = raid_conf->mirrors[i].dev;
			return (0);
		}
	}

	printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
	return (-1);
}

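/*
 * The personality's map method is intentionally a no-op:
 * raid1_make_request() builds and dispatches the mirrored buffer
 * heads itself, so there is nothing left to remap here.
 */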
static int raid1_map (struct md_dev *mddev, kdev_t *rdev,
		      unsigned long *rsector, unsigned long size)
{
	return 0;
}

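/*
 * Put a failed read on the global retry list and wake up raid1d,
 * which will redirect it to another mirror.
 */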
void raid1_reschedule_retry (struct buffer_head *bh)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id);

	PRINTK(("raid1_reschedule_retry().\n"));

	r1_bh->next_retry = raid1_retry_list;
	raid1_retry_list = bh;
	md_wakeup_thread(raid1_thread);
}

/*
 * raid1_end_buffer_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
static inline void raid1_end_buffer_io(struct raid1_bh *r1_bh, int uptodate)
{
	struct buffer_head *bh = r1_bh->master_bh;

	bh->b_end_io(bh, uptodate);
	kfree(r1_bh);
}

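/*
 * Debug aid: setting this nonzero (e.g. from a debugger) forces the
 * next request completion to be treated as a failure, exercising the
 * error path once.
 */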
int raid1_one_error=0;

void raid1_end_request (struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id);
	unsigned long flags;

	save_flags(flags);
	cli();
	PRINTK(("raid1_end_request().\n"));

	if (raid1_one_error) {
		raid1_one_error=0;
		uptodate=0;
	}

	/*
	 * this branch is our 'one mirror IO has finished' event handler:
	 */
	if (!uptodate)
		md_error (bh->b_dev, bh->b_rdev);
	else {
		/*
		 * Set BH_Uptodate in our master buffer_head, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the complex operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' buffer_head.
		 */
		set_bit (BH_Uptodate, &r1_bh->state);
	}

	/*
	 * We split up the read and write side, imho they are
	 * conceptually different.
	 */

	if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) {

		PRINTK(("raid1_end_request(), read branch.\n"));

		/*
		 * we have only one buffer_head on the read side
		 */
		if (uptodate) {
			PRINTK(("raid1_end_request(), read branch, uptodate.\n"));
			raid1_end_buffer_io(r1_bh, uptodate);
			restore_flags(flags);
			return;
		}
		/*
		 * oops, read error:
		 */
		printk(KERN_ERR "raid1: %s: rescheduling block %lu\n",
			kdevname(bh->b_dev), bh->b_blocknr);
		raid1_reschedule_retry (bh);
		restore_flags(flags);
		return;
	}

	/*
	 * WRITE or WRITEA.
	 */
	PRINTK(("raid1_end_request(), write branch.\n"));

	/*
	 * Let's see if all mirrored write operations have finished
	 * already [we have irqs off, so we can decrease]:
	 */

	if (!--r1_bh->remaining) {
		struct md_dev *mddev = r1_bh->mddev;
		struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
		int i, n = raid_conf->raid_disks;

		PRINTK(("raid1_end_request(), remaining == 0.\n"));

		for ( i=0; i<n; i++)
			if (r1_bh->mirror_bh[i]) kfree(r1_bh->mirror_bh[i]);

		raid1_end_buffer_io(r1_bh, test_bit(BH_Uptodate, &r1_bh->state));
	}
	else PRINTK(("raid1_end_request(), remaining == %u.\n", r1_bh->remaining));
	restore_flags(flags);
}

/*
 * This routine checks if the underlying device is an md device; in that
 * case it maps the blocks before putting the request on the queue.
 */
static inline void
map_and_make_request (int rw, struct buffer_head *bh)
{
	if (MAJOR (bh->b_rdev) == MD_MAJOR)
		md_map (MINOR (bh->b_rdev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9);
	clear_bit(BH_Lock, &bh->b_state);
	make_request (MAJOR (bh->b_rdev), rw, bh);
}

static int
raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh)
{
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
	struct buffer_head *mirror_bh[MD_SB_DISKS], *bh_req;
	struct raid1_bh * r1_bh;
	int n = raid_conf->raid_disks, i, sum_bhs = 0, switch_disks = 0, sectors;
	struct mirror_info *mirror;

	PRINTK(("raid1_make_request().\n"));

	while (!( /* FIXME: now we are rather fault tolerant than nice */
	r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL)
	) )
		printk ("raid1_make_request(#1): out of memory\n");
	memset (r1_bh, 0, sizeof (struct raid1_bh));

	/*
	 * make_request() can abort the operation when READA or WRITEA are being
	 * used and no empty request is available.
	 *
	 * Currently, just replace the command with READ/WRITE.
	 */
	if (rw == READA) rw = READ;
	if (rw == WRITEA) rw = WRITE;

	if (rw == WRITE || rw == WRITEA)
		mark_buffer_clean(bh);		/* Too early ? */

	/*
	 * I think the read and write branch should be separated completely,
	 * since we want to do read balancing on the read side for example.
	 * Comments? :) --mingo
	 */

	r1_bh->master_bh=bh;
	r1_bh->mddev=mddev;
	r1_bh->cmd = rw;

	if (rw==READ || rw==READA) {
		int last_used = raid_conf->last_used;
		PRINTK(("raid1_make_request(), read branch.\n"));
		mirror = raid_conf->mirrors + last_used;
		bh->b_rdev = mirror->dev;
		sectors = bh->b_size >> 9;
		if (bh->b_blocknr * sectors == raid_conf->next_sect) {
			raid_conf->sect_count += sectors;
			if (raid_conf->sect_count >= mirror->sect_limit)
				switch_disks = 1;
		} else
			switch_disks = 1;
		raid_conf->next_sect = (bh->b_blocknr + 1) * sectors;
		if (switch_disks) {
			PRINTK(("read-balancing: switching %d -> %d (%d sectors)\n", last_used, mirror->next, raid_conf->sect_count));
			raid_conf->sect_count = 0;
			last_used = raid_conf->last_used = mirror->next;
			/*
			 * Do not switch to write-only disks ... resyncing
			 * is in progress
			 */
			while (raid_conf->mirrors[last_used].write_only)
				raid_conf->last_used = raid_conf->mirrors[last_used].next;
		}
		PRINTK (("raid1 read queue: %d %d\n", MAJOR (bh->b_rdev), MINOR (bh->b_rdev)));
		bh_req = &r1_bh->bh_req;
		memcpy(bh_req, bh, sizeof(*bh));
		bh_req->b_end_io = raid1_end_request;
		bh_req->b_dev_id = r1_bh;
		map_and_make_request (rw, bh_req);
		return 0;
	}

	/*
	 * WRITE or WRITEA.
	 */
	PRINTK(("raid1_make_request(n=%d), write branch.\n",n));

	for (i = 0; i < n; i++) {

		if (!raid_conf->mirrors [i].operational) {
			/*
			 * the r1_bh->mirror_bh[i] pointer remains NULL
			 */
			mirror_bh[i] = NULL;
			continue;
		}

		/*
		 * We should use a private pool (size depending on NR_REQUEST),
		 * to avoid writes filling up the memory with bhs
		 *
		 * Such pools are much faster than kmalloc anyways (so we waste
		 * almost nothing by not using the master bh when writing and
		 * win a lot of cleanness)
		 *
		 * but for now we are cool enough. --mingo
		 *
		 * It's safe to sleep here, buffer heads cannot be used in a shared
		 * manner in the write branch. Look how we lock the buffer at the
		 * beginning of this function to grok the difference ;)
		 */
		while (!( /* FIXME: now we are rather fault tolerant than nice */
		mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_KERNEL)
		) )
			printk ("raid1_make_request(#2): out of memory\n");
		memset (mirror_bh[i], 0, sizeof (struct buffer_head));

		/*
		 * prepare mirrored bh (fields ordered for max mem throughput):
		 */
		mirror_bh [i]->b_blocknr = bh->b_blocknr;
		mirror_bh [i]->b_dev     = bh->b_dev;
		mirror_bh [i]->b_rdev    = raid_conf->mirrors [i].dev;
		mirror_bh [i]->b_rsector = bh->b_rsector;
		mirror_bh [i]->b_state   = (1<<BH_Req) | (1<<BH_Dirty);
		mirror_bh [i]->b_count   = 1;
		mirror_bh [i]->b_size    = bh->b_size;
		mirror_bh [i]->b_data    = bh->b_data;
		mirror_bh [i]->b_list    = BUF_LOCKED;
		mirror_bh [i]->b_end_io  = raid1_end_request;
		mirror_bh [i]->b_dev_id  = r1_bh;

		r1_bh->mirror_bh[i] = mirror_bh[i];
		sum_bhs++;
	}

	r1_bh->remaining = sum_bhs;

	PRINTK(("raid1_make_request(), write branch, sum_bhs=%d.\n",sum_bhs));

	/*
	 * We have to be a bit careful about the semaphore above, that's why we
	 * start the requests separately. Since kmalloc() could fail, sleep and
	 * make_request() can sleep too, this is the safer solution. Imagine,
	 * end_request decreasing the semaphore before we could have set it up ...
	 * We could play tricks with the semaphore (presetting it and correcting
	 * at the end if sum_bhs is not 'n', but we have to do end_request by hand
	 * if all requests finish before we had a chance to set up the semaphore
	 * correctly ... lots of races).
	 */
	for (i = 0; i < n; i++)
		if (mirror_bh [i] != NULL)
			map_and_make_request (rw, mirror_bh [i]);

	return (0);
}

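/*
 * Status segment for /proc/mdstat output, e.g. " [2/2] [UU]":
 * total/working mirror counts, then one flag per disk
 * (U = operational, _ = failed).
 */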
static int raid1_status (char *page, int minor, struct md_dev *mddev)
{
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
	int sz = 0, i;

	sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks);
	for (i = 0; i < raid_conf->raid_disks; i++)
		sz += sprintf (page+sz, "%s", raid_conf->mirrors [i].operational ? "U" : "_");
	sz += sprintf (page+sz, "]");
	return sz;
}

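/*
 * Unlink a failed disk from the read-balancing ring: every 'next'
 * pointer that referenced it is redirected past it.
 */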
static void raid1_fix_links (struct raid1_data *raid_conf, int failed_index)
{
	int disks = raid_conf->raid_disks;
	int j;

	for (j = 0; j < disks; j++)
		if (raid_conf->mirrors [j].next == failed_index)
			raid_conf->mirrors [j].next = raid_conf->mirrors [failed_index].next;
}

#define LAST_DISK KERN_ALERT \
"raid1: only one disk left and IO error.\n"

#define NO_SPARE_DISK KERN_ALERT \
"raid1: no spare disk left, degrading mirror level by one.\n"

#define DISK_FAILED KERN_ALERT \
"raid1: Disk failure on %s, disabling device. \n" \
"	Operation continuing on %d devices\n"

#define START_SYNCING KERN_ALERT \
"raid1: start syncing spare disk.\n"

#define ALREADY_SYNCING KERN_INFO \
"raid1: syncing already in progress.\n"

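/*
 * Error handler, reached via md_error() when IO on one mirror fails:
 * mark the disk faulty in the superblock and drop it from the working
 * set, unless it is the last operational disk.
 */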
static int raid1_error (struct md_dev *mddev, kdev_t dev)
{
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
	struct mirror_info *mirror;
	md_superblock_t *sb = mddev->sb;
	int disks = raid_conf->raid_disks;
	int i;

	PRINTK(("raid1_error called\n"));

	if (raid_conf->working_disks == 1) {
		/*
		 * Uh oh, we can do nothing if this is our last disk, but
		 * first check if this is a queued request for a device
		 * which has just failed.
		 */
		for (i = 0, mirror = raid_conf->mirrors; i < disks;
				i++, mirror++)
			if (mirror->dev == dev && !mirror->operational)
				return 0;
		printk (LAST_DISK);
	} else {
		/* Mark disk as unusable */
		for (i = 0, mirror = raid_conf->mirrors; i < disks;
				i++, mirror++) {
			if (mirror->dev == dev && mirror->operational){
				mirror->operational = 0;
				raid1_fix_links (raid_conf, i);
				sb->disks[mirror->number].state |=
					(1 << MD_FAULTY_DEVICE);
				sb->disks[mirror->number].state &=
					~(1 << MD_SYNC_DEVICE);
				sb->disks[mirror->number].state &=
					~(1 << MD_ACTIVE_DEVICE);
				sb->active_disks--;
				sb->working_disks--;
				sb->failed_disks++;
				mddev->sb_dirty = 1;
				md_wakeup_thread(raid1_thread);
				raid_conf->working_disks--;
				printk (DISK_FAILED, kdevname (dev),
					raid_conf->working_disks);
			}
		}
	}
	return 0;
}

#undef LAST_DISK
#undef NO_SPARE_DISK
#undef DISK_FAILED
#undef START_SYNCING

/*
 * This is the personality-specific hot-addition routine
 */

#define NO_SUPERBLOCK KERN_ERR \
"raid1: cannot hot-add disk to the array with no RAID superblock\n"

#define WRONG_LEVEL KERN_ERR \
"raid1: hot-add: level of disk is not RAID-1\n"

#define HOT_ADD_SUCCEEDED KERN_INFO \
"raid1: device %s hot-added\n"

static int raid1_hot_add_disk (struct md_dev *mddev, kdev_t dev)
{
	unsigned long flags;
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
	struct mirror_info *mirror;
	md_superblock_t *sb = mddev->sb;
	struct real_dev * realdev;
	int n;

	/*
	 * The device has its superblock already read and it was found
	 * to be consistent for generic RAID usage. Now we check whether
	 * it's usable for RAID-1 hot addition.
	 */

	n = mddev->nb_dev++;
	realdev = &mddev->devices[n];
	if (!realdev->sb) {
		printk (NO_SUPERBLOCK);
		return -EINVAL;
	}
	if (realdev->sb->level != 1) {
		printk (WRONG_LEVEL);
		return -EINVAL;
	}
	/* FIXME: are there other things left we could sanity-check? */

	/*
	 * We have to disable interrupts, as our RAID-1 state is used
	 * from irq handlers as well.
	 */
	save_flags(flags);
	cli();

	raid_conf->raid_disks++;
	mirror = raid_conf->mirrors+n;

	mirror->number=n;
	mirror->raid_disk=n;
	mirror->dev=dev;
	mirror->next=0; /* FIXME */
	mirror->sect_limit=128;

	mirror->operational=0;
	mirror->spare=1;
	mirror->write_only=0;

	sb->disks[n].state |= (1 << MD_FAULTY_DEVICE);
	sb->disks[n].state &= ~(1 << MD_SYNC_DEVICE);
	sb->disks[n].state &= ~(1 << MD_ACTIVE_DEVICE);
	sb->nr_disks++;
	sb->spare_disks++;

	restore_flags(flags);

	md_update_sb(MINOR(dev));

	printk (HOT_ADD_SUCCEEDED, kdevname(realdev->dev));

	return 0;
}

#undef NO_SUPERBLOCK
#undef WRONG_LEVEL
#undef HOT_ADD_SUCCEEDED

/*
 * Insert the spare disk into the drive-ring
 */
static void add_ring(struct raid1_data *raid_conf, struct mirror_info *mirror)
{
	int j, next;
	struct mirror_info *p = raid_conf->mirrors;

	for (j = 0; j < raid_conf->raid_disks; j++, p++)
		if (p->operational && !p->write_only) {
			next = p->next;
			p->next = mirror->raid_disk;
			mirror->next = next;
			return;
		}
	printk("raid1: bug: no read-operational devices\n");
}

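/*
 * Spare-disk state machine: SPARE_WRITE makes the spare write-only
 * while it is being resynced, SPARE_ACTIVE promotes it to a full
 * read/write mirror (swapping raid_disk slots with the failed disk,
 * if any), and SPARE_INACTIVE takes it offline again.
 */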
static int raid1_mark_spare(struct md_dev *mddev, md_descriptor_t *spare,
				int state)
{
	int i = 0, failed_disk = -1;
	struct raid1_data *raid_conf = mddev->private;
	struct mirror_info *mirror = raid_conf->mirrors;
	md_descriptor_t *descriptor;
	unsigned long flags;

	for (i = 0; i < MD_SB_DISKS; i++, mirror++) {
		if (mirror->spare && mirror->number == spare->number)
			goto found;
	}
	return 1;
found:
	for (i = 0, mirror = raid_conf->mirrors; i < raid_conf->raid_disks;
				i++, mirror++)
		if (!mirror->operational)
			failed_disk = i;

	save_flags(flags);
	cli();
	switch (state) {
	case SPARE_WRITE:
		mirror->operational = 1;
		mirror->write_only = 1;
		raid_conf->raid_disks = MAX(raid_conf->raid_disks,
						mirror->raid_disk + 1);
		break;
	case SPARE_INACTIVE:
		mirror->operational = 0;
		mirror->write_only = 0;
		break;
	case SPARE_ACTIVE:
		mirror->spare = 0;
		mirror->write_only = 0;
		raid_conf->working_disks++;
		add_ring(raid_conf, mirror);

		if (failed_disk != -1) {
			descriptor = &mddev->sb->disks[raid_conf->mirrors[failed_disk].number];
			i = spare->raid_disk;
			spare->raid_disk = descriptor->raid_disk;
			descriptor->raid_disk = i;
		}
		break;
	default:
		printk("raid1_mark_spare: bug: state == %d\n", state);
		restore_flags(flags);
		return 1;
	}
	restore_flags(flags);
	return 0;
}

/*
 * This is a kernel thread which:
 *
 *	1. Retries failed read operations on working mirrors.
 *	2. Updates the raid superblock when problems are encountered.
 */
void raid1d (void *data)
{
	struct buffer_head *bh;
	kdev_t dev;
	unsigned long flags;
	struct raid1_bh * r1_bh;
	struct md_dev *mddev;

	PRINTK(("raid1d() active\n"));
	save_flags(flags);
	cli();
	while (raid1_retry_list) {
		bh = raid1_retry_list;
		r1_bh = (struct raid1_bh *)(bh->b_dev_id);
		raid1_retry_list = r1_bh->next_retry;
		restore_flags(flags);

		mddev = md_dev + MINOR(bh->b_dev);
		if (mddev->sb_dirty) {
			printk("dirty sb detected, updating.\n");
			mddev->sb_dirty = 0;
			md_update_sb(MINOR(bh->b_dev));
		}
		dev = bh->b_rdev;
		__raid1_map (md_dev + MINOR(bh->b_dev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9);
		if (bh->b_rdev == dev) {
			printk (KERN_ALERT
				"raid1: %s: unrecoverable I/O read error for block %lu\n",
				kdevname(bh->b_dev), bh->b_blocknr);
			raid1_end_buffer_io(r1_bh, 0);
		} else {
			printk (KERN_ERR "raid1: %s: redirecting sector %lu to another mirror\n",
				kdevname(bh->b_dev), bh->b_blocknr);
			map_and_make_request (r1_bh->cmd, bh);
		}
		cli();
	}
	restore_flags(flags);
}

/*
 * This will catch the scenario in which one of the mirrors was
 * mounted as a normal device rather than as a part of a raid set.
 */
static int __check_consistency (struct md_dev *mddev, int row)
{
	struct raid1_data *raid_conf = mddev->private;
	kdev_t dev;
	struct buffer_head *bh = NULL;
	int i, rc = 0;
	char *buffer = NULL;

	for (i = 0; i < raid_conf->raid_disks; i++) {
		if (!raid_conf->mirrors[i].operational)
			continue;
		dev = raid_conf->mirrors[i].dev;
		set_blocksize(dev, 4096);
		if ((bh = bread(dev, row / 4, 4096)) == NULL)
			break;
		if (!buffer) {
			buffer = (char *) __get_free_page(GFP_KERNEL);
			if (!buffer)
				break;
			memcpy(buffer, bh->b_data, 4096);
		} else if (memcmp(buffer, bh->b_data, 4096)) {
			rc = 1;
			break;
		}
		bforget(bh);
		fsync_dev(dev);
		invalidate_buffers(dev);
		bh = NULL;
	}
	if (buffer)
		free_page((unsigned long) buffer);
	if (bh) {
		dev = bh->b_dev;
		bforget(bh);
		fsync_dev(dev);
		invalidate_buffers(dev);
	}
	return rc;
}

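/*
 * Probe the set at evenly spaced rows (size/8 apart); any mismatch
 * between the mirrors marks the whole set inconsistent.
 */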
static int check_consistency (struct md_dev *mddev)
{
	int size = mddev->sb->size;
	int row;

	for (row = 0; row < size; row += size / 8)
		if (__check_consistency(mddev, row))
			return 1;
	return 0;
}

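/*
 * Personality initialization: validate the superblock, copy the
 * verified devices into the private raid1_data bookkeeping, build
 * the read-balancing ring and run the mirror consistency check.
 */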
static int raid1_run (int minor, struct md_dev *mddev)
{
	struct raid1_data *raid_conf;
	int i, j, raid_disk;
	md_superblock_t *sb = mddev->sb;
	md_descriptor_t *descriptor;
	struct real_dev *realdev;

	MOD_INC_USE_COUNT;

	if (sb->level != 1) {
		printk("raid1: %s: raid level not set to mirroring (%d)\n",
			kdevname(MKDEV(MD_MAJOR, minor)), sb->level);
		MOD_DEC_USE_COUNT;
		return -EIO;
	}
	/****
	 * copy the now verified devices into our private RAID1 bookkeeping
	 * area. [whatever we allocate in raid1_run(), should be freed in
	 * raid1_stop()]
	 */

	while (!( /* FIXME: now we are rather fault tolerant than nice */
	mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL)
	) )
		printk ("raid1_run(): out of memory\n");
	raid_conf = mddev->private;
	memset(raid_conf, 0, sizeof(*raid_conf));

	PRINTK(("raid1_run(%d) called.\n", minor));

	for (i = 0; i < mddev->nb_dev; i++) {
		realdev = &mddev->devices[i];
		if (!realdev->sb) {
			printk(KERN_ERR "raid1: disabled mirror %s (couldn't access raid superblock)\n", kdevname(realdev->dev));
			continue;
		}

		/*
		 * This is important -- we are using the descriptor on
		 * the disk only to get a pointer to the descriptor on
		 * the main superblock, which might be more recent.
		 */
		descriptor = &sb->disks[realdev->sb->descriptor.number];
		if (descriptor->state & (1 << MD_FAULTY_DEVICE)) {
			printk(KERN_ERR "raid1: disabled mirror %s (errors detected)\n", kdevname(realdev->dev));
			continue;
		}
		if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) {
			if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) {
				printk(KERN_ERR "raid1: disabled mirror %s (not in sync)\n", kdevname(realdev->dev));
				continue;
			}
			raid_disk = descriptor->raid_disk;
			if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) {
				printk(KERN_ERR "raid1: disabled mirror %s (inconsistent descriptor)\n", kdevname(realdev->dev));
				continue;
			}
			if (raid_conf->mirrors[raid_disk].operational) {
				printk(KERN_ERR "raid1: disabled mirror %s (mirror %d already operational)\n", kdevname(realdev->dev), raid_disk);
				continue;
			}
			printk(KERN_INFO "raid1: device %s operational as mirror %d\n", kdevname(realdev->dev), raid_disk);
			raid_conf->mirrors[raid_disk].number = descriptor->number;
			raid_conf->mirrors[raid_disk].raid_disk = raid_disk;
			raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev;
			raid_conf->mirrors[raid_disk].operational = 1;
			raid_conf->mirrors[raid_disk].sect_limit = 128;
			raid_conf->working_disks++;
		} else {
			/*
			 * Must be a spare disk ..
			 */
			printk(KERN_INFO "raid1: spare disk %s\n", kdevname(realdev->dev));
			raid_disk = descriptor->raid_disk;
			raid_conf->mirrors[raid_disk].number = descriptor->number;
			raid_conf->mirrors[raid_disk].raid_disk = raid_disk;
			raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev;
			raid_conf->mirrors[raid_disk].sect_limit = 128;

			raid_conf->mirrors[raid_disk].operational = 0;
			raid_conf->mirrors[raid_disk].write_only = 0;
			raid_conf->mirrors[raid_disk].spare = 1;
		}
	}
	if (!raid_conf->working_disks) {
		printk(KERN_ERR "raid1: no operational mirrors for %s\n", kdevname(MKDEV(MD_MAJOR, minor)));
		kfree(raid_conf);
		mddev->private = NULL;
		MOD_DEC_USE_COUNT;
		return -EIO;
	}

	raid_conf->raid_disks = sb->raid_disks;
	raid_conf->mddev = mddev;

	for (j = 0; !raid_conf->mirrors[j].operational; j++);
	raid_conf->last_used = j;
	for (i = raid_conf->raid_disks - 1; i >= 0; i--) {
		if (raid_conf->mirrors[i].operational) {
			PRINTK(("raid_conf->mirrors[%d].next == %d\n", i, j));
			raid_conf->mirrors[i].next = j;
			j = i;
		}
	}

	if (check_consistency(mddev)) {
		printk(KERN_ERR "raid1: detected mirror differences -- run ckraid\n");
		sb->state |= 1 << MD_SB_ERRORS;
		kfree(raid_conf);
		mddev->private = NULL;
		MOD_DEC_USE_COUNT;
		return -EIO;
	}

	/*
	 * Regenerate the "device is in sync with the raid set" bit for
	 * each device.
	 */
	for (i = 0; i < sb->nr_disks ; i++) {
		sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE);
		for (j = 0; j < sb->raid_disks; j++) {
			if (!raid_conf->mirrors[j].operational)
				continue;
			if (sb->disks[i].number == raid_conf->mirrors[j].number)
				sb->disks[i].state |= 1 << MD_SYNC_DEVICE;
		}
	}
	sb->active_disks = raid_conf->working_disks;

	printk("raid1: raid set %s active with %d out of %d mirrors\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks);
	/* Ok, everything is just fine now */
	return (0);
}

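/*
 * Undo raid1_run(): release the private bookkeeping area.
 */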
static int raid1_stop (int minor, struct md_dev *mddev)
{
	struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;

	kfree (raid_conf);
	mddev->private = NULL;
	MOD_DEC_USE_COUNT;
	return 0;
}

static struct md_personality raid1_personality=
{
	"raid1",
	raid1_map,
	raid1_make_request,
	raid1_end_request,
	raid1_run,
	raid1_stop,
	raid1_status,
	NULL,			/* no ioctls */
	0,
	raid1_error,
	raid1_hot_add_disk,
	/* raid1_hot_remove_drive */ NULL,
	raid1_mark_spare
};

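/*
 * Register the raid1d recovery thread first, then the personality
 * itself with the md core.
 */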
int raid1_init (void)
{
	if ((raid1_thread = md_register_thread(raid1d, NULL)) == NULL)
		return -EBUSY;
	return register_md_personality (RAID1, &raid1_personality);
}

#ifdef MODULE
int init_module (void)
{
	return raid1_init();
}

void cleanup_module (void)
{
	md_unregister_thread (raid1_thread);
	unregister_md_personality (RAID1);
}
#endif