3 md.c : Multiple Devices driver for Linux
4 Copyright (C) 1994-96 Marc ZYNGIER
5 <zyngier@ufr-info-p7.ibp.fr> or
8 A lot of inspiration came from hd.c ...
10 kerneld support by Boris Tobotras <boris@xtalk.msk.su>
11 boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
13 RAID-1/RAID-5 extensions by:
14 Ingo Molnar, Miguel de Icaza, Gadi Oxman
19 This program is free software; you can redistribute it and/or modify
20 it under the terms of the GNU General Public License as published by
21 the Free Software Foundation; either version 2, or (at your option)
24 You should have received a copy of the GNU General Public License
25 (for example /usr/src/linux/COPYING); if not, write to the Free
26 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 * Current RAID-1,4,5 parallel reconstruction speed limit is 1024 KB/sec, so
31 * the extra system load does not show up that much. Increase it if your
32 * system can take more.
34 #define SPEED_LIMIT 1024
36 #include <linux/config.h>
37 #include <linux/module.h>
38 #include <linux/version.h>
39 #include <linux/malloc.h>
42 #include <linux/hdreg.h>
43 #include <linux/stat.h>
45 #include <linux/proc_fs.h>
46 #include <linux/blkdev.h>
47 #include <linux/genhd.h>
48 #include <linux/smp_lock.h>
50 #include <linux/kmod.h>
52 #include <linux/errno.h>
53 #include <linux/init.h>
55 #define __KERNEL_SYSCALLS__
56 #include <linux/unistd.h>
58 #define MAJOR_NR MD_MAJOR
61 #include <linux/blk.h>
62 #include <linux/blkpg.h>
63 #include <asm/uaccess.h>
64 #include <asm/bitops.h>
65 #include <asm/atomic.h>
68 extern kdev_t
name_to_kdev_t(char *line
) __init
;
71 static struct hd_struct md_hd_struct
[MAX_MD_DEV
];
72 static int md_blocksizes
[MAX_MD_DEV
];
73 int md_maxreadahead
[MAX_MD_DEV
];
74 #if SUPPORT_RECONSTRUCTION
75 static struct md_thread
*md_sync_thread
= NULL
;
76 #endif /* SUPPORT_RECONSTRUCTION */
78 int md_size
[MAX_MD_DEV
]={0, };
80 static void md_geninit (struct gendisk
*);
82 static struct gendisk md_gendisk
=
97 static struct md_personality
*pers
[MAX_PERSONALITY
]={NULL
, };
98 struct md_dev md_dev
[MAX_MD_DEV
];
100 int md_thread(void * arg
);
102 static int legacy_raid_sb (int minor
, int pnum
)
106 factor
= 1 << FACTOR_SHIFT(FACTOR((md_dev
+minor
)));
109 * do size and offset calculations.
111 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++) {
112 md_dev
[minor
].devices
[i
].size
&= ~(factor
- 1);
113 md_size
[minor
] += md_dev
[minor
].devices
[i
].size
;
114 md_dev
[minor
].devices
[i
].offset
=i
? (md_dev
[minor
].devices
[i
-1].offset
+
115 md_dev
[minor
].devices
[i
-1].size
) : 0;
117 if (pnum
== RAID0
>> PERSONALITY_SHIFT
)
118 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
* md_dev
[minor
].nb_dev
;
122 static void free_sb (struct md_dev
*mddev
)
125 struct real_dev
*realdev
;
128 free_page((unsigned long) mddev
->sb
);
131 for (i
= 0; i
<mddev
->nb_dev
; i
++) {
132 realdev
= mddev
->devices
+ i
;
134 free_page((unsigned long) realdev
->sb
);
141 * Check one RAID superblock for generic plausibility
144 #define BAD_MAGIC KERN_ERR \
145 "md: %s: invalid raid superblock magic (%x) on block %u\n"
147 #define OUT_OF_MEM KERN_ALERT \
148 "md: out of memory.\n"
150 #define NO_DEVICE KERN_ERR \
151 "md: disabled device %s\n"
156 static int analyze_one_sb (struct real_dev
* rdev
)
159 struct buffer_head
*bh
;
160 kdev_t dev
= rdev
->dev
;
164 * Read the superblock, it's at the end of the disk
166 rdev
->sb_offset
= MD_NEW_SIZE_BLOCKS (blk_size
[MAJOR(dev
)][MINOR(dev
)]);
167 set_blocksize (dev
, MD_SB_BYTES
);
168 bh
= bread (dev
, rdev
->sb_offset
/ MD_SB_BLOCKS
, MD_SB_BYTES
);
171 sb
= (md_superblock_t
*) bh
->b_data
;
172 if (sb
->md_magic
!= MD_SB_MAGIC
) {
173 printk (BAD_MAGIC
, kdevname(dev
),
174 sb
->md_magic
, rdev
->sb_offset
);
177 rdev
->sb
= (md_superblock_t
*) __get_free_page(GFP_KERNEL
);
182 memcpy (rdev
->sb
, bh
->b_data
, MD_SB_BYTES
);
184 rdev
->size
= sb
->size
;
186 printk (NO_DEVICE
,kdevname(rdev
->dev
));
202 * Check a full RAID array for plausibility
205 #define INCONSISTENT KERN_ERR \
206 "md: superblock inconsistency -- run ckraid\n"
208 #define OUT_OF_DATE KERN_ERR \
209 "md: superblock update time inconsistenty -- using the most recent one\n"
211 #define OLD_VERSION KERN_ALERT \
212 "md: %s: unsupported raid array version %d.%d.%d\n"
214 #define NOT_CLEAN KERN_ERR \
215 "md: %s: raid array is not clean -- run ckraid\n"
217 #define NOT_CLEAN_IGNORE KERN_ERR \
218 "md: %s: raid array is not clean -- reconstructing parity\n"
220 #define UNKNOWN_LEVEL KERN_ERR \
221 "md: %s: unsupported raid level %d\n"
223 static int analyze_sbs (int minor
, int pnum
)
225 struct md_dev
*mddev
= md_dev
+ minor
;
226 int i
, N
= mddev
->nb_dev
, out_of_date
= 0;
227 struct real_dev
* disks
= mddev
->devices
;
228 md_superblock_t
*sb
, *freshest
= NULL
;
231 * RAID-0 and linear don't use a RAID superblock
233 if (pnum
== RAID0
>> PERSONALITY_SHIFT
||
234 pnum
== LINEAR
>> PERSONALITY_SHIFT
)
235 return legacy_raid_sb (minor
, pnum
);
238 * Verify the RAID superblock on each real device
240 for (i
= 0; i
< N
; i
++)
241 if (analyze_one_sb(disks
+i
))
245 * The superblock constant part has to be the same
246 * for all disks in the array.
249 for (i
= 0; i
< N
; i
++) {
257 disks
[i
].sb
, MD_SB_GENERIC_CONSTANT_WORDS
* 4)) {
258 printk (INCONSISTENT
);
264 * OK, we have all disks and the array is ready to run. Let's
265 * find the freshest superblock, that one will be the superblock
266 * that represents the whole array.
268 if ((sb
= mddev
->sb
= (md_superblock_t
*) __get_free_page (GFP_KERNEL
)) == NULL
)
271 for (i
= 0; i
< N
; i
++) {
275 freshest
= disks
[i
].sb
;
279 * Find the newest superblock version
281 if (disks
[i
].sb
->utime
!= freshest
->utime
) {
283 if (disks
[i
].sb
->utime
> freshest
->utime
)
284 freshest
= disks
[i
].sb
;
289 memcpy (sb
, freshest
, sizeof(*freshest
));
292 * Check if we can support this RAID array
294 if (sb
->major_version
!= MD_MAJOR_VERSION
||
295 sb
->minor_version
> MD_MINOR_VERSION
) {
297 printk (OLD_VERSION
, kdevname(MKDEV(MD_MAJOR
, minor
)),
298 sb
->major_version
, sb
->minor_version
,
304 * We need to add this as a superblock option.
306 #if SUPPORT_RECONSTRUCTION
307 if (sb
->state
!= (1 << MD_SB_CLEAN
)) {
308 if (sb
->level
== 1) {
309 printk (NOT_CLEAN
, kdevname(MKDEV(MD_MAJOR
, minor
)));
312 printk (NOT_CLEAN_IGNORE
, kdevname(MKDEV(MD_MAJOR
, minor
)));
315 if (sb
->state
!= (1 << MD_SB_CLEAN
)) {
316 printk (NOT_CLEAN
, kdevname(MKDEV(MD_MAJOR
, minor
)));
319 #endif /* SUPPORT_RECONSTRUCTION */
323 md_size
[minor
] = sb
->size
;
324 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
;
328 md_size
[minor
] = sb
->size
* (sb
->raid_disks
- 1);
329 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
* (sb
->raid_disks
- 1);
332 printk (UNKNOWN_LEVEL
, kdevname(MKDEV(MD_MAJOR
, minor
)),
348 int md_update_sb(int minor
)
350 struct md_dev
*mddev
= md_dev
+ minor
;
351 struct buffer_head
*bh
;
352 md_superblock_t
*sb
= mddev
->sb
;
353 struct real_dev
*realdev
;
358 sb
->utime
= CURRENT_TIME
;
359 for (i
= 0; i
< mddev
->nb_dev
; i
++) {
360 realdev
= mddev
->devices
+ i
;
364 sb_offset
= realdev
->sb_offset
;
365 set_blocksize(dev
, MD_SB_BYTES
);
366 printk("md: updating raid superblock on device %s, sb_offset == %u\n", kdevname(dev
), sb_offset
);
367 bh
= getblk(dev
, sb_offset
/ MD_SB_BLOCKS
, MD_SB_BYTES
);
369 sb
= (md_superblock_t
*) bh
->b_data
;
370 memcpy(sb
, mddev
->sb
, MD_SB_BYTES
);
371 memcpy(&sb
->descriptor
, sb
->disks
+ realdev
->sb
->descriptor
.number
, MD_SB_DESCRIPTOR_WORDS
* 4);
372 mark_buffer_uptodate(bh
, 1);
373 mark_buffer_dirty(bh
, 1);
374 ll_rw_block(WRITE
, 1, &bh
);
378 invalidate_buffers(dev
);
380 printk(KERN_ERR
"md: getblk failed for device %s\n", kdevname(dev
));
385 static int do_md_run (int minor
, int repart
)
387 int pnum
, i
, min
, factor
, err
;
389 if (!md_dev
[minor
].nb_dev
)
392 if (md_dev
[minor
].pers
)
395 md_dev
[minor
].repartition
=repart
;
397 if ((pnum
=PERSONALITY(&md_dev
[minor
]) >> (PERSONALITY_SHIFT
))
401 /* Only RAID-1 and RAID-5 can have MD devices as underlying devices */
402 if (pnum
!= (RAID1
>> PERSONALITY_SHIFT
) && pnum
!= (RAID5
>> PERSONALITY_SHIFT
)){
403 for (i
= 0; i
< md_dev
[minor
].nb_dev
; i
++)
404 if (MAJOR (md_dev
[minor
].devices
[i
].dev
) == MD_MAJOR
)
410 char module_name
[80];
411 sprintf (module_name
, "md-personality-%d", pnum
);
412 request_module (module_name
);
418 factor
= min
= 1 << FACTOR_SHIFT(FACTOR((md_dev
+minor
)));
420 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++)
421 if (md_dev
[minor
].devices
[i
].size
<min
)
423 printk ("Dev %s smaller than %dk, cannot shrink\n",
424 partition_name (md_dev
[minor
].devices
[i
].dev
), min
);
428 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++) {
429 fsync_dev(md_dev
[minor
].devices
[i
].dev
);
430 invalidate_buffers(md_dev
[minor
].devices
[i
].dev
);
433 /* Resize devices according to the factor. It is used to align
434 partitions size on a given chunk size. */
438 * Analyze the raid superblock
440 if (analyze_sbs(minor
, pnum
))
443 md_dev
[minor
].pers
=pers
[pnum
];
445 if ((err
=md_dev
[minor
].pers
->run (minor
, md_dev
+minor
)))
447 md_dev
[minor
].pers
=NULL
;
448 free_sb(md_dev
+ minor
);
452 if (pnum
!= RAID0
>> PERSONALITY_SHIFT
&& pnum
!= LINEAR
>> PERSONALITY_SHIFT
)
454 md_dev
[minor
].sb
->state
&= ~(1 << MD_SB_CLEAN
);
458 /* FIXME : We assume here we have blocks
459 that are twice as large as sectors.
460 THIS MAY NOT BE TRUE !!! */
461 md_hd_struct
[minor
].start_sect
=0;
462 md_hd_struct
[minor
].nr_sects
=md_size
[minor
]<<1;
464 read_ahead
[MD_MAJOR
] = 128;
468 static int do_md_stop (int minor
, struct inode
*inode
)
472 if (inode
->i_count
>1 || md_dev
[minor
].busy
>1) {
474 * ioctl : one open channel
476 printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n",
477 minor
, inode
->i_count
, md_dev
[minor
].busy
);
481 if (md_dev
[minor
].pers
) {
483 * It is safe to call stop here, it only frees private
484 * data. Also, it tells us if a device is unstoppable
485 * (eg. resyncing is in progress)
487 if (md_dev
[minor
].pers
->stop (minor
, md_dev
+minor
))
490 * The device won't exist anymore -> flush it now
492 fsync_dev (inode
->i_rdev
);
493 invalidate_buffers (inode
->i_rdev
);
494 if (md_dev
[minor
].sb
) {
495 md_dev
[minor
].sb
->state
|= 1 << MD_SB_CLEAN
;
501 if (md_dev
[minor
].sb
)
502 free_sb(md_dev
+ minor
);
503 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++)
504 clear_inode (md_dev
[minor
].devices
[i
].inode
);
506 md_dev
[minor
].nb_dev
=md_size
[minor
]=0;
507 md_hd_struct
[minor
].nr_sects
=0;
508 md_dev
[minor
].pers
=NULL
;
510 read_ahead
[MD_MAJOR
] = 128;
515 static int do_md_add (int minor
, kdev_t dev
)
519 struct real_dev
*realdev
;
521 if (md_dev
[minor
].nb_dev
==MAX_REAL
)
524 if (!fs_may_mount (dev
))
527 if (blk_size
[MAJOR(dev
)] == NULL
|| blk_size
[MAJOR(dev
)][MINOR(dev
)] == 0) {
528 printk("md_add(): zero device size, huh, bailing out.\n");
532 if (md_dev
[minor
].pers
) {
534 * The array is already running, hot-add the drive, or
537 if (!md_dev
[minor
].pers
->hot_add_disk
)
544 * Careful. We cannot increase nb_dev for a running array.
546 i
=md_dev
[minor
].nb_dev
;
547 realdev
= &md_dev
[minor
].devices
[i
];
550 /* Lock the device by inserting a dummy inode. This doesn't
551 smell very good, but I need to be consistent with the
552 mount stuff, specially with fs_may_mount. If someone have
553 a better idea, please help ! */
555 realdev
->inode
=get_empty_inode ();
556 realdev
->inode
->i_dev
=dev
; /* don't care about other fields */
557 insert_inode_hash (realdev
->inode
);
559 /* Sizes are now rounded at run time */
561 /* md_dev[minor].devices[i].size=gen_real->sizes[MINOR(dev)]; HACKHACK*/
563 realdev
->size
=blk_size
[MAJOR(dev
)][MINOR(dev
)];
567 * Check the superblock for consistency.
568 * The personality itself has to check whether it's getting
569 * added with the proper flags. The personality has to be
572 if (analyze_one_sb (realdev
))
575 * hot_add has to bump up nb_dev itself
577 if (md_dev
[minor
].pers
->hot_add_disk (&md_dev
[minor
], dev
)) {
579 * FIXME: here we should free up the inode and stuff
585 md_dev
[minor
].nb_dev
++;
587 printk ("REGISTER_DEV %s to md%x done\n", partition_name(dev
), minor
);
591 static int md_ioctl (struct inode
*inode
, struct file
*file
,
592 unsigned int cmd
, unsigned long arg
)
595 struct hd_geometry
*loc
= (struct hd_geometry
*) arg
;
597 if (!capable(CAP_SYS_ADMIN
))
600 if (((minor
=MINOR(inode
->i_rdev
)) & 0x80) &&
601 (minor
& 0x7f) < MAX_PERSONALITY
&&
602 pers
[minor
& 0x7f] &&
603 pers
[minor
& 0x7f]->ioctl
)
604 return (pers
[minor
& 0x7f]->ioctl (inode
, file
, cmd
, arg
));
606 if (minor
>= MAX_MD_DEV
)
612 return do_md_add (minor
, to_kdev_t ((dev_t
) arg
));
615 return do_md_run (minor
, (int) arg
);
618 return do_md_stop (minor
, inode
);
620 case BLKGETSIZE
: /* Return device size */
621 if (!arg
) return -EINVAL
;
622 err
= put_user (md_hd_struct
[MINOR(inode
->i_rdev
)].nr_sects
, (long *) arg
);
628 /* We have a problem here : there is no easy way to give a CHS
629 virtual geometry. We currently pretend that we have a 2 heads
630 4 sectors (with a BIG number of cylinders...). This drives dosfs
634 if (!loc
) return -EINVAL
;
635 err
= put_user (2, (char *) &loc
->heads
);
638 err
= put_user (4, (char *) &loc
->sectors
);
641 err
= put_user (md_hd_struct
[minor
].nr_sects
/8, (short *) &loc
->cylinders
);
644 err
= put_user (md_hd_struct
[MINOR(inode
->i_rdev
)].start_sect
,
645 (long *) &loc
->start
);
655 return blk_ioctl(inode
->i_rdev
, cmd
, arg
);
664 static int md_open (struct inode
*inode
, struct file
*file
)
666 int minor
=MINOR(inode
->i_rdev
);
668 md_dev
[minor
].busy
++;
669 return (0); /* Always succeed */
673 static int md_release (struct inode
*inode
, struct file
*file
)
675 int minor
=MINOR(inode
->i_rdev
);
677 sync_dev (inode
->i_rdev
);
678 md_dev
[minor
].busy
--;
683 static ssize_t
md_read (struct file
*file
, char *buf
, size_t count
,
686 int minor
=MINOR(file
->f_dentry
->d_inode
->i_rdev
);
688 if (!md_dev
[minor
].pers
) /* Check if device is being run */
691 return block_read(file
, buf
, count
, ppos
);
694 static ssize_t
md_write (struct file
*file
, const char *buf
,
695 size_t count
, loff_t
*ppos
)
697 int minor
=MINOR(file
->f_dentry
->d_inode
->i_rdev
);
699 if (!md_dev
[minor
].pers
) /* Check if device is being run */
702 return block_write(file
, buf
, count
, ppos
);
705 static struct file_operations md_fops
=
720 int md_map (int minor
, kdev_t
*rdev
, unsigned long *rsector
, unsigned long size
)
722 if ((unsigned int) minor
>= MAX_MD_DEV
)
724 printk ("Bad md device %d\n", minor
);
728 if (!md_dev
[minor
].pers
)
730 printk ("Oops ! md%d not running, giving up !\n", minor
);
734 return (md_dev
[minor
].pers
->map(md_dev
+minor
, rdev
, rsector
, size
));
737 int md_make_request (int minor
, int rw
, struct buffer_head
* bh
)
739 if (md_dev
[minor
].pers
->make_request
) {
740 if (buffer_locked(bh
))
742 set_bit(BH_Lock
, &bh
->b_state
);
743 if (rw
== WRITE
|| rw
== WRITEA
) {
744 if (!buffer_dirty(bh
)) {
745 bh
->b_end_io(bh
, test_bit(BH_Uptodate
, &bh
->b_state
));
749 if (rw
== READ
|| rw
== READA
) {
750 if (buffer_uptodate(bh
)) {
751 bh
->b_end_io(bh
, test_bit(BH_Uptodate
, &bh
->b_state
));
755 return (md_dev
[minor
].pers
->make_request(md_dev
+minor
, rw
, bh
));
757 make_request (MAJOR(bh
->b_rdev
), rw
, bh
);
/*
 * md never queues requests through the normal request function --
 * everything goes through md_map()/md_make_request().  Reaching this
 * handler therefore indicates a bug elsewhere.
 */
static void do_md_request (void)
{
	printk ("Got md request, not good...");
	return;
}
768 void md_wakeup_thread(struct md_thread
*thread
)
770 set_bit(THREAD_WAKEUP
, &thread
->flags
);
771 wake_up(&thread
->wqueue
);
774 struct md_thread
*md_register_thread (void (*run
) (void *), void *data
)
776 struct md_thread
*thread
= (struct md_thread
*)
777 kmalloc(sizeof(struct md_thread
), GFP_KERNEL
);
779 DECLARE_MUTEX_LOCKED(sem
);
781 if (!thread
) return NULL
;
783 memset(thread
, 0, sizeof(struct md_thread
));
784 init_waitqueue_head(&thread
->wqueue
);
789 ret
= kernel_thread(md_thread
, thread
, 0);
798 void md_unregister_thread (struct md_thread
*thread
)
800 DECLARE_MUTEX_LOCKED(sem
);
805 printk("Killing md_thread %d %p %s\n",
806 thread
->tsk
->pid
, thread
->tsk
, thread
->tsk
->comm
);
808 printk("Aiee. md_thread has 0 tsk\n");
809 send_sig(SIGKILL
, thread
->tsk
, 1);
810 printk("downing on %p\n", &sem
);
814 #define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM))
816 int md_thread(void * arg
)
818 struct md_thread
*thread
= arg
;
825 current
->session
= 1;
827 sprintf(current
->comm
, "md_thread");
828 siginitsetinv(¤t
->blocked
, SHUTDOWN_SIGS
);
829 thread
->tsk
= current
;
834 if (!test_bit(THREAD_WAKEUP
, &thread
->flags
)) {
836 spin_lock(¤t
->sigmask_lock
);
837 flush_signals(current
);
838 spin_unlock(¤t
->sigmask_lock
);
839 interruptible_sleep_on(&thread
->wqueue
);
841 if (test_bit(THREAD_WAKEUP
, &thread
->flags
))
848 } while (signal_pending(current
));
851 clear_bit(THREAD_WAKEUP
, &thread
->flags
);
853 thread
->run(thread
->data
);
854 run_task_queue(&tq_disk
);
859 EXPORT_SYMBOL(md_size
);
860 EXPORT_SYMBOL(md_maxreadahead
);
861 EXPORT_SYMBOL(register_md_personality
);
862 EXPORT_SYMBOL(unregister_md_personality
);
863 EXPORT_SYMBOL(md_dev
);
864 EXPORT_SYMBOL(md_error
);
865 EXPORT_SYMBOL(md_register_thread
);
866 EXPORT_SYMBOL(md_unregister_thread
);
867 EXPORT_SYMBOL(md_update_sb
);
868 EXPORT_SYMBOL(md_map
);
869 EXPORT_SYMBOL(md_wakeup_thread
);
870 EXPORT_SYMBOL(md_do_sync
);
872 #ifdef CONFIG_PROC_FS
873 static struct proc_dir_entry proc_md
= {
874 PROC_MD
, 6, "mdstat",
875 S_IFREG
| S_IRUGO
, 1, 0, 0,
876 0, &proc_array_inode_operations
,
880 static void md_geninit (struct gendisk
*gdisk
)
884 for(i
=0;i
<MAX_MD_DEV
;i
++)
886 md_blocksizes
[i
] = 1024;
887 md_maxreadahead
[i
] = MD_DEFAULT_DISK_READAHEAD
;
888 md_gendisk
.part
[i
].start_sect
=-1; /* avoid partition check */
889 md_gendisk
.part
[i
].nr_sects
=0;
893 blksize_size
[MD_MAJOR
] = md_blocksizes
;
894 max_readahead
[MD_MAJOR
] = md_maxreadahead
;
896 #ifdef CONFIG_PROC_FS
897 proc_register(&proc_root
, &proc_md
);
901 int md_error (kdev_t mddev
, kdev_t rdev
)
903 unsigned int minor
= MINOR (mddev
);
906 if (MAJOR(mddev
) != MD_MAJOR
|| minor
> MAX_MD_DEV
)
907 panic ("md_error gets unknown device\n");
908 if (!md_dev
[minor
].pers
)
909 panic ("md_error gets an error for an unknown device\n");
910 if (md_dev
[minor
].pers
->error_handler
) {
911 rc
= md_dev
[minor
].pers
->error_handler (md_dev
+minor
, rdev
);
912 #if SUPPORT_RECONSTRUCTION
913 md_wakeup_thread(md_sync_thread
);
914 #endif /* SUPPORT_RECONSTRUCTION */
920 int get_md_status (char *page
)
922 int sz
=0, i
, j
, size
;
924 sz
+=sprintf( page
+sz
, "Personalities : ");
925 for (i
=0; i
<MAX_PERSONALITY
; i
++)
927 sz
+=sprintf (page
+sz
, "[%d %s] ", i
, pers
[i
]->name
);
931 sz
+=sprintf (page
+sz
, "read_ahead ");
932 if (read_ahead
[MD_MAJOR
]==INT_MAX
)
933 sz
+=sprintf (page
+sz
, "not set\n");
935 sz
+=sprintf (page
+sz
, "%d sectors\n", read_ahead
[MD_MAJOR
]);
937 for (i
=0; i
<MAX_MD_DEV
; i
++)
939 sz
+=sprintf (page
+sz
, "md%d : %sactive", i
, md_dev
[i
].pers
? "" : "in");
942 sz
+=sprintf (page
+sz
, " %s", md_dev
[i
].pers
->name
);
945 for (j
=0; j
<md_dev
[i
].nb_dev
; j
++)
947 sz
+=sprintf (page
+sz
, " %s",
948 partition_name(md_dev
[i
].devices
[j
].dev
));
949 size
+=md_dev
[i
].devices
[j
].size
;
952 if (md_dev
[i
].nb_dev
) {
954 sz
+=sprintf (page
+sz
, " %d blocks", md_size
[i
]);
956 sz
+=sprintf (page
+sz
, " %d blocks", size
);
961 sz
+=sprintf (page
+sz
, "\n");
965 if (md_dev
[i
].pers
->max_invalid_dev
)
966 sz
+=sprintf (page
+sz
, " maxfault=%ld", MAX_FAULT(md_dev
+i
));
968 sz
+=md_dev
[i
].pers
->status (page
+sz
, i
, md_dev
+i
);
969 sz
+=sprintf (page
+sz
, "\n");
975 int register_md_personality (int p_num
, struct md_personality
*p
)
977 int i
=(p_num
>> PERSONALITY_SHIFT
);
979 if (i
>= MAX_PERSONALITY
)
986 printk ("%s personality registered\n", p
->name
);
990 int unregister_md_personality (int p_num
)
992 int i
=(p_num
>> PERSONALITY_SHIFT
);
994 if (i
>= MAX_PERSONALITY
)
997 printk ("%s personality unregistered\n", pers
[i
]->name
);
1002 static md_descriptor_t
*get_spare(struct md_dev
*mddev
)
1005 md_superblock_t
*sb
= mddev
->sb
;
1006 md_descriptor_t
*descriptor
;
1007 struct real_dev
*realdev
;
1009 for (i
= 0; i
< mddev
->nb_dev
; i
++) {
1010 realdev
= &mddev
->devices
[i
];
1013 descriptor
= &sb
->disks
[realdev
->sb
->descriptor
.number
];
1014 if (descriptor
->state
& (1 << MD_FAULTY_DEVICE
))
1016 if (descriptor
->state
& (1 << MD_ACTIVE_DEVICE
))
1024 * parallel resyncing thread.
1026 * FIXME: - make it abort with a dirty array on mdstop, now it just blocks
1027 * - fix read error handing
1030 int md_do_sync(struct md_dev
*mddev
)
1032 struct buffer_head
*bh
;
1033 int max_blocks
, blocksize
, curr_bsize
, percent
=1, j
;
1034 kdev_t read_disk
= MKDEV(MD_MAJOR
, mddev
- md_dev
);
1035 int major
= MAJOR(read_disk
), minor
= MINOR(read_disk
);
1036 unsigned long starttime
;
1038 blocksize
= blksize_size
[major
][minor
];
1039 max_blocks
= blk_size
[major
][minor
] / (blocksize
>> 10);
1041 printk("... resync log\n");
1042 printk(" .... mddev->nb_dev: %d\n", mddev
->nb_dev
);
1043 printk(" .... raid array: %s\n", kdevname(read_disk
));
1044 printk(" .... max_blocks: %d blocksize: %d\n", max_blocks
, blocksize
);
1045 printk("md: syncing RAID array %s\n", kdevname(read_disk
));
1050 for (j
= 0; j
< max_blocks
; j
++) {
1053 * B careful. When some1 mounts a non-'blocksize' filesystem
1054 * then we get the blocksize changed right under us. Go deal
1055 * with it transparently, recalculate 'blocksize', 'j' and
1058 curr_bsize
= blksize_size
[major
][minor
];
1059 if (curr_bsize
!= blocksize
) {
1061 if (curr_bsize
> blocksize
)
1063 * this is safe, rounds downwards.
1065 j
/= curr_bsize
/blocksize
;
1067 j
*= blocksize
/curr_bsize
;
1069 blocksize
= curr_bsize
;
1070 max_blocks
= blk_size
[major
][minor
] / (blocksize
>> 10);
1072 if ((bh
= breada (read_disk
, j
, blocksize
, j
* blocksize
,
1073 max_blocks
* blocksize
)) != NULL
) {
1074 mark_buffer_dirty(bh
, 1);
1078 * FIXME: Ugly, but set_blocksize() isnt safe ...
1080 curr_bsize
= blksize_size
[major
][minor
];
1081 if (curr_bsize
!= blocksize
)
1082 goto diff_blocksize
;
1085 * It's a real read problem. FIXME, handle this
1089 "read error, stopping reconstruction.\n");
1095 * Let's sleep some if we are faster than our speed limit:
1097 while (blocksize
*j
/(jiffies
-starttime
+1)*HZ
/1024 > SPEED_LIMIT
)
1099 current
->state
= TASK_INTERRUPTIBLE
;
1100 schedule_timeout(1);
1104 * FIXME: put this status bar thing into /proc
1106 if (!(j
%(max_blocks
/100))) {
1108 printk (" %03d%% done.\n",percent
);
1114 fsync_dev(read_disk
);
1115 printk("md: %s: sync done.\n", kdevname(read_disk
));
1121 * This is a kernel thread which: syncs a spare disk with the active array
1123 * the amount of foolproofing might seem to be a tad excessive, but an
1124 * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs
1125 * of my root partition with the first 0.5 gigs of my /home partition ... so
1126 * i'm a bit nervous ;)
1128 void mdsyncd (void *data
)
1131 struct md_dev
*mddev
;
1132 md_superblock_t
*sb
;
1133 md_descriptor_t
*spare
;
1134 unsigned long flags
;
1136 for (i
= 0, mddev
= md_dev
; i
< MAX_MD_DEV
; i
++, mddev
++) {
1137 if ((sb
= mddev
->sb
) == NULL
)
1139 if (sb
->active_disks
== sb
->raid_disks
)
1141 if (!sb
->spare_disks
)
1143 if ((spare
= get_spare(mddev
)) == NULL
)
1145 if (!mddev
->pers
->mark_spare
)
1147 if (mddev
->pers
->mark_spare(mddev
, spare
, SPARE_WRITE
))
1149 if (md_do_sync(mddev
) || (spare
->state
& (1 << MD_FAULTY_DEVICE
))) {
1150 mddev
->pers
->mark_spare(mddev
, spare
, SPARE_INACTIVE
);
1155 mddev
->pers
->mark_spare(mddev
, spare
, SPARE_ACTIVE
);
1156 spare
->state
|= (1 << MD_SYNC_DEVICE
);
1157 spare
->state
|= (1 << MD_ACTIVE_DEVICE
);
1160 mddev
->sb_dirty
= 1;
1161 md_update_sb(mddev
- md_dev
);
1162 restore_flags(flags
);
1167 #ifdef CONFIG_MD_BOOT
1172 } md_setup_args __initdata
= {
1176 /* called from init/main.c */
1177 __initfunc(void md_setup(char *str
,int *ints
))
1180 for(i
=0;i
<=ints
[0];i
++) {
1181 md_setup_args
.ints
[i
] = ints
[i
];
1182 strcpy(md_setup_args
.str
, str
);
1183 /* printk ("md: ints[%d]=%d.\n", i, ints[i]);*/
1185 md_setup_args
.set
=1;
1189 __initfunc(void do_md_setup(char *str
,int *ints
))
1191 int minor
, pers
, factor
, fault
;
1196 printk ("md: Too few Arguments (%d).\n", ints
[0]);
1202 if (minor
>= MAX_MD_DEV
) {
1203 printk ("md: Minor device number too high.\n");
1209 switch(ints
[i
++]) { /* Raidlevel */
1211 #ifdef CONFIG_MD_LINEAR
1213 printk ("md: Setting up md%d as linear device.\n",minor
);
1215 printk ("md: Linear mode not configured."
1216 "Recompile the kernel with linear mode enabled!\n");
1221 #ifdef CONFIG_MD_STRIPED
1222 printk ("md: Setting up md%d as a striped device.\n",minor
);
1224 printk ("md: Striped mode not configured."
1225 "Recompile the kernel with striped mode enabled!\n");
1228 /* not supported yet
1231 printk ("md: Setting up md%d as a raid1 device.\n",minor);
1235 printk ("md: Setting up md%d as a raid5 device.\n",minor);
1239 printk ("md: Unknown or not supported raid level %d.\n", ints
[--i
]);
1245 factor
=ints
[i
++]; /* Chunksize */
1246 fault
=ints
[i
++]; /* Faultlevel */
1248 pers
=pers
| factor
| (fault
<< FAULT_SHIFT
);
1250 while( str
&& (dev
= name_to_kdev_t(str
))) {
1251 do_md_add (minor
, dev
);
1252 if((str
= strchr (str
, ',')) != NULL
)
1256 do_md_run (minor
, pers
);
1257 printk ("md: Loading md%d.\n",minor
);
1263 void linear_init (void);
1264 void raid0_init (void);
1265 void raid1_init (void);
1266 void raid5_init (void);
1268 __initfunc(int md_init (void))
1270 printk ("md driver %d.%d.%d MAX_MD_DEV=%d, MAX_REAL=%d\n",
1271 MD_MAJOR_VERSION
, MD_MINOR_VERSION
, MD_PATCHLEVEL_VERSION
,
1272 MAX_MD_DEV
, MAX_REAL
);
1274 if (register_blkdev (MD_MAJOR
, "md", &md_fops
))
1276 printk ("Unable to get major %d for md\n", MD_MAJOR
);
1280 blk_dev
[MD_MAJOR
].request_fn
=DEVICE_REQUEST
;
1281 blk_dev
[MD_MAJOR
].current_request
=NULL
;
1282 read_ahead
[MD_MAJOR
]=INT_MAX
;
1283 memset(md_dev
, 0, MAX_MD_DEV
* sizeof (struct md_dev
));
1284 md_gendisk
.next
=gendisk_head
;
1286 gendisk_head
=&md_gendisk
;
1288 #if SUPPORT_RECONSTRUCTION
1289 if ((md_sync_thread
= md_register_thread(mdsyncd
, NULL
)) == NULL
)
1290 printk("md: bug: md_sync_thread == NULL\n");
1291 #endif /* SUPPORT_RECONSTRUCTION */
1293 #ifdef CONFIG_MD_LINEAR
1296 #ifdef CONFIG_MD_STRIPED
1299 #ifdef CONFIG_MD_MIRRORING
1302 #ifdef CONFIG_MD_RAID5
1308 #ifdef CONFIG_MD_BOOT
1309 __initfunc(void md_setup_drive(void))
1311 if(md_setup_args
.set
)
1312 do_md_setup(md_setup_args
.str
, md_setup_args
.ints
);