/*
 * md.c : Multiple Devices driver for Linux
 *        Copyright (C) 1994-96 Marc ZYNGIER
 *        <zyngier@ufr-info-p7.ibp.fr> or
 *
 * A lot of inspiration came from hd.c ...
 *
 * kerneld support by Boris Tobotras <boris@xtalk.msk.su>
 * boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
 *
 * RAID-1/RAID-5 extensions by:
 *      Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * Current RAID-1,4,5 parallel reconstruction speed limit is 1024 KB/sec, so
 * the extra system load does not show up that much. Increase it if your
 * system can take more.
 */
#define SPEED_LIMIT 1024
36 #include <linux/config.h>
37 #include <linux/module.h>
38 #include <linux/version.h>
39 #include <linux/malloc.h>
42 #include <linux/hdreg.h>
43 #include <linux/stat.h>
45 #include <linux/proc_fs.h>
46 #include <linux/blkdev.h>
47 #include <linux/genhd.h>
48 #include <linux/smp_lock.h>
50 #include <linux/kmod.h>
52 #include <linux/errno.h>
53 #include <linux/init.h>
55 #define __KERNEL_SYSCALLS__
56 #include <linux/unistd.h>
58 #define MAJOR_NR MD_MAJOR
61 #include <linux/blk.h>
62 #include <linux/blkpg.h>
63 #include <asm/uaccess.h>
64 #include <asm/bitops.h>
65 #include <asm/atomic.h>
68 extern kdev_t
name_to_kdev_t(char *line
) __init
;
71 static struct hd_struct md_hd_struct
[MAX_MD_DEV
];
72 static int md_blocksizes
[MAX_MD_DEV
];
73 int md_maxreadahead
[MAX_MD_DEV
];
74 #if SUPPORT_RECONSTRUCTION
75 static struct md_thread
*md_sync_thread
= NULL
;
76 #endif /* SUPPORT_RECONSTRUCTION */
78 int md_size
[MAX_MD_DEV
]={0, };
80 static void md_geninit (struct gendisk
*);
82 static struct gendisk md_gendisk
=
97 static struct md_personality
*pers
[MAX_PERSONALITY
]={NULL
, };
98 struct md_dev md_dev
[MAX_MD_DEV
];
100 int md_thread(void * arg
);
102 static int legacy_raid_sb (int minor
, int pnum
)
106 factor
= 1 << FACTOR_SHIFT(FACTOR((md_dev
+minor
)));
109 * do size and offset calculations.
111 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++) {
112 md_dev
[minor
].devices
[i
].size
&= ~(factor
- 1);
113 md_size
[minor
] += md_dev
[minor
].devices
[i
].size
;
114 md_dev
[minor
].devices
[i
].offset
=i
? (md_dev
[minor
].devices
[i
-1].offset
+
115 md_dev
[minor
].devices
[i
-1].size
) : 0;
117 if (pnum
== RAID0
>> PERSONALITY_SHIFT
)
118 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
* md_dev
[minor
].nb_dev
;
122 static void free_sb (struct md_dev
*mddev
)
125 struct real_dev
*realdev
;
128 free_page((unsigned long) mddev
->sb
);
131 for (i
= 0; i
<mddev
->nb_dev
; i
++) {
132 realdev
= mddev
->devices
+ i
;
134 free_page((unsigned long) realdev
->sb
);
/*
 * Check one RAID superblock for generic plausibility
 */

#define BAD_MAGIC KERN_ERR \
"md: %s: invalid raid superblock magic (%x) on block %u\n"

#define OUT_OF_MEM KERN_ALERT \
"md: out of memory.\n"

#define NO_DEVICE KERN_ERR \
"md: disabled device %s\n"
156 static int analyze_one_sb (struct real_dev
* rdev
)
159 struct buffer_head
*bh
;
160 kdev_t dev
= rdev
->dev
;
164 * Read the superblock, it's at the end of the disk
166 rdev
->sb_offset
= MD_NEW_SIZE_BLOCKS (blk_size
[MAJOR(dev
)][MINOR(dev
)]);
167 set_blocksize (dev
, MD_SB_BYTES
);
168 bh
= bread (dev
, rdev
->sb_offset
/ MD_SB_BLOCKS
, MD_SB_BYTES
);
171 sb
= (md_superblock_t
*) bh
->b_data
;
172 if (sb
->md_magic
!= MD_SB_MAGIC
) {
173 printk (BAD_MAGIC
, kdevname(dev
),
174 sb
->md_magic
, rdev
->sb_offset
);
177 rdev
->sb
= (md_superblock_t
*) __get_free_page(GFP_KERNEL
);
182 memcpy (rdev
->sb
, bh
->b_data
, MD_SB_BYTES
);
184 rdev
->size
= sb
->size
;
186 printk (NO_DEVICE
,kdevname(rdev
->dev
));
/*
 * Check a full RAID array for plausibility
 */

#define INCONSISTENT KERN_ERR \
"md: superblock inconsistency -- run ckraid\n"

/* (sic) "inconsistenty" is the historical message text; left unchanged */
#define OUT_OF_DATE KERN_ERR \
"md: superblock update time inconsistenty -- using the most recent one\n"

#define OLD_VERSION KERN_ALERT \
"md: %s: unsupported raid array version %d.%d.%d\n"

#define NOT_CLEAN KERN_ERR \
"md: %s: raid array is not clean -- run ckraid\n"

#define NOT_CLEAN_IGNORE KERN_ERR \
"md: %s: raid array is not clean -- reconstructing parity\n"

#define UNKNOWN_LEVEL KERN_ERR \
"md: %s: unsupported raid level %d\n"
223 static int analyze_sbs (int minor
, int pnum
)
225 struct md_dev
*mddev
= md_dev
+ minor
;
226 int i
, N
= mddev
->nb_dev
, out_of_date
= 0;
227 struct real_dev
* disks
= mddev
->devices
;
228 md_superblock_t
*sb
, *freshest
= NULL
;
231 * RAID-0 and linear don't use a RAID superblock
233 if (pnum
== RAID0
>> PERSONALITY_SHIFT
||
234 pnum
== LINEAR
>> PERSONALITY_SHIFT
)
235 return legacy_raid_sb (minor
, pnum
);
238 * Verify the RAID superblock on each real device
240 for (i
= 0; i
< N
; i
++)
241 if (analyze_one_sb(disks
+i
))
245 * The superblock constant part has to be the same
246 * for all disks in the array.
249 for (i
= 0; i
< N
; i
++) {
257 disks
[i
].sb
, MD_SB_GENERIC_CONSTANT_WORDS
* 4)) {
258 printk (INCONSISTENT
);
264 * OK, we have all disks and the array is ready to run. Let's
265 * find the freshest superblock, that one will be the superblock
266 * that represents the whole array.
268 if ((sb
= mddev
->sb
= (md_superblock_t
*) __get_free_page (GFP_KERNEL
)) == NULL
)
271 for (i
= 0; i
< N
; i
++) {
275 freshest
= disks
[i
].sb
;
279 * Find the newest superblock version
281 if (disks
[i
].sb
->utime
!= freshest
->utime
) {
283 if (disks
[i
].sb
->utime
> freshest
->utime
)
284 freshest
= disks
[i
].sb
;
289 memcpy (sb
, freshest
, sizeof(*freshest
));
292 * Check if we can support this RAID array
294 if (sb
->major_version
!= MD_MAJOR_VERSION
||
295 sb
->minor_version
> MD_MINOR_VERSION
) {
297 printk (OLD_VERSION
, kdevname(MKDEV(MD_MAJOR
, minor
)),
298 sb
->major_version
, sb
->minor_version
,
304 * We need to add this as a superblock option.
306 #if SUPPORT_RECONSTRUCTION
307 if (sb
->state
!= (1 << MD_SB_CLEAN
)) {
308 if (sb
->level
== 1) {
309 printk (NOT_CLEAN
, kdevname(MKDEV(MD_MAJOR
, minor
)));
312 printk (NOT_CLEAN_IGNORE
, kdevname(MKDEV(MD_MAJOR
, minor
)));
315 if (sb
->state
!= (1 << MD_SB_CLEAN
)) {
316 printk (NOT_CLEAN
, kdevname(MKDEV(MD_MAJOR
, minor
)));
319 #endif /* SUPPORT_RECONSTRUCTION */
323 md_size
[minor
] = sb
->size
;
324 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
;
328 md_size
[minor
] = sb
->size
* (sb
->raid_disks
- 1);
329 md_maxreadahead
[minor
] = MD_DEFAULT_DISK_READAHEAD
* (sb
->raid_disks
- 1);
332 printk (UNKNOWN_LEVEL
, kdevname(MKDEV(MD_MAJOR
, minor
)),
348 int md_update_sb(int minor
)
350 struct md_dev
*mddev
= md_dev
+ minor
;
351 struct buffer_head
*bh
;
352 md_superblock_t
*sb
= mddev
->sb
;
353 struct real_dev
*realdev
;
358 sb
->utime
= CURRENT_TIME
;
359 for (i
= 0; i
< mddev
->nb_dev
; i
++) {
360 realdev
= mddev
->devices
+ i
;
364 sb_offset
= realdev
->sb_offset
;
365 set_blocksize(dev
, MD_SB_BYTES
);
366 printk("md: updating raid superblock on device %s, sb_offset == %u\n", kdevname(dev
), sb_offset
);
367 bh
= getblk(dev
, sb_offset
/ MD_SB_BLOCKS
, MD_SB_BYTES
);
369 sb
= (md_superblock_t
*) bh
->b_data
;
370 memcpy(sb
, mddev
->sb
, MD_SB_BYTES
);
371 memcpy(&sb
->descriptor
, sb
->disks
+ realdev
->sb
->descriptor
.number
, MD_SB_DESCRIPTOR_WORDS
* 4);
372 mark_buffer_uptodate(bh
, 1);
373 mark_buffer_dirty(bh
, 1);
374 ll_rw_block(WRITE
, 1, &bh
);
378 invalidate_buffers(dev
);
380 printk(KERN_ERR
"md: getblk failed for device %s\n", kdevname(dev
));
385 static int do_md_run (int minor
, int repart
)
387 int pnum
, i
, min
, factor
, err
;
389 if (!md_dev
[minor
].nb_dev
)
392 if (md_dev
[minor
].pers
)
395 md_dev
[minor
].repartition
=repart
;
397 if ((pnum
=PERSONALITY(&md_dev
[minor
]) >> (PERSONALITY_SHIFT
))
401 /* Only RAID-1 and RAID-5 can have MD devices as underlying devices */
402 if (pnum
!= (RAID1
>> PERSONALITY_SHIFT
) && pnum
!= (RAID5
>> PERSONALITY_SHIFT
)){
403 for (i
= 0; i
< md_dev
[minor
].nb_dev
; i
++)
404 if (MAJOR (md_dev
[minor
].devices
[i
].dev
) == MD_MAJOR
)
410 char module_name
[80];
411 sprintf (module_name
, "md-personality-%d", pnum
);
412 request_module (module_name
);
418 factor
= min
= 1 << FACTOR_SHIFT(FACTOR((md_dev
+minor
)));
420 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++)
421 if (md_dev
[minor
].devices
[i
].size
<min
)
423 printk ("Dev %s smaller than %dk, cannot shrink\n",
424 partition_name (md_dev
[minor
].devices
[i
].dev
), min
);
428 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++) {
429 fsync_dev(md_dev
[minor
].devices
[i
].dev
);
430 invalidate_buffers(md_dev
[minor
].devices
[i
].dev
);
433 /* Resize devices according to the factor. It is used to align
434 partitions size on a given chunk size. */
438 * Analyze the raid superblock
440 if (analyze_sbs(minor
, pnum
))
443 md_dev
[minor
].pers
=pers
[pnum
];
445 if ((err
=md_dev
[minor
].pers
->run (minor
, md_dev
+minor
)))
447 md_dev
[minor
].pers
=NULL
;
448 free_sb(md_dev
+ minor
);
452 if (pnum
!= RAID0
>> PERSONALITY_SHIFT
&& pnum
!= LINEAR
>> PERSONALITY_SHIFT
)
454 md_dev
[minor
].sb
->state
&= ~(1 << MD_SB_CLEAN
);
458 /* FIXME : We assume here we have blocks
459 that are twice as large as sectors.
460 THIS MAY NOT BE TRUE !!! */
461 md_hd_struct
[minor
].start_sect
=0;
462 md_hd_struct
[minor
].nr_sects
=md_size
[minor
]<<1;
464 read_ahead
[MD_MAJOR
] = 128;
468 static int do_md_stop (int minor
, struct inode
*inode
)
472 if (inode
->i_count
>1 || md_dev
[minor
].busy
>1) {
474 * ioctl : one open channel
476 printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n",
477 minor
, inode
->i_count
, md_dev
[minor
].busy
);
481 if (md_dev
[minor
].pers
) {
483 * It is safe to call stop here, it only frees private
484 * data. Also, it tells us if a device is unstoppable
485 * (eg. resyncing is in progress)
487 if (md_dev
[minor
].pers
->stop (minor
, md_dev
+minor
))
490 * The device won't exist anymore -> flush it now
492 fsync_dev (inode
->i_rdev
);
493 invalidate_buffers (inode
->i_rdev
);
494 if (md_dev
[minor
].sb
) {
495 md_dev
[minor
].sb
->state
|= 1 << MD_SB_CLEAN
;
501 if (md_dev
[minor
].sb
)
502 free_sb(md_dev
+ minor
);
503 for (i
=0; i
<md_dev
[minor
].nb_dev
; i
++)
504 clear_inode (md_dev
[minor
].devices
[i
].inode
);
506 md_dev
[minor
].nb_dev
=md_size
[minor
]=0;
507 md_hd_struct
[minor
].nr_sects
=0;
508 md_dev
[minor
].pers
=NULL
;
510 read_ahead
[MD_MAJOR
] = 128;
515 static int do_md_add (int minor
, kdev_t dev
)
519 struct real_dev
*realdev
;
521 if (md_dev
[minor
].nb_dev
==MAX_REAL
)
524 if (!fs_may_mount (dev
))
527 if (blk_size
[MAJOR(dev
)] == NULL
|| blk_size
[MAJOR(dev
)][MINOR(dev
)] == 0) {
528 printk("md_add(): zero device size, huh, bailing out.\n");
532 if (md_dev
[minor
].pers
) {
534 * The array is already running, hot-add the drive, or
537 if (!md_dev
[minor
].pers
->hot_add_disk
)
544 * Careful. We cannot increase nb_dev for a running array.
546 i
=md_dev
[minor
].nb_dev
;
547 realdev
= &md_dev
[minor
].devices
[i
];
550 /* Lock the device by inserting a dummy inode. This doesn't
551 smell very good, but I need to be consistent with the
552 mount stuff, specially with fs_may_mount. If someone have
553 a better idea, please help ! */
555 realdev
->inode
=get_empty_inode ();
558 realdev
->inode
->i_dev
=dev
; /* don't care about other fields */
559 insert_inode_hash (realdev
->inode
);
561 /* Sizes are now rounded at run time */
563 /* md_dev[minor].devices[i].size=gen_real->sizes[MINOR(dev)]; HACKHACK*/
565 realdev
->size
=blk_size
[MAJOR(dev
)][MINOR(dev
)];
569 * Check the superblock for consistency.
570 * The personality itself has to check whether it's getting
571 * added with the proper flags. The personality has to be
574 if (analyze_one_sb (realdev
))
577 * hot_add has to bump up nb_dev itself
579 if (md_dev
[minor
].pers
->hot_add_disk (&md_dev
[minor
], dev
)) {
581 * FIXME: here we should free up the inode and stuff
587 md_dev
[minor
].nb_dev
++;
589 printk ("REGISTER_DEV %s to md%x done\n", partition_name(dev
), minor
);
593 static int md_ioctl (struct inode
*inode
, struct file
*file
,
594 unsigned int cmd
, unsigned long arg
)
597 struct hd_geometry
*loc
= (struct hd_geometry
*) arg
;
599 if (!capable(CAP_SYS_ADMIN
))
602 if (((minor
=MINOR(inode
->i_rdev
)) & 0x80) &&
603 (minor
& 0x7f) < MAX_PERSONALITY
&&
604 pers
[minor
& 0x7f] &&
605 pers
[minor
& 0x7f]->ioctl
)
606 return (pers
[minor
& 0x7f]->ioctl (inode
, file
, cmd
, arg
));
608 if (minor
>= MAX_MD_DEV
)
614 return do_md_add (minor
, to_kdev_t ((dev_t
) arg
));
617 return do_md_run (minor
, (int) arg
);
620 return do_md_stop (minor
, inode
);
622 case BLKGETSIZE
: /* Return device size */
623 if (!arg
) return -EINVAL
;
624 err
= put_user (md_hd_struct
[MINOR(inode
->i_rdev
)].nr_sects
, (long *) arg
);
630 /* We have a problem here : there is no easy way to give a CHS
631 virtual geometry. We currently pretend that we have a 2 heads
632 4 sectors (with a BIG number of cylinders...). This drives dosfs
636 if (!loc
) return -EINVAL
;
637 err
= put_user (2, (char *) &loc
->heads
);
640 err
= put_user (4, (char *) &loc
->sectors
);
643 err
= put_user (md_hd_struct
[minor
].nr_sects
/8, (short *) &loc
->cylinders
);
646 err
= put_user (md_hd_struct
[MINOR(inode
->i_rdev
)].start_sect
,
647 (long *) &loc
->start
);
657 return blk_ioctl(inode
->i_rdev
, cmd
, arg
);
666 static int md_open (struct inode
*inode
, struct file
*file
)
668 int minor
=MINOR(inode
->i_rdev
);
670 md_dev
[minor
].busy
++;
671 return (0); /* Always succeed */
675 static int md_release (struct inode
*inode
, struct file
*file
)
677 int minor
=MINOR(inode
->i_rdev
);
679 sync_dev (inode
->i_rdev
);
680 md_dev
[minor
].busy
--;
685 static ssize_t
md_read (struct file
*file
, char *buf
, size_t count
,
688 int minor
=MINOR(file
->f_dentry
->d_inode
->i_rdev
);
690 if (!md_dev
[minor
].pers
) /* Check if device is being run */
693 return block_read(file
, buf
, count
, ppos
);
696 static ssize_t
md_write (struct file
*file
, const char *buf
,
697 size_t count
, loff_t
*ppos
)
699 int minor
=MINOR(file
->f_dentry
->d_inode
->i_rdev
);
701 if (!md_dev
[minor
].pers
) /* Check if device is being run */
704 return block_write(file
, buf
, count
, ppos
);
707 static struct file_operations md_fops
=
722 int md_map (int minor
, kdev_t
*rdev
, unsigned long *rsector
, unsigned long size
)
724 if ((unsigned int) minor
>= MAX_MD_DEV
)
726 printk ("Bad md device %d\n", minor
);
730 if (!md_dev
[minor
].pers
)
732 printk ("Oops ! md%d not running, giving up !\n", minor
);
736 return (md_dev
[minor
].pers
->map(md_dev
+minor
, rdev
, rsector
, size
));
739 int md_make_request (int minor
, int rw
, struct buffer_head
* bh
)
741 if (md_dev
[minor
].pers
->make_request
) {
742 if (buffer_locked(bh
))
744 set_bit(BH_Lock
, &bh
->b_state
);
746 if (!buffer_dirty(bh
)) {
747 bh
->b_end_io(bh
, test_bit(BH_Uptodate
, &bh
->b_state
));
751 if (rw
== READ
|| rw
== READA
) {
752 if (buffer_uptodate(bh
)) {
753 bh
->b_end_io(bh
, test_bit(BH_Uptodate
, &bh
->b_state
));
757 return (md_dev
[minor
].pers
->make_request(md_dev
+minor
, rw
, bh
));
759 make_request (MAJOR(bh
->b_rdev
), rw
, bh
);
/*
 * md never services requests through the generic request function;
 * requests are remapped via md_map()/md_make_request() instead, so
 * reaching this function indicates a bug.
 */
static void do_md_request (void)
{
	printk ("Got md request, not good...");
}
770 void md_wakeup_thread(struct md_thread
*thread
)
772 set_bit(THREAD_WAKEUP
, &thread
->flags
);
773 wake_up(&thread
->wqueue
);
776 struct md_thread
*md_register_thread (void (*run
) (void *), void *data
)
778 struct md_thread
*thread
= (struct md_thread
*)
779 kmalloc(sizeof(struct md_thread
), GFP_KERNEL
);
781 DECLARE_MUTEX_LOCKED(sem
);
783 if (!thread
) return NULL
;
785 memset(thread
, 0, sizeof(struct md_thread
));
786 init_waitqueue_head(&thread
->wqueue
);
791 ret
= kernel_thread(md_thread
, thread
, 0);
800 void md_unregister_thread (struct md_thread
*thread
)
802 DECLARE_MUTEX_LOCKED(sem
);
807 printk("Killing md_thread %d %p %s\n",
808 thread
->tsk
->pid
, thread
->tsk
, thread
->tsk
->comm
);
810 printk("Aiee. md_thread has 0 tsk\n");
811 send_sig(SIGKILL
, thread
->tsk
, 1);
812 printk("downing on %p\n", &sem
);
/* Signals an md kernel thread responds to (everything else is flushed). */
#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM))
818 int md_thread(void * arg
)
820 struct md_thread
*thread
= arg
;
827 current
->session
= 1;
829 sprintf(current
->comm
, "md_thread");
830 siginitsetinv(¤t
->blocked
, SHUTDOWN_SIGS
);
831 thread
->tsk
= current
;
836 if (!test_bit(THREAD_WAKEUP
, &thread
->flags
)) {
838 spin_lock(¤t
->sigmask_lock
);
839 flush_signals(current
);
840 spin_unlock(¤t
->sigmask_lock
);
841 interruptible_sleep_on(&thread
->wqueue
);
843 if (test_bit(THREAD_WAKEUP
, &thread
->flags
))
850 } while (signal_pending(current
));
853 clear_bit(THREAD_WAKEUP
, &thread
->flags
);
855 thread
->run(thread
->data
);
856 run_task_queue(&tq_disk
);
/* Interface exported to the RAID personality modules. */
EXPORT_SYMBOL(md_size);
EXPORT_SYMBOL(md_maxreadahead);
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_dev);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_update_sb);
EXPORT_SYMBOL(md_map);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_do_sync);
874 #ifdef CONFIG_PROC_FS
875 static int md_status_read_proc(char *page
, char **start
, off_t off
,
876 int count
, int *eof
, void *data
)
878 int sz
= 0, i
, j
, size
;
881 sz
=sprintf( page
, "Personalities : ");
882 for (i
=0; i
<MAX_PERSONALITY
; i
++)
884 sz
+=sprintf (page
+sz
, "[%d %s] ", i
, pers
[i
]->name
);
887 sz
+=sprintf (page
+sz
, "read_ahead ");
888 if (read_ahead
[MD_MAJOR
]==INT_MAX
)
889 sz
+=sprintf (page
+sz
, "not set\n");
891 sz
+=sprintf (page
+sz
, "%d sectors\n", read_ahead
[MD_MAJOR
]);
893 for (i
=0; i
<MAX_MD_DEV
; i
++) {
899 if (sz
>= off
+count
) {
903 sz
+=sprintf (page
+sz
, "md%d : %sactive",
904 i
, md_dev
[i
].pers
? "" : "in");
907 sz
+=sprintf (page
+sz
, " %s", md_dev
[i
].pers
->name
);
909 for (j
=0, size
=0; j
<md_dev
[i
].nb_dev
; j
++) {
910 sz
+=sprintf (page
+sz
, " %s",
911 partition_name(md_dev
[i
].devices
[j
].dev
));
912 size
+=md_dev
[i
].devices
[j
].size
;
915 if (md_dev
[i
].nb_dev
) {
917 sz
+=sprintf (page
+sz
, " %d blocks", md_size
[i
]);
919 sz
+=sprintf (page
+sz
, " %d blocks", size
);
922 if (!md_dev
[i
].pers
) {
923 sz
+=sprintf (page
+sz
, "\n");
927 if (md_dev
[i
].pers
->max_invalid_dev
)
928 sz
+=sprintf (page
+sz
, " maxfault=%ld",
929 MAX_FAULT(md_dev
+i
));
931 sz
+=md_dev
[i
].pers
->status (page
+sz
, i
, md_dev
+i
);
932 sz
+=sprintf (page
+sz
, "\n");
945 static void md_geninit (struct gendisk
*gdisk
)
949 for(i
=0;i
<MAX_MD_DEV
;i
++)
951 md_blocksizes
[i
] = 1024;
952 md_maxreadahead
[i
] = MD_DEFAULT_DISK_READAHEAD
;
953 md_gendisk
.part
[i
].start_sect
=-1; /* avoid partition check */
954 md_gendisk
.part
[i
].nr_sects
=0;
958 blksize_size
[MD_MAJOR
] = md_blocksizes
;
959 max_readahead
[MD_MAJOR
] = md_maxreadahead
;
961 #ifdef CONFIG_PROC_FS
962 create_proc_read_entry("mdstat", 0, NULL
, md_status_read_proc
, NULL
);
966 int md_error (kdev_t mddev
, kdev_t rdev
)
968 unsigned int minor
= MINOR (mddev
);
971 if (MAJOR(mddev
) != MD_MAJOR
|| minor
> MAX_MD_DEV
)
972 panic ("md_error gets unknown device\n");
973 if (!md_dev
[minor
].pers
)
974 panic ("md_error gets an error for an unknown device\n");
975 if (md_dev
[minor
].pers
->error_handler
) {
976 rc
= md_dev
[minor
].pers
->error_handler (md_dev
+minor
, rdev
);
977 #if SUPPORT_RECONSTRUCTION
978 md_wakeup_thread(md_sync_thread
);
979 #endif /* SUPPORT_RECONSTRUCTION */
985 int register_md_personality (int p_num
, struct md_personality
*p
)
987 int i
=(p_num
>> PERSONALITY_SHIFT
);
989 if (i
>= MAX_PERSONALITY
)
996 printk ("%s personality registered\n", p
->name
);
1000 int unregister_md_personality (int p_num
)
1002 int i
=(p_num
>> PERSONALITY_SHIFT
);
1004 if (i
>= MAX_PERSONALITY
)
1007 printk ("%s personality unregistered\n", pers
[i
]->name
);
1012 static md_descriptor_t
*get_spare(struct md_dev
*mddev
)
1015 md_superblock_t
*sb
= mddev
->sb
;
1016 md_descriptor_t
*descriptor
;
1017 struct real_dev
*realdev
;
1019 for (i
= 0; i
< mddev
->nb_dev
; i
++) {
1020 realdev
= &mddev
->devices
[i
];
1023 descriptor
= &sb
->disks
[realdev
->sb
->descriptor
.number
];
1024 if (descriptor
->state
& (1 << MD_FAULTY_DEVICE
))
1026 if (descriptor
->state
& (1 << MD_ACTIVE_DEVICE
))
1034 * parallel resyncing thread.
1036 * FIXME: - make it abort with a dirty array on mdstop, now it just blocks
1037 * - fix read error handing
1040 int md_do_sync(struct md_dev
*mddev
)
1042 struct buffer_head
*bh
;
1043 int max_blocks
, blocksize
, curr_bsize
, percent
=1, j
;
1044 kdev_t read_disk
= MKDEV(MD_MAJOR
, mddev
- md_dev
);
1045 int major
= MAJOR(read_disk
), minor
= MINOR(read_disk
);
1046 unsigned long starttime
;
1048 blocksize
= blksize_size
[major
][minor
];
1049 max_blocks
= blk_size
[major
][minor
] / (blocksize
>> 10);
1051 printk("... resync log\n");
1052 printk(" .... mddev->nb_dev: %d\n", mddev
->nb_dev
);
1053 printk(" .... raid array: %s\n", kdevname(read_disk
));
1054 printk(" .... max_blocks: %d blocksize: %d\n", max_blocks
, blocksize
);
1055 printk("md: syncing RAID array %s\n", kdevname(read_disk
));
1060 for (j
= 0; j
< max_blocks
; j
++) {
1063 * B careful. When some1 mounts a non-'blocksize' filesystem
1064 * then we get the blocksize changed right under us. Go deal
1065 * with it transparently, recalculate 'blocksize', 'j' and
1068 curr_bsize
= blksize_size
[major
][minor
];
1069 if (curr_bsize
!= blocksize
) {
1071 if (curr_bsize
> blocksize
)
1073 * this is safe, rounds downwards.
1075 j
/= curr_bsize
/blocksize
;
1077 j
*= blocksize
/curr_bsize
;
1079 blocksize
= curr_bsize
;
1080 max_blocks
= blk_size
[major
][minor
] / (blocksize
>> 10);
1082 if ((bh
= breada (read_disk
, j
, blocksize
, j
* blocksize
,
1083 max_blocks
* blocksize
)) != NULL
) {
1084 mark_buffer_dirty(bh
, 1);
1088 * FIXME: Ugly, but set_blocksize() isnt safe ...
1090 curr_bsize
= blksize_size
[major
][minor
];
1091 if (curr_bsize
!= blocksize
)
1092 goto diff_blocksize
;
1095 * It's a real read problem. FIXME, handle this
1099 "read error, stopping reconstruction.\n");
1105 * Let's sleep some if we are faster than our speed limit:
1107 while (blocksize
*j
/(jiffies
-starttime
+1)*HZ
/1024 > SPEED_LIMIT
)
1109 current
->state
= TASK_INTERRUPTIBLE
;
1110 schedule_timeout(1);
1114 * FIXME: put this status bar thing into /proc
1116 if (!(j
%(max_blocks
/100))) {
1118 printk (" %03d%% done.\n",percent
);
1124 fsync_dev(read_disk
);
1125 printk("md: %s: sync done.\n", kdevname(read_disk
));
1131 * This is a kernel thread which: syncs a spare disk with the active array
1133 * the amount of foolproofing might seem to be a tad excessive, but an
1134 * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs
1135 * of my root partition with the first 0.5 gigs of my /home partition ... so
1136 * i'm a bit nervous ;)
1138 void mdsyncd (void *data
)
1141 struct md_dev
*mddev
;
1142 md_superblock_t
*sb
;
1143 md_descriptor_t
*spare
;
1144 unsigned long flags
;
1146 for (i
= 0, mddev
= md_dev
; i
< MAX_MD_DEV
; i
++, mddev
++) {
1147 if ((sb
= mddev
->sb
) == NULL
)
1149 if (sb
->active_disks
== sb
->raid_disks
)
1151 if (!sb
->spare_disks
)
1153 if ((spare
= get_spare(mddev
)) == NULL
)
1155 if (!mddev
->pers
->mark_spare
)
1157 if (mddev
->pers
->mark_spare(mddev
, spare
, SPARE_WRITE
))
1159 if (md_do_sync(mddev
) || (spare
->state
& (1 << MD_FAULTY_DEVICE
))) {
1160 mddev
->pers
->mark_spare(mddev
, spare
, SPARE_INACTIVE
);
1165 mddev
->pers
->mark_spare(mddev
, spare
, SPARE_ACTIVE
);
1166 spare
->state
|= (1 << MD_SYNC_DEVICE
);
1167 spare
->state
|= (1 << MD_ACTIVE_DEVICE
);
1170 mddev
->sb_dirty
= 1;
1171 md_update_sb(mddev
- md_dev
);
1172 restore_flags(flags
);
1177 #ifdef CONFIG_MD_BOOT
1182 } md_setup_args __initdata
= {
1186 /* called from init/main.c */
1187 void __init
md_setup(char *str
,int *ints
)
1190 for(i
=0;i
<=ints
[0];i
++) {
1191 md_setup_args
.ints
[i
] = ints
[i
];
1192 strcpy(md_setup_args
.str
, str
);
1193 /* printk ("md: ints[%d]=%d.\n", i, ints[i]);*/
1195 md_setup_args
.set
=1;
1199 void __init
do_md_setup(char *str
,int *ints
)
1201 int minor
, pers
, factor
, fault
;
1206 printk ("md: Too few Arguments (%d).\n", ints
[0]);
1212 if (minor
>= MAX_MD_DEV
) {
1213 printk ("md: Minor device number too high.\n");
1219 switch(ints
[i
++]) { /* Raidlevel */
1221 #ifdef CONFIG_MD_LINEAR
1223 printk ("md: Setting up md%d as linear device.\n",minor
);
1225 printk ("md: Linear mode not configured."
1226 "Recompile the kernel with linear mode enabled!\n");
1231 #ifdef CONFIG_MD_STRIPED
1232 printk ("md: Setting up md%d as a striped device.\n",minor
);
1234 printk ("md: Striped mode not configured."
1235 "Recompile the kernel with striped mode enabled!\n");
1238 /* not supported yet
1241 printk ("md: Setting up md%d as a raid1 device.\n",minor);
1245 printk ("md: Setting up md%d as a raid5 device.\n",minor);
1249 printk ("md: Unknown or not supported raid level %d.\n", ints
[--i
]);
1255 factor
=ints
[i
++]; /* Chunksize */
1256 fault
=ints
[i
++]; /* Faultlevel */
1258 pers
=pers
| factor
| (fault
<< FAULT_SHIFT
);
1260 while( str
&& (dev
= name_to_kdev_t(str
))) {
1261 do_md_add (minor
, dev
);
1262 if((str
= strchr (str
, ',')) != NULL
)
1266 do_md_run (minor
, pers
);
1267 printk ("md: Loading md%d.\n",minor
);
/* Init entry points of the built-in personalities, called from md_init(). */
void linear_init (void);
void raid0_init (void);
void raid1_init (void);
void raid5_init (void);
1278 int __init
md_init (void)
1280 printk ("md driver %d.%d.%d MAX_MD_DEV=%d, MAX_REAL=%d\n",
1281 MD_MAJOR_VERSION
, MD_MINOR_VERSION
, MD_PATCHLEVEL_VERSION
,
1282 MAX_MD_DEV
, MAX_REAL
);
1284 if (register_blkdev (MD_MAJOR
, "md", &md_fops
))
1286 printk ("Unable to get major %d for md\n", MD_MAJOR
);
1290 blk_dev
[MD_MAJOR
].request_fn
=DEVICE_REQUEST
;
1291 blk_dev
[MD_MAJOR
].current_request
=NULL
;
1292 read_ahead
[MD_MAJOR
]=INT_MAX
;
1293 memset(md_dev
, 0, MAX_MD_DEV
* sizeof (struct md_dev
));
1294 md_gendisk
.next
=gendisk_head
;
1296 gendisk_head
=&md_gendisk
;
1298 #if SUPPORT_RECONSTRUCTION
1299 if ((md_sync_thread
= md_register_thread(mdsyncd
, NULL
)) == NULL
)
1300 printk("md: bug: md_sync_thread == NULL\n");
1301 #endif /* SUPPORT_RECONSTRUCTION */
1303 #ifdef CONFIG_MD_LINEAR
1306 #ifdef CONFIG_MD_STRIPED
1309 #ifdef CONFIG_MD_MIRRORING
1312 #ifdef CONFIG_MD_RAID5
1318 #ifdef CONFIG_MD_BOOT
1319 void __init
md_setup_drive(void)
1321 if(md_setup_args
.set
)
1322 do_md_setup(md_setup_args
.str
, md_setup_args
.ints
);