2 pdcraid.c Copyright (C) 2001 Red Hat, Inc. All rights reserved.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 You should have received a copy of the GNU General Public License
10 (for example /usr/src/linux/COPYING); if not, write to the Free
11 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
13 Authors: Arjan van de Ven <arjanv@redhat.com>
15 Based on work done by Søren Schmidt for FreeBSD
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/kernel.h>
22 #include <linux/sched.h>
23 #include <linux/smp_lock.h>
24 #include <linux/blkdev.h>
25 #include <linux/blkpg.h>
26 #include <linux/genhd.h>
27 #include <linux/ioctl.h>
29 #include <linux/ide.h>
30 #include <asm/uaccess.h>
34 static int pdcraid_open(struct inode
*inode
, struct file
*filp
);
35 static int pdcraid_release(struct inode
*inode
, struct file
*filp
);
36 static int pdcraid_ioctl(struct inode
*inode
, struct file
*file
,
37 unsigned int cmd
, unsigned long arg
);
38 static int pdcraid0_make_request(request_queue_t
* q
, int rw
,
39 struct buffer_head
*bh
);
40 static int pdcraid1_make_request(request_queue_t
* q
, int rw
,
41 struct buffer_head
*bh
);
49 static struct disk_dev devlist
[] = {
69 unsigned long sectors
;
70 struct block_device
*bdev
;
71 unsigned long last_pos
;
77 unsigned long sectors
;
80 struct pdcdisk disk
[8];
82 unsigned long cutoff
[8];
83 unsigned int cutoff_disks
[8];
86 static struct raid_device_operations pdcraid0_ops
= {
88 .release
= pdcraid_release
,
89 .ioctl
= pdcraid_ioctl
,
90 .make_request
= pdcraid0_make_request
93 static struct raid_device_operations pdcraid1_ops
= {
95 .release
= pdcraid_release
,
96 .ioctl
= pdcraid_ioctl
,
97 .make_request
= pdcraid1_make_request
100 static struct pdcraid raid
[16];
103 static int pdcraid_ioctl(struct inode
*inode
, struct file
*file
,
104 unsigned int cmd
, unsigned long arg
)
107 unsigned long sectors
;
109 if (!inode
|| kdev_none(inode
->i_rdev
))
112 minor
= minor(inode
->i_rdev
) >> SHIFT
;
116 case BLKGETSIZE
: /* Return device size */
120 ataraid_gendisk
.part
[minor(inode
->i_rdev
)].nr_sects
;
121 if (minor(inode
->i_rdev
) & 15)
122 return put_user(sectors
, (unsigned long *) arg
);
123 return put_user(raid
[minor
].sectors
,
124 (unsigned long *) arg
);
130 struct hd_geometry
*loc
=
131 (struct hd_geometry
*) arg
;
132 unsigned short bios_cyl
= raid
[minor
].geom
.cylinders
; /* truncate */
137 (raid
[minor
].geom
.heads
,
138 (u8
*) & loc
->heads
))
141 (raid
[minor
].geom
.sectors
,
142 (u8
*) & loc
->sectors
))
145 (bios_cyl
, (unsigned short *) &loc
->cylinders
))
148 ((unsigned) ataraid_gendisk
.
149 part
[minor(inode
->i_rdev
)].start_sect
,
150 (unsigned long *) &loc
->start
))
156 printk("Invalid ioctl \n");
164 unsigned long partition_map_normal(unsigned long block
,
165 unsigned long partition_off
,
166 unsigned long partition_size
,
169 return block
+ partition_off
;
172 unsigned long partition_map_linux(unsigned long block
,
173 unsigned long partition_off
,
174 unsigned long partition_size
, int stride
)
176 unsigned long newblock
;
178 newblock
= stride
- (partition_off
% stride
);
179 if (newblock
== stride
)
182 newblock
= newblock
% partition_size
;
183 newblock
+= partition_off
;
188 static int funky_remap
[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
190 unsigned long partition_map_linux_raid0_4disk(unsigned long block
,
191 unsigned long partition_off
,
192 unsigned long partition_size
,
195 unsigned long newblock
, temp
, temp2
;
197 newblock
= stride
- (partition_off
% stride
);
198 if (newblock
== stride
)
201 if (block
< (partition_size
/ (8 * stride
)) * 8 * stride
) {
202 temp
= block
% stride
;
203 temp2
= block
/ stride
;
204 temp2
= ((temp2
>> 3) << 3) | (funky_remap
[temp2
& 7]);
205 block
= temp2
* stride
+ temp
;
210 newblock
= newblock
% partition_size
;
211 newblock
+= partition_off
;
218 static int pdcraid0_make_request(request_queue_t
* q
, int rw
,
219 struct buffer_head
*bh
)
222 unsigned long rsect_left
, rsect_accum
= 0;
224 unsigned int disk
= 0, real_disk
= 0;
227 struct pdcraid
*thisraid
;
229 rsect
= bh
->b_rsector
;
231 /* Ok. We need to modify this sector number to a new disk + new sector number.
232 * If there are disks of different sizes, this gets tricky.
233 * Example with 3 disks (1 GB, 4 GB and 5 GB):
234 * The first 3 GB of the "RAID" are evenly spread over the 3 disks.
235 * Then things get interesting. The next 6 GB (RAID view) are spread across disks 2 and 3,
236 * and the last 1 GB is on disk 3 only.
238 * The way this is solved is as follows: we keep a list of "cutoff" points, and every time
239 * a disk falls out of the "higher" count, we mark the max sector. So once we pass a cutoff
240 * point, we have to divide by one less.
243 device
= (bh
->b_rdev
>> SHIFT
) & MAJOR_MASK
;
244 thisraid
= &raid
[device
];
245 if (thisraid
->stride
== 0)
246 thisraid
->stride
= 1;
248 /* For partitions, the start sector of the partition must be added to the requested sector */
251 partition_map_normal(rsect
,
252 ataraid_gendisk
.part
[MINOR(bh
->b_rdev
)].
254 ataraid_gendisk
.part
[MINOR(bh
->b_rdev
)].
255 nr_sects
, thisraid
->stride
);
257 /* Whoops: we need to split the request so that it does not cross a stride boundary */
258 if ((rsect
/ thisraid
->stride
) !=
259 ((rsect
+ (bh
->b_size
/ 512) - 1) / thisraid
->stride
)) {
265 for (i
= 0; i
< 8; i
++) {
266 if (thisraid
->cutoff_disks
[i
] == 0)
268 if (rsect
> thisraid
->cutoff
[i
]) {
269 /* we're in the wrong area so far */
270 rsect_left
-= thisraid
->cutoff
[i
];
272 thisraid
->cutoff
[i
] /
273 thisraid
->cutoff_disks
[i
];
275 block
= rsect_left
/ thisraid
->stride
;
276 disk
= block
% thisraid
->cutoff_disks
[i
];
278 (block
/ thisraid
->cutoff_disks
[i
]) *
281 rsect_accum
+ (rsect_left
% thisraid
->stride
) +
287 for (i
= 0; i
< 8; i
++) {
289 && (thisraid
->disk
[i
].sectors
> rsect_accum
)) {
294 && (thisraid
->disk
[i
].sectors
>= rsect_accum
)) {
303 * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
304 * is the only IO operation happening on this bh.
306 bh
->b_rdev
= thisraid
->disk
[disk
].device
;
307 bh
->b_rsector
= rsect
;
310 * Let the main block layer submit the IO and resolve recursion:
315 static int pdcraid1_write_request(request_queue_t
* q
, int rw
,
316 struct buffer_head
*bh
)
318 struct buffer_head
*bh1
;
319 struct ataraid_bh_private
*private;
323 device
= (bh
->b_rdev
>> SHIFT
) & MAJOR_MASK
;
324 private = ataraid_get_private();
328 private->parent
= bh
;
330 atomic_set(&private->count
, raid
[device
].disks
);
333 for (i
= 0; i
< raid
[device
].disks
; i
++) {
334 bh1
= ataraid_get_bhead();
335 /* If this ever fails we're doomed */
339 /* dupe the bufferhead and update the parts that need to be different */
340 memcpy(bh1
, bh
, sizeof(*bh
));
342 bh1
->b_end_io
= ataraid_end_request
;
343 bh1
->b_private
= private;
344 bh1
->b_rsector
+= ataraid_gendisk
.part
[MINOR(bh
->b_rdev
)].start_sect
; /* partition offset */
345 bh1
->b_rdev
= raid
[device
].disk
[i
].device
;
347 /* update the last known head position for the drive */
348 raid
[device
].disk
[i
].last_pos
=
349 bh1
->b_rsector
+ (bh1
->b_size
>> 9);
351 generic_make_request(rw
, bh1
);
356 static int pdcraid1_read_request(request_queue_t
* q
, int rw
,
357 struct buffer_head
*bh
)
361 int bestsofar
, bestdist
, i
;
364 /* Reads are simple in principle. Pick a disk and go.
365 Initially I cheat by just picking the one whose last known
366 head position is closest.
367 Later on, online/offline checking and performance improvements need to be added */
369 device
= (bh
->b_rdev
>> SHIFT
) & MAJOR_MASK
;
371 ataraid_gendisk
.part
[MINOR(bh
->b_rdev
)].start_sect
;
374 bestdist
= raid
[device
].disk
[0].last_pos
- bh
->b_rsector
;
376 bestdist
= -bestdist
;
380 for (i
= 1; i
< raid
[device
].disks
; i
++) {
381 dist
= raid
[device
].disk
[i
].last_pos
- bh
->b_rsector
;
387 if (bestdist
== dist
) { /* it's a tie; try to do some read balancing */
388 if ((previous
> bestsofar
) && (previous
<= i
))
390 previous
= (previous
+ 1) % raid
[device
].disks
;
391 } else if (bestdist
> dist
) {
398 bh
->b_rdev
= raid
[device
].disk
[bestsofar
].device
;
399 raid
[device
].disk
[bestsofar
].last_pos
=
400 bh
->b_rsector
+ (bh
->b_size
>> 9);
403 * Let the main block layer submit the IO and resolve recursion:
410 static int pdcraid1_make_request(request_queue_t
* q
, int rw
,
411 struct buffer_head
*bh
)
413 /* Read and Write are totally different cases; split them totally here */
418 return pdcraid1_read_request(q
, rw
, bh
);
420 return pdcraid1_write_request(q
, rw
, bh
);
425 static unsigned long calc_pdcblock_offset(struct block_device
*bdev
)
427 unsigned long lba
= 0;
428 struct ata_device
*ideinfo
= get_info_ptr(to_kdev_t(bdev
->bd_dev
));
433 /* first sector of the last cluster */
434 if (ideinfo
->head
== 0)
436 if (ideinfo
->sect
== 0)
438 lba
= (ideinfo
->capacity
/ (ideinfo
->head
* ideinfo
->sect
));
439 lba
= lba
* (ideinfo
->head
* ideinfo
->sect
);
440 lba
= lba
- ideinfo
->sect
;
446 static int read_disk_sb(struct block_device
*bdev
,
447 struct promise_raid_conf
*p
)
449 unsigned long sb_offset
;
454 * Calculate the position of the superblock;
455 * it's at the first sector of the last cylinder
457 sb_offset
= calc_pdcblock_offset(bdev
);
462 for (i
= 0, buffer
= (char *) p
; i
< 4; i
++, buffer
+= 512) {
464 char *q
= read_dev_sector(bdev
, sb_offset
+ i
, &sect
);
467 "pdcraid: Error reading superblock.\n");
470 memcpy(buffer
, q
, 512);
471 put_dev_sector(&sect
);
476 static unsigned int calc_sb_csum(unsigned int *ptr
)
482 for (count
= 0; count
< 511; count
++)
488 static int cookie
= 0;
490 static struct promise_raid_conf __initdata prom
;
491 static void __init
probedisk(int devindex
, int device
, int raidlevel
)
495 struct block_device
*bdev
;
497 if (devlist
[devindex
].device
!= -1) /* already assigned to another array */
500 major
= devlist
[devindex
].major
;
501 minor
= devlist
[devindex
].minor
;
503 bdev
= bdget(mk_kdev(major
, minor
));
507 if (blkdev_get(bdev
, FMODE_READ
| FMODE_WRITE
, 0, BDEV_RAW
) != 0)
510 if (read_disk_sb(bdev
, &prom
))
513 /* the checksums must match */
514 if (prom
.checksum
!= calc_sb_csum((unsigned int *) &prom
))
516 if (prom
.raid
.type
!= raidlevel
) /* different raidlevel */
519 if ((cookie
!= 0) && (cookie
!= prom
.raid
.magic_1
)) /* different array */
522 cookie
= prom
.raid
.magic_1
;
524 /* This looks evil. But basically, we have to search for our adapter number
525 in the array definition, both of which are in the superblock */
526 for (i
= 0; (i
< prom
.raid
.total_disks
) && (i
< 8); i
++) {
527 if ((prom
.raid
.disk
[i
].channel
== prom
.raid
.channel
) &&
528 (prom
.raid
.disk
[i
].device
== prom
.raid
.device
)) {
530 raid
[device
].disk
[i
].bdev
= bdev
;
531 raid
[device
].disk
[i
].device
=
532 mk_kdev(major
, minor
);
533 raid
[device
].disk
[i
].sectors
= prom
.raid
.disk_secs
;
534 raid
[device
].stride
= (1 << prom
.raid
.raid0_shift
);
535 raid
[device
].disks
= prom
.raid
.total_disks
;
536 raid
[device
].sectors
= prom
.raid
.total_secs
;
537 raid
[device
].geom
.heads
= prom
.raid
.heads
+ 1;
538 raid
[device
].geom
.sectors
= prom
.raid
.sectors
;
539 raid
[device
].geom
.cylinders
=
540 prom
.raid
.cylinders
+ 1;
541 devlist
[devindex
].device
= device
;
546 blkdev_put(bdev
, BDEV_RAW
);
549 static void __init
fill_cutoff(int device
)
552 unsigned long smallest
;
557 for (i
= 0; i
< 8; i
++) {
559 for (j
= 0; j
< 8; j
++)
560 if ((raid
[device
].disk
[j
].sectors
< smallest
)
561 && (raid
[device
].disk
[j
].sectors
> bar
))
562 smallest
= raid
[device
].disk
[j
].sectors
;
564 for (j
= 0; j
< 8; j
++)
565 if (raid
[device
].disk
[j
].sectors
>= smallest
)
568 smallest
= smallest
* count
;
570 raid
[device
].cutoff
[i
] = smallest
;
571 raid
[device
].cutoff_disks
[i
] = count
;
575 static __init
int pdcraid_init_one(int device
, int raidlevel
)
579 for (i
= 0; i
< 14; i
++)
580 probedisk(i
, device
, raidlevel
);
585 /* Initialize the gendisk structure */
587 ataraid_register_disk(device
, raid
[device
].sectors
);
591 for (i
= 0; i
< 8; i
++) {
592 if (raid
[device
].disk
[i
].device
!= 0) {
593 printk(KERN_INFO
"Drive %i is %li Mb (%i / %i) \n",
594 i
, raid
[device
].disk
[i
].sectors
/ 2048,
595 major(raid
[device
].disk
[i
].device
),
596 minor(raid
[device
].disk
[i
].device
));
601 printk(KERN_INFO
"Raid%i array consists of %i drives. \n",
609 static __init
int pdcraid_init(void)
611 int retval
, device
, count
= 0;
615 device
= ataraid_get_device(&pdcraid0_ops
);
618 retval
= pdcraid_init_one(device
, 0);
620 ataraid_release_device(device
);
630 device
= ataraid_get_device(&pdcraid1_ops
);
633 retval
= pdcraid_init_one(device
, 1);
635 ataraid_release_device(device
);
644 "Promise Fasttrak(tm) Softwareraid driver for linux version 0.03beta\n");
648 "Promise Fasttrak(tm) Softwareraid driver 0.03beta: No raid array found\n");
652 static void __exit
pdcraid_exit(void)
655 for (device
= 0; device
< 16; device
++) {
656 for (i
= 0; i
< 8; i
++) {
657 struct block_device
*bdev
=
658 raid
[device
].disk
[i
].bdev
;
659 raid
[device
].disk
[i
].bdev
= NULL
;
661 blkdev_put(bdev
, BDEV_RAW
);
663 if (raid
[device
].sectors
)
664 ataraid_release_device(device
);
668 static int pdcraid_open(struct inode
*inode
, struct file
*filp
)
673 static int pdcraid_release(struct inode
*inode
, struct file
*filp
)
679 module_init(pdcraid_init
);
680 module_exit(pdcraid_exit
);
681 MODULE_LICENSE("GPL");