/*
   raid0.c : Multiple Devices driver for Linux
             Copyright (C) 1994-96 Marc ZYNGIER
	     <zyngier@ufr-info-p7.ibp.fr> or
	     <maz@gloups.fdn.fr>
   Copyright (C) 1999, 2000 Ingo Molnar, Red Hat

   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "md.h"
#include "raid0.h"

/* Unplug every member device's queue so any queued requests get issued. */
static void raid0_unplug(struct request_queue *q)
{
	mddev_t *mddev = q->queuedata;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i;

	for (i = 0; i < raid_disks; i++) {
		struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);

		blk_unplug(r_queue);
	}
}

/* The array is congested as soon as any member's backing device is. */
static int raid0_congested(void *data, int bits)
{
	mddev_t *mddev = data;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i, ret = 0;

	if (mddev_congested(mddev, bits))
		return 1;

	for (i = 0; i < raid_disks && !ret; i++) {
		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);

		ret |= bdi_congested(&q->backing_dev_info, bits);
	}
	return ret;
}

/*
 * Inform the user of the raid configuration.
 */
static void dump_zones(mddev_t *mddev)
{
	int j, k;
	sector_t zone_size = 0;
	sector_t zone_start = 0;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;

	printk(KERN_INFO "******* %s configuration *********\n",
	       mdname(mddev));
	for (j = 0; j < conf->nr_strip_zones; j++) {
		printk(KERN_INFO "zone%d=[", j);
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			printk(KERN_CONT "%s/",
			       bdevname(conf->devlist[j*raid_disks
						      + k]->bdev, b));
		printk(KERN_CONT "]\n");

		zone_size = conf->strip_zone[j].zone_end - zone_start;
		printk(KERN_INFO "        zone offset=%llukb "
				"device offset=%llukb size=%llukb\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
	printk(KERN_INFO "**********************************\n\n");
}

static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
{
	int i, c, err;
	sector_t curr_zone_end, sectors;
	mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
	struct strip_zone *zone;
	int cnt;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);

	if (!conf)
		return -ENOMEM;

	/* Count the strip zones: each distinct device size starts one. */
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		printk(KERN_INFO "md/raid0:%s: looking at %s\n",
		       mdname(mddev),
		       bdevname(rdev1->bdev, b));
		c = 0;
		/* round size to chunk_size */
		sectors = rdev1->sectors;
		sector_div(sectors, mddev->chunk_sectors);
		rdev1->sectors = sectors * mddev->chunk_sectors;
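		/*
		 * e.g. with chunk_sectors = 128 (64KiB chunks), a 1000-sector
		 * device is trimmed to (1000 / 128) * 128 = 896 sectors, so
		 * every chunk on every member is complete.
		 */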

		list_for_each_entry(rdev2, &mddev->disks, same_set) {
			printk(KERN_INFO "md/raid0:%s: comparing %s(%llu)",
			       mdname(mddev),
			       bdevname(rdev1->bdev, b),
			       (unsigned long long)rdev1->sectors);
			printk(KERN_CONT " with %s(%llu)\n",
			       bdevname(rdev2->bdev, b),
			       (unsigned long long)rdev2->sectors);
			if (rdev2 == rdev1) {
				printk(KERN_INFO "md/raid0:%s: END\n",
				       mdname(mddev));
				break;
			}
			if (rdev2->sectors == rdev1->sectors) {
				/*
				 * Not unique, don't count it as a new
				 * group
				 */
				printk(KERN_INFO "md/raid0:%s: EQUAL\n",
				       mdname(mddev));
				c = 1;
				break;
			}
			printk(KERN_INFO "md/raid0:%s: NOT EQUAL\n",
			       mdname(mddev));
		}
		if (!c) {
			printk(KERN_INFO "md/raid0:%s: ==> UNIQUE\n",
			       mdname(mddev));
			conf->nr_strip_zones++;
			printk(KERN_INFO "md/raid0:%s: %d zones\n",
			       mdname(mddev), conf->nr_strip_zones);
		}
	}
	printk(KERN_INFO "md/raid0:%s: FINAL %d zones\n",
	       mdname(mddev), conf->nr_strip_zones);
	err = -ENOMEM;
	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
				conf->nr_strip_zones, GFP_KERNEL);
	if (!conf->strip_zone)
		goto abort;
	conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
				conf->nr_strip_zones*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->devlist)
		goto abort;

	/* The first zone must contain all devices, so here we check that
	 * there is a proper alignment of slots to devices and find them all
	 */
	zone = &conf->strip_zone[0];
	cnt = 0;
	smallest = NULL;
	dev = conf->devlist;
	err = -EINVAL;
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		int j = rdev1->raid_disk;

		if (mddev->level == 10) {
			/* taking over a raid10-n2 array */
			j /= 2;
			rdev1->new_raid_disk = j;
		}

		if (mddev->level == 1) {
			/* taking over a raid1 array -
			 * we have only one active disk
			 */
			j = 0;
			rdev1->new_raid_disk = j;
		}

		if (j < 0 || j >= mddev->raid_disks) {
			printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
			       "aborting!\n", mdname(mddev), j);
			goto abort;
		}
		if (dev[j]) {
			printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
			       "aborting!\n", mdname(mddev), j);
			goto abort;
		}
		dev[j] = rdev1;

		disk_stack_limits(mddev->gendisk, rdev1->bdev,
				  rdev1->data_offset << 9);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_segments to 1, lying within
		 * a single page.
		 */

		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
			blk_queue_max_segments(mddev->queue, 1);
			blk_queue_segment_boundary(mddev->queue,
						   PAGE_CACHE_SIZE - 1);
		}
		if (!smallest || (rdev1->sectors < smallest->sectors))
			smallest = rdev1;
		cnt++;
	}
	if (cnt != mddev->raid_disks) {
		printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
		       "aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
		goto abort;
	}
	zone->nb_dev = cnt;
	zone->zone_end = smallest->sectors * cnt;

	curr_zone_end = zone->zone_end;

	/* now do the other zones */
	for (i = 1; i < conf->nr_strip_zones; i++)
	{
		int j;

		zone = conf->strip_zone + i;
		dev = conf->devlist + i * mddev->raid_disks;

		printk(KERN_INFO "md/raid0:%s: zone %d\n",
		       mdname(mddev), i);
		zone->dev_start = smallest->sectors;
		smallest = NULL;
		c = 0;

		for (j = 0; j < cnt; j++) {
			rdev = conf->devlist[j];
			printk(KERN_INFO "md/raid0:%s: checking %s ...",
			       mdname(mddev),
			       bdevname(rdev->bdev, b));
			if (rdev->sectors <= zone->dev_start) {
				printk(KERN_CONT " nope.\n");
				continue;
			}
			printk(KERN_CONT " contained as device %d\n", c);
			dev[c] = rdev;
			c++;
			if (!smallest || rdev->sectors < smallest->sectors) {
				smallest = rdev;
				printk(KERN_INFO "md/raid0:%s: (%llu) is smallest!\n",
				       mdname(mddev),
				       (unsigned long long)rdev->sectors);
			}
		}

		zone->nb_dev = c;
		sectors = (smallest->sectors - zone->dev_start) * c;
		printk(KERN_INFO "md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
		       mdname(mddev),
		       zone->nb_dev, (unsigned long long)sectors);

		curr_zone_end += sectors;
		zone->zone_end = curr_zone_end;

		printk(KERN_INFO "md/raid0:%s: current zone start: %llu\n",
		       mdname(mddev),
		       (unsigned long long)smallest->sectors);
	}
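	/*
	 * Illustrative layout (units in chunks): two 100-chunk devices plus
	 * one 300-chunk device give two zones.  Zone 0 stripes over all
	 * three devices (zone_end = 100 * 3 = 300); zone 1 is the 200-chunk
	 * remainder of the large device alone (dev_start = 100,
	 * zone_end = 300 + 200 = 500).
	 */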
	mddev->queue->unplug_fn = raid0_unplug;
	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
	mddev->queue->backing_dev_info.congested_data = mddev;

	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
	 */
	if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
		       mdname(mddev),
		       mddev->chunk_sectors << 9);
		goto abort;
	}
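	/*
	 * e.g. chunk_sectors = 128 is a 65536-byte chunk, a multiple of both
	 * 512-byte and 4096-byte logical blocks; a chunk that is not a
	 * multiple of a member's logical block size is rejected above.
	 */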

	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
	blk_queue_io_opt(mddev->queue,
			 (mddev->chunk_sectors << 9) * mddev->raid_disks);

	printk(KERN_INFO "md/raid0:%s: done.\n", mdname(mddev));
	*private_conf = conf;

	return 0;
abort:
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	*private_conf = NULL;
	return err;
}

/**
 *	raid0_mergeable_bvec -- tell the bio layer whether two requests can be merged
 *	@q: request queue
 *	@bvm: properties of new bio
 *	@biovec: the request that could be merged to it.
 *
 *	Return amount of bytes we can accept at this offset
 */
static int raid0_mergeable_bvec(struct request_queue *q,
				struct bvec_merge_data *bvm,
				struct bio_vec *biovec)
{
	mddev_t *mddev = q->queuedata;
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	int max;
	unsigned int chunk_sectors = mddev->chunk_sectors;
	unsigned int bio_sectors = bvm->bi_size >> 9;

	if (is_power_of_2(chunk_sectors))
		max = (chunk_sectors - ((sector & (chunk_sectors-1))
						+ bio_sectors)) << 9;
	else
		max = (chunk_sectors - (sector_div(sector, chunk_sectors)
						+ bio_sectors)) << 9;
	if (max < 0)
		max = 0; /* bio_add cannot handle a negative return */
	if (max <= biovec->bv_len && bio_sectors == 0)
		return biovec->bv_len;
	else
		return max;
}
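
/*
 * Worked example: with chunk_sectors = 128 and a new bio starting 120
 * sectors into a chunk (bio_sectors == 0), max = (128 - 120) << 9 = 4096
 * bytes, so one more 4K page is accepted; a request already reaching the
 * chunk boundary gets max clamped to 0 and cannot grow further.
 */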

static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	sector_t array_sectors = 0;
	mdk_rdev_t *rdev;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	list_for_each_entry(rdev, &mddev->disks, same_set)
		array_sectors += rdev->sectors;

	return array_sectors;
}
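
/*
 * The exported size is simply the sum of the (chunk-rounded) member
 * sizes, e.g. members of 896 and 1024 sectors give a 1920-sector array.
 */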

static int raid0_run(mddev_t *mddev)
{
	raid0_conf_t *conf;
	int ret;

	if (mddev->chunk_sectors == 0) {
		printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
		       mdname(mddev));
		return -EINVAL;
	}
	if (md_check_no_bitmap(mddev))
		return -EINVAL;
	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
	mddev->queue->queue_lock = &mddev->queue->__queue_lock;

	/* if private is not null, we are here after takeover */
	if (mddev->private == NULL) {
		ret = create_strip_zones(mddev, &conf);
		if (ret < 0)
			return ret;
		mddev->private = conf;
	}
	conf = mddev->private;

	/* calculate array device size */
	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

	printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
	       mdname(mddev),
	       (unsigned long long)mddev->array_sectors);
	/* calculate the max read-ahead size.
	 * For read-ahead of large files to be effective, we need to
	 * readahead at least twice a whole stripe, i.e. number of devices
	 * multiplied by chunk size times 2.
	 * If an individual device has an ra_pages greater than the
	 * chunk size, then we will not drive that device as hard as it
	 * wants. We consider this a configuration error: a larger
	 * chunksize should be used in that case.
	 */
	{
		int stripe = mddev->raid_disks *
			(mddev->chunk_sectors << 9) / PAGE_SIZE;
		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
	}
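	/*
	 * e.g. 4 disks with 512KiB chunks and 4KiB pages:
	 * stripe = 4 * 524288 / 4096 = 512 pages, so ra_pages is raised
	 * to at least 1024 pages (4MiB), i.e. two full stripes.
	 */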

	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
	dump_zones(mddev);
	md_integrity_register(mddev);
	return 0;
}

static int raid0_stop(mddev_t *mddev)
{
	raid0_conf_t *conf = mddev->private;

	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}

/* Find the zone which holds a particular offset
 * Update *sectorp to be an offset in that zone
 */
static struct strip_zone *find_zone(struct raid0_private_data *conf,
				    sector_t *sectorp)
{
	int i;
	struct strip_zone *z = conf->strip_zone;
	sector_t sector = *sectorp;

	for (i = 0; i < conf->nr_strip_zones; i++)
		if (sector < z[i].zone_end) {
			if (i)
				*sectorp = sector - z[i-1].zone_end;
			return z + i;
		}
	BUG();
}
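
/*
 * Example: with zone_end[] = {1000, 1800}, sector 1200 falls in zone 1
 * and *sectorp becomes 1200 - 1000 = 200, the offset into that zone.
 */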

/*
 * Remaps the bio to the target device. We separate two flows:
 * a power-of-2 flow and a general flow, for the sake of performance.
 */
static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
				sector_t sector, sector_t *sector_offset)
{
	unsigned int sect_in_chunk;
	sector_t chunk;
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	unsigned int chunk_sects = mddev->chunk_sectors;

	if (is_power_of_2(chunk_sects)) {
		int chunksect_bits = ffz(~chunk_sects);
		/* find the sector offset inside the chunk */
		sect_in_chunk = sector & (chunk_sects - 1);
		sector >>= chunksect_bits;
		/* chunk in zone */
		chunk = *sector_offset;
		/* quotient is the chunk in real device */
		sector_div(chunk, zone->nb_dev << chunksect_bits);
	} else {
		sect_in_chunk = sector_div(sector, chunk_sects);
		chunk = *sector_offset;
		sector_div(chunk, chunk_sects * zone->nb_dev);
	}
	/*
	 * position the bio over the real device:
	 * real sector = chunk in device + start of zone
	 *	+ the position in the chunk
	 */
	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
	return conf->devlist[(zone - conf->strip_zone)*raid_disks
			     + sector_div(sector, zone->nb_dev)];
}
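
/*
 * Worked example (power-of-2 flow): chunk_sects = 8, zone->nb_dev = 2,
 * sector = 35 in a zone starting at sector 0 (*sector_offset = 35):
 * sect_in_chunk = 35 & 7 = 3, global chunk = 35 >> 3 = 4, chunk in the
 * device = 35 / 16 = 2, so *sector_offset = 2 * 8 + 3 = 19 on device
 * 4 % 2 = 0.
 */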

/*
 * Is the IO distributed over one or more chunks?
 */
static inline int is_io_in_chunk_boundary(mddev_t *mddev,
			unsigned int chunk_sects, struct bio *bio)
{
	if (likely(is_power_of_2(chunk_sects))) {
		return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
					+ (bio->bi_size >> 9));
	} else {
		sector_t sector = bio->bi_sector;
		return chunk_sects >= (sector_div(sector, chunk_sects)
						+ (bio->bi_size >> 9));
	}
}
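
/*
 * e.g. with chunk_sects = 8: a 4-sector bio at sector 2 stays inside one
 * chunk (2 + 4 <= 8), while the same bio at sector 6 spans the boundary
 * (6 + 4 > 8) and must be split before it can be mapped.
 */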

static int raid0_make_request(mddev_t *mddev, struct bio *bio)
{
	unsigned int chunk_sects;
	sector_t sector_offset;
	struct strip_zone *zone;
	mdk_rdev_t *tmp_dev;

	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
		md_flush_request(mddev, bio);
		return 0;
	}

	chunk_sects = mddev->chunk_sectors;
	if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
		sector_t sector = bio->bi_sector;
		struct bio_pair *bp;
		/* Sanity check -- queue functions should prevent this happening */
		if (bio->bi_vcnt != 1 ||
		    bio->bi_idx != 0)
			goto bad_map;
		/* This is a one page bio that upper layers
		 * refuse to split for us, so we need to split it.
		 */
		if (likely(is_power_of_2(chunk_sects)))
			bp = bio_split(bio, chunk_sects - (sector &
							   (chunk_sects-1)));
		else
			bp = bio_split(bio, chunk_sects -
				       sector_div(sector, chunk_sects));
		if (raid0_make_request(mddev, &bp->bio1))
			generic_make_request(&bp->bio1);
		if (raid0_make_request(mddev, &bp->bio2))
			generic_make_request(&bp->bio2);

		bio_pair_release(bp);
		return 0;
	}
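	/*
	 * Each half of a split lies within a single chunk, so the recursive
	 * calls above map it directly and recurse at most one level deep.
	 */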

	sector_offset = bio->bi_sector;
	zone = find_zone(mddev->private, &sector_offset);
	tmp_dev = map_sector(mddev, zone, bio->bi_sector,
			     &sector_offset);
	bio->bi_bdev = tmp_dev->bdev;
	bio->bi_sector = sector_offset + zone->dev_start +
		tmp_dev->data_offset;
	/*
	 * Let the main block layer submit the IO and resolve recursion:
	 */
	return 1;

bad_map:
	printk("md/raid0:%s: make_request bug: can't convert block across chunks"
	       " or bigger than %dk %llu %d\n",
	       mdname(mddev), chunk_sects / 2,
	       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);

	bio_io_error(bio);
	return 0;
}

static void raid0_status(struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
#ifdef MD_DEBUG
	int j, k;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	sector_t zone_size;
	sector_t zone_start = 0;

	for (j = 0; j < conf->nr_strip_zones; j++) {
		seq_printf(seq, "      z%d", j);
		seq_printf(seq, "=[");
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			seq_printf(seq, "%s/", bdevname(
				conf->devlist[j*raid_disks + k]
						->bdev, b));

		zone_size = conf->strip_zone[j].zone_end - zone_start;
		seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
#endif
	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
	return;
}

static void *raid0_takeover_raid45(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	raid0_conf_t *priv_conf;

	if (mddev->degraded != 1) {
		printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
		       mdname(mddev),
		       mddev->degraded);
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(rdev, &mddev->disks, same_set) {
		/* check slot number for a disk */
		if (rdev->raid_disk == mddev->raid_disks-1) {
			printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
			       mdname(mddev));
			return ERR_PTR(-EINVAL);
		}
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->raid_disks--;
	mddev->delta_disks = -1;
	/* make sure it will not be marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

static void *raid0_takeover_raid10(mddev_t *mddev)
{
	raid0_conf_t *priv_conf;

	/* Check layout:
	 *  - far_copies must be 1
	 *  - near_copies must be 2
	 *  - disks number must be even
	 *  - all mirrors must be already degraded
	 */
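	/*
	 * The raid10 layout word keeps near copies in its low byte and far
	 * copies in the next byte, so the only layout accepted below is
	 * (1 << 8) + 2 == 0x102: two near copies, one far copy.
	 */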
	if (mddev->layout != ((1 << 8) + 2)) {
		printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover layout: 0x%x\n",
		       mdname(mddev),
		       mddev->layout);
		return ERR_PTR(-EINVAL);
	}
	if (mddev->raid_disks & 1) {
		printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}
	if (mddev->degraded != (mddev->raid_disks>>1)) {
		printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->delta_disks = - mddev->raid_disks / 2;
	mddev->raid_disks += mddev->delta_disks;
	mddev->degraded = 0;
	/* make sure it will not be marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

static void *raid0_takeover_raid1(mddev_t *mddev)
{
	raid0_conf_t *priv_conf;

	/* Check layout:
	 *  - (N - 1) mirror drives must be already faulty
	 */
	if ((mddev->raid_disks - 1) != mddev->degraded) {
		printk(KERN_ERR "md/raid0:%s: (N - 1) mirror drives must be already faulty!\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
	mddev->delta_disks = 1 - mddev->raid_disks;
	mddev->raid_disks = 1;
	/* make sure it will not be marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}

static void *raid0_takeover(mddev_t *mddev)
{
	/* raid0 can take over:
	 *  raid4 - if all data disks are active.
	 *  raid5 - providing it is Raid4 layout and one disk is faulty
	 *  raid10 - assuming we have all necessary active disks
	 *  raid1 - with (N - 1) mirror drives faulty
	 */
	if (mddev->level == 4)
		return raid0_takeover_raid45(mddev);

	if (mddev->level == 5) {
		if (mddev->layout == ALGORITHM_PARITY_N)
			return raid0_takeover_raid45(mddev);

		printk(KERN_ERR "md/raid0:%s: Raid0 can only take over Raid5 with layout: %d\n",
		       mdname(mddev), ALGORITHM_PARITY_N);
	}

	if (mddev->level == 10)
		return raid0_takeover_raid10(mddev);

	if (mddev->level == 1)
		return raid0_takeover_raid1(mddev);

	printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
	       mddev->level);

	return ERR_PTR(-EINVAL);
}

static void raid0_quiesce(mddev_t *mddev, int state)
{
}

static struct mdk_personality raid0_personality =
{
	.name		= "raid0",
	.level		= 0,
	.owner		= THIS_MODULE,
	.make_request	= raid0_make_request,
	.run		= raid0_run,
	.stop		= raid0_stop,
	.status		= raid0_status,
	.size		= raid0_size,
	.takeover	= raid0_takeover,
	.quiesce	= raid0_quiesce,
};

static int __init raid0_init(void)
{
	return register_md_personality(&raid0_personality);
}

static void raid0_exit(void)
{
	unregister_md_personality(&raid0_personality);
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");