TOR: fix compilation
[tomato.git] / release / src / router / mdadm / Grow.c
blobc160f2c6251306dda602c4fec87c5bb5a71f4767
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
29 #include "mdadm.h"
30 #include "dlink.h"
32 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33 #error no endian defined
34 #endif
35 #include "md_u.h"
36 #include "md_p.h"
38 int Grow_Add_device(char *devname, int fd, char *newdev)
40 /* Add a device to an active array.
41 * Currently, just extend a linear array.
42 * This requires writing a new superblock on the
43 * new device, calling the kernel to add the device,
44 * and if that succeeds, update the superblock on
45 * all other devices.
46 * This means that we need to *find* all other devices.
48 struct mdinfo info;
50 void *super = NULL;
51 struct stat stb;
52 int nfd, fd2;
53 int d, nd;
54 struct supertype *st = NULL;
57 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
58 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
59 return 1;
62 st = super_by_version(info.array.major_version, info.array.minor_version);
63 if (!st) {
64 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
65 return 1;
68 if (info.array.level != -1) {
69 fprintf(stderr, Name ": can only add devices to linear arrays\n");
70 return 1;
73 nfd = open(newdev, O_RDWR|O_EXCL);
74 if (nfd < 0) {
75 fprintf(stderr, Name ": cannot open %s\n", newdev);
76 return 1;
78 fstat(nfd, &stb);
79 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
80 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
81 close(nfd);
82 return 1;
84 /* now check out all the devices and make sure we can read the superblock */
85 for (d=0 ; d < info.array.raid_disks ; d++) {
86 mdu_disk_info_t disk;
87 char *dv;
89 disk.number = d;
90 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
91 fprintf(stderr, Name ": cannot get device detail for device %d\n",
92 d);
93 return 1;
95 dv = map_dev(disk.major, disk.minor, 1);
96 if (!dv) {
97 fprintf(stderr, Name ": cannot find device file for device %d\n",
98 d);
99 return 1;
101 fd2 = dev_open(dv, O_RDWR);
102 if (!fd2) {
103 fprintf(stderr, Name ": cannot open device file %s\n", dv);
104 return 1;
106 if (super) free(super);
107 super= NULL;
108 if (st->ss->load_super(st, fd2, &super, NULL)) {
109 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
110 close(fd2);
111 return 1;
113 close(fd2);
115 /* Ok, looks good. Lets update the superblock and write it out to
116 * newdev.
119 info.disk.number = d;
120 info.disk.major = major(stb.st_rdev);
121 info.disk.minor = minor(stb.st_rdev);
122 info.disk.raid_disk = d;
123 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
124 st->ss->update_super(&info, super, "grow", newdev, 0, 0, NULL);
126 if (st->ss->store_super(st, nfd, super)) {
127 fprintf(stderr, Name ": Cannot store new superblock on %s\n", newdev);
128 close(nfd);
129 return 1;
131 close(nfd);
133 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
134 fprintf(stderr, Name ": Cannot add new disk to this array\n");
135 return 1;
137 /* Well, that seems to have worked.
138 * Now go through and update all superblocks
141 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
142 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
143 return 1;
146 nd = d;
147 for (d=0 ; d < info.array.raid_disks ; d++) {
148 mdu_disk_info_t disk;
149 char *dv;
151 disk.number = d;
152 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
153 fprintf(stderr, Name ": cannot get device detail for device %d\n",
155 return 1;
157 dv = map_dev(disk.major, disk.minor, 1);
158 if (!dv) {
159 fprintf(stderr, Name ": cannot find device file for device %d\n",
161 return 1;
163 fd2 = dev_open(dv, O_RDWR);
164 if (fd2 < 0) {
165 fprintf(stderr, Name ": cannot open device file %s\n", dv);
166 return 1;
168 if (st->ss->load_super(st, fd2, &super, NULL)) {
169 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
170 close(fd);
171 return 1;
173 info.array.raid_disks = nd+1;
174 info.array.nr_disks = nd+1;
175 info.array.active_disks = nd+1;
176 info.array.working_disks = nd+1;
177 info.disk.number = nd;
178 info.disk.major = major(stb.st_rdev);
179 info.disk.minor = minor(stb.st_rdev);
180 info.disk.raid_disk = nd;
181 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
182 st->ss->update_super(&info, super, "grow", dv, 0, 0, NULL);
184 if (st->ss->store_super(st, fd2, super)) {
185 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
186 close(fd2);
187 return 1;
189 close(fd2);
192 return 0;
195 int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
198 * First check that array doesn't have a bitmap
199 * Then create the bitmap
200 * Then add it
202 * For internal bitmaps, we need to check the version,
203 * find all the active devices, and write the bitmap block
204 * to all devices
206 mdu_bitmap_file_t bmf;
207 mdu_array_info_t array;
208 struct supertype *st;
209 int major = BITMAP_MAJOR_HI;
210 int vers = md_get_version(fd);
211 unsigned long long bitmapsize, array_size;
213 if (vers < 9003) {
214 major = BITMAP_MAJOR_HOSTENDIAN;
215 #ifdef __BIG_ENDIAN
216 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
217 " between different architectured. Consider upgrading the Linux kernel.\n");
218 #endif
221 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
222 if (errno == ENOMEM)
223 fprintf(stderr, Name ": Memory allocation failure.\n");
224 else
225 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
226 return 1;
228 if (bmf.pathname[0]) {
229 if (strcmp(file,"none")==0) {
230 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
231 fprintf(stderr, Name ": failed to remove bitmap %s\n",
232 bmf.pathname);
233 return 1;
235 return 0;
237 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
238 devname, bmf.pathname);
239 return 1;
241 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
242 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
243 return 1;
245 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
246 if (strcmp(file, "none")==0) {
247 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
248 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
249 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
250 return 1;
252 return 0;
254 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
255 devname);
256 return 1;
258 bitmapsize = array.size;
259 bitmapsize <<= 1;
260 if (get_dev_size(fd, NULL, &array_size) &&
261 array_size > (0x7fffffffULL<<9)) {
262 /* Array is big enough that we cannot trust array.size
263 * try other approaches
265 bitmapsize = get_component_size(fd);
267 if (bitmapsize == 0) {
268 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
269 return 1;
272 if (array.level == 10) {
273 int ncopies = (array.layout&255)*((array.layout>>8)&255);
274 bitmapsize = bitmapsize * array.raid_disks / ncopies;
277 st = super_by_version(array.major_version, array.minor_version);
278 if (!st) {
279 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
280 array.major_version, array.minor_version);
281 return 1;
283 if (strcmp(file, "none") == 0) {
284 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
285 return 1;
286 } else if (strcmp(file, "internal") == 0) {
287 int d;
288 for (d=0; d< st->max_devs; d++) {
289 mdu_disk_info_t disk;
290 char *dv;
291 disk.number = d;
292 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
293 continue;
294 if (disk.major == 0 &&
295 disk.minor == 0)
296 continue;
297 if ((disk.state & (1<<MD_DISK_SYNC))==0)
298 continue;
299 dv = map_dev(disk.major, disk.minor, 1);
300 if (dv) {
301 void *super;
302 int fd2 = dev_open(dv, O_RDWR);
303 if (fd2 < 0)
304 continue;
305 if (st->ss->load_super(st, fd2, &super, NULL)==0) {
306 if (st->ss->add_internal_bitmap(
307 st, super,
308 &chunk, delay, write_behind,
309 bitmapsize, 0, major)
311 st->ss->write_bitmap(st, fd2, super);
312 else {
313 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
314 close(fd2);
315 return 1;
318 close(fd2);
321 array.state |= (1<<MD_SB_BITMAP_PRESENT);
322 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
323 fprintf(stderr, Name ": failed to set internal bitmap.\n");
324 return 1;
326 } else {
327 int uuid[4];
328 int bitmap_fd;
329 int d;
330 int max_devs = st->max_devs;
331 void *super = NULL;
333 /* try to load a superblock */
334 for (d=0; d<max_devs; d++) {
335 mdu_disk_info_t disk;
336 char *dv;
337 int fd2;
338 disk.number = d;
339 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
340 continue;
341 if ((disk.major==0 && disk.minor==0) ||
342 (disk.state & (1<<MD_DISK_REMOVED)))
343 continue;
344 dv = map_dev(disk.major, disk.minor, 1);
345 if (!dv) continue;
346 fd2 = dev_open(dv, O_RDONLY);
347 if (fd2 >= 0 &&
348 st->ss->load_super(st, fd2, &super, NULL) == 0) {
349 close(fd2);
350 st->ss->uuid_from_super(uuid, super);
351 break;
353 close(fd2);
355 if (d == max_devs) {
356 fprintf(stderr, Name ": cannot find UUID for array!\n");
357 return 1;
359 if (CreateBitmap(file, force, (char*)uuid, chunk,
360 delay, write_behind, bitmapsize, major)) {
361 return 1;
363 bitmap_fd = open(file, O_RDWR);
364 if (bitmap_fd < 0) {
365 fprintf(stderr, Name ": weird: %s cannot be opened\n",
366 file);
367 return 1;
369 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
370 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
371 devname, strerror(errno));
372 return 1;
376 return 0;
381 * When reshaping an array we might need to backup some data.
382 * This is written to all spares with a 'super_block' describing it.
383 * The superblock goes 1K form the end of the used space on the
384 * device.
385 * It if written after the backup is complete.
386 * It has the following structure.
389 struct mdp_backup_super {
390 char magic[16]; /* md_backup_data-1 */
391 __u8 set_uuid[16];
392 __u64 mtime;
393 /* start/sizes in 512byte sectors */
394 __u64 devstart;
395 __u64 arraystart;
396 __u64 length;
397 __u32 sb_csum; /* csum of preceeding bytes. */
/*
 * Checksum used for the backup superblock.
 * Folds 'len' rounds of "shift left by three and add" into an int and
 * returns it converted with __cpu_to_le32().
 *
 * NOTE(review): every round adds buf[0], never buf[i], so only the first
 * byte and 'len' influence the result.  The value is stored in sb_csum on
 * disk and compared again on restart, so changing this would reject
 * backups written by existing builds - confirm before "fixing" it.
 */
int bsb_csum(char *buf, int len)
{
	int sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining--)
		sum = (sum << 3) + buf[0];

	return __cpu_to_le32(sum);
}
409 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
410 long long size,
411 int level, int layout, int chunksize, int raid_disks)
413 /* Make some changes in the shape of an array.
414 * The kernel must support the change.
415 * Different reshapes have subtly different meaning for different
416 * levels, so we need to check the current state of the array
417 * and go from there.
419 struct mdu_array_info_s array;
420 char *c;
422 struct mdp_backup_super bsb;
423 struct supertype *st;
425 int nlevel, olevel;
426 int nchunk, ochunk;
427 int nlayout, olayout;
428 int ndisks, odisks;
429 int ndata, odata;
430 unsigned long long nstripe, ostripe, last_block;
431 int *fdlist;
432 unsigned long long *offsets;
433 int d, i, spares;
434 int nrdisks;
435 int err;
436 void *super = NULL;
438 struct sysarray *sra;
439 struct sysdev *sd;
441 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
442 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
443 devname);
444 return 1;
446 c = map_num(pers, array.level);
447 if (c == NULL) c = "-unknown-";
448 switch(array.level) {
449 default: /* raid0, linear, multipath cannot be reconfigured */
450 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
451 c, devname);
452 return 1;
454 case LEVEL_FAULTY: /* only 'layout' change is permitted */
456 if (size >= 0) {
457 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
458 devname);
459 return 1;
461 if (level != UnSet && level != LEVEL_FAULTY) {
462 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
463 devname);
464 return 1;
466 if (chunksize || raid_disks) {
467 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
468 devname);
469 return 1;
471 if (layout == UnSet)
472 return 0; /* nothing to do.... */
474 array.layout = layout;
475 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
476 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
477 devname, strerror(errno));
478 return 1;
480 if (!quiet)
481 printf("layout for %s set to %d\n", devname, array.layout);
482 return 0;
484 case 1: /* raid_disks and size can each be changed. They are independant */
486 if (level != UnSet && level != 1) {
487 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
488 devname);
489 return 1;
491 if (chunksize || layout != UnSet) {
492 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
493 devname);
494 return 1;
497 /* Each can trigger a resync/recovery which will block the
498 * other from happening. Later we could block
499 * resync for the duration via 'sync_action'...
501 if (raid_disks > 0) {
502 array.raid_disks = raid_disks;
503 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
504 fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
505 devname, strerror(errno));
506 return 1;
509 if (size >= 0) {
510 array.size = size;
511 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
512 fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
513 devname, strerror(errno));
514 return 1;
517 return 0;
519 case 4:
520 case 5:
521 case 6:
522 st = super_by_version(array.major_version,
523 array.minor_version);
524 /* size can be changed independently.
525 * layout/chunksize/raid_disks/level can be changed
526 * though the kernel may not support it all.
527 * If 'suspend_lo' is not present in devfs, then
528 * these cannot be changed.
530 if (size >= 0) {
531 /* Cannot change other details as well.. */
532 if (layout != UnSet ||
533 chunksize != 0 ||
534 raid_disks != 0 ||
535 level != UnSet) {
536 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
537 devname, c);
538 return 1;
540 array.size = size;
541 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
542 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
543 devname, strerror(errno));
544 return 1;
546 return 0;
548 /* Ok, just change the shape. This can be awkward.
549 * There are three possibilities.
550 * 1/ The array will shrink. We don't support this
551 * possibility. Maybe one day...
552 * 2/ The array will not change size. This is easy enough
553 * to do, but not reliably. If the process is aborted
554 * the array *will* be corrupted. So maybe we can allow
555 * this but only if the user is really certain. e.g.
556 * --really-risk-everything
557 * 3/ The array will grow. This can be reliably achieved.
558 * However the kernel's restripe routines will cheerfully
559 * overwrite some early data before it is safe. So we
560 * need to make a backup of the early parts of the array
561 * and be ready to restore it if rebuild aborts very early.
563 * We backup data by writing it to all spares (there must be
564 * at least 1, so even raid6->raid5 requires a spare to be
565 * present).
567 * So: we enumerate the devices in the array and
568 * make sure we can open all of them.
569 * Then we freeze the early part of the array and
570 * backup to the various spares.
571 * Then we request changes and start the reshape.
572 * Monitor progress until it has passed the danger zone.
573 * and finally invalidate the copied data and unfreeze the
574 * start of the array.
576 * Before we can do this we need to decide:
577 * - will the array grow? Just calculate size
578 * - how much needs to be saved: count stripes.
579 * - where to save data... good question.
582 nlevel = olevel = array.level;
583 nchunk = ochunk = array.chunk_size;
584 nlayout = olayout = array.layout;
585 ndisks = odisks = array.raid_disks;
587 if (level != UnSet) nlevel = level;
588 if (chunksize) nchunk = chunksize;
589 if (layout != UnSet) nlayout = layout;
590 if (raid_disks) ndisks = raid_disks;
592 odata = odisks-1;
593 if (olevel == 6) odata--; /* number of data disks */
594 ndata = ndisks-1;
595 if (nlevel == 6) ndata--;
597 if (ndata < odata) {
598 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
599 devname);
600 return 1;
602 if (ndata == odata) {
603 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
604 devname);
605 return 1;
607 /* Well, it is growing... so how much do we need to backup.
608 * Need to backup a full number of new-stripes, such that the
609 * last one does not over-write any place that it would be read
610 * from
612 nstripe = ostripe = 0;
613 while (nstripe >= ostripe) {
614 nstripe += nchunk/512;
615 last_block = nstripe * ndata;
616 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
618 printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
620 sra = sysfs_read(fd, 0,
621 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
622 GET_CACHE);
623 if (!sra) {
624 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
625 devname);
626 return 1;
629 if (last_block >= sra->component_size/2) {
630 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
631 devname);
632 return 1;
634 if (sra->spares == 0 && backup_file == NULL) {
635 fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
636 devname);
637 return 1;
640 nrdisks = array.nr_disks + sra->spares;
641 /* Now we need to open all these devices so we can read/write.
643 fdlist = malloc((1+nrdisks) * sizeof(int));
644 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
645 if (!fdlist || !offsets) {
646 fprintf(stderr, Name ": malloc failed: grow aborted\n");
647 return 1;
649 for (d=0; d <= nrdisks; d++)
650 fdlist[d] = -1;
651 d = array.raid_disks;
652 for (sd = sra->devs; sd; sd=sd->next) {
653 if (sd->state & (1<<MD_DISK_FAULTY))
654 continue;
655 if (sd->state & (1<<MD_DISK_SYNC)) {
656 char *dn = map_dev(sd->major, sd->minor, 1);
657 fdlist[sd->role] = dev_open(dn, O_RDONLY);
658 offsets[sd->role] = sd->offset;
659 if (fdlist[sd->role] < 0) {
660 fprintf(stderr, Name ": %s: cannot open component %s\n",
661 devname, dn?dn:"-unknown-");
662 goto abort;
664 } else {
665 /* spare */
666 char *dn = map_dev(sd->major, sd->minor, 1);
667 fdlist[d] = dev_open(dn, O_RDWR);
668 offsets[d] = sd->offset;
669 if (fdlist[d]<0) {
670 fprintf(stderr, Name ": %s: cannot open component %s\n",
671 devname, dn?dn:"-unknown");
672 goto abort;
674 d++;
677 for (i=0 ; i<array.raid_disks; i++)
678 if (fdlist[i] < 0) {
679 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
680 " --grow aborted\n", devname, i);
681 goto abort;
683 spares = sra->spares;
684 if (backup_file) {
685 fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
686 if (fdlist[d] < 0) {
687 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
688 devname, backup_file, strerror(errno));
689 goto abort;
691 offsets[d] = 8;
692 d++;
693 spares++;
695 if (fdlist[array.raid_disks] < 0) {
696 fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
697 devname);
698 goto abort;
701 /* Find a superblock */
702 if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
703 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
704 devname);
705 goto abort;
709 memcpy(bsb.magic, "md_backup_data-1", 16);
710 st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
711 bsb.mtime = __cpu_to_le64(time(0));
712 bsb.arraystart = 0;
713 bsb.length = __cpu_to_le64(last_block);
715 /* Decide offset for the backup, llseek the spares, and write
716 * a leading superblock 4K earlier.
718 for (i=array.raid_disks; i<d; i++) {
719 char buf[4096];
720 if (i==d-1 && backup_file) {
721 /* This is the backup file */
722 offsets[i] = 8;
723 } else
724 offsets[i] += sra->component_size - last_block - 8;
725 if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
726 != (offsets[i]<<9) - 4096) {
727 fprintf(stderr, Name ": could not seek...\n");
728 goto abort;
730 memset(buf, 0, sizeof(buf));
731 bsb.devstart = __cpu_to_le64(offsets[i]);
732 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
733 memcpy(buf, &bsb, sizeof(bsb));
734 if (write(fdlist[i], buf, 4096) != 4096) {
735 fprintf(stderr, Name ": could not write leading superblock\n");
736 goto abort;
739 array.level = nlevel;
740 array.raid_disks = ndisks;
741 array.chunk_size = nchunk;
742 array.layout = nlayout;
743 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
744 if (errno == ENOSPC) {
745 /* stripe cache is not big enough.
746 * It needs to be 4 times chunksize_size,
747 * and we assume pagesize is 4K
749 if (sra->cache_size < 4 * (nchunk/4096)) {
750 sysfs_set_num(sra, NULL,
751 "stripe_cache_size",
752 4 * (nchunk/4096) +1);
753 if (ioctl(fd, SET_ARRAY_INFO,
754 &array) == 0)
755 goto ok;
758 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
759 devname, strerror(errno));
760 goto abort;
762 ok: ;
764 /* suspend the relevant region */
765 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
766 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
767 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
768 fprintf(stderr, Name ": %s: failed to suspend device.\n",
769 devname);
770 goto abort_resume;
774 err = save_stripes(fdlist, offsets,
775 odisks, ochunk, olevel, olayout,
776 spares, fdlist+odisks,
777 0ULL, last_block*512);
779 /* abort if there was an error */
780 if (err < 0) {
781 fprintf(stderr, Name ": %s: failed to save critical region\n",
782 devname);
783 goto abort_resume;
786 for (i=odisks; i<d ; i++) {
787 bsb.devstart = __cpu_to_le64(offsets[i]);
788 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
789 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
790 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
791 fsync(fdlist[i]) != 0) {
792 fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
793 devname);
794 goto abort_resume;
798 /* start the reshape happening */
799 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
800 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
801 devname);
802 goto abort_resume;
804 /* wait for reshape to pass the critical region */
805 while(1) {
806 unsigned long long comp;
807 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
808 sleep(5);
809 break;
811 if (comp >= nstripe)
812 break;
813 sleep(1);
816 /* invalidate superblocks */
817 memset(&bsb, 0, sizeof(bsb));
818 for (i=odisks; i<d ; i++) {
819 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
820 if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
821 fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
822 devname, i);
826 /* unsuspend. */
827 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
829 for (i=0; i<d; i++)
830 if (fdlist[i] >= 0)
831 close(fdlist[i]);
832 free(fdlist);
833 free(offsets);
834 if (backup_file)
835 unlink(backup_file);
837 printf(Name ": ... critical section passed.\n");
838 break;
840 return 0;
843 abort_resume:
844 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
845 abort:
846 for (i=0; i<array.nr_disks; i++)
847 if (fdlist[i] >= 0)
848 close(fdlist[i]);
849 free(fdlist);
850 free(offsets);
851 if (backup_file)
852 unlink(backup_file);
853 return 1;
858 * If any spare contains md_back_data-1 which is recent wrt mtime,
859 * write that data into the array and update the super blocks with
860 * the new reshape_progress
862 int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
864 int i, j;
865 int old_disks;
866 int err = 0;
867 unsigned long long *offsets;
869 if (info->delta_disks < 0)
870 return 1; /* cannot handle a shrink */
871 if (info->new_level != info->array.level ||
872 info->new_layout != info->array.layout ||
873 info->new_chunk != info->array.chunk_size)
874 return 1; /* Can only handle change in disks */
876 old_disks = info->array.raid_disks - info->delta_disks;
878 for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
879 void *super = NULL;
880 struct mdinfo dinfo;
881 struct mdp_backup_super bsb;
882 char buf[4096];
883 int fd;
885 /* This was a spare and may have some saved data on it.
886 * Load the superblock, find and load the
887 * backup_super_block.
888 * If either fail, go on to next device.
889 * If the backup contains no new info, just return
890 * else restore data and update all superblocks
892 if (i == old_disks-1) {
893 fd = open(backup_file, O_RDONLY);
894 if (fd<0)
895 continue;
896 } else {
897 fd = fdlist[i];
898 if (fd < 0)
899 continue;
900 if (st->ss->load_super(st, fd, &super, NULL))
901 continue;
903 st->ss->getinfo_super(&dinfo, super);
904 free(super); super = NULL;
905 if (lseek64(fd,
906 (dinfo.data_offset + dinfo.component_size - 8) <<9,
907 0) < 0)
908 continue; /* Cannot seek */
910 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
911 continue; /* Cannot read */
912 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
913 continue;
914 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
915 continue; /* bad checksum */
916 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
917 continue; /* Wrong uuid */
919 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
920 info->array.utime < __le64_to_cpu(bsb.mtime))
921 continue; /* time stamp is too bad */
923 if (__le64_to_cpu(bsb.arraystart) != 0)
924 continue; /* Can only handle backup from start of array */
925 if (__le64_to_cpu(bsb.length) <
926 info->reshape_progress)
927 continue; /* No new data here */
929 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
930 continue; /* Cannot seek */
931 /* There should be a duplicate backup superblock 4k before here */
932 if (lseek64(fd, -4096, 1) < 0 ||
933 read(fd, buf, 4096) != 4096 ||
934 memcmp(buf, &bsb, sizeof(bsb)) != 0)
935 continue; /* Cannot find leading superblock */
937 /* Now need the data offsets for all devices. */
938 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
939 for(j=0; j<info->array.raid_disks; j++) {
940 if (fdlist[j] < 0)
941 continue;
942 if (st->ss->load_super(st, fdlist[j], &super, NULL))
943 /* FIXME should be this be an error */
944 continue;
945 st->ss->getinfo_super(&dinfo, super);
946 free(super); super = NULL;
947 offsets[j] = dinfo.data_offset;
949 printf(Name ": restoring critical section\n");
951 if (restore_stripes(fdlist, offsets,
952 info->array.raid_disks,
953 info->new_chunk,
954 info->new_level,
955 info->new_layout,
956 fd, __le64_to_cpu(bsb.devstart)*512,
957 0, __le64_to_cpu(bsb.length)*512)) {
958 /* didn't succeed, so giveup */
959 return -1;
962 /* Ok, so the data is restored. Let's update those superblocks. */
964 for (j=0; j<info->array.raid_disks; j++) {
965 if (fdlist[j] < 0) continue;
966 if (st->ss->load_super(st, fdlist[j], &super, NULL))
967 continue;
968 st->ss->getinfo_super(&dinfo, super);
969 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
970 st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0, 0, NULL);
971 st->ss->store_super(st, fdlist[j], super);
972 free(super);
975 /* And we are done! */
976 return 0;
978 return err;