TOR: fix compilation
[tomato.git] / release / src / router / mdadm / Incremental.c
blob705c402cb9cc90dae4a22a74ff979efe0d397764
1 /*
2 * Incremental.c - support --incremental. Part of:
3 * mdadm - manage Linux "md" devices aka RAID arrays.
5 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
24 * Paper: Neil Brown
25 * Novell Inc
26 * GPO Box Q1283
27 * QVB Post Office, NSW 1230
28 * Australia
31 #include "mdadm.h"
33 static int count_active(struct supertype *st, int mdfd, char **availp,
34 struct mdinfo *info);
35 static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra,
36 int number, __u64 events, int verbose,
37 char *array_name);
39 int Incremental(char *devname, int verbose, int runstop,
40 struct supertype *st, char *homehost, int autof)
42 /* Add this device to an array, creating the array if necessary
43 * and starting the array if sensibe or - if runstop>0 - if possible.
45 * This has several steps:
47 * 1/ Check if device is permitted by mdadm.conf, reject if not.
48 * 2/ Find metadata, reject if none appropriate (check
49 * version/name from args)
50 * 3/ Check if there is a match in mdadm.conf
51 * 3a/ if not, check for homehost match. If no match, reject.
52 * 4/ Determine device number.
53 * - If in mdadm.conf with std name, use that
54 * - UUID in /var/run/mdadm.map use that
55 * - If name is suggestive, use that. unless in use with different uuid.
56 * - Choose a free, high number.
57 * - Use a partitioned device unless strong suggestion not to.
58 * e.g. auto=md
59 * 5/ Find out if array already exists
60 * 5a/ if it does not
61 * - choose a name, from mdadm.conf or 'name' field in array.
62 * - create the array
63 * - add the device
64 * 5b/ if it does
65 * - check one drive in array to make sure metadata is a reasonably
66 * close match. Reject if not (e.g. different type)
67 * - add the device
68 * 6/ Make sure /var/run/mdadm.map contains this array.
69 * 7/ Is there enough devices to possibly start the array?
70 * 7a/ if not, finish with success.
71 * 7b/ if yes,
72 * - read all metadata and arrange devices like -A does
73 * - if number of OK devices match expected, or -R and there are enough,
74 * start the array (auto-readonly).
76 struct stat stb;
77 void *super, *super2;
78 struct mdinfo info, info2;
79 struct mddev_ident_s *array_list, *match;
80 char chosen_name[1024];
81 int rv;
82 int devnum;
83 struct map_ent *mp, *map = NULL;
84 int dfd, mdfd;
85 char *avail;
86 int active_disks;
89 struct createinfo *ci = conf_get_create_info();
91 if (autof == 0)
92 autof = ci->autof;
94 /* 1/ Check if devices is permitted by mdadm.conf */
96 if (!conf_test_dev(devname)) {
97 if (verbose >= 0)
98 fprintf(stderr, Name
99 ": %s not permitted by mdadm.conf.\n",
100 devname);
101 return 1;
104 /* 2/ Find metadata, reject if none appropriate (check
105 * version/name from args) */
107 dfd = dev_open(devname, O_RDONLY|O_EXCL);
108 if (dfd < 0) {
109 if (verbose >= 0)
110 fprintf(stderr, Name ": cannot open %s: %s.\n",
111 devname, strerror(errno));
112 return 1;
114 if (fstat(dfd, &stb) < 0) {
115 if (verbose >= 0)
116 fprintf(stderr, Name ": fstat failed for %s: %s.\n",
117 devname, strerror(errno));
118 close(dfd);
119 return 1;
121 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
122 if (verbose >= 0)
123 fprintf(stderr, Name ": %s is not a block device.\n",
124 devname);
125 close(dfd);
126 return 1;
129 if (st == NULL && (st = guess_super(dfd)) == NULL) {
130 if (verbose >= 0)
131 fprintf(stderr, Name
132 ": no recognisable superblock on %s.\n",
133 devname);
134 close(dfd);
135 return 1;
137 if (st->ss->load_super(st, dfd, &super, NULL)) {
138 if (verbose >= 0)
139 fprintf(stderr, Name ": no RAID superblock on %s.\n",
140 devname);
141 close(dfd);
142 return 1;
144 st->ss->getinfo_super(&info, super);
145 close (dfd);
147 /* 3/ Check if there is a match in mdadm.conf */
149 array_list = conf_get_ident(NULL);
150 match = NULL;
151 for (; array_list; array_list = array_list->next) {
152 if (array_list->uuid_set &&
153 same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid)
154 == 0) {
155 if (verbose >= 2)
156 fprintf(stderr, Name
157 ": UUID differs from %s.\n",
158 array_list->devname);
159 continue;
161 if (array_list->name[0] &&
162 strcasecmp(array_list->name, info.name) != 0) {
163 if (verbose >= 2)
164 fprintf(stderr, Name
165 ": Name differs from %s.\n",
166 array_list->devname);
167 continue;
169 if (array_list->devices &&
170 !match_oneof(array_list->devices, devname)) {
171 if (verbose >= 2)
172 fprintf(stderr, Name
173 ": Not a listed device for %s.\n",
174 array_list->devname);
175 continue;
177 if (array_list->super_minor != UnSet &&
178 array_list->super_minor != info.array.md_minor) {
179 if (verbose >= 2)
180 fprintf(stderr, Name
181 ": Different super-minor to %s.\n",
182 array_list->devname);
183 continue;
185 if (!array_list->uuid_set &&
186 !array_list->name[0] &&
187 !array_list->devices &&
188 array_list->super_minor == UnSet) {
189 if (verbose >= 2)
190 fprintf(stderr, Name
191 ": %s doesn't have any identifying information.\n",
192 array_list->devname);
193 continue;
195 /* FIXME, should I check raid_disks and level too?? */
197 if (match) {
198 if (verbose >= 0)
199 fprintf(stderr, Name
200 ": we match both %s and %s - cannot decide which to use.\n",
201 match->devname, array_list->devname);
202 return 2;
204 match = array_list;
207 /* 3a/ if not, check for homehost match. If no match, reject. */
208 if (!match) {
209 if (homehost == NULL ||
210 st->ss->match_home(super, homehost) == 0) {
211 if (verbose >= 0)
212 fprintf(stderr, Name
213 ": not found in mdadm.conf and not identified by homehost.\n");
214 return 2;
217 /* 4/ Determine device number. */
218 /* - If in mdadm.conf with std name, use that */
219 /* - UUID in /var/run/mdadm.map use that */
220 /* - If name is suggestive, use that. unless in use with */
221 /* different uuid. */
222 /* - Choose a free, high number. */
223 /* - Use a partitioned device unless strong suggestion not to. */
224 /* e.g. auto=md */
225 if (match && is_standard(match->devname, &devnum))
226 /* We have devnum now */;
227 else if ((mp = map_by_uuid(&map, info.uuid)) != NULL)
228 devnum = mp->devnum;
229 else {
230 /* Have to guess a bit. */
231 int use_partitions = 1;
232 char *np, *ep;
233 if ((autof&7) == 3 || (autof&7) == 5)
234 use_partitions = 0;
235 np = strchr(info.name, ':');
236 if (np)
237 np++;
238 else
239 np = info.name;
240 devnum = strtoul(np, &ep, 10);
241 if (ep > np && *ep == 0) {
242 /* This is a number. Let check that it is unused. */
243 if (mddev_busy(use_partitions ? (-1-devnum) : devnum))
244 devnum = -1;
245 } else
246 devnum = -1;
248 if (devnum < 0) {
249 /* Haven't found anything yet, choose something free */
250 /* There is similar code in mdopen.c - should unify */
251 for (devnum = 127 ; devnum != 128 ;
252 devnum = devnum ? devnum-1 : (1<<22)-1) {
253 if (mddev_busy(use_partitions ?
254 (-1-devnum) : devnum))
255 break;
257 if (devnum == 128) {
258 fprintf(stderr, Name
259 ": No spare md devices!!\n");
260 return 2;
263 devnum = use_partitions ? (-1-devnum) : devnum;
265 mdfd = open_mddev_devnum(match ? match->devname : NULL,
266 devnum,
267 info.name,
268 chosen_name);
269 if (mdfd < 0) {
270 fprintf(stderr, Name ": failed to open %s: %s.\n",
271 chosen_name, strerror(errno));
272 return 2;
274 /* 5/ Find out if array already exists */
275 if (! mddev_busy(devnum)) {
276 /* 5a/ if it does not */
277 /* - choose a name, from mdadm.conf or 'name' field in array. */
278 /* - create the array */
279 /* - add the device */
280 mdu_array_info_t ainf;
281 mdu_disk_info_t disk;
282 char md[20];
283 struct sysarray *sra;
285 memset(&ainf, 0, sizeof(ainf));
286 ainf.major_version = st->ss->major;
287 ainf.minor_version = st->minor_version;
288 if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) {
289 fprintf(stderr, Name
290 ": SET_ARRAY_INFO failed for %s: %s\b",
291 chosen_name, strerror(errno));
292 close(mdfd);
293 return 2;
295 sprintf(md, "%d.%d\n", st->ss->major, st->minor_version);
296 sra = sysfs_read(mdfd, devnum, GET_VERSION);
297 sysfs_set_str(sra, NULL, "metadata_version", md);
298 memset(&disk, 0, sizeof(disk));
299 disk.major = major(stb.st_rdev);
300 disk.minor = minor(stb.st_rdev);
301 sysfs_free(sra);
302 if (ioctl(mdfd, ADD_NEW_DISK, &disk) != 0) {
303 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
304 devname, chosen_name, strerror(errno));
305 ioctl(mdfd, STOP_ARRAY, 0);
306 close(mdfd);
307 return 2;
309 sra = sysfs_read(mdfd, devnum, GET_DEVS);
310 if (!sra || !sra->devs || sra->devs->role >= 0) {
311 /* It really should be 'none' - must be old buggy
312 * kernel, and mdadm -I may not be able to complete.
313 * So reject it.
315 ioctl(mdfd, STOP_ARRAY, NULL);
316 fprintf(stderr, Name
317 ": You have an old buggy kernel which cannot support\n"
318 " --incremental reliably. Aborting.\n");
319 close(mdfd);
320 sysfs_free(sra);
321 return 2;
323 } else {
324 /* 5b/ if it does */
325 /* - check one drive in array to make sure metadata is a reasonably */
326 /* close match. Reject if not (e.g. different type) */
327 /* - add the device */
328 char dn[20];
329 int dfd2;
330 mdu_disk_info_t disk;
331 int err;
332 struct sysarray *sra;
333 sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS |
334 GET_STATE));
335 if (sra->major_version != st->ss->major ||
336 sra->minor_version != st->minor_version) {
337 if (verbose >= 0)
338 fprintf(stderr, Name
339 ": %s has different metadata to chosen array %s %d.%d %d.%d.\n",
340 devname, chosen_name,
341 sra->major_version, sra->minor_version,
342 st->ss->major, st->minor_version);
343 close(mdfd);
344 return 1;
346 sprintf(dn, "%d:%d", sra->devs->major, sra->devs->minor);
347 dfd2 = dev_open(dn, O_RDONLY);
348 if (st->ss->load_super(st, dfd2,&super2, NULL)) {
349 fprintf(stderr, Name
350 ": Strange error loading metadata for %s.\n",
351 chosen_name);
352 close(mdfd);
353 close(dfd2);
354 return 2;
356 close(dfd2);
357 st->ss->getinfo_super(&info2, super2);
358 if (info.array.level != info2.array.level ||
359 memcmp(info.uuid, info2.uuid, 16) != 0 ||
360 info.array.raid_disks != info2.array.raid_disks) {
361 fprintf(stderr, Name
362 ": unexpected difference between %s and %s.\n",
363 chosen_name, devname);
364 close(mdfd);
365 return 2;
367 memset(&disk, 0, sizeof(disk));
368 disk.major = major(stb.st_rdev);
369 disk.minor = minor(stb.st_rdev);
370 err = ioctl(mdfd, ADD_NEW_DISK, &disk);
371 if (err < 0 && errno == EBUSY) {
372 /* could be another device present with the same
373 * disk.number. Find and reject any such
375 find_reject(mdfd, st, sra, info.disk.number,
376 info.events, verbose, chosen_name);
377 err = ioctl(mdfd, ADD_NEW_DISK, &disk);
379 if (err < 0) {
380 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
381 devname, chosen_name, strerror(errno));
382 close(mdfd);
383 return 2;
386 /* 6/ Make sure /var/run/mdadm.map contains this array. */
387 map_update(&map, devnum,
388 info.array.major_version,
389 info.array.minor_version,
390 info.uuid, chosen_name);
392 /* 7/ Is there enough devices to possibly start the array? */
393 /* 7a/ if not, finish with success. */
394 avail = NULL;
395 active_disks = count_active(st, mdfd, &avail, &info);
396 if (enough(info.array.level, info.array.raid_disks,
397 info.array.layout, info.array.state & 1,
398 avail, active_disks) == 0) {
399 free(avail);
400 if (verbose >= 0)
401 fprintf(stderr, Name
402 ": %s attached to %s, not enough to start (%d).\n",
403 devname, chosen_name, active_disks);
404 close(mdfd);
405 return 0;
407 free(avail);
409 /* 7b/ if yes, */
410 /* - if number of OK devices match expected, or -R and there */
411 /* are enough, */
412 /* + add any bitmap file */
413 /* + start the array (auto-readonly). */
415 mdu_array_info_t ainf;
417 if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
418 if (verbose >= 0)
419 fprintf(stderr, Name
420 ": %s attached to %s which is already active.\n",
421 devname, chosen_name);
422 close (mdfd);
423 return 0;
426 if (runstop > 0 || active_disks >= info.array.working_disks) {
427 struct sysarray *sra;
428 /* Let's try to start it */
429 if (match && match->bitmap_file) {
430 int bmfd = open(match->bitmap_file, O_RDWR);
431 if (bmfd < 0) {
432 fprintf(stderr, Name
433 ": Could not open bitmap file %s.\n",
434 match->bitmap_file);
435 close(mdfd);
436 return 1;
438 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
439 close(bmfd);
440 fprintf(stderr, Name
441 ": Failed to set bitmapfile for %s.\n",
442 chosen_name);
443 close(mdfd);
444 return 1;
446 close(bmfd);
448 sra = sysfs_read(mdfd, devnum, 0);
449 if (sra == NULL || active_disks >= info.array.working_disks)
450 rv = ioctl(mdfd, RUN_ARRAY, NULL);
451 else
452 rv = sysfs_set_str(sra, NULL,
453 "array_state", "read-auto");
454 if (rv == 0) {
455 if (verbose >= 0)
456 fprintf(stderr, Name
457 ": %s attached to %s, which has been started.\n",
458 devname, chosen_name);
459 rv = 0;
460 } else {
461 fprintf(stderr, Name
462 ": %s attached to %s, but failed to start: %s.\n",
463 devname, chosen_name, strerror(errno));
464 rv = 1;
466 } else {
467 if (verbose >= 0)
468 fprintf(stderr, Name
469 ": %s attached to %s, not enough to start safely.\n",
470 devname, chosen_name);
471 rv = 0;
473 close(mdfd);
474 return rv;
477 static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra,
478 int number, __u64 events, int verbose,
479 char *array_name)
481 /* Find an device attached to this array with a disk.number of number
482 * and events less than the passed events, and remove the device.
484 struct sysdev *d;
485 mdu_array_info_t ra;
487 if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
488 return; /* not safe to remove from active arrays
489 * without thinking more */
491 for (d = sra->devs; d ; d = d->next) {
492 char dn[10];
493 int dfd;
494 void *super;
495 struct mdinfo info;
496 sprintf(dn, "%d:%d", d->major, d->minor);
497 dfd = dev_open(dn, O_RDONLY);
498 if (dfd < 0)
499 continue;
500 if (st->ss->load_super(st, dfd, &super, NULL)) {
501 close(dfd);
502 continue;
504 st->ss->getinfo_super(&info, super);
505 free(super);
506 close(dfd);
508 if (info.disk.number != number ||
509 info.events >= events)
510 continue;
512 if (d->role > -1)
513 sysfs_set_str(sra, d, "slot", "none");
514 if (sysfs_set_str(sra, d, "state", "remove") == 0)
515 if (verbose >= 0)
516 fprintf(stderr, Name
517 ": removing old device %s from %s\n",
518 d->name+4, array_name);
522 static int count_active(struct supertype *st, int mdfd, char **availp,
523 struct mdinfo *bestinfo)
525 /* count how many devices in sra think they are active */
526 struct sysdev *d;
527 int cnt = 0, cnt1 = 0;
528 __u64 max_events = 0;
529 void *best_super = NULL;
530 struct sysarray *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
531 char *avail = NULL;
533 for (d = sra->devs ; d ; d = d->next) {
534 char dn[30];
535 int dfd;
536 void *super;
537 int ok;
538 struct mdinfo info;
540 sprintf(dn, "%d:%d", d->major, d->minor);
541 dfd = dev_open(dn, O_RDONLY);
542 if (dfd < 0)
543 continue;
544 ok = st->ss->load_super(st, dfd, &super, NULL);
545 close(dfd);
546 if (ok != 0)
547 continue;
548 st->ss->getinfo_super(&info, super);
549 if (info.disk.state & (1<<MD_DISK_SYNC))
551 if (avail == NULL) {
552 avail = malloc(info.array.raid_disks);
553 memset(avail, 0, info.array.raid_disks);
555 if (cnt == 0) {
556 cnt++;
557 max_events = info.events;
558 avail[info.disk.raid_disk] = 2;
559 best_super = super; super = NULL;
560 } else if (info.events == max_events) {
561 cnt++;
562 avail[info.disk.raid_disk] = 2;
563 } else if (info.events == max_events-1) {
564 cnt1++;
565 avail[info.disk.raid_disk] = 1;
566 } else if (info.events < max_events - 1)
568 else if (info.events == max_events+1) {
569 int i;
570 cnt1 = cnt;
571 cnt = 1;
572 max_events = info.events;
573 for (i=0; i<info.array.raid_disks; i++)
574 if (avail[i])
575 avail[i]--;
576 avail[info.disk.raid_disk] = 2;
577 free(best_super);
578 best_super = super;
579 super = NULL;
580 } else { /* info.events much bigger */
581 cnt = 1; cnt1 = 0;
582 memset(avail, 0, info.disk.raid_disk);
583 max_events = info.events;
584 free(best_super);
585 best_super = super;
586 super = NULL;
589 if (super)
590 free(super);
592 if (best_super) {
593 st->ss->getinfo_super(bestinfo,best_super);
594 free(best_super);
596 return cnt + cnt1;
599 void RebuildMap(void)
601 struct mdstat_ent *mdstat = mdstat_read(0, 0);
602 struct mdstat_ent *md;
603 struct map_ent *map = NULL;
604 int mdp = get_mdp_major();
606 for (md = mdstat ; md ; md = md->next) {
607 struct sysarray *sra = sysfs_read(-1, md->devnum, GET_DEVS);
608 struct sysdev *sd;
610 for (sd = sra->devs ; sd ; sd = sd->next) {
611 char dn[30];
612 int dfd;
613 int ok;
614 struct supertype *st;
615 char *path;
616 void *super;
617 struct mdinfo info;
619 sprintf(dn, "%d:%d", sd->major, sd->minor);
620 dfd = dev_open(dn, O_RDONLY);
621 if (dfd < 0)
622 continue;
623 st = guess_super(dfd);
624 if ( st == NULL)
625 ok = -1;
626 else
627 ok = st->ss->load_super(st, dfd, &super, NULL);
628 close(dfd);
629 if (ok != 0)
630 continue;
631 st->ss->getinfo_super(&info, super);
632 if (md->devnum > 0)
633 path = map_dev(MD_MAJOR, md->devnum, 0);
634 else
635 path = map_dev(mdp, (-1-md->devnum)<< 6, 0);
636 map_add(&map, md->devnum, st->ss->major,
637 st->minor_version,
638 info.uuid, path ? : "/unknown");
639 free(super);
640 break;
643 map_write(map);
644 map_free(map);
647 int IncrementalScan(int verbose)
649 /* look at every device listed in the 'map' file.
650 * If one is found that is not running then:
651 * look in mdadm.conf for bitmap file.
652 * if one exists, but array has none, add it.
653 * try to start array in auto-readonly mode
655 struct map_ent *mapl = NULL;
656 struct map_ent *me;
657 mddev_ident_t devs, mddev;
658 int rv = 0;
660 map_read(&mapl);
661 devs = conf_get_ident(NULL);
663 for (me = mapl ; me ; me = me->next) {
664 char path[1024];
665 mdu_array_info_t array;
666 mdu_bitmap_file_t bmf;
667 struct sysarray *sra;
668 int mdfd = open_mddev_devnum(me->path, me->devnum, NULL, path);
669 if (mdfd < 0)
670 continue;
671 if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
672 errno != ENODEV) {
673 close(mdfd);
674 continue;
676 /* Ok, we can try this one. Maybe it needs a bitmap */
677 for (mddev = devs ; mddev ; mddev = mddev->next)
678 if (strcmp(mddev->devname, me->path) == 0)
679 break;
680 if (mddev && mddev->bitmap_file) {
682 * Note: early kernels will wrongly fail this, so it
683 * is a hint only
685 int added = -1;
686 if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
687 int bmfd = open(mddev->bitmap_file, O_RDWR);
688 if (bmfd >= 0) {
689 added = ioctl(mdfd, SET_BITMAP_FILE,
690 bmfd);
691 close(bmfd);
694 if (verbose >= 0) {
695 if (added == 0)
696 fprintf(stderr, Name
697 ": Added bitmap %s to %s\n",
698 mddev->bitmap_file, me->path);
699 else if (errno != EEXIST)
700 fprintf(stderr, Name
701 ": Failed to add bitmap to %s: %s\n",
702 me->path, strerror(errno));
705 sra = sysfs_read(mdfd, 0, 0);
706 if (sra) {
707 if (sysfs_set_str(sra, NULL,
708 "array_state", "read-auto") == 0) {
709 if (verbose >= 0)
710 fprintf(stderr, Name
711 ": started array %s\n",
712 me->path);
713 } else {
714 fprintf(stderr, Name
715 ": failed to start array %s: %s\n",
716 me->path, strerror(errno));
717 rv = 1;
721 return rv;