4 * Don't store drive configuration on the config DB: read each drive's header
5 * to decide where it is.
7 * Accept any old crap in the config_<foo> functions, and complain when
8 * we try to bring it up.
10 * When trying to bring volumes up, check that the complete address range
14 * Copyright (c) 1997, 1998
15 * Nan Yang Computer Services Limited. All rights reserved.
17 * This software is distributed under the so-called ``Berkeley
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 * 3. All advertising materials mentioning features or use of this software
29 * must display the following acknowledgement:
30 * This product includes software developed by Nan Yang Computer
32 * 4. Neither the name of the Company nor the names of its contributors
33 * may be used to endorse or promote products derived from this software
34 * without specific prior written permission.
36 * This software is provided ``as is'', and any express or implied
37 * warranties, including, but not limited to, the implied warranties of
38 * merchantability and fitness for a particular purpose are disclaimed.
39 * In no event shall the company or contributors be liable for any
40 * direct, indirect, incidental, special, exemplary, or consequential
41 * damages (including, but not limited to, procurement of substitute
42 * goods or services; loss of use, data, or profits; or business
43 * interruption) however caused and on any theory of liability, whether
44 * in contract, strict liability, or tort (including negligence or
45 * otherwise) arising in any way out of the use of this software, even if
46 * advised of the possibility of such damage.
48 * $Id: vinumconfig.c,v 1.30 2000/05/01 09:45:50 grog Exp grog $
49 * $FreeBSD: src/sys/dev/vinum/vinumconfig.c,v 1.32.2.6 2002/02/03 00:43:35 grog Exp $
58 #define MAXTOKEN 64 /* maximum number of tokens in a line */
61 * We can afford the luxury of global variables here,
62 * since start_config ensures that these functions
63 * are single-threaded.
66 /* These are indices in vinum_conf of the last-mentioned of each kind of object */
67 static int current_drive
; /* note the last drive we mention, for
69 static int current_plex
; /* and the same for the last plex */
70 static int current_volume
; /* and the last volume */
71 static struct _ioctl_reply
*ioctl_reply
; /* struct to return via ioctl */
73 static void made_sd(struct sd
*sd
);
74 static void made_vol(struct volume
*vol
);
75 static void made_plex(struct plex
*plex
);
77 /* These values are used by most of these routines, so set them as globals */
78 static char *token
[MAXTOKEN
]; /* pointers to individual tokens */
79 static int tokens
; /* number of tokens */
90 #define MSG_MAX 1024 /* maximum length of a formatted message */
92 * Format an error message and return to the user in the reply.
93 * CARE: This routine is designed to be called only from the
94 * configuration routines, so it assumes it's the owner of
95 * the configuration lock, and unlocks it on exit
98 throw_rude_remark(int error
, char *msg
,...)
102 static int finishing
; /* don't recurse */
105 if ((vinum_conf
.flags
& VF_LOCKED
) == 0) /* bug catcher */
106 panic ("throw_rude_remark: called without config lock");
108 if ((ioctl_reply
!= NULL
) /* we're called from the user */
109 &&(!(vinum_conf
.flags
& VF_READING_CONFIG
))) { /* and not reading from disk: return msg */
111 * We can't just format to ioctl_reply, since it
112 * may contain our input parameters
114 kvasnprintf(&text
, MSG_MAX
, msg
, ap
);
115 strcpy(ioctl_reply
->msg
, text
);
116 ioctl_reply
->error
= error
; /* first byte is the error number */
120 kvprintf(msg
, ap
); /* print to the console */
125 if (vinum_conf
.flags
& VF_READING_CONFIG
) { /* go through to the bitter end, */
126 if ((vinum_conf
.flags
& VF_READING_CONFIG
) /* we're reading from disk, */
127 &&((daemon_options
& daemon_noupdate
) == 0)) {
128 log(LOG_NOTICE
, "Disabling configuration updates\n");
129 daemon_options
|= daemon_noupdate
;
134 * We have a problem here: we want to unlock the
135 * configuration, which implies tidying up, but
136 * if we find an error while tidying up, we could
137 * recurse for ever. Use this kludge to only try
140 was_finishing
= finishing
;
142 finish_config(was_finishing
); /* unlock anything we may be holding */
143 finishing
= was_finishing
;
144 longjmp(command_fail
, error
);
148 * Check a volume to see if the plex is already assigned to it.
149 * Return index in volume->plex, or -1 if not assigned
152 my_plex(int volno
, int plexno
)
157 vol
= &VOL
[volno
]; /* point to volno */
158 for (i
= 0; i
< vol
->plexes
; i
++)
159 if (vol
->plex
[i
] == plexno
)
161 return -1; /* not found */
165 * Check a plex to see if the subdisk is already assigned to it.
166 * Return index in plex->sd, or -1 if not assigned
169 my_sd(int plexno
, int sdno
)
174 plex
= &PLEX
[plexno
];
175 for (i
= 0; i
< plex
->subdisks
; i
++)
176 if (plex
->sdnos
[i
] == sdno
)
178 return -1; /* not found */
181 /* Add plex to the volume if possible */
183 give_plex_to_volume(int volno
, int plexno
)
189 * It's not an error for the plex to already
190 * belong to the volume, but we need to check a
191 * number of things to make sure it's done right.
194 if (my_plex(volno
, plexno
) >= 0)
195 return plexno
; /* that's it */
197 vol
= &VOL
[volno
]; /* point to volume */
198 if (vol
->plexes
== MAXPLEX
) /* all plexes allocated */
199 throw_rude_remark(ENOSPC
,
200 "Too many plexes for volume %s",
202 else if ((vol
->plexes
> 0) /* we have other plexes */
203 &&((vol
->flags
& VF_CONFIG_SETUPSTATE
) == 0)) /* and we're not setting up state */
204 invalidate_subdisks(&PLEX
[plexno
], sd_stale
); /* make the subdisks invalid */
205 vol
->plex
[vol
->plexes
] = plexno
; /* this one */
206 vol
->plexes
++; /* add another plex */
207 PLEX
[plexno
].volno
= volno
; /* note the number of our volume */
209 /* Find out how big our volume is */
210 for (i
= 0; i
< vol
->plexes
; i
++)
211 vol
->size
= u64max(vol
->size
, PLEX
[vol
->plex
[i
]].length
);
212 return vol
->plexes
- 1; /* and return its index */
216 * Add subdisk to a plex if possible
219 give_sd_to_plex(int plexno
, int sdno
)
226 * It's not an error for the sd to already
227 * belong to the plex, but we need to check a
228 * number of things to make sure it's done right.
231 i
= my_sd(plexno
, sdno
);
232 if (i
>= 0) /* does it already belong to us? */
233 return i
; /* that's it */
235 plex
= &PLEX
[plexno
]; /* point to the plex */
236 sd
= &SD
[sdno
]; /* and the subdisk */
238 /* Do we have an offset? Otherwise put it after the last one */
239 if (sd
->plexoffset
< 0) { /* no offset specified */
240 if (plex
->subdisks
> 0) {
241 struct sd
*lastsd
= &SD
[plex
->sdnos
[plex
->subdisks
- 1]]; /* last subdisk */
243 if (plex
->organization
== plex_concat
) /* concat, */
244 sd
->plexoffset
= lastsd
->sectors
+ lastsd
->plexoffset
; /* starts here */
245 else /* striped, RAID-4 or RAID-5 */
246 sd
->plexoffset
= plex
->stripesize
* plex
->subdisks
; /* starts here */
247 } else /* first subdisk */
248 sd
->plexoffset
= 0; /* start at the beginning */
250 if (plex
->subdisks
== MAXSD
) /* we already have our maximum */
251 throw_rude_remark(ENOSPC
, /* crap out */
252 "Can't add %s to %s: plex full",
256 plex
->subdisks
++; /* another entry */
257 if (plex
->subdisks
>= plex
->subdisks_allocated
) /* need more space */
258 EXPAND(plex
->sdnos
, int, plex
->subdisks_allocated
, INITIAL_SUBDISKS_IN_PLEX
);
260 /* Adjust size of plex and volume. */
261 if (isparity(plex
)) /* RAID-4 or RAID-5 */
262 plex
->length
= (plex
->subdisks
- 1) * sd
->sectors
; /* size is one disk short */
264 plex
->length
+= sd
->sectors
; /* plex gets this much bigger */
265 if (plex
->volno
>= 0) /* we have a volume */
266 VOL
[plex
->volno
].size
= u64max(VOL
[plex
->volno
].size
, plex
->length
); /* adjust its size */
269 * We need to check that the subdisks don't overlap,
270 * but we can't do that until a point where we *must*
271 * know the size of all the subdisks. That's not
272 * here. But we need to sort them by offset
274 for (i
= 0; i
< plex
->subdisks
- 1; i
++) {
275 if (sd
->plexoffset
< SD
[plex
->sdnos
[i
]].plexoffset
) { /* it fits before this one */
276 /* First move any remaining subdisks by one */
279 for (j
= plex
->subdisks
- 1; j
> i
; j
--) /* move up one at a time */
280 plex
->sdnos
[j
] = plex
->sdnos
[j
- 1];
281 plex
->sdnos
[i
] = sdno
;
282 sd
->plexsdno
= i
; /* note where we are in the subdisk */
288 * The plex doesn't have any subdisk with a
289 * larger offset. Insert it here.
291 plex
->sdnos
[i
] = sdno
;
292 sd
->plexsdno
= i
; /* note where we are in the subdisk */
293 sd
->plexno
= plex
->plexno
; /* and who we belong to */
298 * Add a subdisk to drive if possible. The
299 * pointer to the drive must already be stored in
300 * the sd structure, but the drive doesn't know
301 * about the subdisk yet.
304 give_sd_to_drive(int sdno
)
306 struct sd
*sd
; /* pointer to subdisk */
307 struct drive
*drive
; /* and drive */
308 int fe
; /* index in free list */
309 int sfe
; /* and index of subdisk when assigning max */
311 sd
= &SD
[sdno
]; /* point to sd */
312 drive
= &DRIVE
[sd
->driveno
]; /* and drive */
314 if (drive
->state
!= drive_up
) {
315 update_sd_state(sdno
); /* that crashes the subdisk */
318 if (drive
->flags
& VF_HOTSPARE
) /* the drive is a hot spare, */
319 throw_rude_remark(ENOSPC
,
320 "Can't place %s on hot spare drive %s",
323 if ((drive
->sectors_available
== 0) /* no space left */
324 ||(sd
->sectors
> drive
->sectors_available
)) { /* or too big, */
325 sd
->driveoffset
= -1; /* don't be confusing */
327 throw_rude_remark(ENOSPC
, "No space for %s on %s", sd
->name
, drive
->label
.name
);
328 return; /* in case we come back here */
330 drive
->subdisks_used
++; /* one more subdisk */
332 if (sd
->sectors
== 0) { /* take the largest chunk */
333 sfe
= 0; /* to keep the compiler happy */
334 for (fe
= 0; fe
< drive
->freelist_entries
; fe
++) {
335 if (drive
->freelist
[fe
].sectors
>= sd
->sectors
) { /* more space here */
336 sd
->sectors
= drive
->freelist
[fe
].sectors
; /* take it */
337 sd
->driveoffset
= drive
->freelist
[fe
].offset
;
338 sfe
= fe
; /* and note the index for later */
341 if (sd
->sectors
== 0) { /* no luck, */
342 sd
->driveoffset
= -1; /* don't be confusing */
344 throw_rude_remark(ENOSPC
, /* give up */
345 "No space for %s on %s",
349 if (sfe
< (drive
->freelist_entries
- 1)) /* not the last one, */
350 bcopy(&drive
->freelist
[sfe
+ 1],
351 &drive
->freelist
[sfe
],
352 (drive
->freelist_entries
- sfe
) * sizeof(struct drive_freelist
));
353 drive
->freelist_entries
--; /* one less entry */
354 drive
->sectors_available
-= sd
->sectors
; /* and note how much less space we have */
355 } else if (sd
->driveoffset
< 0) { /* no offset specified, find one */
356 for (fe
= 0; fe
< drive
->freelist_entries
; fe
++) {
357 if (drive
->freelist
[fe
].sectors
>= sd
->sectors
) { /* it'll fit here */
358 sd
->driveoffset
= drive
->freelist
[fe
].offset
;
359 if (sd
->sectors
== drive
->freelist
[fe
].sectors
) { /* used up the entire entry */
360 if (fe
< (drive
->freelist_entries
- 1)) /* not the last one, */
361 bcopy(&drive
->freelist
[fe
+ 1],
362 &drive
->freelist
[fe
],
363 (drive
->freelist_entries
- fe
) * sizeof(struct drive_freelist
));
364 drive
->freelist_entries
--; /* one less entry */
366 drive
->freelist
[fe
].sectors
-= sd
->sectors
; /* this much less space */
367 drive
->freelist
[fe
].offset
+= sd
->sectors
; /* this much further on */
369 drive
->sectors_available
-= sd
->sectors
; /* and note how much less space we have */
373 if (sd
->driveoffset
< 0)
375 * Didn't find anything. Although the drive has
376 * enough space, it's too fragmented
380 throw_rude_remark(ENOSPC
, "No space for %s on %s", sd
->name
, drive
->label
.name
);
382 } else { /* specific offset */
384 * For a specific offset to work, the space must be
385 * entirely in a single freelist entry. Look for it.
387 u_int64_t sdend
= sd
->driveoffset
+ sd
->sectors
; /* end of our subdisk */
388 for (fe
= 0; fe
< drive
->freelist_entries
; fe
++) {
389 u_int64_t dend
= drive
->freelist
[fe
].offset
+ drive
->freelist
[fe
].sectors
; /* end of entry */
390 if (dend
>= sdend
) { /* fits before here */
391 if (drive
->freelist
[fe
].offset
> sd
->driveoffset
) { /* starts after the beginning of sd area */
392 sd
->driveoffset
= -1; /* don't be confusing */
393 set_sd_state(sd
->sdno
, sd_down
, setstate_force
);
394 throw_rude_remark(ENOSPC
,
395 "No space for %s on drive %s at offset %jd",
398 (intmax_t)sd
->driveoffset
);
402 * We've found the space, and we can allocate it.
403 * We don't need to say that to the subdisk, which
404 * already knows about it. We need to tell it to
405 * the free list, though. We have four possibilities:
407 * 1. The subdisk exactly eats up the entry. That's the
409 * 2. The subdisk starts at the beginning and leaves space
411 * 3. The subdisk starts after the beginning and leaves
412 * space at the end as well: we end up with another
414 * 4. The subdisk leaves space at the beginning and finishes
417 drive
->sectors_available
-= sd
->sectors
; /* note how much less space we have */
418 if (sd
->driveoffset
== drive
->freelist
[fe
].offset
) { /* 1 or 2 */
419 if (sd
->sectors
== drive
->freelist
[fe
].sectors
) { /* 1: used up the entire entry */
420 if (fe
< (drive
->freelist_entries
- 1)) /* not the last one, */
421 bcopy(&drive
->freelist
[fe
+ 1],
422 &drive
->freelist
[fe
],
423 (drive
->freelist_entries
- fe
) * sizeof(struct drive_freelist
));
424 drive
->freelist_entries
--; /* one less entry */
425 } else { /* 2: space at the end */
426 drive
->freelist
[fe
].sectors
-= sd
->sectors
; /* this much less space */
427 drive
->freelist
[fe
].offset
+= sd
->sectors
; /* this much further on */
429 } else { /* 3 or 4 */
430 drive
->freelist
[fe
].sectors
= sd
->driveoffset
- drive
->freelist
[fe
].offset
;
431 if (dend
> sdend
) { /* 3: space at the end as well */
432 if (fe
< (drive
->freelist_entries
- 1)) /* not the last one */
433 bcopy(&drive
->freelist
[fe
], /* move the rest down */
434 &drive
->freelist
[fe
+ 1],
435 (drive
->freelist_entries
- fe
) * sizeof(struct drive_freelist
));
436 drive
->freelist_entries
++; /* one less entry */
437 drive
->freelist
[fe
+ 1].offset
= sdend
; /* second entry starts after sd */
438 drive
->freelist
[fe
+ 1].sectors
= dend
- sdend
; /* and is this long */
445 drive
->opencount
++; /* one more subdisk attached */
448 /* Get an empty drive entry from the drive table */
450 get_empty_drive(void)
455 /* first see if we have one which has been deallocated */
456 for (driveno
= 0; driveno
< vinum_conf
.drives_allocated
; driveno
++) {
457 if (DRIVE
[driveno
].state
== drive_unallocated
) /* bingo */
461 if (driveno
>= vinum_conf
.drives_allocated
) /* we've used all our allocation */
462 EXPAND(DRIVE
, struct drive
, vinum_conf
.drives_allocated
, INITIAL_DRIVES
);
464 /* got a drive entry. Make it pretty */
465 drive
= &DRIVE
[driveno
];
466 bzero(drive
, sizeof(struct drive
));
467 drive
->driveno
= driveno
; /* put number in structure */
468 drive
->flags
|= VF_NEWBORN
; /* newly born drive */
469 strcpy(drive
->devicename
, "unknown"); /* and make the name ``unknown'' */
470 return driveno
; /* return the index */
474 * Find the named drive in vinum_conf.drive, return a pointer
475 * return the index in vinum_conf.drive.
476 * Don't mark the drive as allocated (XXX SMP)
477 * If create != 0, create an entry if it doesn't exist
479 /* XXX check if we have it open from attach */
481 find_drive(const char *name
, int create
)
487 for (driveno
= 0; driveno
< vinum_conf
.drives_allocated
; driveno
++) {
488 drive
= &DRIVE
[driveno
]; /* point to drive */
489 if ((drive
->label
.name
[0] != '\0') /* it has a name */
490 &&(strcmp(drive
->label
.name
, name
) == 0) /* and it's this one */
491 &&(drive
->state
> drive_unallocated
)) /* and it's a real one: found */
495 /* the drive isn't in the list. Add it if he wants */
496 if (create
== 0) /* don't want to create */
497 return -1; /* give up */
499 driveno
= get_empty_drive();
500 drive
= &DRIVE
[driveno
];
502 ksnprintf(drive
->label
.name
, sizeof(drive
->label
.name
), "%s", name
);
503 drive
->state
= drive_referenced
; /* in use, nothing worthwhile there */
504 return driveno
; /* return the index */
508 * Find a drive given its device name.
509 * devname must be valid.
510 * Otherwise the same as find_drive above
513 find_drive_by_dev(const char *devname
, int create
)
518 for (driveno
= 0; driveno
< vinum_conf
.drives_allocated
; driveno
++) {
519 drive
= &DRIVE
[driveno
];
520 if (strcmp(drive
->devicename
, devname
) == 0 &&
521 drive
->state
> drive_unallocated
530 driveno
= get_empty_drive();
531 drive
= &DRIVE
[driveno
];
532 ksnprintf(drive
->devicename
, sizeof(drive
->devicename
), "%s", devname
);
533 /* in use, nothing worthwhile there */
534 drive
->state
= drive_referenced
;
538 /* Find an empty subdisk in the subdisk table */
545 /* first see if we have one which has been deallocated */
546 for (sdno
= 0; sdno
< vinum_conf
.subdisks_allocated
; sdno
++) {
547 if (SD
[sdno
].state
== sd_unallocated
) /* bingo */
550 if (sdno
>= vinum_conf
.subdisks_allocated
)
552 * We've run out of space. sdno is pointing
553 * where we want it, but at the moment we
554 * don't have the space. Get it.
556 EXPAND(SD
, struct sd
, vinum_conf
.subdisks_allocated
, INITIAL_SUBDISKS
);
558 /* initialize some things */
559 sd
= &SD
[sdno
]; /* point to it */
560 bzero(sd
, sizeof(struct sd
)); /* initialize */
561 sd
->flags
|= VF_NEWBORN
; /* newly born subdisk */
562 sd
->plexno
= -1; /* no plex */
563 sd
->sectors
= -1; /* no space */
564 sd
->driveno
= -1; /* no drive */
565 sd
->plexoffset
= -1; /* and no offsets */
566 sd
->driveoffset
= -1;
567 return sdno
; /* return the index */
570 /* return a drive to the free pool */
572 free_drive(struct drive
*drive
)
574 if ((drive
->state
> drive_referenced
) /* real drive */
575 ||(drive
->flags
& VF_OPEN
)) { /* how can it be open without a state? */
577 if (drive
->flags
& VF_OPEN
) { /* it's open, */
578 close_locked_drive(drive
); /* close it */
579 drive
->state
= drive_down
; /* and note the fact */
582 Free(drive
->freelist
);
583 bzero(drive
, sizeof(struct drive
)); /* this also sets drive_unallocated */
589 * Find the named subdisk in vinum_conf.sd.
591 * If create != 0, create an entry if it doesn't exist
593 * Return index in vinum_conf.sd
596 find_subdisk(const char *name
, int create
)
601 for (sdno
= 0; sdno
< vinum_conf
.subdisks_allocated
; sdno
++) {
602 if (strcmp(SD
[sdno
].name
, name
) == 0) /* found it */
606 /* the subdisk isn't in the list. Add it if he wants */
607 if (create
== 0) /* don't want to create */
608 return -1; /* give up */
610 /* Allocate one and insert the name */
611 sdno
= get_empty_sd();
613 ksnprintf(sd
->name
, sizeof(sd
->name
), "%s", name
);
614 return sdno
; /* return the pointer */
617 /* Return space to a drive */
619 return_drive_space(int driveno
, int64_t offset
, int length
)
622 int fe
; /* free list entry */
623 u_int64_t sdend
; /* end of our subdisk */
624 u_int64_t dend
; /* end of our freelist entry */
626 drive
= &DRIVE
[driveno
];
627 if (drive
->state
== drive_up
) {
628 sdend
= offset
+ length
; /* end of our subdisk */
630 /* Look for where to return the sd address space */
632 (fe
< drive
->freelist_entries
) && (drive
->freelist
[fe
].offset
< offset
);
635 * Now we are pointing to the last entry, the first
636 * with a higher offset than the subdisk, or both.
638 if ((fe
> 1) /* not the first entry */
639 &&((fe
== drive
->freelist_entries
) /* gone past the end */
640 ||(drive
->freelist
[fe
].offset
> offset
))) /* or past the block were looking for */
641 fe
--; /* point to the block before */
642 dend
= drive
->freelist
[fe
].offset
+ drive
->freelist
[fe
].sectors
; /* end of the entry */
645 * At this point, we are pointing to the correct
646 * place in the free list. A number of possibilities
649 * 1. The block to be freed starts at the end of the
650 * block to which we are pointing. This has two
653 * a. The block to be freed ends at the beginning
654 * of the following block. Merge the three
655 * areas into a single block.
657 * b. The block is shorter than the space between
658 * the current block and the next one. Enlarge
661 * 2. The block to be freed starts after the end
662 * of the block. Again, we have two cases:
664 * a. It ends before the start of the following block.
665 * Create a new free block.
667 * b. It ends at the start of the following block.
668 * Enlarge the following block downwards.
670 * When there is only one free space block, and the
671 * space to be returned is before it, the pointer is
672 * to a non-existent zeroth block. XXX check this
674 if (offset
== dend
) { /* Case 1: it starts at the end of this block */
675 if ((fe
< drive
->freelist_entries
- 1) /* we're not the last block in the free list */
676 /* and the subdisk ends at the start of the next block */
677 &&(sdend
== drive
->freelist
[fe
+ 1].offset
)) {
678 drive
->freelist
[fe
].sectors
/* 1a: merge all three blocks */
679 = drive
->freelist
[fe
+ 1].sectors
;
680 if (fe
< drive
->freelist_entries
- 2) /* still more blocks after next */
681 bcopy(&drive
->freelist
[fe
+ 2], /* move down one */
682 &drive
->freelist
[fe
+ 1],
683 (drive
->freelist_entries
- 2 - fe
)
684 * sizeof(struct drive_freelist
));
685 drive
->freelist_entries
--; /* one less entry in the free list */
686 } else /* 1b: just enlarge this block */
687 drive
->freelist
[fe
].sectors
+= length
;
688 } else { /* Case 2 */
689 if (offset
> dend
) /* it starts after this block */
690 fe
++; /* so look at the next block */
691 if ((fe
< drive
->freelist_entries
) /* we're not the last block in the free list */
692 /* and the subdisk ends at the start of this block: case 4 */
693 &&(sdend
== drive
->freelist
[fe
].offset
)) {
694 drive
->freelist
[fe
].offset
= offset
; /* it starts where the sd was */
695 drive
->freelist
[fe
].sectors
+= length
; /* and it's this much bigger */
696 } else { /* case 3: non-contiguous */
697 if (fe
< drive
->freelist_entries
) /* not after the last block, */
698 bcopy(&drive
->freelist
[fe
], /* move the rest up one entry */
699 &drive
->freelist
[fe
+ 1],
700 (drive
->freelist_entries
- fe
)
701 * sizeof(struct drive_freelist
));
702 drive
->freelist_entries
++; /* one less entry */
703 drive
->freelist
[fe
].offset
= offset
; /* this entry represents the sd */
704 drive
->freelist
[fe
].sectors
= length
;
707 drive
->sectors_available
+= length
; /* the sectors are now available */
712 * Free an allocated sd entry.
713 * This performs memory management only. remove()
714 * is responsible for checking relationships.
722 if ((sd
->driveno
>= 0) /* we have a drive, */
723 &&(sd
->sectors
> 0)) /* and some space on it */
724 return_drive_space(sd
->driveno
, /* return the space */
728 PLEX
[sd
->plexno
].subdisks
--; /* one less subdisk */
729 sd
->state
= sd_unallocated
;
731 bzero(sd
, sizeof(struct sd
)); /* and clear it out */
732 sd
->state
= sd_unallocated
;
733 vinum_conf
.subdisks_used
--; /* one less sd */
737 made_sd(struct sd
*sd
)
739 if (sd
->sd_dev
== NULL
&& sd
->state
!= sd_unallocated
) {
740 sd
->sd_dev
= make_dev(&vinum_ops
, VINUM_SD(sd
->sdno
),
741 UID_ROOT
, GID_OPERATOR
, 0640,
742 VINUM_BASE
"sd/%s", sd
->name
);
743 udev_dict_set_cstr(sd
->sd_dev
, "subsystem", "raid");
744 udev_dict_set_cstr(sd
->sd_dev
, "disk-type", "raid");
746 if (sd
->plexno
>= 0 && PLEX
[sd
->plexno
].volno
>= 0) {
747 make_dev_alias(sd
->sd_dev
, "vol/%s.plex/%s",
748 VOL
[PLEX
[sd
->plexno
].volno
].name
,
749 plex
->name
, VOL
[plex
->volno
].name
);
753 if (sd
->sd_dev
&& sd
->state
== sd_unallocated
) {
754 destroy_dev(sd
->sd_dev
);
760 made_vol(struct volume
*vol
)
762 if (vol
->vol_dev
== NULL
&& vol
->state
!= volume_unallocated
) {
763 vol
->vol_dev
= make_dev(&vinum_ops
,
764 VINUMDEV(vol
->volno
, 0, 0, VINUM_VOLUME_TYPE
),
765 UID_ROOT
, GID_OPERATOR
, 0640,
766 VINUM_BASE
"vol/%s", vol
->name
);
767 udev_dict_set_cstr(vol
->vol_dev
, "subsystem", "raid");
768 udev_dict_set_cstr(vol
->vol_dev
, "disk-type", "raid");
770 if (vol
->vol_dev
&& vol
->state
== volume_unallocated
) {
771 destroy_dev(vol
->vol_dev
);
777 made_plex(struct plex
*plex
)
779 if (plex
->plex_dev
== NULL
&& plex
->state
!= plex_unallocated
) {
780 plex
->plex_dev
= make_dev(&vinum_ops
, VINUM_PLEX(plex
->plexno
),
781 UID_ROOT
, GID_OPERATOR
, 0640,
782 VINUM_BASE
"plex/%s", plex
->name
);
783 udev_dict_set_cstr(plex
->plex_dev
, "subsystem", "raid");
784 udev_dict_set_cstr(plex
->plex_dev
, "disk-type", "raid");
785 if (plex
->volno
>= 0) {
786 make_dev_alias(plex
->plex_dev
, "vol/%s.plex/%s",
787 plex
->name
, VOL
[plex
->volno
].name
);
790 if (plex
->plex_dev
&& plex
->state
== plex_unallocated
) {
791 destroy_dev(plex
->plex_dev
);
792 plex
->plex_dev
= NULL
;
796 /* Find an empty plex in the plex table */
801 struct plex
*plex
; /* if we allocate one */
803 /* first see if we have one which has been deallocated */
804 for (plexno
= 0; plexno
< vinum_conf
.plexes_allocated
; plexno
++) {
805 if (PLEX
[plexno
].state
== plex_unallocated
) /* bingo */
806 break; /* and get out of here */
809 if (plexno
>= vinum_conf
.plexes_allocated
)
810 EXPAND(PLEX
, struct plex
, vinum_conf
.plexes_allocated
, INITIAL_PLEXES
);
812 /* Found a plex. Give it an sd structure */
813 plex
= &PLEX
[plexno
]; /* this one is ours */
814 bzero(plex
, sizeof(struct plex
)); /* polish it up */
815 plex
->sdnos
= (int *) Malloc(sizeof(int) * INITIAL_SUBDISKS_IN_PLEX
); /* allocate sd table */
816 CHECKALLOC(plex
->sdnos
, "vinum: Can't allocate plex subdisk table");
817 bzero(plex
->sdnos
, (sizeof(int) * INITIAL_SUBDISKS_IN_PLEX
)); /* do we need this? */
818 plex
->flags
|= VF_NEWBORN
; /* newly born plex */
819 plex
->subdisks
= 0; /* no subdisks in use */
820 plex
->subdisks_allocated
= INITIAL_SUBDISKS_IN_PLEX
; /* and we have space for this many */
821 plex
->organization
= plex_disorg
; /* and it's not organized */
822 plex
->volno
= -1; /* no volume yet */
823 return plexno
; /* return the index */
827 * Find the named plex in vinum_conf.plex
829 * If create != 0, create an entry if it doesn't exist
830 * return index in vinum_conf.plex
833 find_plex(const char *name
, int create
)
838 for (plexno
= 0; plexno
< vinum_conf
.plexes_allocated
; plexno
++) {
839 if (strcmp(PLEX
[plexno
].name
, name
) == 0) /* found it */
843 /* the plex isn't in the list. Add it if he wants */
844 if (create
== 0) /* don't want to create */
845 return -1; /* give up */
847 /* Allocate one and insert the name */
848 plexno
= get_empty_plex();
849 plex
= &PLEX
[plexno
]; /* point to it */
850 ksnprintf(plex
->name
, sizeof(plex
->name
), "%s", name
);
851 return plexno
; /* return the pointer */
855 * Free an allocated plex entry
856 * and its associated memory areas
859 free_plex(int plexno
)
863 plex
= &PLEX
[plexno
];
868 plex
->state
= plex_unallocated
;
870 bzero(plex
, sizeof(struct plex
)); /* and clear it out */
871 plex
->state
= plex_unallocated
;
874 /* Find an empty volume in the volume table */
876 get_empty_volume(void)
882 /* first see if we have one which has been deallocated */
883 for (volno
= 0; volno
< vinum_conf
.volumes_allocated
; volno
++) {
884 if (VOL
[volno
].state
== volume_unallocated
) /* bingo */
888 if (volno
>= vinum_conf
.volumes_allocated
)
889 EXPAND(VOL
, struct volume
, vinum_conf
.volumes_allocated
, INITIAL_VOLUMES
);
891 /* Now initialize fields */
893 bzero(vol
, sizeof(struct volume
));
894 vol
->flags
|= VF_NEWBORN
| VF_CREATED
; /* newly born volume */
895 vol
->preferred_plex
= ROUND_ROBIN_READPOL
; /* round robin */
896 for (i
= 0; i
< MAXPLEX
; i
++) /* mark the plexes missing */
898 return volno
; /* return the index */
902 * Find the named volume in vinum_conf.volume.
904 * If create != 0, create an entry if it doesn't exist
905 * return the index in vinum_conf
908 find_volume(const char *name
, int create
)
913 for (volno
= 0; volno
< vinum_conf
.volumes_allocated
; volno
++) {
914 if (strcmp(VOL
[volno
].name
, name
) == 0) /* found it */
918 /* the volume isn't in the list. Add it if he wants */
919 if (create
== 0) /* don't want to create */
920 return -1; /* give up */
922 /* Allocate one and insert the name */
923 volno
= get_empty_volume();
925 ksnprintf(vol
->name
, sizeof(vol
->name
), "%s", name
);
926 vol
->blocksize
= DEV_BSIZE
; /* block size of this volume */
927 return volno
; /* return the pointer */
931 * Free an allocated volume entry
932 * and its associated memory areas
935 free_volume(int volno
)
940 vol
->state
= volume_unallocated
;
942 bzero(vol
, sizeof(struct volume
)); /* and clear it out */
943 vol
->state
= volume_unallocated
;
947 * Handle a drive definition. We store the information in the global variable
948 * drive, so we don't need to allocate.
950 * If we find an error, print a message and return
953 config_drive(int update
)
955 enum drive_label_info partition_status
; /* info about the partition */
957 int driveno
; /* index of drive in vinum_conf */
958 struct drive
*drive
; /* and pointer to it */
959 int otherdriveno
; /* index of possible second drive */
962 if (tokens
< 2) /* not enough tokens */
963 throw_rude_remark(EINVAL
, "Drive has no name\n");
964 driveno
= find_drive(token
[1], 1); /* allocate a drive to initialize */
965 drive
= &DRIVE
[driveno
]; /* and get a pointer */
966 if (update
&& ((drive
->flags
& VF_NEWBORN
) == 0)) /* this drive exists already */
967 return; /* don't do anything */
968 drive
->flags
&= ~VF_NEWBORN
; /* no longer newly born */
970 if (drive
->state
!= drive_referenced
) { /* we already know this drive */
972 * XXX Check which definition is more up-to-date. Give
973 * preference for the definition on its own drive.
977 for (parameter
= 2; parameter
< tokens
; parameter
++) { /* look at the other tokens */
978 switch (get_keyword(token
[parameter
], &keyword_set
)) {
981 otherdriveno
= find_drive_by_dev(token
[parameter
], 0); /* see if it exists already */
982 if (otherdriveno
>= 0) { /* yup, */
983 drive
->state
= drive_unallocated
; /* deallocate the drive */
984 throw_rude_remark(EEXIST
, /* and complain */
985 "Drive %s would have same device as drive %s",
987 DRIVE
[otherdriveno
].label
.name
);
989 if (drive
->devicename
[0] == '/') { /* we know this drive... */
990 if (strcmp(drive
->devicename
, token
[parameter
])) /* different name */
991 close_drive(drive
); /* close it if it's open */
997 * open the device and get the configuration
999 ksnprintf(drive
->devicename
, sizeof(drive
->devicename
),
1000 "%s", token
[parameter
]);
1001 partition_status
= read_drive_label(drive
, 1);
1003 switch (partition_status
) {
1004 case DL_CANT_OPEN
: /* not our kind */
1006 if (drive
->lasterror
== EFTYPE
) /* wrong kind of partition */
1007 throw_rude_remark(drive
->lasterror
,
1008 "Drive %s has invalid partition type",
1010 else /* I/O error of some kind */
1011 throw_rude_remark(drive
->lasterror
,
1012 "Can't initialize drive %s",
1016 case DL_WRONG_DRIVE
: /* valid drive, not the name we expected */
1017 if (vinum_conf
.flags
& VF_FORCECONFIG
) { /* but we'll accept that */
1018 bcopy(token
[1], drive
->label
.name
, sizeof(drive
->label
.name
));
1023 * There's a potential race condition here:
1024 * the rude remark refers to a field in an
1025 * unallocated drive, which potentially could
1026 * be reused. This works because we're the only
1027 * thread accessing the config at the moment.
1029 drive
->state
= drive_unallocated
; /* throw it away completely */
1030 throw_rude_remark(drive
->lasterror
,
1031 "Incorrect drive name %s specified for drive %s",
1036 case DL_DELETED_LABEL
: /* it was a drive, but we deleted it */
1037 case DL_NOT_OURS
: /* nothing to do with the rest */
1042 * read_drive_label overwrites the device name.
1043 * If we get here, we can have the drive,
1044 * so put it back again
1046 ksnprintf(drive
->devicename
, sizeof(drive
->devicename
),
1047 "%s", token
[parameter
]);
1051 parameter
++; /* skip the keyword */
1052 if (vinum_conf
.flags
& VF_READING_CONFIG
)
1053 drive
->state
= DriveState(token
[parameter
]); /* set the state */
1056 case kw_hotspare
: /* this drive is a hot spare */
1057 drive
->flags
|= VF_HOTSPARE
;
1062 throw_rude_remark(EINVAL
,
1063 "Drive %s, invalid keyword: %s",
1069 if (drive
->devicename
[0] != '/') {
1070 drive
->state
= drive_unallocated
; /* deallocate the drive */
1071 throw_rude_remark(EINVAL
, "No device name for %s", drive
->label
.name
);
1073 vinum_conf
.drives_used
++; /* passed all hurdles: one more in use */
1075 * If we're replacing a drive, it could be that
1076 * we already have subdisks referencing this
1077 * drive. Note where they should be and change
1078 * their state to obsolete.
1080 for (sdno
= 0; sdno
< vinum_conf
.subdisks_allocated
; sdno
++) {
1081 if ((SD
[sdno
].state
> sd_referenced
)
1082 && (SD
[sdno
].driveno
== driveno
)) {
1083 give_sd_to_drive(sdno
);
1084 if (SD
[sdno
].state
> sd_stale
)
1085 SD
[sdno
].state
= sd_stale
;
1091 * Handle a subdisk definition. We store the information in the global variable
1092 * sd, so we don't need to allocate.
1094 * If we find an error, print a message and return
1097 config_subdisk(int update
)
1100 int sdno
; /* index of sd in vinum_conf */
1101 struct sd
*sd
; /* and pointer to it */
1103 int detached
= 0; /* set to 1 if this is a detached subdisk */
1104 int sdindex
= -1; /* index in plexes subdisk table */
1105 enum sdstate state
= sd_unallocated
; /* state to set, if specified */
1106 int autosize
= 0; /* set if we autosize in give_sd_to_drive */
1107 int namedsdno
; /* index of another with this name */
1109 sdno
= get_empty_sd(); /* allocate an SD to initialize */
1110 sd
= &SD
[sdno
]; /* and get a pointer */
1112 for (parameter
= 1; parameter
< tokens
; parameter
++) { /* look at the other tokens */
1113 switch (get_keyword(token
[parameter
], &keyword_set
)) {
1115 * If we have a 'name' parameter, it must
1116 * come first, because we're too lazy to tidy
1117 * up dangling refs if it comes later.
1120 namedsdno
= find_subdisk(token
[++parameter
], 0); /* find an existing sd with this name */
1121 if (namedsdno
>= 0) { /* got one */
1122 if (SD
[namedsdno
].state
== sd_referenced
) { /* we've been told about this one */
1124 throw_rude_remark(EINVAL
,
1125 "sd %s: name parameter must come first\n", /* no go */
1129 struct plex
*plex
; /* for tidying up dangling references */
1131 *sd
= SD
[namedsdno
]; /* copy from the referenced one */
1134 SD
[namedsdno
].state
= sd_unallocated
; /* and deallocate the referenced one */
1135 made_sd(&SD
[namedsdno
]);
1136 plex
= &PLEX
[sd
->plexno
]; /* now take a look at our plex */
1137 for (i
= 0; i
< plex
->subdisks
; i
++) { /* look for the pointer */
1138 if (plex
->sdnos
[i
] == namedsdno
) /* pointing to the old subdisk */
1139 plex
->sdnos
[i
] = sdno
; /* bend it to point here */
1143 if (update
) /* are we updating? */
1144 return; /* that's OK, nothing more to do */
1146 throw_rude_remark(EINVAL
, "Duplicate subdisk %s", token
[parameter
]);
1148 ksnprintf(sd
->name
, sizeof(sd
->name
),
1149 "%s", token
[parameter
]);
1158 size
= sizespec(token
[++parameter
]);
1159 if ((size
== -1) /* unallocated */
1160 &&(vinum_conf
.flags
& VF_READING_CONFIG
)) /* reading from disk */
1161 break; /* invalid sd; just ignore it */
1162 if ((size
% DEV_BSIZE
) != 0)
1163 throw_rude_remark(EINVAL
,
1164 "sd %s, bad plex offset alignment: %lld",
1168 sd
->plexoffset
= size
/ DEV_BSIZE
;
1171 case kw_driveoffset
:
1172 size
= sizespec(token
[++parameter
]);
1173 if ((size
== -1) /* unallocated */
1174 &&(vinum_conf
.flags
& VF_READING_CONFIG
)) /* reading from disk */
1175 break; /* invalid sd; just ignore it */
1176 if ((size
% DEV_BSIZE
) != 0)
1177 throw_rude_remark(EINVAL
,
1178 "sd %s, bad drive offset alignment: %lld",
1182 sd
->driveoffset
= size
/ DEV_BSIZE
;
1186 if (get_keyword(token
[++parameter
], &keyword_set
) == kw_max
) /* select maximum size from drive */
1187 size
= 0; /* this is how we say it :-) */
1189 size
= sizespec(token
[parameter
]);
1190 if ((size
% DEV_BSIZE
) != 0)
1191 throw_rude_remark(EINVAL
, "sd %s, length %jd not multiple of sector size", sd
->name
, (intmax_t)size
);
1193 sd
->sectors
= size
/ DEV_BSIZE
;
1195 * We have a problem with autosizing: we need to
1196 * give the drive to the plex before we give it
1197 * to the drive, in order to be clean if we give
1198 * up in the middle, but at this time the size hasn't
1199 * been set. Note that we have to fix up after
1200 * giving the subdisk to the drive.
1203 autosize
= 1; /* note that we're autosizing */
1207 sd
->driveno
= find_drive(token
[++parameter
], 1); /* insert drive information */
1211 sd
->plexno
= find_plex(token
[++parameter
], 1); /* insert plex information */
1215 * Set the state. We can't do this directly,
1216 * because give_sd_to_plex may change it
1219 parameter
++; /* skip the keyword */
1220 if (vinum_conf
.flags
& VF_READING_CONFIG
)
1221 state
= SdState(token
[parameter
]); /* set the state */
1225 parameter
++; /* skip the keyword */
1226 if ((strlen(token
[parameter
]) != 1)
1227 || (token
[parameter
][0] < 'a')
1228 || (token
[parameter
][0] > 'p'))
1229 throw_rude_remark(EINVAL
,
1230 "%s: invalid partition %c",
1232 token
[parameter
][0]);
1235 case kw_retryerrors
:
1236 sd
->flags
|= VF_RETRYERRORS
;
1240 throw_rude_remark(EINVAL
, "%s: invalid keyword: %s", sd
->name
, token
[parameter
]);
1244 /* Check we have a drive name */
1245 if (sd
->driveno
< 0) { /* didn't specify a drive */
1246 sd
->driveno
= current_drive
; /* set to the current drive */
1247 if (sd
->driveno
< 0) /* no current drive? */
1248 throw_rude_remark(EINVAL
, "Subdisk %s is not associated with a drive", sd
->name
);
1251 * This is tacky. If something goes wrong
1252 * with the checks, we may end up losing drive
1255 if (autosize
!= 0) /* need to find a size, */
1256 give_sd_to_drive(sdno
); /* do it before the plex */
1258 /* Check for a plex name */
1259 if ((sd
->plexno
< 0) /* didn't specify a plex */
1260 &&(!detached
)) /* and didn't say not to, */
1261 sd
->plexno
= current_plex
; /* set to the current plex */
1263 if (sd
->plexno
>= 0)
1264 sdindex
= give_sd_to_plex(sd
->plexno
, sdno
); /* now tell the plex that it has this sd */
1266 sd
->sdno
= sdno
; /* point to our entry in the table */
1268 /* Does the subdisk have a name? If not, give it one */
1269 if (sd
->name
[0] == '\0') { /* no name */
1270 char sdsuffix
[8]; /* form sd name suffix here */
1272 /* Do we have a plex name? */
1273 if (sdindex
>= 0) /* we have a plex */
1274 strcpy(sd
->name
, PLEX
[sd
->plexno
].name
); /* take it from there */
1276 throw_rude_remark(EINVAL
, "Unnamed sd is not associated with a plex");
1277 ksprintf(sdsuffix
, ".s%d", sdindex
); /* form the suffix */
1278 strcat(sd
->name
, sdsuffix
); /* and add it to the name */
1280 /* do we have complete info for this subdisk? */
1281 if (sd
->sectors
< 0)
1282 throw_rude_remark(EINVAL
, "sd %s has no length spec", sd
->name
);
1284 if (state
!= sd_unallocated
) { /* we had a specific state to set */
1285 sd
->state
= state
; /* do it now */
1287 } else if (sd
->state
== sd_unallocated
) { /* no, nothing set yet, */
1288 sd
->state
= sd_empty
; /* must be empty */
1291 if (autosize
== 0) /* no autoconfig, do the drive now */
1292 give_sd_to_drive(sdno
);
1293 vinum_conf
.subdisks_used
++; /* one more in use */
1297 * Handle a plex definition.
1300 config_plex(int update
)
1303 int plexno
; /* index of plex in vinum_conf */
1304 struct plex
*plex
; /* and pointer to it */
1305 int pindex
= MAXPLEX
; /* index in volume's plex list */
1306 int detached
= 0; /* don't give it to a volume */
1308 enum plexstate state
= plex_init
; /* state to set at end */
1310 current_plex
= -1; /* forget the previous plex */
1311 plexno
= get_empty_plex(); /* allocate a plex */
1312 plex
= &PLEX
[plexno
]; /* and point to it */
1313 plex
->plexno
= plexno
; /* and back to the config */
1315 for (parameter
= 1; parameter
< tokens
; parameter
++) { /* look at the other tokens */
1316 switch (get_keyword(token
[parameter
], &keyword_set
)) {
1318 * If we have a 'name' parameter, it must
1319 * come first, because we're too lazy to tidy
1320 * up dangling refs if it comes later.
1323 namedplexno
= find_plex(token
[++parameter
], 0); /* find an existing plex with this name */
1324 if (namedplexno
>= 0) { /* plex exists already, */
1325 if (PLEX
[namedplexno
].state
== plex_referenced
) { /* we've been told about this one */
1326 if (parameter
> 2) /* we've done other things first, */
1327 throw_rude_remark(EINVAL
,
1328 "plex %s: name parameter must come first\n", /* no go */
1332 struct volume
*vol
; /* for tidying up dangling references */
1334 *plex
= PLEX
[namedplexno
]; /* get the info */
1335 plex
->plex_dev
= NULL
;
1337 PLEX
[namedplexno
].state
= plex_unallocated
; /* and deallocate the other one */
1338 made_plex(&PLEX
[namedplexno
]);
1339 vol
= &VOL
[plex
->volno
]; /* point to the volume */
1340 for (i
= 0; i
< MAXPLEX
; i
++) { /* for each plex */
1341 if (vol
->plex
[i
] == namedplexno
)
1342 vol
->plex
[i
] = plexno
; /* bend the pointer */
1345 break; /* use this one */
1347 if (update
) /* are we updating? */
1348 return; /* yes: that's OK, just return */
1350 throw_rude_remark(EINVAL
, "Duplicate plex %s", token
[parameter
]);
1352 ksnprintf(plex
->name
, sizeof(plex
->name
),
1353 "%s", token
[parameter
]);
1361 case kw_org
: /* plex organization */
1362 switch (get_keyword(token
[++parameter
], &keyword_set
)) {
1364 plex
->organization
= plex_concat
;
1369 int stripesize
= sizespec(token
[++parameter
]);
1371 plex
->organization
= plex_striped
;
1372 if (stripesize
% DEV_BSIZE
!= 0) /* not a multiple of block size, */
1373 throw_rude_remark(EINVAL
, "plex %s: stripe size %d not a multiple of sector size",
1377 plex
->stripesize
= stripesize
/ DEV_BSIZE
;
1383 int stripesize
= sizespec(token
[++parameter
]);
1385 plex
->organization
= plex_raid4
;
1386 if (stripesize
% DEV_BSIZE
!= 0) /* not a multiple of block size, */
1387 throw_rude_remark(EINVAL
, "plex %s: stripe size %d not a multiple of sector size",
1391 plex
->stripesize
= stripesize
/ DEV_BSIZE
;
1397 int stripesize
= sizespec(token
[++parameter
]);
1399 plex
->organization
= plex_raid5
;
1400 if (stripesize
% DEV_BSIZE
!= 0) /* not a multiple of block size, */
1401 throw_rude_remark(EINVAL
, "plex %s: stripe size %d not a multiple of sector size",
1405 plex
->stripesize
= stripesize
/ DEV_BSIZE
;
1410 throw_rude_remark(EINVAL
, "Invalid plex organization");
1413 && (plex
->stripesize
== 0)) /* didn't specify a valid stripe size */
1414 throw_rude_remark(EINVAL
, "Need a stripe size parameter");
1418 plex
->volno
= find_volume(token
[++parameter
], 1); /* insert a pointer to the volume */
1421 case kw_sd
: /* add a subdisk */
1425 sdno
= find_subdisk(token
[++parameter
], 1); /* find a subdisk */
1426 SD
[sdno
].plexoffset
= sizespec(token
[++parameter
]); /* get the offset */
1427 give_sd_to_plex(plexno
, sdno
); /* and insert it there */
1432 parameter
++; /* skip the keyword */
1433 if (vinum_conf
.flags
& VF_READING_CONFIG
)
1434 state
= PlexState(token
[parameter
]); /* set the state */
1438 throw_rude_remark(EINVAL
, "plex %s, invalid keyword: %s",
1444 if (plex
->organization
== plex_disorg
)
1445 throw_rude_remark(EINVAL
, "No plex organization specified");
1447 if ((plex
->volno
< 0) /* we don't have a volume */
1448 &&(!detached
)) /* and we wouldn't object */
1449 plex
->volno
= current_volume
;
1451 if (plex
->volno
>= 0)
1452 pindex
= give_plex_to_volume(plex
->volno
, plexno
); /* Now tell the volume that it has this plex */
1454 /* Does the plex have a name? If not, give it one */
1455 if (plex
->name
[0] == '\0') { /* no name */
1456 char plexsuffix
[8]; /* form plex name suffix here */
1457 /* Do we have a volume name? */
1458 if (plex
->volno
>= 0) /* we have a volume */
1459 strcpy(plex
->name
, /* take it from there */
1460 VOL
[plex
->volno
].name
);
1462 throw_rude_remark(EINVAL
, "Unnamed plex is not associated with a volume");
1463 ksprintf(plexsuffix
, ".p%d", pindex
); /* form the suffix */
1464 strcat(plex
->name
, plexsuffix
); /* and add it to the name */
1466 if (isstriped(plex
)) {
1467 plex
->lock
= (struct rangelock
*)
1468 Malloc(PLEX_LOCKS
* sizeof(struct rangelock
));
1469 CHECKALLOC(plex
->lock
, "vinum: Can't allocate lock table\n");
1470 bzero((char *) plex
->lock
, PLEX_LOCKS
* sizeof(struct rangelock
));
1472 /* Note the last plex we configured */
1473 current_plex
= plexno
;
1474 plex
->state
= state
; /* set whatever state we chose */
1476 vinum_conf
.plexes_used
++; /* one more in use */
1480 * Handle a volume definition.
1481 * If we find an error, print a message, deallocate the nascent volume, and return
1484 config_volume(int update
)
1488 struct volume
*vol
; /* collect volume info here */
1491 if (tokens
< 2) /* not enough tokens */
1492 throw_rude_remark(EINVAL
, "Volume has no name");
1493 current_volume
= -1; /* forget the previous volume */
1494 volno
= find_volume(token
[1], 1); /* allocate a volume to initialize */
1495 vol
= &VOL
[volno
]; /* and get a pointer */
1496 if (update
&& ((vol
->flags
& VF_CREATED
) == 0)) /* this volume exists already */
1497 return; /* don't do anything */
1498 vol
->flags
&= ~VF_CREATED
; /* it exists now */
1500 for (parameter
= 2; parameter
< tokens
; parameter
++) { /* look at all tokens */
1501 switch (get_keyword(token
[parameter
], &keyword_set
)) {
1504 int plexno
; /* index of this plex */
1505 int myplexno
; /* and index if it's already ours */
1507 plexno
= find_plex(token
[++parameter
], 1); /* find a plex */
1508 if (plexno
< 0) /* couldn't */
1509 break; /* we've already had an error message */
1510 myplexno
= my_plex(volno
, plexno
); /* does it already belong to us? */
1511 if (myplexno
> 0) /* yes, shouldn't get it again */
1512 throw_rude_remark(EINVAL
,
1513 "Plex %s already belongs to volume %s",
1516 else if (vol
->plexes
+ 1 > 8) /* another entry */
1517 throw_rude_remark(EINVAL
,
1518 "Too many plexes for volume %s",
1520 vol
->plex
[vol
->plexes
] = plexno
;
1522 PLEX
[plexno
].state
= plex_referenced
; /* we know something about it */
1523 PLEX
[plexno
].volno
= volno
; /* and this volume references it */
1528 switch (get_keyword(token
[++parameter
], &keyword_set
)) { /* decide what to do */
1530 vol
->preferred_plex
= ROUND_ROBIN_READPOL
; /* default */
1535 int myplexno
; /* index of this plex */
1537 myplexno
= find_plex(token
[++parameter
], 1); /* find a plex */
1538 if (myplexno
< 0) /* couldn't */
1539 break; /* we've already had an error message */
1540 myplexno
= my_plex(volno
, myplexno
); /* does it already belong to us? */
1541 if (myplexno
> 0) /* yes */
1542 vol
->preferred_plex
= myplexno
; /* just note the index */
1543 else if (++vol
->plexes
> 8) /* another entry */
1544 throw_rude_remark(EINVAL
, "Too many plexes");
1545 else { /* space for the new plex */
1546 vol
->plex
[vol
->plexes
- 1] = myplexno
; /* add it to our list */
1547 vol
->preferred_plex
= vol
->plexes
- 1; /* and note the index */
1553 throw_rude_remark(EINVAL
, "Invalid read policy");
1557 vol
->flags
|= VF_CONFIG_SETUPSTATE
; /* set the volume up later on */
1561 parameter
++; /* skip the keyword */
1562 if (vinum_conf
.flags
& VF_READING_CONFIG
) {
1563 vol
->state
= VolState(token
[parameter
]); /* set the state */
1564 vol
->volno
= volno
; /* needs correct volno to make devs */
1570 * XXX experimental ideas. These are not
1571 * documented, and will not be until I
1572 * decide they're worth keeping
1574 case kw_writethrough
: /* set writethrough mode */
1575 vol
->flags
|= VF_WRITETHROUGH
;
1578 case kw_writeback
: /* set writeback mode */
1579 vol
->flags
&= ~VF_WRITETHROUGH
;
1583 vol
->flags
|= VF_RAW
; /* raw volume (no label) */
1587 throw_rude_remark(EINVAL
, "volume %s, invalid keyword: %s",
1592 current_volume
= volno
; /* note last referred volume */
1593 vol
->volno
= volno
; /* also note in volume */
1596 * Before we can actually use the volume, we need
1597 * a volume label. We could start to fake one here,
1598 * but it will be a lot easier when we have some
1599 * to copy from the drives, so defer it until we
1600 * set up the configuration. XXX
1602 if (vol
->state
== volume_unallocated
) {
1603 vol
->state
= volume_down
; /* now ready to bring up at the end */
1607 /* Find out how big our volume is */
1608 for (i
= 0; i
< vol
->plexes
; i
++)
1609 vol
->size
= u64max(vol
->size
, PLEX
[vol
->plex
[i
]].length
);
1610 vinum_conf
.volumes_used
++; /* one more in use */
1614 * Parse a config entry. CARE! This destroys the original contents of the
1615 * config entry, which we don't really need after this. More specifically, it
1616 * places \0 characters at the end of each token.
1618 * Return 0 if all is well, otherwise EINVAL for invalid keyword,
1619 * or ENOENT if 'read' command doesn't find any drives.
1622 parse_config(char *cptr
, struct keywordset
*keyset
, int update
)
1626 status
= 0; /* until proven otherwise */
1627 tokens
= tokenize(cptr
, token
); /* chop up into tokens */
1629 if (tokens
<= 0) /* screwed up or empty line */
1630 return tokens
; /* give up */
1632 if (token
[0][0] == '#') /* comment line */
1635 switch (get_keyword(token
[0], keyset
)) { /* decide what to do */
1636 case kw_read
: /* read config from a specified drive */
1637 status
= vinum_scandisk(&token
[1], tokens
- 1); /* read the config from disk */
1641 config_drive(update
);
1645 config_subdisk(update
);
1649 config_plex(update
);
1653 config_volume(update
);
1656 /* Anything else is invalid in this context */
1658 throw_rude_remark(EINVAL
, /* should we die? */
1659 "Invalid configuration information: %s",
1666 * parse a line handed in from userland via ioctl.
1667 * This differs only by the error reporting mechanism:
1668 * we return the error indication in the reply to the
1669 * ioctl, so we need to set a global static pointer in
1670 * this file. This technique works because we have
1671 * ensured that configuration is performed in a single-
1675 parse_user_config(char *cptr
, struct keywordset
*keyset
)
1679 ioctl_reply
= (struct _ioctl_reply
*) cptr
;
1680 status
= parse_config(cptr
, keyset
, 0);
1681 if (status
== ENOENT
) /* from scandisk, but it can't tell us */
1682 strcpy(ioctl_reply
->msg
, "no drives found");
1683 ioctl_reply
= NULL
; /* don't do this again */
1687 /* Remove an object */
1689 remove(struct vinum_ioctl_msg
*msg
)
1691 struct vinum_ioctl_msg message
= *msg
; /* make a copy to hand on */
1693 ioctl_reply
= (struct _ioctl_reply
*) msg
; /* reinstate the address to reply to */
1694 ioctl_reply
->error
= 0; /* no error, */
1695 ioctl_reply
->msg
[0] = '\0'; /* no message */
1697 switch (message
.type
) {
1699 remove_drive_entry(message
.index
, message
.force
);
1704 remove_sd_entry(message
.index
, message
.force
, message
.recurse
);
1709 remove_plex_entry(message
.index
, message
.force
, message
.recurse
);
1714 remove_volume_entry(message
.index
, message
.force
, message
.recurse
);
1719 ioctl_reply
->error
= EINVAL
;
1720 strcpy(ioctl_reply
->msg
, "Invalid object type");
1724 /* Remove a drive. */
1726 remove_drive_entry(int driveno
, int force
)
1728 struct drive
*drive
= &DRIVE
[driveno
];
1731 if ((driveno
> vinum_conf
.drives_allocated
) /* not a valid drive */
1732 ||(drive
->state
== drive_unallocated
)) { /* or nothing there */
1733 ioctl_reply
->error
= EINVAL
;
1734 strcpy(ioctl_reply
->msg
, "No such drive");
1735 } else if (drive
->opencount
> 0) { /* we have subdisks */
1736 if (force
) { /* do it at any cost */
1737 for (sdno
= 0; sdno
< vinum_conf
.subdisks_allocated
; sdno
++) {
1738 if ((SD
[sdno
].state
!= sd_unallocated
) /* subdisk is allocated */
1739 &&(SD
[sdno
].driveno
== driveno
)) /* and it belongs to this drive */
1740 remove_sd_entry(sdno
, force
, 0);
1742 remove_drive(driveno
); /* now remove it */
1743 vinum_conf
.drives_used
--; /* one less drive */
1745 ioctl_reply
->error
= EBUSY
; /* can't do that */
1747 remove_drive(driveno
); /* just remove it */
1748 vinum_conf
.drives_used
--; /* one less drive */
1752 /* remove a subdisk */
1754 remove_sd_entry(int sdno
, int force
, int recurse
)
1756 struct sd
*sd
= &SD
[sdno
];
1758 if ((sdno
> vinum_conf
.subdisks_allocated
) /* not a valid sd */
1759 ||(sd
->state
== sd_unallocated
)) { /* or nothing there */
1760 ioctl_reply
->error
= EINVAL
;
1761 strcpy(ioctl_reply
->msg
, "No such subdisk");
1762 } else if (sd
->flags
& VF_OPEN
) { /* we're open */
1763 ioctl_reply
->error
= EBUSY
; /* no getting around that */
1765 } else if (sd
->plexno
>= 0) { /* we have a plex */
1766 if (force
) { /* do it at any cost */
1767 struct plex
*plex
= &PLEX
[sd
->plexno
]; /* point to our plex */
1770 for (mysdno
= 0; /* look for ourselves */
1771 mysdno
< plex
->subdisks
&& &SD
[plex
->sdnos
[mysdno
]] != sd
;
1773 if (mysdno
== plex
->subdisks
) /* didn't find it */
1775 "Error removing subdisk %s: not found in plex %s\n",
1778 else { /* remove the subdisk from plex */
1779 if (mysdno
< (plex
->subdisks
- 1)) /* not the last subdisk */
1780 bcopy(&plex
->sdnos
[mysdno
+ 1],
1781 &plex
->sdnos
[mysdno
],
1782 (plex
->subdisks
- 1 - mysdno
) * sizeof(int));
1784 sd
->plexno
= -1; /* disown the subdisk */
1788 * Removing a subdisk from a striped or
1789 * RAID-4 or RAID-5 plex really tears the
1790 * hell out of the structure, and it needs
1791 * to be reinitialized.
1793 if (plex
->organization
!= plex_concat
) /* not concatenated, */
1794 set_plex_state(plex
->plexno
, plex_faulty
, setstate_force
); /* need to reinitialize */
1795 log(LOG_INFO
, "vinum: removing %s\n", sd
->name
);
1798 ioctl_reply
->error
= EBUSY
; /* can't do that */
1800 log(LOG_INFO
, "vinum: removing %s\n", sd
->name
);
1807 remove_plex_entry(int plexno
, int force
, int recurse
)
1809 struct plex
*plex
= &PLEX
[plexno
];
1812 if ((plexno
> vinum_conf
.plexes_allocated
) /* not a valid plex */
1813 ||(plex
->state
== plex_unallocated
)) { /* or nothing there */
1814 ioctl_reply
->error
= EINVAL
;
1815 strcpy(ioctl_reply
->msg
, "No such plex");
1816 } else if (plex
->flags
& VF_OPEN
) { /* we're open */
1817 ioctl_reply
->error
= EBUSY
; /* no getting around that */
1820 if (plex
->subdisks
) {
1821 if (force
) { /* do it anyway */
1822 if (recurse
) { /* remove all below */
1823 int sds
= plex
->subdisks
;
1824 for (sdno
= 0; sdno
< sds
; sdno
++)
1825 free_sd(plex
->sdnos
[sdno
]); /* free all subdisks */
1826 } else { /* just tear them out */
1827 int sds
= plex
->subdisks
;
1828 for (sdno
= 0; sdno
< sds
; sdno
++)
1829 SD
[plex
->sdnos
[sdno
]].plexno
= -1; /* no plex any more */
1831 } else { /* can't do it without force */
1832 ioctl_reply
->error
= EBUSY
; /* can't do that */
1836 if (plex
->volno
>= 0) { /* we are part of a volume */
1837 if (force
) { /* do it at any cost */
1838 struct volume
*vol
= &VOL
[plex
->volno
];
1841 for (myplexno
= 0; myplexno
< vol
->plexes
; myplexno
++)
1842 if (vol
->plex
[myplexno
] == plexno
) /* found it */
1844 if (myplexno
== vol
->plexes
) /* didn't find it. Huh? */
1846 "Error removing plex %s: not found in volume %s\n",
1849 if (myplexno
< (vol
->plexes
- 1)) /* not the last plex in the list */
1850 bcopy(&vol
->plex
[myplexno
+ 1],
1851 &vol
->plex
[myplexno
],
1852 vol
->plexes
- 1 - myplexno
);
1855 ioctl_reply
->error
= EBUSY
; /* can't do that */
1859 log(LOG_INFO
, "vinum: removing %s\n", plex
->name
);
1861 vinum_conf
.plexes_used
--; /* one less plex */
1864 /* remove a volume */
1866 remove_volume_entry(int volno
, int force
, int recurse
)
1868 struct volume
*vol
= &VOL
[volno
];
1871 if ((volno
> vinum_conf
.volumes_allocated
) /* not a valid volume */
1872 ||(vol
->state
== volume_unallocated
)) { /* or nothing there */
1873 ioctl_reply
->error
= EINVAL
;
1874 strcpy(ioctl_reply
->msg
, "No such volume");
1875 } else if (vol
->flags
& VF_OPEN
) /* we're open */
1876 ioctl_reply
->error
= EBUSY
; /* no getting around that */
1877 else if (vol
->plexes
) {
1878 if (recurse
&& force
) { /* remove all below */
1879 int plexes
= vol
->plexes
;
1881 /* for (plexno = plexes - 1; plexno >= 0; plexno--) */
1882 for (plexno
= 0; plexno
< plexes
; plexno
++)
1883 remove_plex_entry(vol
->plex
[plexno
], force
, recurse
);
1884 log(LOG_INFO
, "vinum: removing %s\n", vol
->name
);
1886 vinum_conf
.volumes_used
--; /* one less volume */
1888 ioctl_reply
->error
= EBUSY
; /* can't do that */
1890 log(LOG_INFO
, "vinum: removing %s\n", vol
->name
);
1892 vinum_conf
.volumes_used
--; /* one less volume */
1896 /* Currently called only from ioctl */
1898 update_sd_config(int sdno
, int diskconfig
)
1901 set_sd_state(sdno
, sd_up
, setstate_configuring
);
1902 SD
[sdno
].flags
&= ~VF_NEWBORN
;
1906 update_plex_config(int plexno
, int diskconfig
)
1910 struct plex
*plex
= &PLEX
[plexno
];
1911 int remainder
; /* size of fractional stripe at end */
1912 int added_plex
; /* set if we add a plex to a volume */
1913 int required_sds
; /* number of subdisks we need */
1916 int data_sds
= 0; /* number of sds carrying data */
1918 if (plex
->state
< plex_init
) /* not a real plex, */
1921 if (plex
->volno
>= 0) { /* we have a volume */
1922 vol
= &VOL
[plex
->volno
];
1925 * If we're newly born,
1926 * and the volume isn't,
1927 * and it has other plexes,
1928 * and we didn't read this mess from disk,
1929 * we were added later.
1931 if ((plex
->flags
& VF_NEWBORN
)
1932 && ((vol
->flags
& VF_NEWBORN
) == 0)
1933 && (vol
->plexes
> 0)
1934 && (diskconfig
== 0)) {
1939 * Check that our subdisks make sense. For
1940 * striped, RAID-4 and RAID-5 plexes, we need at
1941 * least two subdisks, and they must all be the
1944 if (plex
->organization
== plex_striped
) {
1945 data_sds
= plex
->subdisks
;
1947 } else if (isparity(plex
)) { /* RAID 4 or 5 */
1948 data_sds
= plex
->subdisks
- 1;
1952 if (required_sds
> 0) { /* striped, RAID-4 or RAID-5 */
1953 if (plex
->subdisks
< required_sds
) {
1955 "vinum: plex %s does not have at least %d subdisks\n",
1960 * Now see if the plex size is a multiple of
1961 * the stripe size. If not, trim off the end
1962 * of each subdisk and return it to the drive.
1964 if (plex
->length
> 0) {
1966 if (plex
->stripesize
> 0) {
1967 remainder
= (int) (plex
->length
/* are we exact? */
1968 % ((u_int64_t
) plex
->stripesize
* data_sds
));
1969 if (remainder
) { /* no */
1970 log(LOG_INFO
, "vinum: removing %d blocks of partial stripe at the end of %s\n",
1973 plex
->length
-= remainder
; /* shorten the plex */
1974 remainder
/= data_sds
; /* spread the remainder amongst the sds */
1975 for (sdno
= 0; sdno
< plex
->subdisks
; sdno
++) {
1976 sd
= &SD
[plex
->sdnos
[sdno
]]; /* point to the subdisk */
1977 return_drive_space(sd
->driveno
, /* return the space */
1978 sd
->driveoffset
+ sd
->sectors
- remainder
,
1980 sd
->sectors
-= remainder
; /* and shorten it */
1983 } else /* no data sds, */
1984 plex
->length
= 0; /* reset length */
1989 for (sdno
= 0; sdno
< plex
->subdisks
; sdno
++) {
1990 sd
= &SD
[plex
->sdnos
[sdno
]];
1993 && (sd
->sectors
!= SD
[plex
->sdnos
[sdno
- 1]].sectors
)) {
1994 log(LOG_ERR
, "vinum: %s must have equal sized subdisks\n", plex
->name
);
1996 size
+= sd
->sectors
;
1997 if (added_plex
) { /* we were added later */
1998 sd
->state
= sd_stale
; /* stale until proven otherwise */
2003 if (plex
->subdisks
) { /* plex has subdisks, calculate size */
2005 * XXX We shouldn't need to calculate the size any
2006 * more. Check this some time
2009 size
= size
/ plex
->subdisks
* (plex
->subdisks
- 1); /* less space for RAID-4 and RAID-5 */
2010 if (plex
->length
!= size
)
2012 "Correcting length of %s: was %lld, is %lld\n",
2014 (long long) plex
->length
,
2016 plex
->length
= size
;
2017 } else { /* no subdisks, */
2018 plex
->length
= 0; /* no size */
2020 update_plex_state(plexno
); /* set the state */
2021 plex
->flags
&= ~VF_NEWBORN
;
2025 update_volume_config(int volno
, int diskconfig
)
2027 struct volume
*vol
= &VOL
[volno
];
2031 if (vol
->state
!= volume_unallocated
)
2033 * Recalculate the size of the volume,
2034 * which might change if the original
2035 * plexes were not a multiple of the
2040 for (plexno
= 0; plexno
< vol
->plexes
; plexno
++) {
2041 plex
= &PLEX
[vol
->plex
[plexno
]];
2042 vol
->size
= u64max(plex
->length
, vol
->size
);
2043 plex
->volplexno
= plexno
; /* note it in the plex */
2046 vol
->flags
&= ~VF_NEWBORN
; /* no longer newly born */
2050 * Update the global configuration.
2051 * diskconfig is != 0 if we're reading in a config
2052 * from disk. In this case, we don't try to
2053 * bring the devices up, though we will bring
2054 * them down if there's some error which got
2055 * missed when writing to disk.
2058 updateconfig(int diskconfig
)
2063 for (plexno
= 0; plexno
< vinum_conf
.plexes_allocated
; plexno
++)
2064 update_plex_config(plexno
, diskconfig
);
2066 for (volno
= 0; volno
< vinum_conf
.volumes_allocated
; volno
++) {
2067 if (VOL
[volno
].state
> volume_uninit
) {
2068 VOL
[volno
].flags
&= ~VF_CONFIG_SETUPSTATE
; /* no more setupstate */
2069 update_volume_state(volno
);
2070 update_volume_config(volno
, diskconfig
);
2077 * Start manual changes to the configuration and lock out
2078 * others who may wish to do so.
2079 * XXX why do we need this and lock_config too?
2082 start_config(int force
)
2086 current_drive
= -1; /* note the last drive we mention, for
2088 current_plex
= -1; /* and the same for the last plex */
2089 current_volume
= -1; /* and the last volume */
2090 while ((vinum_conf
.flags
& VF_CONFIGURING
) != 0) {
2091 vinum_conf
.flags
|= VF_WILL_CONFIGURE
;
2092 if ((error
= tsleep(&vinum_conf
, PCATCH
, "vincfg", 0)) != 0)
2096 * We need two flags here: VF_CONFIGURING
2097 * tells other processes to hold off (this
2098 * function), and VF_CONFIG_INCOMPLETE
2099 * tells the state change routines not to
2100 * propagate incrememntal state changes
2102 vinum_conf
.flags
|= VF_CONFIGURING
| VF_CONFIG_INCOMPLETE
;
2104 vinum_conf
.flags
|= VF_FORCECONFIG
; /* overwrite differently named drives */
2105 current_drive
= -1; /* reset the defaults */
2106 current_plex
= -1; /* and the same for the last plex */
2107 current_volume
= -1; /* and the last volme */
2112 * Update the config if update is 1, and unlock
2113 * it. We won't update the configuration if we
2114 * are called in a recursive loop via throw_rude_remark.
2117 finish_config(int update
)
2119 /* we've finished our config */
2120 vinum_conf
.flags
&= ~(VF_CONFIG_INCOMPLETE
| VF_READING_CONFIG
| VF_FORCECONFIG
);
2122 updateconfig(0); /* so update things */
2124 updateconfig(1); /* do some updates only */
2125 vinum_conf
.flags
&= ~VF_CONFIGURING
; /* and now other people can take a turn */
2126 if ((vinum_conf
.flags
& VF_WILL_CONFIGURE
) != 0) {
2127 vinum_conf
.flags
&= ~VF_WILL_CONFIGURE
;
2128 wakeup_one(&vinum_conf
);