kernel - kqueue - major refactoring
[dragonfly.git] / sys / kern / subr_diskmbr.c
blobaf63076188f692f068b89be153165e9a7a1b32cb
1 /*-
2 * Copyright (c) 1994 Bruce D. Evans.
3 * All rights reserved.
5 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 * from: @(#)ufs_disksubr.c 7.16 (Berkeley) 5/4/91
37 * from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
38 * $FreeBSD: src/sys/kern/subr_diskmbr.c,v 1.45 2000/01/28 10:22:07 bde Exp $
39 * $DragonFly: src/sys/kern/subr_diskmbr.c,v 1.26 2007/06/19 06:07:57 dillon Exp $
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/buf.h>
45 #include <sys/conf.h>
46 #include <sys/diskslice.h>
47 #define DOSPTYP_EXTENDED 5
48 #define DOSPTYP_EXTENDEDX 15
49 #define DOSPTYP_ONTRACK 84
50 #include <sys/diskslice.h>
51 #include <sys/diskmbr.h>
52 #include <sys/disk.h>
53 #include <sys/malloc.h>
54 #include <sys/syslog.h>
55 #include <sys/device.h>
57 #define TRACE(str) do { if (dsi_debug) kprintf str; } while (0)
59 static volatile u_char dsi_debug;
62 * This is what we have embedded in every boot1 for supporting the bogus
63 * "Dangerously Dedicated" mode. However, the old table is broken because
64 * it has an illegal geometry in it - it specifies 256 heads (heads = end
65 * head + 1) which causes nasty stuff when that wraps to zero in bios code.
66 * eg: divide by zero etc. This caused the dead-thinkpad problem, numerous
67 * SCSI bios crashes, EFI to crash, etc.
69 * We still have to recognize the old table though, even though we stopped
70 * inflicting it upon the world.
72 static struct dos_partition historical_bogus_partition_table[NDOSPART] = {
73 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
74 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
75 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
76 { 0x80, 0, 1, 0, DOSPTYP_386BSD, 255, 255, 255, 0, 50000, },
78 static struct dos_partition historical_bogus_partition_table_fixed[NDOSPART] = {
79 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
80 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
81 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
82 { 0x80, 0, 1, 0, DOSPTYP_386BSD, 254, 255, 255, 0, 50000, },
85 static int check_part (char *sname, struct dos_partition *dp,
86 u_int64_t offset, int nsectors, int ntracks,
87 u_int64_t mbr_offset);
88 static void mbr_extended (cdev_t dev, struct disk_info *info,
89 struct diskslices *ssp, u_int64_t ext_offset,
90 u_int64_t ext_size, u_int64_t base_ext_offset,
91 int nsectors, int ntracks, u_int64_t mbr_offset,
92 int level);
93 static int mbr_setslice (char *sname, struct disk_info *info,
94 struct diskslice *sp, struct dos_partition *dp,
95 u_int64_t br_offset);
98 int
99 mbrinit(cdev_t dev, struct disk_info *info, struct diskslices **sspp)
101 struct buf *bp;
102 u_char *cp;
103 int dospart;
104 struct dos_partition *dp;
105 struct dos_partition *dp0;
106 struct dos_partition dpcopy[NDOSPART];
107 int error;
108 int max_ncyls;
109 int max_nsectors;
110 int max_ntracks;
111 u_int64_t mbr_offset;
112 char partname[2];
113 u_long secpercyl;
114 char *sname = "tempname";
115 struct diskslice *sp;
116 struct diskslices *ssp;
117 cdev_t wdev;
119 mbr_offset = DOSBBSECTOR;
120 reread_mbr:
122 * Don't bother if the block size is weird or the
123 * media size is 0 (probably means no media present).
125 if (info->d_media_blksize & DEV_BMASK)
126 return (EIO);
127 if (info->d_media_size == 0)
128 return (EIO);
131 * Read master boot record.
133 wdev = dev;
134 bp = geteblk((int)info->d_media_blksize);
135 bp->b_bio1.bio_offset = (off_t)mbr_offset * info->d_media_blksize;
136 bp->b_bio1.bio_done = biodone_sync;
137 bp->b_bio1.bio_flags |= BIO_SYNC;
138 bp->b_bcount = info->d_media_blksize;
139 bp->b_cmd = BUF_CMD_READ;
140 dev_dstrategy(wdev, &bp->b_bio1);
141 if (biowait(&bp->b_bio1, "mbrrd") != 0) {
142 if ((info->d_dsflags & DSO_MBRQUIET) == 0) {
143 diskerr(&bp->b_bio1, wdev,
144 "reading primary partition table: error",
145 LOG_PRINTF, 0);
146 kprintf("\n");
148 error = EIO;
149 goto done;
152 /* Weakly verify it. */
153 cp = bp->b_data;
154 sname = dsname(dev, 0, 0, 0, NULL);
155 if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
156 if (bootverbose)
157 kprintf("%s: invalid primary partition table: no magic\n",
158 sname);
159 error = EINVAL;
160 goto done;
163 /* Make a copy of the partition table to avoid alignment problems. */
164 memcpy(&dpcopy[0], cp + DOSPARTOFF, sizeof(dpcopy));
166 dp0 = &dpcopy[0];
169 * Check for "Ontrack Diskmanager" or GPT. If a GPT is found in
170 * the first dos partition, ignore the rest of the MBR and go
171 * to GPT processing.
173 for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
174 if (dospart == 0 &&
175 (dp->dp_typ == DOSPTYP_PMBR || dp->dp_typ == DOSPTYP_GPT)) {
176 if (bootverbose)
177 kprintf(
178 "%s: Found GPT in slice #%d\n", sname, dospart + 1);
179 error = gptinit(dev, info, sspp);
180 goto done;
183 if (dp->dp_typ == DOSPTYP_ONTRACK) {
184 if (bootverbose)
185 kprintf(
186 "%s: Found \"Ontrack Disk Manager\" on this disk.\n", sname);
187 bp->b_flags |= B_INVAL | B_AGE;
188 brelse(bp);
189 mbr_offset = 63;
190 goto reread_mbr;
194 if (bcmp(dp0, historical_bogus_partition_table,
195 sizeof historical_bogus_partition_table) == 0 ||
196 bcmp(dp0, historical_bogus_partition_table_fixed,
197 sizeof historical_bogus_partition_table_fixed) == 0) {
198 #if 0
199 TRACE(("%s: invalid primary partition table: historical\n",
200 sname));
201 #endif /* 0 */
202 if (bootverbose)
203 kprintf(
204 "%s: invalid primary partition table: Dangerously Dedicated (ignored)\n",
205 sname);
206 error = EINVAL;
207 goto done;
210 /* Guess the geometry. */
212 * TODO:
213 * Perhaps skip entries with 0 size.
214 * Perhaps only look at entries of type DOSPTYP_386BSD.
216 max_ncyls = 0;
217 max_nsectors = 0;
218 max_ntracks = 0;
219 for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
220 int ncyls;
221 int nsectors;
222 int ntracks;
224 ncyls = DPCYL(dp->dp_ecyl, dp->dp_esect) + 1;
225 if (max_ncyls < ncyls)
226 max_ncyls = ncyls;
227 nsectors = DPSECT(dp->dp_esect);
228 if (max_nsectors < nsectors)
229 max_nsectors = nsectors;
230 ntracks = dp->dp_ehd + 1;
231 if (max_ntracks < ntracks)
232 max_ntracks = ntracks;
236 * Check that we have guessed the geometry right by checking the
237 * partition entries.
240 * TODO:
241 * As above.
242 * Check for overlaps.
243 * Check against d_secperunit if the latter is reliable.
245 error = 0;
246 for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
247 if (dp->dp_scyl == 0 && dp->dp_shd == 0 && dp->dp_ssect == 0
248 && dp->dp_start == 0 && dp->dp_size == 0)
249 continue;
250 //sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
251 // WHOLE_SLICE_PART, partname);
254 * Temporarily ignore errors from this check. We could
255 * simplify things by accepting the table eariler if we
256 * always ignore errors here. Perhaps we should always
257 * accept the table if the magic is right but not let
258 * bad entries affect the geometry.
260 check_part(sname, dp, mbr_offset, max_nsectors, max_ntracks,
261 mbr_offset);
263 if (error != 0)
264 goto done;
267 * Accept the DOS partition table.
269 * Adjust the disk information structure with updated CHS
270 * conversion parameters, but only use values extracted from
271 * the primary partition table.
273 * NOTE! Regardless of our having to deal with this old cruft,
274 * we do not screw around with the info->d_media* parameters.
276 secpercyl = (u_long)max_nsectors * max_ntracks;
277 if (secpercyl != 0 && mbr_offset == DOSBBSECTOR) {
278 info->d_secpertrack = max_nsectors;
279 info->d_nheads = max_ntracks;
280 info->d_secpercyl = secpercyl;
281 info->d_ncylinders = info->d_media_blocks / secpercyl;
285 * We are passed a pointer to a suitably initialized minimal
286 * slices "struct" with no dangling pointers in it. Replace it
287 * by a maximal one. This usually oversizes the "struct", but
288 * enlarging it while searching for logical drives would be
289 * inconvenient.
291 kfree(*sspp, M_DEVBUF);
292 ssp = dsmakeslicestruct(MAX_SLICES, info);
293 *sspp = ssp;
295 /* Initialize normal slices. */
296 sp = &ssp->dss_slices[BASE_SLICE];
297 for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++, sp++) {
298 sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
299 WHOLE_SLICE_PART, partname);
300 (void)mbr_setslice(sname, info, sp, dp, mbr_offset);
302 ssp->dss_nslices = BASE_SLICE + NDOSPART;
304 /* Handle extended partitions. */
305 sp -= NDOSPART;
306 for (dospart = 0; dospart < NDOSPART; dospart++, sp++) {
307 if (sp->ds_type == DOSPTYP_EXTENDED ||
308 sp->ds_type == DOSPTYP_EXTENDEDX) {
309 mbr_extended(wdev, info, ssp,
310 sp->ds_offset, sp->ds_size, sp->ds_offset,
311 max_nsectors, max_ntracks, mbr_offset, 1);
316 * mbr_extended() abuses ssp->dss_nslices for the number of slices
317 * that would be found if there were no limit on the number of slices
318 * in *ssp. Cut it back now.
320 if (ssp->dss_nslices > MAX_SLICES)
321 ssp->dss_nslices = MAX_SLICES;
323 done:
324 bp->b_flags |= B_INVAL | B_AGE;
325 brelse(bp);
326 if (error == EINVAL)
327 error = 0;
328 return (error);
331 static int
332 check_part(char *sname, struct dos_partition *dp, u_int64_t offset,
333 int nsectors, int ntracks, u_int64_t mbr_offset)
335 int chs_ecyl;
336 int chs_esect;
337 int chs_scyl;
338 int chs_ssect;
339 int error;
340 u_long secpercyl;
341 u_int64_t esector;
342 u_int64_t esector1;
343 u_int64_t ssector;
344 u_int64_t ssector1;
346 secpercyl = (u_long)nsectors * ntracks;
347 chs_scyl = DPCYL(dp->dp_scyl, dp->dp_ssect);
348 chs_ssect = DPSECT(dp->dp_ssect);
349 ssector = chs_ssect - 1 + dp->dp_shd * nsectors + chs_scyl * secpercyl
350 + mbr_offset;
351 ssector1 = offset + dp->dp_start;
354 * If ssector1 is on a cylinder >= 1024, then ssector can't be right.
355 * Allow the C/H/S for it to be 1023/ntracks-1/nsectors, or correct
356 * apart from the cylinder being reduced modulo 1024. Always allow
357 * 1023/255/63, because this is the official way to represent
358 * pure-LBA for the starting position.
360 if ((ssector < ssector1
361 && ((chs_ssect == nsectors && dp->dp_shd == ntracks - 1
362 && chs_scyl == 1023)
363 || (secpercyl != 0
364 && (ssector1 - ssector) % (1024 * secpercyl) == 0)))
365 || (dp->dp_scyl == 255 && dp->dp_shd == 255
366 && dp->dp_ssect == 255)) {
367 TRACE(("%s: C/H/S start %d/%d/%d, start %llu: allow\n",
368 sname, chs_scyl, dp->dp_shd, chs_ssect,
369 (long long)ssector1));
370 ssector = ssector1;
373 chs_ecyl = DPCYL(dp->dp_ecyl, dp->dp_esect);
374 chs_esect = DPSECT(dp->dp_esect);
375 esector = chs_esect - 1 + dp->dp_ehd * nsectors + chs_ecyl * secpercyl
376 + mbr_offset;
377 esector1 = ssector1 + dp->dp_size - 1;
380 * Allow certain bogus C/H/S values for esector, as above. However,
381 * heads == 255 isn't really legal and causes some BIOS crashes. The
382 * correct value to indicate a pure-LBA end is 1023/heads-1/sectors -
383 * usually 1023/254/63. "heads" is base 0, "sectors" is base 1.
385 if ((esector < esector1
386 && ((chs_esect == nsectors && dp->dp_ehd == ntracks - 1
387 && chs_ecyl == 1023)
388 || (secpercyl != 0
389 && (esector1 - esector) % (1024 * secpercyl) == 0)))
390 || (dp->dp_ecyl == 255 && dp->dp_ehd == 255
391 && dp->dp_esect == 255)) {
392 TRACE(("%s: C/H/S end %d/%d/%d, end %llu: allow\n",
393 sname, chs_ecyl, dp->dp_ehd, chs_esect,
394 (long long)esector1));
395 esector = esector1;
398 error = (ssector == ssector1 && esector == esector1) ? 0 : EINVAL;
399 if (bootverbose)
400 kprintf("%s: type 0x%x, start %llu, end = %llu, size %u %s\n",
401 sname, dp->dp_typ,
402 (long long)ssector1, (long long)esector1,
403 dp->dp_size, (error ? "" : ": OK"));
404 if (ssector != ssector1 && bootverbose)
405 kprintf("%s: C/H/S start %d/%d/%d (%llu) != start %llu: invalid\n",
406 sname, chs_scyl, dp->dp_shd, chs_ssect,
407 (long long)ssector, (long long)ssector1);
408 if (esector != esector1 && bootverbose)
409 kprintf("%s: C/H/S end %d/%d/%d (%llu) != end %llu: invalid\n",
410 sname, chs_ecyl, dp->dp_ehd, chs_esect,
411 (long long)esector, (long long)esector1);
412 return (error);
415 static
416 void
417 mbr_extended(cdev_t dev, struct disk_info *info, struct diskslices *ssp,
418 u_int64_t ext_offset, u_int64_t ext_size, u_int64_t base_ext_offset,
419 int nsectors, int ntracks, u_int64_t mbr_offset, int level)
421 struct buf *bp;
422 u_char *cp;
423 int dospart;
424 struct dos_partition *dp;
425 struct dos_partition dpcopy[NDOSPART];
426 u_int64_t ext_offsets[NDOSPART];
427 u_int64_t ext_sizes[NDOSPART];
428 char partname[2];
429 int slice;
430 char *sname;
431 struct diskslice *sp;
433 if (level >= 16) {
434 kprintf(
435 "%s: excessive recursion in search for slices; aborting search\n",
436 devtoname(dev));
437 return;
440 /* Read extended boot record. */
441 bp = geteblk((int)info->d_media_blksize);
442 bp->b_bio1.bio_offset = (off_t)ext_offset * info->d_media_blksize;
443 bp->b_bio1.bio_done = biodone_sync;
444 bp->b_bio1.bio_flags |= BIO_SYNC;
445 bp->b_bcount = info->d_media_blksize;
446 bp->b_cmd = BUF_CMD_READ;
447 dev_dstrategy(dev, &bp->b_bio1);
448 if (biowait(&bp->b_bio1, "mbrrd") != 0) {
449 diskerr(&bp->b_bio1, dev,
450 "reading extended partition table: error",
451 LOG_PRINTF, 0);
452 kprintf("\n");
453 goto done;
456 /* Weakly verify it. */
457 cp = bp->b_data;
458 if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
459 sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE, WHOLE_SLICE_PART,
460 partname);
461 if (bootverbose)
462 kprintf("%s: invalid extended partition table: no magic\n",
463 sname);
464 goto done;
467 /* Make a copy of the partition table to avoid alignment problems. */
468 memcpy(&dpcopy[0], cp + DOSPARTOFF, sizeof(dpcopy));
470 slice = ssp->dss_nslices;
471 for (dospart = 0, dp = &dpcopy[0]; dospart < NDOSPART;
472 dospart++, dp++) {
473 ext_sizes[dospart] = 0;
474 if (dp->dp_scyl == 0 && dp->dp_shd == 0 && dp->dp_ssect == 0
475 && dp->dp_start == 0 && dp->dp_size == 0)
476 continue;
477 if (dp->dp_typ == DOSPTYP_EXTENDED ||
478 dp->dp_typ == DOSPTYP_EXTENDEDX) {
479 static char buf[32];
481 sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE,
482 WHOLE_SLICE_PART, partname);
483 ksnprintf(buf, sizeof(buf), "%s", sname);
484 if (strlen(buf) < sizeof buf - 11)
485 strcat(buf, "<extended>");
486 check_part(buf, dp, base_ext_offset, nsectors,
487 ntracks, mbr_offset);
488 ext_offsets[dospart] = base_ext_offset + dp->dp_start;
489 ext_sizes[dospart] = dp->dp_size;
490 } else {
491 sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART,
492 partname);
493 check_part(sname, dp, ext_offset, nsectors, ntracks,
494 mbr_offset);
495 if (slice >= MAX_SLICES) {
496 kprintf("%s: too many slices\n", sname);
497 slice++;
498 continue;
500 sp = &ssp->dss_slices[slice];
501 if (mbr_setslice(sname, info, sp, dp, ext_offset) != 0)
502 continue;
503 slice++;
506 ssp->dss_nslices = slice;
508 /* If we found any more slices, recursively find all the subslices. */
509 for (dospart = 0; dospart < NDOSPART; dospart++) {
510 if (ext_sizes[dospart] != 0) {
511 mbr_extended(dev, info, ssp, ext_offsets[dospart],
512 ext_sizes[dospart], base_ext_offset,
513 nsectors, ntracks, mbr_offset, ++level);
517 done:
518 bp->b_flags |= B_INVAL | B_AGE;
519 brelse(bp);
522 static int
523 mbr_setslice(char *sname, struct disk_info *info, struct diskslice *sp,
524 struct dos_partition *dp, u_int64_t br_offset)
526 u_int64_t offset;
527 u_int64_t size;
529 offset = br_offset + dp->dp_start;
530 if (offset > info->d_media_blocks || offset < br_offset) {
531 kprintf(
532 "%s: slice starts beyond end of the disk: rejecting it\n",
533 sname);
534 return (1);
536 size = info->d_media_blocks - offset;
537 if (size >= dp->dp_size) {
538 if (dp->dp_size == 0xFFFFFFFFU) {
539 kprintf("%s: slice >2TB, using media size instead "
540 "of slice table size\n", sname);
541 } else {
542 size = dp->dp_size;
544 } else {
545 kprintf("%s: slice extends beyond end of disk: "
546 "truncating from %u to %llu sectors\n",
547 sname, dp->dp_size, (unsigned long long)size);
549 sp->ds_offset = offset;
550 sp->ds_size = size;
551 sp->ds_type = dp->dp_typ;
552 bzero(&sp->ds_type_uuid, sizeof(sp->ds_type_uuid));
553 bzero(&sp->ds_stor_uuid, sizeof(sp->ds_type_uuid));
556 * Slices do not overlap with the parent (if any).
558 sp->ds_reserved = 0;
559 return (0);