2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
40 #include <sys/disklabel.h>
41 #include <sys/disklabel64.h>
42 #include <sys/diskslice.h>
44 #include <sys/kern_syscall.h>
48 * Alignment against physical start (versus slice start). We use a megabyte
49 * here. Why do we use a megabyte? Because SSDs already use large 128K
50 * blocks internally (for MLC) and who the hell knows in the future.
52 * This way if the sysop picks sane values for partition sizes everything
53 * will be nicely aligned, particularly swap for e.g. swapcache, and
54 * clustered operations against larger physical sector sizes for newer HDs,
/* Partition alignment unit: 1024 * 1024, i.e. the megabyte discussed above. */
57 #define PALIGN_SIZE (1024 * 1024)
/* Low-bit mask of PALIGN_SIZE, used to round byte offsets to that unit. */
58 #define PALIGN_MASK (PALIGN_SIZE - 1)
61 * Retrieve the partition start and extent, in blocks. Return 0 on success,
/*
 * Look up partition index part in the 64-bit label lp.lab64 and report its
 * start and length through *start and *blocks, both expressed in units of
 * ssp->dss_secsize.
 *
 * NOTE(review): the return statements and the bodies of the two rejecting
 * branches are not visible in this extract - confirm the error value.
 */
65 l64_getpartbounds(struct diskslices
*ssp
, disklabel_t lp
, u_int32_t part
,
66 u_int64_t
*start
, u_int64_t
*blocks
)
68 struct partition64
*pp
;
/* Range-check the index before touching d_partitions[]. */
70 if (part
>= lp
.lab64
->d_npartitions
)
73 pp
= &lp
.lab64
->d_partitions
[part
];
/*
 * Offset and size must both be multiples of the sector size.  The mask
 * test presumably relies on dss_secsize being a power of two - confirm.
 */
75 if ((pp
->p_boffset
& (ssp
->dss_secsize
- 1)) ||
76 (pp
->p_bsize
& (ssp
->dss_secsize
- 1))) {
/* Convert the label values into sector counts for the caller. */
79 *start
= pp
->p_boffset
/ ssp
->dss_secsize
;
80 *blocks
= pp
->p_bsize
/ ssp
->dss_secsize
;
85 * Get the filesystem type XXX - diskslices code needs to use uuids
/*
 * Fill dpart with the type UUID, storage UUID and fstype of partition
 * index part taken from lp.lab64.
 *
 * NOTE(review): the two bzero calls presumably form the out-of-range
 * branch; the line separating the branches is not visible in this extract.
 */
88 l64_loadpartinfo(disklabel_t lp
, u_int32_t part
, struct partinfo
*dpart
)
90 struct partition64
*pp
;
/* Byte count used when clearing the UUID fields below. */
91 const size_t uuid_size
= sizeof(struct uuid
);
/* In-range index: copy the three fields straight from the label entry. */
93 if (part
< lp
.lab64
->d_npartitions
) {
94 pp
= &lp
.lab64
->d_partitions
[part
];
95 dpart
->fstype_uuid
= pp
->p_type_uuid
;
96 dpart
->storage_uuid
= pp
->p_stor_uuid
;
97 dpart
->fstype
= pp
->p_fstype
;
/* Clear both UUID fields of dpart. */
99 bzero(&dpart
->fstype_uuid
, uuid_size
);
100 bzero(&dpart
->storage_uuid
, uuid_size
);
106 * Get the number of partitions
/* Return the d_npartitions field of the 64-bit label lp.lab64. */
109 l64_getnumparts(disklabel_t lp
)
111 return(lp
.lab64
->d_npartitions
);
/*
 * Copy the pack name stored in lp.lab64->d_packname into buf.
 *
 * NOTE(review): how bytes limits the copy, whether buf is terminated and
 * what is returned are all on lines not visible in this extract - confirm
 * before relying on them.
 */
115 l64_getpackname(disklabel_t lp
, char *buf
, size_t bytes
)
/* A pack name whose first byte is zero is special-cased (body not shown). */
119 if (lp
.lab64
->d_packname
[0] == 0) {
/* Bound the scan by the array size so an unterminated name is safe. */
123 slen
= strnlen(lp
.lab64
->d_packname
, sizeof(lp
.lab64
->d_packname
));
/* Copies slen bytes only; no terminator is written by this call. */
126 bcopy(lp
.lab64
->d_packname
, buf
, slen
);
/*
 * Release the 64-bit label referenced through lpp with kfree, using the
 * M_DEVBUF malloc type (the same type the label is allocated with in
 * l64_readdisklabel and l64_clone_label).
 */
133 l64_freedisklabel(disklabel_t
*lpp
)
135 kfree((*lpp
).lab64
, M_DEVBUF
);
140 * Attempt to read a disk label from a device. 64 bit disklabels are
141 * sector-agnostic and begin at offset 0 on the device.
143 * Returns NULL on success, and an error string on failure.
/*
 * Read a disklabel64 from byte offset 0 of dev with a synchronous buffer
 * read, validate its magic, partition count and CRC, and on success store
 * a kmalloc-allocated copy in (*lpp).lab64.  Failures are reported through
 * the msg string.
 *
 * NOTE(review): the I/O error branch, the final buffer release and the
 * return statement are not visible in this extract.
 */
146 l64_readdisklabel(cdev_t dev
, struct diskslice
*sp
, disklabel_t
*lpp
,
147 struct disk_info
*info
)
150 struct disklabel64
*dlp
;
158 * XXX I/O size is subject to device DMA limitations
/* Transfer length: the label structure rounded up to the media block size. */
160 secsize
= info
->d_media_blksize
;
161 bpsize
= roundup2(sizeof(*dlp
), secsize
);
/* Obtain a buffer and assert that it can hold the whole transfer. */
163 bp
= getpbuf_mem(NULL
);
164 KKASSERT(bpsize
<= bp
->b_bufsize
);
/* Set up a synchronous read of bpsize bytes starting at offset 0. */
165 bp
->b_bio1
.bio_offset
= 0;
166 bp
->b_bio1
.bio_done
= biodone_sync
;
167 bp
->b_bio1
.bio_flags
|= BIO_SYNC
;
168 bp
->b_bcount
= bpsize
;
169 bp
->b_flags
&= ~B_INVAL
;
170 bp
->b_flags
|= B_FAILONDIS
;
171 bp
->b_cmd
= BUF_CMD_READ
;
172 dev_dstrategy(dev
, &bp
->b_bio1
);
/* Non-zero from biowait selects the failure branch (body not shown). */
174 if (biowait(&bp
->b_bio1
, "labrd")) {
177 dlp
= (struct disklabel64
*)bp
->b_data
;
/*
 * CRC span: from d_magic up to the end of partition entry
 * d_npartitions - 1.  The size is computed from an unvalidated count,
 * but crc32 is only reached after the MAXPARTITIONS64 test in the
 * else-if chain below.
 */
178 dlpcrcsize
= offsetof(struct disklabel64
,
179 d_partitions
[dlp
->d_npartitions
]) -
180 offsetof(struct disklabel64
, d_magic
);
/* Remember the on-media CRC; it is written back once the checks pass. */
181 savecrc
= dlp
->d_crc
;
/* Validation order: magic, then partition count, then CRC. */
183 if (dlp
->d_magic
!= DISKMAGIC64
) {
184 msg
= "no disk label";
185 } else if (dlp
->d_npartitions
> MAXPARTITIONS64
) {
186 msg
= "disklabel64 corrupted, too many partitions";
187 } else if (savecrc
!= crc32(&dlp
->d_magic
, dlpcrcsize
)) {
188 msg
= "disklabel64 corrupted, bad CRC";
/* All checks passed: restore d_crc and hand back a heap copy of the label. */
190 dlp
->d_crc
= savecrc
;
191 (*lpp
).lab64
= kmalloc(sizeof(*dlp
),
192 M_DEVBUF
, M_WAITOK
|M_ZERO
);
193 *(*lpp
).lab64
= *dlp
;
/* Flag the buffer B_INVAL | B_AGE now that its data has been consumed. */
197 bp
->b_flags
|= B_INVAL
| B_AGE
;
204 * If everything is good, copy olpx to nlpx. Check to see if any
205 * open partitions would change.
/*
 * Validate the proposed label nlpx against the slice geometry and against
 * the partitions that are currently open (openmask), then regenerate its
 * CRC.
 *
 * NOTE(review): the assignments of olp and nlp, every failure action and
 * the final label copy are on lines not visible in this extract, so the
 * error values and the copy direction must be confirmed against the full
 * file.
 */
208 l64_setdisklabel(disklabel_t olpx
, disklabel_t nlpx
, struct diskslices
*ssp
,
209 struct diskslice
*sp
, u_int32_t
*openmask
)
211 struct disklabel64
*olp
, *nlp
;
212 struct partition64
*opp
, *npp
;
/* Slice size in bytes; the cast widens the multiply to 64 bits. */
221 slicebsize
= (uint64_t)sp
->ds_size
* ssp
->dss_secsize
;
/* Basic header checks on the new label: magic and partition count. */
223 if (nlp
->d_magic
!= DISKMAGIC64
)
225 if (nlp
->d_npartitions
> MAXPARTITIONS64
)
/*
 * Verify the CRC over d_magic through the last partition entry.  d_crc is
 * put back to savecrc on both the mismatch path and the success path.
 */
227 savecrc
= nlp
->d_crc
;
229 nlpcrcsize
= offsetof(struct disklabel64
,
230 d_partitions
[nlp
->d_npartitions
]) -
231 offsetof(struct disklabel64
, d_magic
);
232 if (crc32(&nlp
->d_magic
, nlpcrcsize
) != savecrc
) {
233 nlp
->d_crc
= savecrc
;
236 nlp
->d_crc
= savecrc
;
239 * Check if open partitions have changed
/*
 * openmask is indexed as an array of 32-bit words: i >> 5 selects the
 * word and i & 31 the bit within it.
 */
242 while (i
< MAXPARTITIONS64
) {
/* A zero word means none of its partitions are open (body not shown). */
243 if (openmask
[i
>> 5] == 0) {
/*
 * NOTE(review): 1 << (i & 31) shifts a signed int; for bit 31 that is
 * undefined in ISO C.  Consider an unsigned constant.
 */
247 if ((openmask
[i
>> 5] & (1 << (i
& 31))) == 0) {
/* Partition i is open but would not exist in the new label. */
251 if (nlp
->d_npartitions
<= i
)
253 opp
= &olp
->d_partitions
[i
];
254 npp
= &nlp
->d_partitions
[i
];
/* An open partition may not move and may not shrink (growing passes). */
255 if (npp
->p_boffset
!= opp
->p_boffset
||
256 npp
->p_bsize
< opp
->p_bsize
) {
261 * Do not allow p_type_uuid or p_stor_uuid to change if
262 * the partition is currently open.
264 if (bcmp(&npp
->p_type_uuid
, &opp
->p_type_uuid
,
265 sizeof(npp
->p_type_uuid
)) != 0) {
268 if (bcmp(&npp
->p_stor_uuid
, &opp
->p_stor_uuid
,
269 sizeof(npp
->p_stor_uuid
)) != 0) {
276 * Make sure the label and partition offsets and sizes are sane.
/* The label may not claim more bytes than the slice provides. */
278 if (nlp
->d_total_size
> slicebsize
)
/* Total size and every base/stop offset must be sector aligned. */
280 if (nlp
->d_total_size
& (ssp
->dss_secsize
- 1))
282 if (nlp
->d_bbase
& (ssp
->dss_secsize
- 1))
284 if (nlp
->d_pbase
& (ssp
->dss_secsize
- 1))
286 if (nlp
->d_pstop
& (ssp
->dss_secsize
- 1))
288 if (nlp
->d_abase
& (ssp
->dss_secsize
- 1))
/* Per-partition sanity checks on the new label. */
291 for (i
= 0; i
< nlp
->d_npartitions
; ++i
) {
292 npp
= &nlp
->d_partitions
[i
];
/* A zero-sized partition is additionally tested for a zero offset. */
293 if (npp
->p_bsize
== 0) {
294 if (npp
->p_boffset
!= 0)
/* Offset and size must be sector aligned. */
298 if (npp
->p_boffset
& (ssp
->dss_secsize
- 1))
300 if (npp
->p_bsize
& (ssp
->dss_secsize
- 1))
/* The partition may not start before d_pbase. */
302 if (npp
->p_boffset
< nlp
->d_pbase
)
/*
 * The partition may not extend past d_total_size.
 * NOTE(review): the sum could wrap if both fields are near the maximum of
 * their type - confirm field widths and whether a wrap check is needed.
 */
304 if (npp
->p_boffset
+ npp
->p_bsize
> nlp
->d_total_size
)
309 * Structurally we may add code to make modifications above in the
310 * future, so regenerate the crc anyway.
313 nlp
->d_crc
= crc32(&nlp
->d_magic
, nlpcrcsize
);
320 * Write disk label back to device after modification.
/*
 * Write the in-memory label lpx back to byte offset 0 of dev using a
 * read-modify-write cycle: read the covering sectors, overlay the label
 * from d_magic onward, then write the buffer back.
 *
 * NOTE(review): the assignment of lp, the handling of error after each
 * biowait, the buffer release and the return are not visible in this
 * extract.
 */
323 l64_writedisklabel(cdev_t dev
, struct diskslices
*ssp
,
324 struct diskslice
*sp
, disklabel_t lpx
)
326 struct disklabel64
*lp
;
327 struct disklabel64
*dlp
;
336 * XXX I/O size is subject to device DMA limitations
/* Transfer length: the label structure rounded up to the sector size. */
338 secsize
= ssp
->dss_secsize
;
339 bpsize
= roundup2(sizeof(*lp
), secsize
);
/* Obtain a buffer and assert that it can hold the whole transfer. */
341 bp
= getpbuf_mem(NULL
);
342 KKASSERT(bpsize
<= bp
->b_bufsize
);
343 bp
->b_bio1
.bio_offset
= 0;
344 bp
->b_bio1
.bio_done
= biodone_sync
;
345 bp
->b_bio1
.bio_flags
|= BIO_SYNC
;
346 bp
->b_bcount
= bpsize
;
347 bp
->b_flags
|= B_FAILONDIS
;
350 * Because our I/O is larger then the label, and because we do not
351 * write the d_reserved0[] area, do a read-modify-write.
/* Phase 1: synchronous read of the existing on-media contents. */
353 bp
->b_flags
&= ~B_INVAL
;
354 bp
->b_cmd
= BUF_CMD_READ
;
/* The device handed in must be the whole-slice partition. */
355 KKASSERT(dkpart(dev
) == WHOLE_SLICE_PART
);
356 dev_dstrategy(dev
, &bp
->b_bio1
);
357 error
= biowait(&bp
->b_bio1
, "labrd");
/*
 * Phase 2: overlay the label starting at d_magic, leaving whatever
 * precedes d_magic in the buffer as it was read.
 */
361 dlp
= (void *)bp
->b_data
;
362 bcopy(&lp
->d_magic
, &dlp
->d_magic
,
363 sizeof(*lp
) - offsetof(struct disklabel64
, d_magic
));
/* Phase 3: re-arm the bio for a synchronous write of the same buffer. */
364 bp
->b_cmd
= BUF_CMD_WRITE
;
365 bp
->b_bio1
.bio_done
= biodone_sync
;
366 bp
->b_bio1
.bio_flags
|= BIO_SYNC
;
367 KKASSERT(dkpart(dev
) == WHOLE_SLICE_PART
);
368 dev_dstrategy(dev
, &bp
->b_bio1
);
369 error
= biowait(&bp
->b_bio1
, "labwr");
/* Flag the buffer B_INVAL | B_AGE once the I/O is finished. */
371 bp
->b_flags
|= B_INVAL
| B_AGE
;
378 * Create a disklabel based on a disk_info structure for the purposes of
379 * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
381 * If a diskslice is passed, the label is truncated to the slice.
383 * NOTE! This is not a legal label because d_bbase and d_pbase are both
/*
 * Allocate and fill a synthetic disklabel64 from info (and optionally sp)
 * for media that carries no real label.
 *
 * NOTE(review): the condition choosing between the two d_total_size
 * assignments and the return statement are not visible in this extract.
 */
387 l64_clone_label(struct disk_info
*info
, struct diskslice
*sp
)
389 struct disklabel64
*lp
;
391 uint32_t blksize
= info
->d_media_blksize
;
/* Zero-filled allocation; M_WAITOK is passed to kmalloc. */
394 lp
= kmalloc(sizeof *lp
, M_DEVBUF
, M_WAITOK
| M_ZERO
);
/* Slice-sized variant: size taken from sp->ds_size, widened to 64 bits. */
397 lp
->d_total_size
= (uint64_t)sp
->ds_size
* blksize
;
/*
 * Whole-media variant: size taken from info->d_media_blocks.
 * NOTE(review): no widening cast here; presumably d_media_blocks is
 * already 64 bits wide - confirm.
 */
399 lp
->d_total_size
= info
->d_media_blocks
* blksize
;
/* Header fields; d_pstop is set to the full size of the label. */
401 lp
->d_magic
= DISKMAGIC64
;
402 lp
->d_align
= blksize
;
403 lp
->d_npartitions
= MAXPARTITIONS64
;
404 lp
->d_pstop
= lp
->d_total_size
;
407 * Create a dummy 'c' part and a dummy 'a' part (if requested).
408 * Note that the 'c' part is really a hack. 64 bit disklabels
409 * do not use 'c' to mean the raw partition.
/* Index 2 spans the whole label, starting at offset 0. */
412 lp
->d_partitions
[2].p_boffset
= 0;
413 lp
->d_partitions
[2].p_bsize
= lp
->d_total_size
;
414 /* XXX SET FS TYPE */
/* Index 0 is populated the same way only when DSO_COMPATPARTA is set. */
416 if (info
->d_dsflags
& DSO_COMPATPARTA
) {
417 lp
->d_partitions
[0].p_boffset
= 0;
418 lp
->d_partitions
[0].p_bsize
= lp
->d_total_size
;
419 /* XXX SET FS TYPE */
/* CRC span: d_magic through the last of the d_npartitions entries. */
422 lpcrcsize
= offsetof(struct disklabel64
,
423 d_partitions
[lp
->d_npartitions
]) -
424 offsetof(struct disklabel64
, d_magic
);
426 lp
->d_crc
= crc32(&lp
->d_magic
, lpcrcsize
);
432 * Create a virgin disklabel64 suitable for writing to the media.
434 * disklabel64 always reserves 32KB for a boot area and leaves room
435 * for up to RESPARTITIONS64 partitions.
/*
 * Initialise the label lpx.lab64 in place as a fresh, empty disklabel64:
 * zero it, fill in the header, lay out the reserved, boot and backup
 * areas, align the partition area to PALIGN_SIZE and compute the CRC.
 *
 * NOTE(review): the body of the blksize < 4096 test, the condition that
 * chooses between the two d_total_size assignments and the return are not
 * visible in this extract.
 */
438 l64_makevirginlabel(disklabel_t lpx
, struct diskslices
*ssp
,
439 struct diskslice
*sp
, struct disk_info
*info
)
441 struct disklabel64
*lp
= lpx
.lab64
;
442 struct partition64
*pp
;
445 uint64_t blkmask
; /* 64 bits so we can ~ */
/*
 * Offset of the slice start, in bytes, used for the alignment below.
 * NOTE(review): the declared type of doffset is not visible - confirm the
 * multiply cannot truncate.
 */
449 doffset
= sp
->ds_offset
* info
->d_media_blksize
;
452 * Setup the initial label. Use of a block size of at least 4KB
453 * for calculating the initial reserved areas to allow some degree
454 * of portability between media with different sector sizes.
456 * Note that the modified blksize is stored in d_align as a hint
457 * to the disklabeling program.
459 bzero(lp
, sizeof(*lp
));
/* Block sizes below 4096 are special-cased (body not shown). */
460 if ((blksize
= info
->d_media_blksize
) < 4096)
462 blkmask
= blksize
- 1;
/* Slice-sized variant of the total size, widened to 64 bits. */
465 lp
->d_total_size
= (uint64_t)sp
->ds_size
* ssp
->dss_secsize
;
/* Whole-media variant of the total size. */
467 lp
->d_total_size
= info
->d_media_blocks
* info
->d_media_blksize
;
469 lp
->d_magic
= DISKMAGIC64
;
470 lp
->d_align
= blksize
;
471 lp
->d_npartitions
= MAXPARTITIONS64
;
/* Have kern_uuidgen generate one UUID into d_stor_uuid. */
472 kern_uuidgen(&lp
->d_stor_uuid
, 1);
/*
 * Reserved header size: the label up to RESPARTITIONS64 partition
 * entries, rounded up to a multiple of blksize.
 */
474 ressize
= offsetof(struct disklabel64
, d_partitions
[RESPARTITIONS64
]);
475 ressize
= (ressize
+ (uint32_t)blkmask
) & ~blkmask
;
477 /* Reserve space for the stage2 boot code */
478 lp
->d_bbase
= ressize
;
/* Partition area starts after BOOT2SIZE64 rounded up to blksize. */
479 lp
->d_pbase
= lp
->d_bbase
+ ((BOOT2SIZE64
+ blkmask
) & ~blkmask
);
481 /* Reserve space for the backup label at the slice end */
482 lp
->d_abase
= lp
->d_total_size
- ressize
;
485 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE
486 * and adjusted with the slice offset, so the partitions are
487 * aligned relative to the start of the physical disk.
/* Round doffset + d_pbase up to PALIGN_SIZE, then subtract doffset again. */
489 lp
->d_pbase
= ((doffset
+ lp
->d_pbase
+ PALIGN_MASK
) &
490 ~(uint64_t)PALIGN_MASK
) - doffset
;
/* Round the usable span down to PALIGN_SIZE and add it to d_pbase. */
491 lp
->d_pstop
= ((lp
->d_abase
- lp
->d_pbase
) &
492 ~(uint64_t)PALIGN_MASK
) + lp
->d_pbase
;
495 * All partitions are left empty unless DSO_COMPATPARTA is set
/* Index 0 then covers the whole aligned area from d_pbase to d_pstop. */
498 if (info
->d_dsflags
& DSO_COMPATPARTA
) {
499 pp
= &lp
->d_partitions
[0];
500 pp
->p_boffset
= lp
->d_pbase
;
501 pp
->p_bsize
= lp
->d_pstop
- lp
->d_pbase
;
502 /* XXX SET FS TYPE */
/* CRC span: d_magic through the last of the d_npartitions entries. */
505 lpcrcsize
= offsetof(struct disklabel64
,
506 d_partitions
[lp
->d_npartitions
]) -
507 offsetof(struct disklabel64
, d_magic
);
508 lp
->d_crc
= crc32(&lp
->d_magic
, lpcrcsize
);
512 * Set the number of blocks at the beginning of the slice which have
513 * been reserved for label operations. This area will be write-protected
514 * when accessed via the slice.
516 * For now just protect the label area proper. Do not protect the
517 * boot area. Note partitions in 64 bit disklabels do not overlap
518 * the disklabel or boot area.
/*
 * Set sp->ds_reserved from the label attached to the slice: d_bbase
 * divided by ssp->dss_secsize, i.e. the start of the boot area expressed
 * in sectors.  The slice argument is not used by the visible lines.
 */
521 l64_adjust_label_reserved(struct diskslices
*ssp
, int slice
,
522 struct diskslice
*sp
)
524 struct disklabel64
*lp
= sp
->ds_label
.lab64
;
526 sp
->ds_reserved
= lp
->d_bbase
/ ssp
->dss_secsize
;
/*
 * Operations table for 64-bit disklabels: records the size of struct
 * disklabel64 and binds each disklabel_ops slot to the matching l64_*
 * function defined in this file.
 */
529 struct disklabel_ops disklabel64_ops
= {
530 .labelsize
= sizeof(struct disklabel64
),
531 .op_readdisklabel
= l64_readdisklabel
,
532 .op_setdisklabel
= l64_setdisklabel
,
533 .op_writedisklabel
= l64_writedisklabel
,
534 .op_clone_label
= l64_clone_label
,
535 .op_adjust_label_reserved
= l64_adjust_label_reserved
,
536 .op_getpartbounds
= l64_getpartbounds
,
537 .op_loadpartinfo
= l64_loadpartinfo
,
538 .op_getnumparts
= l64_getnumparts
,
539 .op_getpackname
= l64_getpackname
,
540 .op_makevirginlabel
= l64_makevirginlabel
,
541 .op_freedisklabel
= l64_freedisklabel