usr.sbin/makefs/ffs: Remove m_buf::b_is_hammer2
[dragonfly.git] / sys / kern / subr_disklabel64.c
blob478ab04a3cda5d31f701b42aee0806d77328c256
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/disklabel.h>
41 #include <sys/disklabel64.h>
42 #include <sys/diskslice.h>
43 #include <sys/disk.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/buf2.h>
/*
 * Alignment against physical start (versus slice start).  We use a megabyte
 * here.  Why do we use a megabyte?  Because SSDs already use large 128K
 * blocks internally (for MLC) and who the hell knows in the future.
 *
 * This way if the sysop picks sane values for partition sizes everything
 * will be nicely aligned, particularly swap for e.g. swapcache, and
 * clustered operations against larger physical sector sizes for newer HDs,
 * and so forth.
 *
 * PALIGN_MASK is the low-bit mask matching PALIGN_SIZE (a power of two).
 */
#define PALIGN_SIZE	(1024 * 1024)
#define PALIGN_MASK	(PALIGN_SIZE - 1)
61 * Retrieve the partition start and extent, in blocks. Return 0 on success,
62 * EINVAL on error.
64 static int
65 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part,
66 u_int64_t *start, u_int64_t *blocks)
68 struct partition64 *pp;
70 if (part >= lp.lab64->d_npartitions)
71 return (EINVAL);
73 pp = &lp.lab64->d_partitions[part];
75 if ((pp->p_boffset & (ssp->dss_secsize - 1)) ||
76 (pp->p_bsize & (ssp->dss_secsize - 1))) {
77 return (EINVAL);
79 *start = pp->p_boffset / ssp->dss_secsize;
80 *blocks = pp->p_bsize / ssp->dss_secsize;
81 return(0);
85 * Get the filesystem type XXX - diskslices code needs to use uuids
87 static void
88 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart)
90 struct partition64 *pp;
91 const size_t uuid_size = sizeof(struct uuid);
93 if (part < lp.lab64->d_npartitions) {
94 pp = &lp.lab64->d_partitions[part];
95 dpart->fstype_uuid = pp->p_type_uuid;
96 dpart->storage_uuid = pp->p_stor_uuid;
97 dpart->fstype = pp->p_fstype;
98 } else {
99 bzero(&dpart->fstype_uuid, uuid_size);
100 bzero(&dpart->storage_uuid, uuid_size);
101 dpart->fstype = 0;
106 * Get the number of partitions
108 static u_int32_t
109 l64_getnumparts(disklabel_t lp)
111 return(lp.lab64->d_npartitions);
114 static int
115 l64_getpackname(disklabel_t lp, char *buf, size_t bytes)
117 size_t slen;
119 if (lp.lab64->d_packname[0] == 0) {
120 buf[0] = 0;
121 return -1;
123 slen = strnlen(lp.lab64->d_packname, sizeof(lp.lab64->d_packname));
124 if (slen >= bytes)
125 slen = bytes - 1;
126 bcopy(lp.lab64->d_packname, buf, slen);
127 buf[slen] = 0;
129 return 0;
132 static void
133 l64_freedisklabel(disklabel_t *lpp)
135 kfree((*lpp).lab64, M_DEVBUF);
136 (*lpp).lab64 = NULL;
140 * Attempt to read a disk label from a device. 64 bit disklabels are
141 * sector-agnostic and begin at offset 0 on the device.
143 * Returns NULL on sucess, and an error string on failure.
145 static const char *
146 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
147 struct disk_info *info)
149 struct buf *bp;
150 struct disklabel64 *dlp;
151 const char *msg;
152 uint32_t savecrc;
153 size_t dlpcrcsize;
154 size_t bpsize;
155 int secsize;
158 * XXX I/O size is subject to device DMA limitations
160 secsize = info->d_media_blksize;
161 bpsize = roundup2(sizeof(*dlp), secsize);
163 bp = getpbuf_mem(NULL);
164 KKASSERT(bpsize <= bp->b_bufsize);
165 bp->b_bio1.bio_offset = 0;
166 bp->b_bio1.bio_done = biodone_sync;
167 bp->b_bio1.bio_flags |= BIO_SYNC;
168 bp->b_bcount = bpsize;
169 bp->b_flags &= ~B_INVAL;
170 bp->b_flags |= B_FAILONDIS;
171 bp->b_cmd = BUF_CMD_READ;
172 dev_dstrategy(dev, &bp->b_bio1);
174 if (biowait(&bp->b_bio1, "labrd")) {
175 msg = "I/O error";
176 } else {
177 dlp = (struct disklabel64 *)bp->b_data;
178 dlpcrcsize = offsetof(struct disklabel64,
179 d_partitions[dlp->d_npartitions]) -
180 offsetof(struct disklabel64, d_magic);
181 savecrc = dlp->d_crc;
182 dlp->d_crc = 0;
183 if (dlp->d_magic != DISKMAGIC64) {
184 msg = "no disk label";
185 } else if (dlp->d_npartitions > MAXPARTITIONS64) {
186 msg = "disklabel64 corrupted, too many partitions";
187 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) {
188 msg = "disklabel64 corrupted, bad CRC";
189 } else {
190 dlp->d_crc = savecrc;
191 (*lpp).lab64 = kmalloc(sizeof(*dlp),
192 M_DEVBUF, M_WAITOK|M_ZERO);
193 *(*lpp).lab64 = *dlp;
194 msg = NULL;
197 bp->b_flags |= B_INVAL | B_AGE;
198 relpbuf(bp, NULL);
200 return (msg);
204 * If everything is good, copy olpx to nlpx. Check to see if any
205 * open partitions would change.
207 static int
208 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp,
209 struct diskslice *sp, u_int32_t *openmask)
211 struct disklabel64 *olp, *nlp;
212 struct partition64 *opp, *npp;
213 uint32_t savecrc;
214 uint64_t slicebsize;
215 size_t nlpcrcsize;
216 int i;
218 olp = olpx.lab64;
219 nlp = nlpx.lab64;
221 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize;
223 if (nlp->d_magic != DISKMAGIC64)
224 return (EINVAL);
225 if (nlp->d_npartitions > MAXPARTITIONS64)
226 return (EINVAL);
227 savecrc = nlp->d_crc;
228 nlp->d_crc = 0;
229 nlpcrcsize = offsetof(struct disklabel64,
230 d_partitions[nlp->d_npartitions]) -
231 offsetof(struct disklabel64, d_magic);
232 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) {
233 nlp->d_crc = savecrc;
234 return (EINVAL);
236 nlp->d_crc = savecrc;
239 * Check if open partitions have changed
241 i = 0;
242 while (i < MAXPARTITIONS64) {
243 if (openmask[i >> 5] == 0) {
244 i += 32;
245 continue;
247 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) {
248 ++i;
249 continue;
251 if (nlp->d_npartitions <= i)
252 return (EBUSY);
253 opp = &olp->d_partitions[i];
254 npp = &nlp->d_partitions[i];
255 if (npp->p_boffset != opp->p_boffset ||
256 npp->p_bsize < opp->p_bsize) {
257 return (EBUSY);
261 * Do not allow p_type_uuid or p_stor_uuid to change if
262 * the partition is currently open.
264 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid,
265 sizeof(npp->p_type_uuid)) != 0) {
266 return (EBUSY);
268 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid,
269 sizeof(npp->p_stor_uuid)) != 0) {
270 return (EBUSY);
272 ++i;
276 * Make sure the label and partition offsets and sizes are sane.
278 if (nlp->d_total_size > slicebsize)
279 return (ENOSPC);
280 if (nlp->d_total_size & (ssp->dss_secsize - 1))
281 return (EINVAL);
282 if (nlp->d_bbase & (ssp->dss_secsize - 1))
283 return (EINVAL);
284 if (nlp->d_pbase & (ssp->dss_secsize - 1))
285 return (EINVAL);
286 if (nlp->d_pstop & (ssp->dss_secsize - 1))
287 return (EINVAL);
288 if (nlp->d_abase & (ssp->dss_secsize - 1))
289 return (EINVAL);
291 for (i = 0; i < nlp->d_npartitions; ++i) {
292 npp = &nlp->d_partitions[i];
293 if (npp->p_bsize == 0) {
294 if (npp->p_boffset != 0)
295 return (EINVAL);
296 continue;
298 if (npp->p_boffset & (ssp->dss_secsize - 1))
299 return (EINVAL);
300 if (npp->p_bsize & (ssp->dss_secsize - 1))
301 return (EINVAL);
302 if (npp->p_boffset < nlp->d_pbase)
303 return (ENOSPC);
304 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size)
305 return (ENOSPC);
309 * Structurally we may add code to make modifications above in the
310 * future, so regenerate the crc anyway.
312 nlp->d_crc = 0;
313 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize);
314 *olp = *nlp;
316 return (0);
320 * Write disk label back to device after modification.
322 static int
323 l64_writedisklabel(cdev_t dev, struct diskslices *ssp,
324 struct diskslice *sp, disklabel_t lpx)
326 struct disklabel64 *lp;
327 struct disklabel64 *dlp;
328 struct buf *bp;
329 int error = 0;
330 size_t bpsize;
331 int secsize;
333 lp = lpx.lab64;
336 * XXX I/O size is subject to device DMA limitations
338 secsize = ssp->dss_secsize;
339 bpsize = roundup2(sizeof(*lp), secsize);
341 bp = getpbuf_mem(NULL);
342 KKASSERT(bpsize <= bp->b_bufsize);
343 bp->b_bio1.bio_offset = 0;
344 bp->b_bio1.bio_done = biodone_sync;
345 bp->b_bio1.bio_flags |= BIO_SYNC;
346 bp->b_bcount = bpsize;
347 bp->b_flags |= B_FAILONDIS;
350 * Because our I/O is larger then the label, and because we do not
351 * write the d_reserved0[] area, do a read-modify-write.
353 bp->b_flags &= ~B_INVAL;
354 bp->b_cmd = BUF_CMD_READ;
355 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
356 dev_dstrategy(dev, &bp->b_bio1);
357 error = biowait(&bp->b_bio1, "labrd");
358 if (error)
359 goto done;
361 dlp = (void *)bp->b_data;
362 bcopy(&lp->d_magic, &dlp->d_magic,
363 sizeof(*lp) - offsetof(struct disklabel64, d_magic));
364 bp->b_cmd = BUF_CMD_WRITE;
365 bp->b_bio1.bio_done = biodone_sync;
366 bp->b_bio1.bio_flags |= BIO_SYNC;
367 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
368 dev_dstrategy(dev, &bp->b_bio1);
369 error = biowait(&bp->b_bio1, "labwr");
370 done:
371 bp->b_flags |= B_INVAL | B_AGE;
372 relpbuf(bp, NULL);
374 return (error);
378 * Create a disklabel based on a disk_info structure for the purposes of
379 * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
381 * If a diskslice is passed, the label is truncated to the slice.
383 * NOTE! This is not a legal label because d_bbase and d_pbase are both
384 * set to 0.
386 static disklabel_t
387 l64_clone_label(struct disk_info *info, struct diskslice *sp)
389 struct disklabel64 *lp;
390 disklabel_t res;
391 uint32_t blksize = info->d_media_blksize;
392 size_t lpcrcsize;
394 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO);
396 if (sp)
397 lp->d_total_size = (uint64_t)sp->ds_size * blksize;
398 else
399 lp->d_total_size = info->d_media_blocks * blksize;
401 lp->d_magic = DISKMAGIC64;
402 lp->d_align = blksize;
403 lp->d_npartitions = MAXPARTITIONS64;
404 lp->d_pstop = lp->d_total_size;
407 * Create a dummy 'c' part and a dummy 'a' part (if requested).
408 * Note that the 'c' part is really a hack. 64 bit disklabels
409 * do not use 'c' to mean the raw partition.
412 lp->d_partitions[2].p_boffset = 0;
413 lp->d_partitions[2].p_bsize = lp->d_total_size;
414 /* XXX SET FS TYPE */
416 if (info->d_dsflags & DSO_COMPATPARTA) {
417 lp->d_partitions[0].p_boffset = 0;
418 lp->d_partitions[0].p_bsize = lp->d_total_size;
419 /* XXX SET FS TYPE */
422 lpcrcsize = offsetof(struct disklabel64,
423 d_partitions[lp->d_npartitions]) -
424 offsetof(struct disklabel64, d_magic);
426 lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
427 res.lab64 = lp;
428 return (res);
432 * Create a virgin disklabel64 suitable for writing to the media.
434 * disklabel64 always reserves 32KB for a boot area and leaves room
435 * for up to RESPARTITIONS64 partitions.
437 static void
438 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp,
439 struct diskslice *sp, struct disk_info *info)
441 struct disklabel64 *lp = lpx.lab64;
442 struct partition64 *pp;
443 uint32_t blksize;
444 uint32_t ressize;
445 uint64_t blkmask; /* 64 bits so we can ~ */
446 uint64_t doffset;
447 size_t lpcrcsize;
449 doffset = sp->ds_offset * info->d_media_blksize;
452 * Setup the initial label. Use of a block size of at least 4KB
453 * for calculating the initial reserved areas to allow some degree
454 * of portability between media with different sector sizes.
456 * Note that the modified blksize is stored in d_align as a hint
457 * to the disklabeling program.
459 bzero(lp, sizeof(*lp));
460 if ((blksize = info->d_media_blksize) < 4096)
461 blksize = 4096;
462 blkmask = blksize - 1;
464 if (sp)
465 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize;
466 else
467 lp->d_total_size = info->d_media_blocks * info->d_media_blksize;
469 lp->d_magic = DISKMAGIC64;
470 lp->d_align = blksize;
471 lp->d_npartitions = MAXPARTITIONS64;
472 kern_uuidgen(&lp->d_stor_uuid, 1);
474 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]);
475 ressize = (ressize + (uint32_t)blkmask) & ~blkmask;
477 /* Reserve space for the stage2 boot code */
478 lp->d_bbase = ressize;
479 lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask);
481 /* Reserve space for the backup label at the slice end */
482 lp->d_abase = lp->d_total_size - ressize;
485 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE
486 * and adjusted with the slice offset, so the partitions are
487 * aligned relative to the start of the physical disk.
489 lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) &
490 ~(uint64_t)PALIGN_MASK) - doffset;
491 lp->d_pstop = ((lp->d_abase - lp->d_pbase) &
492 ~(uint64_t)PALIGN_MASK) + lp->d_pbase;
495 * All partitions are left empty unless DSO_COMPATPARTA is set
498 if (info->d_dsflags & DSO_COMPATPARTA) {
499 pp = &lp->d_partitions[0];
500 pp->p_boffset = lp->d_pbase;
501 pp->p_bsize = lp->d_pstop - lp->d_pbase;
502 /* XXX SET FS TYPE */
505 lpcrcsize = offsetof(struct disklabel64,
506 d_partitions[lp->d_npartitions]) -
507 offsetof(struct disklabel64, d_magic);
508 lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
512 * Set the number of blocks at the beginning of the slice which have
513 * been reserved for label operations. This area will be write-protected
514 * when accessed via the slice.
516 * For now just protect the label area proper. Do not protect the
517 * boot area. Note partitions in 64 bit disklabels do not overlap
518 * the disklabel or boot area.
520 static void
521 l64_adjust_label_reserved(struct diskslices *ssp, int slice,
522 struct diskslice *sp)
524 struct disklabel64 *lp = sp->ds_label.lab64;
526 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize;
529 struct disklabel_ops disklabel64_ops = {
530 .labelsize = sizeof(struct disklabel64),
531 .op_readdisklabel = l64_readdisklabel,
532 .op_setdisklabel = l64_setdisklabel,
533 .op_writedisklabel = l64_writedisklabel,
534 .op_clone_label = l64_clone_label,
535 .op_adjust_label_reserved = l64_adjust_label_reserved,
536 .op_getpartbounds = l64_getpartbounds,
537 .op_loadpartinfo = l64_loadpartinfo,
538 .op_getnumparts = l64_getnumparts,
539 .op_getpackname = l64_getpackname,
540 .op_makevirginlabel = l64_makevirginlabel,
541 .op_freedisklabel = l64_freedisklabel