dconschat(8): Put the fflush() under 'verbose' too.
[dragonfly.git] / sys / kern / subr_disklabel64.c
blob84d6c92ac847033ab093c497cb5457d6f18bc734
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/conf.h>
39 #include <sys/disklabel.h>
40 #include <sys/disklabel64.h>
41 #include <sys/diskslice.h>
42 #include <sys/disk.h>
43 #include <sys/kern_syscall.h>
44 #include <sys/buf2.h>
/*
 * Alignment against the physical start of the media (versus the slice
 * start).  We use a megabyte here.  Why a megabyte?  Because SSDs already
 * use large 128K blocks internally (for MLC) and nobody knows what future
 * devices will want.
 *
 * This way, if the sysop picks sane values for partition sizes, everything
 * will be nicely aligned, particularly swap for e.g. swapcache, and
 * clustered operations against larger physical sector sizes for newer HDs,
 * and so forth.
 */
#define PALIGN_SIZE	(1024 * 1024)
#define PALIGN_MASK	(PALIGN_SIZE - 1)
60 * Retrieve the partition start and extent, in blocks. Return 0 on success,
61 * EINVAL on error.
63 static int
64 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part,
65 u_int64_t *start, u_int64_t *blocks)
67 struct partition64 *pp;
69 if (part >= lp.lab64->d_npartitions)
70 return (EINVAL);
72 pp = &lp.lab64->d_partitions[part];
74 if ((pp->p_boffset & (ssp->dss_secsize - 1)) ||
75 (pp->p_bsize & (ssp->dss_secsize - 1))) {
76 return (EINVAL);
78 *start = pp->p_boffset / ssp->dss_secsize;
79 *blocks = pp->p_bsize / ssp->dss_secsize;
80 return(0);
84 * Get the filesystem type XXX - diskslices code needs to use uuids
86 static void
87 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart)
89 struct partition64 *pp;
90 const size_t uuid_size = sizeof(struct uuid);
92 if (part < lp.lab64->d_npartitions) {
93 pp = &lp.lab64->d_partitions[part];
94 dpart->fstype_uuid = pp->p_type_uuid;
95 dpart->storage_uuid = pp->p_stor_uuid;
96 dpart->fstype = pp->p_fstype;
97 } else {
98 bzero(&dpart->fstype_uuid, uuid_size);
99 bzero(&dpart->storage_uuid, uuid_size);
100 dpart->fstype = 0;
105 * Get the number of partitions
107 static u_int32_t
108 l64_getnumparts(disklabel_t lp)
110 return(lp.lab64->d_npartitions);
113 static void
114 l64_freedisklabel(disklabel_t *lpp)
116 kfree((*lpp).lab64, M_DEVBUF);
117 (*lpp).lab64 = NULL;
121 * Attempt to read a disk label from a device. 64 bit disklabels are
122 * sector-agnostic and begin at offset 0 on the device. 64 bit disklabels
123 * may only be used with GPT partitioning schemes.
125 * Returns NULL on sucess, and an error string on failure.
127 static const char *
128 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
129 struct disk_info *info)
131 struct buf *bp;
132 struct disklabel64 *dlp;
133 const char *msg;
134 uint32_t savecrc;
135 size_t dlpcrcsize;
136 size_t bpsize;
137 int secsize;
140 * XXX I/O size is subject to device DMA limitations
142 secsize = info->d_media_blksize;
143 bpsize = roundup2(sizeof(*dlp), secsize);
145 bp = getpbuf_mem(NULL);
146 KKASSERT(bpsize <= bp->b_bufsize);
147 bp->b_bio1.bio_offset = 0;
148 bp->b_bio1.bio_done = biodone_sync;
149 bp->b_bio1.bio_flags |= BIO_SYNC;
150 bp->b_bcount = bpsize;
151 bp->b_flags &= ~B_INVAL;
152 bp->b_flags |= B_FAILONDIS;
153 bp->b_cmd = BUF_CMD_READ;
154 dev_dstrategy(dev, &bp->b_bio1);
156 if (biowait(&bp->b_bio1, "labrd")) {
157 msg = "I/O error";
158 } else {
159 dlp = (struct disklabel64 *)bp->b_data;
160 dlpcrcsize = offsetof(struct disklabel64,
161 d_partitions[dlp->d_npartitions]) -
162 offsetof(struct disklabel64, d_magic);
163 savecrc = dlp->d_crc;
164 dlp->d_crc = 0;
165 if (dlp->d_magic != DISKMAGIC64) {
166 msg = "no disk label";
167 } else if (dlp->d_npartitions > MAXPARTITIONS64) {
168 msg = "disklabel64 corrupted, too many partitions";
169 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) {
170 msg = "disklabel64 corrupted, bad CRC";
171 } else {
172 dlp->d_crc = savecrc;
173 (*lpp).lab64 = kmalloc(sizeof(*dlp),
174 M_DEVBUF, M_WAITOK|M_ZERO);
175 *(*lpp).lab64 = *dlp;
176 msg = NULL;
179 bp->b_flags |= B_INVAL | B_AGE;
180 relpbuf(bp, NULL);
182 return (msg);
186 * If everything is good, copy olpx to nlpx. Check to see if any
187 * open partitions would change.
189 static int
190 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp,
191 struct diskslice *sp, u_int32_t *openmask)
193 struct disklabel64 *olp, *nlp;
194 struct partition64 *opp, *npp;
195 uint32_t savecrc;
196 uint64_t slicebsize;
197 size_t nlpcrcsize;
198 int i;
200 olp = olpx.lab64;
201 nlp = nlpx.lab64;
203 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize;
205 if (nlp->d_magic != DISKMAGIC64)
206 return (EINVAL);
207 if (nlp->d_npartitions > MAXPARTITIONS64)
208 return (EINVAL);
209 savecrc = nlp->d_crc;
210 nlp->d_crc = 0;
211 nlpcrcsize = offsetof(struct disklabel64,
212 d_partitions[nlp->d_npartitions]) -
213 offsetof(struct disklabel64, d_magic);
214 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) {
215 nlp->d_crc = savecrc;
216 return (EINVAL);
218 nlp->d_crc = savecrc;
221 * Check if open partitions have changed
223 i = 0;
224 while (i < MAXPARTITIONS64) {
225 if (openmask[i >> 5] == 0) {
226 i += 32;
227 continue;
229 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) {
230 ++i;
231 continue;
233 if (nlp->d_npartitions <= i)
234 return (EBUSY);
235 opp = &olp->d_partitions[i];
236 npp = &nlp->d_partitions[i];
237 if (npp->p_boffset != opp->p_boffset ||
238 npp->p_bsize < opp->p_bsize) {
239 return (EBUSY);
243 * Do not allow p_type_uuid or p_stor_uuid to change if
244 * the partition is currently open.
246 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid,
247 sizeof(npp->p_type_uuid)) != 0) {
248 return (EBUSY);
250 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid,
251 sizeof(npp->p_stor_uuid)) != 0) {
252 return (EBUSY);
254 ++i;
258 * Make sure the label and partition offsets and sizes are sane.
260 if (nlp->d_total_size > slicebsize)
261 return (ENOSPC);
262 if (nlp->d_total_size & (ssp->dss_secsize - 1))
263 return (EINVAL);
264 if (nlp->d_bbase & (ssp->dss_secsize - 1))
265 return (EINVAL);
266 if (nlp->d_pbase & (ssp->dss_secsize - 1))
267 return (EINVAL);
268 if (nlp->d_pstop & (ssp->dss_secsize - 1))
269 return (EINVAL);
270 if (nlp->d_abase & (ssp->dss_secsize - 1))
271 return (EINVAL);
273 for (i = 0; i < nlp->d_npartitions; ++i) {
274 npp = &nlp->d_partitions[i];
275 if (npp->p_bsize == 0) {
276 if (npp->p_boffset != 0)
277 return (EINVAL);
278 continue;
280 if (npp->p_boffset & (ssp->dss_secsize - 1))
281 return (EINVAL);
282 if (npp->p_bsize & (ssp->dss_secsize - 1))
283 return (EINVAL);
284 if (npp->p_boffset < nlp->d_pbase)
285 return (ENOSPC);
286 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size)
287 return (ENOSPC);
291 * Structurally we may add code to make modifications above in the
292 * future, so regenerate the crc anyway.
294 nlp->d_crc = 0;
295 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize);
296 *olp = *nlp;
298 return (0);
302 * Write disk label back to device after modification.
304 static int
305 l64_writedisklabel(cdev_t dev, struct diskslices *ssp,
306 struct diskslice *sp, disklabel_t lpx)
308 struct disklabel64 *lp;
309 struct disklabel64 *dlp;
310 struct buf *bp;
311 int error = 0;
312 size_t bpsize;
313 int secsize;
315 lp = lpx.lab64;
318 * XXX I/O size is subject to device DMA limitations
320 secsize = ssp->dss_secsize;
321 bpsize = roundup2(sizeof(*lp), secsize);
323 bp = getpbuf_mem(NULL);
324 KKASSERT(bpsize <= bp->b_bufsize);
325 bp->b_bio1.bio_offset = 0;
326 bp->b_bio1.bio_done = biodone_sync;
327 bp->b_bio1.bio_flags |= BIO_SYNC;
328 bp->b_bcount = bpsize;
329 bp->b_flags |= B_FAILONDIS;
332 * Because our I/O is larger then the label, and because we do not
333 * write the d_reserved0[] area, do a read-modify-write.
335 bp->b_flags &= ~B_INVAL;
336 bp->b_cmd = BUF_CMD_READ;
337 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
338 dev_dstrategy(dev, &bp->b_bio1);
339 error = biowait(&bp->b_bio1, "labrd");
340 if (error)
341 goto done;
343 dlp = (void *)bp->b_data;
344 bcopy(&lp->d_magic, &dlp->d_magic,
345 sizeof(*lp) - offsetof(struct disklabel64, d_magic));
346 bp->b_cmd = BUF_CMD_WRITE;
347 bp->b_bio1.bio_done = biodone_sync;
348 bp->b_bio1.bio_flags |= BIO_SYNC;
349 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
350 dev_dstrategy(dev, &bp->b_bio1);
351 error = biowait(&bp->b_bio1, "labwr");
352 done:
353 bp->b_flags |= B_INVAL | B_AGE;
354 relpbuf(bp, NULL);
356 return (error);
360 * Create a disklabel based on a disk_info structure for the purposes of
361 * DSO_COMPATLABEL - cases where no real label exists on the storage medium.
363 * If a diskslice is passed, the label is truncated to the slice.
365 * NOTE! This is not a legal label because d_bbase and d_pbase are both
366 * set to 0.
368 static disklabel_t
369 l64_clone_label(struct disk_info *info, struct diskslice *sp)
371 struct disklabel64 *lp;
372 disklabel_t res;
373 uint32_t blksize = info->d_media_blksize;
374 size_t lpcrcsize;
376 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO);
378 if (sp)
379 lp->d_total_size = (uint64_t)sp->ds_size * blksize;
380 else
381 lp->d_total_size = info->d_media_blocks * blksize;
383 lp->d_magic = DISKMAGIC64;
384 lp->d_align = blksize;
385 lp->d_npartitions = MAXPARTITIONS64;
386 lp->d_pstop = lp->d_total_size;
389 * Create a dummy 'c' part and a dummy 'a' part (if requested).
390 * Note that the 'c' part is really a hack. 64 bit disklabels
391 * do not use 'c' to mean the raw partition.
394 lp->d_partitions[2].p_boffset = 0;
395 lp->d_partitions[2].p_bsize = lp->d_total_size;
396 /* XXX SET FS TYPE */
398 if (info->d_dsflags & DSO_COMPATPARTA) {
399 lp->d_partitions[0].p_boffset = 0;
400 lp->d_partitions[0].p_bsize = lp->d_total_size;
401 /* XXX SET FS TYPE */
404 lpcrcsize = offsetof(struct disklabel64,
405 d_partitions[lp->d_npartitions]) -
406 offsetof(struct disklabel64, d_magic);
408 lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
409 res.lab64 = lp;
410 return (res);
414 * Create a virgin disklabel64 suitable for writing to the media.
416 * disklabel64 always reserves 32KB for a boot area and leaves room
417 * for up to RESPARTITIONS64 partitions.
419 static void
420 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp,
421 struct diskslice *sp, struct disk_info *info)
423 struct disklabel64 *lp = lpx.lab64;
424 struct partition64 *pp;
425 uint32_t blksize;
426 uint32_t ressize;
427 uint64_t blkmask; /* 64 bits so we can ~ */
428 size_t lpcrcsize;
431 * Setup the initial label. Use of a block size of at least 4KB
432 * for calculating the initial reserved areas to allow some degree
433 * of portability between media with different sector sizes.
435 * Note that the modified blksize is stored in d_align as a hint
436 * to the disklabeling program.
438 bzero(lp, sizeof(*lp));
439 if ((blksize = info->d_media_blksize) < 4096)
440 blksize = 4096;
441 blkmask = blksize - 1;
443 if (sp)
444 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize;
445 else
446 lp->d_total_size = info->d_media_blocks * info->d_media_blksize;
448 lp->d_magic = DISKMAGIC64;
449 lp->d_align = blksize;
450 lp->d_npartitions = MAXPARTITIONS64;
451 kern_uuidgen(&lp->d_stor_uuid, 1);
453 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]);
454 ressize = (ressize + (uint32_t)blkmask) & ~blkmask;
457 * NOTE: When calculating pbase take into account the slice offset
458 * so the partitions are at least 32K-aligned relative to the
459 * start of the physical disk. This will accomodate efficient
460 * access to 4096 byte physical sector drives.
462 lp->d_bbase = ressize;
463 lp->d_pbase = lp->d_bbase + ((32768 + blkmask) & ~blkmask);
464 lp->d_pbase = (lp->d_pbase + PALIGN_MASK) & ~(uint64_t)PALIGN_MASK;
466 /* adjust for slice offset so we are physically aligned */
467 lp->d_pbase += 32768 - (sp->ds_offset * info->d_media_blksize) % 32768;
469 lp->d_pstop = (lp->d_total_size - lp->d_bbase) & ~blkmask;
470 lp->d_abase = lp->d_pstop;
473 * All partitions are left empty unless DSO_COMPATPARTA is set
476 if (info->d_dsflags & DSO_COMPATPARTA) {
477 pp = &lp->d_partitions[0];
478 pp->p_boffset = lp->d_pbase;
479 pp->p_bsize = lp->d_pstop - lp->d_pbase;
480 /* XXX SET FS TYPE */
483 lpcrcsize = offsetof(struct disklabel64,
484 d_partitions[lp->d_npartitions]) -
485 offsetof(struct disklabel64, d_magic);
486 lp->d_crc = crc32(&lp->d_magic, lpcrcsize);
490 * Set the number of blocks at the beginning of the slice which have
491 * been reserved for label operations. This area will be write-protected
492 * when accessed via the slice.
494 * For now just protect the label area proper. Do not protect the
495 * boot area. Note partitions in 64 bit disklabels do not overlap
496 * the disklabel or boot area.
498 static void
499 l64_adjust_label_reserved(struct diskslices *ssp, int slice,
500 struct diskslice *sp)
502 struct disklabel64 *lp = sp->ds_label.lab64;
504 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize;
507 struct disklabel_ops disklabel64_ops = {
508 .labelsize = sizeof(struct disklabel64),
509 .op_readdisklabel = l64_readdisklabel,
510 .op_setdisklabel = l64_setdisklabel,
511 .op_writedisklabel = l64_writedisklabel,
512 .op_clone_label = l64_clone_label,
513 .op_adjust_label_reserved = l64_adjust_label_reserved,
514 .op_getpartbounds = l64_getpartbounds,
515 .op_loadpartinfo = l64_loadpartinfo,
516 .op_getnumparts = l64_getnumparts,
517 .op_makevirginlabel = l64_makevirginlabel,
518 .op_freedisklabel = l64_freedisklabel