spapr: Increase hotpluggable memory slots to 256
[qemu/rayw.git] / block / vpc.c
blob076a7ce399e84cfdd79d7f5ef95bd133c9521400
1 /*
2 * Block driver for Connectix / Microsoft Virtual PC images
4 * Copyright (c) 2005 Alex Beregszaszi
5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "qemu-common.h"
28 #include "block/block_int.h"
29 #include "sysemu/block-backend.h"
30 #include "qemu/module.h"
31 #include "migration/migration.h"
32 #include "qemu/bswap.h"
33 #if defined(CONFIG_UUID)
34 #include <uuid/uuid.h>
35 #endif
37 /**************************************************************/
39 #define HEADER_SIZE 512
41 //#define CACHE
/* Disk type values as stored in the "type" field of the VHD footer. */
enum vhd_type {
    VHD_FIXED = 2,
    VHD_DYNAMIC = 3,
    VHD_DIFFERENCING = 4,
};
/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder/head/sector values used for the footer geometry */
#define VHD_CHS_MAX_C 65535LL
#define VHD_CHS_MAX_H 16
#define VHD_CHS_MAX_S 255

#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */
/* Number of sectors described by the largest CHS geometry */
#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image creation option that forces use of the exact size */
#define VPC_OPT_FORCE_SIZE "force_size"
61 /* always big-endian */
62 typedef struct vhd_footer {
63 char creator[8]; /* "conectix" */
64 uint32_t features;
65 uint32_t version;
67 /* Offset of next header structure, 0xFFFFFFFF if none */
68 uint64_t data_offset;
70 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
71 uint32_t timestamp;
73 char creator_app[4]; /* e.g., "vpc " */
74 uint16_t major;
75 uint16_t minor;
76 char creator_os[4]; /* "Wi2k" */
78 uint64_t orig_size;
79 uint64_t current_size;
81 uint16_t cyls;
82 uint8_t heads;
83 uint8_t secs_per_cyl;
85 uint32_t type;
87 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
88 the bytes in the footer without the checksum field") */
89 uint32_t checksum;
91 /* UUID used to identify a parent hard disk (backing file) */
92 uint8_t uuid[16];
94 uint8_t in_saved_state;
95 } QEMU_PACKED VHDFooter;
97 typedef struct vhd_dyndisk_header {
98 char magic[8]; /* "cxsparse" */
100 /* Offset of next header structure, 0xFFFFFFFF if none */
101 uint64_t data_offset;
103 /* Offset of the Block Allocation Table (BAT) */
104 uint64_t table_offset;
106 uint32_t version;
107 uint32_t max_table_entries; /* 32bit/entry */
109 /* 2 MB by default, must be a power of two */
110 uint32_t block_size;
112 uint32_t checksum;
113 uint8_t parent_uuid[16];
114 uint32_t parent_timestamp;
115 uint32_t reserved;
117 /* Backing file name (in UTF-16) */
118 uint8_t parent_name[512];
120 struct {
121 uint32_t platform;
122 uint32_t data_space;
123 uint32_t data_length;
124 uint32_t reserved;
125 uint64_t data_offset;
126 } parent_locator[8];
127 } QEMU_PACKED VHDDynDiskHeader;
129 typedef struct BDRVVPCState {
130 CoMutex lock;
131 uint8_t footer_buf[HEADER_SIZE];
132 uint64_t free_data_block_offset;
133 int max_table_entries;
134 uint32_t *pagetable;
135 uint64_t bat_offset;
136 uint64_t last_bitmap_offset;
138 uint32_t block_size;
139 uint32_t bitmap_size;
140 bool force_use_chs;
141 bool force_use_sz;
143 #ifdef CACHE
144 uint8_t *pageentry_u8;
145 uint32_t *pageentry_u32;
146 uint16_t *pageentry_u16;
148 uint64_t last_bitmap;
149 #endif
151 Error *migration_blocker;
152 } BDRVVPCState;
154 #define VPC_OPT_SIZE_CALC "force_size_calc"
155 static QemuOptsList vpc_runtime_opts = {
156 .name = "vpc-runtime-opts",
157 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
158 .desc = {
160 .name = VPC_OPT_SIZE_CALC,
161 .type = QEMU_OPT_STRING,
162 .help = "Force disk size calculation to use either CHS geometry, "
163 "or use the disk current_size specified in the VHD footer. "
164 "{chs, current_size}"
166 { /* end of list */ }
/*
 * Computes the VHD checksum of a buffer: the one's complement of the sum
 * of all its bytes, accumulated in 32-bit unsigned arithmetic.
 *
 * @buf:  bytes to sum (not modified)
 * @size: number of bytes in @buf; 0 yields 0xFFFFFFFF
 *
 * The caller is responsible for zeroing the checksum field inside @buf
 * beforehand when the buffer contains one.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t sum = 0;
    size_t i;   /* same type as size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        sum += buf[i];
    }

    return ~sum;
}
/*
 * Format probe: scores 100 when the buffer holds at least 8 bytes and
 * starts with the VHD footer magic "conectix", 0 otherwise.
 * @filename is not consulted.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    if (buf_size < 8) {
        return 0;
    }

    return strncmp((char *)buf, "conectix", 8) == 0 ? 100 : 0;
}
189 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
190 Error **errp)
192 BDRVVPCState *s = bs->opaque;
193 const char *size_calc;
195 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
197 if (!size_calc) {
198 /* no override, use autodetect only */
199 } else if (!strcmp(size_calc, "current_size")) {
200 s->force_use_sz = true;
201 } else if (!strcmp(size_calc, "chs")) {
202 s->force_use_chs = true;
203 } else {
204 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
208 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
209 Error **errp)
211 BDRVVPCState *s = bs->opaque;
212 int i;
213 VHDFooter *footer;
214 VHDDynDiskHeader *dyndisk_header;
215 QemuOpts *opts = NULL;
216 Error *local_err = NULL;
217 bool use_chs;
218 uint8_t buf[HEADER_SIZE];
219 uint32_t checksum;
220 uint64_t computed_size;
221 uint64_t pagetable_size;
222 int disk_type = VHD_DYNAMIC;
223 int ret;
225 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
226 qemu_opts_absorb_qdict(opts, options, &local_err);
227 if (local_err) {
228 error_propagate(errp, local_err);
229 ret = -EINVAL;
230 goto fail;
233 vpc_parse_options(bs, opts, &local_err);
234 if (local_err) {
235 error_propagate(errp, local_err);
236 ret = -EINVAL;
237 goto fail;
240 ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
241 if (ret < 0) {
242 error_setg(errp, "Unable to read VHD header");
243 goto fail;
246 footer = (VHDFooter *) s->footer_buf;
247 if (strncmp(footer->creator, "conectix", 8)) {
248 int64_t offset = bdrv_getlength(bs->file->bs);
249 if (offset < 0) {
250 ret = offset;
251 error_setg(errp, "Invalid file size");
252 goto fail;
253 } else if (offset < HEADER_SIZE) {
254 ret = -EINVAL;
255 error_setg(errp, "File too small for a VHD header");
256 goto fail;
259 /* If a fixed disk, the footer is found only at the end of the file */
260 ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
261 HEADER_SIZE);
262 if (ret < 0) {
263 goto fail;
265 if (strncmp(footer->creator, "conectix", 8)) {
266 error_setg(errp, "invalid VPC image");
267 ret = -EINVAL;
268 goto fail;
270 disk_type = VHD_FIXED;
273 checksum = be32_to_cpu(footer->checksum);
274 footer->checksum = 0;
275 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
276 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
277 "incorrect.\n", bs->filename);
279 /* Write 'checksum' back to footer, or else will leave it with zero. */
280 footer->checksum = cpu_to_be32(checksum);
282 /* The visible size of a image in Virtual PC depends on the geometry
283 rather than on the size stored in the footer (the size in the footer
284 is too large usually) */
285 bs->total_sectors = (int64_t)
286 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
288 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
289 * VHD image sizes differently. VPC will rely on CHS geometry,
290 * while Hyper-V and disk2vhd use the size specified in the footer.
292 * We use a couple of approaches to try and determine the correct method:
293 * look at the Creator App field, and look for images that have CHS
294 * geometry that is the maximum value.
296 * If the CHS geometry is the maximum CHS geometry, then we assume that
297 * the size is the footer->current_size to avoid truncation. Otherwise,
298 * we follow the table based on footer->creator_app:
300 * Known creator apps:
301 * 'vpc ' : CHS Virtual PC (uses disk geometry)
302 * 'qemu' : CHS QEMU (uses disk geometry)
303 * 'qem2' : current_size QEMU (uses current_size)
304 * 'win ' : current_size Hyper-V
305 * 'd2v ' : current_size Disk2vhd
306 * 'tap\0' : current_size XenServer
307 * 'CTXS' : current_size XenConverter
309 * The user can override the table values via drive options, however
310 * even with an override we will still use current_size for images
311 * that have CHS geometry of the maximum size.
313 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
314 !!strncmp(footer->creator_app, "qem2", 4) &&
315 !!strncmp(footer->creator_app, "d2v ", 4) &&
316 !!strncmp(footer->creator_app, "CTXS", 4) &&
317 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
319 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
320 bs->total_sectors = be64_to_cpu(footer->current_size) /
321 BDRV_SECTOR_SIZE;
324 /* Allow a maximum disk size of 2040 GiB */
325 if (bs->total_sectors > VHD_MAX_SECTORS) {
326 ret = -EFBIG;
327 goto fail;
330 if (disk_type == VHD_DYNAMIC) {
331 ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
332 HEADER_SIZE);
333 if (ret < 0) {
334 error_setg(errp, "Error reading dynamic VHD header");
335 goto fail;
338 dyndisk_header = (VHDDynDiskHeader *) buf;
340 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
341 error_setg(errp, "Invalid header magic");
342 ret = -EINVAL;
343 goto fail;
346 s->block_size = be32_to_cpu(dyndisk_header->block_size);
347 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
348 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
349 ret = -EINVAL;
350 goto fail;
352 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
354 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
356 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
357 error_setg(errp, "Too many blocks");
358 ret = -EINVAL;
359 goto fail;
362 computed_size = (uint64_t) s->max_table_entries * s->block_size;
363 if (computed_size < bs->total_sectors * 512) {
364 error_setg(errp, "Page table too small");
365 ret = -EINVAL;
366 goto fail;
369 if (s->max_table_entries > SIZE_MAX / 4 ||
370 s->max_table_entries > (int) INT_MAX / 4) {
371 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
372 s->max_table_entries);
373 ret = -EINVAL;
374 goto fail;
377 pagetable_size = (uint64_t) s->max_table_entries * 4;
379 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
380 if (s->pagetable == NULL) {
381 error_setg(errp, "Unable to allocate memory for page table");
382 ret = -ENOMEM;
383 goto fail;
386 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
388 ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
389 pagetable_size);
390 if (ret < 0) {
391 error_setg(errp, "Error reading pagetable");
392 goto fail;
395 s->free_data_block_offset =
396 ROUND_UP(s->bat_offset + pagetable_size, 512);
398 for (i = 0; i < s->max_table_entries; i++) {
399 be32_to_cpus(&s->pagetable[i]);
400 if (s->pagetable[i] != 0xFFFFFFFF) {
401 int64_t next = (512 * (int64_t) s->pagetable[i]) +
402 s->bitmap_size + s->block_size;
404 if (next > s->free_data_block_offset) {
405 s->free_data_block_offset = next;
410 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
411 error_setg(errp, "block-vpc: free_data_block_offset points after "
412 "the end of file. The image has been truncated.");
413 ret = -EINVAL;
414 goto fail;
417 s->last_bitmap_offset = (int64_t) -1;
419 #ifdef CACHE
420 s->pageentry_u8 = g_malloc(512);
421 s->pageentry_u32 = s->pageentry_u8;
422 s->pageentry_u16 = s->pageentry_u8;
423 s->last_pagetable = -1;
424 #endif
427 qemu_co_mutex_init(&s->lock);
429 /* Disable migration when VHD images are used */
430 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
431 "does not support live migration",
432 bdrv_get_device_or_node_name(bs));
433 migrate_add_blocker(s->migration_blocker);
435 return 0;
437 fail:
438 qemu_vfree(s->pagetable);
439 #ifdef CACHE
440 g_free(s->pageentry_u8);
441 #endif
442 return ret;
445 static int vpc_reopen_prepare(BDRVReopenState *state,
446 BlockReopenQueue *queue, Error **errp)
448 return 0;
452 * Returns the absolute byte offset of the given sector in the image file.
453 * If the sector is not allocated, -1 is returned instead.
455 * The parameter write must be 1 if the offset will be used for a write
456 * operation (the block bitmaps is updated then), 0 otherwise.
458 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
459 bool write)
461 BDRVVPCState *s = bs->opaque;
462 uint64_t bitmap_offset, block_offset;
463 uint32_t pagetable_index, offset_in_block;
465 pagetable_index = offset / s->block_size;
466 offset_in_block = offset % s->block_size;
468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
469 return -1; /* not allocated */
471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
472 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
474 /* We must ensure that we don't write to any sectors which are marked as
475 unused in the bitmap. We get away with setting all bits in the block
476 bitmap each time we write to a new block. This might cause Virtual PC to
477 miss sparse read optimization, but it's not a problem in terms of
478 correctness. */
479 if (write && (s->last_bitmap_offset != bitmap_offset)) {
480 uint8_t bitmap[s->bitmap_size];
482 s->last_bitmap_offset = bitmap_offset;
483 memset(bitmap, 0xff, s->bitmap_size);
484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
487 return block_offset;
490 static inline int64_t get_sector_offset(BlockDriverState *bs,
491 int64_t sector_num, bool write)
493 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
497 * Writes the footer to the end of the image file. This is needed when the
498 * file grows as it overwrites the old footer
500 * Returns 0 on success and < 0 on error
502 static int rewrite_footer(BlockDriverState* bs)
504 int ret;
505 BDRVVPCState *s = bs->opaque;
506 int64_t offset = s->free_data_block_offset;
508 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
509 if (ret < 0)
510 return ret;
512 return 0;
516 * Allocates a new block. This involves writing a new footer and updating
517 * the Block Allocation Table to use the space at the old end of the image
518 * file (overwriting the old footer)
520 * Returns the sectors' offset in the image file on success and < 0 on error
522 static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
524 BDRVVPCState *s = bs->opaque;
525 int64_t bat_offset;
526 uint32_t index, bat_value;
527 int ret;
528 uint8_t bitmap[s->bitmap_size];
530 /* Check if sector_num is valid */
531 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
532 return -EINVAL;
535 /* Write entry into in-memory BAT */
536 index = offset / s->block_size;
537 assert(s->pagetable[index] == 0xFFFFFFFF);
538 s->pagetable[index] = s->free_data_block_offset / 512;
540 /* Initialize the block's bitmap */
541 memset(bitmap, 0xff, s->bitmap_size);
542 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
543 s->bitmap_size);
544 if (ret < 0) {
545 return ret;
548 /* Write new footer (the old one will be overwritten) */
549 s->free_data_block_offset += s->block_size + s->bitmap_size;
550 ret = rewrite_footer(bs);
551 if (ret < 0)
552 goto fail;
554 /* Write BAT entry to disk */
555 bat_offset = s->bat_offset + (4 * index);
556 bat_value = cpu_to_be32(s->pagetable[index]);
557 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
558 if (ret < 0)
559 goto fail;
561 return get_image_offset(bs, offset, false);
563 fail:
564 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
565 return ret;
568 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
570 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
571 VHDFooter *footer = (VHDFooter *) s->footer_buf;
573 if (be32_to_cpu(footer->type) != VHD_FIXED) {
574 bdi->cluster_size = s->block_size;
577 bdi->unallocated_blocks_are_zero = true;
578 return 0;
581 static int coroutine_fn
582 vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
583 QEMUIOVector *qiov, int flags)
585 BDRVVPCState *s = bs->opaque;
586 int ret;
587 int64_t image_offset;
588 int64_t n_bytes;
589 int64_t bytes_done = 0;
590 VHDFooter *footer = (VHDFooter *) s->footer_buf;
591 QEMUIOVector local_qiov;
593 if (be32_to_cpu(footer->type) == VHD_FIXED) {
594 return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
597 qemu_co_mutex_lock(&s->lock);
598 qemu_iovec_init(&local_qiov, qiov->niov);
600 while (bytes > 0) {
601 image_offset = get_image_offset(bs, offset, false);
602 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
604 if (image_offset == -1) {
605 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
606 } else {
607 qemu_iovec_reset(&local_qiov);
608 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
610 ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
611 &local_qiov, 0);
612 if (ret < 0) {
613 goto fail;
617 bytes -= n_bytes;
618 offset += n_bytes;
619 bytes_done += n_bytes;
622 ret = 0;
623 fail:
624 qemu_iovec_destroy(&local_qiov);
625 qemu_co_mutex_unlock(&s->lock);
627 return ret;
630 static int coroutine_fn
631 vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
632 QEMUIOVector *qiov, int flags)
634 BDRVVPCState *s = bs->opaque;
635 int64_t image_offset;
636 int64_t n_bytes;
637 int64_t bytes_done = 0;
638 int ret;
639 VHDFooter *footer = (VHDFooter *) s->footer_buf;
640 QEMUIOVector local_qiov;
642 if (be32_to_cpu(footer->type) == VHD_FIXED) {
643 return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
646 qemu_co_mutex_lock(&s->lock);
647 qemu_iovec_init(&local_qiov, qiov->niov);
649 while (bytes > 0) {
650 image_offset = get_image_offset(bs, offset, true);
651 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
653 if (image_offset == -1) {
654 image_offset = alloc_block(bs, offset);
655 if (image_offset < 0) {
656 ret = image_offset;
657 goto fail;
661 qemu_iovec_reset(&local_qiov);
662 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
664 ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
665 &local_qiov, 0);
666 if (ret < 0) {
667 goto fail;
670 bytes -= n_bytes;
671 offset += n_bytes;
672 bytes_done += n_bytes;
675 ret = 0;
676 fail:
677 qemu_iovec_destroy(&local_qiov);
678 qemu_co_mutex_unlock(&s->lock);
680 return ret;
683 static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
684 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
686 BDRVVPCState *s = bs->opaque;
687 VHDFooter *footer = (VHDFooter*) s->footer_buf;
688 int64_t start, offset;
689 bool allocated;
690 int n;
692 if (be32_to_cpu(footer->type) == VHD_FIXED) {
693 *pnum = nb_sectors;
694 *file = bs->file->bs;
695 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
696 (sector_num << BDRV_SECTOR_BITS);
699 offset = get_sector_offset(bs, sector_num, 0);
700 start = offset;
701 allocated = (offset != -1);
702 *pnum = 0;
704 do {
705 /* All sectors in a block are contiguous (without using the bitmap) */
706 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
707 - sector_num;
708 n = MIN(n, nb_sectors);
710 *pnum += n;
711 sector_num += n;
712 nb_sectors -= n;
713 /* *pnum can't be greater than one block for allocated
714 * sectors since there is always a bitmap in between. */
715 if (allocated) {
716 *file = bs->file->bs;
717 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
719 if (nb_sectors == 0) {
720 break;
722 offset = get_sector_offset(bs, sector_num, 0);
723 } while (offset == -1);
725 return 0;
729 * Calculates the number of cylinders, heads and sectors per cylinder
730 * based on a given number of sectors. This is the algorithm described
731 * in the VHD specification.
733 * Note that the geometry doesn't always exactly match total_sectors but
734 * may round it down.
736 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
737 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
738 * and instead allow up to 255 heads.
740 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
741 uint8_t* heads, uint8_t* secs_per_cyl)
743 uint32_t cyls_times_heads;
745 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
747 if (total_sectors >= 65535LL * 16 * 63) {
748 *secs_per_cyl = 255;
749 *heads = 16;
750 cyls_times_heads = total_sectors / *secs_per_cyl;
751 } else {
752 *secs_per_cyl = 17;
753 cyls_times_heads = total_sectors / *secs_per_cyl;
754 *heads = (cyls_times_heads + 1023) / 1024;
756 if (*heads < 4) {
757 *heads = 4;
760 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
761 *secs_per_cyl = 31;
762 *heads = 16;
763 cyls_times_heads = total_sectors / *secs_per_cyl;
766 if (cyls_times_heads >= (*heads * 1024)) {
767 *secs_per_cyl = 63;
768 *heads = 16;
769 cyls_times_heads = total_sectors / *secs_per_cyl;
773 *cyls = cyls_times_heads / *heads;
775 return 0;
778 static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
779 int64_t total_sectors)
781 VHDDynDiskHeader *dyndisk_header =
782 (VHDDynDiskHeader *) buf;
783 size_t block_size, num_bat_entries;
784 int i;
785 int ret;
786 int64_t offset = 0;
788 /* Write the footer (twice: at the beginning and at the end) */
789 block_size = 0x200000;
790 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
793 if (ret < 0) {
794 goto fail;
797 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
798 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
799 if (ret < 0) {
800 goto fail;
803 /* Write the initial BAT */
804 offset = 3 * 512;
806 memset(buf, 0xFF, 512);
807 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
808 ret = blk_pwrite(blk, offset, buf, 512, 0);
809 if (ret < 0) {
810 goto fail;
812 offset += 512;
815 /* Prepare the Dynamic Disk Header */
816 memset(buf, 0, 1024);
818 memcpy(dyndisk_header->magic, "cxsparse", 8);
821 * Note: The spec is actually wrong here for data_offset, it says
822 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
824 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
825 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
826 dyndisk_header->version = cpu_to_be32(0x00010000);
827 dyndisk_header->block_size = cpu_to_be32(block_size);
828 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
830 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
832 /* Write the header */
833 offset = 512;
835 ret = blk_pwrite(blk, offset, buf, 1024, 0);
836 if (ret < 0) {
837 goto fail;
840 fail:
841 return ret;
844 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
845 int64_t total_size)
847 int ret;
849 /* Add footer to total size */
850 total_size += HEADER_SIZE;
852 ret = blk_truncate(blk, total_size);
853 if (ret < 0) {
854 return ret;
857 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
858 if (ret < 0) {
859 return ret;
862 return ret;
865 static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
867 uint8_t buf[1024];
868 VHDFooter *footer = (VHDFooter *) buf;
869 char *disk_type_param;
870 int i;
871 uint16_t cyls = 0;
872 uint8_t heads = 0;
873 uint8_t secs_per_cyl = 0;
874 int64_t total_sectors;
875 int64_t total_size;
876 int disk_type;
877 int ret = -EIO;
878 bool force_size;
879 Error *local_err = NULL;
880 BlockBackend *blk = NULL;
882 /* Read out options */
883 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
884 BDRV_SECTOR_SIZE);
885 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
886 if (disk_type_param) {
887 if (!strcmp(disk_type_param, "dynamic")) {
888 disk_type = VHD_DYNAMIC;
889 } else if (!strcmp(disk_type_param, "fixed")) {
890 disk_type = VHD_FIXED;
891 } else {
892 error_setg(errp, "Invalid disk type, %s", disk_type_param);
893 ret = -EINVAL;
894 goto out;
896 } else {
897 disk_type = VHD_DYNAMIC;
900 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
902 ret = bdrv_create_file(filename, opts, &local_err);
903 if (ret < 0) {
904 error_propagate(errp, local_err);
905 goto out;
908 blk = blk_new_open(filename, NULL, NULL,
909 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
910 if (blk == NULL) {
911 error_propagate(errp, local_err);
912 ret = -EIO;
913 goto out;
916 blk_set_allow_write_beyond_eof(blk, true);
919 * Calculate matching total_size and geometry. Increase the number of
920 * sectors requested until we get enough (or fail). This ensures that
921 * qemu-img convert doesn't truncate images, but rather rounds up.
923 * If the image size can't be represented by a spec conformant CHS geometry,
924 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
925 * the image size from the VHD footer to calculate total_sectors.
927 if (force_size) {
928 /* This will force the use of total_size for sector count, below */
929 cyls = VHD_CHS_MAX_C;
930 heads = VHD_CHS_MAX_H;
931 secs_per_cyl = VHD_CHS_MAX_S;
932 } else {
933 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
934 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
935 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
939 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
940 total_sectors = total_size / BDRV_SECTOR_SIZE;
941 /* Allow a maximum disk size of 2040 GiB */
942 if (total_sectors > VHD_MAX_SECTORS) {
943 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
944 ret = -EFBIG;
945 goto out;
947 } else {
948 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
949 total_size = total_sectors * BDRV_SECTOR_SIZE;
952 /* Prepare the Hard Disk Footer */
953 memset(buf, 0, 1024);
955 memcpy(footer->creator, "conectix", 8);
956 if (force_size) {
957 memcpy(footer->creator_app, "qem2", 4);
958 } else {
959 memcpy(footer->creator_app, "qemu", 4);
961 memcpy(footer->creator_os, "Wi2k", 4);
963 footer->features = cpu_to_be32(0x02);
964 footer->version = cpu_to_be32(0x00010000);
965 if (disk_type == VHD_DYNAMIC) {
966 footer->data_offset = cpu_to_be64(HEADER_SIZE);
967 } else {
968 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
970 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
972 /* Version of Virtual PC 2007 */
973 footer->major = cpu_to_be16(0x0005);
974 footer->minor = cpu_to_be16(0x0003);
975 footer->orig_size = cpu_to_be64(total_size);
976 footer->current_size = cpu_to_be64(total_size);
977 footer->cyls = cpu_to_be16(cyls);
978 footer->heads = heads;
979 footer->secs_per_cyl = secs_per_cyl;
981 footer->type = cpu_to_be32(disk_type);
983 #if defined(CONFIG_UUID)
984 uuid_generate(footer->uuid);
985 #endif
987 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
989 if (disk_type == VHD_DYNAMIC) {
990 ret = create_dynamic_disk(blk, buf, total_sectors);
991 } else {
992 ret = create_fixed_disk(blk, buf, total_size);
994 if (ret < 0) {
995 error_setg(errp, "Unable to create or write VHD header");
998 out:
999 blk_unref(blk);
1000 g_free(disk_type_param);
1001 return ret;
1004 static int vpc_has_zero_init(BlockDriverState *bs)
1006 BDRVVPCState *s = bs->opaque;
1007 VHDFooter *footer = (VHDFooter *) s->footer_buf;
1009 if (be32_to_cpu(footer->type) == VHD_FIXED) {
1010 return bdrv_has_zero_init(bs->file->bs);
1011 } else {
1012 return 1;
1016 static void vpc_close(BlockDriverState *bs)
1018 BDRVVPCState *s = bs->opaque;
1019 qemu_vfree(s->pagetable);
1020 #ifdef CACHE
1021 g_free(s->pageentry_u8);
1022 #endif
1024 migrate_del_blocker(s->migration_blocker);
1025 error_free(s->migration_blocker);
1028 static QemuOptsList vpc_create_opts = {
1029 .name = "vpc-create-opts",
1030 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1031 .desc = {
1033 .name = BLOCK_OPT_SIZE,
1034 .type = QEMU_OPT_SIZE,
1035 .help = "Virtual disk size"
1038 .name = BLOCK_OPT_SUBFMT,
1039 .type = QEMU_OPT_STRING,
1040 .help =
1041 "Type of virtual hard disk format. Supported formats are "
1042 "{dynamic (default) | fixed} "
1045 .name = VPC_OPT_FORCE_SIZE,
1046 .type = QEMU_OPT_BOOL,
1047 .help = "Force disk size calculation to use the actual size "
1048 "specified, rather than using the nearest CHS-based "
1049 "calculation"
1051 { /* end of list */ }
1055 static BlockDriver bdrv_vpc = {
1056 .format_name = "vpc",
1057 .instance_size = sizeof(BDRVVPCState),
1059 .bdrv_probe = vpc_probe,
1060 .bdrv_open = vpc_open,
1061 .bdrv_close = vpc_close,
1062 .bdrv_reopen_prepare = vpc_reopen_prepare,
1063 .bdrv_create = vpc_create,
1065 .bdrv_co_preadv = vpc_co_preadv,
1066 .bdrv_co_pwritev = vpc_co_pwritev,
1067 .bdrv_co_get_block_status = vpc_co_get_block_status,
1069 .bdrv_get_info = vpc_get_info,
1071 .create_opts = &vpc_create_opts,
1072 .bdrv_has_zero_init = vpc_has_zero_init,
1075 static void bdrv_vpc_init(void)
1077 bdrv_register(&bdrv_vpc);
1080 block_init(bdrv_vpc_init);