Allow use of pc machine type (accel=xen) for Xen HVM domains.
[qemu.git] / block / vhdx.c
blobe9704b1fdc1c119ea52280b3080bd6c729c63b9e
1 /*
2 * Block driver for Hyper-V VHDX Images
4 * Copyright (c) 2013 Red Hat, Inc.,
6 * Authors:
7 * Jeff Cody <jcody@redhat.com>
9 * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
10 * by Microsoft:
11 * https://www.microsoft.com/en-us/download/details.aspx?id=29681
13 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "block/block_int.h"
20 #include "qemu/module.h"
21 #include "qemu/crc32c.h"
22 #include "block/vhdx.h"
/* Several metadata and region table data entries are identified by
 * guids in a MS-specific GUID format. */


/* ------- Known Region Table GUIDs ---------------------- */

/* Region table entry that vhdx_open_region_tables() records as the BAT
 * region (s->bat_rt). */
static const MSGUID bat_guid =      { .data1 = 0x2dc27766,
                                      .data2 = 0xf623,
                                      .data3 = 0x4200,
                                      .data4 = { 0x9d, 0x64, 0x11, 0x5e,
                                                 0x9b, 0xfd, 0x4a, 0x08} };

/* Region table entry that vhdx_open_region_tables() records as the
 * metadata region (s->metadata_rt). */
static const MSGUID metadata_guid = { .data1 = 0x8b7ca206,
                                      .data2 = 0x4790,
                                      .data3 = 0x4b9a,
                                      .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
                                                 0x05, 0x0f, 0x88, 0x6e} };



/* ------- Known Metadata Entry GUIDs ---------------------- */
/* Each GUID below is matched against the item_id of the metadata table
 * entries in vhdx_parse_metadata(); a match stores the entry and sets the
 * corresponding META_*_PRESENT flag. */

/* File parameters item */
static const MSGUID file_param_guid =   { .data1 = 0xcaa16737,
                                          .data2 = 0xfa36,
                                          .data3 = 0x4d43,
                                          .data4 = { 0xb3, 0xb6, 0x33, 0xf0,
                                                     0xaa, 0x44, 0xe7, 0x6b} };

/* Virtual disk size item */
static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224,
                                          .data2 = 0xcd1b,
                                          .data3 = 0x4876,
                                          .data4 = { 0xb2, 0x11, 0x5d, 0xbe,
                                                     0xd8, 0x3b, 0xf4, 0xb8} };

/* Page 83 data item */
static const MSGUID page83_guid =       { .data1 = 0xbeca12ab,
                                          .data2 = 0xb2e6,
                                          .data3 = 0x4523,
                                          .data4 = { 0x93, 0xef, 0xc3, 0x09,
                                                     0xe0, 0x00, 0xc7, 0x46} };


/* Physical sector size item */
static const MSGUID phys_sector_guid =  { .data1 = 0xcda348c7,
                                          .data2 = 0x445d,
                                          .data3 = 0x4471,
                                          .data4 = { 0x9c, 0xc9, 0xe9, 0x88,
                                                     0x52, 0x51, 0xc5, 0x56} };

/* Parent locator item */
static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d,
                                            .data2 = 0xb30b,
                                            .data3 = 0x454d,
                                            .data4 = { 0xab, 0xf7, 0xd3,
                                                       0xd8, 0x48, 0x34,
                                                       0xab, 0x0c} };

/* Logical sector size item */
static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
                                            .data2 = 0xa96f,
                                            .data3 = 0x4709,
                                            .data4 = { 0xba, 0x47, 0xf2,
                                                       0x33, 0xa8, 0xfa,
                                                       0xab, 0x5f} };

/* Each parent type must have a valid GUID; this is for parent images
 * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
 * need to make up our own QCOW2 GUID type.
 * NOTE(review): nothing in this file references this GUID yet. */
static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
                                         .data2 = 0xd19e,
                                         .data3 = 0x4a81,
                                         .data4 = { 0xb7, 0x89, 0x25, 0xb8,
                                                    0xe9, 0x44, 0x59, 0x13} };
/* Bit flags stored in VHDXMetadataEntries.present.  vhdx_parse_metadata()
 * sets one flag for each known metadata item it finds in the metadata
 * table, and uses the flag to reject duplicate items. */
#define META_FILE_PARAMETER_PRESENT      0x01
#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
#define META_PAGE_83_PRESENT             0x04
#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
#define META_PARENT_LOCATOR_PRESENT      0x20

/* The five items that must all be present.  The parent locator flag is not
 * part of this mask; because vhdx_parse_metadata() compares the 'present'
 * field for exact equality with this value, a table that also carries a
 * parent locator does not match it. */
#define META_ALL_PRESENT    \
    (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT |   \
     META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
     META_PHYS_SECTOR_SIZE_PRESENT)
/* The known metadata table entries collected by vhdx_parse_metadata().
 * Each member is a copy of the matching table entry (already converted to
 * host byte order); a member is only meaningful when its META_*_PRESENT
 * bit is set in 'present'. */
typedef struct VHDXMetadataEntries {
    VHDXMetadataTableEntry file_parameters_entry;
    VHDXMetadataTableEntry virtual_disk_size_entry;
    VHDXMetadataTableEntry page83_data_entry;
    VHDXMetadataTableEntry logical_sector_size_entry;
    VHDXMetadataTableEntry phys_sector_size_entry;
    VHDXMetadataTableEntry parent_locator_entry;
    uint16_t present;   /* bitwise OR of META_*_PRESENT flags */
} VHDXMetadataEntries;
/* Result of translating a (sector_num, nb_sectors) request into positions
 * within one payload block; filled in by vhdx_block_translate(). */
typedef struct VHDXSectorInfo {
    uint32_t bat_idx;       /* BAT entry index (already advanced past the
                             * interleaved sector bitmap entries) */
    uint32_t sectors_avail; /* sectors available in payload block, clamped
                             * to the number of sectors requested */
    uint32_t bytes_left;    /* bytes from the start of the request to the
                             * end of the block (computed before the
                             * clamp applied to sectors_avail) */
    uint32_t bytes_avail;   /* sectors_avail expressed in bytes */
    uint64_t file_offset;   /* absolute offset in bytes, in file; left at 0
                             * when the BAT entry carries no file offset */
    uint64_t block_offset;  /* offset of the request within the block,
                             * in bytes */
} VHDXSectorInfo;
/* Per-image driver state, reached through bs->opaque. */
typedef struct BDRVVHDXState {
    CoMutex lock;                   /* held by vhdx_co_readv() except while
                                     * the underlying file read is issued */

    int curr_header;                /* index (0 or 1) of the active header,
                                     * chosen by vhdx_parse_header() */
    VHDXHeader *headers[2];         /* both header copies; allocated in
                                     * vhdx_parse_header(), freed in
                                     * vhdx_close() */

    VHDXRegionTableHeader rt;
    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */

    VHDXMetadataTableHeader metadata_hdr;
    VHDXMetadataEntries metadata_entries;

    VHDXFileParameters params;      /* file parameters metadata item */
    uint32_t block_size;            /* copy of params.block_size */
    uint32_t block_size_bits;       /* log2(block_size) */
    uint32_t sectors_per_block;     /* block_size / logical_sector_size */
    uint32_t sectors_per_block_bits; /* log2(sectors_per_block) */

    uint64_t virtual_disk_size;     /* in bytes */
    uint32_t logical_sector_size;
    uint32_t physical_sector_size;

    uint64_t chunk_ratio;           /* VHDX_MAX_SECTORS_PER_BLOCK *
                                     * logical_sector_size / block_size */
    uint32_t chunk_ratio_bits;      /* log2(chunk_ratio) */
    uint32_t logical_sector_size_bits; /* log2(logical_sector_size) */

    uint32_t bat_entries;           /* number of BAT entries in use */
    VHDXBatEntry *bat;              /* in-memory copy of the BAT; entries
                                     * below bat_entries are in host byte
                                     * order */
    uint64_t bat_offset;            /* file offset of the BAT region */

    /* Parent locator data.  NOTE(review): nothing in this file populates
     * these yet; parent_entries is only freed. */
    VHDXParentLocatorHeader parent_header;
    VHDXParentLocatorEntry *parent_entries;

} BDRVVHDXState;
/*
 * Computes the crc32c of 'buf', continuing from the seed value 'crc'.
 *
 * crc:         seed passed to crc32c()
 * buf:         buffer to checksum (must not be NULL)
 * size:        number of bytes of buf to checksum
 * crc_offset:  when greater than zero, the byte offset in buf of a 4-byte
 *              embedded checksum field.  That field is zeroed for the
 *              duration of the calculation and restored afterwards, so the
 *              buffer is briefly modified.  A value <= 0 means the buffer
 *              is checksummed as-is.
 *
 * Returns the value produced by crc32c().
 */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset)
{
    const bool has_crc_field = crc_offset > 0;
    uint32_t saved_field = 0;
    uint32_t result;

    assert(buf != NULL);

    /* blank out the embedded checksum, remembering what was there */
    if (has_crc_field) {
        uint8_t *field = buf + crc_offset;

        memcpy(&saved_field, field, sizeof(saved_field));
        memset(field, 0, sizeof(saved_field));
    }

    result = crc32c(crc, buf, size);

    /* put the original bytes back */
    if (has_crc_field) {
        memcpy(buf + crc_offset, &saved_field, sizeof(saved_field));
    }

    return result;
}
/* Validates the checksum of the buffer, with an in-place CRC.
 *
 * Zero is substituted during crc calculation for the original crc field,
 * and the crc field is restored afterwards.  The buffer is therefore
 * modified while the calculation runs, so this may not be suitable for
 * multi-threaded use.
 *
 * The stored checksum is read as a little-endian 32-bit value and compared
 * with the crc32c of the buffer computed from a seed of 0xffffffff.
 *
 * buf:         buffer pointer (must not be NULL)
 * size:        size of buffer (must be > crc_offset+4)
 * crc_offset:  byte offset in buf of the buffer crc
 *
 * returns true if checksum is valid, false otherwise
 */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
    uint32_t stored;

    assert(buf != NULL);
    assert(size > (crc_offset + 4));

    /* fetch the on-disk checksum before the field gets zeroed */
    memcpy(&stored, buf + crc_offset, sizeof(stored));
    stored = le32_to_cpu(stored);

    return vhdx_checksum_calc(0xffffffff, buf, size, crc_offset) == stored;
}
/*
 * Per the MS VHDX Specification, for every VHDX file:
 *      - The header section is fixed size - 1 MB
 *      - The header section is always the first "object"
 *      - The first 64KB of the header is the File Identifier
 *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
 *      - The following 512 bytes constitute a UTF-16 string identifying the
 *        software that created the file, and is optional and diagnostic only.
 *
 *  Therefore, we probe by looking for the "vhdxfile" signature.
 *
 *  buf:       start of the image data
 *  buf_size:  number of valid bytes in buf
 *  filename:  unused
 *
 *  Returns 100 when buf holds at least 8 bytes and starts with the
 *  signature, 0 otherwise.
 */
static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    if (buf_size < 8) {
        return 0;
    }

    return memcmp(buf, "vhdxfile", 8) == 0 ? 100 : 0;
}
233 /* All VHDX structures on disk are little endian */
234 static void vhdx_header_le_import(VHDXHeader *h)
236 assert(h != NULL);
238 le32_to_cpus(&h->signature);
239 le32_to_cpus(&h->checksum);
240 le64_to_cpus(&h->sequence_number);
242 leguid_to_cpus(&h->file_write_guid);
243 leguid_to_cpus(&h->data_write_guid);
244 leguid_to_cpus(&h->log_guid);
246 le16_to_cpus(&h->log_version);
247 le16_to_cpus(&h->version);
248 le32_to_cpus(&h->log_length);
249 le64_to_cpus(&h->log_offset);
253 /* opens the specified header block from the VHDX file header section */
254 static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
256 int ret = 0;
257 VHDXHeader *header1;
258 VHDXHeader *header2;
259 bool h1_valid = false;
260 bool h2_valid = false;
261 uint64_t h1_seq = 0;
262 uint64_t h2_seq = 0;
263 uint8_t *buffer;
265 header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
266 header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
268 buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
270 s->headers[0] = header1;
271 s->headers[1] = header2;
273 /* We have to read the whole VHDX_HEADER_SIZE instead of
274 * sizeof(VHDXHeader), because the checksum is over the whole
275 * region */
276 ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
277 if (ret < 0) {
278 goto fail;
280 /* copy over just the relevant portion that we need */
281 memcpy(header1, buffer, sizeof(VHDXHeader));
282 vhdx_header_le_import(header1);
284 if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
285 !memcmp(&header1->signature, "head", 4) &&
286 header1->version == 1) {
287 h1_seq = header1->sequence_number;
288 h1_valid = true;
291 ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
292 if (ret < 0) {
293 goto fail;
295 /* copy over just the relevant portion that we need */
296 memcpy(header2, buffer, sizeof(VHDXHeader));
297 vhdx_header_le_import(header2);
299 if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
300 !memcmp(&header2->signature, "head", 4) &&
301 header2->version == 1) {
302 h2_seq = header2->sequence_number;
303 h2_valid = true;
306 /* If there is only 1 valid header (or no valid headers), we
307 * don't care what the sequence numbers are */
308 if (h1_valid && !h2_valid) {
309 s->curr_header = 0;
310 } else if (!h1_valid && h2_valid) {
311 s->curr_header = 1;
312 } else if (!h1_valid && !h2_valid) {
313 ret = -EINVAL;
314 goto fail;
315 } else {
316 /* If both headers are valid, then we choose the active one by the
317 * highest sequence number. If the sequence numbers are equal, that is
318 * invalid */
319 if (h1_seq > h2_seq) {
320 s->curr_header = 0;
321 } else if (h2_seq > h1_seq) {
322 s->curr_header = 1;
323 } else {
324 ret = -EINVAL;
325 goto fail;
329 ret = 0;
331 goto exit;
333 fail:
334 qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
335 qemu_vfree(header1);
336 qemu_vfree(header2);
337 s->headers[0] = NULL;
338 s->headers[1] = NULL;
339 exit:
340 qemu_vfree(buffer);
341 return ret;
345 static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
347 int ret = 0;
348 uint8_t *buffer;
349 int offset = 0;
350 VHDXRegionTableEntry rt_entry;
351 uint32_t i;
352 bool bat_rt_found = false;
353 bool metadata_rt_found = false;
355 /* We have to read the whole 64KB block, because the crc32 is over the
356 * whole block */
357 buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
359 ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
360 VHDX_HEADER_BLOCK_SIZE);
361 if (ret < 0) {
362 goto fail;
364 memcpy(&s->rt, buffer, sizeof(s->rt));
365 le32_to_cpus(&s->rt.signature);
366 le32_to_cpus(&s->rt.checksum);
367 le32_to_cpus(&s->rt.entry_count);
368 le32_to_cpus(&s->rt.reserved);
369 offset += sizeof(s->rt);
371 if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
372 memcmp(&s->rt.signature, "regi", 4)) {
373 ret = -EINVAL;
374 goto fail;
377 /* Per spec, maximum region table entry count is 2047 */
378 if (s->rt.entry_count > 2047) {
379 ret = -EINVAL;
380 goto fail;
383 for (i = 0; i < s->rt.entry_count; i++) {
384 memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
385 offset += sizeof(rt_entry);
387 leguid_to_cpus(&rt_entry.guid);
388 le64_to_cpus(&rt_entry.file_offset);
389 le32_to_cpus(&rt_entry.length);
390 le32_to_cpus(&rt_entry.data_bits);
392 /* see if we recognize the entry */
393 if (guid_eq(rt_entry.guid, bat_guid)) {
394 /* must be unique; if we have already found it this is invalid */
395 if (bat_rt_found) {
396 ret = -EINVAL;
397 goto fail;
399 bat_rt_found = true;
400 s->bat_rt = rt_entry;
401 continue;
404 if (guid_eq(rt_entry.guid, metadata_guid)) {
405 /* must be unique; if we have already found it this is invalid */
406 if (metadata_rt_found) {
407 ret = -EINVAL;
408 goto fail;
410 metadata_rt_found = true;
411 s->metadata_rt = rt_entry;
412 continue;
415 if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
416 /* cannot read vhdx file - required region table entry that
417 * we do not understand. per spec, we must fail to open */
418 ret = -ENOTSUP;
419 goto fail;
422 ret = 0;
424 fail:
425 qemu_vfree(buffer);
426 return ret;
431 /* Metadata initial parser
433 * This loads all the metadata entry fields. This may cause additional
434 * fields to be processed (e.g. parent locator, etc..).
436 * There are 5 Metadata items that are always required:
437 * - File Parameters (block size, has a parent)
438 * - Virtual Disk Size (size, in bytes, of the virtual drive)
439 * - Page 83 Data (scsi page 83 guid)
440 * - Logical Sector Size (logical sector size in bytes, either 512 or
441 * 4096. We only support 512 currently)
442 * - Physical Sector Size (512 or 4096)
444 * Also, if the File Parameters indicate this is a differencing file,
445 * we must also look for the Parent Locator metadata item.
447 static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
449 int ret = 0;
450 uint8_t *buffer;
451 int offset = 0;
452 uint32_t i = 0;
453 VHDXMetadataTableEntry md_entry;
455 buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
457 ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
458 VHDX_METADATA_TABLE_MAX_SIZE);
459 if (ret < 0) {
460 goto exit;
462 memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
463 offset += sizeof(s->metadata_hdr);
465 le64_to_cpus(&s->metadata_hdr.signature);
466 le16_to_cpus(&s->metadata_hdr.reserved);
467 le16_to_cpus(&s->metadata_hdr.entry_count);
469 if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
470 ret = -EINVAL;
471 goto exit;
474 s->metadata_entries.present = 0;
476 if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
477 (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
478 ret = -EINVAL;
479 goto exit;
482 for (i = 0; i < s->metadata_hdr.entry_count; i++) {
483 memcpy(&md_entry, buffer + offset, sizeof(md_entry));
484 offset += sizeof(md_entry);
486 leguid_to_cpus(&md_entry.item_id);
487 le32_to_cpus(&md_entry.offset);
488 le32_to_cpus(&md_entry.length);
489 le32_to_cpus(&md_entry.data_bits);
490 le32_to_cpus(&md_entry.reserved2);
492 if (guid_eq(md_entry.item_id, file_param_guid)) {
493 if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
494 ret = -EINVAL;
495 goto exit;
497 s->metadata_entries.file_parameters_entry = md_entry;
498 s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
499 continue;
502 if (guid_eq(md_entry.item_id, virtual_size_guid)) {
503 if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
504 ret = -EINVAL;
505 goto exit;
507 s->metadata_entries.virtual_disk_size_entry = md_entry;
508 s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
509 continue;
512 if (guid_eq(md_entry.item_id, page83_guid)) {
513 if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
514 ret = -EINVAL;
515 goto exit;
517 s->metadata_entries.page83_data_entry = md_entry;
518 s->metadata_entries.present |= META_PAGE_83_PRESENT;
519 continue;
522 if (guid_eq(md_entry.item_id, logical_sector_guid)) {
523 if (s->metadata_entries.present &
524 META_LOGICAL_SECTOR_SIZE_PRESENT) {
525 ret = -EINVAL;
526 goto exit;
528 s->metadata_entries.logical_sector_size_entry = md_entry;
529 s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
530 continue;
533 if (guid_eq(md_entry.item_id, phys_sector_guid)) {
534 if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
535 ret = -EINVAL;
536 goto exit;
538 s->metadata_entries.phys_sector_size_entry = md_entry;
539 s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
540 continue;
543 if (guid_eq(md_entry.item_id, parent_locator_guid)) {
544 if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
545 ret = -EINVAL;
546 goto exit;
548 s->metadata_entries.parent_locator_entry = md_entry;
549 s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
550 continue;
553 if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
554 /* cannot read vhdx file - required region table entry that
555 * we do not understand. per spec, we must fail to open */
556 ret = -ENOTSUP;
557 goto exit;
561 if (s->metadata_entries.present != META_ALL_PRESENT) {
562 ret = -ENOTSUP;
563 goto exit;
566 ret = bdrv_pread(bs->file,
567 s->metadata_entries.file_parameters_entry.offset
568 + s->metadata_rt.file_offset,
569 &s->params,
570 sizeof(s->params));
572 if (ret < 0) {
573 goto exit;
576 le32_to_cpus(&s->params.block_size);
577 le32_to_cpus(&s->params.data_bits);
580 /* We now have the file parameters, so we can tell if this is a
581 * differencing file (i.e.. has_parent), is dynamic or fixed
582 * sized (leave_blocks_allocated), and the block size */
584 /* The parent locator required iff the file parameters has_parent set */
585 if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
586 if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
587 /* TODO: parse parent locator fields */
588 ret = -ENOTSUP; /* temp, until differencing files are supported */
589 goto exit;
590 } else {
591 /* if has_parent is set, but there is not parent locator present,
592 * then that is an invalid combination */
593 ret = -EINVAL;
594 goto exit;
598 /* determine virtual disk size, logical sector size,
599 * and phys sector size */
601 ret = bdrv_pread(bs->file,
602 s->metadata_entries.virtual_disk_size_entry.offset
603 + s->metadata_rt.file_offset,
604 &s->virtual_disk_size,
605 sizeof(uint64_t));
606 if (ret < 0) {
607 goto exit;
609 ret = bdrv_pread(bs->file,
610 s->metadata_entries.logical_sector_size_entry.offset
611 + s->metadata_rt.file_offset,
612 &s->logical_sector_size,
613 sizeof(uint32_t));
614 if (ret < 0) {
615 goto exit;
617 ret = bdrv_pread(bs->file,
618 s->metadata_entries.phys_sector_size_entry.offset
619 + s->metadata_rt.file_offset,
620 &s->physical_sector_size,
621 sizeof(uint32_t));
622 if (ret < 0) {
623 goto exit;
626 le64_to_cpus(&s->virtual_disk_size);
627 le32_to_cpus(&s->logical_sector_size);
628 le32_to_cpus(&s->physical_sector_size);
630 if (s->logical_sector_size == 0 || s->params.block_size == 0) {
631 ret = -EINVAL;
632 goto exit;
635 /* both block_size and sector_size are guaranteed powers of 2 */
636 s->sectors_per_block = s->params.block_size / s->logical_sector_size;
637 s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
638 (uint64_t)s->logical_sector_size /
639 (uint64_t)s->params.block_size;
641 /* These values are ones we will want to use for division / multiplication
642 * later on, and they are all guaranteed (per the spec) to be powers of 2,
643 * so we can take advantage of that for shift operations during
644 * reads/writes */
645 if (s->logical_sector_size & (s->logical_sector_size - 1)) {
646 ret = -EINVAL;
647 goto exit;
649 if (s->sectors_per_block & (s->sectors_per_block - 1)) {
650 ret = -EINVAL;
651 goto exit;
653 if (s->chunk_ratio & (s->chunk_ratio - 1)) {
654 ret = -EINVAL;
655 goto exit;
657 s->block_size = s->params.block_size;
658 if (s->block_size & (s->block_size - 1)) {
659 ret = -EINVAL;
660 goto exit;
663 s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
664 s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
665 s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
666 s->block_size_bits = 31 - clz32(s->block_size);
668 ret = 0;
670 exit:
671 qemu_vfree(buffer);
672 return ret;
675 /* Parse the replay log. Per the VHDX spec, if the log is present
676 * it must be replayed prior to opening the file, even read-only.
678 * If read-only, we must replay the log in RAM (or refuse to open
679 * a dirty VHDX file read-only */
680 static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
682 int ret = 0;
683 int i;
684 VHDXHeader *hdr;
686 hdr = s->headers[s->curr_header];
688 /* either the log guid, or log length is zero,
689 * then a replay log is present */
690 for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
691 ret |= hdr->log_guid.data4[i];
693 if (hdr->log_guid.data1 == 0 &&
694 hdr->log_guid.data2 == 0 &&
695 hdr->log_guid.data3 == 0 &&
696 ret == 0) {
697 goto exit;
700 /* per spec, only log version of 0 is supported */
701 if (hdr->log_version != 0) {
702 ret = -EINVAL;
703 goto exit;
706 if (hdr->log_length == 0) {
707 goto exit;
710 /* We currently do not support images with logs to replay */
711 ret = -ENOTSUP;
713 exit:
714 return ret;
718 static int vhdx_open(BlockDriverState *bs, QDict *options, int flags)
720 BDRVVHDXState *s = bs->opaque;
721 int ret = 0;
722 uint32_t i;
723 uint64_t signature;
724 uint32_t data_blocks_cnt, bitmap_blocks_cnt;
727 s->bat = NULL;
729 qemu_co_mutex_init(&s->lock);
731 /* validate the file signature */
732 ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
733 if (ret < 0) {
734 goto fail;
736 if (memcmp(&signature, "vhdxfile", 8)) {
737 ret = -EINVAL;
738 goto fail;
741 ret = vhdx_parse_header(bs, s);
742 if (ret) {
743 goto fail;
746 ret = vhdx_parse_log(bs, s);
747 if (ret) {
748 goto fail;
751 ret = vhdx_open_region_tables(bs, s);
752 if (ret) {
753 goto fail;
756 ret = vhdx_parse_metadata(bs, s);
757 if (ret) {
758 goto fail;
760 s->block_size = s->params.block_size;
762 /* the VHDX spec dictates that virtual_disk_size is always a multiple of
763 * logical_sector_size */
764 bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
766 data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
767 if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
768 data_blocks_cnt++;
770 bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
771 if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
772 bitmap_blocks_cnt++;
775 if (s->parent_entries) {
776 s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
777 } else {
778 s->bat_entries = data_blocks_cnt +
779 ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
782 s->bat_offset = s->bat_rt.file_offset;
784 if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
785 /* BAT allocation is not large enough for all entries */
786 ret = -EINVAL;
787 goto fail;
790 s->bat = qemu_blockalign(bs, s->bat_rt.length);
792 ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
793 if (ret < 0) {
794 goto fail;
797 for (i = 0; i < s->bat_entries; i++) {
798 le64_to_cpus(&s->bat[i]);
801 if (flags & BDRV_O_RDWR) {
802 ret = -ENOTSUP;
803 goto fail;
806 /* TODO: differencing files, write */
808 return 0;
809 fail:
810 qemu_vfree(s->headers[0]);
811 qemu_vfree(s->headers[1]);
812 qemu_vfree(s->bat);
813 qemu_vfree(s->parent_entries);
814 return ret;
817 static int vhdx_reopen_prepare(BDRVReopenState *state,
818 BlockReopenQueue *queue, Error **errp)
820 return 0;
825 * Perform sector to block offset translations, to get various
826 * sector and file offsets into the image. See VHDXSectorInfo
828 static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
829 int nb_sectors, VHDXSectorInfo *sinfo)
831 uint32_t block_offset;
833 sinfo->bat_idx = sector_num >> s->sectors_per_block_bits;
834 /* effectively a modulo - this gives us the offset into the block
835 * (in sector sizes) for our sector number */
836 block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits);
837 /* the chunk ratio gives us the interleaving of the sector
838 * bitmaps, so we need to advance our page block index by the
839 * sector bitmaps entry number */
840 sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits;
842 /* the number of sectors we can read/write in this cycle */
843 sinfo->sectors_avail = s->sectors_per_block - block_offset;
845 sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits;
847 if (sinfo->sectors_avail > nb_sectors) {
848 sinfo->sectors_avail = nb_sectors;
851 sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
853 sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS;
855 sinfo->block_offset = block_offset << s->logical_sector_size_bits;
857 /* The file offset must be past the header section, so must be > 0 */
858 if (sinfo->file_offset == 0) {
859 return;
862 /* block offset is the offset in vhdx logical sectors, in
863 * the payload data block. Convert that to a byte offset
864 * in the block, and add in the payload data block offset
865 * in the file, in bytes, to get the final read address */
867 sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */
868 sinfo->file_offset += sinfo->block_offset;
873 static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
874 int nb_sectors, QEMUIOVector *qiov)
876 BDRVVHDXState *s = bs->opaque;
877 int ret = 0;
878 VHDXSectorInfo sinfo;
879 uint64_t bytes_done = 0;
880 QEMUIOVector hd_qiov;
882 qemu_iovec_init(&hd_qiov, qiov->niov);
884 qemu_co_mutex_lock(&s->lock);
886 while (nb_sectors > 0) {
887 /* We are a differencing file, so we need to inspect the sector bitmap
888 * to see if we have the data or not */
889 if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
890 /* not supported yet */
891 ret = -ENOTSUP;
892 goto exit;
893 } else {
894 vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
896 qemu_iovec_reset(&hd_qiov);
897 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail);
899 /* check the payload block state */
900 switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
901 case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
902 case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
903 case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
904 case PAYLOAD_BLOCK_ZERO:
905 /* return zero */
906 qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
907 break;
908 case PAYLOAD_BLOCK_FULL_PRESENT:
909 qemu_co_mutex_unlock(&s->lock);
910 ret = bdrv_co_readv(bs->file,
911 sinfo.file_offset >> BDRV_SECTOR_BITS,
912 sinfo.sectors_avail, &hd_qiov);
913 qemu_co_mutex_lock(&s->lock);
914 if (ret < 0) {
915 goto exit;
917 break;
918 case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
919 /* we don't yet support difference files, fall through
920 * to error */
921 default:
922 ret = -EIO;
923 goto exit;
924 break;
926 nb_sectors -= sinfo.sectors_avail;
927 sector_num += sinfo.sectors_avail;
928 bytes_done += sinfo.bytes_avail;
931 ret = 0;
932 exit:
933 qemu_co_mutex_unlock(&s->lock);
934 qemu_iovec_destroy(&hd_qiov);
935 return ret;
940 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
941 int nb_sectors, QEMUIOVector *qiov)
943 return -ENOTSUP;
947 static void vhdx_close(BlockDriverState *bs)
949 BDRVVHDXState *s = bs->opaque;
950 qemu_vfree(s->headers[0]);
951 qemu_vfree(s->headers[1]);
952 qemu_vfree(s->bat);
953 qemu_vfree(s->parent_entries);
956 static BlockDriver bdrv_vhdx = {
957 .format_name = "vhdx",
958 .instance_size = sizeof(BDRVVHDXState),
959 .bdrv_probe = vhdx_probe,
960 .bdrv_open = vhdx_open,
961 .bdrv_close = vhdx_close,
962 .bdrv_reopen_prepare = vhdx_reopen_prepare,
963 .bdrv_co_readv = vhdx_co_readv,
964 .bdrv_co_writev = vhdx_co_writev,
967 static void bdrv_vhdx_init(void)
969 bdrv_register(&bdrv_vhdx);
972 block_init(bdrv_vhdx_init);