2 * Block driver for Hyper-V VHDX Images
4 * Copyright (c) 2013 Red Hat, Inc.,
7 * Jeff Cody <jcody@redhat.com>
9 * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
11 * https://www.microsoft.com/en-us/download/details.aspx?id=29681
13 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "block/block_int.h"
20 #include "qemu/module.h"
21 #include "qemu/crc32c.h"
22 #include "block/vhdx.h"
23 #include "migration/migration.h"
26 /* Several metadata and region table data entries are identified by
27 * guids in a MS-specific GUID format. */
30 /* ------- Known Region Table GUIDs ---------------------- */
31 static const MSGUID bat_guid
= { .data1
= 0x2dc27766,
34 .data4
= { 0x9d, 0x64, 0x11, 0x5e,
35 0x9b, 0xfd, 0x4a, 0x08} };
37 static const MSGUID metadata_guid
= { .data1
= 0x8b7ca206,
40 .data4
= { 0xb8, 0xfe, 0x57, 0x5f,
41 0x05, 0x0f, 0x88, 0x6e} };
45 /* ------- Known Metadata Entry GUIDs ---------------------- */
46 static const MSGUID file_param_guid
= { .data1
= 0xcaa16737,
49 .data4
= { 0xb3, 0xb6, 0x33, 0xf0,
50 0xaa, 0x44, 0xe7, 0x6b} };
52 static const MSGUID virtual_size_guid
= { .data1
= 0x2FA54224,
55 .data4
= { 0xb2, 0x11, 0x5d, 0xbe,
56 0xd8, 0x3b, 0xf4, 0xb8} };
58 static const MSGUID page83_guid
= { .data1
= 0xbeca12ab,
61 .data4
= { 0x93, 0xef, 0xc3, 0x09,
62 0xe0, 0x00, 0xc7, 0x46} };
65 static const MSGUID phys_sector_guid
= { .data1
= 0xcda348c7,
68 .data4
= { 0x9c, 0xc9, 0xe9, 0x88,
69 0x52, 0x51, 0xc5, 0x56} };
71 static const MSGUID parent_locator_guid
= { .data1
= 0xa8d35f2d,
74 .data4
= { 0xab, 0xf7, 0xd3,
78 static const MSGUID logical_sector_guid
= { .data1
= 0x8141bf1d,
81 .data4
= { 0xba, 0x47, 0xf2,
85 /* Each parent type must have a valid GUID; this is for parent images
86 * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
87 * need to make up our own QCOW2 GUID type */
88 static const MSGUID parent_vhdx_guid
= { .data1
= 0xb04aefb7,
91 .data4
= { 0xb7, 0x89, 0x25, 0xb8,
92 0xe9, 0x44, 0x59, 0x13} };
/* Bit flags, one per metadata item this driver recognises.  While the
 * metadata table is walked, the flag for each recognised item is OR-ed into
 * the 'present' bitmask of the metadata entries; a flag that is already set
 * when its item is seen again marks a duplicate entry. */
95 #define META_FILE_PARAMETER_PRESENT 0x01
96 #define META_VIRTUAL_DISK_SIZE_PRESENT 0x02
97 #define META_PAGE_83_PRESENT 0x04
98 #define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
99 #define META_PHYS_SECTOR_SIZE_PRESENT 0x10
100 #define META_PARENT_LOCATOR_PRESENT 0x20
/* Mask of the five flags above other than META_PARENT_LOCATOR_PRESENT.
 * NOTE(review): the metadata parser tests 'present != META_ALL_PRESENT', so
 * a table that also carries a parent locator entry does not compare equal to
 * this mask - confirm that this is the intended behaviour. */
102 #define META_ALL_PRESENT \
103 (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
104 META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
105 META_PHYS_SECTOR_SIZE_PRESENT)
/* Copies of the metadata table entries for the metadata items this driver
 * recognises.  Each member is assigned from the table entry whose item_id
 * matches the corresponding known GUID while the metadata table is parsed. */
107 typedef struct VHDXMetadataEntries
{
108 VHDXMetadataTableEntry file_parameters_entry
;
109 VHDXMetadataTableEntry virtual_disk_size_entry
;
110 VHDXMetadataTableEntry page83_data_entry
;
111 VHDXMetadataTableEntry logical_sector_size_entry
;
112 VHDXMetadataTableEntry phys_sector_size_entry
;
113 VHDXMetadataTableEntry parent_locator_entry
;
/* NOTE(review): other code in this file reads and writes a 'present' member
 * of this struct (a bitmask of META_*_PRESENT flags); its declaration is not
 * visible in this view - confirm it exists in the full file. */
115 } VHDXMetadataEntries
;
118 typedef struct VHDXSectorInfo
{
119 uint32_t bat_idx
; /* BAT entry index */
120 uint32_t sectors_avail
; /* sectors available in payload block */
121 uint32_t bytes_left
; /* bytes left in the block after data to r/w */
122 uint32_t bytes_avail
; /* bytes available in payload block */
123 uint64_t file_offset
; /* absolute offset in bytes, in file */
124 uint64_t block_offset
; /* block offset, in bytes */
129 typedef struct BDRVVHDXState
{
133 VHDXHeader
*headers
[2];
135 VHDXRegionTableHeader rt
;
136 VHDXRegionTableEntry bat_rt
; /* region table for the BAT */
137 VHDXRegionTableEntry metadata_rt
; /* region table for the metadata */
139 VHDXMetadataTableHeader metadata_hdr
;
140 VHDXMetadataEntries metadata_entries
;
142 VHDXFileParameters params
;
144 uint32_t block_size_bits
;
145 uint32_t sectors_per_block
;
146 uint32_t sectors_per_block_bits
;
148 uint64_t virtual_disk_size
;
149 uint32_t logical_sector_size
;
150 uint32_t physical_sector_size
;
152 uint64_t chunk_ratio
;
153 uint32_t chunk_ratio_bits
;
154 uint32_t logical_sector_size_bits
;
156 uint32_t bat_entries
;
160 VHDXParentLocatorHeader parent_header
;
161 VHDXParentLocatorEntry
*parent_entries
;
163 Error
*migration_blocker
;
166 uint32_t vhdx_checksum_calc(uint32_t crc
, uint8_t *buf
, size_t size
,
173 if (crc_offset
> 0) {
174 memcpy(&crc_orig
, buf
+ crc_offset
, sizeof(crc_orig
));
175 memset(buf
+ crc_offset
, 0, sizeof(crc_orig
));
178 crc_new
= crc32c(crc
, buf
, size
);
179 if (crc_offset
> 0) {
180 memcpy(buf
+ crc_offset
, &crc_orig
, sizeof(crc_orig
));
186 /* Validates the checksum of the buffer, with an in-place CRC.
188 * Zero is substituted during crc calculation for the original crc field,
189 * and the crc field is restored afterwards. But the buffer will be modified
190 * during the calculation, so this may not be suitable for multi-threaded
193 * crc_offset: byte offset in buf of the buffer crc
194 * buf: buffer pointer
195 * size: size of buffer (must be > crc_offset+4)
197 * returns true if checksum is valid, false otherwise
199 bool vhdx_checksum_is_valid(uint8_t *buf
, size_t size
, int crc_offset
)
205 assert(size
> (crc_offset
+ 4));
207 memcpy(&crc_orig
, buf
+ crc_offset
, sizeof(crc_orig
));
208 crc_orig
= le32_to_cpu(crc_orig
);
210 crc
= vhdx_checksum_calc(0xffffffff, buf
, size
, crc_offset
);
212 return crc
== crc_orig
;
217 * Per the MS VHDX Specification, for every VHDX file:
218 * - The header section is fixed size - 1 MB
219 * - The header section is always the first "object"
220 * - The first 64KB of the header is the File Identifier
221 * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
222 * - The following 512 bytes constitute a UTF-16 string identifying the
223 * software that created the file, and is optional and diagnostic only.
225 * Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
227 static int vhdx_probe(const uint8_t *buf
, int buf_size
, const char *filename
)
229 if (buf_size
>= 8 && !memcmp(buf
, "vhdxfile", 8)) {
235 /* All VHDX structures on disk are little endian */
236 static void vhdx_header_le_import(VHDXHeader
*h
)
240 le32_to_cpus(&h
->signature
);
241 le32_to_cpus(&h
->checksum
);
242 le64_to_cpus(&h
->sequence_number
);
244 leguid_to_cpus(&h
->file_write_guid
);
245 leguid_to_cpus(&h
->data_write_guid
);
246 leguid_to_cpus(&h
->log_guid
);
248 le16_to_cpus(&h
->log_version
);
249 le16_to_cpus(&h
->version
);
250 le32_to_cpus(&h
->log_length
);
251 le64_to_cpus(&h
->log_offset
);
255 /* opens the specified header block from the VHDX file header section */
256 static int vhdx_parse_header(BlockDriverState
*bs
, BDRVVHDXState
*s
)
261 bool h1_valid
= false;
262 bool h2_valid
= false;
267 header1
= qemu_blockalign(bs
, sizeof(VHDXHeader
));
268 header2
= qemu_blockalign(bs
, sizeof(VHDXHeader
));
270 buffer
= qemu_blockalign(bs
, VHDX_HEADER_SIZE
);
272 s
->headers
[0] = header1
;
273 s
->headers
[1] = header2
;
275 /* We have to read the whole VHDX_HEADER_SIZE instead of
276 * sizeof(VHDXHeader), because the checksum is over the whole
278 ret
= bdrv_pread(bs
->file
, VHDX_HEADER1_OFFSET
, buffer
, VHDX_HEADER_SIZE
);
282 /* copy over just the relevant portion that we need */
283 memcpy(header1
, buffer
, sizeof(VHDXHeader
));
284 vhdx_header_le_import(header1
);
286 if (vhdx_checksum_is_valid(buffer
, VHDX_HEADER_SIZE
, 4) &&
287 !memcmp(&header1
->signature
, "head", 4) &&
288 header1
->version
== 1) {
289 h1_seq
= header1
->sequence_number
;
293 ret
= bdrv_pread(bs
->file
, VHDX_HEADER2_OFFSET
, buffer
, VHDX_HEADER_SIZE
);
297 /* copy over just the relevant portion that we need */
298 memcpy(header2
, buffer
, sizeof(VHDXHeader
));
299 vhdx_header_le_import(header2
);
301 if (vhdx_checksum_is_valid(buffer
, VHDX_HEADER_SIZE
, 4) &&
302 !memcmp(&header2
->signature
, "head", 4) &&
303 header2
->version
== 1) {
304 h2_seq
= header2
->sequence_number
;
308 /* If there is only 1 valid header (or no valid headers), we
309 * don't care what the sequence numbers are */
310 if (h1_valid
&& !h2_valid
) {
312 } else if (!h1_valid
&& h2_valid
) {
314 } else if (!h1_valid
&& !h2_valid
) {
318 /* If both headers are valid, then we choose the active one by the
319 * highest sequence number. If the sequence numbers are equal, that is
321 if (h1_seq
> h2_seq
) {
323 } else if (h2_seq
> h1_seq
) {
336 qerror_report(ERROR_CLASS_GENERIC_ERROR
, "No valid VHDX header found");
339 s
->headers
[0] = NULL
;
340 s
->headers
[1] = NULL
;
347 static int vhdx_open_region_tables(BlockDriverState
*bs
, BDRVVHDXState
*s
)
352 VHDXRegionTableEntry rt_entry
;
354 bool bat_rt_found
= false;
355 bool metadata_rt_found
= false;
357 /* We have to read the whole 64KB block, because the crc32 is over the
359 buffer
= qemu_blockalign(bs
, VHDX_HEADER_BLOCK_SIZE
);
361 ret
= bdrv_pread(bs
->file
, VHDX_REGION_TABLE_OFFSET
, buffer
,
362 VHDX_HEADER_BLOCK_SIZE
);
366 memcpy(&s
->rt
, buffer
, sizeof(s
->rt
));
367 le32_to_cpus(&s
->rt
.signature
);
368 le32_to_cpus(&s
->rt
.checksum
);
369 le32_to_cpus(&s
->rt
.entry_count
);
370 le32_to_cpus(&s
->rt
.reserved
);
371 offset
+= sizeof(s
->rt
);
373 if (!vhdx_checksum_is_valid(buffer
, VHDX_HEADER_BLOCK_SIZE
, 4) ||
374 memcmp(&s
->rt
.signature
, "regi", 4)) {
379 /* Per spec, maximum region table entry count is 2047 */
380 if (s
->rt
.entry_count
> 2047) {
385 for (i
= 0; i
< s
->rt
.entry_count
; i
++) {
386 memcpy(&rt_entry
, buffer
+ offset
, sizeof(rt_entry
));
387 offset
+= sizeof(rt_entry
);
389 leguid_to_cpus(&rt_entry
.guid
);
390 le64_to_cpus(&rt_entry
.file_offset
);
391 le32_to_cpus(&rt_entry
.length
);
392 le32_to_cpus(&rt_entry
.data_bits
);
394 /* see if we recognize the entry */
395 if (guid_eq(rt_entry
.guid
, bat_guid
)) {
396 /* must be unique; if we have already found it this is invalid */
402 s
->bat_rt
= rt_entry
;
406 if (guid_eq(rt_entry
.guid
, metadata_guid
)) {
407 /* must be unique; if we have already found it this is invalid */
408 if (metadata_rt_found
) {
412 metadata_rt_found
= true;
413 s
->metadata_rt
= rt_entry
;
417 if (rt_entry
.data_bits
& VHDX_REGION_ENTRY_REQUIRED
) {
418 /* cannot read vhdx file - required region table entry that
419 * we do not understand. per spec, we must fail to open */
433 /* Metadata initial parser
435 * This loads all the metadata entry fields. This may cause additional
436 * fields to be processed (e.g. parent locator, etc.).
438 * There are 5 Metadata items that are always required:
439 * - File Parameters (block size, has a parent)
440 * - Virtual Disk Size (size, in bytes, of the virtual drive)
441 * - Page 83 Data (scsi page 83 guid)
442 * - Logical Sector Size (logical sector size in bytes, either 512 or
443 * 4096. We only support 512 currently)
444 * - Physical Sector Size (512 or 4096)
446 * Also, if the File Parameters indicate this is a differencing file,
447 * we must also look for the Parent Locator metadata item.
449 static int vhdx_parse_metadata(BlockDriverState
*bs
, BDRVVHDXState
*s
)
455 VHDXMetadataTableEntry md_entry
;
457 buffer
= qemu_blockalign(bs
, VHDX_METADATA_TABLE_MAX_SIZE
);
459 ret
= bdrv_pread(bs
->file
, s
->metadata_rt
.file_offset
, buffer
,
460 VHDX_METADATA_TABLE_MAX_SIZE
);
464 memcpy(&s
->metadata_hdr
, buffer
, sizeof(s
->metadata_hdr
));
465 offset
+= sizeof(s
->metadata_hdr
);
467 le64_to_cpus(&s
->metadata_hdr
.signature
);
468 le16_to_cpus(&s
->metadata_hdr
.reserved
);
469 le16_to_cpus(&s
->metadata_hdr
.entry_count
);
471 if (memcmp(&s
->metadata_hdr
.signature
, "metadata", 8)) {
476 s
->metadata_entries
.present
= 0;
478 if ((s
->metadata_hdr
.entry_count
* sizeof(md_entry
)) >
479 (VHDX_METADATA_TABLE_MAX_SIZE
- offset
)) {
484 for (i
= 0; i
< s
->metadata_hdr
.entry_count
; i
++) {
485 memcpy(&md_entry
, buffer
+ offset
, sizeof(md_entry
));
486 offset
+= sizeof(md_entry
);
488 leguid_to_cpus(&md_entry
.item_id
);
489 le32_to_cpus(&md_entry
.offset
);
490 le32_to_cpus(&md_entry
.length
);
491 le32_to_cpus(&md_entry
.data_bits
);
492 le32_to_cpus(&md_entry
.reserved2
);
494 if (guid_eq(md_entry
.item_id
, file_param_guid
)) {
495 if (s
->metadata_entries
.present
& META_FILE_PARAMETER_PRESENT
) {
499 s
->metadata_entries
.file_parameters_entry
= md_entry
;
500 s
->metadata_entries
.present
|= META_FILE_PARAMETER_PRESENT
;
504 if (guid_eq(md_entry
.item_id
, virtual_size_guid
)) {
505 if (s
->metadata_entries
.present
& META_VIRTUAL_DISK_SIZE_PRESENT
) {
509 s
->metadata_entries
.virtual_disk_size_entry
= md_entry
;
510 s
->metadata_entries
.present
|= META_VIRTUAL_DISK_SIZE_PRESENT
;
514 if (guid_eq(md_entry
.item_id
, page83_guid
)) {
515 if (s
->metadata_entries
.present
& META_PAGE_83_PRESENT
) {
519 s
->metadata_entries
.page83_data_entry
= md_entry
;
520 s
->metadata_entries
.present
|= META_PAGE_83_PRESENT
;
524 if (guid_eq(md_entry
.item_id
, logical_sector_guid
)) {
525 if (s
->metadata_entries
.present
&
526 META_LOGICAL_SECTOR_SIZE_PRESENT
) {
530 s
->metadata_entries
.logical_sector_size_entry
= md_entry
;
531 s
->metadata_entries
.present
|= META_LOGICAL_SECTOR_SIZE_PRESENT
;
535 if (guid_eq(md_entry
.item_id
, phys_sector_guid
)) {
536 if (s
->metadata_entries
.present
& META_PHYS_SECTOR_SIZE_PRESENT
) {
540 s
->metadata_entries
.phys_sector_size_entry
= md_entry
;
541 s
->metadata_entries
.present
|= META_PHYS_SECTOR_SIZE_PRESENT
;
545 if (guid_eq(md_entry
.item_id
, parent_locator_guid
)) {
546 if (s
->metadata_entries
.present
& META_PARENT_LOCATOR_PRESENT
) {
550 s
->metadata_entries
.parent_locator_entry
= md_entry
;
551 s
->metadata_entries
.present
|= META_PARENT_LOCATOR_PRESENT
;
555 if (md_entry
.data_bits
& VHDX_META_FLAGS_IS_REQUIRED
) {
556 /* cannot read vhdx file - required metadata entry that
557 * we do not understand. per spec, we must fail to open */
563 if (s
->metadata_entries
.present
!= META_ALL_PRESENT
) {
568 ret
= bdrv_pread(bs
->file
,
569 s
->metadata_entries
.file_parameters_entry
.offset
570 + s
->metadata_rt
.file_offset
,
578 le32_to_cpus(&s
->params
.block_size
);
579 le32_to_cpus(&s
->params
.data_bits
);
582 /* We now have the file parameters, so we can tell if this is a
583 * differencing file (i.e. has_parent), is dynamic or fixed
584 * sized (leave_blocks_allocated), and the block size */
586 /* The parent locator is required iff the file parameters have has_parent set */
587 if (s
->params
.data_bits
& VHDX_PARAMS_HAS_PARENT
) {
588 if (s
->metadata_entries
.present
& META_PARENT_LOCATOR_PRESENT
) {
589 /* TODO: parse parent locator fields */
590 ret
= -ENOTSUP
; /* temp, until differencing files are supported */
593 /* if has_parent is set, but there is no parent locator present,
594 * then that is an invalid combination */
600 /* determine virtual disk size, logical sector size,
601 * and phys sector size */
603 ret
= bdrv_pread(bs
->file
,
604 s
->metadata_entries
.virtual_disk_size_entry
.offset
605 + s
->metadata_rt
.file_offset
,
606 &s
->virtual_disk_size
,
611 ret
= bdrv_pread(bs
->file
,
612 s
->metadata_entries
.logical_sector_size_entry
.offset
613 + s
->metadata_rt
.file_offset
,
614 &s
->logical_sector_size
,
619 ret
= bdrv_pread(bs
->file
,
620 s
->metadata_entries
.phys_sector_size_entry
.offset
621 + s
->metadata_rt
.file_offset
,
622 &s
->physical_sector_size
,
628 le64_to_cpus(&s
->virtual_disk_size
);
629 le32_to_cpus(&s
->logical_sector_size
);
630 le32_to_cpus(&s
->physical_sector_size
);
632 if (s
->logical_sector_size
== 0 || s
->params
.block_size
== 0) {
637 /* both block_size and sector_size are guaranteed powers of 2 */
638 s
->sectors_per_block
= s
->params
.block_size
/ s
->logical_sector_size
;
639 s
->chunk_ratio
= (VHDX_MAX_SECTORS_PER_BLOCK
) *
640 (uint64_t)s
->logical_sector_size
/
641 (uint64_t)s
->params
.block_size
;
643 /* These values are ones we will want to use for division / multiplication
644 * later on, and they are all guaranteed (per the spec) to be powers of 2,
645 * so we can take advantage of that for shift operations during
647 if (s
->logical_sector_size
& (s
->logical_sector_size
- 1)) {
651 if (s
->sectors_per_block
& (s
->sectors_per_block
- 1)) {
655 if (s
->chunk_ratio
& (s
->chunk_ratio
- 1)) {
659 s
->block_size
= s
->params
.block_size
;
660 if (s
->block_size
& (s
->block_size
- 1)) {
665 s
->logical_sector_size_bits
= 31 - clz32(s
->logical_sector_size
);
666 s
->sectors_per_block_bits
= 31 - clz32(s
->sectors_per_block
);
667 s
->chunk_ratio_bits
= 63 - clz64(s
->chunk_ratio
);
668 s
->block_size_bits
= 31 - clz32(s
->block_size
);
677 /* Parse the replay log. Per the VHDX spec, if the log is present
678 * it must be replayed prior to opening the file, even read-only.
680 * If read-only, we must replay the log in RAM (or refuse to open
681 * a dirty VHDX file read-only) */
682 static int vhdx_parse_log(BlockDriverState
*bs
, BDRVVHDXState
*s
)
688 hdr
= s
->headers
[s
->curr_header
];
690 /* if either the log guid or the log length is zero,
691 * then no replay log is present */
692 for (i
= 0; i
< sizeof(hdr
->log_guid
.data4
); i
++) {
693 ret
|= hdr
->log_guid
.data4
[i
];
695 if (hdr
->log_guid
.data1
== 0 &&
696 hdr
->log_guid
.data2
== 0 &&
697 hdr
->log_guid
.data3
== 0 &&
702 /* per spec, only log version of 0 is supported */
703 if (hdr
->log_version
!= 0) {
708 if (hdr
->log_length
== 0) {
712 /* We currently do not support images with logs to replay */
720 static int vhdx_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
723 BDRVVHDXState
*s
= bs
->opaque
;
727 uint32_t data_blocks_cnt
, bitmap_blocks_cnt
;
732 qemu_co_mutex_init(&s
->lock
);
734 /* validate the file signature */
735 ret
= bdrv_pread(bs
->file
, 0, &signature
, sizeof(uint64_t));
739 if (memcmp(&signature
, "vhdxfile", 8)) {
744 ret
= vhdx_parse_header(bs
, s
);
749 ret
= vhdx_parse_log(bs
, s
);
754 ret
= vhdx_open_region_tables(bs
, s
);
759 ret
= vhdx_parse_metadata(bs
, s
);
763 s
->block_size
= s
->params
.block_size
;
765 /* the VHDX spec dictates that virtual_disk_size is always a multiple of
766 * logical_sector_size */
767 bs
->total_sectors
= s
->virtual_disk_size
>> s
->logical_sector_size_bits
;
769 data_blocks_cnt
= s
->virtual_disk_size
>> s
->block_size_bits
;
770 if (s
->virtual_disk_size
- (data_blocks_cnt
<< s
->block_size_bits
)) {
773 bitmap_blocks_cnt
= data_blocks_cnt
>> s
->chunk_ratio_bits
;
774 if (data_blocks_cnt
- (bitmap_blocks_cnt
<< s
->chunk_ratio_bits
)) {
778 if (s
->parent_entries
) {
779 s
->bat_entries
= bitmap_blocks_cnt
* (s
->chunk_ratio
+ 1);
781 s
->bat_entries
= data_blocks_cnt
+
782 ((data_blocks_cnt
- 1) >> s
->chunk_ratio_bits
);
785 s
->bat_offset
= s
->bat_rt
.file_offset
;
787 if (s
->bat_entries
> s
->bat_rt
.length
/ sizeof(VHDXBatEntry
)) {
788 /* BAT allocation is not large enough for all entries */
793 s
->bat
= qemu_blockalign(bs
, s
->bat_rt
.length
);
795 ret
= bdrv_pread(bs
->file
, s
->bat_offset
, s
->bat
, s
->bat_rt
.length
);
800 for (i
= 0; i
< s
->bat_entries
; i
++) {
801 le64_to_cpus(&s
->bat
[i
]);
804 if (flags
& BDRV_O_RDWR
) {
809 /* TODO: differencing files, write */
811 /* Disable migration when VHDX images are used */
812 error_set(&s
->migration_blocker
,
813 QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED
,
814 "vhdx", bs
->device_name
, "live migration");
815 migrate_add_blocker(s
->migration_blocker
);
819 qemu_vfree(s
->headers
[0]);
820 qemu_vfree(s
->headers
[1]);
822 qemu_vfree(s
->parent_entries
);
826 static int vhdx_reopen_prepare(BDRVReopenState
*state
,
827 BlockReopenQueue
*queue
, Error
**errp
)
834 * Perform sector to block offset translations, to get various
835 * sector and file offsets into the image. See VHDXSectorInfo
837 static void vhdx_block_translate(BDRVVHDXState
*s
, int64_t sector_num
,
838 int nb_sectors
, VHDXSectorInfo
*sinfo
)
840 uint32_t block_offset
;
842 sinfo
->bat_idx
= sector_num
>> s
->sectors_per_block_bits
;
843 /* effectively a modulo - this gives us the offset into the block
844 * (in sector sizes) for our sector number */
845 block_offset
= sector_num
- (sinfo
->bat_idx
<< s
->sectors_per_block_bits
);
846 /* the chunk ratio gives us the interleaving of the sector
847 * bitmaps, so we need to advance our page block index by the
848 * sector bitmaps entry number */
849 sinfo
->bat_idx
+= sinfo
->bat_idx
>> s
->chunk_ratio_bits
;
851 /* the number of sectors we can read/write in this cycle */
852 sinfo
->sectors_avail
= s
->sectors_per_block
- block_offset
;
854 sinfo
->bytes_left
= sinfo
->sectors_avail
<< s
->logical_sector_size_bits
;
856 if (sinfo
->sectors_avail
> nb_sectors
) {
857 sinfo
->sectors_avail
= nb_sectors
;
860 sinfo
->bytes_avail
= sinfo
->sectors_avail
<< s
->logical_sector_size_bits
;
862 sinfo
->file_offset
= s
->bat
[sinfo
->bat_idx
] >> VHDX_BAT_FILE_OFF_BITS
;
864 sinfo
->block_offset
= block_offset
<< s
->logical_sector_size_bits
;
866 /* The file offset must be past the header section, so must be > 0 */
867 if (sinfo
->file_offset
== 0) {
871 /* block offset is the offset in vhdx logical sectors, in
872 * the payload data block. Convert that to a byte offset
873 * in the block, and add in the payload data block offset
874 * in the file, in bytes, to get the final read address */
876 sinfo
->file_offset
<<= 20; /* now in bytes, rather than 1MB units */
877 sinfo
->file_offset
+= sinfo
->block_offset
;
882 static coroutine_fn
int vhdx_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
883 int nb_sectors
, QEMUIOVector
*qiov
)
885 BDRVVHDXState
*s
= bs
->opaque
;
887 VHDXSectorInfo sinfo
;
888 uint64_t bytes_done
= 0;
889 QEMUIOVector hd_qiov
;
891 qemu_iovec_init(&hd_qiov
, qiov
->niov
);
893 qemu_co_mutex_lock(&s
->lock
);
895 while (nb_sectors
> 0) {
896 /* We are a differencing file, so we need to inspect the sector bitmap
897 * to see if we have the data or not */
898 if (s
->params
.data_bits
& VHDX_PARAMS_HAS_PARENT
) {
899 /* not supported yet */
903 vhdx_block_translate(s
, sector_num
, nb_sectors
, &sinfo
);
905 qemu_iovec_reset(&hd_qiov
);
906 qemu_iovec_concat(&hd_qiov
, qiov
, bytes_done
, sinfo
.bytes_avail
);
908 /* check the payload block state */
909 switch (s
->bat
[sinfo
.bat_idx
] & VHDX_BAT_STATE_BIT_MASK
) {
910 case PAYLOAD_BLOCK_NOT_PRESENT
: /* fall through */
911 case PAYLOAD_BLOCK_UNDEFINED
: /* fall through */
912 case PAYLOAD_BLOCK_UNMAPPED
: /* fall through */
913 case PAYLOAD_BLOCK_ZERO
:
915 qemu_iovec_memset(&hd_qiov
, 0, 0, sinfo
.bytes_avail
);
917 case PAYLOAD_BLOCK_FULL_PRESENT
:
918 qemu_co_mutex_unlock(&s
->lock
);
919 ret
= bdrv_co_readv(bs
->file
,
920 sinfo
.file_offset
>> BDRV_SECTOR_BITS
,
921 sinfo
.sectors_avail
, &hd_qiov
);
922 qemu_co_mutex_lock(&s
->lock
);
927 case PAYLOAD_BLOCK_PARTIALLY_PRESENT
:
928 /* we don't yet support difference files, fall through
935 nb_sectors
-= sinfo
.sectors_avail
;
936 sector_num
+= sinfo
.sectors_avail
;
937 bytes_done
+= sinfo
.bytes_avail
;
942 qemu_co_mutex_unlock(&s
->lock
);
943 qemu_iovec_destroy(&hd_qiov
);
949 static coroutine_fn
int vhdx_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
950 int nb_sectors
, QEMUIOVector
*qiov
)
956 static void vhdx_close(BlockDriverState
*bs
)
958 BDRVVHDXState
*s
= bs
->opaque
;
959 qemu_vfree(s
->headers
[0]);
960 qemu_vfree(s
->headers
[1]);
962 qemu_vfree(s
->parent_entries
);
963 migrate_del_blocker(s
->migration_blocker
);
964 error_free(s
->migration_blocker
);
967 static BlockDriver bdrv_vhdx
= {
968 .format_name
= "vhdx",
969 .instance_size
= sizeof(BDRVVHDXState
),
970 .bdrv_probe
= vhdx_probe
,
971 .bdrv_open
= vhdx_open
,
972 .bdrv_close
= vhdx_close
,
973 .bdrv_reopen_prepare
= vhdx_reopen_prepare
,
974 .bdrv_co_readv
= vhdx_co_readv
,
975 .bdrv_co_writev
= vhdx_co_writev
,
978 static void bdrv_vhdx_init(void)
980 bdrv_register(&bdrv_vhdx
);
983 block_init(bdrv_vhdx_init
);