2 * Block driver for Hyper-V VHDX Images
4 * Copyright (c) 2013 Red Hat, Inc.,
7 * Jeff Cody <jcody@redhat.com>
9 * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
11 * https://www.microsoft.com/en-us/download/details.aspx?id=29681
13 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "block/block_int.h"
20 #include "qemu/module.h"
21 #include "qemu/crc32c.h"
22 #include "block/vhdx.h"
25 /* Several metadata and region table data entries are identified by
26 * guids in a MS-specific GUID format. */
29 /* ------- Known Region Table GUIDs ---------------------- */
30 static const MSGUID bat_guid
= { .data1
= 0x2dc27766,
33 .data4
= { 0x9d, 0x64, 0x11, 0x5e,
34 0x9b, 0xfd, 0x4a, 0x08} };
36 static const MSGUID metadata_guid
= { .data1
= 0x8b7ca206,
39 .data4
= { 0xb8, 0xfe, 0x57, 0x5f,
40 0x05, 0x0f, 0x88, 0x6e} };
44 /* ------- Known Metadata Entry GUIDs ---------------------- */
45 static const MSGUID file_param_guid
= { .data1
= 0xcaa16737,
48 .data4
= { 0xb3, 0xb6, 0x33, 0xf0,
49 0xaa, 0x44, 0xe7, 0x6b} };
51 static const MSGUID virtual_size_guid
= { .data1
= 0x2FA54224,
54 .data4
= { 0xb2, 0x11, 0x5d, 0xbe,
55 0xd8, 0x3b, 0xf4, 0xb8} };
57 static const MSGUID page83_guid
= { .data1
= 0xbeca12ab,
60 .data4
= { 0x93, 0xef, 0xc3, 0x09,
61 0xe0, 0x00, 0xc7, 0x46} };
64 static const MSGUID phys_sector_guid
= { .data1
= 0xcda348c7,
67 .data4
= { 0x9c, 0xc9, 0xe9, 0x88,
68 0x52, 0x51, 0xc5, 0x56} };
70 static const MSGUID parent_locator_guid
= { .data1
= 0xa8d35f2d,
73 .data4
= { 0xab, 0xf7, 0xd3,
77 static const MSGUID logical_sector_guid
= { .data1
= 0x8141bf1d,
80 .data4
= { 0xba, 0x47, 0xf2,
84 /* Each parent type must have a valid GUID; this is for parent images
85 * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
86 * need to make up our own QCOW2 GUID type */
87 static const MSGUID parent_vhdx_guid
= { .data1
= 0xb04aefb7,
90 .data4
= { 0xb7, 0x89, 0x25, 0xb8,
91 0xe9, 0x44, 0x59, 0x13} };
/* Bit flags, one per known metadata item, recorded in the 'present' mask
 * while the metadata table is scanned. */
#define META_FILE_PARAMETER_PRESENT      0x01
#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
#define META_PAGE_83_PRESENT             0x04
#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
#define META_PARENT_LOCATOR_PRESENT      0x20

/* The five items every image must carry.  The parent locator bit is left
 * out on purpose: that item is only required for differencing images. */
#define META_ALL_PRESENT    \
    (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
     META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
     META_PHYS_SECTOR_SIZE_PRESENT)
106 typedef struct VHDXMetadataEntries
{
107 VHDXMetadataTableEntry file_parameters_entry
;
108 VHDXMetadataTableEntry virtual_disk_size_entry
;
109 VHDXMetadataTableEntry page83_data_entry
;
110 VHDXMetadataTableEntry logical_sector_size_entry
;
111 VHDXMetadataTableEntry phys_sector_size_entry
;
112 VHDXMetadataTableEntry parent_locator_entry
;
114 } VHDXMetadataEntries
;
/* Result of translating a (sector_num, nb_sectors) request into a position
 * inside one payload block; filled in by vhdx_block_translate(). */
typedef struct VHDXSectorInfo {
    uint32_t bat_idx;       /* BAT entry index */
    uint32_t sectors_avail; /* sectors available in payload block */
    uint32_t bytes_left;    /* bytes left in the block after data to r/w */
    uint32_t bytes_avail;   /* bytes available in payload block */
    uint64_t file_offset;   /* absolute offset in bytes, in file */
    uint64_t block_offset;  /* block offset, in bytes */
} VHDXSectorInfo;
128 typedef struct BDRVVHDXState
{
132 VHDXHeader
*headers
[2];
134 VHDXRegionTableHeader rt
;
135 VHDXRegionTableEntry bat_rt
; /* region table for the BAT */
136 VHDXRegionTableEntry metadata_rt
; /* region table for the metadata */
138 VHDXMetadataTableHeader metadata_hdr
;
139 VHDXMetadataEntries metadata_entries
;
141 VHDXFileParameters params
;
143 uint32_t block_size_bits
;
144 uint32_t sectors_per_block
;
145 uint32_t sectors_per_block_bits
;
147 uint64_t virtual_disk_size
;
148 uint32_t logical_sector_size
;
149 uint32_t physical_sector_size
;
151 uint64_t chunk_ratio
;
152 uint32_t chunk_ratio_bits
;
153 uint32_t logical_sector_size_bits
;
155 uint32_t bat_entries
;
159 VHDXParentLocatorHeader parent_header
;
160 VHDXParentLocatorEntry
*parent_entries
;
/* Calculates the crc32c checksum of a buffer.
 *
 * If crc_offset is greater than zero, the 4 bytes at buf + crc_offset are
 * treated as an embedded checksum field: they are zeroed while the checksum
 * is computed and restored afterwards.  The buffer is therefore modified
 * temporarily.
 *
 * crc:        initial value handed to crc32c()
 * buf:        buffer pointer
 * size:       size of buffer, in bytes
 * crc_offset: byte offset in buf of the embedded crc; if it is not greater
 *             than zero the buffer is checksummed as it is
 *
 * returns the value computed by crc32c()
 */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset)
{
    uint32_t crc_new;
    uint32_t crc_orig = 0;

    assert(buf != NULL);

    if (crc_offset > 0) {
        /* save the stored checksum, then blank the field */
        memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
        memset(buf + crc_offset, 0, sizeof(crc_orig));
    }

    crc_new = crc32c(crc, buf, size);

    if (crc_offset > 0) {
        /* put the stored checksum back */
        memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig));
    }

    return crc_new;
}
/* Validates the checksum of the buffer, with an in-place CRC.
 *
 * Zero is substituted during crc calculation for the original crc field,
 * and the crc field is restored afterwards.  But the buffer will be modified
 * during the calculation, so this may not be suitable for multi-threaded
 * use.
 *
 * crc_offset: byte offset in buf of the buffer crc
 * buf:        buffer pointer
 * size:       size of buffer (must be > crc_offset+4)
 *
 * returns true if checksum is valid, false otherwise
 */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
    uint32_t crc_orig;
    uint32_t crc;

    assert(buf != NULL);
    assert(size > (crc_offset + 4));

    /* the stored checksum is little endian */
    memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
    crc_orig = le32_to_cpu(crc_orig);

    crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);

    return crc == crc_orig;
}
/*
 * Per the MS VHDX Specification, for every VHDX file:
 *      - The header section is fixed size - 1 MB
 *      - The header section is always the first "object"
 *      - The first 64KB of the header is the File Identifier
 *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
 *      - The following 512 bytes constitute a UTF-16 string identifying the
 *        software that created the file, and is optional and diagnostic only.
 *
 *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
 *
 *  buf:      start of the image file
 *  buf_size: number of valid bytes in buf
 *  filename: not used
 *
 *  returns 100 if the signature matches, 0 otherwise
 */
static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
        return 100;
    }
    return 0;
}
233 /* All VHDX structures on disk are little endian */
234 static void vhdx_header_le_import(VHDXHeader
*h
)
238 le32_to_cpus(&h
->signature
);
239 le32_to_cpus(&h
->checksum
);
240 le64_to_cpus(&h
->sequence_number
);
242 leguid_to_cpus(&h
->file_write_guid
);
243 leguid_to_cpus(&h
->data_write_guid
);
244 leguid_to_cpus(&h
->log_guid
);
246 le16_to_cpus(&h
->log_version
);
247 le16_to_cpus(&h
->version
);
248 le32_to_cpus(&h
->log_length
);
249 le64_to_cpus(&h
->log_offset
);
253 /* opens the specified header block from the VHDX file header section */
254 static int vhdx_parse_header(BlockDriverState
*bs
, BDRVVHDXState
*s
)
259 bool h1_valid
= false;
260 bool h2_valid
= false;
265 header1
= qemu_blockalign(bs
, sizeof(VHDXHeader
));
266 header2
= qemu_blockalign(bs
, sizeof(VHDXHeader
));
268 buffer
= qemu_blockalign(bs
, VHDX_HEADER_SIZE
);
270 s
->headers
[0] = header1
;
271 s
->headers
[1] = header2
;
273 /* We have to read the whole VHDX_HEADER_SIZE instead of
274 * sizeof(VHDXHeader), because the checksum is over the whole
276 ret
= bdrv_pread(bs
->file
, VHDX_HEADER1_OFFSET
, buffer
, VHDX_HEADER_SIZE
);
280 /* copy over just the relevant portion that we need */
281 memcpy(header1
, buffer
, sizeof(VHDXHeader
));
282 vhdx_header_le_import(header1
);
284 if (vhdx_checksum_is_valid(buffer
, VHDX_HEADER_SIZE
, 4) &&
285 !memcmp(&header1
->signature
, "head", 4) &&
286 header1
->version
== 1) {
287 h1_seq
= header1
->sequence_number
;
291 ret
= bdrv_pread(bs
->file
, VHDX_HEADER2_OFFSET
, buffer
, VHDX_HEADER_SIZE
);
295 /* copy over just the relevant portion that we need */
296 memcpy(header2
, buffer
, sizeof(VHDXHeader
));
297 vhdx_header_le_import(header2
);
299 if (vhdx_checksum_is_valid(buffer
, VHDX_HEADER_SIZE
, 4) &&
300 !memcmp(&header2
->signature
, "head", 4) &&
301 header2
->version
== 1) {
302 h2_seq
= header2
->sequence_number
;
306 /* If there is only 1 valid header (or no valid headers), we
307 * don't care what the sequence numbers are */
308 if (h1_valid
&& !h2_valid
) {
310 } else if (!h1_valid
&& h2_valid
) {
312 } else if (!h1_valid
&& !h2_valid
) {
316 /* If both headers are valid, then we choose the active one by the
317 * highest sequence number. If the sequence numbers are equal, that is
319 if (h1_seq
> h2_seq
) {
321 } else if (h2_seq
> h1_seq
) {
334 qerror_report(ERROR_CLASS_GENERIC_ERROR
, "No valid VHDX header found");
337 s
->headers
[0] = NULL
;
338 s
->headers
[1] = NULL
;
345 static int vhdx_open_region_tables(BlockDriverState
*bs
, BDRVVHDXState
*s
)
350 VHDXRegionTableEntry rt_entry
;
352 bool bat_rt_found
= false;
353 bool metadata_rt_found
= false;
355 /* We have to read the whole 64KB block, because the crc32 is over the
357 buffer
= qemu_blockalign(bs
, VHDX_HEADER_BLOCK_SIZE
);
359 ret
= bdrv_pread(bs
->file
, VHDX_REGION_TABLE_OFFSET
, buffer
,
360 VHDX_HEADER_BLOCK_SIZE
);
364 memcpy(&s
->rt
, buffer
, sizeof(s
->rt
));
365 le32_to_cpus(&s
->rt
.signature
);
366 le32_to_cpus(&s
->rt
.checksum
);
367 le32_to_cpus(&s
->rt
.entry_count
);
368 le32_to_cpus(&s
->rt
.reserved
);
369 offset
+= sizeof(s
->rt
);
371 if (!vhdx_checksum_is_valid(buffer
, VHDX_HEADER_BLOCK_SIZE
, 4) ||
372 memcmp(&s
->rt
.signature
, "regi", 4)) {
377 /* Per spec, maximum region table entry count is 2047 */
378 if (s
->rt
.entry_count
> 2047) {
383 for (i
= 0; i
< s
->rt
.entry_count
; i
++) {
384 memcpy(&rt_entry
, buffer
+ offset
, sizeof(rt_entry
));
385 offset
+= sizeof(rt_entry
);
387 leguid_to_cpus(&rt_entry
.guid
);
388 le64_to_cpus(&rt_entry
.file_offset
);
389 le32_to_cpus(&rt_entry
.length
);
390 le32_to_cpus(&rt_entry
.data_bits
);
392 /* see if we recognize the entry */
393 if (guid_eq(rt_entry
.guid
, bat_guid
)) {
394 /* must be unique; if we have already found it this is invalid */
400 s
->bat_rt
= rt_entry
;
404 if (guid_eq(rt_entry
.guid
, metadata_guid
)) {
405 /* must be unique; if we have already found it this is invalid */
406 if (metadata_rt_found
) {
410 metadata_rt_found
= true;
411 s
->metadata_rt
= rt_entry
;
415 if (rt_entry
.data_bits
& VHDX_REGION_ENTRY_REQUIRED
) {
416 /* cannot read vhdx file - required region table entry that
417 * we do not understand. per spec, we must fail to open */
431 /* Metadata initial parser
433 * This loads all the metadata entry fields. This may cause additional
434 * fields to be processed (e.g. parent locator, etc..).
436 * There are 5 Metadata items that are always required:
437 * - File Parameters (block size, has a parent)
438 * - Virtual Disk Size (size, in bytes, of the virtual drive)
439 * - Page 83 Data (scsi page 83 guid)
440 * - Logical Sector Size (logical sector size in bytes, either 512 or
441 * 4096. We only support 512 currently)
442 * - Physical Sector Size (512 or 4096)
444 * Also, if the File Parameters indicate this is a differencing file,
445 * we must also look for the Parent Locator metadata item.
447 static int vhdx_parse_metadata(BlockDriverState
*bs
, BDRVVHDXState
*s
)
453 VHDXMetadataTableEntry md_entry
;
455 buffer
= qemu_blockalign(bs
, VHDX_METADATA_TABLE_MAX_SIZE
);
457 ret
= bdrv_pread(bs
->file
, s
->metadata_rt
.file_offset
, buffer
,
458 VHDX_METADATA_TABLE_MAX_SIZE
);
462 memcpy(&s
->metadata_hdr
, buffer
, sizeof(s
->metadata_hdr
));
463 offset
+= sizeof(s
->metadata_hdr
);
465 le64_to_cpus(&s
->metadata_hdr
.signature
);
466 le16_to_cpus(&s
->metadata_hdr
.reserved
);
467 le16_to_cpus(&s
->metadata_hdr
.entry_count
);
469 if (memcmp(&s
->metadata_hdr
.signature
, "metadata", 8)) {
474 s
->metadata_entries
.present
= 0;
476 if ((s
->metadata_hdr
.entry_count
* sizeof(md_entry
)) >
477 (VHDX_METADATA_TABLE_MAX_SIZE
- offset
)) {
482 for (i
= 0; i
< s
->metadata_hdr
.entry_count
; i
++) {
483 memcpy(&md_entry
, buffer
+ offset
, sizeof(md_entry
));
484 offset
+= sizeof(md_entry
);
486 leguid_to_cpus(&md_entry
.item_id
);
487 le32_to_cpus(&md_entry
.offset
);
488 le32_to_cpus(&md_entry
.length
);
489 le32_to_cpus(&md_entry
.data_bits
);
490 le32_to_cpus(&md_entry
.reserved2
);
492 if (guid_eq(md_entry
.item_id
, file_param_guid
)) {
493 if (s
->metadata_entries
.present
& META_FILE_PARAMETER_PRESENT
) {
497 s
->metadata_entries
.file_parameters_entry
= md_entry
;
498 s
->metadata_entries
.present
|= META_FILE_PARAMETER_PRESENT
;
502 if (guid_eq(md_entry
.item_id
, virtual_size_guid
)) {
503 if (s
->metadata_entries
.present
& META_VIRTUAL_DISK_SIZE_PRESENT
) {
507 s
->metadata_entries
.virtual_disk_size_entry
= md_entry
;
508 s
->metadata_entries
.present
|= META_VIRTUAL_DISK_SIZE_PRESENT
;
512 if (guid_eq(md_entry
.item_id
, page83_guid
)) {
513 if (s
->metadata_entries
.present
& META_PAGE_83_PRESENT
) {
517 s
->metadata_entries
.page83_data_entry
= md_entry
;
518 s
->metadata_entries
.present
|= META_PAGE_83_PRESENT
;
522 if (guid_eq(md_entry
.item_id
, logical_sector_guid
)) {
523 if (s
->metadata_entries
.present
&
524 META_LOGICAL_SECTOR_SIZE_PRESENT
) {
528 s
->metadata_entries
.logical_sector_size_entry
= md_entry
;
529 s
->metadata_entries
.present
|= META_LOGICAL_SECTOR_SIZE_PRESENT
;
533 if (guid_eq(md_entry
.item_id
, phys_sector_guid
)) {
534 if (s
->metadata_entries
.present
& META_PHYS_SECTOR_SIZE_PRESENT
) {
538 s
->metadata_entries
.phys_sector_size_entry
= md_entry
;
539 s
->metadata_entries
.present
|= META_PHYS_SECTOR_SIZE_PRESENT
;
543 if (guid_eq(md_entry
.item_id
, parent_locator_guid
)) {
544 if (s
->metadata_entries
.present
& META_PARENT_LOCATOR_PRESENT
) {
548 s
->metadata_entries
.parent_locator_entry
= md_entry
;
549 s
->metadata_entries
.present
|= META_PARENT_LOCATOR_PRESENT
;
553 if (md_entry
.data_bits
& VHDX_META_FLAGS_IS_REQUIRED
) {
554 /* cannot read vhdx file - required region table entry that
555 * we do not understand. per spec, we must fail to open */
561 if (s
->metadata_entries
.present
!= META_ALL_PRESENT
) {
566 ret
= bdrv_pread(bs
->file
,
567 s
->metadata_entries
.file_parameters_entry
.offset
568 + s
->metadata_rt
.file_offset
,
576 le32_to_cpus(&s
->params
.block_size
);
577 le32_to_cpus(&s
->params
.data_bits
);
580 /* We now have the file parameters, so we can tell if this is a
581 * differencing file (i.e.. has_parent), is dynamic or fixed
582 * sized (leave_blocks_allocated), and the block size */
584 /* The parent locator required iff the file parameters has_parent set */
585 if (s
->params
.data_bits
& VHDX_PARAMS_HAS_PARENT
) {
586 if (s
->metadata_entries
.present
& META_PARENT_LOCATOR_PRESENT
) {
587 /* TODO: parse parent locator fields */
588 ret
= -ENOTSUP
; /* temp, until differencing files are supported */
591 /* if has_parent is set, but there is not parent locator present,
592 * then that is an invalid combination */
598 /* determine virtual disk size, logical sector size,
599 * and phys sector size */
601 ret
= bdrv_pread(bs
->file
,
602 s
->metadata_entries
.virtual_disk_size_entry
.offset
603 + s
->metadata_rt
.file_offset
,
604 &s
->virtual_disk_size
,
609 ret
= bdrv_pread(bs
->file
,
610 s
->metadata_entries
.logical_sector_size_entry
.offset
611 + s
->metadata_rt
.file_offset
,
612 &s
->logical_sector_size
,
617 ret
= bdrv_pread(bs
->file
,
618 s
->metadata_entries
.phys_sector_size_entry
.offset
619 + s
->metadata_rt
.file_offset
,
620 &s
->physical_sector_size
,
626 le64_to_cpus(&s
->virtual_disk_size
);
627 le32_to_cpus(&s
->logical_sector_size
);
628 le32_to_cpus(&s
->physical_sector_size
);
630 if (s
->logical_sector_size
== 0 || s
->params
.block_size
== 0) {
635 /* both block_size and sector_size are guaranteed powers of 2 */
636 s
->sectors_per_block
= s
->params
.block_size
/ s
->logical_sector_size
;
637 s
->chunk_ratio
= (VHDX_MAX_SECTORS_PER_BLOCK
) *
638 (uint64_t)s
->logical_sector_size
/
639 (uint64_t)s
->params
.block_size
;
641 /* These values are ones we will want to use for division / multiplication
642 * later on, and they are all guaranteed (per the spec) to be powers of 2,
643 * so we can take advantage of that for shift operations during
645 if (s
->logical_sector_size
& (s
->logical_sector_size
- 1)) {
649 if (s
->sectors_per_block
& (s
->sectors_per_block
- 1)) {
653 if (s
->chunk_ratio
& (s
->chunk_ratio
- 1)) {
657 s
->block_size
= s
->params
.block_size
;
658 if (s
->block_size
& (s
->block_size
- 1)) {
663 s
->logical_sector_size_bits
= 31 - clz32(s
->logical_sector_size
);
664 s
->sectors_per_block_bits
= 31 - clz32(s
->sectors_per_block
);
665 s
->chunk_ratio_bits
= 63 - clz64(s
->chunk_ratio
);
666 s
->block_size_bits
= 31 - clz32(s
->block_size
);
675 /* Parse the replay log. Per the VHDX spec, if the log is present
676 * it must be replayed prior to opening the file, even read-only.
678 * If read-only, we must replay the log in RAM (or refuse to open
679 * a dirty VHDX file read-only */
680 static int vhdx_parse_log(BlockDriverState
*bs
, BDRVVHDXState
*s
)
686 hdr
= s
->headers
[s
->curr_header
];
688 /* either the log guid, or log length is zero,
689 * then a replay log is present */
690 for (i
= 0; i
< sizeof(hdr
->log_guid
.data4
); i
++) {
691 ret
|= hdr
->log_guid
.data4
[i
];
693 if (hdr
->log_guid
.data1
== 0 &&
694 hdr
->log_guid
.data2
== 0 &&
695 hdr
->log_guid
.data3
== 0 &&
700 /* per spec, only log version of 0 is supported */
701 if (hdr
->log_version
!= 0) {
706 if (hdr
->log_length
== 0) {
710 /* We currently do not support images with logs to replay */
718 static int vhdx_open(BlockDriverState
*bs
, QDict
*options
, int flags
)
720 BDRVVHDXState
*s
= bs
->opaque
;
724 uint32_t data_blocks_cnt
, bitmap_blocks_cnt
;
729 qemu_co_mutex_init(&s
->lock
);
731 /* validate the file signature */
732 ret
= bdrv_pread(bs
->file
, 0, &signature
, sizeof(uint64_t));
736 if (memcmp(&signature
, "vhdxfile", 8)) {
741 ret
= vhdx_parse_header(bs
, s
);
746 ret
= vhdx_parse_log(bs
, s
);
751 ret
= vhdx_open_region_tables(bs
, s
);
756 ret
= vhdx_parse_metadata(bs
, s
);
760 s
->block_size
= s
->params
.block_size
;
762 /* the VHDX spec dictates that virtual_disk_size is always a multiple of
763 * logical_sector_size */
764 bs
->total_sectors
= s
->virtual_disk_size
>> s
->logical_sector_size_bits
;
766 data_blocks_cnt
= s
->virtual_disk_size
>> s
->block_size_bits
;
767 if (s
->virtual_disk_size
- (data_blocks_cnt
<< s
->block_size_bits
)) {
770 bitmap_blocks_cnt
= data_blocks_cnt
>> s
->chunk_ratio_bits
;
771 if (data_blocks_cnt
- (bitmap_blocks_cnt
<< s
->chunk_ratio_bits
)) {
775 if (s
->parent_entries
) {
776 s
->bat_entries
= bitmap_blocks_cnt
* (s
->chunk_ratio
+ 1);
778 s
->bat_entries
= data_blocks_cnt
+
779 ((data_blocks_cnt
- 1) >> s
->chunk_ratio_bits
);
782 s
->bat_offset
= s
->bat_rt
.file_offset
;
784 if (s
->bat_entries
> s
->bat_rt
.length
/ sizeof(VHDXBatEntry
)) {
785 /* BAT allocation is not large enough for all entries */
790 s
->bat
= qemu_blockalign(bs
, s
->bat_rt
.length
);
792 ret
= bdrv_pread(bs
->file
, s
->bat_offset
, s
->bat
, s
->bat_rt
.length
);
797 for (i
= 0; i
< s
->bat_entries
; i
++) {
798 le64_to_cpus(&s
->bat
[i
]);
801 if (flags
& BDRV_O_RDWR
) {
806 /* TODO: differencing files, write */
810 qemu_vfree(s
->headers
[0]);
811 qemu_vfree(s
->headers
[1]);
813 qemu_vfree(s
->parent_entries
);
817 static int vhdx_reopen_prepare(BDRVReopenState
*state
,
818 BlockReopenQueue
*queue
, Error
**errp
)
825 * Perform sector to block offset translations, to get various
826 * sector and file offsets into the image. See VHDXSectorInfo
828 static void vhdx_block_translate(BDRVVHDXState
*s
, int64_t sector_num
,
829 int nb_sectors
, VHDXSectorInfo
*sinfo
)
831 uint32_t block_offset
;
833 sinfo
->bat_idx
= sector_num
>> s
->sectors_per_block_bits
;
834 /* effectively a modulo - this gives us the offset into the block
835 * (in sector sizes) for our sector number */
836 block_offset
= sector_num
- (sinfo
->bat_idx
<< s
->sectors_per_block_bits
);
837 /* the chunk ratio gives us the interleaving of the sector
838 * bitmaps, so we need to advance our page block index by the
839 * sector bitmaps entry number */
840 sinfo
->bat_idx
+= sinfo
->bat_idx
>> s
->chunk_ratio_bits
;
842 /* the number of sectors we can read/write in this cycle */
843 sinfo
->sectors_avail
= s
->sectors_per_block
- block_offset
;
845 sinfo
->bytes_left
= sinfo
->sectors_avail
<< s
->logical_sector_size_bits
;
847 if (sinfo
->sectors_avail
> nb_sectors
) {
848 sinfo
->sectors_avail
= nb_sectors
;
851 sinfo
->bytes_avail
= sinfo
->sectors_avail
<< s
->logical_sector_size_bits
;
853 sinfo
->file_offset
= s
->bat
[sinfo
->bat_idx
] >> VHDX_BAT_FILE_OFF_BITS
;
855 sinfo
->block_offset
= block_offset
<< s
->logical_sector_size_bits
;
857 /* The file offset must be past the header section, so must be > 0 */
858 if (sinfo
->file_offset
== 0) {
862 /* block offset is the offset in vhdx logical sectors, in
863 * the payload data block. Convert that to a byte offset
864 * in the block, and add in the payload data block offset
865 * in the file, in bytes, to get the final read address */
867 sinfo
->file_offset
<<= 20; /* now in bytes, rather than 1MB units */
868 sinfo
->file_offset
+= sinfo
->block_offset
;
873 static coroutine_fn
int vhdx_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
874 int nb_sectors
, QEMUIOVector
*qiov
)
876 BDRVVHDXState
*s
= bs
->opaque
;
878 VHDXSectorInfo sinfo
;
879 uint64_t bytes_done
= 0;
880 QEMUIOVector hd_qiov
;
882 qemu_iovec_init(&hd_qiov
, qiov
->niov
);
884 qemu_co_mutex_lock(&s
->lock
);
886 while (nb_sectors
> 0) {
887 /* We are a differencing file, so we need to inspect the sector bitmap
888 * to see if we have the data or not */
889 if (s
->params
.data_bits
& VHDX_PARAMS_HAS_PARENT
) {
890 /* not supported yet */
894 vhdx_block_translate(s
, sector_num
, nb_sectors
, &sinfo
);
896 qemu_iovec_reset(&hd_qiov
);
897 qemu_iovec_concat(&hd_qiov
, qiov
, bytes_done
, sinfo
.bytes_avail
);
899 /* check the payload block state */
900 switch (s
->bat
[sinfo
.bat_idx
] & VHDX_BAT_STATE_BIT_MASK
) {
901 case PAYLOAD_BLOCK_NOT_PRESENT
: /* fall through */
902 case PAYLOAD_BLOCK_UNDEFINED
: /* fall through */
903 case PAYLOAD_BLOCK_UNMAPPED
: /* fall through */
904 case PAYLOAD_BLOCK_ZERO
:
906 qemu_iovec_memset(&hd_qiov
, 0, 0, sinfo
.bytes_avail
);
908 case PAYLOAD_BLOCK_FULL_PRESENT
:
909 qemu_co_mutex_unlock(&s
->lock
);
910 ret
= bdrv_co_readv(bs
->file
,
911 sinfo
.file_offset
>> BDRV_SECTOR_BITS
,
912 sinfo
.sectors_avail
, &hd_qiov
);
913 qemu_co_mutex_lock(&s
->lock
);
918 case PAYLOAD_BLOCK_PARTIALLY_PRESENT
:
919 /* we don't yet support difference files, fall through
926 nb_sectors
-= sinfo
.sectors_avail
;
927 sector_num
+= sinfo
.sectors_avail
;
928 bytes_done
+= sinfo
.bytes_avail
;
933 qemu_co_mutex_unlock(&s
->lock
);
934 qemu_iovec_destroy(&hd_qiov
);
940 static coroutine_fn
int vhdx_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
941 int nb_sectors
, QEMUIOVector
*qiov
)
947 static void vhdx_close(BlockDriverState
*bs
)
949 BDRVVHDXState
*s
= bs
->opaque
;
950 qemu_vfree(s
->headers
[0]);
951 qemu_vfree(s
->headers
[1]);
953 qemu_vfree(s
->parent_entries
);
956 static BlockDriver bdrv_vhdx
= {
957 .format_name
= "vhdx",
958 .instance_size
= sizeof(BDRVVHDXState
),
959 .bdrv_probe
= vhdx_probe
,
960 .bdrv_open
= vhdx_open
,
961 .bdrv_close
= vhdx_close
,
962 .bdrv_reopen_prepare
= vhdx_reopen_prepare
,
963 .bdrv_co_readv
= vhdx_co_readv
,
964 .bdrv_co_writev
= vhdx_co_writev
,
967 static void bdrv_vhdx_init(void)
969 bdrv_register(&bdrv_vhdx
);
972 block_init(bdrv_vhdx_init
);