qemu-iotests: Test that "stop" doesn't drain block jobs
[qemu.git] / block / vhdx.h
blob7003ab7a7951bd6ff3432d203966696db88352c4
/*
 * Block driver for Hyper-V VHDX Images
 *
 * Copyright (c) 2013 Red Hat, Inc.,
 *
 * Authors:
 *  Jeff Cody <jcody@redhat.com>
 *
 *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
 *  by Microsoft:
 *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 */
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
/* Binary size units.  KiB, MiB and GiB are plain int constants; TiB is
 * widened to uint64_t first, because 2^40 does not fit in a 32-bit int. */
#define KiB              (1 * 1024)
#define MiB            (KiB * 1024)
#define GiB            (MiB * 1024)
#define TiB ((uint64_t) GiB * 1024)

#define DEFAULT_LOG_SIZE 1048576 /* 1MiB */
/* Structures and fields present in the VHDX file */

/* The header section has the following blocks,
 * each block is 64KB:
 *
 * _____________________________________________________________________________
 * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
 * |----------|---------------|------------|--------------|--------------------|
 * |          |               |            |              |                    |
 * 0.........64KB...........128KB........192KB..........256KB................1MB
 *
 * Note that VHDX_REGION_TABLE2_OFFSET below places a second region table at
 * 256KB, i.e. at the start of the area the diagram labels as reserved.
 */

#define VHDX_HEADER_BLOCK_SIZE      (64 * 1024)

/* Byte offsets of each block from the start of the file */
#define VHDX_FILE_ID_OFFSET         0
#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 1)
#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 2)
#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE * 3)
#define VHDX_REGION_TABLE2_OFFSET   (VHDX_HEADER_BLOCK_SIZE * 4)

/* The header section as a whole ends at the 1MB mark */
#define VHDX_HEADER_SECTION_END     (1 * MiB)
/*
 * A note on the use of MS-GUID fields.  For more details on the GUID,
 * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
 *
 * The VHDX specification only states that these are MS GUIDs, and which
 * bytes are data1-data4.  It makes no mention of what algorithm should be used
 * to generate the GUID, nor what standard.  However, looking at the specified
 * known GUID fields, it appears the GUIDs are:
 *  Standard/DCE GUID type  (noted by 10b in the MSB of byte 0 of .data4)
 *  Random algorithm        (noted by 0x4XXX for .data3)
 */
/* ---- HEADER SECTION STRUCTURES ---- */

/* These structures are ones that are defined in the VHDX specification
 * document */

/* File identifier stored in the first block of the header section.  The
 * signature constant spells "vhdxfile" when its bytes are read from least
 * significant to most significant, i.e. as stored little-endian. */
#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
    uint64_t    signature;              /* "vhdxfile" in ASCII */
    uint16_t    creator[256];           /* optional; utf-16 string to identify
                                           the vhdx file creator.  Diagnostic
                                           only */
} VHDXFileIdentifier;
74 /* the guid is a 16 byte unique ID - the definition for this used by
75 * Microsoft is not just 16 bytes though - it is a structure that is defined,
76 * so we need to follow it here so that endianness does not trip us up */
78 typedef struct QEMU_PACKED MSGUID {
79 uint32_t data1;
80 uint16_t data2;
81 uint16_t data3;
82 uint8_t data4[8];
83 } MSGUID;
85 #define guid_eq(a, b) \
86 (memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
88 #define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk
89 is only 582 bytes, for purposes of crc
90 the header is the first 4KB of the 64KB
91 block */
93 /* The full header is 4KB, although the actual header data is much smaller.
94 * But for the checksum calculation, it is over the entire 4KB structure,
95 * not just the defined portion of it */
96 #define VHDX_HEADER_SIGNATURE 0x64616568
97 typedef struct QEMU_PACKED VHDXHeader {
98 uint32_t signature; /* "head" in ASCII */
99 uint32_t checksum; /* CRC-32C hash of the whole header */
100 uint64_t sequence_number; /* Seq number of this header. Each
101 VHDX file has 2 of these headers,
102 and only the header with the highest
103 sequence number is valid */
104 MSGUID file_write_guid; /* 128 bit unique identifier. Must be
105 updated to new, unique value before
106 the first modification is made to
107 file */
108 MSGUID data_write_guid; /* 128 bit unique identifier. Must be
109 updated to new, unique value before
110 the first modification is made to
111 visible data. Visbile data is
112 defined as:
113 - system & user metadata
114 - raw block data
115 - disk size
116 - any change that will
117 cause the virtual disk
118 sector read to differ
120 This does not need to change if
121 blocks are re-arranged */
122 MSGUID log_guid; /* 128 bit unique identifier. If zero,
123 there is no valid log. If non-zero,
124 log entries with this guid are
125 valid. */
126 uint16_t log_version; /* version of the log format. Must be
127 set to zero */
128 uint16_t version; /* version of the vhdx file. Currently,
129 only supported version is "1" */
130 uint32_t log_length; /* length of the log. Must be multiple
131 of 1MB */
132 uint64_t log_offset; /* byte offset in the file of the log.
133 Must also be a multiple of 1MB */
134 } VHDXHeader;
136 /* Header for the region table block */
137 #define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */
138 typedef struct QEMU_PACKED VHDXRegionTableHeader {
139 uint32_t signature; /* "regi" in ASCII */
140 uint32_t checksum; /* CRC-32C hash of the 64KB table */
141 uint32_t entry_count; /* number of valid entries */
142 uint32_t reserved;
143 } VHDXRegionTableHeader;
145 /* Individual region table entry. There may be a maximum of 2047 of these
147 * There are two known region table properties. Both are required.
148 * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08
149 * Metadata: 8B7CA20647904B9AB8FE575F050F886E
151 #define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand
152 this entry in order to open
153 file */
154 typedef struct QEMU_PACKED VHDXRegionTableEntry {
155 MSGUID guid; /* 128-bit unique identifier */
156 uint64_t file_offset; /* offset of the object in the file.
157 Must be multiple of 1MB */
158 uint32_t length; /* length, in bytes, of the object */
159 uint32_t data_bits;
160 } VHDXRegionTableEntry;
163 /* ---- LOG ENTRY STRUCTURES ---- */
164 #define VHDX_LOG_MIN_SIZE (1024 * 1024)
165 #define VHDX_LOG_SECTOR_SIZE 4096
166 #define VHDX_LOG_HDR_SIZE 64
167 #define VHDX_LOG_SIGNATURE 0x65676f6c
168 typedef struct QEMU_PACKED VHDXLogEntryHeader {
169 uint32_t signature; /* "loge" in ASCII */
170 uint32_t checksum; /* CRC-32C hash of the 64KB table */
171 uint32_t entry_length; /* length in bytes, multiple of 1MB */
172 uint32_t tail; /* byte offset of first log entry of a
173 seq, where this entry is the last
174 entry */
175 uint64_t sequence_number; /* incremented with each log entry.
176 May not be zero. */
177 uint32_t descriptor_count; /* number of descriptors in this log
178 entry, must be >= 0 */
179 uint32_t reserved;
180 MSGUID log_guid; /* value of the log_guid from
181 vhdx_header. If not found in
182 vhdx_header, it is invalid */
183 uint64_t flushed_file_offset; /* see spec for full details - this
184 should be vhdx file size in bytes */
185 uint64_t last_file_offset; /* size in bytes that all allocated
186 file structures fit into */
187 } VHDXLogEntryHeader;
189 #define VHDX_LOG_DESC_SIZE 32
190 #define VHDX_LOG_DESC_SIGNATURE 0x63736564
191 #define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
192 typedef struct QEMU_PACKED VHDXLogDescriptor {
193 uint32_t signature; /* "zero" or "desc" in ASCII */
194 union {
195 uint32_t reserved; /* zero desc */
196 uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the
197 data sector */
199 union {
200 uint64_t zero_length; /* zero desc: length of the section to
201 zero */
202 uint64_t leading_bytes; /* data desc: bytes 0-7 of the data
203 sector */
205 uint64_t file_offset; /* file offset to write zeros - multiple
206 of 4kB */
207 uint64_t sequence_number; /* must match same field in
208 vhdx_log_entry_header */
209 } VHDXLogDescriptor;
211 #define VHDX_LOG_DATA_SIGNATURE 0x61746164
212 typedef struct QEMU_PACKED VHDXLogDataSector {
213 uint32_t data_signature; /* "data" in ASCII */
214 uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
215 uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive).
216 see the data descriptor field for the
217 other mising bytes */
218 uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */
219 } VHDXLogDataSector;
/* block states - different state values depending on whether it is a
 * payload block, or a sector block. */

#define PAYLOAD_BLOCK_NOT_PRESENT       0
#define PAYLOAD_BLOCK_UNDEFINED         1
#define PAYLOAD_BLOCK_ZERO              2
#define PAYLOAD_BLOCK_UNMAPPED          3
#define PAYLOAD_BLOCK_UNMAPPED_v095     5
#define PAYLOAD_BLOCK_FULLY_PRESENT     6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7

#define SB_BLOCK_NOT_PRESENT    0
#define SB_BLOCK_PRESENT        6

/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK  (1 << 23)

/* A BAT entry is one 64-bit word: the upper 44 bits are the file offset in
 * 1MB units, the lower 3 bits are the state, and the other bits are
 * reserved. */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000ULL /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
/* ---- METADATA REGION STRUCTURES ---- */

/* The metadata table holds one 32-byte header followed by up to 2047
 * 32-byte entries, so its maximum size is 2048 * 32 bytes = 64KB. */
#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
253 typedef struct QEMU_PACKED VHDXMetadataTableHeader {
254 uint64_t signature; /* "metadata" in ASCII */
255 uint16_t reserved;
256 uint16_t entry_count; /* number table entries. <= 2047 */
257 uint32_t reserved2[5];
258 } VHDXMetadataTableHeader;
260 #define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */
261 #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set,
262 otherwise file metdata */
263 #define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this
264 entry to open the file */
265 typedef struct QEMU_PACKED VHDXMetadataTableEntry {
266 MSGUID item_id; /* 128-bit identifier for metadata */
267 uint32_t offset; /* byte offset of the metadata. At
268 least 64kB. Relative to start of
269 metadata region */
270 /* note: if length = 0, so is offset */
271 uint32_t length; /* length of metadata. <= 1MB. */
272 uint32_t data_bits; /* least-significant 3 bits are flags,
273 the rest are reserved (see above) */
274 uint32_t reserved2;
275 } VHDXMetadataTableEntry;
277 #define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to
278 be BLOCK_NOT_PRESENT.
279 If set indicates a fixed
280 size VHDX file */
281 #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
282 #define VHDX_BLOCK_SIZE_MIN (1 * MiB)
283 #define VHDX_BLOCK_SIZE_MAX (256 * MiB)
284 typedef struct QEMU_PACKED VHDXFileParameters {
285 uint32_t block_size; /* size of each payload block, always
286 power of 2, <= 256MB and >= 1MB. */
287 uint32_t data_bits; /* least-significant 2 bits are flags,
288 the rest are reserved (see above) */
289 } VHDXFileParameters;
291 #define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB)
292 typedef struct QEMU_PACKED VHDXVirtualDiskSize {
293 uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
294 Must be multiple of the sector size,
295 max of 64TB */
296 } VHDXVirtualDiskSize;
298 typedef struct QEMU_PACKED VHDXPage83Data {
299 MSGUID page_83_data; /* unique id for scsi devices that
300 support page 0x83 */
301 } VHDXPage83Data;
303 typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
304 uint32_t logical_sector_size; /* virtual disk sector size (in bytes).
305 Can only be 512 or 4096 bytes */
306 } VHDXVirtualDiskLogicalSectorSize;
308 typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
309 uint32_t physical_sector_size; /* physical sector size (in bytes).
310 Can only be 512 or 4096 bytes */
311 } VHDXVirtualDiskPhysicalSectorSize;
313 typedef struct QEMU_PACKED VHDXParentLocatorHeader {
314 MSGUID locator_type; /* type of the parent virtual disk. */
315 uint16_t reserved;
316 uint16_t key_value_count; /* number of key/value pairs for this
317 locator */
318 } VHDXParentLocatorHeader;
320 /* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
321 typedef struct QEMU_PACKED VHDXParentLocatorEntry {
322 uint32_t key_offset; /* offset in metadata for key, > 0 */
323 uint32_t value_offset; /* offset in metadata for value, >0 */
324 uint16_t key_length; /* length of entry key, > 0 */
325 uint16_t value_length; /* length of entry value, > 0 */
326 } VHDXParentLocatorEntry;
329 /* ----- END VHDX SPECIFICATION STRUCTURES ---- */
331 typedef struct VHDXMetadataEntries {
332 VHDXMetadataTableEntry file_parameters_entry;
333 VHDXMetadataTableEntry virtual_disk_size_entry;
334 VHDXMetadataTableEntry page83_data_entry;
335 VHDXMetadataTableEntry logical_sector_size_entry;
336 VHDXMetadataTableEntry phys_sector_size_entry;
337 VHDXMetadataTableEntry parent_locator_entry;
338 uint16_t present;
339 } VHDXMetadataEntries;
341 typedef struct VHDXLogEntries {
342 uint64_t offset;
343 uint64_t length;
344 uint32_t write;
345 uint32_t read;
346 VHDXLogEntryHeader *hdr;
347 void *desc_buffer;
348 uint64_t sequence;
349 uint32_t tail;
350 } VHDXLogEntries;
352 typedef struct VHDXRegionEntry {
353 uint64_t start;
354 uint64_t end;
355 QLIST_ENTRY(VHDXRegionEntry) entries;
356 } VHDXRegionEntry;
358 typedef struct BDRVVHDXState {
359 CoMutex lock;
361 int curr_header;
362 VHDXHeader *headers[2];
364 VHDXRegionTableHeader rt;
365 VHDXRegionTableEntry bat_rt; /* region table for the BAT */
366 VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
368 VHDXMetadataTableHeader metadata_hdr;
369 VHDXMetadataEntries metadata_entries;
371 VHDXFileParameters params;
372 uint32_t block_size;
373 uint32_t block_size_bits;
374 uint32_t sectors_per_block;
375 uint32_t sectors_per_block_bits;
377 uint64_t virtual_disk_size;
378 uint32_t logical_sector_size;
379 uint32_t physical_sector_size;
381 uint64_t chunk_ratio;
382 uint32_t chunk_ratio_bits;
383 uint32_t logical_sector_size_bits;
385 uint32_t bat_entries;
386 VHDXBatEntry *bat;
387 uint64_t bat_offset;
389 bool first_visible_write;
390 MSGUID session_guid;
392 VHDXLogEntries log;
394 VHDXParentLocatorHeader parent_header;
395 VHDXParentLocatorEntry *parent_entries;
397 Error *migration_blocker;
399 bool log_replayed_on_open;
401 QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
402 } BDRVVHDXState;
404 void vhdx_guid_generate(MSGUID *guid);
406 int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
407 MSGUID *log_guid);
409 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
410 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
411 int crc_offset);
413 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
415 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
416 Error **errp);
418 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
419 void *data, uint32_t length, uint64_t offset);
421 static inline void leguid_to_cpus(MSGUID *guid)
423 le32_to_cpus(&guid->data1);
424 le16_to_cpus(&guid->data2);
425 le16_to_cpus(&guid->data3);
428 static inline void cpu_to_leguids(MSGUID *guid)
430 cpu_to_le32s(&guid->data1);
431 cpu_to_le16s(&guid->data2);
432 cpu_to_le16s(&guid->data3);
435 void vhdx_header_le_import(VHDXHeader *h);
436 void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
437 void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
438 void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
439 void vhdx_log_data_le_import(VHDXLogDataSector *d);
440 void vhdx_log_data_le_export(VHDXLogDataSector *d);
441 void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
442 void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
443 void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
444 void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
445 void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
446 void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
447 void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
448 void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
449 void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
450 void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
451 int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
#endif /* BLOCK_VHDX_H */