qed: Add QEMU Enhanced Disk image format
[qemu-kvm/stefanha.git] / block / qed.h
blob1f8a125787e320cf9d40fe240598dce0170e40cc
/*
 * QEMU Enhanced Disk Format
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
15 #ifndef BLOCK_QED_H
16 #define BLOCK_QED_H
18 #include "block_int.h"
/* The layout of a QED file is as follows:
 *
 * +--------+----------+----------+----------+-----+
 * | header | L1 table | cluster0 | cluster1 | ... |
 * +--------+----------+----------+----------+-----+
 *
 * There is a 2-level pagetable for cluster allocation:
 *
 *                     +----------+
 *                     | L1 table |
 *                     +----------+
 *                ,------'  |  '------.
 *           +----------+   |    +----------+
 *           | L2 table |  ...   | L2 table |
 *           +----------+        +----------+
 *       ,------'  |  '------.
 *  +----------+   |    +----------+
 *  |   Data   |  ...   |   Data   |
 *  +----------+        +----------+
 *
 * The L1 table is fixed size and always present.  L2 tables are allocated on
 * demand.  The L1 table size determines the maximum possible image size; it
 * can be influenced using the cluster_size and table_size values.
 *
 * All fields are little-endian on disk.
 */
/* Format-wide constants: magic number, feature bits and the permitted ranges
 * for cluster and table sizes.
 */
enum {
    /* The bytes 'Q', 'E', 'D', '\0' packed with 'Q' in the lowest byte */
    QED_MAGIC = 'Q' | ('E' << 8) | ('D' << 16) | ('\0' << 24),

    /* The image supports a backing file */
    QED_F_BACKING_FILE = 0x01,

    /* The backing file format must not be probed, treat as raw image */
    QED_F_BACKING_FORMAT_NO_PROBE = 0x04,

    /* Feature bits must be used when the on-disk format changes */
    QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
                       QED_F_BACKING_FORMAT_NO_PROBE,
    QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */
    QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */

    /* Data is stored in groups of sectors called clusters.  Cluster size must
     * be large to avoid keeping too much metadata.  I/O requests that have
     * sub-cluster size will require read-modify-write.
     */
    QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
    QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,

    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
     * a multiple of clusters so large maximum image sizes can be supported
     * without jacking up the cluster size too much.
     */
    QED_MIN_TABLE_SIZE = 1,        /* in clusters */
    QED_MAX_TABLE_SIZE = 16,
    QED_DEFAULT_TABLE_SIZE = 4,
};
/* Image header fields.  Field order and widths are part of the format and
 * must not be rearranged.
 */
typedef struct {
    uint32_t magic;                 /* QED\0 */

    uint32_t cluster_size;          /* in bytes */
    uint32_t table_size;            /* for L1 and L2 tables, in clusters */
    uint32_t header_size;           /* in clusters */

    uint64_t features;              /* format feature bits */
    uint64_t compat_features;       /* compatible feature bits */
    uint64_t autoclear_features;    /* self-resetting feature bits */

    uint64_t l1_table_offset;       /* in bytes */
    uint64_t image_size;            /* total logical image size, in bytes */

    /* if (features & QED_F_BACKING_FILE) */
    uint32_t backing_filename_offset; /* in bytes from start of header */
    uint32_t backing_filename_size;   /* in bytes */
} QEDHeader;
98 typedef struct {
99 BlockDriverState *bs; /* device */
100 uint64_t file_size; /* length of image file, in bytes */
102 QEDHeader header; /* always cpu-endian */
103 uint32_t table_nelems;
104 uint32_t l1_shift;
105 uint32_t l2_shift;
106 uint32_t l2_mask;
107 } BDRVQEDState;
110 * Round down to the start of a cluster
112 static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
114 return offset & ~(uint64_t)(s->header.cluster_size - 1);
118 * Test if a cluster offset is valid
120 static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
122 uint64_t header_size = (uint64_t)s->header.header_size *
123 s->header.cluster_size;
125 if (offset & (s->header.cluster_size - 1)) {
126 return false;
128 return offset >= header_size && offset < s->file_size;
132 * Test if a table offset is valid
134 static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
136 uint64_t end_offset = offset + (s->header.table_size - 1) *
137 s->header.cluster_size;
139 /* Overflow check */
140 if (end_offset <= offset) {
141 return false;
144 return qed_check_cluster_offset(s, offset) &&
145 qed_check_cluster_offset(s, end_offset);
148 #endif /* BLOCK_QED_H */