2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "block_int.h"
31 Differences with QCOW:
33 - Support for multiple incremental snapshots.
34 - Memory management by reference counts.
35 - Clusters which have a reference count of one have the bit
36 QCOW_OFLAG_COPIED to optimize write performance.
37 - Size of compressed clusters is stored in sectors to reduce bit usage
38 in the cluster offsets.
39 - Support for storing additional data (such as the VM state) in the
41 - If a backing store is used, the cluster size is not constrained
42 (could be backported to QCOW).
43 - L2 tables have always a size of one cluster.
47 //#define DEBUG_ALLOC2
/* On-disk magic: the bytes "QFI" followed by 0xfb, and the format
   revision this driver implements. */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define QCOW_VERSION 2

/* Values of the header's crypt_method field. */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES 1

/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1LL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1LL << 62)

/* Each refcount entry is 1 << REFCOUNT_SHIFT bytes wide. */
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
/* Byte offset of 'field' within 'type'.  <stddef.h> already provides a
   well-defined offsetof; only fall back to the classic null-pointer
   idiom when it is absent (that form is technically undefined behavior
   and redefining the standard macro risks a constraint violation). */
#ifndef offsetof
#define offsetof(type, field) ((size_t) &((type *)0)->field)
#endif
66 typedef struct QCowHeader
{
69 uint64_t backing_file_offset
;
70 uint32_t backing_file_size
;
71 uint32_t cluster_bits
;
72 uint64_t size
; /* in bytes */
73 uint32_t crypt_method
;
74 uint32_t l1_size
; /* XXX: save number of clusters instead ? */
75 uint64_t l1_table_offset
;
76 uint64_t refcount_table_offset
;
77 uint32_t refcount_table_clusters
;
78 uint32_t nb_snapshots
;
79 uint64_t snapshots_offset
;
82 typedef struct __attribute__((packed
)) QCowSnapshotHeader
{
83 /* header is 8 byte aligned */
84 uint64_t l1_table_offset
;
93 uint64_t vm_clock_nsec
;
95 uint32_t vm_state_size
;
96 uint32_t extra_data_size
; /* for extension */
97 /* extra data follows */
100 } QCowSnapshotHeader
;
102 #define L2_CACHE_SIZE 16
104 typedef struct QCowSnapshot
{
105 uint64_t l1_table_offset
;
109 uint32_t vm_state_size
;
112 uint64_t vm_clock_nsec
;
115 typedef struct BDRVQcowState
{
116 BlockDriverState
*hd
;
123 int l1_vm_state_index
;
126 uint64_t cluster_offset_mask
;
127 uint64_t l1_table_offset
;
130 uint64_t l2_cache_offsets
[L2_CACHE_SIZE
];
131 uint32_t l2_cache_counts
[L2_CACHE_SIZE
];
132 uint8_t *cluster_cache
;
133 uint8_t *cluster_data
;
134 uint64_t cluster_cache_offset
;
136 uint64_t *refcount_table
;
137 uint64_t refcount_table_offset
;
138 uint32_t refcount_table_size
;
139 uint64_t refcount_block_cache_offset
;
140 uint16_t *refcount_block_cache
;
141 int64_t free_cluster_index
;
142 int64_t free_byte_offset
;
144 uint32_t crypt_method
; /* current crypt method, 0 if no key yet */
145 uint32_t crypt_method_header
;
146 AES_KEY aes_encrypt_key
;
147 AES_KEY aes_decrypt_key
;
148 uint64_t snapshots_offset
;
151 QCowSnapshot
*snapshots
;
154 static int decompress_cluster(BDRVQcowState
*s
, uint64_t cluster_offset
);
155 static int qcow_read(BlockDriverState
*bs
, int64_t sector_num
,
156 uint8_t *buf
, int nb_sectors
);
157 static int qcow_read_snapshots(BlockDriverState
*bs
);
158 static void qcow_free_snapshots(BlockDriverState
*bs
);
159 static int refcount_init(BlockDriverState
*bs
);
160 static void refcount_close(BlockDriverState
*bs
);
161 static int get_refcount(BlockDriverState
*bs
, int64_t cluster_index
);
162 static int update_cluster_refcount(BlockDriverState
*bs
,
163 int64_t cluster_index
,
165 static void update_refcount(BlockDriverState
*bs
,
166 int64_t offset
, int64_t length
,
168 static int64_t alloc_clusters(BlockDriverState
*bs
, int64_t size
);
169 static int64_t alloc_bytes(BlockDriverState
*bs
, int size
);
170 static void free_clusters(BlockDriverState
*bs
,
171 int64_t offset
, int64_t size
);
173 static void check_refcounts(BlockDriverState
*bs
);
176 static int qcow_probe(const uint8_t *buf
, int buf_size
, const char *filename
)
178 const QCowHeader
*cow_header
= (const void *)buf
;
180 if (buf_size
>= sizeof(QCowHeader
) &&
181 be32_to_cpu(cow_header
->magic
) == QCOW_MAGIC
&&
182 be32_to_cpu(cow_header
->version
) == QCOW_VERSION
)
188 static int qcow_open(BlockDriverState
*bs
, const char *filename
, int flags
)
190 BDRVQcowState
*s
= bs
->opaque
;
191 int len
, i
, shift
, ret
;
194 ret
= bdrv_file_open(&s
->hd
, filename
, flags
);
197 if (bdrv_pread(s
->hd
, 0, &header
, sizeof(header
)) != sizeof(header
))
199 be32_to_cpus(&header
.magic
);
200 be32_to_cpus(&header
.version
);
201 be64_to_cpus(&header
.backing_file_offset
);
202 be32_to_cpus(&header
.backing_file_size
);
203 be64_to_cpus(&header
.size
);
204 be32_to_cpus(&header
.cluster_bits
);
205 be32_to_cpus(&header
.crypt_method
);
206 be64_to_cpus(&header
.l1_table_offset
);
207 be32_to_cpus(&header
.l1_size
);
208 be64_to_cpus(&header
.refcount_table_offset
);
209 be32_to_cpus(&header
.refcount_table_clusters
);
210 be64_to_cpus(&header
.snapshots_offset
);
211 be32_to_cpus(&header
.nb_snapshots
);
213 if (header
.magic
!= QCOW_MAGIC
|| header
.version
!= QCOW_VERSION
)
215 if (header
.size
<= 1 ||
216 header
.cluster_bits
< 9 ||
217 header
.cluster_bits
> 16)
219 if (header
.crypt_method
> QCOW_CRYPT_AES
)
221 s
->crypt_method_header
= header
.crypt_method
;
222 if (s
->crypt_method_header
)
224 s
->cluster_bits
= header
.cluster_bits
;
225 s
->cluster_size
= 1 << s
->cluster_bits
;
226 s
->cluster_sectors
= 1 << (s
->cluster_bits
- 9);
227 s
->l2_bits
= s
->cluster_bits
- 3; /* L2 is always one cluster */
228 s
->l2_size
= 1 << s
->l2_bits
;
229 bs
->total_sectors
= header
.size
/ 512;
230 s
->csize_shift
= (62 - (s
->cluster_bits
- 8));
231 s
->csize_mask
= (1 << (s
->cluster_bits
- 8)) - 1;
232 s
->cluster_offset_mask
= (1LL << s
->csize_shift
) - 1;
233 s
->refcount_table_offset
= header
.refcount_table_offset
;
234 s
->refcount_table_size
=
235 header
.refcount_table_clusters
<< (s
->cluster_bits
- 3);
237 s
->snapshots_offset
= header
.snapshots_offset
;
238 s
->nb_snapshots
= header
.nb_snapshots
;
240 /* read the level 1 table */
241 s
->l1_size
= header
.l1_size
;
242 shift
= s
->cluster_bits
+ s
->l2_bits
;
243 s
->l1_vm_state_index
= (header
.size
+ (1LL << shift
) - 1) >> shift
;
244 /* the L1 table must contain at least enough entries to put
246 if (s
->l1_size
< s
->l1_vm_state_index
)
248 s
->l1_table_offset
= header
.l1_table_offset
;
249 s
->l1_table
= qemu_malloc(s
->l1_size
* sizeof(uint64_t));
252 if (bdrv_pread(s
->hd
, s
->l1_table_offset
, s
->l1_table
, s
->l1_size
* sizeof(uint64_t)) !=
253 s
->l1_size
* sizeof(uint64_t))
255 for(i
= 0;i
< s
->l1_size
; i
++) {
256 be64_to_cpus(&s
->l1_table
[i
]);
259 s
->l2_cache
= qemu_malloc(s
->l2_size
* L2_CACHE_SIZE
* sizeof(uint64_t));
262 s
->cluster_cache
= qemu_malloc(s
->cluster_size
);
263 if (!s
->cluster_cache
)
265 /* one more sector for decompressed data alignment */
266 s
->cluster_data
= qemu_malloc(s
->cluster_size
+ 512);
267 if (!s
->cluster_data
)
269 s
->cluster_cache_offset
= -1;
271 if (refcount_init(bs
) < 0)
274 /* read the backing file name */
275 if (header
.backing_file_offset
!= 0) {
276 len
= header
.backing_file_size
;
279 if (bdrv_pread(s
->hd
, header
.backing_file_offset
, bs
->backing_file
, len
) != len
)
281 bs
->backing_file
[len
] = '\0';
283 if (qcow_read_snapshots(bs
) < 0)
292 qcow_free_snapshots(bs
);
294 qemu_free(s
->l1_table
);
295 qemu_free(s
->l2_cache
);
296 qemu_free(s
->cluster_cache
);
297 qemu_free(s
->cluster_data
);
302 static int qcow_set_key(BlockDriverState
*bs
, const char *key
)
304 BDRVQcowState
*s
= bs
->opaque
;
308 memset(keybuf
, 0, 16);
312 /* XXX: we could compress the chars to 7 bits to increase
314 for(i
= 0;i
< len
;i
++) {
317 s
->crypt_method
= s
->crypt_method_header
;
319 if (AES_set_encrypt_key(keybuf
, 128, &s
->aes_encrypt_key
) != 0)
321 if (AES_set_decrypt_key(keybuf
, 128, &s
->aes_decrypt_key
) != 0)
331 AES_encrypt(in
, tmp
, &s
->aes_encrypt_key
);
332 AES_decrypt(tmp
, out
, &s
->aes_decrypt_key
);
333 for(i
= 0; i
< 16; i
++)
334 printf(" %02x", tmp
[i
]);
336 for(i
= 0; i
< 16; i
++)
337 printf(" %02x", out
[i
]);
344 /* The crypt function is compatible with the linux cryptoloop
345 algorithm for < 4 GB images. NOTE: out_buf == in_buf is
347 static void encrypt_sectors(BDRVQcowState
*s
, int64_t sector_num
,
348 uint8_t *out_buf
, const uint8_t *in_buf
,
349 int nb_sectors
, int enc
,
358 for(i
= 0; i
< nb_sectors
; i
++) {
359 ivec
.ll
[0] = cpu_to_le64(sector_num
);
361 AES_cbc_encrypt(in_buf
, out_buf
, 512, key
,
369 static int copy_sectors(BlockDriverState
*bs
, uint64_t start_sect
,
370 uint64_t cluster_offset
, int n_start
, int n_end
)
372 BDRVQcowState
*s
= bs
->opaque
;
378 ret
= qcow_read(bs
, start_sect
+ n_start
, s
->cluster_data
, n
);
381 if (s
->crypt_method
) {
382 encrypt_sectors(s
, start_sect
+ n_start
,
384 s
->cluster_data
, n
, 1,
385 &s
->aes_encrypt_key
);
387 ret
= bdrv_write(s
->hd
, (cluster_offset
>> 9) + n_start
,
394 static void l2_cache_reset(BlockDriverState
*bs
)
396 BDRVQcowState
*s
= bs
->opaque
;
398 memset(s
->l2_cache
, 0, s
->l2_size
* L2_CACHE_SIZE
* sizeof(uint64_t));
399 memset(s
->l2_cache_offsets
, 0, L2_CACHE_SIZE
* sizeof(uint64_t));
400 memset(s
->l2_cache_counts
, 0, L2_CACHE_SIZE
* sizeof(uint32_t));
403 static inline int l2_cache_new_entry(BlockDriverState
*bs
)
405 BDRVQcowState
*s
= bs
->opaque
;
409 /* find a new entry in the least used one */
411 min_count
= 0xffffffff;
412 for(i
= 0; i
< L2_CACHE_SIZE
; i
++) {
413 if (s
->l2_cache_counts
[i
] < min_count
) {
414 min_count
= s
->l2_cache_counts
[i
];
/* Round 'offset' up to the next multiple of 'n'.
   'n' must be a power of two for the mask arithmetic to be valid. */
static int64_t align_offset(int64_t offset, int n)
{
    int64_t mask = (int64_t)n - 1;
    return (offset + mask) & ~mask;
}
427 static int grow_l1_table(BlockDriverState
*bs
, int min_size
)
429 BDRVQcowState
*s
= bs
->opaque
;
430 int new_l1_size
, new_l1_size2
, ret
, i
;
431 uint64_t *new_l1_table
;
432 uint64_t new_l1_table_offset
;
436 new_l1_size
= s
->l1_size
;
437 if (min_size
<= new_l1_size
)
439 while (min_size
> new_l1_size
) {
440 new_l1_size
= (new_l1_size
* 3 + 1) / 2;
443 printf("grow l1_table from %d to %d\n", s
->l1_size
, new_l1_size
);
446 new_l1_size2
= sizeof(uint64_t) * new_l1_size
;
447 new_l1_table
= qemu_mallocz(new_l1_size2
);
450 memcpy(new_l1_table
, s
->l1_table
, s
->l1_size
* sizeof(uint64_t));
452 /* write new table (align to cluster) */
453 new_l1_table_offset
= alloc_clusters(bs
, new_l1_size2
);
455 for(i
= 0; i
< s
->l1_size
; i
++)
456 new_l1_table
[i
] = cpu_to_be64(new_l1_table
[i
]);
457 ret
= bdrv_pwrite(s
->hd
, new_l1_table_offset
, new_l1_table
, new_l1_size2
);
458 if (ret
!= new_l1_size2
)
460 for(i
= 0; i
< s
->l1_size
; i
++)
461 new_l1_table
[i
] = be64_to_cpu(new_l1_table
[i
]);
464 data64
= cpu_to_be64(new_l1_table_offset
);
465 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, l1_table_offset
),
466 &data64
, sizeof(data64
)) != sizeof(data64
))
468 data32
= cpu_to_be32(new_l1_size
);
469 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, l1_size
),
470 &data32
, sizeof(data32
)) != sizeof(data32
))
472 qemu_free(s
->l1_table
);
473 free_clusters(bs
, s
->l1_table_offset
, s
->l1_size
* sizeof(uint64_t));
474 s
->l1_table_offset
= new_l1_table_offset
;
475 s
->l1_table
= new_l1_table
;
476 s
->l1_size
= new_l1_size
;
479 qemu_free(s
->l1_table
);
487 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
490 * 2 to allocate a compressed cluster of size
491 * 'compressed_size'. 'compressed_size' must be > 0 and <
494 * return 0 if not allocated.
496 static uint64_t get_cluster_offset(BlockDriverState
*bs
,
497 uint64_t offset
, int allocate
,
499 int n_start
, int n_end
)
501 BDRVQcowState
*s
= bs
->opaque
;
502 int min_index
, i
, j
, l1_index
, l2_index
, ret
;
503 uint64_t l2_offset
, *l2_table
, cluster_offset
, tmp
, old_l2_offset
;
505 l1_index
= offset
>> (s
->l2_bits
+ s
->cluster_bits
);
506 if (l1_index
>= s
->l1_size
) {
507 /* outside l1 table is allowed: we grow the table if needed */
510 if (grow_l1_table(bs
, l1_index
+ 1) < 0)
513 l2_offset
= s
->l1_table
[l1_index
];
518 old_l2_offset
= l2_offset
;
519 /* allocate a new l2 entry */
520 l2_offset
= alloc_clusters(bs
, s
->l2_size
* sizeof(uint64_t));
521 /* update the L1 entry */
522 s
->l1_table
[l1_index
] = l2_offset
| QCOW_OFLAG_COPIED
;
523 tmp
= cpu_to_be64(l2_offset
| QCOW_OFLAG_COPIED
);
524 if (bdrv_pwrite(s
->hd
, s
->l1_table_offset
+ l1_index
* sizeof(tmp
),
525 &tmp
, sizeof(tmp
)) != sizeof(tmp
))
527 min_index
= l2_cache_new_entry(bs
);
528 l2_table
= s
->l2_cache
+ (min_index
<< s
->l2_bits
);
530 if (old_l2_offset
== 0) {
531 memset(l2_table
, 0, s
->l2_size
* sizeof(uint64_t));
533 if (bdrv_pread(s
->hd
, old_l2_offset
,
534 l2_table
, s
->l2_size
* sizeof(uint64_t)) !=
535 s
->l2_size
* sizeof(uint64_t))
538 if (bdrv_pwrite(s
->hd
, l2_offset
,
539 l2_table
, s
->l2_size
* sizeof(uint64_t)) !=
540 s
->l2_size
* sizeof(uint64_t))
543 if (!(l2_offset
& QCOW_OFLAG_COPIED
)) {
545 free_clusters(bs
, l2_offset
, s
->l2_size
* sizeof(uint64_t));
549 l2_offset
&= ~QCOW_OFLAG_COPIED
;
551 for(i
= 0; i
< L2_CACHE_SIZE
; i
++) {
552 if (l2_offset
== s
->l2_cache_offsets
[i
]) {
553 /* increment the hit count */
554 if (++s
->l2_cache_counts
[i
] == 0xffffffff) {
555 for(j
= 0; j
< L2_CACHE_SIZE
; j
++) {
556 s
->l2_cache_counts
[j
] >>= 1;
559 l2_table
= s
->l2_cache
+ (i
<< s
->l2_bits
);
563 /* not found: load a new entry in the least used one */
564 min_index
= l2_cache_new_entry(bs
);
565 l2_table
= s
->l2_cache
+ (min_index
<< s
->l2_bits
);
566 if (bdrv_pread(s
->hd
, l2_offset
, l2_table
, s
->l2_size
* sizeof(uint64_t)) !=
567 s
->l2_size
* sizeof(uint64_t))
570 s
->l2_cache_offsets
[min_index
] = l2_offset
;
571 s
->l2_cache_counts
[min_index
] = 1;
573 l2_index
= (offset
>> s
->cluster_bits
) & (s
->l2_size
- 1);
574 cluster_offset
= be64_to_cpu(l2_table
[l2_index
]);
575 if (!cluster_offset
) {
577 return cluster_offset
;
578 } else if (!(cluster_offset
& QCOW_OFLAG_COPIED
)) {
580 return cluster_offset
;
581 /* free the cluster */
582 if (cluster_offset
& QCOW_OFLAG_COMPRESSED
) {
584 nb_csectors
= ((cluster_offset
>> s
->csize_shift
) &
586 free_clusters(bs
, (cluster_offset
& s
->cluster_offset_mask
) & ~511,
589 free_clusters(bs
, cluster_offset
, s
->cluster_size
);
592 cluster_offset
&= ~QCOW_OFLAG_COPIED
;
593 return cluster_offset
;
596 /* allocate a new cluster */
597 cluster_offset
= alloc_clusters(bs
, s
->cluster_size
);
599 /* we must initialize the cluster content which won't be
601 if ((n_end
- n_start
) < s
->cluster_sectors
) {
604 start_sect
= (offset
& ~(s
->cluster_size
- 1)) >> 9;
605 ret
= copy_sectors(bs
, start_sect
,
606 cluster_offset
, 0, n_start
);
609 ret
= copy_sectors(bs
, start_sect
,
610 cluster_offset
, n_end
, s
->cluster_sectors
);
614 tmp
= cpu_to_be64(cluster_offset
| QCOW_OFLAG_COPIED
);
617 cluster_offset
= alloc_bytes(bs
, compressed_size
);
618 nb_csectors
= ((cluster_offset
+ compressed_size
- 1) >> 9) -
619 (cluster_offset
>> 9);
620 cluster_offset
|= QCOW_OFLAG_COMPRESSED
|
621 ((uint64_t)nb_csectors
<< s
->csize_shift
);
622 /* compressed clusters never have the copied flag */
623 tmp
= cpu_to_be64(cluster_offset
);
625 /* update L2 table */
626 l2_table
[l2_index
] = tmp
;
627 if (bdrv_pwrite(s
->hd
,
628 l2_offset
+ l2_index
* sizeof(tmp
), &tmp
, sizeof(tmp
)) != sizeof(tmp
))
630 return cluster_offset
;
633 static int qcow_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
634 int nb_sectors
, int *pnum
)
636 BDRVQcowState
*s
= bs
->opaque
;
637 int index_in_cluster
, n
;
638 uint64_t cluster_offset
;
640 cluster_offset
= get_cluster_offset(bs
, sector_num
<< 9, 0, 0, 0, 0);
641 index_in_cluster
= sector_num
& (s
->cluster_sectors
- 1);
642 n
= s
->cluster_sectors
- index_in_cluster
;
646 return (cluster_offset
!= 0);
649 static int decompress_buffer(uint8_t *out_buf
, int out_buf_size
,
650 const uint8_t *buf
, int buf_size
)
652 z_stream strm1
, *strm
= &strm1
;
655 memset(strm
, 0, sizeof(*strm
));
657 strm
->next_in
= (uint8_t *)buf
;
658 strm
->avail_in
= buf_size
;
659 strm
->next_out
= out_buf
;
660 strm
->avail_out
= out_buf_size
;
662 ret
= inflateInit2(strm
, -12);
665 ret
= inflate(strm
, Z_FINISH
);
666 out_len
= strm
->next_out
- out_buf
;
667 if ((ret
!= Z_STREAM_END
&& ret
!= Z_BUF_ERROR
) ||
668 out_len
!= out_buf_size
) {
676 static int decompress_cluster(BDRVQcowState
*s
, uint64_t cluster_offset
)
678 int ret
, csize
, nb_csectors
, sector_offset
;
681 coffset
= cluster_offset
& s
->cluster_offset_mask
;
682 if (s
->cluster_cache_offset
!= coffset
) {
683 nb_csectors
= ((cluster_offset
>> s
->csize_shift
) & s
->csize_mask
) + 1;
684 sector_offset
= coffset
& 511;
685 csize
= nb_csectors
* 512 - sector_offset
;
686 ret
= bdrv_read(s
->hd
, coffset
>> 9, s
->cluster_data
, nb_csectors
);
690 if (decompress_buffer(s
->cluster_cache
, s
->cluster_size
,
691 s
->cluster_data
+ sector_offset
, csize
) < 0) {
694 s
->cluster_cache_offset
= coffset
;
699 /* handle reading after the end of the backing file */
700 static int backing_read1(BlockDriverState
*bs
,
701 int64_t sector_num
, uint8_t *buf
, int nb_sectors
)
704 if ((sector_num
+ nb_sectors
) <= bs
->total_sectors
)
706 if (sector_num
>= bs
->total_sectors
)
709 n1
= bs
->total_sectors
- sector_num
;
710 memset(buf
+ n1
* 512, 0, 512 * (nb_sectors
- n1
));
714 static int qcow_read(BlockDriverState
*bs
, int64_t sector_num
,
715 uint8_t *buf
, int nb_sectors
)
717 BDRVQcowState
*s
= bs
->opaque
;
718 int ret
, index_in_cluster
, n
, n1
;
719 uint64_t cluster_offset
;
721 while (nb_sectors
> 0) {
722 cluster_offset
= get_cluster_offset(bs
, sector_num
<< 9, 0, 0, 0, 0);
723 index_in_cluster
= sector_num
& (s
->cluster_sectors
- 1);
724 n
= s
->cluster_sectors
- index_in_cluster
;
727 if (!cluster_offset
) {
728 if (bs
->backing_hd
) {
729 /* read from the base image */
730 n1
= backing_read1(bs
->backing_hd
, sector_num
, buf
, n
);
732 ret
= bdrv_read(bs
->backing_hd
, sector_num
, buf
, n1
);
737 memset(buf
, 0, 512 * n
);
739 } else if (cluster_offset
& QCOW_OFLAG_COMPRESSED
) {
740 if (decompress_cluster(s
, cluster_offset
) < 0)
742 memcpy(buf
, s
->cluster_cache
+ index_in_cluster
* 512, 512 * n
);
744 ret
= bdrv_pread(s
->hd
, cluster_offset
+ index_in_cluster
* 512, buf
, n
* 512);
747 if (s
->crypt_method
) {
748 encrypt_sectors(s
, sector_num
, buf
, buf
, n
, 0,
749 &s
->aes_decrypt_key
);
759 static int qcow_write(BlockDriverState
*bs
, int64_t sector_num
,
760 const uint8_t *buf
, int nb_sectors
)
762 BDRVQcowState
*s
= bs
->opaque
;
763 int ret
, index_in_cluster
, n
;
764 uint64_t cluster_offset
;
766 while (nb_sectors
> 0) {
767 index_in_cluster
= sector_num
& (s
->cluster_sectors
- 1);
768 n
= s
->cluster_sectors
- index_in_cluster
;
771 cluster_offset
= get_cluster_offset(bs
, sector_num
<< 9, 1, 0,
773 index_in_cluster
+ n
);
776 if (s
->crypt_method
) {
777 encrypt_sectors(s
, sector_num
, s
->cluster_data
, buf
, n
, 1,
778 &s
->aes_encrypt_key
);
779 ret
= bdrv_pwrite(s
->hd
, cluster_offset
+ index_in_cluster
* 512,
780 s
->cluster_data
, n
* 512);
782 ret
= bdrv_pwrite(s
->hd
, cluster_offset
+ index_in_cluster
* 512, buf
, n
* 512);
790 s
->cluster_cache_offset
= -1; /* disable compressed cache */
794 typedef struct QCowAIOCB
{
795 BlockDriverAIOCB common
;
800 uint64_t cluster_offset
;
801 uint8_t *cluster_data
;
802 BlockDriverAIOCB
*hd_aiocb
;
805 static void qcow_aio_read_cb(void *opaque
, int ret
)
807 QCowAIOCB
*acb
= opaque
;
808 BlockDriverState
*bs
= acb
->common
.bs
;
809 BDRVQcowState
*s
= bs
->opaque
;
810 int index_in_cluster
, n1
;
812 acb
->hd_aiocb
= NULL
;
815 acb
->common
.cb(acb
->common
.opaque
, ret
);
816 qemu_aio_release(acb
);
821 /* post process the read buffer */
822 if (!acb
->cluster_offset
) {
824 } else if (acb
->cluster_offset
& QCOW_OFLAG_COMPRESSED
) {
827 if (s
->crypt_method
) {
828 encrypt_sectors(s
, acb
->sector_num
, acb
->buf
, acb
->buf
,
830 &s
->aes_decrypt_key
);
834 acb
->nb_sectors
-= acb
->n
;
835 acb
->sector_num
+= acb
->n
;
836 acb
->buf
+= acb
->n
* 512;
838 if (acb
->nb_sectors
== 0) {
839 /* request completed */
840 acb
->common
.cb(acb
->common
.opaque
, 0);
841 qemu_aio_release(acb
);
845 /* prepare next AIO request */
846 acb
->cluster_offset
= get_cluster_offset(bs
, acb
->sector_num
<< 9,
848 index_in_cluster
= acb
->sector_num
& (s
->cluster_sectors
- 1);
849 acb
->n
= s
->cluster_sectors
- index_in_cluster
;
850 if (acb
->n
> acb
->nb_sectors
)
851 acb
->n
= acb
->nb_sectors
;
853 if (!acb
->cluster_offset
) {
854 if (bs
->backing_hd
) {
855 /* read from the base image */
856 n1
= backing_read1(bs
->backing_hd
, acb
->sector_num
,
859 acb
->hd_aiocb
= bdrv_aio_read(bs
->backing_hd
, acb
->sector_num
,
860 acb
->buf
, acb
->n
, qcow_aio_read_cb
, acb
);
861 if (acb
->hd_aiocb
== NULL
)
867 /* Note: in this case, no need to wait */
868 memset(acb
->buf
, 0, 512 * acb
->n
);
871 } else if (acb
->cluster_offset
& QCOW_OFLAG_COMPRESSED
) {
872 /* add AIO support for compressed blocks ? */
873 if (decompress_cluster(s
, acb
->cluster_offset
) < 0)
876 s
->cluster_cache
+ index_in_cluster
* 512, 512 * acb
->n
);
879 if ((acb
->cluster_offset
& 511) != 0) {
883 acb
->hd_aiocb
= bdrv_aio_read(s
->hd
,
884 (acb
->cluster_offset
>> 9) + index_in_cluster
,
885 acb
->buf
, acb
->n
, qcow_aio_read_cb
, acb
);
886 if (acb
->hd_aiocb
== NULL
)
891 static QCowAIOCB
*qcow_aio_setup(BlockDriverState
*bs
,
892 int64_t sector_num
, uint8_t *buf
, int nb_sectors
,
893 BlockDriverCompletionFunc
*cb
, void *opaque
)
897 acb
= qemu_aio_get(bs
, cb
, opaque
);
900 acb
->hd_aiocb
= NULL
;
901 acb
->sector_num
= sector_num
;
903 acb
->nb_sectors
= nb_sectors
;
905 acb
->cluster_offset
= 0;
909 static BlockDriverAIOCB
*qcow_aio_read(BlockDriverState
*bs
,
910 int64_t sector_num
, uint8_t *buf
, int nb_sectors
,
911 BlockDriverCompletionFunc
*cb
, void *opaque
)
915 acb
= qcow_aio_setup(bs
, sector_num
, buf
, nb_sectors
, cb
, opaque
);
919 qcow_aio_read_cb(acb
, 0);
923 static void qcow_aio_write_cb(void *opaque
, int ret
)
925 QCowAIOCB
*acb
= opaque
;
926 BlockDriverState
*bs
= acb
->common
.bs
;
927 BDRVQcowState
*s
= bs
->opaque
;
928 int index_in_cluster
;
929 uint64_t cluster_offset
;
930 const uint8_t *src_buf
;
932 acb
->hd_aiocb
= NULL
;
936 acb
->common
.cb(acb
->common
.opaque
, ret
);
937 qemu_aio_release(acb
);
941 acb
->nb_sectors
-= acb
->n
;
942 acb
->sector_num
+= acb
->n
;
943 acb
->buf
+= acb
->n
* 512;
945 if (acb
->nb_sectors
== 0) {
946 /* request completed */
947 acb
->common
.cb(acb
->common
.opaque
, 0);
948 qemu_aio_release(acb
);
952 index_in_cluster
= acb
->sector_num
& (s
->cluster_sectors
- 1);
953 acb
->n
= s
->cluster_sectors
- index_in_cluster
;
954 if (acb
->n
> acb
->nb_sectors
)
955 acb
->n
= acb
->nb_sectors
;
956 cluster_offset
= get_cluster_offset(bs
, acb
->sector_num
<< 9, 1, 0,
958 index_in_cluster
+ acb
->n
);
959 if (!cluster_offset
|| (cluster_offset
& 511) != 0) {
963 if (s
->crypt_method
) {
964 if (!acb
->cluster_data
) {
965 acb
->cluster_data
= qemu_mallocz(s
->cluster_size
);
966 if (!acb
->cluster_data
) {
971 encrypt_sectors(s
, acb
->sector_num
, acb
->cluster_data
, acb
->buf
,
972 acb
->n
, 1, &s
->aes_encrypt_key
);
973 src_buf
= acb
->cluster_data
;
977 acb
->hd_aiocb
= bdrv_aio_write(s
->hd
,
978 (cluster_offset
>> 9) + index_in_cluster
,
980 qcow_aio_write_cb
, acb
);
981 if (acb
->hd_aiocb
== NULL
)
985 static BlockDriverAIOCB
*qcow_aio_write(BlockDriverState
*bs
,
986 int64_t sector_num
, const uint8_t *buf
, int nb_sectors
,
987 BlockDriverCompletionFunc
*cb
, void *opaque
)
989 BDRVQcowState
*s
= bs
->opaque
;
992 s
->cluster_cache_offset
= -1; /* disable compressed cache */
994 acb
= qcow_aio_setup(bs
, sector_num
, (uint8_t*)buf
, nb_sectors
, cb
, opaque
);
998 qcow_aio_write_cb(acb
, 0);
1002 static void qcow_aio_cancel(BlockDriverAIOCB
*blockacb
)
1004 QCowAIOCB
*acb
= (QCowAIOCB
*)blockacb
;
1006 bdrv_aio_cancel(acb
->hd_aiocb
);
1007 qemu_aio_release(acb
);
1010 static void qcow_close(BlockDriverState
*bs
)
1012 BDRVQcowState
*s
= bs
->opaque
;
1013 qemu_free(s
->l1_table
);
1014 qemu_free(s
->l2_cache
);
1015 qemu_free(s
->cluster_cache
);
1016 qemu_free(s
->cluster_data
);
1021 /* XXX: use std qcow open function ? */
1022 typedef struct QCowCreateState
{
1025 uint16_t *refcount_block
;
1026 uint64_t *refcount_table
;
1027 int64_t l1_table_offset
;
1028 int64_t refcount_table_offset
;
1029 int64_t refcount_block_offset
;
1032 static void create_refcount_update(QCowCreateState
*s
,
1033 int64_t offset
, int64_t size
)
1036 int64_t start
, last
, cluster_offset
;
1039 start
= offset
& ~(s
->cluster_size
- 1);
1040 last
= (offset
+ size
- 1) & ~(s
->cluster_size
- 1);
1041 for(cluster_offset
= start
; cluster_offset
<= last
;
1042 cluster_offset
+= s
->cluster_size
) {
1043 p
= &s
->refcount_block
[cluster_offset
>> s
->cluster_bits
];
1044 refcount
= be16_to_cpu(*p
);
1046 *p
= cpu_to_be16(refcount
);
1050 static int qcow_create(const char *filename
, int64_t total_size
,
1051 const char *backing_file
, int flags
)
1053 int fd
, header_size
, backing_filename_len
, l1_size
, i
, shift
, l2_bits
;
1055 uint64_t tmp
, offset
;
1056 QCowCreateState s1
, *s
= &s1
;
1058 memset(s
, 0, sizeof(*s
));
1060 fd
= open(filename
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
, 0644);
1063 memset(&header
, 0, sizeof(header
));
1064 header
.magic
= cpu_to_be32(QCOW_MAGIC
);
1065 header
.version
= cpu_to_be32(QCOW_VERSION
);
1066 header
.size
= cpu_to_be64(total_size
* 512);
1067 header_size
= sizeof(header
);
1068 backing_filename_len
= 0;
1070 header
.backing_file_offset
= cpu_to_be64(header_size
);
1071 backing_filename_len
= strlen(backing_file
);
1072 header
.backing_file_size
= cpu_to_be32(backing_filename_len
);
1073 header_size
+= backing_filename_len
;
1075 s
->cluster_bits
= 12; /* 4 KB clusters */
1076 s
->cluster_size
= 1 << s
->cluster_bits
;
1077 header
.cluster_bits
= cpu_to_be32(s
->cluster_bits
);
1078 header_size
= (header_size
+ 7) & ~7;
1079 if (flags
& BLOCK_FLAG_ENCRYPT
) {
1080 header
.crypt_method
= cpu_to_be32(QCOW_CRYPT_AES
);
1082 header
.crypt_method
= cpu_to_be32(QCOW_CRYPT_NONE
);
1084 l2_bits
= s
->cluster_bits
- 3;
1085 shift
= s
->cluster_bits
+ l2_bits
;
1086 l1_size
= (((total_size
* 512) + (1LL << shift
) - 1) >> shift
);
1087 offset
= align_offset(header_size
, s
->cluster_size
);
1088 s
->l1_table_offset
= offset
;
1089 header
.l1_table_offset
= cpu_to_be64(s
->l1_table_offset
);
1090 header
.l1_size
= cpu_to_be32(l1_size
);
1091 offset
+= align_offset(l1_size
* sizeof(uint64_t), s
->cluster_size
);
1093 s
->refcount_table
= qemu_mallocz(s
->cluster_size
);
1094 if (!s
->refcount_table
)
1096 s
->refcount_block
= qemu_mallocz(s
->cluster_size
);
1097 if (!s
->refcount_block
)
1100 s
->refcount_table_offset
= offset
;
1101 header
.refcount_table_offset
= cpu_to_be64(offset
);
1102 header
.refcount_table_clusters
= cpu_to_be32(1);
1103 offset
+= s
->cluster_size
;
1105 s
->refcount_table
[0] = cpu_to_be64(offset
);
1106 s
->refcount_block_offset
= offset
;
1107 offset
+= s
->cluster_size
;
1109 /* update refcounts */
1110 create_refcount_update(s
, 0, header_size
);
1111 create_refcount_update(s
, s
->l1_table_offset
, l1_size
* sizeof(uint64_t));
1112 create_refcount_update(s
, s
->refcount_table_offset
, s
->cluster_size
);
1113 create_refcount_update(s
, s
->refcount_block_offset
, s
->cluster_size
);
1115 /* write all the data */
1116 write(fd
, &header
, sizeof(header
));
1118 write(fd
, backing_file
, backing_filename_len
);
1120 lseek(fd
, s
->l1_table_offset
, SEEK_SET
);
1122 for(i
= 0;i
< l1_size
; i
++) {
1123 write(fd
, &tmp
, sizeof(tmp
));
1125 lseek(fd
, s
->refcount_table_offset
, SEEK_SET
);
1126 write(fd
, s
->refcount_table
, s
->cluster_size
);
1128 lseek(fd
, s
->refcount_block_offset
, SEEK_SET
);
1129 write(fd
, s
->refcount_block
, s
->cluster_size
);
1131 qemu_free(s
->refcount_table
);
1132 qemu_free(s
->refcount_block
);
1136 qemu_free(s
->refcount_table
);
1137 qemu_free(s
->refcount_block
);
1142 static int qcow_make_empty(BlockDriverState
*bs
)
1145 /* XXX: not correct */
1146 BDRVQcowState
*s
= bs
->opaque
;
1147 uint32_t l1_length
= s
->l1_size
* sizeof(uint64_t);
1150 memset(s
->l1_table
, 0, l1_length
);
1151 if (bdrv_pwrite(s
->hd
, s
->l1_table_offset
, s
->l1_table
, l1_length
) < 0)
1153 ret
= bdrv_truncate(s
->hd
, s
->l1_table_offset
+ l1_length
);
1162 /* XXX: put compressed sectors first, then all the cluster aligned
1163 tables to avoid losing bytes in alignment */
1164 static int qcow_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
1165 const uint8_t *buf
, int nb_sectors
)
1167 BDRVQcowState
*s
= bs
->opaque
;
1171 uint64_t cluster_offset
;
1173 if (nb_sectors
== 0) {
1174 /* align end of file to a sector boundary to ease reading with
1175 sector based I/Os */
1176 cluster_offset
= bdrv_getlength(s
->hd
);
1177 cluster_offset
= (cluster_offset
+ 511) & ~511;
1178 bdrv_truncate(s
->hd
, cluster_offset
);
1182 if (nb_sectors
!= s
->cluster_sectors
)
1185 out_buf
= qemu_malloc(s
->cluster_size
+ (s
->cluster_size
/ 1000) + 128);
1189 /* best compression, small window, no zlib header */
1190 memset(&strm
, 0, sizeof(strm
));
1191 ret
= deflateInit2(&strm
, Z_DEFAULT_COMPRESSION
,
1193 9, Z_DEFAULT_STRATEGY
);
1199 strm
.avail_in
= s
->cluster_size
;
1200 strm
.next_in
= (uint8_t *)buf
;
1201 strm
.avail_out
= s
->cluster_size
;
1202 strm
.next_out
= out_buf
;
1204 ret
= deflate(&strm
, Z_FINISH
);
1205 if (ret
!= Z_STREAM_END
&& ret
!= Z_OK
) {
1210 out_len
= strm
.next_out
- out_buf
;
1214 if (ret
!= Z_STREAM_END
|| out_len
>= s
->cluster_size
) {
1215 /* could not compress: write normal cluster */
1216 qcow_write(bs
, sector_num
, buf
, s
->cluster_sectors
);
1218 cluster_offset
= get_cluster_offset(bs
, sector_num
<< 9, 2,
1220 cluster_offset
&= s
->cluster_offset_mask
;
1221 if (bdrv_pwrite(s
->hd
, cluster_offset
, out_buf
, out_len
) != out_len
) {
1231 static void qcow_flush(BlockDriverState
*bs
)
1233 BDRVQcowState
*s
= bs
->opaque
;
1237 static int qcow_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
1239 BDRVQcowState
*s
= bs
->opaque
;
1240 bdi
->cluster_size
= s
->cluster_size
;
1241 bdi
->vm_state_offset
= (int64_t)s
->l1_vm_state_index
<<
1242 (s
->cluster_bits
+ s
->l2_bits
);
1246 /*********************************************************/
1247 /* snapshot support */
1249 /* update the refcounts of snapshots and the copied flag */
1250 static int update_snapshot_refcount(BlockDriverState
*bs
,
1251 int64_t l1_table_offset
,
1255 BDRVQcowState
*s
= bs
->opaque
;
1256 uint64_t *l1_table
, *l2_table
, l2_offset
, offset
, l1_size2
, l1_allocated
;
1257 int64_t old_offset
, old_l2_offset
;
1258 int l2_size
, i
, j
, l1_modified
, l2_modified
, nb_csectors
, refcount
;
1264 l1_size2
= l1_size
* sizeof(uint64_t);
1266 if (l1_table_offset
!= s
->l1_table_offset
) {
1267 l1_table
= qemu_malloc(l1_size2
);
1271 if (bdrv_pread(s
->hd
, l1_table_offset
,
1272 l1_table
, l1_size2
) != l1_size2
)
1274 for(i
= 0;i
< l1_size
; i
++)
1275 be64_to_cpus(&l1_table
[i
]);
1277 assert(l1_size
== s
->l1_size
);
1278 l1_table
= s
->l1_table
;
1282 l2_size
= s
->l2_size
* sizeof(uint64_t);
1283 l2_table
= qemu_malloc(l2_size
);
1287 for(i
= 0; i
< l1_size
; i
++) {
1288 l2_offset
= l1_table
[i
];
1290 old_l2_offset
= l2_offset
;
1291 l2_offset
&= ~QCOW_OFLAG_COPIED
;
1293 if (bdrv_pread(s
->hd
, l2_offset
, l2_table
, l2_size
) != l2_size
)
1295 for(j
= 0; j
< s
->l2_size
; j
++) {
1296 offset
= be64_to_cpu(l2_table
[j
]);
1298 old_offset
= offset
;
1299 offset
&= ~QCOW_OFLAG_COPIED
;
1300 if (offset
& QCOW_OFLAG_COMPRESSED
) {
1301 nb_csectors
= ((offset
>> s
->csize_shift
) &
1304 update_refcount(bs
, (offset
& s
->cluster_offset_mask
) & ~511,
1305 nb_csectors
* 512, addend
);
1306 /* compressed clusters are never modified */
1310 refcount
= update_cluster_refcount(bs
, offset
>> s
->cluster_bits
, addend
);
1312 refcount
= get_refcount(bs
, offset
>> s
->cluster_bits
);
1316 if (refcount
== 1) {
1317 offset
|= QCOW_OFLAG_COPIED
;
1319 if (offset
!= old_offset
) {
1320 l2_table
[j
] = cpu_to_be64(offset
);
1326 if (bdrv_pwrite(s
->hd
,
1327 l2_offset
, l2_table
, l2_size
) != l2_size
)
1332 refcount
= update_cluster_refcount(bs
, l2_offset
>> s
->cluster_bits
, addend
);
1334 refcount
= get_refcount(bs
, l2_offset
>> s
->cluster_bits
);
1336 if (refcount
== 1) {
1337 l2_offset
|= QCOW_OFLAG_COPIED
;
1339 if (l2_offset
!= old_l2_offset
) {
1340 l1_table
[i
] = l2_offset
;
1346 for(i
= 0; i
< l1_size
; i
++)
1347 cpu_to_be64s(&l1_table
[i
]);
1348 if (bdrv_pwrite(s
->hd
, l1_table_offset
, l1_table
,
1349 l1_size2
) != l1_size2
)
1351 for(i
= 0; i
< l1_size
; i
++)
1352 be64_to_cpus(&l1_table
[i
]);
1355 qemu_free(l1_table
);
1356 qemu_free(l2_table
);
1360 qemu_free(l1_table
);
1361 qemu_free(l2_table
);
1365 static void qcow_free_snapshots(BlockDriverState
*bs
)
1367 BDRVQcowState
*s
= bs
->opaque
;
1370 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1371 qemu_free(s
->snapshots
[i
].name
);
1372 qemu_free(s
->snapshots
[i
].id_str
);
1374 qemu_free(s
->snapshots
);
1375 s
->snapshots
= NULL
;
1376 s
->nb_snapshots
= 0;
1379 static int qcow_read_snapshots(BlockDriverState
*bs
)
1381 BDRVQcowState
*s
= bs
->opaque
;
1382 QCowSnapshotHeader h
;
1384 int i
, id_str_size
, name_size
;
1386 uint32_t extra_data_size
;
1388 offset
= s
->snapshots_offset
;
1389 s
->snapshots
= qemu_mallocz(s
->nb_snapshots
* sizeof(QCowSnapshot
));
1392 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1393 offset
= align_offset(offset
, 8);
1394 if (bdrv_pread(s
->hd
, offset
, &h
, sizeof(h
)) != sizeof(h
))
1396 offset
+= sizeof(h
);
1397 sn
= s
->snapshots
+ i
;
1398 sn
->l1_table_offset
= be64_to_cpu(h
.l1_table_offset
);
1399 sn
->l1_size
= be32_to_cpu(h
.l1_size
);
1400 sn
->vm_state_size
= be32_to_cpu(h
.vm_state_size
);
1401 sn
->date_sec
= be32_to_cpu(h
.date_sec
);
1402 sn
->date_nsec
= be32_to_cpu(h
.date_nsec
);
1403 sn
->vm_clock_nsec
= be64_to_cpu(h
.vm_clock_nsec
);
1404 extra_data_size
= be32_to_cpu(h
.extra_data_size
);
1406 id_str_size
= be16_to_cpu(h
.id_str_size
);
1407 name_size
= be16_to_cpu(h
.name_size
);
1409 offset
+= extra_data_size
;
1411 sn
->id_str
= qemu_malloc(id_str_size
+ 1);
1414 if (bdrv_pread(s
->hd
, offset
, sn
->id_str
, id_str_size
) != id_str_size
)
1416 offset
+= id_str_size
;
1417 sn
->id_str
[id_str_size
] = '\0';
1419 sn
->name
= qemu_malloc(name_size
+ 1);
1422 if (bdrv_pread(s
->hd
, offset
, sn
->name
, name_size
) != name_size
)
1424 offset
+= name_size
;
1425 sn
->name
[name_size
] = '\0';
1427 s
->snapshots_size
= offset
- s
->snapshots_offset
;
1430 qcow_free_snapshots(bs
);
1434 /* add at the end of the file a new list of snapshots */
1435 static int qcow_write_snapshots(BlockDriverState
*bs
)
1437 BDRVQcowState
*s
= bs
->opaque
;
1439 QCowSnapshotHeader h
;
1440 int i
, name_size
, id_str_size
, snapshots_size
;
1443 int64_t offset
, snapshots_offset
;
1445 /* compute the size of the snapshots */
1447 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1448 sn
= s
->snapshots
+ i
;
1449 offset
= align_offset(offset
, 8);
1450 offset
+= sizeof(h
);
1451 offset
+= strlen(sn
->id_str
);
1452 offset
+= strlen(sn
->name
);
1454 snapshots_size
= offset
;
1456 snapshots_offset
= alloc_clusters(bs
, snapshots_size
);
1457 offset
= snapshots_offset
;
1459 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1460 sn
= s
->snapshots
+ i
;
1461 memset(&h
, 0, sizeof(h
));
1462 h
.l1_table_offset
= cpu_to_be64(sn
->l1_table_offset
);
1463 h
.l1_size
= cpu_to_be32(sn
->l1_size
);
1464 h
.vm_state_size
= cpu_to_be32(sn
->vm_state_size
);
1465 h
.date_sec
= cpu_to_be32(sn
->date_sec
);
1466 h
.date_nsec
= cpu_to_be32(sn
->date_nsec
);
1467 h
.vm_clock_nsec
= cpu_to_be64(sn
->vm_clock_nsec
);
1469 id_str_size
= strlen(sn
->id_str
);
1470 name_size
= strlen(sn
->name
);
1471 h
.id_str_size
= cpu_to_be16(id_str_size
);
1472 h
.name_size
= cpu_to_be16(name_size
);
1473 offset
= align_offset(offset
, 8);
1474 if (bdrv_pwrite(s
->hd
, offset
, &h
, sizeof(h
)) != sizeof(h
))
1476 offset
+= sizeof(h
);
1477 if (bdrv_pwrite(s
->hd
, offset
, sn
->id_str
, id_str_size
) != id_str_size
)
1479 offset
+= id_str_size
;
1480 if (bdrv_pwrite(s
->hd
, offset
, sn
->name
, name_size
) != name_size
)
1482 offset
+= name_size
;
1485 /* update the various header fields */
1486 data64
= cpu_to_be64(snapshots_offset
);
1487 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, snapshots_offset
),
1488 &data64
, sizeof(data64
)) != sizeof(data64
))
1490 data32
= cpu_to_be32(s
->nb_snapshots
);
1491 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, nb_snapshots
),
1492 &data32
, sizeof(data32
)) != sizeof(data32
))
1495 /* free the old snapshot table */
1496 free_clusters(bs
, s
->snapshots_offset
, s
->snapshots_size
);
1497 s
->snapshots_offset
= snapshots_offset
;
1498 s
->snapshots_size
= snapshots_size
;
1504 static void find_new_snapshot_id(BlockDriverState
*bs
,
1505 char *id_str
, int id_str_size
)
1507 BDRVQcowState
*s
= bs
->opaque
;
1509 int i
, id
, id_max
= 0;
1511 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1512 sn
= s
->snapshots
+ i
;
1513 id
= strtoul(sn
->id_str
, NULL
, 10);
1517 snprintf(id_str
, id_str_size
, "%d", id_max
+ 1);
1520 static int find_snapshot_by_id(BlockDriverState
*bs
, const char *id_str
)
1522 BDRVQcowState
*s
= bs
->opaque
;
1525 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1526 if (!strcmp(s
->snapshots
[i
].id_str
, id_str
))
1532 static int find_snapshot_by_id_or_name(BlockDriverState
*bs
, const char *name
)
1534 BDRVQcowState
*s
= bs
->opaque
;
1537 ret
= find_snapshot_by_id(bs
, name
);
1540 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1541 if (!strcmp(s
->snapshots
[i
].name
, name
))
1547 /* if no id is provided, a new one is constructed */
1548 static int qcow_snapshot_create(BlockDriverState
*bs
,
1549 QEMUSnapshotInfo
*sn_info
)
1551 BDRVQcowState
*s
= bs
->opaque
;
1552 QCowSnapshot
*snapshots1
, sn1
, *sn
= &sn1
;
1554 uint64_t *l1_table
= NULL
;
1556 memset(sn
, 0, sizeof(*sn
));
1558 if (sn_info
->id_str
[0] == '\0') {
1559 /* compute a new id */
1560 find_new_snapshot_id(bs
, sn_info
->id_str
, sizeof(sn_info
->id_str
));
1563 /* check that the ID is unique */
1564 if (find_snapshot_by_id(bs
, sn_info
->id_str
) >= 0)
1567 sn
->id_str
= qemu_strdup(sn_info
->id_str
);
1570 sn
->name
= qemu_strdup(sn_info
->name
);
1573 sn
->vm_state_size
= sn_info
->vm_state_size
;
1574 sn
->date_sec
= sn_info
->date_sec
;
1575 sn
->date_nsec
= sn_info
->date_nsec
;
1576 sn
->vm_clock_nsec
= sn_info
->vm_clock_nsec
;
1578 ret
= update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 1);
1582 /* create the L1 table of the snapshot */
1583 sn
->l1_table_offset
= alloc_clusters(bs
, s
->l1_size
* sizeof(uint64_t));
1584 sn
->l1_size
= s
->l1_size
;
1586 l1_table
= qemu_malloc(s
->l1_size
* sizeof(uint64_t));
1589 for(i
= 0; i
< s
->l1_size
; i
++) {
1590 l1_table
[i
] = cpu_to_be64(s
->l1_table
[i
]);
1592 if (bdrv_pwrite(s
->hd
, sn
->l1_table_offset
,
1593 l1_table
, s
->l1_size
* sizeof(uint64_t)) !=
1594 (s
->l1_size
* sizeof(uint64_t)))
1596 qemu_free(l1_table
);
1599 snapshots1
= qemu_malloc((s
->nb_snapshots
+ 1) * sizeof(QCowSnapshot
));
1602 memcpy(snapshots1
, s
->snapshots
, s
->nb_snapshots
* sizeof(QCowSnapshot
));
1603 s
->snapshots
= snapshots1
;
1604 s
->snapshots
[s
->nb_snapshots
++] = *sn
;
1606 if (qcow_write_snapshots(bs
) < 0)
1609 check_refcounts(bs
);
1613 qemu_free(sn
->name
);
1614 qemu_free(l1_table
);
1618 /* copy the snapshot 'snapshot_name' into the current disk image */
1619 static int qcow_snapshot_goto(BlockDriverState
*bs
,
1620 const char *snapshot_id
)
1622 BDRVQcowState
*s
= bs
->opaque
;
1624 int i
, snapshot_index
, l1_size2
;
1626 snapshot_index
= find_snapshot_by_id_or_name(bs
, snapshot_id
);
1627 if (snapshot_index
< 0)
1629 sn
= &s
->snapshots
[snapshot_index
];
1631 if (update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, -1) < 0)
1634 if (grow_l1_table(bs
, sn
->l1_size
) < 0)
1637 s
->l1_size
= sn
->l1_size
;
1638 l1_size2
= s
->l1_size
* sizeof(uint64_t);
1639 /* copy the snapshot l1 table to the current l1 table */
1640 if (bdrv_pread(s
->hd
, sn
->l1_table_offset
,
1641 s
->l1_table
, l1_size2
) != l1_size2
)
1643 if (bdrv_pwrite(s
->hd
, s
->l1_table_offset
,
1644 s
->l1_table
, l1_size2
) != l1_size2
)
1646 for(i
= 0;i
< s
->l1_size
; i
++) {
1647 be64_to_cpus(&s
->l1_table
[i
]);
1650 if (update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 1) < 0)
1654 check_refcounts(bs
);
1661 static int qcow_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
1663 BDRVQcowState
*s
= bs
->opaque
;
1665 int snapshot_index
, ret
;
1667 snapshot_index
= find_snapshot_by_id_or_name(bs
, snapshot_id
);
1668 if (snapshot_index
< 0)
1670 sn
= &s
->snapshots
[snapshot_index
];
1672 ret
= update_snapshot_refcount(bs
, sn
->l1_table_offset
, sn
->l1_size
, -1);
1675 /* must update the copied flag on the current cluster offsets */
1676 ret
= update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 0);
1679 free_clusters(bs
, sn
->l1_table_offset
, sn
->l1_size
* sizeof(uint64_t));
1681 qemu_free(sn
->id_str
);
1682 qemu_free(sn
->name
);
1683 memmove(sn
, sn
+ 1, (s
->nb_snapshots
- snapshot_index
- 1) * sizeof(*sn
));
1685 ret
= qcow_write_snapshots(bs
);
1687 /* XXX: restore snapshot if error ? */
1691 check_refcounts(bs
);
1696 static int qcow_snapshot_list(BlockDriverState
*bs
,
1697 QEMUSnapshotInfo
**psn_tab
)
1699 BDRVQcowState
*s
= bs
->opaque
;
1700 QEMUSnapshotInfo
*sn_tab
, *sn_info
;
1704 sn_tab
= qemu_mallocz(s
->nb_snapshots
* sizeof(QEMUSnapshotInfo
));
1707 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
1708 sn_info
= sn_tab
+ i
;
1709 sn
= s
->snapshots
+ i
;
1710 pstrcpy(sn_info
->id_str
, sizeof(sn_info
->id_str
),
1712 pstrcpy(sn_info
->name
, sizeof(sn_info
->name
),
1714 sn_info
->vm_state_size
= sn
->vm_state_size
;
1715 sn_info
->date_sec
= sn
->date_sec
;
1716 sn_info
->date_nsec
= sn
->date_nsec
;
1717 sn_info
->vm_clock_nsec
= sn
->vm_clock_nsec
;
1720 return s
->nb_snapshots
;
1727 /*********************************************************/
1728 /* refcount handling */
1730 static int refcount_init(BlockDriverState
*bs
)
1732 BDRVQcowState
*s
= bs
->opaque
;
1733 int ret
, refcount_table_size2
, i
;
1735 s
->refcount_block_cache
= qemu_malloc(s
->cluster_size
);
1736 if (!s
->refcount_block_cache
)
1738 refcount_table_size2
= s
->refcount_table_size
* sizeof(uint64_t);
1739 s
->refcount_table
= qemu_malloc(refcount_table_size2
);
1740 if (!s
->refcount_table
)
1742 if (s
->refcount_table_size
> 0) {
1743 ret
= bdrv_pread(s
->hd
, s
->refcount_table_offset
,
1744 s
->refcount_table
, refcount_table_size2
);
1745 if (ret
!= refcount_table_size2
)
1747 for(i
= 0; i
< s
->refcount_table_size
; i
++)
1748 be64_to_cpus(&s
->refcount_table
[i
]);
1755 static void refcount_close(BlockDriverState
*bs
)
1757 BDRVQcowState
*s
= bs
->opaque
;
1758 qemu_free(s
->refcount_block_cache
);
1759 qemu_free(s
->refcount_table
);
1763 static int load_refcount_block(BlockDriverState
*bs
,
1764 int64_t refcount_block_offset
)
1766 BDRVQcowState
*s
= bs
->opaque
;
1768 ret
= bdrv_pread(s
->hd
, refcount_block_offset
, s
->refcount_block_cache
,
1770 if (ret
!= s
->cluster_size
)
1772 s
->refcount_block_cache_offset
= refcount_block_offset
;
1776 static int get_refcount(BlockDriverState
*bs
, int64_t cluster_index
)
1778 BDRVQcowState
*s
= bs
->opaque
;
1779 int refcount_table_index
, block_index
;
1780 int64_t refcount_block_offset
;
1782 refcount_table_index
= cluster_index
>> (s
->cluster_bits
- REFCOUNT_SHIFT
);
1783 if (refcount_table_index
>= s
->refcount_table_size
)
1785 refcount_block_offset
= s
->refcount_table
[refcount_table_index
];
1786 if (!refcount_block_offset
)
1788 if (refcount_block_offset
!= s
->refcount_block_cache_offset
) {
1789 /* better than nothing: return allocated if read error */
1790 if (load_refcount_block(bs
, refcount_block_offset
) < 0)
1793 block_index
= cluster_index
&
1794 ((1 << (s
->cluster_bits
- REFCOUNT_SHIFT
)) - 1);
1795 return be16_to_cpu(s
->refcount_block_cache
[block_index
]);
1798 /* return < 0 if error */
1799 static int64_t alloc_clusters_noref(BlockDriverState
*bs
, int64_t size
)
1801 BDRVQcowState
*s
= bs
->opaque
;
1804 nb_clusters
= (size
+ s
->cluster_size
- 1) >> s
->cluster_bits
;
1806 if (get_refcount(bs
, s
->free_cluster_index
) == 0) {
1807 s
->free_cluster_index
++;
1808 for(i
= 1; i
< nb_clusters
; i
++) {
1809 if (get_refcount(bs
, s
->free_cluster_index
) != 0)
1811 s
->free_cluster_index
++;
1814 printf("alloc_clusters: size=%lld -> %lld\n",
1816 (s
->free_cluster_index
- nb_clusters
) << s
->cluster_bits
);
1818 return (s
->free_cluster_index
- nb_clusters
) << s
->cluster_bits
;
1821 s
->free_cluster_index
++;
1826 static int64_t alloc_clusters(BlockDriverState
*bs
, int64_t size
)
1830 offset
= alloc_clusters_noref(bs
, size
);
1831 update_refcount(bs
, offset
, size
, 1);
1835 /* only used to allocate compressed sectors. We try to allocate
1836 contiguous sectors. size must be <= cluster_size */
1837 static int64_t alloc_bytes(BlockDriverState
*bs
, int size
)
1839 BDRVQcowState
*s
= bs
->opaque
;
1840 int64_t offset
, cluster_offset
;
1841 int free_in_cluster
;
1843 assert(size
> 0 && size
<= s
->cluster_size
);
1844 if (s
->free_byte_offset
== 0) {
1845 s
->free_byte_offset
= alloc_clusters(bs
, s
->cluster_size
);
1848 free_in_cluster
= s
->cluster_size
-
1849 (s
->free_byte_offset
& (s
->cluster_size
- 1));
1850 if (size
<= free_in_cluster
) {
1851 /* enough space in current cluster */
1852 offset
= s
->free_byte_offset
;
1853 s
->free_byte_offset
+= size
;
1854 free_in_cluster
-= size
;
1855 if (free_in_cluster
== 0)
1856 s
->free_byte_offset
= 0;
1857 if ((offset
& (s
->cluster_size
- 1)) != 0)
1858 update_cluster_refcount(bs
, offset
>> s
->cluster_bits
, 1);
1860 offset
= alloc_clusters(bs
, s
->cluster_size
);
1861 cluster_offset
= s
->free_byte_offset
& ~(s
->cluster_size
- 1);
1862 if ((cluster_offset
+ s
->cluster_size
) == offset
) {
1863 /* we are lucky: contiguous data */
1864 offset
= s
->free_byte_offset
;
1865 update_cluster_refcount(bs
, offset
>> s
->cluster_bits
, 1);
1866 s
->free_byte_offset
+= size
;
1868 s
->free_byte_offset
= offset
;
1875 static void free_clusters(BlockDriverState
*bs
,
1876 int64_t offset
, int64_t size
)
1878 update_refcount(bs
, offset
, size
, -1);
1881 static int grow_refcount_table(BlockDriverState
*bs
, int min_size
)
1883 BDRVQcowState
*s
= bs
->opaque
;
1884 int new_table_size
, new_table_size2
, refcount_table_clusters
, i
, ret
;
1885 uint64_t *new_table
;
1886 int64_t table_offset
;
1890 int64_t old_table_offset
;
1892 if (min_size
<= s
->refcount_table_size
)
1894 /* compute new table size */
1895 refcount_table_clusters
= s
->refcount_table_size
>> (s
->cluster_bits
- 3);
1897 if (refcount_table_clusters
== 0) {
1898 refcount_table_clusters
= 1;
1900 refcount_table_clusters
= (refcount_table_clusters
* 3 + 1) / 2;
1902 new_table_size
= refcount_table_clusters
<< (s
->cluster_bits
- 3);
1903 if (min_size
<= new_table_size
)
1907 printf("grow_refcount_table from %d to %d\n",
1908 s
->refcount_table_size
,
1911 new_table_size2
= new_table_size
* sizeof(uint64_t);
1912 new_table
= qemu_mallocz(new_table_size2
);
1915 memcpy(new_table
, s
->refcount_table
,
1916 s
->refcount_table_size
* sizeof(uint64_t));
1917 for(i
= 0; i
< s
->refcount_table_size
; i
++)
1918 cpu_to_be64s(&new_table
[i
]);
1919 /* Note: we cannot update the refcount now to avoid recursion */
1920 table_offset
= alloc_clusters_noref(bs
, new_table_size2
);
1921 ret
= bdrv_pwrite(s
->hd
, table_offset
, new_table
, new_table_size2
);
1922 if (ret
!= new_table_size2
)
1924 for(i
= 0; i
< s
->refcount_table_size
; i
++)
1925 be64_to_cpus(&new_table
[i
]);
1927 data64
= cpu_to_be64(table_offset
);
1928 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, refcount_table_offset
),
1929 &data64
, sizeof(data64
)) != sizeof(data64
))
1931 data32
= cpu_to_be32(refcount_table_clusters
);
1932 if (bdrv_pwrite(s
->hd
, offsetof(QCowHeader
, refcount_table_clusters
),
1933 &data32
, sizeof(data32
)) != sizeof(data32
))
1935 qemu_free(s
->refcount_table
);
1936 old_table_offset
= s
->refcount_table_offset
;
1937 old_table_size
= s
->refcount_table_size
;
1938 s
->refcount_table
= new_table
;
1939 s
->refcount_table_size
= new_table_size
;
1940 s
->refcount_table_offset
= table_offset
;
1942 update_refcount(bs
, table_offset
, new_table_size2
, 1);
1943 free_clusters(bs
, old_table_offset
, old_table_size
* sizeof(uint64_t));
1946 free_clusters(bs
, table_offset
, new_table_size2
);
1947 qemu_free(new_table
);
1951 /* addend must be 1 or -1 */
1952 /* XXX: cache several refcount block clusters ? */
1953 static int update_cluster_refcount(BlockDriverState
*bs
,
1954 int64_t cluster_index
,
1957 BDRVQcowState
*s
= bs
->opaque
;
1958 int64_t offset
, refcount_block_offset
;
1959 int ret
, refcount_table_index
, block_index
, refcount
;
1962 refcount_table_index
= cluster_index
>> (s
->cluster_bits
- REFCOUNT_SHIFT
);
1963 if (refcount_table_index
>= s
->refcount_table_size
) {
1966 ret
= grow_refcount_table(bs
, refcount_table_index
+ 1);
1970 refcount_block_offset
= s
->refcount_table
[refcount_table_index
];
1971 if (!refcount_block_offset
) {
1974 /* create a new refcount block */
1975 /* Note: we cannot update the refcount now to avoid recursion */
1976 offset
= alloc_clusters_noref(bs
, s
->cluster_size
);
1977 memset(s
->refcount_block_cache
, 0, s
->cluster_size
);
1978 ret
= bdrv_pwrite(s
->hd
, offset
, s
->refcount_block_cache
, s
->cluster_size
);
1979 if (ret
!= s
->cluster_size
)
1981 s
->refcount_table
[refcount_table_index
] = offset
;
1982 data64
= cpu_to_be64(offset
);
1983 ret
= bdrv_pwrite(s
->hd
, s
->refcount_table_offset
+
1984 refcount_table_index
* sizeof(uint64_t),
1985 &data64
, sizeof(data64
));
1986 if (ret
!= sizeof(data64
))
1989 refcount_block_offset
= offset
;
1990 s
->refcount_block_cache_offset
= offset
;
1991 update_refcount(bs
, offset
, s
->cluster_size
, 1);
1993 if (refcount_block_offset
!= s
->refcount_block_cache_offset
) {
1994 if (load_refcount_block(bs
, refcount_block_offset
) < 0)
1998 /* we can update the count and save it */
1999 block_index
= cluster_index
&
2000 ((1 << (s
->cluster_bits
- REFCOUNT_SHIFT
)) - 1);
2001 refcount
= be16_to_cpu(s
->refcount_block_cache
[block_index
]);
2003 if (refcount
< 0 || refcount
> 0xffff)
2005 if (refcount
== 0 && cluster_index
< s
->free_cluster_index
) {
2006 s
->free_cluster_index
= cluster_index
;
2008 s
->refcount_block_cache
[block_index
] = cpu_to_be16(refcount
);
2009 if (bdrv_pwrite(s
->hd
,
2010 refcount_block_offset
+ (block_index
<< REFCOUNT_SHIFT
),
2011 &s
->refcount_block_cache
[block_index
], 2) != 2)
2016 static void update_refcount(BlockDriverState
*bs
,
2017 int64_t offset
, int64_t length
,
2020 BDRVQcowState
*s
= bs
->opaque
;
2021 int64_t start
, last
, cluster_offset
;
2024 printf("update_refcount: offset=%lld size=%lld addend=%d\n",
2025 offset
, length
, addend
);
2029 start
= offset
& ~(s
->cluster_size
- 1);
2030 last
= (offset
+ length
- 1) & ~(s
->cluster_size
- 1);
2031 for(cluster_offset
= start
; cluster_offset
<= last
;
2032 cluster_offset
+= s
->cluster_size
) {
2033 update_cluster_refcount(bs
, cluster_offset
>> s
->cluster_bits
, addend
);
2038 static void inc_refcounts(BlockDriverState
*bs
,
2039 uint16_t *refcount_table
,
2040 int refcount_table_size
,
2041 int64_t offset
, int64_t size
)
2043 BDRVQcowState
*s
= bs
->opaque
;
2044 int64_t start
, last
, cluster_offset
;
2050 start
= offset
& ~(s
->cluster_size
- 1);
2051 last
= (offset
+ size
- 1) & ~(s
->cluster_size
- 1);
2052 for(cluster_offset
= start
; cluster_offset
<= last
;
2053 cluster_offset
+= s
->cluster_size
) {
2054 k
= cluster_offset
>> s
->cluster_bits
;
2055 if (k
< 0 || k
>= refcount_table_size
) {
2056 printf("ERROR: invalid cluster offset=0x%llx\n", cluster_offset
);
2058 if (++refcount_table
[k
] == 0) {
2059 printf("ERROR: overflow cluster offset=0x%llx\n", cluster_offset
);
2065 static int check_refcounts_l1(BlockDriverState
*bs
,
2066 uint16_t *refcount_table
,
2067 int refcount_table_size
,
2068 int64_t l1_table_offset
, int l1_size
,
2071 BDRVQcowState
*s
= bs
->opaque
;
2072 uint64_t *l1_table
, *l2_table
, l2_offset
, offset
, l1_size2
;
2073 int l2_size
, i
, j
, nb_csectors
, refcount
;
2076 l1_size2
= l1_size
* sizeof(uint64_t);
2078 inc_refcounts(bs
, refcount_table
, refcount_table_size
,
2079 l1_table_offset
, l1_size2
);
2081 l1_table
= qemu_malloc(l1_size2
);
2084 if (bdrv_pread(s
->hd
, l1_table_offset
,
2085 l1_table
, l1_size2
) != l1_size2
)
2087 for(i
= 0;i
< l1_size
; i
++)
2088 be64_to_cpus(&l1_table
[i
]);
2090 l2_size
= s
->l2_size
* sizeof(uint64_t);
2091 l2_table
= qemu_malloc(l2_size
);
2094 for(i
= 0; i
< l1_size
; i
++) {
2095 l2_offset
= l1_table
[i
];
2098 refcount
= get_refcount(bs
, (l2_offset
& ~QCOW_OFLAG_COPIED
) >> s
->cluster_bits
);
2099 if ((refcount
== 1) != ((l2_offset
& QCOW_OFLAG_COPIED
) != 0)) {
2100 printf("ERROR OFLAG_COPIED: l2_offset=%llx refcount=%d\n",
2101 l2_offset
, refcount
);
2104 l2_offset
&= ~QCOW_OFLAG_COPIED
;
2105 if (bdrv_pread(s
->hd
, l2_offset
, l2_table
, l2_size
) != l2_size
)
2107 for(j
= 0; j
< s
->l2_size
; j
++) {
2108 offset
= be64_to_cpu(l2_table
[j
]);
2110 if (offset
& QCOW_OFLAG_COMPRESSED
) {
2111 if (offset
& QCOW_OFLAG_COPIED
) {
2112 printf("ERROR: cluster %lld: copied flag must never be set for compressed clusters\n",
2113 offset
>> s
->cluster_bits
);
2114 offset
&= ~QCOW_OFLAG_COPIED
;
2116 nb_csectors
= ((offset
>> s
->csize_shift
) &
2118 offset
&= s
->cluster_offset_mask
;
2119 inc_refcounts(bs
, refcount_table
,
2120 refcount_table_size
,
2121 offset
& ~511, nb_csectors
* 512);
2124 refcount
= get_refcount(bs
, (offset
& ~QCOW_OFLAG_COPIED
) >> s
->cluster_bits
);
2125 if ((refcount
== 1) != ((offset
& QCOW_OFLAG_COPIED
) != 0)) {
2126 printf("ERROR OFLAG_COPIED: offset=%llx refcount=%d\n",
2130 offset
&= ~QCOW_OFLAG_COPIED
;
2131 inc_refcounts(bs
, refcount_table
,
2132 refcount_table_size
,
2133 offset
, s
->cluster_size
);
2137 inc_refcounts(bs
, refcount_table
,
2138 refcount_table_size
,
2143 qemu_free(l1_table
);
2144 qemu_free(l2_table
);
2147 printf("ERROR: I/O error in check_refcounts_l1\n");
2148 qemu_free(l1_table
);
2149 qemu_free(l2_table
);
2153 static void check_refcounts(BlockDriverState
*bs
)
2155 BDRVQcowState
*s
= bs
->opaque
;
2157 int nb_clusters
, refcount1
, refcount2
, i
;
2159 uint16_t *refcount_table
;
2161 size
= bdrv_getlength(s
->hd
);
2162 nb_clusters
= (size
+ s
->cluster_size
- 1) >> s
->cluster_bits
;
2163 refcount_table
= qemu_mallocz(nb_clusters
* sizeof(uint16_t));
2166 inc_refcounts(bs
, refcount_table
, nb_clusters
,
2167 0, s
->cluster_size
);
2169 check_refcounts_l1(bs
, refcount_table
, nb_clusters
,
2170 s
->l1_table_offset
, s
->l1_size
, 1);
2173 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
2174 sn
= s
->snapshots
+ i
;
2175 check_refcounts_l1(bs
, refcount_table
, nb_clusters
,
2176 sn
->l1_table_offset
, sn
->l1_size
, 0);
2178 inc_refcounts(bs
, refcount_table
, nb_clusters
,
2179 s
->snapshots_offset
, s
->snapshots_size
);
2182 inc_refcounts(bs
, refcount_table
, nb_clusters
,
2183 s
->refcount_table_offset
,
2184 s
->refcount_table_size
* sizeof(uint64_t));
2185 for(i
= 0; i
< s
->refcount_table_size
; i
++) {
2187 offset
= s
->refcount_table
[i
];
2189 inc_refcounts(bs
, refcount_table
, nb_clusters
,
2190 offset
, s
->cluster_size
);
2194 /* compare ref counts */
2195 for(i
= 0; i
< nb_clusters
; i
++) {
2196 refcount1
= get_refcount(bs
, i
);
2197 refcount2
= refcount_table
[i
];
2198 if (refcount1
!= refcount2
)
2199 printf("ERROR cluster %d refcount=%d reference=%d\n",
2200 i
, refcount1
, refcount2
);
2203 qemu_free(refcount_table
);
2207 static void dump_refcounts(BlockDriverState
*bs
)
2209 BDRVQcowState
*s
= bs
->opaque
;
2210 int64_t nb_clusters
, k
, k1
, size
;
2213 size
= bdrv_getlength(s
->hd
);
2214 nb_clusters
= (size
+ s
->cluster_size
- 1) >> s
->cluster_bits
;
2215 for(k
= 0; k
< nb_clusters
;) {
2217 refcount
= get_refcount(bs
, k
);
2219 while (k
< nb_clusters
&& get_refcount(bs
, k
) == refcount
)
2221 printf("%lld: refcount=%d nb=%lld\n", k
, refcount
, k
- k1
);
2227 BlockDriver bdrv_qcow2
= {
2229 sizeof(BDRVQcowState
),
2241 .bdrv_aio_read
= qcow_aio_read
,
2242 .bdrv_aio_write
= qcow_aio_write
,
2243 .bdrv_aio_cancel
= qcow_aio_cancel
,
2244 .aiocb_size
= sizeof(QCowAIOCB
),
2245 .bdrv_write_compressed
= qcow_write_compressed
,
2247 .bdrv_snapshot_create
= qcow_snapshot_create
,
2248 .bdrv_snapshot_goto
= qcow_snapshot_goto
,
2249 .bdrv_snapshot_delete
= qcow_snapshot_delete
,
2250 .bdrv_snapshot_list
= qcow_snapshot_list
,
2251 .bdrv_get_info
= qcow_get_info
,