2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "block/block_int.h"
29 #include "qemu/bswap.h"
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
/*
 * Release the strings owned by the in-memory snapshot table: for each of the
 * s->nb_snapshots entries of s->snapshots (s is bs->opaque), g_free() the
 * name and the id_str.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers jump
 * from 40 to 47), so whatever follows the loop is not visible here.
 */
33 void qcow2_free_snapshots(BlockDriverState
*bs
)
35 BDRVQcow2State
*s
= bs
->opaque
;
38 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
39 g_free(s
->snapshots
[i
].name
);
40 g_free(s
->snapshots
[i
].id_str
);
/*
 * Read the on-disk snapshot table into s->snapshots.
 *
 * The table starts at s->snapshots_offset and holds s->nb_snapshots entries.
 * Each entry is read from an 8-byte aligned offset and consists of a fixed
 * size header (h), extra_data_size bytes of extra data, the ID string and
 * the name string. Multi-byte header fields are converted from big endian,
 * and both strings are NUL-terminated in memory. At the end
 * s->snapshots_size is set to the number of bytes the table spans, which is
 * asserted to fit in an int.
 *
 * NOTE(review): this listing skips source lines. The declarations of h, sn,
 * offset and ret, the checks of ret and the return statements are not
 * visible, so the exact error handling cannot be stated from here.
 */
47 int qcow2_read_snapshots(BlockDriverState
*bs
)
49 BDRVQcow2State
*s
= bs
->opaque
;
51 QCowSnapshotExtraData extra
;
53 int i
, id_str_size
, name_size
;
55 uint32_t extra_data_size
;
/* No snapshots: the table size is recorded as 0 */
58 if (!s
->nb_snapshots
) {
60 s
->snapshots_size
= 0;
64 offset
= s
->snapshots_offset
;
/* g_new0() zero-fills the array, so untouched entries hold NULL strings */
65 s
->snapshots
= g_new0(QCowSnapshot
, s
->nb_snapshots
);
67 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
68 /* Read statically sized part of the snapshot header */
69 offset
= ROUND_UP(offset
, 8);
70 ret
= bdrv_pread(bs
->file
, offset
, &h
, sizeof(h
));
76 sn
= s
->snapshots
+ i
;
77 sn
->l1_table_offset
= be64_to_cpu(h
.l1_table_offset
);
78 sn
->l1_size
= be32_to_cpu(h
.l1_size
);
79 sn
->vm_state_size
= be32_to_cpu(h
.vm_state_size
);
80 sn
->date_sec
= be32_to_cpu(h
.date_sec
);
81 sn
->date_nsec
= be32_to_cpu(h
.date_nsec
);
82 sn
->vm_clock_nsec
= be64_to_cpu(h
.vm_clock_nsec
);
83 extra_data_size
= be32_to_cpu(h
.extra_data_size
);
85 id_str_size
= be16_to_cpu(h
.id_str_size
);
86 name_size
= be16_to_cpu(h
.name_size
);
/*
 * Read at most sizeof(extra) bytes of extra data; the offset below still
 * advances by the full extra_data_size, so any excess is stepped over.
 */
89 ret
= bdrv_pread(bs
->file
, offset
, &extra
,
90 MIN(sizeof(extra
), extra_data_size
));
94 offset
+= extra_data_size
;
/*
 * With at least 8 bytes of extra data, the 64-bit VM state size replaces
 * the 32-bit value taken from the header above.
 */
96 if (extra_data_size
>= 8) {
97 sn
->vm_state_size
= be64_to_cpu(extra
.vm_state_size_large
);
/* With at least 16 bytes the entry carries its own disk size */
100 if (extra_data_size
>= 16) {
101 sn
->disk_size
= be64_to_cpu(extra
.disk_size
);
/*
 * NOTE(review): presumably the else branch (its line is not visible): fall
 * back to bs->total_sectors * BDRV_SECTOR_SIZE.
 */
103 sn
->disk_size
= bs
->total_sectors
* BDRV_SECTOR_SIZE
;
106 /* Read snapshot ID */
107 sn
->id_str
= g_malloc(id_str_size
+ 1);
108 ret
= bdrv_pread(bs
->file
, offset
, sn
->id_str
, id_str_size
);
112 offset
+= id_str_size
;
113 sn
->id_str
[id_str_size
] = '\0';
115 /* Read snapshot name */
116 sn
->name
= g_malloc(name_size
+ 1);
117 ret
= bdrv_pread(bs
->file
, offset
, sn
->name
, name_size
);
122 sn
->name
[name_size
] = '\0';
/*
 * The table has grown beyond QCOW_MAX_SNAPSHOTS_SIZE; how this case is
 * handled is not visible in this listing.
 */
124 if (offset
- s
->snapshots_offset
> QCOW_MAX_SNAPSHOTS_SIZE
) {
130 assert(offset
- s
->snapshots_offset
<= INT_MAX
);
131 s
->snapshots_size
= offset
- s
->snapshots_offset
;
/*
 * NOTE(review): presumably the failure path (label not visible): release the
 * strings loaded so far.
 */
135 qcow2_free_snapshots(bs
);
/*
 * Serialise the in-memory snapshot list (s->snapshots) into a new on-disk
 * table and make the image header point to it.
 *
 * Visible sequence:
 *  1. Add up the size of all entries (8-byte aligned, extra data, ID, name)
 *     and compare it against QCOW_MAX_SNAPSHOTS_SIZE.
 *  2. Allocate clusters for the table, flush, and run an overlap check on
 *     the new range.
 *  3. Write header, extra data, ID and name of every snapshot.
 *  4. Flush, then write nb_snapshots and snapshots_offset into the image
 *     header with one bdrv_pwrite_sync().
 *  5. Free the clusters of the old table and record the new offset and size
 *     in s.
 *
 * NOTE(review): this listing skips source lines. Checks of ret, goto and
 * return statements and a few declarations (sn, ret) are not visible.
 */
139 /* Write the snapshot list to newly allocated clusters */
140 static int qcow2_write_snapshots(BlockDriverState
*bs
)
142 BDRVQcow2State
*s
= bs
->opaque
;
144 QCowSnapshotHeader h
;
145 QCowSnapshotExtraData extra
;
146 int i
, name_size
, id_str_size
, snapshots_size
;
/*
 * Packed pair of header fields, laid out so that both can be written to the
 * image header in a single call (see the bdrv_pwrite_sync() further down).
 */
148 uint32_t nb_snapshots
;
149 uint64_t snapshots_offset
;
150 } QEMU_PACKED header_data
;
/* snapshots_offset stays 0 until clusters have been allocated */
151 int64_t offset
, snapshots_offset
= 0;
154 /* compute the size of the snapshots */
156 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
157 sn
= s
->snapshots
+ i
;
158 offset
= ROUND_UP(offset
, 8);
160 offset
+= sizeof(extra
);
161 offset
+= strlen(sn
->id_str
);
162 offset
+= strlen(sn
->name
);
164 if (offset
> QCOW_MAX_SNAPSHOTS_SIZE
) {
170 assert(offset
<= INT_MAX
);
171 snapshots_size
= offset
;
173 /* Allocate space for the new snapshot list */
174 snapshots_offset
= qcow2_alloc_clusters(bs
, snapshots_size
);
175 offset
= snapshots_offset
;
180 ret
= bdrv_flush(bs
);
185 /* The snapshot list position has not yet been updated, so these clusters
186 * must indeed be completely free */
187 ret
= qcow2_pre_write_overlap_check(bs
, 0, offset
, snapshots_size
, false);
193 /* Write all snapshots to the new list */
194 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
195 sn
= s
->snapshots
+ i
;
196 memset(&h
, 0, sizeof(h
));
197 h
.l1_table_offset
= cpu_to_be64(sn
->l1_table_offset
);
198 h
.l1_size
= cpu_to_be32(sn
->l1_size
);
199 /* If it doesn't fit in 32 bit, older implementations should treat it
200 * as a disk-only snapshot rather than truncate the VM state */
201 if (sn
->vm_state_size
<= 0xffffffff) {
202 h
.vm_state_size
= cpu_to_be32(sn
->vm_state_size
);
204 h
.date_sec
= cpu_to_be32(sn
->date_sec
);
205 h
.date_nsec
= cpu_to_be32(sn
->date_nsec
);
206 h
.vm_clock_nsec
= cpu_to_be64(sn
->vm_clock_nsec
);
207 h
.extra_data_size
= cpu_to_be32(sizeof(extra
));
/* The full 64-bit VM state size always goes into the extra data */
209 memset(&extra
, 0, sizeof(extra
));
210 extra
.vm_state_size_large
= cpu_to_be64(sn
->vm_state_size
);
211 extra
.disk_size
= cpu_to_be64(sn
->disk_size
);
/* Both lengths are stored in 16-bit header fields */
213 id_str_size
= strlen(sn
->id_str
);
214 name_size
= strlen(sn
->name
);
215 assert(id_str_size
<= UINT16_MAX
&& name_size
<= UINT16_MAX
);
216 h
.id_str_size
= cpu_to_be16(id_str_size
);
217 h
.name_size
= cpu_to_be16(name_size
);
218 offset
= ROUND_UP(offset
, 8);
220 ret
= bdrv_pwrite(bs
->file
, offset
, &h
, sizeof(h
));
226 ret
= bdrv_pwrite(bs
->file
, offset
, &extra
, sizeof(extra
));
230 offset
+= sizeof(extra
);
232 ret
= bdrv_pwrite(bs
->file
, offset
, sn
->id_str
, id_str_size
);
236 offset
+= id_str_size
;
238 ret
= bdrv_pwrite(bs
->file
, offset
, sn
->name
, name_size
);
246 * Update the header to point to the new snapshot table. This requires the
247 * new table and its refcounts to be stable on disk.
249 ret
= bdrv_flush(bs
);
/*
 * Compile-time check that snapshots_offset directly follows nb_snapshots in
 * QCowHeader, which is what allows header_data to be written in one piece.
 */
254 QEMU_BUILD_BUG_ON(offsetof(QCowHeader
, snapshots_offset
) !=
255 offsetof(QCowHeader
, nb_snapshots
) + sizeof(header_data
.nb_snapshots
));
257 header_data
.nb_snapshots
= cpu_to_be32(s
->nb_snapshots
);
258 header_data
.snapshots_offset
= cpu_to_be64(snapshots_offset
);
260 ret
= bdrv_pwrite_sync(bs
->file
, offsetof(QCowHeader
, nb_snapshots
),
261 &header_data
, sizeof(header_data
));
266 /* free the old snapshot table */
267 qcow2_free_clusters(bs
, s
->snapshots_offset
, s
->snapshots_size
,
268 QCOW2_DISCARD_SNAPSHOT
);
269 s
->snapshots_offset
= snapshots_offset
;
270 s
->snapshots_size
= snapshots_size
;
/*
 * NOTE(review): presumably the failure path (label not visible): give back
 * the clusters of the new table if any were allocated.
 */
274 if (snapshots_offset
> 0) {
275 qcow2_free_clusters(bs
, snapshots_offset
, snapshots_size
,
276 QCOW2_DISCARD_ALWAYS
);
/*
 * Write a new snapshot ID into id_str, a buffer of id_str_size bytes.
 *
 * The ID string of every existing snapshot is parsed with strtoul() in
 * base 10, and the value written is id_max + 1 formatted as a decimal number.
 *
 * NOTE(review): the statement that updates id_max from id is not visible in
 * this listing; presumably id_max ends up as the largest parsed ID.
 */
281 static void find_new_snapshot_id(BlockDriverState
*bs
,
282 char *id_str
, int id_str_size
)
284 BDRVQcow2State
*s
= bs
->opaque
;
287 unsigned long id
, id_max
= 0;
289 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
290 sn
= s
->snapshots
+ i
;
291 id
= strtoul(sn
->id_str
, NULL
, 10);
/* snprintf() bounds the output to the size of the buffer */
295 snprintf(id_str
, id_str_size
, "%lu", id_max
+ 1);
/*
 * Look up a snapshot in s->snapshots by ID and/or name.
 *
 * Three scans are visible: one that requires both the ID and the name to
 * match, one that compares the ID only, and one that compares the name only.
 * Callers in this file pass NULL for a criterion they do not use and treat a
 * negative result as not found.
 *
 * NOTE(review): the declarations of the id and name parameters, the
 * conditions that select which scan runs, and the return statements are not
 * visible in this listing.
 */
298 static int find_snapshot_by_id_and_name(BlockDriverState
*bs
,
302 BDRVQcow2State
*s
= bs
->opaque
;
/* Scan 1: both ID and name must match */
306 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
307 if (!strcmp(s
->snapshots
[i
].id_str
, id
) &&
308 !strcmp(s
->snapshots
[i
].name
, name
)) {
/* Scan 2: match on ID only */
313 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
314 if (!strcmp(s
->snapshots
[i
].id_str
, id
)) {
/* Scan 3: match on name only */
319 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
320 if (!strcmp(s
->snapshots
[i
].name
, name
)) {
/*
 * Look up a snapshot by a single string that may be either its ID or its
 * name. The string is tried as an ID first and then as a name.
 *
 * NOTE(review): the check of ret between the two lookups is not visible in
 * this listing; presumably a successful ID lookup is returned directly.
 */
329 static int find_snapshot_by_id_or_name(BlockDriverState
*bs
,
330 const char *id_or_name
)
334 ret
= find_snapshot_by_id_and_name(bs
, id_or_name
, NULL
);
338 return find_snapshot_by_id_and_name(bs
, NULL
, id_or_name
);
/*
 * Create a new internal snapshot described by sn_info.
 *
 * Visible sequence:
 *  1. Test the snapshot count against QCOW_MAX_SNAPSHOTS and test
 *     has_data_file(bs); the bodies of both branches are not visible.
 *  2. Fill a local QCowSnapshot (sn1) from sn_info, duplicating the ID and
 *     name strings.
 *  3. Allocate clusters for a copy of the active L1 table, convert the table
 *     to big endian, overlap-check the target range and write it.
 *  4. Update refcounts for everything reachable from the active L1 table
 *     with an addend of 1.
 *  5. Append the snapshot to a newly allocated list and write the list with
 *     qcow2_write_snapshots(); the old list is put back if that fails.
 *  6. Discard the VM state area of the active image.
 *
 * NOTE(review): this listing skips source lines. Checks of ret, goto and
 * return statements and the cleanup code are not visible.
 */
341 /* if no id is provided, a new one is constructed */
342 int qcow2_snapshot_create(BlockDriverState
*bs
, QEMUSnapshotInfo
*sn_info
)
344 BDRVQcow2State
*s
= bs
->opaque
;
345 QCowSnapshot
*new_snapshot_list
= NULL
;
346 QCowSnapshot
*old_snapshot_list
= NULL
;
347 QCowSnapshot sn1
, *sn
= &sn1
;
349 uint64_t *l1_table
= NULL
;
350 int64_t l1_table_offset
;
352 if (s
->nb_snapshots
>= QCOW_MAX_SNAPSHOTS
) {
356 if (has_data_file(bs
)) {
360 memset(sn
, 0, sizeof(*sn
));
/*
 * NOTE(review): the condition guarding this call is not visible; going by
 * the comment above the function it runs when no ID was provided.
 */
363 find_new_snapshot_id(bs
, sn_info
->id_str
, sizeof(sn_info
->id_str
));
365 /* Populate sn with passed data */
366 sn
->id_str
= g_strdup(sn_info
->id_str
);
367 sn
->name
= g_strdup(sn_info
->name
);
369 sn
->disk_size
= bs
->total_sectors
* BDRV_SECTOR_SIZE
;
370 sn
->vm_state_size
= sn_info
->vm_state_size
;
371 sn
->date_sec
= sn_info
->date_sec
;
372 sn
->date_nsec
= sn_info
->date_nsec
;
373 sn
->vm_clock_nsec
= sn_info
->vm_clock_nsec
;
375 /* Allocate the L1 table of the snapshot and copy the current one there. */
376 l1_table_offset
= qcow2_alloc_clusters(bs
, s
->l1_size
* sizeof(uint64_t));
/* A negative value from the allocator is an error code */
377 if (l1_table_offset
< 0) {
378 ret
= l1_table_offset
;
382 sn
->l1_table_offset
= l1_table_offset
;
383 sn
->l1_size
= s
->l1_size
;
/* An empty L1 table may legitimately yield NULL, hence the size test */
385 l1_table
= g_try_new(uint64_t, s
->l1_size
);
386 if (s
->l1_size
&& l1_table
== NULL
) {
/* Build a big-endian copy of the in-memory L1 table for writing */
391 for(i
= 0; i
< s
->l1_size
; i
++) {
392 l1_table
[i
] = cpu_to_be64(s
->l1_table
[i
]);
395 ret
= qcow2_pre_write_overlap_check(bs
, 0, sn
->l1_table_offset
,
396 s
->l1_size
* sizeof(uint64_t), false);
401 ret
= bdrv_pwrite(bs
->file
, sn
->l1_table_offset
, l1_table
,
402 s
->l1_size
* sizeof(uint64_t));
411 * Increase the refcounts of all clusters and make sure everything is
412 * stable on disk before updating the snapshot table to contain a pointer
413 * to the new L1 table.
415 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 1);
420 /* Append the new snapshot to the snapshot list */
421 new_snapshot_list
= g_new(QCowSnapshot
, s
->nb_snapshots
+ 1);
423 memcpy(new_snapshot_list
, s
->snapshots
,
424 s
->nb_snapshots
* sizeof(QCowSnapshot
));
425 old_snapshot_list
= s
->snapshots
;
427 s
->snapshots
= new_snapshot_list
;
428 s
->snapshots
[s
->nb_snapshots
++] = *sn
;
430 ret
= qcow2_write_snapshots(bs
);
/*
 * NOTE(review): presumably executed only when the write failed (the check
 * of ret is not visible): drop the new list and reinstate the old one.
 */
432 g_free(s
->snapshots
);
433 s
->snapshots
= old_snapshot_list
;
438 g_free(old_snapshot_list
);
440 /* The VM state isn't needed any more in the active L1 table; in fact, it
441 * hurts by causing expensive COW for the next snapshot. */
442 qcow2_cluster_discard(bs
, qcow2_vm_state_offset(s
),
443 ROUND_UP(sn
->vm_state_size
, s
->cluster_size
),
444 QCOW2_DISCARD_NEVER
, false);
/*
 * NOTE(review): refcount check whose result is stored in a local; any
 * enclosing conditional is not visible in this listing.
 */
448 BdrvCheckResult result
= {0};
449 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Revert the active image to the state stored in a snapshot.
 *
 * snapshot_id is looked up with find_snapshot_by_id_or_name(), so it may be
 * either the ID or the name of the snapshot.
 *
 * Visible sequence:
 *  1. Validate the L1 table of the snapshot with qcow2_validate_table() and
 *     compare its disk size with the current image size.
 *  2. Grow the active L1 table to hold at least sn->l1_size entries.
 *  3. Read the L1 table of the snapshot into a zero-filled buffer that is as
 *     large as the active L1 table.
 *  4. Update refcounts for the snapshot table, overlap-check the active L1
 *     range and overwrite the active L1 table on disk.
 *  5. Update refcounts for the old active table, then convert the buffer
 *     into s->l1_table.
 *  6. Update refcounts with an addend of 0 to refresh QCOW_OFLAG_COPIED.
 *
 * NOTE(review): this listing skips source lines. Checks of ret, goto and
 * return statements, and the trailing arguments of several calls are not
 * visible.
 */
462 /* Copy the snapshot identified by snapshot_id into the current disk image */
463 int qcow2_snapshot_goto(BlockDriverState
*bs
, const char *snapshot_id
)
465 BDRVQcow2State
*s
= bs
->opaque
;
467 Error
*local_err
= NULL
;
468 int i
, snapshot_index
;
469 int cur_l1_bytes
, sn_l1_bytes
;
471 uint64_t *sn_l1_table
= NULL
;
473 if (has_data_file(bs
)) {
477 /* Search the snapshot */
478 snapshot_index
= find_snapshot_by_id_or_name(bs
, snapshot_id
);
479 if (snapshot_index
< 0) {
482 sn
= &s
->snapshots
[snapshot_index
];
484 ret
= qcow2_validate_table(bs
, sn
->l1_table_offset
, sn
->l1_size
,
485 sizeof(uint64_t), QCOW_MAX_L1_SIZE
,
486 "Snapshot L1 table", &local_err
);
488 error_report_err(local_err
);
492 if (sn
->disk_size
!= bs
->total_sectors
* BDRV_SECTOR_SIZE
) {
493 error_report("qcow2: Loading snapshots with different disk "
494 "size is not implemented");
500 * Make sure that the current L1 table is big enough to contain the whole
501 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
502 * current one must be padded with zeros.
504 ret
= qcow2_grow_l1_table(bs
, sn
->l1_size
, true);
509 cur_l1_bytes
= s
->l1_size
* sizeof(uint64_t);
510 sn_l1_bytes
= sn
->l1_size
* sizeof(uint64_t);
513 * Copy the snapshot L1 table to the current L1 table.
515 * Before overwriting the old current L1 table on disk, make sure to
516 * increase all refcounts for the clusters referenced by the new one.
517 * Decrease the refcount referenced by the old one only when the L1
518 * table is overwritten.
/*
 * The buffer has the size of the active table and is zero-filled, so the
 * part beyond sn_l1_bytes stays zero after the read below.
 */
520 sn_l1_table
= g_try_malloc0(cur_l1_bytes
);
521 if (cur_l1_bytes
&& sn_l1_table
== NULL
) {
526 ret
= bdrv_pread(bs
->file
, sn
->l1_table_offset
,
527 sn_l1_table
, sn_l1_bytes
);
532 ret
= qcow2_update_snapshot_refcount(bs
, sn
->l1_table_offset
,
538 ret
= qcow2_pre_write_overlap_check(bs
, QCOW2_OL_ACTIVE_L1
,
539 s
->l1_table_offset
, cur_l1_bytes
,
545 ret
= bdrv_pwrite_sync(bs
->file
, s
->l1_table_offset
, sn_l1_table
,
552 * Decrease refcount of clusters of current L1 table.
554 * At this point, the in-memory s->l1_table points to the old L1 table,
555 * whereas on disk we already have the new one.
557 * qcow2_update_snapshot_refcount special cases the current L1 table to use
558 * the in-memory data instead of really using the offset to load a new one,
559 * which is why this works.
561 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
,
565 * Now update the in-memory L1 table to be in sync with the on-disk one. We
566 * need to do this even if updating refcounts failed.
568 for(i
= 0;i
< s
->l1_size
; i
++) {
569 s
->l1_table
[i
] = be64_to_cpu(sn_l1_table
[i
]);
580 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
581 * when we decreased the refcount of the old snapshot.
583 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 0);
/*
 * NOTE(review): refcount check whose result is stored in a local; any
 * enclosing conditional is not visible in this listing.
 */
590 BdrvCheckResult result
= {0};
591 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Delete the snapshot selected by snapshot_id and name.
 *
 * Visible sequence:
 *  1. Look the snapshot up with find_snapshot_by_id_and_name(); a negative
 *     index is reported through errp.
 *  2. Copy the entry into the local sn and validate its L1 table.
 *  3. Remove the entry from s->snapshots with memmove() and rewrite the
 *     on-disk list with qcow2_write_snapshots().
 *  4. Update refcounts for the L1 table of the snapshot and free the
 *     clusters holding that table.
 *  5. Update refcounts of the active L1 table with an addend of 0 to refresh
 *     the copied flag.
 *
 * NOTE(review): this listing skips source lines. The declarations of name,
 * errp and sn, the checks of ret, the return statements and the trailing
 * arguments of one call are not visible.
 */
601 int qcow2_snapshot_delete(BlockDriverState
*bs
,
602 const char *snapshot_id
,
606 BDRVQcow2State
*s
= bs
->opaque
;
608 int snapshot_index
, ret
;
610 if (has_data_file(bs
)) {
614 /* Search the snapshot */
615 snapshot_index
= find_snapshot_by_id_and_name(bs
, snapshot_id
, name
);
616 if (snapshot_index
< 0) {
617 error_setg(errp
, "Can't find the snapshot");
/*
 * sn is a copy of the entry, not a pointer: its fields are still used below
 * after the entry has been removed from the array.
 */
620 sn
= s
->snapshots
[snapshot_index
];
622 ret
= qcow2_validate_table(bs
, sn
.l1_table_offset
, sn
.l1_size
,
623 sizeof(uint64_t), QCOW_MAX_L1_SIZE
,
624 "Snapshot L1 table", errp
);
629 /* Remove it from the snapshot list */
630 memmove(s
->snapshots
+ snapshot_index
,
631 s
->snapshots
+ snapshot_index
+ 1,
632 (s
->nb_snapshots
- snapshot_index
- 1) * sizeof(sn
));
634 ret
= qcow2_write_snapshots(bs
);
636 error_setg_errno(errp
, -ret
,
637 "Failed to remove snapshot from snapshot list");
642 * The snapshot is now unused, clean up. If we fail after this point, we
643 * won't recover but just leak clusters.
649 * Now decrease the refcounts of clusters referenced by the snapshot and
652 ret
= qcow2_update_snapshot_refcount(bs
, sn
.l1_table_offset
,
655 error_setg_errno(errp
, -ret
, "Failed to free the cluster and L1 table");
658 qcow2_free_clusters(bs
, sn
.l1_table_offset
, sn
.l1_size
* sizeof(uint64_t),
659 QCOW2_DISCARD_SNAPSHOT
);
661 /* must update the copied flag on the current cluster offsets */
662 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 0);
664 error_setg_errno(errp
, -ret
,
665 "Failed to update snapshot status in disk");
671 BdrvCheckResult result
= {0};
672 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Build a QEMUSnapshotInfo array describing all snapshots of the image.
 *
 * A zero-filled array of s->nb_snapshots elements is allocated with g_new0()
 * and each element receives the ID string, name, VM state size, date and VM
 * clock of the matching entry in s->snapshots. The function returns
 * s->nb_snapshots, also when there are no snapshots.
 *
 * NOTE(review): this listing skips source lines. The body of the
 * has_data_file() branch, the source arguments of the pstrcpy() calls and
 * the store through psn_tab are not visible.
 */
678 int qcow2_snapshot_list(BlockDriverState
*bs
, QEMUSnapshotInfo
**psn_tab
)
680 BDRVQcow2State
*s
= bs
->opaque
;
681 QEMUSnapshotInfo
*sn_tab
, *sn_info
;
685 if (has_data_file(bs
)) {
/* Without snapshots the count (zero) is returned right away */
688 if (!s
->nb_snapshots
) {
690 return s
->nb_snapshots
;
693 sn_tab
= g_new0(QEMUSnapshotInfo
, s
->nb_snapshots
);
694 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
695 sn_info
= sn_tab
+ i
;
696 sn
= s
->snapshots
+ i
;
/* pstrcpy() is given the size of the fixed destination buffer */
697 pstrcpy(sn_info
->id_str
, sizeof(sn_info
->id_str
),
699 pstrcpy(sn_info
->name
, sizeof(sn_info
->name
),
701 sn_info
->vm_state_size
= sn
->vm_state_size
;
702 sn_info
->date_sec
= sn
->date_sec
;
703 sn_info
->date_nsec
= sn
->date_nsec
;
704 sn_info
->vm_clock_nsec
= sn
->vm_clock_nsec
;
707 return s
->nb_snapshots
;
/*
 * Replace the in-memory L1 table of a read-only image with the L1 table of
 * a snapshot.
 *
 * Visible sequence:
 *  1. Assert that bs is read-only.
 *  2. Look the snapshot up with find_snapshot_by_id_and_name() and validate
 *     its L1 table.
 *  3. Allocate an aligned buffer (size rounded up to a multiple of 512) and
 *     read the L1 table of the snapshot into it.
 *  4. Free the old s->l1_table, then set s->l1_size, s->l1_table_offset and
 *     s->l1_table to the values of the snapshot.
 *  5. Convert every entry of the new table from big endian in place.
 *
 * No write call appears in the visible lines.
 *
 * NOTE(review): this listing skips source lines. The declarations of name,
 * errp, sn, ret and new_l1_bytes, the checks of ret and the return
 * statements are not visible.
 */
710 int qcow2_snapshot_load_tmp(BlockDriverState
*bs
,
711 const char *snapshot_id
,
715 int i
, snapshot_index
;
716 BDRVQcow2State
*s
= bs
->opaque
;
718 uint64_t *new_l1_table
;
722 assert(bs
->read_only
);
724 /* Search the snapshot */
725 snapshot_index
= find_snapshot_by_id_and_name(bs
, snapshot_id
, name
);
726 if (snapshot_index
< 0) {
728 "Can't find snapshot");
731 sn
= &s
->snapshots
[snapshot_index
];
733 /* Allocate and read in the snapshot's L1 table */
734 ret
= qcow2_validate_table(bs
, sn
->l1_table_offset
, sn
->l1_size
,
735 sizeof(uint64_t), QCOW_MAX_L1_SIZE
,
736 "Snapshot L1 table", errp
);
740 new_l1_bytes
= sn
->l1_size
* sizeof(uint64_t);
741 new_l1_table
= qemu_try_blockalign(bs
->file
->bs
,
742 ROUND_UP(new_l1_bytes
, 512));
743 if (new_l1_table
== NULL
) {
747 ret
= bdrv_pread(bs
->file
, sn
->l1_table_offset
,
748 new_l1_table
, new_l1_bytes
);
/*
 * NOTE(review): presumably the branch for a failed read (the check of ret
 * is not visible): report the error and release the buffer.
 */
750 error_setg(errp
, "Failed to read l1 table for snapshot");
751 qemu_vfree(new_l1_table
);
755 /* Switch the L1 table */
756 qemu_vfree(s
->l1_table
);
758 s
->l1_size
= sn
->l1_size
;
759 s
->l1_table_offset
= sn
->l1_table_offset
;
760 s
->l1_table
= new_l1_table
;
/* The table was read as stored on disk; convert each entry in place */
762 for(i
= 0;i
< s
->l1_size
; i
++) {
763 be64_to_cpus(&s
->l1_table
[i
]);