/*
 * Copyright (c) 2010-2011 IBM
 *
 * Chunqiang Tang <ctang@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */
/*=============================================================================
 *  A short description: this module implements the QEMU block device driver
 *  for the Fast Virtual Disk (FVD) format. See the following companion
 *  papers for a detailed description of FVD:
 *  1. The so-called "FVD-cow paper":
 *         "FVD: a High-Performance Virtual Machine Image Format for Cloud",
 *         by Chunqiang Tang, 2010.
 *  2. The so-called "FVD-compact paper":
 *         "FVD: a High-Performance Virtual Machine Image Format for Cloud
 *         with Sparse Image Capability", by Chunqiang Tang, 2010.
 *============================================================================*/
23 #include "qemu/osdep.h"
24 #include "block/fvd.h"
26 //#define ENABLE_TRACE_IO
27 //#define DEBUG_MEMORY_LEAK
28 //#define SIMULATED_TEST_WITH_QEMU_IO
31 #undef DEBUG_MEMORY_LEAK
32 #undef ENABLE_TRACE_IO
33 #undef SIMULATED_TEST_WITH_QEMU_IO
36 /* Use include to avoid exposing too many FVD symbols, and to allow inline
37 * function optimization. */
38 #include "block/fvd-utils.c"
39 #include "block/fvd-debug.c"
40 #include "block/fvd-misc.c"
41 #include "block/fvd-create.c"
42 #include "block/fvd-open.c"
43 #include "block/fvd-read.c"
44 #include "block/fvd-write.c"
45 #include "block/fvd-load.c"
46 #include "block/fvd-store.c"
47 #include "block/fvd-journal.c"
48 #include "block/fvd-prefetch.c"
52 static AIOCBInfo fvd_aio_pool
= {
53 .aiocb_size
= sizeof (FvdAIOCB
),
56 static BlockDriver bdrv_fvd
= {
58 .instance_size
= sizeof (BDRVFvdState
),
59 .bdrv_create
= fvd_create
,
60 .bdrv_probe
= fvd_probe
,
61 .bdrv_file_open
= fvd_open
,
62 .bdrv_close
= fvd_close
,
63 .bdrv_co_get_block_status
= fvd_get_block_status
,
64 .bdrv_co_flush_to_disk
= fvd_flush
,
65 .bdrv_aio_readv
= fvd_aio_readv
,
66 .bdrv_aio_writev
= fvd_aio_writev
,
67 .bdrv_aio_flush
= fvd_aio_flush
,
68 .create_opts
= &fvd_create_opts
,
69 .bdrv_get_info
= fvd_get_info
,
70 .bdrv_update
= fvd_update
,
71 .bdrv_has_zero_init
= fvd_has_zero_init
74 static void bdrv_fvd_init (void)
76 bdrv_register (&bdrv_fvd
);
79 block_init (bdrv_fvd_init
);
82 * Since bdrv_close may not be properly invoked on a VM shutdown, we
83 * use a destructor to flush metadata to disk. This only affects
84 * performance and does not affect correctness.
85 * See Section 3.3.4 of the FVD-cow paper for the rationale.
87 extern QTAILQ_HEAD (, BlockDriverState
) bdrv_states
;
88 static void __attribute__ ((destructor
)) flush_fvd_bitmap_to_disk (void)
90 BlockDriverState
*bs
= NULL
;
91 while ((bs
= bdrv_next(bs
))) {
92 if (bs
->drv
== &bdrv_fvd
) {
93 flush_metadata_to_disk_on_exit (bs
);
96 dump_resource_summary (bs
->opaque
);
/*
 * TODOs: Below are some potential enhancements for future development:
 * 1. Handle storage leak on failure.
 *
 * 2. Profile-directed prefetch. See Section 3.4.1 of the FVD-cow paper.
 *    Related metadata are FvdHeader.prefetch_profile_offset,
 *    FvdHeader.prefetch_profile_entries,
 *    FvdHeader.profile_directed_prefetch_start_delay, and
 *    FvdHeader.generate_prefetch_profile.
 *
 * 3. Cap the prefetch throughput at the upper limit. See Section 3.4.2 of
 *    the FVD-cow paper. Related metadata are
 *    FvdHeader.prefetch_max_read_throughput and
 *    FvdHeader.prefetch_max_write_throughput.
 *
 * 4. Support write through to the base image. When a VM issues a write
 *    request, in addition to saving the data in the FVD data file, also save
 *    the data in the base image if the address of the write request is not
 *    beyond the size of the base image (this of course requires the base
 *    image NOT to be 'read_only'). This feature changes the semantics of
 *    copy-on-write, but it suits a different use case, where the base image
 *    is stored on a remote storage server, and the FVD image is stored on a
 *    local disk and acts as a write-through cache of the base image. This can
 *    be used to cache and improve the performance of persistent storage on
 *    network-attached storage, e.g., Amazon EBS. This feature is not
 *    described in the FVD-cow paper as it would complicate the discussion.
 *    Related metadata is FvdHeader.write_updates_base_img.
 */