2 * Copyright (c) 2010-2011 IBM
5 * Chunqiang Tang <ctang@us.ibm.com>
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 * See the COPYING file in the top-level directory.
11 /*=============================================================================
12 * A short description: this module implements bdrv_create() for FVD.
13 *============================================================================*/
/* Forward declaration; no definition is visible in this part of the file.
 * fvd_create() below calls it with the number of table entries and raises
 * journal_size to the returned value when journal_size is smaller. */
15 static inline int64_t calc_min_journal_size (int64_t table_entries
);
/* Forward declaration of the zero-region scanner defined later in this file.
 * fvd_create() calls it after the image metadata has been written, and only
 * when a block driver state is open and hole_size is positive. */
16 static inline int search_holes(const char *filename
, size_t bitmap_size
,
17 int32_t bitmap_start_offset
, BlockDriverState
* bs
,
18 int64_t nb_sectors
, int32_t hole_size
, int32_t block_size
);
/* Create a new FVD image file.
 *
 * Visible behaviour: a header buffer is allocated and filled from the
 * creation options, the sizes and offsets of the bitmap, table and journal
 * are derived, and then the header, bitmap, table and journal are written to
 * `filename` in that order. If a block driver state is open and
 * detect_sparse_hole is positive, search_holes() is run afterwards.
 *
 * filename: path of the image; opened with O_WRONLY | O_CREAT | O_TRUNC.
 * opts:     creation options, read through the qemu_opt_get_..._del getters.
 * errp:     not referenced in any visible statement of this function.
 *
 * NOTE(review): several statements of this function (braces, error exits,
 * some declarations, the return statements) are not visible in this copy of
 * the file. The comments below describe only the statements that are
 * visible; return values and exact branch structure need to be confirmed
 * against the complete source. */
20 static int fvd_create(const char *filename
, QemuOpts
*opts
, Error
**errp
)
24 int64_t virtual_disk_size
;
26 const char *add_storage_cmd
;
28 const char *base_img_fmt
;
29 const char *data_file
;
30 const char *data_file_fmt
;
33 int prefetch_start_delay
;
34 int64_t prefetch_profile_size
= 0;
35 BlockDriverState
*bs
= NULL
;
37 int64_t base_img_size
= 0;
38 int64_t table_size
= 0;
/* The on-disk header occupies sizeof(FvdHeader) rounded up to a multiple of
 * DEF_PAGE_SIZE; the buffer is allocated at that rounded size (presumably
 * zero-filled, going by the allocator's name -- TODO confirm). */
42 header_size
= sizeof (FvdHeader
);
43 header_size
= ROUND_UP (header_size
, DEF_PAGE_SIZE
);
44 header
= my_qemu_mallocz (header_size
);
46 /* Read out options */
47 virtual_disk_size
= qemu_opt_get_size_del(opts
, BLOCK_OPT_SIZE
,
/* NOTE(review): prefetch_start_delay is an int and its option is declared
 * as QEMU_OPT_NUMBER in fvd_create_opts, yet it is read with the size
 * getter here -- confirm this is intended. */
49 prefetch_start_delay
= qemu_opt_get_size_del(opts
, "prefetch_start_delay",
/* Any non-positive delay is normalised to -1. */
51 if (prefetch_start_delay
<= 0) {
52 prefetch_start_delay
= -1;
54 base_img
= qemu_opt_get_del(opts
, BLOCK_OPT_BACKING_FILE
);
55 base_img_fmt
= qemu_opt_get_del(opts
, BLOCK_OPT_BACKING_FMT
);
56 copy_on_read
= qemu_opt_get_bool_del(opts
, "copy_on_read",
58 data_file
= qemu_opt_get_del(opts
, "data_file");
59 data_file_fmt
= qemu_opt_get_del(opts
, "data_file_fmt");
60 hole_size
= qemu_opt_get_size_del(opts
, "detect_sparse_hole",
62 header
->compact_image
= qemu_opt_get_bool_del(opts
, "compact_image",
64 block_size
= qemu_opt_get_size_del(opts
, "block_size",
66 header
->chunk_size
= qemu_opt_get_size_del(opts
, "chunk_size",
68 journal_size
= qemu_opt_get_size_del(opts
, "journal_size",
70 header
->storage_grow_unit
= qemu_opt_get_size_del(opts
, "storage_grow_unit",
72 add_storage_cmd
= qemu_opt_get_del(opts
, "add_storage_cmd");
/* The command string is copied into the header only when the option was
 * given; the copy is bounded by the size of the destination field. */
73 if (add_storage_cmd
) {
74 pstrcpy(header
->add_storage_cmd
, sizeof (header
->add_storage_cmd
),
/* The virtual disk size is kept as a multiple of 512 bytes. */
78 virtual_disk_size
= ROUND_UP (virtual_disk_size
, 512);
80 /* Check if arguments are valid. */
81 if (base_img
&& strlen (base_img
) > 1023) {
82 fprintf (stderr
, "The base image name is longer than 1023 characters, "
83 "which is not allowed.\n");
/* With a base image and sparse-hole detection requested, compact_image is
 * rejected with the message below. */
87 if (base_img
&& hole_size
> 0) {
88 if (header
->compact_image
) {
89 fprintf (stderr
, "compact_image and detect_sparse_hole cannot be "
90 "enabled together. Please disable detect_sparse_hole. \n");
/* need_zero_init is set to TRUE on one path and FALSE on another; the
 * conditions selecting between them are not visible here. */
93 header
->need_zero_init
= TRUE
;
95 header
->need_zero_init
= FALSE
;
/* NOTE(review): these copies use literal bounds (1024 and 16), whereas the
 * add_storage_cmd copy above uses sizeof on the destination field. The
 * guards around these copies are not visible -- confirm data_file and
 * data_file_fmt cannot be NULL here. */
99 pstrcpy (header
->data_file
, 1024, data_file
);
101 pstrcpy (header
->data_file_fmt
, 16, data_file_fmt
);
/* Fixed identification fields of the header. */
105 header
->magic
= FVD_MAGIC
;
106 header
->version
= FVD_VERSION
;
107 header
->virtual_disk_size
= virtual_disk_size
;
108 header
->clean_shutdown
= TRUE
;
/* all_data_in_fvd_img is set to TRUE here and to FALSE further down next to
 * the base-image settings; presumably this assignment belongs to the path
 * without a base image -- TODO confirm. */
111 header
->all_data_in_fvd_img
= TRUE
;
113 Error
*local_err
= NULL
;
116 bs
= bdrv_new("", &error_abort
);
118 fprintf (stderr
, "Failed to create a new block driver\n");
/* Record the base image name and format in the header, then look up the
 * block driver for the requested base image format. */
122 pstrcpy (header
->base_img
, 1024, base_img
);
124 pstrcpy (header
->base_img_fmt
, 16, base_img_fmt
);
125 BlockDriver
*drv
= bdrv_find_format (base_img_fmt
);
127 fprintf (stderr
, "Failed to find driver for format '%s'\n",
/* NOTE(review): the open that uses the base-image driver `drv` is given
 * header->data_file as the file name, while the driver-less open just below
 * is given base_img. This looks like it should be base_img in both calls --
 * confirm against the complete source. */
131 ret
= bdrv_open(&bs
, header
->data_file
, NULL
, NULL
, 0, drv
, &local_err
);
133 ret
= bdrv_open(&bs
, base_img
, NULL
, NULL
, 0, NULL
, &local_err
);
137 qerror_report_err(local_err
);
138 error_free(local_err
);
/* The portion of the base image that matters is capped at the virtual disk
 * size and rounded up to a multiple of 512 bytes.
 * NOTE(review): no check of the bdrv_getlength() result is visible before
 * it is used -- confirm how a failing call is handled. */
142 base_img_size
= bdrv_getlength (bs
);
143 base_img_size
= MIN (virtual_disk_size
, base_img_size
);
144 base_img_size
= ROUND_UP (base_img_size
, 512);
146 if (block_size
<= 0) {
147 /* No block size is provided. Find the smallest block size that
148 * does not make the bitmap too big. */
/* The bitmap holds one bit per block: blocks is the base image size divided
 * by block_size (rounded up) and bitmap_size is blocks / 8 (rounded up). A
 * candidate is tested against MODERATE_BITMAP_SIZE; the loop that varies
 * block_size is not visible here. */
151 int64_t blocks
= (base_img_size
+ block_size
- 1) / block_size
;
152 bitmap_size
= (blocks
+ 7) / 8;
153 if (bitmap_size
<= MODERATE_BITMAP_SIZE
) {
/* A block size was supplied: round it up to a multiple of 512 and size the
 * bitmap the same way. */
159 block_size
= ROUND_UP (block_size
, 512);
160 int64_t blocks
= (base_img_size
+ block_size
- 1) / block_size
;
161 bitmap_size
= (blocks
+ 7) / 8;
/* The bitmap is padded to a multiple of DEF_PAGE_SIZE and placed directly
 * after the header. */
164 bitmap_size
= ROUND_UP (bitmap_size
, DEF_PAGE_SIZE
);
165 header
->bitmap_size
= bitmap_size
;
166 header
->block_size
= block_size
;
167 header
->bitmap_offset
= header_size
;
/* Space for the prefetch profile, padded to a multiple of DEF_PAGE_SIZE. No
 * visible statement assigns prefetch_profile_entries before this point. */
169 prefetch_profile_size
= header
->prefetch_profile_entries
*
170 sizeof (PrefetchProfileEntry
);
171 prefetch_profile_size
= ROUND_UP (prefetch_profile_size
, DEF_PAGE_SIZE
);
/* Copy-on-read and prefetching parameters: apart from copy_on_read and
 * prefetch_start_delay, which come from the options, these are filled from
 * named constants. */
172 header
->base_img_size
= base_img_size
;
173 header
->max_outstanding_copy_on_read_data
=
174 MAX_OUTSTANDING_COPY_ON_READ_DATA
;
175 header
->copy_on_read
= copy_on_read
;
176 header
->prefetch_start_delay
=
177 prefetch_start_delay
;
178 header
->num_prefetch_slots
= NUM_PREFETCH_SLOTS
;
179 header
->bytes_per_prefetch
= ROUND_UP (BYTES_PER_PREFETCH
, block_size
);
180 header
->prefetch_throttle_time
= PREFETCH_THROTTLING_TIME
;
181 header
->prefetch_read_throughput_measure_time
=
182 PREFETCH_MIN_MEASURE_READ_TIME
;
183 header
->prefetch_write_throughput_measure_time
=
184 PREFETCH_MIN_MEASURE_WRITE_TIME
;
185 header
->prefetch_perf_calc_alpha
= PREFETCH_PERF_CALC_ALPHA
;
186 header
->prefetch_min_read_throughput
= PREFETCH_MIN_READ_THROUGHPUT
;
187 header
->prefetch_min_write_throughput
= PREFETCH_MIN_WRITE_THROUGHPUT
;
188 header
->prefetch_max_read_throughput
= PREFETCH_MAX_READ_THROUGHPUT
;
189 header
->prefetch_max_write_throughput
= PREFETCH_MAX_WRITE_THROUGHPUT
;
190 header
->all_data_in_fvd_img
= FALSE
;
191 header
->unit_of_PrefetchProfileEntry_len
= DEF_PAGE_SIZE
;
192 header
->generate_prefetch_profile
= FALSE
; /* To be implemented. */
193 header
->profile_directed_prefetch_start_delay
= -1;/*To be implemented*/
196 /* Set the table size. */
/* Only a compact image gets a table. chunk_size falls back to CHUNK_SIZE
 * and is rounded up to a multiple of DEF_PAGE_SIZE; storage_grow_unit falls
 * back to STORAGE_GROW_UNIT and is never left smaller than one chunk. */
197 if (header
->compact_image
) {
198 if (header
->chunk_size
<= 0) {
199 header
->chunk_size
= CHUNK_SIZE
;
201 header
->chunk_size
= ROUND_UP (header
->chunk_size
, DEF_PAGE_SIZE
);
202 if (header
->storage_grow_unit
<= 0) {
203 header
->storage_grow_unit
= STORAGE_GROW_UNIT
;
205 if (header
->storage_grow_unit
< header
->chunk_size
) {
206 header
->storage_grow_unit
= header
->chunk_size
;
/* One uint32_t table entry per chunk of the virtual disk (chunk count
 * rounded up); the table is padded to a multiple of DEF_PAGE_SIZE and
 * placed after the bitmap. */
208 int64_t table_entries
=
209 (virtual_disk_size
+ header
->chunk_size
- 1) / header
->chunk_size
;
210 table_size
= sizeof (uint32_t) * table_entries
;
211 table_size
= ROUND_UP (table_size
, DEF_PAGE_SIZE
);
212 header
->table_offset
= header_size
+ bitmap_size
;
215 /* Set the journal size. */
216 if (bitmap_size
<= 0 && table_size
<= 0) {
217 header
->journal_size
= 0; /* No need to use journal. */
218 } else if (journal_size
< 0) {
219 /* Disable the use of journal, which reduces overhead but may cause
220 * data corruption if the host crashes. This is a valid configuration
221 * for some use cases, where data integrity is not critical. */
222 header
->journal_size
= 0;
224 if (journal_size
== 0) {
225 /* No journal size is specified. Use a default size. */
226 journal_size
= JOURNAL_SIZE
;
228 if (table_size
> 0) {
229 /* Make sure that the journal is at least large enough to record
230 * all table changes in one shot, which is the extremely unlikely
232 int64_t vsize
= virtual_disk_size
+ header
->chunk_size
- 1;
233 int64_t table_entries
= vsize
/ header
->chunk_size
;
234 int64_t min_journal_size
= calc_min_journal_size (table_entries
);
235 if (journal_size
< min_journal_size
) {
236 journal_size
= min_journal_size
;
239 journal_size
= ROUND_UP (journal_size
, DEF_PAGE_SIZE
);
240 header
->journal_size
= journal_size
;
241 header
->journal_offset
= header_size
+ bitmap_size
+ table_size
;
244 const int64_t metadata_size
= header_size
+ bitmap_size
+ table_size
+
245 prefetch_profile_size
+ MAX (0, journal_size
);
246 header
->metadata_size
= metadata_size
;
248 fd
= open (filename
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
, 0644);
250 fprintf (stderr
, "Failed to open %s\n", filename
);
253 fvd_header_cpu_to_le (header
);
255 if (qemu_write_full (fd
, header
, header_size
) != header_size
) {
256 fprintf (stderr
, "Failed to write the header of %s\n", filename
);
260 /* Initialize the bitmap. */
/* The bitmap region is written from a temporary buffer (presumably
 * zero-filled, going by the allocator's name); the buffer is released
 * before the write result is checked. */
261 if (bitmap_size
> 0) {
262 uint8_t *bitmap
= my_qemu_mallocz (bitmap_size
);
263 ret
= qemu_write_full (fd
, bitmap
, bitmap_size
);
264 my_qemu_free (bitmap
);
265 if (ret
!= bitmap_size
) {
266 fprintf (stderr
, "Failed to zero out the bitmap of %s\n", filename
);
271 /* Initialize the table. */
272 if (table_size
> 0) {
273 /* Set all entries to EMPTY_TABLE (0xFFFFFFFF). */
274 uint8_t *empty_table
= my_qemu_malloc (table_size
);
275 memset (empty_table
, 0xFF, table_size
);
276 ret
= qemu_write_full (fd
, empty_table
, table_size
);
277 my_qemu_free (empty_table
);
278 if (ret
!= table_size
) {
/* NOTE(review): the message below ends with a newline followed by a period,
 * so the period is printed at the start of the next line. */
279 fprintf (stderr
, "Failed to write the table of %s\n.", filename
);
284 /* Initialize the journal. */
285 if (journal_size
> 0) {
286 uint8_t *empty_journal
= my_qemu_mallocz (journal_size
);
287 ret
= qemu_write_full (fd
, empty_journal
, journal_size
);
288 my_qemu_free (empty_journal
);
289 if (ret
!= journal_size
) {
290 fprintf (stderr
, "Failed to initialize the journal for %s\n.",
/* With a block driver state open and hole detection requested, scan the
 * base image for zero-filled regions. The bitmap starts at offset
 * header_size in the file, and the scan length is passed as a count of
 * 512-byte sectors. */
299 if (bs
&& hole_size
> 0) {
300 ret
= search_holes (filename
, (size_t) bitmap_size
, header_size
, bs
,
301 base_img_size
/ 512, hole_size
, block_size
);
/* The header buffer is released at two places; presumably one is the normal
 * exit and the other the failure exit -- the surrounding statements are not
 * visible. */
307 my_qemu_free (header
);
315 my_qemu_free (header
);
319 /* For the optimization called "free write to zero-filled blocks". See Section
320 * 3.3.3 of the FVD-cow paper. */
/* Scan the base image for regions that contain only zero bytes and set the
 * corresponding bits in the image's on-disk bitmap.
 *
 * filename:            image file holding the bitmap; opened read/write.
 * bitmap_size:         number of bytes of the file that are mapped.
 * bitmap_start_offset: file offset at which the mapping starts.
 * bs:                  block driver state the base image is read through.
 * nb_sectors:          number of 512-byte sectors of the base image to scan.
 * hole_size:           size in bytes of the unit tested for being all zero;
 *                      raised to at least block_size and rounded up to a
 *                      multiple of it.
 * block_size:          number of bytes covered by one bitmap bit.
 *
 * NOTE(review): several statements of this function (braces, error exits,
 * some declarations, the return statement) are not visible in this copy of
 * the file; the return value needs to be confirmed against the complete
 * source.
 * NOTE(review): further down, block_num is declared as int although it is
 * computed from the 64-bit sector index `sec` -- confirm that very large
 * base images cannot overflow it. */
321 static inline int search_holes (const char *filename
, size_t bitmap_size
,
322 int32_t bitmap_start_offset
,
323 BlockDriverState
* bs
, int64_t nb_sectors
,
324 int32_t hole_size
, int32_t block_size
)
326 const int fd
= open (filename
, O_RDWR
| O_BINARY
| O_LARGEFILE
, 0);
328 fprintf (stderr
, "Failed to open %s for read and write.\n", filename
);
332 printf ("Searching zero-filled sectors in the base image. Please wait...");
/* The bitmap region of the file is mapped MAP_SHARED so that bits set
 * through the mapping are written to the image file itself. The caller in
 * this file passes header_size, which is a multiple of DEF_PAGE_SIZE, as
 * the offset; this assumes DEF_PAGE_SIZE is a multiple of the system page
 * size -- TODO confirm. */
336 (uint8_t *) mmap (NULL
, bitmap_size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
,
337 fd
, (off_t
) bitmap_start_offset
);
338 if (bitmap
== MAP_FAILED
) {
339 fprintf (stderr
, "Failed to mmap() %s\n", filename
);
/* A hole is never smaller than one block and always spans whole blocks. */
344 if (hole_size
< block_size
) {
345 hole_size
= block_size
;
347 hole_size
= ROUND_UP (hole_size
, block_size
);
/* NOTE(review): nb_sectors counts sectors but is rounded down to a multiple
 * of hole_size, which is in bytes. That skips more trailing sectors than
 * rounding by sectors_per_hole would; it looks like sectors_per_hole was
 * intended -- confirm. */
348 nb_sectors
= ROUND_DOWN (nb_sectors
, hole_size
);
349 const int sectors_per_hole
= hole_size
/ 512;
350 const int sectors_per_block
= block_size
/ 512;
351 int num_int64_in_hole
= hole_size
/ 8;
352 int64_t hole_count
= 0;
/* One buffer of hole_size bytes is reused for every read of the base image. */
355 uint8_t *p
= my_qemu_blockalign (bs
, hole_size
);
/* Walk the base image one hole-sized unit at a time. */
357 while (sec
< nb_sectors
) {
360 if (bdrv_read (bs
, sec
, p
, sectors_per_hole
) < 0) {
361 fprintf (stderr
, "Error in reading the base image\n");
/* The buffer is examined as num_int64_in_hole 64-bit words. The loop body
 * is not visible; the test that follows treats i < num_int64_in_hole as
 * "not a hole", so presumably the loop stops at the first non-zero word. */
368 for (i
= 0; i
< num_int64_in_hole
; i
++) {
375 if (i
< num_int64_in_hole
) {
376 /* This is not a hole. */
377 sec
+= sectors_per_hole
;
379 /* These sectors consist of only zeros. Set the flag to
380 * indicate that there is no need to read this sector from the
381 * base image. See Section 3.3.3 of the FVD-cow paper for the
384 int64_t end
= sec
+ sectors_per_hole
;
386 int block_num
= sec
/ sectors_per_block
;
387 int64_t bitmap_byte_offset
= block_num
/ 8;
388 uint8_t bitmap_bit_offset
= block_num
% 8;
389 int8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
390 uint8_t b
= bitmap
[bitmap_byte_offset
];
393 bitmap
[bitmap_byte_offset
] |= mask
;
395 sec
+= sectors_per_block
;
401 printf ("\nFound %" PRId64
402 " zero-filled hole regions. Image creation done.\n", hole_count
);
404 munmap (bitmap
, bitmap_size
);
409 static QemuOptsList fvd_create_opts
= {
410 .name
= "fvd-create-options",
411 .head
= QTAILQ_HEAD_INITIALIZER(fvd_create_opts
.head
),
414 .name
= BLOCK_OPT_SIZE
,
415 .type
= QEMU_OPT_SIZE
,
416 .help
= "Virtual disk size"},
418 .name
= "compact_image",
419 .type
= QEMU_OPT_BOOL
,
420 .help
= "compact_image=on|off"},
422 .name
= "block_size",
423 .type
= QEMU_OPT_SIZE
,
424 .help
= "Block size"},
426 .name
= "chunk_size",
427 .type
= QEMU_OPT_SIZE
,
428 .help
= "Chunk size"},
430 .name
= "storage_grow_unit",
431 .type
= QEMU_OPT_SIZE
,
432 .help
= "Storage grow unit"},
434 .name
= "add_storage_cmd",
435 .type
= QEMU_OPT_STRING
,
436 .help
= "Command to add storage when FSI runs out of space"},
438 .name
= BLOCK_OPT_BACKING_FILE
,
439 .type
= QEMU_OPT_STRING
,
440 .help
= "File name of a backing image"},
442 .name
= BLOCK_OPT_BACKING_FMT
,
443 .type
= QEMU_OPT_STRING
,
444 .help
= "Image format of the backing image"},
447 .type
= QEMU_OPT_STRING
,
448 .help
= "File name of a separate data file"},
450 .name
= "data_file_fmt",
451 .type
= QEMU_OPT_STRING
,
452 .help
= "Image format of the separate data file"},
454 .name
= "copy_on_read",
455 .type
= QEMU_OPT_BOOL
,
456 .help
= "copy_on_read=on|off"},
458 .name
= "prefetch_start_delay",
459 .type
= QEMU_OPT_NUMBER
,
460 .help
= "Delay in seconds before starting whole image prefetching. "
461 "Prefetching is disabled if the delay is not a positive number."},
463 .name
= "detect_sparse_hole",
464 .type
= QEMU_OPT_SIZE
,
465 .help
= "Minimum size (in bytes) of a continuous zero-filled region to be "
466 "considered as a sparse file hole in the backing image (setting it "
467 "to 0 turns off sparse file detection)"},
469 .name
= "journal_size",
470 .type
= QEMU_OPT_SIZE
,
471 .help
= "Journal size"},