Merge tag 'v5.1.0-rc0'
[qemu/ar7.git] / block / fvd-open.c
blobde2b47853754edfb744221fb51d132e098861e03
1 /*
2 * Copyright (c) 2010-2011 IBM
4 * Authors:
5 * Chunqiang Tang <ctang@us.ibm.com>
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 * See the COPYING file in the top-level directory.
9 */
11 /*=============================================================================
12 * A short description: this module implements bdrv_file_open() for FVD.
13 *============================================================================*/
15 static void init_prefetch_timer (BlockDriverState * bs, BDRVFvdState * s);
16 static int init_data_file (BDRVFvdState * s, FvdHeader * header, int flags);
17 static int init_bitmap (BlockDriverState * bs, BDRVFvdState * s,
18 FvdHeader * header, const char *const filename);
19 static int load_table (BDRVFvdState * s, FvdHeader * header,
20 const char *const filename);
21 static int init_journal (int read_only, BlockDriverState * bs,
22 FvdHeader * header);
23 static int init_compact_image (BDRVFvdState * s, FvdHeader * header,
24 const char *const filename);
26 static QemuOptsList runtime_opts = {
27 .name = "sim",
28 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
29 .desc = {
31 .name = "filename",
32 .type = QEMU_OPT_STRING,
33 .help = "File name of the image",
35 { /* end of list */ }
39 static int fvd_open(BlockDriverState * bs, QDict *options, int flags,
40 Error **errp)
42 BDRVFvdState *s = bs->opaque;
43 int ret;
44 FvdHeader header;
45 BlockDriver *drv;
47 Error *local_err = NULL;
48 const char *filename;
50 QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
51 qemu_opts_absorb_qdict(opts, options, &local_err);
52 if (local_err) {
53 qerror_report_err(local_err);
54 error_free(local_err);
55 return -EINVAL;
58 filename = qemu_opt_get(opts, "filename");
60 const char * protocol = strchr (filename, ':');
61 if (protocol) {
62 drv = bdrv_find_protocol (filename, true);
63 filename = protocol + 1;
65 else {
66 /* Use "raw" instead of "file" to allow storing the image on device. */
67 drv = bdrv_find_format ("raw");
68 if (!drv) {
69 fprintf (stderr, "Failed to find the block device driver\n");
70 return -EINVAL;
74 s->fvd_metadata = bdrv_new("", &error_abort);
75 ret = bdrv_open(&s->fvd_metadata, filename, NULL, NULL,
76 flags, drv, &local_err);
77 if (ret < 0) {
78 qerror_report_err(local_err);
79 error_free(local_err);
80 return ret;
83 /* Initialize so that jumping to 'fail' would do cleanup properly. */
84 s->stale_bitmap = NULL;
85 s->fresh_bitmap = NULL;
86 s->table = NULL;
87 s->outstanding_copy_on_read_data = 0;
88 QLIST_INIT (&s->write_locks);
89 QLIST_INIT (&s->copy_locks);
90 QLIST_INIT (&s->wait_for_journal);
91 s->ongoing_journal_updates = 0;
92 s->prefetch_acb = NULL;
93 s->add_storage_cmd = NULL;
94 #ifdef FVD_DEBUG
95 s->total_copy_on_read_data = s->total_prefetch_data = 0;
96 #endif
98 if (bdrv_pread (s->fvd_metadata, 0, &header, sizeof (header)) !=
99 sizeof (header)) {
100 fprintf (stderr, "Failed to read the header of %s\n", filename);
101 goto fail;
104 fvd_header_le_to_cpu (&header);
106 if (header.magic != FVD_MAGIC || header.version != FVD_VERSION) {
107 fprintf (stderr, "Incorrect magic number in the header of %s: "
108 "magic=%0X version=%d expect_magic=%0X expect_version=%d\n",
109 filename, header.magic, header.version, FVD_MAGIC,
110 FVD_VERSION);
111 goto fail;
113 if (header.virtual_disk_size % 512 != 0) {
114 fprintf (stderr, "Disk size %"PRId64" in the header of %s is not "
115 "a multple of 512.\n", header.virtual_disk_size, filename);
116 goto fail;
119 /* Initialize the fields of BDRVFvdState. */
120 s->dirty_image = FALSE;
121 s->block_size = header.block_size / 512;
122 s->bitmap_size = header.bitmap_size;
123 s->prefetch_error = FALSE;
124 s->prefetch_timer = NULL;
125 s->sectors_per_prefetch = (header.bytes_per_prefetch + 511) / 512;
126 s->prefetch_throttle_time = header.prefetch_throttle_time;
127 s->prefetch_perf_calc_alpha = header.prefetch_perf_calc_alpha / 100.0;
128 s->prefetch_read_throughput_measure_time =
129 header.prefetch_read_throughput_measure_time;
130 s->prefetch_write_throughput_measure_time =
131 header.prefetch_write_throughput_measure_time;
133 /* Convert KB/s to bytes/millisec. */
134 s->prefetch_min_read_throughput =
135 ((double) header.prefetch_min_read_throughput) * 1024.0 / 1000.0;
136 s->prefetch_min_write_throughput =
137 ((double) header.prefetch_min_write_throughput) * 1024.0 / 1000.0;
139 if (header.base_img[0] != 0 && s->sectors_per_prefetch%s->block_size != 0) {
140 fprintf (stderr, "sectors_per_prefetch (%d) is not a multiple of "
141 "block_size (%d)\n",
142 s->sectors_per_prefetch * 512, s->block_size * 512);
144 s->max_outstanding_copy_on_read_data =
145 header.max_outstanding_copy_on_read_data;
146 if (s->max_outstanding_copy_on_read_data < header.block_size * 2) {
147 s->max_outstanding_copy_on_read_data = header.block_size;
150 if (header.num_prefetch_slots < 1) {
151 s->num_prefetch_slots = 1;
152 } else {
153 s->num_prefetch_slots = header.num_prefetch_slots;
155 if (in_qemu_tool) {
156 /* No prefetching in a qemu tool. */
157 s->prefetch_start_delay = -1;
159 #ifndef SIMULATED_TEST_WITH_QEMU_IO
160 s->copy_on_read = FALSE; /* No prefetching in a qemu tool. */
161 #else
162 /* But allow debugging copy_on_read in qemu-io if configured. */
163 s->copy_on_read = header.copy_on_read;
164 #endif
165 } else {
166 s->prefetch_start_delay = header.prefetch_start_delay;
167 s->copy_on_read = header.copy_on_read;
169 s->virtual_disk_size = header.virtual_disk_size;
170 s->bitmap_offset = header.bitmap_offset / 512;
171 s->nb_sectors_in_base_img = header.base_img_size / 512;
172 bs->total_sectors = s->virtual_disk_size / 512;
174 if (init_data_file (s, &header, flags)) {
175 goto fail;
178 if (init_bitmap (bs, s, &header, filename)) {
179 goto fail;
182 if (load_table (s, &header, filename)) {
183 goto fail;
186 const int read_only = !(flags & BDRV_O_RDWR);
187 if (init_journal (read_only, bs, &header)) {
188 goto fail;
191 /* This must be done after init_journal() because it may use metadata
192 * recovered from the journal. */
193 if (init_compact_image (s, &header, filename)) {
194 goto fail;
197 if (!read_only) {
198 /* This flag will be cleaned later when the image is shut down
199 * gracefully. */
200 update_clean_shutdown_flag (s, FALSE);
202 init_prefetch_timer (bs, s);
204 QDEBUG ("copy_on_read=%s block_size=%d journal_size=%" PRId64
205 " prefetching_delay=%d prefetch_slots=%d "
206 "prefetch_read_threshold_KB=%.0lf "
207 "prefetch_write_threshold_KB=%.0lf "
208 "prefetch_throttle_time=%d bytes_per_prefetch=%d "
209 "max_outstanding_copy_on_read_data=%"PRId64"\n",
210 BOOL (s->copy_on_read), s->block_size * 512,
211 s->journal_size * 512, s->prefetch_start_delay,
212 s->num_prefetch_slots,
213 s->prefetch_min_read_throughput * 1000.0 / 1024.0,
214 s->prefetch_min_write_throughput * 1000.0 / 1024.0,
215 s->prefetch_throttle_time, s->sectors_per_prefetch * 512,
216 s->max_outstanding_copy_on_read_data);
218 return 0;
220 fail:
221 fprintf (stderr, "Failed to open %s using the FVD format.\n", filename);
222 fvd_close (bs);
223 return -1;
226 static int load_table (BDRVFvdState * s, FvdHeader * header,
227 const char *const filename)
229 if (!header->compact_image) {
230 return 0;
233 /* Initialize the table. */
234 s->table_offset = header->table_offset / 512;
235 s->chunk_size = header->chunk_size / 512;
236 int64_t vsize = header->virtual_disk_size + header->chunk_size - 1;
237 int table_entries = vsize / header->chunk_size;
238 int64_t table_size = sizeof (uint32_t) * table_entries;
239 table_size = ROUND_UP (table_size, DEF_PAGE_SIZE);
240 s->table = my_qemu_blockalign (s->fvd_metadata, (size_t) table_size);
242 if (bdrv_pread (s->fvd_metadata, header->table_offset, s->table, table_size)
243 != table_size) {
244 fprintf (stderr, "Failed to read the table of %s\n", filename);
245 return -1;
248 return 0;
251 static int init_compact_image (BDRVFvdState * s, FvdHeader * header,
252 const char *const filename)
254 if (!header->compact_image) {
255 s->data_region_prepared = FALSE;
256 return 0;
259 /* Scan the table to find the max allocated chunk. */
260 int i;
261 uint32_t max_chunk = 0;
262 int empty_disk = TRUE;
263 int table_entries =
264 (int) (ROUND_UP (header->virtual_disk_size, header->chunk_size) /
265 header->chunk_size);
266 for (i = 0; i < table_entries; i++) {
267 if (!IS_EMPTY (s->table[i])) {
268 empty_disk = FALSE;
269 uint32_t id = READ_TABLE (s->table[i]);
270 if (id > max_chunk) {
271 max_chunk = id;
275 if (!empty_disk) {
276 max_chunk++;
278 s->used_storage = max_chunk * s->chunk_size;
279 s->storage_grow_unit = header->storage_grow_unit / 512;
281 /* Check if the image is directly stored on a raw device, including
282 * logical volume. If so, figure out the size of the device. */
283 struct stat stat_buf;
284 if (stat (filename, &stat_buf) != 0) {
285 fprintf (stderr, "Failed to stat() %s\n", filename);
286 return -1;
289 /* Check how much storage space is already allocated. */
290 int64_t size = bdrv_getlength (s->fvd_data);
291 if (size < 0) {
292 fprintf (stderr, "Failed in bdrv_getlength(%s)\n", filename);
293 return -1;
295 const int64_t min_size = (s->data_offset + s->used_storage) * 512;
296 if (size < min_size) {
297 fprintf (stderr, "The size of device %s is not even big enough to "
298 "store already allocated data.\n",
299 filename);
300 return -1;
303 if (S_ISBLK (stat_buf.st_mode) || S_ISCHR (stat_buf.st_mode)) {
304 /* Initialize the command to grow storage space. */
305 char cmd[2048];
306 if (header->add_storage_cmd[0] == 0) {
307 s->add_storage_cmd = NULL;
308 } else {
309 if (strcmp (header->add_storage_cmd, "builtin:lvextend") == 0) {
310 /* Note the following:
311 * 1. lvextend may generate warning messages like "File
312 * descriptor...leaked...", * which is fine. See the
313 * following from LVM manual: "On invocation, lvm requires
314 * that only the standard file descriptors stdin,
315 * stdout * and stderr are available. If others are
316 * found, they get closed and messages are issued warning
317 * about the leak."
318 * 2. Instead of using the lvextend command line, one
319 * option is to use liblvm directly, which avoids creating
320 * a process to resize a LV.
321 * 3. On Ubuntu, /bin/sh is linked to /bin/dash, which
322 * does not support ">&" for stdout and stderr
323 * redirection. */
324 snprintf (cmd, sizeof (cmd) - 1, "/sbin/lvextend -L+%" PRId64
325 "B %s >/dev/null 2>/dev/null",
326 header->storage_grow_unit,
327 header->data_file[0] ? header->data_file : filename);
328 } else {
329 snprintf (cmd, sizeof (cmd) - 1, "%s %" PRId64
330 " %s >/dev/null 2>/dev/null",
331 header->add_storage_cmd, header->storage_grow_unit,
332 header->data_file[0] ? header->data_file : filename);
335 int len = strlen (cmd);
336 s->add_storage_cmd = my_qemu_malloc (len + 1);
337 memcpy (s->add_storage_cmd, cmd, len + 1);
341 s->data_storage = size / 512 - s->data_offset;
342 s->fvd_data->growable = TRUE;
343 s->data_region_prepared = TRUE;
345 return 0;
348 static int init_data_file (BDRVFvdState * s, FvdHeader * header, int flags)
350 Error *local_err = NULL;
351 int ret;
353 if (header->data_file[0]) {
354 /* Open a separate data file. */
355 s->data_offset = 0;
356 s->fvd_data = bdrv_new("", &error_abort);
357 if (!s->fvd_data) {
358 fprintf (stderr, "Failed to create a new block device driver.\n");
359 return -1;
362 if (header->data_file_fmt[0] == 0) {
363 ret = bdrv_open(&s->fvd_data, header->data_file, NULL, NULL,
364 flags, NULL, &local_err);
365 } else {
366 BlockDriver *data_drv = bdrv_find_format (header->data_file_fmt);
367 if (!data_drv) {
368 fprintf (stderr, "Failed to find driver for image format "
369 "'%s' of data file %s\n",
370 header->data_file_fmt, header->data_file);
371 return -1;
373 ret = bdrv_open(&s->fvd_data, header->data_file,
374 NULL, NULL, flags, data_drv, &local_err);
376 if (ret != 0) {
377 qerror_report_err(local_err);
378 error_free(local_err);
379 return -1;
381 } else {
382 s->data_offset = header->metadata_size / 512; /* In sectors. */
383 s->fvd_data = s->fvd_metadata;
386 if (header->need_zero_init && !bdrv_has_zero_init (s->fvd_data)) {
387 if (in_qemu_tool) {
388 /* Only give a warning to allow 'qemu-img update' to modify
389 * need_zero_init if the user manually zero-init the device. */
390 fprintf (stderr, "Warning: image needs zero_init but it is not "
391 "supported by the storage media.\n");
392 } else {
393 fprintf (stderr, "Error: image needs zero_init but it is not "
394 "supported by the storage media.\n");
395 return -EINVAL;
399 return 0;
402 static int init_bitmap (BlockDriverState * bs, BDRVFvdState * s,
403 FvdHeader * header, const char *const filename)
405 if (header->all_data_in_fvd_img) {
406 /* This also covers the case of no base image. */
407 s->prefetch_state = PREFETCH_STATE_FINISHED;
408 s->copy_on_read = FALSE;
409 s->prefetch_start_delay = -1;
411 if (bs->backing_file[0] != 0) {
412 /* No need to use the base image. It may operate without problem
413 * even if the base image is no longer accessible. */
414 bs->backing_file[0] = 0;
416 } else {
417 ASSERT (header->base_img[0] != 0);
418 pstrcpy (bs->backing_file, 1024, header->base_img);
419 const int flags = O_RDONLY | O_BINARY | O_LARGEFILE;
420 int test_backing_fd = open (bs->backing_file, flags);
421 if (test_backing_fd < 0) {
422 fprintf (stderr, "Failed to open the base image %s for read.\n",
423 bs->backing_file);
424 return -1;
426 close (test_backing_fd);
428 /* This will be enabled in init_prefetch() after a timer expires. */
429 s->prefetch_state = PREFETCH_STATE_DISABLED;
431 s->stale_bitmap = my_qemu_blockalign (s->fvd_metadata,
432 (size_t) s->bitmap_size);
433 if (bdrv_pread (s->fvd_metadata, header->bitmap_offset,
434 s->stale_bitmap, s->bitmap_size) != s->bitmap_size) {
435 fprintf (stderr, "Failed to the bitmap of %s.\n", filename);
436 return -1;
439 if (s->copy_on_read || (s->prefetch_state != PREFETCH_STATE_FINISHED &&
440 s->prefetch_start_delay > 0)) {
441 /* Use two bitmaps only if copy_on_read or prefetching is enabled.
442 * See Section 3.3.4 of the FVD-cow paper. */
443 s->fresh_bitmap = my_qemu_blockalign (s->fvd_metadata,
444 s->bitmap_size);
445 memcpy (s->fresh_bitmap, s->stale_bitmap, s->bitmap_size);
446 } else {
447 s->fresh_bitmap = s->stale_bitmap;
451 return 0;
454 static void init_prefetch_timer (BlockDriverState * bs, BDRVFvdState * s)
456 #ifndef SIMULATED_TEST_WITH_QEMU_IO
457 if (in_qemu_tool) {
458 return;
460 #endif
462 if (s->prefetch_state == PREFETCH_STATE_FINISHED ||
463 s->prefetch_start_delay <= 0) {
464 return;
467 /* Start prefetching after a delay. Times 1000 to convert sec to ms. */
468 int64_t expire = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->prefetch_start_delay * 1000;
469 s->prefetch_timer = timer_new_ns(QEMU_CLOCK_REALTIME, fvd_init_prefetch, bs);
470 timer_mod(s->prefetch_timer, expire);