qxl: Add missing trace.h (fix broken build)
[qemu/ar7.git] / block / fvd-open.c
blob45ca13b3a82fbfc4da99f3d82b1a5cc83b546991
1 /*
2 * Copyright (c) 2010-2011 IBM
4 * Authors:
5 * Chunqiang Tang <ctang@us.ibm.com>
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 * See the COPYING file in the top-level directory.
9 */
11 /*=============================================================================
12 * A short description: this module implements bdrv_file_open() for FVD.
13 *============================================================================*/
15 static void init_prefetch_timer (BlockDriverState * bs, BDRVFvdState * s);
16 static int init_data_file (BDRVFvdState * s, FvdHeader * header, int flags);
17 static int init_bitmap (BlockDriverState * bs, BDRVFvdState * s,
18 FvdHeader * header, const char *const filename);
19 static int load_table (BDRVFvdState * s, FvdHeader * header,
20 const char *const filename);
21 static int init_journal (int read_only, BlockDriverState * bs,
22 FvdHeader * header);
23 static int init_compact_image (BDRVFvdState * s, FvdHeader * header,
24 const char *const filename);
26 static QemuOptsList runtime_opts = {
27 .name = "sim",
28 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
29 .desc = {
31 .name = "filename",
32 .type = QEMU_OPT_STRING,
33 .help = "File name of the image",
35 { /* end of list */ }
39 static int fvd_open(BlockDriverState * bs, QDict *options, int flags,
40 Error **errp)
42 BDRVFvdState *s = bs->opaque;
43 int ret;
44 FvdHeader header;
45 BlockDriver *drv;
47 Error *local_err = NULL;
48 const char *filename;
50 QemuOpts *opts = qemu_opts_create_nofail(&runtime_opts);
51 qemu_opts_absorb_qdict(opts, options, &local_err);
52 if (error_is_set(&local_err)) {
53 qerror_report_err(local_err);
54 error_free(local_err);
55 return -EINVAL;
58 filename = qemu_opt_get(opts, "filename");
60 const char * protocol = strchr (filename, ':');
61 if (protocol) {
62 drv = bdrv_find_protocol (filename, true);
63 filename = protocol + 1;
65 else {
66 /* Use "raw" instead of "file" to allow storing the image on device. */
67 drv = bdrv_find_format ("raw");
68 if (!drv) {
69 fprintf (stderr, "Failed to find the block device driver\n");
70 return -EINVAL;
74 s->fvd_metadata = bdrv_new ("");
75 ret = bdrv_open(s->fvd_metadata, filename, NULL, flags, drv, &local_err);
76 if (ret < 0) {
77 qerror_report_err(local_err);
78 error_free(local_err);
79 return ret;
82 /* Initialize so that jumping to 'fail' would do cleanup properly. */
83 s->stale_bitmap = NULL;
84 s->fresh_bitmap = NULL;
85 s->table = NULL;
86 s->outstanding_copy_on_read_data = 0;
87 QLIST_INIT (&s->write_locks);
88 QLIST_INIT (&s->copy_locks);
89 QLIST_INIT (&s->wait_for_journal);
90 s->ongoing_journal_updates = 0;
91 s->prefetch_acb = NULL;
92 s->add_storage_cmd = NULL;
93 #ifdef FVD_DEBUG
94 s->total_copy_on_read_data = s->total_prefetch_data = 0;
95 #endif
97 if (bdrv_pread (s->fvd_metadata, 0, &header, sizeof (header)) !=
98 sizeof (header)) {
99 fprintf (stderr, "Failed to read the header of %s\n", filename);
100 goto fail;
103 fvd_header_le_to_cpu (&header);
105 if (header.magic != FVD_MAGIC || header.version != FVD_VERSION) {
106 fprintf (stderr, "Incorrect magic number in the header of %s: "
107 "magic=%0X version=%d expect_magic=%0X expect_version=%d\n",
108 filename, header.magic, header.version, FVD_MAGIC,
109 FVD_VERSION);
110 goto fail;
112 if (header.virtual_disk_size % 512 != 0) {
113 fprintf (stderr, "Disk size %"PRId64" in the header of %s is not "
114 "a multple of 512.\n", header.virtual_disk_size, filename);
115 goto fail;
118 /* Initialize the fields of BDRVFvdState. */
119 s->dirty_image = FALSE;
120 s->block_size = header.block_size / 512;
121 s->bitmap_size = header.bitmap_size;
122 s->prefetch_error = FALSE;
123 s->prefetch_timer = NULL;
124 s->sectors_per_prefetch = (header.bytes_per_prefetch + 511) / 512;
125 s->prefetch_throttle_time = header.prefetch_throttle_time;
126 s->prefetch_perf_calc_alpha = header.prefetch_perf_calc_alpha / 100.0;
127 s->prefetch_read_throughput_measure_time =
128 header.prefetch_read_throughput_measure_time;
129 s->prefetch_write_throughput_measure_time =
130 header.prefetch_write_throughput_measure_time;
132 /* Convert KB/s to bytes/millisec. */
133 s->prefetch_min_read_throughput =
134 ((double) header.prefetch_min_read_throughput) * 1024.0 / 1000.0;
135 s->prefetch_min_write_throughput =
136 ((double) header.prefetch_min_write_throughput) * 1024.0 / 1000.0;
138 if (header.base_img[0] != 0 && s->sectors_per_prefetch%s->block_size != 0) {
139 fprintf (stderr, "sectors_per_prefetch (%d) is not a multiple of "
140 "block_size (%d)\n",
141 s->sectors_per_prefetch * 512, s->block_size * 512);
143 s->max_outstanding_copy_on_read_data =
144 header.max_outstanding_copy_on_read_data;
145 if (s->max_outstanding_copy_on_read_data < header.block_size * 2) {
146 s->max_outstanding_copy_on_read_data = header.block_size;
149 if (header.num_prefetch_slots < 1) {
150 s->num_prefetch_slots = 1;
151 } else {
152 s->num_prefetch_slots = header.num_prefetch_slots;
154 if (in_qemu_tool) {
155 /* No prefetching in a qemu tool. */
156 s->prefetch_start_delay = -1;
158 #ifndef SIMULATED_TEST_WITH_QEMU_IO
159 s->copy_on_read = FALSE; /* No prefetching in a qemu tool. */
160 #else
161 /* But allow debugging copy_on_read in qemu-io if configured. */
162 s->copy_on_read = header.copy_on_read;
163 #endif
164 } else {
165 s->prefetch_start_delay = header.prefetch_start_delay;
166 s->copy_on_read = header.copy_on_read;
168 s->virtual_disk_size = header.virtual_disk_size;
169 s->bitmap_offset = header.bitmap_offset / 512;
170 s->nb_sectors_in_base_img = header.base_img_size / 512;
171 bs->total_sectors = s->virtual_disk_size / 512;
173 if (init_data_file (s, &header, flags)) {
174 goto fail;
177 if (init_bitmap (bs, s, &header, filename)) {
178 goto fail;
181 if (load_table (s, &header, filename)) {
182 goto fail;
185 const int read_only = !(flags & BDRV_O_RDWR);
186 if (init_journal (read_only, bs, &header)) {
187 goto fail;
190 /* This must be done after init_journal() because it may use metadata
191 * recovered from the journal. */
192 if (init_compact_image (s, &header, filename)) {
193 goto fail;
196 if (!read_only) {
197 /* This flag will be cleaned later when the image is shut down
198 * gracefully. */
199 update_clean_shutdown_flag (s, FALSE);
201 init_prefetch_timer (bs, s);
203 QDEBUG ("copy_on_read=%s block_size=%d journal_size=%" PRId64
204 " prefetching_delay=%d prefetch_slots=%d "
205 "prefetch_read_threshold_KB=%.0lf "
206 "prefetch_write_threshold_KB=%.0lf "
207 "prefetch_throttle_time=%d bytes_per_prefetch=%d "
208 "max_outstanding_copy_on_read_data=%"PRId64"\n",
209 BOOL (s->copy_on_read), s->block_size * 512,
210 s->journal_size * 512, s->prefetch_start_delay,
211 s->num_prefetch_slots,
212 s->prefetch_min_read_throughput * 1000.0 / 1024.0,
213 s->prefetch_min_write_throughput * 1000.0 / 1024.0,
214 s->prefetch_throttle_time, s->sectors_per_prefetch * 512,
215 s->max_outstanding_copy_on_read_data);
217 return 0;
219 fail:
220 fprintf (stderr, "Failed to open %s using the FVD format.\n", filename);
221 fvd_close (bs);
222 return -1;
225 static int load_table (BDRVFvdState * s, FvdHeader * header,
226 const char *const filename)
228 if (!header->compact_image) {
229 return 0;
232 /* Initialize the table. */
233 s->table_offset = header->table_offset / 512;
234 s->chunk_size = header->chunk_size / 512;
235 int64_t vsize = header->virtual_disk_size + header->chunk_size - 1;
236 int table_entries = vsize / header->chunk_size;
237 int64_t table_size = sizeof (uint32_t) * table_entries;
238 table_size = ROUND_UP (table_size, DEF_PAGE_SIZE);
239 s->table = my_qemu_blockalign (s->fvd_metadata, (size_t) table_size);
241 if (bdrv_pread (s->fvd_metadata, header->table_offset, s->table, table_size)
242 != table_size) {
243 fprintf (stderr, "Failed to read the table of %s\n", filename);
244 return -1;
247 return 0;
250 static int init_compact_image (BDRVFvdState * s, FvdHeader * header,
251 const char *const filename)
253 if (!header->compact_image) {
254 s->data_region_prepared = FALSE;
255 return 0;
258 /* Scan the table to find the max allocated chunk. */
259 int i;
260 uint32_t max_chunk = 0;
261 int empty_disk = TRUE;
262 int table_entries =
263 (int) (ROUND_UP (header->virtual_disk_size, header->chunk_size) /
264 header->chunk_size);
265 for (i = 0; i < table_entries; i++) {
266 if (!IS_EMPTY (s->table[i])) {
267 empty_disk = FALSE;
268 uint32_t id = READ_TABLE (s->table[i]);
269 if (id > max_chunk) {
270 max_chunk = id;
274 if (!empty_disk) {
275 max_chunk++;
277 s->used_storage = max_chunk * s->chunk_size;
278 s->storage_grow_unit = header->storage_grow_unit / 512;
280 /* Check if the image is directly stored on a raw device, including
281 * logical volume. If so, figure out the size of the device. */
282 struct stat stat_buf;
283 if (stat (filename, &stat_buf) != 0) {
284 fprintf (stderr, "Failed to stat() %s\n", filename);
285 return -1;
288 /* Check how much storage space is already allocated. */
289 int64_t size = bdrv_getlength (s->fvd_data);
290 if (size < 0) {
291 fprintf (stderr, "Failed in bdrv_getlength(%s)\n", filename);
292 return -1;
294 const int64_t min_size = (s->data_offset + s->used_storage) * 512;
295 if (size < min_size) {
296 fprintf (stderr, "The size of device %s is not even big enough to "
297 "store already allocated data.\n",
298 filename);
299 return -1;
302 if (S_ISBLK (stat_buf.st_mode) || S_ISCHR (stat_buf.st_mode)) {
303 /* Initialize the command to grow storage space. */
304 char cmd[2048];
305 if (header->add_storage_cmd[0] == 0) {
306 s->add_storage_cmd = NULL;
307 } else {
308 if (strcmp (header->add_storage_cmd, "builtin:lvextend") == 0) {
309 /* Note the following:
310 * 1. lvextend may generate warning messages like "File
311 * descriptor...leaked...", * which is fine. See the
312 * following from LVM manual: "On invocation, lvm requires
313 * that only the standard file descriptors stdin,
314 * stdout * and stderr are available. If others are
315 * found, they get closed and messages are issued warning
316 * about the leak."
317 * 2. Instead of using the lvextend command line, one
318 * option is to use liblvm directly, which avoids creating
319 * a process to resize a LV.
320 * 3. On Ubuntu, /bin/sh is linked to /bin/dash, which
321 * does not support ">&" for stdout and stderr
322 * redirection. */
323 snprintf (cmd, sizeof (cmd) - 1, "/sbin/lvextend -L+%" PRId64
324 "B %s >/dev/null 2>/dev/null",
325 header->storage_grow_unit,
326 header->data_file[0] ? header->data_file : filename);
327 } else {
328 snprintf (cmd, sizeof (cmd) - 1, "%s %" PRId64
329 " %s >/dev/null 2>/dev/null",
330 header->add_storage_cmd, header->storage_grow_unit,
331 header->data_file[0] ? header->data_file : filename);
334 int len = strlen (cmd);
335 s->add_storage_cmd = my_qemu_malloc (len + 1);
336 memcpy (s->add_storage_cmd, cmd, len + 1);
340 s->data_storage = size / 512 - s->data_offset;
341 s->fvd_data->growable = TRUE;
342 s->data_region_prepared = TRUE;
344 return 0;
347 static int init_data_file (BDRVFvdState * s, FvdHeader * header, int flags)
349 Error *local_err = NULL;
350 int ret;
352 if (header->data_file[0]) {
353 /* Open a separate data file. */
354 s->data_offset = 0;
355 s->fvd_data = bdrv_new ("");
356 if (!s->fvd_data) {
357 fprintf (stderr, "Failed to create a new block device driver.\n");
358 return -1;
361 if (header->data_file_fmt[0] == 0) {
362 ret = bdrv_open(s->fvd_data, header->data_file, NULL, flags, NULL,
363 &local_err);
364 } else {
365 BlockDriver *data_drv = bdrv_find_format (header->data_file_fmt);
366 if (!data_drv) {
367 fprintf (stderr, "Failed to find driver for image format "
368 "'%s' of data file %s\n",
369 header->data_file_fmt, header->data_file);
370 return -1;
372 ret = bdrv_open(s->fvd_data, header->data_file,
373 NULL, flags, data_drv, &local_err);
375 if (ret != 0) {
376 qerror_report_err(local_err);
377 error_free(local_err);
378 return -1;
380 } else {
381 s->data_offset = header->metadata_size / 512; /* In sectors. */
382 s->fvd_data = s->fvd_metadata;
385 if (header->need_zero_init && !bdrv_has_zero_init (s->fvd_data)) {
386 if (in_qemu_tool) {
387 /* Only give a warning to allow 'qemu-img update' to modify
388 * need_zero_init if the user manually zero-init the device. */
389 fprintf (stderr, "Warning: image needs zero_init but it is not "
390 "supported by the storage media.\n");
391 } else {
392 fprintf (stderr, "Error: image needs zero_init but it is not "
393 "supported by the storage media.\n");
394 return -EINVAL;
398 return 0;
401 static int init_bitmap (BlockDriverState * bs, BDRVFvdState * s,
402 FvdHeader * header, const char *const filename)
404 if (header->all_data_in_fvd_img) {
405 /* This also covers the case of no base image. */
406 s->prefetch_state = PREFETCH_STATE_FINISHED;
407 s->copy_on_read = FALSE;
408 s->prefetch_start_delay = -1;
410 if (bs->backing_file[0] != 0) {
411 /* No need to use the base image. It may operate without problem
412 * even if the base image is no longer accessible. */
413 bs->backing_file[0] = 0;
415 } else {
416 ASSERT (header->base_img[0] != 0);
417 pstrcpy (bs->backing_file, 1024, header->base_img);
418 const int flags = O_RDONLY | O_BINARY | O_LARGEFILE;
419 int test_backing_fd = open (bs->backing_file, flags);
420 if (test_backing_fd < 0) {
421 fprintf (stderr, "Failed to open the base image %s for read.\n",
422 bs->backing_file);
423 return -1;
425 close (test_backing_fd);
427 /* This will be enabled in init_prefetch() after a timer expires. */
428 s->prefetch_state = PREFETCH_STATE_DISABLED;
430 s->stale_bitmap = my_qemu_blockalign (s->fvd_metadata,
431 (size_t) s->bitmap_size);
432 if (bdrv_pread (s->fvd_metadata, header->bitmap_offset,
433 s->stale_bitmap, s->bitmap_size) != s->bitmap_size) {
434 fprintf (stderr, "Failed to the bitmap of %s.\n", filename);
435 return -1;
438 if (s->copy_on_read || (s->prefetch_state != PREFETCH_STATE_FINISHED &&
439 s->prefetch_start_delay > 0)) {
440 /* Use two bitmaps only if copy_on_read or prefetching is enabled.
441 * See Section 3.3.4 of the FVD-cow paper. */
442 s->fresh_bitmap = my_qemu_blockalign (s->fvd_metadata,
443 s->bitmap_size);
444 memcpy (s->fresh_bitmap, s->stale_bitmap, s->bitmap_size);
445 } else {
446 s->fresh_bitmap = s->stale_bitmap;
450 return 0;
453 static void init_prefetch_timer (BlockDriverState * bs, BDRVFvdState * s)
455 #ifndef SIMULATED_TEST_WITH_QEMU_IO
456 if (in_qemu_tool) {
457 return;
459 #endif
461 if (s->prefetch_state == PREFETCH_STATE_FINISHED ||
462 s->prefetch_start_delay <= 0) {
463 return;
466 /* Start prefetching after a delay. Times 1000 to convert sec to ms. */
467 int64_t expire = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->prefetch_start_delay * 1000;
468 s->prefetch_timer = timer_new_ns(QEMU_CLOCK_REALTIME, fvd_init_prefetch, bs);
469 timer_mod(s->prefetch_timer, expire);