block: check bdrv_in_use() before blockdev operations
[qemu-kvm.git] / block.c
blob 43f648437d148eb50b4c75d37c38145dca8a6a0d
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are meant to keep their timing in FIFO order. The next
     * throttled request is not dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it is re-inserted at the head, and all requests behind it
     * remain queued in throttled_reqs.
     */
    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
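
/* Illustrative sketch (not part of the original source): how a caller might
 * use path_combine() to resolve a backing file name relative to its image.
 * Guarded out so it is not built. */
#if 0
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative backing file: combined with the directory of the image */
    path_combine(dest, sizeof(dest), "/images/vm.qcow2", "base.qcow2");
    /* dest == "/images/base.qcow2" */

    /* absolute names are copied through unchanged */
    path_combine(dest, sizeof(dest), "/images/vm.qcow2", "/abs/base.qcow2");
    /* dest == "/abs/base.qcow2" */
}
#endif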
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}
int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
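
/* Illustrative sketch (not part of the original source): translating a
 * -drive cache= mode string into open flags via bdrv_parse_cache_flags().
 * Guarded out so it is not built. */
#if 0
static int example_parse_cache_mode(void)
{
    int flags = 0;

    if (bdrv_parse_cache_flags("none", &flags) < 0) {
        return -1;                /* unknown cache mode */
    }
    /* flags now contains BDRV_O_NOCACHE | BDRV_O_CACHE_WB: host page cache
     * bypassed, writeback semantics toward the guest */
    return flags;
}
#endif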
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
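
/* Illustrative sketch (not part of the original source): because the flag is
 * a reference count, each user must balance its enable/disable calls.
 * Guarded out so it is not built. */
#if 0
static void example_copy_on_read_refcount(BlockDriverState *bs)
{
    bdrv_enable_copy_on_read(bs);   /* user A: count 0 -> 1, feature on  */
    bdrv_enable_copy_on_read(bs);   /* user B: count 1 -> 2, still on    */
    bdrv_disable_copy_on_read(bs);  /* user A done: count 2 -> 1, on     */
    bdrv_disable_copy_on_read(bs);  /* user B done: count 1 -> 0, off    */
}
#endif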
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
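
/* Illustrative sketch (not part of the original source): opening an image
 * through the protocol layer and reading its first sector synchronously.
 * Guarded out so it is not built. */
#if 0
static int example_file_open(const char *path)
{
    BlockDriverState *bs;
    uint8_t sector[BDRV_SECTOR_SIZE];
    int ret;

    ret = bdrv_file_open(&bs, path, BDRV_O_RDWR);
    if (ret < 0) {
        return ret;               /* e.g. -ENOENT if no protocol matches */
    }
    ret = bdrv_read(bs, 0, sector, 1);
    bdrv_delete(bs);
    return ret;
}
#endif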
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                                 drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        return -EBUSY;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
                           backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                            backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                        backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}
void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
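
/* Worked example (illustrative, not part of the original source): with a
 * qcow2-style cluster_size of 65536 bytes, c = 65536 / 512 = 128 sectors.
 * A request at sector_num = 130, nb_sectors = 4 rounds to
 *
 *     *cluster_sector_num = QEMU_ALIGN_DOWN(130, 128)         = 128
 *     *cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 4, 128) = 128
 *
 * i.e. the whole cluster [128, 256) is treated as a single region.
 */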
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /*        aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
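
/* Illustrative sketch (not part of the original source): the RwCo pattern
 * above generalizes to any coroutine_fn -- park the arguments and a NOT_DONE
 * result in a struct, run the entry function inline when already in
 * coroutine context or in a fresh coroutine otherwise, and pump
 * qemu_aio_wait() until it completes. Guarded out so it is not built. */
#if 0
static int example_sync_read(BlockDriverState *bs, uint8_t *buf)
{
    /* synchronous single-sector read; safe from non-coroutine context
     * because qemu_aio_wait() keeps dispatching I/O completions */
    return bdrv_read(bs, 0, buf, 1);
}
#endif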
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
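
/* Worked example (illustrative, not part of the original source): one bit
 * tracks one chunk of BDRV_SECTORS_PER_DIRTY_CHUNK sectors. A write at
 * sector_num = S covering N sectors touches chunks
 *
 *     start = S / BDRV_SECTORS_PER_DIRTY_CHUNK
 *     end   = (S + N - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
 *
 * and chunk k lives at word k / (8 * sizeof(unsigned long)), bit
 * k % (8 * sizeof(unsigned long)). dirty_count stays consistent because a
 * bit is only counted when it actually flips state.
 */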
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
/**
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
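
/* Illustrative sketch (not part of the original source): a barrier write of
 * this kind suits ordering-sensitive metadata updates, e.g. making an
 * on-disk header field stable before issuing writes that depend on it.
 * The field and value here are hypothetical. Guarded out so it is not
 * built. */
#if 0
static int example_update_header_field(BlockDriverState *bs)
{
    uint32_t field = 0x12345678;  /* hypothetical header word at offset 0 */

    /* durable (or O_DSYNC-equivalent) before the call returns */
    return bdrv_pwrite_sync(bs, 0, &field, sizeof(field));
}
#endif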
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
                                cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (bs->copy_on_read) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);
    return ret;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;

/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
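
/* Worked example (illustrative, not part of the original source): a 2 GiB
 * disk (4194304 sectors) whose first partition entry ends at end_head = 15,
 * end_sector = 63 yields
 *
 *     heads     = 15 + 1              = 16
 *     sectors   = 63 & 63             = 63
 *     cylinders = 4194304 / (16 * 63) = 4161
 *
 * which passes the 1..16383 sanity check, so the guessed LCHS is 4161/16/63.
 */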
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
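
/* Illustrative sketch (not part of the original source): configuring a
 * 10 MB/s total bandwidth cap and a 100 IOPS read cap; enabling happens
 * implicitly because bdrv_set_io_limits() rechecks
 * bdrv_io_limits_enabled(). Guarded out so it is not built. */
#if 0
static void example_set_io_limits(BlockDriverState *bs)
{
    BlockIOLimit limits;

    memset(&limits, 0, sizeof(limits));
    limits.bps[BLOCK_IO_LIMIT_TOTAL] = 10 * 1024 * 1024;
    limits.iops[BLOCK_IO_LIMIT_READ] = 100;

    bdrv_set_io_limits(bs, &limits);
    /* bs->io_limits_enabled is now true; subsequent reads and writes pass
     * through bdrv_io_limits_intercept() */
}
#endif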
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}

/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
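
/* Illustrative sketch (not part of the original source): walking an image in
 * allocated/unallocated runs, the same pattern bdrv_commit() uses above.
 * Guarded out so it is not built. */
#if 0
static int64_t example_count_allocated_sectors(BlockDriverState *bs,
                                               int64_t total_sectors)
{
    int64_t sector = 0, allocated = 0;
    int chunk, n;

    while (sector < total_sectors) {
        chunk = (total_sectors - sector < 2048) ?
                (int)(total_sectors - sector) : 2048;
        if (bdrv_is_allocated(bs, sector, chunk, &n)) {
            allocated += n;
        }
        sector += n;   /* n covers a whole run in either state */
    }
    return allocated;
}
#endif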
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
2313 /* Consider exposing this as a full-fledged QMP command */
2314 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2316 BlockStats *s;
2318 s = g_malloc0(sizeof(*s));
2320 if (bs->device_name[0]) {
2321 s->has_device = true;
2322 s->device = g_strdup(bs->device_name);
2325 s->stats = g_malloc0(sizeof(*s->stats));
2326 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2327 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2328 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2329 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2330 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2331 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2332 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2333 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2334 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2336 if (bs->file) {
2337 s->has_parent = true;
2338 s->parent = qmp_query_blockstat(bs->file, NULL);
2341 return s;
2344 BlockStatsList *qmp_query_blockstats(Error **errp)
2346 BlockStatsList *head = NULL, *cur_item = NULL;
2347 BlockDriverState *bs;
2349 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2350 BlockStatsList *info = g_malloc0(sizeof(*info));
2351 info->value = qmp_query_blockstat(bs, NULL);
2353 /* XXX: waiting for the qapi to support GSList */
2354 if (!cur_item) {
2355 head = cur_item = info;
2356 } else {
2357 cur_item->next = info;
2358 cur_item = info;
2362 return head;
2365 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2367 if (bs->backing_hd && bs->backing_hd->encrypted)
2368 return bs->backing_file;
2369 else if (bs->encrypted)
2370 return bs->filename;
2371 else
2372 return NULL;
2375 void bdrv_get_backing_filename(BlockDriverState *bs,
2376 char *filename, int filename_size)
2378 pstrcpy(filename, filename_size, bs->backing_file);
2381 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2382 const uint8_t *buf, int nb_sectors)
2384 BlockDriver *drv = bs->drv;
2385 if (!drv)
2386 return -ENOMEDIUM;
2387 if (!drv->bdrv_write_compressed)
2388 return -ENOTSUP;
2389 if (bdrv_check_request(bs, sector_num, nb_sectors))
2390 return -EIO;
2392 if (bs->dirty_bitmap) {
2393 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2396 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2399 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2401 BlockDriver *drv = bs->drv;
2402 if (!drv)
2403 return -ENOMEDIUM;
2404 if (!drv->bdrv_get_info)
2405 return -ENOTSUP;
2406 memset(bdi, 0, sizeof(*bdi));
2407 return drv->bdrv_get_info(bs, bdi);
2410 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2411 int64_t pos, int size)
2413 BlockDriver *drv = bs->drv;
2414 if (!drv)
2415 return -ENOMEDIUM;
2416 if (drv->bdrv_save_vmstate)
2417 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2418 if (bs->file)
2419 return bdrv_save_vmstate(bs->file, buf, pos, size);
2420 return -ENOTSUP;
2423 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2424 int64_t pos, int size)
2426 BlockDriver *drv = bs->drv;
2427 if (!drv)
2428 return -ENOMEDIUM;
2429 if (drv->bdrv_load_vmstate)
2430 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2431 if (bs->file)
2432 return bdrv_load_vmstate(bs->file, buf, pos, size);
2433 return -ENOTSUP;
2436 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2438 BlockDriver *drv = bs->drv;
2440 if (!drv || !drv->bdrv_debug_event) {
2441 return;
2444 return drv->bdrv_debug_event(bs, event);
2448 /**************************************************************/
2449 /* handling of snapshots */
2451 int bdrv_can_snapshot(BlockDriverState *bs)
2453 BlockDriver *drv = bs->drv;
2454 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2455 return 0;
2458 if (!drv->bdrv_snapshot_create) {
2459 if (bs->file != NULL) {
2460 return bdrv_can_snapshot(bs->file);
2462 return 0;
2465 return 1;
2468 int bdrv_is_snapshot(BlockDriverState *bs)
2470 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2473 BlockDriverState *bdrv_snapshots(void)
2475 BlockDriverState *bs;
2477 if (bs_snapshots) {
2478 return bs_snapshots;
2481 bs = NULL;
2482 while ((bs = bdrv_next(bs))) {
2483 if (bdrv_can_snapshot(bs)) {
2484 bs_snapshots = bs;
2485 return bs;
2488 return NULL;
2491 int bdrv_snapshot_create(BlockDriverState *bs,
2492 QEMUSnapshotInfo *sn_info)
2494 BlockDriver *drv = bs->drv;
2495 if (!drv)
2496 return -ENOMEDIUM;
2497 if (drv->bdrv_snapshot_create)
2498 return drv->bdrv_snapshot_create(bs, sn_info);
2499 if (bs->file)
2500 return bdrv_snapshot_create(bs->file, sn_info);
2501 return -ENOTSUP;
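/*
 * A minimal sketch (hypothetical helper, not part of the original file):
 * filling in a QEMUSnapshotInfo and creating an internal snapshot, relying
 * on the driver/bs->file delegation above.
 */
static int example_create_named_snapshot(BlockDriverState *bs,
                                         const char *name)
{
    QEMUSnapshotInfo sn;

    memset(&sn, 0, sizeof(sn));
    pstrcpy(sn.name, sizeof(sn.name), name);
    sn.date_sec = time(NULL);
    sn.date_nsec = 0;
    sn.vm_clock_nsec = qemu_get_clock_ns(vm_clock);

    return bdrv_snapshot_create(bs, &sn);
}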
2504 int bdrv_snapshot_goto(BlockDriverState *bs,
2505 const char *snapshot_id)
2507 BlockDriver *drv = bs->drv;
2508 int ret, open_ret;
2510 if (!drv)
2511 return -ENOMEDIUM;
2512 if (drv->bdrv_snapshot_goto)
2513 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2515 if (bs->file) {
2516 drv->bdrv_close(bs);
2517 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2518 open_ret = drv->bdrv_open(bs, bs->open_flags);
2519 if (open_ret < 0) {
2520 bdrv_delete(bs->file);
2521 bs->drv = NULL;
2522 return open_ret;
2524 return ret;
2527 return -ENOTSUP;
2530 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2532 BlockDriver *drv = bs->drv;
2533 if (!drv)
2534 return -ENOMEDIUM;
2535 if (drv->bdrv_snapshot_delete)
2536 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2537 if (bs->file)
2538 return bdrv_snapshot_delete(bs->file, snapshot_id);
2539 return -ENOTSUP;
2542 int bdrv_snapshot_list(BlockDriverState *bs,
2543 QEMUSnapshotInfo **psn_info)
2545 BlockDriver *drv = bs->drv;
2546 if (!drv)
2547 return -ENOMEDIUM;
2548 if (drv->bdrv_snapshot_list)
2549 return drv->bdrv_snapshot_list(bs, psn_info);
2550 if (bs->file)
2551 return bdrv_snapshot_list(bs->file, psn_info);
2552 return -ENOTSUP;
2555 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2556 const char *snapshot_name)
2558 BlockDriver *drv = bs->drv;
2559 if (!drv) {
2560 return -ENOMEDIUM;
2562 if (!bs->read_only) {
2563 return -EINVAL;
2565 if (drv->bdrv_snapshot_load_tmp) {
2566 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2568 return -ENOTSUP;
2571 #define NB_SUFFIXES 4
2573 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2575 static const char suffixes[NB_SUFFIXES] = "KMGT";
2576 int64_t base;
2577 int i;
2579 if (size <= 999) {
2580 snprintf(buf, buf_size, "%" PRId64, size);
2581 } else {
2582 base = 1024;
2583 for(i = 0; i < NB_SUFFIXES; i++) {
2584 if (size < (10 * base)) {
2585 snprintf(buf, buf_size, "%0.1f%c",
2586 (double)size / base,
2587 suffixes[i]);
2588 break;
2589 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2590 snprintf(buf, buf_size, "%" PRId64 "%c",
2591 ((size + (base >> 1)) / base),
2592 suffixes[i]);
2593 break;
2595 base = base * 1024;
2598 return buf;
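/*
 * A quick sketch of expected outputs (hypothetical test helper, not part
 * of the original file): sizes below 1000 stay in plain bytes, values
 * under 10*base get one decimal, larger ones are rounded integers.
 */
static void example_print_sizes(void)
{
    static const int64_t sizes[] = { 999, 1536, 512000, 1073741824LL };
    char buf[32];
    int i;

    for (i = 0; i < 4; i++) {
        /* prints "999", "1.5K", "500K", "1.0G" respectively */
        printf("%s\n", get_human_readable_size(buf, sizeof(buf), sizes[i]));
    }
}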
2601 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2603 char buf1[128], date_buf[128], clock_buf[128];
2604 #ifdef _WIN32
2605 struct tm *ptm;
2606 #else
2607 struct tm tm;
2608 #endif
2609 time_t ti;
2610 int64_t secs;
2612 if (!sn) {
2613 snprintf(buf, buf_size,
2614 "%-10s%-20s%7s%20s%15s",
2615 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2616 } else {
2617 ti = sn->date_sec;
2618 #ifdef _WIN32
2619 ptm = localtime(&ti);
2620 strftime(date_buf, sizeof(date_buf),
2621 "%Y-%m-%d %H:%M:%S", ptm);
2622 #else
2623 localtime_r(&ti, &tm);
2624 strftime(date_buf, sizeof(date_buf),
2625 "%Y-%m-%d %H:%M:%S", &tm);
2626 #endif
2627 secs = sn->vm_clock_nsec / 1000000000;
2628 snprintf(clock_buf, sizeof(clock_buf),
2629 "%02d:%02d:%02d.%03d",
2630 (int)(secs / 3600),
2631 (int)((secs / 60) % 60),
2632 (int)(secs % 60),
2633 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2634 snprintf(buf, buf_size,
2635 "%-10s%-20s%7s%20s%15s",
2636 sn->id_str, sn->name,
2637 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2638 date_buf,
2639 clock_buf);
2641 return buf;
2644 /**************************************************************/
2645 /* async I/Os */
2647 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2648 QEMUIOVector *qiov, int nb_sectors,
2649 BlockDriverCompletionFunc *cb, void *opaque)
2651 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2653 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2654 cb, opaque, false);
2657 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2658 QEMUIOVector *qiov, int nb_sectors,
2659 BlockDriverCompletionFunc *cb, void *opaque)
2661 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2663 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2664 cb, opaque, true);
2668 typedef struct MultiwriteCB {
2669 int error;
2670 int num_requests;
2671 int num_callbacks;
2672 struct {
2673 BlockDriverCompletionFunc *cb;
2674 void *opaque;
2675 QEMUIOVector *free_qiov;
2676 void *free_buf;
2677 } callbacks[];
2678 } MultiwriteCB;
2680 static void multiwrite_user_cb(MultiwriteCB *mcb)
2682 int i;
2684 for (i = 0; i < mcb->num_callbacks; i++) {
2685 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2686 if (mcb->callbacks[i].free_qiov) {
2687 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2689 g_free(mcb->callbacks[i].free_qiov);
2690 qemu_vfree(mcb->callbacks[i].free_buf);
2694 static void multiwrite_cb(void *opaque, int ret)
2696 MultiwriteCB *mcb = opaque;
2698 trace_multiwrite_cb(mcb, ret);
2700 if (ret < 0 && !mcb->error) {
2701 mcb->error = ret;
2704 mcb->num_requests--;
2705 if (mcb->num_requests == 0) {
2706 multiwrite_user_cb(mcb);
2707 g_free(mcb);
2711 static int multiwrite_req_compare(const void *a, const void *b)
2713 const BlockRequest *req1 = a, *req2 = b;
2716 * Note that we can't simply subtract req2->sector from req1->sector
2717 * here as that could overflow the return value.
2719 if (req1->sector > req2->sector) {
2720 return 1;
2721 } else if (req1->sector < req2->sector) {
2722 return -1;
2723 } else {
2724 return 0;
2729 * Takes a bunch of requests and tries to merge them. Returns the number of
2730 * requests that remain after merging.
2732 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2733 int num_reqs, MultiwriteCB *mcb)
2735 int i, outidx;
2737 // Sort requests by start sector
2738 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2740 // Check if adjacent requests touch the same clusters. If so, combine them,
2741 // filling up gaps with zero sectors.
2742 outidx = 0;
2743 for (i = 1; i < num_reqs; i++) {
2744 int merge = 0;
2745 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2747 // This handles the cases that are valid for all block drivers, namely
2748 // exactly sequential writes and overlapping writes.
2749 if (reqs[i].sector <= oldreq_last) {
2750 merge = 1;
2753 // The block driver may decide that it makes sense to combine requests
2754 // even if there is a gap of some sectors between them. In this case,
2755 // the gap is filled with zeros (therefore this is only applicable to
2756 // as-yet-unused space in formats like qcow2).
2757 if (!merge && bs->drv->bdrv_merge_requests) {
2758 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2761 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2762 merge = 0;
2765 if (merge) {
2766 size_t size;
2767 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2768 qemu_iovec_init(qiov,
2769 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2771 // Add the first request to the merged one. If the requests are
2772 // overlapping, drop the last sectors of the first request.
2773 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2774 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2776 // We might need to add some zeros between the two requests
2777 if (reqs[i].sector > oldreq_last) {
2778 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2779 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2780 memset(buf, 0, zero_bytes);
2781 qemu_iovec_add(qiov, buf, zero_bytes);
2782 mcb->callbacks[i].free_buf = buf;
2785 // Add the second request
2786 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2788 reqs[outidx].nb_sectors = qiov->size >> 9;
2789 reqs[outidx].qiov = qiov;
2791 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2792 } else {
2793 outidx++;
2794 reqs[outidx].sector = reqs[i].sector;
2795 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2796 reqs[outidx].qiov = reqs[i].qiov;
2800 return outidx + 1;
2804 * Submit multiple AIO write requests at once.
2806 * On success, the function returns 0 and all requests in the reqs array have
2807 * been submitted. On error, this function returns -1, and any of the
2808 * requests may or may not have been submitted; the callback will be
2809 * invoked for some requests but not for others. The caller must check
2810 * the error field of each BlockRequest to know which callbacks to wait
2811 * for (if error != 0, no callback will be invoked for that request).
2813 * The implementation may modify the contents of the reqs array, e.g. to merge
2814 * requests. However, the fields opaque and error are left unmodified as they
2815 * are used to signal failure for a single request to the caller.
2817 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2819 MultiwriteCB *mcb;
2820 int i;
2822 /* don't submit writes if we don't have a medium */
2823 if (bs->drv == NULL) {
2824 for (i = 0; i < num_reqs; i++) {
2825 reqs[i].error = -ENOMEDIUM;
2827 return -1;
2830 if (num_reqs == 0) {
2831 return 0;
2834 // Create MultiwriteCB structure
2835 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2836 mcb->num_requests = 0;
2837 mcb->num_callbacks = num_reqs;
2839 for (i = 0; i < num_reqs; i++) {
2840 mcb->callbacks[i].cb = reqs[i].cb;
2841 mcb->callbacks[i].opaque = reqs[i].opaque;
2844 // Check for mergeable requests
2845 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2847 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2849 /* Run the aio requests. */
2850 mcb->num_requests = num_reqs;
2851 for (i = 0; i < num_reqs; i++) {
2852 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2853 reqs[i].nb_sectors, multiwrite_cb, mcb);
2856 return 0;
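/*
 * A minimal sketch (hypothetical caller, not part of the original file):
 * submitting two writes in one batch. Per the contract above, on -1 the
 * caller inspects each reqs[i].error to see which callbacks still arrive.
 */
static void example_multiwrite(BlockDriverState *bs,
                               QEMUIOVector *qiov0, QEMUIOVector *qiov1,
                               BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockRequest reqs[2];
    int i;

    memset(reqs, 0, sizeof(reqs));
    reqs[0].sector = 0;
    reqs[0].nb_sectors = qiov0->size >> 9;
    reqs[0].qiov = qiov0;
    reqs[0].cb = cb;
    reqs[0].opaque = opaque;

    reqs[1].sector = reqs[0].nb_sectors;   /* adjacent, so it may be merged */
    reqs[1].nb_sectors = qiov1->size >> 9;
    reqs[1].qiov = qiov1;
    reqs[1].cb = cb;
    reqs[1].opaque = opaque;

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        for (i = 0; i < 2; i++) {
            if (reqs[i].error) {
                /* no callback will be invoked for this request */
            }
        }
    }
}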
2859 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2861 acb->pool->cancel(acb);
2864 /* block I/O throttling */
2865 static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2866 bool is_write, double elapsed_time, uint64_t *wait)
2868 uint64_t bps_limit = 0;
2869 double bytes_limit, bytes_base, bytes_res;
2870 double slice_time, wait_time;
2872 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2873 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2874 } else if (bs->io_limits.bps[is_write]) {
2875 bps_limit = bs->io_limits.bps[is_write];
2876 } else {
2877 if (wait) {
2878 *wait = 0;
2881 return false;
2884 slice_time = bs->slice_end - bs->slice_start;
2885 slice_time /= (NANOSECONDS_PER_SECOND);
2886 bytes_limit = bps_limit * slice_time;
2887 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2888 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2889 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2892 /* bytes_base: the number of bytes already read/written in this slice,
2893 * taken from the accumulated statistics.
2894 * bytes_res: the number of bytes still to be read/written by this request.
2895 * (bytes_base + bytes_res) / bps_limit: the total time needed to
2896 * complete reading/writing all of the data.
2898 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2900 if (bytes_base + bytes_res <= bytes_limit) {
2901 if (wait) {
2902 *wait = 0;
2905 return false;
2908 /* Calc approx time to dispatch */
2909 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2911 /* When the I/O rate at runtime exceeds the limits, bs->slice_end needs
2912 * to be extended so that the current statistics are kept until the
2913 * timer fires; the amount of the extension below was tuned
2914 * experimentally.
2916 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2917 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2918 if (wait) {
2919 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2922 return true;
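/*
 * Worked example (illustrative figures only): with bps_limit = 1 MB/s
 * (1048576 bytes/s) and a 100 ms slice, bytes_limit is 104857.6. If
 * 90 KB (92160 bytes) were already transferred in this slice and a 32 KB
 * (32768 byte) request arrives, bytes_base + bytes_res = 124928 exceeds
 * bytes_limit, so the request is throttled for roughly
 * 124928 / 1048576 - elapsed_time seconds before dispatch.
 */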
2925 static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2926 double elapsed_time, uint64_t *wait)
2928 uint64_t iops_limit = 0;
2929 double ios_limit, ios_base;
2930 double slice_time, wait_time;
2932 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2933 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2934 } else if (bs->io_limits.iops[is_write]) {
2935 iops_limit = bs->io_limits.iops[is_write];
2936 } else {
2937 if (wait) {
2938 *wait = 0;
2941 return false;
2944 slice_time = bs->slice_end - bs->slice_start;
2945 slice_time /= (NANOSECONDS_PER_SECOND);
2946 ios_limit = iops_limit * slice_time;
2947 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2948 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2949 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2952 if (ios_base + 1 <= ios_limit) {
2953 if (wait) {
2954 *wait = 0;
2957 return false;
2960 /* Calc approx time to dispatch */
2961 wait_time = (ios_base + 1) / iops_limit;
2962 if (wait_time > elapsed_time) {
2963 wait_time = wait_time - elapsed_time;
2964 } else {
2965 wait_time = 0;
2968 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2969 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2970 if (wait) {
2971 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2974 return true;
2977 static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2978 bool is_write, int64_t *wait)
2980 int64_t now, max_wait;
2981 uint64_t bps_wait = 0, iops_wait = 0;
2982 double elapsed_time;
2983 int bps_ret, iops_ret;
2985 now = qemu_get_clock_ns(vm_clock);
2986 if ((bs->slice_start < now)
2987 && (bs->slice_end > now)) {
2988 bs->slice_end = now + bs->slice_time;
2989 } else {
2990 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
2991 bs->slice_start = now;
2992 bs->slice_end = now + bs->slice_time;
2994 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
2995 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
2997 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
2998 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3001 elapsed_time = now - bs->slice_start;
3002 elapsed_time /= (NANOSECONDS_PER_SECOND);
3004 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3005 is_write, elapsed_time, &bps_wait);
3006 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3007 elapsed_time, &iops_wait);
3008 if (bps_ret || iops_ret) {
3009 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3010 if (wait) {
3011 *wait = max_wait;
3014 now = qemu_get_clock_ns(vm_clock);
3015 if (bs->slice_end < now + max_wait) {
3016 bs->slice_end = now + max_wait;
3019 return true;
3022 if (wait) {
3023 *wait = 0;
3026 return false;
3029 /**************************************************************/
3030 /* async block device emulation */
3032 typedef struct BlockDriverAIOCBSync {
3033 BlockDriverAIOCB common;
3034 QEMUBH *bh;
3035 int ret;
3036 /* vector translation state */
3037 QEMUIOVector *qiov;
3038 uint8_t *bounce;
3039 int is_write;
3040 } BlockDriverAIOCBSync;
3042 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3044 BlockDriverAIOCBSync *acb =
3045 container_of(blockacb, BlockDriverAIOCBSync, common);
3046 qemu_bh_delete(acb->bh);
3047 acb->bh = NULL;
3048 qemu_aio_release(acb);
3051 static AIOPool bdrv_em_aio_pool = {
3052 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3053 .cancel = bdrv_aio_cancel_em,
3056 static void bdrv_aio_bh_cb(void *opaque)
3058 BlockDriverAIOCBSync *acb = opaque;
3060 if (!acb->is_write)
3061 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
3062 qemu_vfree(acb->bounce);
3063 acb->common.cb(acb->common.opaque, acb->ret);
3064 qemu_bh_delete(acb->bh);
3065 acb->bh = NULL;
3066 qemu_aio_release(acb);
3069 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3070 int64_t sector_num,
3071 QEMUIOVector *qiov,
3072 int nb_sectors,
3073 BlockDriverCompletionFunc *cb,
3074 void *opaque,
3075 int is_write)
3078 BlockDriverAIOCBSync *acb;
3080 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
3081 acb->is_write = is_write;
3082 acb->qiov = qiov;
3083 acb->bounce = qemu_blockalign(bs, qiov->size);
3084 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
3086 if (is_write) {
3087 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
3088 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
3089 } else {
3090 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
3093 qemu_bh_schedule(acb->bh);
3095 return &acb->common;
3098 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3099 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3100 BlockDriverCompletionFunc *cb, void *opaque)
3102 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3105 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3106 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3107 BlockDriverCompletionFunc *cb, void *opaque)
3109 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3113 typedef struct BlockDriverAIOCBCoroutine {
3114 BlockDriverAIOCB common;
3115 BlockRequest req;
3116 bool is_write;
3117 QEMUBH* bh;
3118 } BlockDriverAIOCBCoroutine;
3120 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3122 qemu_aio_flush();
3125 static AIOPool bdrv_em_co_aio_pool = {
3126 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3127 .cancel = bdrv_aio_co_cancel_em,
3130 static void bdrv_co_em_bh(void *opaque)
3132 BlockDriverAIOCBCoroutine *acb = opaque;
3134 acb->common.cb(acb->common.opaque, acb->req.error);
3135 qemu_bh_delete(acb->bh);
3136 qemu_aio_release(acb);
3139 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3140 static void coroutine_fn bdrv_co_do_rw(void *opaque)
3142 BlockDriverAIOCBCoroutine *acb = opaque;
3143 BlockDriverState *bs = acb->common.bs;
3145 if (!acb->is_write) {
3146 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3147 acb->req.nb_sectors, acb->req.qiov);
3148 } else {
3149 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3150 acb->req.nb_sectors, acb->req.qiov);
3153 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3154 qemu_bh_schedule(acb->bh);
3157 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3158 int64_t sector_num,
3159 QEMUIOVector *qiov,
3160 int nb_sectors,
3161 BlockDriverCompletionFunc *cb,
3162 void *opaque,
3163 bool is_write)
3165 Coroutine *co;
3166 BlockDriverAIOCBCoroutine *acb;
3168 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3169 acb->req.sector = sector_num;
3170 acb->req.nb_sectors = nb_sectors;
3171 acb->req.qiov = qiov;
3172 acb->is_write = is_write;
3174 co = qemu_coroutine_create(bdrv_co_do_rw);
3175 qemu_coroutine_enter(co, acb);
3177 return &acb->common;
3180 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
3182 BlockDriverAIOCBCoroutine *acb = opaque;
3183 BlockDriverState *bs = acb->common.bs;
3185 acb->req.error = bdrv_co_flush(bs);
3186 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3187 qemu_bh_schedule(acb->bh);
3190 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
3191 BlockDriverCompletionFunc *cb, void *opaque)
3193 trace_bdrv_aio_flush(bs, opaque);
3195 Coroutine *co;
3196 BlockDriverAIOCBCoroutine *acb;
3198 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3199 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3200 qemu_coroutine_enter(co, acb);
3202 return &acb->common;
3205 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3207 BlockDriverAIOCBCoroutine *acb = opaque;
3208 BlockDriverState *bs = acb->common.bs;
3210 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3211 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3212 qemu_bh_schedule(acb->bh);
3215 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3216 int64_t sector_num, int nb_sectors,
3217 BlockDriverCompletionFunc *cb, void *opaque)
3219 Coroutine *co;
3220 BlockDriverAIOCBCoroutine *acb;
3222 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3224 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3225 acb->req.sector = sector_num;
3226 acb->req.nb_sectors = nb_sectors;
3227 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3228 qemu_coroutine_enter(co, acb);
3230 return &acb->common;
3233 void bdrv_init(void)
3235 module_call_init(MODULE_INIT_BLOCK);
3238 void bdrv_init_with_whitelist(void)
3240 use_bdrv_whitelist = 1;
3241 bdrv_init();
3244 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3245 BlockDriverCompletionFunc *cb, void *opaque)
3247 BlockDriverAIOCB *acb;
3249 if (pool->free_aiocb) {
3250 acb = pool->free_aiocb;
3251 pool->free_aiocb = acb->next;
3252 } else {
3253 acb = g_malloc0(pool->aiocb_size);
3254 acb->pool = pool;
3256 acb->bs = bs;
3257 acb->cb = cb;
3258 acb->opaque = opaque;
3259 return acb;
3262 void qemu_aio_release(void *p)
3264 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3265 AIOPool *pool = acb->pool;
3266 acb->next = pool->free_aiocb;
3267 pool->free_aiocb = acb;
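/*
 * A minimal sketch (hypothetical driver code, not part of the original
 * file): AIOCBs are recycled through a per-pool free list, so a driver
 * declares one AIOPool per AIOCB type and brackets each request with
 * qemu_aio_get()/qemu_aio_release().
 */
typedef struct ExampleAIOCB {
    BlockDriverAIOCB common;
    int my_state;                      /* driver-specific fields follow */
} ExampleAIOCB;

static void example_aio_cancel(BlockDriverAIOCB *acb)
{
    qemu_aio_release(acb);             /* illustrative only */
}

static AIOPool example_aio_pool = {
    .aiocb_size = sizeof(ExampleAIOCB),
    .cancel = example_aio_cancel,
};

static BlockDriverAIOCB *example_start_request(BlockDriverState *bs,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque)
{
    ExampleAIOCB *acb = qemu_aio_get(&example_aio_pool, bs, cb, opaque);

    acb->my_state = 0;
    /* ... kick off the actual I/O; on completion call acb->common.cb()
     * and then qemu_aio_release(acb) ... */
    return &acb->common;
}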
3270 /**************************************************************/
3271 /* Coroutine block device emulation */
3273 typedef struct CoroutineIOCompletion {
3274 Coroutine *coroutine;
3275 int ret;
3276 } CoroutineIOCompletion;
3278 static void bdrv_co_io_em_complete(void *opaque, int ret)
3280 CoroutineIOCompletion *co = opaque;
3282 co->ret = ret;
3283 qemu_coroutine_enter(co->coroutine, NULL);
3286 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3287 int nb_sectors, QEMUIOVector *iov,
3288 bool is_write)
3290 CoroutineIOCompletion co = {
3291 .coroutine = qemu_coroutine_self(),
3293 BlockDriverAIOCB *acb;
3295 if (is_write) {
3296 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3297 bdrv_co_io_em_complete, &co);
3298 } else {
3299 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3300 bdrv_co_io_em_complete, &co);
3303 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
3304 if (!acb) {
3305 return -EIO;
3307 qemu_coroutine_yield();
3309 return co.ret;
3312 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3313 int64_t sector_num, int nb_sectors,
3314 QEMUIOVector *iov)
3316 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3319 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3320 int64_t sector_num, int nb_sectors,
3321 QEMUIOVector *iov)
3323 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
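/*
 * The same bridging pattern reduced to its skeleton (a sketch;
 * example_start_aio() is a hypothetical stand-in for any callback-based
 * AIO operation): fire the request with bdrv_co_io_em_complete() as the
 * callback, yield, and read the result once the callback re-enters us.
 */
extern BlockDriverAIOCB *example_start_aio(BlockDriverState *bs,
                                           BlockDriverCompletionFunc *cb,
                                           void *opaque);

static int coroutine_fn example_co_wait(BlockDriverState *bs)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    acb = example_start_aio(bs, bdrv_co_io_em_complete, &co);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();      /* resumed by bdrv_co_io_em_complete() */
    return co.ret;
}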
3326 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
3328 RwCo *rwco = opaque;
3330 rwco->ret = bdrv_co_flush(rwco->bs);
3333 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3335 int ret;
3337 if (!bs->drv) {
3338 return 0;
3341 /* Write back cached data to the OS even with cache=unsafe */
3342 if (bs->drv->bdrv_co_flush_to_os) {
3343 ret = bs->drv->bdrv_co_flush_to_os(bs);
3344 if (ret < 0) {
3345 return ret;
3349 /* But don't actually force it to the disk with cache=unsafe */
3350 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3351 return 0;
3354 if (bs->drv->bdrv_co_flush_to_disk) {
3355 return bs->drv->bdrv_co_flush_to_disk(bs);
3356 } else if (bs->drv->bdrv_aio_flush) {
3357 BlockDriverAIOCB *acb;
3358 CoroutineIOCompletion co = {
3359 .coroutine = qemu_coroutine_self(),
3362 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3363 if (acb == NULL) {
3364 return -EIO;
3365 } else {
3366 qemu_coroutine_yield();
3367 return co.ret;
3369 } else {
3371 * Some block drivers always operate in either writethrough or unsafe
3372 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
3373 * know how the server works (because the behaviour is hardcoded or
3374 * depends on server-side configuration), so we can't ensure that
3375 * everything is safe on disk. Returning an error doesn't work because
3376 * that would break guests even if the server operates in writethrough
3377 * mode.
3379 * Let's hope the user knows what he's doing.
3381 return 0;
3385 void bdrv_invalidate_cache(BlockDriverState *bs)
3387 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3388 bs->drv->bdrv_invalidate_cache(bs);
3392 void bdrv_invalidate_cache_all(void)
3394 BlockDriverState *bs;
3396 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3397 bdrv_invalidate_cache(bs);
3401 int bdrv_flush(BlockDriverState *bs)
3403 Coroutine *co;
3404 RwCo rwco = {
3405 .bs = bs,
3406 .ret = NOT_DONE,
3409 if (qemu_in_coroutine()) {
3410 /* Fast-path if already in coroutine context */
3411 bdrv_flush_co_entry(&rwco);
3412 } else {
3413 co = qemu_coroutine_create(bdrv_flush_co_entry);
3414 qemu_coroutine_enter(co, &rwco);
3415 while (rwco.ret == NOT_DONE) {
3416 qemu_aio_wait();
3420 return rwco.ret;
3423 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3425 RwCo *rwco = opaque;
3427 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3430 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3431 int nb_sectors)
3433 if (!bs->drv) {
3434 return -ENOMEDIUM;
3435 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3436 return -EIO;
3437 } else if (bs->read_only) {
3438 return -EROFS;
3439 } else if (bs->drv->bdrv_co_discard) {
3440 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3441 } else if (bs->drv->bdrv_aio_discard) {
3442 BlockDriverAIOCB *acb;
3443 CoroutineIOCompletion co = {
3444 .coroutine = qemu_coroutine_self(),
3447 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3448 bdrv_co_io_em_complete, &co);
3449 if (acb == NULL) {
3450 return -EIO;
3451 } else {
3452 qemu_coroutine_yield();
3453 return co.ret;
3455 } else {
3456 return 0;
3460 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3462 Coroutine *co;
3463 RwCo rwco = {
3464 .bs = bs,
3465 .sector_num = sector_num,
3466 .nb_sectors = nb_sectors,
3467 .ret = NOT_DONE,
3470 if (qemu_in_coroutine()) {
3471 /* Fast-path if already in coroutine context */
3472 bdrv_discard_co_entry(&rwco);
3473 } else {
3474 co = qemu_coroutine_create(bdrv_discard_co_entry);
3475 qemu_coroutine_enter(co, &rwco);
3476 while (rwco.ret == NOT_DONE) {
3477 qemu_aio_wait();
3481 return rwco.ret;
3484 /**************************************************************/
3485 /* removable device support */
3488 * Return TRUE if the media is present
3490 int bdrv_is_inserted(BlockDriverState *bs)
3492 BlockDriver *drv = bs->drv;
3494 if (!drv)
3495 return 0;
3496 if (!drv->bdrv_is_inserted)
3497 return 1;
3498 return drv->bdrv_is_inserted(bs);
3502 * Return whether the media changed since the last call to this
3503 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3505 int bdrv_media_changed(BlockDriverState *bs)
3507 BlockDriver *drv = bs->drv;
3509 if (drv && drv->bdrv_media_changed) {
3510 return drv->bdrv_media_changed(bs);
3512 return -ENOTSUP;
3516 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3518 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3520 BlockDriver *drv = bs->drv;
3522 if (drv && drv->bdrv_eject) {
3523 drv->bdrv_eject(bs, eject_flag);
3528 * Lock or unlock the media (if it is locked, the user won't be able
3529 * to eject it manually).
3531 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3533 BlockDriver *drv = bs->drv;
3535 trace_bdrv_lock_medium(bs, locked);
3537 if (drv && drv->bdrv_lock_medium) {
3538 drv->bdrv_lock_medium(bs, locked);
3542 /* needed for generic scsi interface */
3544 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3546 BlockDriver *drv = bs->drv;
3548 if (drv && drv->bdrv_ioctl)
3549 return drv->bdrv_ioctl(bs, req, buf);
3550 return -ENOTSUP;
3553 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3554 unsigned long int req, void *buf,
3555 BlockDriverCompletionFunc *cb, void *opaque)
3557 BlockDriver *drv = bs->drv;
3559 if (drv && drv->bdrv_aio_ioctl)
3560 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3561 return NULL;
3564 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3566 bs->buffer_alignment = align;
3569 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3571 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3574 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3576 int64_t bitmap_size;
3578 bs->dirty_count = 0;
3579 if (enable) {
3580 if (!bs->dirty_bitmap) {
3581 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3582 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3583 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3585 bs->dirty_bitmap = g_malloc0(bitmap_size);
3587 } else {
3588 if (bs->dirty_bitmap) {
3589 g_free(bs->dirty_bitmap);
3590 bs->dirty_bitmap = NULL;
3595 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3597 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3599 if (bs->dirty_bitmap &&
3600 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3601 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3602 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3603 } else {
3604 return 0;
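/*
 * Worked example (illustrative figures, assuming BDRV_SECTORS_PER_DIRTY_CHUNK
 * is 2048, i.e. one dirty bit tracks 1 MB of the image): a 10 GB image has
 * 10240 chunks, so the bitmap occupies 1280 bytes. Sector 5000000 lies in
 * chunk 5000000 / 2048 = 2441; on a 64-bit host that is bit 2441 % 64 = 9
 * of word 2441 / 64 = 38.
 */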
3608 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3609 int nr_sectors)
3611 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3614 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3616 return bs->dirty_count;
3619 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3621 assert(bs->in_use != in_use);
3622 bs->in_use = in_use;
3625 int bdrv_in_use(BlockDriverState *bs)
3627 return bs->in_use;
3630 void bdrv_iostatus_enable(BlockDriverState *bs)
3632 bs->iostatus_enabled = true;
3633 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3636 /* The I/O status is only enabled if the drive explicitly
3637 * enables it _and_ the VM is configured to stop on errors */
3638 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3640 return (bs->iostatus_enabled &&
3641 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3642 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3643 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3646 void bdrv_iostatus_disable(BlockDriverState *bs)
3648 bs->iostatus_enabled = false;
3651 void bdrv_iostatus_reset(BlockDriverState *bs)
3653 if (bdrv_iostatus_is_enabled(bs)) {
3654 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3658 /* XXX: Today this is set by device models because it makes the implementation
3659 quite simple. However, the block layer knows about the error, so it's
3660 possible to implement this without device models being involved */
3661 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3663 if (bdrv_iostatus_is_enabled(bs) &&
3664 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3665 assert(error >= 0);
3666 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3667 BLOCK_DEVICE_IO_STATUS_FAILED;
3671 void
3672 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3673 enum BlockAcctType type)
3675 assert(type < BDRV_MAX_IOTYPE);
3677 cookie->bytes = bytes;
3678 cookie->start_time_ns = get_clock();
3679 cookie->type = type;
3682 void
3683 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3685 assert(cookie->type < BDRV_MAX_IOTYPE);
3687 bs->nr_bytes[cookie->type] += cookie->bytes;
3688 bs->nr_ops[cookie->type]++;
3689 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
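/*
 * A minimal sketch (hypothetical device code, not part of the original
 * file): the accounting cookie brackets one request so that bytes, ops
 * and latency are charged to the right BDRV_ACCT_* bucket.
 */
static int example_accounted_read(BlockDriverState *bs, uint8_t *buf,
                                  int64_t sector_num, int nb_sectors)
{
    BlockAcctCookie cookie;
    int ret;

    bdrv_acct_start(bs, &cookie, (int64_t)nb_sectors * BDRV_SECTOR_SIZE,
                    BDRV_ACCT_READ);
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bdrv_acct_done(bs, &cookie);
    return ret;
}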
3692 int bdrv_img_create(const char *filename, const char *fmt,
3693 const char *base_filename, const char *base_fmt,
3694 char *options, uint64_t img_size, int flags)
3696 QEMUOptionParameter *param = NULL, *create_options = NULL;
3697 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3698 BlockDriverState *bs = NULL;
3699 BlockDriver *drv, *proto_drv;
3700 BlockDriver *backing_drv = NULL;
3701 int ret = 0;
3703 /* Find driver and parse its options */
3704 drv = bdrv_find_format(fmt);
3705 if (!drv) {
3706 error_report("Unknown file format '%s'", fmt);
3707 ret = -EINVAL;
3708 goto out;
3711 proto_drv = bdrv_find_protocol(filename);
3712 if (!proto_drv) {
3713 error_report("Unknown protocol '%s'", filename);
3714 ret = -EINVAL;
3715 goto out;
3718 create_options = append_option_parameters(create_options,
3719 drv->create_options);
3720 create_options = append_option_parameters(create_options,
3721 proto_drv->create_options);
3723 /* Create parameter list with default values */
3724 param = parse_option_parameters("", create_options, param);
3726 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3728 /* Parse -o options */
3729 if (options) {
3730 param = parse_option_parameters(options, create_options, param);
3731 if (param == NULL) {
3732 error_report("Invalid options for file format '%s'.", fmt);
3733 ret = -EINVAL;
3734 goto out;
3738 if (base_filename) {
3739 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3740 base_filename)) {
3741 error_report("Backing file not supported for file format '%s'",
3742 fmt);
3743 ret = -EINVAL;
3744 goto out;
3748 if (base_fmt) {
3749 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3750 error_report("Backing file format not supported for file "
3751 "format '%s'", fmt);
3752 ret = -EINVAL;
3753 goto out;
3757 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3758 if (backing_file && backing_file->value.s) {
3759 if (!strcmp(filename, backing_file->value.s)) {
3760 error_report("Error: Trying to create an image with the "
3761 "same filename as the backing file");
3762 ret = -EINVAL;
3763 goto out;
3767 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3768 if (backing_fmt && backing_fmt->value.s) {
3769 backing_drv = bdrv_find_format(backing_fmt->value.s);
3770 if (!backing_drv) {
3771 error_report("Unknown backing file format '%s'",
3772 backing_fmt->value.s);
3773 ret = -EINVAL;
3774 goto out;
3778 // The size for the image must always be specified, with one exception:
3779 // If we are using a backing file, we can obtain the size from there
3780 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3781 if (size && size->value.n == -1) {
3782 if (backing_file && backing_file->value.s) {
3783 uint64_t size;
3784 char buf[32];
3786 bs = bdrv_new("");
3788 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3789 if (ret < 0) {
3790 error_report("Could not open '%s'", backing_file->value.s);
3791 goto out;
3793 bdrv_get_geometry(bs, &size);
3794 size *= 512;
3796 snprintf(buf, sizeof(buf), "%" PRId64, size);
3797 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3798 } else {
3799 error_report("Image creation needs a size parameter");
3800 ret = -EINVAL;
3801 goto out;
3805 printf("Formatting '%s', fmt=%s ", filename, fmt);
3806 print_option_parameters(param);
3807 puts("");
3809 ret = bdrv_create(drv, filename, param);
3811 if (ret < 0) {
3812 if (ret == -ENOTSUP) {
3813 error_report("Formatting or formatting option not supported for "
3814 "file format '%s'", fmt);
3815 } else if (ret == -EFBIG) {
3816 error_report("The image size is too large for file format '%s'",
3817 fmt);
3818 } else {
3819 error_report("%s: error while creating %s: %s", filename, fmt,
3820 strerror(-ret));
3824 out:
3825 free_option_parameters(create_options);
3826 free_option_parameters(param);
3828 if (bs) {
3829 bdrv_delete(bs);
3832 return ret;
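/*
 * A minimal usage sketch (hypothetical file names, not part of the
 * original file): creating a 10 GB qcow2 overlay on top of an existing
 * raw image, roughly what qemu-img create does after argument parsing.
 */
static int example_create_overlay(void)
{
    return bdrv_img_create("overlay.qcow2", "qcow2",
                           "base.raw", "raw",
                           NULL,                    /* no extra -o options */
                           (uint64_t)10 << 30,      /* 10 GB */
                           0);                      /* default open flags */
}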