2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33 #include "qemu-timer.h"
36 #include <sys/types.h>
38 #include <sys/ioctl.h>
39 #include <sys/queue.h>
49 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
52 BDRV_REQ_COPY_ON_READ
= 0x1,
53 BDRV_REQ_ZERO_WRITE
= 0x2,
56 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
57 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
58 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
59 BlockDriverCompletionFunc
*cb
, void *opaque
);
60 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
61 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
62 BlockDriverCompletionFunc
*cb
, void *opaque
);
63 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
64 int64_t sector_num
, int nb_sectors
,
66 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
67 int64_t sector_num
, int nb_sectors
,
69 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
70 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
71 BdrvRequestFlags flags
);
72 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
73 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
74 BdrvRequestFlags flags
);
75 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
79 BlockDriverCompletionFunc
*cb
,
82 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
84 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
85 bool is_write
, double elapsed_time
, uint64_t *wait
);
86 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
87 double elapsed_time
, uint64_t *wait
);
88 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
89 bool is_write
, int64_t *wait
);
91 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
92 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
94 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
95 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
97 /* The device to use for VM snapshots */
98 static BlockDriverState
*bs_snapshots
;
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist
;
/*
 * Return non-zero if @filename begins with a Windows drive-letter prefix
 * such as "c:" or "Z:" (a single ASCII letter followed by a colon).
 * Only the first two characters are inspected.
 */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
111 int is_windows_drive(const char *filename
)
113 if (is_windows_drive_prefix(filename
) &&
116 if (strstart(filename
, "\\\\.\\", NULL
) ||
117 strstart(filename
, "//./", NULL
))
123 /* throttling disk I/O limits */
124 void bdrv_io_limits_disable(BlockDriverState
*bs
)
126 bs
->io_limits_enabled
= false;
128 while (qemu_co_queue_next(&bs
->throttled_reqs
));
130 if (bs
->block_timer
) {
131 qemu_del_timer(bs
->block_timer
);
132 qemu_free_timer(bs
->block_timer
);
133 bs
->block_timer
= NULL
;
139 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
142 static void bdrv_block_timer(void *opaque
)
144 BlockDriverState
*bs
= opaque
;
146 qemu_co_queue_next(&bs
->throttled_reqs
);
149 void bdrv_io_limits_enable(BlockDriverState
*bs
)
151 qemu_co_queue_init(&bs
->throttled_reqs
);
152 bs
->block_timer
= qemu_new_timer_ns(vm_clock
, bdrv_block_timer
, bs
);
153 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
154 bs
->slice_start
= qemu_get_clock_ns(vm_clock
);
155 bs
->slice_end
= bs
->slice_start
+ bs
->slice_time
;
156 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
157 bs
->io_limits_enabled
= true;
160 bool bdrv_io_limits_enabled(BlockDriverState
*bs
)
162 BlockIOLimit
*io_limits
= &bs
->io_limits
;
163 return io_limits
->bps
[BLOCK_IO_LIMIT_READ
]
164 || io_limits
->bps
[BLOCK_IO_LIMIT_WRITE
]
165 || io_limits
->bps
[BLOCK_IO_LIMIT_TOTAL
]
166 || io_limits
->iops
[BLOCK_IO_LIMIT_READ
]
167 || io_limits
->iops
[BLOCK_IO_LIMIT_WRITE
]
168 || io_limits
->iops
[BLOCK_IO_LIMIT_TOTAL
];
171 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
172 bool is_write
, int nb_sectors
)
174 int64_t wait_time
= -1;
176 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
177 qemu_co_queue_wait(&bs
->throttled_reqs
);
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
187 while (bdrv_exceed_io_limits(bs
, nb_sectors
, is_write
, &wait_time
)) {
188 qemu_mod_timer(bs
->block_timer
,
189 wait_time
+ qemu_get_clock_ns(vm_clock
));
190 qemu_co_queue_wait_insert_head(&bs
->throttled_reqs
);
193 qemu_co_queue_next(&bs
->throttled_reqs
);
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* a drive letter ("c:") is not a protocol prefix */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}
209 int path_is_absolute(const char *path
)
213 /* specific case for names like: "\\.\d:" */
214 if (*path
== '/' || *path
== '\\')
217 p
= strchr(path
, ':');
223 return (*p
== '/' || *p
== '\\');
229 /* if filename is absolute, just copy it to dest. Otherwise, build a
230 path to it by considering it is relative to base_path. URL are
232 void path_combine(char *dest
, int dest_size
,
233 const char *base_path
,
234 const char *filename
)
241 if (path_is_absolute(filename
)) {
242 pstrcpy(dest
, dest_size
, filename
);
244 p
= strchr(base_path
, ':');
249 p1
= strrchr(base_path
, '/');
253 p2
= strrchr(base_path
, '\\');
265 if (len
> dest_size
- 1)
267 memcpy(dest
, base_path
, len
);
269 pstrcat(dest
, dest_size
, filename
);
273 void bdrv_register(BlockDriver
*bdrv
)
275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv
->bdrv_co_readv
) {
277 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
278 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
283 if (!bdrv
->bdrv_aio_readv
) {
284 /* add AIO emulation layer */
285 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
286 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
290 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
293 /* create a new block device (by default it is empty) */
294 BlockDriverState
*bdrv_new(const char *device_name
)
296 BlockDriverState
*bs
;
298 bs
= g_malloc0(sizeof(BlockDriverState
));
299 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
300 if (device_name
[0] != '\0') {
301 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
303 bdrv_iostatus_disable(bs
);
307 BlockDriver
*bdrv_find_format(const char *format_name
)
310 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
311 if (!strcmp(drv1
->format_name
, format_name
)) {
318 static int bdrv_is_whitelisted(BlockDriver
*drv
)
320 static const char *whitelist
[] = {
321 CONFIG_BDRV_WHITELIST
326 return 1; /* no whitelist, anything goes */
328 for (p
= whitelist
; *p
; p
++) {
329 if (!strcmp(drv
->format_name
, *p
)) {
336 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
338 BlockDriver
*drv
= bdrv_find_format(format_name
);
339 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
342 int bdrv_create(BlockDriver
*drv
, const char* filename
,
343 QEMUOptionParameter
*options
)
345 if (!drv
->bdrv_create
)
348 return drv
->bdrv_create(filename
, options
);
351 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
355 drv
= bdrv_find_protocol(filename
);
360 return bdrv_create(drv
, filename
, options
);
364 void get_tmp_filename(char *filename
, int size
)
366 char temp_dir
[MAX_PATH
];
368 GetTempPath(MAX_PATH
, temp_dir
);
369 GetTempFileName(temp_dir
, "qem", 0, filename
);
372 void get_tmp_filename(char *filename
, int size
)
376 /* XXX: race condition possible */
377 tmpdir
= getenv("TMPDIR");
380 snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
);
381 fd
= mkstemp(filename
);
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
390 static BlockDriver
*find_hdev_driver(const char *filename
)
392 int score_max
= 0, score
;
393 BlockDriver
*drv
= NULL
, *d
;
395 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
396 if (d
->bdrv_probe_device
) {
397 score
= d
->bdrv_probe_device(filename
);
398 if (score
> score_max
) {
408 BlockDriver
*bdrv_find_protocol(const char *filename
)
415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
424 drv1
= find_hdev_driver(filename
);
429 if (!path_has_protocol(filename
)) {
430 return bdrv_find_format("file");
432 p
= strchr(filename
, ':');
435 if (len
> sizeof(protocol
) - 1)
436 len
= sizeof(protocol
) - 1;
437 memcpy(protocol
, filename
, len
);
438 protocol
[len
] = '\0';
439 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
440 if (drv1
->protocol_name
&&
441 !strcmp(drv1
->protocol_name
, protocol
)) {
448 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
450 int ret
, score
, score_max
;
451 BlockDriver
*drv1
, *drv
;
453 BlockDriverState
*bs
;
455 ret
= bdrv_file_open(&bs
, filename
, 0);
461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
464 drv
= bdrv_find_format("raw");
472 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
481 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
482 if (drv1
->bdrv_probe
) {
483 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
484 if (score
> score_max
) {
498 * Set the current 'total_sectors' value
500 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
502 BlockDriver
*drv
= bs
->drv
;
504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv
->bdrv_getlength
) {
510 int64_t length
= drv
->bdrv_getlength(bs
);
514 hint
= length
>> BDRV_SECTOR_BITS
;
517 bs
->total_sectors
= hint
;
522 * Set open flags for a given cache mode
524 * Return 0 on success, -1 if the cache mode was invalid.
526 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
528 *flags
&= ~BDRV_O_CACHE_MASK
;
530 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
531 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
532 } else if (!strcmp(mode
, "directsync")) {
533 *flags
|= BDRV_O_NOCACHE
;
534 } else if (!strcmp(mode
, "writeback")) {
535 *flags
|= BDRV_O_CACHE_WB
;
536 } else if (!strcmp(mode
, "unsafe")) {
537 *flags
|= BDRV_O_CACHE_WB
;
538 *flags
|= BDRV_O_NO_FLUSH
;
539 } else if (!strcmp(mode
, "writethrough")) {
540 /* this is the default */
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
553 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
558 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
560 assert(bs
->copy_on_read
> 0);
565 * Common part for opening disk images and files
567 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
568 int flags
, BlockDriver
*drv
)
574 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
577 bs
->total_sectors
= 0;
581 bs
->open_flags
= flags
;
583 bs
->buffer_alignment
= 512;
585 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags
& BDRV_O_RDWR
) && (flags
& BDRV_O_COPY_ON_READ
)) {
587 bdrv_enable_copy_on_read(bs
);
590 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
591 bs
->backing_file
[0] = '\0';
593 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
598 bs
->opaque
= g_malloc0(drv
->instance_size
);
600 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
603 * Clear flags that are internal to the block layer before opening the
606 open_flags
= flags
& ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
609 * Snapshots should be writable.
611 if (bs
->is_temporary
) {
612 open_flags
|= BDRV_O_RDWR
;
615 bs
->keep_read_only
= bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
617 /* Open the image, either directly or using a protocol */
618 if (drv
->bdrv_file_open
) {
619 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
621 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
623 ret
= drv
->bdrv_open(bs
, open_flags
);
631 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
637 if (bs
->is_temporary
) {
645 bdrv_delete(bs
->file
);
655 * Opens a file using a protocol (file, host_device, nbd, ...)
657 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
659 BlockDriverState
*bs
;
663 drv
= bdrv_find_protocol(filename
);
669 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
680 * Opens a disk image (raw, qcow2, vmdk, ...)
682 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
686 char tmp_filename
[PATH_MAX
];
688 if (flags
& BDRV_O_SNAPSHOT
) {
689 BlockDriverState
*bs1
;
692 BlockDriver
*bdrv_qcow2
;
693 QEMUOptionParameter
*options
;
694 char backing_filename
[PATH_MAX
];
696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
699 /* if there is a backing file, use it */
701 ret
= bdrv_open(bs1
, filename
, 0, drv
);
706 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
708 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
713 get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
715 /* Real path is meaningless for protocols */
717 snprintf(backing_filename
, sizeof(backing_filename
),
719 else if (!realpath(filename
, backing_filename
))
722 bdrv_qcow2
= bdrv_find_format("qcow2");
723 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
725 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
726 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
728 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
732 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
733 free_option_parameters(options
);
738 filename
= tmp_filename
;
740 bs
->is_temporary
= 1;
743 /* Find the right image format driver */
745 ret
= find_image_format(filename
, &drv
);
749 goto unlink_and_fail
;
753 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
755 goto unlink_and_fail
;
758 /* If there is a backing file, use it */
759 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
760 char backing_filename
[PATH_MAX
];
762 BlockDriver
*back_drv
= NULL
;
764 bs
->backing_hd
= bdrv_new("");
766 if (path_has_protocol(bs
->backing_file
)) {
767 pstrcpy(backing_filename
, sizeof(backing_filename
),
770 path_combine(backing_filename
, sizeof(backing_filename
),
771 filename
, bs
->backing_file
);
774 if (bs
->backing_format
[0] != '\0') {
775 back_drv
= bdrv_find_format(bs
->backing_format
);
778 /* backing files always opened read-only */
780 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
782 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
787 if (bs
->is_temporary
) {
788 bs
->backing_hd
->keep_read_only
= !(flags
& BDRV_O_RDWR
);
790 /* base image inherits from "parent" */
791 bs
->backing_hd
->keep_read_only
= bs
->keep_read_only
;
795 if (!bdrv_key_required(bs
)) {
796 bdrv_dev_change_media_cb(bs
, true);
799 /* throttling disk I/O limits */
800 if (bs
->io_limits_enabled
) {
801 bdrv_io_limits_enable(bs
);
807 if (bs
->is_temporary
) {
813 void bdrv_close(BlockDriverState
*bs
)
817 block_job_cancel_sync(bs
->job
);
821 if (bs
== bs_snapshots
) {
824 if (bs
->backing_hd
) {
825 bdrv_delete(bs
->backing_hd
);
826 bs
->backing_hd
= NULL
;
828 bs
->drv
->bdrv_close(bs
);
831 if (bs
->is_temporary
) {
832 unlink(bs
->filename
);
837 bs
->copy_on_read
= 0;
839 if (bs
->file
!= NULL
) {
840 bdrv_close(bs
->file
);
843 bdrv_dev_change_media_cb(bs
, false);
846 /*throttling disk I/O limits*/
847 if (bs
->io_limits_enabled
) {
848 bdrv_io_limits_disable(bs
);
852 void bdrv_close_all(void)
854 BlockDriverState
*bs
;
856 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
862 * Wait for pending requests to complete across all BlockDriverStates
864 * This function does not flush data to disk, use bdrv_flush_all() for that
865 * after calling this function.
867 void bdrv_drain_all(void)
869 BlockDriverState
*bs
;
873 /* If requests are still pending there is a bug somewhere */
874 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
875 assert(QLIST_EMPTY(&bs
->tracked_requests
));
876 assert(qemu_co_queue_empty(&bs
->throttled_reqs
));
880 /* make a BlockDriverState anonymous by removing from bdrv_state list.
881 Also, NULL terminate the device_name to prevent double remove */
882 void bdrv_make_anon(BlockDriverState
*bs
)
884 if (bs
->device_name
[0] != '\0') {
885 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
887 bs
->device_name
[0] = '\0';
891 * Add new bs contents at the top of an image chain while the chain is
892 * live, while keeping required fields on the top layer.
894 * This will modify the BlockDriverState fields, and swap contents
895 * between bs_new and bs_top. Both bs_new and bs_top are modified.
897 * bs_new is required to be anonymous.
899 * This function does not create any image files.
901 void bdrv_append(BlockDriverState
*bs_new
, BlockDriverState
*bs_top
)
903 BlockDriverState tmp
;
905 /* bs_new must be anonymous */
906 assert(bs_new
->device_name
[0] == '\0');
910 /* there are some fields that need to stay on the top layer: */
913 tmp
.dev_ops
= bs_top
->dev_ops
;
914 tmp
.dev_opaque
= bs_top
->dev_opaque
;
915 tmp
.dev
= bs_top
->dev
;
916 tmp
.buffer_alignment
= bs_top
->buffer_alignment
;
917 tmp
.copy_on_read
= bs_top
->copy_on_read
;
919 /* i/o timing parameters */
920 tmp
.slice_time
= bs_top
->slice_time
;
921 tmp
.slice_start
= bs_top
->slice_start
;
922 tmp
.slice_end
= bs_top
->slice_end
;
923 tmp
.io_limits
= bs_top
->io_limits
;
924 tmp
.io_base
= bs_top
->io_base
;
925 tmp
.throttled_reqs
= bs_top
->throttled_reqs
;
926 tmp
.block_timer
= bs_top
->block_timer
;
927 tmp
.io_limits_enabled
= bs_top
->io_limits_enabled
;
930 tmp
.cyls
= bs_top
->cyls
;
931 tmp
.heads
= bs_top
->heads
;
932 tmp
.secs
= bs_top
->secs
;
933 tmp
.translation
= bs_top
->translation
;
936 tmp
.on_read_error
= bs_top
->on_read_error
;
937 tmp
.on_write_error
= bs_top
->on_write_error
;
940 tmp
.iostatus_enabled
= bs_top
->iostatus_enabled
;
941 tmp
.iostatus
= bs_top
->iostatus
;
943 /* keep the same entry in bdrv_states */
944 pstrcpy(tmp
.device_name
, sizeof(tmp
.device_name
), bs_top
->device_name
);
945 tmp
.list
= bs_top
->list
;
947 /* The contents of 'tmp' will become bs_top, as we are
948 * swapping bs_new and bs_top contents. */
949 tmp
.backing_hd
= bs_new
;
950 pstrcpy(tmp
.backing_file
, sizeof(tmp
.backing_file
), bs_top
->filename
);
951 bdrv_get_format(bs_top
, tmp
.backing_format
, sizeof(tmp
.backing_format
));
953 /* swap contents of the fixed new bs and the current top */
957 /* device_name[] was carried over from the old bs_top. bs_new
958 * shouldn't be in bdrv_states, so we need to make device_name[]
959 * reflect the anonymity of bs_new
961 bs_new
->device_name
[0] = '\0';
963 /* clear the copied fields in the new backing file */
964 bdrv_detach_dev(bs_new
, bs_new
->dev
);
966 qemu_co_queue_init(&bs_new
->throttled_reqs
);
967 memset(&bs_new
->io_base
, 0, sizeof(bs_new
->io_base
));
968 memset(&bs_new
->io_limits
, 0, sizeof(bs_new
->io_limits
));
969 bdrv_iostatus_disable(bs_new
);
971 /* we don't use bdrv_io_limits_disable() for this, because we don't want
972 * to affect or delete the block_timer, as it has been moved to bs_top */
973 bs_new
->io_limits_enabled
= false;
974 bs_new
->block_timer
= NULL
;
975 bs_new
->slice_time
= 0;
976 bs_new
->slice_start
= 0;
977 bs_new
->slice_end
= 0;
980 void bdrv_delete(BlockDriverState
*bs
)
986 /* remove from list, if necessary */
990 if (bs
->file
!= NULL
) {
991 bdrv_delete(bs
->file
);
994 assert(bs
!= bs_snapshots
);
998 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
999 /* TODO change to DeviceState *dev when all users are qdevified */
1005 bdrv_iostatus_reset(bs
);
1009 /* TODO qdevified devices don't use this, remove when devices are qdevified */
1010 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
1012 if (bdrv_attach_dev(bs
, dev
) < 0) {
1017 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
1018 /* TODO change to DeviceState *dev when all users are qdevified */
1020 assert(bs
->dev
== dev
);
1023 bs
->dev_opaque
= NULL
;
1024 bs
->buffer_alignment
= 512;
1027 /* TODO change to return DeviceState * when all users are qdevified */
1028 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
1033 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
1037 bs
->dev_opaque
= opaque
;
1038 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
1039 bs_snapshots
= NULL
;
1043 void bdrv_emit_qmp_error_event(const BlockDriverState
*bdrv
,
1044 BlockQMPEventAction action
, int is_read
)
1047 const char *action_str
;
1050 case BDRV_ACTION_REPORT
:
1051 action_str
= "report";
1053 case BDRV_ACTION_IGNORE
:
1054 action_str
= "ignore";
1056 case BDRV_ACTION_STOP
:
1057 action_str
= "stop";
1063 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1066 is_read
? "read" : "write");
1067 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
1069 qobject_decref(data
);
1072 static void bdrv_emit_qmp_eject_event(BlockDriverState
*bs
, bool ejected
)
1076 data
= qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1077 bdrv_get_device_name(bs
), ejected
);
1078 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED
, data
);
1080 qobject_decref(data
);
1083 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
1085 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
1086 bool tray_was_closed
= !bdrv_dev_is_tray_open(bs
);
1087 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
1088 if (tray_was_closed
) {
1090 bdrv_emit_qmp_eject_event(bs
, true);
1094 bdrv_emit_qmp_eject_event(bs
, false);
1099 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
1101 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
1104 void bdrv_dev_eject_request(BlockDriverState
*bs
, bool force
)
1106 if (bs
->dev_ops
&& bs
->dev_ops
->eject_request_cb
) {
1107 bs
->dev_ops
->eject_request_cb(bs
->dev_opaque
, force
);
1111 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
1113 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
1114 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
1119 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
1121 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
1122 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
1126 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
1128 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
1129 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
1135 * Run consistency checks on an image
1137 * Returns 0 if the check could be completed (it doesn't mean that the image is
1138 * free of errors) or -errno when an internal error occurred. The results of the
1139 * check are stored in res.
1141 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
)
1143 if (bs
->drv
->bdrv_check
== NULL
) {
1147 memset(res
, 0, sizeof(*res
));
1148 return bs
->drv
->bdrv_check(bs
, res
);
1151 #define COMMIT_BUF_SECTORS 2048
1153 /* commit COW file into the raw image */
1154 int bdrv_commit(BlockDriverState
*bs
)
1156 BlockDriver
*drv
= bs
->drv
;
1157 BlockDriver
*backing_drv
;
1158 int64_t sector
, total_sectors
;
1159 int n
, ro
, open_flags
;
1160 int ret
= 0, rw_ret
= 0;
1162 char filename
[1024];
1163 BlockDriverState
*bs_rw
, *bs_ro
;
1168 if (!bs
->backing_hd
) {
1172 if (bs
->backing_hd
->keep_read_only
) {
1176 if (bdrv_in_use(bs
) || bdrv_in_use(bs
->backing_hd
)) {
1180 backing_drv
= bs
->backing_hd
->drv
;
1181 ro
= bs
->backing_hd
->read_only
;
1182 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
1183 open_flags
= bs
->backing_hd
->open_flags
;
1187 bdrv_delete(bs
->backing_hd
);
1188 bs
->backing_hd
= NULL
;
1189 bs_rw
= bdrv_new("");
1190 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
1194 /* try to re-open read-only */
1195 bs_ro
= bdrv_new("");
1196 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1200 /* drive not functional anymore */
1204 bs
->backing_hd
= bs_ro
;
1207 bs
->backing_hd
= bs_rw
;
1210 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
1211 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
1213 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
1214 if (bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
1216 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
1221 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
1228 if (drv
->bdrv_make_empty
) {
1229 ret
= drv
->bdrv_make_empty(bs
);
1234 * Make sure all data we wrote to the backing device is actually
1238 bdrv_flush(bs
->backing_hd
);
1245 bdrv_delete(bs
->backing_hd
);
1246 bs
->backing_hd
= NULL
;
1247 bs_ro
= bdrv_new("");
1248 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1252 /* drive not functional anymore */
1256 bs
->backing_hd
= bs_ro
;
1257 bs
->backing_hd
->keep_read_only
= 0;
1263 int bdrv_commit_all(void)
1265 BlockDriverState
*bs
;
1267 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1268 int ret
= bdrv_commit(bs
);
1276 struct BdrvTrackedRequest
{
1277 BlockDriverState
*bs
;
1281 QLIST_ENTRY(BdrvTrackedRequest
) list
;
1282 Coroutine
*co
; /* owner, used for deadlock detection */
1283 CoQueue wait_queue
; /* coroutines blocked on this request */
1287 * Remove an active request from the tracked requests list
1289 * This function should be called when a tracked request is completing.
1291 static void tracked_request_end(BdrvTrackedRequest
*req
)
1293 QLIST_REMOVE(req
, list
);
1294 qemu_co_queue_restart_all(&req
->wait_queue
);
1298 * Add an active request to the tracked requests list
1300 static void tracked_request_begin(BdrvTrackedRequest
*req
,
1301 BlockDriverState
*bs
,
1303 int nb_sectors
, bool is_write
)
1305 *req
= (BdrvTrackedRequest
){
1307 .sector_num
= sector_num
,
1308 .nb_sectors
= nb_sectors
,
1309 .is_write
= is_write
,
1310 .co
= qemu_coroutine_self(),
1313 qemu_co_queue_init(&req
->wait_queue
);
1315 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
1319 * Round a region to cluster boundaries
1321 static void round_to_clusters(BlockDriverState
*bs
,
1322 int64_t sector_num
, int nb_sectors
,
1323 int64_t *cluster_sector_num
,
1324 int *cluster_nb_sectors
)
1326 BlockDriverInfo bdi
;
1328 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
1329 *cluster_sector_num
= sector_num
;
1330 *cluster_nb_sectors
= nb_sectors
;
1332 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
1333 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
1334 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
1339 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
1340 int64_t sector_num
, int nb_sectors
) {
1342 if (sector_num
>= req
->sector_num
+ req
->nb_sectors
) {
1346 if (req
->sector_num
>= sector_num
+ nb_sectors
) {
1352 static void coroutine_fn
wait_for_overlapping_requests(BlockDriverState
*bs
,
1353 int64_t sector_num
, int nb_sectors
)
1355 BdrvTrackedRequest
*req
;
1356 int64_t cluster_sector_num
;
1357 int cluster_nb_sectors
;
1360 /* If we touch the same cluster it counts as an overlap. This guarantees
1361 * that allocating writes will be serialized and not race with each other
1362 * for the same cluster. For example, in copy-on-read it ensures that the
1363 * CoR read and write operations are atomic and guest writes cannot
1364 * interleave between them.
1366 round_to_clusters(bs
, sector_num
, nb_sectors
,
1367 &cluster_sector_num
, &cluster_nb_sectors
);
1371 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
1372 if (tracked_request_overlaps(req
, cluster_sector_num
,
1373 cluster_nb_sectors
)) {
1374 /* Hitting this means there was a reentrant request, for
1375 * example, a block driver issuing nested requests. This must
1376 * never happen since it means deadlock.
1378 assert(qemu_coroutine_self() != req
->co
);
1380 qemu_co_queue_wait(&req
->wait_queue
);
1391 * -EINVAL - backing format specified, but no file
1392 * -ENOSPC - can't update the backing file because no space is left in the
1394 * -ENOTSUP - format driver doesn't support changing the backing file
1396 int bdrv_change_backing_file(BlockDriverState
*bs
,
1397 const char *backing_file
, const char *backing_fmt
)
1399 BlockDriver
*drv
= bs
->drv
;
1401 if (drv
->bdrv_change_backing_file
!= NULL
) {
1402 return drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
1408 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
1413 if (!bdrv_is_inserted(bs
))
1419 len
= bdrv_getlength(bs
);
1424 if ((offset
> len
) || (len
- offset
< size
))
1430 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1433 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1434 nb_sectors
* BDRV_SECTOR_SIZE
);
1437 typedef struct RwCo
{
1438 BlockDriverState
*bs
;
1446 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1448 RwCo
*rwco
= opaque
;
1450 if (!rwco
->is_write
) {
1451 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1452 rwco
->nb_sectors
, rwco
->qiov
, 0);
1454 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1455 rwco
->nb_sectors
, rwco
->qiov
, 0);
1460 * Process a synchronous request using coroutines
1462 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1463 int nb_sectors
, bool is_write
)
1466 struct iovec iov
= {
1467 .iov_base
= (void *)buf
,
1468 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1473 .sector_num
= sector_num
,
1474 .nb_sectors
= nb_sectors
,
1476 .is_write
= is_write
,
1480 qemu_iovec_init_external(&qiov
, &iov
, 1);
1483 * In sync call context, when the vcpu is blocked, this throttling timer
1484 * will not fire; so the I/O throttling function has to be disabled here
1485 * if it has been enabled.
1487 if (bs
->io_limits_enabled
) {
1488 fprintf(stderr
, "Disabling I/O throttling on '%s' due "
1489 "to synchronous I/O.\n", bdrv_get_device_name(bs
));
1490 bdrv_io_limits_disable(bs
);
1493 if (qemu_in_coroutine()) {
1494 /* Fast-path if already in coroutine context */
1495 bdrv_rw_co_entry(&rwco
);
1497 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1498 qemu_coroutine_enter(co
, &rwco
);
1499 while (rwco
.ret
== NOT_DONE
) {
/* Synchronous sector read: wraps the request and hands it to bdrv_rw_co()
 * with is_write=false (visible in the 'false' final argument below).
 * NOTE(review): extraction dropped original lines 1509/1511 (function braces);
 * the code text below is preserved byte-for-byte from the damaged source. */
1506 /* return < 0 if error. See bdrv_write() for the return codes */
1507 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1508 uint8_t *buf
, int nb_sectors
)
1510 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1513 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1514 int nb_sectors
, int dirty
)
1517 unsigned long val
, idx
, bit
;
1519 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1520 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1522 for (; start
<= end
; start
++) {
1523 idx
= start
/ (sizeof(unsigned long) * 8);
1524 bit
= start
% (sizeof(unsigned long) * 8);
1525 val
= bs
->dirty_bitmap
[idx
];
1527 if (!(val
& (1UL << bit
))) {
1532 if (val
& (1UL << bit
)) {
1534 val
&= ~(1UL << bit
);
1537 bs
->dirty_bitmap
[idx
] = val
;
/* Synchronous sector write: mirror of bdrv_read(), delegating to bdrv_rw_co()
 * with is_write=true. The (uint8_t *) cast discards const because the shared
 * read/write path takes a non-const buffer pointer.
 * NOTE(review): extraction dropped original lines 1546/1549/1551 (comment
 * close and braces); code text below is preserved byte-for-byte. */
1541 /* Return < 0 if error. Important errors are:
1542 -EIO generic I/O error (may happen for all errors)
1543 -ENOMEDIUM No media inserted.
1544 -EINVAL Invalid sector number or nb_sectors
1545 -EACCES Trying to write a read-only device
1547 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1548 const uint8_t *buf
, int nb_sectors
)
1550 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1553 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1554 void *buf
, int count1
)
1556 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1557 int len
, nb_sectors
, count
;
1562 /* first read to align to sector start */
1563 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1566 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1568 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1570 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1578 /* read the sectors "in place" */
1579 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1580 if (nb_sectors
> 0) {
1581 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1583 sector_num
+= nb_sectors
;
1584 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1589 /* add data from the last sector */
1591 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1593 memcpy(buf
, tmp_buf
, count
);
1598 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1599 const void *buf
, int count1
)
1601 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1602 int len
, nb_sectors
, count
;
1607 /* first write to align to sector start */
1608 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1611 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1613 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1615 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1616 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1625 /* write the sectors "in place" */
1626 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1627 if (nb_sectors
> 0) {
1628 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1630 sector_num
+= nb_sectors
;
1631 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1636 /* add data from the last sector */
1638 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1640 memcpy(tmp_buf
, buf
, count
);
1641 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1648 * Writes to the file and ensures that no writes are reordered across this
1649 * request (acts as a barrier)
1651 * Returns 0 on success, -errno in error cases.
1653 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
1654 const void *buf
, int count
)
1658 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
1663 /* No flush needed for cache modes that use O_DSYNC */
1664 if ((bs
->open_flags
& BDRV_O_CACHE_WB
) != 0) {
1671 static int coroutine_fn
bdrv_co_do_copy_on_readv(BlockDriverState
*bs
,
1672 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1674 /* Perform I/O through a temporary buffer so that users who scribble over
1675 * their read buffer while the operation is in progress do not end up
1676 * modifying the image file. This is critical for zero-copy guest I/O
1677 * where anything might happen inside guest memory.
1679 void *bounce_buffer
;
1681 BlockDriver
*drv
= bs
->drv
;
1683 QEMUIOVector bounce_qiov
;
1684 int64_t cluster_sector_num
;
1685 int cluster_nb_sectors
;
1689 /* Cover entire cluster so no additional backing file I/O is required when
1690 * allocating cluster in the image file.
1692 round_to_clusters(bs
, sector_num
, nb_sectors
,
1693 &cluster_sector_num
, &cluster_nb_sectors
);
1695 trace_bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
,
1696 cluster_sector_num
, cluster_nb_sectors
);
1698 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
1699 iov
.iov_base
= bounce_buffer
= qemu_blockalign(bs
, iov
.iov_len
);
1700 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
1702 ret
= drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
1708 if (drv
->bdrv_co_write_zeroes
&&
1709 buffer_is_zero(bounce_buffer
, iov
.iov_len
)) {
1710 ret
= drv
->bdrv_co_write_zeroes(bs
, cluster_sector_num
,
1711 cluster_nb_sectors
);
1713 ret
= drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
1718 /* It might be okay to ignore write errors for guest requests. If this
1719 * is a deliberate copy-on-read then we don't want to ignore the error.
1720 * Simply report it in all cases.
1725 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
1726 qemu_iovec_from_buffer(qiov
, bounce_buffer
+ skip_bytes
,
1727 nb_sectors
* BDRV_SECTOR_SIZE
);
1730 qemu_vfree(bounce_buffer
);
1735 * Handle a read request in coroutine context
1737 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
1738 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
1739 BdrvRequestFlags flags
)
1741 BlockDriver
*drv
= bs
->drv
;
1742 BdrvTrackedRequest req
;
1748 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1752 /* throttling disk read I/O */
1753 if (bs
->io_limits_enabled
) {
1754 bdrv_io_limits_intercept(bs
, false, nb_sectors
);
1757 if (bs
->copy_on_read
) {
1758 flags
|= BDRV_REQ_COPY_ON_READ
;
1760 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1761 bs
->copy_on_read_in_flight
++;
1764 if (bs
->copy_on_read_in_flight
) {
1765 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1768 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, false);
1770 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1773 ret
= bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
1778 if (!ret
|| pnum
!= nb_sectors
) {
1779 ret
= bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
1784 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
1787 tracked_request_end(&req
);
1789 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1790 bs
->copy_on_read_in_flight
--;
/* Coroutine-context read entry point: traces the request, then forwards to
 * bdrv_co_do_readv() with no request flags (final argument 0).
 * NOTE(review): extraction dropped original lines 1798/1802 (braces);
 * code text below is preserved byte-for-byte. */
1796 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
1797 int nb_sectors
, QEMUIOVector
*qiov
)
1799 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
1801 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
, 0);
/* Deliberate copy-on-read variant of bdrv_co_readv(): same delegation to
 * bdrv_co_do_readv() but with the BDRV_REQ_COPY_ON_READ flag set.
 * NOTE(review): extraction dropped original lines 1806/1811 (braces);
 * code text below is preserved byte-for-byte. */
1804 int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
1805 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1807 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
);
1809 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
,
1810 BDRV_REQ_COPY_ON_READ
);
1813 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
1814 int64_t sector_num
, int nb_sectors
)
1816 BlockDriver
*drv
= bs
->drv
;
1821 /* First try the efficient write zeroes operation */
1822 if (drv
->bdrv_co_write_zeroes
) {
1823 return drv
->bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
1826 /* Fall back to bounce buffer if write zeroes is unsupported */
1827 iov
.iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
;
1828 iov
.iov_base
= qemu_blockalign(bs
, iov
.iov_len
);
1829 memset(iov
.iov_base
, 0, iov
.iov_len
);
1830 qemu_iovec_init_external(&qiov
, &iov
, 1);
1832 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, &qiov
);
1834 qemu_vfree(iov
.iov_base
);
1839 * Handle a write request in coroutine context
1841 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
1842 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
1843 BdrvRequestFlags flags
)
1845 BlockDriver
*drv
= bs
->drv
;
1846 BdrvTrackedRequest req
;
1852 if (bs
->read_only
) {
1855 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1859 /* throttling disk write I/O */
1860 if (bs
->io_limits_enabled
) {
1861 bdrv_io_limits_intercept(bs
, true, nb_sectors
);
1864 if (bs
->copy_on_read_in_flight
) {
1865 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1868 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, true);
1870 if (flags
& BDRV_REQ_ZERO_WRITE
) {
1871 ret
= bdrv_co_do_write_zeroes(bs
, sector_num
, nb_sectors
);
1873 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
1876 if (bs
->dirty_bitmap
) {
1877 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1880 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
1881 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
1884 tracked_request_end(&req
);
/* Coroutine-context write entry point: traces the request, then forwards to
 * bdrv_co_do_writev() with no request flags (final argument 0).
 * NOTE(review): extraction dropped original lines 1891/1895 (braces);
 * code text below is preserved byte-for-byte. */
1889 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
1890 int nb_sectors
, QEMUIOVector
*qiov
)
1892 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
1894 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
, 0);
/* Zero-fill a sector range: routed through the common write path with a NULL
 * qiov and BDRV_REQ_ZERO_WRITE, which bdrv_co_do_writev() dispatches to
 * bdrv_co_do_write_zeroes() (visible earlier in this file).
 * NOTE(review): extraction dropped original lines 1899/1904 (braces);
 * code text below is preserved byte-for-byte. */
1897 int coroutine_fn
bdrv_co_write_zeroes(BlockDriverState
*bs
,
1898 int64_t sector_num
, int nb_sectors
)
1900 trace_bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
1902 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, NULL
,
1903 BDRV_REQ_ZERO_WRITE
);
1907 * Truncate file to 'offset' bytes (needed only for file protocols)
1909 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
1911 BlockDriver
*drv
= bs
->drv
;
1915 if (!drv
->bdrv_truncate
)
1919 if (bdrv_in_use(bs
))
1921 ret
= drv
->bdrv_truncate(bs
, offset
);
1923 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
1924 bdrv_dev_resize_cb(bs
);
1930 * Length of a allocated file in bytes. Sparse files are counted by actual
1931 * allocated space. Return < 0 if error or unknown.
1933 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
1935 BlockDriver
*drv
= bs
->drv
;
1939 if (drv
->bdrv_get_allocated_file_size
) {
1940 return drv
->bdrv_get_allocated_file_size(bs
);
1943 return bdrv_get_allocated_file_size(bs
->file
);
1949 * Length of a file in bytes. Return < 0 if error or unknown.
1951 int64_t bdrv_getlength(BlockDriverState
*bs
)
1953 BlockDriver
*drv
= bs
->drv
;
1957 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
1958 if (drv
->bdrv_getlength
) {
1959 return drv
->bdrv_getlength(bs
);
1962 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
1965 /* return 0 as number of sectors if no device present or error */
1966 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
1969 length
= bdrv_getlength(bs
);
1973 length
= length
>> BDRV_SECTOR_BITS
;
1974 *nb_sectors_ptr
= length
;
1978 uint8_t boot_ind
; /* 0x80 - active */
1979 uint8_t head
; /* starting head */
1980 uint8_t sector
; /* starting sector */
1981 uint8_t cyl
; /* starting cylinder */
1982 uint8_t sys_ind
; /* What partition type */
1983 uint8_t end_head
; /* end head */
1984 uint8_t end_sector
; /* end sector */
1985 uint8_t end_cyl
; /* end cylinder */
1986 uint32_t start_sect
; /* starting sector counting from 0 */
1987 uint32_t nr_sects
; /* nr of sectors in partition */
1990 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1991 static int guess_disk_lchs(BlockDriverState
*bs
,
1992 int *pcylinders
, int *pheads
, int *psectors
)
1994 uint8_t buf
[BDRV_SECTOR_SIZE
];
1995 int ret
, i
, heads
, sectors
, cylinders
;
1996 struct partition
*p
;
1998 uint64_t nb_sectors
;
2001 bdrv_get_geometry(bs
, &nb_sectors
);
2004 * The function will be invoked during startup not only in sync I/O mode,
2005 * but also in async I/O mode. So the I/O throttling function has to
2006 * be disabled temporarily here, not permanently.
2008 enabled
= bs
->io_limits_enabled
;
2009 bs
->io_limits_enabled
= false;
2010 ret
= bdrv_read(bs
, 0, buf
, 1);
2011 bs
->io_limits_enabled
= enabled
;
2014 /* test msdos magic */
2015 if (buf
[510] != 0x55 || buf
[511] != 0xaa)
2017 for(i
= 0; i
< 4; i
++) {
2018 p
= ((struct partition
*)(buf
+ 0x1be)) + i
;
2019 nr_sects
= le32_to_cpu(p
->nr_sects
);
2020 if (nr_sects
&& p
->end_head
) {
2021 /* We make the assumption that the partition terminates on
2022 a cylinder boundary */
2023 heads
= p
->end_head
+ 1;
2024 sectors
= p
->end_sector
& 63;
2027 cylinders
= nb_sectors
/ (heads
* sectors
);
2028 if (cylinders
< 1 || cylinders
> 16383)
2031 *psectors
= sectors
;
2032 *pcylinders
= cylinders
;
2034 printf("guessed geometry: LCHS=%d %d %d\n",
2035 cylinders
, heads
, sectors
);
2043 void bdrv_guess_geometry(BlockDriverState
*bs
, int *pcyls
, int *pheads
, int *psecs
)
2045 int translation
, lba_detected
= 0;
2046 int cylinders
, heads
, secs
;
2047 uint64_t nb_sectors
;
2049 /* if a geometry hint is available, use it */
2050 bdrv_get_geometry(bs
, &nb_sectors
);
2051 bdrv_get_geometry_hint(bs
, &cylinders
, &heads
, &secs
);
2052 translation
= bdrv_get_translation_hint(bs
);
2053 if (cylinders
!= 0) {
2058 if (guess_disk_lchs(bs
, &cylinders
, &heads
, &secs
) == 0) {
2060 /* if heads > 16, it means that a BIOS LBA
2061 translation was active, so the default
2062 hardware geometry is OK */
2064 goto default_geometry
;
2069 /* disable any translation to be in sync with
2070 the logical geometry */
2071 if (translation
== BIOS_ATA_TRANSLATION_AUTO
) {
2072 bdrv_set_translation_hint(bs
,
2073 BIOS_ATA_TRANSLATION_NONE
);
2078 /* if no geometry, use a standard physical disk geometry */
2079 cylinders
= nb_sectors
/ (16 * 63);
2081 if (cylinders
> 16383)
2083 else if (cylinders
< 2)
2088 if ((lba_detected
== 1) && (translation
== BIOS_ATA_TRANSLATION_AUTO
)) {
2089 if ((*pcyls
* *pheads
) <= 131072) {
2090 bdrv_set_translation_hint(bs
,
2091 BIOS_ATA_TRANSLATION_LARGE
);
2093 bdrv_set_translation_hint(bs
,
2094 BIOS_ATA_TRANSLATION_LBA
);
2098 bdrv_set_geometry_hint(bs
, *pcyls
, *pheads
, *psecs
);
2102 void bdrv_set_geometry_hint(BlockDriverState
*bs
,
2103 int cyls
, int heads
, int secs
)
/* Store the BIOS geometry-translation hint on the device state; read back by
 * bdrv_get_translation_hint() below.
 * NOTE(review): extraction dropped original lines 2111/2113 (braces);
 * code text below is preserved byte-for-byte. */
2110 void bdrv_set_translation_hint(BlockDriverState
*bs
, int translation
)
2112 bs
->translation
= translation
;
2115 void bdrv_get_geometry_hint(BlockDriverState
*bs
,
2116 int *pcyls
, int *pheads
, int *psecs
)
2119 *pheads
= bs
->heads
;
/* Copy caller-supplied throttling limits into the device state, then
 * recompute the enabled flag from the new limits via
 * bdrv_io_limits_enabled().
 * NOTE(review): extraction dropped original lines 2126/2129 (braces);
 * code text below is preserved byte-for-byte. */
2123 /* throttling disk io limits */
2124 void bdrv_set_io_limits(BlockDriverState
*bs
,
2125 BlockIOLimit
*io_limits
)
2127 bs
->io_limits
= *io_limits
;
2128 bs
->io_limits_enabled
= bdrv_io_limits_enabled(bs
);
2131 /* Recognize floppy formats */
2132 typedef struct FDFormat
{
2140 static const FDFormat fd_formats
[] = {
2141 /* First entry is default format */
2142 /* 1.44 MB 3"1/2 floppy disks */
2143 { FDRIVE_DRV_144
, 18, 80, 1, FDRIVE_RATE_500K
, },
2144 { FDRIVE_DRV_144
, 20, 80, 1, FDRIVE_RATE_500K
, },
2145 { FDRIVE_DRV_144
, 21, 80, 1, FDRIVE_RATE_500K
, },
2146 { FDRIVE_DRV_144
, 21, 82, 1, FDRIVE_RATE_500K
, },
2147 { FDRIVE_DRV_144
, 21, 83, 1, FDRIVE_RATE_500K
, },
2148 { FDRIVE_DRV_144
, 22, 80, 1, FDRIVE_RATE_500K
, },
2149 { FDRIVE_DRV_144
, 23, 80, 1, FDRIVE_RATE_500K
, },
2150 { FDRIVE_DRV_144
, 24, 80, 1, FDRIVE_RATE_500K
, },
2151 /* 2.88 MB 3"1/2 floppy disks */
2152 { FDRIVE_DRV_288
, 36, 80, 1, FDRIVE_RATE_1M
, },
2153 { FDRIVE_DRV_288
, 39, 80, 1, FDRIVE_RATE_1M
, },
2154 { FDRIVE_DRV_288
, 40, 80, 1, FDRIVE_RATE_1M
, },
2155 { FDRIVE_DRV_288
, 44, 80, 1, FDRIVE_RATE_1M
, },
2156 { FDRIVE_DRV_288
, 48, 80, 1, FDRIVE_RATE_1M
, },
2157 /* 720 kB 3"1/2 floppy disks */
2158 { FDRIVE_DRV_144
, 9, 80, 1, FDRIVE_RATE_250K
, },
2159 { FDRIVE_DRV_144
, 10, 80, 1, FDRIVE_RATE_250K
, },
2160 { FDRIVE_DRV_144
, 10, 82, 1, FDRIVE_RATE_250K
, },
2161 { FDRIVE_DRV_144
, 10, 83, 1, FDRIVE_RATE_250K
, },
2162 { FDRIVE_DRV_144
, 13, 80, 1, FDRIVE_RATE_250K
, },
2163 { FDRIVE_DRV_144
, 14, 80, 1, FDRIVE_RATE_250K
, },
2164 /* 1.2 MB 5"1/4 floppy disks */
2165 { FDRIVE_DRV_120
, 15, 80, 1, FDRIVE_RATE_500K
, },
2166 { FDRIVE_DRV_120
, 18, 80, 1, FDRIVE_RATE_500K
, },
2167 { FDRIVE_DRV_120
, 18, 82, 1, FDRIVE_RATE_500K
, },
2168 { FDRIVE_DRV_120
, 18, 83, 1, FDRIVE_RATE_500K
, },
2169 { FDRIVE_DRV_120
, 20, 80, 1, FDRIVE_RATE_500K
, },
2170 /* 720 kB 5"1/4 floppy disks */
2171 { FDRIVE_DRV_120
, 9, 80, 1, FDRIVE_RATE_250K
, },
2172 { FDRIVE_DRV_120
, 11, 80, 1, FDRIVE_RATE_250K
, },
2173 /* 360 kB 5"1/4 floppy disks */
2174 { FDRIVE_DRV_120
, 9, 40, 1, FDRIVE_RATE_300K
, },
2175 { FDRIVE_DRV_120
, 9, 40, 0, FDRIVE_RATE_300K
, },
2176 { FDRIVE_DRV_120
, 10, 41, 1, FDRIVE_RATE_300K
, },
2177 { FDRIVE_DRV_120
, 10, 42, 1, FDRIVE_RATE_300K
, },
2178 /* 320 kB 5"1/4 floppy disks */
2179 { FDRIVE_DRV_120
, 8, 40, 1, FDRIVE_RATE_250K
, },
2180 { FDRIVE_DRV_120
, 8, 40, 0, FDRIVE_RATE_250K
, },
2181 /* 360 kB must match 5"1/4 better than 3"1/2... */
2182 { FDRIVE_DRV_144
, 9, 80, 0, FDRIVE_RATE_250K
, },
2184 { FDRIVE_DRV_NONE
, -1, -1, 0, 0, },
2187 void bdrv_get_floppy_geometry_hint(BlockDriverState
*bs
, int *nb_heads
,
2188 int *max_track
, int *last_sect
,
2189 FDriveType drive_in
, FDriveType
*drive
,
2192 const FDFormat
*parse
;
2193 uint64_t nb_sectors
, size
;
2194 int i
, first_match
, match
;
2196 bdrv_get_geometry_hint(bs
, nb_heads
, max_track
, last_sect
);
2197 if (*nb_heads
!= 0 && *max_track
!= 0 && *last_sect
!= 0) {
2198 /* User defined disk */
2199 *rate
= FDRIVE_RATE_500K
;
2201 bdrv_get_geometry(bs
, &nb_sectors
);
2204 for (i
= 0; ; i
++) {
2205 parse
= &fd_formats
[i
];
2206 if (parse
->drive
== FDRIVE_DRV_NONE
) {
2209 if (drive_in
== parse
->drive
||
2210 drive_in
== FDRIVE_DRV_NONE
) {
2211 size
= (parse
->max_head
+ 1) * parse
->max_track
*
2213 if (nb_sectors
== size
) {
2217 if (first_match
== -1) {
2223 if (first_match
== -1) {
2226 match
= first_match
;
2228 parse
= &fd_formats
[match
];
2230 *nb_heads
= parse
->max_head
+ 1;
2231 *max_track
= parse
->max_track
;
2232 *last_sect
= parse
->last_sect
;
2233 *drive
= parse
->drive
;
2234 *rate
= parse
->rate
;
/* Accessor for the translation hint stored by bdrv_set_translation_hint().
 * NOTE(review): extraction dropped original lines 2239/2241 (braces);
 * code text below is preserved byte-for-byte. */
2238 int bdrv_get_translation_hint(BlockDriverState
*bs
)
2240 return bs
->translation
;
/* Record the error policies for read and write failures; consumed by
 * bdrv_get_on_error() below.
 * NOTE(review): extraction dropped original lines 2245/2248 (braces);
 * code text below is preserved byte-for-byte. */
2243 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
2244 BlockErrorAction on_write_error
)
2246 bs
->on_read_error
= on_read_error
;
2247 bs
->on_write_error
= on_write_error
;
/* Return the configured error action: the read policy when is_read is
 * non-zero, otherwise the write policy.
 * NOTE(review): extraction dropped original lines 2251/2253 (braces);
 * code text below is preserved byte-for-byte. */
2250 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
2252 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
/* Trivial accessor: non-zero when the device was opened read-only.
 * NOTE(review): extraction dropped original lines 2256/2258 (braces);
 * code text below is preserved byte-for-byte. */
2255 int bdrv_is_read_only(BlockDriverState
*bs
)
2257 return bs
->read_only
;
2260 int bdrv_is_sg(BlockDriverState
*bs
)
/* Trivial accessor for the write-cache-enabled flag.
 * NOTE(review): despite the imperative name, the visible body only READS
 * bs->enable_write_cache — it does not set it. Extraction dropped original
 * lines 2266/2268 (braces); code text below is preserved byte-for-byte. */
2265 int bdrv_enable_write_cache(BlockDriverState
*bs
)
2267 return bs
->enable_write_cache
;
/* Encrypted if either the backing file or the image itself is encrypted.
 * NOTE(review): extraction dropped original line 2273 — the statement taken
 * when the backing file is encrypted (presumably 'return 1;', matching the
 * boolean contract; confirm against upstream). Braces 2271/2275 also
 * dropped. Code text below is preserved byte-for-byte. */
2270 int bdrv_is_encrypted(BlockDriverState
*bs
)
2272 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2274 return bs
->encrypted
;
/* A key is required when an encrypted image (or its encrypted backing file)
 * has not yet had a valid key supplied (valid_key unset).
 * NOTE(review): extraction dropped original line 2282 — the statement taken
 * when the backing file needs a key (presumably 'return 1;'; confirm against
 * upstream). Braces 2280/2284 also dropped. Code text below is preserved
 * byte-for-byte. */
2277 int bdrv_key_required(BlockDriverState
*bs
)
2279 BlockDriverState
*backing_hd
= bs
->backing_hd
;
2281 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
2283 return (bs
->encrypted
&& !bs
->valid_key
);
2286 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
2289 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
2290 ret
= bdrv_set_key(bs
->backing_hd
, key
);
2296 if (!bs
->encrypted
) {
2298 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
2301 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
2304 } else if (!bs
->valid_key
) {
2306 /* call the change callback now, we skipped it on open */
2307 bdrv_dev_change_media_cb(bs
, true);
2312 void bdrv_get_format(BlockDriverState
*bs
, char *buf
, int buf_size
)
2317 pstrcpy(buf
, buf_size
, bs
->drv
->format_name
);
2321 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
2326 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
2327 it(opaque
, drv
->format_name
);
2331 BlockDriverState
*bdrv_find(const char *name
)
2333 BlockDriverState
*bs
;
2335 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2336 if (!strcmp(name
, bs
->device_name
)) {
2343 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
2346 return QTAILQ_FIRST(&bdrv_states
);
2348 return QTAILQ_NEXT(bs
, list
);
2351 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
2353 BlockDriverState
*bs
;
2355 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
/* Return the device name string owned by the BlockDriverState; caller must
 * not free the result (it points into bs).
 * NOTE(review): extraction dropped original lines 2361/2363 (braces);
 * code text below is preserved byte-for-byte. */
2360 const char *bdrv_get_device_name(BlockDriverState
*bs
)
2362 return bs
->device_name
;
2365 void bdrv_flush_all(void)
2367 BlockDriverState
*bs
;
2369 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2374 int bdrv_has_zero_init(BlockDriverState
*bs
)
2378 if (bs
->drv
->bdrv_has_zero_init
) {
2379 return bs
->drv
->bdrv_has_zero_init(bs
);
2385 typedef struct BdrvCoIsAllocatedData
{
2386 BlockDriverState
*bs
;
2392 } BdrvCoIsAllocatedData
;
2395 * Returns true iff the specified sector is present in the disk image. Drivers
2396 * not implementing the functionality are assumed to not support backing files,
2397 * hence all their sectors are reported as allocated.
2399 * If 'sector_num' is beyond the end of the disk image the return value is 0
2400 * and 'pnum' is set to 0.
2402 * 'pnum' is set to the number of sectors (including and immediately following
2403 * the specified sector) that are known to be in the same
2404 * allocated/unallocated state.
2406 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2407 * beyond the end of the disk image it will be clamped.
2409 int coroutine_fn
bdrv_co_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
2410 int nb_sectors
, int *pnum
)
2414 if (sector_num
>= bs
->total_sectors
) {
2419 n
= bs
->total_sectors
- sector_num
;
2420 if (n
< nb_sectors
) {
2424 if (!bs
->drv
->bdrv_co_is_allocated
) {
2429 return bs
->drv
->bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
2432 /* Coroutine wrapper for bdrv_is_allocated() */
2433 static void coroutine_fn
bdrv_is_allocated_co_entry(void *opaque
)
2435 BdrvCoIsAllocatedData
*data
= opaque
;
2436 BlockDriverState
*bs
= data
->bs
;
2438 data
->ret
= bdrv_co_is_allocated(bs
, data
->sector_num
, data
->nb_sectors
,
2444 * Synchronous wrapper around bdrv_co_is_allocated().
2446 * See bdrv_co_is_allocated() for details.
2448 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
2452 BdrvCoIsAllocatedData data
= {
2454 .sector_num
= sector_num
,
2455 .nb_sectors
= nb_sectors
,
2460 co
= qemu_coroutine_create(bdrv_is_allocated_co_entry
);
2461 qemu_coroutine_enter(co
, &data
);
2462 while (!data
.done
) {
2468 BlockInfoList
*qmp_query_block(Error
**errp
)
2470 BlockInfoList
*head
= NULL
, *cur_item
= NULL
;
2471 BlockDriverState
*bs
;
2473 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2474 BlockInfoList
*info
= g_malloc0(sizeof(*info
));
2476 info
->value
= g_malloc0(sizeof(*info
->value
));
2477 info
->value
->device
= g_strdup(bs
->device_name
);
2478 info
->value
->type
= g_strdup("unknown");
2479 info
->value
->locked
= bdrv_dev_is_medium_locked(bs
);
2480 info
->value
->removable
= bdrv_dev_has_removable_media(bs
);
2482 if (bdrv_dev_has_removable_media(bs
)) {
2483 info
->value
->has_tray_open
= true;
2484 info
->value
->tray_open
= bdrv_dev_is_tray_open(bs
);
2487 if (bdrv_iostatus_is_enabled(bs
)) {
2488 info
->value
->has_io_status
= true;
2489 info
->value
->io_status
= bs
->iostatus
;
2493 info
->value
->has_inserted
= true;
2494 info
->value
->inserted
= g_malloc0(sizeof(*info
->value
->inserted
));
2495 info
->value
->inserted
->file
= g_strdup(bs
->filename
);
2496 info
->value
->inserted
->ro
= bs
->read_only
;
2497 info
->value
->inserted
->drv
= g_strdup(bs
->drv
->format_name
);
2498 info
->value
->inserted
->encrypted
= bs
->encrypted
;
2499 if (bs
->backing_file
[0]) {
2500 info
->value
->inserted
->has_backing_file
= true;
2501 info
->value
->inserted
->backing_file
= g_strdup(bs
->backing_file
);
2504 if (bs
->io_limits_enabled
) {
2505 info
->value
->inserted
->bps
=
2506 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
2507 info
->value
->inserted
->bps_rd
=
2508 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_READ
];
2509 info
->value
->inserted
->bps_wr
=
2510 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_WRITE
];
2511 info
->value
->inserted
->iops
=
2512 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
2513 info
->value
->inserted
->iops_rd
=
2514 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_READ
];
2515 info
->value
->inserted
->iops_wr
=
2516 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_WRITE
];
2520 /* XXX: waiting for the qapi to support GSList */
2522 head
= cur_item
= info
;
2524 cur_item
->next
= info
;
2532 /* Consider exposing this as a full fledged QMP command */
2533 static BlockStats
*qmp_query_blockstat(const BlockDriverState
*bs
, Error
**errp
)
2537 s
= g_malloc0(sizeof(*s
));
2539 if (bs
->device_name
[0]) {
2540 s
->has_device
= true;
2541 s
->device
= g_strdup(bs
->device_name
);
2544 s
->stats
= g_malloc0(sizeof(*s
->stats
));
2545 s
->stats
->rd_bytes
= bs
->nr_bytes
[BDRV_ACCT_READ
];
2546 s
->stats
->wr_bytes
= bs
->nr_bytes
[BDRV_ACCT_WRITE
];
2547 s
->stats
->rd_operations
= bs
->nr_ops
[BDRV_ACCT_READ
];
2548 s
->stats
->wr_operations
= bs
->nr_ops
[BDRV_ACCT_WRITE
];
2549 s
->stats
->wr_highest_offset
= bs
->wr_highest_sector
* BDRV_SECTOR_SIZE
;
2550 s
->stats
->flush_operations
= bs
->nr_ops
[BDRV_ACCT_FLUSH
];
2551 s
->stats
->wr_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_WRITE
];
2552 s
->stats
->rd_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_READ
];
2553 s
->stats
->flush_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_FLUSH
];
2556 s
->has_parent
= true;
2557 s
->parent
= qmp_query_blockstat(bs
->file
, NULL
);
2563 BlockStatsList
*qmp_query_blockstats(Error
**errp
)
2565 BlockStatsList
*head
= NULL
, *cur_item
= NULL
;
2566 BlockDriverState
*bs
;
2568 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2569 BlockStatsList
*info
= g_malloc0(sizeof(*info
));
2570 info
->value
= qmp_query_blockstat(bs
, NULL
);
2572 /* XXX: waiting for the qapi to support GSList */
2574 head
= cur_item
= info
;
2576 cur_item
->next
= info
;
2584 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
2586 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2587 return bs
->backing_file
;
2588 else if (bs
->encrypted
)
2589 return bs
->filename
;
/* Copy the backing filename into the caller's buffer, truncating safely via
 * pstrcpy() to at most filename_size bytes.
 * NOTE(review): extraction dropped original lines 2596/2598 (braces);
 * code text below is preserved byte-for-byte. */
2594 void bdrv_get_backing_filename(BlockDriverState
*bs
,
2595 char *filename
, int filename_size
)
2597 pstrcpy(filename
, filename_size
, bs
->backing_file
);
2600 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
2601 const uint8_t *buf
, int nb_sectors
)
2603 BlockDriver
*drv
= bs
->drv
;
2606 if (!drv
->bdrv_write_compressed
)
2608 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
2611 if (bs
->dirty_bitmap
) {
2612 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2615 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
2618 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
2620 BlockDriver
*drv
= bs
->drv
;
2623 if (!drv
->bdrv_get_info
)
2625 memset(bdi
, 0, sizeof(*bdi
));
2626 return drv
->bdrv_get_info(bs
, bdi
);
2629 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
2630 int64_t pos
, int size
)
2632 BlockDriver
*drv
= bs
->drv
;
2635 if (drv
->bdrv_save_vmstate
)
2636 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
2638 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
2642 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
2643 int64_t pos
, int size
)
2645 BlockDriver
*drv
= bs
->drv
;
2648 if (drv
->bdrv_load_vmstate
)
2649 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
2651 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2655 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2657 BlockDriver
*drv
= bs
->drv
;
2659 if (!drv
|| !drv
->bdrv_debug_event
) {
2663 return drv
->bdrv_debug_event(bs
, event
);
2667 /**************************************************************/
2668 /* handling of snapshots */
2670 int bdrv_can_snapshot(BlockDriverState
*bs
)
2672 BlockDriver
*drv
= bs
->drv
;
2673 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2677 if (!drv
->bdrv_snapshot_create
) {
2678 if (bs
->file
!= NULL
) {
2679 return bdrv_can_snapshot(bs
->file
);
/* Non-zero when the device was opened with BDRV_O_SNAPSHOT; '!!' normalises
 * the masked flag bit to 0/1.
 * NOTE(review): extraction dropped original lines 2688/2690 (braces);
 * code text below is preserved byte-for-byte. */
2687 int bdrv_is_snapshot(BlockDriverState
*bs
)
2689 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2692 BlockDriverState
*bdrv_snapshots(void)
2694 BlockDriverState
*bs
;
2697 return bs_snapshots
;
2701 while ((bs
= bdrv_next(bs
))) {
2702 if (bdrv_can_snapshot(bs
)) {
2710 int bdrv_snapshot_create(BlockDriverState
*bs
,
2711 QEMUSnapshotInfo
*sn_info
)
2713 BlockDriver
*drv
= bs
->drv
;
2716 if (drv
->bdrv_snapshot_create
)
2717 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2719 return bdrv_snapshot_create(bs
->file
, sn_info
);
2723 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2724 const char *snapshot_id
)
2726 BlockDriver
*drv
= bs
->drv
;
2731 if (drv
->bdrv_snapshot_goto
)
2732 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2735 drv
->bdrv_close(bs
);
2736 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2737 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2739 bdrv_delete(bs
->file
);
2749 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2751 BlockDriver
*drv
= bs
->drv
;
2754 if (drv
->bdrv_snapshot_delete
)
2755 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2757 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2761 int bdrv_snapshot_list(BlockDriverState
*bs
,
2762 QEMUSnapshotInfo
**psn_info
)
2764 BlockDriver
*drv
= bs
->drv
;
2767 if (drv
->bdrv_snapshot_list
)
2768 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2770 return bdrv_snapshot_list(bs
->file
, psn_info
);
2774 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2775 const char *snapshot_name
)
2777 BlockDriver
*drv
= bs
->drv
;
2781 if (!bs
->read_only
) {
2784 if (drv
->bdrv_snapshot_load_tmp
) {
2785 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
2790 BlockDriverState
*bdrv_find_backing_image(BlockDriverState
*bs
,
2791 const char *backing_file
)
2797 if (bs
->backing_hd
) {
2798 if (strcmp(bs
->backing_file
, backing_file
) == 0) {
2799 return bs
->backing_hd
;
2801 return bdrv_find_backing_image(bs
->backing_hd
, backing_file
);
2808 #define NB_SUFFIXES 4
2810 char *get_human_readable_size(char *buf
, int buf_size
, int64_t size
)
2812 static const char suffixes
[NB_SUFFIXES
] = "KMGT";
2817 snprintf(buf
, buf_size
, "%" PRId64
, size
);
2820 for(i
= 0; i
< NB_SUFFIXES
; i
++) {
2821 if (size
< (10 * base
)) {
2822 snprintf(buf
, buf_size
, "%0.1f%c",
2823 (double)size
/ base
,
2826 } else if (size
< (1000 * base
) || i
== (NB_SUFFIXES
- 1)) {
2827 snprintf(buf
, buf_size
, "%" PRId64
"%c",
2828 ((size
+ (base
>> 1)) / base
),
2838 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
2840 char buf1
[128], date_buf
[128], clock_buf
[128];
2850 snprintf(buf
, buf_size
,
2851 "%-10s%-20s%7s%20s%15s",
2852 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2856 ptm
= localtime(&ti
);
2857 strftime(date_buf
, sizeof(date_buf
),
2858 "%Y-%m-%d %H:%M:%S", ptm
);
2860 localtime_r(&ti
, &tm
);
2861 strftime(date_buf
, sizeof(date_buf
),
2862 "%Y-%m-%d %H:%M:%S", &tm
);
2864 secs
= sn
->vm_clock_nsec
/ 1000000000;
2865 snprintf(clock_buf
, sizeof(clock_buf
),
2866 "%02d:%02d:%02d.%03d",
2868 (int)((secs
/ 60) % 60),
2870 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
2871 snprintf(buf
, buf_size
,
2872 "%-10s%-20s%7s%20s%15s",
2873 sn
->id_str
, sn
->name
,
2874 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
2881 /**************************************************************/
2884 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
2885 QEMUIOVector
*qiov
, int nb_sectors
,
2886 BlockDriverCompletionFunc
*cb
, void *opaque
)
2888 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
2890 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2894 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
2895 QEMUIOVector
*qiov
, int nb_sectors
,
2896 BlockDriverCompletionFunc
*cb
, void *opaque
)
2898 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
2900 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2905 typedef struct MultiwriteCB
{
2910 BlockDriverCompletionFunc
*cb
;
2912 QEMUIOVector
*free_qiov
;
2916 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
2920 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2921 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
2922 if (mcb
->callbacks
[i
].free_qiov
) {
2923 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
2925 g_free(mcb
->callbacks
[i
].free_qiov
);
2929 static void multiwrite_cb(void *opaque
, int ret
)
2931 MultiwriteCB
*mcb
= opaque
;
2933 trace_multiwrite_cb(mcb
, ret
);
2935 if (ret
< 0 && !mcb
->error
) {
2939 mcb
->num_requests
--;
2940 if (mcb
->num_requests
== 0) {
2941 multiwrite_user_cb(mcb
);
2946 static int multiwrite_req_compare(const void *a
, const void *b
)
2948 const BlockRequest
*req1
= a
, *req2
= b
;
2951 * Note that we can't simply subtract req2->sector from req1->sector
2952 * here as that could overflow the return value.
2954 if (req1
->sector
> req2
->sector
) {
2956 } else if (req1
->sector
< req2
->sector
) {
2964 * Takes a bunch of requests and tries to merge them. Returns the number of
2965 * requests that remain after merging.
2967 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
2968 int num_reqs
, MultiwriteCB
*mcb
)
2972 // Sort requests by start sector
2973 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
2975 // Check if adjacent requests touch the same clusters. If so, combine them,
2976 // filling up gaps with zero sectors.
2978 for (i
= 1; i
< num_reqs
; i
++) {
2980 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
2982 // Handle exactly sequential writes and overlapping writes.
2983 if (reqs
[i
].sector
<= oldreq_last
) {
2987 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
2993 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
2994 qemu_iovec_init(qiov
,
2995 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
2997 // Add the first request to the merged one. If the requests are
2998 // overlapping, drop the last sectors of the first request.
2999 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
3000 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, size
);
3002 // We should need to add any zeros between the two requests
3003 assert (reqs
[i
].sector
<= oldreq_last
);
3005 // Add the second request
3006 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, reqs
[i
].qiov
->size
);
3008 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
3009 reqs
[outidx
].qiov
= qiov
;
3011 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
3014 reqs
[outidx
].sector
= reqs
[i
].sector
;
3015 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
3016 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
3024 * Submit multiple AIO write requests at once.
3026 * On success, the function returns 0 and all requests in the reqs array have
3027 * been submitted. In error case this function returns -1, and any of the
3028 * requests may or may not be submitted yet. In particular, this means that the
3029 * callback will be called for some of the requests, for others it won't. The
3030 * caller must check the error field of the BlockRequest to wait for the right
3031 * callbacks (if error != 0, no callback will be called).
3033 * The implementation may modify the contents of the reqs array, e.g. to merge
3034 * requests. However, the fields opaque and error are left unmodified as they
3035 * are used to signal failure for a single request to the caller.
3037 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
3042 /* don't submit writes if we don't have a medium */
3043 if (bs
->drv
== NULL
) {
3044 for (i
= 0; i
< num_reqs
; i
++) {
3045 reqs
[i
].error
= -ENOMEDIUM
;
3050 if (num_reqs
== 0) {
3054 // Create MultiwriteCB structure
3055 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
3056 mcb
->num_requests
= 0;
3057 mcb
->num_callbacks
= num_reqs
;
3059 for (i
= 0; i
< num_reqs
; i
++) {
3060 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
3061 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
3064 // Check for mergable requests
3065 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
3067 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
3069 /* Run the aio requests. */
3070 mcb
->num_requests
= num_reqs
;
3071 for (i
= 0; i
< num_reqs
; i
++) {
3072 bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
3073 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
3079 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
3081 acb
->pool
->cancel(acb
);
3084 /* block I/O throttling */
3085 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
3086 bool is_write
, double elapsed_time
, uint64_t *wait
)
3088 uint64_t bps_limit
= 0;
3089 double bytes_limit
, bytes_base
, bytes_res
;
3090 double slice_time
, wait_time
;
3092 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3093 bps_limit
= bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
3094 } else if (bs
->io_limits
.bps
[is_write
]) {
3095 bps_limit
= bs
->io_limits
.bps
[is_write
];
3104 slice_time
= bs
->slice_end
- bs
->slice_start
;
3105 slice_time
/= (NANOSECONDS_PER_SECOND
);
3106 bytes_limit
= bps_limit
* slice_time
;
3107 bytes_base
= bs
->nr_bytes
[is_write
] - bs
->io_base
.bytes
[is_write
];
3108 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3109 bytes_base
+= bs
->nr_bytes
[!is_write
] - bs
->io_base
.bytes
[!is_write
];
3112 /* bytes_base: the bytes of data which have been read/written; and
3113 * it is obtained from the history statistic info.
3114 * bytes_res: the remaining bytes of data which need to be read/written.
3115 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3116 * the total time for completing reading/writting all data.
3118 bytes_res
= (unsigned) nb_sectors
* BDRV_SECTOR_SIZE
;
3120 if (bytes_base
+ bytes_res
<= bytes_limit
) {
3128 /* Calc approx time to dispatch */
3129 wait_time
= (bytes_base
+ bytes_res
) / bps_limit
- elapsed_time
;
3131 /* When the I/O rate at runtime exceeds the limits,
3132 * bs->slice_end need to be extended in order that the current statistic
3133 * info can be kept until the timer fire, so it is increased and tuned
3134 * based on the result of experiment.
3136 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3137 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3139 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3145 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
3146 double elapsed_time
, uint64_t *wait
)
3148 uint64_t iops_limit
= 0;
3149 double ios_limit
, ios_base
;
3150 double slice_time
, wait_time
;
3152 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3153 iops_limit
= bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
3154 } else if (bs
->io_limits
.iops
[is_write
]) {
3155 iops_limit
= bs
->io_limits
.iops
[is_write
];
3164 slice_time
= bs
->slice_end
- bs
->slice_start
;
3165 slice_time
/= (NANOSECONDS_PER_SECOND
);
3166 ios_limit
= iops_limit
* slice_time
;
3167 ios_base
= bs
->nr_ops
[is_write
] - bs
->io_base
.ios
[is_write
];
3168 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3169 ios_base
+= bs
->nr_ops
[!is_write
] - bs
->io_base
.ios
[!is_write
];
3172 if (ios_base
+ 1 <= ios_limit
) {
3180 /* Calc approx time to dispatch */
3181 wait_time
= (ios_base
+ 1) / iops_limit
;
3182 if (wait_time
> elapsed_time
) {
3183 wait_time
= wait_time
- elapsed_time
;
3188 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3189 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3191 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3197 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
3198 bool is_write
, int64_t *wait
)
3200 int64_t now
, max_wait
;
3201 uint64_t bps_wait
= 0, iops_wait
= 0;
3202 double elapsed_time
;
3203 int bps_ret
, iops_ret
;
3205 now
= qemu_get_clock_ns(vm_clock
);
3206 if ((bs
->slice_start
< now
)
3207 && (bs
->slice_end
> now
)) {
3208 bs
->slice_end
= now
+ bs
->slice_time
;
3210 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
3211 bs
->slice_start
= now
;
3212 bs
->slice_end
= now
+ bs
->slice_time
;
3214 bs
->io_base
.bytes
[is_write
] = bs
->nr_bytes
[is_write
];
3215 bs
->io_base
.bytes
[!is_write
] = bs
->nr_bytes
[!is_write
];
3217 bs
->io_base
.ios
[is_write
] = bs
->nr_ops
[is_write
];
3218 bs
->io_base
.ios
[!is_write
] = bs
->nr_ops
[!is_write
];
3221 elapsed_time
= now
- bs
->slice_start
;
3222 elapsed_time
/= (NANOSECONDS_PER_SECOND
);
3224 bps_ret
= bdrv_exceed_bps_limits(bs
, nb_sectors
,
3225 is_write
, elapsed_time
, &bps_wait
);
3226 iops_ret
= bdrv_exceed_iops_limits(bs
, is_write
,
3227 elapsed_time
, &iops_wait
);
3228 if (bps_ret
|| iops_ret
) {
3229 max_wait
= bps_wait
> iops_wait
? bps_wait
: iops_wait
;
3234 now
= qemu_get_clock_ns(vm_clock
);
3235 if (bs
->slice_end
< now
+ max_wait
) {
3236 bs
->slice_end
= now
+ max_wait
;
3249 /**************************************************************/
3250 /* async block device emulation */
3252 typedef struct BlockDriverAIOCBSync
{
3253 BlockDriverAIOCB common
;
3256 /* vector translation state */
3260 } BlockDriverAIOCBSync
;
3262 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
3264 BlockDriverAIOCBSync
*acb
=
3265 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
3266 qemu_bh_delete(acb
->bh
);
3268 qemu_aio_release(acb
);
3271 static AIOPool bdrv_em_aio_pool
= {
3272 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
3273 .cancel
= bdrv_aio_cancel_em
,
3276 static void bdrv_aio_bh_cb(void *opaque
)
3278 BlockDriverAIOCBSync
*acb
= opaque
;
3281 qemu_iovec_from_buffer(acb
->qiov
, acb
->bounce
, acb
->qiov
->size
);
3282 qemu_vfree(acb
->bounce
);
3283 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
3284 qemu_bh_delete(acb
->bh
);
3286 qemu_aio_release(acb
);
3289 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
3293 BlockDriverCompletionFunc
*cb
,
3298 BlockDriverAIOCBSync
*acb
;
3300 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
3301 acb
->is_write
= is_write
;
3303 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
3304 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
3307 qemu_iovec_to_buffer(acb
->qiov
, acb
->bounce
);
3308 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3310 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3313 qemu_bh_schedule(acb
->bh
);
3315 return &acb
->common
;
3318 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
3319 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3320 BlockDriverCompletionFunc
*cb
, void *opaque
)
3322 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
3325 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
3326 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3327 BlockDriverCompletionFunc
*cb
, void *opaque
)
3329 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
3333 typedef struct BlockDriverAIOCBCoroutine
{
3334 BlockDriverAIOCB common
;
3338 } BlockDriverAIOCBCoroutine
;
3340 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
3345 static AIOPool bdrv_em_co_aio_pool
= {
3346 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
3347 .cancel
= bdrv_aio_co_cancel_em
,
3350 static void bdrv_co_em_bh(void *opaque
)
3352 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3354 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
3355 qemu_bh_delete(acb
->bh
);
3356 qemu_aio_release(acb
);
3359 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3360 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
3362 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3363 BlockDriverState
*bs
= acb
->common
.bs
;
3365 if (!acb
->is_write
) {
3366 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
3367 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3369 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
3370 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3373 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3374 qemu_bh_schedule(acb
->bh
);
3377 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
3381 BlockDriverCompletionFunc
*cb
,
3386 BlockDriverAIOCBCoroutine
*acb
;
3388 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3389 acb
->req
.sector
= sector_num
;
3390 acb
->req
.nb_sectors
= nb_sectors
;
3391 acb
->req
.qiov
= qiov
;
3392 acb
->is_write
= is_write
;
3394 co
= qemu_coroutine_create(bdrv_co_do_rw
);
3395 qemu_coroutine_enter(co
, acb
);
3397 return &acb
->common
;
3400 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
3402 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3403 BlockDriverState
*bs
= acb
->common
.bs
;
3405 acb
->req
.error
= bdrv_co_flush(bs
);
3406 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3407 qemu_bh_schedule(acb
->bh
);
3410 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
3411 BlockDriverCompletionFunc
*cb
, void *opaque
)
3413 trace_bdrv_aio_flush(bs
, opaque
);
3416 BlockDriverAIOCBCoroutine
*acb
;
3418 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3419 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
3420 qemu_coroutine_enter(co
, acb
);
3422 return &acb
->common
;
3425 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
3427 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3428 BlockDriverState
*bs
= acb
->common
.bs
;
3430 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
3431 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3432 qemu_bh_schedule(acb
->bh
);
3435 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
3436 int64_t sector_num
, int nb_sectors
,
3437 BlockDriverCompletionFunc
*cb
, void *opaque
)
3440 BlockDriverAIOCBCoroutine
*acb
;
3442 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
3444 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3445 acb
->req
.sector
= sector_num
;
3446 acb
->req
.nb_sectors
= nb_sectors
;
3447 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
3448 qemu_coroutine_enter(co
, acb
);
3450 return &acb
->common
;
3453 void bdrv_init(void)
3455 module_call_init(MODULE_INIT_BLOCK
);
3458 void bdrv_init_with_whitelist(void)
3460 use_bdrv_whitelist
= 1;
3464 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
3465 BlockDriverCompletionFunc
*cb
, void *opaque
)
3467 BlockDriverAIOCB
*acb
;
3469 if (pool
->free_aiocb
) {
3470 acb
= pool
->free_aiocb
;
3471 pool
->free_aiocb
= acb
->next
;
3473 acb
= g_malloc0(pool
->aiocb_size
);
3478 acb
->opaque
= opaque
;
3482 void qemu_aio_release(void *p
)
3484 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
3485 AIOPool
*pool
= acb
->pool
;
3486 acb
->next
= pool
->free_aiocb
;
3487 pool
->free_aiocb
= acb
;
3490 /**************************************************************/
3491 /* Coroutine block device emulation */
3493 typedef struct CoroutineIOCompletion
{
3494 Coroutine
*coroutine
;
3496 } CoroutineIOCompletion
;
3498 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
3500 CoroutineIOCompletion
*co
= opaque
;
3503 qemu_coroutine_enter(co
->coroutine
, NULL
);
3506 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
3507 int nb_sectors
, QEMUIOVector
*iov
,
3510 CoroutineIOCompletion co
= {
3511 .coroutine
= qemu_coroutine_self(),
3513 BlockDriverAIOCB
*acb
;
3516 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
3517 bdrv_co_io_em_complete
, &co
);
3519 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
3520 bdrv_co_io_em_complete
, &co
);
3523 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
3527 qemu_coroutine_yield();
3532 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
3533 int64_t sector_num
, int nb_sectors
,
3536 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
3539 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
3540 int64_t sector_num
, int nb_sectors
,
3543 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
3546 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
3548 RwCo
*rwco
= opaque
;
3550 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
3553 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
3557 if (!bs
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
3561 /* Write back cached data to the OS even with cache=unsafe */
3562 if (bs
->drv
->bdrv_co_flush_to_os
) {
3563 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
3569 /* But don't actually force it to the disk with cache=unsafe */
3570 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
3574 if (bs
->drv
->bdrv_co_flush_to_disk
) {
3575 ret
= bs
->drv
->bdrv_co_flush_to_disk(bs
);
3576 } else if (bs
->drv
->bdrv_aio_flush
) {
3577 BlockDriverAIOCB
*acb
;
3578 CoroutineIOCompletion co
= {
3579 .coroutine
= qemu_coroutine_self(),
3582 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
3586 qemu_coroutine_yield();
3591 * Some block drivers always operate in either writethrough or unsafe
3592 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3593 * know how the server works (because the behaviour is hardcoded or
3594 * depends on server-side configuration), so we can't ensure that
3595 * everything is safe on disk. Returning an error doesn't work because
3596 * that would break guests even if the server operates in writethrough
3599 * Let's hope the user knows what he's doing.
3607 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3608 * in the case of cache=unsafe, so there are no useless flushes.
3610 return bdrv_co_flush(bs
->file
);
3613 void bdrv_invalidate_cache(BlockDriverState
*bs
)
3615 if (bs
->drv
&& bs
->drv
->bdrv_invalidate_cache
) {
3616 bs
->drv
->bdrv_invalidate_cache(bs
);
3620 void bdrv_invalidate_cache_all(void)
3622 BlockDriverState
*bs
;
3624 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3625 bdrv_invalidate_cache(bs
);
3629 void bdrv_clear_incoming_migration_all(void)
3631 BlockDriverState
*bs
;
3633 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3634 bs
->open_flags
= bs
->open_flags
& ~(BDRV_O_INCOMING
);
3638 int bdrv_flush(BlockDriverState
*bs
)
3646 if (qemu_in_coroutine()) {
3647 /* Fast-path if already in coroutine context */
3648 bdrv_flush_co_entry(&rwco
);
3650 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
3651 qemu_coroutine_enter(co
, &rwco
);
3652 while (rwco
.ret
== NOT_DONE
) {
3660 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
3662 RwCo
*rwco
= opaque
;
3664 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
3667 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
3672 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
3674 } else if (bs
->read_only
) {
3676 } else if (bs
->drv
->bdrv_co_discard
) {
3677 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
3678 } else if (bs
->drv
->bdrv_aio_discard
) {
3679 BlockDriverAIOCB
*acb
;
3680 CoroutineIOCompletion co
= {
3681 .coroutine
= qemu_coroutine_self(),
3684 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
3685 bdrv_co_io_em_complete
, &co
);
3689 qemu_coroutine_yield();
3697 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
3702 .sector_num
= sector_num
,
3703 .nb_sectors
= nb_sectors
,
3707 if (qemu_in_coroutine()) {
3708 /* Fast-path if already in coroutine context */
3709 bdrv_discard_co_entry(&rwco
);
3711 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
3712 qemu_coroutine_enter(co
, &rwco
);
3713 while (rwco
.ret
== NOT_DONE
) {
3721 /**************************************************************/
3722 /* removable device support */
3725 * Return TRUE if the media is present
3727 int bdrv_is_inserted(BlockDriverState
*bs
)
3729 BlockDriver
*drv
= bs
->drv
;
3733 if (!drv
->bdrv_is_inserted
)
3735 return drv
->bdrv_is_inserted(bs
);
3739 * Return whether the media changed since the last call to this
3740 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3742 int bdrv_media_changed(BlockDriverState
*bs
)
3744 BlockDriver
*drv
= bs
->drv
;
3746 if (drv
&& drv
->bdrv_media_changed
) {
3747 return drv
->bdrv_media_changed(bs
);
3753 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3755 void bdrv_eject(BlockDriverState
*bs
, bool eject_flag
)
3757 BlockDriver
*drv
= bs
->drv
;
3759 if (drv
&& drv
->bdrv_eject
) {
3760 drv
->bdrv_eject(bs
, eject_flag
);
3763 if (bs
->device_name
[0] != '\0') {
3764 bdrv_emit_qmp_eject_event(bs
, eject_flag
);
3769 * Lock or unlock the media (if it is locked, the user won't be able
3770 * to eject it manually).
3772 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
3774 BlockDriver
*drv
= bs
->drv
;
3776 trace_bdrv_lock_medium(bs
, locked
);
3778 if (drv
&& drv
->bdrv_lock_medium
) {
3779 drv
->bdrv_lock_medium(bs
, locked
);
3783 /* needed for generic scsi interface */
3785 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
3787 BlockDriver
*drv
= bs
->drv
;
3789 if (drv
&& drv
->bdrv_ioctl
)
3790 return drv
->bdrv_ioctl(bs
, req
, buf
);
3794 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
3795 unsigned long int req
, void *buf
,
3796 BlockDriverCompletionFunc
*cb
, void *opaque
)
3798 BlockDriver
*drv
= bs
->drv
;
3800 if (drv
&& drv
->bdrv_aio_ioctl
)
3801 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
3805 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
3807 bs
->buffer_alignment
= align
;
3810 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
3812 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
3815 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
3817 int64_t bitmap_size
;
3819 bs
->dirty_count
= 0;
3821 if (!bs
->dirty_bitmap
) {
3822 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
3823 BDRV_SECTORS_PER_DIRTY_CHUNK
* 8 - 1;
3824 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* 8;
3826 bs
->dirty_bitmap
= g_malloc0(bitmap_size
);
3829 if (bs
->dirty_bitmap
) {
3830 g_free(bs
->dirty_bitmap
);
3831 bs
->dirty_bitmap
= NULL
;
3836 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
3838 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
3840 if (bs
->dirty_bitmap
&&
3841 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
3842 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
3843 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
3849 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
3852 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
3855 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
3857 return bs
->dirty_count
;
3860 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
3862 assert(bs
->in_use
!= in_use
);
3863 bs
->in_use
= in_use
;
3866 int bdrv_in_use(BlockDriverState
*bs
)
3871 void bdrv_iostatus_enable(BlockDriverState
*bs
)
3873 bs
->iostatus_enabled
= true;
3874 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3877 /* The I/O status is only enabled if the drive explicitly
3878 * enables it _and_ the VM is configured to stop on errors */
3879 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
3881 return (bs
->iostatus_enabled
&&
3882 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
3883 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
3884 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
3887 void bdrv_iostatus_disable(BlockDriverState
*bs
)
3889 bs
->iostatus_enabled
= false;
3892 void bdrv_iostatus_reset(BlockDriverState
*bs
)
3894 if (bdrv_iostatus_is_enabled(bs
)) {
3895 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3899 /* XXX: Today this is set by device models because it makes the implementation
3900 quite simple. However, the block layer knows about the error, so it's
3901 possible to implement this without device models being involved */
3902 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
3904 if (bdrv_iostatus_is_enabled(bs
) &&
3905 bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
3907 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
3908 BLOCK_DEVICE_IO_STATUS_FAILED
;
3913 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
3914 enum BlockAcctType type
)
3916 assert(type
< BDRV_MAX_IOTYPE
);
3918 cookie
->bytes
= bytes
;
3919 cookie
->start_time_ns
= get_clock();
3920 cookie
->type
= type
;
3924 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
3926 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
3928 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
3929 bs
->nr_ops
[cookie
->type
]++;
3930 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
3933 int bdrv_img_create(const char *filename
, const char *fmt
,
3934 const char *base_filename
, const char *base_fmt
,
3935 char *options
, uint64_t img_size
, int flags
)
3937 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
3938 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
3939 BlockDriverState
*bs
= NULL
;
3940 BlockDriver
*drv
, *proto_drv
;
3941 BlockDriver
*backing_drv
= NULL
;
3944 /* Find driver and parse its options */
3945 drv
= bdrv_find_format(fmt
);
3947 error_report("Unknown file format '%s'", fmt
);
3952 proto_drv
= bdrv_find_protocol(filename
);
3954 error_report("Unknown protocol '%s'", filename
);
3959 create_options
= append_option_parameters(create_options
,
3960 drv
->create_options
);
3961 create_options
= append_option_parameters(create_options
,
3962 proto_drv
->create_options
);
3964 /* Create parameter list with default values */
3965 param
= parse_option_parameters("", create_options
, param
);
3967 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
3969 /* Parse -o options */
3971 param
= parse_option_parameters(options
, create_options
, param
);
3972 if (param
== NULL
) {
3973 error_report("Invalid options for file format '%s'.", fmt
);
3979 if (base_filename
) {
3980 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
3982 error_report("Backing file not supported for file format '%s'",
3990 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
3991 error_report("Backing file format not supported for file "
3992 "format '%s'", fmt
);
3998 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
3999 if (backing_file
&& backing_file
->value
.s
) {
4000 if (!strcmp(filename
, backing_file
->value
.s
)) {
4001 error_report("Error: Trying to create an image with the "
4002 "same filename as the backing file");
4008 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
4009 if (backing_fmt
&& backing_fmt
->value
.s
) {
4010 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
4012 error_report("Unknown backing file format '%s'",
4013 backing_fmt
->value
.s
);
4019 // The size for the image must always be specified, with one exception:
4020 // If we are using a backing file, we can obtain the size from there
4021 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
4022 if (size
&& size
->value
.n
== -1) {
4023 if (backing_file
&& backing_file
->value
.s
) {
4029 ret
= bdrv_open(bs
, backing_file
->value
.s
, flags
, backing_drv
);
4031 error_report("Could not open '%s'", backing_file
->value
.s
);
4034 bdrv_get_geometry(bs
, &size
);
4037 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
4038 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
4040 error_report("Image creation needs a size parameter");
4046 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
4047 print_option_parameters(param
);
4050 ret
= bdrv_create(drv
, filename
, param
);
4053 if (ret
== -ENOTSUP
) {
4054 error_report("Formatting or formatting option not supported for "
4055 "file format '%s'", fmt
);
4056 } else if (ret
== -EFBIG
) {
4057 error_report("The image size is too large for file format '%s'",
4060 error_report("%s: error while creating %s: %s", filename
, fmt
,
4066 free_option_parameters(create_options
);
4067 free_option_parameters(param
);
4076 void *block_job_create(const BlockJobType
*job_type
, BlockDriverState
*bs
,
4077 BlockDriverCompletionFunc
*cb
, void *opaque
)
4081 if (bs
->job
|| bdrv_in_use(bs
)) {
4084 bdrv_set_in_use(bs
, 1);
4086 job
= g_malloc0(job_type
->instance_size
);
4087 job
->job_type
= job_type
;
4090 job
->opaque
= opaque
;
4095 void block_job_complete(BlockJob
*job
, int ret
)
4097 BlockDriverState
*bs
= job
->bs
;
4099 assert(bs
->job
== job
);
4100 job
->cb(job
->opaque
, ret
);
4103 bdrv_set_in_use(bs
, 0);
4106 int block_job_set_speed(BlockJob
*job
, int64_t value
)
4110 if (!job
->job_type
->set_speed
) {
4113 rc
= job
->job_type
->set_speed(job
, value
);
4120 void block_job_cancel(BlockJob
*job
)
4122 job
->cancelled
= true;
4125 bool block_job_is_cancelled(BlockJob
*job
)
4127 return job
->cancelled
;
4130 void block_job_cancel_sync(BlockJob
*job
)
4132 BlockDriverState
*bs
= job
->bs
;
4134 assert(bs
->job
== job
);
4135 block_job_cancel(job
);
4136 while (bs
->job
!= NULL
&& bs
->job
->busy
) {